From e80d628a3573820a5d49687a8c22bfcd7fbfa891 Mon Sep 17 00:00:00 2001 From: Robert Neale Date: Mon, 16 Dec 2019 15:53:51 -0800 Subject: [PATCH] Create colab as answer to question #174, which shows a Keras example with TF Text. PiperOrigin-RevId: 285868857 --- examples/keras_example_174.ipynb | 163 +++++++++++++++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 examples/keras_example_174.ipynb diff --git a/examples/keras_example_174.ipynb b/examples/keras_example_174.ipynb new file mode 100644 index 000000000..628de4b15 --- /dev/null +++ b/examples/keras_example_174.ipynb @@ -0,0 +1,163 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "TF Text / Keras example (#174)", + "provenance": [] + }, + "source": [ + "##### Copyright 2018 The TensorFlow Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ], + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "cell_type": "code", + "metadata": { + "id": "0aJ6YQE6oB9x", + "colab_type": "code", + "outputId": "ca711144-aed0-4b5b-ac66-9353508fceca", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 632 + } + }, + "source": [ + "!pip install tensorflow_text==2.0.1" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "bMEL3ylpoIEP", + "colab_type": "code", + "colab": {} + }, + "source": [ + "import tensorflow as tf\n", + "import tensorflow_text as text" + ], + "execution_count": 0, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "J4pMEBBBondS", + "colab_type": "code", + "outputId": "09fd1152-9ea0-4d97-fca7-0f68e6bef011", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + } + }, + "source": [ + "ragged_input = tf.ragged.constant([[1, 2, 3, 4, 5], [5, 6]])\n", + "input_data = tf.data.Dataset.from_tensor_slices(ragged_input).batch(2)\n", + "\n", + "model = tf.keras.Sequential([\n", + " 
tf.keras.layers.InputLayer(input_shape=(None,), dtype='int32', ragged=True),\n", + " text.keras.layers.ToDense(pad_value=0, mask=True),\n", + " tf.keras.layers.Embedding(100, 16),\n", + " tf.keras.layers.LSTM(32),\n", + " tf.keras.layers.Dense(32, activation='relu'),\n", + " tf.keras.layers.Dense(1, activation='sigmoid')\n", + "])\n", + "\n", + "model.compile(\n", + " optimizer=\"rmsprop\",\n", + " loss=\"binary_crossentropy\",\n", + " metrics=[\"accuracy\"])\n", + "\n", + "output = model.predict(input_data)\n", + "print(output)" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[[0.49998033]\n", + " [0.5012409 ]]\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "nRJJaWFfsgA3", + "colab_type": "code", + "outputId": "d7d0434f-4de5-4e0a-b57a-06dd03c47d9f", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + } + }, + "source": [ + "def _CreateTable(vocab, num_oov=1):\n", + " init = tf.lookup.KeyValueTensorInitializer(\n", + " vocab,\n", + " tf.range(tf.size(vocab, out_type=tf.int64), dtype=tf.int64),\n", + " key_dtype=tf.string,\n", + " value_dtype=tf.int64)\n", + " return tf.lookup.StaticVocabularyTable(\n", + " init, num_oov, lookup_key_dtype=tf.string)\n", + "\n", + "reviews_data_array = ['I really liked this movie', 'not my favorite']\n", + "reviews_labels_array = [1,0]\n", + "train_x = tf.constant(reviews_data_array)\n", + "train_y = tf.constant(reviews_labels_array)\n", + "\n", + "a = _CreateTable(['I', 'really', 'liked', 'this', 'movie', 'not', 'my', 'favorite'])\n", + "\n", + "def preprocess(data, labels):\n", + " t = text.WhitespaceTokenizer()\n", + " data = t.tokenize(data)\n", + " # data = data.merge_dims(-2,-1)\n", + " ids = tf.ragged.map_flat_values(a.lookup, data)\n", + " return (ids, labels)\n", + "\n", + "train_dataset = tf.data.Dataset.from_tensor_slices((train_x, train_y)).batch(2)\n", + "train_dataset = train_dataset.map(preprocess)\n", + "\n", + 
"model = tf.keras.Sequential([\n", + " tf.keras.layers.InputLayer(input_shape=(None,), dtype='int64', ragged=True),\n", + " text.keras.layers.ToDense(pad_value=0, mask=True),\n", + " tf.keras.layers.Embedding(100, 16),\n", + " tf.keras.layers.LSTM(32),\n", + " tf.keras.layers.Dense(32, activation='relu'),\n", + " tf.keras.layers.Dense(1, activation='sigmoid')\n", + "])\n", + "\n", + "model.compile(\n", + " optimizer=\"rmsprop\",\n", + " loss=\"binary_crossentropy\",\n", + " metrics=[\"accuracy\"])\n", + "\n", + "output = model.fit(train_dataset, epochs=1, verbose=1)\n", + "print(output)" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "1/1 [==============================] - 2s 2s/step - loss: 0.6915 - accuracy: 1.0000\n", + "\n" + ], + "name": "stdout" + } + ] + } + ] +} \ No newline at end of file