Create a Colab as an answer to question #174, which shows a Keras example …

…with TF Text. PiperOrigin-RevId: 285868857
tensorflow · Dec 16, 2019 · e80d628 · e80d628
1 parent bc7c213
commit e80d628
Showing 1 changed file with 163 additions and 0 deletions.
diff --git a/examples/keras_example_174.ipynb b/examples/keras_example_174.ipynb
@@ -0,0 +1,163 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "TF Text / Keras example (#174)",
+      "provenance": []
+    },
+    "source": [
+      "##### Copyright 2018 The TensorFlow Authors.\n",
+      "\n",
+      "Licensed under the Apache License, Version 2.0 (the \"License\");"
+    ],
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "0aJ6YQE6oB9x",
+        "colab_type": "code",
+        "outputId": "ca711144-aed0-4b5b-ac66-9353508fceca",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 632
+        }
+      },
+      "source": [
+        "!pip install tensorflow_text==2.0.1"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "bMEL3ylpoIEP",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "import tensorflow as tf\n",
+        "import tensorflow_text as text"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "J4pMEBBBondS",
+        "colab_type": "code",
+        "outputId": "09fd1152-9ea0-4d97-fca7-0f68e6bef011",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 51
+        }
+      },
+      "source": [
+        "# Two rows of different lengths, sliced per row and re-batched with batch size 2.\n",
+        "ragged_input = tf.ragged.constant([[1, 2, 3, 4, 5], [5, 6]])\n",
+        "input_data = tf.data.Dataset.from_tensor_slices(ragged_input).batch(2)\n",
+        "\n",
+        "# Ragged int32 input -> ToDense (pad_value=0, mask=True) -> Embedding -> LSTM\n",
+        "# -> Dense(relu) -> single sigmoid unit.\n",
+        "model = tf.keras.Sequential()\n",
+        "model.add(tf.keras.layers.InputLayer(\n",
+        "    input_shape=(None,), dtype='int32', ragged=True))\n",
+        "model.add(text.keras.layers.ToDense(pad_value=0, mask=True))\n",
+        "model.add(tf.keras.layers.Embedding(100, 16))\n",
+        "model.add(tf.keras.layers.LSTM(32))\n",
+        "model.add(tf.keras.layers.Dense(32, activation='relu'))\n",
+        "model.add(tf.keras.layers.Dense(1, activation='sigmoid'))\n",
+        "\n",
+        "model.compile(\n",
+        "    optimizer=\"rmsprop\", loss=\"binary_crossentropy\", metrics=[\"accuracy\"])\n",
+        "\n",
+        "# No fit() call happens in this cell, so the model is untrained; predict() only\n",
+        "# demonstrates that the ragged batch flows through the whole stack.\n",
+        "output = model.predict(input_data)\n",
+        "print(output)"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "[[0.49998033]\n",
+            " [0.5012409 ]]\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "nRJJaWFfsgA3",
+        "colab_type": "code",
+        "outputId": "d7d0434f-4de5-4e0a-b57a-06dd03c47d9f",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 51
+        }
+      },
+      "source": [
+        "def _CreateTable(vocab, num_oov=1):\n",
+        "  \"\"\"Builds a string-to-int64 vocabulary lookup table.\n",
+        "\n",
+        "  Each entry of `vocab` is mapped to its position in `vocab` (0, 1, 2, ...).\n",
+        "\n",
+        "  Args:\n",
+        "    vocab: Tokens used as the table keys (declared as tf.string keys).\n",
+        "    num_oov: Number of out-of-vocabulary buckets handed to the table.\n",
+        "\n",
+        "  Returns:\n",
+        "    A tf.lookup.StaticVocabularyTable that is looked up with tf.string keys.\n",
+        "  \"\"\"\n",
+        "  vocab_size = tf.size(vocab, out_type=tf.int64)\n",
+        "  token_ids = tf.range(vocab_size, dtype=tf.int64)\n",
+        "  initializer = tf.lookup.KeyValueTensorInitializer(\n",
+        "      keys=vocab,\n",
+        "      values=token_ids,\n",
+        "      key_dtype=tf.string,\n",
+        "      value_dtype=tf.int64)\n",
+        "  return tf.lookup.StaticVocabularyTable(\n",
+        "      initializer, num_oov_buckets=num_oov, lookup_key_dtype=tf.string)\n",
+        "\n",
+        "# Two toy reviews with labels 1 and 0.\n",
+        "reviews_data_array = [\n",
+        "    'I really liked this movie',\n",
+        "    'not my favorite',\n",
+        "]\n",
+        "reviews_labels_array = [1, 0]\n",
+        "train_x = tf.constant(reviews_data_array)\n",
+        "train_y = tf.constant(reviews_labels_array)\n",
+        "\n",
+        "# Vocabulary table containing every whitespace-separated word of the reviews.\n",
+        "# NOTE(review): ids follow list order, so 'I' gets id 0, the same value the\n",
+        "# model's ToDense layer uses as pad_value -- confirm this overlap is intended.\n",
+        "a = _CreateTable([\n",
+        "    'I', 'really', 'liked', 'this', 'movie', 'not', 'my', 'favorite',\n",
+        "])\n",
+        "\n",
+        "def preprocess(data, labels):\n",
+        "  \"\"\"Splits strings on whitespace and maps every token to its vocabulary id.\n",
+        "\n",
+        "  Args:\n",
+        "    data: String tensor to tokenize.\n",
+        "    labels: Labels, passed through unchanged.\n",
+        "\n",
+        "  Returns:\n",
+        "    An `(ids, labels)` tuple; `ids` keeps the ragged structure of the tokens,\n",
+        "    with each token replaced by the result of looking it up in table `a`.\n",
+        "  \"\"\"\n",
+        "  tokenizer = text.WhitespaceTokenizer()\n",
+        "  tokens = tokenizer.tokenize(data)\n",
+        "  token_ids = tf.ragged.map_flat_values(a.lookup, tokens)\n",
+        "  return token_ids, labels\n",
+        "\n",
+        "# Batch the raw strings and labels first, then tokenize and look up ids per batch.\n",
+        "train_dataset = (\n",
+        "    tf.data.Dataset.from_tensor_slices((train_x, train_y))\n",
+        "    .batch(2)\n",
+        "    .map(preprocess))\n",
+        "\n",
+        "# The input dtype is int64 to match the int64 ids produced by the lookup table.\n",
+        "model = tf.keras.Sequential()\n",
+        "model.add(tf.keras.layers.InputLayer(\n",
+        "    input_shape=(None,), dtype='int64', ragged=True))\n",
+        "model.add(text.keras.layers.ToDense(pad_value=0, mask=True))\n",
+        "model.add(tf.keras.layers.Embedding(100, 16))\n",
+        "model.add(tf.keras.layers.LSTM(32))\n",
+        "model.add(tf.keras.layers.Dense(32, activation='relu'))\n",
+        "model.add(tf.keras.layers.Dense(1, activation='sigmoid'))\n",
+        "\n",
+        "model.compile(\n",
+        "    optimizer=\"rmsprop\", loss=\"binary_crossentropy\", metrics=[\"accuracy\"])\n",
+        "\n",
+        "# fit() returns a History object, so the print below shows that object's repr\n",
+        "# after the training progress line.\n",
+        "output = model.fit(train_dataset, epochs=1, verbose=1)\n",
+        "print(output)"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "1/1 [==============================] - 2s 2s/step - loss: 0.6915 - accuracy: 1.0000\n",
+            "<tensorflow.python.keras.callbacks.History object at 0x7f7d64b5e5f8>\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    }
+  ]
+}