Golshid's personal model with common voice #2307

Closed
Add files via upload
Golesheed committed Nov 5, 2022
commit c42ead34e98ed0065857c689dac41e7382b8d681
315 changes: 315 additions & 0 deletions notebooks/Golshid's_personal_model_with_common_voice.ipynb
@@ -0,0 +1,315 @@
{
"nbformat": 4,
"nbformat_minor": 5,
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
},
"colab": {
"private_outputs": true,
"provenance": [],
"collapsed_sections": []
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"source": [
"## ✅ Description\n",
"This is a speech-to-text (STT) model built as an assignment for my Programming course in the MSc Voice Technology at Rijksuniversiteit Groningen / Campus Fryslân.\n",
"\n",
"The data consists of my 105 English recordings from [Common Voice](https://commonvoice.mozilla.org/en).\n",
"\n",
"I originally fetched the data via a download link, as in the template notebook, but that link expires after a while, so I downloaded my recordings and uploaded them here directly, which makes training easier."
],
"metadata": {
"id": "r8pjZ7TLvPny"
},
"id": "r8pjZ7TLvPny"
},
{
"cell_type": "markdown",
"source": [
"## ✅ Upload my .txt file and .zip file\n",
"\n",
"Just drag and drop the two files into `/content` and you are done.\n"
],
"metadata": {
"id": "imUOwCWsNI_w"
},
"id": "imUOwCWsNI_w"
},
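{
"cell_type": "code",
"metadata": {
"id": "upload-sketch"
},
"source": [
"# Alternative to drag-and-drop: upload the files programmatically.\n",
"# This is just a sketch using Colab's files.upload() helper; the\n",
"# filenames it expects (takeout_442_metadata.txt, takeout_442_pt_0.zip)\n",
"# are the ones the import step below uses.\n",
"from google.colab import files\n",
"\n",
"uploaded = files.upload()  # opens a file picker; files are saved to /content\n",
"print(list(uploaded.keys()))"
],
"id": "upload-sketch",
"execution_count": null,
"outputs": []
},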
{
"cell_type": "markdown",
"metadata": {
"id": "96e8b708"
},
"source": [
"### 👀 Take a look at your data"
],
"id": "96e8b708"
},
{
"cell_type": "code",
"metadata": {
"id": "fa2aec77"
},
"source": [
"# take a look at what you've uploaded\n",
"! ls ."
],
"id": "fa2aec77",
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## ✅ Install 🐸STT"
],
"metadata": {
"id": "W8KcmpsTX1p2"
},
"id": "W8KcmpsTX1p2"
},
{
"cell_type": "code",
"metadata": {
"id": "fa2aec78"
},
"source": [
"## Install Coqui STT\n",
"# dependencies\n",
"! apt-get install sox libsox-fmt-mp3 libopusfile0 libopus-dev libopusfile-dev\n",
"! pip install --upgrade pip\n",
"# the Coqui training package\n",
"! pip install coqui_stt_training\n",
"! pip uninstall -y tensorflow; pip install \"tensorflow-gpu==1.15\"\n",
"# code with importer scripts\n",
"! git clone --depth=1 https://github.com/coqui-ai/STT.git"
],
"id": "fa2aec78",
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## ✅ Import my data for 🐸STT\n",
"\n"
],
"metadata": {
"id": "6ITXoId-bRIQ"
},
"id": "6ITXoId-bRIQ"
},
{
"cell_type": "code",
"source": [
"! python STT/bin/import_cv_personal.py --normalize takeout_442_metadata.txt takeout_442_pt_0.zip\n",
"\n",
"# now we're going to split the dataset into {train,dev,test} with auto_import_dataset\n",
"from coqui_stt_training.util.config import initialize_globals_from_args\n",
"import glob\n",
"\n",
"DATA_CSV=glob.glob(\"/content/takeout_442_pt_0/data.csv\")[0]\n",
"\n",
"initialize_globals_from_args(\n",
" auto_input_dataset=DATA_CSV,\n",
" save_checkpoint_dir='checkpoints/'\n",
")"
],
"metadata": {
"id": "Dmm5aIYvbm_q"
},
"id": "Dmm5aIYvbm_q",
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## ✅ Download a pre-trained English 🐸STT model"
],
"metadata": {
"id": "c8AWzPclhxwu"
},
"id": "c8AWzPclhxwu"
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "608d203f"
},
"outputs": [],
"source": [
"### Download pre-trained model\n",
"import os\n",
"import tarfile\n",
"from coqui_stt_training.util.downloader import maybe_download\n",
"\n",
"def download_pretrained_model():\n",
" model_dir=\"english/\"\n",
" if not os.path.exists(\"english/coqui-stt-1.1.0-checkpoint\"):\n",
" maybe_download(\"model.tar.gz\", model_dir, \"https://github.com/coqui-ai/STT/releases/download/v1.1.0/coqui-stt-1.1.0-checkpoint.tar.gz\")\n",
" print('\\nNo extracted pre-trained model found. Extracting now...')\n",
" tar = tarfile.open(\"english/model.tar.gz\")\n",
" tar.extractall(\"english/\")\n",
" tar.close()\n",
" else:\n",
" print('Found pre-trained 🐸STT model, skipping download.')\n",
"\n",
"def download_language_model():\n",
" model_dir=\"english/\"\n",
" if not os.path.exists(\"english/huge-vocabulary.scorer\"):\n",
" maybe_download(\"huge-vocabulary.scorer\", model_dir, \"https://github.com/coqui-ai/STT-models/releases/download/english/coqui/v1.0.0-huge-vocab/huge-vocabulary.scorer\")\n",
" else:\n",
" print('Found 🐸STT language model, skipping download.')\n",
"\n",
"# Download + extract pre-trained English model\n",
"download_pretrained_model()\n",
"download_language_model()"
],
"id": "608d203f"
},
{
"cell_type": "markdown",
"metadata": {
"id": "ae82fd75"
},
"source": [
"## ✅ Customize to my Voice\n",
"\n",
"Let's kick off a training run 🚀🚀🚀 and fine-tune the pre-trained 🐸STT model to my voice."
],
"id": "ae82fd75"
},
{
"cell_type": "code",
"source": [
"from coqui_stt_training.util.config import initialize_globals_from_args\n",
"from coqui_stt_training.train import train\n",
"import glob\n",
"\n",
"TRAIN_CSV = glob.glob(\"/content/takeout_442_pt_0/train.csv\")\n",
"DEV_CSV = glob.glob(\"/content/takeout_442_pt_0/dev.csv\")\n",
"\n",
"initialize_globals_from_args(\n",
" load_checkpoint_dir=\"english/coqui-stt-1.1.0-checkpoint\",\n",
" save_checkpoint_dir=\"my-model/checkpoints\",\n",
" alphabet_config_path=\"english/coqui-stt-1.1.0-checkpoint/alphabet.txt\",\n",
" train_files=TRAIN_CSV,\n",
" dev_files=DEV_CSV,\n",
" epochs=10,\n",
" load_cudnn=True,\n",
" train_batch_size=32,\n",
" dev_batch_size=32,\n",
")\n",
"\n",
"train()"
],
"metadata": {
"id": "ZG01HHCpi_6S"
},
"id": "ZG01HHCpi_6S",
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "9f6dc959"
},
"source": [
"## ✅ Test the model\n",
"\n",
"We made it, yeaaaaaaaaaay! 🙌\n",
"\n",
"Let's kick off the testing run, and see how the model performs.\n",
"\n",
"First, let's see how well the off-the-shelf English model recognizes my voice."
],
"id": "9f6dc959"
},
{
"cell_type": "code",
"source": [
"# Test STT v1.0 on my voice data\n",
"\n",
"from coqui_stt_training.util.config import initialize_globals_from_args\n",
"from coqui_stt_training.evaluate import test\n",
"import glob\n",
"\n",
"TEST_CSV = glob.glob(\"/content/takeout_442_pt_0/test.csv\")\n",
"\n",
"initialize_globals_from_args(\n",
" checkpoint_dir=\"/content/english/coqui-stt-1.1.0-checkpoint\",\n",
" scorer_path=\"english/huge-vocabulary.scorer\",\n",
" test_files = TEST_CSV,\n",
" test_batch_size = 32,\n",
")\n",
"\n",
"test()"
],
"metadata": {
"id": "o9prwww3upwO"
},
"id": "o9prwww3upwO",
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"Now, let's see how much better ✨ my new custom model ✨ performs 🤩"
],
"metadata": {
"id": "txez_adC1b_m"
},
"id": "txez_adC1b_m"
},
{
"cell_type": "code",
"metadata": {
"id": "dd42bc7a"
},
"source": [
"# Test my custom model on my held-out test data\n",
"\n",
"from coqui_stt_training.util.config import initialize_globals_from_args\n",
"from coqui_stt_training.evaluate import test\n",
"import glob\n",
"\n",
"TEST_CSV = glob.glob(\"/content/takeout_442_pt_0/test.csv\")\n",
"\n",
"initialize_globals_from_args(\n",
" checkpoint_dir=\"my-model/checkpoints\",\n",
" scorer_path=\"english/huge-vocabulary.scorer\",\n",
" test_files = TEST_CSV,\n",
" test_batch_size=32,\n",
")\n",
"\n",
"test()"
],
"id": "dd42bc7a",
"execution_count": null,
"outputs": []
}
]
}