diff --git a/notebooks/synthesize.ipynb b/notebooks/synthesize.ipynb
new file mode 100644
index 0000000..473db5b
--- /dev/null
+++ b/notebooks/synthesize.ipynb
@@ -0,0 +1,550 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "name": "synthesize",
+ "provenance": [],
+ "collapsed_sections": []
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "accelerator": "GPU"
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "zdMgfG7GMF_R",
+ "colab_type": "text"
+ },
+ "source": [
+ "# Transformer TTS: A Text-to-Speech Transformer in TensorFlow 2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "JQ5YuFPAxXUy",
+ "colab_type": "code",
+ "outputId": "e9f81ab0-adbe-4741-daee-fd115387b047",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 323
+ }
+ },
+ "source": [
+ "# Clone the repo, the pretrained model and WaveRNN for the vocoder (skipped when already present, so the cell can be re-run)\n",
+ "![ -d TransformerTTS ] || git clone https://github.com/as-ideas/TransformerTTS.git\n",
+ "![ -d tts_model_outputs ] || git clone https://github.com/as-ideas/tts_model_outputs.git\n",
+ "![ -d WaveRNN ] || git clone https://github.com/fatchord/WaveRNN"
+ ],
+ "execution_count": 1,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Cloning into 'TransformerTTS'...\n",
+ "remote: Enumerating objects: 110, done.\u001b[K\n",
+ "remote: Counting objects: 100% (110/110), done.\u001b[K\n",
+ "remote: Compressing objects: 100% (90/90), done.\u001b[K\n",
+ "remote: Total 2334 (delta 55), reused 48 (delta 17), pack-reused 2224\u001b[K\n",
+ "Receiving objects: 100% (2334/2334), 1.60 MiB | 1.82 MiB/s, done.\n",
+ "Resolving deltas: 100% (1573/1573), done.\n",
+ "Cloning into 'tts_model_outputs'...\n",
+ "remote: Enumerating objects: 22, done.\u001b[K\n",
+ "remote: Counting objects: 100% (22/22), done.\u001b[K\n",
+ "remote: Compressing objects: 100% (21/21), done.\u001b[K\n",
+ "remote: Total 65 (delta 9), reused 0 (delta 0), pack-reused 43\u001b[K\n",
+ "Unpacking objects: 100% (65/65), done.\n",
+ "Cloning into 'WaveRNN'...\n",
+ "remote: Enumerating objects: 928, done.\u001b[K\n",
+ "remote: Total 928 (delta 0), reused 0 (delta 0), pack-reused 928\n",
+ "Receiving objects: 100% (928/928), 241.65 MiB | 13.75 MiB/s, done.\n",
+ "Resolving deltas: 100% (540/540), done.\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "9bIzkIGjMRwm",
+ "colab_type": "code",
+ "outputId": "89e451ea-c101-4694-c404-d3c15a358854",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ }
+ },
+ "source": [
+ "# Install requirements (%pip targets the running kernel; quiet flags keep the install log out of the saved notebook)\n",
+ "!apt-get install -y -qq espeak\n",
+ "%pip install -q -r TransformerTTS/requirements.txt"
+ ],
+ "execution_count": 2,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Reading package lists... Done\n",
+ "Building dependency tree \n",
+ "Reading state information... Done\n",
+ "The following additional packages will be installed:\n",
+ " espeak-data libespeak1 libportaudio2 libsonic0\n",
+ "The following NEW packages will be installed:\n",
+ " espeak espeak-data libespeak1 libportaudio2 libsonic0\n",
+ "0 upgraded, 5 newly installed, 0 to remove and 31 not upgraded.\n",
+ "Need to get 1,219 kB of archives.\n",
+ "After this operation, 3,031 kB of additional disk space will be used.\n",
+ "Get:1 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libportaudio2 amd64 19.6.0-1 [64.6 kB]\n",
+ "Get:2 http://archive.ubuntu.com/ubuntu bionic/main amd64 libsonic0 amd64 0.2.0-6 [13.4 kB]\n",
+ "Get:3 http://archive.ubuntu.com/ubuntu bionic/universe amd64 espeak-data amd64 1.48.04+dfsg-5 [934 kB]\n",
+ "Get:4 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libespeak1 amd64 1.48.04+dfsg-5 [145 kB]\n",
+ "Get:5 http://archive.ubuntu.com/ubuntu bionic/universe amd64 espeak amd64 1.48.04+dfsg-5 [61.6 kB]\n",
+ "Fetched 1,219 kB in 3s (457 kB/s)\n",
+ "Selecting previously unselected package libportaudio2:amd64.\n",
+ "(Reading database ... 144433 files and directories currently installed.)\n",
+ "Preparing to unpack .../libportaudio2_19.6.0-1_amd64.deb ...\n",
+ "Unpacking libportaudio2:amd64 (19.6.0-1) ...\n",
+ "Selecting previously unselected package libsonic0:amd64.\n",
+ "Preparing to unpack .../libsonic0_0.2.0-6_amd64.deb ...\n",
+ "Unpacking libsonic0:amd64 (0.2.0-6) ...\n",
+ "Selecting previously unselected package espeak-data:amd64.\n",
+ "Preparing to unpack .../espeak-data_1.48.04+dfsg-5_amd64.deb ...\n",
+ "Unpacking espeak-data:amd64 (1.48.04+dfsg-5) ...\n",
+ "Selecting previously unselected package libespeak1:amd64.\n",
+ "Preparing to unpack .../libespeak1_1.48.04+dfsg-5_amd64.deb ...\n",
+ "Unpacking libespeak1:amd64 (1.48.04+dfsg-5) ...\n",
+ "Selecting previously unselected package espeak.\n",
+ "Preparing to unpack .../espeak_1.48.04+dfsg-5_amd64.deb ...\n",
+ "Unpacking espeak (1.48.04+dfsg-5) ...\n",
+ "Setting up libportaudio2:amd64 (19.6.0-1) ...\n",
+ "Setting up espeak-data:amd64 (1.48.04+dfsg-5) ...\n",
+ "Setting up libsonic0:amd64 (0.2.0-6) ...\n",
+ "Setting up libespeak1:amd64 (1.48.04+dfsg-5) ...\n",
+ "Setting up espeak (1.48.04+dfsg-5) ...\n",
+ "Processing triggers for man-db (2.8.3-2ubuntu0.1) ...\n",
+ "Processing triggers for libc-bin (2.27-3ubuntu1) ...\n",
+ "/sbin/ldconfig.real: /usr/local/lib/python3.6/dist-packages/ideep4py/lib/libmkldnn.so.0 is not a symbolic link\n",
+ "\n",
+ "Requirement already satisfied: matplotlib in /usr/local/lib/python3.6/dist-packages (from -r TransformerTTS/requirements.txt (line 1)) (3.2.1)\n",
+ "Collecting librosa>=0.7.1\n",
+ "\u001b[?25l Downloading https://files.pythonhosted.org/packages/77/b5/1817862d64a7c231afd15419d8418ae1f000742cac275e85c74b219cbccb/librosa-0.7.2.tar.gz (1.6MB)\n",
+ "\u001b[K |████████████████████████████████| 1.6MB 2.8MB/s \n",
+ "\u001b[?25hRequirement already satisfied: numpy>=1.17.4 in /usr/local/lib/python3.6/dist-packages (from -r TransformerTTS/requirements.txt (line 3)) (1.18.4)\n",
+ "Collecting phonemizer==2.1\n",
+ "\u001b[?25l Downloading https://files.pythonhosted.org/packages/d3/82/666045375029df9c2f274923539f43346a7b7abc349b02e33dff585da56f/phonemizer-2.1-py3-none-any.whl (47kB)\n",
+ "\u001b[K |████████████████████████████████| 51kB 6.4MB/s \n",
+ "\u001b[?25hCollecting ruamel.yaml>=0.16.6\n",
+ "\u001b[?25l Downloading https://files.pythonhosted.org/packages/a6/92/59af3e38227b9cc14520bf1e59516d99ceca53e3b8448094248171e9432b/ruamel.yaml-0.16.10-py2.py3-none-any.whl (111kB)\n",
+ "\u001b[K |████████████████████████████████| 112kB 19.5MB/s \n",
+ "\u001b[?25hRequirement already satisfied: tensorflow>=2.2.0 in /usr/local/lib/python3.6/dist-packages (from -r TransformerTTS/requirements.txt (line 6)) (2.2.0)\n",
+ "Requirement already satisfied: tqdm>=4.38.0 in /usr/local/lib/python3.6/dist-packages (from -r TransformerTTS/requirements.txt (line 7)) (4.41.1)\n",
+ "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->-r TransformerTTS/requirements.txt (line 1)) (1.2.0)\n",
+ "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.6/dist-packages (from matplotlib->-r TransformerTTS/requirements.txt (line 1)) (0.10.0)\n",
+ "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->-r TransformerTTS/requirements.txt (line 1)) (2.4.7)\n",
+ "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib->-r TransformerTTS/requirements.txt (line 1)) (2.8.1)\n",
+ "Requirement already satisfied: audioread>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from librosa>=0.7.1->-r TransformerTTS/requirements.txt (line 2)) (2.1.8)\n",
+ "Requirement already satisfied: scipy>=1.0.0 in /usr/local/lib/python3.6/dist-packages (from librosa>=0.7.1->-r TransformerTTS/requirements.txt (line 2)) (1.4.1)\n",
+ "Requirement already satisfied: scikit-learn!=0.19.0,>=0.14.0 in /usr/local/lib/python3.6/dist-packages (from librosa>=0.7.1->-r TransformerTTS/requirements.txt (line 2)) (0.22.2.post1)\n",
+ "Requirement already satisfied: joblib>=0.12 in /usr/local/lib/python3.6/dist-packages (from librosa>=0.7.1->-r TransformerTTS/requirements.txt (line 2)) (0.15.0)\n",
+ "Requirement already satisfied: decorator>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from librosa>=0.7.1->-r TransformerTTS/requirements.txt (line 2)) (4.4.2)\n",
+ "Requirement already satisfied: six>=1.3 in /usr/local/lib/python3.6/dist-packages (from librosa>=0.7.1->-r TransformerTTS/requirements.txt (line 2)) (1.12.0)\n",
+ "Requirement already satisfied: resampy>=0.2.2 in /usr/local/lib/python3.6/dist-packages (from librosa>=0.7.1->-r TransformerTTS/requirements.txt (line 2)) (0.2.2)\n",
+ "Requirement already satisfied: numba>=0.43.0 in /usr/local/lib/python3.6/dist-packages (from librosa>=0.7.1->-r TransformerTTS/requirements.txt (line 2)) (0.48.0)\n",
+ "Collecting soundfile>=0.9.0\n",
+ " Downloading https://files.pythonhosted.org/packages/eb/f2/3cbbbf3b96fb9fa91582c438b574cff3f45b29c772f94c400e2c99ef5db9/SoundFile-0.10.3.post1-py2.py3-none-any.whl\n",
+ "Requirement already satisfied: attrs>=18.1 in /usr/local/lib/python3.6/dist-packages (from phonemizer==2.1->-r TransformerTTS/requirements.txt (line 4)) (19.3.0)\n",
+ "Collecting segments\n",
+ " Downloading https://files.pythonhosted.org/packages/5b/a0/0c3fe64787745c39eb3f2f5f5f9ed8d008d9ef22e9d7f9f52f71ea4712f7/segments-2.1.3-py2.py3-none-any.whl\n",
+ "Collecting ruamel.yaml.clib>=0.1.2; platform_python_implementation == \"CPython\" and python_version < \"3.9\"\n",
+ "\u001b[?25l Downloading https://files.pythonhosted.org/packages/53/77/4bcd63f362bcb6c8f4f06253c11f9772f64189bf08cf3f40c5ccbda9e561/ruamel.yaml.clib-0.2.0-cp36-cp36m-manylinux1_x86_64.whl (548kB)\n",
+ "\u001b[K |████████████████████████████████| 552kB 14.8MB/s \n",
+ "\u001b[?25hRequirement already satisfied: protobuf>=3.8.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (3.10.0)\n",
+ "Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (1.1.0)\n",
+ "Requirement already satisfied: absl-py>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (0.9.0)\n",
+ "Requirement already satisfied: tensorboard<2.3.0,>=2.2.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (2.2.1)\n",
+ "Requirement already satisfied: grpcio>=1.8.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (1.29.0)\n",
+ "Requirement already satisfied: wrapt>=1.11.1 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (1.12.1)\n",
+ "Requirement already satisfied: gast==0.3.3 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (0.3.3)\n",
+ "Requirement already satisfied: astunparse==1.6.3 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (1.6.3)\n",
+ "Requirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (3.2.1)\n",
+ "Requirement already satisfied: tensorflow-estimator<2.3.0,>=2.2.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (2.2.0)\n",
+ "Requirement already satisfied: wheel>=0.26; python_version >= \"3\" in /usr/local/lib/python3.6/dist-packages (from tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (0.34.2)\n",
+ "Requirement already satisfied: google-pasta>=0.1.8 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (0.2.0)\n",
+ "Requirement already satisfied: keras-preprocessing>=1.1.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (1.1.2)\n",
+ "Requirement already satisfied: h5py<2.11.0,>=2.10.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (2.10.0)\n",
+ "Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from numba>=0.43.0->librosa>=0.7.1->-r TransformerTTS/requirements.txt (line 2)) (46.3.0)\n",
+ "Requirement already satisfied: llvmlite<0.32.0,>=0.31.0dev0 in /usr/local/lib/python3.6/dist-packages (from numba>=0.43.0->librosa>=0.7.1->-r TransformerTTS/requirements.txt (line 2)) (0.31.0)\n",
+ "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.6/dist-packages (from soundfile>=0.9.0->librosa>=0.7.1->-r TransformerTTS/requirements.txt (line 2)) (1.14.0)\n",
+ "Requirement already satisfied: regex in /usr/local/lib/python3.6/dist-packages (from segments->phonemizer==2.1->-r TransformerTTS/requirements.txt (line 4)) (2019.12.20)\n",
+ "Collecting clldutils>=1.7.3\n",
+ "\u001b[?25l Downloading https://files.pythonhosted.org/packages/f1/ec/76860c7c36e8f6683a6d5041ebda054f4c1deca1a8aac9ea3357105139f5/clldutils-3.5.1-py2.py3-none-any.whl (188kB)\n",
+ "\u001b[K |████████████████████████████████| 194kB 20.4MB/s \n",
+ "\u001b[?25hCollecting csvw>=1.5.6\n",
+ " Downloading https://files.pythonhosted.org/packages/d1/b6/8fef6788b8f05b21424a17ae3881eff916d42e5c7e87f57a85d9d7abf0a1/csvw-1.7.0-py2.py3-none-any.whl\n",
+ "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.3.0,>=2.2.0->tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (3.2.2)\n",
+ "Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.3.0,>=2.2.0->tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (0.4.1)\n",
+ "Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.3.0,>=2.2.0->tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (1.6.0.post3)\n",
+ "Requirement already satisfied: google-auth<2,>=1.6.3 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.3.0,>=2.2.0->tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (1.7.2)\n",
+ "Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.3.0,>=2.2.0->tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (1.0.1)\n",
+ "Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard<2.3.0,>=2.2.0->tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (2.23.0)\n",
+ "Requirement already satisfied: pycparser in /usr/local/lib/python3.6/dist-packages (from cffi>=1.0->soundfile>=0.9.0->librosa>=0.7.1->-r TransformerTTS/requirements.txt (line 2)) (2.20)\n",
+ "Collecting colorlog\n",
+ " Downloading https://files.pythonhosted.org/packages/00/0d/22c73c2eccb21dd3498df7d22c0b1d4a30f5a5fb3feb64e1ce06bc247747/colorlog-4.1.0-py2.py3-none-any.whl\n",
+ "Requirement already satisfied: tabulate>=0.7.7 in /usr/local/lib/python3.6/dist-packages (from clldutils>=1.7.3->segments->phonemizer==2.1->-r TransformerTTS/requirements.txt (line 4)) (0.8.7)\n",
+ "Collecting rfc3986\n",
+ " Downloading https://files.pythonhosted.org/packages/78/be/7b8b99fd74ff5684225f50dd0e865393d2265656ef3b4ba9eaaaffe622b8/rfc3986-1.4.0-py2.py3-none-any.whl\n",
+ "Collecting isodate\n",
+ "\u001b[?25l Downloading https://files.pythonhosted.org/packages/9b/9f/b36f7774ff5ea8e428fdcfc4bb332c39ee5b9362ddd3d40d9516a55221b2/isodate-0.6.0-py2.py3-none-any.whl (45kB)\n",
+ "\u001b[K |████████████████████████████████| 51kB 6.1MB/s \n",
+ "\u001b[?25hRequirement already satisfied: uritemplate>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from csvw>=1.5.6->segments->phonemizer==2.1->-r TransformerTTS/requirements.txt (line 4)) (3.0.1)\n",
+ "Requirement already satisfied: importlib-metadata; python_version < \"3.8\" in /usr/local/lib/python3.6/dist-packages (from markdown>=2.6.8->tensorboard<2.3.0,>=2.2.0->tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (1.6.0)\n",
+ "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.6/dist-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.3.0,>=2.2.0->tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (1.3.0)\n",
+ "Requirement already satisfied: rsa<4.1,>=3.1.4 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<2.3.0,>=2.2.0->tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (4.0)\n",
+ "Requirement already satisfied: cachetools<3.2,>=2.0.0 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<2.3.0,>=2.2.0->tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (3.1.1)\n",
+ "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.6/dist-packages (from google-auth<2,>=1.6.3->tensorboard<2.3.0,>=2.2.0->tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (0.2.8)\n",
+ "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<2.3.0,>=2.2.0->tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (3.0.4)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<2.3.0,>=2.2.0->tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (2020.4.5.1)\n",
+ "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<2.3.0,>=2.2.0->tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (1.24.3)\n",
+ "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests<3,>=2.21.0->tensorboard<2.3.0,>=2.2.0->tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (2.9)\n",
+ "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.6/dist-packages (from importlib-metadata; python_version < \"3.8\"->markdown>=2.6.8->tensorboard<2.3.0,>=2.2.0->tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (3.1.0)\n",
+ "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.6/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.3.0,>=2.2.0->tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (3.1.0)\n",
+ "Requirement already satisfied: pyasn1>=0.1.3 in /usr/local/lib/python3.6/dist-packages (from rsa<4.1,>=3.1.4->google-auth<2,>=1.6.3->tensorboard<2.3.0,>=2.2.0->tensorflow>=2.2.0->-r TransformerTTS/requirements.txt (line 6)) (0.4.8)\n",
+ "Building wheels for collected packages: librosa\n",
+ " Building wheel for librosa (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ " Created wheel for librosa: filename=librosa-0.7.2-cp36-none-any.whl size=1612885 sha256=24d7bbec2757303377b47d49db4db3bfc6608222ba90930a2c1e0564e6a04aee\n",
+ " Stored in directory: /root/.cache/pip/wheels/4c/6e/d7/bb93911540d2d1e44d690a1561871e5b6af82b69e80938abef\n",
+ "Successfully built librosa\n",
+ "Installing collected packages: soundfile, librosa, colorlog, rfc3986, isodate, csvw, clldutils, segments, phonemizer, ruamel.yaml.clib, ruamel.yaml\n",
+ " Found existing installation: librosa 0.6.3\n",
+ " Uninstalling librosa-0.6.3:\n",
+ " Successfully uninstalled librosa-0.6.3\n",
+ "Successfully installed clldutils-3.5.1 colorlog-4.1.0 csvw-1.7.0 isodate-0.6.0 librosa-0.7.2 phonemizer-2.1 rfc3986-1.4.0 ruamel.yaml-0.16.10 ruamel.yaml.clib-0.2.0 segments-2.1.3 soundfile-0.10.3.post1\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "LucwkAK1yEVq",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 88
+ },
+ "outputId": "bef65ba5-1549-488c-d4dc-788965fa93ee"
+ },
+ "source": [
+ "# Load the pretrained TransformerTTS model\n",
+ "import sys\n",
+ "import IPython.display as ipd\n",
+ "\n",
+ "config_path = 'tts_model_outputs/ljspeech_transformertts/standard'\n",
+ "project_path = 'TransformerTTS'\n",
+ "if project_path not in sys.path:  # re-running the cell must not stack duplicate entries\n",
+ "    sys.path.append(project_path)\n",
+ "from utils.config_manager import ConfigManager  # resolved through project_path\n",
+ "from utils.audio import reconstruct_waveform\n",
+ "\n",
+ "config_loader = ConfigManager(config_path)\n",
+ "model = config_loader.load_model(f'{config_path}/model_weights/ckpt-90')  # checkpoint lives under config_path"
+ ],
+ "execution_count": 3,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "WARNING: could not retrieve git hash. Command '['git', 'describe', '--always']' returned non-zero exit status 128.\n",
+ "WARNING: could not check git hash. Command '['git', 'describe', '--always']' returned non-zero exit status 128.\n",
+ "restored weights from tts_model_outputs/ljspeech_transformertts/standard/model_weights/ckpt-90 at step 900000\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "_5RKHIDQyZvo",
+ "colab_type": "code",
+ "outputId": "a8c04963-ab23-480e-9826-53de2db0c67c",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ }
+ },
+ "source": [
+ "# Synthesize a mel spectrogram from the input text\n",
+ "sentence = 'Scientists at the CERN laboratory, say they have discovered a new particle.'\n",
+ "out = model.predict(sentence)"
+ ],
+ "execution_count": 4,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "pred text mel: 397 stop out: -1.9915766716003418Stopping\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "GXxdDHOAyZ6f",
+ "colab_type": "code",
+ "outputId": "d319bc2c-2843-4b51-e1ce-76c2857f255e",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 75
+ }
+ },
+ "source": [
+ "# Convert spectrogram to wav (with Griffin-Lim)\n",
+ "wav = reconstruct_waveform(out['mel'].numpy().T, config=config_loader.config)\n",
+ "ipd.display(ipd.Audio(wav, rate=config_loader.config['sampling_rate']))"
+ ],
+ "execution_count": 5,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "tags": []
+ }
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "eZJo81viVus-",
+ "colab_type": "text"
+ },
+ "source": [
+ "### WaveRNN"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "5oQhgBhUPB9C",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
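+ "# Export the mel spectrogram for WaveRNN, rescaled as (mel + 4) / 8 -- NOTE(review): presumably the range WaveRNN expects; confirm\n",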
+ "import numpy as np\n",
+ "from pathlib import Path\n",
+ "WaveRNN_path = Path('WaveRNN/')\n",
+ "np.save(WaveRNN_path / 'scientists.npy', (out['mel'].numpy().T+4.)/8.)"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "WjIuQALHTr-R",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "# Swap TransformerTTS's `utils` package for WaveRNN's, then import the vocoder\n",
+ "import sys\n",
+ "sys.path[:] = [p for p in sys.path if p != 'TransformerTTS']  # unlike remove(), safe to re-run\n",
+ "for cached in [m for m in sys.modules if m == 'utils' or m.startswith('utils.')]:\n",
+ "    sys.modules.pop(cached)  # also drop submodules so none of them shadow WaveRNN's\n",
+ "sys.path.append('WaveRNN/')\n",
+ "from utils.dsp import hp\n",
+ "from models.fatchord_version import WaveRNN\n",
+ "import torch\n",
+ "import numpy as np\n",
+ "from pathlib import Path"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "dptoYzL1XFAr",
+ "colab_type": "code",
+ "outputId": "a87f9520-94cb-4306-d1b9-b8aa6b5b68bc",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 51
+ }
+ },
+ "source": [
+ "# Unzip the pretrained model (-o overwrites without prompting, so a re-run cannot stall on unzip's replace? prompt)\n",
+ "!unzip -o WaveRNN/pretrained/ljspeech.wavernn.mol.800k.zip -d WaveRNN/pretrained/"
+ ],
+ "execution_count": 8,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Archive: WaveRNN/pretrained/ljspeech.wavernn.mol.800k.zip\n",
+ " inflating: WaveRNN/pretrained/latest_weights.pyt \n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "rKixR97aTtwX",
+ "colab_type": "code",
+ "outputId": "5bf538f8-bf7c-4ca3-f6a8-93926a457ba3",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ }
+ },
+ "source": [
+ "# Build the WaveRNN vocoder on the CPU from the repo's hparams, then restore the pretrained weights\n",
+ "hp.configure(WaveRNN_path / 'hparams.py')  # populate hp from the hparams file\n",
+ "device = torch.device('cpu')\n",
+ "vocoder_kwargs = dict(rnn_dims=hp.voc_rnn_dims,\n",
+ "                      fc_dims=hp.voc_fc_dims,\n",
+ "                      bits=hp.bits,\n",
+ "                      pad=hp.voc_pad,\n",
+ "                      upsample_factors=hp.voc_upsample_factors,\n",
+ "                      feat_dims=hp.num_mels,\n",
+ "                      compute_dims=hp.voc_compute_dims,\n",
+ "                      res_out_dims=hp.voc_res_out_dims,\n",
+ "                      res_blocks=hp.voc_res_blocks,\n",
+ "                      hop_length=hp.hop_length,\n",
+ "                      sample_rate=hp.sample_rate,\n",
+ "                      mode=hp.voc_mode)\n",
+ "model = WaveRNN(**vocoder_kwargs).to(device)\n",
+ "model.load(str(WaveRNN_path / 'pretrained/latest_weights.pyt'))"
+ ],
+ "execution_count": 9,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Trainable Parameters: 4.234M\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "mPF7TrqDOE8S",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ "# Silence TensorFlow log messages below ERROR level\n",
+ "import tensorflow as tf\n",
+ "tf.get_logger().setLevel('ERROR')"
+ ],
+ "execution_count": 0,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "EVkdFQeRUGQ-",
+ "colab_type": "code",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ },
+ "outputId": "62d4a302-de02-4acb-8979-4b30b3903db5"
+ },
+ "source": [
+ "# Vocode the exported mel spectrogram with the pre-trained WaveRNN (a leading batch axis is added first)\n",
+ "mel = np.load(WaveRNN_path / 'scientists.npy')\n",
+ "_ = model.generate(np.expand_dims(mel, axis=0), 'scientists.wav', False, 1, hp.voc_overlap, hp.mu_law)"
+ ],
+ "execution_count": 11,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "| ████████████████ 109400/109450 | Batch Size: 1 | Gen Rate: 0.7kHz | "
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "vQYaZawLXTJI",
+ "colab_type": "code",
+ "outputId": "bc677767-da4c-4125-b4a0-0e2f43a93efc",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 75
+ }
+ },
+ "source": [
+ "# Play the wav file generated by WaveRNN\n",
+ "ipd.display(ipd.Audio('scientists.wav'))"
+ ],
+ "execution_count": 12,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "tags": []
+ }
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "jWX00MuHYojU",
+ "colab_type": "code",
+ "colab": {}
+ },
+ "source": [
+ ""
+ ],
+ "execution_count": 0,
+ "outputs": []
+ }
+ ]
+}
\ No newline at end of file