diff --git a/notebooks/synthesize.ipynb b/notebooks/synthesize.ipynb new file mode 100644 index 0000000..473db5b --- /dev/null +++ b/notebooks/synthesize.ipynb @@ -0,0 +1,550 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "synthesize", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "zdMgfG7GMF_R", + "colab_type": "text" + }, + "source": [ + "# Transformer TTS: A Text-to-Speech Transformer in TensorFlow 2" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "JQ5YuFPAxXUy", + "colab_type": "code", + "outputId": "e9f81ab0-adbe-4741-daee-fd115387b047", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 323 + } + }, + "source": [ + "# Clone the repo, the pretrained model and WaveRNN for the vocoder (each clone is skipped when its directory already exists)\n", + "!test -d TransformerTTS || git clone https://github.com/as-ideas/TransformerTTS.git\n", + "!test -d tts_model_outputs || git clone https://github.com/as-ideas/tts_model_outputs.git\n", + "!test -d WaveRNN || git clone https://github.com/fatchord/WaveRNN" + ], + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Cloning into 'TransformerTTS'...\n", + "remote: Enumerating objects: 110, done.\u001b[K\n", + "remote: Counting objects: 100% (110/110), done.\u001b[K\n", + "remote: Compressing objects: 100% (90/90), done.\u001b[K\n", + "remote: Total 2334 (delta 55), reused 48 (delta 17), pack-reused 2224\u001b[K\n", + "Receiving objects: 100% (2334/2334), 1.60 MiB | 1.82 MiB/s, done.\n", + "Resolving deltas: 100% (1573/1573), done.\n", + "Cloning into 'tts_model_outputs'...\n", + "remote: Enumerating objects: 22, done.\u001b[K\n", + "remote: Counting objects: 100% (22/22), done.\u001b[K\n", + "remote: Compressing objects: 100% (21/21), done.\u001b[K\n", + "remote: Total 65 (delta 9), reused 0 (delta 0), pack-reused 43\u001b[K\n", + "Unpacking objects: 100% (65/65), done.\n", + "Cloning 
into 'WaveRNN'...\n", + "remote: Enumerating objects: 928, done.\u001b[K\n", + "remote: Total 928 (delta 0), reused 0 (delta 0), pack-reused 928\n", + "Receiving objects: 100% (928/928), 241.65 MiB | 13.75 MiB/s, done.\n", + "Resolving deltas: 100% (540/540), done.\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "9bIzkIGjMRwm", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Install requirements (quiet mode: the full apt/pip log is noise in a committed notebook)\n", + "!apt-get install -y -qq espeak\n", + "!pip install -q -r TransformerTTS/requirements.txt" + ], + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "LucwkAK1yEVq", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 88 + }, + "outputId": "bef65ba5-1549-488c-d4dc-788965fa93ee" + }, + "source": [ + "# Load pretrained models\n", + "config_path = 'tts_model_outputs/ljspeech_transformertts/standard'\n", + "project_path = 'TransformerTTS'\n", + "\n", + "import sys\n", + "# Guarded so that re-running this cell does not add duplicate path entries\n", + "if project_path not in sys.path:\n", + "    sys.path.append(project_path)\n", + "from utils.config_manager import ConfigManager\n", + "from utils.audio import reconstruct_waveform\n", + "\n", + "import IPython.display as ipd\n", + "\n", + "config_loader = ConfigManager(config_path)\n", + "# The checkpoint lives under the config directory; derive its path from config_path instead of repeating the literal\n", + "model = config_loader.load_model(config_path + '/model_weights/ckpt-90')" + ], + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "text": [ + "WARNING: could not retrieve git hash. Command '['git', 'describe', '--always']' returned non-zero exit status 128.\n", + "WARNING: could not check git hash. 
Command '['git', 'describe', '--always']' returned non-zero exit status 128.\n", + "restored weights from tts_model_outputs/ljspeech_transformertts/standard/model_weights/ckpt-90 at step 900000\n" + ], + "name": "stdout" + } + ] + },
+ { + "cell_type": "code", + "metadata": { + "id": "_5RKHIDQyZvo", + "colab_type": "code", + "outputId": "a8c04963-ab23-480e-9826-53de2db0c67c", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + } + }, + "source": [ + "# Synthesize text\n", + "sentence = 'Scientists at the CERN laboratory, say they have discovered a new particle.'\n", + "out = model.predict(sentence)" + ], + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "text": [ + "pred text mel: 397 stop out: -1.9915766716003418Stopping\n" + ], + "name": "stdout" + } + ] + },
+ { + "cell_type": "code", + "metadata": { + "id": "GXxdDHOAyZ6f", + "colab_type": "code", + "outputId": "d319bc2c-2843-4b51-e1ce-76c2857f255e", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 75 + } + }, + "source": [ + "# Convert spectrogram to wav (with griffin lim)\n", + "wav = reconstruct_waveform(out['mel'].numpy().T, config=config_loader.config)\n", + "ipd.display(ipd.Audio(wav, rate=config_loader.config['sampling_rate']))" + ], + "execution_count": 5, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + } + } + ] + },
+ { + "cell_type": "markdown", + "metadata": { + "id": "eZJo81viVus-", + "colab_type": "text" + }, + "source": [ + "### WaveRNN" + ] + },
+ { + "cell_type": "code", + "metadata": { + "id": "5oQhgBhUPB9C", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Export for WaveRNN\n", + "import numpy as np\n", + "from pathlib import Path\n", + "WaveRNN_path = Path('WaveRNN/')\n", + "# The mel is shifted and scaled as (x + 4) / 8 before saving -- presumably to match the input range\n", + "# the pretrained WaveRNN expects; TODO confirm against WaveRNN's preprocessing\n", + "np.save(WaveRNN_path / 'scientists.npy', (out['mel'].numpy().T+4.)/8.)" + ], + "execution_count": 0, + "outputs": [] + },
+ { + "cell_type": "code", + "metadata": { + "id": "WjIuQALHTr-R", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Switch the import path from TransformerTTS to WaveRNN (both repos ship a top-level `utils` package)\n", + "import sys\n", + "\n", + "# Guarded so the cell can be re-run: a bare remove() raises ValueError the second time\n", + "if 'TransformerTTS' in sys.path:\n", + "    sys.path.remove('TransformerTTS')\n", + "# Evict `utils` and every cached `utils.*` submodule so the imports below resolve against WaveRNN\n", + "for cached in [m for m in sys.modules if m == 'utils' or m.startswith('utils.')]:\n", + "    del sys.modules[cached]\n", + "if 'WaveRNN/' not in sys.path:\n", + "    sys.path.append('WaveRNN/')\n", + "\n", + "from utils.dsp import hp\n", + "from models.fatchord_version import WaveRNN\n", + "import torch\n", + "import numpy as np\n", + "from pathlib import Path" + ], + "execution_count": 0, + "outputs": [] + },
+ { + "cell_type": "code", + "metadata": { + "id": "dptoYzL1XFAr", + "colab_type": "code", + "outputId": "a87f9520-94cb-4306-d1b9-b8aa6b5b68bc", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + } + }, + "source": [ + "# Unzip the pretrained model (-o: overwrite without prompting, so a re-run does not wait for input)\n", + "!unzip -o WaveRNN/pretrained/ljspeech.wavernn.mol.800k.zip -d WaveRNN/pretrained/" + ], + "execution_count": 8, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Archive: WaveRNN/pretrained/ljspeech.wavernn.mol.800k.zip\n", + " inflating: WaveRNN/pretrained/latest_weights.pyt \n" + ], + "name": "stdout" + } + ] + },
+ { + "cell_type": "code", + "metadata": { + "id": "rKixR97aTtwX", + "colab_type": "code", + "outputId": "5bf538f8-bf7c-4ca3-f6a8-93926a457ba3", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + } + }, + "source": [ + "# Load pretrained vocoder\n", + "# Bound to `vocoder`, not `model`, so the TransformerTTS model stays available for re-synthesis\n", + "hp.configure(WaveRNN_path / 'hparams.py') # Load hparams from file\n", + "device = torch.device('cpu')\n", + "vocoder = WaveRNN(\n", + "    rnn_dims=hp.voc_rnn_dims,\n", + "    fc_dims=hp.voc_fc_dims,\n", + "    bits=hp.bits,\n", + "    pad=hp.voc_pad,\n", + "    upsample_factors=hp.voc_upsample_factors,\n", + "    feat_dims=hp.num_mels,\n", + "    compute_dims=hp.voc_compute_dims,\n", + "    res_out_dims=hp.voc_res_out_dims,\n", + "    res_blocks=hp.voc_res_blocks,\n", + "    hop_length=hp.hop_length,\n", + "    sample_rate=hp.sample_rate,\n", + "    mode=hp.voc_mode,\n", + ").to(device)\n", + "\n", + "vocoder.load(str(WaveRNN_path / 'pretrained/latest_weights.pyt'))" + ], + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Trainable Parameters: 4.234M\n" + ], + "name": "stdout" + } + ] + },
+ { + "cell_type": "code", + "metadata": { + "id": "mPF7TrqDOE8S", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Ignore some TF warnings\n", + "import tensorflow as tf\n", + "tf.get_logger().setLevel('ERROR')" + ], + "execution_count": 0, + "outputs": [] + },
+ { + "cell_type": "code", + "metadata": { + "id": "EVkdFQeRUGQ-", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "62d4a302-de02-4acb-8979-4b30b3903db5" + }, + "source": [ + "# Generate sample with pre-trained WaveRNN vocoder\n", + "mel = np.load(WaveRNN_path / 'scientists.npy')\n", + "# Positional arguments after the output path are presumably batched=False and target=1, then overlap and mu_law;\n", + "# TODO confirm against WaveRNN.generate's signature\n", + "vocoder_wav = vocoder.generate(mel[np.newaxis,:,:], 'scientists.wav', False, 1, hp.voc_overlap, hp.mu_law)" + ], + "execution_count": 11, + "outputs": [ + { + "output_type": "stream", + "text": [ + "| ████████████████ 109400/109450 | Batch Size: 1 | Gen Rate: 0.7kHz | " + ], + "name": "stdout" + } + ] + },
+ { + "cell_type": "code", + "metadata": { + "id": "vQYaZawLXTJI", + "colab_type": "code", + "outputId": "bc677767-da4c-4125-b4a0-0e2f43a93efc", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 75 + } + }, + "source": [ + "# Load wav file\n", + "ipd.display(ipd.Audio('scientists.wav'))" + ], + "execution_count": 12, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + } + } + ] + } + ] +} \ No newline at end of file