From fb6b5dff43a6a35954fc8fd83ab746e66facd405 Mon Sep 17 00:00:00 2001
From: Arka <110426173+arkajyotimitra@users.noreply.github.com>
Date: Tue, 9 Jul 2024 17:00:01 -0500
Subject: [PATCH] Added model parameter to TextGenerator
---
chapter12_part01_text-generation.ipynb | 961 +++++++++++++------------
1 file changed, 483 insertions(+), 478 deletions(-)
diff --git a/chapter12_part01_text-generation.ipynb b/chapter12_part01_text-generation.ipynb
index f683c1d73..2d6cd56ff 100644
--- a/chapter12_part01_text-generation.ipynb
+++ b/chapter12_part01_text-generation.ipynb
@@ -1,481 +1,486 @@
{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {
- "colab_type": "text"
- },
- "source": [
- "This is a companion notebook for the book [Deep Learning with Python, Second Edition](https://www.manning.com/books/deep-learning-with-python-second-edition?a_aid=keras&a_bid=76564dff). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n\n**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n\nThis notebook was generated for TensorFlow 2.6."
- ]
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "OTIXFfrzQ-ic"
+ },
+ "source": [
+ "This is a companion notebook for the book [Deep Learning with Python, Second Edition](https://www.manning.com/books/deep-learning-with-python-second-edition?a_aid=keras&a_bid=76564dff). For readability, it only contains runnable code blocks and section titles, and omits everything else in the book: text paragraphs, figures, and pseudocode.\n",
+ "\n",
+ "**If you want to be able to follow what's going on, I recommend reading the notebook side by side with your copy of the book.**\n",
+ "\n",
+ "This notebook was generated for TensorFlow 2.6."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "cdQOORDCQ-ie"
+ },
+ "source": [
+ "# Generative deep learning"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "7UAhCgyPQ-ie"
+ },
+ "source": [
+ "## Text generation"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "0XcgU0OSQ-ie"
+ },
+ "source": [
+ "### A brief history of generative deep learning for sequence generation"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "HJzn9Za1Q-ie"
+ },
+ "source": [
+ "### How do you generate sequence data?"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "TRFgyUsjQ-ie"
+ },
+ "source": [
+ "### The importance of the sampling strategy"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "qAqaW_veQ-ie"
+ },
+ "source": [
+ "**Reweighting a probability distribution to a different temperature**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "id": "ejv8hKzQQ-if"
+ },
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "def reweight_distribution(original_distribution, temperature=0.5):\n",
+ " distribution = np.log(original_distribution) / temperature\n",
+ " distribution = np.exp(distribution)\n",
+ " return distribution / np.sum(distribution)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "lUX7tRlfQ-if"
+ },
+ "source": [
+ "### Implementing text generation with Keras"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "XsA384dyQ-if"
+ },
+ "source": [
+ "#### Preparing the data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "1S5Uep70Q-ig"
+ },
+ "source": [
+ "**Downloading and uncompressing the IMDB movie reviews dataset**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "6xQZdiS7Q-ig"
+ },
+ "outputs": [],
+ "source": [
+ "!wget https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\n",
+ "!tar -xf aclImdb_v1.tar.gz"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "iOs57JUIQ-ig"
+ },
+ "source": [
+ "**Creating a dataset from text files (one file = one sample)**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "_2JRvi_SQ-ig"
+ },
+ "outputs": [],
+ "source": [
+ "import tensorflow as tf\n",
+ "from tensorflow import keras\n",
+ "dataset = keras.utils.text_dataset_from_directory(\n",
+ " directory=\"aclImdb\", label_mode=None, batch_size=256)\n",
+ "dataset = dataset.map(lambda x: tf.strings.regex_replace(x, \"
\", \" \"))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "gpZ_bubiQ-ig"
+ },
+ "source": [
+ "**Preparing a `TextVectorization` layer**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "id": "VdUicEzfQ-ig"
+ },
+ "outputs": [],
+ "source": [
+ "from tensorflow.keras.layers import TextVectorization\n",
+ "\n",
+ "sequence_length = 100\n",
+ "vocab_size = 15000\n",
+ "text_vectorization = TextVectorization(\n",
+ " max_tokens=vocab_size,\n",
+ " output_mode=\"int\",\n",
+ " output_sequence_length=sequence_length,\n",
+ ")\n",
+ "text_vectorization.adapt(dataset)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "VEzmOCqpQ-ig"
+ },
+ "source": [
+ "**Setting up a language modeling dataset**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "id": "yzC8C4h5Q-ig"
+ },
+ "outputs": [],
+ "source": [
+ "def prepare_lm_dataset(text_batch):\n",
+ " vectorized_sequences = text_vectorization(text_batch)\n",
+ " x = vectorized_sequences[:, :-1]\n",
+ " y = vectorized_sequences[:, 1:]\n",
+ " return x, y\n",
+ "\n",
+ "lm_dataset = dataset.map(prepare_lm_dataset, num_parallel_calls=4)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "obOK1gHcQ-ig"
+ },
+ "source": [
+ "#### A Transformer-based sequence-to-sequence model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "id": "JMrRKxBNQ-ig"
+ },
+ "outputs": [],
+ "source": [
+ "import tensorflow as tf\n",
+ "from tensorflow.keras import layers\n",
+ "\n",
+ "class PositionalEmbedding(layers.Layer):\n",
+ " def __init__(self, sequence_length, input_dim, output_dim, **kwargs):\n",
+ " super().__init__(**kwargs)\n",
+ " self.token_embeddings = layers.Embedding(\n",
+ " input_dim=input_dim, output_dim=output_dim)\n",
+ " self.position_embeddings = layers.Embedding(\n",
+ " input_dim=sequence_length, output_dim=output_dim)\n",
+ " self.sequence_length = sequence_length\n",
+ " self.input_dim = input_dim\n",
+ " self.output_dim = output_dim\n",
+ "\n",
+ " def call(self, inputs):\n",
+ " length = tf.shape(inputs)[-1]\n",
+ " positions = tf.range(start=0, limit=length, delta=1)\n",
+ " embedded_tokens = self.token_embeddings(inputs)\n",
+ " embedded_positions = self.position_embeddings(positions)\n",
+ " return embedded_tokens + embedded_positions\n",
+ "\n",
+ " def compute_mask(self, inputs, mask=None):\n",
+ " return tf.math.not_equal(inputs, 0)\n",
+ "\n",
+ " def get_config(self):\n",
+ " config = super(PositionalEmbedding, self).get_config()\n",
+ " config.update({\n",
+ " \"output_dim\": self.output_dim,\n",
+ " \"sequence_length\": self.sequence_length,\n",
+ " \"input_dim\": self.input_dim,\n",
+ " })\n",
+ " return config\n",
+ "\n",
+ "\n",
+ "class TransformerDecoder(layers.Layer):\n",
+ " def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):\n",
+ " super().__init__(**kwargs)\n",
+ " self.embed_dim = embed_dim\n",
+ " self.dense_dim = dense_dim\n",
+ " self.num_heads = num_heads\n",
+ " self.attention_1 = layers.MultiHeadAttention(\n",
+ " num_heads=num_heads, key_dim=embed_dim)\n",
+ " self.attention_2 = layers.MultiHeadAttention(\n",
+ " num_heads=num_heads, key_dim=embed_dim)\n",
+ " self.dense_proj = keras.Sequential(\n",
+ " [layers.Dense(dense_dim, activation=\"relu\"),\n",
+ " layers.Dense(embed_dim),]\n",
+ " )\n",
+ " self.layernorm_1 = layers.LayerNormalization()\n",
+ " self.layernorm_2 = layers.LayerNormalization()\n",
+ " self.layernorm_3 = layers.LayerNormalization()\n",
+ " self.supports_masking = True\n",
+ "\n",
+ " def get_config(self):\n",
+ " config = super(TransformerDecoder, self).get_config()\n",
+ " config.update({\n",
+ " \"embed_dim\": self.embed_dim,\n",
+ " \"num_heads\": self.num_heads,\n",
+ " \"dense_dim\": self.dense_dim,\n",
+ " })\n",
+ " return config\n",
+ "\n",
+ " def get_causal_attention_mask(self, inputs):\n",
+ " input_shape = tf.shape(inputs)\n",
+ " batch_size, sequence_length = input_shape[0], input_shape[1]\n",
+ " i = tf.range(sequence_length)[:, tf.newaxis]\n",
+ " j = tf.range(sequence_length)\n",
+ " mask = tf.cast(i >= j, dtype=\"int32\")\n",
+ " mask = tf.reshape(mask, (1, input_shape[1], input_shape[1]))\n",
+ " mult = tf.concat(\n",
+ " [tf.expand_dims(batch_size, -1),\n",
+ " tf.constant([1, 1], dtype=tf.int32)], axis=0)\n",
+ " return tf.tile(mask, mult)\n",
+ "\n",
+ " def call(self, inputs, encoder_outputs, mask=None):\n",
+ " causal_mask = self.get_causal_attention_mask(inputs)\n",
+ " if mask is not None:\n",
+ " padding_mask = tf.cast(\n",
+ " mask[:, tf.newaxis, :], dtype=\"int32\")\n",
+ " padding_mask = tf.minimum(padding_mask, causal_mask)\n",
+ " else:\n",
+ " padding_mask = mask\n",
+ " attention_output_1 = self.attention_1(\n",
+ " query=inputs,\n",
+ " value=inputs,\n",
+ " key=inputs,\n",
+ " attention_mask=causal_mask)\n",
+ " attention_output_1 = self.layernorm_1(inputs + attention_output_1)\n",
+ " attention_output_2 = self.attention_2(\n",
+ " query=attention_output_1,\n",
+ " value=encoder_outputs,\n",
+ " key=encoder_outputs,\n",
+ " attention_mask=padding_mask,\n",
+ " )\n",
+ " attention_output_2 = self.layernorm_2(\n",
+ " attention_output_1 + attention_output_2)\n",
+ " proj_output = self.dense_proj(attention_output_2)\n",
+ " return self.layernorm_3(attention_output_2 + proj_output)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "TV-yHul9Q-ih"
+ },
+ "source": [
+ "**A simple Transformer-based language model**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "id": "C6JhyWuUQ-ih"
+ },
+ "outputs": [],
+ "source": [
+ "from tensorflow.keras import layers\n",
+ "embed_dim = 256\n",
+ "latent_dim = 2048\n",
+ "num_heads = 2\n",
+ "\n",
+ "inputs = keras.Input(shape=(None,), dtype=\"int64\")\n",
+ "x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(inputs)\n",
+ "x = TransformerDecoder(embed_dim, latent_dim, num_heads)(x, x)\n",
+ "outputs = layers.Dense(vocab_size, activation=\"softmax\")(x)\n",
+ "model = keras.Model(inputs, outputs)\n",
+ "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"rmsprop\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "-CUcUJwRQ-ih"
+ },
+ "source": [
+ "### A text-generation callback with variable-temperature sampling"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "5FdXRRxKQ-ih"
+ },
+ "source": [
+ "**The text-generation callback**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "RZpG4PxOQ-ih"
+ },
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "\n",
+ "tokens_index = dict(enumerate(text_vectorization.get_vocabulary()))\n",
+ "\n",
+ "def sample_next(predictions, temperature=1.0):\n",
+ " predictions = np.asarray(predictions).astype(\"float64\")\n",
+ " predictions = np.log(predictions) / temperature\n",
+ " exp_preds = np.exp(predictions)\n",
+ " predictions = exp_preds / np.sum(exp_preds)\n",
+ " probas = np.random.multinomial(1, predictions, 1)\n",
+ " return np.argmax(probas)\n",
+ "\n",
+ "class TextGenerator(keras.callbacks.Callback):\n",
+ " def __init__(self,\n",
+ " prompt,\n",
+ " generate_length,\n",
+ " model_input_length,\n",
+ " temperatures=(1.,),\n",
+ " model=model,\n",
+ " print_freq=1):\n",
+ " self.prompt = prompt\n",
+ " self.generate_length = generate_length\n",
+ " self.model_input_length = model_input_length\n",
+ " self.temperatures = temperatures\n",
+ " self.print_freq = print_freq\n",
+ " self.model = model\n",
+ " vectorized_prompt = text_vectorization([prompt])[0].numpy()\n",
+ " self.prompt_length = np.nonzero(vectorized_prompt == 0)[0][0]\n",
+ "\n",
+ " def on_epoch_end(self, epoch, logs=None):\n",
+ " if (epoch + 1) % self.print_freq != 0:\n",
+ " return\n",
+ " for temperature in self.temperatures:\n",
+ " print(\"== Generating with temperature\", temperature)\n",
+ " sentence = self.prompt\n",
+ " for i in range(self.generate_length):\n",
+ " tokenized_sentence = text_vectorization([sentence])\n",
+ " predictions = self.model(tokenized_sentence)\n",
+ " next_token = sample_next(\n",
+ " predictions[0, self.prompt_length - 1 + i, :]\n",
+ " )\n",
+ " sampled_token = tokens_index[next_token]\n",
+ " sentence += \" \" + sampled_token\n",
+ " print(sentence)\n",
+ "\n",
+ "prompt = \"This movie\"\n",
+ "text_gen_callback = TextGenerator(\n",
+ " prompt,\n",
+ " generate_length=50,\n",
+ " model_input_length=sequence_length,\n",
+ " temperatures=(0.2, 0.5, 0.7, 1., 1.5),\n",
+ " model=model)\n",
+ "\n",
+ "text_gen_callback.on_epoch_end(0)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "21_JMCTMQ-ih"
+ },
+ "source": [
+ "**Fitting the language model**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "9xXWPliCQ-ih"
+ },
+ "outputs": [],
+ "source": [
+ "model.fit(lm_dataset, epochs=200, callbacks=[text_gen_callback])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "j4KzglOzQ-ih"
+ },
+ "source": [
+ "### Wrapping up"
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.0"
+ }
},
- {
- "cell_type": "markdown",
- "metadata": {
- "colab_type": "text"
- },
- "source": [
- "# Generative deep learning"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "colab_type": "text"
- },
- "source": [
- "## Text generation"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "colab_type": "text"
- },
- "source": [
- "### A brief history of generative deep learning for sequence generation"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "colab_type": "text"
- },
- "source": [
- "### How do you generate sequence data?"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "colab_type": "text"
- },
- "source": [
- "### The importance of the sampling strategy"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "colab_type": "text"
- },
- "source": [
- "**Reweighting a probability distribution to a different temperature**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 0,
- "metadata": {
- "colab_type": "code"
- },
- "outputs": [],
- "source": [
- "import numpy as np\n",
- "def reweight_distribution(original_distribution, temperature=0.5):\n",
- " distribution = np.log(original_distribution) / temperature\n",
- " distribution = np.exp(distribution)\n",
- " return distribution / np.sum(distribution)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "colab_type": "text"
- },
- "source": [
- "### Implementing text generation with Keras"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "colab_type": "text"
- },
- "source": [
- "#### Preparing the data"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "colab_type": "text"
- },
- "source": [
- "**Downloading and uncompressing the IMDB movie reviews dataset**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 0,
- "metadata": {
- "colab_type": "code"
- },
- "outputs": [],
- "source": [
- "!wget https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\n",
- "!tar -xf aclImdb_v1.tar.gz"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "colab_type": "text"
- },
- "source": [
- "**Creating a dataset from text files (one file = one sample)**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 0,
- "metadata": {
- "colab_type": "code"
- },
- "outputs": [],
- "source": [
- "import tensorflow as tf\n",
- "from tensorflow import keras\n",
- "dataset = keras.utils.text_dataset_from_directory(\n",
- " directory=\"aclImdb\", label_mode=None, batch_size=256)\n",
- "dataset = dataset.map(lambda x: tf.strings.regex_replace(x, \"
\", \" \"))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "colab_type": "text"
- },
- "source": [
- "**Preparing a `TextVectorization` layer**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 0,
- "metadata": {
- "colab_type": "code"
- },
- "outputs": [],
- "source": [
- "from tensorflow.keras.layers import TextVectorization\n",
- "\n",
- "sequence_length = 100\n",
- "vocab_size = 15000\n",
- "text_vectorization = TextVectorization(\n",
- " max_tokens=vocab_size,\n",
- " output_mode=\"int\",\n",
- " output_sequence_length=sequence_length,\n",
- ")\n",
- "text_vectorization.adapt(dataset)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "colab_type": "text"
- },
- "source": [
- "**Setting up a language modeling dataset**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 0,
- "metadata": {
- "colab_type": "code"
- },
- "outputs": [],
- "source": [
- "def prepare_lm_dataset(text_batch):\n",
- " vectorized_sequences = text_vectorization(text_batch)\n",
- " x = vectorized_sequences[:, :-1]\n",
- " y = vectorized_sequences[:, 1:]\n",
- " return x, y\n",
- "\n",
- "lm_dataset = dataset.map(prepare_lm_dataset, num_parallel_calls=4)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "colab_type": "text"
- },
- "source": [
- "#### A Transformer-based sequence-to-sequence model"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 0,
- "metadata": {
- "colab_type": "code"
- },
- "outputs": [],
- "source": [
- "import tensorflow as tf\n",
- "from tensorflow.keras import layers\n",
- "\n",
- "class PositionalEmbedding(layers.Layer):\n",
- " def __init__(self, sequence_length, input_dim, output_dim, **kwargs):\n",
- " super().__init__(**kwargs)\n",
- " self.token_embeddings = layers.Embedding(\n",
- " input_dim=input_dim, output_dim=output_dim)\n",
- " self.position_embeddings = layers.Embedding(\n",
- " input_dim=sequence_length, output_dim=output_dim)\n",
- " self.sequence_length = sequence_length\n",
- " self.input_dim = input_dim\n",
- " self.output_dim = output_dim\n",
- "\n",
- " def call(self, inputs):\n",
- " length = tf.shape(inputs)[-1]\n",
- " positions = tf.range(start=0, limit=length, delta=1)\n",
- " embedded_tokens = self.token_embeddings(inputs)\n",
- " embedded_positions = self.position_embeddings(positions)\n",
- " return embedded_tokens + embedded_positions\n",
- "\n",
- " def compute_mask(self, inputs, mask=None):\n",
- " return tf.math.not_equal(inputs, 0)\n",
- "\n",
- " def get_config(self):\n",
- " config = super(PositionalEmbedding, self).get_config()\n",
- " config.update({\n",
- " \"output_dim\": self.output_dim,\n",
- " \"sequence_length\": self.sequence_length,\n",
- " \"input_dim\": self.input_dim,\n",
- " })\n",
- " return config\n",
- "\n",
- "\n",
- "class TransformerDecoder(layers.Layer):\n",
- " def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):\n",
- " super().__init__(**kwargs)\n",
- " self.embed_dim = embed_dim\n",
- " self.dense_dim = dense_dim\n",
- " self.num_heads = num_heads\n",
- " self.attention_1 = layers.MultiHeadAttention(\n",
- " num_heads=num_heads, key_dim=embed_dim)\n",
- " self.attention_2 = layers.MultiHeadAttention(\n",
- " num_heads=num_heads, key_dim=embed_dim)\n",
- " self.dense_proj = keras.Sequential(\n",
- " [layers.Dense(dense_dim, activation=\"relu\"),\n",
- " layers.Dense(embed_dim),]\n",
- " )\n",
- " self.layernorm_1 = layers.LayerNormalization()\n",
- " self.layernorm_2 = layers.LayerNormalization()\n",
- " self.layernorm_3 = layers.LayerNormalization()\n",
- " self.supports_masking = True\n",
- "\n",
- " def get_config(self):\n",
- " config = super(TransformerDecoder, self).get_config()\n",
- " config.update({\n",
- " \"embed_dim\": self.embed_dim,\n",
- " \"num_heads\": self.num_heads,\n",
- " \"dense_dim\": self.dense_dim,\n",
- " })\n",
- " return config\n",
- "\n",
- " def get_causal_attention_mask(self, inputs):\n",
- " input_shape = tf.shape(inputs)\n",
- " batch_size, sequence_length = input_shape[0], input_shape[1]\n",
- " i = tf.range(sequence_length)[:, tf.newaxis]\n",
- " j = tf.range(sequence_length)\n",
- " mask = tf.cast(i >= j, dtype=\"int32\")\n",
- " mask = tf.reshape(mask, (1, input_shape[1], input_shape[1]))\n",
- " mult = tf.concat(\n",
- " [tf.expand_dims(batch_size, -1),\n",
- " tf.constant([1, 1], dtype=tf.int32)], axis=0)\n",
- " return tf.tile(mask, mult)\n",
- "\n",
- " def call(self, inputs, encoder_outputs, mask=None):\n",
- " causal_mask = self.get_causal_attention_mask(inputs)\n",
- " if mask is not None:\n",
- " padding_mask = tf.cast(\n",
- " mask[:, tf.newaxis, :], dtype=\"int32\")\n",
- " padding_mask = tf.minimum(padding_mask, causal_mask)\n",
- " else:\n",
- " padding_mask = mask\n",
- " attention_output_1 = self.attention_1(\n",
- " query=inputs,\n",
- " value=inputs,\n",
- " key=inputs,\n",
- " attention_mask=causal_mask)\n",
- " attention_output_1 = self.layernorm_1(inputs + attention_output_1)\n",
- " attention_output_2 = self.attention_2(\n",
- " query=attention_output_1,\n",
- " value=encoder_outputs,\n",
- " key=encoder_outputs,\n",
- " attention_mask=padding_mask,\n",
- " )\n",
- " attention_output_2 = self.layernorm_2(\n",
- " attention_output_1 + attention_output_2)\n",
- " proj_output = self.dense_proj(attention_output_2)\n",
- " return self.layernorm_3(attention_output_2 + proj_output)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "colab_type": "text"
- },
- "source": [
- "**A simple Transformer-based language model**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 0,
- "metadata": {
- "colab_type": "code"
- },
- "outputs": [],
- "source": [
- "from tensorflow.keras import layers\n",
- "embed_dim = 256\n",
- "latent_dim = 2048\n",
- "num_heads = 2\n",
- "\n",
- "inputs = keras.Input(shape=(None,), dtype=\"int64\")\n",
- "x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(inputs)\n",
- "x = TransformerDecoder(embed_dim, latent_dim, num_heads)(x, x)\n",
- "outputs = layers.Dense(vocab_size, activation=\"softmax\")(x)\n",
- "model = keras.Model(inputs, outputs)\n",
- "model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=\"rmsprop\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "colab_type": "text"
- },
- "source": [
- "### A text-generation callback with variable-temperature sampling"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "colab_type": "text"
- },
- "source": [
- "**The text-generation callback**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 0,
- "metadata": {
- "colab_type": "code"
- },
- "outputs": [],
- "source": [
- "import numpy as np\n",
- "\n",
- "tokens_index = dict(enumerate(text_vectorization.get_vocabulary()))\n",
- "\n",
- "def sample_next(predictions, temperature=1.0):\n",
- " predictions = np.asarray(predictions).astype(\"float64\")\n",
- " predictions = np.log(predictions) / temperature\n",
- " exp_preds = np.exp(predictions)\n",
- " predictions = exp_preds / np.sum(exp_preds)\n",
- " probas = np.random.multinomial(1, predictions, 1)\n",
- " return np.argmax(probas)\n",
- "\n",
- "class TextGenerator(keras.callbacks.Callback):\n",
- " def __init__(self,\n",
- " prompt,\n",
- " generate_length,\n",
- " model_input_length,\n",
- " temperatures=(1.,),\n",
- " print_freq=1):\n",
- " self.prompt = prompt\n",
- " self.generate_length = generate_length\n",
- " self.model_input_length = model_input_length\n",
- " self.temperatures = temperatures\n",
- " self.print_freq = print_freq\n",
- " vectorized_prompt = text_vectorization([prompt])[0].numpy()\n",
- " self.prompt_length = np.nonzero(vectorized_prompt == 0)[0][0]\n",
- "\n",
- " def on_epoch_end(self, epoch, logs=None):\n",
- " if (epoch + 1) % self.print_freq != 0:\n",
- " return\n",
- " for temperature in self.temperatures:\n",
- " print(\"== Generating with temperature\", temperature)\n",
- " sentence = self.prompt\n",
- " for i in range(self.generate_length):\n",
- " tokenized_sentence = text_vectorization([sentence])\n",
- " predictions = self.model(tokenized_sentence)\n",
- " next_token = sample_next(\n",
- " predictions[0, self.prompt_length - 1 + i, :]\n",
- " )\n",
- " sampled_token = tokens_index[next_token]\n",
- " sentence += \" \" + sampled_token\n",
- " print(sentence)\n",
- "\n",
- "prompt = \"This movie\"\n",
- "text_gen_callback = TextGenerator(\n",
- " prompt,\n",
- " generate_length=50,\n",
- " model_input_length=sequence_length,\n",
- " temperatures=(0.2, 0.5, 0.7, 1., 1.5))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "colab_type": "text"
- },
- "source": [
- "**Fitting the language model**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 0,
- "metadata": {
- "colab_type": "code"
- },
- "outputs": [],
- "source": [
- "model.fit(lm_dataset, epochs=200, callbacks=[text_gen_callback])"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "colab_type": "text"
- },
- "source": [
- "### Wrapping up"
- ]
- }
- ],
- "metadata": {
- "colab": {
- "collapsed_sections": [],
- "name": "chapter12_part01_text-generation.i",
- "private_outputs": false,
- "provenance": [],
- "toc_visible": true
- },
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.7.0"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 0
+ "nbformat": 4,
+ "nbformat_minor": 0
}
\ No newline at end of file