diff --git a/T4Rec_repro/train_runs/clm_item_id_min.ipynb b/T4Rec_repro/train_runs/clm_item_id_min.ipynb new file mode 100644 index 0000000000..ff5eabbe86 --- /dev/null +++ b/T4Rec_repro/train_runs/clm_item_id_min.ipynb @@ -0,0 +1,640 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "ceb3ae93", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-10 13:19:41.332031: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", + "2023-03-10 13:19:43.702598: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:19:43.703049: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:19:43.703227: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "2023-03-10 13:19:44.148806: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-03-10 13:19:44.149822: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:19:44.150030: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:19:44.150185: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:19:44.891194: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:19:44.891419: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:19:44.891582: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:19:44.891696: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", + "2023-03-10 13:19:44.891761: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n" + ] + } + ], + "source": [ + "import os\n", + "os.environ[\"TF_GPU_ALLOCATOR\"]=\"cuda_malloc_async\"\n", + "import gc\n", + "import numpy as np\n", + "\n", + "import tensorflow as tf\n", + "\n", + "from merlin.schema.tags import Tags\n", + "from merlin.io.dataset import Dataset\n", + "import merlin.models.tf as mm" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "11647dd3", + "metadata": {}, + "outputs": [], + "source": [ + "train = Dataset(\"ecom_dataset/0001/train.parquet\")\n", + "valid = Dataset(\"ecom_dataset/0002/valid.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "4ab4e0fb", + "metadata": {}, + "outputs": [], + "source": [ + "target = 'sess_pid_seq'" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "8d9903e6", + "metadata": {}, + "outputs": [], + "source": [ + "# a couple of starter hyperparams\n", + "\n", + "d_model = 192\n", + "n_layer = 3\n", + "n_head = 16\n", + "batch_size = 128\n", + "learning_rate = 0.0006667377132554976\n", + "n_epoch = 5" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a6ade14a", + "metadata": {}, + "outputs": [], + "source": [ + "def get_model():\n", + " mlp_block = mm.MLPBlock(\n", + " [128,d_model],\n", + " activation='relu',\n", + " 
no_activation_last_layer=True,\n", + " )\n", + "\n", + " from merlin.schema.io.tensorflow_metadata import TensorflowMetadata\n", + "\n", + " schema = TensorflowMetadata.from_proto_text_file(\n", + " '../',\n", + " file_name='rees46_schema_modified.pbtxt'\n", + " ).to_merlin_schema()\n", + "\n", + " train.schema = schema\n", + "\n", + " input_block = mm.InputBlockV2(\n", + " train.schema.select_by_name('sess_pid_seq'), \n", + " embeddings=mm.Embeddings(\n", + " train.schema.select_by_name('sess_pid_seq'), \n", + " sequence_combiner=None,\n", + " dim=d_model\n", + " ),\n", + " # pre=mm.StochasticSwapNoise()\n", + " )\n", + "\n", + " train.schema = train.schema.select_by_name('sess_pid_seq')\n", + "\n", + " xlnet_block = mm.XLNetBlock(d_model=d_model, n_head=n_head, n_layer=n_layer)\n", + "\n", + " dense_block = mm.SequentialBlock(\n", + " input_block,\n", + " mlp_block,\n", + " xlnet_block\n", + " )\n", + "\n", + " mlp_block2 = mm.MLPBlock(\n", + " [128,d_model],\n", + " activation='relu',\n", + " no_activation_last_layer=True,\n", + " )\n", + "\n", + " prediction_task = mm.CategoricalOutput(\n", + " to_call=input_block[\"categorical\"][target],\n", + " )\n", + "\n", + " model_transformer = mm.Model(dense_block, mlp_block2, prediction_task)\n", + "\n", + " optimizer = tf.keras.optimizers.Adam(\n", + " learning_rate=learning_rate,\n", + " )\n", + "\n", + " model_transformer.compile(run_eagerly=False, optimizer=optimizer, loss=\"categorical_crossentropy\",\n", + " metrics=mm.TopKMetricsAggregator.default_metrics(top_ks=[10])\n", + " )\n", + " return model_transformer, xlnet_block" + ] + }, + { + "cell_type": "markdown", + "id": "46b9f788", + "metadata": {}, + "source": [ + "# Run 1" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "e7474131", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-10 13:19:51.258201: I tensorflow/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8700\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", + "677/677 [==============================] - 83s 110ms/step - loss: 8.9114 - recall_at_10: 0.0409 - mrr_at_10: 0.0151 - ndcg_at_10: 0.0211 - map_at_10: 0.0151 - precision_at_10: 0.0041 - regularization_loss: 0.0000e+00 - loss_batch: 8.9101\n", + "Epoch 2/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 5.7403 - recall_at_10: 0.2994 - mrr_at_10: 0.1857 - ndcg_at_10: 0.2126 - map_at_10: 0.1857 - precision_at_10: 0.0299 - regularization_loss: 0.0000e+00 - loss_batch: 5.7358\n", + "Epoch 3/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 3.8640 - recall_at_10: 0.5785 - mrr_at_10: 0.4536 - ndcg_at_10: 0.4835 - map_at_10: 0.4536 - precision_at_10: 0.0579 - regularization_loss: 0.0000e+00 - loss_batch: 3.8700\n", + "Epoch 4/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 3.0672 - recall_at_10: 0.6808 - mrr_at_10: 0.5664 - ndcg_at_10: 0.5939 - map_at_10: 0.5664 - precision_at_10: 0.0681 - regularization_loss: 0.0000e+00 - loss_batch: 3.0720\n", + "Epoch 5/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 2.6008 - recall_at_10: 0.7369 - mrr_at_10: 0.6298 - ndcg_at_10: 0.6556 - map_at_10: 0.6298 - precision_at_10: 0.0737 - regularization_loss: 0.0000e+00 - loss_batch: 2.6062\n", + "84/84 [==============================] - 7s 40ms/step - loss: 8.7419 - recall_at_10: 0.1679 - mrr_at_10: 0.0639 - ndcg_at_10: 0.0881 - map_at_10: 0.0639 - precision_at_10: 0.0168 - regularization_loss: 0.0000e+00 - loss_batch: 8.7705\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.741933822631836,\n", + " 'recall_at_10': 0.16701146960258484,\n", + " 'mrr_at_10': 0.06411589682102203,\n", + " 'ndcg_at_10': 0.08810190856456757,\n", + " 'map_at_10': 0.06411589682102203,\n", + " 'precision_at_10': 0.016701148822903633,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 10.003721237182617}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + "model_transformer.fit(\n", + " 
train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "a070554f", + "metadata": {}, + "source": [ + "# Run 2" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "566e2f90", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model_1/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model_1/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", + "677/677 [==============================] - 80s 110ms/step - loss: 9.0406 - recall_at_10: 0.0356 - mrr_at_10: 0.0130 - ndcg_at_10: 0.0183 - map_at_10: 0.0130 - precision_at_10: 0.0036 - regularization_loss: 0.0000e+00 - loss_batch: 9.0326\n", + "Epoch 2/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 6.0845 - recall_at_10: 0.2649 - mrr_at_10: 0.1565 - ndcg_at_10: 0.1821 - map_at_10: 0.1565 - precision_at_10: 0.0265 - regularization_loss: 0.0000e+00 - loss_batch: 6.0807\n", + "Epoch 3/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 4.2293 - recall_at_10: 0.5184 - mrr_at_10: 0.3883 - ndcg_at_10: 0.4194 - map_at_10: 0.3883 - precision_at_10: 0.0518 - regularization_loss: 0.0000e+00 - loss_batch: 4.2323\n", + "Epoch 4/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 3.3441 - recall_at_10: 0.6468 - mrr_at_10: 0.5229 - ndcg_at_10: 0.5526 - map_at_10: 0.5229 - precision_at_10: 0.0647 - regularization_loss: 0.0000e+00 - loss_batch: 3.3486\n", + "Epoch 5/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 2.8789 - recall_at_10: 0.7051 - mrr_at_10: 0.5871 - ndcg_at_10: 0.6155 - map_at_10: 0.5871 - precision_at_10: 0.0705 - regularization_loss: 0.0000e+00 - loss_batch: 2.8854\n", + "84/84 [==============================] - 7s 40ms/step - loss: 8.8279 - recall_at_10: 0.1584 - mrr_at_10: 0.0624 - ndcg_at_10: 0.0847 - map_at_10: 0.0624 - precision_at_10: 0.0158 - regularization_loss: 0.0000e+00 - loss_batch: 8.8674\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.827858924865723,\n", + " 'recall_at_10': 0.15591499209403992,\n", + " 'mrr_at_10': 0.06090494617819786,\n", + " 'ndcg_at_10': 0.08297329396009445,\n", + " 'map_at_10': 0.06090494617819786,\n", + " 'precision_at_10': 0.01559150218963623,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 10.57563304901123}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + "model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "8c785bb1", + "metadata": {}, + "source": [ + "# Run 3" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "7cc9685e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model_2/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model_2/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", + "677/677 [==============================] - 80s 110ms/step - loss: 8.9635 - recall_at_10: 0.0396 - mrr_at_10: 0.0146 - ndcg_at_10: 0.0204 - map_at_10: 0.0146 - precision_at_10: 0.0040 - regularization_loss: 0.0000e+00 - loss_batch: 8.9589\n", + "Epoch 2/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 6.2358 - recall_at_10: 0.2417 - mrr_at_10: 0.1391 - ndcg_at_10: 0.1633 - map_at_10: 0.1391 - precision_at_10: 0.0242 - regularization_loss: 0.0000e+00 - loss_batch: 6.2350\n", + "Epoch 3/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 4.4725 - recall_at_10: 0.4996 - mrr_at_10: 0.3675 - ndcg_at_10: 0.3991 - map_at_10: 0.3675 - precision_at_10: 0.0500 - regularization_loss: 0.0000e+00 - loss_batch: 4.4748\n", + "Epoch 4/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 3.6489 - recall_at_10: 0.6128 - mrr_at_10: 0.4839 - ndcg_at_10: 0.5148 - map_at_10: 0.4839 - precision_at_10: 0.0613 - regularization_loss: 0.0000e+00 - loss_batch: 3.6543\n", + "Epoch 5/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 3.2122 - recall_at_10: 0.6626 - mrr_at_10: 0.5353 - ndcg_at_10: 0.5659 - map_at_10: 0.5353 - precision_at_10: 0.0663 - regularization_loss: 0.0000e+00 - loss_batch: 3.2164\n", + "84/84 [==============================] - 7s 39ms/step - loss: 8.8321 - recall_at_10: 0.1434 - mrr_at_10: 0.0582 - ndcg_at_10: 0.0781 - map_at_10: 0.0582 - precision_at_10: 0.0143 - regularization_loss: 0.0000e+00 - loss_batch: 8.8607\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.832069396972656,\n", + " 'recall_at_10': 0.1426556259393692,\n", + " 'mrr_at_10': 0.05639006569981575,\n", + " 'ndcg_at_10': 0.07650619745254517,\n", + " 'map_at_10': 0.05639006569981575,\n", + " 'precision_at_10': 0.01426556333899498,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 10.100401878356934}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + 
"model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "7b90a1c5", + "metadata": {}, + "source": [ + "# Run 4" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "66f1dbfe", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model_3/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model_3/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", + "677/677 [==============================] - 80s 110ms/step - loss: 9.0739 - recall_at_10: 0.0361 - mrr_at_10: 0.0125 - ndcg_at_10: 0.0180 - map_at_10: 0.0125 - precision_at_10: 0.0036 - regularization_loss: 0.0000e+00 - loss_batch: 9.0756\n", + "Epoch 2/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 6.4023 - recall_at_10: 0.2372 - mrr_at_10: 0.1349 - ndcg_at_10: 0.1591 - map_at_10: 0.1349 - precision_at_10: 0.0237 - regularization_loss: 0.0000e+00 - loss_batch: 6.4020\n", + "Epoch 3/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 4.7934 - recall_at_10: 0.4544 - mrr_at_10: 0.3194 - ndcg_at_10: 0.3516 - map_at_10: 0.3194 - precision_at_10: 0.0454 - regularization_loss: 0.0000e+00 - loss_batch: 4.7958\n", + "Epoch 4/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 3.7131 - recall_at_10: 0.5913 - mrr_at_10: 0.4595 - ndcg_at_10: 0.4911 - map_at_10: 0.4595 - precision_at_10: 0.0591 - regularization_loss: 0.0000e+00 - loss_batch: 3.7160\n", + "Epoch 5/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 3.0900 - recall_at_10: 0.6752 - mrr_at_10: 0.5537 - ndcg_at_10: 0.5829 - map_at_10: 0.5537 - precision_at_10: 0.0675 - regularization_loss: 0.0000e+00 - loss_batch: 3.0945\n", + "84/84 [==============================] - 7s 40ms/step - loss: 8.9225 - recall_at_10: 0.1426 - mrr_at_10: 0.0581 - ndcg_at_10: 0.0778 - map_at_10: 0.0581 - precision_at_10: 0.0143 - regularization_loss: 0.0000e+00 - loss_batch: 8.9683\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.922541618347168,\n", + " 'recall_at_10': 0.14425428211688995,\n", + " 'mrr_at_10': 0.057682257145643234,\n", + " 'ndcg_at_10': 0.077837273478508,\n", + " 'map_at_10': 0.057682257145643234,\n", + " 'precision_at_10': 0.014425428584218025,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 10.947548866271973}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + "model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "a1734c21", + "metadata": {}, + "source": [ + "# Run 5" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "03b380f7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model_4/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model_4/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", + "677/677 [==============================] - 80s 110ms/step - loss: 9.0454 - recall_at_10: 0.0381 - mrr_at_10: 0.0139 - ndcg_at_10: 0.0195 - map_at_10: 0.0139 - precision_at_10: 0.0038 - regularization_loss: 0.0000e+00 - loss_batch: 9.0386\n", + "Epoch 2/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 6.3489 - recall_at_10: 0.2430 - mrr_at_10: 0.1385 - ndcg_at_10: 0.1632 - map_at_10: 0.1385 - precision_at_10: 0.0243 - regularization_loss: 0.0000e+00 - loss_batch: 6.3435\n", + "Epoch 3/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 4.7853 - recall_at_10: 0.4602 - mrr_at_10: 0.3227 - ndcg_at_10: 0.3555 - map_at_10: 0.3227 - precision_at_10: 0.0460 - regularization_loss: 0.0000e+00 - loss_batch: 4.7868\n", + "Epoch 4/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 3.6873 - recall_at_10: 0.6026 - mrr_at_10: 0.4710 - ndcg_at_10: 0.5025 - map_at_10: 0.4710 - precision_at_10: 0.0603 - regularization_loss: 0.0000e+00 - loss_batch: 3.6936\n", + "Epoch 5/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 3.0298 - recall_at_10: 0.6856 - mrr_at_10: 0.5650 - ndcg_at_10: 0.5940 - map_at_10: 0.5650 - precision_at_10: 0.0686 - regularization_loss: 0.0000e+00 - loss_batch: 3.0363\n", + "84/84 [==============================] - 8s 40ms/step - loss: 8.6711 - recall_at_10: 0.1505 - mrr_at_10: 0.0595 - ndcg_at_10: 0.0807 - map_at_10: 0.0595 - precision_at_10: 0.0151 - regularization_loss: 0.0000e+00 - loss_batch: 8.6999\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.671070098876953,\n", + " 'recall_at_10': 0.15074290335178375,\n", + " 'mrr_at_10': 0.05898994952440262,\n", + " 'ndcg_at_10': 0.08035662025213242,\n", + " 'map_at_10': 0.05898994952440262,\n", + " 'precision_at_10': 0.015074292197823524,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 9.946744918823242}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + 
"model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6fe3b07c", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/T4Rec_repro/train_runs/mlm_item_id_min.ipynb b/T4Rec_repro/train_runs/mlm_item_id_min.ipynb new file mode 100644 index 0000000000..df90cc786b --- /dev/null +++ b/T4Rec_repro/train_runs/mlm_item_id_min.ipynb @@ -0,0 +1,640 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "ceb3ae93", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-10 13:57:07.721314: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", + "2023-03-10 13:57:10.129984: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:57:10.130437: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:57:10.130617: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "2023-03-10 13:57:10.581209: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-03-10 13:57:10.582030: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:57:10.582283: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:57:10.582439: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:57:11.330242: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:57:11.330454: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:57:11.330615: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-10 13:57:11.330728: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", + "2023-03-10 13:57:11.330790: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n" + ] + } + ], + "source": [ + "import os\n", + "os.environ[\"TF_GPU_ALLOCATOR\"]=\"cuda_malloc_async\"\n", + "import gc\n", + "import numpy as np\n", + "\n", + "import tensorflow as tf\n", + "\n", + "from merlin.schema.tags import Tags\n", + "from merlin.io.dataset import Dataset\n", + "import merlin.models.tf as mm" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "11647dd3", + "metadata": {}, + "outputs": [], + "source": [ + "train = Dataset(\"ecom_dataset/0001/train.parquet\")\n", + "valid = Dataset(\"ecom_dataset/0002/valid.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "4ab4e0fb", + "metadata": {}, + "outputs": [], + "source": [ + "target = 'sess_pid_seq'" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "8d9903e6", + "metadata": {}, + "outputs": [], + "source": [ + "# a couple of starter hyperparams\n", + "\n", + "d_model = 192\n", + "n_layer = 3\n", + "n_head = 16\n", + "batch_size = 128\n", + "learning_rate = 0.0006667377132554976\n", + "n_epoch = 5" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a6ade14a", + "metadata": {}, + "outputs": [], + "source": [ + "def get_model():\n", + " mlp_block = mm.MLPBlock(\n", + " [128,d_model],\n", + " activation='relu',\n", + " 
no_activation_last_layer=True,\n", + " )\n", + "\n", + " from merlin.schema.io.tensorflow_metadata import TensorflowMetadata\n", + "\n", + " schema = TensorflowMetadata.from_proto_text_file(\n", + " '../',\n", + " file_name='rees46_schema_modified.pbtxt'\n", + " ).to_merlin_schema()\n", + "\n", + " train.schema = schema\n", + "\n", + " input_block = mm.InputBlockV2(\n", + " train.schema.select_by_name('sess_pid_seq'), \n", + " embeddings=mm.Embeddings(\n", + " train.schema.select_by_name('sess_pid_seq'), \n", + " sequence_combiner=None,\n", + " dim=d_model\n", + " ),\n", + " # pre=mm.StochasticSwapNoise()\n", + " )\n", + "\n", + " train.schema = train.schema.select_by_name('sess_pid_seq')\n", + "\n", + " xlnet_block = mm.XLNetBlock(d_model=d_model, n_head=n_head, n_layer=n_layer)\n", + "\n", + " dense_block = mm.SequentialBlock(\n", + " input_block,\n", + " mlp_block,\n", + " xlnet_block\n", + " )\n", + "\n", + " mlp_block2 = mm.MLPBlock(\n", + " [128,d_model],\n", + " activation='relu',\n", + " no_activation_last_layer=True,\n", + " )\n", + "\n", + " prediction_task = mm.CategoricalOutput(\n", + " to_call=input_block[\"categorical\"][target],\n", + " )\n", + "\n", + " model_transformer = mm.Model(dense_block, mlp_block2, prediction_task)\n", + "\n", + " optimizer = tf.keras.optimizers.Adam(\n", + " learning_rate=learning_rate,\n", + " )\n", + "\n", + " model_transformer.compile(run_eagerly=False, optimizer=optimizer, loss=\"categorical_crossentropy\",\n", + " metrics=mm.TopKMetricsAggregator.default_metrics(top_ks=[10])\n", + " )\n", + " return model_transformer, xlnet_block" + ] + }, + { + "cell_type": "markdown", + "id": "78302207", + "metadata": {}, + "source": [ + "# Run 1" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "e7474131", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-10 13:57:17.631317: I tensorflow/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8700\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", + "677/677 [==============================] - 82s 110ms/step - loss: 8.8265 - recall_at_10: 0.0432 - mrr_at_10: 0.0166 - ndcg_at_10: 0.0228 - map_at_10: 0.0166 - precision_at_10: 0.0043 - regularization_loss: 0.0000e+00 - loss_batch: 8.8191\n", + "Epoch 2/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 5.8014 - recall_at_10: 0.3091 - mrr_at_10: 0.1936 - ndcg_at_10: 0.2210 - map_at_10: 0.1936 - precision_at_10: 0.0309 - regularization_loss: 0.0000e+00 - loss_batch: 5.8019\n", + "Epoch 3/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 4.1718 - recall_at_10: 0.5397 - mrr_at_10: 0.4080 - ndcg_at_10: 0.4394 - map_at_10: 0.4080 - precision_at_10: 0.0540 - regularization_loss: 0.0000e+00 - loss_batch: 4.1734\n", + "Epoch 4/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 3.2806 - recall_at_10: 0.6585 - mrr_at_10: 0.5362 - ndcg_at_10: 0.5656 - map_at_10: 0.5362 - precision_at_10: 0.0658 - regularization_loss: 0.0000e+00 - loss_batch: 3.2849\n", + "Epoch 5/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 2.8188 - recall_at_10: 0.7125 - mrr_at_10: 0.6007 - ndcg_at_10: 0.6276 - map_at_10: 0.6007 - precision_at_10: 0.0712 - regularization_loss: 0.0000e+00 - loss_batch: 2.8246\n", + "84/84 [==============================] - 7s 39ms/step - loss: 8.8107 - recall_at_10: 0.1511 - mrr_at_10: 0.0623 - ndcg_at_10: 0.0829 - map_at_10: 0.0623 - precision_at_10: 0.0151 - regularization_loss: 0.0000e+00 - loss_batch: 8.8298\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.810694694519043,\n", + " 'recall_at_10': 0.15318788588047028,\n", + " 'mrr_at_10': 0.06131112948060036,\n", + " 'ndcg_at_10': 0.08268804848194122,\n", + " 'map_at_10': 0.06131112948060036,\n", + " 'precision_at_10': 0.015318789519369602,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 9.6568603515625}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + "model_transformer.fit(\n", + " 
train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "3513d28a", + "metadata": {}, + "source": [ + "# Run 2" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "2e624551", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model_1/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model_1/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", + "677/677 [==============================] - 80s 110ms/step - loss: 9.1281 - recall_at_10: 0.0359 - mrr_at_10: 0.0128 - ndcg_at_10: 0.0181 - map_at_10: 0.0128 - precision_at_10: 0.0036 - regularization_loss: 0.0000e+00 - loss_batch: 9.1243\n", + "Epoch 2/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 6.7038 - recall_at_10: 0.1907 - mrr_at_10: 0.1006 - ndcg_at_10: 0.1218 - map_at_10: 0.1006 - precision_at_10: 0.0191 - regularization_loss: 0.0000e+00 - loss_batch: 6.6971\n", + "Epoch 3/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 4.9471 - recall_at_10: 0.4404 - mrr_at_10: 0.3077 - ndcg_at_10: 0.3393 - map_at_10: 0.3077 - precision_at_10: 0.0440 - regularization_loss: 0.0000e+00 - loss_batch: 4.9478\n", + "Epoch 4/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 3.9842 - recall_at_10: 0.5607 - mrr_at_10: 0.4197 - ndcg_at_10: 0.4534 - map_at_10: 0.4197 - precision_at_10: 0.0561 - regularization_loss: 0.0000e+00 - loss_batch: 3.9878\n", + "Epoch 5/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 3.3262 - recall_at_10: 0.6442 - mrr_at_10: 0.5172 - ndcg_at_10: 0.5477 - map_at_10: 0.5172 - precision_at_10: 0.0644 - regularization_loss: 0.0000e+00 - loss_batch: 3.3307\n", + "84/84 [==============================] - 7s 40ms/step - loss: 8.9716 - recall_at_10: 0.1277 - mrr_at_10: 0.0513 - ndcg_at_10: 0.0692 - map_at_10: 0.0513 - precision_at_10: 0.0128 - regularization_loss: 0.0000e+00 - loss_batch: 8.9960\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.971626281738281,\n", + " 'recall_at_10': 0.12817378342151642,\n", + " 'mrr_at_10': 0.05082216113805771,\n", + " 'ndcg_at_10': 0.06883765012025833,\n", + " 'map_at_10': 0.05082216113805771,\n", + " 'precision_at_10': 0.012817380018532276,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 10.049013137817383}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + "model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "d42dea65", + "metadata": {}, + "source": [ + "# Run 3" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "97e7322c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model_2/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model_2/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", + "677/677 [==============================] - 80s 110ms/step - loss: 8.9307 - recall_at_10: 0.0396 - mrr_at_10: 0.0142 - ndcg_at_10: 0.0201 - map_at_10: 0.0142 - precision_at_10: 0.0040 - regularization_loss: 0.0000e+00 - loss_batch: 8.9265\n", + "Epoch 2/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 5.9376 - recall_at_10: 0.2951 - mrr_at_10: 0.1842 - ndcg_at_10: 0.2105 - map_at_10: 0.1842 - precision_at_10: 0.0295 - regularization_loss: 0.0000e+00 - loss_batch: 5.9350\n", + "Epoch 3/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 4.3616 - recall_at_10: 0.5184 - mrr_at_10: 0.3844 - ndcg_at_10: 0.4164 - map_at_10: 0.3844 - precision_at_10: 0.0518 - regularization_loss: 0.0000e+00 - loss_batch: 4.3657\n", + "Epoch 4/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 3.4916 - recall_at_10: 0.6319 - mrr_at_10: 0.5057 - ndcg_at_10: 0.5359 - map_at_10: 0.5057 - precision_at_10: 0.0632 - regularization_loss: 0.0000e+00 - loss_batch: 3.4969\n", + "Epoch 5/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 3.0021 - recall_at_10: 0.6889 - mrr_at_10: 0.5684 - ndcg_at_10: 0.5973 - map_at_10: 0.5684 - precision_at_10: 0.0689 - regularization_loss: 0.0000e+00 - loss_batch: 3.0072\n", + "84/84 [==============================] - 7s 40ms/step - loss: 8.7983 - recall_at_10: 0.1534 - mrr_at_10: 0.0599 - ndcg_at_10: 0.0816 - map_at_10: 0.0599 - precision_at_10: 0.0153 - regularization_loss: 0.0000e+00 - loss_batch: 8.8378\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.798320770263672,\n", + " 'recall_at_10': 0.15647922456264496,\n", + " 'mrr_at_10': 0.05985381081700325,\n", + " 'ndcg_at_10': 0.08228185027837753,\n", + " 'map_at_10': 0.05985381081700325,\n", + " 'precision_at_10': 0.015647921711206436,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 10.545936584472656}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + 
"model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "610da911", + "metadata": {}, + "source": [ + "# Run 4" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "9e0f0891", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model_3/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model_3/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", + "677/677 [==============================] - 80s 110ms/step - loss: 8.8791 - recall_at_10: 0.0414 - mrr_at_10: 0.0155 - ndcg_at_10: 0.0215 - map_at_10: 0.0155 - precision_at_10: 0.0041 - regularization_loss: 0.0000e+00 - loss_batch: 8.8746\n", + "Epoch 2/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 5.5817 - recall_at_10: 0.3289 - mrr_at_10: 0.2127 - ndcg_at_10: 0.2403 - map_at_10: 0.2127 - precision_at_10: 0.0329 - regularization_loss: 0.0000e+00 - loss_batch: 5.5795\n", + "Epoch 3/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 3.8784 - recall_at_10: 0.5761 - mrr_at_10: 0.4489 - ndcg_at_10: 0.4793 - map_at_10: 0.4489 - precision_at_10: 0.0576 - regularization_loss: 0.0000e+00 - loss_batch: 3.8833\n", + "Epoch 4/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 3.0679 - recall_at_10: 0.6797 - mrr_at_10: 0.5656 - ndcg_at_10: 0.5930 - map_at_10: 0.5656 - precision_at_10: 0.0680 - regularization_loss: 0.0000e+00 - loss_batch: 3.0749\n", + "Epoch 5/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 2.5693 - recall_at_10: 0.7397 - mrr_at_10: 0.6350 - ndcg_at_10: 0.6602 - map_at_10: 0.6350 - precision_at_10: 0.0740 - regularization_loss: 0.0000e+00 - loss_batch: 2.5767\n", + "84/84 [==============================] - 7s 40ms/step - loss: 8.6399 - recall_at_10: 0.1581 - mrr_at_10: 0.0621 - ndcg_at_10: 0.0844 - map_at_10: 0.0621 - precision_at_10: 0.0158 - regularization_loss: 0.0000e+00 - loss_batch: 8.6637\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.63992691040039,\n", + " 'recall_at_10': 0.1588301658630371,\n", + " 'mrr_at_10': 0.06323756277561188,\n", + " 'ndcg_at_10': 0.0855293795466423,\n", + " 'map_at_10': 0.06323756277561188,\n", + " 'precision_at_10': 0.01588302105665207,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 9.691500663757324}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + "model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "6cffc60d", + "metadata": {}, + "source": [ + "# Run 5" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "6981ff6e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model_4/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. 
If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model_4/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", + "677/677 [==============================] - 80s 110ms/step - loss: 8.9605 - recall_at_10: 0.0390 - mrr_at_10: 0.0141 - ndcg_at_10: 0.0199 - map_at_10: 0.0141 - precision_at_10: 0.0039 - regularization_loss: 0.0000e+00 - loss_batch: 8.9571\n", + "Epoch 2/5\n", + "677/677 [==============================] - 75s 110ms/step - loss: 6.1194 - recall_at_10: 0.2618 - mrr_at_10: 0.1575 - ndcg_at_10: 0.1821 - map_at_10: 0.1575 - precision_at_10: 0.0262 - regularization_loss: 0.0000e+00 - loss_batch: 6.1199\n", + "Epoch 3/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 4.4762 - recall_at_10: 0.5000 - mrr_at_10: 0.3647 - ndcg_at_10: 0.3970 - map_at_10: 0.3647 - precision_at_10: 0.0500 - regularization_loss: 0.0000e+00 - loss_batch: 4.4783\n", + "Epoch 4/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 3.6222 - recall_at_10: 0.6166 - mrr_at_10: 0.4884 - ndcg_at_10: 0.5191 - map_at_10: 0.4884 - precision_at_10: 0.0617 - regularization_loss: 0.0000e+00 - loss_batch: 3.6248\n", + "Epoch 5/5\n", + "677/677 [==============================] - 75s 111ms/step - loss: 3.1115 - recall_at_10: 0.6744 - mrr_at_10: 0.5505 - ndcg_at_10: 0.5803 - map_at_10: 0.5505 - precision_at_10: 0.0674 - regularization_loss: 0.0000e+00 - loss_batch: 3.1192\n", + "84/84 [==============================] - 7s 40ms/step - loss: 8.8991 - recall_at_10: 0.1457 - mrr_at_10: 0.0572 - ndcg_at_10: 0.0776 - map_at_10: 0.0572 - precision_at_10: 0.0146 - regularization_loss: 0.0000e+00 - loss_batch: 8.9238\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.899141311645508,\n", + " 'recall_at_10': 0.14763964712619781,\n", + " 'mrr_at_10': 0.05743885040283203,\n", + " 'ndcg_at_10': 0.07836496829986572,\n", + " 'map_at_10': 0.05743885040283203,\n", + " 'precision_at_10': 0.014763964340090752,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 9.991716384887695}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + 
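"# Same setup as the previous runs: fit with SequencePredictNext, evaluate the last item of each validation session with SequencePredictLast\n", +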
"model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d195f16d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}