diff --git a/T4Rec_repro/train_runs/clm_item_id_min.ipynb b/T4Rec_repro/train_runs/clm_item_id.ipynb similarity index 60% rename from T4Rec_repro/train_runs/clm_item_id_min.ipynb rename to T4Rec_repro/train_runs/clm_item_id.ipynb index ff5eabbe86..afe12df6f5 100644 --- a/T4Rec_repro/train_runs/clm_item_id_min.ipynb +++ b/T4Rec_repro/train_runs/clm_item_id.ipynb @@ -10,7 +10,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-03-10 13:19:41.332031: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "2023-03-13 20:54:36.957592: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" ] }, @@ -27,21 +27,21 @@ "text": [ "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", - "2023-03-10 13:19:43.702598: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:19:43.703049: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:19:43.703227: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:54:39.345898: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:54:39.346296: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:54:39.346453: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n", - "2023-03-10 13:19:44.148806: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "2023-03-13 20:54:39.777830: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-03-10 13:19:44.149822: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:19:44.150030: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:19:44.150185: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:19:44.891194: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:19:44.891419: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:19:44.891582: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:19:44.891696: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", - "2023-03-10 13:19:44.891761: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n" + "2023-03-13 20:54:39.778681: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:54:39.778886: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:54:39.779040: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:54:40.525430: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:54:40.525647: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:54:40.525808: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:54:40.525922: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", + "2023-03-13 20:54:40.525983: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n" ] } ], @@ -66,7 +66,7 @@ "outputs": [], "source": [ "train = Dataset(\"ecom_dataset/0001/train.parquet\")\n", - "valid = Dataset(\"ecom_dataset/0002/valid.parquet\")" + "valid = Dataset(\"ecom_dataset/0002/test.parquet\")" ] }, { @@ -93,7 +93,9 @@ "n_head = 16\n", "batch_size = 128\n", "learning_rate = 0.0006667377132554976\n", - "n_epoch = 5" + "n_epoch = 5\n", + "item_embedding_dim = 448 \n", + "item_id_embeddings_init_std = 3" ] }, { @@ -105,7 +107,7 @@ "source": [ "def get_model():\n", " mlp_block = mm.MLPBlock(\n", - " [128,d_model],\n", + " [d_model],\n", " activation='relu',\n", " no_activation_last_layer=True,\n", " )\n", @@ -118,16 +120,16 @@ " ).to_merlin_schema()\n", "\n", " train.schema = schema\n", - "\n", + " \n", + " schema_model = schema.select_by_tag(Tags.ITEM_ID)\n", " input_block = mm.InputBlockV2(\n", - " train.schema.select_by_name('sess_pid_seq'), \n", - " embeddings=mm.Embeddings(\n", - " train.schema.select_by_name('sess_pid_seq'), \n", - " sequence_combiner=None,\n", - " dim=d_model\n", - " ),\n", - " # pre=mm.StochasticSwapNoise()\n", - " )\n", + " schema_model,\n", + " categorical=mm.Embeddings(\n", + " schema_model.select_by_tag(Tags.CATEGORICAL),\n", + " dim=item_embedding_dim,\n", + " sequence_combiner=None,\n", + " )\n", + " )\n", "\n", " train.schema = train.schema.select_by_name('sess_pid_seq')\n", "\n", @@ -140,7 +142,7 @@ " )\n", "\n", " mlp_block2 = mm.MLPBlock(\n", - " [128,d_model],\n", + " [item_embedding_dim],\n", " activation='relu',\n", " no_activation_last_layer=True,\n", " )\n", @@ -156,7 +158,7 @@ " )\n", "\n", " model_transformer.compile(run_eagerly=False, optimizer=optimizer, loss=\"categorical_crossentropy\",\n", - " metrics=mm.TopKMetricsAggregator.default_metrics(top_ks=[10])\n", + " metrics=mm.TopKMetricsAggregator.default_metrics(top_ks=[20])\n", " )\n", " return model_transformer, xlnet_block" ] @@ -171,46 +173,23 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 11, "id": "e7474131", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", - " warnings.warn(\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ - "Epoch 1/5\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-03-10 13:19:51.258201: I tensorflow/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8700\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + "Epoch 1/5\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model_5/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_5/sequential_block_29/xl_net_block_5/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_5/sequential_block_29/xl_net_block_5/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_5/sequential_block_29/xl_net_block_5/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", " warnings.warn(\n" ] }, @@ -218,33 +197,33 @@ "name": "stdout", "output_type": "stream", "text": [ - "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", - "677/677 [==============================] - 83s 110ms/step - loss: 8.9114 - recall_at_10: 0.0409 - mrr_at_10: 0.0151 - ndcg_at_10: 0.0211 - map_at_10: 0.0151 - precision_at_10: 0.0041 - regularization_loss: 0.0000e+00 - loss_batch: 8.9101\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model_5/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", + "677/677 [==============================] - 150s 214ms/step - loss: 8.4001 - recall_at_20: 0.0827 - mrr_at_20: 0.0323 - ndcg_at_20: 0.0433 - map_at_20: 0.0323 - precision_at_20: 0.0041 - regularization_loss: 0.0000e+00 - loss_batch: 8.3857\n", "Epoch 2/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 5.7403 - recall_at_10: 0.2994 - mrr_at_10: 0.1857 - ndcg_at_10: 0.2126 - map_at_10: 0.1857 - precision_at_10: 0.0299 - regularization_loss: 0.0000e+00 - loss_batch: 5.7358\n", + "677/677 [==============================] - 159s 234ms/step - loss: 3.2173 - recall_at_20: 0.6874 - mrr_at_20: 0.5632 - ndcg_at_20: 0.5917 - map_at_20: 0.5632 - precision_at_20: 0.0344 - regularization_loss: 0.0000e+00 - loss_batch: 3.2233\n", "Epoch 3/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 3.8640 - recall_at_10: 0.5785 - mrr_at_10: 0.4536 - ndcg_at_10: 0.4835 - map_at_10: 0.4536 - precision_at_10: 0.0579 - regularization_loss: 0.0000e+00 - loss_batch: 3.8700\n", + "677/677 [==============================] - 159s 235ms/step - loss: 2.0390 - recall_at_20: 0.8298 - mrr_at_20: 0.7342 - ndcg_at_20: 0.7561 - map_at_20: 0.7342 - precision_at_20: 0.0415 - regularization_loss: 0.0000e+00 - loss_batch: 2.0462\n", "Epoch 4/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 3.0672 - recall_at_10: 0.6808 - mrr_at_10: 0.5664 - ndcg_at_10: 0.5939 - map_at_10: 0.5664 - precision_at_10: 0.0681 - regularization_loss: 0.0000e+00 - loss_batch: 3.0720\n", + "677/677 [==============================] - 160s 235ms/step - loss: 1.5995 - recall_at_20: 0.8662 - mrr_at_20: 0.7825 - ndcg_at_20: 0.8016 - map_at_20: 0.7825 - precision_at_20: 0.0433 - regularization_loss: 0.0000e+00 - loss_batch: 1.6068\n", "Epoch 5/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 2.6008 - recall_at_10: 0.7369 - mrr_at_10: 0.6298 - ndcg_at_10: 0.6556 - map_at_10: 0.6298 - precision_at_10: 0.0737 - regularization_loss: 0.0000e+00 - loss_batch: 2.6062\n", - "84/84 [==============================] - 7s 40ms/step - loss: 8.7419 - recall_at_10: 0.1679 - mrr_at_10: 0.0639 - ndcg_at_10: 0.0881 - map_at_10: 0.0639 - precision_at_10: 0.0168 - regularization_loss: 0.0000e+00 - loss_batch: 8.7705\n" + "677/677 [==============================] - 160s 236ms/step - loss: 1.3356 - recall_at_20: 0.8955 - mrr_at_20: 0.8085 - ndcg_at_20: 0.8284 - map_at_20: 0.8085 - precision_at_20: 0.0448 - regularization_loss: 0.0000e+00 - loss_batch: 1.3422\n", + "84/84 [==============================] - 13s 89ms/step - loss: 8.9283 - recall_at_20: 0.3217 - mrr_at_20: 0.1205 - ndcg_at_20: 0.1651 - map_at_20: 0.1205 - precision_at_20: 0.0161 - regularization_loss: 0.0000e+00 - loss_batch: 8.9661\n" ] }, { "data": { "text/plain": [ - "{'loss': 8.741933822631836,\n", - " 'recall_at_10': 0.16701146960258484,\n", - " 'mrr_at_10': 0.06411589682102203,\n", - " 'ndcg_at_10': 0.08810190856456757,\n", - " 'map_at_10': 0.06411589682102203,\n", - " 'precision_at_10': 0.016701148822903633,\n", + "{'loss': 8.928336143493652,\n", + " 'recall_at_20': 0.32508933544158936,\n", + " 'mrr_at_20': 0.11867032200098038,\n", + " 'ndcg_at_20': 0.16441309452056885,\n", + " 'map_at_20': 0.11867032200098038,\n", + " 'precision_at_20': 0.016254469752311707,\n", " 'regularization_loss': 0.0,\n", - " 'loss_batch': 10.003721237182617}" + " 'loss_batch': 10.598859786987305}" ] }, - "execution_count": 6, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -267,6 +246,47 @@ ")" ] }, + { + "cell_type": "code", + "execution_count": 12, + "id": "117174c6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"model_5\"\n", + "_________________________________________________________________\n", + " Layer (type) Output Shape Param # \n", + "=================================================================\n", + " sequential_block_29 (Sequen multiple 176252608 \n", + " tialBlock) \n", + " \n", + " sequential_block_30 (Sequen multiple 86464 \n", + " tialBlock) \n", + " \n", + " sess_pid_seq/categorical_ou multiple 175110449 \n", + " tput (CategoricalOutput) \n", + " \n", + " model_context_5 (ModelConte multiple 0 \n", + " xt) \n", + " \n", + " prepare_features_11 (Prepar multiple 0 \n", + " eFeatures) \n", + " \n", + "=================================================================\n", + "Total params: 176,729,074\n", + "Trainable params: 176,729,073\n", + "Non-trainable params: 1\n", + "_________________________________________________________________\n" + ] + } + ], + "source": [ + "model_transformer.summary()" + ] + }, { "cell_type": "markdown", "id": "a070554f", @@ -310,29 +330,29 @@ "output_type": "stream", "text": [ "WARNING:tensorflow:Gradients do not exist for variables ['model_1/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", - "677/677 [==============================] - 80s 110ms/step - loss: 9.0406 - recall_at_10: 0.0356 - mrr_at_10: 0.0130 - ndcg_at_10: 0.0183 - map_at_10: 0.0130 - precision_at_10: 0.0036 - regularization_loss: 0.0000e+00 - loss_batch: 9.0326\n", + "677/677 [==============================] - 105s 146ms/step - loss: 7.2092 - recall_at_20: 0.1524 - mrr_at_20: 0.0873 - ndcg_at_20: 0.1018 - map_at_20: 0.0873 - precision_at_20: 0.0076 - regularization_loss: 0.0000e+00 - loss_batch: 7.2024\n", "Epoch 2/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 6.0845 - recall_at_10: 0.2649 - mrr_at_10: 0.1565 - ndcg_at_10: 0.1821 - map_at_10: 0.1565 - precision_at_10: 0.0265 - regularization_loss: 0.0000e+00 - loss_batch: 6.0807\n", + "677/677 [==============================] - 100s 147ms/step - loss: 2.8315 - recall_at_20: 0.7410 - mrr_at_20: 0.6393 - ndcg_at_20: 0.6625 - map_at_20: 0.6393 - precision_at_20: 0.0370 - regularization_loss: 0.0000e+00 - loss_batch: 2.8376\n", "Epoch 3/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 4.2293 - recall_at_10: 0.5184 - mrr_at_10: 0.3883 - ndcg_at_10: 0.4194 - map_at_10: 0.3883 - precision_at_10: 0.0518 - regularization_loss: 0.0000e+00 - loss_batch: 4.2323\n", + "677/677 [==============================] - 100s 147ms/step - loss: 1.9715 - recall_at_20: 0.8361 - mrr_at_20: 0.7449 - ndcg_at_20: 0.7658 - map_at_20: 0.7449 - precision_at_20: 0.0418 - regularization_loss: 0.0000e+00 - loss_batch: 1.9781\n", "Epoch 4/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 3.3441 - recall_at_10: 0.6468 - mrr_at_10: 0.5229 - ndcg_at_10: 0.5526 - map_at_10: 0.5229 - precision_at_10: 0.0647 - regularization_loss: 0.0000e+00 - loss_batch: 3.3486\n", + "677/677 [==============================] - 100s 148ms/step - loss: 1.5735 - recall_at_20: 0.8706 - mrr_at_20: 0.7848 - ndcg_at_20: 0.8044 - map_at_20: 0.7848 - precision_at_20: 0.0435 - regularization_loss: 0.0000e+00 - loss_batch: 1.5798\n", "Epoch 5/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 2.8789 - recall_at_10: 0.7051 - mrr_at_10: 0.5871 - ndcg_at_10: 0.6155 - map_at_10: 0.5871 - precision_at_10: 0.0705 - regularization_loss: 0.0000e+00 - loss_batch: 2.8854\n", - "84/84 [==============================] - 7s 40ms/step - loss: 8.8279 - recall_at_10: 0.1584 - mrr_at_10: 0.0624 - ndcg_at_10: 0.0847 - map_at_10: 0.0624 - precision_at_10: 0.0158 - regularization_loss: 0.0000e+00 - loss_batch: 8.8674\n" + "677/677 [==============================] - 100s 147ms/step - loss: 1.3392 - recall_at_20: 0.8979 - mrr_at_20: 0.8098 - ndcg_at_20: 0.8299 - map_at_20: 0.8098 - precision_at_20: 0.0449 - regularization_loss: 0.0000e+00 - loss_batch: 1.3459\n", + "84/84 [==============================] - 8s 44ms/step - loss: 8.8929 - recall_at_20: 0.3268 - mrr_at_20: 0.1240 - ndcg_at_20: 0.1687 - map_at_20: 0.1240 - precision_at_20: 0.0163 - regularization_loss: 0.0000e+00 - loss_batch: 8.9432\n" ] }, { "data": { "text/plain": [ - "{'loss': 8.827858924865723,\n", - " 'recall_at_10': 0.15591499209403992,\n", - " 'mrr_at_10': 0.06090494617819786,\n", - " 'ndcg_at_10': 0.08297329396009445,\n", - " 'map_at_10': 0.06090494617819786,\n", - " 'precision_at_10': 0.01559150218963623,\n", + "{'loss': 8.892891883850098,\n", + " 'recall_at_20': 0.3253714442253113,\n", + " 'mrr_at_20': 0.11890144646167755,\n", + " 'ndcg_at_20': 0.16443441808223724,\n", + " 'map_at_20': 0.11890144646167755,\n", + " 'precision_at_20': 0.016268571838736534,\n", " 'regularization_loss': 0.0,\n", - " 'loss_batch': 10.57563304901123}" + " 'loss_batch': 11.120135307312012}" ] }, "execution_count": 7, @@ -393,29 +413,29 @@ "output_type": "stream", "text": [ "WARNING:tensorflow:Gradients do not exist for variables ['model_2/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", - "677/677 [==============================] - 80s 110ms/step - loss: 8.9635 - recall_at_10: 0.0396 - mrr_at_10: 0.0146 - ndcg_at_10: 0.0204 - map_at_10: 0.0146 - precision_at_10: 0.0040 - regularization_loss: 0.0000e+00 - loss_batch: 8.9589\n", + "677/677 [==============================] - 105s 146ms/step - loss: 7.7130 - recall_at_20: 0.1178 - mrr_at_20: 0.0575 - ndcg_at_20: 0.0708 - map_at_20: 0.0575 - precision_at_20: 0.0059 - regularization_loss: 0.0000e+00 - loss_batch: 7.7064\n", "Epoch 2/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 6.2358 - recall_at_10: 0.2417 - mrr_at_10: 0.1391 - ndcg_at_10: 0.1633 - map_at_10: 0.1391 - precision_at_10: 0.0242 - regularization_loss: 0.0000e+00 - loss_batch: 6.2350\n", + "677/677 [==============================] - 100s 147ms/step - loss: 2.9553 - recall_at_20: 0.7234 - mrr_at_20: 0.6112 - ndcg_at_20: 0.6369 - map_at_20: 0.6112 - precision_at_20: 0.0362 - regularization_loss: 0.0000e+00 - loss_batch: 2.9622\n", "Epoch 3/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 4.4725 - recall_at_10: 0.4996 - mrr_at_10: 0.3675 - ndcg_at_10: 0.3991 - map_at_10: 0.3675 - precision_at_10: 0.0500 - regularization_loss: 0.0000e+00 - loss_batch: 4.4748\n", + "677/677 [==============================] - 100s 147ms/step - loss: 1.9463 - recall_at_20: 0.8379 - mrr_at_20: 0.7465 - ndcg_at_20: 0.7675 - map_at_20: 0.7465 - precision_at_20: 0.0419 - regularization_loss: 0.0000e+00 - loss_batch: 1.9539\n", "Epoch 4/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 3.6489 - recall_at_10: 0.6128 - mrr_at_10: 0.4839 - ndcg_at_10: 0.5148 - map_at_10: 0.4839 - precision_at_10: 0.0613 - regularization_loss: 0.0000e+00 - loss_batch: 3.6543\n", + "677/677 [==============================] - 100s 148ms/step - loss: 1.5422 - recall_at_20: 0.8713 - mrr_at_20: 0.7875 - ndcg_at_20: 0.8066 - map_at_20: 0.7875 - precision_at_20: 0.0436 - regularization_loss: 0.0000e+00 - loss_batch: 1.5490\n", "Epoch 5/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 3.2122 - recall_at_10: 0.6626 - mrr_at_10: 0.5353 - ndcg_at_10: 0.5659 - map_at_10: 0.5353 - precision_at_10: 0.0663 - regularization_loss: 0.0000e+00 - loss_batch: 3.2164\n", - "84/84 [==============================] - 7s 39ms/step - loss: 8.8321 - recall_at_10: 0.1434 - mrr_at_10: 0.0582 - ndcg_at_10: 0.0781 - map_at_10: 0.0582 - precision_at_10: 0.0143 - regularization_loss: 0.0000e+00 - loss_batch: 8.8607\n" + "677/677 [==============================] - 100s 147ms/step - loss: 1.3310 - recall_at_20: 0.8989 - mrr_at_20: 0.8116 - ndcg_at_20: 0.8315 - map_at_20: 0.8116 - precision_at_20: 0.0449 - regularization_loss: 0.0000e+00 - loss_batch: 1.3377\n", + "84/84 [==============================] - 7s 43ms/step - loss: 8.8519 - recall_at_20: 0.3266 - mrr_at_20: 0.1215 - ndcg_at_20: 0.1670 - map_at_20: 0.1215 - precision_at_20: 0.0163 - regularization_loss: 0.0000e+00 - loss_batch: 8.8791\n" ] }, { "data": { "text/plain": [ - "{'loss': 8.832069396972656,\n", - " 'recall_at_10': 0.1426556259393692,\n", - " 'mrr_at_10': 0.05639006569981575,\n", - " 'ndcg_at_10': 0.07650619745254517,\n", - " 'map_at_10': 0.05639006569981575,\n", - " 'precision_at_10': 0.01426556333899498,\n", + "{'loss': 8.851947784423828,\n", + " 'recall_at_20': 0.3281925916671753,\n", + " 'mrr_at_20': 0.11986491084098816,\n", + " 'ndcg_at_20': 0.16598893702030182,\n", + " 'map_at_20': 0.11986491084098816,\n", + " 'precision_at_20': 0.016409626230597496,\n", " 'regularization_loss': 0.0,\n", - " 'loss_batch': 10.100401878356934}" + " 'loss_batch': 10.054880142211914}" ] }, "execution_count": 8, @@ -476,29 +496,29 @@ "output_type": "stream", "text": [ "WARNING:tensorflow:Gradients do not exist for variables ['model_3/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", - "677/677 [==============================] - 80s 110ms/step - loss: 9.0739 - recall_at_10: 0.0361 - mrr_at_10: 0.0125 - ndcg_at_10: 0.0180 - map_at_10: 0.0125 - precision_at_10: 0.0036 - regularization_loss: 0.0000e+00 - loss_batch: 9.0756\n", + "677/677 [==============================] - 105s 146ms/step - loss: 7.6534 - recall_at_20: 0.1225 - mrr_at_20: 0.0618 - ndcg_at_20: 0.0752 - map_at_20: 0.0618 - precision_at_20: 0.0061 - regularization_loss: 0.0000e+00 - loss_batch: 7.6446\n", "Epoch 2/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 6.4023 - recall_at_10: 0.2372 - mrr_at_10: 0.1349 - ndcg_at_10: 0.1591 - map_at_10: 0.1349 - precision_at_10: 0.0237 - regularization_loss: 0.0000e+00 - loss_batch: 6.4020\n", + "677/677 [==============================] - 100s 147ms/step - loss: 2.9167 - recall_at_20: 0.7304 - mrr_at_20: 0.6236 - ndcg_at_20: 0.6481 - map_at_20: 0.6236 - precision_at_20: 0.0365 - regularization_loss: 0.0000e+00 - loss_batch: 2.9215\n", "Epoch 3/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 4.7934 - recall_at_10: 0.4544 - mrr_at_10: 0.3194 - ndcg_at_10: 0.3516 - map_at_10: 0.3194 - precision_at_10: 0.0454 - regularization_loss: 0.0000e+00 - loss_batch: 4.7958\n", + "677/677 [==============================] - 100s 147ms/step - loss: 1.9719 - recall_at_20: 0.8351 - mrr_at_20: 0.7428 - ndcg_at_20: 0.7639 - map_at_20: 0.7428 - precision_at_20: 0.0418 - regularization_loss: 0.0000e+00 - loss_batch: 1.9820\n", "Epoch 4/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 3.7131 - recall_at_10: 0.5913 - mrr_at_10: 0.4595 - ndcg_at_10: 0.4911 - map_at_10: 0.4595 - precision_at_10: 0.0591 - regularization_loss: 0.0000e+00 - loss_batch: 3.7160\n", + "677/677 [==============================] - 100s 148ms/step - loss: 1.5807 - recall_at_20: 0.8694 - mrr_at_20: 0.7840 - ndcg_at_20: 0.8035 - map_at_20: 0.7840 - precision_at_20: 0.0435 - regularization_loss: 0.0000e+00 - loss_batch: 1.5877\n", "Epoch 5/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 3.0900 - recall_at_10: 0.6752 - mrr_at_10: 0.5537 - ndcg_at_10: 0.5829 - map_at_10: 0.5537 - precision_at_10: 0.0675 - regularization_loss: 0.0000e+00 - loss_batch: 3.0945\n", - "84/84 [==============================] - 7s 40ms/step - loss: 8.9225 - recall_at_10: 0.1426 - mrr_at_10: 0.0581 - ndcg_at_10: 0.0778 - map_at_10: 0.0581 - precision_at_10: 0.0143 - regularization_loss: 0.0000e+00 - loss_batch: 8.9683\n" + "677/677 [==============================] - 100s 147ms/step - loss: 1.3320 - recall_at_20: 0.8979 - mrr_at_20: 0.8110 - ndcg_at_20: 0.8308 - map_at_20: 0.8110 - precision_at_20: 0.0449 - regularization_loss: 0.0000e+00 - loss_batch: 1.3375\n", + "84/84 [==============================] - 7s 44ms/step - loss: 8.9484 - recall_at_20: 0.3267 - mrr_at_20: 0.1212 - ndcg_at_20: 0.1669 - map_at_20: 0.1212 - precision_at_20: 0.0163 - regularization_loss: 0.0000e+00 - loss_batch: 8.9818\n" ] }, { "data": { "text/plain": [ - "{'loss': 8.922541618347168,\n", - " 'recall_at_10': 0.14425428211688995,\n", - " 'mrr_at_10': 0.057682257145643234,\n", - " 'ndcg_at_10': 0.077837273478508,\n", - " 'map_at_10': 0.057682257145643234,\n", - " 'precision_at_10': 0.014425428584218025,\n", + "{'loss': 8.948363304138184,\n", + " 'recall_at_20': 0.3253714442253113,\n", + " 'mrr_at_20': 0.11741983145475388,\n", + " 'ndcg_at_20': 0.16352491080760956,\n", + " 'map_at_20': 0.11741983145475388,\n", + " 'precision_at_20': 0.016268571838736534,\n", " 'regularization_loss': 0.0,\n", - " 'loss_batch': 10.947548866271973}" + " 'loss_batch': 10.429142951965332}" ] }, "execution_count": 9, @@ -559,29 +579,29 @@ "output_type": "stream", "text": [ "WARNING:tensorflow:Gradients do not exist for variables ['model_4/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", - "677/677 [==============================] - 80s 110ms/step - loss: 9.0454 - recall_at_10: 0.0381 - mrr_at_10: 0.0139 - ndcg_at_10: 0.0195 - map_at_10: 0.0139 - precision_at_10: 0.0038 - regularization_loss: 0.0000e+00 - loss_batch: 9.0386\n", + "677/677 [==============================] - 105s 147ms/step - loss: 7.2975 - recall_at_20: 0.1426 - mrr_at_20: 0.0798 - ndcg_at_20: 0.0937 - map_at_20: 0.0798 - precision_at_20: 0.0071 - regularization_loss: 0.0000e+00 - loss_batch: 7.2845\n", "Epoch 2/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 6.3489 - recall_at_10: 0.2430 - mrr_at_10: 0.1385 - ndcg_at_10: 0.1632 - map_at_10: 0.1385 - precision_at_10: 0.0243 - regularization_loss: 0.0000e+00 - loss_batch: 6.3435\n", + "677/677 [==============================] - 100s 147ms/step - loss: 2.8422 - recall_at_20: 0.7408 - mrr_at_20: 0.6384 - ndcg_at_20: 0.6618 - map_at_20: 0.6384 - precision_at_20: 0.0370 - regularization_loss: 0.0000e+00 - loss_batch: 2.8481\n", "Epoch 3/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 4.7853 - recall_at_10: 0.4602 - mrr_at_10: 0.3227 - ndcg_at_10: 0.3555 - map_at_10: 0.3227 - precision_at_10: 0.0460 - regularization_loss: 0.0000e+00 - loss_batch: 4.7868\n", + "677/677 [==============================] - 100s 147ms/step - loss: 1.9844 - recall_at_20: 0.8348 - mrr_at_20: 0.7417 - ndcg_at_20: 0.7630 - map_at_20: 0.7417 - precision_at_20: 0.0417 - regularization_loss: 0.0000e+00 - loss_batch: 1.9915\n", "Epoch 4/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 3.6873 - recall_at_10: 0.6026 - mrr_at_10: 0.4710 - ndcg_at_10: 0.5025 - map_at_10: 0.4710 - precision_at_10: 0.0603 - regularization_loss: 0.0000e+00 - loss_batch: 3.6936\n", + "677/677 [==============================] - 100s 147ms/step - loss: 1.5814 - recall_at_20: 0.8700 - mrr_at_20: 0.7848 - ndcg_at_20: 0.8043 - map_at_20: 0.7848 - precision_at_20: 0.0435 - regularization_loss: 0.0000e+00 - loss_batch: 1.5882\n", "Epoch 5/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 3.0298 - recall_at_10: 0.6856 - mrr_at_10: 0.5650 - ndcg_at_10: 0.5940 - map_at_10: 0.5650 - precision_at_10: 0.0686 - regularization_loss: 0.0000e+00 - loss_batch: 3.0363\n", - "84/84 [==============================] - 8s 40ms/step - loss: 8.6711 - recall_at_10: 0.1505 - mrr_at_10: 0.0595 - ndcg_at_10: 0.0807 - map_at_10: 0.0595 - precision_at_10: 0.0151 - regularization_loss: 0.0000e+00 - loss_batch: 8.6999\n" + "677/677 [==============================] - 100s 147ms/step - loss: 1.3337 - recall_at_20: 0.8990 - mrr_at_20: 0.8101 - ndcg_at_20: 0.8304 - map_at_20: 0.8101 - precision_at_20: 0.0450 - regularization_loss: 0.0000e+00 - loss_batch: 1.3407\n", + "84/84 [==============================] - 8s 44ms/step - loss: 8.8888 - recall_at_20: 0.3240 - mrr_at_20: 0.1225 - ndcg_at_20: 0.1671 - map_at_20: 0.1225 - precision_at_20: 0.0162 - regularization_loss: 0.0000e+00 - loss_batch: 8.9304\n" ] }, { "data": { "text/plain": [ - "{'loss': 8.671070098876953,\n", - " 'recall_at_10': 0.15074290335178375,\n", - " 'mrr_at_10': 0.05898994952440262,\n", - " 'ndcg_at_10': 0.08035662025213242,\n", - " 'map_at_10': 0.05898994952440262,\n", - " 'precision_at_10': 0.015074292197823524,\n", + "{'loss': 8.888774871826172,\n", + " 'recall_at_20': 0.3228324353694916,\n", + " 'mrr_at_20': 0.11914832890033722,\n", + " 'ndcg_at_20': 0.16426056623458862,\n", + " 'map_at_20': 0.11914832890033722,\n", + " 'precision_at_20': 0.01614162139594555,\n", " 'regularization_loss': 0.0,\n", - " 'loss_batch': 9.946744918823242}" + " 'loss_batch': 10.727699279785156}" ] }, "execution_count": 10, diff --git a/T4Rec_repro/train_runs/mlm_item_id.ipynb b/T4Rec_repro/train_runs/mlm_item_id.ipynb new file mode 100644 index 0000000000..508b4d6aa5 --- /dev/null +++ b/T4Rec_repro/train_runs/mlm_item_id.ipynb @@ -0,0 +1,808 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "ceb3ae93", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-13 20:26:22.114565: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", + "2023-03-13 20:26:24.538242: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:26:24.538645: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:26:24.538803: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "2023-03-13 20:26:24.965689: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-03-13 20:26:24.966631: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:26:24.966839: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:26:24.966994: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:26:25.703328: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:26:25.703539: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:26:25.703699: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-03-13 20:26:25.703813: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", + "2023-03-13 20:26:25.703876: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n" + ] + } + ], + "source": [ + "import os\n", + "os.environ[\"TF_GPU_ALLOCATOR\"]=\"cuda_malloc_async\"\n", + "import gc\n", + "import numpy as np\n", + "\n", + "import tensorflow as tf\n", + "\n", + "from merlin.schema.tags import Tags\n", + "from merlin.io.dataset import Dataset\n", + "import merlin.models.tf as mm" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "11647dd3", + "metadata": {}, + "outputs": [], + "source": [ + "train = Dataset(\"ecom_dataset/0001/train.parquet\")\n", + "valid = Dataset(\"ecom_dataset/0002/test.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "4ab4e0fb", + "metadata": {}, + "outputs": [], + "source": [ + "target = 'sess_pid_seq'" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "8d9903e6", + "metadata": {}, + "outputs": [], + "source": [ + "d_model = 192\n", + "n_layer = 3\n", + "n_head = 16\n", + "batch_size = 128\n", + "learning_rate = 0.0006667377132554976\n", + "n_epoch = 5\n", + "item_embedding_dim = 448 \n", + "item_id_embeddings_init_std = 3" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a6ade14a", + "metadata": {}, + "outputs": [], + "source": [ + "def get_model():\n", + " mlp_block = mm.MLPBlock(\n", + " [d_model],\n", + " activation='relu',\n", + " no_activation_last_layer=True,\n", + " )\n", + "\n", + " from merlin.schema.io.tensorflow_metadata import TensorflowMetadata\n", + "\n", + " schema = TensorflowMetadata.from_proto_text_file(\n", + " '../',\n", + " file_name='rees46_schema_modified.pbtxt'\n", + " ).to_merlin_schema()\n", + "\n", + " train.schema = schema\n", + "\n", + " schema_model = schema.select_by_tag(Tags.ITEM_ID)\n", + " input_block = mm.InputBlockV2(\n", + " schema_model,\n", + " categorical=mm.Embeddings(\n", + " schema_model.select_by_tag(Tags.CATEGORICAL),\n", + " dim=item_embedding_dim,\n", + " sequence_combiner=None,\n", + " )\n", + " )\n", + "\n", + " train.schema = train.schema.select_by_name('sess_pid_seq')\n", + "\n", + " xlnet_block = mm.XLNetBlock(d_model=d_model, n_head=n_head, n_layer=n_layer)\n", + "\n", + " dense_block = mm.SequentialBlock(\n", + " input_block,\n", + " mlp_block,\n", + " xlnet_block\n", + " )\n", + "\n", + " mlp_block2 = mm.MLPBlock(\n", + " [item_embedding_dim],\n", + " activation='relu',\n", + " no_activation_last_layer=True,\n", + " )\n", + "\n", + " prediction_task = mm.CategoricalOutput(\n", + " to_call=input_block[\"categorical\"][target],\n", + " )\n", + "\n", + " model_transformer = mm.Model(dense_block, mlp_block2, prediction_task)\n", + "\n", + " optimizer = tf.keras.optimizers.Adam(\n", + " learning_rate=learning_rate,\n", + " )\n", + "\n", + " model_transformer.compile(run_eagerly=False, optimizer=optimizer, loss=\"categorical_crossentropy\",\n", + " metrics=mm.TopKMetricsAggregator.default_metrics(top_ks=[20])\n", + " )\n", + " return model_transformer, xlnet_block" + ] + }, + { + "cell_type": "markdown", + "id": "78302207", + "metadata": {}, + "source": [ + "# Run 1" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "e7474131", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model_5/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_5/sequential_block_29/xl_net_block_5/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_5/sequential_block_29/xl_net_block_5/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_5/sequential_block_29/xl_net_block_5/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_5/sequential_block_29/xl_net_block_5/sequential_block_32/replace_masked_embeddings_5/RaggedWhere/Reshape_3:0\", shape=(None,), dtype=int64), values=Tensor(\"gradient_tape/model_5/sequential_block_29/xl_net_block_5/sequential_block_32/replace_masked_embeddings_5/RaggedWhere/Reshape_2:0\", shape=(None, None), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_5/sequential_block_29/xl_net_block_5/sequential_block_32/replace_masked_embeddings_5/RaggedWhere/Cast:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_5/sequential_block_29/xl_net_block_5/sequential_block_32/replace_masked_embeddings_5/RaggedWhere/RaggedTile_2/Reshape_3:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_5/sequential_block_29/xl_net_block_5/sequential_block_32/replace_masked_embeddings_5/RaggedWhere/RaggedTile_2/Reshape_2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_5/sequential_block_29/xl_net_block_5/sequential_block_32/replace_masked_embeddings_5/RaggedWhere/RaggedTile_2/Cast:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model_5/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-14 06:41:41.374760: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: model_5/sequential_block_29/xl_net_block_5/sequential_block_32/replace_masked_embeddings_5/RaggedWhere/Assert/AssertGuard/branch_executed/_31\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "677/677 [==============================] - 164s 230ms/step - loss: 9.4924 - recall_at_20: 0.0704 - mrr_at_20: 0.0174 - ndcg_at_20: 0.0288 - map_at_20: 0.0174 - precision_at_20: 0.0035 - regularization_loss: 0.0000e+00 - loss_batch: 9.4899\n", + "Epoch 2/5\n", + "677/677 [==============================] - 159s 234ms/step - loss: 8.0755 - recall_at_20: 0.1759 - mrr_at_20: 0.0480 - ndcg_at_20: 0.0758 - map_at_20: 0.0480 - precision_at_20: 0.0088 - regularization_loss: 0.0000e+00 - loss_batch: 8.0726\n", + "Epoch 3/5\n", + "677/677 [==============================] - 159s 235ms/step - loss: 7.3926 - recall_at_20: 0.2427 - mrr_at_20: 0.0671 - ndcg_at_20: 0.1053 - map_at_20: 0.0671 - precision_at_20: 0.0121 - regularization_loss: 0.0000e+00 - loss_batch: 7.3887\n", + "Epoch 4/5\n", + "677/677 [==============================] - 159s 235ms/step - loss: 6.9299 - recall_at_20: 0.2932 - mrr_at_20: 0.0821 - ndcg_at_20: 0.1281 - map_at_20: 0.0821 - precision_at_20: 0.0147 - regularization_loss: 0.0000e+00 - loss_batch: 6.9255\n", + "Epoch 5/5\n", + "677/677 [==============================] - 143s 211ms/step - loss: 6.5825 - recall_at_20: 0.3350 - mrr_at_20: 0.0951 - ndcg_at_20: 0.1476 - map_at_20: 0.0951 - precision_at_20: 0.0167 - regularization_loss: 0.0000e+00 - loss_batch: 6.5791\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-14 06:54:43.265476: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: model_5/sequential_block_29/xl_net_block_5/sequential_block_32/replace_masked_embeddings_5/RaggedWhere/Assert/AssertGuard/branch_executed/_529\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "84/84 [==============================] - 9s 49ms/step - loss: 8.3507 - recall_at_20: 0.2332 - mrr_at_20: 0.0720 - ndcg_at_20: 0.1070 - map_at_20: 0.0720 - precision_at_20: 0.0117 - regularization_loss: 0.0000e+00 - loss_batch: 8.3848\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.350717544555664,\n", + " 'recall_at_20': 0.23180365562438965,\n", + " 'mrr_at_20': 0.06943727284669876,\n", + " 'ndcg_at_20': 0.10483581572771072,\n", + " 'map_at_20': 0.06943727284669876,\n", + " 'precision_at_20': 0.011590182781219482,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 9.85844612121582}" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + "model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequenceMaskRandom(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "02b2e706", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"model_5\"\n", + "_________________________________________________________________\n", + " Layer (type) Output Shape Param # \n", + "=================================================================\n", + " sequential_block_29 (Sequen multiple 176252800 \n", + " tialBlock) \n", + " \n", + " sequential_block_30 (Sequen multiple 86464 \n", + " tialBlock) \n", + " \n", + " sess_pid_seq/categorical_ou multiple 175110449 \n", + " tput (CategoricalOutput) \n", + " \n", + " model_context_5 (ModelConte multiple 0 \n", + " xt) \n", + " \n", + " prepare_features_11 (Prepar multiple 0 \n", + " eFeatures) \n", + " \n", + "=================================================================\n", + "Total params: 176,729,266\n", + "Trainable params: 176,729,265\n", + "Non-trainable params: 1\n", + "_________________________________________________________________\n" + ] + } + ], + "source": [ + "model_transformer.summary()" + ] + }, + { + "cell_type": "markdown", + "id": "3513d28a", + "metadata": {}, + "source": [ + "# Run 2" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "2e624551", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model_1/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/sequential_block_12/replace_masked_embeddings_1/RaggedWhere/Reshape_3:0\", shape=(None,), dtype=int64), values=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/sequential_block_12/replace_masked_embeddings_1/RaggedWhere/Reshape_2:0\", shape=(None, None), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/sequential_block_12/replace_masked_embeddings_1/RaggedWhere/Cast:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/sequential_block_12/replace_masked_embeddings_1/RaggedWhere/RaggedTile_2/Reshape_3:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/sequential_block_12/replace_masked_embeddings_1/RaggedWhere/RaggedTile_2/Reshape_2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/sequential_block_12/replace_masked_embeddings_1/RaggedWhere/RaggedTile_2/Cast:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model_1/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-13 20:31:46.363004: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: model_1/sequential_block_9/xl_net_block_1/sequential_block_12/replace_masked_embeddings_1/RaggedWhere/Assert/AssertGuard/branch_executed/_31\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "677/677 [==============================] - 65s 84ms/step - loss: 9.4953 - recall_at_20: 0.0663 - mrr_at_20: 0.0167 - ndcg_at_20: 0.0274 - map_at_20: 0.0167 - precision_at_20: 0.0033 - regularization_loss: 0.0000e+00 - loss_batch: 9.4908\n", + "Epoch 2/5\n", + "677/677 [==============================] - 57s 84ms/step - loss: 8.1077 - recall_at_20: 0.1712 - mrr_at_20: 0.0474 - ndcg_at_20: 0.0744 - map_at_20: 0.0474 - precision_at_20: 0.0086 - regularization_loss: 0.0000e+00 - loss_batch: 8.1021\n", + "Epoch 3/5\n", + "677/677 [==============================] - 57s 84ms/step - loss: 7.3969 - recall_at_20: 0.2444 - mrr_at_20: 0.0671 - ndcg_at_20: 0.1057 - map_at_20: 0.0671 - precision_at_20: 0.0122 - regularization_loss: 0.0000e+00 - loss_batch: 7.3975\n", + "Epoch 4/5\n", + "677/677 [==============================] - 57s 84ms/step - loss: 6.9683 - recall_at_20: 0.2853 - mrr_at_20: 0.0794 - ndcg_at_20: 0.1243 - map_at_20: 0.0794 - precision_at_20: 0.0143 - regularization_loss: 0.0000e+00 - loss_batch: 6.9657\n", + "Epoch 5/5\n", + "677/677 [==============================] - 57s 85ms/step - loss: 6.6522 - recall_at_20: 0.3234 - mrr_at_20: 0.0917 - ndcg_at_20: 0.1423 - map_at_20: 0.0917 - precision_at_20: 0.0162 - regularization_loss: 0.0000e+00 - loss_batch: 6.6482\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-13 20:36:37.576034: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: model_1/sequential_block_9/xl_net_block_1/sequential_block_12/replace_masked_embeddings_1/RaggedWhere/Assert/AssertGuard/branch_executed/_529\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "84/84 [==============================] - 9s 48ms/step - loss: 8.3509 - recall_at_20: 0.2300 - mrr_at_20: 0.0691 - ndcg_at_20: 0.1041 - map_at_20: 0.0691 - precision_at_20: 0.0115 - regularization_loss: 0.0000e+00 - loss_batch: 8.3545\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.350946426391602,\n", + " 'recall_at_20': 0.22926461696624756,\n", + " 'mrr_at_20': 0.06758848577737808,\n", + " 'ndcg_at_20': 0.10286629945039749,\n", + " 'map_at_20': 0.06758848577737808,\n", + " 'precision_at_20': 0.011463231407105923,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 8.509391784667969}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + "model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequenceMaskRandom(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "d42dea65", + "metadata": {}, + "source": [ + "# Run 3" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "97e7322c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model_2/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/sequential_block_17/replace_masked_embeddings_2/RaggedWhere/Reshape_3:0\", shape=(None,), dtype=int64), values=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/sequential_block_17/replace_masked_embeddings_2/RaggedWhere/Reshape_2:0\", shape=(None, None), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/sequential_block_17/replace_masked_embeddings_2/RaggedWhere/Cast:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/sequential_block_17/replace_masked_embeddings_2/RaggedWhere/RaggedTile_2/Reshape_3:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/sequential_block_17/replace_masked_embeddings_2/RaggedWhere/RaggedTile_2/Reshape_2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/sequential_block_17/replace_masked_embeddings_2/RaggedWhere/RaggedTile_2/Cast:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model_2/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-13 20:36:51.268625: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: model_2/sequential_block_14/xl_net_block_2/sequential_block_17/replace_masked_embeddings_2/RaggedWhere/Assert/AssertGuard/branch_executed/_31\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "677/677 [==============================] - 65s 84ms/step - loss: 9.4386 - recall_at_20: 0.0722 - mrr_at_20: 0.0190 - ndcg_at_20: 0.0305 - map_at_20: 0.0190 - precision_at_20: 0.0036 - regularization_loss: 0.0000e+00 - loss_batch: 9.4342\n", + "Epoch 2/5\n", + "677/677 [==============================] - 57s 84ms/step - loss: 8.0171 - recall_at_20: 0.1837 - mrr_at_20: 0.0502 - ndcg_at_20: 0.0792 - map_at_20: 0.0502 - precision_at_20: 0.0092 - regularization_loss: 0.0000e+00 - loss_batch: 8.0103\n", + "Epoch 3/5\n", + "677/677 [==============================] - 58s 85ms/step - loss: 7.3722 - recall_at_20: 0.2467 - mrr_at_20: 0.0691 - ndcg_at_20: 0.1078 - map_at_20: 0.0691 - precision_at_20: 0.0123 - regularization_loss: 0.0000e+00 - loss_batch: 7.3658\n", + "Epoch 4/5\n", + "677/677 [==============================] - 57s 85ms/step - loss: 6.9592 - recall_at_20: 0.2892 - mrr_at_20: 0.0807 - ndcg_at_20: 0.1262 - map_at_20: 0.0807 - precision_at_20: 0.0145 - regularization_loss: 0.0000e+00 - loss_batch: 6.9549\n", + "Epoch 5/5\n", + "677/677 [==============================] - 57s 85ms/step - loss: 6.6706 - recall_at_20: 0.3194 - mrr_at_20: 0.0899 - ndcg_at_20: 0.1401 - map_at_20: 0.0899 - precision_at_20: 0.0160 - regularization_loss: 0.0000e+00 - loss_batch: 6.6659\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-13 20:41:42.865959: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: model_2/sequential_block_14/xl_net_block_2/sequential_block_17/replace_masked_embeddings_2/RaggedWhere/Assert/AssertGuard/branch_executed/_529\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "84/84 [==============================] - 9s 49ms/step - loss: 8.3114 - recall_at_20: 0.2264 - mrr_at_20: 0.0687 - ndcg_at_20: 0.1030 - map_at_20: 0.0687 - precision_at_20: 0.0113 - regularization_loss: 0.0000e+00 - loss_batch: 8.3190\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.311356544494629,\n", + " 'recall_at_20': 0.22738386690616608,\n", + " 'mrr_at_20': 0.0663006603717804,\n", + " 'ndcg_at_20': 0.10139463096857071,\n", + " 'map_at_20': 0.0663006603717804,\n", + " 'precision_at_20': 0.011369192972779274,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 8.649133682250977}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + "model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequenceMaskRandom(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "610da911", + "metadata": {}, + "source": [ + "# Run 4" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "9e0f0891", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model_3/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/sequential_block_22/replace_masked_embeddings_3/RaggedWhere/Reshape_3:0\", shape=(None,), dtype=int64), values=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/sequential_block_22/replace_masked_embeddings_3/RaggedWhere/Reshape_2:0\", shape=(None, None), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/sequential_block_22/replace_masked_embeddings_3/RaggedWhere/Cast:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/sequential_block_22/replace_masked_embeddings_3/RaggedWhere/RaggedTile_2/Reshape_3:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/sequential_block_22/replace_masked_embeddings_3/RaggedWhere/RaggedTile_2/Reshape_2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/sequential_block_22/replace_masked_embeddings_3/RaggedWhere/RaggedTile_2/Cast:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model_3/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-13 20:41:56.776497: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: model_3/sequential_block_19/xl_net_block_3/sequential_block_22/replace_masked_embeddings_3/RaggedWhere/Assert/AssertGuard/branch_executed/_31\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "677/677 [==============================] - 65s 84ms/step - loss: 9.4786 - recall_at_20: 0.0673 - mrr_at_20: 0.0176 - ndcg_at_20: 0.0283 - map_at_20: 0.0176 - precision_at_20: 0.0034 - regularization_loss: 0.0000e+00 - loss_batch: 9.4794\n", + "Epoch 2/5\n", + "677/677 [==============================] - 57s 84ms/step - loss: 8.1173 - recall_at_20: 0.1692 - mrr_at_20: 0.0454 - ndcg_at_20: 0.0723 - map_at_20: 0.0454 - precision_at_20: 0.0085 - regularization_loss: 0.0000e+00 - loss_batch: 8.1128\n", + "Epoch 3/5\n", + "677/677 [==============================] - 57s 85ms/step - loss: 7.4296 - recall_at_20: 0.2409 - mrr_at_20: 0.0664 - ndcg_at_20: 0.1044 - map_at_20: 0.0664 - precision_at_20: 0.0120 - regularization_loss: 0.0000e+00 - loss_batch: 7.4268\n", + "Epoch 4/5\n", + "677/677 [==============================] - 58s 85ms/step - loss: 6.9533 - recall_at_20: 0.2861 - mrr_at_20: 0.0778 - ndcg_at_20: 0.1232 - map_at_20: 0.0778 - precision_at_20: 0.0143 - regularization_loss: 0.0000e+00 - loss_batch: 6.9502\n", + "Epoch 5/5\n", + "677/677 [==============================] - 57s 85ms/step - loss: 6.6322 - recall_at_20: 0.3285 - mrr_at_20: 0.0931 - ndcg_at_20: 0.1445 - map_at_20: 0.0931 - precision_at_20: 0.0164 - regularization_loss: 0.0000e+00 - loss_batch: 6.6306\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-13 20:46:48.752036: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: model_3/sequential_block_19/xl_net_block_3/sequential_block_22/replace_masked_embeddings_3/RaggedWhere/Assert/AssertGuard/branch_executed/_529\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "84/84 [==============================] - 9s 49ms/step - loss: 8.3535 - recall_at_20: 0.2280 - mrr_at_20: 0.0700 - ndcg_at_20: 0.1046 - map_at_20: 0.0700 - precision_at_20: 0.0114 - regularization_loss: 0.0000e+00 - loss_batch: 8.3763\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.353541374206543,\n", + " 'recall_at_20': 0.23067519068717957,\n", + " 'mrr_at_20': 0.06726308912038803,\n", + " 'ndcg_at_20': 0.10282379388809204,\n", + " 'map_at_20': 0.06726308912038803,\n", + " 'precision_at_20': 0.011533760465681553,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 9.360955238342285}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + "model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequenceMaskRandom(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "6cffc60d", + "metadata": {}, + "source": [ + "# Run 5" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "6981ff6e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "WARNING:tensorflow:Gradients do not exist for variables ['model_4/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/sequential_block_27/replace_masked_embeddings_4/RaggedWhere/Reshape_3:0\", shape=(None,), dtype=int64), values=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/sequential_block_27/replace_masked_embeddings_4/RaggedWhere/Reshape_2:0\", shape=(None, None), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/sequential_block_27/replace_masked_embeddings_4/RaggedWhere/Cast:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/sequential_block_27/replace_masked_embeddings_4/RaggedWhere/RaggedTile_2/Reshape_3:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/sequential_block_27/replace_masked_embeddings_4/RaggedWhere/RaggedTile_2/Reshape_2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/sequential_block_27/replace_masked_embeddings_4/RaggedWhere/RaggedTile_2/Cast:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model_4/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-13 20:47:02.588234: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: model_4/sequential_block_24/xl_net_block_4/sequential_block_27/replace_masked_embeddings_4/RaggedWhere/Assert/AssertGuard/branch_executed/_31\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "677/677 [==============================] - 65s 84ms/step - loss: 9.4909 - recall_at_20: 0.0707 - mrr_at_20: 0.0184 - ndcg_at_20: 0.0297 - map_at_20: 0.0184 - precision_at_20: 0.0035 - regularization_loss: 0.0000e+00 - loss_batch: 9.4882\n", + "Epoch 2/5\n", + "677/677 [==============================] - 57s 84ms/step - loss: 8.1387 - recall_at_20: 0.1653 - mrr_at_20: 0.0453 - ndcg_at_20: 0.0713 - map_at_20: 0.0453 - precision_at_20: 0.0083 - regularization_loss: 0.0000e+00 - loss_batch: 8.1347\n", + "Epoch 3/5\n", + "677/677 [==============================] - 57s 84ms/step - loss: 7.4398 - recall_at_20: 0.2387 - mrr_at_20: 0.0662 - ndcg_at_20: 0.1038 - map_at_20: 0.0662 - precision_at_20: 0.0119 - regularization_loss: 0.0000e+00 - loss_batch: 7.4371\n", + "Epoch 4/5\n", + "677/677 [==============================] - 57s 85ms/step - loss: 6.9831 - recall_at_20: 0.2878 - mrr_at_20: 0.0810 - ndcg_at_20: 0.1261 - map_at_20: 0.0810 - precision_at_20: 0.0144 - regularization_loss: 0.0000e+00 - loss_batch: 6.9787\n", + "Epoch 5/5\n", + "677/677 [==============================] - 57s 85ms/step - loss: 6.6535 - recall_at_20: 0.3246 - mrr_at_20: 0.0905 - ndcg_at_20: 0.1416 - map_at_20: 0.0905 - precision_at_20: 0.0162 - regularization_loss: 0.0000e+00 - loss_batch: 6.6479\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-03-13 20:51:54.265885: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: model_4/sequential_block_24/xl_net_block_4/sequential_block_27/replace_masked_embeddings_4/RaggedWhere/Assert/AssertGuard/branch_executed/_529\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "84/84 [==============================] - 9s 49ms/step - loss: 8.3766 - recall_at_20: 0.2347 - mrr_at_20: 0.0690 - ndcg_at_20: 0.1050 - map_at_20: 0.0690 - precision_at_20: 0.0117 - regularization_loss: 0.0000e+00 - loss_batch: 8.3785\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.376553535461426,\n", + " 'recall_at_20': 0.23227383196353912,\n", + " 'mrr_at_20': 0.0675581842660904,\n", + " 'ndcg_at_20': 0.10343420505523682,\n", + " 'map_at_20': 0.0675581842660904,\n", + " 'precision_at_20': 0.011613693088293076,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 8.46284294128418}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()\n", + "model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequenceMaskRandom(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d195f16d", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/T4Rec_repro/train_runs/mlm_item_id_min.ipynb b/T4Rec_repro/train_runs/mlm_item_id_min.ipynb deleted file mode 100644 index df90cc786b..0000000000 --- a/T4Rec_repro/train_runs/mlm_item_id_min.ipynb +++ /dev/null @@ -1,640 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "ceb3ae93", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-03-10 13:57:07.721314: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", - " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", - "2023-03-10 13:57:10.129984: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:57:10.130437: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:57:10.130617: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n", - "2023-03-10 13:57:10.581209: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-03-10 13:57:10.582030: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:57:10.582283: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:57:10.582439: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:57:11.330242: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:57:11.330454: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:57:11.330615: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-03-10 13:57:11.330728: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", - "2023-03-10 13:57:11.330790: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n" - ] - } - ], - "source": [ - "import os\n", - "os.environ[\"TF_GPU_ALLOCATOR\"]=\"cuda_malloc_async\"\n", - "import gc\n", - "import numpy as np\n", - "\n", - "import tensorflow as tf\n", - "\n", - "from merlin.schema.tags import Tags\n", - "from merlin.io.dataset import Dataset\n", - "import merlin.models.tf as mm" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "11647dd3", - "metadata": {}, - "outputs": [], - "source": [ - "train = Dataset(\"ecom_dataset/0001/train.parquet\")\n", - "valid = Dataset(\"ecom_dataset/0002/valid.parquet\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "4ab4e0fb", - "metadata": {}, - "outputs": [], - "source": [ - "target = 'sess_pid_seq'" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "8d9903e6", - "metadata": {}, - "outputs": [], - "source": [ - "# a couple of starter hyperparams\n", - "\n", - "d_model = 192\n", - "n_layer = 3\n", - "n_head = 16\n", - "batch_size = 128\n", - "learning_rate = 0.0006667377132554976\n", - "n_epoch = 5" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "a6ade14a", - "metadata": {}, - "outputs": [], - "source": [ - "def get_model():\n", - " mlp_block = mm.MLPBlock(\n", - " [128,d_model],\n", - " activation='relu',\n", - " no_activation_last_layer=True,\n", - " )\n", - "\n", - " from merlin.schema.io.tensorflow_metadata import TensorflowMetadata\n", - "\n", - " schema = TensorflowMetadata.from_proto_text_file(\n", - " '../',\n", - " file_name='rees46_schema_modified.pbtxt'\n", - " ).to_merlin_schema()\n", - "\n", - " train.schema = schema\n", - "\n", - " input_block = mm.InputBlockV2(\n", - " train.schema.select_by_name('sess_pid_seq'), \n", - " embeddings=mm.Embeddings(\n", - " train.schema.select_by_name('sess_pid_seq'), \n", - " sequence_combiner=None,\n", - " dim=d_model\n", - " ),\n", - " # pre=mm.StochasticSwapNoise()\n", - " )\n", - "\n", - " train.schema = train.schema.select_by_name('sess_pid_seq')\n", - "\n", - " xlnet_block = mm.XLNetBlock(d_model=d_model, n_head=n_head, n_layer=n_layer)\n", - "\n", - " dense_block = mm.SequentialBlock(\n", - " input_block,\n", - " mlp_block,\n", - " xlnet_block\n", - " )\n", - "\n", - " mlp_block2 = mm.MLPBlock(\n", - " [128,d_model],\n", - " activation='relu',\n", - " no_activation_last_layer=True,\n", - " )\n", - "\n", - " prediction_task = mm.CategoricalOutput(\n", - " to_call=input_block[\"categorical\"][target],\n", - " )\n", - "\n", - " model_transformer = mm.Model(dense_block, mlp_block2, prediction_task)\n", - "\n", - " optimizer = tf.keras.optimizers.Adam(\n", - " learning_rate=learning_rate,\n", - " )\n", - "\n", - " model_transformer.compile(run_eagerly=False, optimizer=optimizer, loss=\"categorical_crossentropy\",\n", - " metrics=mm.TopKMetricsAggregator.default_metrics(top_ks=[10])\n", - " )\n", - " return model_transformer, xlnet_block" - ] - }, - { - "cell_type": "markdown", - "id": "78302207", - "metadata": {}, - "source": [ - "# Run 1" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "e7474131", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/5\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-03-10 13:57:17.631317: I tensorflow/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8700\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", - "677/677 [==============================] - 82s 110ms/step - loss: 8.8265 - recall_at_10: 0.0432 - mrr_at_10: 0.0166 - ndcg_at_10: 0.0228 - map_at_10: 0.0166 - precision_at_10: 0.0043 - regularization_loss: 0.0000e+00 - loss_batch: 8.8191\n", - "Epoch 2/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 5.8014 - recall_at_10: 0.3091 - mrr_at_10: 0.1936 - ndcg_at_10: 0.2210 - map_at_10: 0.1936 - precision_at_10: 0.0309 - regularization_loss: 0.0000e+00 - loss_batch: 5.8019\n", - "Epoch 3/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 4.1718 - recall_at_10: 0.5397 - mrr_at_10: 0.4080 - ndcg_at_10: 0.4394 - map_at_10: 0.4080 - precision_at_10: 0.0540 - regularization_loss: 0.0000e+00 - loss_batch: 4.1734\n", - "Epoch 4/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 3.2806 - recall_at_10: 0.6585 - mrr_at_10: 0.5362 - ndcg_at_10: 0.5656 - map_at_10: 0.5362 - precision_at_10: 0.0658 - regularization_loss: 0.0000e+00 - loss_batch: 3.2849\n", - "Epoch 5/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 2.8188 - recall_at_10: 0.7125 - mrr_at_10: 0.6007 - ndcg_at_10: 0.6276 - map_at_10: 0.6007 - precision_at_10: 0.0712 - regularization_loss: 0.0000e+00 - loss_batch: 2.8246\n", - "84/84 [==============================] - 7s 39ms/step - loss: 8.8107 - recall_at_10: 0.1511 - mrr_at_10: 0.0623 - ndcg_at_10: 0.0829 - map_at_10: 0.0623 - precision_at_10: 0.0151 - regularization_loss: 0.0000e+00 - loss_batch: 8.8298\n" - ] - }, - { - "data": { - "text/plain": [ - "{'loss': 8.810694694519043,\n", - " 'recall_at_10': 0.15318788588047028,\n", - " 'mrr_at_10': 0.06131112948060036,\n", - " 'ndcg_at_10': 0.08268804848194122,\n", - " 'map_at_10': 0.06131112948060036,\n", - " 'precision_at_10': 0.015318789519369602,\n", - " 'regularization_loss': 0.0,\n", - " 'loss_batch': 9.6568603515625}" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model_transformer, xlnet_block = get_model()\n", - "model_transformer.fit(\n", - " train,\n", - " batch_size=batch_size,\n", - " epochs=n_epoch,\n", - " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", - ")\n", - "\n", - "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", - "model_transformer.evaluate(\n", - " valid,\n", - " batch_size=batch_size,\n", - " pre=predict_last,\n", - " return_dict=True\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "3513d28a", - "metadata": {}, - "source": [ - "# Run 2" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "2e624551", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/5\n", - "WARNING:tensorflow:Gradients do not exist for variables ['model_1/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_1/sequential_block_9/xl_net_block_1/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Gradients do not exist for variables ['model_1/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", - "677/677 [==============================] - 80s 110ms/step - loss: 9.1281 - recall_at_10: 0.0359 - mrr_at_10: 0.0128 - ndcg_at_10: 0.0181 - map_at_10: 0.0128 - precision_at_10: 0.0036 - regularization_loss: 0.0000e+00 - loss_batch: 9.1243\n", - "Epoch 2/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 6.7038 - recall_at_10: 0.1907 - mrr_at_10: 0.1006 - ndcg_at_10: 0.1218 - map_at_10: 0.1006 - precision_at_10: 0.0191 - regularization_loss: 0.0000e+00 - loss_batch: 6.6971\n", - "Epoch 3/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 4.9471 - recall_at_10: 0.4404 - mrr_at_10: 0.3077 - ndcg_at_10: 0.3393 - map_at_10: 0.3077 - precision_at_10: 0.0440 - regularization_loss: 0.0000e+00 - loss_batch: 4.9478\n", - "Epoch 4/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 3.9842 - recall_at_10: 0.5607 - mrr_at_10: 0.4197 - ndcg_at_10: 0.4534 - map_at_10: 0.4197 - precision_at_10: 0.0561 - regularization_loss: 0.0000e+00 - loss_batch: 3.9878\n", - "Epoch 5/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 3.3262 - recall_at_10: 0.6442 - mrr_at_10: 0.5172 - ndcg_at_10: 0.5477 - map_at_10: 0.5172 - precision_at_10: 0.0644 - regularization_loss: 0.0000e+00 - loss_batch: 3.3307\n", - "84/84 [==============================] - 7s 40ms/step - loss: 8.9716 - recall_at_10: 0.1277 - mrr_at_10: 0.0513 - ndcg_at_10: 0.0692 - map_at_10: 0.0513 - precision_at_10: 0.0128 - regularization_loss: 0.0000e+00 - loss_batch: 8.9960\n" - ] - }, - { - "data": { - "text/plain": [ - "{'loss': 8.971626281738281,\n", - " 'recall_at_10': 0.12817378342151642,\n", - " 'mrr_at_10': 0.05082216113805771,\n", - " 'ndcg_at_10': 0.06883765012025833,\n", - " 'map_at_10': 0.05082216113805771,\n", - " 'precision_at_10': 0.012817380018532276,\n", - " 'regularization_loss': 0.0,\n", - " 'loss_batch': 10.049013137817383}" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model_transformer, xlnet_block = get_model()\n", - "model_transformer.fit(\n", - " train,\n", - " batch_size=batch_size,\n", - " epochs=n_epoch,\n", - " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", - ")\n", - "\n", - "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", - "model_transformer.evaluate(\n", - " valid,\n", - " batch_size=batch_size,\n", - " pre=predict_last,\n", - " return_dict=True\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "d42dea65", - "metadata": {}, - "source": [ - "# Run 3" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "97e7322c", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/5\n", - "WARNING:tensorflow:Gradients do not exist for variables ['model_2/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_2/sequential_block_14/xl_net_block_2/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Gradients do not exist for variables ['model_2/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", - "677/677 [==============================] - 80s 110ms/step - loss: 8.9307 - recall_at_10: 0.0396 - mrr_at_10: 0.0142 - ndcg_at_10: 0.0201 - map_at_10: 0.0142 - precision_at_10: 0.0040 - regularization_loss: 0.0000e+00 - loss_batch: 8.9265\n", - "Epoch 2/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 5.9376 - recall_at_10: 0.2951 - mrr_at_10: 0.1842 - ndcg_at_10: 0.2105 - map_at_10: 0.1842 - precision_at_10: 0.0295 - regularization_loss: 0.0000e+00 - loss_batch: 5.9350\n", - "Epoch 3/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 4.3616 - recall_at_10: 0.5184 - mrr_at_10: 0.3844 - ndcg_at_10: 0.4164 - map_at_10: 0.3844 - precision_at_10: 0.0518 - regularization_loss: 0.0000e+00 - loss_batch: 4.3657\n", - "Epoch 4/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 3.4916 - recall_at_10: 0.6319 - mrr_at_10: 0.5057 - ndcg_at_10: 0.5359 - map_at_10: 0.5057 - precision_at_10: 0.0632 - regularization_loss: 0.0000e+00 - loss_batch: 3.4969\n", - "Epoch 5/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 3.0021 - recall_at_10: 0.6889 - mrr_at_10: 0.5684 - ndcg_at_10: 0.5973 - map_at_10: 0.5684 - precision_at_10: 0.0689 - regularization_loss: 0.0000e+00 - loss_batch: 3.0072\n", - "84/84 [==============================] - 7s 40ms/step - loss: 8.7983 - recall_at_10: 0.1534 - mrr_at_10: 0.0599 - ndcg_at_10: 0.0816 - map_at_10: 0.0599 - precision_at_10: 0.0153 - regularization_loss: 0.0000e+00 - loss_batch: 8.8378\n" - ] - }, - { - "data": { - "text/plain": [ - "{'loss': 8.798320770263672,\n", - " 'recall_at_10': 0.15647922456264496,\n", - " 'mrr_at_10': 0.05985381081700325,\n", - " 'ndcg_at_10': 0.08228185027837753,\n", - " 'map_at_10': 0.05985381081700325,\n", - " 'precision_at_10': 0.015647921711206436,\n", - " 'regularization_loss': 0.0,\n", - " 'loss_batch': 10.545936584472656}" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model_transformer, xlnet_block = get_model()\n", - "model_transformer.fit(\n", - " train,\n", - " batch_size=batch_size,\n", - " epochs=n_epoch,\n", - " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", - ")\n", - "\n", - "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", - "model_transformer.evaluate(\n", - " valid,\n", - " batch_size=batch_size,\n", - " pre=predict_last,\n", - " return_dict=True\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "610da911", - "metadata": {}, - "source": [ - "# Run 4" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "9e0f0891", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/5\n", - "WARNING:tensorflow:Gradients do not exist for variables ['model_3/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_3/sequential_block_19/xl_net_block_3/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Gradients do not exist for variables ['model_3/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", - "677/677 [==============================] - 80s 110ms/step - loss: 8.8791 - recall_at_10: 0.0414 - mrr_at_10: 0.0155 - ndcg_at_10: 0.0215 - map_at_10: 0.0155 - precision_at_10: 0.0041 - regularization_loss: 0.0000e+00 - loss_batch: 8.8746\n", - "Epoch 2/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 5.5817 - recall_at_10: 0.3289 - mrr_at_10: 0.2127 - ndcg_at_10: 0.2403 - map_at_10: 0.2127 - precision_at_10: 0.0329 - regularization_loss: 0.0000e+00 - loss_batch: 5.5795\n", - "Epoch 3/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 3.8784 - recall_at_10: 0.5761 - mrr_at_10: 0.4489 - ndcg_at_10: 0.4793 - map_at_10: 0.4489 - precision_at_10: 0.0576 - regularization_loss: 0.0000e+00 - loss_batch: 3.8833\n", - "Epoch 4/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 3.0679 - recall_at_10: 0.6797 - mrr_at_10: 0.5656 - ndcg_at_10: 0.5930 - map_at_10: 0.5656 - precision_at_10: 0.0680 - regularization_loss: 0.0000e+00 - loss_batch: 3.0749\n", - "Epoch 5/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 2.5693 - recall_at_10: 0.7397 - mrr_at_10: 0.6350 - ndcg_at_10: 0.6602 - map_at_10: 0.6350 - precision_at_10: 0.0740 - regularization_loss: 0.0000e+00 - loss_batch: 2.5767\n", - "84/84 [==============================] - 7s 40ms/step - loss: 8.6399 - recall_at_10: 0.1581 - mrr_at_10: 0.0621 - ndcg_at_10: 0.0844 - map_at_10: 0.0621 - precision_at_10: 0.0158 - regularization_loss: 0.0000e+00 - loss_batch: 8.6637\n" - ] - }, - { - "data": { - "text/plain": [ - "{'loss': 8.63992691040039,\n", - " 'recall_at_10': 0.1588301658630371,\n", - " 'mrr_at_10': 0.06323756277561188,\n", - " 'ndcg_at_10': 0.0855293795466423,\n", - " 'map_at_10': 0.06323756277561188,\n", - " 'precision_at_10': 0.01588302105665207,\n", - " 'regularization_loss': 0.0,\n", - " 'loss_batch': 9.691500663757324}" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model_transformer, xlnet_block = get_model()\n", - "model_transformer.fit(\n", - " train,\n", - " batch_size=batch_size,\n", - " epochs=n_epoch,\n", - " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", - ")\n", - "\n", - "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", - "model_transformer.evaluate(\n", - " valid,\n", - " batch_size=batch_size,\n", - " pre=predict_last,\n", - " return_dict=True\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "6cffc60d", - "metadata": {}, - "source": [ - "# Run 5" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "6981ff6e", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/5\n", - "WARNING:tensorflow:Gradients do not exist for variables ['model_4/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model_4/sequential_block_24/xl_net_block_4/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Gradients do not exist for variables ['model_4/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", - "677/677 [==============================] - 80s 110ms/step - loss: 8.9605 - recall_at_10: 0.0390 - mrr_at_10: 0.0141 - ndcg_at_10: 0.0199 - map_at_10: 0.0141 - precision_at_10: 0.0039 - regularization_loss: 0.0000e+00 - loss_batch: 8.9571\n", - "Epoch 2/5\n", - "677/677 [==============================] - 75s 110ms/step - loss: 6.1194 - recall_at_10: 0.2618 - mrr_at_10: 0.1575 - ndcg_at_10: 0.1821 - map_at_10: 0.1575 - precision_at_10: 0.0262 - regularization_loss: 0.0000e+00 - loss_batch: 6.1199\n", - "Epoch 3/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 4.4762 - recall_at_10: 0.5000 - mrr_at_10: 0.3647 - ndcg_at_10: 0.3970 - map_at_10: 0.3647 - precision_at_10: 0.0500 - regularization_loss: 0.0000e+00 - loss_batch: 4.4783\n", - "Epoch 4/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 3.6222 - recall_at_10: 0.6166 - mrr_at_10: 0.4884 - ndcg_at_10: 0.5191 - map_at_10: 0.4884 - precision_at_10: 0.0617 - regularization_loss: 0.0000e+00 - loss_batch: 3.6248\n", - "Epoch 5/5\n", - "677/677 [==============================] - 75s 111ms/step - loss: 3.1115 - recall_at_10: 0.6744 - mrr_at_10: 0.5505 - ndcg_at_10: 0.5803 - map_at_10: 0.5505 - precision_at_10: 0.0674 - regularization_loss: 0.0000e+00 - loss_batch: 3.1192\n", - "84/84 [==============================] - 7s 40ms/step - loss: 8.8991 - recall_at_10: 0.1457 - mrr_at_10: 0.0572 - ndcg_at_10: 0.0776 - map_at_10: 0.0572 - precision_at_10: 0.0146 - regularization_loss: 0.0000e+00 - loss_batch: 8.9238\n" - ] - }, - { - "data": { - "text/plain": [ - "{'loss': 8.899141311645508,\n", - " 'recall_at_10': 0.14763964712619781,\n", - " 'mrr_at_10': 0.05743885040283203,\n", - " 'ndcg_at_10': 0.07836496829986572,\n", - " 'map_at_10': 0.05743885040283203,\n", - " 'precision_at_10': 0.014763964340090752,\n", - " 'regularization_loss': 0.0,\n", - " 'loss_batch': 9.991716384887695}" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model_transformer, xlnet_block = get_model()\n", - "model_transformer.fit(\n", - " train,\n", - " batch_size=batch_size,\n", - " epochs=n_epoch,\n", - " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", - ")\n", - "\n", - "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", - "model_transformer.evaluate(\n", - " valid,\n", - " batch_size=batch_size,\n", - " pre=predict_last,\n", - " return_dict=True\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d195f16d", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}