From 552cf323876a597a7167356b12baf471b809c4ae Mon Sep 17 00:00:00 2001 From: Radek Osmulski Date: Tue, 9 May 2023 12:07:46 +1000 Subject: [PATCH] update --- ...nd_save_model_for_benchmarking-Copy1.ipynb | 13764 +++++++++++++++- 1 file changed, 13438 insertions(+), 326 deletions(-) diff --git a/T4Rec_repro/train_and_save_model_for_benchmarking-Copy1.ipynb b/T4Rec_repro/train_and_save_model_for_benchmarking-Copy1.ipynb index 74b19fa9d3..c048898d04 100644 --- a/T4Rec_repro/train_and_save_model_for_benchmarking-Copy1.ipynb +++ b/T4Rec_repro/train_and_save_model_for_benchmarking-Copy1.ipynb @@ -2,27 +2,1528 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "d062ceda", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/Models\n", + " * [new branch] asvdb_metric_tracking -> origin/asvdb_metric_tracking\n", + " * [new branch] benchmark-session-based -> origin/benchmark-session-based\n", + " * [new branch] ci/horovod -> origin/ci/horovod\n", + " * [new branch] codespell_fix -> origin/codespell_fix\n", + " 16fb4149..fcaefc3e fea-sok-integration-wj -> origin/fea-sok-integration-wj\n", + " * [new branch] fea-sok-load-dump -> origin/fea-sok-load-dump\n", + " * [new branch] fix_datetime_issue_add_inference_on_TIS -> origin/fix_datetime_issue_add_inference_on_TIS\n", + " 95462360..7d68dc88 gh-pages -> origin/gh-pages\n", + " * [new branch] implement_review_comments -> origin/implement_review_comments\n", + " * [new branch] inference_benchmarking_transformers -> origin/inference_benchmarking_transformers\n", + " * [new branch] laiacano/concurrency -> origin/laiacano/concurrency\n", + " 835ad186..d8133b8f main -> origin/main\n", + " * [new branch] mtl_example -> origin/mtl_example\n", + " cb431a8a..b90e9a1b release-22.12 -> origin/release-22.12\n", + " * [new branch] release-23.02 -> origin/release-23.02\n", + " * [new branch] release-23.04 -> origin/release-23.04\n", + " * [new branch] stable -> origin/stable\n", + " * [new branch] tf/batch_predict_fix -> origin/tf/batch_predict_fix\n", + " * [new branch] tf/column_sampling_serialization_fix -> origin/tf/column_sampling_serialization_fix\n", + " * [new branch] tf/continuous_seq_feats_fix -> origin/tf/continuous_seq_feats_fix\n", + " * [new branch] tf/dataloader_changes -> origin/tf/dataloader_changes\n", + " * [new branch] tf/dlrm_dropout_fix -> origin/tf/dlrm_dropout_fix\n", + " * [new branch] tf/fix_broadcast_to_sequence -> origin/tf/fix_broadcast_to_sequence\n", + " * [new branch] tf/fix_logq_correction -> origin/tf/fix_logq_correction\n", + " * [new branch] tf/fix_training_smaller_accuracy -> origin/tf/fix_training_smaller_accuracy\n", + " * [new branch] tf/loglossmetric_callbacks -> origin/tf/loglossmetric_callbacks\n", + " * [new branch] tf/mtl_example_updates_v2 -> origin/tf/mtl_example_updates_v2\n", + " + 169f3df5...06eecddd tf/output-block -> origin/tf/output-block (forced update)\n", + " * [new branch] tf/pretrained_emb -> origin/tf/pretrained_emb\n", + " * [new branch] tf/process_list_to_prepare_features -> origin/tf/process_list_to_prepare_features\n", + " * [new branch] tf/quick_start_ranking -> origin/tf/quick_start_ranking\n", + " * [new branch] tf/transformer-api -> origin/tf/transformer-api\n", + " * [new branch] torch/dev -> origin/torch/dev\n", + " * [new branch] torch/masking -> origin/torch/masking\n", + " * [new branch] torch/prototype -> origin/torch/prototype\n", + " * [new branch] torch/remove-t4r-code -> origin/torch/remove-t4r-code\n", + " * [new branch] tox_github_actions_fix -> origin/tox_github_actions_fix\n", + " * [new branch] transformer-api -> origin/transformer-api\n", + " + 0a65d603...9f53e8ff update_07 -> origin/update_07 (forced update)\n", + " * [new tag] v23.02.00 -> v23.02.00\n", + " * [new tag] v23.04.00 -> v23.04.00\n", + " * [new tag] v23.05.dev0 -> v23.05.dev0\n", + "Previous HEAD position was cb431a8a Fix the serialization of `SequenceSummary` block (#927)\n", + "HEAD is now at a86201ee add masking support to SequencePredictRandom transform\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing /models\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Requirement already satisfied: merlin-dataloader>=0.0.2 in /usr/local/lib/python3.8/dist-packages (from merlin-models==23.2.0+7.ga86201ee) (0.0.4)\n", + "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-models==23.2.0+7.ga86201ee) (0.10.0)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (0.56.4)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (4.64.1)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (8.0.0)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (3.19.6)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.12.0)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.2.5)\n", + "Requirement already satisfied: dask>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2022.7.1)\n", + "Requirement already satisfied: distributed>=2022.3.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2022.7.1)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (22.0)\n", + "Requirement already satisfied: fsspec==2022.5.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2022.5.0)\n", + "Requirement already satisfied: pandas<1.4.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.3.5)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (0.39.1)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (45.2.0)\n", + "Requirement already satisfied: numpy<1.24,>=1.18 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.22.4)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (5.2.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.3.0)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.57.0)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (0.4.3)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.2.0)\n", + "Requirement already satisfied: partd>=0.3.10 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.3.0)\n", + "Requirement already satisfied: toolz>=0.8.2 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (0.12.0)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (6.0)\n", + "Requirement already satisfied: cloudpickle>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.2.0)\n", + "Requirement already satisfied: click>=6.6 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (8.1.3)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.2.0)\n", + "Requirement already satisfied: psutil>=5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (5.9.4)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.7.0)\n", + "Requirement already satisfied: sortedcontainers!=2.0.0,!=2.0.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.4.0)\n", + "Requirement already satisfied: urllib3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.26.13)\n", + "Requirement already satisfied: msgpack>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.0.4)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.0.0)\n", + "Requirement already satisfied: tornado<6.2,>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (6.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (3.1.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2022.7)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.8.2)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (3.11.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (6.0.4)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (4.1.0)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.0.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2->distributed>=2022.3.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (2.1.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.4.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (1.14.0)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (6.0.1)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-models==23.2.0+7.ga86201ee) (4.0.0)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Building wheels for collected packages: merlin-models\n", + " Building wheel for merlin-models (PEP 517): started\n", + " Building wheel for merlin-models (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-models: filename=merlin_models-23.2.0+7.ga86201ee-py3-none-any.whl size=374647 sha256=e83a617585afdc41213cc3cf69dd7c136b778260ce9dc14c37e87c4a5675372a\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-uc9xl_m5/wheels/4d/e8/98/0493db55fff90dc9af123f55a9455b96f7f8166c912a02c8a6\n", + "Successfully built merlin-models\n", + "Installing collected packages: merlin-models\n", + " Attempting uninstall: merlin-models\n", + " Found existing installation: merlin-models 0.11.0\n", + " Uninstalling merlin-models-0.11.0:\n", + " Successfully uninstalled merlin-models-0.11.0\n", + "Successfully installed merlin-models-23.2.0+7.ga86201ee\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Previous HEAD position was a86201ee add masking support to SequencePredictRandom transform\n", + "Switched to branch 'main'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Your branch is behind 'origin/main' by 75 commits, and can be fast-forwarded.\n", + " (use \"git pull\" to update your local branch)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/Models\n", + " * branch main -> FETCH_HEAD\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Updating 835ad186..d8133b8f\n", + "Fast-forward\n", + " .github/workflows/blossom-ci.yml | 102 --\n", + " .github/workflows/check-base-branch.yaml | 9 +\n", + " .github/workflows/cpu-horovod.yml | 53 +\n", + " .github/workflows/cpu-nvtabular.yml | 10 +-\n", + " .github/workflows/cpu-systems.yml | 10 +-\n", + " .github/workflows/cpu-t4r.yml | 41 +\n", + " .github/workflows/datasets.yml | 8 +-\n", + " .github/workflows/docs-build.yaml | 2 +-\n", + " .github/workflows/docs-sched-rebuild.yaml | 7 +-\n", + " .github/workflows/gpu-ci.yml | 12 +-\n", + " .github/workflows/implicit.yml | 8 +-\n", + " .github/workflows/lightfm.yml | 14 +-\n", + " .github/workflows/multi-gpu-ci.yml | 34 +\n", + " .github/workflows/packages.yaml | 120 ++\n", + " .github/workflows/pre-commit.yml | 8 +\n", + " .github/workflows/pytorch.yml | 85 +-\n", + " .github/workflows/release-drafter.yaml | 2 +-\n", + " .github/workflows/set-stable-branch.yaml | 10 +\n", + " .github/workflows/tensorflow.yml | 49 +-\n", + " .github/workflows/xgboost.yml | 8 +-\n", + " .pre-commit-config.yaml | 10 +-\n", + " MANIFEST.in | 5 +-\n", + " README.md | 2 +-\n", + " ci/pr.gpu.Jenkinsfile | 2 +-\n", + " conda/recipes/meta.yaml | 17 +-\n", + " docs/README.md | 46 +-\n", + " docs/source/api.rst | 99 +-\n", + " examples/01-Getting-started.ipynb | 101 +-\n", + " ...2-Merlin-Models-and-NVTabular-integration.ipynb | 13 +-\n", + " examples/03-Exploring-different-models.ipynb | 25 +-\n", + " examples/04-Exporting-ranking-models.ipynb | 9 +-\n", + " examples/05-Retrieval-Model.ipynb | 30 +-\n", + " ...-your-own-architecture-with-Merlin-Models.ipynb | 546 +++----\n", + " ...nal-ML-models-using-the-Merlin-Models-API.ipynb | 701 +++++++-\n", + " examples/images/mtl_architectures.png | Bin 0 -> 72404 bytes\n", + " ...ing-of-large-embedding-tables-by-LazyAdam.ipynb | 12 +-\n", + " ...on-based-next-item-prediction-for-fashion.ipynb | 11 +-\n", + " .../entertainment-with-pretrained-embeddings.ipynb | 8 +-\n", + " .../incremental-training-with-layer-freezing.ipynb | 275 ++--\n", + " .../multi-gpu-data-parallel-training.ipynb | 7 +-\n", + " .../multi-gpu/install_sparse_operation_kit.sh | 16 +\n", + " .../usecases/ranking_with_multitask_learning.ipynb | 1718 ++++++++++++++++++++\n", + " ...etrieval-with-hyperparameter-optimization.ipynb | 5 +-\n", + " .../transformers-next-item-prediction.ipynb | 1085 ++++++++----\n", + " .../ecommerce/booking/transformed/schema.pbtxt | 15 +-\n", + " merlin/datasets/ecommerce/small/schema.json | 7 +-\n", + " .../entertainment/movielens/100k/schema.pbtxt | 1 +\n", + " .../entertainment/movielens/1m/schema.pbtxt | 3 +-\n", + " .../entertainment/movielens/25m/schema.pbtxt | 1 +\n", + " .../entertainment/music_streaming/schema.json | 10 +-\n", + " .../entertainment/tenrec_video}/__init__.py | 0\n", + " .../entertainment/tenrec_video/schema.pbtxt | 159 ++\n", + " merlin/datasets/synthetic.py | 104 +-\n", + " .../datasets/testing/sequence_testing/schema.json | 24 +-\n", + " merlin/models/implicit/__init__.py | 115 +-\n", + " merlin/models/io.py | 2 -\n", + " merlin/models/lightfm/__init__.py | 132 +-\n", + " merlin/models/tf/__init__.py | 12 +-\n", + " merlin/models/tf/blocks/dlrm.py | 21 +-\n", + " merlin/models/tf/blocks/experts.py | 33 +-\n", + " merlin/models/tf/blocks/optimizer.py | 74 +-\n", + " merlin/models/tf/blocks/retrieval/base.py | 1 -\n", + " merlin/models/tf/core/aggregation.py | 87 +-\n", + " merlin/models/tf/core/combinators.py | 6 +-\n", + " merlin/models/tf/core/encoder.py | 54 +-\n", + " merlin/models/tf/core/tabular.py | 3 +-\n", + " merlin/models/tf/distributed/backend.py | 20 +\n", + " merlin/models/tf/distributed/embedding.py | 232 +++\n", + " merlin/models/tf/experimental/sample_weight.py | 177 ++\n", + " merlin/models/tf/inputs/base.py | 26 +-\n", + " merlin/models/tf/inputs/continuous.py | 41 +-\n", + " merlin/models/tf/inputs/embedding.py | 138 +-\n", + " merlin/models/tf/loader.py | 36 +-\n", + " merlin/models/tf/metrics/__init__.py | 31 +-\n", + " merlin/models/tf/metrics/evaluation.py | 4 +-\n", + " merlin/models/tf/metrics/topk.py | 17 +-\n", + " merlin/models/tf/models/base.py | 887 +++++++---\n", + " merlin/models/tf/models/benchmark.py | 20 +-\n", + " merlin/models/tf/models/ranking.py | 93 +-\n", + " merlin/models/tf/models/retrieval.py | 5 +\n", + " merlin/models/tf/models/utils.py | 38 +\n", + " merlin/models/tf/outputs/base.py | 27 +-\n", + " merlin/models/tf/outputs/block.py | 300 ++++\n", + " merlin/models/tf/outputs/classification.py | 14 +-\n", + " merlin/models/tf/outputs/contrastive.py | 65 +-\n", + " merlin/models/tf/outputs/regression.py | 8 +-\n", + " merlin/models/tf/outputs/sampling/base.py | 34 +-\n", + " merlin/models/tf/outputs/sampling/popularity.py | 93 +-\n", + " merlin/models/tf/outputs/topk.py | 2 -\n", + " merlin/models/tf/prediction_tasks/base.py | 15 +\n", + " .../models/tf/prediction_tasks/classification.py | 11 +-\n", + " merlin/models/tf/prediction_tasks/regression.py | 3 +-\n", + " merlin/models/tf/transformers/block.py | 61 +-\n", + " merlin/models/tf/transformers/transforms.py | 52 +-\n", + " merlin/models/tf/transforms/bias.py | 18 +-\n", + " merlin/models/tf/transforms/features.py | 579 +++++--\n", + " merlin/models/tf/transforms/negative_sampling.py | 25 +-\n", + " merlin/models/tf/transforms/sequence.py | 523 ++++--\n", + " merlin/models/tf/transforms/tensor.py | 249 +--\n", + " merlin/models/tf/utils/batch_utils.py | 8 +-\n", + " merlin/models/tf/utils/testing_utils.py | 81 +-\n", + " merlin/models/tf/utils/tf_utils.py | 85 +-\n", + " merlin/models/torch/__init__.py | 97 --\n", + " merlin/models/torch/block/base.py | 321 ----\n", + " merlin/models/torch/block/mlp.py | 95 --\n", + " merlin/models/torch/features/base.py | 23 -\n", + " merlin/models/torch/features/continuous.py | 66 -\n", + " merlin/models/torch/features/embedding.py | 497 ------\n", + " merlin/models/torch/features/tabular.py | 217 ---\n", + " merlin/models/torch/losses.py | 75 -\n", + " merlin/models/torch/model/__init__.py | 15 -\n", + " merlin/models/torch/model/base.py | 660 --------\n", + " merlin/models/torch/model/prediction_task.py | 101 --\n", + " merlin/models/torch/tabular/__init__.py | 15 -\n", + " merlin/models/torch/tabular/aggregation.py | 149 --\n", + " merlin/models/torch/tabular/base.py | 640 --------\n", + " merlin/models/torch/tabular/transformations.py | 124 --\n", + " merlin/models/torch/typing.py | 30 -\n", + " merlin/models/torch/utils/__init__.py | 15 -\n", + " merlin/models/torch/utils/data_utils.py | 376 -----\n", + " merlin/models/torch/utils/examples_utils.py | 107 --\n", + " merlin/models/torch/utils/torch_utils.py | 210 ---\n", + " merlin/models/utils/dataset.py | 59 +-\n", + " merlin/models/utils/misc_utils.py | 7 +-\n", + " merlin/models/utils/nvt_utils.py | 6 +-\n", + " merlin/models/utils/schema_utils.py | 24 +-\n", + " merlin/models/xgb/__init__.py | 1 -\n", + " pytest.ini | 15 +\n", + " requirements/base.txt | 4 +-\n", + " requirements/docs.txt | 3 +-\n", + " requirements/horovod-cpu-environment.yml | 18 +\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " requirements/horovod.txt | 1 +\n", + " requirements/tensorflow.txt | 2 +-\n", + " requirements/test.txt | 2 +-\n", + " requirements/transformers.txt | 2 +-\n", + " tests/common/tf/retrieval/retrieval_utils.py | 4 +-\n", + " tests/integration/tf/test_ci_01_getting_started.py | 20 +-\n", + " .../tf/test_ci_03_exploring_different_models.py | 8 +-\n", + " .../tf/test_ci_06_advanced_own_architecture.py | 8 +-\n", + " tests/unit/datasets/test_ecommerce.py | 27 +-\n", + " tests/unit/datasets/test_synthetic.py | 15 +-\n", + " tests/unit/implicit/test_implicit.py | 60 +-\n", + " tests/unit/lightfm/test_lightfm.py | 68 +\n", + " .../blocks/retrieval/test_matrix_factorization.py | 7 +-\n", + " tests/unit/tf/blocks/retrieval/test_two_tower.py | 9 +-\n", + " tests/unit/tf/blocks/test_cross.py | 2 -\n", + " tests/unit/tf/blocks/test_interactions.py | 6 +-\n", + " tests/unit/tf/blocks/test_mlp.py | 39 +\n", + " tests/unit/tf/blocks/test_optimizer.py | 64 +-\n", + " tests/unit/tf/core/test_base.py | 5 +-\n", + " tests/unit/tf/core/test_combinators.py | 1 +\n", + " tests/unit/tf/core/test_encoder.py | 6 +-\n", + " tests/unit/tf/core/test_prediction.py | 2 +-\n", + " tests/unit/tf/examples/test_01_getting_started.py | 8 +-\n", + " .../examples/test_03_exploring_different_models.py | 8 +-\n", + " ...test_usecase_accelerate_training_by_lazyadam.py | 1 +\n", + " ..._usecase_incremental_training_layer_freezing.py | 2 +-\n", + " ...test_usecase_ranking_with_multitask_learning.py | 46 +\n", + " ...st_usecase_transformers_next_item_prediction.py | 36 +-\n", + " .../unit/tf/experimental}/__init__.py | 0\n", + " tests/unit/tf/experimental/test_sample_weight.py | 112 ++\n", + " tests/unit/tf/horovod/__init__.py | 2 +-\n", + " tests/unit/tf/horovod/test_embedding.py | 46 +\n", + " tests/unit/tf/horovod/test_horovod.py | 10 +-\n", + " tests/unit/tf/inputs/test_base.py | 2 +-\n", + " tests/unit/tf/inputs/test_block.py | 202 +++\n", + " tests/unit/tf/inputs/test_continuous.py | 4 +-\n", + " tests/unit/tf/inputs/test_embedding.py | 41 +-\n", + " tests/unit/tf/inputs/test_tabular.py | 10 +-\n", + " tests/unit/tf/metrics/test_metrics_topk.py | 2 -\n", + " tests/unit/tf/models/test_base.py | 93 +-\n", + " tests/unit/tf/models/test_benchmark.py | 13 +-\n", + " tests/unit/tf/models/test_ranking.py | 103 +-\n", + " tests/unit/tf/models/test_retrieval.py | 35 +-\n", + " tests/unit/tf/outputs/test_base.py | 78 +-\n", + " tests/unit/tf/outputs/test_block.py | 936 +++++++++++\n", + " tests/unit/tf/outputs/test_classification.py | 69 +-\n", + " tests/unit/tf/outputs/test_contrastive.py | 28 +-\n", + " tests/unit/tf/outputs/test_sampling.py | 17 +-\n", + " tests/unit/tf/prediction_tasks/test_multi_task.py | 281 +++-\n", + " tests/unit/tf/test_loader.py | 28 +-\n", + " tests/unit/tf/transformers/test_block.py | 187 ++-\n", + " tests/unit/tf/transforms/test_features.py | 123 +-\n", + " tests/unit/tf/transforms/test_negative_sampling.py | 63 +-\n", + " tests/unit/tf/transforms/test_noise.py | 1 -\n", + " tests/unit/tf/transforms/test_sequence.py | 55 +-\n", + " tests/unit/tf/transforms/test_tensor.py | 20 +-\n", + " tests/unit/tf/utils/test_batch.py | 20 +-\n", + " tests/unit/torch/__init__.py | 18 -\n", + " tests/unit/torch/_conftest.py | 151 --\n", + " tests/unit/torch/block/__init__.py | 15 -\n", + " tests/unit/torch/block/test_base.py | 62 -\n", + " tests/unit/torch/block/test_mlp.py | 30 -\n", + " tests/unit/torch/features/__init__.py | 15 -\n", + " tests/unit/torch/features/test_continuous.py | 34 -\n", + " tests/unit/torch/features/test_embedding.py | 250 ---\n", + " tests/unit/torch/features/test_tabular.py | 84 -\n", + " tests/unit/torch/model/__init__.py | 15 -\n", + " tests/unit/torch/model/test_head.py | 92 --\n", + " tests/unit/torch/model/test_model.py | 122 --\n", + " tests/unit/torch/tabular/__init__.py | 15 -\n", + " tests/unit/torch/tabular/test_aggregation.py | 106 --\n", + " tests/unit/torch/tabular/test_tabular.py | 88 -\n", + " tests/unit/torch/tabular/test_transformations.py | 122 --\n", + " tests/unit/torch/test_dataloader_utils.py | 86 -\n", + " tests/unit/torch/test_losses.py | 53 -\n", + " tests/unit/torch/test_public_api.py | 27 -\n", + " tests/unit/torch/utils/__init__.py | 15 -\n", + " tests/unit/xgb/test_xgboost.py | 2 +-\n", + " tox.ini | 78 +-\n", + " 210 files changed, 10688 insertions(+), 8019 deletions(-)\n", + " delete mode 100644 .github/workflows/blossom-ci.yml\n", + " create mode 100644 .github/workflows/check-base-branch.yaml\n", + " create mode 100644 .github/workflows/cpu-horovod.yml\n", + " create mode 100644 .github/workflows/cpu-t4r.yml\n", + " create mode 100644 .github/workflows/multi-gpu-ci.yml\n", + " create mode 100644 .github/workflows/packages.yaml\n", + " create mode 100644 .github/workflows/set-stable-branch.yaml\n", + " create mode 100644 examples/images/mtl_architectures.png\n", + " create mode 100644 examples/usecases/multi-gpu/install_sparse_operation_kit.sh\n", + " create mode 100644 examples/usecases/ranking_with_multitask_learning.ipynb\n", + " rename merlin/{models/torch/block => datasets/entertainment/tenrec_video}/__init__.py (100%)\n", + " create mode 100644 merlin/datasets/entertainment/tenrec_video/schema.pbtxt\n", + " create mode 100644 merlin/models/tf/distributed/embedding.py\n", + " create mode 100644 merlin/models/tf/experimental/sample_weight.py\n", + " create mode 100644 merlin/models/tf/outputs/block.py\n", + " delete mode 100644 merlin/models/torch/__init__.py\n", + " delete mode 100644 merlin/models/torch/block/base.py\n", + " delete mode 100644 merlin/models/torch/block/mlp.py\n", + " delete mode 100644 merlin/models/torch/features/base.py\n", + " delete mode 100644 merlin/models/torch/features/continuous.py\n", + " delete mode 100644 merlin/models/torch/features/embedding.py\n", + " delete mode 100644 merlin/models/torch/features/tabular.py\n", + " delete mode 100644 merlin/models/torch/losses.py\n", + " delete mode 100644 merlin/models/torch/model/__init__.py\n", + " delete mode 100644 merlin/models/torch/model/base.py\n", + " delete mode 100644 merlin/models/torch/model/prediction_task.py\n", + " delete mode 100644 merlin/models/torch/tabular/__init__.py\n", + " delete mode 100644 merlin/models/torch/tabular/aggregation.py\n", + " delete mode 100644 merlin/models/torch/tabular/base.py\n", + " delete mode 100644 merlin/models/torch/tabular/transformations.py\n", + " delete mode 100644 merlin/models/torch/typing.py\n", + " delete mode 100644 merlin/models/torch/utils/__init__.py\n", + " delete mode 100644 merlin/models/torch/utils/data_utils.py\n", + " delete mode 100644 merlin/models/torch/utils/examples_utils.py\n", + " delete mode 100644 merlin/models/torch/utils/torch_utils.py\n", + " create mode 100644 pytest.ini\n", + " create mode 100644 requirements/horovod-cpu-environment.yml\n", + " create mode 100644 tests/unit/tf/examples/test_usecase_ranking_with_multitask_learning.py\n", + " rename {merlin/models/torch/features => tests/unit/tf/experimental}/__init__.py (100%)\n", + " create mode 100644 tests/unit/tf/experimental/test_sample_weight.py\n", + " create mode 100644 tests/unit/tf/horovod/test_embedding.py\n", + " create mode 100644 tests/unit/tf/inputs/test_block.py\n", + " create mode 100644 tests/unit/tf/outputs/test_block.py\n", + " delete mode 100644 tests/unit/torch/__init__.py\n", + " delete mode 100644 tests/unit/torch/_conftest.py\n", + " delete mode 100644 tests/unit/torch/block/__init__.py\n", + " delete mode 100644 tests/unit/torch/block/test_base.py\n", + " delete mode 100644 tests/unit/torch/block/test_mlp.py\n", + " delete mode 100644 tests/unit/torch/features/__init__.py\n", + " delete mode 100644 tests/unit/torch/features/test_continuous.py\n", + " delete mode 100644 tests/unit/torch/features/test_embedding.py\n", + " delete mode 100644 tests/unit/torch/features/test_tabular.py\n", + " delete mode 100644 tests/unit/torch/model/__init__.py\n", + " delete mode 100644 tests/unit/torch/model/test_head.py\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " delete mode 100644 tests/unit/torch/model/test_model.py\n", + " delete mode 100644 tests/unit/torch/tabular/__init__.py\n", + " delete mode 100644 tests/unit/torch/tabular/test_aggregation.py\n", + " delete mode 100644 tests/unit/torch/tabular/test_tabular.py\n", + " delete mode 100644 tests/unit/torch/tabular/test_transformations.py\n", + " delete mode 100644 tests/unit/torch/test_dataloader_utils.py\n", + " delete mode 100644 tests/unit/torch/test_losses.py\n", + " delete mode 100644 tests/unit/torch/test_public_api.py\n", + " delete mode 100644 tests/unit/torch/utils/__init__.py\n", + "Processing /models\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Collecting merlin-core>=23.4.0\n", + " Downloading merlin-core-23.4.0.tar.gz (133 kB)\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Collecting merlin-dataloader>=23.4.0\n", + " Downloading merlin-dataloader-23.4.0.tar.gz (46 kB)\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (8.0.0)\n", + "Collecting dask-cuda>=22.12.0\n", + " Downloading dask_cuda-23.4.0-py3-none-any.whl (125 kB)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (0.56.4)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.12.0)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (11.4.1)\n", + "Collecting fsspec>=2022.7.1\n", + " Downloading fsspec-2023.5.0-py3-none-any.whl (160 kB)\n", + "Collecting dask>=2022.11.1\n", + " Downloading dask-2023.4.1-py3-none-any.whl (1.2 MB)\n", + "Requirement already satisfied: numpy>=1.22.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.22.4)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (22.0)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (3.19.6)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.3.5)\n", + "Collecting distributed>=2022.11.1\n", + " Downloading distributed-2023.4.1-py3-none-any.whl (962 kB)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.2.5)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (4.64.1)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from dask-cuda>=22.12.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (2.2.0)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (45.2.0)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (5.2.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (0.39.1)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.57.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.3.0)\n", + "Requirement already satisfied: toolz>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (0.12.0)\n", + "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (8.1.3)\n", + "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (2.2.0)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (6.0)\n", + "Requirement already satisfied: partd>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.3.0)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (2022.7)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (2.8.2)\n", + "Requirement already satisfied: jinja2>=2.10.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (3.1.2)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.7.0)\n", + "Requirement already satisfied: psutil>=5.7.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (5.9.4)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.0.0)\n", + "Requirement already satisfied: sortedcontainers>=2.0.5 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (2.4.0)\n", + "Requirement already satisfied: urllib3>=1.24.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.26.13)\n", + "Requirement already satisfied: tornado>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (6.1)\n", + "Requirement already satisfied: msgpack>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.0.4)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (0.4.3)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.2.0)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->dask-cuda>=22.12.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.0.1)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (3.11.0)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (1.14.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2>=2.10.3->distributed>=2022.11.1->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (2.1.1)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (4.1.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (6.0.4)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (6.0.1)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=23.4.0->merlin-models==23.5.dev0+12.gd8133b8f) (4.0.0)\n", + "Building wheels for collected packages: merlin-models, merlin-core, merlin-dataloader\n", + " Building wheel for merlin-models (PEP 517): started\n", + " Building wheel for merlin-models (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-models: filename=merlin_models-23.5.dev0+12.gd8133b8f-py3-none-any.whl size=343289 sha256=1f20f65acef288535cc4e5bca6de216485c546156d707b17b3bb9b8ceedc3ec7\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-0prgr6hn/wheels/4d/e8/98/0493db55fff90dc9af123f55a9455b96f7f8166c912a02c8a6\n", + " Building wheel for merlin-core (PEP 517): started\n", + " Building wheel for merlin-core (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-core: filename=merlin_core-23.4.0-py3-none-any.whl size=159556 sha256=9a716886c9862c32bd19979d286f32eb664022c85bcee19ca2d762fa014c8e85\n", + " Stored in directory: /root/.cache/pip/wheels/42/ef/87/2c64bce8c3064a2c4e399933df4eda4838939355698ff8f7c7\n", + " Building wheel for merlin-dataloader (PEP 517): started\n", + " Building wheel for merlin-dataloader (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-dataloader: filename=merlin_dataloader-23.4.0-py3-none-any.whl size=34732 sha256=a7853a487205c4a6fdf99d03bda0cacba559264387e507e2f8d6cd87dc471c80\n", + " Stored in directory: /root/.cache/pip/wheels/90/b0/66/48e52cc29f544ffbd105154b8be0901b5bb80cc85842b778fc\n", + "Successfully built merlin-models merlin-core merlin-dataloader\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ERROR: dask-cudf 22.8.0a0+304.g6ca81bbc78.dirty requires cupy-cuda118<12,>=9.5.0, which is not installed.\n", + "ERROR: cudf 22.8.0a0+304.g6ca81bbc78.dirty requires cupy-cuda118<12,>=9.5.0, which is not installed.\n", + "ERROR: dask-cudf 22.8.0a0+304.g6ca81bbc78.dirty has requirement dask==2022.7.1, but you'll have dask 2023.4.1 which is incompatible.\n", + "ERROR: dask-cudf 22.8.0a0+304.g6ca81bbc78.dirty has requirement distributed==2022.7.1, but you'll have distributed 2023.4.1 which is incompatible.\n", + "ERROR: dask-cuda 23.4.0 has requirement dask==2023.3.2, but you'll have dask 2023.4.1 which is incompatible.\n", + "ERROR: dask-cuda 23.4.0 has requirement distributed==2023.3.2.1, but you'll have distributed 2023.4.1 which is incompatible.\n", + "ERROR: cudf 22.8.0a0+304.g6ca81bbc78.dirty has requirement cuda-python<11.7.1,>=11.5, but you'll have cuda-python 11.8.1 which is incompatible.\n", + "ERROR: cudf 22.8.0a0+304.g6ca81bbc78.dirty has requirement protobuf<3.21.0a0,>=3.20.1, but you'll have protobuf 3.19.6 which is incompatible.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Installing collected packages: fsspec, dask, distributed, dask-cuda, merlin-core, merlin-dataloader, merlin-models\n", + " Attempting uninstall: fsspec\n", + " Found existing installation: fsspec 2022.5.0\n", + " Uninstalling fsspec-2022.5.0:\n", + " Successfully uninstalled fsspec-2022.5.0\n", + " Attempting uninstall: dask\n", + " Found existing installation: dask 2022.7.1\n", + " Uninstalling dask-2022.7.1:\n", + " Successfully uninstalled dask-2022.7.1\n", + " Attempting uninstall: distributed\n", + " Found existing installation: distributed 2022.7.1\n", + " Uninstalling distributed-2022.7.1:\n", + " Successfully uninstalled distributed-2022.7.1\n", + " Attempting uninstall: dask-cuda\n", + " Found existing installation: dask-cuda 22.8.0a0+36.g9860cad\n", + " Uninstalling dask-cuda-22.8.0a0+36.g9860cad:\n", + " Successfully uninstalled dask-cuda-22.8.0a0+36.g9860cad\n", + " Attempting uninstall: merlin-core\n", + " Found existing installation: merlin-core 0.10.0\n", + " Uninstalling merlin-core-0.10.0:\n", + " Successfully uninstalled merlin-core-0.10.0\n", + " Attempting uninstall: merlin-dataloader\n", + " Found existing installation: merlin-dataloader 0.0.4\n", + " Uninstalling merlin-dataloader-0.0.4:\n", + " Successfully uninstalled merlin-dataloader-0.0.4\n", + " Attempting uninstall: merlin-models\n", + " Found existing installation: merlin-models 23.2.0+7.ga86201ee\n", + " Uninstalling merlin-models-23.2.0+7.ga86201ee:\n", + " Successfully uninstalled merlin-models-23.2.0+7.ga86201ee\n", + "Successfully installed dask-2023.4.1 dask-cuda-23.4.0 distributed-2023.4.1 fsspec-2023.5.0 merlin-core-23.4.0 merlin-dataloader-23.4.0 merlin-models-23.5.dev0+12.gd8133b8f\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Previous HEAD position was 2fc6889 add schema parameter to the `repartition` method (#192)\n", + "Switched to branch 'main'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Your branch is up to date with 'origin/main'.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/core\n", + " * branch main -> FETCH_HEAD\n", + " cd96ca5f..a0bcd30f main -> origin/main\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Updating cd96ca5f..a0bcd30f\n", + "Fast-forward\n", + " .github/actionlint.yaml | 5 +\n", + " .github/release-drafter.yml | 44 +--\n", + " .github/workflows/ISSUE_TEMPLATE/bug-report.md | 17 +-\n", + " .../ISSUE_TEMPLATE/documentation-request.md | 12 +-\n", + " .../workflows/ISSUE_TEMPLATE/feature-request.md | 5 +-\n", + " .../workflows/ISSUE_TEMPLATE/submit-question.md | 3 +-\n", + " .github/workflows/ISSUE_TEMPLATE/task.md | 5 +-\n", + " .github/workflows/check-base-branch.yaml | 9 +\n", + " .github/workflows/cpu-ci.yml | 138 +-------\n", + " .github/workflows/cpu-models.yml | 44 ---\n", + " .github/workflows/cpu-nvtabular.yml | 44 ---\n", + " .github/workflows/cpu-systems.yml | 44 ---\n", + " .github/workflows/docs-preview-pr.yaml | 4 +-\n", + " .github/workflows/docs-sched-rebuild.yaml | 51 +--\n", + " .github/workflows/gpu-ci.yml | 52 ++-\n", + " .github/workflows/lint.yaml | 11 +-\n", + " .github/workflows/merlin.yml | 35 ++\n", + " .github/workflows/packages.yaml | 154 ++++++++\n", + " .github/workflows/release-drafter.yaml | 4 +-\n", + " .github/workflows/set-stable-branch.yaml | 10 +\n", + " .github/workflows/tox.yml | 38 ++\n", + " .pre-commit-config.yaml | 55 +--\n", + " .prettierignore | 2 +\n", + " CLA.md | 9 +-\n", + " CONTRIBUTING.md | 28 +-\n", + " README.md | 68 ++--\n", + " ci/pr.gpu.Jenkinsfile | 2 +-\n", + " conda/recipe/meta.yaml | 4 +-\n", + " docs/README.md | 49 ++-\n", + " merlin/core/compat/__init__.py | 143 ++++++++\n", + " merlin/core/compat/tensorflow.py | 92 +++++\n", + " merlin/core/compat/torch.py | 22 ++\n", + " merlin/core/dispatch.py | 245 ++++++++-----\n", + " merlin/core/has_gpu.py | 46 +++\n", + " merlin/core/utils.py | 88 +----\n", + " merlin/dag/__init__.py | 1 +\n", + " merlin/dag/base_operator.py | 30 +-\n", + " merlin/dag/dictarray.py | 3 +-\n", + " merlin/dag/executors.py | 242 +++++++------\n", + " merlin/dag/graph.py | 20 ++\n", + " merlin/dag/node.py | 5 +-\n", + " merlin/dag/selector.py | 10 +-\n", + " merlin/dag/utils.py | 69 ++++\n", + " merlin/dispatch/lazy.py | 156 +++++++++\n", + " merlin/dtypes/__init__.py | 61 ++++\n", + " merlin/dtypes/aliases.py | 53 +++\n", + " merlin/dtypes/base.py | 179 ++++++++++\n", + " merlin/dtypes/mapping.py | 177 ++++++++++\n", + " .../compat.py => dtypes/mappings/__init__.py} | 17 +-\n", + " merlin/dtypes/mappings/cudf.py | 61 ++++\n", + " merlin/dtypes/mappings/merlin.py | 51 +++\n", + " merlin/dtypes/mappings/numpy.py | 52 +++\n", + " merlin/dtypes/mappings/pandas.py | 38 ++\n", + " merlin/dtypes/mappings/python.py | 28 ++\n", + " merlin/dtypes/mappings/tf.py | 52 +++\n", + " merlin/dtypes/mappings/torch.py | 43 +++\n", + " merlin/dtypes/mappings/triton.py | 53 +++\n", + " merlin/dtypes/registry.py | 136 ++++++++\n", + " merlin/dtypes/shape.py | 200 +++++++++++\n", + " merlin/io/__init__.py | 2 +-\n", + " merlin/io/avro.py | 6 +-\n", + " merlin/io/csv.py | 9 +-\n", + " merlin/io/dask.py | 74 +++-\n", + " merlin/io/dataframe_engine.py | 6 +-\n", + " merlin/io/dataset.py | 112 ++++--\n", + " merlin/io/fsspec_utils.py | 16 +-\n", + " merlin/io/parquet.py | 25 +-\n", + " merlin/io/shuffle.py | 13 +-\n", + " merlin/io/worker.py | 104 +++---\n", + " merlin/io/writer.py | 7 +-\n", + " merlin/io/writer_factory.py | 10 +-\n", + " merlin/schema/io/tensorflow_metadata.py | 115 ++++--\n", + " merlin/schema/schema.py | 331 +++++++++++-------\n", + " merlin/schema/tags.py | 7 +-\n", + " merlin/table/__init__.py | 24 ++\n", + " merlin/table/conversions.py | 226 ++++++++++++\n", + " merlin/table/cupy_column.py | 108 ++++++\n", + " merlin/table/numpy_column.py | 122 +++++++\n", + " merlin/table/tensor_column.py | 261 ++++++++++++++\n", + " merlin/table/tensor_table.py | 294 ++++++++++++++++\n", + " merlin/table/tensorflow_column.py | 173 +++++++++\n", + " merlin/table/torch_column.py | 135 +++++++\n", + " requirements-gpu.txt | 2 +-\n", + " requirements.txt | 13 +-\n", + " tests/conftest.py | 35 +-\n", + " tests/unit/core/test_dispatch.py | 43 ++-\n", + " tests/unit/core/test_protocols.py | 10 +-\n", + " tests/unit/core/test_version.py | 2 +\n", + " tests/unit/dag/test_column_selector.py | 6 +\n", + " tests/unit/dag/test_dag_utils.py | 31 ++\n", + " tests/unit/dispatch/test_lazy_dispatch.py | 61 ++++\n", + " tests/unit/dtypes/test_cudf.py | 30 ++\n", + " tests/unit/dtypes/test_module.py | 61 ++++\n", + " tests/unit/dtypes/test_shape.py | 222 ++++++++++++\n", + " tests/unit/io/test_avro.py | 8 +-\n", + " tests/unit/io/test_dataset.py | 51 +++\n", + " tests/unit/io/test_io.py | 95 ++++-\n", + " tests/unit/io/test_worker.py | 142 ++++++++\n", + " tests/unit/schema/test_column_schemas.py | 142 +++++---\n", + " tests/unit/schema/test_schema.py | 60 +++-\n", + " tests/unit/schema/test_schema_io.py | 54 ++-\n", + " tests/unit/table/test_convert_column.py | 164 +++++++++\n", + " tests/unit/table/test_tensor_column.py | 262 ++++++++++++++\n", + " tests/unit/table/test_tensor_table.py | 387 +++++++++++++++++++++\n", + " tests/unit/utils/test_utils.py | 16 +-\n", + " tox.ini | 49 ++-\n", + " 106 files changed, 6299 insertions(+), 1146 deletions(-)\n", + " create mode 100644 .github/actionlint.yaml\n", + " create mode 100644 .github/workflows/check-base-branch.yaml\n", + " delete mode 100644 .github/workflows/cpu-models.yml\n", + " delete mode 100644 .github/workflows/cpu-nvtabular.yml\n", + " delete mode 100644 .github/workflows/cpu-systems.yml\n", + " create mode 100644 .github/workflows/merlin.yml\n", + " create mode 100644 .github/workflows/packages.yaml\n", + " create mode 100644 .github/workflows/set-stable-branch.yaml\n", + " create mode 100644 .github/workflows/tox.yml\n", + " create mode 100644 .prettierignore\n", + " create mode 100644 merlin/core/compat/__init__.py\n", + " create mode 100644 merlin/core/compat/tensorflow.py\n", + " create mode 100644 merlin/core/compat/torch.py\n", + " create mode 100644 merlin/core/has_gpu.py\n", + " create mode 100644 merlin/dag/utils.py\n", + " create mode 100644 merlin/dispatch/lazy.py\n", + " create mode 100644 merlin/dtypes/__init__.py\n", + " create mode 100644 merlin/dtypes/aliases.py\n", + " create mode 100644 merlin/dtypes/base.py\n", + " create mode 100644 merlin/dtypes/mapping.py\n", + " rename merlin/{core/compat.py => dtypes/mappings/__init__.py} (60%)\n", + " create mode 100644 merlin/dtypes/mappings/cudf.py\n", + " create mode 100644 merlin/dtypes/mappings/merlin.py\n", + " create mode 100644 merlin/dtypes/mappings/numpy.py\n", + " create mode 100644 merlin/dtypes/mappings/pandas.py\n", + " create mode 100644 merlin/dtypes/mappings/python.py\n", + " create mode 100644 merlin/dtypes/mappings/tf.py\n", + " create mode 100644 merlin/dtypes/mappings/torch.py\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " create mode 100644 merlin/dtypes/mappings/triton.py\n", + " create mode 100644 merlin/dtypes/registry.py\n", + " create mode 100644 merlin/dtypes/shape.py\n", + " create mode 100644 merlin/table/__init__.py\n", + " create mode 100644 merlin/table/conversions.py\n", + " create mode 100644 merlin/table/cupy_column.py\n", + " create mode 100644 merlin/table/numpy_column.py\n", + " create mode 100644 merlin/table/tensor_column.py\n", + " create mode 100644 merlin/table/tensor_table.py\n", + " create mode 100644 merlin/table/tensorflow_column.py\n", + " create mode 100644 merlin/table/torch_column.py\n", + " create mode 100644 tests/unit/dag/test_dag_utils.py\n", + " create mode 100644 tests/unit/dispatch/test_lazy_dispatch.py\n", + " create mode 100644 tests/unit/dtypes/test_cudf.py\n", + " create mode 100644 tests/unit/dtypes/test_module.py\n", + " create mode 100644 tests/unit/dtypes/test_shape.py\n", + " create mode 100644 tests/unit/io/test_dataset.py\n", + " create mode 100644 tests/unit/io/test_worker.py\n", + " create mode 100644 tests/unit/table/test_convert_column.py\n", + " create mode 100644 tests/unit/table/test_tensor_column.py\n", + " create mode 100644 tests/unit/table/test_tensor_table.py\n", + "Processing /core\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (1.2.5)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (1.3.5)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (0.56.4)\n", + "Requirement already satisfied: dask-cuda>=22.12.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (23.4.0)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (11.4.1)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (22.0)\n", + "Requirement already satisfied: distributed>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (2023.4.1)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (3.19.6)\n", + "Requirement already satisfied: dask>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (2023.4.1)\n", + "Requirement already satisfied: numpy>=1.22.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (1.22.4)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (4.64.1)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (8.0.0)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (1.12.0)\n", + "Requirement already satisfied: fsspec>=2022.7.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core==0.9.0+125.ga0bcd30f) (2023.5.0)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core==0.9.0+125.ga0bcd30f) (1.2.0)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core==0.9.0+125.ga0bcd30f) (0.4.3)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+125.ga0bcd30f) (2022.7)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+125.ga0bcd30f) (2.8.2)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core==0.9.0+125.ga0bcd30f) (5.2.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core==0.9.0+125.ga0bcd30f) (0.39.1)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core==0.9.0+125.ga0bcd30f) (45.2.0)\n", + "Requirement already satisfied: zict>=0.1.3 in /usr/local/lib/python3.8/dist-packages (from dask-cuda>=22.12.0->merlin-core==0.9.0+125.ga0bcd30f) (2.2.0)\n", + "Requirement already satisfied: jinja2>=2.10.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (3.1.2)\n", + "Requirement already satisfied: psutil>=5.7.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (5.9.4)\n", + "Requirement already satisfied: urllib3>=1.24.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (1.26.13)\n", + "Requirement already satisfied: tornado>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (6.1)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (6.0)\n", + "Requirement already satisfied: sortedcontainers>=2.0.5 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (2.4.0)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (1.7.0)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (1.0.0)\n", + "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (2.2.0)\n", + "Requirement already satisfied: msgpack>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (1.0.4)\n", + "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (8.1.3)\n", + "Requirement already satisfied: toolz>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (0.12.0)\n", + "Requirement already satisfied: partd>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (1.3.0)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core==0.9.0+125.ga0bcd30f) (1.57.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core==0.9.0+125.ga0bcd30f) (1.3.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core==0.9.0+125.ga0bcd30f) (6.0.4)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core==0.9.0+125.ga0bcd30f) (4.1.0)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core==0.9.0+125.ga0bcd30f) (1.14.0)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core==0.9.0+125.ga0bcd30f) (3.11.0)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=0.1.3->dask-cuda>=22.12.0->merlin-core==0.9.0+125.ga0bcd30f) (1.0.1)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2>=2.10.3->distributed>=2022.11.1->merlin-core==0.9.0+125.ga0bcd30f) (2.1.1)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core==0.9.0+125.ga0bcd30f) (4.0.0)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core==0.9.0+125.ga0bcd30f) (6.0.1)\n", + "Building wheels for collected packages: merlin-core\n", + " Building wheel for merlin-core (PEP 517): started\n", + " Building wheel for merlin-core (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-core: filename=merlin_core-0.9.0+125.ga0bcd30f-py3-none-any.whl size=161449 sha256=57d8552cb7abbed6b1d1b2860391c64e7dfea045c442fc0f94c0fc940aed7e3d\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-0yemn26u/wheels/8f/da/8c/c779661788874afaa32fd10abeac6016635956e3bad9940584\n", + "Successfully built merlin-core\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ERROR: merlin-models 23.5.dev0+12.gd8133b8f has requirement merlin-core>=23.4.0, but you'll have merlin-core 0.9.0+125.ga0bcd30f which is incompatible.\n", + "ERROR: merlin-dataloader 23.4.0 has requirement merlin-core>=23.4.0, but you'll have merlin-core 0.9.0+125.ga0bcd30f which is incompatible.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Installing collected packages: merlin-core\n", + " Attempting uninstall: merlin-core\n", + " Found existing installation: merlin-core 23.4.0\n", + " Uninstalling merlin-core-23.4.0:\n", + " Successfully uninstalled merlin-core-23.4.0\n", + "Successfully installed merlin-core-0.9.0+125.ga0bcd30f\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Previous HEAD position was 020b24b7 Fix output error occurring due to check if it is a dict or not (#1742)\n", + "Switched to branch 'main'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Your branch is up to date with 'origin/main'.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/NVTabular\n", + " * branch main -> FETCH_HEAD\n", + " c5bc4098..67136eba main -> origin/main\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Updating c5bc4098..67136eba\n", + "Fast-forward\n", + " .github/ISSUE_TEMPLATE/bug_report.md | 11 +-\n", + " .github/ISSUE_TEMPLATE/documentation-request.md | 3 +-\n", + " .github/ISSUE_TEMPLATE/feature_request.md | 3 +-\n", + " .github/ISSUE_TEMPLATE/operator_request.md | 14 +-\n", + " .github/ISSUE_TEMPLATE/research_question.md | 3 +-\n", + " .github/ISSUE_TEMPLATE/submit-question.md | 3 +-\n", + " .github/ISSUE_TEMPLATE/task.md | 4 +-\n", + " .github/release-drafter.yml | 44 ++--\n", + " .github/workflows/blossom-ci.yml | 230 ++++++++++-----------\n", + " .github/workflows/check-base-branch.yaml | 9 +\n", + " .github/workflows/conda-env-create.yml | 30 +--\n", + " .github/workflows/cpu-ci.yml | 138 -------------\n", + " .github/workflows/cpu-packages.yml | 179 ++++++++++++++++\n", + " .github/workflows/cpu-tests.yml | 75 +++++++\n", + " .github/workflows/docs-preview-pr.yaml | 4 +-\n", + " .github/workflows/docs-sched-rebuild.yaml | 7 +-\n", + " .github/workflows/gpu-ci.yml | 30 ---\n", + " .github/workflows/gpu-tests.yml | 34 +++\n", + " .github/workflows/lint.yaml | 4 +\n", + " .github/workflows/release-drafter.yaml | 2 +-\n", + " .github/workflows/set-stable-branch.yaml | 10 +\n", + " .gitlab-ci.yml | 23 +--\n", + " .pre-commit-config.yaml | 47 +++--\n", + " .prettierignore | 2 +\n", + " CHANGELOG.md | 187 ++++++++---------\n", + " CONTRIBUTING.md | 30 +--\n", + " README.md | 48 ++---\n", + " bench/datasets/tools/nvt_etl.py | 4 +-\n", + " bench/datasets/tools/train_tensorflow.py | 1 -\n", + " bench/examples/MultiGPUBench.md | 67 +++---\n", + " bench/examples/dask-nvtabular-criteo-benchmark.py | 4 +-\n", + " ci/pr.gpu.Jenkinsfile | 2 +-\n", + " conda/environments/nvtabular_aws_sagemaker.yml | 2 +-\n", + " conda/recipes/meta.yaml | 2 +-\n", + " cpp/nvtabular/inference/categorify.cc | 10 +\n", + " docs/README.md | 29 ++-\n", + " docs/source/core_features.md | 48 ++---\n", + " docs/source/resources/architecture.md | 17 +-\n", + " docs/source/resources/cloud_integration.md | 24 ++-\n", + " docs/source/resources/links.md | 40 ++--\n", + " docs/source/toc.yaml | 12 +-\n", + " examples/01-Getting-started.ipynb | 5 +-\n", + " examples/02-Advanced-NVTabular-workflow.ipynb | 5 +-\n", + " .../03-Running-on-multiple-GPUs-or-on-CPU.ipynb | 24 ++-\n", + " examples/README.md | 1 +\n", + " .../tensorflow/tfrecords_to_parquet.py | 9 +-\n", + " nvtabular/inference/__init__.py | 4 +-\n", + " nvtabular/inference/triton/data_conversions.py | 24 +--\n", + " nvtabular/inference/triton/ensemble.py | 86 ++------\n", + " nvtabular/inference/triton/model/model_pt.py | 1 -\n", + " nvtabular/inference/workflow/hugectr.py | 2 +-\n", + " nvtabular/loader/backend.py | 31 +--\n", + " nvtabular/loader/tensorflow.py | 1 +\n", + " nvtabular/ops/categorify.py | 4 +-\n", + " nvtabular/ops/column_similarity.py | 42 ++--\n", + " nvtabular/ops/groupby.py | 35 ++--\n", + " nvtabular/ops/join_external.py | 7 +-\n", + " nvtabular/ops/join_groupby.py | 18 +-\n", + " nvtabular/ops/list_slice.py | 22 +-\n", + " nvtabular/ops/moments.py | 2 -\n", + " nvtabular/ops/reduce_dtype_size.py | 9 +-\n", + " nvtabular/ops/target_encoding.py | 2 +-\n", + " nvtabular/ops/value_counts.py | 14 +-\n", + " nvtabular/tools/data_gen.py | 31 ++-\n", + " nvtabular/utils.py | 2 +-\n", + " nvtabular/workflow/workflow.py | 169 +++++++++++++--\n", + " requirements-test.txt | 2 -\n", + " requirements/base.txt | 4 +-\n", + " requirements/test.txt | 15 +-\n", + " setup.py | 5 +\n", + " tests/conftest.py | 33 ++-\n", + " .../test_02-Advanced-NVTabular-workflow.py | 17 +-\n", + " .../test_03-Running-on-multiple-GPUs-or-on-CPU.py | 11 +-\n", + " tests/unit/loader/test_tf_dataloader.py | 206 +++---------------\n", + " tests/unit/loader/test_torch_dataloader.py | 79 ++-----\n", + " tests/unit/ops/test_categorify.py | 36 +++-\n", + " tests/unit/ops/test_column_similarity.py | 3 +-\n", + " tests/unit/ops/test_drop_low_cardinality.py | 7 +-\n", + " tests/unit/ops/test_groupyby.py | 9 +-\n", + " tests/unit/ops/test_join.py | 11 +-\n", + " tests/unit/ops/test_lambda.py | 28 ++-\n", + " tests/unit/ops/test_ops.py | 12 +-\n", + " tests/unit/ops/test_ops_schema.py | 25 ++-\n", + " tests/unit/ops/test_reduce_dtype_size.py | 7 +-\n", + " tests/unit/ops/test_target_encode.py | 11 +-\n", + " tests/unit/ops/test_value_count.py | 2 +\n", + " tests/unit/test_dask_nvt.py | 5 +-\n", + " tests/unit/test_s3.py | 8 +-\n", + " tests/unit/test_tf4rec.py | 11 +-\n", + " tests/unit/test_triton_inference.py | 3 +-\n", + " tests/unit/workflow/test_cpu_workflow.py | 6 +-\n", + " tests/unit/workflow/test_workflow.py | 92 ++++++++-\n", + " tox.ini | 10 +-\n", + " 93 files changed, 1448 insertions(+), 1196 deletions(-)\n", + " create mode 100644 .github/workflows/check-base-branch.yaml\n", + " delete mode 100644 .github/workflows/cpu-ci.yml\n", + " create mode 100644 .github/workflows/cpu-packages.yml\n", + " create mode 100644 .github/workflows/cpu-tests.yml\n", + " delete mode 100644 .github/workflows/gpu-ci.yml\n", + " create mode 100644 .github/workflows/gpu-tests.yml\n", + " create mode 100644 .github/workflows/set-stable-branch.yaml\n", + " create mode 100644 .prettierignore\n", + " delete mode 100644 requirements-test.txt\n", + "Processing /nvtabular\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Requirement already satisfied: merlin-dataloader>=23.4.0 in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+66.g67136eba) (23.4.0)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.8/dist-packages (from nvtabular==1.6.0+66.g67136eba) (1.9.3)\n", + "Processing /root/.cache/pip/wheels/42/ef/87/2c64bce8c3064a2c4e399933df4eda4838939355698ff8f7c7/merlin_core-23.4.0-py3-none-any.whl\n", + "Requirement already satisfied: numpy<1.26.0,>=1.18.5 in /usr/local/lib/python3.8/dist-packages (from scipy->nvtabular==1.6.0+66.g67136eba) (1.22.4)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (0.56.4)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.2.5)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.12.0)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (8.0.0)\n", + "Requirement already satisfied: dask>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2023.4.1)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (4.64.1)\n", + "Requirement already satisfied: distributed>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2023.4.1)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (3.19.6)\n", + "Requirement already satisfied: dask-cuda>=22.12.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (23.4.0)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (11.4.1)\n", + "Requirement already satisfied: fsspec>=2022.7.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2023.5.0)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.3.5)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (22.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (0.39.1)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (45.2.0)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (5.2.0)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (0.4.3)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.2.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.3.0)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.57.0)\n", + "Requirement already satisfied: partd>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.3.0)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (6.0)\n", + "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2.2.0)\n", + "Requirement already satisfied: toolz>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (0.12.0)\n", + "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (8.1.3)\n", + "Requirement already satisfied: psutil>=5.7.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (5.9.4)\n", + "Requirement already satisfied: msgpack>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.0.4)\n", + "Requirement already satisfied: tornado>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (6.1)\n", + "Requirement already satisfied: urllib3>=1.24.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.26.13)\n", + "Requirement already satisfied: jinja2>=2.10.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (3.1.2)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.0.0)\n", + "Requirement already satisfied: sortedcontainers>=2.0.5 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2.4.0)\n", + "Requirement already satisfied: zict>=2.2.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2.2.0)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.7.0)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2022.7)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (3.11.0)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (4.1.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (6.0.4)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2>=2.10.3->distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (2.1.1)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=2.2.0->distributed>=2022.11.1->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.0.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (1.14.0)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (4.0.0)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=23.4.0->nvtabular==1.6.0+66.g67136eba) (6.0.1)\n", + "Building wheels for collected packages: nvtabular\n", + " Building wheel for nvtabular (PEP 517): started\n", + " Building wheel for nvtabular (PEP 517): finished with status 'done'\n", + " Created wheel for nvtabular: filename=nvtabular-1.6.0+66.g67136eba-cp38-cp38-linux_x86_64.whl size=259850 sha256=957958ecd0f9149dbe203eb5e2a3d1b5ec128421aee4e31572f4ca8574131719\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-btpmur92/wheels/df/bf/c2/9cc2a62fe6da42038c26a9c0c4e25f9767093528b102fa30a2\n", + "Successfully built nvtabular\n", + "Installing collected packages: merlin-core, nvtabular\n", + " Attempting uninstall: merlin-core\n", + " Found existing installation: merlin-core 0.9.0+125.ga0bcd30f\n", + " Uninstalling merlin-core-0.9.0+125.ga0bcd30f:\n", + " Successfully uninstalled merlin-core-0.9.0+125.ga0bcd30f\n", + " Attempting uninstall: nvtabular\n", + " Found existing installation: nvtabular 1.8.0\n", + " Uninstalling nvtabular-1.8.0:\n", + " Successfully uninstalled nvtabular-1.8.0\n", + "Successfully installed merlin-core-23.4.0 nvtabular-1.6.0+66.g67136eba\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Previous HEAD position was feaf748 adding async tf strategy for gpu memory (#264)\n", + "Switched to branch 'main'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Your branch is up to date with 'origin/main'.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/systems\n", + " * branch main -> FETCH_HEAD\n", + " 20bb231..2b1b90b main -> origin/main\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Updating 20bb231..2b1b90b\n", + "Fast-forward\n", + " .github/ISSUE_TEMPLATE/bug-report.md | 17 +-\n", + " .github/ISSUE_TEMPLATE/documentation-request.md | 12 +-\n", + " .github/ISSUE_TEMPLATE/feature-request.md | 5 +-\n", + " .github/ISSUE_TEMPLATE/submit-question.md | 3 +-\n", + " .github/ISSUE_TEMPLATE/task.md | 5 +-\n", + " .github/release-drafter.yml | 44 +-\n", + " .github/workflows/check-base-branch.yaml | 9 +\n", + " .github/workflows/cpu-ci.yml | 128 ++--\n", + " .github/workflows/docs-preview-pr.yaml | 6 +-\n", + " .github/workflows/docs-sched-rebuild.yaml | 7 +-\n", + " .github/workflows/gpu-ci.yml | 40 +-\n", + " .github/workflows/lint.yaml | 18 +-\n", + " .github/workflows/packages.yaml | 118 ++++\n", + " .github/workflows/postmerge-cpu.yml | 60 ++\n", + " .github/workflows/postmerge-gpu.yml | 27 +\n", + " .github/workflows/release-drafter.yml | 4 +-\n", + " .github/workflows/set-stable-branch.yaml | 10 +\n", + " .pre-commit-config.yaml | 71 +-\n", + " .prettierignore | 2 +\n", + " CLA.md | 9 +-\n", + " CONTRIBUTING.md | 2 +-\n", + " README.md | 2 +-\n", + " ci/pr.gpu.Jenkinsfile | 2 +-\n", + " conda/recipes/meta.yaml | 18 +-\n", + " docs/README.md | 53 +-\n", + " ...ing-An-Implicit-Model-With-Merlin-Systems.ipynb | 5 +-\n", + " ...ving-An-XGboost-Model-With-Merlin-Systems.ipynb | 5 +-\n", + " ...erving-Ranking-Models-With-Merlin-Systems.ipynb | 5 +-\n", + " merlin/systems/dag/__init__.py | 2 -\n", + " merlin/systems/dag/dictarray.py | 345 ----------\n", + " merlin/systems/dag/ensemble.py | 2 +-\n", + " merlin/systems/dag/node.py | 29 +-\n", + " merlin/systems/dag/op_runner.py | 68 --\n", + " merlin/systems/dag/ops/__init__.py | 22 +-\n", + " merlin/systems/dag/ops/faiss.py | 116 +---\n", + " merlin/systems/dag/ops/feast.py | 110 +---\n", + " merlin/systems/dag/ops/fil.py | 74 +--\n", + " merlin/systems/dag/ops/implicit.py | 84 +--\n", + " merlin/systems/dag/ops/operator.py | 216 +-----\n", + " merlin/systems/dag/ops/pytorch.py | 23 +-\n", + " merlin/systems/dag/ops/session_filter.py | 72 +-\n", + " merlin/systems/dag/ops/softmax_sampling.py | 61 +-\n", + " merlin/systems/dag/ops/tensorflow.py | 143 ++--\n", + " merlin/systems/dag/ops/unroll_features.py | 36 +-\n", + " merlin/systems/dag/ops/workflow.py | 29 +-\n", + " merlin/systems/dag/runtimes/triton/ops/fil.py | 51 +-\n", + " merlin/systems/dag/runtimes/triton/ops/operator.py | 84 ++-\n", + " merlin/systems/dag/runtimes/triton/ops/pytorch.py | 27 +-\n", + " .../systems/dag/runtimes/triton/ops/tensorflow.py | 41 +-\n", + " merlin/systems/dag/runtimes/triton/ops/workflow.py | 132 +++-\n", + " merlin/systems/dag/runtimes/triton/runtime.py | 36 +-\n", + " merlin/systems/triton/__init__.py | 118 ++--\n", + " merlin/systems/triton/conversions.py | 198 ++++--\n", + " merlin/systems/triton/export.py | 731 +--------------------\n", + " merlin/systems/triton/models/executor_model.py | 46 +-\n", + " merlin/systems/triton/models/oprunner_model.py | 129 ----\n", + " merlin/systems/triton/models/pytorch_model.py | 139 ++--\n", + " merlin/systems/triton/models/workflow_model.py | 56 +-\n", + " merlin/systems/triton/utils.py | 58 +-\n", + " merlin/systems/workflow/base.py | 30 +-\n", + " merlin/systems/workflow/hugectr.py | 87 ---\n", + " merlin/systems/workflow/pytorch.py | 46 --\n", + " merlin/systems/workflow/tensorflow.py | 68 --\n", + " pytest.ini | 7 +-\n", + " requirements/test.txt | 2 +-\n", + " tests/conftest.py | 36 +-\n", + " ...erving_an_implicit_model_with_merlin_systems.py | 12 +-\n", + " ...serving_an_xgboost_model_with_merlin_systems.py | 4 +-\n", + " tests/integration/tf/test_transformer_model.py | 103 +++\n", + " .../systems/dag/test_column.py => test_passing.py} | 15 +-\n", + " tests/unit/systems/dag/ops/test_ops.py | 101 ++-\n", + " .../dag/runtimes/local/ops/fil/test_lightgbm.py | 15 +-\n", + " .../dag/runtimes/local/ops/fil/test_sklearn.py | 15 +-\n", + " .../dag/runtimes/local/ops/fil/test_xgboost.py | 18 +-\n", + " .../runtimes/local/ops/nvtabular/test_ensemble.py | 10 +-\n", + " .../runtimes/local/ops/tensorflow/test_ensemble.py | 35 +-\n", + " .../dag/runtimes/local/ops/torch/test_op.py | 6 +-\n", + " .../triton/ops/fil/test_lightgbm_triton.py | 11 +-\n", + " .../runtimes/triton/ops/fil/test_sklearn_triton.py | 4 +-\n", + " .../runtimes/triton/ops/fil/test_xgboost_triton.py | 7 +-\n", + " .../dag/runtimes/triton/ops/torch/test_op.py | 4 +-\n", + " .../runtimes/triton/ops/workflow/test_ensemble.py | 305 ++++++++-\n", + " .../systems/dag/runtimes/triton/test_triton.py | 21 +-\n", + " tests/unit/systems/dag/test_dict_array.py | 76 ---\n", + " tests/unit/systems/dag/test_ensemble.py | 4 +-\n", + " tests/unit/systems/dag/test_executors.py | 12 +-\n", + " tests/unit/systems/dag/test_op_runner.py | 210 ------\n", + " tests/unit/systems/ops/embedding_op.py | 56 ++\n", + " tests/unit/systems/ops/faiss/test_executor.py | 25 +-\n", + " tests/unit/systems/ops/feast/test_op.py | 76 +--\n", + " tests/unit/systems/ops/fil/test_ensemble.py | 21 +-\n", + " tests/unit/systems/ops/fil/test_forest.py | 47 +-\n", + " tests/unit/systems/ops/fil/test_op.py | 106 ++-\n", + " tests/unit/systems/ops/implicit/test_executor.py | 4 +-\n", + " tests/unit/systems/ops/implicit/test_op.py | 51 +-\n", + " tests/unit/systems/ops/padding_op.py | 62 ++\n", + " tests/unit/systems/ops/tf/test_ensemble.py | 15 +-\n", + " tests/unit/systems/ops/tf/test_op.py | 6 +-\n", + " tests/unit/systems/ops/torch/test_ensemble.py | 97 +++\n", + " tests/unit/systems/utils/ops.py | 13 +-\n", + " tests/unit/systems/utils/tf.py | 65 +-\n", + " tests/unit/test_export.py | 77 ---\n", + " tox.ini | 42 +-\n", + " 103 files changed, 2427 insertions(+), 3565 deletions(-)\n", + " create mode 100644 .github/workflows/check-base-branch.yaml\n", + " create mode 100644 .github/workflows/packages.yaml\n", + " create mode 100644 .github/workflows/postmerge-cpu.yml\n", + " create mode 100644 .github/workflows/postmerge-gpu.yml\n", + " create mode 100644 .github/workflows/set-stable-branch.yaml\n", + " create mode 100644 .prettierignore\n", + " delete mode 100644 merlin/systems/dag/dictarray.py\n", + " delete mode 100644 merlin/systems/dag/op_runner.py\n", + " delete mode 100644 merlin/systems/triton/models/oprunner_model.py\n", + " delete mode 100644 merlin/systems/workflow/hugectr.py\n", + " delete mode 100644 merlin/systems/workflow/pytorch.py\n", + " delete mode 100644 merlin/systems/workflow/tensorflow.py\n", + " create mode 100644 tests/integration/tf/test_transformer_model.py\n", + " rename tests/{unit/systems/dag/test_column.py => test_passing.py} (66%)\n", + " delete mode 100644 tests/unit/systems/dag/test_dict_array.py\n", + " delete mode 100644 tests/unit/systems/dag/test_op_runner.py\n", + " create mode 100644 tests/unit/systems/ops/embedding_op.py\n", + " create mode 100644 tests/unit/systems/ops/padding_op.py\n", + " create mode 100644 tests/unit/systems/ops/torch/test_ensemble.py\n", + " delete mode 100644 tests/unit/test_export.py\n", + "Processing /systems\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Requirement already satisfied: treelite==2.4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+98.g2b1b90b) (2.4.0)\n", + "Requirement already satisfied: merlin-core>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+98.g2b1b90b) (23.4.0)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: requests<3,>=2.10 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+98.g2b1b90b) (2.28.1)\n", + "Requirement already satisfied: treelite-runtime==2.4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+98.g2b1b90b) (2.4.0)\n", + "Requirement already satisfied: nvtabular>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-systems==0.7.0+98.g2b1b90b) (1.6.0+66.g67136eba)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.8/dist-packages (from treelite==2.4.0->merlin-systems==0.7.0+98.g2b1b90b) (1.22.4)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.8/dist-packages (from treelite==2.4.0->merlin-systems==0.7.0+98.g2b1b90b) (1.9.3)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (3.19.6)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.3.5)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.2.5)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (4.64.1)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (8.0.0)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (11.4.1)\n", + "Requirement already satisfied: fsspec>=2022.7.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2023.5.0)\n", + "Requirement already satisfied: distributed>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2023.4.1)\n", + "Requirement already satisfied: dask-cuda>=22.12.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (23.4.0)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (22.0)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (0.56.4)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.12.0)\n", + "Requirement already satisfied: dask>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2023.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+98.g2b1b90b) (2.8)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+98.g2b1b90b) (1.26.13)\n", + "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.8/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+98.g2b1b90b) (2.1.1)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests<3,>=2.10->merlin-systems==0.7.0+98.g2b1b90b) (2019.11.28)\n", + "Requirement already satisfied: merlin-dataloader>=23.4.0 in /usr/local/lib/python3.8/dist-packages (from nvtabular>=1.0.0->merlin-systems==0.7.0+98.g2b1b90b) (23.4.0)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2022.7)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.2.0)\n", + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (0.4.3)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.7.0)\n", + "Requirement already satisfied: zict>=2.2.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2.2.0)\n", + "Requirement already satisfied: psutil>=5.7.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (5.9.4)\n", + "Requirement already satisfied: msgpack>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.0.4)\n", + "Requirement already satisfied: tornado>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (6.1)\n", + "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2.2.0)\n", + "Requirement already satisfied: toolz>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (0.12.0)\n", + "Requirement already satisfied: jinja2>=2.10.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (3.1.2)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (6.0)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.0.0)\n", + "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (8.1.3)\n", + "Requirement already satisfied: sortedcontainers>=2.0.5 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2.4.0)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (5.2.0)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (45.2.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (0.39.1)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.3.0)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.57.0)\n", + "Requirement already satisfied: partd>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.3.0)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.14.0)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (4.1.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (6.0.4)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=2.2.0->distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (1.0.1)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2>=2.10.3->distributed>=2022.11.1->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (2.1.1)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (3.11.0)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (6.0.1)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=0.2.0->merlin-systems==0.7.0+98.g2b1b90b) (4.0.0)\n", + "Building wheels for collected packages: merlin-systems\n", + " Building wheel for merlin-systems (PEP 517): started\n", + " Building wheel for merlin-systems (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-systems: filename=merlin_systems-0.7.0+98.g2b1b90b-py3-none-any.whl size=83152 sha256=282b1d3abe91766660d30dcbfa6d196c7f13d8d7d1b554eefd02455b7cdc1924\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-ojtyyyod/wheels/1f/e9/71/1b0c6295aa7f4b37cb70292d96d87d9f38204674e6531bdda6\n", + "Successfully built merlin-systems\n", + "Installing collected packages: merlin-systems\n", + " Attempting uninstall: merlin-systems\n", + " Found existing installation: merlin-systems 0.9.0\n", + " Uninstalling merlin-systems-0.9.0:\n", + " Successfully uninstalled merlin-systems-0.9.0\n", + "Successfully installed merlin-systems-0.7.0+98.g2b1b90b\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Previous HEAD position was fd5d3fc Use tf.function for list column operations (#89)\n", + "Switched to branch 'main'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Your branch is up to date with 'origin/main'.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "From https://github.com/NVIDIA-Merlin/dataloader\n", + " * branch main -> FETCH_HEAD\n", + " 5b3fe46..d9e97b4 main -> origin/main\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Updating 5b3fe46..d9e97b4\n", + "Fast-forward\n", + " .github/workflows/check-base-branch.yaml | 9 +\n", + " .github/workflows/cpu-ci.yml | 83 +----\n", + " .github/workflows/cpu-packages.yml | 125 +++++++\n", + " .github/workflows/docs-sched-rebuild.yaml | 7 +-\n", + " .github/workflows/gpu-ci.yml | 2 +-\n", + " .github/workflows/jax.yaml | 2 +-\n", + " .github/workflows/models.yml | 43 +++\n", + " .github/workflows/nvtabular.yml | 43 +++\n", + " .github/workflows/release-drafter.yaml | 2 +-\n", + " .github/workflows/set-stable-branch.yaml | 10 +\n", + " .github/workflows/systems.yml | 43 +++\n", + " .github/workflows/tensorflow.yml | 2 +-\n", + " .github/workflows/torch.yaml | 2 +-\n", + " .github/workflows/transformers4rec.yml | 43 +++\n", + " .pre-commit-config.yaml | 14 +-\n", + " ci/pr.gpu.Jenkinsfile | 44 +++\n", + " docs/README.md | 28 +-\n", + " examples/01a-Getting-started-Tensorflow.ipynb | 5 +-\n", + " examples/01b-Getting-started-Pytorch.ipynb | 5 +-\n", + " .../02-Multi-GPU-Tensorflow-with-Horovod.ipynb | 371 ++++++++++++++++++\n", + " merlin/dataloader/jax.py | 52 +--\n", + " merlin/dataloader/loader_base.py | 413 +++++++++------------\n", + " merlin/dataloader/ops/embeddings.py | 110 ++++++\n", + " merlin/dataloader/ops/embeddings/__init__.py | 15 -\n", + " merlin/dataloader/ops/embeddings/embedding_op.py | 237 ------------\n", + " .../dataloader/ops/embeddings/tf_embedding_op.py | 101 -----\n", + " .../ops/embeddings/torch_embedding_op.py | 106 ------\n", + " merlin/dataloader/ops/padding.py | 88 +++++\n", + " merlin/dataloader/tensorflow.py | 337 +++++------------\n", + " merlin/dataloader/torch.py | 225 +++++------\n", + " merlin/dataloader/utils/tf/tf_trainer.py | 13 +-\n", + " requirements/base.txt | 2 +-\n", + " tests/conftest.py | 11 +-\n", + " .../test_multi_GPU_with_horovod_and_tensorflow.py | 28 ++\n", + " tests/unit/dataloader/test_array_dataloader.py | 57 +++\n", + " tests/unit/dataloader/test_array_to_tensorflow.py | 54 +++\n", + " tests/unit/dataloader/test_array_to_torch.py | 69 ++++\n", + " .../{test_tf_embeddings.py => test_embeddings.py} | 188 +++++-----\n", + " tests/unit/dataloader/test_jax_dataloader.py | 29 +-\n", + " tests/unit/dataloader/test_padding.py | 46 +++\n", + " tests/unit/dataloader/test_tf_dataloader.py | 358 +++++++++---------\n", + " tests/unit/dataloader/test_torch_dataloader.py | 245 ++++++++----\n", + " tests/unit/dataloader/test_torch_embeddings.py | 242 ------------\n", + " tox.ini | 55 +++\n", + " 44 files changed, 2154 insertions(+), 1810 deletions(-)\n", + " create mode 100644 .github/workflows/check-base-branch.yaml\n", + " create mode 100644 .github/workflows/cpu-packages.yml\n", + " create mode 100644 .github/workflows/models.yml\n", + " create mode 100644 .github/workflows/nvtabular.yml\n", + " create mode 100644 .github/workflows/set-stable-branch.yaml\n", + " create mode 100644 .github/workflows/systems.yml\n", + " create mode 100644 .github/workflows/transformers4rec.yml\n", + " create mode 100644 ci/pr.gpu.Jenkinsfile\n", + " create mode 100644 examples/02-Multi-GPU-Tensorflow-with-Horovod.ipynb\n", + " create mode 100644 merlin/dataloader/ops/embeddings.py\n", + " delete mode 100644 merlin/dataloader/ops/embeddings/__init__.py\n", + " delete mode 100644 merlin/dataloader/ops/embeddings/embedding_op.py\n", + " delete mode 100644 merlin/dataloader/ops/embeddings/tf_embedding_op.py\n", + " delete mode 100644 merlin/dataloader/ops/embeddings/torch_embedding_op.py\n", + " create mode 100644 merlin/dataloader/ops/padding.py\n", + " create mode 100644 tests/examples/test_multi_GPU_with_horovod_and_tensorflow.py\n", + " create mode 100644 tests/unit/dataloader/test_array_dataloader.py\n", + " create mode 100644 tests/unit/dataloader/test_array_to_tensorflow.py\n", + " create mode 100644 tests/unit/dataloader/test_array_to_torch.py\n", + " rename tests/unit/dataloader/{test_tf_embeddings.py => test_embeddings.py} (52%)\n", + " create mode 100644 tests/unit/dataloader/test_padding.py\n", + " delete mode 100644 tests/unit/dataloader/test_torch_embeddings.py\n", + "Processing /dataloader\n", + " Installing build dependencies: started\n", + " Installing build dependencies: finished with status 'done'\n", + " Getting requirements to build wheel: started\n", + " Getting requirements to build wheel: finished with status 'done'\n", + " Preparing wheel metadata: started\n", + " Preparing wheel metadata: finished with status 'done'\n", + "Requirement already satisfied: merlin-core>=23.04.00 in /usr/local/lib/python3.8/dist-packages (from merlin-dataloader==0.0.2+72.gd9e97b4) (23.4.0)\n", + "Requirement already satisfied: numba>=0.54 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (0.56.4)\n", + "Requirement already satisfied: pandas<1.6.0dev0,>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.3.5)\n", + "Requirement already satisfied: pyarrow>=5.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (8.0.0)\n", + "Requirement already satisfied: betterproto<2.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.2.5)\n", + "Requirement already satisfied: tqdm>=4.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (4.64.1)\n", + "Requirement already satisfied: distributed>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2023.4.1)\n", + "Requirement already satisfied: pynvml<11.5,>=11.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (11.4.1)\n", + "Requirement already satisfied: dask-cuda>=22.12.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (23.4.0)\n", + "Requirement already satisfied: tensorflow-metadata>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.12.0)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (22.0)\n", + "Requirement already satisfied: numpy>=1.22.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.22.4)\n", + "Requirement already satisfied: protobuf>=3.0.0 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (3.19.6)\n", + "Requirement already satisfied: fsspec>=2022.7.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2023.5.0)\n", + "Requirement already satisfied: dask>=2022.11.1 in /usr/local/lib/python3.8/dist-packages (from merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2023.4.1)\n", + "Requirement already satisfied: setuptools in /usr/lib/python3/dist-packages (from numba>=0.54->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (45.2.0)\n", + "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (0.39.1)\n", + "Requirement already satisfied: importlib-metadata; python_version < \"3.9\" in /usr/local/lib/python3.8/dist-packages (from numba>=0.54->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (5.2.0)\n", + "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2.8.2)\n", + "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2022.7)\n", + "Requirement already satisfied: stringcase in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.2.0)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: grpclib in /usr/local/lib/python3.8/dist-packages (from betterproto<2.0.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (0.4.3)\n", + "Requirement already satisfied: cloudpickle>=1.5.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2.2.0)\n", + "Requirement already satisfied: psutil>=5.7.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (5.9.4)\n", + "Requirement already satisfied: locket>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.0.0)\n", + "Requirement already satisfied: msgpack>=1.0.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.0.4)\n", + "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (6.0)\n", + "Requirement already satisfied: tornado>=6.0.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (6.1)\n", + "Requirement already satisfied: jinja2>=2.10.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (3.1.2)\n", + "Requirement already satisfied: sortedcontainers>=2.0.5 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2.4.0)\n", + "Requirement already satisfied: tblib>=1.6.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.7.0)\n", + "Requirement already satisfied: toolz>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (0.12.0)\n", + "Requirement already satisfied: urllib3>=1.24.3 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.26.13)\n", + "Requirement already satisfied: click>=8.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (8.1.3)\n", + "Requirement already satisfied: zict>=2.2.0 in /usr/local/lib/python3.8/dist-packages (from distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2.2.0)\n", + "Requirement already satisfied: absl-py<2.0.0,>=0.9 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.3.0)\n", + "Requirement already satisfied: googleapis-common-protos<2,>=1.52.0 in /usr/local/lib/python3.8/dist-packages (from tensorflow-metadata>=1.2.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.57.0)\n", + "Requirement already satisfied: partd>=1.2.0 in /usr/local/lib/python3.8/dist-packages (from dask>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.3.0)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.8/dist-packages (from importlib-metadata; python_version < \"3.9\"->numba>=0.54->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (3.11.0)\n", + "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7.3->pandas<1.6.0dev0,>=1.2.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.14.0)\n", + "Requirement already satisfied: multidict in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (6.0.4)\n", + "Requirement already satisfied: h2<5,>=3.1.0 in /usr/local/lib/python3.8/dist-packages (from grpclib->betterproto<2.0.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (4.1.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.8/dist-packages (from jinja2>=2.10.3->distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (2.1.1)\n", + "Requirement already satisfied: heapdict in /usr/local/lib/python3.8/dist-packages (from zict>=2.2.0->distributed>=2022.11.1->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (1.0.1)\n", + "Requirement already satisfied: hpack<5,>=4.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (4.0.0)\n", + "Requirement already satisfied: hyperframe<7,>=6.0 in /usr/local/lib/python3.8/dist-packages (from h2<5,>=3.1.0->grpclib->betterproto<2.0.0->merlin-core>=23.04.00->merlin-dataloader==0.0.2+72.gd9e97b4) (6.0.1)\n", + "Building wheels for collected packages: merlin-dataloader\n", + " Building wheel for merlin-dataloader (PEP 517): started\n", + " Building wheel for merlin-dataloader (PEP 517): finished with status 'done'\n", + " Created wheel for merlin-dataloader: filename=merlin_dataloader-0.0.2+72.gd9e97b4-py3-none-any.whl size=34881 sha256=c39b7e146f814713447917029d09f8cf4978202ed3852dce51544461cd074e3b\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-t_njcpzr/wheels/8c/19/5b/15dc04f5a977f6a7f73ed66c91996a687b1d9e3154a4765536\n", + "Successfully built merlin-dataloader\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ERROR: nvtabular 1.6.0+66.g67136eba has requirement merlin-dataloader>=23.4.0, but you'll have merlin-dataloader 0.0.2+72.gd9e97b4 which is incompatible.\n", + "ERROR: merlin-models 23.5.dev0+12.gd8133b8f has requirement merlin-dataloader>=23.4.0, but you'll have merlin-dataloader 0.0.2+72.gd9e97b4 which is incompatible.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Installing collected packages: merlin-dataloader\n", + " Attempting uninstall: merlin-dataloader\n", + " Found existing installation: merlin-dataloader 23.4.0\n", + " Uninstalling merlin-dataloader-23.4.0:\n", + " Successfully uninstalled merlin-dataloader-23.4.0\n", + "Successfully installed merlin-dataloader-0.0.2+72.gd9e97b4\n" + ] + } + ], "source": [ "# %%bash\n", "\n", - "# # cd /models && git fetch origin && git checkout origin/tf/transformer-api && pip install .\n", + "# cd /models && git fetch origin && git checkout origin/tf/transformer-api && pip install .\n", "# cd /models && git checkout main && git pull origin main && pip install .\n", "# cd /core && git checkout main && git pull origin main && pip install .\n", "# cd /nvtabular && git checkout main && git pull origin main && pip install .\n", "# cd /systems && git checkout main && git pull origin main && pip install .\n", - "# cd /dataloader && git checkout main && git pull origin main && pip install .\n", - "\n", - "# ---\n", - "# pip install matplotlib" + "# cd /dataloader && git checkout main && git pull origin main && pip install ." ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "e9929dc8", "metadata": {}, "outputs": [ @@ -30,18 +1531,22 @@ "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: gdown in /usr/local/lib/python3.8/dist-packages (4.7.1)\n", - "Requirement already satisfied: tqdm in /usr/local/lib/python3.8/dist-packages (from gdown) (4.64.1)\n", + "Collecting gdown\n", + " Downloading gdown-4.7.1-py3-none-any.whl (15 kB)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from gdown) (3.9.0)\n", - "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.8/dist-packages (from gdown) (4.11.1)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.8/dist-packages (from gdown) (4.64.1)\n", "Requirement already satisfied: six in /usr/lib/python3/dist-packages (from gdown) (1.14.0)\n", + "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.8/dist-packages (from gdown) (4.11.1)\n", "Requirement already satisfied: requests[socks] in /usr/local/lib/python3.8/dist-packages (from gdown) (2.28.1)\n", "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.8/dist-packages (from beautifulsoup4->gdown) (2.3.2.post1)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2019.11.28)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (1.26.13)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2019.11.28)\n", "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (2.1.1)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests[socks]->gdown) (2.8)\n", - "Requirement already satisfied: PySocks!=1.5.7,>=1.5.6; extra == \"socks\" in /usr/local/lib/python3.8/dist-packages (from requests[socks]->gdown) (1.7.1)\n" + "Collecting PySocks!=1.5.7,>=1.5.6; extra == \"socks\"\n", + " Downloading PySocks-1.7.1-py3-none-any.whl (16 kB)\n", + "Installing collected packages: gdown, PySocks\n", + "Successfully installed PySocks-1.7.1 gdown-4.7.1\n" ] }, { @@ -50,26 +1555,42 @@ "text": [ "Downloading...\n", "From (uriginal): https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV\n", - "From (redirected): https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV&confirm=t&uuid=0dd96474-79af-47bb-9148-b96d64204e14\n", + "From (redirected): https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV&confirm=t&uuid=b5bb23eb-a2dd-4adc-b7b7-be5687c89aca\n", "To: /workspace/T4Rec_repro/rees46_ecom_dataset_small_for_ci.zip\n", - "100%|██████████| 43.4M/43.4M [00:12<00:00, 3.62MB/s]\n" + "100%|██████████| 43.4M/43.4M [00:07<00:00, 6.20MB/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Hit:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 InRelease\n", - "Hit:2 http://archive.ubuntu.com/ubuntu focal InRelease\n", - "Hit:3 http://security.ubuntu.com/ubuntu focal-security InRelease\n", - "Hit:4 http://archive.ubuntu.com/ubuntu focal-updates InRelease\n", - "Hit:5 http://archive.ubuntu.com/ubuntu focal-backports InRelease\n", + "Get:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 InRelease [1581 B]\n", + "Get:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 Packages [1009 kB]\n", + "Get:3 http://archive.ubuntu.com/ubuntu focal InRelease [265 kB]\n", + "Get:4 http://security.ubuntu.com/ubuntu focal-security InRelease [114 kB]\n", + "Get:5 http://security.ubuntu.com/ubuntu focal-security/main amd64 Packages [2674 kB]\n", + "Get:6 http://archive.ubuntu.com/ubuntu focal-updates InRelease [114 kB]\n", + "Get:7 http://archive.ubuntu.com/ubuntu focal-backports InRelease [108 kB]\n", + "Get:8 http://archive.ubuntu.com/ubuntu focal/main amd64 Packages [1275 kB]\n", + "Get:9 http://security.ubuntu.com/ubuntu focal-security/restricted amd64 Packages [2203 kB]\n", + "Get:10 http://archive.ubuntu.com/ubuntu focal/restricted amd64 Packages [33.4 kB]\n", + "Get:11 http://archive.ubuntu.com/ubuntu focal/universe amd64 Packages [11.3 MB]\n", + "Get:12 http://security.ubuntu.com/ubuntu focal-security/multiverse amd64 Packages [28.5 kB]\n", + "Get:13 http://security.ubuntu.com/ubuntu focal-security/universe amd64 Packages [1045 kB]\n", + "Get:14 http://archive.ubuntu.com/ubuntu focal/multiverse amd64 Packages [177 kB]\n", + "Get:15 http://archive.ubuntu.com/ubuntu focal-updates/restricted amd64 Packages [2341 kB]\n", + "Get:16 http://archive.ubuntu.com/ubuntu focal-updates/universe amd64 Packages [1341 kB]\n", + "Get:17 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 Packages [3157 kB]\n", + "Get:18 http://archive.ubuntu.com/ubuntu focal-updates/multiverse amd64 Packages [31.2 kB]\n", + "Get:19 http://archive.ubuntu.com/ubuntu focal-backports/main amd64 Packages [55.2 kB]\n", + "Get:20 http://archive.ubuntu.com/ubuntu focal-backports/universe amd64 Packages [28.6 kB]\n", + "Fetched 27.3 MB in 9s (2922 kB/s)\n", "Reading package lists...\n", "Reading package lists...\n", "Building dependency tree...\n", "Reading state information...\n", "unzip is already the newest version (6.0-25ubuntu1.1).\n", - "0 upgraded, 0 newly installed, 0 to remove and 98 not upgraded.\n", + "0 upgraded, 0 newly installed, 0 to remove and 99 not upgraded.\n", "Archive: rees46_ecom_dataset_small_for_ci.zip\n", " creating: ecom_dataset/0001/\n", " inflating: ecom_dataset/0001/valid.parquet \n", @@ -84,17 +1605,17 @@ } ], "source": [ - "%%bash\n", + "# %%bash\n", "\n", - "rm -rf ecom_dataset\n", - "mkdir -p ecom_dataset\n", + "# rm -rf ecom_dataset\n", + "# mkdir -p ecom_dataset\n", "\n", - "pip install gdown\n", - "# gdown https://drive.google.com/uc?id=1BvCHc4eXComuNK93bKhRM6cbg9y5p350 # <-- full dataset\n", - "gdown https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV\n", - "apt-get update -y\n", - "apt-get install unzip -y\n", - "unzip -d ecom_dataset \"rees46_ecom_dataset_small_for_ci.zip\"" + "# pip install gdown\n", + "# # gdown https://drive.google.com/uc?id=1BvCHc4eXComuNK93bKhRM6cbg9y5p350 # <-- full dataset\n", + "# gdown https://drive.google.com/uc?id=1NCFZ5ya3zyxPsrmupEoc9UEm4sslAddV\n", + "# apt-get update -y\n", + "# apt-get install unzip -y\n", + "# unzip -d ecom_dataset \"rees46_ecom_dataset_small_for_ci.zip\"" ] }, { @@ -353,18 +1874,47 @@ }, { "cell_type": "code", - "execution_count": 7, - "id": "ceb3ae93", + "execution_count": 4, + "id": "0660887b", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "# os.environ[\"TF_GPU_ALLOCATOR\"]=\"cuda_malloc_async\"\n", + "import gc\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "ec38f1a6", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "2023-04-13 11:21:28.090236: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" + "2023-05-09 01:50:24.115697: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", + " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n" ] - }, + } + ], + "source": [ + "import tensorflow as tf\n", + "\n", + "from merlin.schema.tags import Tags\n", + "from merlin.io.dataset import Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "ceb3ae93", + "metadata": {}, + "outputs": [ { "name": "stdout", "output_type": "stream", @@ -376,11 +1926,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", - " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", - "2023-04-13 11:21:30.471061: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:21:30.471514: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:21:30.471678: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n" + "2023-05-09 01:50:26.436605: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:50:26.437013: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:50:26.437158: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n" ] }, { @@ -397,38 +1945,28 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-04-13 11:21:30.757567: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "2023-05-09 01:50:26.674203: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-04-13 11:21:30.758435: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:21:30.758639: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:21:30.758792: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:21:31.508591: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:21:31.508802: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:21:31.508961: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:21:31.509071: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:42] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n", - "2023-04-13 11:21:31.509079: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", - "2023-04-13 11:21:31.509140: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n", + "2023-05-09 01:50:26.675123: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:50:26.675302: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:50:26.675428: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:50:27.455564: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:50:27.455749: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:50:27.455877: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:50:27.455980: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:42] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n", + "2023-05-09 01:50:27.456001: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 24576 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n", "/usr/local/lib/python3.8/dist-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } ], "source": [ - "import os\n", - "os.environ[\"TF_GPU_ALLOCATOR\"]=\"cuda_malloc_async\"\n", - "import gc\n", - "import numpy as np\n", - "\n", - "import tensorflow as tf\n", - "\n", - "from merlin.schema.tags import Tags\n", - "from merlin.io.dataset import Dataset\n", "import merlin.models.tf as mm" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "id": "11647dd3", "metadata": {}, "outputs": [], @@ -439,7 +1977,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "id": "4ab4e0fb", "metadata": {}, "outputs": [], @@ -450,7 +1988,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "id": "8d9903e6", "metadata": {}, "outputs": [], @@ -469,7 +2007,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "id": "410ea223", "metadata": {}, "outputs": [], @@ -480,7 +2018,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "id": "4328f03a", "metadata": {}, "outputs": [], @@ -492,231 +2030,11804 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "4571b92b", + "execution_count": 12, + "id": "d5a9dd50", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "ops = ['sess_pid_seq'] >> Categorify(dtype=np.int32) #>> Rename(name=seq_name)\n", + "\n", + "wf = Workflow(ops)\n", + "\n", + "train = wf.fit_transform(train)\n", + "valid = wf.transform(valid)" + ] }, { "cell_type": "code", "execution_count": 13, - "id": "d5a9dd50", + "id": "3116726e", "metadata": {}, "outputs": [], "source": [ - "ops = ['sess_pid_seq'] >> Categorify(dtype=np.int32) #>> Rename(name=seq_name)\n", + "# cat rees46_schema_modified.pbtxt" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "69e8f95c", + "metadata": {}, + "outputs": [], + "source": [ + "# %%writefile rees46_schema_modified_2.pbtxt\n", "\n", - "wf = Workflow(ops)\n", + "# feature {\n", + "# name: \"seq\"\n", + "# value_count {\n", + "# min: 2\n", + "# }\n", + "# type: INT\n", + "# int_domain {\n", + "# name: \"seq\"\n", + "# min: 1\n", + "# max: 390000\n", + "# is_categorical: true\n", + "# }\n", + "# annotation {\n", + "# tag: \"item_id\"\n", + "# tag: \"list\"\n", + "# tag: \"categorical\"\n", + "# tag: \"item\"\n", + "# }\n", + "# }" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "a6ade14a", + "metadata": {}, + "outputs": [], + "source": [ + "from merlin.schema.io.tensorflow_metadata import TensorflowMetadata\n", "\n", - "train = wf.fit_transform(train)\n", - "valid = wf.transform(valid)" + "def get_model():\n", + " mlp_block = mm.MLPBlock(\n", + " [d_model],\n", + " activation='relu',\n", + " no_activation_last_layer=True,\n", + " )\n", + "\n", + " schema = TensorflowMetadata.from_proto_text_file(\n", + " './',\n", + " file_name='rees46_schema_modified.pbtxt'\n", + " ).to_merlin_schema()\n", + "\n", + " train.schema = schema\n", + " \n", + " schema_model = schema.select_by_tag(Tags.ITEM_ID)\n", + " input_block = mm.InputBlockV2(\n", + " schema_model,\n", + " categorical=mm.Embeddings(\n", + " schema_model.select_by_tag(Tags.CATEGORICAL),\n", + " dim=item_embedding_dim,\n", + " sequence_combiner=None,\n", + " )\n", + " )\n", + "\n", + " train.schema = train.schema.select_by_name(seq_name)\n", + "\n", + " xlnet_block = mm.XLNetBlock(d_model=d_model, n_head=n_head, n_layer=n_layer)\n", + "\n", + " dense_block = mm.SequentialBlock(\n", + " input_block,\n", + " mlp_block,\n", + " xlnet_block\n", + " )\n", + "\n", + " mlp_block2 = mm.MLPBlock(\n", + " [item_embedding_dim],\n", + " activation='relu',\n", + " no_activation_last_layer=True,\n", + " )\n", + "\n", + " prediction_task = mm.CategoricalOutput(\n", + " to_call=input_block[\"categorical\"][target],\n", + " )\n", + "\n", + " model_transformer = mm.Model(dense_block, mlp_block2, prediction_task)\n", + "\n", + " optimizer = tf.keras.optimizers.Adam(\n", + " learning_rate=learning_rate,\n", + " )\n", + "\n", + " model_transformer.compile(run_eagerly=False, optimizer=optimizer, loss=\"categorical_crossentropy\",\n", + " metrics=mm.TopKMetricsAggregator.default_metrics(top_ks=[20])\n", + " )\n", + " return model_transformer, xlnet_block" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "076f42cc", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "model_transformer, xlnet_block = get_model()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "523fe2ac", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", + " warnings.warn(\n", + "2023-05-09 01:50:35.053579: I tensorflow/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8700\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:436: UserWarning: Converting sparse IndexedSlices to a dense Tensor with 174720448 elements. This may consume a large amount of memory.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", + "677/677 [==============================] - 105s 143ms/step - loss: 7.2880 - recall_at_20: 0.1451 - mrr_at_20: 0.0813 - ndcg_at_20: 0.0954 - map_at_20: 0.0813 - precision_at_20: 0.0073 - regularization_loss: 0.0000e+00 - loss_batch: 7.2857\n", + "84/84 [==============================] - 4s 26ms/step - loss: 8.5378 - recall_at_20: 0.2315 - mrr_at_20: 0.0811 - ndcg_at_20: 0.1142 - map_at_20: 0.0811 - precision_at_20: 0.0116 - regularization_loss: 0.0000e+00 - loss_batch: 8.5385\n" + ] + }, + { + "data": { + "text/plain": [ + "{'loss': 8.537825584411621,\n", + " 'recall_at_20': 0.2337784469127655,\n", + " 'mrr_at_20': 0.07926096022129059,\n", + " 'ndcg_at_20': 0.11324834823608398,\n", + " 'map_at_20': 0.07926096022129059,\n", + " 'precision_at_20': 0.011688923463225365,\n", + " 'regularization_loss': 0.0,\n", + " 'loss_batch': 8.566910743713379}" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer.fit(\n", + " train,\n", + " batch_size=batch_size,\n", + " epochs=n_epoch,\n", + " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", + ")\n", + "\n", + "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", + "model_transformer.evaluate(\n", + " valid,\n", + " batch_size=batch_size,\n", + " pre=predict_last,\n", + " return_dict=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "febab09e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n", + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n", + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n", + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:absl:Found untraced functions such as model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, sequence_predict_next_layer_call_fn, sequence_predict_next_layer_call_and_return_conditional_losses, sequence_predict_last_layer_call_fn while saving (showing 5 of 114). These functions will not be directly callable after loading.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: t4rec_model/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: t4rec_model/assets\n", + "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/utils/tf_utils.py:101: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + " config[key] = tf.keras.utils.serialize_keras_object(maybe_value)\n", + "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/core/combinators.py:288: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + " config[i] = tf.keras.utils.serialize_keras_object(layer)\n", + "/usr/local/lib/python3.8/dist-packages/keras/saving/saved_model/layer_serialization.py:134: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + " return generic_utils.serialize_keras_object(obj)\n", + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "model_transformer.save('t4rec_model')" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "8e0ea1b1", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", + " warnings.warn(\n", + "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "Model(\n", + " (_should_compute_train_metrics_for_batch): \n", + " (blocks): _TupleWrapper((SequentialBlock(\n", + " (layers): List(\n", + " (0): ParallelBlock(\n", + " (_aggregation): ConcatFeatures(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (parallel_layers): Dict(\n", + " (categorical): ParallelBlock(\n", + " (parallel_layers): Dict(\n", + " (sess_pid_seq): EmbeddingTable(\n", + " (features): Dict(\n", + " (sess_pid_seq): ColumnSchema(name='sess_pid_seq', tags={, , , , }, properties={'domain': {'min': 1, 'max': 390000, 'name': 'sess_pid_seq'}, 'value_count': {'min': 2, 'max': None}}, dtype=DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=2, max=None)))), is_list=True, is_ragged=True)\n", + " )\n", + " (table): Embedding(\n", + " (embeddings): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (1): SequentialBlock(\n", + " (layers): List(\n", + " (0): _Dense(\n", + " (dense): Dense(\n", + " 192, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (2): XLNetBlock(\n", + " (transformer): TFXLNetMainLayer(\n", + " (word_embedding): TFSharedEmbeddings(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer): List(\n", + " (0): TFXLNetLayer(\n", + " (rel_attn): TFXLNetRelativeAttention(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (q): \n", + " (k): \n", + " (v): \n", + " (o): \n", + " (r): \n", + " (r_r_bias): \n", + " (r_s_bias): \n", + " (r_w_bias): \n", + " (seg_embed): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (ff): TFXLNetFeedForward(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_1): Dense(\n", + " 768, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_2): Dense(\n", + " 192, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (1): TFXLNetLayer(\n", + " (rel_attn): TFXLNetRelativeAttention(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (q): \n", + " (k): \n", + " (v): \n", + " (o): \n", + " (r): \n", + " (r_r_bias): \n", + " (r_s_bias): \n", + " (r_w_bias): \n", + " (seg_embed): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (ff): TFXLNetFeedForward(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_1): Dense(\n", + " 768, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_2): Dense(\n", + " 192, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (2): TFXLNetLayer(\n", + " (rel_attn): TFXLNetRelativeAttention(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (q): \n", + " (k): \n", + " (v): \n", + " (o): \n", + " (r): \n", + " (r_r_bias): \n", + " (r_s_bias): \n", + " (r_w_bias): \n", + " (seg_embed): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (ff): TFXLNetFeedForward(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_1): Dense(\n", + " 768, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_2): Dense(\n", + " 192, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_kwargs): Dict(\n", + " (name): 'transformer'\n", + " (trainable): True\n", + " (dtype): 'float32'\n", + " )\n", + " (mask_emb): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (transformer_pre): PrepareTransformerInputs(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (transformer_post): LastHiddenState(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_masking_post): SequentialBlock(\n", + " (layers): List(\n", + " (0): TransformerOutputToRagged(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (1): TransformerInferenceHiddenState(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_masking_pre): SequentialBlock(\n", + " (layers): List(\n", + " (0): SequenceCausalLastInference(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (1): ExtractMaskFromTargets(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " ), SequentialBlock(\n", + " (layers): List(\n", + " (0): _Dense(\n", + " (dense): Dense(\n", + " 448, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " ), CategoricalOutput(\n", + " (to_call): EmbeddingTablePrediction(\n", + " (table): EmbeddingTable(\n", + " (features): Dict(\n", + " (sess_pid_seq): ColumnSchema(name='sess_pid_seq', tags={, , , , }, properties={'domain': {'min': 1, 'max': 390000, 'name': 'sess_pid_seq'}, 'value_count': {'min': 2, 'max': None}}, dtype=DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=2, max=None)))), is_list=True, is_ragged=True)\n", + " )\n", + " (table): Embedding(\n", + " (embeddings): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (output_layer_bias): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )))\n", + " (context): ModelContext(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_prepare_features): PrepareFeatures(\n", + " (prepare_lists): PrepareListFeatures()\n", + " )\n", + " (output_names): List(\n", + " (0): 'sess_pid_seq/categorical_output'\n", + " )\n", + " (optimizer): Adam()\n", + " (loss): Dict(\n", + " (sess_pid_seq/categorical_output): CategoricalCrossEntropy()\n", + " )\n", + " (train_pre): SequencePredictNext(\n", + " (_pre): SequentialBlock(\n", + " (layers): List(\n", + " (0): PrepareFeatures(\n", + " (prepare_lists): PrepareListFeatures()\n", + " )\n", + " (1): PrepareFeatures(\n", + " (prepare_lists): PrepareListFeatures()\n", + " )\n", + " )\n", + " (prepare_lists): PrepareListFeatures()\n", + " )\n", + " (transformer): XLNetBlock(\n", + " (transformer): TFXLNetMainLayer(\n", + " (word_embedding): TFSharedEmbeddings(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer): List(\n", + " (0): TFXLNetLayer(\n", + " (rel_attn): TFXLNetRelativeAttention(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (q): \n", + " (k): \n", + " (v): \n", + " (o): \n", + " (r): \n", + " (r_r_bias): \n", + " (r_s_bias): \n", + " (r_w_bias): \n", + " (seg_embed): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (ff): TFXLNetFeedForward(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_1): Dense(\n", + " 768, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_2): Dense(\n", + " 192, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (1): TFXLNetLayer(\n", + " (rel_attn): TFXLNetRelativeAttention(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (q): \n", + " (k): \n", + " (v): \n", + " (o): \n", + " (r): \n", + " (r_r_bias): \n", + " (r_s_bias): \n", + " (r_w_bias): \n", + " (seg_embed): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (ff): TFXLNetFeedForward(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_1): Dense(\n", + " 768, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_2): Dense(\n", + " 192, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (2): TFXLNetLayer(\n", + " (rel_attn): TFXLNetRelativeAttention(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (q): \n", + " (k): \n", + " (v): \n", + " (o): \n", + " (r): \n", + " (r_r_bias): \n", + " (r_s_bias): \n", + " (r_w_bias): \n", + " (seg_embed): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (ff): TFXLNetFeedForward(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_1): Dense(\n", + " 768, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_2): Dense(\n", + " 192, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_kwargs): Dict(\n", + " (name): 'transformer'\n", + " (trainable): True\n", + " (dtype): 'float32'\n", + " )\n", + " (mask_emb): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (transformer_pre): PrepareTransformerInputs(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (transformer_post): LastHiddenState(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_masking_post): SequentialBlock(\n", + " (layers): List(\n", + " (0): TransformerOutputToRagged(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (1): TransformerInferenceHiddenState(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_masking_pre): SequentialBlock(\n", + " (layers): List(\n", + " (0): SequenceCausalLastInference(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (1): ExtractMaskFromTargets(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (test_pre): SequencePredictLast(\n", + " (_pre): SequentialBlock(\n", + " (layers): List(\n", + " (0): PrepareFeatures(\n", + " (prepare_lists): PrepareListFeatures()\n", + " )\n", + " (1): PrepareFeatures(\n", + " (prepare_lists): PrepareListFeatures()\n", + " )\n", + " )\n", + " (prepare_lists): PrepareListFeatures()\n", + " )\n", + " (transformer): XLNetBlock(\n", + " (transformer): TFXLNetMainLayer(\n", + " (word_embedding): TFSharedEmbeddings(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer): List(\n", + " (0): TFXLNetLayer(\n", + " (rel_attn): TFXLNetRelativeAttention(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (q): \n", + " (k): \n", + " (v): \n", + " (o): \n", + " (r): \n", + " (r_r_bias): \n", + " (r_s_bias): \n", + " (r_w_bias): \n", + " (seg_embed): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (ff): TFXLNetFeedForward(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_1): Dense(\n", + " 768, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_2): Dense(\n", + " 192, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (1): TFXLNetLayer(\n", + " (rel_attn): TFXLNetRelativeAttention(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (q): \n", + " (k): \n", + " (v): \n", + " (o): \n", + " (r): \n", + " (r_r_bias): \n", + " (r_s_bias): \n", + " (r_w_bias): \n", + " (seg_embed): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (ff): TFXLNetFeedForward(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_1): Dense(\n", + " 768, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_2): Dense(\n", + " 192, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (2): TFXLNetLayer(\n", + " (rel_attn): TFXLNetRelativeAttention(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (q): \n", + " (k): \n", + " (v): \n", + " (o): \n", + " (r): \n", + " (r_r_bias): \n", + " (r_s_bias): \n", + " (r_w_bias): \n", + " (seg_embed): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (ff): TFXLNetFeedForward(\n", + " (layer_norm): LayerNormalization(\n", + " (axis): List(\n", + " (0): 2\n", + " )\n", + " (gamma): \n", + " (beta): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_1): Dense(\n", + " 768, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (layer_2): Dense(\n", + " 192, activation=linear, use_bias=True\n", + " (kernel): \n", + " (bias): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (dropout): Dropout(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_kwargs): Dict(\n", + " (name): 'transformer'\n", + " (trainable): True\n", + " (dtype): 'float32'\n", + " )\n", + " (mask_emb): \n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (transformer_pre): PrepareTransformerInputs(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (transformer_post): LastHiddenState(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_masking_post): SequentialBlock(\n", + " (layers): List(\n", + " (0): TransformerOutputToRagged(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (1): TransformerInferenceHiddenState(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_masking_pre): SequentialBlock(\n", + " (layers): List(\n", + " (0): SequenceCausalLastInference(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (1): ExtractMaskFromTargets(\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " (_feature_shapes): Dict()\n", + " (_feature_dtypes): Dict()\n", + " )\n", + " )\n", + " (signatures): _SignatureMap({'serving_default': })\n", + ")" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_transformer.load('t4rec_model')" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "2f5a7984", + "metadata": {}, + "outputs": [], + "source": [ + "from merlin.loader.tensorflow import Loader" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "dc4df316", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.8/dist-packages/merlin/dataloader/tensorflow.py:65: UserWarning: Due to a CUDA memory alignment issue in some Tensorflow operations such as Embedding ops, we recommend that 'batch_size' be at least 16 and also a power of two. Please change 'batch_size' to a number that is a power of two that is greater than or equal to 16.\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "loader = Loader(valid, batch_size=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "f3bfca3f", + "metadata": {}, + "outputs": [], + "source": [ + "it = iter(loader)" ] }, { "cell_type": "code", - "execution_count": 14, - "id": "3116726e", + "execution_count": 23, + "id": "7e1b9bbc", "metadata": {}, "outputs": [], "source": [ - "# cat rees46_schema_modified.pbtxt" + "while True:\n", + " b = next(it)\n", + " if b[0]['sess_pid_seq__offsets'].numpy()[1] == 20:\n", + " break" ] }, { "cell_type": "code", - "execution_count": 15, - "id": "69e8f95c", + "execution_count": 24, + "id": "7ee5f149", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "({'sess_pid_seq__values': ,\n", + " 'sess_pid_seq__offsets': },\n", + " None)" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# %%writefile rees46_schema_modified_2.pbtxt\n", - "\n", - "# feature {\n", - "# name: \"seq\"\n", - "# value_count {\n", - "# min: 2\n", - "# }\n", - "# type: INT\n", - "# int_domain {\n", - "# name: \"seq\"\n", - "# min: 1\n", - "# max: 390000\n", - "# is_categorical: true\n", - "# }\n", - "# annotation {\n", - "# tag: \"item_id\"\n", - "# tag: \"list\"\n", - "# tag: \"categorical\"\n", - "# tag: \"item\"\n", - "# }\n", - "# }" + "b" ] }, { "cell_type": "code", - "execution_count": 16, - "id": "a6ade14a", + "execution_count": 25, + "id": "81d2b071", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "50.1 ms ± 78.3 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" + ] + } + ], "source": [ - "from merlin.schema.io.tensorflow_metadata import TensorflowMetadata\n", - "\n", - "def get_model():\n", - " mlp_block = mm.MLPBlock(\n", - " [d_model],\n", - " activation='relu',\n", - " no_activation_last_layer=True,\n", - " )\n", + "%%timeit\n", "\n", - " schema = TensorflowMetadata.from_proto_text_file(\n", - " './',\n", - " file_name='rees46_schema_modified.pbtxt'\n", - " ).to_merlin_schema()\n", - "\n", - " train.schema = schema\n", - " \n", - " schema_model = schema.select_by_tag(Tags.ITEM_ID)\n", - " input_block = mm.InputBlockV2(\n", - " schema_model,\n", - " categorical=mm.Embeddings(\n", - " schema_model.select_by_tag(Tags.CATEGORICAL),\n", - " dim=item_embedding_dim,\n", - " sequence_combiner=None,\n", - " )\n", - " )\n", - "\n", - " train.schema = train.schema.select_by_name(seq_name)\n", - "\n", - " xlnet_block = mm.XLNetBlock(d_model=d_model, n_head=n_head, n_layer=n_layer)\n", - "\n", - " dense_block = mm.SequentialBlock(\n", - " input_block,\n", - " mlp_block,\n", - " xlnet_block\n", - " )\n", - "\n", - " mlp_block2 = mm.MLPBlock(\n", - " [item_embedding_dim],\n", - " activation='relu',\n", - " no_activation_last_layer=True,\n", - " )\n", - "\n", - " prediction_task = mm.CategoricalOutput(\n", - " to_call=input_block[\"categorical\"][target],\n", - " )\n", - "\n", - " model_transformer = mm.Model(dense_block, mlp_block2, prediction_task)\n", - "\n", - " optimizer = tf.keras.optimizers.Adam(\n", - " learning_rate=learning_rate,\n", - " )\n", - "\n", - " model_transformer.compile(run_eagerly=False, optimizer=optimizer, loss=\"categorical_crossentropy\",\n", - " metrics=mm.TopKMetricsAggregator.default_metrics(top_ks=[20])\n", - " )\n", - " return model_transformer, xlnet_block" + "model_transformer.predict_step(b)" ] }, { "cell_type": "code", - "execution_count": 17, - "id": "523fe2ac", + "execution_count": 26, + "id": "7b24e7fa", "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "/usr/local/lib/python3.8/dist-packages/merlin/schema/tags.py:149: UserWarning: Compound tags like Tags.ITEM_ID have been deprecated and will be removed in a future version. Please use the atomic versions of these tags, like [, ].\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/keras/initializers/initializers_v2.py:120: UserWarning: The initializer TruncatedNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n", - " warnings.warn(\n", - "2023-04-13 11:21:38.342588: I tensorflow/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8700\n" + "297 ms ± 753 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" ] - }, + } + ], + "source": [ + "%%timeit\n", + "\n", + "with tf.device('/cpu:0'):\n", + " model_transformer.predict_step(b)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "5bd66ba8", + "metadata": {}, + "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n" + "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask_1/GatherV2:0\", shape=(None,), dtype=int32), values=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/boolean_mask/GatherV2:0\", shape=(None, 192), dtype=float32), dense_shape=Tensor(\"gradient_tape/model/sequential_block_4/xl_net_block/prepare_transformer_inputs_4/RaggedToTensor_1/Shape:0\", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", - " warnings.warn(\n", - "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/indexed_slices.py:436: UserWarning: Converting sparse IndexedSlices to a dense Tensor with 174720448 elements. This may consume a large amount of memory.\n", - " warnings.warn(\n" + "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "WARNING:tensorflow:Gradients do not exist for variables ['model/mask_emb:0', 'transformer/layer_._0/rel_attn/r_s_bias:0', 'transformer/layer_._0/rel_attn/seg_embed:0', 'transformer/layer_._1/rel_attn/r_s_bias:0', 'transformer/layer_._1/rel_attn/seg_embed:0', 'transformer/layer_._2/rel_attn/r_s_bias:0', 'transformer/layer_._2/rel_attn/seg_embed:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n", - "677/677 [==============================] - 106s 144ms/step - loss: 7.3129 - recall_at_20: 0.1424 - mrr_at_20: 0.0802 - ndcg_at_20: 0.0939 - map_at_20: 0.0802 - precision_at_20: 0.0071 - regularization_loss: 0.0000e+00 - loss_batch: 7.3149\n", - "84/84 [==============================] - 4s 27ms/step - loss: 8.5848 - recall_at_20: 0.2229 - mrr_at_20: 0.0736 - ndcg_at_20: 0.1066 - map_at_20: 0.0736 - precision_at_20: 0.0111 - regularization_loss: 0.0000e+00 - loss_batch: 8.5971\n" + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n" ] }, { - "data": { - "text/plain": [ - "{'loss': 8.584781646728516,\n", - " 'recall_at_20': 0.2308632731437683,\n", - " 'mrr_at_20': 0.07471762597560883,\n", - " 'ndcg_at_20': 0.10908268392086029,\n", - " 'map_at_20': 0.07471762597560883,\n", - " 'precision_at_20': 0.011543160304427147,\n", - " 'regularization_loss': 0.0,\n", - " 'loss_batch': 9.130510330200195}" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model_transformer, xlnet_block = get_model()\n", - "model_transformer.fit(\n", - " train,\n", - " batch_size=batch_size,\n", - " epochs=n_epoch,\n", - " pre=mm.SequencePredictNext(schema=train.schema, target=target, transformer=xlnet_block)\n", - ")\n", - "\n", - "predict_last = mm.SequencePredictLast(schema=valid.schema, target=target, transformer=xlnet_block)\n", - "model_transformer.evaluate(\n", - " valid,\n", - " batch_size=batch_size,\n", - " pre=predict_last,\n", - " return_dict=True\n", - ")\n", - "\n", - "# model_transformer.save('t4rec_model')" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "5bd66ba8", - "metadata": {}, - "outputs": [ - { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "WARNING:tensorflow:Skipping full serialization of Keras layer TFSharedEmbeddings(\n", + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", " (_feature_shapes): Dict(\n", " (sess_pid_seq): TensorShape([128, None, 1])\n", " )\n", " (_feature_dtypes): Dict(\n", " (sess_pid_seq): tf.int32\n", " )\n", - "), because it is not built.\n", + "), because it is not built.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", " (_feature_shapes): Dict(\n", " (sess_pid_seq): TensorShape([128, None, 1])\n", @@ -724,7 +13835,13 @@ " (_feature_dtypes): Dict(\n", " (sess_pid_seq): tf.int32\n", " )\n", - "), because it is not built.\n", + "), because it is not built.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", " (_feature_shapes): Dict(\n", " (sess_pid_seq): TensorShape([128, None, 1])\n", @@ -732,7 +13849,13 @@ " (_feature_dtypes): Dict(\n", " (sess_pid_seq): tf.int32\n", " )\n", - "), because it is not built.\n", + "), because it is not built.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", " (_feature_shapes): Dict(\n", " (sess_pid_seq): TensorShape([128, None, 1])\n", @@ -747,6 +13870,14 @@ "name": "stderr", "output_type": "stream", "text": [ + "WARNING:tensorflow:Skipping full serialization of Keras layer Dropout(\n", + " (_feature_shapes): Dict(\n", + " (sess_pid_seq): TensorShape([128, None, 1])\n", + " )\n", + " (_feature_dtypes): Dict(\n", + " (sess_pid_seq): tf.int32\n", + " )\n", + "), because it is not built.\n", "WARNING:absl:Found untraced functions such as model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, sequence_predict_next_layer_call_fn, sequence_predict_next_layer_call_and_return_conditional_losses, sequence_predict_last_layer_call_fn while saving (showing 5 of 114). These functions will not be directly callable after loading.\n" ] }, @@ -754,15 +13885,15 @@ "name": "stdout", "output_type": "stream", "text": [ - "INFO:tensorflow:Assets written to: /tmp/tmpkph1f3_r/model.savedmodel/assets\n" + "INFO:tensorflow:Assets written to: /tmp/tmpvsz5e5b2/model.savedmodel/assets\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "INFO:tensorflow:Assets written to: /tmp/tmpkph1f3_r/model.savedmodel/assets\n", - "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/utils/tf_utils.py:100: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + "INFO:tensorflow:Assets written to: /tmp/tmpvsz5e5b2/model.savedmodel/assets\n", + "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/utils/tf_utils.py:101: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", " config[key] = tf.keras.utils.serialize_keras_object(maybe_value)\n", "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/core/combinators.py:288: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", " config[i] = tf.keras.utils.serialize_keras_object(layer)\n", @@ -816,7 +13947,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 33, "id": "3ef1e5fc", "metadata": {}, "outputs": [], @@ -830,7 +13961,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 28, "id": "e2a7b6ee", "metadata": {}, "outputs": [], @@ -843,7 +13974,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 29, "id": "55ad012c", "metadata": {}, "outputs": [ @@ -884,7 +14015,7 @@ " \n", " 0\n", " sess_pid_seq\n", - " (Tags.CATEGORICAL, Tags.ITEM, Tags.ID, Tags.IT...\n", + " (Tags.LIST, Tags.ITEM_ID, Tags.CATEGORICAL, Ta...\n", " DType(name='int64', element_type=<ElementType....\n", " True\n", " True\n", @@ -899,10 +14030,10 @@ "" ], "text/plain": [ - "[{'name': 'sess_pid_seq', 'tags': {, , , , }, 'properties': {'domain': {'min': 1, 'max': 390000, 'name': 'sess_pid_seq'}, 'value_count': {'min': 2, 'max': None}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=2, max=None)))), 'is_list': True, 'is_ragged': True}]" + "[{'name': 'sess_pid_seq', 'tags': {, , , , }, 'properties': {'domain': {'min': 1, 'max': 390000, 'name': 'sess_pid_seq'}, 'value_count': {'min': 2, 'max': None}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None), Dimension(min=2, max=None)))), 'is_list': True, 'is_ragged': True}]" ] }, - "execution_count": 21, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -913,7 +14044,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 30, "id": "1a39b4f8", "metadata": {}, "outputs": [ @@ -1042,7 +14173,7 @@ "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel/assets\n", - "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/utils/tf_utils.py:100: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", + "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/utils/tf_utils.py:101: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", " config[key] = tf.keras.utils.serialize_keras_object(maybe_value)\n", "/usr/local/lib/python3.8/dist-packages/merlin/models/tf/core/combinators.py:288: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.\n", " config[i] = tf.keras.utils.serialize_keras_object(layer)\n", @@ -1078,35 +14209,17 @@ }, { "cell_type": "code", - "execution_count": 23, - "id": "1720a5af", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ls: cannot access '/workspace/models_for_benchmarking/1': No such file or directory\r\n" - ] - } - ], - "source": [ - "ls /workspace/models_for_benchmarking/1" - ] - }, - { - "cell_type": "code", - "execution_count": 24, + "execution_count": 31, "id": "d7cdc6cc", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 24, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" }, @@ -1114,89 +14227,88 @@ "name": "stderr", "output_type": "stream", "text": [ - "I0413 11:24:28.716029 1527 pinned_memory_manager.cc:240] Pinned memory pool is created at '0x7f7f2a000000' with size 268435456\n", - "I0413 11:24:28.716361 1527 cuda_memory_manager.cc:105] CUDA memory pool is created on device 0 with size 67108864\n", - "I0413 11:24:28.718446 1527 model_lifecycle.cc:459] loading: 0_transformworkflowtriton:1\n", - "I0413 11:24:28.718465 1527 model_lifecycle.cc:459] loading: 1_predicttensorflowtriton:1\n", - "I0413 11:24:28.718478 1527 model_lifecycle.cc:459] loading: executor_model:1\n", - "I0413 11:24:28.924940 1527 tensorflow.cc:2536] TRITONBACKEND_Initialize: tensorflow\n", - "I0413 11:24:28.924955 1527 tensorflow.cc:2546] Triton TRITONBACKEND API version: 1.10\n", - "I0413 11:24:28.924960 1527 tensorflow.cc:2552] 'tensorflow' TRITONBACKEND API version: 1.10\n", - "I0413 11:24:28.924962 1527 tensorflow.cc:2576] backend configuration:\n", + "I0509 01:55:17.511153 1205 pinned_memory_manager.cc:240] Pinned memory pool is created at '0x7ff516000000' with size 268435456\n", + "I0509 01:55:17.511472 1205 cuda_memory_manager.cc:105] CUDA memory pool is created on device 0 with size 67108864\n", + "I0509 01:55:17.513574 1205 model_lifecycle.cc:459] loading: executor_model:1\n", + "I0509 01:55:17.513595 1205 model_lifecycle.cc:459] loading: 0_transformworkflowtriton:1\n", + "I0509 01:55:17.513608 1205 model_lifecycle.cc:459] loading: 1_predicttensorflowtriton:1\n", + "I0509 01:55:17.693342 1205 tensorflow.cc:2536] TRITONBACKEND_Initialize: tensorflow\n", + "I0509 01:55:17.693362 1205 tensorflow.cc:2546] Triton TRITONBACKEND API version: 1.10\n", + "I0509 01:55:17.693365 1205 tensorflow.cc:2552] 'tensorflow' TRITONBACKEND API version: 1.10\n", + "I0509 01:55:17.693368 1205 tensorflow.cc:2576] backend configuration:\n", "{\"cmdline\":{\"auto-complete-config\":\"true\",\"min-compute-capability\":\"6.000000\",\"backend-directory\":\"/opt/tritonserver/backends\",\"default-max-batch-size\":\"4\"}}\n", - "2023-04-13 11:24:30.207841: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "2023-05-09 01:55:18.992767: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-04-13 11:24:32.085748: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:32.086174: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:32.086365: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:20.814292: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:20.814710: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:20.814876: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", - "I0413 11:24:33.803267 1527 python_be.cc:1856] TRITONBACKEND_ModelInstanceInitialize: executor_model_0 (GPU device 0)\n", - "2023-04-13 11:24:35.316462: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "I0509 01:55:22.571307 1205 tensorflow.cc:2642] TRITONBACKEND_ModelInitialize: 1_predicttensorflowtriton (version 1)\n", + "2023-05-09 01:55:22.571962: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", + "2023-05-09 01:55:22.592315: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }\n", + "2023-05-09 01:55:22.592352: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", + "2023-05-09 01:55:22.592474: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-04-13 11:24:37.126873: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:37.127251: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:37.127427: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "I0413 11:24:37.157059 1527 tensorflow.cc:2642] TRITONBACKEND_ModelInitialize: 1_predicttensorflowtriton (version 1)\n", - "I0413 11:24:37.157179 1527 model_lifecycle.cc:694] successfully loaded 'executor_model' version 1\n", - "2023-04-13 11:24:37.157805: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", - "2023-04-13 11:24:37.178699: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }\n", - "2023-04-13 11:24:37.178742: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", - "2023-04-13 11:24:37.178876: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "2023-05-09 01:55:22.593417: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:22.609446: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:22.609627: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:22.855175: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:22.855338: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:22.855479: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:22.855607: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 29840 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n", + "2023-05-09 01:55:22.913337: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:354] MLIR V1 optimization pass is not enabled\n", + "2023-05-09 01:55:22.922530: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.\n", + "2023-05-09 01:55:23.337695: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle at path: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", + "2023-05-09 01:55:23.403830: I tensorflow/cc/saved_model/loader.cc:325] SavedModel load for tags { serve }; Status: success: OK. Took 831878 microseconds.\n", + "2023-05-09 01:55:24.746386: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-04-13 11:24:37.179781: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:37.196068: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:37.196289: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:37.196570: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:37.196747: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:37.196909: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:37.197031: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:222] Using CUDA malloc Async allocator for GPU: 0\n", - "2023-04-13 11:24:37.203975: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 38618 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n", - "2023-04-13 11:24:37.262568: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:354] MLIR V1 optimization pass is not enabled\n", - "2023-04-13 11:24:37.271889: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.\n", - "2023-04-13 11:24:37.678751: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle at path: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", - "2023-04-13 11:24:37.745105: I tensorflow/cc/saved_model/loader.cc:325] SavedModel load for tags { serve }; Status: success: OK. Took 587310 microseconds.\n", - "2023-04-13 11:24:39.105154: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-04-13 11:24:40.997532: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:40.997994: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:40.998186: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:26.581369: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:26.581724: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:26.581886: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n", " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n", - "I0413 11:24:42.684588 1527 tensorflow.cc:2691] TRITONBACKEND_ModelInstanceInitialize: 1_predicttensorflowtriton_0 (GPU device 0)\n", - "2023-04-13 11:24:42.684902: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", - "2023-04-13 11:24:42.702205: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }\n", - "2023-04-13 11:24:42.702239: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", - "2023-04-13 11:24:42.702447: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:42.702659: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:42.702822: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:42.703025: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:42.703189: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:42.703311: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 38618 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n", - "2023-04-13 11:24:42.742722: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.\n" + "I0509 01:55:28.344195 1205 python_be.cc:1856] TRITONBACKEND_ModelInstanceInitialize: executor_model_0 (GPU device 0)\n", + "2023-05-09 01:55:29.628356: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2023-05-09 01:55:31.434543: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:31.434993: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:31.435198: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "I0509 01:55:31.465538 1205 tensorflow.cc:2691] TRITONBACKEND_ModelInstanceInitialize: 1_predicttensorflowtriton_0 (GPU device 0)\n", + "I0509 01:55:31.465701 1205 model_lifecycle.cc:694] successfully loaded 'executor_model' version 1\n", + "2023-05-09 01:55:31.465951: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", + "2023-05-09 01:55:31.490532: I tensorflow/cc/saved_model/reader.cc:89] Reading meta graph with tags { serve }\n", + "2023-05-09 01:55:31.490575: I tensorflow/cc/saved_model/reader.cc:130] Reading SavedModel debug info (if present) from: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", + "2023-05-09 01:55:31.490777: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:31.491003: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:31.491186: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:31.491411: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:31.491588: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:31.491744: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 29840 MB memory: -> device: 0, name: Quadro RTX 8000, pci bus id: 0000:08:00.0, compute capability: 7.5\n", + "2023-05-09 01:55:31.549442: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "2023-04-13 11:24:43.330311: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle at path: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", - "2023-04-13 11:24:43.395816: I tensorflow/cc/saved_model/loader.cc:325] SavedModel load for tags { serve }; Status: success: OK. Took 710922 microseconds.\n", - "I0413 11:24:43.395921 1527 python_be.cc:1856] TRITONBACKEND_ModelInstanceInitialize: 0_transformworkflowtriton_0 (GPU device 0)\n", - "I0413 11:24:43.396107 1527 model_lifecycle.cc:694] successfully loaded '1_predicttensorflowtriton' version 1\n", - "2023-04-13 11:24:44.668497: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", + "2023-05-09 01:55:32.146750: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle at path: /workspace/models_for_benchmarking/1_predicttensorflowtriton/1/model.savedmodel\n", + "2023-05-09 01:55:32.213463: I tensorflow/cc/saved_model/loader.cc:325] SavedModel load for tags { serve }; Status: success: OK. Took 747520 microseconds.\n", + "I0509 01:55:32.213572 1205 python_be.cc:1856] TRITONBACKEND_ModelInstanceInitialize: 0_transformworkflowtriton_0 (GPU device 0)\n", + "I0509 01:55:32.213757 1205 model_lifecycle.cc:694] successfully loaded '1_predicttensorflowtriton' version 1\n", + "2023-05-09 01:55:33.476455: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE3 SSE4.1 SSE4.2 AVX\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-04-13 11:24:46.525315: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:46.525768: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "2023-04-13 11:24:46.525978: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", - "I0413 11:24:46.583396 1527 model_lifecycle.cc:694] successfully loaded '0_transformworkflowtriton' version 1\n", - "I0413 11:24:46.583508 1527 server.cc:563] \n", + "2023-05-09 01:55:35.263779: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:35.264127: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "2023-05-09 01:55:35.264284: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n", + "I0509 01:55:35.317101 1205 model_lifecycle.cc:694] successfully loaded '0_transformworkflowtriton' version 1\n", + "I0509 01:55:35.317235 1205 server.cc:563] \n", "+------------------+------+\n", "| Repository Agent | Path |\n", "+------------------+------+\n", "+------------------+------+\n", "\n", - "I0413 11:24:46.583587 1527 server.cc:590] \n", + "I0509 01:55:35.317307 1205 server.cc:590] \n", "+------------+-----------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", "| Backend | Path | Config |\n", "+------------+-----------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", @@ -1204,7 +14316,7 @@ "| tensorflow | /opt/tritonserver/backends/tensorflow2/libtriton_tensorflow2.so | {\"cmdline\":{\"auto-complete-config\":\"true\",\"min-compute-capability\":\"6.000000\",\"backend-directory\":\"/opt/tritonserver/backends\",\"default-max-batch-size\":\"4\"}} |\n", "+------------+-----------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", "\n", - "I0413 11:24:46.583634 1527 server.cc:633] \n", + "I0509 01:55:35.317350 1205 server.cc:633] \n", "+---------------------------+---------+--------+\n", "| Model | Version | Status |\n", "+---------------------------+---------+--------+\n", @@ -1213,9 +14325,9 @@ "| executor_model | 1 | READY |\n", "+---------------------------+---------+--------+\n", "\n", - "I0413 11:24:46.610538 1527 metrics.cc:864] Collecting metrics for GPU 0: Quadro RTX 8000\n", - "I0413 11:24:46.610778 1527 metrics.cc:757] Collecting CPU metrics\n", - "I0413 11:24:46.610913 1527 tritonserver.cc:2264] \n", + "I0509 01:55:35.343214 1205 metrics.cc:864] Collecting metrics for GPU 0: Quadro RTX 8000\n", + "I0509 01:55:35.343395 1205 metrics.cc:757] Collecting CPU metrics\n", + "I0509 01:55:35.343534 1205 tritonserver.cc:2264] \n", "+----------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", "| Option | Value |\n", "+----------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", @@ -1234,16 +14346,16 @@ "| exit_timeout | 30 |\n", "+----------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+\n", "\n", - "I0413 11:24:46.611676 1527 grpc_server.cc:4819] Started GRPCInferenceService at 0.0.0.0:8001\n", - "I0413 11:24:46.611833 1527 http_server.cc:3477] Started HTTPService at 0.0.0.0:8000\n", - "I0413 11:24:46.652586 1527 http_server.cc:184] Started Metrics Service at 0.0.0.0:8002\n" + "I0509 01:55:35.344357 1205 grpc_server.cc:4819] Started GRPCInferenceService at 0.0.0.0:8001\n", + "I0509 01:55:35.344507 1205 http_server.cc:3477] Started HTTPService at 0.0.0.0:8000\n", + "I0509 01:55:35.385232 1205 http_server.cc:184] Started Metrics Service at 0.0.0.0:8002\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "2023-04-13 11:25:37.504455: I tensorflow/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8700\n" + "2023-05-09 01:56:23.448369: I tensorflow/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8700\n" ] } ],