You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
root@0da37f00b33c:/share/criteo_data# bash preprocess.sh 1 criteo_tmp nvt 0 0 0
Warning: existing criteo_tmp is erased
Preprocessing script: NVTabular
Getting the first few examples from the uncompressed dataset...
Counting the number of samples in day_1 dataset...
The first 45840617 examples will be used in day_1 dataset.
Shuffling dataset...
Preprocessing...
Splitting into 36672493-sample training, 4584062-sample val, and 4584062-sample test datasets...
/usr/local/lib/python3.10/dist-packages/merlin/dtypes/mappings/tf.py:52: UserWarning: Tensorflow dtype mappings did not load successfully due to an error: No module named 'tensorflow'
warn(f"Tensorflow dtype mappings did not load successfully due to an error: {exc.msg}")
/usr/local/lib/python3.10/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'
warn(f"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}")
2023-12-08 17:36:24,676 NVTabular processing
2023-12-08 17:36:24,699 To route to workers diagnostics web server please install jupyter-server-proxy: python -m pip install jupyter-server-proxy
2023-12-08 17:36:24,702 State start
2023-12-08 17:36:24,712 Scheduler at: tcp://127.0.0.1:40579
2023-12-08 17:36:24,712 dashboard at: 127.0.0.1:8787
2023-12-08 17:36:24,781 Start Nanny at: 'tcp://127.0.0.1:40985'
/usr/local/lib/python3.10/dist-packages/merlin/dtypes/mappings/tf.py:52: UserWarning: Tensorflow dtype mappings did not load successfully due to an error: No module named 'tensorflow'
warn(f"Tensorflow dtype mappings did not load successfully due to an error: {exc.msg}")
/usr/local/lib/python3.10/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'
warn(f"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}")
2023-12-08 17:36:26,969 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize
2023-12-08 17:36:26,969 Creating preload: dask_cuda.initialize
2023-12-08 17:36:26,969 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize
2023-12-08 17:36:26,969 Import preload module: dask_cuda.initialize
2023-12-08 17:36:26,993 Run preload setup: dask_cuda.initialize
2023-12-08 17:36:27,086 Start worker at: tcp://127.0.0.1:46779
2023-12-08 17:36:27,086 Listening to: tcp://127.0.0.1:46779
2023-12-08 17:36:27,086 Worker name: 0
2023-12-08 17:36:27,086 dashboard at: 127.0.0.1:38661
2023-12-08 17:36:27,086 Waiting to connect to: tcp://127.0.0.1:40579
2023-12-08 17:36:27,086 -------------------------------------------------
2023-12-08 17:36:27,086 Threads: 1
2023-12-08 17:36:27,086 Memory: 251.56 GiB
2023-12-08 17:36:27,087 Local Directory: /tmp/dask-worker-space/worker-1fd2e7px
2023-12-08 17:36:27,087 Starting Worker plugin PreImport-047eaac1-e793-4dd5-b59e-bd5028f4f909
2023-12-08 17:36:27,087 Starting Worker plugin RMMSetup-c63a5516-1d21-4b3e-8187-a1f883e8fb5e
2023-12-08 17:36:27,087 Starting Worker plugin CPUAffinity-1188f5ac-00e8-4229-9bea-18511f88e776
2023-12-08 17:36:27,087 -------------------------------------------------
2023-12-08 17:36:27,097 Register worker <WorkerState 'tcp://127.0.0.1:46779', name: 0, status: init, memory: 0, processing: 0>
2023-12-08 17:36:27,103 Starting worker compute stream, tcp://127.0.0.1:46779
2023-12-08 17:36:27,103 Starting established connection to tcp://127.0.0.1:48730
2023-12-08 17:36:27,103 Registered to: tcp://127.0.0.1:40579
2023-12-08 17:36:27,103 -------------------------------------------------
2023-12-08 17:36:27,104 Starting established connection to tcp://127.0.0.1:40579
2023-12-08 17:36:27,142 Receive client connection: Client-508fa7e6-95f0-11ee-8237-0242ac110004
2023-12-08 17:36:27,143 Starting established connection to tcp://127.0.0.1:48732
2023-12-08 17:36:27,147 Run out-of-band function 'reinitialize'
/usr/local/lib/python3.10/dist-packages/merlin/core/utils.py:361: FutureWarning: The client argument is deprecated from DaskExecutor and will be removed in a future version of NVTabular. By default, a global client in the same python context will be detected automatically, and merlin.utils.set_dask_client (as well as Distributed and Serial) can be used for explicit control.
warnings.warn(
2023-12-08 17:36:45,511 Preprocessing
2023-12-08 17:36:45,863 Train Datasets Preprocessing.....
2023-12-08 17:36:46,095 Run out-of-band function 'clean_worker_cache'
2023-12-08 17:36:46,332 Run out-of-band function 'clean_worker_cache'
2023-12-08 17:37:07,465 Run out-of-band function 'clean_worker_cache'
2023-12-08 17:37:08,855 Run out-of-band function 'clean_worker_cache'
Traceback (most recent call last):
File "/share/criteo_data/criteo_script/preprocess_nvt.py", line 418, in <module>
process_NVT(args)
File "/share/criteo_data/criteo_script/preprocess_nvt.py", line 216, in process_NVT
workflow.transform(train_ds_iterator).to_hugectr(
File "/usr/local/lib/python3.10/dist-packages/merlin/io/dataset.py", line 1070, in to_hugectr
self.schema.write(output_path)
AttributeError: 'Schema' object has no attribute 'write'
The text was updated successfully, but these errors were encountered:
root@0da37f00b33c:/share/criteo_data# bash preprocess.sh 1 criteo_tmp nvt 0 0 0
Warning: existing criteo_tmp is erased
Preprocessing script: NVTabular
Getting the first few examples from the uncompressed dataset...
Counting the number of samples in day_1 dataset...
The first 45840617 examples will be used in day_1 dataset.
Shuffling dataset...
Preprocessing...
Splitting into 36672493-sample training, 4584062-sample val, and 4584062-sample test datasets...
/usr/local/lib/python3.10/dist-packages/merlin/dtypes/mappings/tf.py:52: UserWarning: Tensorflow dtype mappings did not load successfully due to an error: No module named 'tensorflow'
warn(f"Tensorflow dtype mappings did not load successfully due to an error: {exc.msg}")
/usr/local/lib/python3.10/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'
warn(f"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}")
2023-12-08 17:36:24,676 NVTabular processing
2023-12-08 17:36:24,699 To route to workers diagnostics web server please install jupyter-server-proxy: python -m pip install jupyter-server-proxy
2023-12-08 17:36:24,702 State start
2023-12-08 17:36:24,712 Scheduler at: tcp://127.0.0.1:40579
2023-12-08 17:36:24,712 dashboard at: 127.0.0.1:8787
2023-12-08 17:36:24,781 Start Nanny at: 'tcp://127.0.0.1:40985'
/usr/local/lib/python3.10/dist-packages/merlin/dtypes/mappings/tf.py:52: UserWarning: Tensorflow dtype mappings did not load successfully due to an error: No module named 'tensorflow'
warn(f"Tensorflow dtype mappings did not load successfully due to an error: {exc.msg}")
/usr/local/lib/python3.10/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'
warn(f"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}")
2023-12-08 17:36:26,969 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize
2023-12-08 17:36:26,969 Creating preload: dask_cuda.initialize
2023-12-08 17:36:26,969 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize
2023-12-08 17:36:26,969 Import preload module: dask_cuda.initialize
2023-12-08 17:36:26,993 Run preload setup: dask_cuda.initialize
2023-12-08 17:36:27,086 Start worker at: tcp://127.0.0.1:46779
2023-12-08 17:36:27,086 Listening to: tcp://127.0.0.1:46779
2023-12-08 17:36:27,086 Worker name: 0
2023-12-08 17:36:27,086 dashboard at: 127.0.0.1:38661
2023-12-08 17:36:27,086 Waiting to connect to: tcp://127.0.0.1:40579
2023-12-08 17:36:27,086 -------------------------------------------------
2023-12-08 17:36:27,086 Threads: 1
2023-12-08 17:36:27,086 Memory: 251.56 GiB
2023-12-08 17:36:27,087 Local Directory: /tmp/dask-worker-space/worker-1fd2e7px
2023-12-08 17:36:27,087 Starting Worker plugin PreImport-047eaac1-e793-4dd5-b59e-bd5028f4f909
2023-12-08 17:36:27,087 Starting Worker plugin RMMSetup-c63a5516-1d21-4b3e-8187-a1f883e8fb5e
2023-12-08 17:36:27,087 Starting Worker plugin CPUAffinity-1188f5ac-00e8-4229-9bea-18511f88e776
2023-12-08 17:36:27,087 -------------------------------------------------
2023-12-08 17:36:27,097 Register worker <WorkerState 'tcp://127.0.0.1:46779', name: 0, status: init, memory: 0, processing: 0>
2023-12-08 17:36:27,103 Starting worker compute stream, tcp://127.0.0.1:46779
2023-12-08 17:36:27,103 Starting established connection to tcp://127.0.0.1:48730
2023-12-08 17:36:27,103 Registered to: tcp://127.0.0.1:40579
2023-12-08 17:36:27,103 -------------------------------------------------
2023-12-08 17:36:27,104 Starting established connection to tcp://127.0.0.1:40579
2023-12-08 17:36:27,142 Receive client connection: Client-508fa7e6-95f0-11ee-8237-0242ac110004
2023-12-08 17:36:27,143 Starting established connection to tcp://127.0.0.1:48732
2023-12-08 17:36:27,147 Run out-of-band function 'reinitialize'
/usr/local/lib/python3.10/dist-packages/merlin/core/utils.py:361: FutureWarning: The client argument is deprecated from DaskExecutor and will be removed in a future version of NVTabular. By default, a global client in the same python context will be detected automatically, and merlin.utils.set_dask_client (as well as Distributed and Serial) can be used for explicit control.
warnings.warn(
2023-12-08 17:36:45,511 Preprocessing
2023-12-08 17:36:45,863 Train Datasets Preprocessing.....
2023-12-08 17:36:46,095 Run out-of-band function 'clean_worker_cache'
2023-12-08 17:36:46,332 Run out-of-band function 'clean_worker_cache'
2023-12-08 17:37:07,465 Run out-of-band function 'clean_worker_cache'
2023-12-08 17:37:08,855 Run out-of-band function 'clean_worker_cache'
Traceback (most recent call last):
File "/share/criteo_data/criteo_script/preprocess_nvt.py", line 418, in <module>
process_NVT(args)
File "/share/criteo_data/criteo_script/preprocess_nvt.py", line 216, in process_NVT
workflow.transform(train_ds_iterator).to_hugectr(
File "/usr/local/lib/python3.10/dist-packages/merlin/io/dataset.py", line 1070, in to_hugectr
self.schema.write(output_path)
AttributeError: 'Schema' object has no attribute 'write'
The text was updated successfully, but these errors were encountered: