From 2d9be1c9c0af16b8f7d2bc7ed1080d8b52be52ab Mon Sep 17 00:00:00 2001 From: Jay Chia Date: Thu, 5 Dec 2024 01:12:13 -0800 Subject: [PATCH] more debug stuff --- .github/working-dir/dedup.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/working-dir/dedup.py b/.github/working-dir/dedup.py index bf0933fbc0..08811b29f0 100644 --- a/.github/working-dir/dedup.py +++ b/.github/working-dir/dedup.py @@ -1,3 +1,5 @@ +import os + import daft from daft import DataFrame, DataType, Expression, Series, col @@ -96,10 +98,15 @@ def components(df: DataFrame) -> DataFrame: if __name__ == "__main__": + import ray + + print("RAY_JOB_ID", os.getenv("RAY_JOB_ID")) + print("Ray is initialized", ray.is_initialized()) + daft.context.set_runner_ray() daft.set_execution_config(enable_ray_tracing=True) - df = daft.read_parquet("s3://eventual-dev-benchmarking-fixtures/redpajama-parquet/v1.0.0/sample-0.01") + df = daft.read_parquet("s3://eventual-dev-benchmarking-fixtures/redpajama-parquet/v1.0.0/sample-0.1") df = dedupe( df, col("doc_id"),