Skip to content

Commit

Permalink
more debug stuff
Browse files (browse the repository at this point in the history)
  • Loading branch information
Jay Chia committed Dec 5, 2024
1 parent 2515ee0 commit 2d9be1c
Showing 1 changed file with 8 additions and 1 deletion.
9 changes: 8 additions & 1 deletion .github/working-dir/dedup.py
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
import os

import daft
from daft import DataFrame, DataType, Expression, Series, col

Expand Down Expand Up @@ -96,10 +98,15 @@ def components(df: DataFrame) -> DataFrame:


if __name__ == "__main__":
import ray

print("RAY_JOB_ID", os.getenv("RAY_JOB_ID"))
print("Ray is initialized", ray.is_initialized())

daft.context.set_runner_ray()
daft.set_execution_config(enable_ray_tracing=True)

df = daft.read_parquet("s3://eventual-dev-benchmarking-fixtures/redpajama-parquet/v1.0.0/sample-0.01")
df = daft.read_parquet("s3://eventual-dev-benchmarking-fixtures/redpajama-parquet/v1.0.0/sample-0.1")
df = dedupe(
df,
col("doc_id"),
Expand Down

0 comments on commit 2d9be1c

Please sign in to comment.