Skip to content

Commit

Permalink
Add a benchmarking script
Browse files Browse the repository at this point in the history
  • Loading branch information
Jay Chia committed Dec 6, 2024
1 parent 491054b commit dd40231
Showing 1 changed file with 49 additions and 0 deletions.
49 changes: 49 additions & 0 deletions benchmarking/ooms/big_file_reads.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# /// script
# dependencies = []
# ///

import argparse
import time

import daft


def get_args(argv=None) -> argparse.Namespace:
    """Parse the benchmark's command-line flags.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default), argparse reads the arguments from ``sys.argv``,
            which is the behavior of calling ``get_args()`` with no arguments.

    Returns:
        A namespace with two boolean attributes, both ``False`` unless the
        corresponding flag is passed:
        ``enable_optimized_splits`` (``--enable-optimized-splits``) and
        ``dry_run`` (``--dry-run``).
    """
    parser = argparse.ArgumentParser()
    # ``store_true`` already defaults to False, so no explicit default is needed.
    parser.add_argument(
        "--enable-optimized-splits",
        action="store_true",
        help="Whether to enable the new splits",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Only run explain and show; skip the full collect",
    )
    return parser.parse_args(argv)


def _report_elapsed(label, action):
    """Call ``action()`` and print its duration as ``"<label> took: <seconds>s"``."""
    # perf_counter is a monotonic, high-resolution clock intended for measuring
    # intervals; time.time() can jump if the system clock is adjusted.
    start = time.perf_counter()
    action()
    print(f"{label} took: {time.perf_counter() - start}s")


if __name__ == "__main__":
    # Parse arguments first so that --help or an invalid flag exits before
    # the runner is configured.
    args = get_args()

    daft.context.set_runner_ray()

    if args.enable_optimized_splits:
        daft.set_execution_config(enable_aggressive_scantask_splitting=True)

    # Input is one large file followed by ten references to the same small
    # file (``*`` binds tighter than ``+``, so only the small path is repeated).
    df = daft.read_parquet(
        [
            "s3://daft-public-data/test_fixtures/parquet/large-fake-data.parquet",
        ]
        + [
            "s3://daft-public-data/test_fixtures/parquet/small-fake-data.parquet",
        ]
        * 10
    )

    # Time each stage separately: explain, show, then the full collect.
    _report_elapsed("Explain", lambda: df.explain(True))
    _report_elapsed("Show", df.show)

    # --dry-run skips the full collect.
    if not args.dry_run:
        _report_elapsed("Collect", df.collect)

0 comments on commit dd40231

Please sign in to comment.