From a236f9b635915baba955c69b5429f955f7cb29cc Mon Sep 17 00:00:00 2001 From: Sebastian Niehus Date: Tue, 23 Apr 2024 10:46:47 +0200 Subject: [PATCH] feat: Add max_workers argument to run_dataset. Task: IL-327 --- CHANGELOG.md | 1 + src/intelligence_layer/evaluation/run/runner.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cec16682b..f8b06dfc7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## Unreleased ### Breaking Changes +- feature: `run_dataset` now takes `max_workers` as an additional argument. It defaults to 10, the value that was previously hardcoded. ### New Features diff --git a/src/intelligence_layer/evaluation/run/runner.py b/src/intelligence_layer/evaluation/run/runner.py index b1e919ed5..e41db92a4 100644 --- a/src/intelligence_layer/evaluation/run/runner.py +++ b/src/intelligence_layer/evaluation/run/runner.py @@ -76,6 +76,7 @@ def run_dataset( tracer: Optional[Tracer] = None, num_examples: Optional[int] = None, abort_on_error: bool = False, + max_workers: int = 10, ) -> RunOverview: """Generates all outputs for the provided dataset. @@ -88,6 +89,7 @@ def run_dataset( num_examples: An optional int to specify how many examples from the dataset should be run. Always the first n examples will be taken. abort_on_error: Flag to abort all run when an error occurs. Defaults to False. + max_workers: Maximum number of workers in the thread pool. Defaults to 10. Returns: An overview of the run. Outputs will not be returned but instead stored in the @@ -123,7 +125,7 @@ def run( examples = islice(examples, num_examples) run_id = str(uuid4()) start = utc_now() - with ThreadPoolExecutor(max_workers=10) as executor: + with ThreadPoolExecutor(max_workers=max_workers) as executor: ids_and_outputs = tqdm(executor.map(run, examples), desc="Evaluating") failed_count = 0