From a236f9b635915baba955c69b5429f955f7cb29cc Mon Sep 17 00:00:00 2001
From: Sebastian Niehus <sebastian.niehus@ext.aleph-alpha.com>
Date: Tue, 23 Apr 2024 10:46:47 +0200
Subject: [PATCH] feat: Add max_workers argument to run_dataset. Task: IL-327

---
 CHANGELOG.md                                    | 1 +
 src/intelligence_layer/evaluation/run/runner.py | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index cec16682b..f8b06dfc7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,7 @@
 ## Unreleased
 
 ### Breaking Changes
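+- feature: `run_dataset` now accepts an optional `max_workers` argument that sets the maximum number of workers in its thread pool. It defaults to 10, the value that was previously hardcoded, so existing calls behave as before.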
 
 ### New Features
 
diff --git a/src/intelligence_layer/evaluation/run/runner.py b/src/intelligence_layer/evaluation/run/runner.py
index b1e919ed5..e41db92a4 100644
--- a/src/intelligence_layer/evaluation/run/runner.py
+++ b/src/intelligence_layer/evaluation/run/runner.py
@@ -76,6 +76,7 @@ def run_dataset(
         tracer: Optional[Tracer] = None,
         num_examples: Optional[int] = None,
         abort_on_error: bool = False,
+        max_workers: int = 10,
     ) -> RunOverview:
         """Generates all outputs for the provided dataset.
 
@@ -88,6 +89,7 @@ def run_dataset(
             num_examples: An optional int to specify how many examples from the dataset should be run.
                 Always the first n examples will be taken.
             abort_on_error: Flag to abort all run when an error occurs. Defaults to False.
+            max_workers: Maximum number of workers in the thread pool. Defaults to 10.
 
         Returns:
             An overview of the run. Outputs will not be returned but instead stored in the
@@ -123,7 +125,7 @@ def run(
             examples = islice(examples, num_examples)
         run_id = str(uuid4())
         start = utc_now()
-        with ThreadPoolExecutor(max_workers=10) as executor:
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
             ids_and_outputs = tqdm(executor.map(run, examples), desc="Evaluating")
 
             failed_count = 0