From 4587d1ca85f7c586f6d73020a955a7c2d54f58ff Mon Sep 17 00:00:00 2001
From: Johannes Wesch <johannes.wesch@aleph-alpha.com>
Date: Tue, 12 Nov 2024 13:52:44 +0100
Subject: [PATCH] fix: exchange force_execution bool with allow_diff

---
 src/intelligence_layer/evaluation/benchmark/benchmark.py    | 4 ++--
 .../evaluation/benchmark/studio_benchmark.py                | 6 ++----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/intelligence_layer/evaluation/benchmark/benchmark.py b/src/intelligence_layer/evaluation/benchmark/benchmark.py
index 316359aec..e0055012e 100644
--- a/src/intelligence_layer/evaluation/benchmark/benchmark.py
+++ b/src/intelligence_layer/evaluation/benchmark/benchmark.py
@@ -88,7 +88,7 @@ def get_benchmark(
         benchmark_id: str,
         eval_logic: EvaluationLogic[Input, Output, ExpectedOutput, Evaluation],
         aggregation_logic: AggregationLogic[Evaluation, AggregatedEvaluation],
-        force_execution: bool = False,
+        allow_diff: bool = False,
     ) -> Benchmark:
         """Retrieves an existing benchmark from the repository.
 
@@ -96,7 +96,7 @@ def get_benchmark(
             benchmark_id: Unique identifier for the benchmark to retrieve.
             eval_logic: Evaluation logic to apply.
             aggregation_logic (AggregationLogic[Evaluation, AggregatedEvaluation]): Aggregation logic to apply.
-            force_execution: Execute the benchmark even though logics behaviour do not match.
+            allow_diff: Retrieve the benchmark even though the behaviour of the logics does not match.
 
         Returns:
             The retrieved benchmark instance. Raises ValueError if no benchmark is found.
diff --git a/src/intelligence_layer/evaluation/benchmark/studio_benchmark.py b/src/intelligence_layer/evaluation/benchmark/studio_benchmark.py
index b2688d93e..234e1f417 100644
--- a/src/intelligence_layer/evaluation/benchmark/studio_benchmark.py
+++ b/src/intelligence_layer/evaluation/benchmark/studio_benchmark.py
@@ -46,7 +46,7 @@ def __init__(
         self.client = studio_client
 
     def execute(self, task: Task[Input, Output], metadata: dict[str, Any]) -> str:
-        raise NotImplementedError  # <- skip the impl here for now, not this is another ticket
+        raise NotImplementedError
 
 
 class StudioBenchmarkRepository(BenchmarkRepository):
@@ -88,13 +88,11 @@ def get_benchmark(
         benchmark_id: str,
         eval_logic: EvaluationLogic[Input, Output, ExpectedOutput, Evaluation],
         aggregation_logic: AggregationLogic[Evaluation, AggregatedEvaluation],
-        force_execution: bool = False,
+        allow_diff: bool = False,
     ) -> StudioBenchmark:
         benchmark = self.client.get_benchmark(benchmark_id)
         if benchmark is None:
             raise ValueError("Benchmark not found")
-        # check if the logic is the same
-        # check force bool
         return StudioBenchmark(
             benchmark_id,
             benchmark.dataset_id,