fix: exchange force_execution bool with allow_diff
JohannesWesch committed Nov 12, 2024
1 parent 5c98fae commit 4587d1c
Showing 2 changed files with 4 additions and 6 deletions.
4 changes: 2 additions & 2 deletions in src/intelligence_layer/evaluation/benchmark/benchmark.py

@@ -88,15 +88,15 @@ def get_benchmark(
         benchmark_id: str,
         eval_logic: EvaluationLogic[Input, Output, ExpectedOutput, Evaluation],
         aggregation_logic: AggregationLogic[Evaluation, AggregatedEvaluation],
-        force_execution: bool = False,
+        allow_diff: bool = False,
     ) -> Benchmark:
         """Retrieves an existing benchmark from the repository.

         Args:
             benchmark_id: Unique identifier for the benchmark to retrieve.
             eval_logic: Evaluation logic to apply.
             aggregation_logic (AggregationLogic[Evaluation, AggregatedEvaluation]): Aggregation logic to apply.
-            force_execution: Execute the benchmark even though logics behaviour do not match.
+            allow_diff: Retrieve the benchmark even if the logics' behaviour does not match.

         Returns:
             The retrieved benchmark instance. Raises ValueError if no benchmark is found.
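For illustration, a minimal usage sketch of the renamed flag; `repo`, `my_eval_logic`, and `my_aggregation_logic` are hypothetical placeholders, not part of this commit:

```python
# Hypothetical usage sketch, assuming `repo` is some BenchmarkRepository
# implementation and the logic objects are already constructed.
benchmark = repo.get_benchmark(
    "my-benchmark-id",                       # placeholder id
    eval_logic=my_eval_logic,                # an EvaluationLogic instance
    aggregation_logic=my_aggregation_logic,  # an AggregationLogic instance
    allow_diff=True,  # retrieve even if the stored logics' behaviour differs
)
```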
6 changes: 2 additions & 4 deletions in the second changed file

@@ -46,7 +46,7 @@ def __init__(
         self.client = studio_client

     def execute(self, task: Task[Input, Output], metadata: dict[str, Any]) -> str:
-        raise NotImplementedError  # <- skip the impl here for now, not this is another ticket
+        raise NotImplementedError


 class StudioBenchmarkRepository(BenchmarkRepository):

@@ -88,13 +88,11 @@ def get_benchmark(
         benchmark_id: str,
         eval_logic: EvaluationLogic[Input, Output, ExpectedOutput, Evaluation],
         aggregation_logic: AggregationLogic[Evaluation, AggregatedEvaluation],
-        force_execution: bool = False,
+        allow_diff: bool = False,
     ) -> StudioBenchmark:
         benchmark = self.client.get_benchmark(benchmark_id)
         if benchmark is None:
             raise ValueError("Benchmark not found")
-        # check if the logic is the same
-        # check force bool
         return StudioBenchmark(
             benchmark_id,
             benchmark.dataset_id,
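The two deleted TODO comments hint at what allow_diff is meant to gate: a comparison between the supplied logics and those stored with the benchmark. A minimal sketch of that check, assuming a hypothetical logics_match helper that is not part of this commit:

```python
# Hypothetical sketch of the gate the removed TODOs describe; `logics_match`
# is a placeholder, not real library API.
if not allow_diff and not logics_match(benchmark, eval_logic, aggregation_logic):
    raise ValueError(
        "Logics do not match the stored benchmark; pass allow_diff=True to retrieve anyway."
    )
```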
