From 4587d1ca85f7c586f6d73020a955a7c2d54f58ff Mon Sep 17 00:00:00 2001 From: Johannes Wesch Date: Tue, 12 Nov 2024 13:52:44 +0100 Subject: [PATCH] fix: exchange force_execution bool with allow_diff --- src/intelligence_layer/evaluation/benchmark/benchmark.py | 4 ++-- .../evaluation/benchmark/studio_benchmark.py | 6 ++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/intelligence_layer/evaluation/benchmark/benchmark.py b/src/intelligence_layer/evaluation/benchmark/benchmark.py index 316359aec..e0055012e 100644 --- a/src/intelligence_layer/evaluation/benchmark/benchmark.py +++ b/src/intelligence_layer/evaluation/benchmark/benchmark.py @@ -88,7 +88,7 @@ def get_benchmark( benchmark_id: str, eval_logic: EvaluationLogic[Input, Output, ExpectedOutput, Evaluation], aggregation_logic: AggregationLogic[Evaluation, AggregatedEvaluation], - force_execution: bool = False, + allow_diff: bool = False, ) -> Benchmark: """Retrieves an existing benchmark from the repository. @@ -96,7 +96,7 @@ def get_benchmark( benchmark_id: Unique identifier for the benchmark to retrieve. eval_logic: Evaluation logic to apply. aggregation_logic (AggregationLogic[Evaluation, AggregatedEvaluation]): Aggregation logic to apply. - force_execution: Execute the benchmark even though logics behaviour do not match. + allow_diff: Retrieve the benchmark even if the behaviour of the logics does not match. Returns: The retrieved benchmark instance. Raises ValueError if no benchmark is found. 
diff --git a/src/intelligence_layer/evaluation/benchmark/studio_benchmark.py b/src/intelligence_layer/evaluation/benchmark/studio_benchmark.py index b2688d93e..234e1f417 100644 --- a/src/intelligence_layer/evaluation/benchmark/studio_benchmark.py +++ b/src/intelligence_layer/evaluation/benchmark/studio_benchmark.py @@ -46,7 +46,7 @@ def __init__( self.client = studio_client def execute(self, task: Task[Input, Output], metadata: dict[str, Any]) -> str: - raise NotImplementedError # <- skip the impl here for now, not this is another ticket + raise NotImplementedError class StudioBenchmarkRepository(BenchmarkRepository): @@ -88,13 +88,11 @@ def get_benchmark( benchmark_id: str, eval_logic: EvaluationLogic[Input, Output, ExpectedOutput, Evaluation], aggregation_logic: AggregationLogic[Evaluation, AggregatedEvaluation], - force_execution: bool = False, + allow_diff: bool = False, ) -> StudioBenchmark: benchmark = self.client.get_benchmark(benchmark_id) if benchmark is None: raise ValueError("Benchmark not found") - # check if the logic is the same - # check force bool return StudioBenchmark( benchmark_id, benchmark.dataset_id,