diff --git a/src/intelligence_layer/connectors/studio/studio.py b/src/intelligence_layer/connectors/studio/studio.py index c652edc2..c8cd8b36 100644 --- a/src/intelligence_layer/connectors/studio/studio.py +++ b/src/intelligence_layer/connectors/studio/studio.py @@ -490,7 +490,7 @@ def submit_benchmark_execution( def submit_benchmark_lineages( self, - benchmark_lineages: PostBenchmarkLineagesRequest, + benchmark_lineages: Sequence[BenchmarkLineage], benchmark_id: str, execution_id: str, ) -> PostBenchmarkLineagesResponse: @@ -499,15 +499,22 @@ def submit_benchmark_lineages( f"/api/projects/{self.project_id}/evaluation/benchmarks/{benchmark_id}/executions/{execution_id}/lineages", ) + request_data = self._create_post_bechnmark_lineages_request(benchmark_lineages) + response = requests.post( url, headers=self._headers, - data=benchmark_lineages.model_dump_json(), + data=request_data.model_dump_json(), ) self._raise_for_status(response) return PostBenchmarkLineagesResponse(response.json()) + def _create_post_bechnmark_lineages_request( + self, benchmark_lineages: Sequence[BenchmarkLineage] + ) -> PostBenchmarkLineagesRequest: + return PostBenchmarkLineagesRequest(root=benchmark_lineages) + def _raise_for_status(self, response: requests.Response) -> None: try: response.raise_for_status() diff --git a/src/intelligence_layer/evaluation/benchmark/studio_benchmark.py b/src/intelligence_layer/evaluation/benchmark/studio_benchmark.py index 5d5a54e2..988e1af9 100644 --- a/src/intelligence_layer/evaluation/benchmark/studio_benchmark.py +++ b/src/intelligence_layer/evaluation/benchmark/studio_benchmark.py @@ -172,7 +172,7 @@ def _create_benchmark_lineages( EvaluationLineage[Input, ExpectedOutput, Output, Evaluation] ], trace_ids: list[str], - ) -> Sequence[BenchmarkLineage]: + ) -> Sequence[BenchmarkLineage[Input, Output, ExpectedOutput, Evaluation]]: return [ self._create_benchmark_lineage(eval_lineage, trace_id) for eval_lineage, trace_id in zip(eval_lineages, trace_ids, strict=True) diff --git a/tests/connectors/studio/test_studio_benchmark.py b/tests/connectors/studio/test_studio_benchmark.py index 3cb852df..4eb6cd3c 100644 --- a/tests/connectors/studio/test_studio_benchmark.py +++ b/tests/connectors/studio/test_studio_benchmark.py @@ -247,20 +247,18 @@ def test_submit_benchmark_lineage_uploads_single_lineage( benchmark_execution_id = studio_client.submit_benchmark_execution( benchmark_id=benchmark_id, data=example_request ) - lineages = DummyPostBenchmarkLineagesRequest( - [ - DummyBenchmarkLineage( - trace_id=trace_id, - input="input", - expected_output="output", - example_metadata={"key3": "value3"}, - output="output", - evaluation={"key5": "value5"}, - run_latency=1, - run_tokens=3, - ), - ] - ) + lineages = [ + DummyBenchmarkLineage( + trace_id=trace_id, + input="input", + expected_output="output", + example_metadata={"key3": "value3"}, + output="output", + evaluation={"key5": "value5"}, + run_latency=1, + run_tokens=3, + ), + ] lineage_ids = studio_client.submit_benchmark_lineages( benchmark_lineages=lineages, @@ -268,6 +266,6 @@ def test_submit_benchmark_lineage_uploads_single_lineage( execution_id=benchmark_execution_id, ) - assert len(lineage_ids.root) == len(lineages.root) + assert len(lineage_ids.root) == len(lineages) for lineage_id in lineage_ids.root: assert UUID(lineage_id) diff --git a/tests/evaluation/benchmark/test_benchmark.py b/tests/evaluation/benchmark/test_benchmark.py index bac260d6..da3cb932 100644 --- a/tests/evaluation/benchmark/test_benchmark.py +++ b/tests/evaluation/benchmark/test_benchmark.py @@ -243,4 +243,3 @@ def test_execute_benchmark( mock_studio_client.submit_benchmark_execution.assert_called_once() # type: ignore assert mock_studio_client.submit_trace.call_count == 4 # type: ignore mock_studio_client.submit_benchmark_lineages.assert_called_once() # type: ignore -