diff --git a/CHANGELOG.md b/CHANGELOG.md
index c4eb8d75d..f7debf3f5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,7 +1,10 @@
 # Changelog
 
 ## Unreleased
 
-- Add `DataClient` and `StudioDataRepository` as connectors to Studio for submitting data.
+- Add `DataClient` and `StudioDatasetRepository` as connectors to Studio for submitting data.
+- Add `StudioRunnerRepository` as a connector to Studio for submitting runs.
+- Add `StudioEvaluationRepository` as a connector to Studio for submitting evaluations.
+- Add `StudioAggregationRepository` as a connector to Studio for submitting aggregations.
 
 ### Breaking Changes
 ...
diff --git a/src/documentation/how_tos/how_to_create_a_dataset_using_studio.ipynb b/src/documentation/how_tos/how_to_create_a_dataset_using_studio.ipynb
new file mode 100644
index 000000000..c99427f52
--- /dev/null
+++ b/src/documentation/how_tos/how_to_create_a_dataset_using_studio.ipynb
@@ -0,0 +1,127 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from collections.abc import Sequence\n",
+    "\n",
+    "from pydantic import BaseModel\n",
+    "\n",
+    "from intelligence_layer.evaluation import Example\n",
+    "from intelligence_layer.evaluation.dataset.studio_dataset_repository import StudioDatasetRepository\n",
+    "from intelligence_layer.connectors.data import DataClient"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# How to create a dataset\n",
+    "\n",
+    "0. Collect data for examples.\n",
+    "1. Convert the data to `Example`s.\n",
+    "2. Create a `DatasetRepository`.\n",
+    "3. Store the `Example`s in the `DatasetRepository`.\n",
+    "4. Remember the dataset id."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Example"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "65421249-cdea-4a98-a5c8-0ed9280869d5\n",
+      "{'label2', 'label1'}\n",
+      "{'key_a': ['a', 'b'], 'key_b': 'value'}\n"
+     ]
+    }
+   ],
+   "source": [
+    "class StoryTaskInput(BaseModel):  # Should already be implemented in your task\n",
+    "    topic: str\n",
+    "    targeted_word_count: int\n",
+    "\n",
+    "\n",
+    "class StoryTaskExpectedOutput(BaseModel):  # Should already be implemented in your task\n",
+    "    keywords: Sequence[str]\n",
+    "\n",
+    "\n",
+    "# Step 1\n",
+    "examples = [\n",
+    "    Example(\n",
+    "        input=StoryTaskInput(topic=\"rain\", targeted_word_count=42),\n",
+    "        expected_output=StoryTaskExpectedOutput(keywords=[\"wet\"]),\n",
+    "        metadata={\n",
+    "            \"author\": \"Shakespeare\"\n",
+    "        },  # the metadata is optional and can contain custom information\n",
+    "    ),\n",
+    "    # ...\n",
+    "]*10\n",
+    "\n",
+    "# Step 2 - Use the StudioDatasetRepository to store the dataset in Studio\n",
+    "dataset_repository = StudioDatasetRepository(\n",
+    "    repository_id=\"\",\n",
+    "    data_client=DataClient(\n",
+    "        token=\"your_token\",\n",
+    "        base_data_platform_url=\"http://localhost:8080\",\n",
+    "    ),\n",
+    ")\n",
+    "\n",
+    "# Step 3\n",
+    "dataset = dataset_repository.create_dataset(\n",
+    "    examples=examples,\n",
+    "    dataset_name=\"StoryDataset\",\n",
+    "    labels=set([\"label1\", \"label2\"]),\n",
+    "    metadata=dict({\"key_a\": [\"a\", \"b\"], \"key_b\": \"value\"}),\n",
+    ")\n",
+    "\n",
+    "# Step 4\n",
+    "print(dataset.id)\n",
+    "print(dataset.labels)\n",
+    "print(dataset.metadata)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "intelligence-layer-dgcJwC7l-py3.11",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/src/documentation/how_tos/how_to_evaluate_runs_using_studio_evaluation_repository.ipynb b/src/documentation/how_tos/how_to_evaluate_runs_using_studio_evaluation_repository.ipynb
new file mode 100644
index 000000000..b7a65ad9f
--- /dev/null
+++ b/src/documentation/how_tos/how_to_evaluate_runs_using_studio_evaluation_repository.ipynb
@@ -0,0 +1,130 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "from fsspec.implementations.local import LocalFileSystem\n",
+    "\n",
+    "from example_data import DummyEvaluationLogic, example_data, DummyEvaluation\n",
+    "\n",
+    "from intelligence_layer.evaluation import Evaluator, StudioEvaluationRepository\n",
+    "from intelligence_layer.evaluation.dataset.studio_dataset_repository import StudioDatasetRepository\n",
+    "from intelligence_layer.connectors.data.data import DataClient"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# How to evaluate runs\n",
+    "0. Run your tasks on the datasets you want to evaluate them on (see [here](./how_to_run_a_task_on_a_dataset.ipynb)).\n",
+    "   - When evaluating multiple runs, all runs need to share the same data types.\n",
+    "1. Initialize all necessary repositories for the `Evaluator` and an `EvaluationLogic`.\n",
+    "2. Run the evaluator to evaluate all examples and create a single `EvaluationOverview`.\n",
+    "3. (Optional) Save the evaluation id for later use."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Example"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Evaluating: 2it [00:00, 31300.78it/s]"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Evaluating: 2it [00:00, 28532.68it/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Step 0\n",
+    "\n",
+    "my_example_data = example_data()\n",
+    "run_ids = [my_example_data.run_overview_1.id, my_example_data.run_overview_2.id]\n",
+    "\n",
+    "\n",
+    "# Step 1\n",
+    "studio_dataset_repository = StudioDatasetRepository(\n",
+    "    repository_id=\"\",\n",
+    "    data_client=DataClient(token=\"\", base_data_platform_url=\"http://localhost:8080\"),\n",
+    ")\n",
+    "dataset_repository = my_example_data.dataset_repository\n",
+    "run_repository = my_example_data.run_repository\n",
+    "evaluation_repository = StudioEvaluationRepository(\n",
+    "    file_system=LocalFileSystem(True),\n",
+    "    root_directory=Path(\"evals\"),\n",
+    "    studio_dataset_repository=studio_dataset_repository,\n",
+    "    evaluation_type=DummyEvaluation,\n",
+    ")\n",
+    "evaluation_logic = DummyEvaluationLogic()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "# Step 2\n",
+    "evaluator = Evaluator(\n",
+    "    dataset_repository,\n",
+    "    run_repository,\n",
+    "    evaluation_repository,\n",
+    "    \"My dummy evaluation\",\n",
+    "    evaluation_logic,\n",
+    ")\n",
+    "\n",
+    "evaluation_overview = evaluator.evaluate_runs(\n",
+    "    *run_ids, labels=set({\"label\"}), metadata=dict({\"key\": \"value\"})\n",
+    ")\n",
+    "\n",
+    "# Step 3\n",
+    "print(evaluation_overview.id)\n",
+    "print(evaluation_overview.metadata)\n",
+    "print(evaluation_overview.labels)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "intelligence-layer-d3iSWYpm-py3.10",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/src/documentation/how_tos/how_to_run_a_task_on_a_dataset_using_studio_repository.ipynb b/src/documentation/how_tos/how_to_run_a_task_on_a_dataset_using_studio_repository.ipynb
new file mode 100644
index 000000000..5e3f6bb27
--- /dev/null
+++ b/src/documentation/how_tos/how_to_run_a_task_on_a_dataset_using_studio_repository.ipynb
@@ -0,0 +1,101 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from example_data import DummyTask, example_data\n",
+    "\n",
+    "from intelligence_layer.evaluation.run.studio_runner_repository import (\n",
+    "    StudioRunnerRepository,\n",
+    ")\n",
+    "from intelligence_layer.evaluation.run.runner import Runner\n",
+    "from intelligence_layer.evaluation.dataset.studio_dataset_repository import StudioDatasetRepository\n",
+    "from intelligence_layer.connectors.data.data import DataClient\n",
+    "\n",
+    "from fsspec.implementations.local import LocalFileSystem"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "\n",
+    "studio_dataset_repository = StudioDatasetRepository(\n",
+    "    repository_id=\"\",\n",
+    "    data_client=DataClient(token=\"\", base_data_platform_url=\"http://localhost:8080\"),\n",
+    ")\n",
+    "run_repository = StudioRunnerRepository(\n",
+    "    file_system=LocalFileSystem(True),\n",
+    "    root_directory=Path(\"runs\"),\n",
+    "    output_type=str,\n",
+    "    studio_dataset_repository=studio_dataset_repository,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# How to run a task on a dataset\n",
+    "0. Create a suitable dataset (see [here](./how_to_create_a_dataset.ipynb)) and a task (see [here](./how_to_implement_a_task.ipynb)).\n",
+    "1. Initialize the task and a `RunRepository`, and open the correct `DatasetRepository`.\n",
+    "   - The `DatasetRepository` needs to contain the dataset.\n",
+    "   - The `RunRepository` stores results.\n",
+    "2. Use the `Runner` to run the task on the given dataset via `run_dataset`.\n",
+    "3. Save the id of the resulting `RunOverview`.\n",
+    "\n",
+    "### Example"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Step 0\n",
+    "my_example_data = example_data()\n",
+    "print()\n",
+    "\n",
+    "# Step 1\n",
+    "dataset_repository = my_example_data.dataset_repository\n",
+    "\n",
+    "task = DummyTask()\n",
+    "\n",
+    "# Step 2\n",
+    "runner = Runner(task, dataset_repository, run_repository, \"MyRunDescription\")\n",
+    "run_overview = runner.run_dataset(my_example_data.dataset.id)\n",
+    "\n",
+    "# Step 3\n",
+    "print(run_overview.id)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "intelligence-layer-d3iSWYpm-py3.10",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/src/intelligence_layer/connectors/data/data.py b/src/intelligence_layer/connectors/data/data.py
index c5da219ed..9885931ac 100644
--- a/src/intelligence_layer/connectors/data/data.py
+++ b/src/intelligence_layer/connectors/data/data.py
@@ -151,16 +151,17 @@ def create_dataset(self, repository_id: str, dataset: DatasetCreate) -> DataData
         url = urljoin(
             self.base_data_platform_url, f"api/v1/repositories/{repository_id}/datasets"
         )
+        body = {
+            "sourceData": dataset.source_data,
+            "labels": ",".join(dataset.labels),
+            "name": dataset.name,
+            "totalDatapoints": dataset.total_datapoints,
+            "metadata": json.dumps(dataset.metadata) if dataset.metadata else None,
+        }
         response = self._do_request(
             "POST",
             url,
-            files={
-                "source_data": dataset.source_data,
-                "labels": ",".join(dataset.labels),
-                "name": dataset.name,
-                "total_datapoints": dataset.total_datapoints,
-                "metadata": json.dumps(dataset.metadata) if dataset.metadata else None,
-            },
+            files={k: v for k, v in body.items() if v not in [None, ""]},
         )
         return DataDataset(**response.json())
diff --git a/src/intelligence_layer/connectors/data/models.py b/src/intelligence_layer/connectors/data/models.py
index 6e1f5de56..b3fdf2c26 100644
--- a/src/intelligence_layer/connectors/data/models.py
+++ b/src/intelligence_layer/connectors/data/models.py
@@ -63,7 +63,7 @@ class DataDataset(BaseDataModel):
     repository_id: str
     dataset_id: str
     name: Optional[str] = None
-    labels: list[str]
+    labels: Optional[list[str]] = None
     total_datapoints: int
     metadata: Optional[dict[str, Any]] = None
     created_at: datetime
diff --git 
a/src/intelligence_layer/evaluation/__init__.py b/src/intelligence_layer/evaluation/__init__.py index 079b3e563..c3292ef96 100644 --- a/src/intelligence_layer/evaluation/__init__.py +++ b/src/intelligence_layer/evaluation/__init__.py @@ -42,7 +42,9 @@ from .dataset.single_huggingface_dataset_repository import ( SingleHuggingfaceDatasetRepository as SingleHuggingfaceDatasetRepository, ) -from .dataset.studio_data_repository import StudioDataRepository as StudioDataRepository +from .dataset.studio_dataset_repository import ( + StudioDatasetRepository as StudioDatasetRepository, +) from .evaluation.domain import Evaluation as Evaluation from .evaluation.domain import EvaluationFailed as EvaluationFailed from .evaluation.domain import EvaluationOverview as EvaluationOverview @@ -103,6 +105,9 @@ from .evaluation.in_memory_evaluation_repository import ( InMemoryEvaluationRepository as InMemoryEvaluationRepository, ) +from .evaluation.studio_evaluation_repository import ( + StudioEvaluationRepository as StudioEvaluationRepository, +) from .infrastructure.hugging_face_repository import ( HuggingFaceRepository as HuggingFaceRepository, ) @@ -125,5 +130,8 @@ from .run.in_memory_run_repository import InMemoryRunRepository as InMemoryRunRepository from .run.run_repository import RunRepository as RunRepository from .run.runner import Runner as Runner +from .run.studio_runner_repository import ( + StudioRunnerRepository as StudioRunnerRepository, +) __all__ = [symbol for symbol in dir()] diff --git a/src/intelligence_layer/evaluation/aggregation/studio_aggregation_repository.py b/src/intelligence_layer/evaluation/aggregation/studio_aggregation_repository.py new file mode 100644 index 000000000..67c565911 --- /dev/null +++ b/src/intelligence_layer/evaluation/aggregation/studio_aggregation_repository.py @@ -0,0 +1,31 @@ +from pathlib import Path + +from fsspec.implementations.local import LocalFileSystem # type: ignore + +from intelligence_layer.evaluation.aggregation.domain import ( + AggregatedEvaluation, + AggregationOverview, +) +from intelligence_layer.evaluation.aggregation.file_aggregation_repository import FileSystemAggregationRepository +from intelligence_layer.evaluation.dataset.studio_dataset_repository import StudioDatasetRepository + + +class StudioAggregationRepository(FileSystemAggregationRepository): + """An :class:`AggregationRepository` that stores aggregation results in a Studio Repository.""" + + def __init__(self, file_system: LocalFileSystem, root_directory: Path, studio_dataset_repository: StudioDatasetRepository) -> None: + super().__init__(file_system, root_directory) + self.studio_dataset_repository = studio_dataset_repository + + + def store_aggregation_overview( + self, aggregation_overview: AggregationOverview[AggregatedEvaluation] + ) -> None: + super().store_aggregation_overview(aggregation_overview) + + _ = self.studio_dataset_repository.create_dataset( + examples=[aggregation_overview], + dataset_name=aggregation_overview.id, + labels=aggregation_overview.labels.union(set([aggregation_overview.id])), + metadata={"aggregation_id": aggregation_overview.id}, + ) \ No newline at end of file diff --git a/src/intelligence_layer/evaluation/dataset/studio_data_repository.py b/src/intelligence_layer/evaluation/dataset/studio_dataset_repository.py similarity index 92% rename from src/intelligence_layer/evaluation/dataset/studio_data_repository.py rename to src/intelligence_layer/evaluation/dataset/studio_dataset_repository.py index 39103da59..2f7577cb9 100644 --- 
a/src/intelligence_layer/evaluation/dataset/studio_data_repository.py +++ b/src/intelligence_layer/evaluation/dataset/studio_dataset_repository.py @@ -1,8 +1,11 @@ import json from collections.abc import Iterable -from typing import Optional +from typing import Any, Optional + +from pydantic import BaseModel from intelligence_layer.connectors.base.json_serializable import ( + JsonSerializable, SerializableDict, ) from intelligence_layer.connectors.data import DataClient @@ -16,7 +19,7 @@ ) -class StudioDataRepository(DatasetRepository): +class StudioDatasetRepository(DatasetRepository): """Dataset repository interface with Data Platform.""" def __init__(self, repository_id: str, data_client: DataClient) -> None: @@ -25,7 +28,7 @@ def __init__(self, repository_id: str, data_client: DataClient) -> None: def create_dataset( self, - examples: Iterable[Example[Input, ExpectedOutput]], + examples: Iterable[BaseModel], dataset_name: str, id: str | None = None, labels: set[str] | None = None, @@ -47,7 +50,6 @@ def create_dataset( raise NotImplementedError( "Custom dataset IDs are not supported by the Data Platform" ) - source_data_list = [example.model_dump_json() for example in examples] remote_dataset = self.data_client.create_dataset( repository_id=self.repository_id, @@ -62,7 +64,9 @@ def create_dataset( return Dataset( id=remote_dataset.dataset_id, name=remote_dataset.name or "", - labels=set(remote_dataset.labels) if labels is not None else set(), + labels=set(remote_dataset.labels) + if remote_dataset.labels is not None + else set(), metadata=remote_dataset.metadata or dict(), ) @@ -91,7 +95,9 @@ def dataset(self, dataset_id: str) -> Optional[Dataset]: return Dataset( id=remote_dataset.dataset_id, name=remote_dataset.name or "", - labels=set(remote_dataset.labels), + labels=set(remote_dataset.labels) + if remote_dataset.labels is not None + else set(), metadata=remote_dataset.metadata or dict(), ) @@ -107,7 +113,9 @@ def datasets(self) -> Iterable[Dataset]: yield Dataset( id=remote_dataset.dataset_id, name=remote_dataset.name or "", - labels=set(remote_dataset.labels), + labels=set(remote_dataset.labels) + if remote_dataset.labels is not None + else set(), metadata=remote_dataset.metadata or dict(), ) diff --git a/src/intelligence_layer/evaluation/evaluation/studio_evaluation_repository.py b/src/intelligence_layer/evaluation/evaluation/studio_evaluation_repository.py new file mode 100644 index 000000000..e1062ddf6 --- /dev/null +++ b/src/intelligence_layer/evaluation/evaluation/studio_evaluation_repository.py @@ -0,0 +1,30 @@ +from pathlib import Path + +from fsspec import AbstractFileSystem # type: ignore + +from intelligence_layer.connectors.base.json_serializable import JsonSerializable +from intelligence_layer.evaluation.dataset.studio_dataset_repository import StudioDatasetRepository +from intelligence_layer.evaluation.evaluation.domain import ( + Evaluation, + EvaluationOverview, +) + +from intelligence_layer.evaluation.evaluation.file_evaluation_repository import FileSystemEvaluationRepository + +class StudioEvaluationRepository(FileSystemEvaluationRepository): + """An :class:`EvaluationRepository` that stores evaluation results in a Studio Repository.""" + + def __init__(self, file_system: AbstractFileSystem, root_directory: Path, evaluation_type: type[Evaluation], studio_dataset_repository: StudioDatasetRepository) -> None: + super().__init__(file_system, root_directory) + self.studio_dataset_repository = studio_dataset_repository + self.evaluation_type = evaluation_type + + def 
store_evaluation_overview(self, overview: EvaluationOverview) -> None: + super().store_evaluation_overview(overview) + + _ = self.studio_dataset_repository.create_dataset( + examples=self.example_evaluations(overview.id, self.evaluation_type), + dataset_name=overview.id, + labels=overview.labels.union(set([overview.id])), + metadata=overview.model_dump(mode='json'), + ) \ No newline at end of file diff --git a/src/intelligence_layer/evaluation/run/studio_runner_repository.py b/src/intelligence_layer/evaluation/run/studio_runner_repository.py new file mode 100644 index 000000000..4442ad209 --- /dev/null +++ b/src/intelligence_layer/evaluation/run/studio_runner_repository.py @@ -0,0 +1,27 @@ +from pathlib import Path + +from fsspec import AbstractFileSystem # type: ignore + +from intelligence_layer.core.task import Output +from intelligence_layer.evaluation.dataset.studio_dataset_repository import StudioDatasetRepository +from intelligence_layer.evaluation.run.domain import RunOverview +from intelligence_layer.evaluation.run.file_run_repository import FileSystemRunRepository + +class StudioRunnerRepository(FileSystemRunRepository): + """An :class:`RunRepository` that stores run results in a Studio Repository.""" + + def __init__(self, file_system: AbstractFileSystem, root_directory: Path, output_type: type[Output], studio_dataset_repository: StudioDatasetRepository) -> None: + super().__init__(file_system, root_directory) + self.studio_dataset_repository = studio_dataset_repository + self.output_type = output_type + + + def store_run_overview(self, overview: RunOverview) -> None: + super().store_run_overview(overview) + + _ = self.studio_dataset_repository.create_dataset( + examples=self.example_outputs(overview.id, self.output_type), + dataset_name=overview.id, + labels=overview.labels.union(set([overview.id])), + metadata=overview.model_dump(mode="json") + ) \ No newline at end of file diff --git a/tests/connectors/data/test_data.py b/tests/connectors/data/test_data.py index 7710c29be..2a7a88830 100644 --- a/tests/connectors/data/test_data.py +++ b/tests/connectors/data/test_data.py @@ -121,8 +121,11 @@ def return_json_override() -> dict[Any, Any]: # Call the method repository = data_client.create_repository( + # Ignore because mypy does not support the dynamic transformation of pydantic.alias camelCase -> snake_case DataRepositoryCreate( - name="Repository 3", media_type="application/json", modality="text" + name="Repository 3", + mediaType="application/json", + modality="text", # type: ignore ) ) @@ -158,8 +161,11 @@ def test_create_repository_handles_request_exception( # Call the method with pytest.raises(DataInternalError): data_client.create_repository( + # Ignore because mypy does not support the dynamic transformation of pydantic.alias camelCase -> snake_case DataRepositoryCreate( - name="Repository 3", media_type="application/json", modality="image" + name="Repository 3", + mediaType="application/json", + modality="image", # type: ignore ) ) @@ -262,9 +268,7 @@ def return_json_override() -> dict[Any, Any]: files={ "source_data": b"source_data", "labels": "label1,label2", - "name": None, "total_datapoints": 100, - "metadata": None, }, ) diff --git a/tests/evaluation/aggregation/test_studio_aggregation_repository.py b/tests/evaluation/aggregation/test_studio_aggregation_repository.py new file mode 100644 index 000000000..300d171e5 --- /dev/null +++ b/tests/evaluation/aggregation/test_studio_aggregation_repository.py @@ -0,0 +1,113 @@ +from datetime import datetime +import os +from 
unittest.mock import Mock, patch +from pathlib import Path +from typing import Any +from urllib.parse import urljoin +import pytest +from fsspec import AbstractFileSystem # type: ignore +from pydantic import BaseModel +from intelligence_layer.connectors.data.data import DataClient +from intelligence_layer.evaluation.aggregation.domain import AggregationOverview +from intelligence_layer.evaluation.aggregation.studio_aggregation_repository import StudioAggregationRepository +from intelligence_layer.evaluation.dataset.studio_dataset_repository import StudioDatasetRepository +from intelligence_layer.evaluation.evaluation.domain import EvaluationOverview +from intelligence_layer.evaluation.evaluation.studio_evaluation_repository import StudioEvaluationRepository +from intelligence_layer.evaluation.run.domain import RunOverview +from intelligence_layer.evaluation.run.file_run_repository import FileSystemRunRepository +from intelligence_layer.evaluation.run.studio_runner_repository import StudioRunnerRepository +from intelligence_layer.core import Output + +class MockFileSystem(AbstractFileSystem): # type: ignore + pass + + +class MockEvaluationOutput(BaseModel): + evaluation_id: str + example_id: str + result: Any + +class MockExampleOutput(BaseModel): + output: Any + +@pytest.fixture +def mock_data_client() -> Mock: + return Mock(spec=DataClient, base_data_platform_url="http://localhost:3000") + + +@pytest.fixture +def mock_studio_dataset_repository(mock_data_client: Mock) -> StudioDatasetRepository: + return StudioDatasetRepository(repository_id="repo1", data_client=mock_data_client) + + +def test_upload_aggregation_overview_to_studio_repository(mock_studio_dataset_repository: StudioDatasetRepository, mock_data_client: Mock)-> None: + + mock_studio_dataset_repository.create_dataset = Mock() # type: ignore + mock_studio_dataset_repository.create_dataset.return_value = Mock( + id="dataset_id", + labels={"label1", "label2"}, + metadata={}, + name="Dataset 1", + ) + + # Create a mock overview + run_overview = RunOverview( + id="run1", + labels={"label1", "label2"}, + metadata={"metadata1": "value1", "metadata2": "value2"}, + start=datetime.now(), + end=datetime.now(), + failed_example_count=0, + successful_example_count=2, + description="description", + dataset_id="dataset_id", + ) + + evaluation_overview = EvaluationOverview( + run_overviews=frozenset([run_overview]), + id="evaluation1", + start_date=datetime.now(), + end_date=datetime.now(), + failed_evaluation_count=0, + successful_evaluation_count=1, + description="description", + metadata={"metadata1": "value1", "metadata2": "value2"}, + labels={"label1", "label2"}, + ) + + class Evaluation(BaseModel): + evaluation_id: str + example_id: str + result: Any + + aggregation_overview = AggregationOverview[Evaluation]( + evaluation_overviews=frozenset([evaluation_overview]), + id="aggregation1", + start=datetime.now(), + end=datetime.now(), + crashed_during_evaluation_count=0, + successful_evaluation_count=1, + statistics=Evaluation(evaluation_id="evaluation1", example_id="example1", result="result1"), + description="description", + metadata={"metadata1": "value1", "metadata2": "value2"}, + labels={"label1", "label2"}, + ) + + + # Create an instance of the StudioRunnerRepository + studio_evaluation_repository = StudioAggregationRepository( + file_system=MockFileSystem(), + root_directory=Path("/aggregation"), + studio_dataset_repository=mock_studio_dataset_repository, + ) + # Call the store_run_overview method + 
studio_evaluation_repository.store_aggregation_overview(aggregation_overview) + + # Assert that the create_dataset method was called with the correct arguments + mock_studio_dataset_repository.create_dataset.assert_called_once_with( + examples=[aggregation_overview], + dataset_name=aggregation_overview.id, + labels=aggregation_overview.labels.union(set([aggregation_overview.id])), + metadata={"aggregation_id": aggregation_overview.id}, + ) + \ No newline at end of file diff --git a/tests/evaluation/dataset/test_studio_data_repository.py b/tests/evaluation/dataset/test_studio_data_repository.py index 1b96759fc..5a9623476 100644 --- a/tests/evaluation/dataset/test_studio_data_repository.py +++ b/tests/evaluation/dataset/test_studio_data_repository.py @@ -8,8 +8,8 @@ Dataset, Example, ) -from intelligence_layer.evaluation.dataset.studio_data_repository import ( - StudioDataRepository, +from intelligence_layer.evaluation.dataset.studio_dataset_repository import ( + StudioDatasetRepository, ) @@ -19,8 +19,8 @@ def mock_data_client() -> Mock: @pytest.fixture -def studio_data_repository(mock_data_client: Mock) -> StudioDataRepository: - return StudioDataRepository(repository_id="repo1", data_client=mock_data_client) +def studio_dataset_repository(mock_data_client: Mock) -> StudioDatasetRepository: + return StudioDatasetRepository(repository_id="repo1", data_client=mock_data_client) class InputExample(BaseModel): @@ -32,7 +32,7 @@ class ExpectedOutputExample(BaseModel): def test_create_dataset( - studio_data_repository: StudioDataRepository, mock_data_client: Mock + studio_dataset_repository: StudioDatasetRepository, mock_data_client: Mock ) -> None: # Mock the data client's create_dataset method return_dataset_mock = Mock(spec=DataDataset) @@ -58,7 +58,7 @@ def test_create_dataset( ] # Call the method - dataset = studio_data_repository.create_dataset( + dataset = studio_dataset_repository.create_dataset( examples=examples, dataset_name="Dataset 1", labels={"label"}, metadata={} ) @@ -85,10 +85,10 @@ def test_create_dataset( def test_delete_dataset( - studio_data_repository: StudioDataRepository, mock_data_client: Mock + studio_dataset_repository: StudioDatasetRepository, mock_data_client: Mock ) -> None: # Call the method - studio_data_repository.delete_dataset(dataset_id="dataset1") + studio_dataset_repository.delete_dataset(dataset_id="dataset1") # Verify that the data client's delete_dataset method was called with the correct parameters mock_data_client.delete_dataset.assert_called_once_with( @@ -97,7 +97,7 @@ def test_delete_dataset( def test_dataset( - studio_data_repository: StudioDataRepository, mock_data_client: Mock + studio_dataset_repository: StudioDatasetRepository, mock_data_client: Mock ) -> None: # Mock the data client's get_dataset method return_dataset_mock = Mock(spec=DataDataset) @@ -107,7 +107,7 @@ def test_dataset( return_dataset_mock.name = "Dataset 1" mock_data_client.get_dataset.return_value = return_dataset_mock # Call the method - dataset = studio_data_repository.dataset(dataset_id="dataset1") + dataset = studio_dataset_repository.dataset(dataset_id="dataset1") # Assertions assert isinstance(dataset, Dataset) @@ -123,7 +123,7 @@ def test_dataset( def test_datasets( - studio_data_repository: StudioDataRepository, mock_data_client: Mock + studio_dataset_repository: StudioDatasetRepository, mock_data_client: Mock ) -> None: # Mock the data client's list_datasets method return_dataset_mock = Mock(spec=DataDataset) @@ -144,7 +144,7 @@ def test_datasets( ] # Call the method - 
datasets = list(studio_data_repository.datasets()) + datasets = list(studio_dataset_repository.datasets()) # Assertions assert len(datasets) == 2 @@ -164,7 +164,7 @@ def test_datasets( def test_dataset_ids( - studio_data_repository: StudioDataRepository, mock_data_client: Mock + studio_dataset_repository: StudioDatasetRepository, mock_data_client: Mock ) -> None: # Mock the data client's list_datasets method return_dataset_mock = Mock(spec=DataDataset) @@ -185,7 +185,7 @@ def test_dataset_ids( ] # Call the method - dataset_ids = list(studio_data_repository.dataset_ids()) + dataset_ids = list(studio_dataset_repository.dataset_ids()) # Assertions assert len(dataset_ids) == 2 @@ -197,7 +197,7 @@ def test_dataset_ids( def test_example( - studio_data_repository: StudioDataRepository, mock_data_client: Mock + studio_dataset_repository: StudioDatasetRepository, mock_data_client: Mock ) -> None: # Mock the data client's stream_dataset method mock_data_client.stream_dataset.return_value = [ @@ -206,7 +206,7 @@ def test_example( ] # Call the method - example = studio_data_repository.example( + example = studio_dataset_repository.example( dataset_id="dataset1", example_id="example1", input_type=InputExample, @@ -226,7 +226,7 @@ def test_example( def test_examples( - studio_data_repository: StudioDataRepository, mock_data_client: Mock + studio_dataset_repository: StudioDatasetRepository, mock_data_client: Mock ) -> None: # Mock the data client's stream_dataset method mock_data_client.stream_dataset.return_value = [ @@ -236,7 +236,7 @@ def test_examples( # Call the method examples = list( - studio_data_repository.examples( + studio_dataset_repository.examples( dataset_id="dataset1", input_type=InputExample, expected_output_type=ExpectedOutputExample, diff --git a/tests/evaluation/evaluation/test_studio_evaluation_repository.py b/tests/evaluation/evaluation/test_studio_evaluation_repository.py new file mode 100644 index 000000000..7f429ff4e --- /dev/null +++ b/tests/evaluation/evaluation/test_studio_evaluation_repository.py @@ -0,0 +1,100 @@ +from datetime import datetime +import os +from unittest.mock import Mock, patch +from pathlib import Path +from typing import Any +from urllib.parse import urljoin +import pytest +from fsspec import AbstractFileSystem # type: ignore +from pydantic import BaseModel +from intelligence_layer.connectors.data.data import DataClient +from intelligence_layer.evaluation.dataset.studio_dataset_repository import StudioDatasetRepository +from intelligence_layer.evaluation.evaluation.domain import EvaluationOverview +from intelligence_layer.evaluation.evaluation.studio_evaluation_repository import StudioEvaluationRepository +from intelligence_layer.evaluation.run.domain import RunOverview +from intelligence_layer.evaluation.run.file_run_repository import FileSystemRunRepository +from intelligence_layer.evaluation.run.studio_runner_repository import StudioRunnerRepository +from intelligence_layer.core import Output + +class MockFileSystem(AbstractFileSystem): # type: ignore + pass + + +class MockEvaluationOutput(BaseModel): + evaluation_id: str + example_id: str + result: Any + +class MockExampleOutput(BaseModel): + output: Any + +@pytest.fixture +def mock_data_client() -> Mock: + return Mock(spec=DataClient, base_data_platform_url="http://localhost:3000") + + +@pytest.fixture +def mock_studio_dataset_repository(mock_data_client: Mock) -> StudioDatasetRepository: + return StudioDatasetRepository(repository_id="repo1", data_client=mock_data_client) + + +def 
test_upload_evaluation_to_studio(mock_studio_dataset_repository: StudioDatasetRepository, mock_data_client: Mock)-> None: + + mock_studio_dataset_repository.create_dataset = Mock() # type: ignore + mock_studio_dataset_repository.create_dataset.return_value = Mock( + id="dataset_id", + labels={"label1", "label2"}, + metadata={}, + name="Dataset 1", + ) + + # Create a mock overview + run_overview = RunOverview( + id="run1", + labels={"label1", "label2"}, + metadata={"metadata1": "value1", "metadata2": "value2"}, + start=datetime.now(), + end=datetime.now(), + failed_example_count=0, + successful_example_count=2, + description="description", + dataset_id="dataset_id", + ) + + evaluation_overview = EvaluationOverview( + run_overviews=frozenset([run_overview]), + id="evaluation1", + start_date=datetime.now(), + end_date=datetime.now(), + failed_evaluation_count=0, + successful_evaluation_count=1, + description="description", + metadata={"metadata1": "value1", "metadata2": "value2"}, + labels={"label1", "label2"}, + ) + + # Create an instance of the StudioRunnerRepository + studio_evaluation_repository = StudioEvaluationRepository( + file_system=MockFileSystem(), + root_directory=Path("/root"), + evaluation_type=MockEvaluationOutput, + studio_dataset_repository=mock_studio_dataset_repository, + ) + + studio_evaluation_repository.example_evaluations = Mock() # type: ignore + studio_evaluation_repository.example_evaluations.return_value = [ + MockEvaluationOutput(example_id="example1", evaluation_id="evaluation1", result=MockExampleOutput(output="output1")), + MockEvaluationOutput(example_id="example2", evaluation_id="evaluation1", result=MockExampleOutput(output="output2")), + ] + + # Call the store_run_overview method + studio_evaluation_repository.store_evaluation_overview(evaluation_overview) + + # Assert that the create_dataset method was called with the correct arguments + mock_studio_dataset_repository.create_dataset.assert_called_once_with( + examples=studio_evaluation_repository.example_evaluations.return_value, + dataset_name=evaluation_overview.id, + labels=evaluation_overview.labels.union(set([evaluation_overview.id])), + metadata=evaluation_overview.model_dump(mode='json'), + ) + \ No newline at end of file diff --git a/tests/evaluation/run/test_studio_runner_repository.py b/tests/evaluation/run/test_studio_runner_repository.py new file mode 100644 index 000000000..7f5026da6 --- /dev/null +++ b/tests/evaluation/run/test_studio_runner_repository.py @@ -0,0 +1,89 @@ +from datetime import datetime +import os +from unittest.mock import Mock, patch +from pathlib import Path +from typing import Any +from urllib.parse import urljoin +import pytest +from fsspec import AbstractFileSystem # type: ignore +from pydantic import BaseModel +from intelligence_layer.connectors.data.data import DataClient +from intelligence_layer.evaluation.dataset.studio_dataset_repository import StudioDatasetRepository +from intelligence_layer.evaluation.run.domain import RunOverview +from intelligence_layer.evaluation.run.file_run_repository import FileSystemRunRepository +from intelligence_layer.evaluation.run.studio_runner_repository import StudioRunnerRepository +from intelligence_layer.core import Output + +class MockFileSystem(AbstractFileSystem): # type: ignore + pass + +class InputExample(BaseModel): + data: str + + +class ExpectedOutputExample(BaseModel): + data: str + + +class MockExampleOutput(BaseModel): + example_id: str + run_id: str + output: Any + +@pytest.fixture +def mock_data_client() -> Mock: + 
return Mock(spec=DataClient, base_data_platform_url="http://localhost:3000") + + +@pytest.fixture +def mock_studio_data_repository(mock_data_client: Mock) -> StudioDatasetRepository: + return StudioDatasetRepository(repository_id="repo1", data_client=mock_data_client) + + +def test_upload_to_studio_runner_repository(mock_studio_data_repository: StudioDatasetRepository, mock_data_client: Mock)-> None: + + mock_studio_data_repository.create_dataset = Mock() # type: ignore + mock_studio_data_repository.create_dataset.return_value = Mock( + id="dataset_id", + labels={"label1", "label2"}, + metadata={}, + name="Dataset 1", + ) + + # Create a mock overview + overview = RunOverview( + id="run1", + labels={"label1", "label2"}, + metadata={"metadata1": "value1", "metadata2": "value2"}, + start=datetime.now(), + end=datetime.now(), + failed_example_count=0, + successful_example_count=2, + description="description", + dataset_id="dataset_id", + ) + + # Create an instance of the StudioRunnerRepository + studio_runner_repository = StudioRunnerRepository( + file_system=MockFileSystem(), + root_directory=Path("/root"), + output_type=MockExampleOutput, + studio_dataset_repository=mock_studio_data_repository, + ) + + studio_runner_repository.example_outputs = Mock() # type: ignore + studio_runner_repository.example_outputs.return_value = [ + MockExampleOutput(example_id="example1", run_id="run1", output="output1"), + MockExampleOutput(example_id="example2", run_id="run1", output="output2"), + ] + + # Call the store_run_overview method + studio_runner_repository.store_run_overview(overview) + + # Assert that the create_dataset method was called with the correct arguments + mock_studio_data_repository.create_dataset.assert_called_once_with( + examples=studio_runner_repository.example_outputs.return_value, + dataset_name=overview.id, + labels=overview.labels.union(set([overview.id])), + metadata=overview.model_dump(mode="json") + ) \ No newline at end of file
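Editor's note: the PR adds a `StudioAggregationRepository`, but none of the new how-to notebooks demonstrates it. The following is only an illustrative sketch, not part of the diff: the constructor arguments mirror the run and evaluation repositories above, while the token, URL, and repository id are placeholders, and the `Aggregator` wiring itself is not shown in this PR.

```python
# Hedged sketch of wiring the new StudioAggregationRepository; placeholder values,
# not taken from this PR.
from pathlib import Path

from fsspec.implementations.local import LocalFileSystem

from intelligence_layer.connectors.data.data import DataClient
from intelligence_layer.evaluation.aggregation.studio_aggregation_repository import (
    StudioAggregationRepository,
)
from intelligence_layer.evaluation.dataset.studio_dataset_repository import (
    StudioDatasetRepository,
)

# The dataset repository handles the actual upload to Studio via the DataClient.
studio_dataset_repository = StudioDatasetRepository(
    repository_id="",  # placeholder: your Studio repository id
    data_client=DataClient(token="", base_data_platform_url="http://localhost:8080"),
)

# Stores aggregation results on the local file system and, in addition,
# pushes each stored AggregationOverview to Studio as a dataset.
aggregation_repository = StudioAggregationRepository(
    file_system=LocalFileSystem(True),
    root_directory=Path("aggregations"),
    studio_dataset_repository=studio_dataset_repository,
)

# Pass aggregation_repository wherever an AggregationRepository is expected;
# store_aggregation_overview() then labels the uploaded dataset with the
# aggregation id, as shown in the repository implementation above.
```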