Skip to content

Commit

Permalink
refactor: make most how-tos runnable (#757)
Browse files Browse the repository at this point in the history
Task: IL-423
  • Loading branch information
NiklasKoehneckeAA authored Apr 18, 2024
1 parent eafb2e6 commit 2db940f
Show file tree
Hide file tree
Showing 5 changed files with 94 additions and 52 deletions.
26 changes: 22 additions & 4 deletions src/examples/how_tos/example_data.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Sequence
from typing import Iterable, Sequence

from pydantic import BaseModel

Expand All @@ -16,6 +16,7 @@
RunOverview,
SuccessfulExampleOutput,
)
from intelligence_layer.evaluation.aggregation.aggregator import AggregationLogic


class DummyExample(Example[str, str]):
Expand All @@ -41,6 +42,15 @@ def do_evaluate(
)


# Minimal aggregation result model used by the runnable how-to examples.
class DummyAggregation(BaseModel):
    # Number of evaluations that were folded into this aggregation.
    num_evaluations: int

class DummyAggregationLogic(AggregationLogic[DummyEvaluation, DummyAggregation]):
    """Toy aggregation logic for the how-to notebooks.

    The "aggregate" of a set of evaluations is simply how many
    evaluations there were.
    """

    def aggregate(self, evaluations: Iterable[DummyEvaluation]) -> DummyAggregation:
        # Count lazily instead of materializing the iterable into a list;
        # the result is identical for any finite iterable.
        evaluation_count = sum(1 for _ in evaluations)
        return DummyAggregation(num_evaluations=evaluation_count)


class ExampleData:
examples: Sequence[DummyExample]
dataset_repository: InMemoryDatasetRepository
Expand All @@ -51,7 +61,8 @@ class ExampleData:
dataset: Dataset
run_overview_1: RunOverview
run_overview_2: RunOverview
evaluation_overview: EvaluationOverview
evaluation_overview_1: EvaluationOverview
evaluation_overview_2: EvaluationOverview


def example_data() -> ExampleData:
Expand All @@ -78,7 +89,12 @@ def example_data() -> ExampleData:
"my-evaluator",
DummyEvaluationLogic(),
)
evaluation_overview = evaluator.evaluate_runs(run_overview_1.id, run_overview_2.id)
evaluation_overview_1 = evaluator.evaluate_runs(
run_overview_1.id, run_overview_2.id
)
evaluation_overview_2 = evaluator.evaluate_runs(
run_overview_1.id, run_overview_2.id
)

example_data = ExampleData()
example_data.examples = examples
Expand All @@ -90,5 +106,7 @@ def example_data() -> ExampleData:
example_data.dataset = dataset
example_data.run_overview_1 = run_overview_1
example_data.run_overview_2 = run_overview_2
example_data.evaluation_overview = evaluation_overview
example_data.evaluation_overview_1 = evaluation_overview_1
example_data.evaluation_overview_2 = evaluation_overview_2

return example_data
37 changes: 25 additions & 12 deletions src/examples/how_tos/how_to_aggregate_evaluations.ipynb
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from example_data import DummyAggregationLogic, example_data\n",
"\n",
"from intelligence_layer.evaluation.aggregation.aggregator import Aggregator\n",
"from intelligence_layer.evaluation.aggregation.in_memory_aggregation_repository import (\n",
" InMemoryAggregationRepository,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand All @@ -20,15 +34,21 @@
"metadata": {},
"outputs": [],
"source": [
"%%script false --no-raise-error # the following code does not execute as the evaluations do not exist\n",
"\n",
"# Step 0\n",
"evaluation_ids = [\"eval_of_interest\", \"other_eval_of_interest\"]\n",
"\n",
"\n",
"my_example_data = example_data()\n",
"print()\n",
"\n",
"evaluation_ids = [\n",
" my_example_data.evaluation_overview_1.id,\n",
" my_example_data.evaluation_overview_2.id,\n",
"]\n",
"\n",
"# Step 1\n",
"evaluation_repository = InMemoryEvaluationRepository()\n",
"evaluation_repository = my_example_data.evaluation_repository\n",
"aggregation_repository = InMemoryAggregationRepository()\n",
"aggregation_logic = SingleLabelClassifyAggregationLogic()\n",
"aggregation_logic = DummyAggregationLogic()\n",
"\n",
"# Step 2\n",
"aggregator = Aggregator(\n",
Expand All @@ -42,13 +62,6 @@
"# Step 3\n",
"print(aggregation_overview.id)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
43 changes: 27 additions & 16 deletions src/examples/how_tos/how_to_evaluate_runs.ipynb
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from example_data import DummyEvaluationLogic, example_data\n",
"\n",
"from intelligence_layer.evaluation.evaluation.evaluator import Evaluator\n",
"from intelligence_layer.evaluation.evaluation.in_memory_evaluation_repository import (\n",
" InMemoryEvaluationRepository,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand All @@ -25,34 +39,31 @@
"metadata": {},
"outputs": [],
"source": [
"%%script false --no-raise-error # the following code does not execute as the runs do not exist\n",
"\n",
"# Step 0\n",
"run_ids = [\"run_id_of_interest\", \"other_run_id_of_interest\"]\n",
"my_example_data = example_data()\n",
"print()\n",
"run_ids = [my_example_data.run_overview_1.id, my_example_data.run_overview_2.id]\n",
"\n",
"# Step 1\n",
"dataset_repository = InMemoryDatasetRepository()\n",
"run_repository = InMemoryRunRepository()\n",
"dataset_repository = my_example_data.dataset_repository\n",
"run_repository = my_example_data.run_repository\n",
"evaluation_repository = InMemoryEvaluationRepository()\n",
"evaluation_logic = SingleLabelClassifyEvaluationLogic()\n",
"evaluation_logic = DummyEvaluationLogic()\n",
"\n",
"# Step 3\n",
"evaluator = Evaluator(dataset_repository, run_repository, evaluation_repository, \"My joke evaluation\", evaluation_logic)\n",
"evaluator = Evaluator(\n",
" dataset_repository,\n",
" run_repository,\n",
" evaluation_repository,\n",
" \"My dummy evaluation\",\n",
" evaluation_logic,\n",
")\n",
"\n",
"evaluation_overview = evaluator.evaluate_runs(*run_ids)\n",
"\n",
"# Step 4\n",
"print(evaluation_overview.id)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"```python\n",
"```"
]
}
],
"metadata": {
Expand Down
9 changes: 1 addition & 8 deletions src/examples/how_tos/how_to_retrieve_data_for_analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
")\n",
"\n",
"# retrieve all evaluations, and an evaluation for an example\n",
"my_evaluation_id = example_data.evaluation_overview.id\n",
"my_evaluation_id = example_data.evaluation_overview_1.id\n",
"my_evaluations = evaluation_repository.example_evaluations(\n",
" my_evaluation_id, evaluation_type=DummyEvaluation\n",
")\n",
Expand Down Expand Up @@ -117,13 +117,6 @@
"my_lineage = my_evaluator.evaluation_lineage(my_evaluation_id, my_example_id)\n",
"display(my_lineage)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
31 changes: 19 additions & 12 deletions src/examples/how_tos/how_to_run_a_task_on_a_dataset.ipynb
Original file line number Diff line number Diff line change
@@ -1,12 +1,26 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from example_data import DummyTask, example_data\n",
"\n",
"from intelligence_layer.evaluation.run.in_memory_run_repository import (\n",
" InMemoryRunRepository,\n",
")\n",
"from intelligence_layer.evaluation.run.runner import Runner"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# How to run a task on a dataset\n",
"0. Create a suitable dataset (see [here](./how_to_create_a_dataset.ipynb)) and a task (see [here](./how_to_implement_a_task.ipynb)).\n",
"1. Initialize the task, a `DatasetRepository` and a `RunRepository`\n",
"1. Initialize the task and a `RunRepository`, and open the correct `DatasetRepository`\n",
" - The `DatasetRepository` needs to contain the dataset.\n",
" - The `RunRepository` stores results.\n",
"2. Use the `Runner` to run the task on the given dataset via `run_dataset`\n",
Expand All @@ -21,25 +35,18 @@
"metadata": {},
"outputs": [],
"source": [
"%%script false --no-raise-error # the following code does not execute as the dataset does not exist\n",
"\n",
"# Step 0\n",
"dataset_id = \"my-dataset-id\"\n",
"\n",
"my_example_data = example_data()\n",
"print()\n",
"\n",
"# Step 1\n",
"class DummyTask(Task[None, None]):\n",
" def do_run(self, input: None, task_span: TaskSpan) -> None:\n",
" return None\n",
"\n",
"\n",
"dataset_repository = InMemoryDatasetRepository()\n",
"dataset_repository = my_example_data.dataset_repository\n",
"run_repository = InMemoryRunRepository()\n",
"task = DummyTask()\n",
"\n",
"# Step 2\n",
"runner = Runner(task, dataset_repository, run_repository, \"MyRunDescription\")\n",
"run_overview = runner.run_dataset(dataset_id)\n",
"run_overview = runner.run_dataset(my_example_data.dataset.id)\n",
"\n",
"# Step 3\n",
"print(run_overview.id)"
Expand Down

0 comments on commit 2db940f

Please sign in to comment.