Skip to content

Commit

Permalink
IL-258: Split up EvaluationRepository (#503)
Browse files: browse the repository at this point in the history
  • Loading branch information
NiklasKoehneckeAA authored Feb 15, 2024
1 parent 1bbc5ad commit ee15ddf
Show file tree
Hide file tree
Showing 34 changed files with 860 additions and 690 deletions.
10 changes: 7 additions & 3 deletions src/examples/classification.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@
"from intelligence_layer.use_cases import ClassifyInput, PromptBasedClassify\n",
"from intelligence_layer.core import Chunk, InMemoryTracer\n",
"\n",
"from dotenv import load_dotenv\n",
"\n",
"load_dotenv()\n",
"\n",
"text_to_classify = Chunk(\"In the distant future, a space exploration party embarked on a thrilling journey to the uncharted regions of the galaxy. \\n\\\n",
"With excitement in their hearts and the cosmos as their canvas, they ventured into the unknown, discovering breathtaking celestial wonders. \\n\\\n",
"As they gazed upon distant stars and nebulas, they forged unforgettable memories that would forever bind them as pioneers of the cosmos.\")\n",
Expand Down Expand Up @@ -425,7 +429,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "3.10-intelligence",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -439,9 +443,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
10 changes: 7 additions & 3 deletions src/examples/document_index.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@
"\n",
"from intelligence_layer.connectors import DocumentIndexClient\n",
"\n",
"from dotenv import load_dotenv\n",
"\n",
"load_dotenv()\n",
"\n",
"\n",
"document_index = DocumentIndexClient(token=getenv(\"AA_TOKEN\"), base_document_index_url = \"https://document-index.aleph-alpha.com\")\n",
"?document_index"
Expand Down Expand Up @@ -282,7 +286,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "intelligence-layer-jSYEeheU-py3.10",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -296,9 +300,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
20 changes: 11 additions & 9 deletions src/examples/evaluation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -44,19 +44,21 @@
"from dotenv import load_dotenv\n",
"\n",
"from intelligence_layer.connectors import LimitedConcurrencyClient\n",
"from intelligence_layer.evaluation import InMemoryEvaluationRepository, InMemoryDatasetRepository, Runner\n",
"from intelligence_layer.evaluation import InMemoryEvaluationRepository, InMemoryRunRepository, InMemoryDatasetRepository, Runner\n",
"from intelligence_layer.use_cases import SingleLabelClassifyEvaluator, PromptBasedClassify\n",
"\n",
"load_dotenv()\n",
"\n",
"client = LimitedConcurrencyClient.from_token(os.getenv(\"AA_TOKEN\"))\n",
"task = PromptBasedClassify(client)\n",
"evaluation_repository = InMemoryEvaluationRepository()\n",
"dataset_repository = InMemoryDatasetRepository()\n",
"run_repository = InMemoryRunRepository()\n",
"evaluation_repository = InMemoryEvaluationRepository()\n",
"\n",
"\n",
"\n",
"evaluator = SingleLabelClassifyEvaluator(evaluation_repository, dataset_repository, \"singel-label-classify\")\n",
"runner = Runner(task, evaluation_repository, dataset_repository, \"prompt-based-classify\")\n"
"evaluator = SingleLabelClassifyEvaluator(dataset_repository, run_repository, evaluation_repository, \"singel-label-classify\")\n",
"runner = Runner(task, dataset_repository, run_repository, \"prompt-based-classify\")\n"
]
},
{
Expand Down Expand Up @@ -257,8 +259,8 @@
" ]\n",
")\n",
"\n",
"embedding_based_classify_evaluator = MultiLabelClassifyEvaluator(evaluation_repository, dataset_repository, \"multi-label-classify\", threshold=0.6)\n",
"embedding_based_classify_runner = Runner(embedding_based_classify, evaluation_repository, dataset_repository, \"embedding-based-classify\")\n"
"embedding_based_classify_evaluator = MultiLabelClassifyEvaluator(dataset_repository, run_repository, evaluation_repository, \"multi-label-classify\", threshold=0.6)\n",
"embedding_based_classify_runner = Runner(embedding_based_classify,dataset_repository, run_repository, \"embedding-based-classify\")\n"
]
},
{
Expand Down Expand Up @@ -305,7 +307,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "intelligence-layer-tfT-HG2V-py3.11",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -319,9 +321,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
22 changes: 16 additions & 6 deletions src/examples/human_evaluation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
" Example,\n",
" InMemoryDatasetRepository,\n",
" InMemoryEvaluationRepository,\n",
" InMemoryRunRepository,\n",
" Runner,\n",
" SuccessfulExampleOutput\n",
")\n",
Expand Down Expand Up @@ -171,8 +172,8 @@
"outputs": [],
"source": [
"task = Instruct(client, model=\"luminous-base-control\")\n",
"evaluation_repository = InMemoryEvaluationRepository()\n",
"runner = Runner(task, evaluation_repository, dataset_repository, \"Instruct\")\n",
"run_repository = InMemoryRunRepository()\n",
"runner = Runner(task,dataset_repository, run_repository, \"Instruct\")\n",
"run_overview = runner.run_dataset(dataset_id)"
]
},
Expand Down Expand Up @@ -321,10 +322,12 @@
" \n",
"argilla_client = DefaultArgillaClient()\n",
"workspace_id = argilla_client.create_workspace(\"test\")\n",
"evaluation_repository = InMemoryEvaluationRepository()\n",
"\n",
"evaluator = InstructArgillaEvaluator(\n",
" ArgillaEvaluationRepository(evaluation_repository, argilla_client),\n",
" dataset_repository,\n",
" run_repository,\n",
" ArgillaEvaluationRepository(evaluation_repository, argilla_client),\n",
" \"instruct\",\n",
" workspace_id,\n",
" fields,\n",
Expand Down Expand Up @@ -371,11 +374,18 @@
" output = evaluator.aggregate_evaluation(eval_overview.id)\n",
" print(output.statistics)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "intelligence-layer-WXd7Z3vu-py3.11",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -389,9 +399,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
10 changes: 7 additions & 3 deletions src/examples/qa.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@
"load_dotenv()\n",
"from intelligence_layer.connectors import LimitedConcurrencyClient\n",
"\n",
"from dotenv import load_dotenv\n",
"\n",
"load_dotenv()\n",
"\n",
"client = LimitedConcurrencyClient.from_token(getenv(\"AA_TOKEN\"))"
]
},
Expand Down Expand Up @@ -338,7 +342,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "3.10-intelligence",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -352,9 +356,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
21 changes: 8 additions & 13 deletions src/examples/quickstart_task.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -289,13 +289,6 @@
" KeywordExtractionAggregatedEvaluation,\n",
" ]\n",
"):\n",
" def __init__(\n",
" self, evaluation_repository: EvaluationRepository, dataset_repository: DatasetRepository, description: str\n",
" ) -> None:\n",
" \"\"\"We recommend adding the task to the init method of the evaluator\n",
"\n",
" This allows for easy comparing of different implementations of the same task.\"\"\"\n",
" super().__init__(evaluation_repository, dataset_repository, description)\n",
"\n",
" def do_evaluate(\n",
" self,\n",
Expand Down Expand Up @@ -342,12 +335,14 @@
"outputs": [],
"source": [
"from intelligence_layer.core import NoOpTracer\n",
"from intelligence_layer.evaluation import InMemoryDatasetRepository, InMemoryEvaluationRepository, Runner, Example\n",
"from intelligence_layer.evaluation import InMemoryDatasetRepository, InMemoryEvaluationRepository, InMemoryRunRepository, Runner, Example\n",
"\n",
"evaluation_repository = InMemoryEvaluationRepository()\n",
"dataset_repository = InMemoryDatasetRepository()\n",
"evaluator = KeywordExtractionEvaluator(evaluation_repository, dataset_repository, \"keyword-extraction\")\n",
"runner = Runner(task, evaluation_repository, dataset_repository, \"keyword-extraction\")\n",
"run_repository = InMemoryRunRepository()\n",
"evaluation_repository = InMemoryEvaluationRepository()\n",
"\n",
"evaluator = KeywordExtractionEvaluator(dataset_repository, run_repository, evaluation_repository, \"keyword-extraction\")\n",
"runner = Runner(task, dataset_repository, run_repository, \"keyword-extraction\")\n",
"\n",
"input = KeywordExtractionInput(text=\"This is a text about dolphins and sharks.\")\n",
"expected_output = KeywordExtractionExpectedOutput(keywords=[\"dolphins\", \"sharks\"])\n",
Expand Down Expand Up @@ -428,7 +423,7 @@
"outputs": [],
"source": [
"examples = list(dataset_repository.examples_by_id(dataset_id, evaluator.input_type(), evaluator.expected_output_type()))\n",
"last_example_result = evaluation_repository.example_trace(\n",
"last_example_result = run_repository.example_trace(\n",
" next(iter(evaluation.run_overviews)).id, examples[-1].id\n",
")\n",
"last_example_result.trace\n"
Expand Down Expand Up @@ -498,7 +493,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
"version": "3.10.12"
}
},
"nbformat": 4,
Expand Down
10 changes: 7 additions & 3 deletions src/examples/summarize.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@
"\n",
"from intelligence_layer.connectors import LimitedConcurrencyClient\n",
"\n",
"from dotenv import load_dotenv\n",
"\n",
"load_dotenv()\n",
"\n",
"client = LimitedConcurrencyClient.from_token(getenv(\"AA_TOKEN\"))\n"
]
},
Expand Down Expand Up @@ -194,7 +198,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "3.10-intelligence",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -208,9 +212,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
6 changes: 6 additions & 0 deletions src/intelligence_layer/core/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
from intelligence_layer.core.intelligence_app import (
AuthenticatedIntelligenceApp as AuthenticatedIntelligenceApp,
)
from intelligence_layer.core.intelligence_app import AuthService as AuthService
from intelligence_layer.core.intelligence_app import IntelligenceApp as IntelligenceApp

from .chunk import Chunk as Chunk
from .chunk import ChunkInput as ChunkInput
from .chunk import ChunkOutput as ChunkOutput
Expand Down
31 changes: 22 additions & 9 deletions src/intelligence_layer/evaluation/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,26 @@
from .accumulator import MeanAccumulator as MeanAccumulator
from .dataset_repository import FileDatasetRepository as FileDatasetRepository
from .dataset_repository import InMemoryDatasetRepository as InMemoryDatasetRepository
from .data_storage.dataset_repository import DatasetRepository as DatasetRepository
from .data_storage.dataset_repository import (
FileDatasetRepository as FileDatasetRepository,
)
from .data_storage.dataset_repository import (
InMemoryDatasetRepository as InMemoryDatasetRepository,
)
from .data_storage.evaluation_repository import (
ArgillaEvaluationRepository as ArgillaEvaluationRepository,
)
from .data_storage.evaluation_repository import (
EvaluationRepository as EvaluationRepository,
)
from .data_storage.evaluation_repository import (
FileEvaluationRepository as FileEvaluationRepository,
)
from .data_storage.evaluation_repository import (
InMemoryEvaluationRepository as InMemoryEvaluationRepository,
)
from .data_storage.run_repository import FileRunRepository as FileRunRepository
from .data_storage.run_repository import InMemoryRunRepository as InMemoryRunRepository
from .data_storage.run_repository import RunRepository as RunRepository
from .domain import Evaluation as Evaluation
from .domain import EvaluationFailed as EvaluationFailed
from .domain import EvaluationOverview as EvaluationOverview
Expand All @@ -20,15 +40,8 @@
from .elo import PayoffMatrix as PayoffMatrix
from .elo import PlayerScore as PlayerScore
from .elo import WinRateCalculator as WinRateCalculator
from .evaluation_repository import FileEvaluationRepository as FileEvaluationRepository
from .evaluation_repository import (
InMemoryEvaluationRepository as InMemoryEvaluationRepository,
)
from .evaluator import ArgillaEvaluationRepository as ArgillaEvaluationRepository
from .evaluator import ArgillaEvaluator as ArgillaEvaluator
from .evaluator import BaseEvaluator as BaseEvaluator
from .evaluator import DatasetRepository as DatasetRepository
from .evaluator import EvaluationRepository as EvaluationRepository
from .evaluator import Evaluator as Evaluator
from .graders import BleuGrader as BleuGrader
from .graders import RougeGrader as RougeGrader
Expand Down
Empty file.
Loading

0 comments on commit ee15ddf

Please sign in to comment.