Skip to content

Commit

Permalink
IL-258 Rebase and clean up
Browse files Browse the repository at this point in the history
  • Loading branch information
SebastianNiehusTNG committed Feb 15, 2024
1 parent 6655bf2 commit 2d96a03
Show file tree
Hide file tree
Showing 21 changed files with 84 additions and 139 deletions.
62 changes: 0 additions & 62 deletions src/intelligence_layer/core/__init__.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,3 @@
from intelligence_layer.core.evaluation.data_storage.dataset_repository import (
DatasetRepository as DatasetRepository,
)
from intelligence_layer.core.evaluation.data_storage.dataset_repository import (
FileDatasetRepository as FileDatasetRepository,
)
from intelligence_layer.core.evaluation.data_storage.dataset_repository import (
InMemoryDatasetRepository as InMemoryDatasetRepository,
)
from intelligence_layer.core.evaluation.data_storage.evaluation_repository import (
ArgillaEvaluationRepository as ArgillaEvaluationRepository,
)
from intelligence_layer.core.evaluation.data_storage.evaluation_repository import (
EvaluationRepository as EvaluationRepository,
)
from intelligence_layer.core.evaluation.data_storage.evaluation_repository import (
FileEvaluationRepository as FileEvaluationRepository,
)
from intelligence_layer.core.evaluation.data_storage.evaluation_repository import (
InMemoryEvaluationRepository as InMemoryEvaluationRepository,
)
from intelligence_layer.core.evaluation.data_storage.run_repository import (
FileRunRepository as FileRunRepository,
)
from intelligence_layer.core.evaluation.data_storage.run_repository import (
InMemoryRunRepository as InMemoryRunRepository,
)
from intelligence_layer.core.evaluation.data_storage.run_repository import (
RunRepository as RunRepository,
)
from intelligence_layer.core.evaluation.instruct_comparison_argilla_evaluator import (
InstructComparisonArgillaEvaluator as InstructComparisonArgillaEvaluator,
)
from intelligence_layer.core.evaluation.runner import Runner as Runner
from intelligence_layer.core.intelligence_app import (
AuthenticatedIntelligenceApp as AuthenticatedIntelligenceApp,
)
Expand All @@ -55,34 +21,6 @@
from .echo import EchoInput as EchoInput
from .echo import EchoOutput as EchoOutput
from .echo import EchoTask as EchoTask
from .evaluation.accumulator import MeanAccumulator as MeanAccumulator
from .evaluation.domain import Evaluation as Evaluation
from .evaluation.domain import EvaluationOverview as EvaluationOverview
from .evaluation.domain import Example as Example
from .evaluation.domain import ExampleEvaluation as ExampleEvaluation
from .evaluation.domain import ExampleOutput as ExampleOutput
from .evaluation.domain import ExampleTrace as ExampleTrace
from .evaluation.domain import ExpectedOutput as ExpectedOutput
from .evaluation.domain import FailedExampleEvaluation as FailedExampleEvaluation
from .evaluation.domain import LogTrace as LogTrace
from .evaluation.domain import RunOverview as RunOverview
from .evaluation.domain import SpanTrace as SpanTrace
from .evaluation.domain import SuccessfulExampleOutput as SuccessfulExampleOutput
from .evaluation.domain import TaskSpanTrace as TaskSpanTrace
from .evaluation.elo import EloCalculator as EloCalculator
from .evaluation.elo import Payoff as Payoff
from .evaluation.elo import PayoffMatrix as PayoffMatrix
from .evaluation.elo import PlayerScore as PlayerScore
from .evaluation.elo import WinRateCalculator as WinRateCalculator
from .evaluation.evaluator import ArgillaEvaluator as ArgillaEvaluator
from .evaluation.evaluator import BaseEvaluator as BaseEvaluator
from .evaluation.evaluator import Evaluator as Evaluator
from .evaluation.graders import BleuGrader as BleuGrader
from .evaluation.graders import RougeGrader as RougeGrader
from .evaluation.graders import RougeScores as RougeScores
from .evaluation.hugging_face import (
HuggingFaceDatasetRepository as HuggingFaceDatasetRepository,
)
from .explain import Explain as Explain
from .explain import ExplainInput as ExplainInput
from .explain import ExplainOutput as ExplainOutput
Expand Down
31 changes: 22 additions & 9 deletions src/intelligence_layer/evaluation/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,26 @@
from .accumulator import MeanAccumulator as MeanAccumulator
from .dataset_repository import FileDatasetRepository as FileDatasetRepository
from .dataset_repository import InMemoryDatasetRepository as InMemoryDatasetRepository
from .data_storage.dataset_repository import DatasetRepository as DatasetRepository
from .data_storage.dataset_repository import (
FileDatasetRepository as FileDatasetRepository,
)
from .data_storage.dataset_repository import (
InMemoryDatasetRepository as InMemoryDatasetRepository,
)
from .data_storage.evaluation_repository import (
ArgillaEvaluationRepository as ArgillaEvaluationRepository,
)
from .data_storage.evaluation_repository import (
EvaluationRepository as EvaluationRepository,
)
from .data_storage.evaluation_repository import (
FileEvaluationRepository as FileEvaluationRepository,
)
from .data_storage.evaluation_repository import (
InMemoryEvaluationRepository as InMemoryEvaluationRepository,
)
from .data_storage.run_repository import FileRunRepository as FileRunRepository
from .data_storage.run_repository import InMemoryRunRepository as InMemoryRunRepository
from .data_storage.run_repository import RunRepository as RunRepository
from .domain import Evaluation as Evaluation
from .domain import EvaluationFailed as EvaluationFailed
from .domain import EvaluationOverview as EvaluationOverview
Expand All @@ -20,15 +40,8 @@
from .elo import PayoffMatrix as PayoffMatrix
from .elo import PlayerScore as PlayerScore
from .elo import WinRateCalculator as WinRateCalculator
from .evaluation_repository import FileEvaluationRepository as FileEvaluationRepository
from .evaluation_repository import (
InMemoryEvaluationRepository as InMemoryEvaluationRepository,
)
from .evaluator import ArgillaEvaluationRepository as ArgillaEvaluationRepository
from .evaluator import ArgillaEvaluator as ArgillaEvaluator
from .evaluator import BaseEvaluator as BaseEvaluator
from .evaluator import DatasetRepository as DatasetRepository
from .evaluator import EvaluationRepository as EvaluationRepository
from .evaluator import Evaluator as Evaluator
from .graders import BleuGrader as BleuGrader
from .graders import RougeGrader as RougeGrader
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,9 @@
from fsspec import AbstractFileSystem # type: ignore
from fsspec.implementations.local import LocalFileSystem # type: ignore

<<<<<<<< HEAD:src/intelligence_layer/evaluation/dataset_repository.py
========
from intelligence_layer.core.evaluation.domain import Example, ExpectedOutput
>>>>>>>> 0a6ce90 (IL-258 Move Fix docstring of IndividualEvaluationOverview):src/intelligence_layer/core/evaluation/data_storage/dataset_repository.py
from intelligence_layer.core.task import Input
from intelligence_layer.core import Input
from intelligence_layer.core.tracer import JsonSerializer, PydanticSerializable
from intelligence_layer.evaluation.domain import Example, ExpectedOutput
from intelligence_layer.evaluation.evaluator import DatasetRepository


class DatasetRepository(ABC):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,47 +5,25 @@

from pydantic import BaseModel, ValidationError

<<<<<<<< HEAD:src/intelligence_layer/evaluation/evaluation_repository.py
from intelligence_layer.core.task import Output
from intelligence_layer.core.tracer import (
FileTracer,
InMemoryTaskSpan,
InMemoryTracer,
JsonSerializer,
PydanticSerializable,
Tracer,
)
from intelligence_layer.evaluation.domain import (
========
from intelligence_layer.connectors.argilla.argilla_client import (
ArgillaClient,
ArgillaEvaluation,
)
from intelligence_layer.core.evaluation.data_storage.utils import read_utf8, write_utf8
from intelligence_layer.core.evaluation.domain import (
>>>>>>>> 0a6ce90 (IL-258 Move Fix docstring of IndividualEvaluationOverview):src/intelligence_layer/core/evaluation/data_storage/evaluation_repository.py
from intelligence_layer.core import Output
from intelligence_layer.core.tracer import FileTracer, JsonSerializer, Tracer
from intelligence_layer.evaluation.data_storage.utils import read_utf8, write_utf8
from intelligence_layer.evaluation.domain import (
Evaluation,
ExampleEvaluation,
ExampleOutput,
FailedExampleEvaluation,
IndividualEvaluationOverview,
RunOverview,
<<<<<<<< HEAD:src/intelligence_layer/evaluation/evaluation_repository.py
TaskSpanTrace,
)
from intelligence_layer.evaluation.evaluator import (
EvaluationOverviewType,
EvaluationRepository,
)
========
)
from intelligence_layer.core.task import Output
from intelligence_layer.core.tracer import FileTracer, JsonSerializer, Tracer

EvaluationOverviewType = TypeVar(
"EvaluationOverviewType", bound=IndividualEvaluationOverview
)
>>>>>>>> 0a6ce90 (IL-258 Move Fix docstring of IndividualEvaluationOverview):src/intelligence_layer/core/evaluation/data_storage/evaluation_repository.py


class SerializedExampleEvaluation(BaseModel):
Expand Down
22 changes: 11 additions & 11 deletions src/intelligence_layer/evaluation/data_storage/run_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,6 @@
from pathlib import Path
from typing import Iterable, Optional, Sequence, cast

from intelligence_layer.core.evaluation.data_storage.utils import (
_parse_log,
read_utf8,
write_utf8,
)
from intelligence_layer.core.evaluation.domain import (
ExampleOutput,
ExampleTrace,
RunOverview,
TaskSpanTrace,
)
from intelligence_layer.core.task import Output
from intelligence_layer.core.tracer import (
FileTracer,
Expand All @@ -23,6 +12,17 @@
PydanticSerializable,
Tracer,
)
from intelligence_layer.evaluation.data_storage.utils import (
_parse_log,
read_utf8,
write_utf8,
)
from intelligence_layer.evaluation.domain import (
ExampleOutput,
ExampleTrace,
RunOverview,
TaskSpanTrace,
)


class RunRepository(ABC):
Expand Down
10 changes: 9 additions & 1 deletion src/intelligence_layer/evaluation/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,15 @@
RecordData,
)
from intelligence_layer.core.task import Input, Output
from intelligence_layer.core.tracer import Tracer, utc_now
from intelligence_layer.core.tracer import utc_now
from intelligence_layer.evaluation.data_storage.dataset_repository import (
DatasetRepository,
)
from intelligence_layer.evaluation.data_storage.evaluation_repository import (
ArgillaEvaluationRepository,
EvaluationRepository,
)
from intelligence_layer.evaluation.data_storage.run_repository import RunRepository
from intelligence_layer.evaluation.domain import (
AggregatedEvaluation,
Evaluation,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,15 @@
Question,
RecordData,
)
from intelligence_layer.evaluation.data_storage.dataset_repository import DatasetRepository
from intelligence_layer.evaluation.data_storage.evaluation_repository import ArgillaEvaluationRepository
from intelligence_layer.core.complete import InstructInput, PromptOutput
from intelligence_layer.evaluation.accumulator import MeanAccumulator
from intelligence_layer.evaluation.data_storage.dataset_repository import (
DatasetRepository,
)
from intelligence_layer.evaluation.data_storage.evaluation_repository import (
ArgillaEvaluationRepository,
)
from intelligence_layer.evaluation.data_storage.run_repository import RunRepository
from intelligence_layer.evaluation.domain import Example, SuccessfulExampleOutput
from intelligence_layer.evaluation.elo import (
EloCalculator,
Expand All @@ -23,7 +28,7 @@
PlayerScore,
WinRateCalculator,
)

from intelligence_layer.evaluation.evaluator import ArgillaEvaluator


class AggregatedInstructComparison(BaseModel):
Expand Down
9 changes: 7 additions & 2 deletions src/intelligence_layer/evaluation/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,13 @@
from intelligence_layer.connectors.limited_concurrency_client import (
LimitedConcurrencyClient,
)
from intelligence_layer.evaluation.data_storage.dataset_repository import FileDatasetRepository
from intelligence_layer.evaluation.data_storage.evaluation_repository import FileEvaluationRepository
from intelligence_layer.evaluation.data_storage.dataset_repository import (
FileDatasetRepository,
)
from intelligence_layer.evaluation.data_storage.evaluation_repository import (
FileEvaluationRepository,
)
from intelligence_layer.evaluation.data_storage.run_repository import FileRunRepository
from intelligence_layer.evaluation.runner import Runner


Expand Down
7 changes: 4 additions & 3 deletions src/intelligence_layer/evaluation/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,17 @@

from intelligence_layer.core.task import Input, Output, Task
from intelligence_layer.core.tracer import CompositeTracer, Tracer, utc_now
from intelligence_layer.evaluation.data_storage.dataset_repository import (
DatasetRepository,
)
from intelligence_layer.evaluation.data_storage.run_repository import RunRepository
from intelligence_layer.evaluation.domain import (
Example,
ExampleOutput,
ExpectedOutput,
FailedExampleRun,
RunOverview,
)
from intelligence_layer.evaluation.data_storage.dataset_repository import DatasetRepository
from intelligence_layer.evaluation.data_storage.run_repository import RunRepository



class Runner(Generic[Input, Output]):
Expand Down
2 changes: 1 addition & 1 deletion src/intelligence_layer/use_cases/summarize/summarize.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
MeanAccumulator,
RougeGrader,
)
from intelligence_layer.core.evaluation.data_storage.run_repository import RunRepository
from intelligence_layer.evaluation.data_storage.run_repository import RunRepository


class LongContextSummarizeInput(BaseModel):
Expand Down
2 changes: 1 addition & 1 deletion tests/evaluation/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
FileEvaluationRepository,
FileRunRepository,
InMemoryDatasetRepository,
InMemoryEvaluationRepository,
InMemoryRunRepository,
Runner,
RunOverview,
)
Expand Down
2 changes: 1 addition & 1 deletion tests/evaluation/test_argilla_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
Runner,
SuccessfulExampleOutput,
)
from intelligence_layer.core.evaluation.data_storage.run_repository import (
from intelligence_layer.evaluation.data_storage.run_repository import (
InMemoryRunRepository,
)
from tests.conftest import DummyStringInput, DummyStringOutput, DummyStringTask
Expand Down
5 changes: 0 additions & 5 deletions tests/evaluation/test_evaluation_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,15 @@
from pydantic import BaseModel
from pytest import fixture

from intelligence_layer.core import InMemoryTaskSpan
from intelligence_layer.core.tracer import CompositeTracer, InMemoryTracer
from intelligence_layer.evaluation import (
EvaluationOverview,
EvaluationRepository,
ExampleEvaluation,
ExampleOutput,
ExampleTrace,
FailedExampleEvaluation,
FileEvaluationRepository,
InMemoryEvaluationRepository,
TaskSpanTrace,
)
from tests.conftest import DummyStringInput
from tests.evaluation.conftest import DummyAggregatedEvaluation, DummyEvaluation


Expand Down
1 change: 1 addition & 0 deletions tests/evaluation/test_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
FailedExampleEvaluation,
InMemoryDatasetRepository,
InMemoryEvaluationRepository,
InMemoryRunRepository,
MeanAccumulator,
Runner,
SuccessfulExampleOutput,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
Question,
RecordData,
)
from intelligence_layer.core import ArgillaEvaluationRepository
from intelligence_layer.core.complete import InstructInput, PromptOutput
from intelligence_layer.core.prompt_template import PromptWithMetadata
from intelligence_layer.core.tracer import utc_now
Expand All @@ -26,6 +25,7 @@
ExampleOutput,
InMemoryDatasetRepository,
InMemoryEvaluationRepository,
InMemoryRunRepository,
InstructComparisonArgillaEvaluator,
Payoff,
PayoffMatrix,
Expand Down
1 change: 0 additions & 1 deletion tests/evaluation/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from intelligence_layer.connectors import AlephAlphaClientProtocol
from intelligence_layer.core import Task, TaskSpan
from intelligence_layer.evaluation import (
DatasetRepository,
EvaluationOverview,
Evaluator,
Example,
Expand Down
Loading

0 comments on commit 2d96a03

Please sign in to comment.