From 9511782485d39107c75ac17bd6353581e8313244 Mon Sep 17 00:00:00 2001 From: Sebastian Niehus Date: Fri, 17 May 2024 08:29:14 +0200 Subject: [PATCH] fix: Start adding review comments TASK: IL-394 --- CHANGELOG.md | 6 +++--- tests/evaluation/test_elo_evaluator.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aedddb08f..918d3ce04 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,10 +3,10 @@ ## Unreleased ### Breaking Changes - - Changed the behavior of `IncrementalEvaluator::do_evaluate` such that it now promotes all output to `do_incremental_evaluate`instead of only the new outputs. + - Changed the behavior of `IncrementalEvaluator::do_evaluate` such that it now sends all `SuccessfulExampleOutput`s to `do_incremental_evaluate` instead of only the new `SuccessfulExampleOutput`s. - ### New Features - - Add generic `EloEvaluator` class and `EloEvaluationLogic`for implementation of Elo evaluation use cases. + - Add generic `EloEvaluator` class and `EloEvaluationLogic` for implementation of Elo evaluation use cases. - Add `EloQaEvaluator` and `EloQaEvaluationLogic` for Elo evaluation of QA runs. - Add `IncrementalEloQaEvaluator` and `IncrementalEloQaEvaluationLogic` for Elo evaluation of QA runs with later addition of more runs to an existing evaluation. - Add `EloAggregationAdapter` class to simplify using the `ComparisonEvaluationAggregationLogic` for different Elo use cases. @@ -14,7 +14,7 @@ ### Fixes ... ### Deprecations -...lint +... ## 1.2.0 diff --git a/tests/evaluation/test_elo_evaluator.py b/tests/evaluation/test_elo_evaluator.py index 51cec543a..8698dbe1c 100644 --- a/tests/evaluation/test_elo_evaluator.py +++ b/tests/evaluation/test_elo_evaluator.py @@ -8,10 +8,11 @@ ControlModel, Language, LuminousControlModel, + NoOpTracer, TextChunk, + Tracer, utc_now, ) -from intelligence_layer.core.tracer.tracer import NoOpTracer, Tracer from intelligence_layer.evaluation import ( ComparisonEvaluation, EloEvaluationLogic, @@ -29,8 +30,6 @@ ) from intelligence_layer.examples import SingleChunkQaInput, SingleChunkQaOutput -load_dotenv() - class DummyEloQaEvalLogic( EloEvaluationLogic[SingleChunkQaInput, SingleChunkQaOutput, SingleChunkQaOutput] @@ -40,6 +39,7 @@ def __init__( model: ControlModel, tracer: Tracer = NoOpTracer(), ): + load_dotenv() super().__init__() self._model = model self.tracer = tracer