Skip to content

Commit

Permalink
fix: Fix test for incremental evaluator.
Browse files: browse the repository at this point in the history
 Rename test_diff_evaluator.py to test_incremental_evaluator.py
Task: IL-394
  • Loading branch information
SebastianNiehusAA committed May 16, 2024
1 parent d0c4e58 commit d2b9cb6
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 6 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -58,7 +58,7 @@ def do_evaluate(
[output for output in outputs if output.run_id in run_output_ids]
)

- return self.do_incremental_evaluate(example, outputs, already_evaluated_outputs)
+ return self.do_incremental_evaluate(example, list(outputs), already_evaluated_outputs)

@abstractmethod
def do_incremental_evaluate(
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -14,7 +14,7 @@


class DummyEvaluation(BaseModel):
- new_run_ids: list[str]
+ all_run_ids: list[str]
old_run_ids: list[list[str]]


Expand All @@ -29,7 +29,7 @@ def do_incremental_evaluate(
already_evaluated_outputs: list[list[SuccessfulExampleOutput[str]]],
) -> DummyEvaluation:
return DummyEvaluation(
- new_run_ids=[output.run_id for output in outputs],
+ all_run_ids=[output.run_id for output in outputs],
old_run_ids=[
[output.run_id for output in evaluated_output]
for evaluated_output in already_evaluated_outputs
Expand All @@ -46,7 +46,7 @@ def do_run(self, input: str, tracer: Tracer) -> str:
return f"{input} {self._info}"


- def test_incremental_evaluator_should_filter_previous_run_ids() -> None:
+ def test_incremental_evaluator_separates_all_runs_and_previous_runs() -> None:
# Given
examples = [Example(input="a", expected_output="0", id="id_0")]
dataset_repository = InMemoryDatasetRepository()
Expand Down Expand Up @@ -89,7 +89,7 @@ def create_run(name: str) -> str:
iter(evaluator.evaluation_lineages(second_evaluation_overview.id))
).evaluation.result
assert isinstance(second_result, DummyEvaluation)
- assert second_result.new_run_ids == [second_run_id]
+ assert second_result.all_run_ids == [first_run_id, second_run_id]
assert second_result.old_run_ids == [[first_run_id]]

independent_run_id = create_run("independent")
Expand All @@ -115,6 +115,6 @@ def create_run(name: str) -> str:
iter(evaluator.evaluation_lineages(third_evaluation_overview.id))
).evaluation.result
assert isinstance(third_result, DummyEvaluation)
- assert third_result.new_run_ids == [third_run_id]
+ assert sorted(third_result.all_run_ids) == sorted([first_run_id, second_run_id, independent_run_id, third_run_id])
assert sorted(third_result.old_run_ids[0]) == sorted([first_run_id, second_run_id])
assert sorted(third_result.old_run_ids[1]) == sorted([independent_run_id])

0 comments on commit d2b9cb6

Please sign in to comment.