diff --git a/CHANGELOG.md b/CHANGELOG.md index c6bb4f600..f1077423a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ### Breaking Changes ### New Features +- feature: Add SingleLabelClassifyFailedExampleIterator for easy retrieval of failed examples. - feature: Error information is printed to the console on failed runs and evaluations. - feature: The stack trace of a failed run/evaluation is included in the `FailedExampleRun`/`FailedExampleEvaluation` object - feature: The `Runner.run_dataset` and `Evaluator.evaluate_run` have an optional flag `abort_on_error` to stop running/evaluating when an error occurs. diff --git a/src/examples/user_journey.ipynb b/src/examples/user_journey.ipynb index 3cf2484f1..1dc569768 100644 --- a/src/examples/user_journey.ipynb +++ b/src/examples/user_journey.ipynb @@ -22,9 +22,7 @@ " ClassifyInput,\n", " PromptBasedClassify,\n", " SingleLabelClassifyAggregationLogic,\n", - " SingleLabelClassifyEvaluation,\n", " SingleLabelClassifyEvaluationLogic,\n", - " SingleLabelClassifyOutput,\n", ")\n", "import json\n", "\n", @@ -319,10 +317,11 @@ "metadata": {}, "outputs": [], "source": [ + "from intelligence_layer.use_cases.classify.classify import (\n", + " SingleLabelClassifyFailedExampleIterator,\n", + ")\n", "\n", - "from intelligence_layer.use_cases.classify.classify import FailedExampleIterator\n", - "\n", - "failed_example_iterator = FailedExampleIterator(\n", + "failed_example_iterator = SingleLabelClassifyFailedExampleIterator(\n", " dataset_repository, run_repository, evaluation_repository\n", ")\n", "list(failed_example_iterator.get_examples(eval_overview.id))" @@ -509,7 +508,6 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "list(failed_example_iterator.get_examples(eval_overview_prompt_adjusted.id))" ] }, diff --git a/src/intelligence_layer/use_cases/classify/classify.py b/src/intelligence_layer/use_cases/classify/classify.py index 7e28e0271..59e2ab376 100644 --- a/src/intelligence_layer/use_cases/classify/classify.py +++ b/src/intelligence_layer/use_cases/classify/classify.py @@ -168,7 +168,7 @@ def do_evaluate_single_output( ) -class FailedExampleIterator: +class SingleLabelClassifyFailedExampleIterator: def __init__( self, dataset_repository: DatasetRepository,