feat: Add FailedExampleIterator for retrieval of failed examples and adapt user_journey.ipynb

Task IL-367
SebastianNiehusTNG authored and JohannesWesch committed Apr 4, 2024
1 parent b51d98f commit 5475833
Showing 2 changed files with 45 additions and 3 deletions.
4 changes: 1 addition & 3 deletions src/examples/user_journey.ipynb
@@ -386,9 +386,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"This confirms it: some expected labels are missing. Let's try fixing this.\n",
"\n",
"We can do this two ways: Adjust our set of labels or adjust the eval set. In this case, we'll do the latter.\n"
"\n"
]
},
{
44 changes: 44 additions & 0 deletions src/intelligence_layer/use_cases/classify/classify.py
@@ -7,10 +7,15 @@
from intelligence_layer.core import TextChunk
from intelligence_layer.evaluation import (
AggregationLogic,
DatasetRepository,
EvaluationRepository,
Example,
MeanAccumulator,
RepositoryNavigator,
RunRepository,
SingleOutputEvaluationLogic,
)
from intelligence_layer.evaluation.evaluation.domain import FailedExampleEvaluation

Probability = NewType("Probability", float)

@@ -111,6 +116,11 @@ def aggregate(
confusion_matrix[(evaluation.predicted, evaluation.expected)] += 1
by_label[evaluation.predicted]["predicted"] += 1
by_label[evaluation.expected]["expected"] += 1

if len(missing_labels) > 0:
warn_message = "[WARNING] There were examples with expected labels missing in the evaluation inputs. For a detailed list, see the 'statistics.missing_labels' field of the returned `AggregationOverview`."
warnings.warn(warn_message, RuntimeWarning)

return AggregatedSingleLabelClassifyEvaluation(
percentage_correct=acc.extract(),
confusion_matrix=confusion_matrix,
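
The new warning points the caller at the aggregated statistics for the full list of missing labels. Below is a minimal sketch of how a caller might surface that field; the `aggregator` argument and its `aggregate_evaluation` call are assumptions about the library's aggregation API and are not part of this diff.

import warnings


def aggregate_and_report_missing_labels(aggregator, evaluation_overview_id: str):
    # Sketch only: `aggregator` is assumed to be an already-configured
    # intelligence_layer Aggregator; the `statistics.missing_labels` field
    # follows the warning text introduced in this commit.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        overview = aggregator.aggregate_evaluation(evaluation_overview_id)

    if any(issubclass(w.category, RuntimeWarning) for w in caught):
        # Detailed list of expected labels never seen in the evaluation inputs.
        print(overview.statistics.missing_labels)
    return overview
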
@@ -158,6 +168,40 @@ def do_evaluate_single_output(
)


class FailedExampleIterator:
def __init__(
self,
dataset_repository: DatasetRepository,
run_repository: RunRepository,
evaluation_repository: EvaluationRepository,
):
self.repository_navigator = RepositoryNavigator(
dataset_repository, run_repository, evaluation_repository
)

# TODO: Add test
def get_examples(
self, evaluation_overview_id: str, first_n: int = 0
) -> Iterable[Example[ClassifyInput, str]]:
evaluation_lineages = self.repository_navigator.evaluation_lineages(
evaluation_id=evaluation_overview_id,
input_type=ClassifyInput,
expected_output_type=str,
output_type=SingleLabelClassifyOutput,
evaluation_type=SingleLabelClassifyEvaluation,
)
count_yielded = 0
for lineage in evaluation_lineages:
if first_n != 0 and count_yielded >= first_n:
break
if (
isinstance(lineage.evaluation.result, FailedExampleEvaluation)
or not lineage.evaluation.result.correct
):
count_yielded += 1
yield lineage.example


class MultiLabelClassifyEvaluation(BaseModel):
"""The evaluation of a single multi-label classification example.

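For reference, a minimal usage sketch of the new FailedExampleIterator. The in-memory repository classes, the import path (derived from the file location in this diff), and the evaluation overview id are illustrative assumptions, not part of this commit; in practice the repositories would be the ones that already hold the dataset, run, and evaluation data.

from intelligence_layer.evaluation import (
    InMemoryDatasetRepository,
    InMemoryEvaluationRepository,
    InMemoryRunRepository,
)
from intelligence_layer.use_cases.classify.classify import FailedExampleIterator

# Assumed setup: empty in-memory repositories stand in for the repositories
# used during the actual evaluation run.
failed_example_iterator = FailedExampleIterator(
    dataset_repository=InMemoryDatasetRepository(),
    run_repository=InMemoryRunRepository(),
    evaluation_repository=InMemoryEvaluationRepository(),
)

# Yields examples that either failed outright or were classified incorrectly;
# first_n=10 stops after ten such examples (first_n=0 means no limit).
for example in failed_example_iterator.get_examples(
    "my-evaluation-overview-id", first_n=10
):
    print(example.input.chunk, example.expected_output)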