Skip to content

Commit

Permalink
Fixes for rebase onto main
Browse files Browse the repository at this point in the history
  • Loading branch information
SebastianNiehusTNG committed Apr 3, 2024
1 parent 169969a commit 24ec873
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 20 deletions.
2 changes: 1 addition & 1 deletion src/examples/data/classify_examples.json
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@
},
{
"label": "Human Resources",
"message": "I want to take a week off immediatly"
"message": "I want to take a week off immediately"
},
{
"label": "Human Resources",
Expand Down
2 changes: 1 addition & 1 deletion src/examples/evaluation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@
" examples=[\n",
" Example(\n",
" input=ClassifyInput(chunk=TextChunk(item[\"text\"]), labels=all_labels),\n",
" expected_output=item[\"label_name\"],\n",
" expected_output=item[\"label_name\"][0],\n",
" )\n",
" for item in data\n",
" ],\n",
Expand Down
15 changes: 8 additions & 7 deletions src/examples/user_journey.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@
"source": [
"from intelligence_layer.core import TextChunk, InMemoryTracer\n",
"from intelligence_layer.use_cases import PromptBasedClassify, ClassifyInput\n",
"from dotenv import load_dotenv\n",
"\n",
"load_dotenv()\n",
"\n",
"# instantiating the default task\n",
"prompt_based_classify = PromptBasedClassify()\n",
Expand Down Expand Up @@ -146,8 +148,9 @@
" expected_output=example[\"label\"],\n",
" )\n",
" for example in labeled_examples\n",
" ]\n",
")"
" ],\n",
" dataset_name=\"MyDataset\",\n",
").id"
]
},
{
Expand Down Expand Up @@ -179,8 +182,6 @@
"metadata": {},
"outputs": [],
"source": [
"from dotenv import load_dotenv\n",
"\n",
"from intelligence_layer.evaluation import (\n",
" Evaluator,\n",
" InMemoryEvaluationRepository,\n",
Expand All @@ -194,7 +195,6 @@
" SingleLabelClassifyAggregationLogic,\n",
")\n",
"\n",
"load_dotenv()\n",
"\n",
"# we need a few repositories to store runs, evals and aggregated evaluations\n",
"run_repository = InMemoryRunRepository()\n",
Expand Down Expand Up @@ -367,8 +367,9 @@
" expected_output=example[\"label\"],\n",
" )\n",
" for example in labeled_examples\n",
" ]\n",
")"
" ],\n",
" dataset_name=\"CleanedDataset\",\n",
").id"
]
},
{
Expand Down
9 changes: 3 additions & 6 deletions src/intelligence_layer/use_cases/classify/classify.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,24 +84,21 @@ class PerformanceScores(BaseModel):


class AggregatedLabelInfo(BaseModel):
scores: PerformanceScores
expected_share: float
actual_share: float
expected_count: int
predicted_count: int


class AggregatedSingleLabelClassifyEvaluation(BaseModel):
"""The aggregated evaluation of a single label classify implementation against a dataset.
Attributes:
percentage_correct: Percentage of answers that were considered to be correct.
js_divergence: Divergence between expected and predicted distributions (Jensen-Shannon divergence).
confusion_matrix: How often each label was confused with each other.
by_label: Each label along with a couple of aggregated statistics.
missing_labels: Each label missing from the results accompanied by the missing count.
"""

percentage_correct: float
js_divergence: float
confusion_matrix: Mapping[tuple[str, str], int]
by_label: Mapping[str, AggregatedLabelInfo]
missing_labels: Mapping[str, int]
Expand All @@ -118,7 +115,7 @@ def aggregate(
acc = MeanAccumulator()
missing_labels: dict[str, int] = defaultdict(int)
confusion_matrix: dict[tuple[str, str], int] = defaultdict(int)
by_label: dict[str, Mapping[str, int]] = defaultdict(lambda: defaultdict(int))
by_label: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))
for evaluation in evaluations:
acc.add(1.0 if evaluation.correct else 0.0)
if evaluation.expected_label_missing:
Expand Down
10 changes: 5 additions & 5 deletions tests/use_cases/classify/test_prompt_based_classify.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def classify_evaluator(
) -> Evaluator[
ClassifyInput,
SingleLabelClassifyOutput,
Sequence[str],
str,
SingleLabelClassifyEvaluation,
]:
return Evaluator(
Expand Down Expand Up @@ -197,7 +197,7 @@ def test_can_evaluate_classify(
chunk=TextChunk("This is good"),
labels=frozenset({"positive", "negative"}),
),
expected_output=["positive"],
expected_output="positive",
)

dataset_id = in_memory_dataset_repository.create_dataset(
Expand Down Expand Up @@ -230,20 +230,20 @@ def test_can_aggregate_evaluations(
in_memory_dataset_repository: InMemoryDatasetRepository,
classify_runner: Runner[ClassifyInput, SingleLabelClassifyOutput],
) -> None:
positive_lst: Sequence[str] = ["positive"]
positive: str = "positive"
correct_example = Example(
input=ClassifyInput(
chunk=TextChunk("This is good"),
labels=frozenset({"positive", "negative"}),
),
expected_output=positive_lst,
expected_output=positive,
)
incorrect_example = Example(
input=ClassifyInput(
chunk=TextChunk("This is extremely bad"),
labels=frozenset({"positive", "negative"}),
),
expected_output=positive_lst,
expected_output=positive,
)
dataset_id = in_memory_dataset_repository.create_dataset(
examples=[correct_example, incorrect_example], dataset_name="test-dataset"
Expand Down

0 comments on commit 24ec873

Please sign in to comment.