Skip to content

Commit

Permalink
deleted argilla evaluation repository
Browse files Browse the repository at this point in the history
  • Loading branch information
JohannesWesch committed May 7, 2024
1 parent f2f1426 commit 1cb61ec
Show file tree
Hide file tree
Showing 10 changed files with 169 additions and 454 deletions.
17 changes: 3 additions & 14 deletions src/intelligence_layer/evaluation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,12 @@
)
from .aggregation.aggregator import AggregationLogic as AggregationLogic
from .aggregation.aggregator import Aggregator as Aggregator
from .aggregation.argilla_aggregator import (
AggregatedInstructComparison as AggregatedInstructComparison,
)
from .aggregation.argilla_aggregator import ArgillaAggregator as ArgillaAggregator
from .aggregation.argilla_aggregator import (
InstructComparisonArgillaAggregationLogic as InstructComparisonArgillaAggregationLogic,
)
from .aggregation.argilla_aggregator import PlayerScore as PlayerScore
from .aggregation.domain import AggregatedEvaluation as AggregatedEvaluation
from .aggregation.domain import AggregationOverview as AggregationOverview
from .aggregation.elo import EloCalculator as EloCalculator
from .aggregation.elo import (
InstructComparisonEvaluation as InstructComparisonEvaluation,
)
from .aggregation.elo import MatchOutcome as MatchOutcome
from .aggregation.elo import WinRateCalculator as WinRateCalculator
from .aggregation.file_aggregation_repository import (
Expand Down Expand Up @@ -45,12 +40,6 @@
from .dataset.single_huggingface_dataset_repository import (
SingleHuggingfaceDatasetRepository as SingleHuggingfaceDatasetRepository,
)
from .evaluation.argilla_evaluation_repository import (
ArgillaEvaluationRepository as ArgillaEvaluationRepository,
)
from .evaluation.argilla_evaluation_repository import (
RecordDataSequence as RecordDataSequence,
)
from .evaluation.argilla_evaluator import (
ArgillaEvaluationLogic as ArgillaEvaluationLogic,
)
Expand Down
117 changes: 0 additions & 117 deletions src/intelligence_layer/evaluation/aggregation/argilla_aggregator.py

This file was deleted.

68 changes: 67 additions & 1 deletion src/intelligence_layer/evaluation/aggregation/elo.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
from collections import defaultdict
import random
from collections import Counter, defaultdict
from enum import Enum
from typing import Iterable, Mapping, Sequence

import numpy as np
from pydantic import BaseModel

from intelligence_layer.evaluation.aggregation.accumulator import MeanAccumulator
from intelligence_layer.evaluation.aggregation.aggregator import AggregationLogic


class MatchOutcome(str, Enum):
Expand Down Expand Up @@ -96,3 +101,64 @@ def calculate(
player: self.win_count[player] / match_count
for player, match_count in self.match_count.items()
}


class PlayerScore(BaseModel):
    """Per-player metrics produced by the instruct-comparison aggregation.

    Instances are created in `InstructComparisonAggregationLogic.aggregate`,
    one per player that appears in the aggregated evaluations.
    """

    # Value extracted from the accumulator that received one Elo rating per
    # shuffled replay of the matches.
    elo: float
    # Standard error reported by that same accumulator.
    elo_standard_error: float
    # Win rate for this player as returned by `WinRateCalculator.calculate`.
    win_rate: float
    # How many times the player appeared as `first` or `second` in the evaluations.
    num_matches: int


class AggregatedInstructComparison(BaseModel):
    """Aggregation result holding the scores of every player seen in the evaluations."""

    # Keyed by the player names taken from the evaluations' `first` / `second` fields.
    scores: Mapping[str, PlayerScore]


class InstructComparisonEvaluation(BaseModel):
    """Outcome of a single comparison between two players.

    The aggregation logic treats `first` and `second` as the two player names
    of one match and forwards `(first, second, winner)` to the Elo and
    win-rate calculators.
    """

    # Name of the first player of the match.
    first: str
    # Name of the second player of the match.
    second: str
    # Match result; NOTE(review): presumably expressed relative to `first` and
    # `second` -- confirm against the `MatchOutcome` definition.
    winner: MatchOutcome


class InstructComparisonAggregationLogic(
    AggregationLogic[InstructComparisonEvaluation, AggregatedInstructComparison]
):
    """Aggregates pairwise comparison evaluations into per-player scores."""

    def aggregate(
        self, evaluations: Iterable[InstructComparisonEvaluation]
    ) -> AggregatedInstructComparison:
        """Compute Elo, Elo standard error, win rate and match count per player.

        The Elo calculation is repeated on 100 random orderings of the matches
        and each player's rating from every run is fed into an accumulator;
        the reported Elo and its standard error come from that accumulator.

        Args:
            evaluations: The individual match results to aggregate.

        Returns:
            The scores of all players that occur in `evaluations`.
        """
        shuffle_rounds = 100

        # Materialise once: the iterable is traversed several times and shuffled in place.
        matches = [(e.first, e.second, e.winner) for e in evaluations]

        # Count how often each player takes part in a match (first-seen order is kept).
        appearances: Counter = Counter()
        for first_player, second_player, _ in matches:
            appearances.update((first_player, second_player))
        matches_per_player = dict(appearances)
        players = matches_per_player.keys()

        elo_accumulators = {player: MeanAccumulator() for player in players}
        for _ in range(shuffle_rounds):
            # A fresh calculator per round so ratings never carry over between orderings.
            calculator = EloCalculator(players)
            random.shuffle(matches)
            calculator.calculate(matches)
            for player in players:
                elo_accumulators[player].add(calculator.ratings[player])

        # Computed on the matches as left by the last shuffle.
        win_rates = WinRateCalculator(players).calculate(matches)

        scores = {}
        for player, accumulator in elo_accumulators.items():
            scores[player] = PlayerScore(
                elo=accumulator.extract(),
                elo_standard_error=accumulator.standard_error(),
                win_rate=win_rates[player],
                num_matches=matches_per_player[player],
            )
        return AggregatedInstructComparison(scores=scores)
Loading

0 comments on commit 1cb61ec

Please sign in to comment.