fix scorer - slight logic fallacy
ayulockin committed Nov 21, 2024
1 parent 4e9c192 commit ce01ec7
Showing 2 changed files with 5 additions and 2 deletions.
2 changes: 1 addition & 1 deletion tests/scorers/test_robustness_scorer.py
@@ -87,4 +87,4 @@ def model(questions: list[str]):
         scorers=[robustness_scorer],
     )
     result = await evaluation.evaluate(model)
-    assert result["RobustnessScorer"]["cohen_h"]["mean"] == 1.0
+    assert truncate(result["RobustnessScorer"]["cohen_h"]["mean"], 5) == 0.49999
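The updated assertion compares against a truncated value rather than an exact float, which avoids flaky equality checks on floating-point results. The truncate helper itself is not shown in this diff; the sketch below is a hypothetical reconstruction of what such a helper presumably does (floor, not round, to a fixed number of decimal places, so a value like 0.4999997 compares equal to 0.49999) — its name matches the test, but the body is an assumption:

```python
import math

# Hypothetical sketch of the truncate helper the test relies on; the real
# definition is not part of this diff. It floors (rather than rounds) to
# `decimals` places, so e.g. 0.4999997 -> 0.49999.
def truncate(value: float, decimals: int) -> float:
    factor = 10 ** decimals
    return math.floor(value * factor) / factor
```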
5 changes: 4 additions & 1 deletion weave/scorers/robustness_scorer.py
@@ -69,7 +69,10 @@ def score(
         score_o = 1.0 if not ground_truths else binary_scores[0]
 
         # Average perturbed similarity score
-        score_p = sum(binary_scores[1:]) / len(binary_scores[1:])
+        # When ground truths are present, start from index 1
+        # When no ground truths, use all perturbed outputs (already in binary_scores)
+        perturbed_binary_scores = binary_scores[1:] if ground_truths else binary_scores
+        score_p = sum(perturbed_binary_scores) / len(perturbed_binary_scores)
 
         def psi(score: float) -> float:
             return 2 * math.asin(math.sqrt(score))
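For readers of the diff: the fix addresses an off-by-one in how the perturbed scores are selected. The following is a minimal standalone sketch of the bug and the fix, assuming the binary_scores layout implied by the comments above (the original output's score sits at index 0 only when ground truths are supplied; with no ground truths, every entry is a perturbed score):

```python
def mean_perturbed_score(binary_scores: list[float], ground_truths) -> float:
    # Buggy version always sliced from index 1, so with no ground truths the
    # first *perturbed* score was silently dropped:
    #   score_p = sum(binary_scores[1:]) / len(binary_scores[1:])
    perturbed = binary_scores[1:] if ground_truths else binary_scores
    return sum(perturbed) / len(perturbed)

# With no ground truths, all three entries are perturbed scores; the old
# slicing would have returned 0.0 here instead of 1/3.
print(mean_perturbed_score([1.0, 0.0, 0.0], ground_truths=None))  # 0.3333...
```

This shift in score_p is also why the test expectation changed: psi is the arcsine transform behind Cohen's h (h = |2·asin(√p₁) − 2·asin(√p₂)|), so changing which scores enter score_p changes the resulting cohen_h mean.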
