diff --git a/CHANGELOG.md b/CHANGELOG.md index 1f383d130..603c3b398 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ ... ### Fixes -- Reverted a bug introduced in `MultipleChunkRetrieverQa` text highlighting. +- Updated the docstring of `calculate_bleu` in `BleuGrader` to correctly reflect that the return value is a float between 0 and 100. ### Deprecations ... diff --git a/src/intelligence_layer/evaluation/evaluation/graders.py b/src/intelligence_layer/evaluation/evaluation/graders.py index cd7cf81c4..ce369e18d 100644 --- a/src/intelligence_layer/evaluation/evaluation/graders.py +++ b/src/intelligence_layer/evaluation/evaluation/graders.py @@ -22,7 +22,7 @@ def calculate_bleu(self, hypothesis: str, reference: str) -> float: reference: The baseline for the evaluation. Returns: - BLEU-score, float between 0 and 1. Where 1 means perfect match and 0 no overlap. + BLEU-score, float between 0 and 100, where 100 means a perfect match and 0 means no overlap. """ bleu_score = self.bleu.corpus_score( hypotheses=[hypothesis], references=[[reference]]