Skip to content

Commit

Permalink
fix: store the sentence tokenizer as a class attribute
Browse files Browse the repository at this point in the history
  • Loading branch information
parambharat committed Dec 16, 2024
1 parent 98df0a3 commit 31ccdbd
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion weave/scorers/context_relevance_scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,8 @@ def model_post_init(self, __context: Any) -> None:

nltk.download("punkt_tab")
from nltk.tokenize import sent_tokenize

self.sent_tokenize = sent_tokenize
except ImportError:
print(
"The `transformers`, `torch` and `nltk` packages are required to use the ContextRelevanceScorer, please run `pip install transformers torch nltk`"
Expand All @@ -297,7 +299,7 @@ def _score_document(
) -> list[dict[str, Any]]:
"""Score a single document."""
document_sentences = document.split("\n")
document_sentences = [sent_tokenize(doc) for doc in document_sentences]
document_sentences = [self.sent_tokenize(doc) for doc in document_sentences]
document_sentences = [s for doc in document_sentences for s in doc]
context_scores = []
for batch in range(0, len(document_sentences), self._max_num_sentences):
Expand Down

0 comments on commit 31ccdbd

Please sign in to comment.