Skip to content

Commit

Permalink
fix: store the sentence tokenizer as a class attribute
Browse files Browse the repository at this point in the history
  • Loading branch information
parambharat committed Dec 16, 2024
1 parent 98df0a3 commit 31ccdbd
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion weave/scorers/context_relevance_scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,8 @@ def model_post_init(self, __context: Any) -> None:

nltk.download("punkt_tab")
from nltk.tokenize import sent_tokenize

self.sent_tokenize = sent_tokenize
except ImportError:
print(
"The `transformers`, `torch` and `nltk` packages are required to use the ContextRelevanceScorer, please run `pip install transformers torch nltk`"
Expand All @@ -297,7 +299,7 @@ def _score_document(
) -> list[dict[str, Any]]:
"""Score a single document."""
document_sentences = document.split("\n")
document_sentences = [sent_tokenize(doc) for doc in document_sentences]
document_sentences = [self.sent_tokenize(doc) for doc in document_sentences]
document_sentences = [s for doc in document_sentences for s in doc]
context_scores = []
for batch in range(0, len(document_sentences), self._max_num_sentences):
Expand Down

0 comments on commit 31ccdbd

Please sign in to comment.