Skip to content

Commit

Permalink
Update EmbeddingSimilarityEvaluator.py to save some examples and thei…
Browse files Browse the repository at this point in the history
…r scores in a csv file

Add a new option to save examples from the evaluation dataset, together with their similarity scores, in a CSV file to help the user see the performance of the model.
  • Loading branch information
youssefkhalil320 authored Oct 22, 2024
1 parent f286d9f commit b0272f7
Showing 1 changed file with 19 additions and 0 deletions.
19 changes: 19 additions & 0 deletions sentence_transformers/evaluation/EmbeddingSimilarityEvaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def __init__(
write_csv: bool = True,
precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] | None = None,
truncate_dim: int | None = None,
samples_csv_filename: str = "results"
):
"""
Constructs an evaluator based for the dataset.
Expand All @@ -97,6 +98,7 @@ def __init__(
self.write_csv = write_csv
self.precision = precision
self.truncate_dim = truncate_dim
self.samples_csv_filename = samples_csv_filename

assert len(self.sentences1) == len(self.sentences2)
assert len(self.sentences1) == len(self.scores)
Expand Down Expand Up @@ -174,6 +176,7 @@ def __call__(
precision=self.precision,
normalize_embeddings=bool(self.precision),
)

# Binary and ubinary embeddings are packed, so we need to unpack them for the distance metrics
if self.precision == "binary":
embeddings1 = (embeddings1 + 128).astype(np.uint8)
Expand Down Expand Up @@ -210,6 +213,21 @@ def __call__(
)
logger.info(f"Dot-Product-Similarity:\tPearson: {eval_pearson_dot:.4f}\tSpearman: {eval_spearman_dot:.4f}")

# Print sentences with cosine similarity scores
print("\nSentences and their cosine similarity scores:\n")
# for sent1, sent2, score in zip(self.sentences1, self.sentences2, cosine_scores):
# print(f"Sentence 1: {sent1}")
# print(f"Sentence 2: {sent2}")
# print(f"Cosine Similarity Score: {score:.4f}\n")
with open(self.samples_csv_filename, "w", newline="", encoding="utf-8") as csvfile:
writer = csv.writer(csvfile)
writer.writerow(["Sentence 1", "Sentence 2", "Cosine Score"])
for sent1, sent2, score in zip(self.sentences1, self.sentences2, cosine_scores):
writer.writerow([sent1, sent2, score])

# Print the directory where the CSV file is saved
print(f"CSV file saved in: {self.samples_csv_filename}")

if output_path is not None and self.write_csv:
csv_path = os.path.join(output_path, self.csv_file)
output_file_exists = os.path.isfile(csv_path)
Expand Down Expand Up @@ -257,6 +275,7 @@ def __call__(
self.store_metrics_in_model_card_data(model, metrics)
return metrics


@property
def description(self) -> str:
    """Return the description string for this evaluator: ``"Semantic Similarity"``."""
    label = "Semantic Similarity"
    return label

0 comments on commit b0272f7

Please sign in to comment.