test: replace unittest with pytest (#2407)
* test: replace unittest with pytest

* test: add back hf hub workaround of pull request

* style: fix formatting in tests

* Re-add missing tests

* Fix wrong slow mark

* Remove dead code (inaccessible return)

* Move fixtures to conftest.py, add type hints

---------

Co-authored-by: Tom Aarsen <[email protected]>
bwanglzu and tomaarsen authored Jan 15, 2024
1 parent e112358 · commit 056d9b4
Showing 9 changed files with 619 additions and 494 deletions.
29 changes: 27 additions & 2 deletions tests/conftest.py
@@ -1,7 +1,32 @@
-from sentence_transformers import SentenceTransformer
 import pytest
 
+from sentence_transformers import SentenceTransformer, CrossEncoder
+from sentence_transformers.models import Transformer, Pooling
+
 
 @pytest.fixture()
-def model() -> SentenceTransformer:
+def stsb_bert_tiny_model() -> SentenceTransformer:
     return SentenceTransformer("sentence-transformers-testing/stsb-bert-tiny-safetensors")
+
+
+@pytest.fixture()
+def paraphrase_distilroberta_base_v1_model() -> SentenceTransformer:
+    return SentenceTransformer("paraphrase-distilroberta-base-v1")
+
+
+@pytest.fixture()
+def distilroberta_base_ce_model() -> CrossEncoder:
+    return CrossEncoder("distilroberta-base", num_labels=1)
+
+
+@pytest.fixture()
+def clip_vit_b_32_model() -> SentenceTransformer:
+    return SentenceTransformer("clip-ViT-B-32")
+
+
+@pytest.fixture()
+def distilbert_base_uncased_model() -> SentenceTransformer:
+    word_embedding_model = Transformer("distilbert-base-uncased")
+    pooling_model = Pooling(word_embedding_model.get_word_embedding_dimension())
+    model = SentenceTransformer(modules=[word_embedding_model, pooling_model])
+    return model
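Note on usage: pytest injects these fixtures by matching a test function's parameter name against the fixture name, so the test modules below never import anything from conftest.py. A minimal sketch of a consuming test follows (hypothetical file, not part of this commit; the assertion only checks dimensionality, not a specific embedding size):

# tests/test_example.py -- hypothetical illustration, not part of this commit.
from sentence_transformers import SentenceTransformer


def test_encode_returns_vector(stsb_bert_tiny_model: SentenceTransformer) -> None:
    # pytest resolves `stsb_bert_tiny_model` by name from tests/conftest.py
    # and passes the constructed SentenceTransformer into this test.
    embedding = stsb_bert_tiny_model.encode("A single test sentence")
    assert embedding.ndim == 1  # encode() on a single string returns a 1-D vector
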
121 changes: 67 additions & 54 deletions tests/test_compute_embeddings.py
@@ -3,70 +3,83 @@
 """
 
 
-import unittest
-from sentence_transformers import SentenceTransformer
 import numpy as np
 
+from sentence_transformers import SentenceTransformer
+
 
-class ComputeEmbeddingsTest(unittest.TestCase):
-    def setUp(self):
-        self.model = SentenceTransformer("paraphrase-distilroberta-base-v1")
-
-    def test_encode_token_embeddings(self):
-        """
-        Test that encode(output_value='token_embeddings') works
-        :return:
-        """
-        sent = [
-            "Hello Word, a test sentence",
-            "Here comes another sentence",
-            "My final sentence",
-            "Sentences",
-            "Sentence five five five five five five five",
-        ]
-        emb = self.model.encode(sent, output_value="token_embeddings", batch_size=2)
-        assert len(emb) == len(sent)
-        for s, e in zip(sent, emb):
-            assert len(self.model.tokenize([s])["input_ids"][0]) == e.shape[0]
-
-    def test_encode_single_sentences(self):
-        # Single sentence
-        emb = self.model.encode("Hello Word, a test sentence")
-        assert emb.shape == (768,)
-        assert abs(np.sum(emb) - 7.9811716) < 0.001
-
-        # Single sentence as list
-        emb = self.model.encode(["Hello Word, a test sentence"])
-        assert emb.shape == (1, 768)
-        assert abs(np.sum(emb) - 7.9811716) < 0.001
-
-        # Sentence list
-        emb = self.model.encode(["Hello Word, a test sentence", "Here comes another sentence", "My final sentence"])
-        assert emb.shape == (3, 768)
-        assert abs(np.sum(emb) - 22.968266) < 0.001
-
-    def test_encode_normalize(self):
-        emb = self.model.encode(
-            ["Hello Word, a test sentence", "Here comes another sentence", "My final sentence"],
-            normalize_embeddings=True,
-        )
-        assert emb.shape == (3, 768)
-        for norm in np.linalg.norm(emb, axis=1):
-            assert abs(norm - 1) < 0.001
-
-    def test_encode_tuple_sentences(self):
-        # Input a sentence tuple
-        emb = self.model.encode([("Hello Word, a test sentence", "Second input for model")])
-        assert emb.shape == (1, 768)
-        assert abs(np.sum(emb) - 9.503508) < 0.001
-
-        # List of sentence tuples
-        emb = self.model.encode(
-            [
-                ("Hello Word, a test sentence", "Second input for model"),
-                ("My second tuple", "With two inputs"),
-                ("Final tuple", "final test"),
-            ]
-        )
-        assert emb.shape == (3, 768)
-        assert abs(np.sum(emb) - 32.14627) < 0.001
+def test_encode_token_embeddings(paraphrase_distilroberta_base_v1_model: SentenceTransformer) -> None:
+    """
+    Test that encode(output_value='token_embeddings') works
+    :return:
+    """
+    model = paraphrase_distilroberta_base_v1_model
+    sent = [
+        "Hello Word, a test sentence",
+        "Here comes another sentence",
+        "My final sentence",
+        "Sentences",
+        "Sentence five five five five five five five",
+    ]
+    emb = model.encode(sent, output_value="token_embeddings", batch_size=2)
+    assert len(emb) == len(sent)
+    for s, e in zip(sent, emb):
+        assert len(model.tokenize([s])["input_ids"][0]) == e.shape[0]
+
+
+def test_encode_single_sentences(paraphrase_distilroberta_base_v1_model: SentenceTransformer) -> None:
+    model = paraphrase_distilroberta_base_v1_model
+    # Single sentence
+    emb = model.encode("Hello Word, a test sentence")
+    assert emb.shape == (768,)
+    assert abs(np.sum(emb) - 7.9811716) < 0.001
+
+    # Single sentence as list
+    emb = model.encode(["Hello Word, a test sentence"])
+    assert emb.shape == (1, 768)
+    assert abs(np.sum(emb) - 7.9811716) < 0.001
+
+    # Sentence list
+    emb = model.encode(
+        [
+            "Hello Word, a test sentence",
+            "Here comes another sentence",
+            "My final sentence",
+        ]
+    )
+    assert emb.shape == (3, 768)
+    assert abs(np.sum(emb) - 22.968266) < 0.001
+
+
+def test_encode_normalize(paraphrase_distilroberta_base_v1_model: SentenceTransformer) -> None:
+    model = paraphrase_distilroberta_base_v1_model
+    emb = model.encode(
+        [
+            "Hello Word, a test sentence",
+            "Here comes another sentence",
+            "My final sentence",
+        ],
+        normalize_embeddings=True,
+    )
+    assert emb.shape == (3, 768)
+    for norm in np.linalg.norm(emb, axis=1):
+        assert abs(norm - 1) < 0.001
+
+
+def test_encode_tuple_sentences(paraphrase_distilroberta_base_v1_model: SentenceTransformer) -> None:
+    model = paraphrase_distilroberta_base_v1_model
+    # Input a sentence tuple
+    emb = model.encode([("Hello Word, a test sentence", "Second input for model")])
+    assert emb.shape == (1, 768)
+    assert abs(np.sum(emb) - 9.503508) < 0.001
+
+    # List of sentence tuples
+    emb = model.encode(
+        [
+            ("Hello Word, a test sentence", "Second input for model"),
+            ("My second tuple", "With two inputs"),
+            ("Final tuple", "final test"),
+        ]
+    )
+    assert emb.shape == (3, 768)
+    assert abs(np.sum(emb) - 32.14627) < 0.001
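For context on test_encode_normalize: normalize_embeddings=True is expected to L2-normalize each embedding, i.e. divide every vector by its Euclidean norm so each row has length 1. A hedged NumPy sketch of that equivalence follows (illustration only, not part of this commit; the tolerance and the exact match with manual normalization are assumptions):

# Illustration only: manual L2 normalization that encode(normalize_embeddings=True)
# is expected to match, up to floating-point error.
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("paraphrase-distilroberta-base-v1")
sentences = ["Hello Word, a test sentence", "Here comes another sentence"]

raw = model.encode(sentences)                                    # unnormalized embeddings
manual = raw / np.linalg.norm(raw, axis=1, keepdims=True)        # divide each row by its L2 norm
normalized = model.encode(sentences, normalize_embeddings=True)  # library-normalized embeddings

assert np.allclose(manual, normalized, atol=1e-5)
assert np.allclose(np.linalg.norm(normalized, axis=1), 1.0, atol=1e-5)
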
140 changes: 83 additions & 57 deletions tests/test_cross_encoder.py
@@ -4,66 +4,92 @@
 import csv
 import gzip
 import os
-import unittest
-import pytest
 
+import pytest
 from torch.utils.data import DataLoader
 
 from sentence_transformers import CrossEncoder, util
-from sentence_transformers.readers import InputExample
 from sentence_transformers.cross_encoder.evaluation import CECorrelationEvaluator
+from sentence_transformers.readers import InputExample
+from typing import Generator, List, Tuple
 
 
-class CrossEncoderTest(unittest.TestCase):
-    def setUp(self):
-        sts_dataset_path = "datasets/stsbenchmark.tsv.gz"
-        if not os.path.exists(sts_dataset_path):
-            util.http_get("https://sbert.net/datasets/stsbenchmark.tsv.gz", sts_dataset_path)
-
-        # Read STSB
-        self.stsb_train_samples = []
-        self.test_samples = []
-        with gzip.open(sts_dataset_path, "rt", encoding="utf8") as fIn:
-            reader = csv.DictReader(fIn, delimiter="\t", quoting=csv.QUOTE_NONE)
-            for row in reader:
-                score = float(row["score"]) / 5.0  # Normalize score to range 0 ... 1
-                inp_example = InputExample(texts=[row["sentence1"], row["sentence2"]], label=score)
-
-                if row["split"] == "test":
-                    self.test_samples.append(inp_example)
-                elif row["split"] == "train":
-                    self.stsb_train_samples.append(inp_example)
-
-    def evaluate_stsb_test(self, model, expected_score, num_test_samples: int = -1):
-        evaluator = CECorrelationEvaluator.from_input_examples(self.test_samples[:num_test_samples], name="sts-test")
-        score = evaluator(model) * 100
-        print("STS-Test Performance: {:.2f} vs. exp: {:.2f}".format(score, expected_score))
-        assert score > expected_score or abs(score - expected_score) < 0.1
-
-    def test_pretrained_stsb(self):
-        model = CrossEncoder("cross-encoder/stsb-distilroberta-base")
-        self.evaluate_stsb_test(model, 87.92)
-
-    @pytest.mark.slow
-    def test_train_stsb_slow(self):
-        model = CrossEncoder("distilroberta-base", num_labels=1)
-        train_dataloader = DataLoader(self.stsb_train_samples, shuffle=True, batch_size=16)
-        model.fit(train_dataloader=train_dataloader, epochs=1, warmup_steps=int(len(train_dataloader) * 0.1))
-        self.evaluate_stsb_test(model, 75)
-
-    def test_train_stsb(self):
-        model = CrossEncoder("distilroberta-base", num_labels=1)
-        train_dataloader = DataLoader(self.stsb_train_samples[:500], shuffle=True, batch_size=16)
-        model.fit(train_dataloader=train_dataloader, epochs=1, warmup_steps=int(len(train_dataloader) * 0.1))
-        self.evaluate_stsb_test(model, 50, num_test_samples=100)
-
-
-class CrossEncoderClassifierDropoutTest(unittest.TestCase):
-    def test_classifier_dropout_is_set(self):
-        model = CrossEncoder("cross-encoder/stsb-distilroberta-base", classifier_dropout=0.1234)
-        assert model.config.classifier_dropout == 0.1234
-        assert model.model.config.classifier_dropout == 0.1234
-
-    def test_classifier_dropout_default_value(self):
-        model = CrossEncoder("cross-encoder/stsb-distilroberta-base")
-        assert model.config.classifier_dropout is None
-        assert model.model.config.classifier_dropout is None
+@pytest.fixture()
+def sts_resource() -> Generator[Tuple[List[InputExample], List[InputExample]], None, None]:
+    sts_dataset_path = "datasets/stsbenchmark.tsv.gz"
+    if not os.path.exists(sts_dataset_path):
+        util.http_get("https://sbert.net/datasets/stsbenchmark.tsv.gz", sts_dataset_path)
+
+    stsb_train_samples = []
+    stsb_test_samples = []
+    with gzip.open(sts_dataset_path, "rt", encoding="utf8") as fIn:
+        reader = csv.DictReader(fIn, delimiter="\t", quoting=csv.QUOTE_NONE)
+        for row in reader:
+            score = float(row["score"]) / 5.0  # Normalize score to range 0 ... 1
+            inp_example = InputExample(texts=[row["sentence1"], row["sentence2"]], label=score)
+
+            if row["split"] == "test":
+                stsb_test_samples.append(inp_example)
+            elif row["split"] == "train":
+                stsb_train_samples.append(inp_example)
+    yield stsb_train_samples, stsb_test_samples
+
+
+def evaluate_stsb_test(
+    distilroberta_base_ce_model: CrossEncoder,
+    expected_score: float,
+    test_samples: List[InputExample],
+    num_test_samples: int = -1,
+) -> None:
+    model = distilroberta_base_ce_model
+    evaluator = CECorrelationEvaluator.from_input_examples(test_samples[:num_test_samples], name="sts-test")
+    score = evaluator(model) * 100
+    print("STS-Test Performance: {:.2f} vs. exp: {:.2f}".format(score, expected_score))
+    assert score > expected_score or abs(score - expected_score) < 0.1
+
+
+def test_pretrained_stsb(sts_resource: Tuple[List[InputExample], List[InputExample]]):
+    _, sts_test_samples = sts_resource
+    model = CrossEncoder("cross-encoder/stsb-distilroberta-base")
+    evaluate_stsb_test(model, 87.92, sts_test_samples)
+
+
+@pytest.mark.slow
+def test_train_stsb_slow(
+    distilroberta_base_ce_model: CrossEncoder, sts_resource: Tuple[List[InputExample], List[InputExample]]
+) -> None:
+    model = distilroberta_base_ce_model
+    sts_train_samples, sts_test_samples = sts_resource
+    train_dataloader = DataLoader(sts_train_samples, shuffle=True, batch_size=16)
+    model.fit(
+        train_dataloader=train_dataloader,
+        epochs=1,
+        warmup_steps=int(len(train_dataloader) * 0.1),
+    )
+    evaluate_stsb_test(model, 75, sts_test_samples)
+
+
+def test_train_stsb(
+    distilroberta_base_ce_model: CrossEncoder, sts_resource: Tuple[List[InputExample], List[InputExample]]
+) -> None:
+    model = distilroberta_base_ce_model
+    sts_train_samples, sts_test_samples = sts_resource
+    train_dataloader = DataLoader(sts_train_samples[:500], shuffle=True, batch_size=16)
+    model.fit(
+        train_dataloader=train_dataloader,
+        epochs=1,
+        warmup_steps=int(len(train_dataloader) * 0.1),
+    )
+    evaluate_stsb_test(model, 50, sts_test_samples, num_test_samples=100)
+
+
+def test_classifier_dropout_is_set() -> None:
+    model = CrossEncoder("cross-encoder/stsb-distilroberta-base", classifier_dropout=0.1234)
+    assert model.config.classifier_dropout == 0.1234
+    assert model.model.config.classifier_dropout == 0.1234
+
+
+def test_classifier_dropout_default_value() -> None:
+    model = CrossEncoder("cross-encoder/stsb-distilroberta-base")
+    assert model.config.classifier_dropout is None
+    assert model.model.config.classifier_dropout is None
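A note on @pytest.mark.slow used above: custom marks should be registered with pytest, otherwise they trigger PytestUnknownMarkWarning, and registration also lets CI deselect the long training run with pytest -m "not slow". This commit does not show where the repository registers the mark, so the following conftest.py snippet is only a sketch of one common way to do it:

# Hypothetical addition to tests/conftest.py -- not shown in this commit.
def pytest_configure(config) -> None:
    # Register the custom "slow" mark so @pytest.mark.slow does not warn;
    # skip these tests locally or in CI with: pytest -m "not slow"
    config.addinivalue_line("markers", "slow: marks tests as slow to run")
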