From 02ce6de12b5662837918ca9bbbcf62c14b04b6d7 Mon Sep 17 00:00:00 2001
From: JINO-ROHIT
Date: Tue, 10 Dec 2024 17:52:58 +0530
Subject: [PATCH] [DRAFT]tests for nanobeir evaluator

---
 tests/test_nanobeir_evaluator.py | 43 ++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 tests/test_nanobeir_evaluator.py

diff --git a/tests/test_nanobeir_evaluator.py b/tests/test_nanobeir_evaluator.py
new file mode 100644
index 000000000..49780999c
--- /dev/null
+++ b/tests/test_nanobeir_evaluator.py
@@ -0,0 +1,43 @@
+from __future__ import annotations
+
+import pytest
+
+from sentence_transformers import SentenceTransformer
+from sentence_transformers.evaluation import NanoBEIREvaluator
+
+
+def test_nanobeir_evaluator():
+    """Tests that the NanoBEIREvaluator can be loaded and returns a non-empty mapping of float metrics"""
+    datasets = ["QuoraRetrieval", "MSMARCO"]
+    query_prompts = {  # one query prompt per dataset name listed above
+        "QuoraRetrieval": "Instruct: Given a question, retrieve questions that are semantically equivalent to the given question\\nQuery: ",
+        "MSMARCO": "Instruct: Given a web search query, retrieve relevant passages that answer the query\\nQuery: ",
+    }
+
+    model = SentenceTransformer("sentence-transformers-testing/stsb-bert-tiny-safetensors")
+
+    evaluator = NanoBEIREvaluator(
+        dataset_names=datasets,
+        query_prompts=query_prompts,
+    )
+
+    results = evaluator(model)
+
+    assert len(results) > 0  # the evaluator must report at least one metric
+    assert all(isinstance(score, float) for score in results.values())  # every reported metric is a float
+
+# def test_nanobeir_evaluator_with_invalid_dataset():
+#     """Test that NanoBEIREvaluator raises an error for invalid dataset names."""
+#     invalid_datasets = ["invalidDataset"]
+#     NOTE(review): `match` is applied as a regex, so "(s)" and "[...]" below need re.escape() before enabling this.
+#     with pytest.raises(ValueError, match=f"Dataset(s) {invalid_datasets} not found in the NanoBEIR collection.Valid dataset names are: ['climatefever', 'dbpedia', 'fever', 'fiqa2018', 'hotpotqa', 'msmarco', 'nfcorpus', 'nq', 'quoraretrieval', 'scidocs', 'arguana', 'scifact', 'touche2020']"):
+#         NanoBEIREvaluator(
+#             dataset_names=invalid_datasets,
+#         )
+
+# def test_nanobeir_evaluator_empty_inputs():
+#     """Test that NanoBEIREvaluator raises a ValueError when dataset_names is an empty list."""
+#     with pytest.raises(ValueError, match="dataset_names cannot be empty. Use None to evaluate on all datasets."):
+#         NanoBEIREvaluator(
+#             dataset_names=[],
+#         )