Merge branch 'master' into pr-2964
tomaarsen committed Nov 6, 2024
2 parents d9d485b + 1cb196a commit e3b334c
Showing 57 changed files with 237 additions and 129 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
@@ -17,7 +17,7 @@ jobs:
name: Run unit tests
strategy:
matrix:
python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
python-version: ['3.9', '3.10', '3.11', '3.12']
os: [ubuntu-latest, windows-latest]
fail-fast: false
runs-on: ${{ matrix.os }}
2 changes: 1 addition & 1 deletion README.md
@@ -24,7 +24,7 @@ For the **full documentation**, see **[www.SBERT.net](https://www.sbert.net)**.

## Installation

We recommend **Python 3.8+**, **[PyTorch 1.11.0+](https://pytorch.org/get-started/locally/)**, and **[transformers v4.34.0+](https://github.com/huggingface/transformers)**.
We recommend **Python 3.9+**, **[PyTorch 1.11.0+](https://pytorch.org/get-started/locally/)**, and **[transformers v4.34.0+](https://github.com/huggingface/transformers)**.

**Install with pip**

2 changes: 1 addition & 1 deletion docs/installation.md
@@ -1,6 +1,6 @@
# Installation

We recommend **Python 3.8+**, **[PyTorch 1.11.0+](https://pytorch.org/get-started/locally/)**, and **[transformers v4.41.0+](https://github.com/huggingface/transformers)**. There are 5 extra options to install Sentence Transformers:
We recommend **Python 3.9+**, **[PyTorch 1.11.0+](https://pytorch.org/get-started/locally/)**, and **[transformers v4.41.0+](https://github.com/huggingface/transformers)**. There are 5 extra options to install Sentence Transformers:
* **Default:** This allows for loading, saving, and inference (i.e., getting embeddings) of models.
* **ONNX:** This allows for loading, saving, inference, optimizing, and quantizing of models using the ONNX backend.
* **OpenVINO:** This allows for loading, saving, and inference of models using the OpenVINO backend.
16 changes: 8 additions & 8 deletions examples/training/matryoshka/matryoshka_eval_stsb.py
@@ -5,7 +5,7 @@

import argparse
import os
from typing import Dict, List, Optional, Tuple, cast
from typing import Optional, cast

import matplotlib.pyplot as plt
import numpy as np
@@ -21,7 +21,7 @@

# Dimension plot
def _grouped_barplot_ratios(
group_name_to_x_to_y: Dict[str, Dict[int, float]], ax: Optional[plt.Axes] = None
group_name_to_x_to_y: dict[str, dict[int, float]], ax: Optional[plt.Axes] = None
) -> plt.Axes:
# To save a pandas dependency, do from scratch in matplotlib
if ax is None:
@@ -72,9 +72,9 @@ def _grouped_barplot_ratios(


def plot_across_dimensions(
model_name_to_dim_to_score: Dict[str, Dict[int, float]],
model_name_to_dim_to_score: dict[str, dict[int, float]],
filename: str,
figsize: Tuple[float, float] = (7, 7),
figsize: tuple[float, float] = (7, 7),
title: str = "STSB test score for various embedding dimensions (via truncation),\nwith and without Matryoshka loss",
) -> None:
# Sort each by key
@@ -139,8 +139,8 @@ def plot_across_dimensions(

args = parser.parse_args()
plot_filename: str = args.plot_filename
model_names: List[str] = args.model_names
DIMENSIONS: List[int] = args.dimensions
model_names: list[str] = args.model_names
DIMENSIONS: list[int] = args.dimensions

# Load STSb
stsb_test = load_dataset("mteb/stsbenchmark-sts", split="test")
@@ -153,10 +153,10 @@
)

# Run test_evaluator
model_name_to_dim_to_score: Dict[str, Dict[int, float]] = {}
model_name_to_dim_to_score: dict[str, dict[int, float]] = {}
for model_name in tqdm(model_names, desc="Evaluating models"):
model = SentenceTransformer(model_name)
dim_to_score: Dict[int, float] = {}
dim_to_score: dict[int, float] = {}
for dim in tqdm(DIMENSIONS, desc=f"Evaluating {model_name}"):
output_path = os.path.join(model_name, f"dim-{dim}")
os.makedirs(output_path)
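The typing changes in this file rely on PEP 585 built-in generics (dict[...], list[...], tuple[...]), which are only subscriptable at runtime from Python 3.9 onward. A minimal sketch of the annotation style, using a hypothetical helper that is not taken from the script:

# Sketch only: on Python 3.8, subscripting builtins (e.g. dict[int, float]) raises
# TypeError when the function definition is evaluated, so typing.Dict/List/Tuple
# were required; on 3.9+ the builtins themselves carry the type parameters.

def summarize_scores(dim_to_score: dict[int, float]) -> list[tuple[int, float]]:
    """Return (dimension, score) pairs sorted by dimension. Hypothetical helper."""
    return sorted(dim_to_score.items())


print(summarize_scores({768: 0.86, 64: 0.81}))  # [(64, 0.81), (768, 0.86)]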
8 changes: 5 additions & 3 deletions examples/training/quora_duplicate_questions/create_splits.py
@@ -481,9 +481,11 @@ def write_mining_files(name, ids, dups):


###### Classification dataset #####
with open("quora-IR-dataset/classification/train_pairs.tsv", "w", encoding="utf8") as fOutTrain, open(
"quora-IR-dataset/classification/dev_pairs.tsv", "w", encoding="utf8"
) as fOutDev, open("quora-IR-dataset/classification/test_pairs.tsv", "w", encoding="utf8") as fOutTest:
with (
open("quora-IR-dataset/classification/train_pairs.tsv", "w", encoding="utf8") as fOutTrain,
open("quora-IR-dataset/classification/dev_pairs.tsv", "w", encoding="utf8") as fOutDev,
open("quora-IR-dataset/classification/test_pairs.tsv", "w", encoding="utf8") as fOutTest,
):
fOutTrain.write("\t".join(["qid1", "qid2", "question1", "question2", "is_duplicate"]) + "\n")
fOutDev.write("\t".join(["qid1", "qid2", "question1", "question2", "is_duplicate"]) + "\n")
fOutTest.write("\t".join(["qid1", "qid2", "question1", "question2", "is_duplicate"]) + "\n")
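The rewritten with statement uses parenthesized context managers, a form the Python 3.8 parser rejects, which is presumably why this cleanup accompanies the new 3.9+ requirement. A minimal, self-contained sketch of the pattern with placeholder file names:

# Parenthesized context managers: accepted by the PEG parser shipped since
# Python 3.9 and documented from 3.10; the file names here are placeholders.
with (
    open("train_pairs.tsv", "w", encoding="utf8") as f_train,
    open("dev_pairs.tsv", "w", encoding="utf8") as f_dev,
    open("test_pairs.tsv", "w", encoding="utf8") as f_test,
):
    header = "\t".join(["qid1", "qid2", "question1", "question2", "is_duplicate"]) + "\n"
    for handle in (f_train, f_dev, f_test):
        handle.write(header)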
6 changes: 3 additions & 3 deletions examples/unsupervised_learning/CT/train_ct_from_file.py
@@ -55,9 +55,9 @@

################# Read the train corpus #################
train_sentences = []
with gzip.open(filepath, "rt", encoding="utf8") if filepath.endswith(".gz") else open(
filepath, encoding="utf8"
) as fIn:
with (
gzip.open(filepath, "rt", encoding="utf8") if filepath.endswith(".gz") else open(filepath, encoding="utf8") as fIn
):
for line in tqdm.tqdm(fIn, desc="Read file"):
line = line.strip()
if len(line) >= 10:
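This gzip-or-plain-text with block recurs across several of the example scripts below (MLM, TSDAE, ParallelSentencesDataset). A hypothetical helper, not part of the repository, expressing the same choice without the inline conditional:

import gzip
from typing import IO


def open_text(filepath: str) -> IO[str]:
    """Open *.gz files via gzip in text mode, anything else as plain UTF-8 text."""
    if filepath.endswith(".gz"):
        return gzip.open(filepath, "rt", encoding="utf8")
    return open(filepath, encoding="utf8")


# Usage sketch mirroring the training scripts:
# with open_text(filepath) as fIn:
#     for line in fIn:
#         line = line.strip()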
@@ -55,9 +55,9 @@

################# Read the train corpus #################
train_sentences = []
with gzip.open(filepath, "rt", encoding="utf8") if filepath.endswith(".gz") else open(
filepath, encoding="utf8"
) as fIn:
with (
gzip.open(filepath, "rt", encoding="utf8") if filepath.endswith(".gz") else open(filepath, encoding="utf8") as fIn
):
for line in tqdm.tqdm(fIn, desc="Read file"):
line = line.strip()
if len(line) >= 10:
16 changes: 10 additions & 6 deletions examples/unsupervised_learning/MLM/train_mlm.py
@@ -48,9 +48,11 @@

train_sentences = []
train_path = sys.argv[2]
with gzip.open(train_path, "rt", encoding="utf8") if train_path.endswith(".gz") else open(
train_path, encoding="utf8"
) as fIn:
with (
gzip.open(train_path, "rt", encoding="utf8")
if train_path.endswith(".gz")
else open(train_path, encoding="utf8") as fIn
):
for line in fIn:
line = line.strip()
if len(line) >= 10:
@@ -61,9 +63,11 @@
dev_sentences = []
if len(sys.argv) >= 4:
dev_path = sys.argv[3]
with gzip.open(dev_path, "rt", encoding="utf8") if dev_path.endswith(".gz") else open(
dev_path, encoding="utf8"
) as fIn:
with (
gzip.open(dev_path, "rt", encoding="utf8")
if dev_path.endswith(".gz")
else open(dev_path, encoding="utf8") as fIn
):
for line in fIn:
line = line.strip()
if len(line) >= 10:
@@ -55,9 +55,9 @@

################# Read the train corpus #################
train_samples = []
with gzip.open(filepath, "rt", encoding="utf8") if filepath.endswith(".gz") else open(
filepath, encoding="utf8"
) as fIn:
with (
gzip.open(filepath, "rt", encoding="utf8") if filepath.endswith(".gz") else open(filepath, encoding="utf8") as fIn
):
for line in tqdm.tqdm(fIn, desc="Read file"):
line = line.strip()
if len(line) >= 10:
6 changes: 3 additions & 3 deletions examples/unsupervised_learning/TSDAE/train_tsdae_from_file.py
@@ -45,9 +45,9 @@

################# Read the train corpus #################
train_sentences = []
with gzip.open(filepath, "rt", encoding="utf8") if filepath.endswith(".gz") else open(
filepath, encoding="utf8"
) as fIn:
with (
gzip.open(filepath, "rt", encoding="utf8") if filepath.endswith(".gz") else open(filepath, encoding="utf8") as fIn
):
for line in tqdm.tqdm(fIn, desc="Read file"):
line = line.strip()
if len(line) >= 10:
2 changes: 1 addition & 1 deletion index.rst
@@ -28,7 +28,7 @@ Using Sentence Transformer models is elementary:
pip install -U sentence-transformers
We recommend **Python 3.8+** and **PyTorch 1.11.0+**. See `installation <docs/installation.html>`_ for further installation options.
We recommend **Python 3.9+** and **PyTorch 1.11.0+**. See `installation <docs/installation.html>`_ for further installation options.

.. code-block:: python
3 changes: 1 addition & 2 deletions pyproject.toml
@@ -11,7 +11,7 @@ authors = [
maintainers = [
{ name = "Tom Aarsen", email = "[email protected]" }
]
requires-python = ">=3.8"
requires-python = ">=3.9"
keywords = [
"Transformer Networks",
"BERT",
@@ -25,7 +25,6 @@ classifiers = [
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: Apache Software License",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
3 changes: 2 additions & 1 deletion sentence_transformers/SentenceTransformer.py
@@ -13,10 +13,11 @@
import traceback
import warnings
from collections import OrderedDict
from collections.abc import Iterable, Iterator
from contextlib import contextmanager
from multiprocessing import Queue
from pathlib import Path
from typing import Any, Callable, Iterable, Iterator, Literal, overload
from typing import Any, Callable, Literal, overload

import numpy as np
import torch
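Importing Iterable and Iterator from collections.abc instead of typing follows the PEP 585 deprecation of the typing aliases; on Python 3.9+ the abc classes are themselves subscriptable. A small illustrative sketch (the function is hypothetical, not library code):

from collections.abc import Iterable, Iterator
from itertools import islice


def first_n(items: Iterable[str], n: int) -> Iterator[str]:
    """Yield at most the first n items; hypothetical example for the import style."""
    return islice(items, n)


print(list(first_n(["a", "b", "c"], 2)))  # ['a', 'b']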
8 changes: 5 additions & 3 deletions sentence_transformers/datasets/ParallelSentencesDataset.py
@@ -77,9 +77,11 @@ def load_data(
logger.info("Load " + filepath)
parallel_sentences = []

with gzip.open(filepath, "rt", encoding="utf8") if filepath.endswith(".gz") else open(
filepath, encoding="utf8"
) as fIn:
with (
gzip.open(filepath, "rt", encoding="utf8")
if filepath.endswith(".gz")
else open(filepath, encoding="utf8") as fIn
):
count = 0
for line in fIn:
sentences = line.strip().split("\t")
@@ -317,8 +317,10 @@ def compute_metrices(

# Encode chunk of corpus
if corpus_embeddings is None:
with nullcontext() if self.truncate_dim is None else corpus_model.truncate_sentence_embeddings(
self.truncate_dim
with (
nullcontext()
if self.truncate_dim is None
else corpus_model.truncate_sentence_embeddings(self.truncate_dim)
):
sub_corpus_embeddings = corpus_model.encode(
self.corpus[corpus_start_idx:corpus_end_idx],
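This hunk, like the two MSE evaluator hunks below, only reflows an existing trick: contextlib.nullcontext() stands in as a no-op context manager when no truncation dimension is set, so a single with block covers both cases. A standalone sketch with a hypothetical context manager in place of truncate_sentence_embeddings:

from contextlib import contextmanager, nullcontext


@contextmanager
def truncate_to(dim: int):
    # Hypothetical stand-in for model.truncate_sentence_embeddings(dim).
    print(f"truncating embeddings to {dim} dimensions")
    try:
        yield
    finally:
        print("restoring full dimensionality")


def encode(truncate_dim=None):
    with nullcontext() if truncate_dim is None else truncate_to(truncate_dim):
        print("encoding sentences")


encode()                 # encoding sentences
encode(truncate_dim=64)  # truncate, encode, restore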
6 changes: 4 additions & 2 deletions sentence_transformers/evaluation/MSEEvaluator.py
@@ -81,8 +81,10 @@ def __init__(
):
super().__init__()
self.truncate_dim = truncate_dim
with nullcontext() if self.truncate_dim is None else teacher_model.truncate_sentence_embeddings(
self.truncate_dim
with (
nullcontext()
if self.truncate_dim is None
else teacher_model.truncate_sentence_embeddings(self.truncate_dim)
):
self.source_embeddings = teacher_model.encode(
source_sentences, show_progress_bar=show_progress_bar, batch_size=batch_size, convert_to_numpy=True
6 changes: 4 additions & 2 deletions sentence_transformers/evaluation/MSEEvaluatorFromDataFrame.py
@@ -79,8 +79,10 @@ def __init__(
self.csv_headers.append(f"{src_lang}-{trg_lang}")

all_source_sentences = list(all_source_sentences)
with nullcontext() if self.truncate_dim is None else teacher_model.truncate_sentence_embeddings(
self.truncate_dim
with (
nullcontext()
if self.truncate_dim is None
else teacher_model.truncate_sentence_embeddings(self.truncate_dim)
):
all_src_embeddings = teacher_model.encode(all_source_sentences, batch_size=self.batch_size)
self.teacher_embeddings = {sent: emb for sent, emb in zip(all_source_sentences, all_src_embeddings)}
101 changes: 62 additions & 39 deletions sentence_transformers/evaluation/NanoBEIREvaluator.py
@@ -84,8 +84,7 @@ class NanoBEIREvaluator(SentenceEvaluator):
from sentence_transformers import SentenceTransformer
from sentence_transformers.evaluation import NanoBEIREvaluator
# Load a model
model = SentenceTransformer('all-mpnet-base-v2')
model = SentenceTransformer('intfloat/multilingual-e5-large-instruct')
datasets = ["QuoraRetrieval", "MSMARCO"]
query_prompts = {
@@ -95,54 +94,78 @@ class NanoBEIREvaluator(SentenceEvaluator):
evaluator = NanoBEIREvaluator(
dataset_names=datasets,
name="NanoBEIR",
query_prompts=query_prompts,
)
results = evaluator(model)
'''
NanoBEIR Evaluation of the model on ['QuoraRetrieval', 'MSMARCO'] dataset:
Evaluating NanoBeIRNanoQuoraRetrieval
Evaluating NanoBeIRNanoMSMARCO
Evaluating NanoQuoraRetrieval
Information Retrieval Evaluation of the model on the NanoQuoraRetrieval dataset:
Queries: 50
Corpus: 5046
Score-Function: cosine
Accuracy@1: 92.00%
Accuracy@3: 98.00%
Accuracy@5: 100.00%
Accuracy@10: 100.00%
Precision@1: 92.00%
Precision@3: 40.67%
Precision@5: 26.00%
Precision@10: 14.00%
Recall@1: 81.73%
Recall@3: 94.20%
Recall@5: 97.93%
Recall@10: 100.00%
MRR@10: 0.9540
NDCG@10: 0.9597
MAP@100: 0.9395
Evaluating NanoMSMARCO
Information Retrieval Evaluation of the model on the NanoMSMARCO dataset:
Queries: 50
Corpus: 5043
Score-Function: cosine
Accuracy@1: 40.00%
Accuracy@3: 74.00%
Accuracy@5: 78.00%
Accuracy@10: 88.00%
Precision@1: 40.00%
Precision@3: 24.67%
Precision@5: 15.60%
Precision@10: 8.80%
Recall@1: 40.00%
Recall@3: 74.00%
Recall@5: 78.00%
Recall@10: 88.00%
MRR@10: 0.5849
NDCG@10: 0.6572
MAP@100: 0.5892
Average Queries: 50.0
Average Corpus: 5044.5
Aggregated for Score Function: cosine
Accuracy@1: 39.00%
Accuracy@3: 57.00%
Accuracy@5: 66.00%
Accuracy@10: 77.00%
Precision@1: 39.00%
Recall@1: 34.03%
Precision@3: 20.67%
Recall@3: 54.07%
Precision@5: 15.00%
Recall@5: 64.27%
Precision@10: 8.90%
Recall@10: 75.97%
MRR@10: 0.5004
NDCG@10: 0.5513
Aggregated for Score Function: dot
Accuracy@1: 39.00%
Accuracy@3: 57.00%
Accuracy@5: 66.00%
Accuracy@10: 77.00%
Precision@1: 39.00%
Recall@1: 34.03%
Precision@3: 20.67%
Recall@3: 54.07%
Precision@5: 15.00%
Recall@5: 64.27%
Precision@10: 8.90%
Recall@10: 75.97%
MRR@10: 0.5004
NDCG@10: 0.5513
Accuracy@1: 66.00%
Accuracy@3: 86.00%
Accuracy@5: 89.00%
Accuracy@10: 94.00%
Precision@1: 66.00%
Recall@1: 60.87%
Precision@3: 32.67%
Recall@3: 84.10%
Precision@5: 20.80%
Recall@5: 87.97%
Precision@10: 11.40%
Recall@10: 94.00%
MRR@10: 0.7694
NDCG@10: 0.8085
'''
logger.info(evaluator.primary_metric)
# => "cosine_ndcg@10"
logger.info(results["mean"][evaluator.primary_metric])
# => 0.5512516989358924
print(evaluator.primary_metric)
# => "NanoBEIR_mean_cosine_ndcg@10"
print(results[evaluator.primary_metric])
# => 0.8084508771660436
"""

def __init__(