Run code formatting on docs (#2389)
blacken-docs in particular
tomaarsen authored Dec 18, 2023
1 parent 9b1c33f commit 6d11984
Showing 27 changed files with 430 additions and 276 deletions.
16 changes: 10 additions & 6 deletions docs/hugging_face.md
Any pre-trained models from the Hub can be loaded with a single line of code:

```py
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("model_name")
```

You can even click `Use in sentence-transformers` to get a code snippet that you can copy and paste!
Here is an example that loads the [multi-qa-MiniLM-L6-cos-v1 model](https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1):

```py
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("multi-qa-MiniLM-L6-cos-v1")

query_embedding = model.encode("How big is London")
passage_embedding = model.encode([
    "London has 9,787,426 inhabitants at the 2011 census",
    "London is known for its financial district",
])

print("Similarity:", util.dot_score(query_embedding, passage_embedding))
```
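
Since `util.dot_score` returns a tensor of shape `[1, num_passages]`, the scores can be used directly to pick the best passage. A minimal sketch, assuming the passage texts from the snippet above are kept in a `passages` list:

```py
import torch

passages = [
    "London has 9,787,426 inhabitants at the 2011 census",
    "London is known for its financial district",
]

# The highest dot-product score marks the most relevant passage
scores = util.dot_score(query_embedding, passage_embedding)[0]
best = int(torch.argmax(scores))
print("Best passage:", passages[best])
```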
A FAQ-retrieval example with the [clips/mfaq](https://huggingface.co/clips/mfaq) model:

```py
answer_1 = "<A>All plans come with unlimited private models and datasets."
answer_2 = "<A>AutoNLP is an automatic way to train and deploy state-of-the-art NLP models, seamlessly integrated with the Hugging Face ecosystem."
answer_3 = "<A>Based on how much training data and model variants are created, we send you a compute cost and payment link - as low as $10 per job."

model = SentenceTransformer("clips/mfaq")
query_embedding = model.encode(question)  # `question` is a "<Q>"-prefixed query defined earlier in the example
corpus_embeddings = model.encode([answer_1, answer_2, answer_3])
```

3 changes: 2 additions & 1 deletion docs/package_reference/SentenceTransformer.md
This page documents the properties and methods available when you load a SentenceTransformer model:
```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("model-name")
```
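
For instance, two members you will use constantly are `max_seq_length` and `encode`. A minimal sketch (the model name above is a placeholder, so substitute a real one such as `all-MiniLM-L6-v2`):

```python
# Inspect the maximum accepted input length (in word pieces)
print(model.max_seq_length)

# Encode a sentence and check the embedding dimensionality
embedding = model.encode("Hello world", convert_to_tensor=True)
print(embedding.shape)
```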

13 changes: 8 additions & 5 deletions docs/pretrained-models/ce-msmarco.md
The training data consists of over 500k examples, while the complete corpus consists of over 8.8 million passages.
Pre-trained models can be used like this:
```python
from sentence_transformers import CrossEncoder

model = CrossEncoder("model_name", max_length=512)
scores = model.predict(
    [("Query", "Paragraph1"), ("Query", "Paragraph2"), ("Query", "Paragraph3")]
)
```
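
A typical use of these relevance scores is re-ranking retrieved passages: score one query against each candidate and sort. A minimal sketch with the `model` loaded above (query and passages illustrative):

```python
query = "How many people live in Berlin?"
candidates = [
    "Berlin is well known for its museums.",
    "Berlin had a population of 3,520,031 registered inhabitants in an area of 891.82 square kilometers.",
]

# One (query, passage) pair per candidate
scores = model.predict([(query, passage) for passage in candidates])

# Sort candidates by decreasing relevance score
for score, passage in sorted(zip(scores, candidates), reverse=True):
    print(f"{score:.3f}\t{passage}")
```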

## Usage with Transformers
```python
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

model = AutoModelForSequenceClassification.from_pretrained("model_name")
tokenizer = AutoTokenizer.from_pretrained("model_name")

features = tokenizer(["Query", "Query"], ["Paragraph1", "Paragraph2"], padding=True, truncation=True, return_tensors="pt")

model.eval()
with torch.no_grad():
    # Compute the logits, which serve as relevance scores
    scores = model(**features).logits
    print(scores)
```

9 changes: 5 additions & 4 deletions docs/pretrained-models/dpr.md
To encode paragraphs, you need to provide a title (e.g. the Wikipedia article title) and the passage text, separated by a `[SEP]` token.

Queries are encoded with **question_encoder**:
```python
from sentence_transformers import SentenceTransformer, util

passage_encoder = SentenceTransformer("facebook-dpr-ctx_encoder-single-nq-base")

passages = [
    "London [SEP] London is the capital and largest city of England and the United Kingdom.",
    "Paris [SEP] Paris is the capital and most populous city of France.",
    "Berlin [SEP] Berlin is the capital and largest city of Germany by both area and population.",
]

passage_embeddings = passage_encoder.encode(passages)

query_encoder = SentenceTransformer("facebook-dpr-question_encoder-single-nq-base")
query = "What is the capital of England?"
query_embedding = query_encoder.encode(query)

# Important: You must use dot-product, not cosine_similarity
scores = util.dot_score(query_embedding, passage_embeddings)
print("Scores:", scores)
```
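
This caveat matters because the DPR encoders were trained with dot-product scoring and produce non-normalized embeddings, so cosine similarity (which normalizes vector lengths away) can reorder the results. A quick sanity check under the same setup as above:

```python
# Compare the two scoring functions; for DPR models,
# the dot-product scores are the meaningful ones
print("Dot-product:", util.dot_score(query_embedding, passage_embeddings))
print("Cosine:", util.cos_sim(query_embedding, passage_embeddings))
```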
7 changes: 4 additions & 3 deletions docs/pretrained-models/msmarco-v1.md
Version 1 models were trained on the training set of the MS MARCO Passage retrieval task.
They can be used like this:
```python
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("distilroberta-base-msmarco-v1")

query_embedding = model.encode("[QRY] " + "How big is London")
passage_embedding = model.encode("[DOC] " + "London has 9,787,426 inhabitants at the 2011 census")

print("Similarity:", util.pytorch_cos_sim(query_embedding, passage_embedding))
```
7 changes: 4 additions & 3 deletions docs/pretrained-models/msmarco-v2.md
The training data consists of over 500k examples, while the complete corpus consists of over 8.8 million passages.
## Usage
```python
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("msmarco-distilroberta-base-v2")

query_embedding = model.encode("How big is London")
passage_embedding = model.encode("London has 9,787,426 inhabitants at the 2011 census")

print("Similarity:", util.pytorch_cos_sim(query_embedding, passage_embedding))
```
7 changes: 4 additions & 3 deletions docs/pretrained-models/msmarco-v3.md
The training data consists of over 500k examples, while the complete corpus consists of over 8.8 million passages.
## Usage
```python
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("msmarco-distilroberta-base-v3")

query_embedding = model.encode("How big is London")
passage_embedding = model.encode("London has 9,787,426 inhabitants at the 2011 census")

print("Similarity:", util.cos_sim(query_embedding, passage_embedding))
```
11 changes: 7 additions & 4 deletions docs/pretrained-models/msmarco-v5.md
The training data consists of over 500k examples, while the complete corpus consists of over 8.8 million passages.
## Usage
```python
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("msmarco-distilbert-dot-v5")

query_embedding = model.encode("How big is London")
passage_embedding = model.encode([
    "London has 9,787,426 inhabitants at the 2011 census",
    "London is known for its financial district",
])

print("Similarity:", util.dot_score(query_embedding, passage_embedding))
```
11 changes: 7 additions & 4 deletions docs/pretrained-models/nq-v1.md

```python
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("nq-distilbert-base-v1")

query_embedding = model.encode("How many people live in London?")

# The passages are encoded as [[title1, text1], [title2, text2], ...]
passage_embedding = model.encode(
    [["London", "London has 9,787,426 inhabitants at the 2011 census."]]
)

print("Similarity:", util.cos_sim(query_embedding, passage_embedding))
```
30 changes: 19 additions & 11 deletions docs/pretrained_cross-encoders.md
This page lists available **pretrained Cross-Encoders**. Cross-Encoders require a sentence pair as input and output a score for that pair.
These models can be used like this:
```python
from sentence_transformers import CrossEncoder

model = CrossEncoder("model_name", max_length=512)
scores = model.predict([("Query1", "Paragraph1"), ("Query1", "Paragraph2")])

# For example
scores = model.predict([
    ("How many people live in Berlin?", "Berlin had a population of 3,520,031 registered inhabitants in an area of 891.82 square kilometers."),
    ("How many people live in Berlin?", "Berlin is well known for its museums."),
])
```

- **cross-encoder/ms-marco-TinyBERT-L-2-v2** - MRR@10 on MS Marco Dev Set: 32.56
QNLI is based on the [SQuAD dataset](https://rajpurkar.github.io/SQuAD-explorer/).
The following models can be used like this:
```python
from sentence_transformers import CrossEncoder

model = CrossEncoder("model_name")
scores = model.predict([("Sent A1", "Sent B1"), ("Sent A2", "Sent B2")])
```

They return a score 0...1 indicating the semantic similarity of the given sentence pair.
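
Because each pair yields one score, a simple pattern is to threshold it. A minimal sketch, assuming the `scores` from the snippet above and an illustrative cutoff of 0.5:

```python
pairs = [("Sent A1", "Sent B1"), ("Sent A2", "Sent B2")]

# The 0.5 cutoff is only illustrative - tune it on your own data
for (sent_a, sent_b), score in zip(pairs, scores):
    verdict = "similar" if score > 0.5 else "dissimilar"
    print(f"{score:.3f} {verdict}: {sent_a!r} / {sent_b!r}")
```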
Given two sentences, are these contradicting each other, entailing one another, or are they neutral?

```python
from sentence_transformers import CrossEncoder

model = CrossEncoder("model_name")
scores = model.predict([
    ("A man is eating pizza", "A man eats something"),
    ("A black race car starts up in front of a crowd of people.", "A man is driving down a lonely road."),
])

# Convert scores to labels
label_mapping = ["contradiction", "entailment", "neutral"]
labels = [label_mapping[score_max] for score_max in scores.argmax(axis=1)]
```

25 changes: 16 additions & 9 deletions docs/pretrained_models.md
We provide various pre-trained models. Using these models is easy:

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("model_name")
```

All models are hosted on the [HuggingFace Model Hub](https://huggingface.co/sentence-transformers).
The following models have been specifically trained for **Semantic Search**: given a question or search query, these models are able to find relevant text passages.

```python
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("multi-qa-MiniLM-L6-cos-v1")

query_embedding = model.encode("How big is London")
passage_embedding = model.encode([
    "London has 9,787,426 inhabitants at the 2011 census",
    "London is known for its financial district",
])

print("Similarity:", util.dot_score(query_embedding, passage_embedding))
```
The following models were trained on [Google's Natural Questions dataset](https://ai.google.com/research/NaturalQuestions):

```python
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("nq-distilbert-base-v1")

query_embedding = model.encode("How many people live in London?")

# The passages are encoded as [[title1, text1], [title2, text2], ...]
passage_embedding = model.encode(
    [["London", "London has 9,787,426 inhabitants at the 2011 census."]]
)

print("Similarity:", util.cos_sim(query_embedding, passage_embedding))
```

62 changes: 34 additions & 28 deletions docs/quickstart.md
Once you have SentenceTransformers [installed](installation.md), the usage is simple:
```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")

# Our sentences to encode
sentences = [
    "This framework generates embeddings for each input sentence",
    "Sentences are passed as a list of strings.",
    "The quick brown fox jumps over the lazy dog.",
]

# Sentences are encoded by calling model.encode()
sentence_embeddings = model.encode(sentences)

# Print the embeddings
for sentence, embedding in zip(sentences, sentence_embeddings):
    print("Sentence:", sentence)
    print("Embedding:", embedding)
```

The sentences (texts) are mapped such that sentences with similar meanings are close in vector space.

```python
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-MiniLM-L6-v2")

# Sentences are encoded by calling model.encode()
emb1 = model.encode("This is a red cat with a hat.")
emb2 = model.encode("Have you seen my red cat?")

# Compute the cosine similarity between the two embeddings
cos_sim = util.cos_sim(emb1, emb2)
print("Cosine-Similarity:", cos_sim)
```

If you have a list with more sentences, you can use the following code example:
```python
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-MiniLM-L6-v2")

sentences = [
    "A man is eating food.",
    "A man is eating a piece of bread.",
    "The girl is carrying a baby.",
    "A man is riding a horse.",
    "A woman is playing violin.",
    "Two men pushed carts through the woods.",
    "A man is riding a white horse on an enclosed ground.",
    "A monkey is playing drums.",
    "Someone in a gorilla costume is playing a set of drums.",
]

# Encode all sentences
embeddings = model.encode(sentences)

# Compute cosine similarity between all pairs
cos_sim = util.cos_sim(embeddings, embeddings)

# Add all pairs to a list with their cosine similarity score
all_sentence_combinations = []
for i in range(len(cos_sim) - 1):
    for j in range(i + 1, len(cos_sim)):
        all_sentence_combinations.append([cos_sim[i][j], i, j])

# Sort the list by the highest cosine similarity score
all_sentence_combinations = sorted(all_sentence_combinations, key=lambda x: x[0], reverse=True)

print("Top-5 most similar pairs:")
for score, i, j in all_sentence_combinations[0:5]:
    print("{} \t {} \t {:.4f}".format(sentences[i], sentences[j], cos_sim[i][j]))
```
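
For larger sentence lists, the library also ships a helper that combines encoding and pairwise comparison in one call. A minimal sketch using `util.paraphrase_mining` with the `model` and `sentences` from above:

```python
from sentence_transformers import util

# paraphrase_mining returns [score, i, j] triples,
# already sorted by decreasing cosine similarity
pairs = util.paraphrase_mining(model, sentences)

print("Top-5 most similar pairs:")
for score, i, j in pairs[0:5]:
    print("{} \t {} \t {:.4f}".format(sentences[i], sentences[j], score))
```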