From da27c961df7daf02b632a41b0df11331c39a28d5 Mon Sep 17 00:00:00 2001 From: Alan Akbik Date: Wed, 4 Dec 2024 14:22:12 +0100 Subject: [PATCH] Fix return statements --- flair/data.py | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/flair/data.py b/flair/data.py index a8d221697..f5d902f0e 100644 --- a/flair/data.py +++ b/flair/data.py @@ -74,7 +74,8 @@ def add_item(self, item: str) -> int: Args: item: a string for which to assign an id. - Returns: ID of string + Returns: + ID of string """ bytes_item = item.encode("utf-8") if bytes_item not in self.item2idx: @@ -88,7 +89,8 @@ def get_idx_for_item(self, item: str) -> int: Args: item: string for which ID is requested - Returns: ID of string, otherwise 0 + Returns: + ID of string, otherwise 0 """ item_encoded = item.encode("utf-8") if item_encoded in self.item2idx: @@ -108,7 +110,8 @@ def get_idx_for_items(self, items: list[str]) -> list[int]: Args: items: List of string for which IDs are requested - Returns: List of ID of strings + Returns: + List of ID of strings """ if not hasattr(self, "item2idx_not_encoded"): d = {key.decode("UTF-8"): value for key, value in self.item2idx.items()} @@ -1463,6 +1466,10 @@ def downsample( return self def filter_empty_sentences(self): + """A method that filters all sentences consisting of 0 tokens. + + This is an in-place operation that directly modifies the Corpus object itself by removing these sentences. + """ log.info("Filtering empty sentences") if self._train is not None: self._train = Corpus._filter_empty_sentences(self._train) @@ -1473,6 +1480,15 @@ def filter_empty_sentences(self): log.info(self) def filter_long_sentences(self, max_charlength: int): + """ + A method that filters all sentences for which the plain text is longer than a specified number of characters. + + This is an in-place operation that directly modifies the Corpus object itself by removing these sentences. 
+ + Args: + max_charlength: The maximum permissible character length of a sentence. + + """ log.info("Filtering long sentences") if self._train is not None: self._train = Corpus._filter_long_sentences(self._train, max_charlength) @@ -1517,7 +1533,7 @@ def _filter_empty_sentences(dataset) -> Dataset: return subset def make_vocab_dictionary(self, max_tokens: int = -1, min_freq: int = 1) -> Dictionary: - """Creates a dictionary of all tokens contained in the corpus. + """Creates a :class:`Dictionary` of all tokens contained in the corpus. By defining `max_tokens` you can set the maximum number of tokens that should be contained in the dictionary. If there are more than `max_tokens` tokens in the corpus, the most frequent tokens are added first. @@ -1525,10 +1541,13 @@ def make_vocab_dictionary(self, max_tokens: int = -1, min_freq: int = 1) -> Dict to be added to the dictionary. Args: - max_tokens: the maximum number of tokens that should be added to the dictionary (-1 = take all tokens) - min_freq: a token needs to occur at least `min_freq` times to be added to the dictionary (-1 = there is no limitation) + max_tokens: The maximum number of tokens that should be added to the dictionary (providing a value of "-1" + means that there is no maximum in this regard). + min_freq: A token needs to occur at least `min_freq` times to be added to the dictionary (providing a value + of "-1" means that there is no limitation in this regard). - Returns: dictionary of tokens + Returns: + A :class:`Dictionary` of all unique tokens in the corpus. """ tokens = self._get_most_common_tokens(max_tokens, min_freq) @@ -1837,7 +1856,8 @@ def make_tag_dictionary(self, tag_type: str) -> Dictionary: Args: tag_type: the label type to gather the tag labels - Returns: A Dictionary containing the labeled tags, including "O" and "<START>" and "<STOP>" + Returns: + A Dictionary containing the labeled tags, including "O" and "<START>" and "<STOP>" """ tag_dictionary: Dictionary = Dictionary(add_unk=False)