[docs] Fix docstrings (#49)

felixdittrich92 · Nov 19, 2024 · ff8cce4 · ff8cce4
1 parent 9c8abc2
commit ff8cce4
Show file tree

Hide file tree

Showing 48 changed files with 25 additions and 223 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -16,7 +16,7 @@ repos:
       - id: no-commit-to-branch
         args: ['--branch', 'main']
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.6.9
+    rev: v0.7.4
     hooks:
       - id: ruff
         args: [ --fix ]

diff --git a/onnxtr/contrib/__init__.py b/onnxtr/contrib/__init__.py
@@ -0,0 +1 @@
+from .artefacts import ArtefactDetector
diff --git a/onnxtr/contrib/artefacts.py b/onnxtr/contrib/artefacts.py
@@ -34,7 +34,6 @@ class ArtefactDetector(_BasePredictor):
     >>> results = detector(doc)
 
     Args:
-    ----
         arch: the architecture to use
         batch_size: the batch size to use
         model_path: the path to the model to use
@@ -109,7 +108,6 @@ def show(self, **kwargs: Any) -> None:
         Display the results
 
         Args:
-        ----
             **kwargs: additional keyword arguments to be passed to `plt.show`
         """
         requires_package("matplotlib", "`.show()` requires matplotlib installed")

diff --git a/onnxtr/contrib/base.py b/onnxtr/contrib/base.py
@@ -16,7 +16,6 @@ class _BasePredictor:
     Base class for all predictors
 
     Args:
-    ----
         batch_size: the batch size to use
         url: the url to use to download a model if needed
         model_path: the path to the model to use
@@ -35,13 +34,11 @@ def _init_model(self, url: Optional[str] = None, model_path: Optional[str] = Non
         Download the model from the given url if needed
 
         Args:
-        ----
             url: the url to use
             model_path: the path to the model to use
             **kwargs: additional arguments to be passed to `download_from_url`
 
         Returns:
-        -------
             Any: the ONNX loaded model
         """
         if not url and not model_path:
@@ -54,11 +51,9 @@ def preprocess(self, img: np.ndarray) -> np.ndarray:
         Preprocess the input image
 
         Args:
-        ----
             img: the input image to preprocess
 
         Returns:
-        -------
             np.ndarray: the preprocessed image
         """
         raise NotImplementedError
@@ -68,12 +63,10 @@ def postprocess(self, output: List[np.ndarray], input_images: List[List[np.ndarr
         Postprocess the model output
 
         Args:
-        ----
             output: the model output to postprocess
             input_images: the input images used to generate the output
 
         Returns:
-        -------
             Any: the postprocessed output
         """
         raise NotImplementedError
@@ -83,11 +76,9 @@ def __call__(self, inputs: List[np.ndarray]) -> Any:
         Call the model on the given inputs
 
         Args:
-        ----
             inputs: the inputs to use
 
         Returns:
-        -------
             Any: the postprocessed output
         """
         self._inputs = inputs

diff --git a/onnxtr/file_utils.py b/onnxtr/file_utils.py
@@ -19,7 +19,6 @@ def requires_package(name: str, extra_message: Optional[str] = None) -> None:  #
     package requirement helper
 
     Args:
-    ----
         name: name of the package
         extra_message: additional message to display if the package is not found
     """

diff --git a/onnxtr/io/elements.py b/onnxtr/io/elements.py
@@ -62,7 +62,6 @@ class Word(Element):
     """Implements a word element
 
     Args:
-    ----
         value: the text string of the word
         confidence: the confidence associated with the text prediction
         geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
@@ -106,7 +105,6 @@ class Artefact(Element):
     """Implements a non-textual element
 
     Args:
-    ----
         artefact_type: the type of artefact
         confidence: the confidence of the type prediction
         geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
@@ -139,7 +137,6 @@ class Line(Element):
     """Implements a line element as a collection of words
 
     Args:
-    ----
         words: list of word elements
         geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
             the page's size. If not specified, it will be resolved by default to the smallest bounding box enclosing
@@ -186,7 +183,6 @@ class Block(Element):
     """Implements a block element as a collection of lines and artefacts
 
     Args:
-    ----
         lines: list of line elements
         artefacts: list of artefacts
         geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
@@ -240,7 +236,6 @@ class Page(Element):
     """Implements a page element as a collection of blocks
 
     Args:
-    ----
         page: image encoded as a numpy array in uint8
         blocks: list of block elements
         page_idx: the index of the page in the input raw document
@@ -295,11 +290,9 @@ def synthesize(self, **kwargs) -> np.ndarray:
         """Synthesize the page from the predictions
 
         Args:
-        ----
             **kwargs: keyword arguments passed to the `synthesize_page` method
 
-        Returns
+        Returns:
-        -------
             synthesized page
         """
         return synthesize_page(self.export(), **kwargs)
@@ -309,11 +302,9 @@ def export_as_xml(self, file_title: str = "OnnxTR - XML export (hOCR)") -> Tuple
         convention: https://github.com/kba/hocr-spec/blob/master/1.2/spec.md
 
         Args:
-        ----
             file_title: the title of the XML file
 
         Returns:
-        -------
             a tuple of the XML byte string, and its ElementTree
         """
         p_idx = self.page_idx
@@ -421,7 +412,6 @@ class Document(Element):
     """Implements a document element as a collection of pages
 
     Args:
-    ----
         pages: list of page elements
     """
 
@@ -447,11 +437,9 @@ def synthesize(self, **kwargs) -> List[np.ndarray]:
         """Synthesize all pages from their predictions
 
         Args:
-        ----
             **kwargs: keyword arguments passed to the `Page.synthesize` method
 
-        Returns
-        -------
+        Returns:
             list of synthesized pages
         """
         return [page.synthesize(**kwargs) for page in self.pages]
@@ -460,11 +448,9 @@ def export_as_xml(self, **kwargs) -> List[Tuple[bytes, ET.ElementTree]]:
         """Export the document as XML (hOCR-format)
 
         Args:
-        ----
             **kwargs: additional keyword arguments passed to the Page.export_as_xml method
 
         Returns:
-        -------
             list of tuple of (bytes, ElementTree)
         """
         return [page.export_as_xml(**kwargs) for page in self.pages]

diff --git a/onnxtr/io/html.py b/onnxtr/io/html.py
@@ -15,12 +15,10 @@ def read_html(url: str, **kwargs: Any) -> bytes:
     >>> doc = read_html("https://www.yoursite.com")
 
     Args:
-    ----
         url: URL of the target web page
         **kwargs: keyword arguments from `weasyprint.HTML`
 
     Returns:
-    -------
         decoded PDF file as a bytes stream
     """
     from weasyprint import HTML

diff --git a/onnxtr/io/image.py b/onnxtr/io/image.py
@@ -25,13 +25,11 @@ def read_img_as_numpy(
     >>> page = read_img_as_numpy("path/to/your/doc.jpg")
 
     Args:
-    ----
         file: the path to the image file
         output_size: the expected output size of each page in format H x W
         rgb_output: whether the output ndarray channel order should be RGB instead of BGR.
 
     Returns:
-    -------
         the page decoded as numpy ndarray of shape H x W x 3
     """
     if isinstance(file, (str, Path)):

diff --git a/onnxtr/io/pdf.py b/onnxtr/io/pdf.py
@@ -26,15 +26,13 @@ def read_pdf(
     >>> doc = read_pdf("path/to/your/doc.pdf")
 
     Args:
-    ----
         file: the path to the PDF file
         scale: rendering scale (1 corresponds to 72dpi)
         rgb_mode: if True, the output will be RGB, otherwise BGR
         password: a password to unlock the document, if encrypted
         **kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render`
 
     Returns:
-    -------
         the list of pages decoded as numpy ndarray of shape H x W x C
     """
     # Rasterise pages to numpy ndarrays with pypdfium2

diff --git a/onnxtr/io/reader.py b/onnxtr/io/reader.py
@@ -29,12 +29,10 @@ def from_pdf(cls, file: AbstractFile, **kwargs) -> List[np.ndarray]:
         >>> doc = DocumentFile.from_pdf("path/to/your/doc.pdf")
 
         Args:
-        ----
             file: the path to the PDF file or a binary stream
             **kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render`
 
         Returns:
-        -------
             the list of pages decoded as numpy ndarray of shape H x W x 3
         """
         return read_pdf(file, **kwargs)
@@ -47,12 +45,10 @@ def from_url(cls, url: str, **kwargs) -> List[np.ndarray]:
         >>> doc = DocumentFile.from_url("https://www.yoursite.com")
 
         Args:
-        ----
             url: the URL of the target web page
             **kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render`
 
         Returns:
-        -------
             the list of pages decoded as numpy ndarray of shape H x W x 3
         """
         requires_package(
@@ -71,12 +67,10 @@ def from_images(cls, files: Union[Sequence[AbstractFile], AbstractFile], **kwarg
         >>> pages = DocumentFile.from_images(["path/to/your/page1.png", "path/to/your/page2.png"])
 
         Args:
-        ----
             files: the path to the image file or a binary stream, or a collection of those
             **kwargs: additional parameters to :meth:`onnxtr.io.image.read_img_as_numpy`
 
         Returns:
-        -------
             the list of pages decoded as numpy ndarray of shape H x W x 3
         """
         if isinstance(files, (str, Path, bytes)):

diff --git a/onnxtr/models/_utils.py b/onnxtr/models/_utils.py
@@ -20,11 +20,9 @@ def get_max_width_length_ratio(contour: np.ndarray) -> float:
     """Get the maximum shape ratio of a contour.
 
     Args:
-    ----
         contour: the contour from cv2.findContour
 
     Returns:
-    -------
         the maximum shape ratio
     """
     _, (w, h), _ = cv2.minAreaRect(contour)
@@ -43,7 +41,6 @@ def estimate_orientation(
      lines of the document and the assumption that they should be horizontal.
 
     Args:
-    ----
         img: the img or bitmap to analyze (H, W, C)
         general_page_orientation: the general orientation of the page (angle [0, 90, 180, 270 (-90)], confidence)
             estimated by a model
@@ -53,7 +50,6 @@ def estimate_orientation(
         lower_area: the minimum area of a contour to be considered
 
     Returns:
-    -------
         the estimated angle of the page (clockwise, negative for left side rotation, positive for right side rotation)
     """
     assert len(img.shape) == 3 and img.shape[-1] in [1, 3], f"Image shape {img.shape} not supported"
@@ -162,11 +158,9 @@ def get_language(text: str) -> Tuple[str, float]:
     Get the language with the highest probability or no language if only a few words or a low probability
 
     Args:
-    ----
         text (str): text
 
     Returns:
-    -------
         The detected language in ISO 639 code and confidence score
     """
     try: