Skip to content

Commit

Permalink
[docs] Fix docstrings (#49)
Browse files Browse the repository at this point in the history
  • Loading branch information
felixdittrich92 authored Nov 19, 2024
1 parent 9c8abc2 commit ff8cce4
Show file tree
Hide file tree
Showing 48 changed files with 25 additions and 223 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ repos:
- id: no-commit-to-branch
args: ['--branch', 'main']
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.6.9
rev: v0.7.4
hooks:
- id: ruff
args: [ --fix ]
Expand Down
1 change: 1 addition & 0 deletions onnxtr/contrib/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .artefacts import ArtefactDetector
2 changes: 0 additions & 2 deletions onnxtr/contrib/artefacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ class ArtefactDetector(_BasePredictor):
>>> results = detector(doc)
Args:
----
arch: the architecture to use
batch_size: the batch size to use
model_path: the path to the model to use
Expand Down Expand Up @@ -109,7 +108,6 @@ def show(self, **kwargs: Any) -> None:
Display the results
Args:
----
**kwargs: additional keyword arguments to be passed to `plt.show`
"""
requires_package("matplotlib", "`.show()` requires matplotlib installed")
Expand Down
9 changes: 0 additions & 9 deletions onnxtr/contrib/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ class _BasePredictor:
Base class for all predictors
Args:
----
batch_size: the batch size to use
url: the url to use to download a model if needed
model_path: the path to the model to use
Expand All @@ -35,13 +34,11 @@ def _init_model(self, url: Optional[str] = None, model_path: Optional[str] = Non
Download the model from the given url if needed
Args:
----
url: the url to use
model_path: the path to the model to use
**kwargs: additional arguments to be passed to `download_from_url`
Returns:
-------
Any: the ONNX loaded model
"""
if not url and not model_path:
Expand All @@ -54,11 +51,9 @@ def preprocess(self, img: np.ndarray) -> np.ndarray:
Preprocess the input image
Args:
----
img: the input image to preprocess
Returns:
-------
np.ndarray: the preprocessed image
"""
raise NotImplementedError
Expand All @@ -68,12 +63,10 @@ def postprocess(self, output: List[np.ndarray], input_images: List[List[np.ndarr
Postprocess the model output
Args:
----
output: the model output to postprocess
input_images: the input images used to generate the output
Returns:
-------
Any: the postprocessed output
"""
raise NotImplementedError
Expand All @@ -83,11 +76,9 @@ def __call__(self, inputs: List[np.ndarray]) -> Any:
Call the model on the given inputs
Args:
----
inputs: the inputs to use
Returns:
-------
Any: the postprocessed output
"""
self._inputs = inputs
Expand Down
1 change: 0 additions & 1 deletion onnxtr/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ def requires_package(name: str, extra_message: Optional[str] = None) -> None: #
package requirement helper
Args:
----
name: name of the package
extra_message: additional message to display if the package is not found
"""
Expand Down
16 changes: 1 addition & 15 deletions onnxtr/io/elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ class Word(Element):
"""Implements a word element
Args:
----
value: the text string of the word
confidence: the confidence associated with the text prediction
geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
Expand Down Expand Up @@ -106,7 +105,6 @@ class Artefact(Element):
"""Implements a non-textual element
Args:
----
artefact_type: the type of artefact
confidence: the confidence of the type prediction
geometry: bounding box of the artefact in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
Expand Down Expand Up @@ -139,7 +137,6 @@ class Line(Element):
"""Implements a line element as a collection of words
Args:
----
words: list of word elements
geometry: bounding box of the line in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
the page's size. If not specified, it will be resolved by default to the smallest bounding box enclosing
Expand Down Expand Up @@ -186,7 +183,6 @@ class Block(Element):
"""Implements a block element as a collection of lines and artefacts
Args:
----
lines: list of line elements
artefacts: list of artefacts
geometry: bounding box of the block in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
Expand Down Expand Up @@ -240,7 +236,6 @@ class Page(Element):
"""Implements a page element as a collection of blocks
Args:
----
page: image encoded as a numpy array in uint8
blocks: list of block elements
page_idx: the index of the page in the input raw document
Expand Down Expand Up @@ -295,11 +290,9 @@ def synthesize(self, **kwargs) -> np.ndarray:
"""Synthesize the page from the predictions
Args:
----
**kwargs: keyword arguments passed to the `synthesize_page` method
Returns:
-------
synthesized page
"""
return synthesize_page(self.export(), **kwargs)
Expand All @@ -309,11 +302,9 @@ def export_as_xml(self, file_title: str = "OnnxTR - XML export (hOCR)") -> Tuple
convention: https://github.com/kba/hocr-spec/blob/master/1.2/spec.md
Args:
----
file_title: the title of the XML file
Returns:
-------
a tuple of the XML byte string, and its ElementTree
"""
p_idx = self.page_idx
Expand Down Expand Up @@ -421,7 +412,6 @@ class Document(Element):
"""Implements a document element as a collection of pages
Args:
----
pages: list of page elements
"""

Expand All @@ -447,11 +437,9 @@ def synthesize(self, **kwargs) -> List[np.ndarray]:
"""Synthesize all pages from their predictions
Args:
----
**kwargs: keyword arguments passed to the `Page.synthesize` method
Returns
-------
Returns:
list of synthesized pages
"""
return [page.synthesize(**kwargs) for page in self.pages]
Expand All @@ -460,11 +448,9 @@ def export_as_xml(self, **kwargs) -> List[Tuple[bytes, ET.ElementTree]]:
"""Export the document as XML (hOCR-format)
Args:
----
**kwargs: additional keyword arguments passed to the Page.export_as_xml method
Returns:
-------
list of tuple of (bytes, ElementTree)
"""
return [page.export_as_xml(**kwargs) for page in self.pages]
Expand Down
2 changes: 0 additions & 2 deletions onnxtr/io/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,10 @@ def read_html(url: str, **kwargs: Any) -> bytes:
>>> doc = read_html("https://www.yoursite.com")
Args:
----
url: URL of the target web page
**kwargs: keyword arguments from `weasyprint.HTML`
Returns:
-------
decoded PDF file as a bytes stream
"""
from weasyprint import HTML
Expand Down
2 changes: 0 additions & 2 deletions onnxtr/io/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,11 @@ def read_img_as_numpy(
>>> page = read_img_as_numpy("path/to/your/doc.jpg")
Args:
----
file: the path to the image file
output_size: the expected output size of each page in format H x W
rgb_output: whether the output ndarray channel order should be RGB instead of BGR.
Returns:
-------
the page decoded as numpy ndarray of shape H x W x 3
"""
if isinstance(file, (str, Path)):
Expand Down
2 changes: 0 additions & 2 deletions onnxtr/io/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,13 @@ def read_pdf(
>>> doc = read_pdf("path/to/your/doc.pdf")
Args:
----
file: the path to the PDF file
scale: rendering scale (1 corresponds to 72dpi)
rgb_mode: if True, the output will be RGB, otherwise BGR
password: a password to unlock the document, if encrypted
**kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render`
Returns:
-------
the list of pages decoded as numpy ndarray of shape H x W x C
"""
# Rasterise pages to numpy ndarrays with pypdfium2
Expand Down
6 changes: 0 additions & 6 deletions onnxtr/io/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,10 @@ def from_pdf(cls, file: AbstractFile, **kwargs) -> List[np.ndarray]:
>>> doc = DocumentFile.from_pdf("path/to/your/doc.pdf")
Args:
----
file: the path to the PDF file or a binary stream
**kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render`
Returns:
-------
the list of pages decoded as numpy ndarray of shape H x W x 3
"""
return read_pdf(file, **kwargs)
Expand All @@ -47,12 +45,10 @@ def from_url(cls, url: str, **kwargs) -> List[np.ndarray]:
>>> doc = DocumentFile.from_url("https://www.yoursite.com")
Args:
----
url: the URL of the target web page
**kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render`
Returns:
-------
the list of pages decoded as numpy ndarray of shape H x W x 3
"""
requires_package(
Expand All @@ -71,12 +67,10 @@ def from_images(cls, files: Union[Sequence[AbstractFile], AbstractFile], **kwarg
>>> pages = DocumentFile.from_images(["path/to/your/page1.png", "path/to/your/page2.png"])
Args:
----
files: the path to the image file or a binary stream, or a collection of those
**kwargs: additional parameters to :meth:`onnxtr.io.image.read_img_as_numpy`
Returns:
-------
the list of pages decoded as numpy ndarray of shape H x W x 3
"""
if isinstance(files, (str, Path, bytes)):
Expand Down
6 changes: 0 additions & 6 deletions onnxtr/models/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,9 @@ def get_max_width_length_ratio(contour: np.ndarray) -> float:
"""Get the maximum shape ratio of a contour.
Args:
----
contour: the contour from cv2.findContour
Returns:
-------
the maximum shape ratio
"""
_, (w, h), _ = cv2.minAreaRect(contour)
Expand All @@ -43,7 +41,6 @@ def estimate_orientation(
lines of the document and the assumption that they should be horizontal.
Args:
----
img: the img or bitmap to analyze (H, W, C)
general_page_orientation: the general orientation of the page (angle [0, 90, 180, 270 (-90)], confidence)
estimated by a model
Expand All @@ -53,7 +50,6 @@ def estimate_orientation(
lower_area: the minimum area of a contour to be considered
Returns:
-------
the estimated angle of the page (clockwise, negative for left side rotation, positive for right side rotation)
"""
assert len(img.shape) == 3 and img.shape[-1] in [1, 3], f"Image shape {img.shape} not supported"
Expand Down Expand Up @@ -162,11 +158,9 @@ def get_language(text: str) -> Tuple[str, float]:
Get the language with the highest probability, or no language if the text contains only a few words or the probability is low
Args:
----
text: the text whose language should be detected
Returns:
-------
The detected language in ISO 639 code and confidence score
"""
try:
Expand Down
Loading

0 comments on commit ff8cce4

Please sign in to comment.