Skip to content

Commit

Permalink
feat(rapidocr_openvino): Add limit of max_side_len and min_side_len
Browse files Browse the repository at this point in the history
  • Loading branch information
SWHL committed Oct 18, 2024
1 parent c3504c2 commit da711e8
Show file tree
Hide file tree
Showing 7 changed files with 179 additions and 18 deletions.
2 changes: 2 additions & 0 deletions python/rapidocr_openvino/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ Global:
print_verbose: false
min_height: 30
width_height_ratio: 8
max_side_len: 2000
min_side_len: 30

inference_num_threads: &infer_num_threads -1

Expand Down
84 changes: 71 additions & 13 deletions python/rapidocr_openvino/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# @Contact: [email protected]
import copy
from pathlib import Path
from typing import Any, List, Optional, Tuple, Union
from typing import Any, Dict, List, Optional, Tuple, Union

import cv2
import numpy as np
Expand All @@ -15,9 +15,12 @@
LoadImage,
UpdateParameters,
VisRes,
add_round_letterbox,
get_logger,
increase_min_side,
init_args,
read_yaml,
reduce_max_side,
update_model_path,
)

Expand Down Expand Up @@ -54,6 +57,8 @@ def __init__(self, config_path: Optional[str] = None, **kwargs):
self.text_rec = TextRecognizer(config["Rec"])

self.load_img = LoadImage()
self.max_side_len = global_config["max_side_len"]
self.min_side_len = global_config["min_side_len"]

def __call__(
self,
Expand All @@ -78,11 +83,16 @@ def __call__(

img = self.load_img(img_content)

raw_h, raw_w = img.shape[:2]
op_record = {}
img, ratio_h, ratio_w = self.preprocess(img)
op_record["preprocess"] = {"ratio_h": ratio_h, "ratio_w": ratio_w}

dt_boxes, cls_res, rec_res = None, None, None
det_elapse, cls_elapse, rec_elapse = 0.0, 0.0, 0.0

if use_det:
img, padding_h = self.maybe_add_letterbox(img)
img, op_record = self.maybe_add_letterbox(img, op_record)
dt_boxes, det_elapse = self.auto_text_det(img)
if dt_boxes is None:
return None, None
Expand All @@ -95,16 +105,30 @@ def __call__(
if use_rec:
rec_res, rec_elapse = self.text_rec(img)

if dt_boxes is not None and padding_h > 0:
for box in dt_boxes:
box[:, 1] -= padding_h
if dt_boxes is not None and rec_res is not None:
dt_boxes = self._get_origin_points(dt_boxes, op_record, raw_h, raw_w)

ocr_res = self.get_final_res(
dt_boxes, cls_res, rec_res, det_elapse, cls_elapse, rec_elapse
)
return ocr_res

def maybe_add_letterbox(self, img: np.ndarray) -> Tuple[np.ndarray, int]:
def preprocess(self, img: np.ndarray) -> Tuple[np.ndarray, float, float]:
    """Resize ``img`` so its sides respect the configured length limits.

    The image is first shrunk with ``reduce_max_side`` when its longer side
    exceeds ``self.max_side_len``, then enlarged with ``increase_min_side``
    when its shorter side is below ``self.min_side_len``.

    Args:
        img: Input image; only ``img.shape[:2]`` (height, width) is read here.

    Returns:
        ``(img, ratio_h, ratio_w)`` where the ratios are original size divided
        by final size for height and width. Both are ``1.0`` when no resize
        was needed.
    """
    ratio_h = ratio_w = 1.0

    h, w = img.shape[:2]
    if max(h, w) > self.max_side_len:
        img, ratio_h, ratio_w = reduce_max_side(img, self.max_side_len)

    h, w = img.shape[:2]
    if min(h, w) < self.min_side_len:
        img, grow_ratio_h, grow_ratio_w = increase_min_side(img, self.min_side_len)
        # increase_min_side reports ratios relative to the image it was given,
        # which may already have been shrunk above. Multiply so the returned
        # ratios always map back to the original image instead of discarding
        # the first resize.
        ratio_h *= grow_ratio_h
        ratio_w *= grow_ratio_w

    return img, ratio_h, ratio_w

def maybe_add_letterbox(
self, img: np.ndarray, op_record: Dict[str, Any]
) -> Tuple[np.ndarray, Dict[str, Any]]:
h, w = img.shape[:2]

if self.width_height_ratio == -1:
Expand All @@ -113,13 +137,18 @@ def maybe_add_letterbox(self, img: np.ndarray) -> Tuple[np.ndarray, int]:
use_limit_ratio = w / h > self.width_height_ratio

if h <= self.min_height or use_limit_ratio:
new_h = max(int(w / self.width_height_ratio), self.min_height) * 2
padding_h = int(abs(new_h - h) / 2)
block_img = cv2.copyMakeBorder(
img, padding_h, padding_h, 0, 0, cv2.BORDER_CONSTANT, value=(0, 0, 0)
)
return block_img, padding_h
return img, 0
padding_h = self._get_padding_h(h, w)
block_img = add_round_letterbox(img, (padding_h, padding_h, 0, 0))
op_record["padding_1"] = {"top": padding_h, "left": 0}
return block_img, op_record

op_record["padding_1"] = {"top": 0, "left": 0}
return img, op_record

def _get_padding_h(self, h: int, w: int) -> int:
new_h = max(int(w / self.width_height_ratio), self.min_height) * 2
padding_h = int(abs(new_h - h) / 2)
return padding_h

def auto_text_det(
self, img: np.ndarray
Expand Down Expand Up @@ -201,6 +230,35 @@ def sorted_boxes(dt_boxes: np.ndarray) -> List[np.ndarray]:
break
return _boxes

def _get_origin_points(
self,
dt_boxes: List[np.ndarray],
op_record: Dict[str, Any],
raw_h: int,
raw_w: int,
) -> np.ndarray:
dt_boxes_array = np.array(dt_boxes)
for op in reversed(list(op_record.keys())):
v = op_record[op]
if "padding" in op:
top, left = v.get("top"), v.get("left")
dt_boxes_array[:, :, 0] -= left
dt_boxes_array[:, :, 1] -= top
elif "preprocess" in op:
ratio_h = v.get("ratio_h")
ratio_w = v.get("ratio_w")
dt_boxes_array[:, :, 0] *= ratio_w
dt_boxes_array[:, :, 1] *= ratio_h

dt_boxes_array = np.where(dt_boxes_array < 0, 0, dt_boxes_array)
dt_boxes_array[..., 0] = np.where(
dt_boxes_array[..., 0] > raw_w, raw_w, dt_boxes_array[..., 0]
)
dt_boxes_array[..., 1] = np.where(
dt_boxes_array[..., 1] > raw_h, raw_h, dt_boxes_array[..., 1]
)
return dt_boxes_array

def get_final_res(
self,
dt_boxes: Optional[List[np.ndarray]],
Expand Down
1 change: 1 addition & 0 deletions python/rapidocr_openvino/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from .load_image import LoadImage, LoadImageError
from .logger import get_logger
from .parse_parameters import UpdateParameters, init_args, update_model_path
from .process_img import add_round_letterbox, increase_min_side, reduce_max_side
from .vis_res import VisRes


Expand Down
87 changes: 87 additions & 0 deletions python/rapidocr_openvino/utils/process_img.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# -*- encoding: utf-8 -*-
# @Author: SWHL
# @Contact: [email protected]
from typing import Tuple

import cv2
import numpy as np


def reduce_max_side(
    img: np.ndarray, max_side_len: int = 2000
) -> Tuple[np.ndarray, float, float]:
    """Shrink ``img`` so that its longer side is about ``max_side_len``.

    The scale factor is ``max_side_len / max(h, w)`` when the longer side
    exceeds the limit and ``1.0`` otherwise. Both target sides are then
    rounded to a multiple of 32, so the image is resized even when the scale
    factor is ``1.0``.

    Args:
        img: Input image; ``img.shape[:2]`` is read as ``(h, w)``.
        max_side_len: Upper bound for the longer side before rounding.

    Returns:
        ``(resized_img, ratio_h, ratio_w)`` where each ratio is the original
        side length divided by the resized side length.

    Raises:
        ResizeImgError: If a target side rounds down to 0 or less, or if
            ``cv2.resize`` fails.
    """
    h, w = img.shape[:2]

    longest = max(h, w)
    ratio = float(max_side_len) / longest if longest > max_side_len else 1.0

    resize_h = int(round(int(h * ratio) / 32) * 32)
    resize_w = int(round(int(w * ratio) / 32) * 32)

    # Validate outside the try block so this message is not swallowed by the
    # generic handler around cv2.resize.
    if resize_w <= 0 or resize_h <= 0:
        raise ResizeImgError("resize_w or resize_h is less than or equal to 0")

    try:
        img = cv2.resize(img, (resize_w, resize_h))
    except Exception as exc:
        raise ResizeImgError(
            f"cv2.resize to (w={resize_w}, h={resize_h}) failed"
        ) from exc

    ratio_h = h / resize_h
    ratio_w = w / resize_w
    return img, ratio_h, ratio_w


def increase_min_side(
    img: np.ndarray, min_side_len: int = 30
) -> Tuple[np.ndarray, float, float]:
    """Enlarge ``img`` so that its shorter side is about ``min_side_len``.

    The scale factor is ``min_side_len / min(h, w)`` when the shorter side is
    below the limit and ``1.0`` otherwise. Both target sides are then rounded
    to a multiple of 32, so the image is resized even when the scale factor
    is ``1.0``.

    Args:
        img: Input image; ``img.shape[:2]`` is read as ``(h, w)``.
        min_side_len: Lower bound for the shorter side before rounding.

    Returns:
        ``(resized_img, ratio_h, ratio_w)`` where each ratio is the original
        side length divided by the resized side length.

    Raises:
        ResizeImgError: If the image has a zero-length side, if a target side
            rounds down to 0 or less, or if ``cv2.resize`` fails.
    """
    h, w = img.shape[:2]

    shortest = min(h, w)
    if shortest <= 0:
        # Avoid a bare ZeroDivisionError when computing the scale factor.
        raise ResizeImgError("image height or width is less than or equal to 0")

    ratio = float(min_side_len) / shortest if shortest < min_side_len else 1.0

    resize_h = int(round(int(h * ratio) / 32) * 32)
    resize_w = int(round(int(w * ratio) / 32) * 32)

    # Validate outside the try block so this message is not swallowed by the
    # generic handler around cv2.resize.
    if resize_w <= 0 or resize_h <= 0:
        raise ResizeImgError("resize_w or resize_h is less than or equal to 0")

    try:
        img = cv2.resize(img, (resize_w, resize_h))
    except Exception as exc:
        raise ResizeImgError(
            f"cv2.resize to (w={resize_w}, h={resize_h}) failed"
        ) from exc

    ratio_h = h / resize_h
    ratio_w = w / resize_w
    return img, ratio_h, ratio_w


def add_round_letterbox(
    img: np.ndarray,
    padding_tuple: Tuple[int, int, int, int],
) -> np.ndarray:
    """Surround ``img`` with a constant black border.

    Args:
        img: Image to pad.
        padding_tuple: Four border sizes passed, in order, as the border-size
            arguments of ``cv2.copyMakeBorder`` (top, bottom, left, right in
            OpenCV's signature).

    Returns:
        The padded image produced by ``cv2.copyMakeBorder``.
    """
    top = padding_tuple[0]
    bottom = padding_tuple[1]
    left = padding_tuple[2]
    right = padding_tuple[3]

    black = (0, 0, 0)
    return cv2.copyMakeBorder(
        img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=black
    )


class ResizeImgError(Exception):
    """Raised when an image cannot be resized to a valid target size."""
5 changes: 3 additions & 2 deletions python/requirements_vino.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
pyclipper>=1.2.0
openvino>=2022.2.0
openvino>=2022.2.0,<=2024.0.0
opencv_python>=4.5.1.48
numpy>=1.19.5,<3.0.0
six>=1.15.0
Shapely>=1.7.1,!=2.0.4 # python3.12 2.0.4 bug
PyYAML
Pillow
Pillow
tqdm
4 changes: 2 additions & 2 deletions python/tests/base_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import importlib
import sys
from pathlib import Path
from typing import Union
from typing import Optional, Union

import requests
import yaml
Expand All @@ -21,7 +21,7 @@ def __init__(self, package_name: str = "rapidocr_onnxruntime"):
sys.path.append(str(self.root_dir))
sys.path.append(str(self.package_dir))

def init_module(self, module_name: str, class_name: str = None):
def init_module(self, module_name: str, class_name: Optional[str] = None):
if class_name is None:
module_part = importlib.import_module(f"{self.package_name}")
return module_part
Expand Down
14 changes: 13 additions & 1 deletion python/tests/test_vino.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,31 @@
import cv2
import numpy as np
import pytest
from base_module import BaseModule

from tests.base_module import BaseModule

root_dir = Path(__file__).resolve().parent.parent
sys.path.append(str(root_dir))

from rapidocr_openvino import LoadImageError, RapidOCR
from tests.base_module import download_file

engine = RapidOCR()
tests_dir = root_dir / "tests" / "test_files"
img_path = tests_dir / "ch_en_num.jpg"
package_name = "rapidocr_openvino"


def test_long_img():
    """Run OCR on a downloaded long image and check the number of results.

    The image is fetched into the test-files directory and removed again
    afterwards, whether or not the assertions pass.
    """
    img_url = "https://github.com/RapidAI/RapidOCR/releases/download/v1.1.0/long.jpeg"
    img_path = tests_dir / "long.jpeg"
    try:
        download_file(img_url, save_path=img_path)
        result, _ = engine(img_path)
        assert result is not None
        assert len(result) == 55
    finally:
        # Clean up even on failure so a failed run does not leave the
        # downloaded file behind.
        if img_path.exists():
            img_path.unlink()


def test_mode_one_img():
img_path = tests_dir / "issue_170.png"
result, _ = engine(img_path)
Expand Down

0 comments on commit da711e8

Please sign in to comment.