-
-
Notifications
You must be signed in to change notification settings - Fork 368
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(rapidocr_openvino): Add limit of max_side_len and min_side_len
- Loading branch information
Showing
7 changed files
with
179 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,7 +3,7 @@ | |
# @Contact: [email protected] | ||
import copy | ||
from pathlib import Path | ||
from typing import Any, List, Optional, Tuple, Union | ||
from typing import Any, Dict, List, Optional, Tuple, Union | ||
|
||
import cv2 | ||
import numpy as np | ||
|
@@ -15,9 +15,12 @@ | |
LoadImage, | ||
UpdateParameters, | ||
VisRes, | ||
add_round_letterbox, | ||
get_logger, | ||
increase_min_side, | ||
init_args, | ||
read_yaml, | ||
reduce_max_side, | ||
update_model_path, | ||
) | ||
|
||
|
@@ -54,6 +57,8 @@ def __init__(self, config_path: Optional[str] = None, **kwargs): | |
self.text_rec = TextRecognizer(config["Rec"]) | ||
|
||
self.load_img = LoadImage() | ||
self.max_side_len = global_config["max_side_len"] | ||
self.min_side_len = global_config["min_side_len"] | ||
|
||
def __call__( | ||
self, | ||
|
@@ -78,11 +83,16 @@ def __call__( | |
|
||
img = self.load_img(img_content) | ||
|
||
raw_h, raw_w = img.shape[:2] | ||
op_record = {} | ||
img, ratio_h, ratio_w = self.preprocess(img) | ||
op_record["preprocess"] = {"ratio_h": ratio_h, "ratio_w": ratio_w} | ||
|
||
dt_boxes, cls_res, rec_res = None, None, None | ||
det_elapse, cls_elapse, rec_elapse = 0.0, 0.0, 0.0 | ||
|
||
if use_det: | ||
img, padding_h = self.maybe_add_letterbox(img) | ||
img, op_record = self.maybe_add_letterbox(img, op_record) | ||
dt_boxes, det_elapse = self.auto_text_det(img) | ||
if dt_boxes is None: | ||
return None, None | ||
|
@@ -95,16 +105,30 @@ def __call__( | |
if use_rec: | ||
rec_res, rec_elapse = self.text_rec(img) | ||
|
||
if dt_boxes is not None and padding_h > 0: | ||
for box in dt_boxes: | ||
box[:, 1] -= padding_h | ||
if dt_boxes is not None and rec_res is not None: | ||
dt_boxes = self._get_origin_points(dt_boxes, op_record, raw_h, raw_w) | ||
|
||
ocr_res = self.get_final_res( | ||
dt_boxes, cls_res, rec_res, det_elapse, cls_elapse, rec_elapse | ||
) | ||
return ocr_res | ||
|
||
def maybe_add_letterbox(self, img: np.ndarray) -> Tuple[np.ndarray, int]: | ||
def preprocess(self, img: np.ndarray) -> Tuple[np.ndarray, float, float]:
    """Resize ``img`` so that its sides respect the configured limits.

    If the longest side exceeds ``self.max_side_len`` the image is passed to
    ``reduce_max_side``. Afterwards, if the shortest side is below
    ``self.min_side_len`` the image is passed to ``increase_min_side``.

    Args:
        img: Image array whose first two dimensions are height and width.

    Returns:
        A tuple ``(img, ratio_h, ratio_w)``. The ratios are the factors by
        which coordinates in the returned image must be multiplied to get
        coordinates in the input image; both are ``1.0`` when no resize
        was needed.
    """
    ratio_h = ratio_w = 1.0

    h, w = img.shape[:2]
    if max(h, w) > self.max_side_len:
        img, ratio_h, ratio_w = reduce_max_side(img, self.max_side_len)

    # Re-read the shape: the shrink step above may have changed it.
    h, w = img.shape[:2]
    if min(h, w) < self.min_side_len:
        img, grow_ratio_h, grow_ratio_w = increase_min_side(
            img, self.min_side_len
        )
        # The helper reports ratios relative to the image it was given,
        # which may already be the shrunk one. Compose both steps so the
        # result always refers to the original input instead of silently
        # discarding the first resize.
        ratio_h *= grow_ratio_h
        ratio_w *= grow_ratio_w

    return img, ratio_h, ratio_w
|
||
def maybe_add_letterbox( | ||
self, img: np.ndarray, op_record: Dict[str, Any] | ||
) -> Tuple[np.ndarray, Dict[str, Any]]: | ||
h, w = img.shape[:2] | ||
|
||
if self.width_height_ratio == -1: | ||
|
@@ -113,13 +137,18 @@ def maybe_add_letterbox(self, img: np.ndarray) -> Tuple[np.ndarray, int]: | |
use_limit_ratio = w / h > self.width_height_ratio | ||
|
||
if h <= self.min_height or use_limit_ratio: | ||
new_h = max(int(w / self.width_height_ratio), self.min_height) * 2 | ||
padding_h = int(abs(new_h - h) / 2) | ||
block_img = cv2.copyMakeBorder( | ||
img, padding_h, padding_h, 0, 0, cv2.BORDER_CONSTANT, value=(0, 0, 0) | ||
) | ||
return block_img, padding_h | ||
return img, 0 | ||
padding_h = self._get_padding_h(h, w) | ||
block_img = add_round_letterbox(img, (padding_h, padding_h, 0, 0)) | ||
op_record["padding_1"] = {"top": padding_h, "left": 0} | ||
return block_img, op_record | ||
|
||
op_record["padding_1"] = {"top": 0, "left": 0} | ||
return img, op_record | ||
|
||
def _get_padding_h(self, h: int, w: int) -> int: | ||
new_h = max(int(w / self.width_height_ratio), self.min_height) * 2 | ||
padding_h = int(abs(new_h - h) / 2) | ||
return padding_h | ||
|
||
def auto_text_det( | ||
self, img: np.ndarray | ||
|
@@ -201,6 +230,35 @@ def sorted_boxes(dt_boxes: np.ndarray) -> List[np.ndarray]: | |
break | ||
return _boxes | ||
|
||
def _get_origin_points( | ||
self, | ||
dt_boxes: List[np.ndarray], | ||
op_record: Dict[str, Any], | ||
raw_h: int, | ||
raw_w: int, | ||
) -> np.ndarray: | ||
dt_boxes_array = np.array(dt_boxes) | ||
for op in reversed(list(op_record.keys())): | ||
v = op_record[op] | ||
if "padding" in op: | ||
top, left = v.get("top"), v.get("left") | ||
dt_boxes_array[:, :, 0] -= left | ||
dt_boxes_array[:, :, 1] -= top | ||
elif "preprocess" in op: | ||
ratio_h = v.get("ratio_h") | ||
ratio_w = v.get("ratio_w") | ||
dt_boxes_array[:, :, 0] *= ratio_w | ||
dt_boxes_array[:, :, 1] *= ratio_h | ||
|
||
dt_boxes_array = np.where(dt_boxes_array < 0, 0, dt_boxes_array) | ||
dt_boxes_array[..., 0] = np.where( | ||
dt_boxes_array[..., 0] > raw_w, raw_w, dt_boxes_array[..., 0] | ||
) | ||
dt_boxes_array[..., 1] = np.where( | ||
dt_boxes_array[..., 1] > raw_h, raw_h, dt_boxes_array[..., 1] | ||
) | ||
return dt_boxes_array | ||
|
||
def get_final_res( | ||
self, | ||
dt_boxes: Optional[List[np.ndarray]], | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
# -*- encoding: utf-8 -*- | ||
# @Author: SWHL | ||
# @Contact: [email protected] | ||
from typing import Tuple | ||
|
||
import cv2 | ||
import numpy as np | ||
|
||
|
||
def reduce_max_side(
    img: np.ndarray, max_side_len: int = 2000
) -> Tuple[np.ndarray, float, float]:
    """Shrink ``img`` so that its longest side is roughly ``max_side_len``.

    The scale factor is ``max_side_len / max(h, w)`` when the longest side
    exceeds ``max_side_len`` and ``1.0`` otherwise. Each scaled side is then
    rounded to the nearest multiple of 32, so the image is resized even when
    the scale factor is ``1.0`` and the final size can differ slightly from
    ``max_side_len``.

    Args:
        img: Image array whose first two dimensions are height and width.
        max_side_len: Upper limit for the longest side before rounding.

    Returns:
        A tuple ``(img, ratio_h, ratio_w)`` with the resized image and the
        original-over-resized ratios for height and width.

    Raises:
        ResizeImgError: If a target side rounds down to zero or less, or if
            ``cv2.resize`` fails (the original error is chained as the cause).
    """
    h, w = img.shape[:2]

    longest_side = max(h, w)
    ratio = 1.0
    if longest_side > max_side_len:
        ratio = float(max_side_len) / longest_side

    # Truncate the scaled size first, then snap to the nearest multiple of 32.
    resize_h = int(round(int(h * ratio) / 32) * 32)
    resize_w = int(round(int(w * ratio) / 32) * 32)

    # Validate outside the ``try`` so this message reaches the caller instead
    # of being replaced by a message-less wrapper exception.
    if resize_w <= 0 or resize_h <= 0:
        raise ResizeImgError("resize_w or resize_h is less than or equal to 0")

    try:
        img = cv2.resize(img, (resize_w, resize_h))
    except Exception as exc:
        raise ResizeImgError(
            "cv2.resize failed for target size (w={}, h={})".format(
                resize_w, resize_h
            )
        ) from exc

    ratio_h = h / resize_h
    ratio_w = w / resize_w
    return img, ratio_h, ratio_w
|
||
|
||
def increase_min_side(
    img: np.ndarray, min_side_len: int = 30
) -> Tuple[np.ndarray, float, float]:
    """Enlarge ``img`` so that its shortest side is roughly ``min_side_len``.

    The scale factor is ``min_side_len / min(h, w)`` when the shortest side
    is below ``min_side_len`` and ``1.0`` otherwise. Each scaled side is then
    rounded to the nearest multiple of 32, so the image is resized even when
    the scale factor is ``1.0``.

    Args:
        img: Image array whose first two dimensions are height and width.
        min_side_len: Lower limit for the shortest side before rounding.

    Returns:
        A tuple ``(img, ratio_h, ratio_w)`` with the resized image and the
        original-over-resized ratios for height and width.

    Raises:
        ResizeImgError: If the image has a zero-length side that would need
            enlarging, if a target side rounds down to zero or less, or if
            ``cv2.resize`` fails (the original error is chained as the cause).
    """
    h, w = img.shape[:2]

    shortest_side = min(h, w)
    ratio = 1.0
    if shortest_side < min_side_len:
        if shortest_side <= 0:
            # Report an empty image as a resize failure rather than letting
            # the division below raise a bare ZeroDivisionError.
            raise ResizeImgError("image height or width is 0, cannot resize")
        ratio = float(min_side_len) / shortest_side

    # Truncate the scaled size first, then snap to the nearest multiple of 32.
    resize_h = int(round(int(h * ratio) / 32) * 32)
    resize_w = int(round(int(w * ratio) / 32) * 32)

    # Validate outside the ``try`` so this message reaches the caller instead
    # of being replaced by a message-less wrapper exception.
    if resize_w <= 0 or resize_h <= 0:
        raise ResizeImgError("resize_w or resize_h is less than or equal to 0")

    try:
        img = cv2.resize(img, (resize_w, resize_h))
    except Exception as exc:
        raise ResizeImgError(
            "cv2.resize failed for target size (w={}, h={})".format(
                resize_w, resize_h
            )
        ) from exc

    ratio_h = h / resize_h
    ratio_w = w / resize_w
    return img, ratio_h, ratio_w
|
||
|
||
def add_round_letterbox(
    img: np.ndarray,
    padding_tuple: Tuple[int, int, int, int],
) -> np.ndarray:
    """Surround ``img`` with a constant black border.

    Args:
        img: Image array to pad.
        padding_tuple: Border widths, forwarded positionally to
            ``cv2.copyMakeBorder`` as ``(top, bottom, left, right)``.

    Returns:
        The padded image produced by ``cv2.copyMakeBorder``.
    """
    # Read the first four entries by index, exactly as positional lookups.
    top, bottom, left, right = (padding_tuple[i] for i in range(4))
    black = (0, 0, 0)
    return cv2.copyMakeBorder(
        img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=black
    )
|
||
|
||
class ResizeImgError(Exception):
    """Raised when an image cannot be resized to the computed target size."""
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,9 @@ | ||
pyclipper>=1.2.0 | ||
openvino>=2022.2.0 | ||
openvino>=2022.2.0,<=2024.0.0 | ||
opencv_python>=4.5.1.48 | ||
numpy>=1.19.5,<3.0.0 | ||
six>=1.15.0 | ||
Shapely>=1.7.1,!=2.0.4 # python3.12 2.0.4 bug | ||
PyYAML | ||
Pillow | ||
Pillow | ||
tqdm |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters