From 2ef1857217191598716cb524c81a6b070e3837ed Mon Sep 17 00:00:00 2001 From: SWHL Date: Wed, 25 Oct 2023 18:58:05 +0800 Subject: [PATCH] Fixed issue #133 --- python/demo.py | 12 ++--- python/rapidocr_onnxruntime/utils.py | 79 +++++++++++++++------------- python/rapidocr_openvino/utils.py | 79 +++++++++++++++------------- python/rapidocr_paddle/utils.py | 79 +++++++++++++++------------- 4 files changed, 132 insertions(+), 117 deletions(-) diff --git a/python/demo.py b/python/demo.py index c19137966..037f5502f 100644 --- a/python/demo.py +++ b/python/demo.py @@ -3,16 +3,14 @@ # @Contact: liekkaskono@163.com import cv2 -from rapidocr_paddle import RapidOCR, VisRes - -# from rapidocr_onnxruntime import RapidOCR, VisRes - +from rapidocr_onnxruntime import RapidOCR, VisRes +# from rapidocr_paddle import RapidOCR, VisRes # from rapidocr_openvino import RapidOCR, VisRes engine = RapidOCR() -vis = VisRes(font_path="resources/fonts/FZYTK.TTF") +vis = VisRes() image_path = "tests/test_files/ch_en_num.jpg" with open(image_path, "rb") as f: @@ -23,5 +21,7 @@ print(elapse_list) boxes, txts, scores = list(zip(*result)) -vis_img = vis(img, boxes, txts, scores) + +font_path = "resources/fonts/FZYTK.TTF" +vis_img = vis(img, boxes, txts, scores, font_path) cv2.imwrite("vis.png", vis_img) diff --git a/python/rapidocr_onnxruntime/utils.py b/python/rapidocr_onnxruntime/utils.py index b762e45f6..2e5e34505 100644 --- a/python/rapidocr_onnxruntime/utils.py +++ b/python/rapidocr_onnxruntime/utils.py @@ -364,16 +364,7 @@ def remove_prefix( class VisRes: - def __init__( - self, font_path: Optional[Union[str, Path]] = None, text_score: float = 0.5 - ): - if font_path is None or not Path(font_path).exists(): - raise FileNotFoundError( - f"The {font_path} does not exists! \n" - f"You could download the file in the https://drive.google.com/file/d/1evWVX38EFNwTq_n5gTFgnlv8tdaNcyIA/view?usp=sharing" - ) - - self.font_path = str(font_path) + def __init__(self, text_score: float = 0.5): self.text_score = text_score self.load_img = LoadImage() @@ -383,32 +374,38 @@ def __call__( dt_boxes: np.ndarray, txts: Optional[Union[List[str], Tuple[str]]] = None, scores: Optional[Tuple[float]] = None, + font_path: Optional[str] = None, ) -> np.ndarray: - img = self.load_img(img_content) - img = Image.fromarray(img) - if txts is None and scores is None: - return self.draw_dt_boxes(img, dt_boxes) + return self.draw_dt_boxes(img_content, dt_boxes) + return self.draw_ocr_box_txt(img_content, dt_boxes, txts, scores, font_path) - return self.draw_ocr_box_txt(img, dt_boxes, txts, scores) + def draw_dt_boxes(self, img_content: InputType, dt_boxes: np.ndarray) -> np.ndarray: + img = self.load_img(img_content) - def draw_dt_boxes(self, img: Image, dt_boxes: np.ndarray) -> np.ndarray: - img_temp = img.copy() - draw_img = ImageDraw.Draw(img_temp) for idx, box in enumerate(dt_boxes): - draw_img.polygon(np.array(box), fill=self.get_random_color()) + color = self.get_random_color() - box_height = self.get_box_height(box) - font_size = max(int(box_height * 0.8), 10) - font = ImageFont.truetype(self.font_path, font_size, encoding="utf-8") - draw_img.polygon( - np.array(box).reshape(8).tolist(), - outline=(0, 0, 0), + points = np.array(box) + cv2.polylines(img, np.int32([points]), 1, color=color, thickness=1) + + start_point = round(points[0][0]), round(points[0][1]) + cv2.putText( + img, f"{idx}", start_point, cv2.FONT_HERSHEY_SIMPLEX, 1, color, 3 ) - draw_img.text([box[0][0], box[0][1]], str(idx), fill=(0, 0, 0), font=font) - return np.array(img_temp) + return img - def draw_ocr_box_txt(self, image: Image, boxes, txts, scores=None): + def draw_ocr_box_txt( + self, + img_content: InputType, + dt_boxes: np.ndarray, + txts: Optional[Union[List[str], Tuple[str]]] = None, + scores: Optional[Tuple[float]] = None, + font_path: Optional[str] = None, + ) -> np.ndarray: + font_path = self.get_font_path(font_path) + + image = Image.fromarray(self.load_img(img_content)) h, w = image.height, image.width if image.mode == "L": image = image.convert("RGB") @@ -419,22 +416,21 @@ def draw_ocr_box_txt(self, image: Image, boxes, txts, scores=None): random.seed(0) draw_left = ImageDraw.Draw(img_left) draw_right = ImageDraw.Draw(img_right) - for idx, (box, txt) in enumerate(zip(boxes, txts)): + for idx, (box, txt) in enumerate(zip(dt_boxes, txts)): if scores is not None and float(scores[idx]) < self.text_score: continue color = self.get_random_color() - draw_left.polygon(np.array(box), fill=color) - draw_right.polygon( - np.array(box).reshape(8).tolist(), - outline=color, - ) + + box_list = np.array(box).reshape(8).tolist() + draw_left.polygon(box_list, fill=color) + draw_right.polygon(box_list, outline=color) box_height = self.get_box_height(box) box_width = self.get_box_width(box) if box_height > 2 * box_width: font_size = max(int(box_width * 0.9), 10) - font = ImageFont.truetype(self.font_path, font_size, encoding="utf-8") + font = ImageFont.truetype(font_path, font_size, encoding="utf-8") cur_y = box[0][1] for c in txt: char_size = font.getsize(c) @@ -444,7 +440,7 @@ def draw_ocr_box_txt(self, image: Image, boxes, txts, scores=None): cur_y += char_size[1] else: font_size = max(int(box_height * 0.8), 10) - font = ImageFont.truetype(self.font_path, font_size, encoding="utf-8") + font = ImageFont.truetype(font_path, font_size, encoding="utf-8") draw_right.text([box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font) img_left = Image.blend(image, img_left, 0.5) @@ -454,7 +450,16 @@ def draw_ocr_box_txt(self, image: Image, boxes, txts, scores=None): return np.array(img_show) @staticmethod - def get_random_color(): + def get_font_path(font_path: Optional[Union[str, Path]] = None) -> str: + if font_path is None or not Path(font_path).exists(): + raise FileNotFoundError( + f"The {font_path} does not exists! \n" + f"You could download the file in the https://drive.google.com/file/d/1evWVX38EFNwTq_n5gTFgnlv8tdaNcyIA/view?usp=sharing" + ) + return str(font_path) + + @staticmethod + def get_random_color() -> Tuple[int, int, int]: return ( random.randint(0, 255), random.randint(0, 255), diff --git a/python/rapidocr_openvino/utils.py b/python/rapidocr_openvino/utils.py index f391045f2..8ce9e86aa 100644 --- a/python/rapidocr_openvino/utils.py +++ b/python/rapidocr_openvino/utils.py @@ -292,16 +292,7 @@ def remove_prefix( class VisRes: - def __init__( - self, font_path: Optional[Union[str, Path]] = None, text_score: float = 0.5 - ): - if font_path is None or not Path(font_path).exists(): - raise FileNotFoundError( - f"The {font_path} does not exists! \n" - f"You could download the file in the https://drive.google.com/file/d/1evWVX38EFNwTq_n5gTFgnlv8tdaNcyIA/view?usp=sharing" - ) - - self.font_path = str(font_path) + def __init__(self, text_score: float = 0.5): self.text_score = text_score self.load_img = LoadImage() @@ -311,32 +302,38 @@ def __call__( dt_boxes: np.ndarray, txts: Optional[Union[List[str], Tuple[str]]] = None, scores: Optional[Tuple[float]] = None, + font_path: Optional[str] = None, ) -> np.ndarray: - img = self.load_img(img_content) - img = Image.fromarray(img) - if txts is None and scores is None: - return self.draw_dt_boxes(img, dt_boxes) + return self.draw_dt_boxes(img_content, dt_boxes) + return self.draw_ocr_box_txt(img_content, dt_boxes, txts, scores, font_path) - return self.draw_ocr_box_txt(img, dt_boxes, txts, scores) + def draw_dt_boxes(self, img_content: InputType, dt_boxes: np.ndarray) -> np.ndarray: + img = self.load_img(img_content) - def draw_dt_boxes(self, img: Image, dt_boxes: np.ndarray) -> np.ndarray: - img_temp = img.copy() - draw_img = ImageDraw.Draw(img_temp) for idx, box in enumerate(dt_boxes): - draw_img.polygon(np.array(box), fill=self.get_random_color()) + color = self.get_random_color() - box_height = self.get_box_height(box) - font_size = max(int(box_height * 0.8), 10) - font = ImageFont.truetype(self.font_path, font_size, encoding="utf-8") - draw_img.polygon( - np.array(box).reshape(8).tolist(), - outline=(0, 0, 0), + points = np.array(box) + cv2.polylines(img, np.int32([points]), 1, color=color, thickness=1) + + start_point = round(points[0][0]), round(points[0][1]) + cv2.putText( + img, f"{idx}", start_point, cv2.FONT_HERSHEY_SIMPLEX, 1, color, 3 ) - draw_img.text([box[0][0], box[0][1]], str(idx), fill=(0, 0, 0), font=font) - return np.array(img_temp) + return img - def draw_ocr_box_txt(self, image: Image, boxes, txts, scores=None): + def draw_ocr_box_txt( + self, + img_content: InputType, + dt_boxes: np.ndarray, + txts: Optional[Union[List[str], Tuple[str]]] = None, + scores: Optional[Tuple[float]] = None, + font_path: Optional[str] = None, + ) -> np.ndarray: + font_path = self.get_font_path(font_path) + + image = Image.fromarray(self.load_img(img_content)) h, w = image.height, image.width if image.mode == "L": image = image.convert("RGB") @@ -347,22 +344,21 @@ def draw_ocr_box_txt(self, image: Image, boxes, txts, scores=None): random.seed(0) draw_left = ImageDraw.Draw(img_left) draw_right = ImageDraw.Draw(img_right) - for idx, (box, txt) in enumerate(zip(boxes, txts)): + for idx, (box, txt) in enumerate(zip(dt_boxes, txts)): if scores is not None and float(scores[idx]) < self.text_score: continue color = self.get_random_color() - draw_left.polygon(np.array(box), fill=color) - draw_right.polygon( - np.array(box).reshape(8).tolist(), - outline=color, - ) + + box_list = np.array(box).reshape(8).tolist() + draw_left.polygon(box_list, fill=color) + draw_right.polygon(box_list, outline=color) box_height = self.get_box_height(box) box_width = self.get_box_width(box) if box_height > 2 * box_width: font_size = max(int(box_width * 0.9), 10) - font = ImageFont.truetype(self.font_path, font_size, encoding="utf-8") + font = ImageFont.truetype(font_path, font_size, encoding="utf-8") cur_y = box[0][1] for c in txt: char_size = font.getsize(c) @@ -372,7 +368,7 @@ def draw_ocr_box_txt(self, image: Image, boxes, txts, scores=None): cur_y += char_size[1] else: font_size = max(int(box_height * 0.8), 10) - font = ImageFont.truetype(self.font_path, font_size, encoding="utf-8") + font = ImageFont.truetype(font_path, font_size, encoding="utf-8") draw_right.text([box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font) img_left = Image.blend(image, img_left, 0.5) @@ -382,7 +378,16 @@ def draw_ocr_box_txt(self, image: Image, boxes, txts, scores=None): return np.array(img_show) @staticmethod - def get_random_color(): + def get_font_path(font_path: Optional[Union[str, Path]] = None) -> str: + if font_path is None or not Path(font_path).exists(): + raise FileNotFoundError( + f"The {font_path} does not exists! \n" + f"You could download the file in the https://drive.google.com/file/d/1evWVX38EFNwTq_n5gTFgnlv8tdaNcyIA/view?usp=sharing" + ) + return str(font_path) + + @staticmethod + def get_random_color() -> Tuple[int, int, int]: return ( random.randint(0, 255), random.randint(0, 255), diff --git a/python/rapidocr_paddle/utils.py b/python/rapidocr_paddle/utils.py index 0143cc277..323884ac2 100644 --- a/python/rapidocr_paddle/utils.py +++ b/python/rapidocr_paddle/utils.py @@ -385,16 +385,7 @@ def remove_prefix( class VisRes: - def __init__( - self, font_path: Optional[Union[str, Path]] = None, text_score: float = 0.5 - ): - if font_path is None or not Path(font_path).exists(): - raise FileNotFoundError( - f"The {font_path} does not exists! \n" - f"You could download the file in the https://drive.google.com/file/d/1evWVX38EFNwTq_n5gTFgnlv8tdaNcyIA/view?usp=sharing" - ) - - self.font_path = str(font_path) + def __init__(self, text_score: float = 0.5): self.text_score = text_score self.load_img = LoadImage() @@ -404,32 +395,38 @@ def __call__( dt_boxes: np.ndarray, txts: Optional[Union[List[str], Tuple[str]]] = None, scores: Optional[Tuple[float]] = None, + font_path: Optional[str] = None, ) -> np.ndarray: - img = self.load_img(img_content) - img = Image.fromarray(img) - if txts is None and scores is None: - return self.draw_dt_boxes(img, dt_boxes) + return self.draw_dt_boxes(img_content, dt_boxes) + return self.draw_ocr_box_txt(img_content, dt_boxes, txts, scores, font_path) - return self.draw_ocr_box_txt(img, dt_boxes, txts, scores) + def draw_dt_boxes(self, img_content: InputType, dt_boxes: np.ndarray) -> np.ndarray: + img = self.load_img(img_content) - def draw_dt_boxes(self, img: Image, dt_boxes: np.ndarray) -> np.ndarray: - img_temp = img.copy() - draw_img = ImageDraw.Draw(img_temp) for idx, box in enumerate(dt_boxes): - draw_img.polygon(np.array(box), fill=self.get_random_color()) + color = self.get_random_color() - box_height = self.get_box_height(box) - font_size = max(int(box_height * 0.8), 10) - font = ImageFont.truetype(self.font_path, font_size, encoding="utf-8") - draw_img.polygon( - np.array(box).reshape(8).tolist(), - outline=(0, 0, 0), + points = np.array(box) + cv2.polylines(img, np.int32([points]), 1, color=color, thickness=1) + + start_point = round(points[0][0]), round(points[0][1]) + cv2.putText( + img, f"{idx}", start_point, cv2.FONT_HERSHEY_SIMPLEX, 1, color, 3 ) - draw_img.text([box[0][0], box[0][1]], str(idx), fill=(0, 0, 0), font=font) - return np.array(img_temp) + return img - def draw_ocr_box_txt(self, image: Image, boxes, txts, scores=None): + def draw_ocr_box_txt( + self, + img_content: InputType, + dt_boxes: np.ndarray, + txts: Optional[Union[List[str], Tuple[str]]] = None, + scores: Optional[Tuple[float]] = None, + font_path: Optional[str] = None, + ) -> np.ndarray: + font_path = self.get_font_path(font_path) + + image = Image.fromarray(self.load_img(img_content)) h, w = image.height, image.width if image.mode == "L": image = image.convert("RGB") @@ -440,22 +437,21 @@ def draw_ocr_box_txt(self, image: Image, boxes, txts, scores=None): random.seed(0) draw_left = ImageDraw.Draw(img_left) draw_right = ImageDraw.Draw(img_right) - for idx, (box, txt) in enumerate(zip(boxes, txts)): + for idx, (box, txt) in enumerate(zip(dt_boxes, txts)): if scores is not None and float(scores[idx]) < self.text_score: continue color = self.get_random_color() - draw_left.polygon(np.array(box), fill=color) - draw_right.polygon( - np.array(box).reshape(8).tolist(), - outline=color, - ) + + box_list = np.array(box).reshape(8).tolist() + draw_left.polygon(box_list, fill=color) + draw_right.polygon(box_list, outline=color) box_height = self.get_box_height(box) box_width = self.get_box_width(box) if box_height > 2 * box_width: font_size = max(int(box_width * 0.9), 10) - font = ImageFont.truetype(self.font_path, font_size, encoding="utf-8") + font = ImageFont.truetype(font_path, font_size, encoding="utf-8") cur_y = box[0][1] for c in txt: char_size = font.getsize(c) @@ -465,7 +461,7 @@ def draw_ocr_box_txt(self, image: Image, boxes, txts, scores=None): cur_y += char_size[1] else: font_size = max(int(box_height * 0.8), 10) - font = ImageFont.truetype(self.font_path, font_size, encoding="utf-8") + font = ImageFont.truetype(font_path, font_size, encoding="utf-8") draw_right.text([box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font) img_left = Image.blend(image, img_left, 0.5) @@ -475,7 +471,16 @@ def draw_ocr_box_txt(self, image: Image, boxes, txts, scores=None): return np.array(img_show) @staticmethod - def get_random_color(): + def get_font_path(font_path: Optional[Union[str, Path]] = None) -> str: + if font_path is None or not Path(font_path).exists(): + raise FileNotFoundError( + f"The {font_path} does not exists! \n" + f"You could download the file in the https://drive.google.com/file/d/1evWVX38EFNwTq_n5gTFgnlv8tdaNcyIA/view?usp=sharing" + ) + return str(font_path) + + @staticmethod + def get_random_color() -> Tuple[int, int, int]: return ( random.randint(0, 255), random.randint(0, 255),