Skip to content

Commit

Permalink
Fixed issue #133
Browse files (browse the repository at this point in the history)
  • Loading branch information
SWHL committed Oct 25, 2023
1 parent 9a98f25 commit 2ef1857
Show file tree
Hide file tree
Showing 4 changed files with 132 additions and 117 deletions.
12 changes: 6 additions & 6 deletions python/demo.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,16 +3,14 @@
# @Contact: [email protected]
import cv2

from rapidocr_paddle import RapidOCR, VisRes

# from rapidocr_onnxruntime import RapidOCR, VisRes

from rapidocr_onnxruntime import RapidOCR, VisRes

# from rapidocr_paddle import RapidOCR, VisRes
# from rapidocr_openvino import RapidOCR, VisRes


engine = RapidOCR()
vis = VisRes(font_path="resources/fonts/FZYTK.TTF")
vis = VisRes()

image_path = "tests/test_files/ch_en_num.jpg"
with open(image_path, "rb") as f:
Expand All @@ -23,5 +21,7 @@
print(elapse_list)

boxes, txts, scores = list(zip(*result))
vis_img = vis(img, boxes, txts, scores)

font_path = "resources/fonts/FZYTK.TTF"
vis_img = vis(img, boxes, txts, scores, font_path)
cv2.imwrite("vis.png", vis_img)
79 changes: 42 additions & 37 deletions python/rapidocr_onnxruntime/utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -364,16 +364,7 @@ def remove_prefix(


class VisRes:
def __init__(
self, font_path: Optional[Union[str, Path]] = None, text_score: float = 0.5
):
if font_path is None or not Path(font_path).exists():
raise FileNotFoundError(
f"The {font_path} does not exists! \n"
f"You could download the file in the https://drive.google.com/file/d/1evWVX38EFNwTq_n5gTFgnlv8tdaNcyIA/view?usp=sharing"
)

self.font_path = str(font_path)
def __init__(self, text_score: float = 0.5):
self.text_score = text_score
self.load_img = LoadImage()

Expand All @@ -383,32 +374,38 @@ def __call__(
dt_boxes: np.ndarray,
txts: Optional[Union[List[str], Tuple[str]]] = None,
scores: Optional[Tuple[float]] = None,
font_path: Optional[str] = None,
) -> np.ndarray:
img = self.load_img(img_content)
img = Image.fromarray(img)

if txts is None and scores is None:
return self.draw_dt_boxes(img, dt_boxes)
return self.draw_dt_boxes(img_content, dt_boxes)
return self.draw_ocr_box_txt(img_content, dt_boxes, txts, scores, font_path)

return self.draw_ocr_box_txt(img, dt_boxes, txts, scores)
def draw_dt_boxes(self, img_content: InputType, dt_boxes: np.ndarray) -> np.ndarray:
img = self.load_img(img_content)

def draw_dt_boxes(self, img: Image, dt_boxes: np.ndarray) -> np.ndarray:
img_temp = img.copy()
draw_img = ImageDraw.Draw(img_temp)
for idx, box in enumerate(dt_boxes):
draw_img.polygon(np.array(box), fill=self.get_random_color())
color = self.get_random_color()

box_height = self.get_box_height(box)
font_size = max(int(box_height * 0.8), 10)
font = ImageFont.truetype(self.font_path, font_size, encoding="utf-8")
draw_img.polygon(
np.array(box).reshape(8).tolist(),
outline=(0, 0, 0),
points = np.array(box)
cv2.polylines(img, np.int32([points]), 1, color=color, thickness=1)

start_point = round(points[0][0]), round(points[0][1])
cv2.putText(
img, f"{idx}", start_point, cv2.FONT_HERSHEY_SIMPLEX, 1, color, 3
)
draw_img.text([box[0][0], box[0][1]], str(idx), fill=(0, 0, 0), font=font)
return np.array(img_temp)
return img

def draw_ocr_box_txt(self, image: Image, boxes, txts, scores=None):
def draw_ocr_box_txt(
self,
img_content: InputType,
dt_boxes: np.ndarray,
txts: Optional[Union[List[str], Tuple[str]]] = None,
scores: Optional[Tuple[float]] = None,
font_path: Optional[str] = None,
) -> np.ndarray:
font_path = self.get_font_path(font_path)

image = Image.fromarray(self.load_img(img_content))
h, w = image.height, image.width
if image.mode == "L":
image = image.convert("RGB")
Expand All @@ -419,22 +416,21 @@ def draw_ocr_box_txt(self, image: Image, boxes, txts, scores=None):
random.seed(0)
draw_left = ImageDraw.Draw(img_left)
draw_right = ImageDraw.Draw(img_right)
for idx, (box, txt) in enumerate(zip(boxes, txts)):
for idx, (box, txt) in enumerate(zip(dt_boxes, txts)):
if scores is not None and float(scores[idx]) < self.text_score:
continue

color = self.get_random_color()
draw_left.polygon(np.array(box), fill=color)
draw_right.polygon(
np.array(box).reshape(8).tolist(),
outline=color,
)

box_list = np.array(box).reshape(8).tolist()
draw_left.polygon(box_list, fill=color)
draw_right.polygon(box_list, outline=color)

box_height = self.get_box_height(box)
box_width = self.get_box_width(box)
if box_height > 2 * box_width:
font_size = max(int(box_width * 0.9), 10)
font = ImageFont.truetype(self.font_path, font_size, encoding="utf-8")
font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
cur_y = box[0][1]
for c in txt:
char_size = font.getsize(c)
Expand All @@ -444,7 +440,7 @@ def draw_ocr_box_txt(self, image: Image, boxes, txts, scores=None):
cur_y += char_size[1]
else:
font_size = max(int(box_height * 0.8), 10)
font = ImageFont.truetype(self.font_path, font_size, encoding="utf-8")
font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
draw_right.text([box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font)

img_left = Image.blend(image, img_left, 0.5)
Expand All @@ -454,7 +450,16 @@ def draw_ocr_box_txt(self, image: Image, boxes, txts, scores=None):
return np.array(img_show)

@staticmethod
def get_random_color():
def get_font_path(font_path: Optional[Union[str, Path]] = None) -> str:
if font_path is None or not Path(font_path).exists():
raise FileNotFoundError(
f"The {font_path} does not exists! \n"
f"You could download the file in the https://drive.google.com/file/d/1evWVX38EFNwTq_n5gTFgnlv8tdaNcyIA/view?usp=sharing"
)
return str(font_path)

@staticmethod
def get_random_color() -> Tuple[int, int, int]:
return (
random.randint(0, 255),
random.randint(0, 255),
Expand Down
79 changes: 42 additions & 37 deletions python/rapidocr_openvino/utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -292,16 +292,7 @@ def remove_prefix(


class VisRes:
def __init__(
self, font_path: Optional[Union[str, Path]] = None, text_score: float = 0.5
):
if font_path is None or not Path(font_path).exists():
raise FileNotFoundError(
f"The {font_path} does not exists! \n"
f"You could download the file in the https://drive.google.com/file/d/1evWVX38EFNwTq_n5gTFgnlv8tdaNcyIA/view?usp=sharing"
)

self.font_path = str(font_path)
def __init__(self, text_score: float = 0.5):
self.text_score = text_score
self.load_img = LoadImage()

Expand All @@ -311,32 +302,38 @@ def __call__(
dt_boxes: np.ndarray,
txts: Optional[Union[List[str], Tuple[str]]] = None,
scores: Optional[Tuple[float]] = None,
font_path: Optional[str] = None,
) -> np.ndarray:
img = self.load_img(img_content)
img = Image.fromarray(img)

if txts is None and scores is None:
return self.draw_dt_boxes(img, dt_boxes)
return self.draw_dt_boxes(img_content, dt_boxes)
return self.draw_ocr_box_txt(img_content, dt_boxes, txts, scores, font_path)

return self.draw_ocr_box_txt(img, dt_boxes, txts, scores)
def draw_dt_boxes(self, img_content: InputType, dt_boxes: np.ndarray) -> np.ndarray:
img = self.load_img(img_content)

def draw_dt_boxes(self, img: Image, dt_boxes: np.ndarray) -> np.ndarray:
img_temp = img.copy()
draw_img = ImageDraw.Draw(img_temp)
for idx, box in enumerate(dt_boxes):
draw_img.polygon(np.array(box), fill=self.get_random_color())
color = self.get_random_color()

box_height = self.get_box_height(box)
font_size = max(int(box_height * 0.8), 10)
font = ImageFont.truetype(self.font_path, font_size, encoding="utf-8")
draw_img.polygon(
np.array(box).reshape(8).tolist(),
outline=(0, 0, 0),
points = np.array(box)
cv2.polylines(img, np.int32([points]), 1, color=color, thickness=1)

start_point = round(points[0][0]), round(points[0][1])
cv2.putText(
img, f"{idx}", start_point, cv2.FONT_HERSHEY_SIMPLEX, 1, color, 3
)
draw_img.text([box[0][0], box[0][1]], str(idx), fill=(0, 0, 0), font=font)
return np.array(img_temp)
return img

def draw_ocr_box_txt(self, image: Image, boxes, txts, scores=None):
def draw_ocr_box_txt(
self,
img_content: InputType,
dt_boxes: np.ndarray,
txts: Optional[Union[List[str], Tuple[str]]] = None,
scores: Optional[Tuple[float]] = None,
font_path: Optional[str] = None,
) -> np.ndarray:
font_path = self.get_font_path(font_path)

image = Image.fromarray(self.load_img(img_content))
h, w = image.height, image.width
if image.mode == "L":
image = image.convert("RGB")
Expand All @@ -347,22 +344,21 @@ def draw_ocr_box_txt(self, image: Image, boxes, txts, scores=None):
random.seed(0)
draw_left = ImageDraw.Draw(img_left)
draw_right = ImageDraw.Draw(img_right)
for idx, (box, txt) in enumerate(zip(boxes, txts)):
for idx, (box, txt) in enumerate(zip(dt_boxes, txts)):
if scores is not None and float(scores[idx]) < self.text_score:
continue

color = self.get_random_color()
draw_left.polygon(np.array(box), fill=color)
draw_right.polygon(
np.array(box).reshape(8).tolist(),
outline=color,
)

box_list = np.array(box).reshape(8).tolist()
draw_left.polygon(box_list, fill=color)
draw_right.polygon(box_list, outline=color)

box_height = self.get_box_height(box)
box_width = self.get_box_width(box)
if box_height > 2 * box_width:
font_size = max(int(box_width * 0.9), 10)
font = ImageFont.truetype(self.font_path, font_size, encoding="utf-8")
font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
cur_y = box[0][1]
for c in txt:
char_size = font.getsize(c)
Expand All @@ -372,7 +368,7 @@ def draw_ocr_box_txt(self, image: Image, boxes, txts, scores=None):
cur_y += char_size[1]
else:
font_size = max(int(box_height * 0.8), 10)
font = ImageFont.truetype(self.font_path, font_size, encoding="utf-8")
font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
draw_right.text([box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font)

img_left = Image.blend(image, img_left, 0.5)
Expand All @@ -382,7 +378,16 @@ def draw_ocr_box_txt(self, image: Image, boxes, txts, scores=None):
return np.array(img_show)

@staticmethod
def get_random_color():
def get_font_path(font_path: Optional[Union[str, Path]] = None) -> str:
if font_path is None or not Path(font_path).exists():
raise FileNotFoundError(
f"The {font_path} does not exists! \n"
f"You could download the file in the https://drive.google.com/file/d/1evWVX38EFNwTq_n5gTFgnlv8tdaNcyIA/view?usp=sharing"
)
return str(font_path)

@staticmethod
def get_random_color() -> Tuple[int, int, int]:
return (
random.randint(0, 255),
random.randint(0, 255),
Expand Down
Loading

0 comments on commit 2ef1857

Please sign in to comment.