Skip to content

Commit

Permalink
update image_ocr_controller.py, moving the calculation of the percentage of image area covered by text to function `calculate_text_percentage`
Browse files Browse the repository at this point in the history
  • Loading branch information
ahmednasserswe committed Sep 30, 2024
1 parent f8719bc commit df30bdd
Showing 1 changed file with 22 additions and 17 deletions.
39 changes: 22 additions & 17 deletions app/main/controller/image_ocr_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,22 @@ def polygon_area(self, vertices):
x2, y2 = vertices[(i + 1) % len(vertices)]
area += (x1 * y2 - x2 * y1)
return abs(area) / 2

def calculate_text_percentage(self, response):
    """Return the percentage of the image area covered by detected text blocks.

    The bounding box of every block on every page of
    ``response.full_text_annotation`` is measured with ``self.polygon_area``
    and the areas are added together. The areas are simply summed, so
    blocks that overlap are counted more than once.

    Args:
        response: OCR response exposing ``full_text_annotation.pages``; each
            page provides ``width``, ``height`` and ``blocks``, and each
            block provides ``bounding_box.vertices`` with ``x``/``y``.

    Returns:
        ``(summed block area / image area) * 100``.
    """
    annotation = response.full_text_annotation
    block_boxes = [
        block.bounding_box
        for page in annotation.pages
        for block in page.blocks
    ]

    covered_area = 0
    for box in block_boxes:
        corners = [(vertex.x, vertex.y) for vertex in box.vertices]
        covered_area += self.polygon_area(corners)

    # The first page carries the width and height of the whole image.
    # Because single images are sent, the response always has exactly one page.
    first_page = annotation.pages[0]
    whole_image_area = first_page.width * first_page.height
    return (covered_area / whole_image_area) * 100

@api.response(200, 'text successfully extracted.')
@api.doc('Perform text extraction from an image')
@api.doc(params={'url': 'url of image to extract text from'})
Expand All @@ -46,22 +61,12 @@ def post(self):
return

#### calculate bounding boxes areas.
bounds = []
for page in response.full_text_annotation.pages:
for block in page.blocks:
bounds.append(block.bounding_box)
total_area = 0
for annotation in bounds:
vertices = [(v.x, v.y) for v in annotation.vertices]
area = self.polygon_area(vertices)
total_area += area
image_width = response.full_text_annotation.pages[0].width
image_height = response.full_text_annotation.pages[0].height
image_area = image_width * image_height
percentage = (total_area / image_area) * 100

app.logger.info(
f"[Alegre OCR] [image_uri {image.source.image_uri}] [percentage of image area covered by text {percentage}%] Image OCR response package looks like {convert_text_annotation_to_json(texts[0])}")
try:
text_percentage = self.calculate_text_percentage(response)
app.logger.info(
f"[Alegre OCR] [image_uri {image.source.image_uri}] [percentage of image area covered by text {text_percentage}%] Image OCR response package looks like {convert_text_annotation_to_json(texts[0])}")
except Exception as caught_exception:
app.logger.error(f"[image_uri {image.source.image_uri}] Error calculating percentage of image area covered by text. Error was {caught_exception}. Image OCR response package looks like {convert_text_annotation_to_json(texts[0])}")

# Read the image dimensions from the first page of the OCR response.
image_width = response.full_text_annotation.pages[0].width
Expand Down

0 comments on commit df30bdd

Please sign in to comment.