update image_ocr_controller.py, moving the calculation of the percentage of image area covered by text into the function `calculate_text_percentage`
meedan · Sep 30, 2024 · df30bdd · df30bdd
1 parent f8719bc
commit df30bdd
Showing 1 changed file with 22 additions and 17 deletions.
diff --git a/app/main/controller/image_ocr_controller.py b/app/main/controller/image_ocr_controller.py
@@ -25,7 +25,22 @@ def polygon_area(self, vertices):
             x2, y2 = vertices[(i + 1) % len(vertices)]
             area += (x1 * y2 - x2 * y1)
         return abs(area) / 2
-
+    def calculate_text_percentage(self, response):
+        bounds = []
+        for page in response.full_text_annotation.pages:
+            for block in page.blocks:
+                    bounds.append(block.bounding_box)
+        total_text_area = 0
+        for annotation in bounds:
+            vertices = [(v.x, v.y) for v in annotation.vertices]
+            area = self.polygon_area(vertices)
+            total_text_area += area
+        # response object contains the whole image width and height in response.full_text_annotation.pages[0]
+        # as we are sending images, response.full_text_annotation.pages is always 1 page only
+        image_area = response.full_text_annotation.pages[0].width * response.full_text_annotation.pages[0].height
+        text_percentage = (total_text_area / image_area) * 100
+        return text_percentage
+
     @api.response(200, 'text successfully extracted.')
     @api.doc('Perform text extraction from an image')
     @api.doc(params={'url': 'url of image to extract text from'})
@@ -46,22 +61,12 @@ def post(self):
             return
 
         #### calculate bounding boxes areas.
-        bounds = []
-        for page in response.full_text_annotation.pages:
-            for block in page.blocks:
-                    bounds.append(block.bounding_box)
-        total_area = 0
-        for annotation in bounds:
-            vertices = [(v.x, v.y) for v in annotation.vertices]
-            area = self.polygon_area(vertices)
-            total_area += area
-        image_width = response.full_text_annotation.pages[0].width
-        image_height = response.full_text_annotation.pages[0].height
-        image_area = image_width * image_height
-        percentage = (total_area / image_area) * 100
-
-        app.logger.info(
-            f"[Alegre OCR] [image_uri {image.source.image_uri}] [percentage of image area covered by text {percentage}%] Image OCR response package looks like {convert_text_annotation_to_json(texts[0])}")
+        try:
+            text_percentage = self.calculate_text_percentage(response)
+            app.logger.info(
+                f"[Alegre OCR] [image_uri {image.source.image_uri}] [percentage of image area covered by text {text_percentage}%] Image OCR response package looks like {convert_text_annotation_to_json(texts[0])}")
+        except Exception as caught_exception:
+            app.logger.error(f"[image_uri {image.source.image_uri}] Error calculating percentage of image area covered by text. Error was {caught_exception}. Image OCR response package looks like {convert_text_annotation_to_json(texts[0])}")
 
         # Assuming the image has a known width and height (you'll need to replace this with your actual image dimensions)
         image_width = response.full_text_annotation.pages[0].width