From b8b952c28fa3e6129097ff6447eb822743a37b39 Mon Sep 17 00:00:00 2001
From: Richard Abrich <richard.abrich@gmail.com>
Date: Thu, 31 Oct 2024 23:48:42 -0400
Subject: [PATCH] undo changes to gradio_demo.py

---
 gradio_demo.py | 34 +++++++++-------------------------
 1 file changed, 9 insertions(+), 25 deletions(-)

diff --git a/gradio_demo.py b/gradio_demo.py
index b664c3b..cc835e4 100644
--- a/gradio_demo.py
+++ b/gradio_demo.py
@@ -1,21 +1,15 @@
-from typing import Optional, Text, Tuple
+from typing import Optional
+
 import gradio as gr
+import numpy as np
 import torch
 from PIL import Image
 import io
-import base64
-import json
-import numpy as np
 
+import base64, os
 from utils import check_ocr_box, get_yolo_model, get_caption_model_processor, get_som_labeled_img
-
-class NumpyEncoder(json.JSONEncoder):
-    def default(self, obj):
-        if isinstance(obj, np.ndarray):
-            return obj.tolist()
-        if isinstance(obj, np.float32):
-            return float(obj)
-        return json.JSONEncoder.default(self, obj)
+import torch
+from PIL import Image
 
 yolo_model = get_yolo_model(model_path='weights/icon_detect/best.pt')
 caption_model_processor = get_caption_model_processor(model_name="florence2", model_name_or_path="weights/icon_caption_florence")
@@ -69,27 +63,17 @@ def process(
 
     image_save_path = 'imgs/saved_image_demo.png'
     image_input.save(image_save_path)
+    # import pdb; pdb.set_trace()
 
     ocr_bbox_rslt, is_goal_filtered = check_ocr_box(image_save_path, display_img = False, output_bb_format='xyxy', goal_filtering=None, easyocr_args={'paragraph': False, 'text_threshold':0.9}, use_paddleocr=use_paddleocr)
     text, ocr_bbox = ocr_bbox_rslt
+    # print('prompt:', prompt)
 
     dino_labled_img, label_coordinates, parsed_content_list = get_som_labeled_img(image_save_path, yolo_model, BOX_TRESHOLD = box_threshold, output_coord_in_ratio=True, ocr_bbox=ocr_bbox,draw_bbox_config=draw_bbox_config, caption_model_processor=caption_model_processor, ocr_text=text,iou_threshold=iou_threshold)
-
-    # Convert base64 string to PIL Image
     image = Image.open(io.BytesIO(base64.b64decode(dino_labled_img)))
 
     print('finish processing')
-
-    # Combine text and bounding boxes into JSON-friendly format
-    result = {
-        "label_coordinates": label_coordinates,
-        "parsed_content_list": parsed_content_list,
-    }
-    
-    # Convert to JSON string format for return using the custom encoder
-    result_json = json.dumps(result, indent=4, cls=NumpyEncoder)
-    
-    return image, result_json
+    return image, str(parsed_content_list), str(label_coordinates)
 
 
 with gr.Blocks() as demo: