From b8b952c28fa3e6129097ff6447eb822743a37b39 Mon Sep 17 00:00:00 2001 From: Richard Abrich <richard.abrich@gmail.com> Date: Thu, 31 Oct 2024 23:48:42 -0400 Subject: [PATCH] undo changes to gradio_demo.py --- gradio_demo.py | 34 +++++++++------------------------- 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/gradio_demo.py b/gradio_demo.py index b664c3b..cc835e4 100644 --- a/gradio_demo.py +++ b/gradio_demo.py @@ -1,21 +1,15 @@ -from typing import Optional, Text, Tuple +from typing import Optional + import gradio as gr +import numpy as np import torch from PIL import Image import io -import base64 -import json -import numpy as np +import base64, os from utils import check_ocr_box, get_yolo_model, get_caption_model_processor, get_som_labeled_img - -class NumpyEncoder(json.JSONEncoder): - def default(self, obj): - if isinstance(obj, np.ndarray): - return obj.tolist() - if isinstance(obj, np.float32): - return float(obj) - return json.JSONEncoder.default(self, obj) +import torch +from PIL import Image yolo_model = get_yolo_model(model_path='weights/icon_detect/best.pt') caption_model_processor = get_caption_model_processor(model_name="florence2", model_name_or_path="weights/icon_caption_florence") @@ -69,27 +63,17 @@ def process( image_save_path = 'imgs/saved_image_demo.png' image_input.save(image_save_path) + # import pdb; pdb.set_trace() ocr_bbox_rslt, is_goal_filtered = check_ocr_box(image_save_path, display_img = False, output_bb_format='xyxy', goal_filtering=None, easyocr_args={'paragraph': False, 'text_threshold':0.9}, use_paddleocr=use_paddleocr) text, ocr_bbox = ocr_bbox_rslt + # print('prompt:', prompt) dino_labled_img, label_coordinates, parsed_content_list = get_som_labeled_img(image_save_path, yolo_model, BOX_TRESHOLD = box_threshold, output_coord_in_ratio=True, ocr_bbox=ocr_bbox,draw_bbox_config=draw_bbox_config, caption_model_processor=caption_model_processor, ocr_text=text,iou_threshold=iou_threshold) - - # Convert base64 string to PIL Image image = Image.open(io.BytesIO(base64.b64decode(dino_labled_img))) print('finish processing') - - # Combine text and bounding boxes into JSON-friendly format - result = { - "label_coordinates": label_coordinates, - "parsed_content_list": parsed_content_list, - } - - # Convert to JSON string format for return using the custom encoder - result_json = json.dumps(result, indent=4, cls=NumpyEncoder) - - return image, result_json + return image, str(parsed_content_list), str(label_coordinates) with gr.Blocks() as demo: