From dc1ba664ba3e1d7e686e829e6bad20c0fd3d940a Mon Sep 17 00:00:00 2001 From: Rezart Abazi Date: Sat, 6 Jan 2024 14:22:49 +0100 Subject: [PATCH] data extraction from jpg llm --- backend/llms/gpt.py | 36 ++++++++++++--- backend/llms/prompt_manager.py | 19 ++++++++ backend/llms/system_message_manager.py | 4 ++ backend/object_storage/__init__.py | 0 .../digitalocean_space_manager.py | 19 ++++++++ backend/requirements.txt | 4 +- backend/routes/data_profile_routes.py | 46 +++---------------- backend/utils/image_conversion_manager.py | 13 ++---- 8 files changed, 85 insertions(+), 56 deletions(-) create mode 100644 backend/object_storage/__init__.py diff --git a/backend/llms/gpt.py b/backend/llms/gpt.py index d2a4f46..58a3940 100644 --- a/backend/llms/gpt.py +++ b/backend/llms/gpt.py @@ -1,5 +1,5 @@ from openai import ChatCompletion -from typing import List, Optional +from typing import Optional import json import openai @@ -124,9 +124,22 @@ def _get_system_message_content(self, assistant_type: str = "generic") -> str: ) return system_message_content - def _create_message(self, role: str, prompt: str): + def _create_message(self, role: str, prompt: str, image_url: str = ""): """Create either a user, system, or assistant message.""" - return {"role": f"{role}", "content": f"{prompt}"} + if image_url: + return { + "role": f"{role}", + "content": [ + {"type": "text", "text": prompt}, + { + "type": "image", + "image_url": {"url": image_url}, + }, + ], + "image_url": f"{image_url}", + } + else: + return {"role": f"{role}", "content": f"{prompt}"} def _add_system_message(self, assistant_type: str) -> None: """ @@ -151,8 +164,8 @@ def _add_system_message(self, assistant_type: str) -> None: self.llm_type = assistant_type - async def _send_and_receive_message(self, prompt: str) -> str: - user_message = self._create_message("user", prompt) + async def _send_and_receive_message(self, prompt: str, image_url: str = "") -> str: + user_message = self._create_message("user", prompt, image_url) self.history.append(user_message) # Check token limit and truncate history if needed @@ -330,5 +343,14 @@ def generate_text(self, input_text): return assistant_message_content - def generate_analytics_text(self, input_text: str, table_names: List[str]): - self._add_system_message(assistant_type="analytics_chat") + def extract_data_from_jpg(self, instructions: str, jpg_file: str): + self._add_system_message(assistant_type="jpg_data_extraction") + + base64_image = tiktoken.image_to_base64(jpg_file) + image_url = f"data:image/jpeg;base64,{base64_image}" + + prompt = self.prompt_manager.jpg_data_extraction_prompt(instructions) + + assistant_message_content = self._send_and_receive_message(prompt, image_url) + + return assistant_message_content diff --git a/backend/llms/prompt_manager.py b/backend/llms/prompt_manager.py index 828530d..bcee9c6 100644 --- a/backend/llms/prompt_manager.py +++ b/backend/llms/prompt_manager.py @@ -93,3 +93,22 @@ def create_table_desc_prompt( if extra_desc: prompt += f"\n\nAdditional information about the sample data: {extra_desc}" return prompt + + def jpg_data_extraction_prompt(self, instructions: str): + prompt = f""" + Extract the following data from the given JPG file: + + User request: + {instructions} + + Provide output in JSON format using the requested information as keys. + + Example output: + {{ + "client_name":"John Doe", + "invoice_amount":"1000", + "date":"01-01-2021" + }} + In this example, the requested information would have been client name, invoice amount, and date. + """ + return prompt diff --git a/backend/llms/system_message_manager.py b/backend/llms/system_message_manager.py index 4678709..e1793dc 100644 --- a/backend/llms/system_message_manager.py +++ b/backend/llms/system_message_manager.py @@ -46,6 +46,10 @@ def __init__(self): You are a table categorization assistant. Your task is to analyze sample data and existing table metadata to identify the most suitable table for appending the sample data. Return only the name of the table. """, + """jpg_data_extraction""": """ + You are a JPG data extraction assistant. Your task is to extract specific data in the order specifed from a JPG file and return it in a json format. + Return only the extracted data. + """, "generic": "You are a generic assistant.", } diff --git a/backend/object_storage/__init__.py b/backend/object_storage/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/object_storage/digitalocean_space_manager.py b/backend/object_storage/digitalocean_space_manager.py index 795c2a0..f24a51d 100644 --- a/backend/object_storage/digitalocean_space_manager.py +++ b/backend/object_storage/digitalocean_space_manager.py @@ -81,3 +81,22 @@ def create_presigned_url( # The response contains the presigned URL return response + + def delete_file(self, organization_name, object_name): + """Delete a file from an S3 bucket + + :param organization_name: Name of the organization the file belongs to + :param object_name: S3 object name + :return: True if the referenced object was deleted, otherwise False + """ + + # Prepend the organization_name to the object_name + object_name = f"{organization_name}/{object_name}" + + # Delete the file + try: + self.client.delete_object(Bucket=self.bucket_name, Key=object_name) + except Exception as e: + print(e) + return False + return True diff --git a/backend/requirements.txt b/backend/requirements.txt index 11cea2f..64c9270 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -20,4 +20,6 @@ langchain==0.0.351 pytest==6.2.5 pytest-asyncio==0.15.1 sendgrid==6.11.0 -boto3==1.34.10 \ No newline at end of file +boto3==1.34.10 +pillow==10.1.0 +pdf2image==1.16.3 \ No newline at end of file diff --git a/backend/routes/data_profile_routes.py b/backend/routes/data_profile_routes.py index 9402afa..25e98d3 100644 --- a/backend/routes/data_profile_routes.py +++ b/backend/routes/data_profile_routes.py @@ -6,6 +6,7 @@ from database.database_manager import DatabaseManager from database.data_profile_manager import DataProfileManager +from llms.gpt import GPTLLM from models.data_profile import ( DataProfile, DataProfileCreateRequest, @@ -74,49 +75,14 @@ async def preview_data_profile( temp_file_path = temp_file.name # Use the ImageConversionManager context manager to convert the PDF to JPG - jpg_files = [] - with ImageConversionManager(temp_file_path, "/change-me/") as manager: - jpg_files = manager.convert_to_jpg(temp_file_path) + with ImageConversionManager(temp_file_path, "/tmp/") as manager: + jpg_file = manager.convert_to_jpg(temp_file_path) + gpt = GPTLLM() + data = gpt.extract_data_from_jpg(instructions, jpg_file) # Clean up the uploaded temp file os.unlink(temp_file_path) - - # Assuming you have a function to send the JPGs to the LLM and get a response - # Send the JPG files to the LLM using the API - # You need to define how you'll handle multiple JPGs - this is just a placeholder - # if jpg_files: - # # Here you would typically prepare and send your request to the LLM API. - # # This will vary greatly depending on the LLM's API specifics. - # # For now, this is a placeholder for how you might make the request. - # # Replace with your actual API endpoint and key - # llm_api_endpoint = "https://api.example.com/llm" - # api_key = "your_api_key" - # response = requests.post( - # llm_api_endpoint, - # headers={"Authorization": f"Bearer {api_key}"}, - # files={"file": open(jpg_files[0], "rb")}, - # ) - - # # Handle the response - # if response.status_code == 200: - # llm_response = response.json() - # else: - # raise HTTPException(status_code=500, detail="LLM API request failed") - # else: - # raise HTTPException(status_code=500, detail="Failed to convert file") - - # Clean up the created JPG files - for jpg_file in jpg_files: - os.unlink(jpg_file) - - # Return the LLM's response as JSON - # return JSONResponse(content=llm_response) - - -# Now you would include this router in your FastAPI application instance. -# from fastapi import FastAPI -# app = FastAPI() -# app.include_router(data_profile_router) + return data # the response has to be a json diff --git a/backend/utils/image_conversion_manager.py b/backend/utils/image_conversion_manager.py index dcc61c1..72ebbeb 100644 --- a/backend/utils/image_conversion_manager.py +++ b/backend/utils/image_conversion_manager.py @@ -57,14 +57,11 @@ def convert_to_jpg(self, file_path): # Add more conditions for other file types if needed def _convert_pdf_to_jpg(self, file_path): - images = convert_from_path(file_path) - jpg_files = [] - for i, image in enumerate(images): - jpg_filename = f"output_page_{i}.jpg" - jpg_file_path = os.path.join(self.output_folder, jpg_filename) - image.save(jpg_file_path, "JPEG") - jpg_files.append(jpg_file_path) - return jpg_files + image = convert_from_path(file_path) + jpg_filename = os.path.basename(file_path).replace(".pdf", ".jpg") + jpg_file_path = os.path.join(self.output_folder, jpg_filename) + image.save(jpg_file_path, "JPEG") + return jpg_file_path def _convert_png_to_jpg(self, file_path): rgb_im = Image.open(file_path).convert("RGB")