diff --git a/backend/llms/gpt.py b/backend/llms/gpt.py
index d2a4f46..58a3940 100644
--- a/backend/llms/gpt.py
+++ b/backend/llms/gpt.py
@@ -1,5 +1,6 @@
 from openai import ChatCompletion
-from typing import List, Optional
+from typing import Optional
+import base64
 import json
 import openai
 import tiktoken
@@ -124,9 +125,21 @@ def _get_system_message_content(self, assistant_type: str = "generic") -> str:
         )
         return system_message_content
 
-    def _create_message(self, role: str, prompt: str):
+    def _create_message(self, role: str, prompt: str, image_url: str = ""):
         """Create either a user, system, or assistant message."""
-        return {"role": f"{role}", "content": f"{prompt}"}
+        if image_url:
+            # Vision-style message: content is a list of parts. The part type
+            # must be "image_url" per the OpenAI Chat Completions API; no extra
+            # top-level keys are allowed in the message object.
+            return {
+                "role": f"{role}",
+                "content": [
+                    {"type": "text", "text": prompt},
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": image_url},
+                    },
+                ],
+            }
+        else:
+            return {"role": f"{role}", "content": f"{prompt}"}
 
     def _add_system_message(self, assistant_type: str) -> None:
         """
@@ -151,8 +164,8 @@ def _add_system_message(self, assistant_type: str) -> None:
 
         self.llm_type = assistant_type
 
-    async def _send_and_receive_message(self, prompt: str) -> str:
-        user_message = self._create_message("user", prompt)
+    async def _send_and_receive_message(self, prompt: str, image_url: str = "") -> str:
+        user_message = self._create_message("user", prompt, image_url)
         self.history.append(user_message)
 
         # Check token limit and truncate history if needed
@@ -330,5 +343,17 @@ def generate_text(self, input_text):
 
         return assistant_message_content
 
-    def generate_analytics_text(self, input_text: str, table_names: List[str]):
-        self._add_system_message(assistant_type="analytics_chat")
+    async def extract_data_from_jpg(self, instructions: str, jpg_file: str):
+        """Extract data from a JPG file according to the user's instructions."""
+        self._add_system_message(assistant_type="jpg_data_extraction")
+
+        # Encode the image as a base64 data URL for the vision API.
+        with open(jpg_file, "rb") as f:
+            base64_image = base64.b64encode(f.read()).decode("utf-8")
+        image_url = f"data:image/jpeg;base64,{base64_image}"
+
+        prompt = self.prompt_manager.jpg_data_extraction_prompt(instructions)
+
+        assistant_message_content = await self._send_and_receive_message(prompt, image_url)
+
+        return assistant_message_content
diff --git a/backend/llms/prompt_manager.py b/backend/llms/prompt_manager.py
index 828530d..bcee9c6 100644
--- a/backend/llms/prompt_manager.py
+++ b/backend/llms/prompt_manager.py
@@ -93,3 +93,22 @@ def create_table_desc_prompt(
         if extra_desc:
             prompt += f"\n\nAdditional information about the sample data: {extra_desc}"
         return prompt
+
+    def jpg_data_extraction_prompt(self, instructions: str):
+        prompt = f"""
+        Extract the following data from the given JPG file:
+
+        User request:
+        {instructions}
+
+        Provide output in JSON format using the requested information as keys.
+
+        Example output:
+        {{
+            "client_name":"John Doe",
+            "invoice_amount":"1000",
+            "date":"01-01-2021"
+        }}
+        In this example, the requested information would have been client name, invoice amount, and date.
+        """
+        return prompt
diff --git a/backend/llms/system_message_manager.py b/backend/llms/system_message_manager.py
index 4678709..e1793dc 100644
--- a/backend/llms/system_message_manager.py
+++ b/backend/llms/system_message_manager.py
@@ -46,6 +46,10 @@ def __init__(self):
             "table_categorization": """
             You are a table categorization assistant. Your task is to analyze sample data and existing table metadata to identify the most suitable table for appending the sample data. Return only the name of the table.
             """,
+            "jpg_data_extraction": """
+            You are a JPG data extraction assistant. Your task is to extract specific data in the order specified from a JPG file and return it in a json format.
+            Return only the extracted data.
+            """,
             "generic": "You are a generic assistant.",
         }
 
diff --git a/backend/object_storage/__init__.py b/backend/object_storage/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/backend/object_storage/digitalocean_space_manager.py b/backend/object_storage/digitalocean_space_manager.py
index 795c2a0..f24a51d 100644
--- a/backend/object_storage/digitalocean_space_manager.py
+++ b/backend/object_storage/digitalocean_space_manager.py
@@ -81,3 +81,22 @@ def create_presigned_url(
 
         # The response contains the presigned URL
         return response
+
+    def delete_file(self, organization_name, object_name):
+        """Delete a file from an S3 bucket
+
+        :param organization_name: Name of the organization the file belongs to
+        :param object_name: S3 object name
+        :return: True if the referenced object was deleted, otherwise False
+        """
+
+        # Prepend the organization_name to the object_name
+        object_name = f"{organization_name}/{object_name}"
+
+        # Delete the file
+        try:
+            self.client.delete_object(Bucket=self.bucket_name, Key=object_name)
+        except Exception as e:
+            print(e)
+            return False
+        return True
diff --git a/backend/requirements.txt b/backend/requirements.txt
index 11cea2f..64c9270 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -20,4 +20,6 @@ langchain==0.0.351
 pytest==6.2.5
 pytest-asyncio==0.15.1
 sendgrid==6.11.0
-boto3==1.34.10
\ No newline at end of file
+boto3==1.34.10
+pillow==10.1.0
+pdf2image==1.16.3
\ No newline at end of file
diff --git a/backend/routes/data_profile_routes.py b/backend/routes/data_profile_routes.py
index 9402afa..3a12d70 100644
--- a/backend/routes/data_profile_routes.py
+++ b/backend/routes/data_profile_routes.py
@@ -1,11 +1,10 @@
 from fastapi import APIRouter, HTTPException, Depends, UploadFile, File, Form
 
-# from starlette.responses import JSONResponse
 import tempfile
-import os
 
 from database.database_manager import DatabaseManager
 from database.data_profile_manager import DataProfileManager
+from llms.gpt import GPTLLM
 from models.data_profile import (
     DataProfile,
     DataProfileCreateRequest,
@@ -69,57 +68,15 @@ async def preview_data_profile(
 ):
     suffix = file.filename.split(".")[-1]
     # Save the uploaded file temporarily
-    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
+    with tempfile.NamedTemporaryFile(delete=True, suffix=suffix) as temp_file:
         temp_file.write(await file.read())
         temp_file_path = temp_file.name
+        # Flush buffered bytes so the converter sees the complete file; the
+        # temp file is removed automatically when the with-block closes.
+        temp_file.flush()
 
-    # Use the ImageConversionManager context manager to convert the PDF to JPG
-    jpg_files = []
-    with ImageConversionManager(temp_file_path, "/change-me/") as manager:
-        jpg_files = manager.convert_to_jpg(temp_file_path)
-
-    # Clean up the uploaded temp file
-    os.unlink(temp_file_path)
-
-    # Assuming you have a function to send the JPGs to the LLM and get a response
-    # Send the JPG files to the LLM using the API
-    # You need to define how you'll handle multiple JPGs - this is just a placeholder
-    # if jpg_files:
-    #     # Here you would typically prepare and send your request to the LLM API.
-    #     # This will vary greatly depending on the LLM's API specifics.
-    #     # For now, this is a placeholder for how you might make the request.
-    #     # Replace with your actual API endpoint and key
-    #     llm_api_endpoint = "https://api.example.com/llm"
-    #     api_key = "your_api_key"
-    #     response = requests.post(
-    #         llm_api_endpoint,
-    #         headers={"Authorization": f"Bearer {api_key}"},
-    #         files={"file": open(jpg_files[0], "rb")},
-    #     )
-
-    #     # Handle the response
-    #     if response.status_code == 200:
-    #         llm_response = response.json()
-    #     else:
-    #         raise HTTPException(status_code=500, detail="LLM API request failed")
-    # else:
-    #     raise HTTPException(status_code=500, detail="Failed to convert file")
-
-    # Clean up the created JPG files
-    for jpg_file in jpg_files:
-        os.unlink(jpg_file)
-
-    # Return the LLM's response as JSON
-    # return JSONResponse(content=llm_response)
-
-
-# Now you would include this router in your FastAPI application instance.
-# from fastapi import FastAPI
-# app = FastAPI()
-# app.include_router(data_profile_router)
-
-
-# the response has to be a json
-
-# file -- > convert to jpg --> |
-# data-profile --> | --> llm --> response
+        # Convert the PDF to JPG while the temp file still exists on disk.
+        with ImageConversionManager(temp_file_path, "/tmp/") as manager:
+            jpg_file = manager.convert_to_jpg(temp_file_path)
+            gpt = GPTLLM()
+            data = await gpt.extract_data_from_jpg(instructions, jpg_file)
+            return data
diff --git a/backend/utils/image_conversion_manager.py b/backend/utils/image_conversion_manager.py
index dcc61c1..72ebbeb 100644
--- a/backend/utils/image_conversion_manager.py
+++ b/backend/utils/image_conversion_manager.py
@@ -57,14 +57,12 @@ def convert_to_jpg(self, file_path):
         # Add more conditions for other file types if needed
 
     def _convert_pdf_to_jpg(self, file_path):
-        images = convert_from_path(file_path)
-        jpg_files = []
-        for i, image in enumerate(images):
-            jpg_filename = f"output_page_{i}.jpg"
-            jpg_file_path = os.path.join(self.output_folder, jpg_filename)
-            image.save(jpg_file_path, "JPEG")
-            jpg_files.append(jpg_file_path)
-        return jpg_files
+        # convert_from_path returns one PIL image per page; keep the first page only.
+        images = convert_from_path(file_path)
+        jpg_filename = os.path.basename(file_path).replace(".pdf", ".jpg")
+        jpg_file_path = os.path.join(self.output_folder, jpg_filename)
+        images[0].save(jpg_file_path, "JPEG")
+        return jpg_file_path
 
     def _convert_png_to_jpg(self, file_path):
         rgb_im = Image.open(file_path).convert("RGB")