Skip to content

Commit

Permalink
Merge pull request #180 from DocShow-AI/image_data_extraction_llm
Browse files Browse the repository at this point in the history
Image data extraction llm
  • Loading branch information
liberty-rising authored Jan 6, 2024
2 parents d5c5f86 + 775dd77 commit b94dc35
Show file tree
Hide file tree
Showing 8 changed files with 87 additions and 69 deletions.
36 changes: 29 additions & 7 deletions backend/llms/gpt.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from openai import ChatCompletion
from typing import List, Optional
from typing import Optional

import json
import openai
Expand Down Expand Up @@ -124,9 +124,22 @@ def _get_system_message_content(self, assistant_type: str = "generic") -> str:
)
return system_message_content

def _create_message(self, role: str, prompt: str):
def _create_message(self, role: str, prompt: str, image_url: str = ""):
"""Create either a user, system, or assistant message."""
return {"role": f"{role}", "content": f"{prompt}"}
if image_url:
return {
"role": f"{role}",
"content": [
{"type": "text", "text": prompt},
{
"type": "image",
"image_url": {"url": image_url},
},
],
"image_url": f"{image_url}",
}
else:
return {"role": f"{role}", "content": f"{prompt}"}

def _add_system_message(self, assistant_type: str) -> None:
"""
Expand All @@ -151,8 +164,8 @@ def _add_system_message(self, assistant_type: str) -> None:

self.llm_type = assistant_type

async def _send_and_receive_message(self, prompt: str) -> str:
user_message = self._create_message("user", prompt)
async def _send_and_receive_message(self, prompt: str, image_url: str = "") -> str:
user_message = self._create_message("user", prompt, image_url)
self.history.append(user_message)

# Check token limit and truncate history if needed
Expand Down Expand Up @@ -330,5 +343,14 @@ def generate_text(self, input_text):

return assistant_message_content

def generate_analytics_text(self, input_text: str, table_names: List[str]):
self._add_system_message(assistant_type="analytics_chat")
async def extract_data_from_jpg(self, instructions: str, jpg_file: str):
    """Ask the model to extract the requested data from a JPG image.

    The image is read from disk, embedded in a base64 ``data:`` URL and sent
    together with the extraction prompt.

    :param instructions: Description of the data the user wants extracted.
    :param jpg_file: Path to the JPG file on the local filesystem.
    :return: Whatever ``_send_and_receive_message`` returns for the request.
    :raises OSError: If ``jpg_file`` cannot be opened or read.
    """
    # Local import keeps this block self-contained; base64 is stdlib.
    import base64

    self._add_system_message(assistant_type="jpg_data_extraction")

    # tiktoken is a tokenizer library and offers no image helpers, so the
    # file is encoded directly with the standard library.
    with open(jpg_file, "rb") as image_file:
        base64_image = base64.b64encode(image_file.read()).decode("ascii")
    image_url = f"data:image/jpeg;base64,{base64_image}"

    prompt = self.prompt_manager.jpg_data_extraction_prompt(instructions)

    # _send_and_receive_message is a coroutine function; await it so callers
    # receive the model's reply rather than a pending coroutine object.
    assistant_message_content = await self._send_and_receive_message(
        prompt, image_url
    )

    return assistant_message_content
19 changes: 19 additions & 0 deletions backend/llms/prompt_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,22 @@ def create_table_desc_prompt(
if extra_desc:
prompt += f"\n\nAdditional information about the sample data: {extra_desc}"
return prompt

def jpg_data_extraction_prompt(self, instructions: str):
    """Build the prompt that asks the model to extract data from a JPG.

    :param instructions: The user's description of the data to extract; it
        is inserted verbatim under the "User request:" heading.
    :return: The prompt text, including a worked JSON example of the
        expected output format.
    """
    prompt_lines = (
        "",
        "Extract the following data from the given JPG file:",
        "User request:",
        f"{instructions}",
        "Provide output in JSON format using the requested information as keys.",
        "Example output:",
        "{",
        '"client_name":"John Doe",',
        '"invoice_amount":"1000",',
        '"date":"01-01-2021"',
        "}",
        "In this example, the requested information would have been client name, invoice amount, and date.",
        "",
    )
    # The empty first and last entries reproduce the leading and trailing
    # newlines of the prompt.
    return "\n".join(prompt_lines)
4 changes: 4 additions & 0 deletions backend/llms/system_message_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ def __init__(self):
You are a table categorization assistant. Your task is to analyze sample data and existing table metadata to identify the most suitable
table for appending the sample data. Return only the name of the table.
""",
"""jpg_data_extraction""": """
You are a JPG data extraction assistant. Your task is to extract specific data in the order specifed from a JPG file and return it in a json format.
Return only the extracted data.
""",
"generic": "You are a generic assistant.",
}

Expand Down
Empty file.
19 changes: 19 additions & 0 deletions backend/object_storage/digitalocean_space_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,22 @@ def create_presigned_url(

# The response contains the presigned URL
return response

def delete_file(self, organization_name, object_name):
    """Delete an organization's file from the bucket.

    :param organization_name: Name of the organization the file belongs to;
        used as the key prefix.
    :param object_name: Object name relative to the organization prefix.
    :return: True if the delete call completed without raising, otherwise
        False (the error is printed).
    """
    # Objects are stored under "<organization_name>/<object_name>".
    key = f"{organization_name}/{object_name}"

    try:
        self.client.delete_object(Bucket=self.bucket_name, Key=key)
    except Exception as error:
        # Best effort: report the failure and signal it through the result.
        print(error)
        return False
    else:
        return True
4 changes: 3 additions & 1 deletion backend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,6 @@ langchain==0.0.351
pytest==6.2.5
pytest-asyncio==0.15.1
sendgrid==6.11.0
boto3==1.34.10
boto3==1.34.10
pillow==10.1.0
pdf2image==1.16.3
61 changes: 8 additions & 53 deletions backend/routes/data_profile_routes.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
from fastapi import APIRouter, HTTPException, Depends, UploadFile, File, Form

# from starlette.responses import JSONResponse
import tempfile
import os

from database.database_manager import DatabaseManager
from database.data_profile_manager import DataProfileManager
from llms.gpt import GPTLLM
from models.data_profile import (
DataProfile,
DataProfileCreateRequest,
Expand Down Expand Up @@ -69,57 +68,13 @@ async def preview_data_profile(
):
suffix = file.filename.split(".")[-1]
# Save the uploaded file temporarily
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
with tempfile.NamedTemporaryFile(delete=True, suffix=suffix) as temp_file:
temp_file.write(await file.read())
temp_file_path = temp_file.name

# Use the ImageConversionManager context manager to convert the PDF to JPG
jpg_files = []
with ImageConversionManager(temp_file_path, "/change-me/") as manager:
jpg_files = manager.convert_to_jpg(temp_file_path)

# Clean up the uploaded temp file
os.unlink(temp_file_path)

# Assuming you have a function to send the JPGs to the LLM and get a response
# Send the JPG files to the LLM using the API
# You need to define how you'll handle multiple JPGs - this is just a placeholder
# if jpg_files:
# # Here you would typically prepare and send your request to the LLM API.
# # This will vary greatly depending on the LLM's API specifics.
# # For now, this is a placeholder for how you might make the request.
# # Replace with your actual API endpoint and key
# llm_api_endpoint = "https://api.example.com/llm"
# api_key = "your_api_key"
# response = requests.post(
# llm_api_endpoint,
# headers={"Authorization": f"Bearer {api_key}"},
# files={"file": open(jpg_files[0], "rb")},
# )

# # Handle the response
# if response.status_code == 200:
# llm_response = response.json()
# else:
# raise HTTPException(status_code=500, detail="LLM API request failed")
# else:
# raise HTTPException(status_code=500, detail="Failed to convert file")

# Clean up the created JPG files
for jpg_file in jpg_files:
os.unlink(jpg_file)

# Return the LLM's response as JSON
# return JSONResponse(content=llm_response)


# Now you would include this router in your FastAPI application instance.
# from fastapi import FastAPI
# app = FastAPI()
# app.include_router(data_profile_router)


# the response has to be a json

# file -- > convert to jpg --> |
# data-profile --> | --> llm --> response
# Use the ImageConversionManager context manager to convert the PDF to JPG
with ImageConversionManager(temp_file_path, "/tmp/") as manager:
jpg_file = manager.convert_to_jpg(temp_file_path)
gpt = GPTLLM()
data = gpt.extract_data_from_jpg(instructions, jpg_file)
return data
13 changes: 5 additions & 8 deletions backend/utils/image_conversion_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,11 @@ def convert_to_jpg(self, file_path):
# Add more conditions for other file types if needed

def _convert_pdf_to_jpg(self, file_path):
    """Render the first page of a PDF to a JPG file in the output folder.

    :param file_path: Path to the source PDF.
    :return: Path of the JPG written into ``self.output_folder``.
    :raises ValueError: If no page could be rendered from the PDF.
    """
    # convert_from_path returns a list of page images, not a single image.
    # Only one JPG path is returned, so just the first page is rendered.
    pages = convert_from_path(file_path, first_page=1, last_page=1)
    if not pages:
        raise ValueError(f"No pages could be rendered from PDF: {file_path}")

    # splitext handles inputs with or without a ".pdf" extension, so the
    # output always ends in ".jpg".
    stem, _ = os.path.splitext(os.path.basename(file_path))
    jpg_file_path = os.path.join(self.output_folder, f"{stem}.jpg")
    pages[0].save(jpg_file_path, "JPEG")
    return jpg_file_path

def _convert_png_to_jpg(self, file_path):
rgb_im = Image.open(file_path).convert("RGB")
Expand Down

0 comments on commit b94dc35

Please sign in to comment.