Skip to content

Commit

Permalink
Merge pull request #178 from DocShow-AI/create_image_conversion_manager
Browse files Browse the repository at this point in the history
add conversion manager
  • Loading branch information
liberty-rising authored Jan 6, 2024
2 parents 708bf2c + 54f2499 commit d5c5f86
Show file tree
Hide file tree
Showing 3 changed files with 142 additions and 21 deletions.
86 changes: 66 additions & 20 deletions backend/routes/data_profile_routes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from fastapi import APIRouter, HTTPException, Depends
from fastapi import APIRouter, HTTPException, Depends, UploadFile, File, Form

# from starlette.responses import JSONResponse
import tempfile
import os

from database.database_manager import DatabaseManager
from database.data_profile_manager import DataProfileManager
Expand All @@ -11,7 +13,7 @@
)
from models.user import User
from security import get_current_user

from utils.image_conversion_manager import ImageConversionManager

data_profile_router = APIRouter()

Expand Down Expand Up @@ -59,21 +61,65 @@ async def get_data_profile(
return data_profile


# @data_profile_router.post("/data-profiles/preview-endpoint/")
# async def preview_data_profile(
# file: UploadFile = File(...),
# instructions: str = Form(...),
# current_user: User = Depends(get_current_user),
# ):
# # Read the file's content
# file_content = await file.read()

# # Process the file content, perhaps to convert it into a string
# # if it's a binary file, like a PDF or an image.
# text_content = process_file_content(file_content)

# # Use Langchain to send a request to your LLM
# # Here you can customize the request as needed
# response = llm.generate(text_content, instructions)

# return JSONResponse(content=response)
@data_profile_router.post("/data-profiles/preview/")
async def preview_data_profile(
    file: UploadFile = File(...),
    instructions: str = Form(...),
    current_user: User = Depends(get_current_user),
):
    """Convert an uploaded PDF or PNG into JPG pages for a data-profile preview.

    The upload is written to a temporary file, converted with
    ``ImageConversionManager`` and then cleaned up. Sending the pages to the
    LLM is not implemented yet, so the endpoint currently returns nothing.

    Args:
        file: The uploaded document. Its extension decides which conversion
            ``ImageConversionManager.convert_to_jpg`` applies.
        instructions: Free-text instructions from the form. Not used yet; it
            is reserved for the LLM request.
        current_user: Resolved through the ``get_current_user`` dependency and
            otherwise unused here (presumably this enforces authentication;
            confirm against ``security.get_current_user``).

    Raises:
        HTTPException: 500 with detail "Failed to convert file" when the
            conversion produces no JPG (e.g. an unsupported file type).
    """
    # Keep the leading dot and lower-case the extension: the converter picks
    # its strategy with ``endswith(".pdf")`` / ``endswith(".png")`` on the
    # temp file path, so a suffix of "pdf" (no dot) would never match.
    suffix = os.path.splitext(file.filename or "")[1].lower()

    # delete=False because the converter reopens the file by path after this
    # handle has been closed; removal is done explicitly in ``finally``.
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
    temp_file_path = temp_file.name
    try:
        with temp_file:
            temp_file.write(await file.read())

        with ImageConversionManager(temp_file_path, "/change-me/") as manager:
            converted = manager.convert_to_jpg(temp_file_path)

            # The converter may hand back a single path or a list of paths
            # (or nothing for unsupported types); normalise to a list.
            if isinstance(converted, str):
                jpg_files = [converted]
            else:
                jpg_files = list(converted or [])

            if not jpg_files:
                raise HTTPException(status_code=500, detail="Failed to convert file")

            # TODO: send ``jpg_files`` together with ``instructions`` to the
            # LLM and build the JSON response. This must happen inside this
            # ``with`` block: the manager deletes its output directory (and
            # every JPG in it) on exit, so the paths are invalid afterwards
            # and must not be unlinked again by this function.
    finally:
        # Always remove the uploaded temp file, even when conversion fails.
        os.unlink(temp_file_path)

    # TODO: return JSONResponse(content=llm_response) once the LLM call exists.


# Now you would include this router in your FastAPI application instance.
# from fastapi import FastAPI
# app = FastAPI()
# app.include_router(data_profile_router)


# The response has to be JSON.

# file -- > convert to jpg --> |
# data-profile --> | --> llm --> response
75 changes: 75 additions & 0 deletions backend/utils/image_conversion_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# from PIL import Image
# import pdf2image


# class ImageConversionManager:
# def __init__(self, output_folder):
# self.output_folder = output_folder

# def convert_to_jpg(self, file_path):
# if file_path.endswith(".pdf"):
# return self._convert_pdf_to_jpg(file_path)
# elif file_path.endswith(".png"):
# return self._convert_png_to_jpg(file_path)
# # Add more conditions for other file types if needed

# def _convert_pdf_to_jpg(self, file_path):
# images = pdf2image.convert_from_path(file_path)
# for i, image in enumerate(images):
# image.save(file_path.replace(".pdf", "") + str(i) + ".jpg", "JPEG")
# return file_path.replace(".pdf", "") + "0.jpg"

# def _convert_png_to_jpg(self, file_path):
# with Image.open(file_path) as img:
# rgb_im = img.convert("RGB")
# rgb_im.save(file_path.replace(".png", ".jpg"), "JPEG")
# return file_path.replace(".png", ".jpg")


import os
import tempfile
from PIL import Image
from pdf2image import convert_from_path


class ImageConversionManager:
    """Convert PDF and PNG files to JPEG inside a throwaway directory.

    Use it as a context manager: entering creates a fresh temporary directory
    that receives the generated JPEGs, and leaving removes that directory and
    everything in it. Paths returned by ``convert_to_jpg`` are therefore only
    valid inside the ``with`` block.
    """

    def __init__(self, file_path: str, output_folder: str):
        """Store the source path and the initial output folder.

        Args:
            file_path: Path of the file the caller intends to convert. It is
                only stored; ``convert_to_jpg`` takes its own path argument.
            output_folder: Initial output directory. It is replaced by a
                temporary directory as soon as the context is entered.
        """
        self.file_path = file_path
        self.output_folder = output_folder

    def __enter__(self):
        """Create the temporary output directory and return the manager."""
        self.output_folder = tempfile.mkdtemp()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Delete the output directory together with all generated files.

        Exceptions raised inside the ``with`` block are not suppressed.
        """
        # Function-scope import keeps this class self-contained.
        import shutil

        if self.output_folder and os.path.isdir(self.output_folder):
            # rmtree also copes with nested directories, which a plain
            # unlink-then-rmdir pass would fail on.
            shutil.rmtree(self.output_folder)

    def convert_to_jpg(self, file_path: str) -> list:
        """Convert ``file_path`` to one or more JPEG files.

        Args:
            file_path: Path to a ``.pdf`` or ``.png`` file; the extension is
                matched case-insensitively.

        Returns:
            A list of JPEG paths inside ``self.output_folder``: one per page
            for a PDF, a single entry for a PNG, and an empty list when the
            extension is not supported.
        """
        lowered = file_path.lower()
        if lowered.endswith(".pdf"):
            return self._convert_pdf_to_jpg(file_path)
        if lowered.endswith(".png"):
            return self._convert_png_to_jpg(file_path)
        # Add more conditions for other file types if needed.
        return []

    def _convert_pdf_to_jpg(self, file_path: str) -> list:
        """Render every page of the PDF to ``output_page_<i>.jpg``."""
        jpg_files = []
        for page_index, page_image in enumerate(convert_from_path(file_path)):
            jpg_file_path = os.path.join(
                self.output_folder, f"output_page_{page_index}.jpg"
            )
            page_image.save(jpg_file_path, "JPEG")
            jpg_files.append(jpg_file_path)
        return jpg_files

    def _convert_png_to_jpg(self, file_path: str) -> list:
        """Convert a PNG to an RGB JPEG named after the source file."""
        # splitext swaps only the real extension, whatever its letter case.
        stem = os.path.splitext(os.path.basename(file_path))[0]
        jpg_file_path = os.path.join(self.output_folder, f"{stem}.jpg")
        # Both the source image and the RGB copy are closed on exit, so the
        # PNG's file handle is not leaked.
        with Image.open(file_path) as source_image, source_image.convert(
            "RGB"
        ) as rgb_image:
            rgb_image.save(jpg_file_path, "JPEG")
        # Returned as a list so both conversion paths share one return shape.
        return [jpg_file_path]
2 changes: 1 addition & 1 deletion frontend/src/pages/data-profiling/DataProfilingPage.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ function DataProfilingPage() {
formData.append('file', selectedFile);
formData.append('instructions', instructions);

axios.post(`${API_URL}data-profiles/preview-endpoint/`, formData, {
axios.post(`${API_URL}data-profiles/preview/`, formData, {
headers: {
'Content-Type': 'multipart/form-data'
}
Expand Down

0 comments on commit d5c5f86

Please sign in to comment.