Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dev #179

Merged
merged 2 commits into from
Jan 6, 2024
Merged

Dev #179

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 66 additions & 20 deletions backend/routes/data_profile_routes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from fastapi import APIRouter, HTTPException, Depends
from fastapi import APIRouter, HTTPException, Depends, UploadFile, File, Form

# from starlette.responses import JSONResponse
import tempfile
import os

from database.database_manager import DatabaseManager
from database.data_profile_manager import DataProfileManager
Expand All @@ -11,7 +13,7 @@
)
from models.user import User
from security import get_current_user

from utils.image_conversion_manager import ImageConversionManager

data_profile_router = APIRouter()

Expand Down Expand Up @@ -59,21 +61,65 @@ async def get_data_profile(
return data_profile


# @data_profile_router.post("/data-profiles/preview-endpoint/")
# async def preview_data_profile(
# file: UploadFile = File(...),
# instructions: str = Form(...),
# current_user: User = Depends(get_current_user),
# ):
# # Read the file's content
# file_content = await file.read()

# # Process the file content, perhaps to convert it into a string
# # if it's a binary file, like a PDF or an image.
# text_content = process_file_content(file_content)

# # Use Langchain to send a request to your LLM
# # Here you can customize the request as needed
# response = llm.generate(text_content, instructions)

# return JSONResponse(content=response)
@data_profile_router.post("/data-profiles/preview/")
async def preview_data_profile(
    file: UploadFile = File(...),
    instructions: str = Form(...),
    current_user: User = Depends(get_current_user),
):
    """Convert an uploaded document to JPG page images for an LLM preview.

    The upload is written to a temporary file and converted to one or more
    JPG files by ``ImageConversionManager``. The LLM request itself is not
    implemented yet, so the endpoint currently returns ``None`` (JSON
    ``null``) on success.

    Args:
        file: The uploaded document (multipart form field ``file``).
        instructions: Free-text instructions (multipart form field
            ``instructions``); reserved for the future LLM request.
        current_user: The authenticated user, resolved by
            ``get_current_user``.

    Raises:
        HTTPException: 400 when the conversion yields no JPG files (for
            example because the file type is not handled by the converter).
    """
    # Keep the leading dot of the extension: NamedTemporaryFile appends
    # ``suffix`` verbatim, and the converter dispatches on ".pdf" / ".png".
    # ``filename`` may be missing on some uploads, hence the ``or ""``.
    suffix = os.path.splitext(file.filename or "")[1].lower()

    # Save the uploaded file temporarily; delete=False so the path stays
    # valid after the handle is closed and can be read by the converter.
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
        temp_file.write(await file.read())
        temp_file_path = temp_file.name

    try:
        with ImageConversionManager(temp_file_path, "/change-me/") as manager:
            converted = manager.convert_to_jpg(temp_file_path)

            # The converter may hand back a list of paths, a single path or
            # None; normalise so the code below always sees a list.
            if converted is None:
                jpg_files = []
            elif isinstance(converted, str):
                jpg_files = [converted]
            else:
                jpg_files = list(converted)

            if not jpg_files:
                raise HTTPException(
                    status_code=400,
                    detail="Failed to convert file: expected a PDF or PNG upload",
                )

            # TODO: send ``jpg_files`` together with ``instructions`` to the
            # LLM API here. This must happen inside the ``with`` block: the
            # manager deletes its output folder, and every JPG in it, on
            # exit, so no manual cleanup of the JPG files is needed.
            # On a failed LLM request raise
            # HTTPException(status_code=500, detail="LLM API request failed").
    finally:
        # Always remove the uploaded temp file, even when conversion fails.
        os.unlink(temp_file_path)

    # TODO: return the LLM's response as JSON once the request is wired up,
    # e.g. ``return JSONResponse(content=llm_response)``.


# Now you would include this router in your FastAPI application instance.
# from fastapi import FastAPI
# app = FastAPI()
# app.include_router(data_profile_router)


# The response has to be JSON.

# file -- > convert to jpg --> |
# data-profile --> | --> llm --> response
75 changes: 75 additions & 0 deletions backend/utils/image_conversion_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# from PIL import Image
# import pdf2image


# class ImageConversionManager:
# def __init__(self, output_folder):
# self.output_folder = output_folder

# def convert_to_jpg(self, file_path):
# if file_path.endswith(".pdf"):
# return self._convert_pdf_to_jpg(file_path)
# elif file_path.endswith(".png"):
# return self._convert_png_to_jpg(file_path)
# # Add more conditions for other file types if needed

# def _convert_pdf_to_jpg(self, file_path):
# images = pdf2image.convert_from_path(file_path)
# for i, image in enumerate(images):
# image.save(file_path.replace(".pdf", "") + str(i) + ".jpg", "JPEG")
# return file_path.replace(".pdf", "") + "0.jpg"

# def _convert_png_to_jpg(self, file_path):
# with Image.open(file_path) as img:
# rgb_im = img.convert("RGB")
# rgb_im.save(file_path.replace(".png", ".jpg"), "JPEG")
# return file_path.replace(".png", ".jpg")


import os
import tempfile
from PIL import Image
from pdf2image import convert_from_path


class ImageConversionManager:
    """Context manager that converts PDF or PNG files to JPG images.

    On entry a fresh temporary directory is created and used as the output
    folder; on exit every file in that directory and the directory itself
    are deleted. Any returned JPG path is therefore only valid inside the
    ``with`` block.
    """

    def __init__(self, file_path: str, output_folder: str):
        """Store the source path and the requested output folder.

        Args:
            file_path: Path of the file this manager was created for.
            output_folder: Output folder to use when the instance is used
                without ``with``. ``__enter__`` replaces it with a
                temporary directory.
        """
        self.file_path = file_path
        self.output_folder = output_folder

    def __enter__(self):
        """Create the temporary output directory and return the manager."""
        self.output_folder = tempfile.mkdtemp()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Delete the files in the output folder, then the folder itself."""
        if self.output_folder and os.path.isdir(self.output_folder):
            for filename in os.listdir(self.output_folder):
                file_path = os.path.join(self.output_folder, filename)
                if os.path.isfile(file_path):
                    os.unlink(file_path)
            os.rmdir(self.output_folder)

    def convert_to_jpg(self, file_path):
        """Convert ``file_path`` to JPG and return the created file paths.

        The file type is detected from the extension, case-insensitively.

        Args:
            file_path: Path to a ``.pdf`` or ``.png`` file.

        Returns:
            A list of JPG paths inside the output folder: one per page for
            a PDF, a single entry for a PNG, and an empty list when the
            extension is not supported.
        """
        lowered = file_path.lower()
        if lowered.endswith(".pdf"):
            return self._convert_pdf_to_jpg(file_path)
        if lowered.endswith(".png"):
            return self._convert_png_to_jpg(file_path)
        # Add more conditions for other file types if needed.
        # Always return a list so callers can iterate the result safely.
        return []

    def _convert_pdf_to_jpg(self, file_path):
        """Render each PDF page to ``output_page_<i>.jpg`` and list the paths."""
        jpg_files = []
        for i, image in enumerate(convert_from_path(file_path)):
            jpg_file_path = os.path.join(self.output_folder, f"output_page_{i}.jpg")
            image.save(jpg_file_path, "JPEG")
            jpg_files.append(jpg_file_path)
        return jpg_files

    def _convert_png_to_jpg(self, file_path):
        """Convert one PNG to an RGB JPG and return its path in a list."""
        # Swap only the real extension; str.replace would also rewrite any
        # ".png" that appears earlier in the file name.
        stem = os.path.splitext(os.path.basename(file_path))[0]
        jpg_file_path = os.path.join(self.output_folder, f"{stem}.jpg")
        # Close both the source image and the converted copy so that no file
        # handle outlives the conversion.
        with Image.open(file_path) as source_image:
            with source_image.convert("RGB") as rgb_image:
                rgb_image.save(jpg_file_path, "JPEG")
        return [jpg_file_path]
2 changes: 1 addition & 1 deletion frontend/src/pages/data-profiling/DataProfilingPage.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ function DataProfilingPage() {
formData.append('file', selectedFile);
formData.append('instructions', instructions);

axios.post(`${API_URL}data-profiles/preview-endpoint/`, formData, {
axios.post(`${API_URL}data-profiles/preview/`, formData, {
headers: {
'Content-Type': 'multipart/form-data'
}
Expand Down
Loading