From 54f249992c75ce831d71206d43a825e802a10022 Mon Sep 17 00:00:00 2001 From: Rezart Abazi Date: Sat, 6 Jan 2024 12:04:38 +0100 Subject: [PATCH] add conversion manager --- backend/routes/data_profile_routes.py | 86 ++++++++++++++----- backend/utils/image_conversion_manager.py | 75 ++++++++++++++++ .../data-profiling/DataProfilingPage.jsx | 2 +- 3 files changed, 142 insertions(+), 21 deletions(-) create mode 100644 backend/utils/image_conversion_manager.py diff --git a/backend/routes/data_profile_routes.py b/backend/routes/data_profile_routes.py index d9e1ce4..9402afa 100644 --- a/backend/routes/data_profile_routes.py +++ b/backend/routes/data_profile_routes.py @@ -1,6 +1,8 @@ -from fastapi import APIRouter, HTTPException, Depends +from fastapi import APIRouter, HTTPException, Depends, UploadFile, File, Form # from starlette.responses import JSONResponse +import tempfile +import os from database.database_manager import DatabaseManager from database.data_profile_manager import DataProfileManager @@ -11,7 +13,7 @@ ) from models.user import User from security import get_current_user - +from utils.image_conversion_manager import ImageConversionManager data_profile_router = APIRouter() @@ -59,21 +61,65 @@ async def get_data_profile( return data_profile -# @data_profile_router.post("/data-profiles/preview-endpoint/") -# async def preview_data_profile( -# file: UploadFile = File(...), -# instructions: str = Form(...), -# current_user: User = Depends(get_current_user), -# ): -# # Read the file's content -# file_content = await file.read() - -# # Process the file content, perhaps to convert it into a string -# # if it's a binary file, like a PDF or an image. 
@data_profile_router.post("/data-profiles/preview/")
async def preview_data_profile(
    file: UploadFile = File(...),
    instructions: str = Form(...),
    current_user: User = Depends(get_current_user),
):
    """Convert an uploaded document to JPG pages for an LLM preview.

    Planned flow: uploaded file -> JPG page(s) -> (JPGs + instructions) ->
    LLM -> JSON response. Only the conversion step is implemented so far;
    the LLM call is still a TODO, so the function currently returns None.

    Args:
        file: The uploaded document (multipart form field ``file``).
        instructions: Free-text instructions from the form; not used until
            the LLM call is implemented.
        current_user: Resolved by ``get_current_user``; the dependency is
            declared so the route requires it, the value is not used here.

    Raises:
        HTTPException: 500 when the upload could not be converted to at
            least one JPG.
    """
    # Keep the leading dot (".pdf", not "pdf"): the converter picks its
    # strategy from the file extension, and a suffix without the dot yields
    # a temp name such as "tmpab12pdf" that matches no extension at all.
    # Lower-casing lets "SCAN.PDF" be recognised as well.
    suffix = os.path.splitext(file.filename or "")[1].lower()

    # delete=False because the converter reopens the file by path after
    # this handle is closed; the file is removed in the finally block.
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
        temp_file.write(await file.read())
        temp_file_path = temp_file.name

    try:
        with ImageConversionManager(temp_file_path, "/change-me/") as manager:
            converted = manager.convert_to_jpg(temp_file_path)

            # The converter may hand back a list of paths, a single path,
            # or nothing for an unsupported type; normalise to a list.
            if not converted:
                jpg_files = []
            elif isinstance(converted, str):
                jpg_files = [converted]
            else:
                jpg_files = list(converted)

            if not jpg_files:
                raise HTTPException(
                    status_code=500, detail="Failed to convert file"
                )

            # TODO: send `jpg_files` together with `instructions` to the LLM
            # and return its JSON response. This must happen inside this
            # `with` block: the manager deletes its temporary directory,
            # and with it every generated JPG, on exit. No separate JPG
            # clean-up is needed (or possible) afterwards.
    finally:
        # Always remove the uploaded temp file, even when conversion fails.
        os.unlink(temp_file_path)
class ImageConversionManager:
    """Convert PDF or PNG files to JPG inside a self-cleaning temp directory.

    Intended to be used as a context manager: entering creates a fresh
    temporary directory that receives the generated JPG files, and leaving
    the ``with`` block deletes that directory and everything in it. Callers
    must therefore consume the JPG files before the block ends.
    """

    def __init__(self, file_path: str, output_folder: str):
        """Store the source path and the initial output folder.

        Args:
            file_path: Path of the file this manager was created for.
            output_folder: Folder used for output when the instance is used
                without ``with``; entering the context replaces it with a
                temporary directory.
        """
        self.file_path = file_path
        self.output_folder = output_folder
        # Directory created by __enter__, if any. Tracked separately so that
        # __exit__ never deletes a folder this object did not create.
        self._temp_dir = None

    def __enter__(self):
        """Create the temporary output directory and return the manager."""
        self._temp_dir = tempfile.mkdtemp()
        self.output_folder = self._temp_dir
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Delete the temporary directory created by ``__enter__``.

        Returns None, so an exception raised inside the ``with`` block is
        propagated to the caller.
        """
        import shutil

        temp_dir = getattr(self, "_temp_dir", None)
        if temp_dir and os.path.isdir(temp_dir):
            # rmtree also removes nested directories, which a plain
            # os.rmdir after deleting top-level files cannot do.
            shutil.rmtree(temp_dir)
        self._temp_dir = None

    def convert_to_jpg(self, file_path):
        """Convert ``file_path`` to one or more JPG files.

        The file type is chosen from the extension, case-insensitively.

        Returns:
            A list of JPG paths: one entry per page for a PDF, a single
            entry for a PNG, and an empty list for any other extension.
        """
        lowered = file_path.lower()
        if lowered.endswith(".pdf"):
            return self._convert_pdf_to_jpg(file_path)
        if lowered.endswith(".png"):
            # Wrapped in a list so every branch has the same return shape.
            return [self._convert_png_to_jpg(file_path)]
        # Add more conditions for other file types if needed
        return []

    def _convert_pdf_to_jpg(self, file_path):
        """Save each PDF page as ``output_page_<i>.jpg``; return the paths."""
        jpg_files = []
        for page_index, page in enumerate(convert_from_path(file_path)):
            jpg_file_path = os.path.join(
                self.output_folder, f"output_page_{page_index}.jpg"
            )
            page.save(jpg_file_path, "JPEG")
            jpg_files.append(jpg_file_path)
        return jpg_files

    def _convert_png_to_jpg(self, file_path):
        """Save the PNG as an RGB JPG in the output folder; return its path."""
        # Swap only the final extension; str.replace would rewrite every
        # ".png" occurring anywhere in the file name.
        stem = os.path.splitext(os.path.basename(file_path))[0]
        jpg_file_path = os.path.join(self.output_folder, f"{stem}.jpg")
        # Context managers close both the source image (and its underlying
        # file handle) and the converted copy, even if saving fails.
        with Image.open(file_path) as source, source.convert("RGB") as rgb_image:
            rgb_image.save(jpg_file_path, "JPEG")
        return jpg_file_path
b/frontend/src/pages/data-profiling/DataProfilingPage.jsx @@ -89,7 +89,7 @@ function DataProfilingPage() { formData.append('file', selectedFile); formData.append('instructions', instructions); - axios.post(`${API_URL}data-profiles/preview-endpoint/`, formData, { + axios.post(`${API_URL}data-profiles/preview/`, formData, { headers: { 'Content-Type': 'multipart/form-data' }