Merge pull request #178 from DocShow-AI/create_image_conversion_manager

add conversion manager
liberty-rising · Jan 6, 2024 · d5c5f86 · d5c5f86
2 parents 708bf2c + 54f2499
commit d5c5f86
Show file tree

Hide file tree

Showing 3 changed files with 142 additions and 21 deletions.
diff --git a/backend/routes/data_profile_routes.py b/backend/routes/data_profile_routes.py
@@ -1,6 +1,8 @@
-from fastapi import APIRouter, HTTPException, Depends
+from fastapi import APIRouter, HTTPException, Depends, UploadFile, File, Form
 
 # from starlette.responses import JSONResponse
+import tempfile
+import os
 
 from database.database_manager import DatabaseManager
 from database.data_profile_manager import DataProfileManager
@@ -11,7 +13,7 @@
 )
 from models.user import User
 from security import get_current_user
-
+from utils.image_conversion_manager import ImageConversionManager
 
 data_profile_router = APIRouter()
 
@@ -59,21 +61,65 @@ async def get_data_profile(
         return data_profile
 
 
-# @data_profile_router.post("/data-profiles/preview-endpoint/")
-# async def preview_data_profile(
-#     file: UploadFile = File(...),
-#     instructions: str = Form(...),
-#     current_user: User = Depends(get_current_user),
-# ):
-#     # Read the file's content
-#     file_content = await file.read()
-
-#     # Process the file content, perhaps to convert it into a string
-#     # if it's a binary file, like a PDF or an image.
-#     text_content = process_file_content(file_content)
-
-#     # Use Langchain to send a request to your LLM
-#     # Here you can customize the request as needed
-#     response = llm.generate(text_content, instructions)
-
-#     return JSONResponse(content=response)
+@data_profile_router.post("/data-profiles/preview/")
+async def preview_data_profile(
+    file: UploadFile = File(...),
+    instructions: str = Form(...),
+    current_user: User = Depends(get_current_user),
+):
+    """Convert an uploaded file to JPG pages; the LLM preview step is a TODO.
+
+    Args:
+        file: Uploaded document; the converter dispatches on its extension.
+        instructions: Form text reserved for the LLM request (unused so far).
+        current_user: Resolved through the ``get_current_user`` dependency.
+
+    Returns:
+        None for now; the JSON response will come from the LLM call.
+
+    Raises:
+        HTTPException: 400 when the upload produces no JPG output.
+    """
+    # Keep the leading dot: the converter matches on ".pdf" / ".png", so a
+    # bare "pdf" suffix would never be recognised.
+    suffix = os.path.splitext(file.filename or "")[1]
+    # Save the uploaded file temporarily
+    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
+        temp_file.write(await file.read())
+        temp_file_path = temp_file.name
+
+    try:
+        # The manager deletes its output directory on exit, so the JPGs only
+        # exist inside this block and must not be unlinked again afterwards.
+        with ImageConversionManager(temp_file_path, "/change-me/") as manager:
+            # NOTE(review): conversion is synchronous and blocks the event
+            # loop; consider a threadpool if uploads can be large.
+            jpg_files = manager.convert_to_jpg(temp_file_path)
+            if isinstance(jpg_files, str):  # a single path instead of a list
+                jpg_files = [jpg_files]
+            if not jpg_files:
+                raise HTTPException(
+                    status_code=400, detail="Failed to convert file"
+                )
+
+            # TODO: send jpg_files (and instructions) to the LLM API here,
+            # while the files still exist; decide how multiple pages are
+            # handled and raise HTTPException(500) if the request fails.
+    finally:
+        # Clean up the uploaded temp file, even when conversion raised
+        os.unlink(temp_file_path)
+
+    # Return the LLM's response as JSON
+    # return JSONResponse(content=llm_response)
+
+
+# Now you would include this router in your FastAPI application instance.
+# from fastapi import FastAPI
+# app = FastAPI()
+# app.include_router(data_profile_router)
+
+
+# The response has to be JSON.
+
+# file --> convert to jpg  --> |
+# data-profile             --> | --> llm --> response
diff --git a/backend/utils/image_conversion_manager.py b/backend/utils/image_conversion_manager.py
@@ -0,0 +1,75 @@
+# from PIL import Image
+# import pdf2image
+
+
+# class ImageConversionManager:
+#     def __init__(self, output_folder):
+#         self.output_folder = output_folder
+
+#     def convert_to_jpg(self, file_path):
+#         if file_path.endswith(".pdf"):
+#             return self._convert_pdf_to_jpg(file_path)
+#         elif file_path.endswith(".png"):
+#             return self._convert_png_to_jpg(file_path)
+#         # Add more conditions for other file types if needed
+
+#     def _convert_pdf_to_jpg(self, file_path):
+#         images = pdf2image.convert_from_path(file_path)
+#         for i, image in enumerate(images):
+#             image.save(file_path.replace(".pdf", "") + str(i) + ".jpg", "JPEG")
+#         return file_path.replace(".pdf", "") + "0.jpg"
+
+#     def _convert_png_to_jpg(self, file_path):
+#         with Image.open(file_path) as img:
+#             rgb_im = img.convert("RGB")
+#             rgb_im.save(file_path.replace(".png", ".jpg"), "JPEG")
+#         return file_path.replace(".png", ".jpg")
+
+
+import os
+import tempfile
+from PIL import Image
+from pdf2image import convert_from_path
+
+
+class ImageConversionManager:
+    """Context manager: converts a PDF/PNG to JPGs in a self-deleting temp dir."""
+
+    def __init__(self, file_path: str, output_folder: str):
+        self.file_path = file_path
+        self.output_folder = output_folder  # replaced by a temp dir on entry
+
+    def __enter__(self):
+        self.output_folder = tempfile.mkdtemp()
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        import shutil  # local import; the file's import block is left as-is
+        if self.output_folder and os.path.isdir(self.output_folder):
+            shutil.rmtree(self.output_folder, ignore_errors=True)  # JPGs go too
+
+    def convert_to_jpg(self, file_path):
+        """Return a list of JPG paths for a .pdf/.png file, or [] if unsupported."""
+        ext = os.path.splitext(file_path)[1].lower()  # so ".PDF" also matches
+        if ext == ".pdf":
+            return self._convert_pdf_to_jpg(file_path)
+        if ext == ".png":
+            return [self._convert_png_to_jpg(file_path)]
+        return []
+
+    def _convert_pdf_to_jpg(self, file_path):
+        """Save each PDF page as output_page_<i>.jpg and return the paths."""
+        jpg_files = []
+        for i, image in enumerate(convert_from_path(file_path)):
+            jpg_file_path = os.path.join(self.output_folder, f"output_page_{i}.jpg")
+            image.save(jpg_file_path, "JPEG")
+            jpg_files.append(jpg_file_path)
+        return jpg_files
+
+    def _convert_png_to_jpg(self, file_path):
+        """Save the PNG as <stem>.jpg in the output folder and return its path."""
+        stem = os.path.splitext(os.path.basename(file_path))[0]
+        jpg_file_path = os.path.join(self.output_folder, f"{stem}.jpg")
+        with Image.open(file_path) as img:  # closes the source file handle
+            img.convert("RGB").save(jpg_file_path, "JPEG")
+        return jpg_file_path
diff --git a/frontend/src/pages/data-profiling/DataProfilingPage.jsx b/frontend/src/pages/data-profiling/DataProfilingPage.jsx
@@ -89,7 +89,7 @@ function DataProfilingPage() {
       formData.append('file', selectedFile);
       formData.append('instructions', instructions);
 
-      axios.post(`${API_URL}data-profiles/preview-endpoint/`, formData, { 
+      axios.post(`${API_URL}data-profiles/preview/`, formData, { 
         headers: {
           'Content-Type': 'multipart/form-data'
         }