diff --git a/backend/database/data_profile_manager.py b/backend/database/data_profile_manager.py
index 88c56d6..499989e 100644
--- a/backend/database/data_profile_manager.py
+++ b/backend/database/data_profile_manager.py
@@ -5,9 +5,14 @@ class DataProfileManager:
     def __init__(self, session):
         self.session = session
 
-    def get_dataprofile_by_name(self, name):
-        """Retrieve a DataProfile by its name."""
-        return self.session.query(DataProfile).filter(DataProfile.name == name).first()
+    def get_dataprofile_by_name_and_org(self, name, org_id) -> DataProfile:
+        """Retrieve a DataProfile by its name and organization."""
+        return (
+            self.session.query(DataProfile)
+            .filter(DataProfile.name == name)
+            .filter(DataProfile.organization_id == org_id)
+            .first()
+        )
 
     def get_all_data_profiles(self):
         """Retrieve all DataProfiles."""
diff --git a/backend/envs/dev/initialization/setup_dev_environment.py b/backend/envs/dev/initialization/setup_dev_environment.py
index f82b97b..74c8d03 100644
--- a/backend/envs/dev/initialization/setup_dev_environment.py
+++ b/backend/envs/dev/initialization/setup_dev_environment.py
@@ -178,7 +178,9 @@ def create_sample_dataprofile():
     # Using DatabaseManager to manage the database session
     with DatabaseManager() as session:
         profile_manager = DataProfileManager(session)
-        existing_profile = profile_manager.get_dataprofile_by_name(sample_profile.name)
+        existing_profile = profile_manager.get_dataprofile_by_name_and_org(
+            sample_profile.name, 1
+        )
         if not existing_profile:
             profile_manager.create_dataprofile(sample_profile)
             logger.debug("Sample data profile created.")
diff --git a/backend/llms/gpt.py b/backend/llms/gpt.py
index 0d0a2e7..a2bfc1b 100644
--- a/backend/llms/gpt.py
+++ b/backend/llms/gpt.py
@@ -7,6 +7,7 @@
 from database.database_manager import DatabaseManager
 from llms.prompt_manager import PromptManager
 from llms.system_message_manager import SystemMessageManager
+from models.data_profile import DataProfile
 from models.user import User
 from openai import ChatCompletion
 from settings import OPENAI_API_KEY
@@ -402,11 +403,12 @@ def generate_text(self, input_text):
         return assistant_message_content
 
     async def extract_data_from_jpgs(
-        self, instructions: str, jpg_presigned_urls: List[str]
+        self, data_profile: DataProfile, jpg_presigned_urls: List[str]
     ):
         self._add_system_message(assistant_type="jpg_data_extraction")
         self._set_model(model_type="img")
 
+        instructions = data_profile.description
         prompt = self.prompt_manager.jpg_data_extraction_prompt(instructions)
 
         assistant_message_content = await self._send_and_receive_message(
diff --git a/backend/llms/prompt_manager.py b/backend/llms/prompt_manager.py
index 4d49ea0..61a1336 100644
--- a/backend/llms/prompt_manager.py
+++ b/backend/llms/prompt_manager.py
@@ -102,6 +102,7 @@ def jpg_data_extraction_prompt(self, instructions: str):
         {instructions}
 
         Provide output in a JSON string using the requested information as keys.
+        The JSON string should be flat, not nested.
 
         Example output:
         {{
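Review note (not part of the patch): the prompt now demands flat JSON, but nothing downstream enforces it. A minimal sketch of how a caller could validate the model's output before trusting it; the function name and error messages are illustrative, not from the codebase:

```python
import json


def parse_flat_extraction(raw: str) -> dict:
    """Parse model output and enforce the flat (non-nested) JSON contract."""
    data = json.loads(raw)  # raises ValueError on malformed JSON
    if not isinstance(data, dict):
        raise ValueError("expected a JSON object at the top level")
    nested = [key for key, value in data.items() if isinstance(value, (dict, list))]
    if nested:
        raise ValueError(f"expected flat JSON, found nested values under: {nested}")
    return data


# parse_flat_extraction('{"invoice_number": "123", "total": "9.99"}') -> dict
```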
diff --git a/backend/object_storage/digitalocean_space_manager.py b/backend/object_storage/digitalocean_space_manager.py
index 2863dfc..d9f3524 100644
--- a/backend/object_storage/digitalocean_space_manager.py
+++ b/backend/object_storage/digitalocean_space_manager.py
@@ -11,6 +11,7 @@
 from typing import List
 
 import boto3
+from fastapi import UploadFile
 from settings import (
     SPACES_ACCESS_KEY,
     SPACES_BUCKET_NAME,
@@ -21,7 +22,12 @@
 
 
 class DigitalOceanSpaceManager:
-    def __init__(self, organization_name: str = "", file_paths: List[str] = []):
+    def __init__(
+        self,
+        organization_name: str = "",
+        files: List[UploadFile] = [],
+        file_paths: List[str] = [],
+    ):
         session = boto3.session.Session()
         self.client = session.client(
             "s3",
@@ -34,6 +40,7 @@ def __init__(self, organization_name: str = "", file_paths: List[str] = []):
 
         self.organization_name = organization_name.replace(" ", "_")
 
+        self.files = files
         self.file_paths = file_paths
         self.file_names = [os.path.basename(file_path) for file_path in file_paths]
         self.object_names: List[str] = []
@@ -52,6 +59,27 @@ def __init__(self, organization_name: str = "", file_paths: List[str] = []):
         """
         all_uploaded = True
 
+        # Upload the files
+        for file in self.files:
+            # Prepend the organization_name to the object_name
+            object_name = f"{self.organization_name}/{file.filename}"
+            try:
+                file.file.seek(0)  # Ensure we're at the start of the file
+                self.client.upload_fileobj(file.file, self.bucket_name, object_name)
+                self.object_names.append(object_name)
+            except Exception as e:
+                print(e)
+                all_uploaded = False
+
+        return all_uploaded
+
+    def upload_files_by_paths(self):
+        """Upload multiple files using their file paths to an S3 bucket
+
+        :return: True if files were uploaded, else False
+        """
+        all_uploaded = True
+
         # Upload the files
         for file_path, file_name in zip(self.file_paths, self.file_names):
             # Prepend the organization_name to the object_name
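Review note (not part of the patch): there are now two upload paths, so a rough usage sketch may help reviewers; the organization and file names are made up, and it assumes valid Spaces credentials in settings. One stylistic flag: the `[]` defaults on `files` and `file_paths` are shared mutable defaults, harmless here because they are only read, but worth keeping an eye on.

```python
from object_storage.digitalocean_space_manager import DigitalOceanSpaceManager

# Path-based uploads, e.g. JPGs produced by ImageConversionManager on local disk:
with DigitalOceanSpaceManager(
    organization_name="Acme Inc", file_paths=["/tmp/invoice_page_1.jpg"]
) as space_manager:
    if space_manager.upload_files_by_paths():
        urls = space_manager.create_presigned_urls()

# UploadFile-based uploads, e.g. files taken straight from a FastAPI request,
# go through upload_files() instead:
#     with DigitalOceanSpaceManager(organization_name="Acme Inc", files=files) as sm:
#         sm.upload_files()
```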
diff --git a/backend/routes/data_profile_routes.py b/backend/routes/data_profile_routes.py
index a83a290..f00b515 100644
--- a/backend/routes/data_profile_routes.py
+++ b/backend/routes/data_profile_routes.py
@@ -1,9 +1,11 @@
+import os
 import tempfile
 from typing import List
 
 from database.data_profile_manager import DataProfileManager
 from database.database_manager import DatabaseManager
 from database.organization_manager import OrganizationManager
+from database.table_manager import TableManager
 from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile
 from llms.gpt import GPTLLM
 from models.data_profile import (
@@ -43,12 +45,15 @@ async def save_data_profiles(
 ) -> DataProfileCreateResponse:
     with DatabaseManager() as session:
         data_profile_manager = DataProfileManager(session)
-        if data_profile_manager.get_dataprofile_by_name(request.name):
+        if data_profile_manager.get_dataprofile_by_name_and_org(
+            request.name, current_user.organization_id
+        ):
             raise HTTPException(status_code=400, detail="Data Profile already exists")
 
         new_data_profile = DataProfile(
             name=request.name,
             description=request.description,
+            organization_id=current_user.organization_id,
         )
         created_data_profile = data_profile_manager.create_dataprofile(new_data_profile)
@@ -104,11 +109,99 @@ async def preview_data_profile(
         with DigitalOceanSpaceManager(
             organization_name=organization_name, file_paths=jpg_file_paths
         ) as space_manager:
-            space_manager.upload_files()
+            space_manager.upload_files_by_paths()
             jpg_presigned_urls = space_manager.create_presigned_urls()
             gpt = GPTLLM(chat_id=1, user=current_user)
             extracted_data = await gpt.extract_data_from_jpgs(
                 instructions, jpg_presigned_urls
             )
 
-    return extracted_data
+    # Delete the temporary files
+    for path in temp_file_paths:
+        os.remove(path)
+
+    return extracted_data
+
+
+@data_profile_router.post("/data-profiles/{data_profile_name}/preview/")
+async def preview_data_profile_upload(
+    data_profile_name: str,
+    files: List[UploadFile] = File(...),
+    current_user: User = Depends(get_current_user),
+):
+    temp_file_paths = []
+    for file in files:
+        if file.filename:
+            suffix = file.filename.split(".")[-1]
+
+            # Save the uploaded file temporarily
+            temp_file = tempfile.NamedTemporaryFile(delete=False, suffix="." + suffix)
+            temp_file.write(await file.read())
+            temp_file.close()
+            temp_file_paths.append(temp_file.name)
+
+    # Get the organization name
+    with DatabaseManager() as session:
+        org_manager = OrganizationManager(session)
+        organization_name = org_manager.get_organization(
+            current_user.organization_id
+        ).name
+
+        data_profile_manager = DataProfileManager(session)
+        data_profile = data_profile_manager.get_dataprofile_by_name_and_org(
+            data_profile_name, current_user.organization_id
+        )
+
+    # Use the ImageConversionManager context manager to convert the PDF to JPG
+    with ImageConversionManager(temp_file_paths) as manager:
+        jpg_file_paths = manager.convert_to_jpgs()
+
+        # Upload the JPG files to DigitalOcean Spaces, automatically deleting them when done
+        with DigitalOceanSpaceManager(
+            organization_name=organization_name, file_paths=jpg_file_paths
+        ) as space_manager:
+            space_manager.upload_files_by_paths()
+            jpg_presigned_urls = space_manager.create_presigned_urls()
+            gpt = GPTLLM(chat_id=1, user=current_user)
+            extracted_data = await gpt.extract_data_from_jpgs(
+                data_profile, jpg_presigned_urls
+            )
+
+    # Delete the temporary files
+    for path in temp_file_paths:
+        os.remove(path)
+
+    return extracted_data
+
+
+@data_profile_router.post("/data-profiles/{data_profile_name}/extracted-data/")
+async def save_extracted_data(
+    data_profile_name: str,
+    extracted_data: str = Form(...),  # JSON string; a dict body can't be mixed with multipart files
+    files: List[UploadFile] = File(...),
+    current_user: User = Depends(get_current_user),
+):
+    # Get the organization name
+    with DatabaseManager() as session:
+        org_manager = OrganizationManager(session)
+        organization_name = org_manager.get_organization(
+            current_user.organization_id
+        ).name
+
+        data_profile_manager = DataProfileManager(session)
+        data_profile: DataProfile = (
+            data_profile_manager.get_dataprofile_by_name_and_org(
+                data_profile_name, current_user.organization_id
+            )
+        )
+
+        table_manager = TableManager(session)
+        print(data_profile, table_manager)  # TODO: To be further implemented
+
+    # Upload the original files to DigitalOcean Spaces
+    with DigitalOceanSpaceManager(
+        organization_name=organization_name, files=files
+    ) as space_manager:
+        space_manager.upload_files()
+
+    return {"message": "Extracted data saved successfully"}
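Review note (not part of the patch): a hypothetical end-to-end client call for the two new endpoints. The base URL, profile name, file name, and auth handling are assumptions, and `extracted_data` is sent as a form field to match the `Form(...)` signature above:

```python
import json

import requests

API_URL = "http://localhost:8000/"  # assumed base URL

# Preview: upload a PDF against the "Invoices" profile and get extracted data back.
with open("invoice.pdf", "rb") as f:
    response = requests.post(
        f"{API_URL}data-profiles/Invoices/preview/",
        files=[("files", ("invoice.pdf", f, "application/pdf"))],
    )
extracted = response.json()

# Save: persist the (possibly user-corrected) extraction with the original file.
with open("invoice.pdf", "rb") as f:
    requests.post(
        f"{API_URL}data-profiles/Invoices/extracted-data/",
        data={"extracted_data": json.dumps(extracted)},
        files=[("files", ("invoice.pdf", f, "application/pdf"))],
    )
```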
diff --git a/backend/utils/image_conversion_manager.py b/backend/utils/image_conversion_manager.py
index 7a36483..f323ff0 100644
--- a/backend/utils/image_conversion_manager.py
+++ b/backend/utils/image_conversion_manager.py
@@ -20,12 +20,19 @@ def __exit__(self, exc_type, exc_value, traceback):
                 os.unlink(converted_file_path)  # Delete the file
 
     def convert_to_jpgs(self):
-        if all(file_path.endswith(".pdf") for file_path in self.file_paths):
+        if all(file_path.lower().endswith(".pdf") for file_path in self.file_paths):
             return self._convert_pdfs_to_jpgs(self.file_paths)
-        elif all(file_path.endswith(".png") for file_path in self.file_paths):
+        elif all(file_path.lower().endswith(".png") for file_path in self.file_paths):
             return self._convert_pngs_to_jpgs(self.file_paths)
+        elif all(
+            file_path.lower().endswith((".jpg", ".jpeg"))
+            for file_path in self.file_paths
+        ):
+            return self.file_paths
         else:
-            print("All files must be of the same type (either all .pdf or all .png)")
+            print(
+                "All files must be of the same type (either all .pdf, all .png, or all .jpg/.jpeg)"
+            )
             return []
 
     def _convert_pdfs_to_jpgs(self, file_paths: List[str]):
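Review note (not part of the patch): the same-type gate with case-insensitive suffixes could be factored into a small helper; a sketch under illustrative names, using `pathlib` for suffix handling:

```python
from pathlib import Path
from typing import List, Optional

SUFFIX_GROUPS = {
    "pdf": (".pdf",),
    "png": (".png",),
    "jpg": (".jpg", ".jpeg"),
}


def detect_batch_type(file_paths: List[str]) -> Optional[str]:
    """Return "pdf", "png", or "jpg" when every path shares that type, else None."""
    for label, suffixes in SUFFIX_GROUPS.items():
        if all(Path(p).suffix.lower() in suffixes for p in file_paths):
            return label
    return None


# detect_batch_type(["a.PDF", "b.pdf"]) -> "pdf"; mixed batches -> None
```

Like the patched `convert_to_jpgs`, an empty batch vacuously matches the first group, so callers should guard against empty input.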
diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx
index 4820081..f202404 100644
--- a/frontend/src/App.jsx
+++ b/frontend/src/App.jsx
@@ -40,10 +40,6 @@ function AppWrapper() {
 function App() {
   const { isLoading } = useAuth();
 
-  if (isLoading) {
-    return <div>Loading...</div>; // Or any other loading indicator
-  }
-
   useEffect(() => {
     if (APP_ENV === "dev") {
       document.title = "DocShow AI - Dev";
@@ -52,6 +48,10 @@ function App() {
     }
   }, []);
 
+  if (isLoading) {
+    return <div>Loading...</div>; // Or any other loading indicator
+  }
+
   return (
diff --git a/frontend/src/pages/analytics/AIAssistant.jsx b/frontend/src/pages/analytics/AIAssistant.jsx
--- a/frontend/src/pages/analytics/AIAssistant.jsx
+++ b/frontend/src/pages/analytics/AIAssistant.jsx
@@ ... @@
-const AIAssistant = ({ selectedTable }) => {
+const AIAssistant = () => {
   const [userInput, setUserInput] = useState("");
   const [chatHistory, setChatHistory] = useState([]);
   const chatEndRef = useRef(null);
diff --git a/frontend/src/pages/analytics/AnalyticsPage.jsx b/frontend/src/pages/analytics/AnalyticsPage.jsx
index 6af3427..ec4212d 100644
--- a/frontend/src/pages/analytics/AnalyticsPage.jsx
+++ b/frontend/src/pages/analytics/AnalyticsPage.jsx
@@ -1,33 +1,9 @@
 // AnalyticsPage.js
-import React, { useEffect, useState } from "react";
+import React from "react";
 import { Box, Typography, Grid } from "@mui/material";
 import AIAssistant from "./AIAssistant";
-import TableSelectDropdown from "../../components/tables/selects/TableSelectDropdown";
-import { fetchOrganizationTables } from "../../api/organizationTables";
 
 function AnalyticsPage() {
-  const [tables, setTables] = useState([]);
-  const [selectedTable, setSelectedTable] = useState("");
-
-  useEffect(() => {
-    const getOrganizationTables = async () => {
-      const data = await fetchOrganizationTables();
-      setTables(data);
-    };
-
-    getOrganizationTables();
-  }, []);
-
-  const handleTableSelect = (table) => {
-    setSelectedTable(table);
-  };
-
-  useEffect(() => {
-    if (selectedTable) {
-      handleTableSelect(selectedTable);
-    }
-  }, [selectedTable]);
-
   return (
@@ -35,7 +11,7 @@ function AnalyticsPage() {
-          <AIAssistant selectedTable={selectedTable} />
+          <AIAssistant />
diff --git a/frontend/src/pages/upload/CreateDataProfilePage.jsx b/frontend/src/pages/upload/CreateDataProfilePage.jsx
index 8f38d13..1bf4ef5 100644
--- a/frontend/src/pages/upload/CreateDataProfilePage.jsx
+++ b/frontend/src/pages/upload/CreateDataProfilePage.jsx
@@ -24,7 +24,7 @@ function CreateDataProfilePage({ open, onClose, onCreate }) {
   const handleSubmit = (event) => {
     event.preventDefault();
 
-    onCreate({ name, extractInstructions });
+    onCreate(name, extractInstructions);
   };
 
   const handlePreview = () => {
@@ -101,7 +101,7 @@ function CreateDataProfilePage({ open, onClose, onCreate }) {
             type="submit"
             color="primary"
             variant="contained"
-            disabled={!isPreviewTableOpen}
+            disabled={!isPreviewTableOpen || !name || !extractInstructions}
           >
             Create
diff --git a/frontend/src/pages/upload/UploadPage.jsx b/frontend/src/pages/upload/UploadPage.jsx
index 6dc6662..10269d9 100644
--- a/frontend/src/pages/upload/UploadPage.jsx
+++ b/frontend/src/pages/upload/UploadPage.jsx
@@ -1,22 +1,32 @@
 import React, { useState, useEffect } from "react";
-import { Box, Button, Stack, Typography } from "@mui/material";
+import {
+  Box,
+  Button,
+  CircularProgress,
+  Stack,
+  Typography,
+} from "@mui/material";
 import axios from "axios";
 import AlertSnackbar from "./AlertSnackbar";
-import DataProfileSelector from "./DataProfileSelector";
 import CreateDataProfilePage from "./CreateDataProfilePage";
+import DataProfileSelector from "./DataProfileSelector";
+import FileUploader from "./FileUploader";
+import PreviewTable from "./PreviewTable";
 import { API_URL } from "../../utils/constants";
 
 function UploadPage() {
-  const [files, setFiles] = useState(null);
-  const [dataProfile, setDataProfile] = useState([]);
+  const [files, setFiles] = useState([]);
+  const [dataProfile, setDataProfile] = useState(null);
   const [dataProfiles, setDataProfiles] = useState([]);
-  const [analyzed, setAnalyzed] = useState(false);
   const [alertInfo, setAlertInfo] = useState({
     open: false,
     message: "",
     severity: "info",
   });
   const [showCreateDataProfile, setShowCreateDataProfile] = useState(false);
+  const [previewData, setPreviewData] = useState(null);
+  const [isPreviewLoading, setIsPreviewLoading] = useState(false);
+  const [isPreviewTableOpen, setIsPreviewTableOpen] = useState(false);
 
   useEffect(() => {
     axios
@@ -27,14 +37,46 @@ function UploadPage() {
       .catch((error) => console.error("Error fetching data profiles:", error));
   }, []);
 
-  const handleAnalyze = () => {
-    // Placeholder for analyze functionality
-    setAnalyzed(true);
+  const handleCreateDataProfile = (name, extractInstructions) => {
+    axios
+      .post(`${API_URL}data-profile/`, {
+        name: name,
+        description: extractInstructions,
+      })
+      .then((response) => {
+        // Handle successful data profile creation
+        setDataProfiles((prevDataProfiles) => [...prevDataProfiles, name]);
+        setShowCreateDataProfile(false);
+      })
+      .catch((error) => {
+        console.error("Error creating data profile:", error);
+      });
   };
 
-  const handleCreateDataProfile = (dataProfile) => {
-    setDataProfiles([...dataProfiles, dataProfile]);
-    setShowCreateDataProfile(false);
+  const handlePreview = () => {
+    if (files.length && dataProfile) {
+      setIsPreviewLoading(true);
+      const formData = new FormData();
+      files.forEach((file) => {
+        formData.append("files", file); // Append each file
+      });
+
+      axios
+        .post(`${API_URL}data-profiles/${dataProfile}/preview/`, formData, {
+          headers: {
+            "Content-Type": "multipart/form-data",
+          },
+        })
+        .then((response) => {
+          setPreviewData(response.data); // Store the preview data
+          setIsPreviewTableOpen(true);
+          setIsPreviewLoading(false);
+        })
+        .catch((error) => {
+          console.error("Error on preview:", error);
+          setIsPreviewLoading(false);
+        });
+    }
   };
 
   const handleSubmit = async () => {
@@ -82,8 +124,6 @@ function UploadPage() {
-        {/* */}
-
+        {previewData && <PreviewTable previewData={previewData} />}
+
+        {isPreviewLoading && <CircularProgress />}
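Review note (not part of the patch): the org-scoping change is easy to lock in with a test. A hypothetical pytest sketch, assuming a `session` fixture bound to a test database and the `DataProfile` columns this patch relies on:

```python
from database.data_profile_manager import DataProfileManager
from models.data_profile import DataProfile


def test_same_profile_name_is_scoped_per_org(session):
    manager = DataProfileManager(session)
    manager.create_dataprofile(
        DataProfile(name="Invoices", description="org 1 rules", organization_id=1)
    )
    manager.create_dataprofile(
        DataProfile(name="Invoices", description="org 2 rules", organization_id=2)
    )

    # The same name resolves to a different profile per organization.
    assert manager.get_dataprofile_by_name_and_org("Invoices", 2).description == "org 2 rules"
    # An organization that has no such profile gets None back.
    assert manager.get_dataprofile_by_name_and_org("Invoices", 3) is None
```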