From 94ec52be3eee5b96a8bf360b1a9654680ff96e35 Mon Sep 17 00:00:00 2001 From: liberty-rising Date: Tue, 16 Jan 2024 18:17:31 +0100 Subject: [PATCH] begin work on data profiling --- backend/llms/gpt.py | 8 ++- backend/routes/data_profile_routes.py | 2 - backend/utils/image_conversion_manager.py | 2 +- .../src/components/file-upload/FileUpload.jsx | 38 +++++++++++ .../data-profiling/DataProfilingPage.jsx | 63 ++++++++++++++----- 5 files changed, 92 insertions(+), 21 deletions(-) create mode 100644 frontend/src/components/file-upload/FileUpload.jsx diff --git a/backend/llms/gpt.py b/backend/llms/gpt.py index be60757..12b07e6 100644 --- a/backend/llms/gpt.py +++ b/backend/llms/gpt.py @@ -412,5 +412,9 @@ async def extract_data_from_jpgs( assistant_message_content = await self._send_and_receive_message( prompt, jpg_presigned_urls ) - # data = json.loads(assistant_message_content) TODO: Ajust prompt to only return the json - return assistant_message_content + json_string = assistant_message_content.replace("```json\n", "").replace( + "\n```", "" + ) + data = json.loads(json_string) + print(data) + return data diff --git a/backend/routes/data_profile_routes.py b/backend/routes/data_profile_routes.py index 525ab59..965da68 100644 --- a/backend/routes/data_profile_routes.py +++ b/backend/routes/data_profile_routes.py @@ -91,8 +91,6 @@ async def preview_data_profile( with ImageConversionManager(temp_file_paths) as manager: jpg_file_paths = manager.convert_to_jpgs() - print("jpg_file_paths", jpg_file_paths) - # Upload the JPG file to DigitalOcean Spaces, automatically deleting it when done with DigitalOceanSpaceManager( organization_name=organization_name, file_paths=jpg_file_paths diff --git a/backend/utils/image_conversion_manager.py b/backend/utils/image_conversion_manager.py index 23a9521..08c04fe 100644 --- a/backend/utils/image_conversion_manager.py +++ b/backend/utils/image_conversion_manager.py @@ -40,7 +40,7 @@ def _convert_pdfs_to_jpgs(self, file_paths: List[str]): jpg_file_path = os.path.join(output_folder, jpg_filename) # Resize the image - image.thumbnail((1024, 1024)) + # image.thumbnail((1024, 1024)) image.save(jpg_file_path, "JPEG") jpg_file_paths.append(jpg_file_path) diff --git a/frontend/src/components/file-upload/FileUpload.jsx b/frontend/src/components/file-upload/FileUpload.jsx new file mode 100644 index 0000000..0554220 --- /dev/null +++ b/frontend/src/components/file-upload/FileUpload.jsx @@ -0,0 +1,38 @@ +import React, { useState } from 'react'; +import axios from 'axios'; +import { API_URL } from '../../utils/constants'; + +function FileUpload({ endpoint }) { + const [selectedFiles, setSelectedFiles] = useState([]); + + const handleFileChange = (e) => { + setSelectedFiles(e.target.files); + } + + const handleUpload = async () => { + const formData = new FormData(); + for (let i = 0; i < selectedFiles.length; i++) { + formData.append('files', selectedFiles[i]); + } + + try { + const response = await axios.post(`${API_URL}${endpoint}`, formData, { + headers: { + 'Content-Type': 'multipart/form-data' + } + }); + console.log(response.data); + } catch (error) { + console.error(error); + } + } + + return ( +
+ + +
+ ); +} + +export default FileUpload; \ No newline at end of file diff --git a/frontend/src/pages/data-profiling/DataProfilingPage.jsx b/frontend/src/pages/data-profiling/DataProfilingPage.jsx index 7d6f686..55ea032 100644 --- a/frontend/src/pages/data-profiling/DataProfilingPage.jsx +++ b/frontend/src/pages/data-profiling/DataProfilingPage.jsx @@ -26,6 +26,8 @@ function DataProfilingPage() { const [previewData, setPreviewData] = useState(null); const navigate = useNavigate(); const fileInputRef = useRef(null); + const [selectedFiles, setSelectedFiles] = useState([]); // Array of files + const [selectedFileNames, setSelectedFileNames] = useState([]); // Array of file names useEffect(() => { axios.get(`${API_URL}data-profiles/`) @@ -40,6 +42,15 @@ function DataProfilingPage() { .catch(error => console.error('Error fetching data profiles:', error)); }, []); + const generateTableHeaders = (data) => { + if (data && data.length > 0) { + return Object.keys(data[0]).map(key => ( + {key.replace(/_/g, ' ').toUpperCase()} + )); + } + return null; + }; + const handleProfileCreate = () => { navigate('/data-profiling/create'); }; @@ -49,10 +60,10 @@ function DataProfilingPage() { }; const handleFileChange = (event) => { - const file = event.target.files[0]; - if (file) { - setSelectedFile(file); - setSelectedFileName(file.name); + const files = Array.from(event.target.files); + if (files.length) { + setSelectedFiles(files); + setSelectedFileNames(files.map(file => file.name)); } }; @@ -61,10 +72,12 @@ function DataProfilingPage() { }; const handleUpload = () => { - if (selectedFile) { + if (selectedFiles.length) { setIsUploading(true); const formData = new FormData(); - formData.append('file', selectedFile); + selectedFiles.forEach(file => { + formData.append('files', file); // Append each file to the form data + }); formData.append('instructions', instructions); axios.post(`${API_URL}upload-url`, formData) @@ -72,8 +85,8 @@ function DataProfilingPage() { console.log(response); setIsUploading(false); // Reset states after upload - setSelectedFile(null); - setSelectedFileName(''); + setSelectedFiles([]); + setSelectedFileNames([]); setInstructions(''); }) .catch(error => { @@ -84,9 +97,11 @@ function DataProfilingPage() { }; const handlePreview = () => { - if (selectedFile && instructions) { + if (selectedFiles.length && instructions) { const formData = new FormData(); - formData.append('file', selectedFile); + selectedFiles.forEach(file => { + formData.append('files', file); // Append each file + }); formData.append('instructions', instructions); axios.post(`${API_URL}data-profiles/preview/`, formData, { @@ -126,7 +141,7 @@ function DataProfilingPage() { variant="contained" color="info" onClick={handlePreview} - disabled={!selectedFile || !instructions} // Disable if no file or instructions + disabled={!selectedFiles || !instructions} // Disable if no file or instructions > Preview @@ -136,6 +151,7 @@ function DataProfilingPage() { type="file" ref={fileInputRef} onChange={handleFileChange} + multiple // Allow multiple file selection style={{ display: 'none' }} /> @@ -150,17 +166,32 @@ function DataProfilingPage() { helperText="Write any special instructions here" /> - {selectedFileName && ( + {selectedFileNames.length > 0 && ( - File selected: {selectedFileName} + Files selected: {selectedFileNames.join(', ')} )} {/* Display preview data if available */} {previewData && ( - - Preview Data: {JSON.stringify(previewData)} - + + + + + {generateTableHeaders(previewData)} + + + + {previewData.map((row, index) => ( + + {Object.values(row).map((value, idx) => ( + {value} + ))} + + ))} + +
+
)}