Skip to content

Commit

Permalink
begin work on data profiling
Browse files Browse the repository at this point in the history
  • Loading branch information
liberty-rising committed Jan 16, 2024
1 parent 9548d94 commit 94ec52b
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 21 deletions.
8 changes: 6 additions & 2 deletions backend/llms/gpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,5 +412,9 @@ async def extract_data_from_jpgs(
assistant_message_content = await self._send_and_receive_message(
prompt, jpg_presigned_urls
)
# data = json.loads(assistant_message_content) TODO: Adjust prompt to only return the json
return assistant_message_content
json_string = assistant_message_content.replace("```json\n", "").replace(
"\n```", ""
)
data = json.loads(json_string)
print(data)
return data
2 changes: 0 additions & 2 deletions backend/routes/data_profile_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,6 @@ async def preview_data_profile(
with ImageConversionManager(temp_file_paths) as manager:
jpg_file_paths = manager.convert_to_jpgs()

print("jpg_file_paths", jpg_file_paths)

# Upload the JPG file to DigitalOcean Spaces, automatically deleting it when done
with DigitalOceanSpaceManager(
organization_name=organization_name, file_paths=jpg_file_paths
Expand Down
2 changes: 1 addition & 1 deletion backend/utils/image_conversion_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def _convert_pdfs_to_jpgs(self, file_paths: List[str]):
jpg_file_path = os.path.join(output_folder, jpg_filename)

# Resize the image
image.thumbnail((1024, 1024))
# image.thumbnail((1024, 1024))

image.save(jpg_file_path, "JPEG")
jpg_file_paths.append(jpg_file_path)
Expand Down
38 changes: 38 additions & 0 deletions frontend/src/components/file-upload/FileUpload.jsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import React, { useState } from 'react';
import axios from 'axios';
import { API_URL } from '../../utils/constants';

function FileUpload({ endpoint }) {
const [selectedFiles, setSelectedFiles] = useState([]);

const handleFileChange = (e) => {
setSelectedFiles(e.target.files);
}

const handleUpload = async () => {
const formData = new FormData();
for (let i = 0; i < selectedFiles.length; i++) {
formData.append('files', selectedFiles[i]);
}

try {
const response = await axios.post(`${API_URL}${endpoint}`, formData, {
headers: {
'Content-Type': 'multipart/form-data'
}
});
console.log(response.data);
} catch (error) {
console.error(error);
}
}

return (
<div>
<input type="file" multiple onChange={handleFileChange} />
<button onClick={handleUpload}>Upload</button>
</div>
);
}

export default FileUpload;
63 changes: 47 additions & 16 deletions frontend/src/pages/data-profiling/DataProfilingPage.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ function DataProfilingPage() {
const [previewData, setPreviewData] = useState(null);
const navigate = useNavigate();
const fileInputRef = useRef(null);
const [selectedFiles, setSelectedFiles] = useState([]); // Array of files
const [selectedFileNames, setSelectedFileNames] = useState([]); // Array of file names

useEffect(() => {
axios.get(`${API_URL}data-profiles/`)
Expand All @@ -40,6 +42,15 @@ function DataProfilingPage() {
.catch(error => console.error('Error fetching data profiles:', error));
}, []);

const generateTableHeaders = (data) => {
if (data && data.length > 0) {
return Object.keys(data[0]).map(key => (
<TableCell key={key}>{key.replace(/_/g, ' ').toUpperCase()}</TableCell>
));
}
return null;
};

const handleProfileCreate = () => {
navigate('/data-profiling/create');
};
Expand All @@ -49,10 +60,10 @@ function DataProfilingPage() {
};

const handleFileChange = (event) => {
const file = event.target.files[0];
if (file) {
setSelectedFile(file);
setSelectedFileName(file.name);
const files = Array.from(event.target.files);
if (files.length) {
setSelectedFiles(files);
setSelectedFileNames(files.map(file => file.name));
}
};

Expand All @@ -61,19 +72,21 @@ function DataProfilingPage() {
};

const handleUpload = () => {
if (selectedFile) {
if (selectedFiles.length) {
setIsUploading(true);
const formData = new FormData();
formData.append('file', selectedFile);
selectedFiles.forEach(file => {
formData.append('files', file); // Append each file to the form data
});
formData.append('instructions', instructions);

axios.post(`${API_URL}upload-url`, formData)
.then(response => {
console.log(response);
setIsUploading(false);
// Reset states after upload
setSelectedFile(null);
setSelectedFileName('');
setSelectedFiles([]);
setSelectedFileNames([]);
setInstructions('');
})
.catch(error => {
Expand All @@ -84,9 +97,11 @@ function DataProfilingPage() {
};

const handlePreview = () => {
if (selectedFile && instructions) {
if (selectedFiles.length && instructions) {
const formData = new FormData();
formData.append('file', selectedFile);
selectedFiles.forEach(file => {
formData.append('files', file); // Append each file
});
formData.append('instructions', instructions);

axios.post(`${API_URL}data-profiles/preview/`, formData, {
Expand Down Expand Up @@ -126,7 +141,7 @@ function DataProfilingPage() {
variant="contained"
color="info"
onClick={handlePreview}
disabled={!selectedFile || !instructions} // Disable if no file or instructions
disabled={!selectedFiles || !instructions} // Disable if no file or instructions
>
Preview
</Button>
Expand All @@ -136,6 +151,7 @@ function DataProfilingPage() {
type="file"
ref={fileInputRef}
onChange={handleFileChange}
multiple // Allow multiple file selection
style={{ display: 'none' }}
/>
</Box>
Expand All @@ -150,17 +166,32 @@ function DataProfilingPage() {
helperText="Write any special instructions here"
/>

{selectedFileName && (
{selectedFileNames.length > 0 && (
<Typography variant="subtitle1" gutterBottom>
File selected: {selectedFileName}
Files selected: {selectedFileNames.join(', ')}
</Typography>
)}

{/* Display preview data if available */}
{previewData && (
<Typography variant="subtitle1" gutterBottom>
Preview Data: {JSON.stringify(previewData)}
</Typography>
<TableContainer component={Paper}>
<Table>
<TableHead>
<TableRow>
{generateTableHeaders(previewData)}
</TableRow>
</TableHead>
<TableBody>
{previewData.map((row, index) => (
<TableRow key={index}>
{Object.values(row).map((value, idx) => (
<TableCell key={idx}>{value}</TableCell>
))}
</TableRow>
))}
</TableBody>
</Table>
</TableContainer>
)}

<TableContainer component={Paper}>
Expand Down

0 comments on commit 94ec52b

Please sign in to comment.