Skip to content

Commit

Permalink
suggest data types for table columns
Browse files Browse the repository at this point in the history
  • Loading branch information
liberty-rising committed Jan 29, 2024
1 parent 02cade8 commit df85c02
Show file tree
Hide file tree
Showing 8 changed files with 189 additions and 114 deletions.
14 changes: 11 additions & 3 deletions backend/llms/gpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,16 +368,20 @@ async def generate_chart_config(

return parsed_config

async def generate_suggested_column_types(self, column_names: list, data: list):
    """Ask the model to suggest a data type for each table column.

    Registers the "column_type_suggestion" system message, switches the
    response format to JSON, sends the prompt built by the prompt manager
    from ``column_names`` and ``data``, and decodes the reply.

    Args:
        column_names: Names of the columns to classify.
        data: Sample rows the suggestion is based on. The route handler
            passes the request's ``data`` list through unchanged.

    Returns:
        The decoded JSON reply. The prompt requests an object mapping
        column names to suggested types, but the shape of the reply is
        not validated here.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
    """
    self._add_system_message(assistant_type="column_type_suggestion")
    self._set_response_format(is_json=True)

    prompt = self.prompt_manager.create_column_type_suggestion_prompt(
        column_names, data
    )
    gpt_response = await self._send_and_receive_message(prompt)

    # The reply arrives as JSON text; decode it so callers receive a Python
    # object instead of a raw string.
    return json.loads(gpt_response)

def fetch_table_name_from_sample(
self, sample_content: str, extra_desc: str, table_metadata: str
Expand Down Expand Up @@ -429,5 +433,9 @@ async def extract_data_from_jpgs(
"\n```", ""
)
data = json.loads(json_string)

# If data is a dictionary, wrap it in a list
if isinstance(data, dict):
data = [data]
print(data)
return data
15 changes: 15 additions & 0 deletions backend/llms/prompt_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,3 +122,18 @@ def jpg_data_extraction_prompt(self, instructions: str):
Return only the requested information, no additional text or formatting.
"""
return prompt

def create_column_type_suggestion_prompt(self, column_names, data):
    """Build the prompt that asks for a data type per table column.

    ``column_names`` and ``data`` are interpolated into the prompt text
    using their default string formatting.

    Returns:
        The prompt string, which lists the allowed column types and asks
        for a JSON object keyed by column name.
    """
    return f"""
Based on the following data, suggest the data types for each column in the table.
The available column types are: text, integer, money, date, boolean
Column names:
{column_names}
Data:
{data}
Return a JSON with the column names as keys and the suggested data types as values.
"""
2 changes: 1 addition & 1 deletion backend/llms/system_message_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ def __init__(self):
You will be generating SQL queries, and providing useful information for reports and analytics based on the given prompt.""",
"column_type_suggestion": """
You are a column type suggestion assistant.
You will be suggesting PostgreSQL column types based on the given prompt.
You will be suggesting column data types based on the given prompt.
""",
"sql_code": """
You are a PostgreSQL SQL statement assistant.
Expand Down
4 changes: 4 additions & 0 deletions backend/models/data_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,7 @@ class DataProfileCreateRequest(BaseModel):
class DataProfileCreateResponse(BaseModel):
name: str
extract_instructions: str


class SuggestedColumnTypesRequest(BaseModel):
    """Request body for the column-type suggestion endpoint."""

    # Preview rows to analyse. The route handler reads ``data[0].keys()``,
    # so rows are presumably dicts keyed by column name -- confirm with caller.
    data: list
26 changes: 20 additions & 6 deletions backend/routes/data_profile_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
DataProfile,
DataProfileCreateRequest,
DataProfileCreateResponse,
SuggestedColumnTypesRequest,
)
from models.user import User
from security import get_current_user
Expand Down Expand Up @@ -78,6 +79,11 @@ async def get_data_profile(
return data_profile


@data_profile_router.get("/data-profiles/column-types/")
async def get_column_types(current_user: User = Depends(get_current_user)):
    """Return the column data types offered for data profile columns.

    ``current_user`` is resolved through the ``get_current_user``
    dependency but is not otherwise used by this handler.
    """
    supported_types = ["text", "integer", "money", "date", "boolean"]
    return supported_types


@data_profile_router.post("/data-profiles/preview/")
async def preview_data_profile(
files: List[UploadFile] = File(...),
Expand Down Expand Up @@ -128,12 +134,20 @@ async def preview_data_profile(
return extracted_data


# @data_profile_router.post("/data-profiles/preview/column-types/")
# async def generate_suggested_column_types(
# data, current_user: User = Depends(get_current_user)
# ):
# gpt = GPTLLM(chat_id=1, user=current_user)
# suggested_column_types = await gpt.generate_suggested_column_types(data)
@data_profile_router.post("/data-profiles/preview/column-types/")
async def generate_suggested_column_types(
    request: SuggestedColumnTypesRequest, current_user: User = Depends(get_current_user)
):
    """Suggest a data type for every column of the previewed data.

    Args:
        request: Body carrying the preview rows in ``data``.
        current_user: User resolved by the ``get_current_user`` dependency;
            passed on to the LLM client.

    Returns:
        The suggestions produced by ``GPTLLM.generate_suggested_column_types``,
        or an empty mapping when ``data`` contains no rows.
    """
    # With no rows there are no columns to classify. Answer with an empty
    # mapping instead of falling through to a result that was never assigned.
    if not request.data:
        return {}

    gpt = GPTLLM(chat_id=1, user=current_user)

    # Column names are taken from the first row only.
    column_names = list(request.data[0].keys())

    return await gpt.generate_suggested_column_types(column_names, request.data)


@data_profile_router.post("/data-profiles/{data_profile_name}/preview/")
Expand Down
26 changes: 26 additions & 0 deletions frontend/src/api/dataProfilesRequests.jsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import axios from "axios";
import { API_URL } from "../utils/constants";

/**
 * Post the sample files and extraction instructions to the preview endpoint.
 *
 * Each file is appended under the "files" field and the instructions under
 * "extract_instructions" of a multipart form.
 *
 * @param sampleFiles - Collection of files to upload; must support forEach.
 * @param extractInstructions - Instructions sent alongside the files.
 * @returns The promise returned by axios.post.
 */
export const getPreviewData = (sampleFiles, extractInstructions) => {
  const payload = new FormData();
  sampleFiles.forEach((sampleFile) => payload.append("files", sampleFile));
  payload.append("extract_instructions", extractInstructions);

  const requestConfig = {
    headers: { "Content-Type": "multipart/form-data" },
  };
  return axios.post(`${API_URL}data-profiles/preview/`, payload, requestConfig);
};

/**
 * Request the list of available column types from the backend.
 *
 * @returns The promise returned by axios.get.
 */
export const getAvailableColumnTypes = () =>
  axios.get(`${API_URL}data-profiles/column-types/`);

/**
 * Request suggested column types for the given preview data.
 *
 * @param previewData - Preview rows, sent as the "data" field of the body.
 * @returns The promise returned by axios.post.
 */
export const getSuggestedColumnTypes = (previewData) => {
  const requestBody = { data: previewData };
  return axios.post(
    `${API_URL}data-profiles/preview/column-types/`,
    requestBody,
  );
};
45 changes: 31 additions & 14 deletions frontend/src/pages/upload/CreateDataProfileWindow.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,21 @@ import {
Stack,
TextField,
} from "@mui/material";
import axios from "axios";
import FileUploader from "./FileUploader";
import DataPreviewAndSchemaEditor from "./DataPreviewAndSchemaEditor";
import { API_URL } from "../../utils/constants";
import {
getPreviewData,
getAvailableColumnTypes,
getSuggestedColumnTypes,
} from "../../api/dataProfilesRequests";

function CreateDataProfileWindow({ open, onClose, onCreate }) {
const [name, setName] = useState("");
const [extractInstructions, setExtractInstructions] = useState("");
const [sampleFiles, setSampleFiles] = useState([]);
const [previewData, setPreviewData] = useState(null);
const [availableColumnTypes, setAvailableColumnTypes] = useState([]);
const [selectedColumnTypes, setSelectedColumnTypes] = useState(null);
const [isPreviewLoading, setIsPreviewLoading] = useState(false);
const [isPreviewTableOpen, setIsPreviewTableOpen] = useState(false);

Expand All @@ -30,25 +35,33 @@ function CreateDataProfileWindow({ open, onClose, onCreate }) {
const handlePreview = () => {
if (sampleFiles.length && extractInstructions) {
setIsPreviewLoading(true);
setPreviewData(null);
setSelectedColumnTypes(null);

const formData = new FormData();
sampleFiles.forEach((file) => {
formData.append("files", file); // Append each file
formData.append("files", file);
});
formData.append("extract_instructions", extractInstructions);

axios
.post(`${API_URL}data-profiles/preview/`, formData, {
headers: {
"Content-Type": "multipart/form-data",
},
Promise.all([
getPreviewData(sampleFiles, extractInstructions),
getAvailableColumnTypes(),
])
.then(([previewDataResponse, availableTypesResponse]) => {
setPreviewData(previewDataResponse.data);
setAvailableColumnTypes(availableTypesResponse.data);

return getSuggestedColumnTypes(previewDataResponse.data);
})
.then((response) => {
setPreviewData(response.data); // Store the preview data
.then((suggestedTypesResponse) => {
setSelectedColumnTypes(suggestedTypesResponse.data);
setIsPreviewTableOpen(true);
setIsPreviewLoading(false);
})
.catch((error) => {
console.error("Error on preview:", error);
console.error("Error during preview setup:", error);
})
.finally(() => {
setIsPreviewLoading(false);
});
}
Expand Down Expand Up @@ -95,8 +108,12 @@ function CreateDataProfileWindow({ open, onClose, onCreate }) {
/>
</Box>
<Box mt={2}>
{previewData && (
<DataPreviewAndSchemaEditor previewData={previewData} />
{previewData && selectedColumnTypes && (
<DataPreviewAndSchemaEditor
previewData={previewData}
availableColumnTypes={availableColumnTypes}
selectedColumnTypes={selectedColumnTypes}
/>
)}
</Box>
<Box display="flex" justifyContent="center" mt={2}>
Expand Down
Loading

0 comments on commit df85c02

Please sign in to comment.