Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dev #253

Merged
merged 4 commits into from
Jan 29, 2024
Merged

Dev #253

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions backend/database/sql_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
import pandas as pd
from sqlalchemy import inspect, text
from sqlalchemy.orm import Session
from utils.sql_string_manager import SQLStringManager


class SQLExecutor:
def __init__(self, session: Session):
self.session = session
self.database_type = "postgres"
self.sql_string_manager = SQLStringManager()

def append_df_to_table(self, df: pd.DataFrame, table_name: str):
try:
Expand All @@ -20,6 +22,23 @@ def append_df_to_table(self, df: pd.DataFrame, table_name: str):
)
raise

def create_table_for_data_profile(
    self, org_id: int, table_name: str, column_names_and_types: dict
):
    """
    Build and run the CREATE TABLE statement for a data profile.

    The statement text comes from the SQL string manager held on this
    executor; it is executed on the executor's session and committed.

    Parameters:
    org_id (int): Accepted for signature parity with callers; not used here.
    table_name (str): Name of the table to create.
    column_names_and_types (dict): Column names mapped to column types.

    Raises:
    Exception: Whatever the query builder or the session raised; the
    session is rolled back before the exception is re-raised.
    """
    try:
        manager = self.sql_string_manager
        ddl = manager.generate_create_query_for_data_profile_table(
            table_name, column_names_and_types
        )
        statement = text(ddl)
        self.session.execute(statement)
        self.session.commit()
    except Exception as err:
        print(f"An error occurred: {err}")
        # Undo the failed transaction before handing the error to the caller.
        self.session.rollback()
        raise

def execute_create_query(self, create_query: str):
try:
self.session.execute(text(create_query))
Expand Down
24 changes: 18 additions & 6 deletions backend/database/table_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from llms.base import BaseLLM
from models.table_map import TableMap
from sqlalchemy.orm import Session
from utils.sql_string_manipulator import SQLStringManipulator
from utils.sql_string_manager import SQLStringManager


class TableManager:
Expand Down Expand Up @@ -42,6 +42,20 @@ def _map_table_to_org(
print(f"An error occurred: {e}")
raise HTTPException(status_code=400, detail=str(e))

def create_table_for_data_profile(
    self, org_id: int, table_name: str, column_names_and_types: dict
):
    """
    Create the table for a data profile and map it to an organization.

    Parameters:
    org_id (int): Organization the new table is mapped to.
    table_name (str): Name of the table to create.
    column_names_and_types (dict): Column names mapped to column types.

    Raises:
    HTTPException: With status 400 when table creation or the mapping fails.
    """
    try:
        executor = SQLExecutor(self.session)
        executor.create_table_for_data_profile(
            org_id, table_name, column_names_and_types
        )
        self._map_table_to_org(org_id, table_name)
    except HTTPException:
        # _map_table_to_org already reports failures as HTTPException;
        # re-wrapping it would replace its detail with str(exception).
        raise
    except Exception as e:
        print(f"An error occurred: {e}")
        # Chain the cause so the original traceback stays available.
        raise HTTPException(status_code=400, detail=str(e)) from e

def create_table_with_llm(self, sample_content: str, header: str, extra_desc: str):
"""
Creates a table using an LLM based on sample file content and a message.
Expand All @@ -61,11 +75,11 @@ def create_table_with_llm(self, sample_content: str, header: str, extra_desc: st
sample_content, header, table_names, extra_desc
)

create_query = SQLStringManipulator(
create_query = SQLStringManager(
raw_create_query
).extract_sql_query_from_text() # Just in case

if SQLStringManipulator(
if SQLStringManager(
create_query
).is_valid_create_table_query(): # Checks if the query is valid
sql_executor = SQLExecutor(self.session)
Expand Down Expand Up @@ -94,9 +108,7 @@ def create_table_desc_with_llm(
create_query, sample_content, extra_desc
)

table_name = SQLStringManipulator(
create_query
).get_table_from_create_query()
table_name = SQLStringManager(create_query).get_table_from_create_query()

# Store description in separate table
manager = TableMetadataManager(self.session)
Expand Down
1 change: 1 addition & 0 deletions backend/models/data_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def to_dict(self):
class DataProfileCreateRequest(BaseModel):
# Request body accepted by the save-data-profile route.
# Profile name; the save route also derives the backing table name from it.
name: str
# Stored on the new DataProfile as its extract_instructions.
extract_instructions: str
# Column name -> column type mapping that the save route forwards to table creation.
# NOTE(review): plain `dict` annotation, so key/value types are not constrained here.
column_names_and_types: dict


class DataProfileCreateResponse(BaseModel):
Expand Down
25 changes: 25 additions & 0 deletions backend/routes/data_profile_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from security import get_current_user
from utils.image_conversion_manager import ImageConversionManager
from utils.object_storage.digitalocean_space_manager import DigitalOceanSpaceManager
from utils.sql_string_manager import SQLStringManager

data_profile_router = APIRouter()

Expand Down Expand Up @@ -45,17 +46,41 @@ async def save_data_profile(
request: DataProfileCreateRequest, current_user: User = Depends(get_current_user)
) -> DataProfileCreateResponse:
"""Save a new data profile to the database"""
if len(request.name) > 50:
raise HTTPException(
status_code=400, detail="Data Profile name cannot be longer than 50 chars"
)

formatted_name = request.name.replace(" ", "_").lower()
table_name = f"org_{current_user.organization_id}_{formatted_name}"
sql_string_manager = SQLStringManager()
if not sql_string_manager.is_valid_table_name(table_name):
raise HTTPException(
status_code=400,
detail="Data Profile name must only contain letters, numbers, and underscores",
)

with DatabaseManager() as session:
data_profile_manager = DataProfileManager(session)
if data_profile_manager.get_dataprofile_by_name_and_org(
request.name, current_user.organization_id
):
raise HTTPException(status_code=400, detail="Data Profile already exists")

# Create the table for the data profile
table_manager = TableManager(session)
table_manager.create_table_for_data_profile(
org_id=current_user.organization_id,
table_name=table_name,
column_names_and_types=request.column_names_and_types,
)

# Create the data profile
new_data_profile = DataProfile(
name=request.name,
extract_instructions=request.extract_instructions,
organization_id=current_user.organization_id,
table_name=table_name, # TODO: To be further implemented
)
created_data_profile = data_profile_manager.create_dataprofile(new_data_profile)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import Optional


class SQLStringManipulator:
class SQLStringManager:
"""
A class for manipulating SQL query strings.

Expand All @@ -21,7 +21,7 @@ class SQLStringManipulator:

def __init__(self, sql_string: str = ""):
"""
Initializes an instance of the SQLStringManipulator class with a query string.
Initializes an instance of the SQLStringManager class with a query string.

Parameters:
sql_string (str): The SQL query string to be manipulated.
Expand All @@ -31,6 +31,48 @@ def __init__(self, sql_string: str = ""):
def set_sql_string(self, sql_string: str):
self.sql_string = sql_string

def map_to_postgres_type(self, column_type: str) -> str:
    """
    Maps a generic column type to a PostgreSQL data type.

    The lookup ignores letter case and surrounding whitespace, so
    "Integer" and " money " resolve like "integer" and "money".

    Parameters:
    column_type (str): The generic column type.

    Returns:
    str: The PostgreSQL data type; "TEXT" for unknown or non-string input.
    """
    type_mapping = {
        "text": "TEXT",
        "integer": "INTEGER",
        "money": "DECIMAL",
        "date": "DATE",
        "boolean": "BOOLEAN",
    }

    # Non-string input (e.g. None) keeps falling back to TEXT instead of raising.
    if not isinstance(column_type, str):
        return "TEXT"

    return type_mapping.get(column_type.strip().lower(), "TEXT")

def generate_create_query_for_data_profile_table(
self, table_name: str, column_names_and_types: dict
) -> str:
"""
Generates a CREATE TABLE query for a data profile table.

Parameters:
table_name (str): The name of the table.
column_names_and_types (dict): A dictionary of column names and types.

Returns:
str: The CREATE TABLE query.
"""
# Generate the CREATE TABLE query
create_query = f"CREATE TABLE {table_name} ("
for column_name, column_type in column_names_and_types.items():
postgres_type = self.map_to_postgres_type(column_type)
create_query += f"{column_name} {postgres_type}, "
create_query = create_query[:-2] + ");"

return create_query

def get_table_from_create_query(self) -> Optional[str]:
"""
Extract the table name from a SQL CREATE TABLE query.
Expand Down Expand Up @@ -58,6 +100,18 @@ def is_valid_create_table_query(self) -> bool:
pattern = r"^CREATE TABLE .+;\s*$"
return bool(re.match(pattern, clean_query))

def is_valid_pg_table_name(self, table_name) -> bool:
    """
    Checks whether a string can be used as an unquoted PostgreSQL table name.

    Accepted names start with an underscore or a lowercase letter, continue
    with underscores, lowercase letters or digits, and are at most 63
    characters long (PostgreSQL truncates longer identifiers).

    Parameters:
    table_name: The candidate table name.

    Returns:
    bool: True if the name is acceptable, False otherwise (including for
    non-string input).
    """
    if not isinstance(table_name, str):
        return False

    # fullmatch instead of match + "$": "$" also matches just before a
    # trailing newline, which would let "name\n" through.
    if re.fullmatch(r"[_a-z][_a-z0-9]*", table_name) is None:
        return False

    # The pattern is ASCII-only, so character count equals byte count.
    return len(table_name) <= 63

# NOTE(review): the data-profile route calls `is_valid_table_name`; this alias
# keeps that spelling working. Confirm no other definition of it exists.
is_valid_table_name = is_valid_pg_table_name

def extract_sql_query_from_text(self) -> Optional[str]:
"""
Extracts an SQL query from a given text.
Expand Down
13 changes: 12 additions & 1 deletion frontend/src/pages/upload/CreateDataProfileWindow.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,11 @@ function CreateDataProfileWindow({ open, onClose, onCreate }) {
const [selectedColumnTypes, setSelectedColumnTypes] = useState(null);
const [isPreviewLoading, setIsPreviewLoading] = useState(false);
const [isPreviewTableOpen, setIsPreviewTableOpen] = useState(false);
const [columnNamesAndTypes, setColumnNamesAndTypes] = useState({});

const handleSubmit = (event) => {
event.preventDefault();
onCreate(name, extractInstructions);
onCreate(name, extractInstructions, columnNamesAndTypes);
};

const handlePreview = () => {
Expand Down Expand Up @@ -67,6 +68,15 @@ function CreateDataProfileWindow({ open, onClose, onCreate }) {
}
};

const handleColumnsChange = (columns) => {
  // Collapse the editor's column list into a { name: type } lookup and
  // keep it in state so it can be submitted with the profile.
  const typesByName = {};
  columns.forEach(({ name, type }) => {
    typesByName[name] = type;
  });

  setColumnNamesAndTypes(typesByName);
};

return (
<Dialog
open={open}
Expand Down Expand Up @@ -113,6 +123,7 @@ function CreateDataProfileWindow({ open, onClose, onCreate }) {
previewData={previewData}
availableColumnTypes={availableColumnTypes}
selectedColumnTypes={selectedColumnTypes}
onColumnsChange={handleColumnsChange}
/>
)}
</Box>
Expand Down
5 changes: 5 additions & 0 deletions frontend/src/pages/upload/DataPreviewAndSchemaEditor.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ function DataPreviewAndSchemaEditor({
previewData,
availableColumnTypes,
selectedColumnTypes,
onColumnsChange,
}) {
const [columns, setColumns] = useState([]);

Expand All @@ -35,6 +36,10 @@ function DataPreviewAndSchemaEditor({
}
}, [previewData, selectedColumnTypes]);

useEffect(() => {
onColumnsChange(columns); // Call the callback function when columns change
}, [columns]);

const handleColumnTypeChange = (index, newType) => {
setColumns((prevColumns) =>
prevColumns.map((column, colIndex) =>
Expand Down
7 changes: 6 additions & 1 deletion frontend/src/pages/upload/UploadPage.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,16 @@ function UploadPage() {
.catch((error) => console.error("Error fetching data profiles:", error));
}, []);

const handleCreateDataProfile = (name, extractInstructions) => {
const handleCreateDataProfile = (
name,
extractInstructions,
columnNamesAndTypes,
) => {
axios
.post(`${API_URL}data-profile/`, {
name: name,
extract_instructions: extractInstructions,
column_names_and_types: columnNamesAndTypes,
})
.then((response) => {
// Handle successful data profile creation
Expand Down
20 changes: 20 additions & 0 deletions k8s/backend-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,26 @@ spec:
- configMapRef:
name: backend-config
env:
- name: AZURE_CLIENT_ID
valueFrom:
secretKeyRef:
name: azure-secrets
key: AZURE_CLIENT_ID
- name: AZURE_TENANT_ID
valueFrom:
secretKeyRef:
name: azure-secrets
key: AZURE_TENANT_ID
- name: AZURE_APP_VALUE
valueFrom:
secretKeyRef:
name: azure-secrets
key: AZURE_APP_VALUE
- name: AZURE_APP_SECRET
valueFrom:
secretKeyRef:
name: azure-secrets
key: AZURE_APP_SECRET
- name: DATABASE_URL
valueFrom:
secretKeyRef:
Expand Down
20 changes: 20 additions & 0 deletions k8s/migration-job.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,26 @@ spec:
- configMapRef:
name: backend-config
env:
- name: AZURE_CLIENT_ID
valueFrom:
secretKeyRef:
name: azure-secrets
key: AZURE_CLIENT_ID
- name: AZURE_TENANT_ID
valueFrom:
secretKeyRef:
name: azure-secrets
key: AZURE_TENANT_ID
- name: AZURE_APP_VALUE
valueFrom:
secretKeyRef:
name: azure-secrets
key: AZURE_APP_VALUE
- name: AZURE_APP_SECRET
valueFrom:
secretKeyRef:
name: azure-secrets
key: AZURE_APP_SECRET
- name: DATABASE_URL
valueFrom:
secretKeyRef:
Expand Down
Loading