From 02b1ccd6e21ef21eebf1c66e1f2304a73747591e Mon Sep 17 00:00:00 2001 From: liberty-rising Date: Mon, 29 Jan 2024 20:23:31 +0100 Subject: [PATCH 1/2] create new table for created data profile --- backend/database/sql_executor.py | 19 ++++++ backend/database/table_manager.py | 24 ++++++-- backend/models/data_profile.py | 1 + backend/routes/data_profile_routes.py | 25 ++++++++ ...g_manipulator.py => sql_string_manager.py} | 58 ++++++++++++++++++- .../pages/upload/CreateDataProfileWindow.jsx | 13 ++++- .../upload/DataPreviewAndSchemaEditor.jsx | 5 ++ frontend/src/pages/upload/UploadPage.jsx | 7 ++- 8 files changed, 142 insertions(+), 10 deletions(-) rename backend/utils/{sql_string_manipulator.py => sql_string_manager.py} (56%) diff --git a/backend/database/sql_executor.py b/backend/database/sql_executor.py index 14d7708..c74e64e 100644 --- a/backend/database/sql_executor.py +++ b/backend/database/sql_executor.py @@ -3,12 +3,14 @@ import pandas as pd from sqlalchemy import inspect, text from sqlalchemy.orm import Session +from utils.sql_string_manager import SQLStringManager class SQLExecutor: def __init__(self, session: Session): self.session = session self.database_type = "postgres" + self.sql_string_manager = SQLStringManager() def append_df_to_table(self, df: pd.DataFrame, table_name: str): try: @@ -20,6 +22,23 @@ def append_df_to_table(self, df: pd.DataFrame, table_name: str): ) raise + def create_table_for_data_profile( + self, org_id: int, table_name: str, column_names_and_types: dict + ): + """Creates a table for a data profile.""" + try: + create_query = ( + self.sql_string_manager.generate_create_query_for_data_profile_table( + table_name, column_names_and_types + ) + ) + self.session.execute(text(create_query)) + self.session.commit() + except Exception as e: + print(f"An error occurred: {e}") + self.session.rollback() + raise + def execute_create_query(self, create_query: str): try: self.session.execute(text(create_query)) diff --git 
a/backend/database/table_manager.py b/backend/database/table_manager.py index c647721..c1699c4 100644 --- a/backend/database/table_manager.py +++ b/backend/database/table_manager.py @@ -8,7 +8,7 @@ from llms.base import BaseLLM from models.table_map import TableMap from sqlalchemy.orm import Session -from utils.sql_string_manipulator import SQLStringManipulator +from utils.sql_string_manager import SQLStringManager class TableManager: @@ -42,6 +42,20 @@ def _map_table_to_org( print(f"An error occurred: {e}") raise HTTPException(status_code=400, detail=str(e)) + def create_table_for_data_profile( + self, org_id: int, table_name: str, column_names_and_types: dict + ): + """Creates a table for a data profile.""" + try: + executor = SQLExecutor(self.session) + executor.create_table_for_data_profile( + org_id, table_name, column_names_and_types + ) + self._map_table_to_org(org_id, table_name) + except Exception as e: + print(f"An error occurred: {e}") + raise HTTPException(status_code=400, detail=str(e)) + def create_table_with_llm(self, sample_content: str, header: str, extra_desc: str): """ Creates a table using an LLM based on sample file content and a message. 
@@ -61,11 +75,11 @@ def create_table_with_llm(self, sample_content: str, header: str, extra_desc: st sample_content, header, table_names, extra_desc ) - create_query = SQLStringManipulator( + create_query = SQLStringManager( raw_create_query ).extract_sql_query_from_text() # Just in case - if SQLStringManipulator( + if SQLStringManager( create_query ).is_valid_create_table_query(): # Checks if the query is valid sql_executor = SQLExecutor(self.session) @@ -94,9 +108,7 @@ def create_table_desc_with_llm( create_query, sample_content, extra_desc ) - table_name = SQLStringManipulator( - create_query - ).get_table_from_create_query() + table_name = SQLStringManager(create_query).get_table_from_create_query() # Store description in separate table manager = TableMetadataManager(self.session) diff --git a/backend/models/data_profile.py b/backend/models/data_profile.py index 9e7dacf..936c38f 100644 --- a/backend/models/data_profile.py +++ b/backend/models/data_profile.py @@ -47,6 +47,7 @@ def to_dict(self): class DataProfileCreateRequest(BaseModel): name: str extract_instructions: str + column_names_and_types: dict class DataProfileCreateResponse(BaseModel): diff --git a/backend/routes/data_profile_routes.py b/backend/routes/data_profile_routes.py index 7a0ef7f..0bbb4ce 100644 --- a/backend/routes/data_profile_routes.py +++ b/backend/routes/data_profile_routes.py @@ -18,6 +18,7 @@ from security import get_current_user from utils.image_conversion_manager import ImageConversionManager from utils.object_storage.digitalocean_space_manager import DigitalOceanSpaceManager +from utils.sql_string_manager import SQLStringManager data_profile_router = APIRouter() @@ -45,6 +46,20 @@ async def save_data_profile( request: DataProfileCreateRequest, current_user: User = Depends(get_current_user) ) -> DataProfileCreateResponse: """Save a new data profile to the database""" + if len(request.name) > 50: + raise HTTPException( + status_code=400, detail="Data Profile name cannot be longer than 
50 chars" + ) + + formatted_name = request.name.replace(" ", "_").lower() + table_name = f"org_{current_user.organization_id}_{formatted_name}" + sql_string_manager = SQLStringManager() + if not sql_string_manager.is_valid_pg_table_name(table_name): + raise HTTPException( + status_code=400, + detail="Data Profile name must only contain letters, numbers, and underscores", + ) + with DatabaseManager() as session: data_profile_manager = DataProfileManager(session) if data_profile_manager.get_dataprofile_by_name_and_org( @@ -52,10 +67,20 @@ async def save_data_profile( ): raise HTTPException(status_code=400, detail="Data Profile already exists") + # Create the table for the data profile + table_manager = TableManager(session) + table_manager.create_table_for_data_profile( + org_id=current_user.organization_id, + table_name=table_name, + column_names_and_types=request.column_names_and_types, + ) + + # Create the data profile new_data_profile = DataProfile( name=request.name, extract_instructions=request.extract_instructions, organization_id=current_user.organization_id, + table_name=table_name, # TODO: To be further implemented ) created_data_profile = data_profile_manager.create_dataprofile(new_data_profile) diff --git a/backend/utils/sql_string_manipulator.py b/backend/utils/sql_string_manager.py similarity index 56% rename from backend/utils/sql_string_manipulator.py rename to backend/utils/sql_string_manager.py index 68f27f7..b7f3d05 100644 --- a/backend/utils/sql_string_manipulator.py +++ b/backend/utils/sql_string_manager.py @@ -2,7 +2,7 @@ from typing import Optional -class SQLStringManipulator: +class SQLStringManager: """ A class for manipulating SQL query strings. @@ -21,7 +21,7 @@ class SQLStringManipulator: def __init__(self, sql_string: str = ""): """ - Initializes an instance of the SQLStringManipulator class with a query string. + Initializes an instance of the SQLStringManager class with a query string.
Parameters: query_str (str): The SQL query string to be manipulated. @@ -31,6 +31,48 @@ def __init__(self, sql_string: str = ""): def set_sql_string(self, sql_string: str): self.sql_string = sql_string + def map_to_postgres_type(self, column_type: str) -> str: + """ + Maps a generic column type to a PostgreSQL data type. + + Parameters: + column_type (str): The generic column type. + + Returns: + str: The PostgreSQL data type. + """ + type_mapping = { + "text": "TEXT", + "integer": "INTEGER", + "money": "DECIMAL", + "date": "DATE", + "boolean": "BOOLEAN", + } + + return type_mapping.get(column_type, "TEXT") + + def generate_create_query_for_data_profile_table( + self, table_name: str, column_names_and_types: dict + ) -> str: + """ + Generates a CREATE TABLE query for a data profile table. + + Parameters: + table_name (str): The name of the table. + column_names_and_types (dict): A dictionary of column names and types. + + Returns: + str: The CREATE TABLE query. + """ + if not column_names_and_types: # an empty schema would otherwise produce malformed SQL + raise ValueError("At least one column is required to create a table") + column_definitions = ", ".join( + f"{column_name} {self.map_to_postgres_type(column_type)}" + for column_name, column_type in column_names_and_types.items() + ) + + return f"CREATE TABLE {table_name} ({column_definitions});" + def get_table_from_create_query(self) -> Optional[str]: """ Extract the table name from a SQL CREATE TABLE query.
@@ -58,6 +100,18 @@ def is_valid_create_table_query(self) -> bool: pattern = r"^CREATE TABLE .+;\s*$" return bool(re.match(pattern, clean_query)) + def is_valid_pg_table_name(self, table_name) -> bool: + # Check if the whole table name matches the allowed pattern (fullmatch rejects a trailing newline) + # Pattern explanation: + # ^[_a-z] : Must start with an underscore or a lowercase letter + # [_a-z0-9]*$ : Can be followed by any number of underscores, lowercase letters, or digits + pattern = r"^[_a-z][_a-z0-9]*$" + + if re.fullmatch(pattern, table_name): + return True + else: + return False + def extract_sql_query_from_text(self) -> Optional[str]: """ Extracts an SQL query from a given text. diff --git a/frontend/src/pages/upload/CreateDataProfileWindow.jsx b/frontend/src/pages/upload/CreateDataProfileWindow.jsx index 1cc9185..db2e677 100644 --- a/frontend/src/pages/upload/CreateDataProfileWindow.jsx +++ b/frontend/src/pages/upload/CreateDataProfileWindow.jsx @@ -26,10 +26,11 @@ function CreateDataProfileWindow({ open, onClose, onCreate }) { const [selectedColumnTypes, setSelectedColumnTypes] = useState(null); const [isPreviewLoading, setIsPreviewLoading] = useState(false); const [isPreviewTableOpen, setIsPreviewTableOpen] = useState(false); + const [columnNamesAndTypes, setColumnNamesAndTypes] = useState({}); const handleSubmit = (event) => { event.preventDefault(); - onCreate(name, extractInstructions); + onCreate(name, extractInstructions, columnNamesAndTypes); }; const handlePreview = () => { @@ -67,6 +68,15 @@ function CreateDataProfileWindow({ open, onClose, onCreate }) { } }; + const handleColumnsChange = (columns) => { + const newColumnNamesAndTypes = columns.reduce((acc, column) => { + acc[column.name] = column.type; + return acc; + }, {}); + + setColumnNamesAndTypes(newColumnNamesAndTypes); + }; + return ( )} diff --git a/frontend/src/pages/upload/DataPreviewAndSchemaEditor.jsx b/frontend/src/pages/upload/DataPreviewAndSchemaEditor.jsx index 7b16231..32c40bd 100644 ---
a/frontend/src/pages/upload/DataPreviewAndSchemaEditor.jsx +++ b/frontend/src/pages/upload/DataPreviewAndSchemaEditor.jsx @@ -21,6 +21,7 @@ function DataPreviewAndSchemaEditor({ previewData, availableColumnTypes, selectedColumnTypes, + onColumnsChange, }) { const [columns, setColumns] = useState([]); @@ -35,6 +36,10 @@ function DataPreviewAndSchemaEditor({ } }, [previewData, selectedColumnTypes]); + useEffect(() => { + onColumnsChange(columns); // Call the callback function when columns change + }, [columns]); + const handleColumnTypeChange = (index, newType) => { setColumns((prevColumns) => prevColumns.map((column, colIndex) => diff --git a/frontend/src/pages/upload/UploadPage.jsx b/frontend/src/pages/upload/UploadPage.jsx index d2b7409..1b88fd4 100644 --- a/frontend/src/pages/upload/UploadPage.jsx +++ b/frontend/src/pages/upload/UploadPage.jsx @@ -37,11 +37,16 @@ function UploadPage() { .catch((error) => console.error("Error fetching data profiles:", error)); }, []); - const handleCreateDataProfile = (name, extractInstructions) => { + const handleCreateDataProfile = ( + name, + extractInstructions, + columnNamesAndTypes, + ) => { axios .post(`${API_URL}data-profile/`, { name: name, extract_instructions: extractInstructions, + column_names_and_types: columnNamesAndTypes, }) .then((response) => { // Handle successful data profile creation From 6cdcecff61ae0670d60bd2a5b14cef34623f8ef4 Mon Sep 17 00:00:00 2001 From: liberty-rising Date: Mon, 29 Jan 2024 21:03:09 +0100 Subject: [PATCH 2/2] add azure secrets to kubernetes --- k8s/backend-deployment.yaml | 20 ++++++++++++++++++++ k8s/migration-job.yaml | 20 ++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/k8s/backend-deployment.yaml b/k8s/backend-deployment.yaml index cff4413..278a610 100644 --- a/k8s/backend-deployment.yaml +++ b/k8s/backend-deployment.yaml @@ -18,6 +18,26 @@ spec: - configMapRef: name: backend-config env: + - name: AZURE_CLIENT_ID + valueFrom: + secretKeyRef: + name: azure-secrets + 
key: AZURE_CLIENT_ID + - name: AZURE_TENANT_ID + valueFrom: + secretKeyRef: + name: azure-secrets + key: AZURE_TENANT_ID + - name: AZURE_APP_VALUE + valueFrom: + secretKeyRef: + name: azure-secrets + key: AZURE_APP_VALUE + - name: AZURE_APP_SECRET + valueFrom: + secretKeyRef: + name: azure-secrets + key: AZURE_APP_SECRET - name: DATABASE_URL valueFrom: secretKeyRef: diff --git a/k8s/migration-job.yaml b/k8s/migration-job.yaml index a03a2a0..3bc1514 100644 --- a/k8s/migration-job.yaml +++ b/k8s/migration-job.yaml @@ -13,6 +13,26 @@ spec: - configMapRef: name: backend-config env: + - name: AZURE_CLIENT_ID + valueFrom: + secretKeyRef: + name: azure-secrets + key: AZURE_CLIENT_ID + - name: AZURE_TENANT_ID + valueFrom: + secretKeyRef: + name: azure-secrets + key: AZURE_TENANT_ID + - name: AZURE_APP_VALUE + valueFrom: + secretKeyRef: + name: azure-secrets + key: AZURE_APP_VALUE + - name: AZURE_APP_SECRET + valueFrom: + secretKeyRef: + name: azure-secrets + key: AZURE_APP_SECRET - name: DATABASE_URL valueFrom: secretKeyRef: