From 0ba9a1963a152ff561a4c61f0ed6b5de2036e34f Mon Sep 17 00:00:00 2001 From: Colin Ho Date: Fri, 31 May 2024 11:41:06 -0700 Subject: [PATCH] [BUG] Translate mssql to tsql in read_sql scan (#2330) When running `read_sql` against SQL Server using a SQLAlchemy connection, for example: ``` connection_url = sqlalchemy.engine.URL.create( "mssql+pyodbc", username=user, password=password, host=host, port=1433, database=database, query={ "driver": "ODBC Driver 18 for SQL Server", }, ) def create_conn(): return sqlalchemy.create_engine(connection_url).connect() df = daft.read_sql("SELECT * FROM test_data", create_conn) ``` The query fails with `Unsupported dialect: mssql, please refer to the documentation for supported dialects`. This is because SQLGlot, the library that `read_sql` uses for query construction, does not recognize `mssql` as a dialect; it instead recognizes `tsql`, which is the name of the SQL dialect for Microsoft SQL Server: https://learn.microsoft.com/en-us/sql/t-sql/language-reference?view=sql-server-ver16 This PR adds a translation step during SQL query construction to fix this issue. NOTE: - This PR was tested locally against a Docker instance of Azure SQL Edge. --- daft/sql/sql_scan.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/daft/sql/sql_scan.py b/daft/sql/sql_scan.py index 9da2123fa0..5107f10ded 100644 --- a/daft/sql/sql_scan.py +++ b/daft/sql/sql_scan.py @@ -260,6 +260,9 @@ def _construct_sql_query( # sqlglot does not support "postgresql" dialect, it only supports "postgres" if target_dialect == "postgresql": target_dialect = "postgres" + # sqlglot does not recognize "mssql" as a dialect, it instead recognizes "tsql", which is the SQL dialect for Microsoft SQL Server + elif target_dialect == "mssql": + target_dialect = "tsql" if not any(target_dialect == supported_dialect.value for supported_dialect in sqlglot.Dialects): raise ValueError(