
Commit

Merge pull request #710 from jwills/jwills_duckdb
Add dbt-duckdb as a supported adapter and use it for local dev/test
tlento authored Aug 4, 2023
2 parents a917a6b + 3823895 commit 24cb500
Showing 8 changed files with 35 additions and 158 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20230804-064652.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: Add dbt-duckdb as a supported adapter and remove legacy DuckDB sql client
+time: 2023-08-04T06:46:52.989851-07:00
+custom:
+  Author: jwills
+  Issue: "583"
4 changes: 2 additions & 2 deletions local-data-warehouses/README.md
@@ -3,9 +3,9 @@
 This folder includes utilities to run data warehouses for local development. See the [Contributing guide](../CONTRIBUTING.md)
 to ensure your environment is setup properly.

-## SQLite
+## DuckDB

-We assume that you have SQLite installed in your environment. By default, tests will run with SQLite.
+By default, tests will run with DuckDB.

 ## PostgreSQL
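
With DuckDB as the default test adapter, a local run no longer needs a separate database service. A minimal sketch of a local invocation, assuming the `dev-env` hatch environment defined in `pyproject.toml` later in this diff (the exact commands maintainers use may differ; see the Contributing guide):

```sh
# Minimal sketch: run the test suite against the default DuckDB adapter.
# Assumes hatch is installed and the `dev-env` environment from pyproject.toml exists.
pip install hatch
hatch run dev-env:pytest metricflow/test/
```
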
6 changes: 6 additions & 0 deletions metricflow/cli/dbt_connectors/adapter_backed_client.py
@@ -16,6 +16,7 @@
 from metricflow.random_id import random_id
 from metricflow.sql.render.big_query import BigQuerySqlQueryPlanRenderer
 from metricflow.sql.render.databricks import DatabricksSqlQueryPlanRenderer
+from metricflow.sql.render.duckdb_renderer import DuckDbSqlQueryPlanRenderer
 from metricflow.sql.render.postgres import PostgresSQLSqlQueryPlanRenderer
 from metricflow.sql.render.redshift import RedshiftSqlQueryPlanRenderer
 from metricflow.sql.render.snowflake import SnowflakeSqlQueryPlanRenderer
@@ -40,6 +41,7 @@ class SupportedAdapterTypes(enum.Enum):
     SNOWFLAKE = "snowflake"
     REDSHIFT = "redshift"
     BIGQUERY = "bigquery"
+    DUCKDB = "duckdb"

     @property
     def sql_engine_type(self) -> SqlEngine:
@@ -54,6 +56,8 @@ def sql_engine_type(self) -> SqlEngine:
             return SqlEngine.REDSHIFT
         elif self is SupportedAdapterTypes.SNOWFLAKE:
             return SqlEngine.SNOWFLAKE
+        elif self is SupportedAdapterTypes.DUCKDB:
+            return SqlEngine.DUCKDB
         else:
             assert_values_exhausted(self)

@@ -70,6 +74,8 @@ def sql_query_plan_renderer(self) -> SqlQueryPlanRenderer:
             return RedshiftSqlQueryPlanRenderer()
         elif self is SupportedAdapterTypes.SNOWFLAKE:
             return SnowflakeSqlQueryPlanRenderer()
+        elif self is SupportedAdapterTypes.DUCKDB:
+            return DuckDbSqlQueryPlanRenderer()
         else:
             assert_values_exhausted(self)
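
Each supported adapter plugs into two exhaustively matched properties on `SupportedAdapterTypes`, so adding DuckDB is one enum member plus one branch in each property. A self-contained sketch of that dispatch pattern, with a stand-in `assert_values_exhausted` helper (the real helper lives in metricflow, and the real properties return `SqlEngine` and `SqlQueryPlanRenderer` values):

```python
# Sketch of the exhaustive enum-dispatch pattern used above, with stand-in names.
import enum
from typing import NoReturn


def assert_values_exhausted(value: NoReturn) -> NoReturn:
    """Fail loudly (and satisfy type checkers) if an enum member is unhandled."""
    raise AssertionError(f"Unhandled enum value: {value}")


class AdapterType(enum.Enum):
    POSTGRES = "postgres"
    DUCKDB = "duckdb"

    @property
    def engine_name(self) -> str:
        if self is AdapterType.POSTGRES:
            return "Postgres"
        elif self is AdapterType.DUCKDB:
            return "DuckDB"
        else:
            assert_values_exhausted(self)


assert AdapterType("duckdb").engine_name == "DuckDB"
```

Because the fall-through branch passes `self` to a `NoReturn`-typed parameter, a type checker will flag any enum member added without a corresponding branch.
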
@@ -61,3 +61,9 @@ snowflake:
       warehouse: "{{ env_var('DBT_ENV_SECRET_WAREHOUSE') }}"
       database: "{{ env_var('DBT_ENV_SECRET_DATABASE') }}"
       schema: "{{ env_var('DBT_ENV_SECRET_SCHEMA') }}"
+duckdb:
+  target: dev
+  outputs:
+    dev:
+      type: duckdb
+      schema: "{{ env_var('DBT_ENV_SECRET_SCHEMA') }}"
10 changes: 6 additions & 4 deletions metricflow/test/fixtures/sql_client_fixtures.py
@@ -15,7 +15,6 @@
 from metricflow.test.fixtures.sql_clients.adapter_backed_ddl_client import AdapterBackedDDLSqlClient
 from metricflow.test.fixtures.sql_clients.common_client import SqlDialect
 from metricflow.test.fixtures.sql_clients.ddl_sql_client import SqlClientWithDDLMethods
-from metricflow.test.fixtures.sql_clients.duckdb import DuckDbSqlClient

 logger = logging.getLogger(__name__)

@@ -54,8 +53,9 @@ def __configure_test_env_from_url(url: str, password: str, schema: str) -> sqlal
     """
     parsed_url = sqlalchemy.engine.make_url(url)

-    assert parsed_url.host, "Engine host is not set in engine connection URL!"
-    os.environ[DBT_ENV_SECRET_HOST] = parsed_url.host
+    if parsed_url.drivername != "duckdb":
+        assert parsed_url.host, "Engine host is not set in engine connection URL!"
+        os.environ[DBT_ENV_SECRET_HOST] = parsed_url.host

     if parsed_url.username:
         os.environ[DBT_ENV_SECRET_USER] = parsed_url.username
@@ -156,7 +156,9 @@ def make_test_sql_client(url: str, password: str, schema: str) -> SqlClientWithD
         __initialize_dbt()
         return AdapterBackedDDLSqlClient(adapter=get_adapter_by_type("postgres"))
     elif dialect == SqlDialect.DUCKDB:
-        return DuckDbSqlClient.from_connection_details(url, password)
+        __configure_test_env_from_url(url, password=password, schema=schema)
+        __initialize_dbt()
+        return AdapterBackedDDLSqlClient(adapter=get_adapter_by_type("duckdb"))
     elif dialect == SqlDialect.DATABRICKS:
         __configure_databricks_env_from_url(url, password=password, schema=schema)
         __initialize_dbt()
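
The host assertion becomes conditional because DuckDB connection URLs carry no host component. A quick sketch with `sqlalchemy.engine.make_url` (the DuckDB URL here is illustrative; the Postgres URL matches the `postgres-env` settings in `pyproject.toml`):

```python
# Sketch: DuckDB URLs parse without a host, unlike the warehouse engines.
import sqlalchemy

duckdb_url = sqlalchemy.engine.make_url("duckdb:///:memory:")
postgres_url = sqlalchemy.engine.make_url("postgresql://metricflow@localhost:5432/metricflow")

print(duckdb_url.drivername, duckdb_url.host)      # -> duckdb None
print(postgres_url.drivername, postgres_url.host)  # -> postgresql localhost
```
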
92 changes: 0 additions & 92 deletions metricflow/test/fixtures/sql_clients/duckdb.py

This file was deleted.

59 changes: 1 addition & 58 deletions metricflow/test/sql_clients/test_sql_client.py
@@ -1,8 +1,7 @@
 from __future__ import annotations

-import datetime
 import logging
-from typing import Sequence, Set, Union
+from typing import Set, Union

 import pandas as pd
 import pytest
@@ -11,7 +10,6 @@
 from metricflow.protocols.sql_client import SqlClient, SqlEngine
 from metricflow.random_id import random_id
 from metricflow.sql.sql_bind_parameters import SqlBindParameters
-from metricflow.sql.sql_column_type import SqlColumnType
 from metricflow.test.compare_df import assert_dataframes_equal
 from metricflow.test.fixtures.setup_fixtures import MetricFlowTestSessionState
 from metricflow.test.fixtures.sql_clients.ddl_sql_client import SqlClientWithDDLMethods
@@ -45,67 +43,12 @@ def test_query(sql_client: SqlClient) -> None: # noqa: D
     _check_1col(df)


-def _skip_execution_param_tests_for_unsupported_clients(sql_client: SqlClient) -> None:
-    if sql_client.sql_engine_type is not SqlEngine.DUCKDB:
-        pytest.skip(
-            reason=(
-                "The dbt Adapter-backed SqlClient implementation does not support bind parameters, so we restrict "
-                "this test to our DuckDB client, which retains an example implementation."
-            )
-        )
-
-
-def test_query_with_execution_params(sql_client: SqlClient) -> None:
-    """Test querying with execution parameters of all supported datatypes."""
-    _skip_execution_param_tests_for_unsupported_clients(sql_client)
-    params: Sequence[SqlColumnType] = [
-        2,
-        "hi",
-        3.5,
-        True,
-        False,
-        datetime.datetime(2022, 1, 1),
-        datetime.date(2020, 12, 31),
-    ]
-    for param in params:
-        sql_execution_params = SqlBindParameters.create_from_dict(({"x": param}))
-        assert sql_execution_params.param_dict["x"] == param # check that pydantic did not coerce type unexpectedly
-
-        expr = f"SELECT {sql_client.render_bind_parameter_key('x')} as y"
-        df = sql_client.query(expr, sql_bind_parameters=sql_execution_params)
-        assert isinstance(df, pd.DataFrame)
-        assert df.shape == (1, 1)
-        assert df.columns.tolist() == ["y"]
-
-        # Some engines convert some types to str; convert everything to str for comparison
-        str_param = str(param)
-        str_result = str(df["y"][0])
-        # Some engines use JSON bool syntax (i.e., True -> 'true')
-        if isinstance(param, bool):
-            assert str_result in [str_param, str_param.lower()]
-        # Some engines add decimals to datetime milliseconds; trim here
-        elif isinstance(param, datetime.datetime):
-            assert str_result[: len(str_param)] == str_param
-        else:
-            assert str_result == str_param
-
-
 def test_select_one_query(sql_client: SqlClient) -> None: # noqa: D
     sql_client.query("SELECT 1")
     with pytest.raises(Exception):
         sql_client.query("this is garbage")
-
-
-def test_failed_query_with_execution_params(sql_client: SqlClient) -> None: # noqa: D
-    _skip_execution_param_tests_for_unsupported_clients(sql_client)
-    expr = f"SELECT {sql_client.render_bind_parameter_key('x')}"
-    sql_execution_params = SqlBindParameters.create_from_dict({"x": 1})
-
-    sql_client.query(expr, sql_bind_parameters=sql_execution_params)
-    with pytest.raises(Exception):
-        sql_client.query("this is garbage")


 def test_create_table_from_dataframe( # noqa: D
     mf_test_session_state: MetricFlowTestSessionState, ddl_sql_client: SqlClientWithDDLMethods
 ) -> None:
10 changes: 8 additions & 2 deletions pyproject.toml
@@ -71,8 +71,6 @@ dev-packages = [
 # handles import statements they are required in all test environments
 sql-client-packages = [
   "SQLAlchemy~=1.4.42",
-  "duckdb-engine~=0.9",
-  "duckdb~=0.8",
   "sqlalchemy2-stubs~=0.0.2a21",
 ]

@@ -96,6 +94,10 @@ dbt-snowflake = [
   "dbt-snowflake~=1.6.0",
 ]

+dbt-duckdb = [
+  "dbt-duckdb~=1.6.0",
+]
+
 [tool.hatch.build.targets.sdist]
 exclude = [
   "/.github",
@@ -112,9 +114,13 @@ exclude = [
 description = "Environment for development. Includes a DuckDB-backed client."
 features = [
   "dev-packages",
+  "dbt-duckdb",
   "sql-client-packages",
 ]

+[tool.hatch.envs.dev-env.env-vars]
+MF_TEST_ADAPTER_TYPE="duckdb"
+
 [tool.hatch.envs.postgres-env.env-vars]
 MF_SQL_ENGINE_URL="postgresql://metricflow@localhost:5432/metricflow"
 MF_SQL_ENGINE_PASSWORD="metricflowing"
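
The new `dev-env` settings select the adapter through `MF_TEST_ADAPTER_TYPE`. A sketch of reproducing that outside hatch, assuming (as the env-vars block suggests) that the test fixtures read this variable to pick the adapter, and that dbt-duckdb plus the dev/test packages are already installed:

```sh
# Sketch: run one test module against DuckDB without going through hatch.
export MF_TEST_ADAPTER_TYPE=duckdb
pytest metricflow/test/sql_clients/test_sql_client.py
```
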
