-
Notifications
You must be signed in to change notification settings - Fork 161
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add configuration options for enable_list_inference and intermediate_…
…format for python models
- Loading branch information
1 parent
3b8c6e8
commit 7245a59
Showing
7 changed files
with
241 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
kind: Features | ||
body: Add configuration options `enable_list_inference` and `intermediate_format` for python | ||
models | ||
time: 2024-04-26T10:53:19.874239-04:00 | ||
custom: | ||
Author: mikealfare | ||
Issue: 1047 1114 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
kind: Fixes | ||
body: Default `enableListInference` to `True` for python models to support nested | ||
lists | ||
time: 2024-04-26T10:52:24.827314-04:00 | ||
custom: | ||
Author: mikealfare | ||
Issue: 1047 1114 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
# dbt Python model source: one row whose column is a single struct (dict)
# that itself contains a list. No list-inference or intermediate-format
# options are set, so the adapter defaults apply.
SINGLE_RECORD = """
import pandas as pd
def model(dbt, session):
    dbt.config(
        submission_method="serverless",
        materialized="table"
    )
    df = pd.DataFrame(
        [
            {"column_name": {"name": "hello", "my_list": ["h", "e", "l", "l", "o"]}},
        ]
    )
    return df
"""
|
||
|
||
# dbt Python model source: one row whose column is a *list* of structs, each
# containing a nested list. No list-inference or intermediate-format options
# are set, so this exercises the adapter defaults.
MULTI_RECORD_DEFAULT = """
import pandas as pd
def model(dbt, session):
    dbt.config(
        submission_method="serverless",
        materialized="table",
    )
    df = pd.DataFrame(
        [
            {"column_name": [{"name": "hello", "my_list": ["h", "e", "l", "l", "o"]}]},
        ]
    )
    return df
"""
|
||
|
||
# dbt Python model source: same nested list-of-structs data, but with the
# intermediate format switched to ORC. List inference is left at its default.
ORC_FORMAT = """
import pandas as pd
def model(dbt, session):
    dbt.config(
        submission_method="serverless",
        materialized="table",
        intermediate_format="orc",
    )
    df = pd.DataFrame(
        [
            {"column_name": [{"name": "hello", "my_list": ["h", "e", "l", "l", "o"]}]},
        ]
    )
    return df
"""
|
||
|
||
# dbt Python model source: same nested list-of-structs data, with list
# inference explicitly enabled. The intermediate format is left at its default.
ENABLE_LIST_INFERENCE = """
import pandas as pd
def model(dbt, session):
    dbt.config(
        submission_method="serverless",
        materialized="table",
        enable_list_inference="true",
    )
    df = pd.DataFrame(
        [
            {"column_name": [{"name": "hello", "my_list": ["h", "e", "l", "l", "o"]}]},
        ]
    )
    return df
"""
|
||
|
||
# dbt Python model source: same nested list-of-structs data, with list
# inference explicitly disabled and the intermediate format left at its
# default. This run is expected to fail; the test module describes it as
# mimicking the behavior before list inference was enabled by default.
DISABLE_LIST_INFERENCE = """
import pandas as pd
def model(dbt, session):
    dbt.config(
        submission_method="serverless",
        materialized="table",
        enable_list_inference="false",
    )
    df = pd.DataFrame(
        [
            {"column_name": [{"name": "hello", "my_list": ["h", "e", "l", "l", "o"]}]},
        ]
    )
    return df
"""
|
||
|
||
# dbt Python model source: same nested list-of-structs data, with both options
# spelled out explicitly: list inference on and Parquet as the intermediate
# format.
ENABLE_LIST_INFERENCE_PARQUET_FORMAT = """
import pandas as pd
def model(dbt, session):
    dbt.config(
        submission_method="serverless",
        materialized="table",
        enable_list_inference="true",
        intermediate_format="parquet",
    )
    df = pd.DataFrame(
        [
            {"column_name": [{"name": "hello", "my_list": ["h", "e", "l", "l", "o"]}]},
        ]
    )
    return df
"""
|
||
|
||
# dbt Python model source: same nested list-of-structs data, with list
# inference disabled but ORC as the intermediate format. The accompanying test
# expects this combination to succeed.
DISABLE_LIST_INFERENCE_ORC_FORMAT = """
import pandas as pd
def model(dbt, session):
    dbt.config(
        submission_method="serverless",
        materialized="table",
        enable_list_inference="false",
        intermediate_format="orc",
    )
    df = pd.DataFrame(
        [
            {"column_name": [{"name": "hello", "my_list": ["h", "e", "l", "l", "o"]}]},
        ]
    )
    return df
"""
70 changes: 70 additions & 0 deletions
70
tests/functional/python_model_tests/test_list_inference.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
""" | ||
This test case addresses this regression: https://github.com/dbt-labs/dbt-bigquery/issues/1047 | ||
As the comments point out, the issue appears to be that the default settings were: | ||
- list inference: off | ||
- intermediate format: parquet | ||
Adjusting either of these alleviates the issue. | ||
When the regression was first reported, `models.MULTI_RECORD_DEFAULT` failed while the other three models passed. | ||
""" | ||
from dbt.tests.util import run_dbt_and_capture | ||
import pytest | ||
|
||
from tests.functional.python_model_tests import models | ||
|
||
|
||
class ListInference:
    """Shared test body for the list-inference python model scenarios.

    Subclasses supply a ``models`` fixture containing a single python model
    and may override ``expect_pass`` when the ``dbt run`` is expected to fail.
    The class name does not start with ``Test``, so it is presumably not
    collected by pytest on its own.
    """

    # Whether ``dbt run`` is expected to succeed for the subclass's model.
    expect_pass = True

    def test_model(self, project):
        """Run the project and check that exactly one result is reported."""
        # The captured log output is not inspected, so it is discarded.
        results, _ = run_dbt_and_capture(["run"], expect_pass=self.expect_pass)
        assert len(results) == 1
|
||
|
||
class TestSingleRecord(ListInference):
    """Model whose column holds a single struct containing a list."""

    @pytest.fixture(scope="class")
    def models(self):
        """Provide the single-record model as the project's only model."""
        return {"model.py": models.SINGLE_RECORD}
|
||
|
||
class TestMultiRecordDefault(ListInference):
    """Model whose column holds a list of structs, run with default settings."""

    @pytest.fixture(scope="class")
    def models(self):
        """Provide the default-config multi-record model."""
        # this is the model that initially failed for this issue
        return {"model.py": models.MULTI_RECORD_DEFAULT}
|
||
|
||
class TestDisableListInference(ListInference):
    """Model with list inference explicitly disabled; the run must fail."""

    # Disabling list inference reproduces the original regression.
    expect_pass = False

    @pytest.fixture(scope="class")
    def models(self):
        """Provide the model that turns list inference off."""
        # this model mimics what was happening before defaulting enable_list_inference=True
        return {"model.py": models.DISABLE_LIST_INFERENCE}
|
||
|
||
class TestEnableListInference(ListInference):
    """Model with list inference explicitly enabled."""

    @pytest.fixture(scope="class")
    def models(self):
        """Provide the model that turns list inference on."""
        return {"model.py": models.ENABLE_LIST_INFERENCE}
|
||
|
||
class TestOrcFormat(ListInference):
    """Model that switches the intermediate format to ORC."""

    @pytest.fixture(scope="class")
    def models(self):
        """Provide the model configured with ``intermediate_format="orc"``."""
        return {"model.py": models.ORC_FORMAT}
|
||
|
||
class TestDisableListInferenceOrcFormat(ListInference):
    """Model with list inference disabled but ORC as the intermediate format.

    Unlike ``TestDisableListInference``, this run is expected to pass
    (``expect_pass`` is inherited as ``True``).
    """

    @pytest.fixture(scope="class")
    def models(self):
        """Provide the model that disables list inference and uses ORC."""
        return {"model.py": models.DISABLE_LIST_INFERENCE_ORC_FORMAT}
|
||
|
||
class TestEnableListInferenceParquetFormat(ListInference):
    """Model with list inference enabled and Parquet set explicitly."""

    @pytest.fixture(scope="class")
    def models(self):
        """Provide the model that enables list inference and uses Parquet."""
        # Same data as the model that originally failed, but with list
        # inference turned on explicitly while keeping the Parquet format.
        return {"model.py": models.ENABLE_LIST_INFERENCE_PARQUET_FORMAT}