From 4c88e4ab6af5b9ccec8710bba6354aa8ca53dac9 Mon Sep 17 00:00:00 2001 From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> Date: Tue, 20 Sep 2022 09:46:51 -0600 Subject: [PATCH 1/7] Convert df to pyspark DataFrame if it is pandas before writing (#469) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Convert df to pyspark DataFrame if it is pandas before writing * Changelog entry * Use `overwriteSchema` option like dbt-databricks * Upstream `py_write_table` macro from dbt-databricks * Convert df to a PySpark DataFrame if it's a Pandas-on-Spark DataFrame before writing * Separate conversion logic from import logic * Raise exception if not able to convert to a Spark DataFrame * Prefer pandas → pandas-on-Spark → Spark over direct pandas → Spark --- .../Under the Hood-20220916-125706.yaml | 7 ++++ .../spark/macros/materializations/table.sql | 35 ++++++++++++++++++- 2 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 .changes/unreleased/Under the Hood-20220916-125706.yaml diff --git a/.changes/unreleased/Under the Hood-20220916-125706.yaml b/.changes/unreleased/Under the Hood-20220916-125706.yaml new file mode 100644 index 000000000..54b82eb55 --- /dev/null +++ b/.changes/unreleased/Under the Hood-20220916-125706.yaml @@ -0,0 +1,7 @@ +kind: Under the Hood +body: Enable Pandas and Pandas-on-Spark DataFrames for dbt python models +time: 2022-09-16T12:57:06.846297-06:00 +custom: + Author: chamini2 dbeatty10 + Issue: "468" + PR: "469" diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql index d39ba0b44..25d70c722 100644 --- a/dbt/include/spark/macros/materializations/table.sql +++ b/dbt/include/spark/macros/materializations/table.sql @@ -41,7 +41,40 @@ # --- Autogenerated dbt materialization code. --- # dbt = dbtObj(spark.table) df = model(dbt, spark) -df.write.mode("overwrite").format("delta").saveAsTable("{{ target_relation }}") + +import importlib.util + +pandas_available = False +pyspark_available = False + +# make sure pandas exists before using it +if importlib.util.find_spec("pandas"): + import pandas + pandas_available = True + +# make sure pyspark.pandas exists before using it +if importlib.util.find_spec("pyspark.pandas"): + import pyspark.pandas + pyspark_available = True + +# preferentially convert pandas DataFrames to pandas-on-Spark DataFrames first +# since they know how to convert pandas DataFrames better than `spark.createDataFrame(df)` +# and converting from pandas-on-Spark to Spark DataFrame has no overhead +if pyspark_available and pandas_available and isinstance(df, pandas.core.frame.DataFrame): + df = pyspark.pandas.frame.DataFrame(df) + +# convert to pyspark.sql.dataframe.DataFrame +if isinstance(df, pyspark.sql.dataframe.DataFrame): + pass # since it is already a Spark DataFrame +elif pyspark_available and isinstance(df, pyspark.pandas.frame.DataFrame): + df = df.to_spark() +elif pandas_available and isinstance(df, pandas.core.frame.DataFrame): + df = spark.createDataFrame(df) +else: + msg = f"{type(df)} is not a supported type for dbt Python materialization" + raise Exception(msg) + +df.write.mode("overwrite").format("delta").option("overwriteSchema", "true").saveAsTable("{{ target_relation }}") {%- endmacro -%} {%macro py_script_comment()%} From c46fff986ab4ef0ff62e2c955bc380e6c8b110f9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 20 Sep 2022 16:02:36 -0400 Subject: [PATCH 2/7] Bump black from 22.3.0 to 22.8.0 (#458) * Bump black from 22.3.0 to 22.8.0 Bumps [black](https://github.com/psf/black) from 22.3.0 to 22.8.0. - [Release notes](https://github.com/psf/black/releases) - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) - [Commits](https://github.com/psf/black/compare/22.3.0...22.8.0) --- updated-dependencies: - dependency-name: black dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] * Add automated changelog yaml from template for bot PR Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Github Build Bot --- .changes/unreleased/Dependency-20220914-191910.yaml | 7 +++++++ dev-requirements.txt | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 .changes/unreleased/Dependency-20220914-191910.yaml diff --git a/.changes/unreleased/Dependency-20220914-191910.yaml b/.changes/unreleased/Dependency-20220914-191910.yaml new file mode 100644 index 000000000..ad2534c16 --- /dev/null +++ b/.changes/unreleased/Dependency-20220914-191910.yaml @@ -0,0 +1,7 @@ +kind: "Dependency" +body: "Bump black from 22.3.0 to 22.8.0" +time: 2022-09-14T19:19:10.00000Z +custom: + Author: dependabot[bot] + Issue: 417 + PR: 458 diff --git a/dev-requirements.txt b/dev-requirements.txt index 5b29e5e9d..81f95a6df 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -5,7 +5,7 @@ git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory= -black==22.3.0 +black==22.8.0 bumpversion click~=8.0.4 flake8 From f32f9fd723c3879e007c5d87df3c61fa79f2ac77 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 20 Sep 2022 16:03:35 -0400 Subject: [PATCH 3/7] Bump mypy from 0.950 to 0.971 (#456) * Bump mypy from 0.950 to 0.971 Bumps [mypy](https://github.com/python/mypy) from 0.950 to 0.971. - [Release notes](https://github.com/python/mypy/releases) - [Commits](https://github.com/python/mypy/compare/v0.950...v0.971) --- updated-dependencies: - dependency-name: mypy dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] * Add automated changelog yaml from template for bot PR Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Github Build Bot --- .changes/unreleased/Dependency-20220914-192102.yaml | 7 +++++++ dev-requirements.txt | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 .changes/unreleased/Dependency-20220914-192102.yaml diff --git a/.changes/unreleased/Dependency-20220914-192102.yaml b/.changes/unreleased/Dependency-20220914-192102.yaml new file mode 100644 index 000000000..f13fd6c45 --- /dev/null +++ b/.changes/unreleased/Dependency-20220914-192102.yaml @@ -0,0 +1,7 @@ +kind: "Dependency" +body: "Bump mypy from 0.950 to 0.971" +time: 2022-09-14T19:21:02.00000Z +custom: + Author: dependabot[bot] + Issue: 417 + PR: 456 diff --git a/dev-requirements.txt b/dev-requirements.txt index 81f95a6df..87ca93da7 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -13,7 +13,7 @@ flaky freezegun==0.3.9 ipdb mock>=1.3.0 -mypy==0.950 +mypy==0.971 pre-commit pytest-csv pytest-dotenv From e918e2671b044753b36b0ea14a02d22ae66345f8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 21 Sep 2022 10:08:10 -0400 Subject: [PATCH 4/7] Update click requirement from ~=8.0.4 to ~=8.1.3 (#457) * Update click requirement from ~=8.0.4 to ~=8.1.3 Updates the requirements on [click](https://github.com/pallets/click) to permit the latest version. - [Release notes](https://github.com/pallets/click/releases) - [Changelog](https://github.com/pallets/click/blob/main/CHANGES.rst) - [Commits](https://github.com/pallets/click/compare/8.0.4...8.1.3) --- updated-dependencies: - dependency-name: click dependency-type: direct:development ... Signed-off-by: dependabot[bot] * Add automated changelog yaml from template for bot PR Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Github Build Bot --- .changes/unreleased/Dependency-20220914-192027.yaml | 7 +++++++ dev-requirements.txt | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 .changes/unreleased/Dependency-20220914-192027.yaml diff --git a/.changes/unreleased/Dependency-20220914-192027.yaml b/.changes/unreleased/Dependency-20220914-192027.yaml new file mode 100644 index 000000000..1863b52fc --- /dev/null +++ b/.changes/unreleased/Dependency-20220914-192027.yaml @@ -0,0 +1,7 @@ +kind: "Dependency" +body: "Update click requirement from ~=8.0.4 to ~=8.1.3" +time: 2022-09-14T19:20:27.00000Z +custom: + Author: dependabot[bot] + Issue: 417 + PR: 457 diff --git a/dev-requirements.txt b/dev-requirements.txt index 87ca93da7..8959df95d 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -7,7 +7,7 @@ git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory= black==22.8.0 bumpversion -click~=8.0.4 +click~=8.1.3 flake8 flaky freezegun==0.3.9 From d2dfcdf38858df878ac5bc2253336160c00a6bca Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 21 Sep 2022 11:58:02 -0400 Subject: [PATCH 5/7] Bump pyodbc from 4.0.32 to 4.0.34 (#459) * Bump pyodbc from 4.0.32 to 4.0.34 Bumps [pyodbc](https://github.com/mkleehammer/pyodbc) from 4.0.32 to 4.0.34. - [Release notes](https://github.com/mkleehammer/pyodbc/releases) - [Commits](https://github.com/mkleehammer/pyodbc/compare/4.0.32...4.0.34) --- updated-dependencies: - dependency-name: pyodbc dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] * Add automated changelog yaml from template for bot PR * Remove newline Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Github Build Bot Co-authored-by: leahwicz <60146280+leahwicz@users.noreply.github.com> --- .changes/unreleased/Dependency-20220913-225328.yaml | 7 +++++++ requirements.txt | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 .changes/unreleased/Dependency-20220913-225328.yaml diff --git a/.changes/unreleased/Dependency-20220913-225328.yaml b/.changes/unreleased/Dependency-20220913-225328.yaml new file mode 100644 index 000000000..b934c08c7 --- /dev/null +++ b/.changes/unreleased/Dependency-20220913-225328.yaml @@ -0,0 +1,7 @@ +kind: Dependency +body: "Bump pyodbc from 4.0.32 to 4.0.34" +time: 2022-09-13T22:53:28.00000Z +custom: + Author: dependabot[bot] + Issue: 417 + PR: 459 diff --git a/requirements.txt b/requirements.txt index 5d774e4f7..14b36b723 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ PyHive[hive]>=0.6.0,<0.7.0 requests[python]>=2.28.1 -pyodbc==4.0.32 +pyodbc==4.0.34 sqlparams>=3.0.0 thrift>=0.13.0 sqlparse>=0.4.2 # not directly required, pinned by Snyk to avoid a vulnerability From e07b8a20fa409e24e4c9959739c3eaefaf2eeb5a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 21 Sep 2022 15:04:33 -0400 Subject: [PATCH 6/7] Bump thrift-sasl from 0.4.1 to 0.4.3 (#455) * Bump thrift-sasl from 0.4.1 to 0.4.3 Bumps [thrift-sasl](https://github.com/cloudera/thrift_sasl) from 0.4.1 to 0.4.3. - [Release notes](https://github.com/cloudera/thrift_sasl/releases) - [Changelog](https://github.com/cloudera/thrift_sasl/blob/master/CHANGELOG.md) - [Commits](https://github.com/cloudera/thrift_sasl/compare/v0.4.1...v0.4.3) --- updated-dependencies: - dependency-name: thrift-sasl dependency-type: direct:development update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] * Add automated changelog yaml from template for bot PR Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Github Build Bot --- .changes/unreleased/Dependency-20220914-192125.yaml | 7 +++++++ dev-requirements.txt | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 .changes/unreleased/Dependency-20220914-192125.yaml diff --git a/.changes/unreleased/Dependency-20220914-192125.yaml b/.changes/unreleased/Dependency-20220914-192125.yaml new file mode 100644 index 000000000..78234be80 --- /dev/null +++ b/.changes/unreleased/Dependency-20220914-192125.yaml @@ -0,0 +1,7 @@ +kind: "Dependency" +body: "Bump thrift-sasl from 0.4.1 to 0.4.3" +time: 2022-09-14T19:21:25.00000Z +custom: + Author: dependabot[bot] + Issue: 417 + PR: 455 diff --git a/dev-requirements.txt b/dev-requirements.txt index 8959df95d..e93c1b41a 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -24,4 +24,4 @@ tox>=3.2.0 # Test requirements sasl>=0.2.1 -thrift_sasl==0.4.1 +thrift_sasl==0.4.3 From 8c921d071e925524c958b2f3fdea1405761c1a97 Mon Sep 17 00:00:00 2001 From: James McNeill <55981540+jpmmcneill@users.noreply.github.com> Date: Thu, 22 Sep 2022 14:16:00 +0100 Subject: [PATCH 7/7] Jpmmcneill/spark type boolean (#471) * implement type boolean test spark * changie result --- .changes/unreleased/Features-20220920-000814.yaml | 7 +++++++ tests/functional/adapter/utils/test_data_types.py | 5 +++++ 2 files changed, 12 insertions(+) create mode 100644 .changes/unreleased/Features-20220920-000814.yaml diff --git a/.changes/unreleased/Features-20220920-000814.yaml b/.changes/unreleased/Features-20220920-000814.yaml new file mode 100644 index 000000000..96ba63648 --- /dev/null +++ b/.changes/unreleased/Features-20220920-000814.yaml @@ -0,0 +1,7 @@ +kind: Features +body: implement testing for type_boolean in spark +time: 2022-09-20T00:08:14.15447+01:00 +custom: + Author: jpmmcneill + Issue: "470" + PR: "471" diff --git a/tests/functional/adapter/utils/test_data_types.py b/tests/functional/adapter/utils/test_data_types.py index 65a24a3a9..ce6085803 100644 --- a/tests/functional/adapter/utils/test_data_types.py +++ b/tests/functional/adapter/utils/test_data_types.py @@ -9,6 +9,7 @@ from dbt.tests.adapter.utils.data_types.test_type_numeric import BaseTypeNumeric from dbt.tests.adapter.utils.data_types.test_type_string import BaseTypeString from dbt.tests.adapter.utils.data_types.test_type_timestamp import BaseTypeTimestamp +from dbt.tests.adapter.utils.data_types.test_type_boolean import BaseTypeBoolean class TestTypeBigInt(BaseTypeBigInt): @@ -65,3 +66,7 @@ class TestTypeString(BaseTypeString): class TestTypeTimestamp(BaseTypeTimestamp): pass + + +class TestTypeBoolean(BaseTypeBoolean): + pass