diff --git a/.bumpversion.cfg b/.bumpversion.cfg index dc5b81465..218427456 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.5.0b4 +current_version = 1.5.3 parse = (?P[\d]+) # major version number \.(?P[\d]+) # minor version number \.(?P[\d]+) # patch version number diff --git a/.changes/1.5.0-b1.md b/.changes/1.5.0-b1.md deleted file mode 100644 index 4afeff282..000000000 --- a/.changes/1.5.0-b1.md +++ /dev/null @@ -1,21 +0,0 @@ -## dbt-bigquery 1.5.0-b1 - February 22, 2023 - -### Features - -- add option to specify excluded columns on merge ([#5260](https://github.com/dbt-labs/dbt-bigquery/issues/5260)) -- dbt-constraints support for BigQuery as per dbt-core issue #1358 ([#444](https://github.com/dbt-labs/dbt-bigquery/issues/444)) - -### Fixes - -- Make BQ access_grant updates thread safe ([#266](https://github.com/dbt-labs/dbt-bigquery/issues/266)) -- Use IEC standard abbreviations (GiB, TiB, etc) ([#477](https://github.com/dbt-labs/dbt-bigquery/issues/477)) -- Pin dataproc serverless spark runtime to `1.1` ([#531](https://github.com/dbt-labs/dbt-bigquery/issues/531)) - -### Under the Hood - -- Remove manual retry to get python job status on cluster ([#422](https://github.com/dbt-labs/dbt-bigquery/issues/422)) -- remove tox call to integration tests ([#459](https://github.com/dbt-labs/dbt-bigquery/issues/459)) - -### Contributors -- [@dave-connors-3](https://github.com/dave-connors-3) ([#5260](https://github.com/dbt-labs/dbt-bigquery/issues/5260)) -- [@victoriapm](https://github.com/victoriapm) ([#444](https://github.com/dbt-labs/dbt-bigquery/issues/444)) diff --git a/.changes/1.5.0-b2.md b/.changes/1.5.0-b2.md deleted file mode 100644 index a214d458c..000000000 --- a/.changes/1.5.0-b2.md +++ /dev/null @@ -1,5 +0,0 @@ -## dbt-bigquery 1.5.0-b2 - March 02, 2023 - -### Under the Hood - -- Rename constraints_enabled to contract ([#548](https://github.com/dbt-labs/dbt-bigquery/issues/548)) diff --git a/.changes/1.5.0-b3.md b/.changes/1.5.0-b3.md deleted file mode 100644 index f2415ad97..000000000 --- a/.changes/1.5.0-b3.md +++ /dev/null @@ -1,20 +0,0 @@ -## dbt-bigquery 1.5.0-b3 - March 16, 2023 - -### Features - -- Enforce contracts on models materialized as tables and views ([#529](https://github.com/dbt-labs/dbt-bigquery/issues/529), [#555](https://github.com/dbt-labs/dbt-bigquery/issues/555)) -- add dataproc serverless config to profile ([#530](https://github.com/dbt-labs/dbt-bigquery/issues/530)) - -### Fixes - -- Fix time ingestion partitioning option regression when combined using `require_partition_filter` option on incremental run ([#483](https://github.com/dbt-labs/dbt-bigquery/issues/483)) -- Allow policy_tags to be removed ([#349](https://github.com/dbt-labs/dbt-bigquery/issues/349)) -- Update connections.py with retry logic for 502 BadGateway ([#562](https://github.com/dbt-labs/dbt-bigquery/issues/562)) -- Fix for Python incremental model regression ([#581](https://github.com/dbt-labs/dbt-bigquery/issues/581)) -- Fix failing test by removing no erroneous asserts. 
([#605](https://github.com/dbt-labs/dbt-bigquery/issues/605)) - -### Contributors -- [@Kayrnt](https://github.com/Kayrnt) ([#483](https://github.com/dbt-labs/dbt-bigquery/issues/483)) -- [@garsir](https://github.com/garsir) ([#349](https://github.com/dbt-labs/dbt-bigquery/issues/349)) -- [@patkearns10](https://github.com/patkearns10) ([#562](https://github.com/dbt-labs/dbt-bigquery/issues/562)) -- [@torkjel](https://github.com/torkjel) ([#530](https://github.com/dbt-labs/dbt-bigquery/issues/530)) diff --git a/.changes/1.5.0-b4.md b/.changes/1.5.0-b4.md deleted file mode 100644 index 2262d374d..000000000 --- a/.changes/1.5.0-b4.md +++ /dev/null @@ -1,18 +0,0 @@ -## dbt-bigquery 1.5.0-b4 - March 30, 2023 - -### Features - -- Adding `bytes_billed` to `BigQueryAdapterResponse` ([#560](https://github.com/dbt-labs/dbt-bigquery/issues/560)) -- Modify addapter to support unified constraint fields ([#567](https://github.com/dbt-labs/dbt-bigquery/issues/567)) -- Modify create_table_as to use contract column order ([#579](https://github.com/dbt-labs/dbt-bigquery/issues/579)) - -### Fixes - -- Use _make_ref_key_dict instead of _make_ref_key_msg ([#621](https://github.com/dbt-labs/dbt-bigquery/issues/621)) - -### Under the Hood - -- Treat contract config as a python object ([#548](https://github.com/dbt-labs/dbt-bigquery/issues/548), [#633](https://github.com/dbt-labs/dbt-bigquery/issues/633)) - -### Contributors -- [@bruno-szdl](https://github.com/bruno-szdl) ([#560](https://github.com/dbt-labs/dbt-bigquery/issues/560)) diff --git a/.changes/1.5.0.md b/.changes/1.5.0.md new file mode 100644 index 000000000..61cbcb3bb --- /dev/null +++ b/.changes/1.5.0.md @@ -0,0 +1,44 @@ +## dbt-bigquery 1.5.0 - April 27, 2023 + +### Features + +- add option to specify excluded columns on merge ([#5260](https://github.com/dbt-labs/dbt-bigquery/issues/5260)) +- Support for data type constraints in BigQuery ([#444](https://github.com/dbt-labs/dbt-bigquery/issues/444), [#568](https://github.com/dbt-labs/dbt-bigquery/issues/568)) +- Enforce contracts on models materialized as tables and views ([#529](https://github.com/dbt-labs/dbt-bigquery/issues/529), [#555](https://github.com/dbt-labs/dbt-bigquery/issues/555)) +- Adding `bytes_billed` to `BigQueryAdapterResponse` ([#560](https://github.com/dbt-labs/dbt-bigquery/issues/560)) +- add dataproc serverless config to profile ([#530](https://github.com/dbt-labs/dbt-bigquery/issues/530)) +- Modify addapter to support unified constraint fields ([#567](https://github.com/dbt-labs/dbt-bigquery/issues/567)) +- Modify create_table_as to use contract column order ([#579](https://github.com/dbt-labs/dbt-bigquery/issues/579)) +- Add support for model-level constraints ([#569](https://github.com/dbt-labs/dbt-bigquery/issues/569)) + +### Fixes + +- Make BQ access_grant updates thread safe ([#266](https://github.com/dbt-labs/dbt-bigquery/issues/266)) +- Use IEC standard abbreviations (GiB, TiB, etc) ([#477](https://github.com/dbt-labs/dbt-bigquery/issues/477)) +- Fix time ingestion partitioning option regression when combined using `require_partition_filter` option on incremental run ([#483](https://github.com/dbt-labs/dbt-bigquery/issues/483)) +- Pin dataproc serverless spark runtime to `1.1` ([#531](https://github.com/dbt-labs/dbt-bigquery/issues/531)) +- Allow policy_tags to be removed ([#349](https://github.com/dbt-labs/dbt-bigquery/issues/349)) +- Align partitions to be updated in incremental insert_overwrite to _dbt_max_partition wrt to nulls 
([#544](https://github.com/dbt-labs/dbt-bigquery/issues/544)) +- Update connections.py with retry logic for 502 BadGateway ([#562](https://github.com/dbt-labs/dbt-bigquery/issues/562)) +- Fix for Python incremental model regression ([#581](https://github.com/dbt-labs/dbt-bigquery/issues/581)) +- Fix failing test by removing no erroneous asserts. ([#605](https://github.com/dbt-labs/dbt-bigquery/issues/605)) +- Use _make_ref_key_dict instead of _make_ref_key_msg ([#621](https://github.com/dbt-labs/dbt-bigquery/issues/621)) +- add full refresh capabilities to tabular bigquery python models to accommodate schema changes ([#653](https://github.com/dbt-labs/dbt-bigquery/issues/653)) +- Repair accidental change to dev-requirements ([#657](https://github.com/dbt-labs/dbt-bigquery/issues/657)) +- Fix issue of sporadic failure to apply grants during high transaction volumes ([#614](https://github.com/dbt-labs/dbt-bigquery/issues/614)) + +### Under the Hood + +- Remove manual retry to get python job status on cluster ([#422](https://github.com/dbt-labs/dbt-bigquery/issues/422)) +- remove tox call to integration tests ([#459](https://github.com/dbt-labs/dbt-bigquery/issues/459)) +- Treat contract config as a python object ([#548](https://github.com/dbt-labs/dbt-bigquery/issues/548), [#633](https://github.com/dbt-labs/dbt-bigquery/issues/633)) +- Update bigquery__format_column macro to support prettier ContractError message" ([#656](https://github.com/dbt-labs/dbt-bigquery/issues/656)) + +### Contributors +- [@Kayrnt](https://github.com/Kayrnt) ([#483](https://github.com/dbt-labs/dbt-bigquery/issues/483)) +- [@bruno-szdl](https://github.com/bruno-szdl) ([#560](https://github.com/dbt-labs/dbt-bigquery/issues/560)) +- [@dave-connors-3](https://github.com/dave-connors-3) ([#5260](https://github.com/dbt-labs/dbt-bigquery/issues/5260)) +- [@garsir](https://github.com/garsir) ([#349](https://github.com/dbt-labs/dbt-bigquery/issues/349)) +- [@patkearns10](https://github.com/patkearns10) ([#544](https://github.com/dbt-labs/dbt-bigquery/issues/544), [#562](https://github.com/dbt-labs/dbt-bigquery/issues/562)) +- [@torkjel](https://github.com/torkjel) ([#530](https://github.com/dbt-labs/dbt-bigquery/issues/530)) +- [@victoriapm,](https://github.com/victoriapm,) ([#444](https://github.com/dbt-labs/dbt-bigquery/issues/444), [#568](https://github.com/dbt-labs/dbt-bigquery/issues/568)) diff --git a/.changes/1.5.0/Features-20220826-115320.yaml b/.changes/1.5.0/Features-20220826-115320.yaml deleted file mode 100644 index 27bca84fc..000000000 --- a/.changes/1.5.0/Features-20220826-115320.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Features -body: add option to specify excluded columns on merge -time: 2022-08-26T11:53:20.194981-05:00 -custom: - Author: dave-connors-3 - Issue: "5260" - PR: "227" diff --git a/.changes/1.5.0/Features-20221220-193731.yaml b/.changes/1.5.0/Features-20221220-193731.yaml deleted file mode 100644 index c9aa5c03d..000000000 --- a/.changes/1.5.0/Features-20221220-193731.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Features -body: 'Support for data type constraints in BigQuery' -time: 2022-12-20T19:37:31.982821+01:00 -custom: - Author: victoriapm, emmyoop - Issue: 444 568 diff --git a/.changes/1.5.0/Features-20230223-145508.yaml b/.changes/1.5.0/Features-20230223-145508.yaml deleted file mode 100644 index cab391802..000000000 --- a/.changes/1.5.0/Features-20230223-145508.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Features -body: Enforce contracts on models materialized as tables and views -time: 
2023-02-23T14:55:08.186645-05:00 -custom: - Author: michelleark emmyoop - Issue: 529 555 diff --git a/.changes/1.5.0/Features-20230228-094234.yaml b/.changes/1.5.0/Features-20230228-094234.yaml deleted file mode 100644 index 4929c0834..000000000 --- a/.changes/1.5.0/Features-20230228-094234.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Features -body: 'Adding `bytes_billed` to `BigQueryAdapterResponse` ' -time: 2023-02-28T09:42:34.557696-03:00 -custom: - Author: bruno-szdl - Issue: "560" diff --git a/.changes/1.5.0/Features-20230303-132509.yaml b/.changes/1.5.0/Features-20230303-132509.yaml deleted file mode 100644 index 3a0ba8403..000000000 --- a/.changes/1.5.0/Features-20230303-132509.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Features -body: add dataproc serverless config to profile -time: 2023-03-03T13:25:09.02695-08:00 -custom: - Author: colin-rogers-dbt torkjel - Issue: "530" diff --git a/.changes/1.5.0/Features-20230314-171221.yaml b/.changes/1.5.0/Features-20230314-171221.yaml deleted file mode 100644 index 067812f09..000000000 --- a/.changes/1.5.0/Features-20230314-171221.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Features -body: Modify addapter to support unified constraint fields -time: 2023-03-14T17:12:21.287702-04:00 -custom: - Author: peterallenwebb - Issue: "567" diff --git a/.changes/1.5.0/Features-20230315-120554.yaml b/.changes/1.5.0/Features-20230315-120554.yaml deleted file mode 100644 index 65718d06b..000000000 --- a/.changes/1.5.0/Features-20230315-120554.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Features -body: Modify create_table_as to use contract column order -time: 2023-03-15T12:05:54.52431-04:00 -custom: - Author: gshank - Issue: "579" diff --git a/.changes/1.5.0/Fixes-20221213-102005.yaml b/.changes/1.5.0/Fixes-20221213-102005.yaml deleted file mode 100644 index 5e4c486d8..000000000 --- a/.changes/1.5.0/Fixes-20221213-102005.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Fixes -body: Make BQ access_grant updates thread safe -time: 2022-12-13T10:20:05.714134-08:00 -custom: - Author: colin-rogers-dbt - Issue: "266" - PR: "404" diff --git a/.changes/1.5.0/Fixes-20230125-174159.yaml b/.changes/1.5.0/Fixes-20230125-174159.yaml deleted file mode 100644 index 4100080d4..000000000 --- a/.changes/1.5.0/Fixes-20230125-174159.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Fixes -body: Use IEC standard abbreviations (GiB, TiB, etc) -time: 2023-01-25T17:41:59.921173-07:00 -custom: - Author: dbeatty10 - Issue: "477" - PR: "477" diff --git a/.changes/1.5.0/Fixes-20230202-010332.yaml b/.changes/1.5.0/Fixes-20230202-010332.yaml deleted file mode 100644 index f6062e7ea..000000000 --- a/.changes/1.5.0/Fixes-20230202-010332.yaml +++ /dev/null @@ -1,8 +0,0 @@ -kind: Fixes -body: Fix time ingestion partitioning option regression when combined using `require_partition_filter` - option on incremental run -time: 2023-02-02T01:03:32.577336+01:00 -custom: - Author: Kayrnt - Issue: "483" - PR: "485" diff --git a/.changes/1.5.0/Fixes-20230213-203317.yaml b/.changes/1.5.0/Fixes-20230213-203317.yaml deleted file mode 100644 index ae64c4e2d..000000000 --- a/.changes/1.5.0/Fixes-20230213-203317.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: Pin dataproc serverless spark runtime to `1.1` -time: 2023-02-13T20:33:17.839861-08:00 -custom: - Author: colin-rogers-dbt - Issue: "531" diff --git a/.changes/1.5.0/Fixes-20230216-140756.yaml b/.changes/1.5.0/Fixes-20230216-140756.yaml deleted file mode 100644 index b273c563e..000000000 --- a/.changes/1.5.0/Fixes-20230216-140756.yaml +++ /dev/null @@ -1,6 +0,0 @@ 
-kind: Fixes -body: Allow policy_tags to be removed -time: 2023-02-16T14:07:56.313767Z -custom: - Author: garsir - Issue: "349" diff --git a/.changes/1.5.0/Fixes-20230227-110426.yaml b/.changes/1.5.0/Fixes-20230227-110426.yaml deleted file mode 100644 index c23c53ee6..000000000 --- a/.changes/1.5.0/Fixes-20230227-110426.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: Update connections.py with retry logic for 502 BadGateway -time: 2023-02-27T11:04:26.978726+11:00 -custom: - Author: patkearns10 - Issue: "562" diff --git a/.changes/1.5.0/Fixes-20230309-181313.yaml b/.changes/1.5.0/Fixes-20230309-181313.yaml deleted file mode 100644 index 8681f5eaf..000000000 --- a/.changes/1.5.0/Fixes-20230309-181313.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: Fix for Python incremental model regression -time: 2023-03-09T18:13:13.512904-08:00 -custom: - Author: nssalian - Issue: "581" diff --git a/.changes/1.5.0/Fixes-20230315-130504.yaml b/.changes/1.5.0/Fixes-20230315-130504.yaml deleted file mode 100644 index 5229dca17..000000000 --- a/.changes/1.5.0/Fixes-20230315-130504.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: Fix failing test by removing no erroneous asserts. -time: 2023-03-15T13:05:04.747323-07:00 -custom: - Author: versusfacit - Issue: "605" diff --git a/.changes/1.5.0/Fixes-20230322-162200.yaml b/.changes/1.5.0/Fixes-20230322-162200.yaml deleted file mode 100644 index a80a3f963..000000000 --- a/.changes/1.5.0/Fixes-20230322-162200.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: Use _make_ref_key_dict instead of _make_ref_key_msg -time: 2023-03-22T16:22:00.091222-04:00 -custom: - Author: gshank - Issue: "621" diff --git a/.changes/1.5.0/Under the Hood-20221209-161550.yaml b/.changes/1.5.0/Under the Hood-20221209-161550.yaml deleted file mode 100644 index 846cda6f6..000000000 --- a/.changes/1.5.0/Under the Hood-20221209-161550.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Under the Hood -body: Remove manual retry to get python job status on cluster -time: 2022-12-09T16:15:50.834922-08:00 -custom: - Author: ChenyuLInx - Issue: "422" - PR: "423" diff --git a/.changes/1.5.0/Under the Hood-20230130-170118.yaml b/.changes/1.5.0/Under the Hood-20230130-170118.yaml deleted file mode 100644 index 39e0ee8f8..000000000 --- a/.changes/1.5.0/Under the Hood-20230130-170118.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Under the Hood -body: remove tox call to integration tests -time: 2023-01-30T17:01:18.971636-08:00 -custom: - Author: colin-rogers-dbt - Issue: "459" - PR: "498" diff --git a/.changes/1.5.0/Under the Hood-20230223-105149.yaml b/.changes/1.5.0/Under the Hood-20230223-105149.yaml deleted file mode 100644 index ca315cc1d..000000000 --- a/.changes/1.5.0/Under the Hood-20230223-105149.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Under the Hood -body: Treat contract config as a python object -time: 2023-02-23T10:51:49.737457-05:00 -custom: - Author: gshank emmyoop - Issue: 548 633 diff --git a/.changes/1.5.1.md b/.changes/1.5.1.md new file mode 100644 index 000000000..e5ada813d --- /dev/null +++ b/.changes/1.5.1.md @@ -0,0 +1,5 @@ +## dbt-bigquery 1.5.1 - May 18, 2023 + +### Fixes + +- Fixes adding policy tags when a struct is defined in the yml ([#687](https://github.com/dbt-labs/dbt-bigquery/issues/687)) diff --git a/.changes/1.5.2.md b/.changes/1.5.2.md new file mode 100644 index 000000000..613f6e332 --- /dev/null +++ b/.changes/1.5.2.md @@ -0,0 +1,9 @@ +## dbt-bigquery 1.5.2 - June 15, 2023 + +### Features + +- Support model contracts + constraints on nested columns 
([#673](https://github.com/dbt-labs/dbt-bigquery/issues/673)) + +### Fixes + +- test foreign key constraint rendering ([#7512](https://github.com/dbt-labs/dbt-bigquery/issues/7512)) diff --git a/.changes/1.5.3.md b/.changes/1.5.3.md new file mode 100644 index 000000000..5e3f5f74a --- /dev/null +++ b/.changes/1.5.3.md @@ -0,0 +1,5 @@ +## dbt-bigquery 1.5.3 - June 22, 2023 + +### Breaking Changes + +- Drop support for python 3.7 ([#dbt-labs/dbt-core/7082](https://github.com/dbt-labs/dbt-bigquery/issues/dbt-labs/dbt-core/7082)) diff --git a/.changes/unreleased/Features-20230406-104433.yaml b/.changes/unreleased/Features-20230406-104433.yaml deleted file mode 100644 index 4ec1d8120..000000000 --- a/.changes/unreleased/Features-20230406-104433.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Features -body: Add support for model-level constraints -time: 2023-04-06T10:44:33.045896-04:00 -custom: - Author: peterallenwebb - Issue: "569" diff --git a/.changes/unreleased/Fixes-20230222-133301.yaml b/.changes/unreleased/Fixes-20230222-133301.yaml deleted file mode 100644 index 7c613a109..000000000 --- a/.changes/unreleased/Fixes-20230222-133301.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: Align partitions to be updated in incremental insert_overwrite to _dbt_max_partition wrt to nulls -time: 2023-02-22T13:33:01.607225-08:00 -custom: - Author: patkearns10 - Issue: "544" diff --git a/.changes/unreleased/Fixes-20230408-035117.yaml b/.changes/unreleased/Fixes-20230408-035117.yaml deleted file mode 100644 index 56799823b..000000000 --- a/.changes/unreleased/Fixes-20230408-035117.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Fixes -body: ' add full refresh capabilities to tabular bigquery python models to accommodate - schema changes' -time: 2023-04-08T03:51:17.167349-07:00 -custom: - Author: versusfacit - Issue: "653" diff --git a/.changes/unreleased/Fixes-20230411-143515.yaml b/.changes/unreleased/Fixes-20230411-143515.yaml deleted file mode 100644 index c88b53094..000000000 --- a/.changes/unreleased/Fixes-20230411-143515.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: Repair accidental change to dev-requirements -time: 2023-04-11T14:35:15.369296-04:00 -custom: - Author: peterallenwebb - Issue: "657" diff --git a/.changes/unreleased/Fixes-20230626-105156.yaml b/.changes/unreleased/Fixes-20230626-105156.yaml new file mode 100644 index 000000000..d1c6b9e25 --- /dev/null +++ b/.changes/unreleased/Fixes-20230626-105156.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: remove call to dataset update if dataset has not changed +time: 2023-06-26T10:51:56.698483-07:00 +custom: + Author: colin-rogers-dbt + Issue: "770" diff --git a/.changes/unreleased/Fixes-20230630-213112.yaml b/.changes/unreleased/Fixes-20230630-213112.yaml new file mode 100644 index 000000000..7238c0cb1 --- /dev/null +++ b/.changes/unreleased/Fixes-20230630-213112.yaml @@ -0,0 +1,7 @@ +kind: Fixes +body: 'Contracts: Handle struct column specified both at root and nested levels + + arrays of structs' +time: 2023-06-30T21:31:12.63257-04:00 +custom: + Author: michelleark + Issue: 781 782 diff --git a/.changes/unreleased/Under the Hood-20230411-143129.yaml b/.changes/unreleased/Under the Hood-20230411-143129.yaml deleted file mode 100644 index 2d41e7cff..000000000 --- a/.changes/unreleased/Under the Hood-20230411-143129.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Under the Hood -body: Update bigquery__format_column macro to support prettier ContractError message" -time: 2023-04-11T14:31:29.378726+02:00 -custom: - Author: jtcohen6 - Issue: "656" diff 
--git a/.github/scripts/integration-test-matrix.js b/.github/scripts/integration-test-matrix.js index 58acf364d..bf7fd2ef7 100644 --- a/.github/scripts/integration-test-matrix.js +++ b/.github/scripts/integration-test-matrix.js @@ -1,6 +1,6 @@ module.exports = ({ context }) => { const defaultPythonVersion = "3.8"; - const supportedPythonVersions = ["3.7", "3.8", "3.9", "3.10", "3.11"]; + const supportedPythonVersions = ["3.8", "3.9", "3.10", "3.11"]; const supportedAdapters = ["bigquery"]; // if PR, generate matrix based on files changed and PR labels diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 7be34358f..ce58f380c 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -63,13 +63,13 @@ jobs: steps: - name: Check out the repository (non-PR) if: github.event_name != 'pull_request_target' - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: persist-credentials: false - name: Check out the repository (PR) if: github.event_name == 'pull_request_target' - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: persist-credentials: false ref: ${{ github.event.pull_request.head.sha }} @@ -93,7 +93,7 @@ jobs: - 'dev-requirements.txt' - name: Generate integration test matrix id: generate-matrix - uses: actions/github-script@v4 + uses: actions/github-script@v6 env: CHANGES: ${{ steps.get-changes.outputs.changes }} with: @@ -127,11 +127,16 @@ jobs: TOXENV: integration-${{ matrix.adapter }} PYTEST_ADDOPTS: "-v --color=yes -n4 --csv integration_results.csv" DBT_INVOCATION_ENV: github-actions + DD_CIVISIBILITY_AGENTLESS_ENABLED: true + DD_API_KEY: ${{ secrets.DATADOG_API_KEY }} + DD_SITE: datadoghq.com + DD_ENV: ci + DD_SERVICE: ${{ github.event.repository.name }} steps: - name: Check out the repository if: github.event_name != 'pull_request_target' - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: persist-credentials: false @@ -139,7 +144,7 @@ jobs: # this is necessary for the `pull_request_target` event - name: Check out the repository (PR) if: github.event_name == 'pull_request_target' - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: persist-credentials: false ref: ${{ github.event.pull_request.head.sha }} @@ -156,10 +161,6 @@ jobs: python -m pip --version tox --version - - name: Install dbt-core latest - run: | - python -m pip install "git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core" - - name: Run tox (bigquery) if: matrix.adapter == 'bigquery' env: @@ -167,14 +168,14 @@ jobs: BIGQUERY_TEST_ALT_DATABASE: ${{ secrets.BIGQUERY_TEST_ALT_DATABASE }} BIGQUERY_TEST_NO_ACCESS_DATABASE: ${{ secrets.BIGQUERY_TEST_NO_ACCESS_DATABASE }} DBT_TEST_USER_1: group:buildbot@dbtlabs.com - DBT_TEST_USER_2: group:dev-core@dbtlabs.com + DBT_TEST_USER_2: group:engineering-core-team@dbtlabs.com DBT_TEST_USER_3: serviceAccount:dbt-integration-test-user@dbt-test-env.iam.gserviceaccount.com DATAPROC_REGION: us-central1 DATAPROC_CLUSTER_NAME: dbt-test-1 GCS_BUCKET: dbt-ci - run: tox + run: tox -- --ddtrace - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v3 if: always() with: name: logs @@ -183,9 +184,10 @@ jobs: - name: Get current date if: always() id: date - run: echo "::set-output name=date::$(date +'%Y-%m-%dT%H_%M_%S')" #no colons allowed for artifacts + run: | + echo "date=$(date +'%Y-%m-%dT%H_%M_%S')" >> $GITHUB_OUTPUT #no colons allowed for artifacts - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v3 if: always() with: name: 
integration_results_${{ matrix.python-version }}_${{ matrix.os }}_${{ matrix.adapter }}-${{ steps.date.outputs.date }}.csv diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 140557beb..d2669b2d9 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -43,7 +43,7 @@ jobs: steps: - name: Check out the repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: persist-credentials: false @@ -71,7 +71,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.7', '3.8', '3.9', '3.10', '3.11'] + python-version: ['3.8', '3.9', '3.10', '3.11'] env: TOXENV: "unit" @@ -79,7 +79,7 @@ jobs: steps: - name: Check out the repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: persist-credentials: false @@ -94,18 +94,17 @@ jobs: python -m pip install tox python -m pip --version tox --version - - name: Install dbt-core latest - run: | - python -m pip install "git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core" + - name: Run tox run: tox - name: Get current date if: always() id: date - run: echo "::set-output name=date::$(date +'%Y-%m-%dT%H_%M_%S')" #no colons allowed for artifacts + run: | + echo "date=$(date +'%Y-%m-%dT%H_%M_%S')" >> $GITHUB_OUTPUT #no colons allowed for artifacts - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v3 if: always() with: name: unit_results_${{ matrix.python-version }}-${{ steps.date.outputs.date }}.csv @@ -121,7 +120,7 @@ jobs: steps: - name: Check out the repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: persist-credentials: false @@ -155,9 +154,9 @@ jobs: run: | export is_alpha=0 if [[ "$(ls -lh dist/)" == *"a1"* ]]; then export is_alpha=1; fi - echo "::set-output name=is_alpha::$is_alpha" + echo "is_alpha=$is_alpha" >> $GITHUB_OUTPUT - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v3 with: name: dist path: dist/ @@ -175,7 +174,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: ['3.7', '3.8', '3.9', '3.10', '3.11'] + python-version: ['3.8', '3.9', '3.10', '3.11'] steps: - name: Set up Python ${{ matrix.python-version }} @@ -187,7 +186,7 @@ jobs: python -m pip install --user --upgrade pip python -m pip install --upgrade wheel setuptools twine check-wheel-contents python -m pip --version - - uses: actions/download-artifact@v2 + - uses: actions/download-artifact@v3 with: name: dist path: dist/ diff --git a/.github/workflows/nightly-release.yml b/.github/workflows/nightly-release.yml index 54c5fdc69..4762d1218 100644 --- a/.github/workflows/nightly-release.yml +++ b/.github/workflows/nightly-release.yml @@ -26,7 +26,7 @@ defaults: shell: bash env: - RELEASE_BRANCH: "1.4.latest" + RELEASE_BRANCH: "1.5.latest" jobs: aggregate-release-data: diff --git a/CHANGELOG.md b/CHANGELOG.md index a1e6f96bf..f52a4f6db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,74 +5,74 @@ - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version. - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). 
For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-bigquery/blob/main/CONTRIBUTING.md#adding-changelog-entry) -## dbt-bigquery 1.5.0-b4 - March 30, 2023 +## dbt-bigquery 1.5.3 - June 22, 2023 -### Features - -- Adding `bytes_billed` to `BigQueryAdapterResponse` ([#560](https://github.com/dbt-labs/dbt-bigquery/issues/560)) -- Modify addapter to support unified constraint fields ([#567](https://github.com/dbt-labs/dbt-bigquery/issues/567)) -- Modify create_table_as to use contract column order ([#579](https://github.com/dbt-labs/dbt-bigquery/issues/579)) - -### Fixes - -- Use _make_ref_key_dict instead of _make_ref_key_msg ([#621](https://github.com/dbt-labs/dbt-bigquery/issues/621)) - -### Under the Hood +### Breaking Changes -- Treat contract config as a python object ([#548](https://github.com/dbt-labs/dbt-bigquery/issues/548), [#633](https://github.com/dbt-labs/dbt-bigquery/issues/633)) +- Drop support for python 3.7 ([#dbt-labs/dbt-core/7082](https://github.com/dbt-labs/dbt-bigquery/issues/dbt-labs/dbt-core/7082)) -### Contributors -- [@bruno-szdl](https://github.com/bruno-szdl) ([#560](https://github.com/dbt-labs/dbt-bigquery/issues/560)) -## dbt-bigquery 1.5.0-b3 - March 16, 2023 +## dbt-bigquery 1.5.2 - June 15, 2023 ### Features -- Enforce contracts on models materialized as tables and views ([#529](https://github.com/dbt-labs/dbt-bigquery/issues/529), [#555](https://github.com/dbt-labs/dbt-bigquery/issues/555)) -- add dataproc serverless config to profile ([#530](https://github.com/dbt-labs/dbt-bigquery/issues/530)) +- Support model contracts + constraints on nested columns ([#673](https://github.com/dbt-labs/dbt-bigquery/issues/673)) ### Fixes -- Fix time ingestion partitioning option regression when combined using `require_partition_filter` option on incremental run ([#483](https://github.com/dbt-labs/dbt-bigquery/issues/483)) -- Allow policy_tags to be removed ([#349](https://github.com/dbt-labs/dbt-bigquery/issues/349)) -- Update connections.py with retry logic for 502 BadGateway ([#562](https://github.com/dbt-labs/dbt-bigquery/issues/562)) -- Fix for Python incremental model regression ([#581](https://github.com/dbt-labs/dbt-bigquery/issues/581)) -- Fix failing test by removing no erroneous asserts. 
([#605](https://github.com/dbt-labs/dbt-bigquery/issues/605)) - -### Contributors -- [@Kayrnt](https://github.com/Kayrnt) ([#483](https://github.com/dbt-labs/dbt-bigquery/issues/483)) -- [@garsir](https://github.com/garsir) ([#349](https://github.com/dbt-labs/dbt-bigquery/issues/349)) -- [@patkearns10](https://github.com/patkearns10) ([#562](https://github.com/dbt-labs/dbt-bigquery/issues/562)) -- [@torkjel](https://github.com/torkjel) ([#530](https://github.com/dbt-labs/dbt-bigquery/issues/530)) +- test foreign key constraint rendering ([#7512](https://github.com/dbt-labs/dbt-bigquery/issues/7512)) -## dbt-bigquery 1.5.0-b2 - March 02, 2023 +## dbt-bigquery 1.5.1 - May 18, 2023 -### Under the Hood +### Fixes -- Rename constraints_enabled to contract ([#548](https://github.com/dbt-labs/dbt-bigquery/issues/548)) +- Fixes adding policy tags when a struct is defined in the yml ([#687](https://github.com/dbt-labs/dbt-bigquery/issues/687)) -## dbt-bigquery 1.5.0-b1 - February 22, 2023 +## dbt-bigquery 1.5.0 - April 27, 2023 ### Features - add option to specify excluded columns on merge ([#5260](https://github.com/dbt-labs/dbt-bigquery/issues/5260)) -- dbt-constraints support for BigQuery as per dbt-core issue #1358 ([#444](https://github.com/dbt-labs/dbt-bigquery/issues/444)) +- Support for data type constraints in BigQuery ([#444](https://github.com/dbt-labs/dbt-bigquery/issues/444), [#568](https://github.com/dbt-labs/dbt-bigquery/issues/568)) +- Enforce contracts on models materialized as tables and views ([#529](https://github.com/dbt-labs/dbt-bigquery/issues/529), [#555](https://github.com/dbt-labs/dbt-bigquery/issues/555)) +- Adding `bytes_billed` to `BigQueryAdapterResponse` ([#560](https://github.com/dbt-labs/dbt-bigquery/issues/560)) +- add dataproc serverless config to profile ([#530](https://github.com/dbt-labs/dbt-bigquery/issues/530)) +- Modify addapter to support unified constraint fields ([#567](https://github.com/dbt-labs/dbt-bigquery/issues/567)) +- Modify create_table_as to use contract column order ([#579](https://github.com/dbt-labs/dbt-bigquery/issues/579)) +- Add support for model-level constraints ([#569](https://github.com/dbt-labs/dbt-bigquery/issues/569)) ### Fixes - Make BQ access_grant updates thread safe ([#266](https://github.com/dbt-labs/dbt-bigquery/issues/266)) - Use IEC standard abbreviations (GiB, TiB, etc) ([#477](https://github.com/dbt-labs/dbt-bigquery/issues/477)) +- Fix time ingestion partitioning option regression when combined using `require_partition_filter` option on incremental run ([#483](https://github.com/dbt-labs/dbt-bigquery/issues/483)) - Pin dataproc serverless spark runtime to `1.1` ([#531](https://github.com/dbt-labs/dbt-bigquery/issues/531)) +- Allow policy_tags to be removed ([#349](https://github.com/dbt-labs/dbt-bigquery/issues/349)) +- Align partitions to be updated in incremental insert_overwrite to _dbt_max_partition wrt to nulls ([#544](https://github.com/dbt-labs/dbt-bigquery/issues/544)) +- Update connections.py with retry logic for 502 BadGateway ([#562](https://github.com/dbt-labs/dbt-bigquery/issues/562)) +- Fix for Python incremental model regression ([#581](https://github.com/dbt-labs/dbt-bigquery/issues/581)) +- Fix failing test by removing no erroneous asserts. 
([#605](https://github.com/dbt-labs/dbt-bigquery/issues/605)) +- Use _make_ref_key_dict instead of _make_ref_key_msg ([#621](https://github.com/dbt-labs/dbt-bigquery/issues/621)) +- add full refresh capabilities to tabular bigquery python models to accommodate schema changes ([#653](https://github.com/dbt-labs/dbt-bigquery/issues/653)) +- Repair accidental change to dev-requirements ([#657](https://github.com/dbt-labs/dbt-bigquery/issues/657)) +- Fix issue of sporadic failure to apply grants during high transaction volumes ([#614](https://github.com/dbt-labs/dbt-bigquery/issues/614)) ### Under the Hood - Remove manual retry to get python job status on cluster ([#422](https://github.com/dbt-labs/dbt-bigquery/issues/422)) - remove tox call to integration tests ([#459](https://github.com/dbt-labs/dbt-bigquery/issues/459)) +- Treat contract config as a python object ([#548](https://github.com/dbt-labs/dbt-bigquery/issues/548), [#633](https://github.com/dbt-labs/dbt-bigquery/issues/633)) +- Update bigquery__format_column macro to support prettier ContractError message" ([#656](https://github.com/dbt-labs/dbt-bigquery/issues/656)) ### Contributors +- [@Kayrnt](https://github.com/Kayrnt) ([#483](https://github.com/dbt-labs/dbt-bigquery/issues/483)) +- [@bruno-szdl](https://github.com/bruno-szdl) ([#560](https://github.com/dbt-labs/dbt-bigquery/issues/560)) - [@dave-connors-3](https://github.com/dave-connors-3) ([#5260](https://github.com/dbt-labs/dbt-bigquery/issues/5260)) -- [@victoriapm](https://github.com/victoriapm) ([#444](https://github.com/dbt-labs/dbt-bigquery/issues/444)) +- [@garsir](https://github.com/garsir) ([#349](https://github.com/dbt-labs/dbt-bigquery/issues/349)) +- [@patkearns10](https://github.com/patkearns10) ([#544](https://github.com/dbt-labs/dbt-bigquery/issues/544), [#562](https://github.com/dbt-labs/dbt-bigquery/issues/562)) +- [@torkjel](https://github.com/torkjel) ([#530](https://github.com/dbt-labs/dbt-bigquery/issues/530)) +- [@victoriapm,](https://github.com/victoriapm,) ([#444](https://github.com/dbt-labs/dbt-bigquery/issues/444), [#568](https://github.com/dbt-labs/dbt-bigquery/issues/568)) ## Previous Releases For information on prior major and minor releases, see their changelogs: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e0c905b39..e9432d363 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -67,7 +67,7 @@ $EDITOR test.env There are a few methods for running tests locally. #### `tox` -`tox` takes care of managing Python virtualenvs and installing dependencies in order to run tests. You can also run tests in parallel, for example you can run unit tests for Python 3.7, Python 3.8, Python 3.9, and `flake8` checks in parallel with `tox -p`. Also, you can run unit tests for specific python versions with `tox -e py37`. The configuration of these tests are located in `tox.ini`. +`tox` takes care of managing Python virtualenvs and installing dependencies in order to run tests. You can also run tests in parallel, for example you can run unit tests for Python 3.8, Python 3.9, and `flake8` checks in parallel with `tox -p`. Also, you can run unit tests for specific python versions with `tox -e py38`. The configuration of these tests are located in `tox.ini`. #### `pytest` Finally, you can also run a specific test or group of tests using `pytest` directly. 
With a Python virtualenv active and dev dependencies installed you can do things like: diff --git a/dbt/adapters/bigquery/__version__.py b/dbt/adapters/bigquery/__version__.py index 4a7a8147e..2c924dd41 100644 --- a/dbt/adapters/bigquery/__version__.py +++ b/dbt/adapters/bigquery/__version__.py @@ -1 +1 @@ -version = "1.5.0b4" +version = "1.5.3" diff --git a/dbt/adapters/bigquery/column.py b/dbt/adapters/bigquery/column.py index 158f6650c..a5a60cfc0 100644 --- a/dbt/adapters/bigquery/column.py +++ b/dbt/adapters/bigquery/column.py @@ -1,10 +1,12 @@ from dataclasses import dataclass -from typing import Optional, List, TypeVar, Iterable, Type, Any +from typing import Optional, List, TypeVar, Iterable, Type, Any, Dict, Union from dbt.adapters.base.column import Column from google.cloud.bigquery import SchemaField +_PARENT_DATA_TYPE_KEY = "__parent_data_type" + Self = TypeVar("Self", bound="BigQueryColumn") @@ -126,3 +128,175 @@ def column_to_bq_schema(self) -> SchemaField: kwargs = {"fields": fields} return SchemaField(self.name, self.dtype, self.mode, **kwargs) # type: ignore[arg-type] + + +def get_nested_column_data_types( + columns: Dict[str, Dict[str, Any]], + constraints: Optional[Dict[str, str]] = None, +) -> Dict[str, Dict[str, Optional[str]]]: + """ + columns: + * Dictionary where keys are of flat columns names and values are dictionary of column attributes + * column names with "." indicate a nested column within a STRUCT type + * e.g. {"a": {"name": "a", "data_type": "string", ...}} + constraints: + * Dictionary where keys are flat column names and values are rendered constraints for the column + * If provided, rendered column is included in returned "data_type" values. + returns: + * Dictionary where keys are root column names and values are corresponding nested data_type values. + * Fields other than "name" and "data_type" are __not__ preserved in the return value for nested columns. + * Fields other than "name" and "data_type" are preserved in the return value for flat columns. 
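A runnable sketch of that flattening contract, under the assumption that a dbt-bigquery 1.5.x install provides this module (column names and types below are illustrative):

```python
from dbt.adapters.bigquery.column import get_nested_column_data_types

# Flat, yml-style columns; dotted names mark fields nested inside a STRUCT.
columns = {
    "a": {"name": "a", "data_type": "string", "description": "a flat column"},
    "b.nested": {"name": "b.nested", "data_type": "string"},
    "b.nested2": {"name": "b.nested2", "data_type": "int64"},
}

# Rendered constraints are keyed by flat column name and folded into the type.
nested = get_nested_column_data_types(columns, constraints={"a": "not null"})

assert nested["a"]["data_type"] == "string not null"
assert nested["b"]["data_type"] == "struct<nested string, nested2 int64>"
assert nested["a"]["description"] == "a flat column"  # extra keys survive for flat columns
```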
+
+    Example:
+    columns: {
+        "a": {"name": "a", "data_type": "string", "description": ...},
+        "b.nested": {"name": "b.nested", "data_type": "string"},
+        "b.nested2": {"name": "b.nested2", "data_type": "string"}
+    }
+
+    returns: {
+        "a": {"name": "a", "data_type": "string", "description": ...},
+        "b": {"name": "b", "data_type": "struct<nested string, nested2 string>"}
+    }
+    """
+    constraints = constraints or {}
+
+    nested_column_data_types: Dict[str, Optional[Union[str, Dict]]] = {}
+    for column in columns.values():
+        _update_nested_column_data_types(
+            column["name"],
+            column.get("data_type"),
+            constraints.get(column["name"]),
+            nested_column_data_types,
+        )
+
+    formatted_nested_column_data_types: Dict[str, Dict[str, Optional[str]]] = {}
+    for column_name, unformatted_column_type in nested_column_data_types.items():
+        formatted_nested_column_data_types[column_name] = {
+            "name": column_name,
+            "data_type": _format_nested_data_type(unformatted_column_type),
+        }
+
+    # add column configs back to flat columns
+    for column_name in formatted_nested_column_data_types:
+        if column_name in columns:
+            formatted_nested_column_data_types[column_name].update(
+                {
+                    k: v
+                    for k, v in columns[column_name].items()
+                    if k not in formatted_nested_column_data_types[column_name]
+                }
+            )
+
+    return formatted_nested_column_data_types
+
+
+def _update_nested_column_data_types(
+    column_name: str,
+    column_data_type: Optional[str],
+    column_rendered_constraint: Optional[str],
+    nested_column_data_types: Dict[str, Optional[Union[str, Dict]]],
+) -> None:
+    """
+    Recursively update nested_column_data_types given a column_name, column_data_type,
+    and optional column_rendered_constraint.
+
+    Examples:
+    >>> nested_column_data_types = {}
+    >>> _update_nested_column_data_types("a", "string", "not null", nested_column_data_types)
+    >>> nested_column_data_types
+    {"a": "string not null"}
+    >>> _update_nested_column_data_types("b.c", "string", "not null", nested_column_data_types)
+    >>> nested_column_data_types
+    {"a": "string not null", "b": {"c": "string not null"}}
+    >>> _update_nested_column_data_types("b.d", "string", None, nested_column_data_types)
+    >>> nested_column_data_types
+    {"a": "string not null", "b": {"c": "string not null", "d": "string"}}
+    """
+    column_name_parts = column_name.split(".")
+    root_column_name = column_name_parts[0]
+
+    if len(column_name_parts) == 1:
+        # Base case: column is not nested - store its data_type concatenated with constraint if provided.
+        column_data_type_and_constraints = (
+            (
+                column_data_type
+                if column_rendered_constraint is None
+                else f"{column_data_type} {column_rendered_constraint}"
+            )
+            if column_data_type
+            else None
+        )
+
+        if existing_nested_column_data_type := nested_column_data_types.get(root_column_name):
+            assert isinstance(existing_nested_column_data_type, dict)  # keeping mypy happy
+            # entry could already exist if this is a parent column -- preserve the parent data type under "_PARENT_DATA_TYPE_KEY"
+            existing_nested_column_data_type.update(
+                {_PARENT_DATA_TYPE_KEY: column_data_type_and_constraints}
+            )
+        else:
+            nested_column_data_types.update({root_column_name: column_data_type_and_constraints})
+    else:
+        parent_data_type = nested_column_data_types.get(root_column_name)
+        if isinstance(parent_data_type, dict):
+            # nested dictionary already initialized
+            pass
+        elif parent_data_type is None:
+            # initialize nested dictionary
+            nested_column_data_types.update({root_column_name: {}})
+        else:
+            # a parent specified its base type -- preserve its data_type and potential rendered constraints
+            # this is used to specify a top-level 'struct' or 'array' field with its own description, constraints, etc
+            nested_column_data_types.update(
+                {root_column_name: {_PARENT_DATA_TYPE_KEY: parent_data_type}}
+            )
+
+        # Recursively process rest of remaining column name
+        remaining_column_name = ".".join(column_name_parts[1:])
+        remaining_column_data_types = nested_column_data_types[root_column_name]
+        assert isinstance(remaining_column_data_types, dict)  # keeping mypy happy
+        _update_nested_column_data_types(
+            remaining_column_name,
+            column_data_type,
+            column_rendered_constraint,
+            remaining_column_data_types,
+        )
+
+
+def _format_nested_data_type(
+    unformatted_nested_data_type: Optional[Union[str, Dict[str, Any]]]
+) -> Optional[str]:
+    """
+    Recursively format a (STRUCT) data type given an arbitrarily nested data type structure.
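To make the formatting rules concrete before the doctest examples below — a sketch using the module's private helpers (illustrative only; `_PARENT_DATA_TYPE_KEY` carries a parent column's own base type, as the diff shows):

```python
from dbt.adapters.bigquery.column import _PARENT_DATA_TYPE_KEY, _format_nested_data_type

# Plain struct: keys are nested field names, values are already-formatted types.
print(_format_nested_data_type({"c": "string", "d": "int64"}))
# struct<c string, d int64>

# A parent that declared its own base type (e.g. an ARRAY of STRUCTs) carries it
# under _PARENT_DATA_TYPE_KEY, which wraps the generated struct type.
print(_format_nested_data_type({_PARENT_DATA_TYPE_KEY: "array", "c": "string"}))
# array<struct<c string>>

# Constraints rendered on the parent trail the fully formatted type.
print(_format_nested_data_type({_PARENT_DATA_TYPE_KEY: "array not null", "c": "string"}))
# array<struct<c string>> not null
```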
+
+    Examples:
+    >>> _format_nested_data_type("string")
+    'string'
+    >>> _format_nested_data_type({'c': 'string not null', 'd': 'string'})
+    'struct<c string not null, d string>'
+    >>> _format_nested_data_type({'c': 'string not null', 'd': {'e': 'string'}})
+    'struct<c string not null, d struct<e string>>'
+    """
+    if unformatted_nested_data_type is None:
+        return None
+    elif isinstance(unformatted_nested_data_type, str):
+        return unformatted_nested_data_type
+    else:
+        parent_data_type, *parent_constraints = unformatted_nested_data_type.pop(
+            _PARENT_DATA_TYPE_KEY, ""
+        ).split() or [None]
+
+        formatted_nested_types = [
+            f"{column_name} {_format_nested_data_type(column_type) or ''}".strip()
+            for column_name, column_type in unformatted_nested_data_type.items()
+        ]
+
+        formatted_nested_type = f"""struct<{", ".join(formatted_nested_types)}>"""
+
+        if parent_data_type and parent_data_type.lower() == "array":
+            formatted_nested_type = f"""array<{formatted_nested_type}>"""
+
+        if parent_constraints:
+            parent_constraints = " ".join(parent_constraints)
+            formatted_nested_type = f"""{formatted_nested_type} {parent_constraints}"""
+
+        return formatted_nested_type
diff --git a/dbt/adapters/bigquery/dataset.py b/dbt/adapters/bigquery/dataset.py
index ebffe1072..c886637d7 100644
--- a/dbt/adapters/bigquery/dataset.py
+++ b/dbt/adapters/bigquery/dataset.py
@@ -6,15 +6,15 @@
 logger = AdapterLogger("BigQuery")


-def add_access_entry_to_dataset(dataset: Dataset, access_entry: AccessEntry) -> Dataset:
-    """Idempotently adds an access entry to a dataset
+def is_access_entry_in_dataset(dataset: Dataset, access_entry: AccessEntry) -> bool:
+    """Check if the access entry already exists in the dataset.

     Args:
         dataset (Dataset): the dataset to be updated
         access_entry (AccessEntry): the access entry to be added to the dataset

     Returns:
-        Dataset
+        bool: True if entry exists in dataset, False otherwise
     """
     access_entries: List[AccessEntry] = dataset.access_entries
     # we can't simply check if an access entry is in the list as the current equality check
@@ -24,8 +24,22 @@ def add_access_entry_to_dataset(dataset: Dataset, access_entry: AccessEntry) ->
         entity_type_match = existing_entry.entity_type == access_entry.entity_type
         property_match = existing_entry._properties.items() <= access_entry._properties.items()
         if role_match and entity_type_match and property_match:
-            logger.warning(f"Access entry {access_entry} " f"already exists in dataset")
-            return dataset
+            return True
+    return False
+
+
+def add_access_entry_to_dataset(dataset: Dataset, access_entry: AccessEntry) -> Dataset:
+    """Adds an access entry to a dataset; always use is_access_entry_in_dataset to check
+    if the access entry already exists before calling this function.
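Paired with the guard added to `grant_access_to` in impl.py further down, the intended check-then-add flow looks roughly like this (a sketch assuming an authenticated `google-cloud-bigquery` client; the dataset id and group are placeholders):

```python
from google.cloud import bigquery
from google.cloud.bigquery.dataset import AccessEntry

from dbt.adapters.bigquery.dataset import (
    add_access_entry_to_dataset,
    is_access_entry_in_dataset,
)

client = bigquery.Client()
dataset = client.get_dataset("my-project.my_dataset")  # placeholder dataset id
entry = AccessEntry(role="READER", entity_type="groupByEmail", entity_id="devs@example.com")

# Only call the API when the entry is actually missing; previously the
# update ran unconditionally, which could race under high transaction volumes.
if is_access_entry_in_dataset(dataset, entry):
    print(f"Access entry {entry} already exists in dataset")
else:
    dataset = add_access_entry_to_dataset(dataset, entry)
    client.update_dataset(dataset, ["access_entries"])
```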
+ + Args: + dataset (Dataset): the dataset to be updated + access_entry (AccessEntry): the access entry to be added to the dataset + + Returns: + Dataset: the updated dataset + """ + access_entries: List[AccessEntry] = dataset.access_entries access_entries.append(access_entry) dataset.access_entries = access_entries return dataset diff --git a/dbt/adapters/bigquery/impl.py b/dbt/adapters/bigquery/impl.py index 9016dfb83..528e0f799 100644 --- a/dbt/adapters/bigquery/impl.py +++ b/dbt/adapters/bigquery/impl.py @@ -2,7 +2,11 @@ import threading from typing import Dict, List, Optional, Any, Set, Union, Type -from dbt.contracts.graph.nodes import ColumnLevelConstraint, ModelLevelConstraint, ConstraintType +from dbt.contracts.graph.nodes import ( + ColumnLevelConstraint, + ModelLevelConstraint, + ConstraintType, +) from dbt.dataclass_schema import dbtClassMixin, ValidationError import dbt.deprecations @@ -23,8 +27,9 @@ from dbt.adapters.cache import _make_ref_key_dict +from dbt.adapters.bigquery.column import get_nested_column_data_types from dbt.adapters.bigquery.relation import BigQueryRelation -from dbt.adapters.bigquery.dataset import add_access_entry_to_dataset +from dbt.adapters.bigquery.dataset import add_access_entry_to_dataset, is_access_entry_in_dataset from dbt.adapters.bigquery import BigQueryColumn from dbt.adapters.bigquery import BigQueryConnectionManager from dbt.adapters.bigquery.python_submissions import ( @@ -140,7 +145,11 @@ def render(self): def _stub_relation(*args, **kwargs): return BigQueryRelation.create( - database="", schema="", identifier="", quote_policy={}, type=BigQueryRelation.Table + database="", + schema="", + identifier="", + quote_policy={}, + type=BigQueryRelation.Table, ) @@ -261,10 +270,21 @@ def check_schema_exists(self, database: str, schema: str) -> bool: return False return True + @available.parse(lambda *a, **k: {}) + @classmethod + def nest_column_data_types( + cls, + columns: Dict[str, Dict[str, Any]], + constraints: Optional[Dict[str, str]] = None, + ) -> Dict[str, Dict[str, Optional[str]]]: + return get_nested_column_data_types(columns, constraints) + def get_columns_in_relation(self, relation: BigQueryRelation) -> List[BigQueryColumn]: try: table = self.connections.get_bq_table( - database=relation.database, schema=relation.schema, identifier=relation.identifier + database=relation.database, + schema=relation.schema, + identifier=relation.identifier, ) return self._get_dbt_columns_from_bq_table(table) @@ -435,7 +455,10 @@ def _materialize_as_view(self, model: Dict[str, Any]) -> str: logger.debug("Model SQL ({}):\n{}".format(model_alias, model_code)) self.connections.create_view( - database=model_database, schema=model_schema, table_name=model_alias, sql=model_code + database=model_database, + schema=model_schema, + table_name=model_alias, + sql=model_code, ) return "CREATE VIEW" @@ -456,7 +479,10 @@ def _materialize_as_table( logger.debug("Model SQL ({}):\n{}".format(table_name, model_sql)) self.connections.create_table( - database=model_database, schema=model_schema, table_name=table_name, sql=model_sql + database=model_database, + schema=model_schema, + table_name=table_name, + sql=model_sql, ) return "CREATE TABLE" @@ -487,7 +513,10 @@ def get_column_schema_from_query(self, sql: str) -> List[BigQueryColumn]: """ _, iterator = self.connections.raw_execute(sql) columns = [self.Column.create_from_field(field) for field in iterator.schema] - return columns + flattened_columns = [] + for column in columns: + flattened_columns += column.flatten() + 
return flattened_columns @available.parse(lambda *a, **k: False) def get_columns_in_select_sql(self, select_sql: str) -> List[BigQueryColumn]: @@ -617,7 +646,9 @@ def is_replaceable( try: table = self.connections.get_bq_table( - database=relation.database, schema=relation.schema, identifier=relation.identifier + database=relation.database, + schema=relation.schema, + identifier=relation.identifier, ) except google.cloud.exceptions.NotFound: return True @@ -657,8 +688,8 @@ def _update_column_dict(self, bq_column_dict, dbt_columns, parent=""): if dotted_column_name in dbt_columns: column_config = dbt_columns[dotted_column_name] bq_column_dict["description"] = column_config.get("description") - bq_column_dict["policyTags"] = {"names": column_config.get("policy_tags", list())} - + if bq_column_dict["type"] != "RECORD": + bq_column_dict["policyTags"] = {"names": column_config.get("policy_tags", list())} new_fields = [] for child_col_dict in bq_column_dict.get("fields", list()): new_child_column_dict = self._update_column_dict( @@ -737,7 +768,12 @@ def load_dataframe(self, database, schema, table_name, agate_table, column_overr @available.parse_none def upload_file( - self, local_file_path: str, database: str, table_schema: str, table_name: str, **kwargs + self, + local_file_path: str, + database: str, + table_schema: str, + table_name: str, + **kwargs, ) -> None: conn = self.connections.get_thread_connection() client = conn.handle @@ -849,8 +885,12 @@ def grant_access_to(self, entity, entity_type, role, grant_target_dict): dataset_ref = self.connections.dataset_ref(grant_target.project, grant_target.dataset) dataset = client.get_dataset(dataset_ref) access_entry = AccessEntry(role, entity_type, entity) - dataset = add_access_entry_to_dataset(dataset, access_entry) - client.update_dataset(dataset, ["access_entries"]) + # only perform update if access entry not in dataset + if is_access_entry_in_dataset(dataset, access_entry): + logger.warning(f"Access entry {access_entry} " f"already exists in dataset") + else: + dataset = add_access_entry_to_dataset(dataset, access_entry) + client.update_dataset(dataset, ["access_entries"]) @available.parse_none def get_dataset_location(self, relation): @@ -921,6 +961,31 @@ def python_submission_helpers(self) -> Dict[str, Type[PythonJobHelper]]: "serverless": ServerlessDataProcHelper, } + @available + @classmethod + def render_raw_columns_constraints(cls, raw_columns: Dict[str, Dict[str, Any]]) -> List: + rendered_constraints: Dict[str, str] = {} + for raw_column in raw_columns.values(): + for con in raw_column.get("constraints", None): + constraint = cls._parse_column_constraint(con) + rendered_constraint = cls.process_parsed_constraint( + constraint, cls.render_column_constraint + ) + + if rendered_constraint: + column_name = raw_column["name"] + if column_name not in rendered_constraints: + rendered_constraints[column_name] = rendered_constraint + else: + rendered_constraints[column_name] += f" {rendered_constraint}" + + nested_columns = cls.nest_column_data_types(raw_columns, rendered_constraints) + rendered_column_constraints = [ + f"{cls.quote(column['name']) if column.get('quote') else column['name']} {column['data_type']}" + for column in nested_columns.values() + ] + return rendered_column_constraints + @classmethod def render_column_constraint(cls, constraint: ColumnLevelConstraint) -> Optional[str]: c = super().render_column_constraint(constraint) diff --git a/dbt/adapters/bigquery/python_submissions.py b/dbt/adapters/bigquery/python_submissions.py 
index e5fbf037e..bf91f8eab 100644 --- a/dbt/adapters/bigquery/python_submissions.py +++ b/dbt/adapters/bigquery/python_submissions.py @@ -7,6 +7,8 @@ from google.api_core.client_options import ClientOptions from google.cloud import storage, dataproc_v1 # type: ignore from google.protobuf.json_format import ParseDict +import time +import uuid OPERATION_RETRY_TIME = 10 @@ -99,7 +101,6 @@ def _submit_dataproc_job(self) -> dataproc_v1.types.jobs.Job: } ) response = operation.result(retry=self.retry) - # check if job failed if response.status.state == 6: raise ValueError(response.status.details) return response @@ -114,16 +115,27 @@ def _get_job_client(self) -> dataproc_v1.BatchControllerClient: def _submit_dataproc_job(self) -> dataproc_v1.types.jobs.Job: batch = self._configure_batch() parent = f"projects/{self.credential.execution_project}/locations/{self.credential.dataproc_region}" + batch_id = uuid.uuid4().hex request = dataproc_v1.CreateBatchRequest( parent=parent, batch=batch, + batch_id=batch_id ) # make the request operation = self.job_client.create_batch(request=request) # type: ignore # this takes quite a while, waiting on GCP response to resolve # (not a google-api-core issue, more likely a dataproc serverless issue) - response = operation.result(retry=self.retry) + + state = "PENDING" + while state not in ["State.SUCCEEDED", "State.FAILED", "State.CANCELLED"]: + response = self.job_client.get_batch( + request = dataproc_v1.GetBatchRequest(name = ''.join([parent, "/batches/", batch_id])), + # retry=self.retry (This retry polls way too many times per second) + ) + state = str(response.state) + time.sleep(2) + return response # there might be useful results here that we can parse and return # Dataproc job output is saved to the Cloud Storage bucket diff --git a/dbt/include/bigquery/macros/utils/get_columns_spec_ddl.sql b/dbt/include/bigquery/macros/utils/get_columns_spec_ddl.sql index d078e8ad9..1a4193c71 100644 --- a/dbt/include/bigquery/macros/utils/get_columns_spec_ddl.sql +++ b/dbt/include/bigquery/macros/utils/get_columns_spec_ddl.sql @@ -3,3 +3,35 @@ {% set formatted = column.column.lower() ~ " " ~ data_type %} {{ return({'name': column.name, 'data_type': data_type, 'formatted': formatted}) }} {%- endmacro -%} + +{% macro bigquery__get_empty_schema_sql(columns) %} + {%- set col_err = [] -%} + {% for col in columns.values() %} + {%- if col['data_type'] is not defined -%} + {{ col_err.append(col['name']) }} + {%- endif -%} + {%- endfor -%} + {%- if (col_err | length) > 0 -%} + {{ exceptions.column_type_missing(column_names=col_err) }} + {%- endif -%} + + {%- set columns = adapter.nest_column_data_types(columns) -%} + {{ return(dbt.default__get_empty_schema_sql(columns)) }} +{% endmacro %} + +{% macro bigquery__get_select_subquery(sql) %} + select {{ adapter.dispatch('get_column_names')() }} + from ( + {{ sql }} + ) as model_subq +{%- endmacro %} + +{% macro bigquery__get_column_names() %} + {#- loop through nested user_provided_columns to get column names -#} + {%- set user_provided_columns = adapter.nest_column_data_types(model['columns']) -%} + {%- for i in user_provided_columns %} + {%- set col = user_provided_columns[i] -%} + {%- set col_name = adapter.quote(col['name']) if col.get('quote') else col['name'] -%} + {{ col_name }}{{ ", " if not loop.last }} + {%- endfor -%} +{% endmacro %} diff --git a/dev-requirements.txt b/dev-requirements.txt index d0ee86a79..f62b3f4ab 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,32 +1,30 @@ # install latest changes in 
dbt-core # TODO: how to automate switching from develop to version branches? -git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core -git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter +git+https://github.com/dbt-labs/dbt-core.git@1.5.latest#egg=dbt-core&subdirectory=core +git+https://github.com/dbt-labs/dbt-core.git@1.5.latest#egg=dbt-tests-adapter&subdirectory=tests/adapter # if version 1.x or greater -> pin to major version # if version 0.x -> pin to minor -black~=23.1 +black~=23.3 bumpversion~=0.6.0 click~=8.1 -flake8~=5.0;python_version=="3.7" -flake8~=6.0;python_version>="3.8" +ddtrace~=1.16 +flake8~=6.0 flaky~=3.7 freezegun~=1.2 ipdb~=0.13.13 -mypy==1.1.1 # patch updates have historically introduced breaking changes -pip-tools~=6.12 -pre-commit~=2.21;python_version=="3.7" -pre-commit~=3.2;python_version>="3.8" +mypy==1.2.0 # patch updates have historically introduced breaking changes +pip-tools~=6.13 +pre-commit~=3.2 pre-commit-hooks~=4.4 -pytest~=7.2 +pytest~=7.3 pytest-csv~=3.0 pytest-dotenv~=0.5.2 pytest-logbook~=1.2 pytest-xdist~=3.2 -pytz~=2023.2 -tox~=3.0;python_version=="3.7" -tox~=4.4;python_version>="3.8" -types-pytz~=2023.2 +pytz~=2023.3 +tox~=4.4 +types-pytz~=2023.3 types-requests~=2.28 types-protobuf~=4.22 twine~=4.0 diff --git a/scripts/env-setup.sh b/scripts/env-setup.sh index 726a00cf8..ca873b3aa 100644 --- a/scripts/env-setup.sh +++ b/scripts/env-setup.sh @@ -6,7 +6,7 @@ echo "TOXENV=integration-bigquery" >> $GITHUB_ENV echo "INTEGRATION_TESTS_SECRETS_PREFIX=BIGQUERY_TEST" >> $GITHUB_ENV # Set environment variables required for integration tests echo "DBT_TEST_USER_1=group:buildbot@dbtlabs.com" >> $GITHUB_ENV -echo "DBT_TEST_USER_2=group:dev-core@dbtlabs.com" >> $GITHUB_ENV +echo "DBT_TEST_USER_2=group:engineering-core-team@dbtlabs.com" >> $GITHUB_ENV echo "DBT_TEST_USER_3=serviceAccount:dbt-integration-test-user@dbt-test-env.iam.gserviceaccount.com" >> $GITHUB_ENV echo "DATAPROC_REGION=us-central1" >> $GITHUB_ENV echo "DATAPROC_CLUSTER_NAME=dbt-test-1" >> $GITHUB_ENV diff --git a/setup.py b/setup.py index b401f311e..f405a1201 100644 --- a/setup.py +++ b/setup.py @@ -2,9 +2,9 @@ import sys # require python 3.7 or newer -if sys.version_info < (3, 7): +if sys.version_info < (3, 8): print("Error: dbt does not support this version of Python.") - print("Please upgrade to Python 3.7 or higher.") + print("Please upgrade to Python 3.8 or higher.") sys.exit(1) try: @@ -58,7 +58,7 @@ def _dbt_core_version(plugin_version: str) -> str: package_name = "dbt-bigquery" -package_version = "1.5.0b4" +package_version = "1.5.3" dbt_core_version = _dbt_core_version(_dbt_bigquery_version()) description = """The BigQuery adapter plugin for dbt""" @@ -87,7 +87,6 @@ def _dbt_core_version(plugin_version: str) -> str: "Operating System :: Microsoft :: Windows", "Operating System :: MacOS :: MacOS X", "Operating System :: POSIX :: Linux", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", diff --git a/test.env.example b/test.env.example index 58893f5af..ffe9ee060 100644 --- a/test.env.example +++ b/test.env.example @@ -8,7 +8,7 @@ BIGQUERY_TEST_SERVICE_ACCOUNT_JSON='{}' # tests for local ci/cd DBT_TEST_USER_1="group:buildbot@dbtlabs.com" -DBT_TEST_USER_2="group:dev-core@dbtlabs.com" +DBT_TEST_USER_2="group:engineering-core-team@dbtlabs.com" 
diff --git a/dev-requirements.txt b/dev-requirements.txt
index d0ee86a79..f62b3f4ab 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,32 +1,30 @@
 # install latest changes in dbt-core
 # TODO: how to automate switching from develop to version branches?
-git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core
-git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter
+git+https://github.com/dbt-labs/dbt-core.git@1.5.latest#egg=dbt-core&subdirectory=core
+git+https://github.com/dbt-labs/dbt-core.git@1.5.latest#egg=dbt-tests-adapter&subdirectory=tests/adapter
 
 # if version 1.x or greater -> pin to major version
 # if version 0.x -> pin to minor
-black~=23.1
+black~=23.3
 bumpversion~=0.6.0
 click~=8.1
-flake8~=5.0;python_version=="3.7"
-flake8~=6.0;python_version>="3.8"
+ddtrace~=1.16
+flake8~=6.0
 flaky~=3.7
 freezegun~=1.2
 ipdb~=0.13.13
-mypy==1.1.1  # patch updates have historically introduced breaking changes
-pip-tools~=6.12
-pre-commit~=2.21;python_version=="3.7"
-pre-commit~=3.2;python_version>="3.8"
+mypy==1.2.0  # patch updates have historically introduced breaking changes
+pip-tools~=6.13
+pre-commit~=3.2
 pre-commit-hooks~=4.4
-pytest~=7.2
+pytest~=7.3
 pytest-csv~=3.0
 pytest-dotenv~=0.5.2
 pytest-logbook~=1.2
 pytest-xdist~=3.2
-pytz~=2023.2
-tox~=3.0;python_version=="3.7"
-tox~=4.4;python_version>="3.8"
-types-pytz~=2023.2
+pytz~=2023.3
+tox~=4.4
+types-pytz~=2023.3
 types-requests~=2.28
 types-protobuf~=4.22
 twine~=4.0
diff --git a/scripts/env-setup.sh b/scripts/env-setup.sh
index 726a00cf8..ca873b3aa 100644
--- a/scripts/env-setup.sh
+++ b/scripts/env-setup.sh
@@ -6,7 +6,7 @@ echo "TOXENV=integration-bigquery" >> $GITHUB_ENV
 echo "INTEGRATION_TESTS_SECRETS_PREFIX=BIGQUERY_TEST" >> $GITHUB_ENV
 # Set environment variables required for integration tests
 echo "DBT_TEST_USER_1=group:buildbot@dbtlabs.com" >> $GITHUB_ENV
-echo "DBT_TEST_USER_2=group:dev-core@dbtlabs.com" >> $GITHUB_ENV
+echo "DBT_TEST_USER_2=group:engineering-core-team@dbtlabs.com" >> $GITHUB_ENV
 echo "DBT_TEST_USER_3=serviceAccount:dbt-integration-test-user@dbt-test-env.iam.gserviceaccount.com" >> $GITHUB_ENV
 echo "DATAPROC_REGION=us-central1" >> $GITHUB_ENV
 echo "DATAPROC_CLUSTER_NAME=dbt-test-1" >> $GITHUB_ENV
diff --git a/setup.py b/setup.py
index b401f311e..f405a1201 100644
--- a/setup.py
+++ b/setup.py
@@ -2,9 +2,9 @@
 import sys
 
-# require python 3.7 or newer
-if sys.version_info < (3, 7):
+# require python 3.8 or newer
+if sys.version_info < (3, 8):
     print("Error: dbt does not support this version of Python.")
-    print("Please upgrade to Python 3.7 or higher.")
+    print("Please upgrade to Python 3.8 or higher.")
     sys.exit(1)
 
 try:
@@ -58,7 +58,7 @@ def _dbt_core_version(plugin_version: str) -> str:
 
 package_name = "dbt-bigquery"
-package_version = "1.5.0b4"
+package_version = "1.5.3"
 dbt_core_version = _dbt_core_version(_dbt_bigquery_version())
 description = """The BigQuery adapter plugin for dbt"""
 
@@ -87,7 +87,6 @@ def _dbt_core_version(plugin_version: str) -> str:
         "Operating System :: Microsoft :: Windows",
         "Operating System :: MacOS :: MacOS X",
         "Operating System :: POSIX :: Linux",
-        "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
diff --git a/test.env.example b/test.env.example
index 58893f5af..ffe9ee060 100644
--- a/test.env.example
+++ b/test.env.example
@@ -8,7 +8,7 @@ BIGQUERY_TEST_SERVICE_ACCOUNT_JSON='{}'
 
 # tests for local ci/cd
 DBT_TEST_USER_1="group:buildbot@dbtlabs.com"
-DBT_TEST_USER_2="group:dev-core@dbtlabs.com"
+DBT_TEST_USER_2="group:engineering-core-team@dbtlabs.com"
 DBT_TEST_USER_3="serviceAccount:dbt-integration-test-user@dbt-test-env.iam.gserviceaccount.com"
 
 # only needed for python model
diff --git a/tests/functional/adapter/constraints/fixtures.py b/tests/functional/adapter/constraints/fixtures.py
new file mode 100644
index 000000000..415043403
--- /dev/null
+++ b/tests/functional/adapter/constraints/fixtures.py
@@ -0,0 +1,118 @@
+my_model_struct_wrong_data_type_sql = """
+{{ config(materialized = "table") }}
+
+select
+  STRUCT(1 AS struct_column_being_tested, "test" AS another_struct_column) as a
+"""
+
+my_model_struct_correct_data_type_sql = """
+{{ config(materialized = "table") }}
+
+select
+  STRUCT("test" AS struct_column_being_tested, "test" AS b) as a
+"""
+
+model_struct_data_type_schema_yml = """
+version: 2
+models:
+  - name: contract_struct_wrong
+    config:
+      contract:
+        enforced: true
+    columns:
+      - name: a.struct_column_being_tested
+        data_type: string
+      - name: a.b
+        data_type: string
+
+  - name: contract_struct_correct
+    config:
+      contract:
+        enforced: true
+    columns:
+      - name: a.struct_column_being_tested
+        data_type: string
+      - name: a.b
+        data_type: string
+"""
+
+my_model_double_struct_wrong_data_type_sql = """
+{{ config(materialized = "table") }}
+
+select
+  STRUCT(
+    STRUCT(1 AS struct_column_being_tested, "test" AS c) as b,
+    "test" as d
+  ) as a
+"""
+
+my_model_double_struct_correct_data_type_sql = """
+{{ config(materialized = "table") }}
+
+select
+  STRUCT(
+    STRUCT("test" AS struct_column_being_tested, "test" AS c) as b,
+    "test" as d
+  ) as a
+"""
+
+model_double_struct_data_type_schema_yml = """
+version: 2
+models:
+  - name: contract_struct_wrong
+    config:
+      contract:
+        enforced: true
+    columns:
+      - name: a.b.struct_column_being_tested
+        data_type: string
+      - name: a.b.c
+        data_type: string
+      - name: a.d
+        data_type: string
+
+  - name: contract_struct_correct
+    config:
+      contract:
+        enforced: true
+    columns:
+      - name: a.b.struct_column_being_tested
+        data_type: string
+      - name: a.b.c
+        data_type: string
+      - name: a.d
+        data_type: string
+"""
+
+
+my_model_struct_sql = """
+{{
+  config(
+    materialized = "table"
+  )
+}}
+
+select STRUCT("test" as nested_column, "test" as nested_column2) as id
+"""
+
+
+model_struct_schema_yml = """
+version: 2
+models:
+  - name: my_model
+    config:
+      contract:
+        enforced: true
+    columns:
+      - name: id.nested_column
+        quote: true
+        data_type: string
+        description: hello
+        constraints:
+          - type: not_null
+          - type: unique
+      - name: id.nested_column2
+        data_type: string
+        constraints:
+          - type: unique
+"""
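To make the dotted paths in `model_struct_schema_yml` concrete: the two `id.*` entries fold into a single STRUCT column on the `id` field. A hedged sketch using the same helper as above, with the YAML constraint translated by hand to a constraint string (that translation step is simplified here, and nested-field quoting is not shown):

from dbt.adapters.bigquery.column import get_nested_column_data_types

columns = {
    "id.nested_column": {"name": "id.nested_column", "data_type": "string"},
    "id.nested_column2": {"name": "id.nested_column2", "data_type": "string"},
}
nested = get_nested_column_data_types(columns, {"id.nested_column": "not null"})
# roughly: {"id": {"name": "id",
#                  "data_type": "struct<nested_column string not null, nested_column2 string>"}}
# which is the shape the expected DDL in the test file below asserts against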
"constraints_schema.yml": constraints_yml, + } + + +class BaseStructContract: + @pytest.fixture + def wrong_schema_data_type(self): + return "INT64" + + @pytest.fixture + def correct_schema_data_type(self): + return "STRING" + + @pytest.fixture(scope="class") + def models(self): + return { + "contract_struct_schema.yml": model_struct_data_type_schema_yml, + "contract_struct_wrong.sql": my_model_struct_wrong_data_type_sql, + "contract_struct_correct.sql": my_model_struct_correct_data_type_sql, + } + + def test__struct_contract_wrong_data_type( + self, project, correct_schema_data_type, wrong_schema_data_type + ): + results, log_output = run_dbt_and_capture( + ["run", "-s", "contract_struct_wrong"], expect_pass=False + ) + assert len(results) == 1 + assert results[0].node.config.contract.enforced + + expected = [ + "struct_column_being_tested", + wrong_schema_data_type, + correct_schema_data_type, + "data type mismatch", + ] + assert all([(exp in log_output or exp.upper() in log_output) for exp in expected]) + + def test__struct_contract_correct_data_type(self, project): + results = run_dbt(["run", "-s", "contract_struct_correct"]) + + assert len(results) == 1 + assert results[0].node.config.contract.enforced + + +class TestBigQueryStructContract(BaseStructContract): + pass + + +class TestBigQueryDoubleStructContract(BaseStructContract): + @pytest.fixture(scope="class") + def models(self): + return { + "contract_struct_schema.yml": model_double_struct_data_type_schema_yml, + "contract_struct_wrong.sql": my_model_double_struct_wrong_data_type_sql, + "contract_struct_correct.sql": my_model_double_struct_correct_data_type_sql, + } + + +class TestBigQueryTableConstraintsRuntimeDdlEnforcement(BaseConstraintsRuntimeDdlEnforcement): + @pytest.fixture(scope="class") + def models(self): + return { + "my_model.sql": my_model_wrong_order_depends_on_fk_sql, + "foreign_key_model.sql": foreign_key_model_sql, + "constraints_schema.yml": model_fk_constraint_schema_yml, + } + + @pytest.fixture(scope="class") + def expected_sql(self, project): + return _expected_sql_bigquery + + +class TestBigQueryStructTableConstraintsRuntimeDdlEnforcement( + BaseConstraintsRuntimeDdlEnforcement +): + @pytest.fixture(scope="class") + def models(self): + return { + "my_model.sql": my_model_struct_sql, + "constraints_schema.yml": model_struct_schema_yml, + } + + @pytest.fixture(scope="class") + def expected_sql(self, project): + return _expected_struct_sql_bigquery + + +class TestBigQueryTableConstraintsRollback(BaseConstraintsRollback): + @pytest.fixture(scope="class") + def models(self): + return { + "my_model.sql": my_model_sql, + "constraints_schema.yml": constraints_yml, + } + + @pytest.fixture(scope="class") + def expected_error_messages(self): + return ["Required field id cannot be null"] + +class TestBigQueryIncrementalConstraintsRuntimeDdlEnforcement( + BaseIncrementalConstraintsRuntimeDdlEnforcement +): + @pytest.fixture(scope="class") + def models(self): + return { + "my_model.sql": my_model_incremental_wrong_order_depends_on_fk_sql, + "foreign_key_model.sql": foreign_key_model_sql, + "constraints_schema.yml": model_fk_constraint_schema_yml, + } + + @pytest.fixture(scope="class") + def expected_sql(self, project): + return _expected_sql_bigquery + + +class TestBigQueryIncrementalConstraintsRollback( + BaseIncrementalConstraintsRollback +): + @pytest.fixture(scope="class") + def models(self): + return { + "my_model.sql": my_incremental_model_sql, + "constraints_schema.yml": constraints_yml, + } + + 
diff --git a/tests/functional/adapter/store_test_failures_tests/test_store_test_failures.py b/tests/functional/adapter/store_test_failures_tests/test_store_test_failures.py
index b04579f4b..d22dab3ae 100644
--- a/tests/functional/adapter/store_test_failures_tests/test_store_test_failures.py
+++ b/tests/functional/adapter/store_test_failures_tests/test_store_test_failures.py
@@ -1,9 +1,22 @@
 import pytest
+from dbt.tests.adapter.store_test_failures_tests.test_store_test_failures import (
+    StoreTestFailuresBase,
+)
 
-from dbt.tests.adapter.store_test_failures_tests.test_store_test_failures import StoreTestFailuresBase
+
+TEST_AUDIT_SCHEMA_SUFFIX = "dbt_test__aud"
 
 
 class TestBigQueryStoreTestFailures(StoreTestFailuresBase):
+    @pytest.fixture(scope="function", autouse=True)
+    def teardown_method(self, project):
+        yield
+        relation = project.adapter.Relation.create(
+            database=project.database, schema=f"{project.test_schema}_{TEST_AUDIT_SCHEMA_SUFFIX}"
+        )
+
+        project.adapter.drop_schema(relation)
+
     def test_store_and_assert(self, project):
         self.run_tests_store_one_failure(project)
         self.run_tests_store_failures_and_assert(project)
diff --git a/tests/functional/adapter/test_constraints.py b/tests/functional/adapter/test_constraints.py
deleted file mode 100644
index e0d30cf6f..000000000
--- a/tests/functional/adapter/test_constraints.py
+++ /dev/null
@@ -1,201 +0,0 @@
-import pytest
-from dbt.tests.util import relation_from_name
-from dbt.tests.adapter.constraints.test_constraints import (
-    BaseTableConstraintsColumnsEqual,
-    BaseViewConstraintsColumnsEqual,
-    BaseIncrementalConstraintsColumnsEqual,
-    BaseConstraintsRuntimeDdlEnforcement,
-    BaseConstraintsRollback,
-    BaseIncrementalConstraintsRuntimeDdlEnforcement,
-    BaseIncrementalConstraintsRollback, BaseModelConstraintsRuntimeEnforcement,
-)
-from dbt.tests.adapter.constraints.fixtures import (
-    my_model_sql,
-    my_incremental_model_sql,
-    my_model_wrong_order_sql,
-    my_model_view_wrong_order_sql,
-    my_model_incremental_wrong_order_sql,
-    my_model_wrong_name_sql,
-    my_model_view_wrong_name_sql,
-    my_model_incremental_wrong_name_sql,
-    model_schema_yml, constrained_model_schema_yml,
-)
-
-_expected_sql_bigquery = """
-create or replace table <model_identifier> (
-    id integer not null primary key not enforced,
-    color string,
-    date_day string
-)
-OPTIONS()
-as (
-    select id,
-    color,
-    date_day from
-  (
-    select 'blue' as color,
-    1 as id,
-    '2019-01-01' as date_day
-  ) as model_subq
-);
-"""
-
-# Different on BigQuery:
-# - does not support a data type named 'text' (TODO handle this via type translation/aliasing!)
-constraints_yml = model_schema_yml.replace("text", "string")
-model_constraints_yml = constrained_model_schema_yml.replace("text", "string")
-
-class BigQueryColumnEqualSetup:
-    @pytest.fixture
-    def string_type(self):
-        return "STRING"
-
-    @pytest.fixture
-    def int_type(self):
-        return "INT64"
-
-    @pytest.fixture
-    def data_types(self, int_type, string_type):
-        # sql_column_value, schema_data_type, error_data_type
-        return [
-            ['1', int_type, int_type],
-            ["'1'", string_type, string_type],
-            ["cast('2019-01-01' as date)", 'date', 'DATE'],
-            ["true", 'bool', 'BOOL'],
-            ["cast('2013-11-03 00:00:00-07' as TIMESTAMP)", 'timestamp', 'TIMESTAMP'],
-            ["['a','b','c']", f'ARRAY<{string_type}>', f'ARRAY<{string_type}>'],
-            ["[1,2,3]", f'ARRAY<{int_type}>', f'ARRAY<{int_type}>'],
-            ["cast(1 as NUMERIC)", 'numeric', 'NUMERIC'],
-            ["""JSON '{"name": "Cooper", "forname": "Alice"}'""", 'json', 'JSON'],
-            ['STRUCT("Rudisha" AS name, [23.4, 26.3, 26.4, 26.1] AS laps)', 'STRUCT<name STRING, laps ARRAY<FLOAT64>>', 'STRUCT<name STRING, laps ARRAY<FLOAT64>>']
-        ]
-
-
-class TestBigQueryTableConstraintsColumnsEqual(
-    BigQueryColumnEqualSetup,
-    BaseTableConstraintsColumnsEqual
-):
-    @pytest.fixture(scope="class")
-    def models(self):
-        return {
-            "my_model_wrong_order.sql": my_model_wrong_order_sql,
-            "my_model_wrong_name.sql": my_model_wrong_name_sql,
-            "constraints_schema.yml": constraints_yml,
-        }
-
-
-class TestBigQueryViewConstraintsColumnsEqual(
-    BigQueryColumnEqualSetup,
-    BaseViewConstraintsColumnsEqual
-):
-    @pytest.fixture(scope="class")
-    def models(self):
-        return {
-            "my_model_wrong_order.sql": my_model_view_wrong_order_sql,
-            "my_model_wrong_name.sql": my_model_view_wrong_name_sql,
-            "constraints_schema.yml": constraints_yml,
-        }
-
-
-class TestBigQueryIncrementalConstraintsColumnsEqual(
-    BigQueryColumnEqualSetup,
-    BaseIncrementalConstraintsColumnsEqual
-):
-    @pytest.fixture(scope="class")
-    def models(self):
-        return {
-            "my_model_wrong_order.sql": my_model_incremental_wrong_order_sql,
-            "my_model_wrong_name.sql": my_model_incremental_wrong_name_sql,
-            "constraints_schema.yml": constraints_yml,
-        }
-
-
-class TestBigQueryTableConstraintsRuntimeDdlEnforcement(
-    BaseConstraintsRuntimeDdlEnforcement
-):
-    @pytest.fixture(scope="class")
-    def models(self):
-        return {
-            "my_model.sql": my_model_wrong_order_sql,
-            "constraints_schema.yml": constraints_yml,
-        }
-
-    @pytest.fixture(scope="class")
-    def expected_sql(self, project):
-        return _expected_sql_bigquery
-
-
-class TestBigQueryTableConstraintsRollback(
-    BaseConstraintsRollback
-):
-    @pytest.fixture(scope="class")
-    def models(self):
-        return {
-            "my_model.sql": my_model_sql,
-            "constraints_schema.yml": constraints_yml,
-        }
-
-    @pytest.fixture(scope="class")
-    def expected_error_messages(self):
-        return ["Required field id cannot be null"]
-
-class TestBigQueryIncrementalConstraintsRuntimeDdlEnforcement(
-    BaseIncrementalConstraintsRuntimeDdlEnforcement
-):
-    @pytest.fixture(scope="class")
-    def models(self):
-        return {
-            "my_model.sql": my_model_incremental_wrong_order_sql,
-            "constraints_schema.yml": constraints_yml,
-        }
-
-    @pytest.fixture(scope="class")
-    def expected_sql(self, project):
-        return _expected_sql_bigquery
-
-
-class TestBigQueryIncrementalConstraintsRollback(
-    BaseIncrementalConstraintsRollback
-):
-    @pytest.fixture(scope="class")
-    def models(self):
-        return {
-            "my_model.sql": my_incremental_model_sql,
-            "constraints_schema.yml": constraints_yml,
-        }
-
-    @pytest.fixture(scope="class")
-    def expected_error_messages(self):
-        return ["Required field id cannot be null"]
-
-
-class TestBigQueryModelConstraintsRuntimeEnforcement(BaseModelConstraintsRuntimeEnforcement):
-
-    @pytest.fixture(scope="class")
-    def models(self):
-        return {
-            "my_model.sql": my_incremental_model_sql,
-            "constraints_schema.yml": model_constraints_yml,
-        }
-
-    @pytest.fixture(scope="class")
-    def expected_sql(self):
-        return """
-create or replace table <model_identifier> (
-    id integer not null,
-    color string,
-    date_day string,
-    primary key (id) not enforced
-)
-OPTIONS()
-as (
-    select id,
-    color,
-    date_day from
-  (
-    select 1 as id,
-    'blue' as color,
-    '2019-01-01' as date_day
-  ) as model_subq
-);
-"""
diff --git a/tests/functional/adapter/test_grant_access_to.py b/tests/functional/adapter/test_grant_access_to.py
index 76996479d..109cab125 100644
--- a/tests/functional/adapter/test_grant_access_to.py
+++ b/tests/functional/adapter/test_grant_access_to.py
@@ -1,8 +1,10 @@
-from abc import abstractmethod
+import time
+
 import pytest
-import os
+
 from dbt.tests.util import run_dbt
 
+
 SELECT_1 = """
 {{ config(
     materialized='view',
@@ -45,6 +47,7 @@ def test_grant_access_succeeds(self, project):
         # Need to run twice to validate idempotency
         results = run_dbt(["run"])
         assert len(results) == 2
+        time.sleep(10)
         results = run_dbt(["run"])
         assert len(results) == 2
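The fixed `time.sleep(10)` above gives BigQuery time to propagate the dataset access update before the idempotency re-run. A bounded retry is an alternative pattern, sketched below purely for illustration (the patch itself deliberately uses the simple sleep; names and timings here are assumptions):

import time


def run_until_stable(run_fn, attempts: int = 5, wait_s: float = 5.0):
    # retry a step that can fail while an IAM/access grant is still
    # propagating, instead of sleeping a fixed 10 seconds
    for attempt in range(1, attempts + 1):
        try:
            return run_fn()
        except Exception:
            if attempt == attempts:
                raise
            time.sleep(wait_s)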
diff --git a/tests/functional/test_delete_column_policy.py b/tests/functional/test_delete_column_policy.py
index 9ba2353a3..90cb7b5a5 100644
--- a/tests/functional/test_delete_column_policy.py
+++ b/tests/functional/test_delete_column_policy.py
@@ -1,6 +1,9 @@
 import pytest
 from dbt.tests.util import (
-    run_dbt, get_connection, relation_from_name, write_config_file
+    run_dbt,
+    get_connection,
+    relation_from_name,
+    write_config_file,
 )
 
 from dbt.adapters.bigquery import BigQueryRelation
@@ -13,7 +16,9 @@
 }}
 
 select
-  1 field
+  struct(
+    1 as field
+  ) as first_struct
 """
 
 _POLICY_TAG_YML = """version: 2
@@ -21,7 +26,8 @@
 models:
 - name: policy_tag_table
   columns:
-  - name: field
+  - name: first_struct
+  - name: first_struct.field
     policy_tags:
       - '{{ var("policy_tag") }}'
 """
@@ -31,7 +37,8 @@
 models:
 - name: policy_tag_table
   columns:
-  - name: field
+  - name: first_struct
+  - name: first_struct.field
 """
 
 # Manually generated https://console.cloud.google.com/bigquery/policy-tags?project=dbt-test-env
@@ -41,34 +48,39 @@
 class TestBigqueryDeleteColumnPolicy:
     """See BQ docs for more info on policy tags:
-    https://cloud.google.com/bigquery/docs/column-level-security#work_with_policy_tags"""
+    https://cloud.google.com/bigquery/docs/column-level-security#work_with_policy_tags
+    """
 
     @pytest.fixture(scope="class")
     def project_config_update(self):
-        return {
-            'config-version': 2,
-            'vars': {
-                'policy_tag': _POLICY_TAG
-            }
-        }
+        return {"config-version": 2, "vars": {"policy_tag": _POLICY_TAG}}
 
     @pytest.fixture(scope="class")
     def models(self):
         return {
             f"{_POLICY_TAG_MODEL_NAME}.sql": _POLICY_TAG_MODEL,
-            "schema.yml": _POLICY_TAG_YML
+            "schema.yml": _POLICY_TAG_YML,
         }
 
     def test_bigquery_delete_column_policy_tag(self, project):
-        results = run_dbt(['run', '-f', '--models', 'policy_tag_table'])
+        results = run_dbt(["run", "-f", "--models", "policy_tag_table"])
         assert len(results) == 1
-        write_config_file(_POLICY_TAG_YML_NO_POLICY_TAGS, project.project_root + '/models', "schema.yml") #update the model to remove the policy tag
-        new_results = run_dbt(['run', '-f', '--models', 'policy_tag_table'])
+        write_config_file(
+            _POLICY_TAG_YML_NO_POLICY_TAGS,
+            project.project_root + "/models",
+            "schema.yml",
+        )  # update the model to remove the policy tag
+        new_results = run_dbt(["run", "-f", "--models", "policy_tag_table"])
         assert len(new_results) == 1
-        relation: BigQueryRelation = relation_from_name(project.adapter, _POLICY_TAG_MODEL_NAME)
+        relation: BigQueryRelation = relation_from_name(
+            project.adapter, _POLICY_TAG_MODEL_NAME
+        )
         adapter = project.adapter
         with get_connection(project.adapter) as conn:
             table = conn.handle.get_table(
-                adapter.connections.get_bq_table(relation.database, relation.schema, relation.table))
+                adapter.connections.get_bq_table(
+                    relation.database, relation.schema, relation.table
+                )
+            )
         for schema_field in table.schema:
             assert schema_field.policy_tags is None
"string"}, + "b": {"name": "b", "data_type": "struct"}, + }, + None, + {"b": {"name": "b", "data_type": "struct"}}, + ), + # Single nested column, 1 level - with corresponding parent column + parent constraint + ( + { + "b": {"name": "b", "data_type": "struct"}, + "b.nested": {"name": "b.nested", "data_type": "string"}, + }, + {"b": "not null"}, + {"b": {"name": "b", "data_type": "struct not null"}}, + ), + # Single nested column, 1 level - with corresponding parent column as array + ( + { + "b": {"name": "b", "data_type": "array"}, + "b.nested": {"name": "b.nested", "data_type": "string"}, + }, + None, + {"b": {"name": "b", "data_type": "array>"}}, + ), + # Single nested column, 1 level - with corresponding parent column as array + constraint + ( + { + "b": {"name": "b", "data_type": "array"}, + "b.nested": {"name": "b.nested", "data_type": "string"}, + }, + {"b": "not null"}, + {"b": {"name": "b", "data_type": "array> not null"}}, + ), + # Multiple nested columns, 1 level + ( + { + "b.nested": {"name": "b.nested", "data_type": "string"}, + "b.nested2": {"name": "b.nested2", "data_type": "int64"}, + }, + None, + {"b": {"name": "b", "data_type": "struct"}}, + ), + # Multiple nested columns, 1 level - with constraints + ( + { + "b.nested": {"name": "b.nested", "data_type": "string"}, + "b.nested2": {"name": "b.nested2", "data_type": "int64"}, + }, + {"b.nested": "not null"}, + {"b": {"name": "b", "data_type": "struct"}}, + ), + # Multiple nested columns, 1 level - with constraints + ( + { + "b.nested": {"name": "b.nested", "data_type": "string"}, + "b.nested2": {"name": "b.nested2", "data_type": "int64"}, + }, + {"b.nested": "not null"}, + {"b": {"name": "b", "data_type": "struct"}}, + ), + # Mix of flat and nested columns, 1 level + ( + { + "a": {"name": "a", "data_type": "string"}, + "b.nested": {"name": "b.nested", "data_type": "string"}, + "b.nested2": {"name": "b.nested2", "data_type": "int64"}, + }, + None, + { + "b": {"name": "b", "data_type": "struct"}, + "a": {"name": "a", "data_type": "string"}, + }, + ), + # Nested columns, multiple levels + ( + { + "b.user.name.first": { + "name": "b.user.name.first", + "data_type": "string", + }, + "b.user.name.last": { + "name": "b.user.name.last", + "data_type": "string", + }, + "b.user.id": {"name": "b.user.id", "data_type": "int64"}, + "b.user.country": {"name": "b.user.country", "data_type": "string"}, + }, + None, + { + "b": { + "name": "b", + "data_type": "struct, id int64, country string>>", + }, + }, + ), + # Nested columns, multiple levels - missing data_type + ( + { + "b.user.name.first": { + "name": "b.user.name.first", + "data_type": "string", + }, + "b.user.name.last": { + "name": "b.user.name.last", + "data_type": "string", + }, + "b.user.id": {"name": "b.user.id", "data_type": "int64"}, + "b.user.country": {"name": "b.user.country"}, # missing data_type + }, + None, + { + "b": { + "name": "b", + "data_type": "struct, id int64, country>>", + }, + }, + ), + # Nested columns, multiple levels - with constraints! 
diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py
index 53109e5cf..6e2c44ef1 100644
--- a/tests/unit/test_dataset.py
+++ b/tests/unit/test_dataset.py
@@ -1,10 +1,10 @@
-from dbt.adapters.bigquery.dataset import add_access_entry_to_dataset
+from dbt.adapters.bigquery.dataset import add_access_entry_to_dataset, is_access_entry_in_dataset
 from dbt.adapters.bigquery import BigQueryRelation
 
 from google.cloud.bigquery import Dataset, AccessEntry, DatasetReference
 
 
-def test_add_access_entry_to_dataset_idempotently_adds_entries():
+def test_add_access_entry_to_dataset_updates_dataset():
     database = "someDb"
     dataset = "someDataset"
     entity = BigQueryRelation.from_dict(
@@ -19,11 +19,9 @@
     access_entry = AccessEntry(None, "table", entity)
     dataset = add_access_entry_to_dataset(dataset, access_entry)
     assert access_entry in dataset.access_entries
-    dataset = add_access_entry_to_dataset(dataset, access_entry)
-    assert len(dataset.access_entries) == 1
 
 
-def test_add_access_entry_to_dataset_does_not_add_with_pre_existing_entries():
+def test_add_access_entry_to_dataset_updates_with_pre_existing_entries():
     database = "someOtherDb"
     dataset = "someOtherDataset"
     entity_2 = BigQueryRelation.from_dict(
@@ -40,4 +38,45 @@
     dataset.access_entries = [initial_entry]
     access_entry = AccessEntry(None, "view", entity_2)
     dataset = add_access_entry_to_dataset(dataset, access_entry)
-    assert len(dataset.access_entries) == 1
+    assert len(dataset.access_entries) == 2
+
+
+def test_is_access_entry_in_dataset_returns_true_if_entry_in_dataset():
+    database = "someDb"
+    dataset = "someDataset"
+    entity = BigQueryRelation.from_dict(
+        {
+            "type": None,
+            "path": {
+                "database": "test-project",
+                "schema": "test_schema",
+                "identifier": "my_table",
+            },
+            "quote_policy": {"identifier": False},
+        }
+    ).to_dict()
+    dataset_ref = DatasetReference(project=database, dataset_id=dataset)
+    dataset = Dataset(dataset_ref)
+    access_entry = AccessEntry(None, "table", entity)
+    dataset = add_access_entry_to_dataset(dataset, access_entry)
+    assert is_access_entry_in_dataset(dataset, access_entry)
+
+
+def test_is_access_entry_in_dataset_returns_false_if_entry_not_in_dataset():
+    database = "someDb"
+    dataset = "someDataset"
+    entity = BigQueryRelation.from_dict(
+        {
+            "type": None,
+            "path": {
+                "database": "test-project",
+                "schema": "test_schema",
+                "identifier": "my_table",
+            },
+            "quote_policy": {"identifier": False},
+        }
+    ).to_dict()
+    dataset_ref = DatasetReference(project=database, dataset_id=dataset)
+    dataset = Dataset(dataset_ref)
+    access_entry = AccessEntry(None, "table", entity)
+    assert not is_access_entry_in_dataset(dataset, access_entry)
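For reference, the membership check exercised by the new tests compares an entry against the dataset's existing grants. A minimal sketch of that semantics, inferred from the tests rather than copied from dbt-bigquery's implementation:

from google.cloud.bigquery import AccessEntry, Dataset


def entry_in_dataset(dataset: Dataset, entry: AccessEntry) -> bool:
    # assumed semantics: an entry is "in" the dataset when an existing grant
    # matches its role, entity type, and entity id
    return any(
        existing.role == entry.role
        and existing.entity_type == entry.entity_type
        and existing.entity_id == entry.entity_id
        for existing in dataset.access_entries
    )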
diff --git a/tox.ini b/tox.ini
index 4d552ab44..5e23df47c 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,8 +1,8 @@
 [tox]
 skipsdist = True
-envlist = py37,py38,py39,py310,py311
+envlist = py38,py39,py310,py311
 
-[testenv:{unit,py37,py38,py39,py310,py311,py}]
+[testenv:{unit,py38,py39,py310,py311,py}]
 description = unit testing
 skip_install = true
 passenv =
@@ -13,7 +13,7 @@ deps =
     -rdev-requirements.txt
     -e.
 
-[testenv:{integration,py37,py38,py39,py310,py311,py}-{bigquery}]
+[testenv:{integration,py38,py39,py310,py311,py}-{bigquery}]
 description = adapter plugin integration testing
 skip_install = true
 passenv =
@@ -22,13 +22,15 @@ passenv =
     PYTEST_ADDOPTS
    DATAPROC_*
     GCS_BUCKET
+    DD_SERVICE
+    DD_ENV
 commands =
   bigquery: {envpython} -m pytest {posargs} -vv tests/functional --profile service_account
 deps =
     -rdev-requirements.txt
     -e.
 
-[testenv:{python-tests,py37,py38,py39,py310,py311,py}]
+[testenv:{python-tests,py38,py39,py310,py311,py}]
 description = python integration testing
 skip_install = true
 passenv =
@@ -37,6 +39,8 @@ passenv =
     PYTEST_ADDOPTS
     DATAPROC_*
     GCS_BUCKET
+    DD_SERVICE
+    DD_ENV
 commands =
   {envpython} -m pytest {posargs} -vv tests/functional -k "TestPython" --profile service_account
 deps =