diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index e74c22bc8..bd9430cbe 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.8.0a1
+current_version = 1.9.0b1
 parse = (?P<major>[\d]+) # major version number
    \.(?P<minor>[\d]+) # minor version number
    \.(?P<patch>[\d]+) # patch version number
@@ -32,6 +32,4 @@ first_value = 1

 [bumpversion:part:nightly]

-[bumpversion:file:setup.py]
-
 [bumpversion:file:dbt/adapters/bigquery/__version__.py]
diff --git a/.changes/1.9.0-b1.md b/.changes/1.9.0-b1.md
new file mode 100644
index 000000000..7d0dd2c8f
--- /dev/null
+++ b/.changes/1.9.0-b1.md
@@ -0,0 +1,44 @@
+## dbt-bigquery 1.9.0-b1 - October 02, 2024
+
+### Features
+
+- Add configuration options `enable_list_inference` and `intermediate_format` for python models ([#1047](https://github.com/dbt-labs/dbt-bigquery/issues/1047), [#1114](https://github.com/dbt-labs/dbt-bigquery/issues/1114))
+- Add tests for cross-database `cast` macro ([#1214](https://github.com/dbt-labs/dbt-bigquery/issues/1214))
+- Cross-database `date` macro ([#1221](https://github.com/dbt-labs/dbt-bigquery/issues/1221))
+- Add support for base 64 encoded json keyfile credentials ([#923](https://github.com/dbt-labs/dbt-bigquery/issues/923))
+- Add support for cancelling queries on keyboard interrupt ([#917](https://github.com/dbt-labs/dbt-bigquery/issues/917))
+- Add Microbatch Strategy to dbt-spark ([#1354](https://github.com/dbt-labs/dbt-bigquery/issues/1354))
+
+### Fixes
+
+- Drop intermediate objects created in BigQuery for incremental models ([#1036](https://github.com/dbt-labs/dbt-bigquery/issues/1036))
+- Fix null column index issue during `dbt docs generate` for external tables ([#1079](https://github.com/dbt-labs/dbt-bigquery/issues/1079))
+- make seed delimiter configurable via `field_delimeter` in model config ([#1119](https://github.com/dbt-labs/dbt-bigquery/issues/1119))
+- Default `enableListInference` to `True` for python models to support nested lists ([#1047](https://github.com/dbt-labs/dbt-bigquery/issues/1047), [#1114](https://github.com/dbt-labs/dbt-bigquery/issues/1114))
+- Catch additional database error exception, NotFound, as a DbtDatabaseError instead of defaulting to a DbtRuntimeError ([#1360](https://github.com/dbt-labs/dbt-bigquery/issues/1360))
+
+### Under the Hood
+
+- Lazy load `agate` ([#1162](https://github.com/dbt-labs/dbt-bigquery/issues/1162))
+- Simplify linting environment and dev dependencies ([#1291](https://github.com/dbt-labs/dbt-bigquery/issues/1291))
+
+### Dependencies
+
+- Update pre-commit requirement from ~=3.5 to ~=3.7 ([#1052](https://github.com/dbt-labs/dbt-bigquery/pull/1052))
+- Update freezegun requirement from ~=1.3 to ~=1.4 ([#1062](https://github.com/dbt-labs/dbt-bigquery/pull/1062))
+- Bump mypy from 1.7.1 to 1.8.0 ([#1064](https://github.com/dbt-labs/dbt-bigquery/pull/1064))
+- Update flake8 requirement from ~=6.1 to ~=7.0 ([#1069](https://github.com/dbt-labs/dbt-bigquery/pull/1069))
+- Bump actions/download-artifact from 3 to 4 ([#1209](https://github.com/dbt-labs/dbt-bigquery/pull/1209))
+- Bump actions/upload-artifact from 3 to 4 ([#1210](https://github.com/dbt-labs/dbt-bigquery/pull/1210))
+- Bump ubuntu from 22.04 to 24.04 in /docker ([#1247](https://github.com/dbt-labs/dbt-bigquery/pull/1247))
+- Update pre-commit-hooks requirement from ~=4.5 to ~=4.6 ([#1281](https://github.com/dbt-labs/dbt-bigquery/pull/1281))
+- Update pytest-xdist requirement from ~=3.5 to ~=3.6 ([#1282](https://github.com/dbt-labs/dbt-bigquery/pull/1282))
+- Update flaky requirement from ~=3.7 to ~=3.8 ([#1283](https://github.com/dbt-labs/dbt-bigquery/pull/1283))
+- Update twine requirement from ~=4.0 to ~=5.1 ([#1293](https://github.com/dbt-labs/dbt-bigquery/pull/1293))
+
+### Contributors
+- [@d-cole](https://github.com/d-cole) ([#917](https://github.com/dbt-labs/dbt-bigquery/issues/917))
+- [@dwreeves](https://github.com/dwreeves) ([#1162](https://github.com/dbt-labs/dbt-bigquery/issues/1162))
+- [@robeleb1](https://github.com/robeleb1) ([#923](https://github.com/dbt-labs/dbt-bigquery/issues/923))
+- [@salimmoulouel](https://github.com/salimmoulouel) ([#1119](https://github.com/dbt-labs/dbt-bigquery/issues/1119))
+- [@vinit2107](https://github.com/vinit2107) ([#1036](https://github.com/dbt-labs/dbt-bigquery/issues/1036))
diff --git a/.changes/1.9.0/Dependencies-20231211-001048.yaml b/.changes/1.9.0/Dependencies-20231211-001048.yaml
new file mode 100644
index 000000000..6f2bfada4
--- /dev/null
+++ b/.changes/1.9.0/Dependencies-20231211-001048.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update pre-commit requirement from ~=3.5 to ~=3.7"
+time: 2023-12-11T00:10:48.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 1052
diff --git a/.changes/1.9.0/Dependencies-20231220-002130.yaml b/.changes/1.9.0/Dependencies-20231220-002130.yaml
new file mode 100644
index 000000000..d62e50bf2
--- /dev/null
+++ b/.changes/1.9.0/Dependencies-20231220-002130.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update freezegun requirement from ~=1.3 to ~=1.4"
+time: 2023-12-20T00:21:30.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 1062
diff --git a/.changes/1.9.0/Dependencies-20231222-002351.yaml b/.changes/1.9.0/Dependencies-20231222-002351.yaml
new file mode 100644
index 000000000..76591de93
--- /dev/null
+++ b/.changes/1.9.0/Dependencies-20231222-002351.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Bump mypy from 1.7.1 to 1.8.0"
+time: 2023-12-22T00:23:51.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 1064
diff --git a/.changes/1.9.0/Dependencies-20240105-004800.yaml b/.changes/1.9.0/Dependencies-20240105-004800.yaml
new file mode 100644
index 000000000..b0d33ceed
--- /dev/null
+++ b/.changes/1.9.0/Dependencies-20240105-004800.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update flake8 requirement from ~=6.1 to ~=7.0"
+time: 2024-01-05T00:48:00.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 1069
diff --git a/.changes/1.9.0/Dependencies-20240429-005158.yaml b/.changes/1.9.0/Dependencies-20240429-005158.yaml
new file mode 100644
index 000000000..5d380952c
--- /dev/null
+++ b/.changes/1.9.0/Dependencies-20240429-005158.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Bump actions/download-artifact from 3 to 4"
+time: 2024-04-29T00:51:58.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 1209
diff --git a/.changes/1.9.0/Dependencies-20240429-005159.yaml b/.changes/1.9.0/Dependencies-20240429-005159.yaml
new file mode 100644
index 000000000..adf2cae65
--- /dev/null
+++ b/.changes/1.9.0/Dependencies-20240429-005159.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Bump actions/upload-artifact from 3 to 4"
+time: 2024-04-29T00:51:59.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 1210
diff --git a/.changes/1.9.0/Dependencies-20240520-230208.yaml b/.changes/1.9.0/Dependencies-20240520-230208.yaml
new file mode 100644
index 000000000..f89057233
--- /dev/null
+++ b/.changes/1.9.0/Dependencies-20240520-230208.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Bump ubuntu from 22.04 to 24.04 in /docker"
+time: 2024-05-20T23:02:08.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 1247
diff --git a/.changes/1.9.0/Dependencies-20240718-005755.yaml b/.changes/1.9.0/Dependencies-20240718-005755.yaml
new file mode 100644
index 000000000..3d2cca66c
--- /dev/null
+++ b/.changes/1.9.0/Dependencies-20240718-005755.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update pre-commit-hooks requirement from ~=4.5 to ~=4.6"
+time: 2024-07-18T00:57:55.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 1281
diff --git a/.changes/1.9.0/Dependencies-20240718-005756.yaml b/.changes/1.9.0/Dependencies-20240718-005756.yaml
new file mode 100644
index 000000000..ac6b791a8
--- /dev/null
+++ b/.changes/1.9.0/Dependencies-20240718-005756.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update pytest-xdist requirement from ~=3.5 to ~=3.6"
+time: 2024-07-18T00:57:56.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 1282
diff --git a/.changes/1.9.0/Dependencies-20240718-005757.yaml b/.changes/1.9.0/Dependencies-20240718-005757.yaml
new file mode 100644
index 000000000..29e12d68e
--- /dev/null
+++ b/.changes/1.9.0/Dependencies-20240718-005757.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update flaky requirement from ~=3.7 to ~=3.8"
+time: 2024-07-18T00:57:57.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 1283
diff --git a/.changes/1.9.0/Dependencies-20240719-003740.yaml b/.changes/1.9.0/Dependencies-20240719-003740.yaml
new file mode 100644
index 000000000..feb483a60
--- /dev/null
+++ b/.changes/1.9.0/Dependencies-20240719-003740.yaml
@@ -0,0 +1,6 @@
+kind: "Dependencies"
+body: "Update twine requirement from ~=4.0 to ~=5.1"
+time: 2024-07-19T00:37:40.00000Z
+custom:
+  Author: dependabot[bot]
+  PR: 1293
diff --git a/.changes/1.9.0/Features-20240426-105319.yaml b/.changes/1.9.0/Features-20240426-105319.yaml
new file mode 100644
index 000000000..0af2f9aa8
--- /dev/null
+++ b/.changes/1.9.0/Features-20240426-105319.yaml
@@ -0,0 +1,7 @@
+kind: Features
+body: Add configuration options `enable_list_inference` and `intermediate_format` for python
+  models
+time: 2024-04-26T10:53:19.874239-04:00
+custom:
+  Author: mikealfare
+  Issue: 1047 1114
diff --git a/.changes/1.9.0/Features-20240430-185650.yaml b/.changes/1.9.0/Features-20240430-185650.yaml
new file mode 100644
index 000000000..0c0eef567
--- /dev/null
+++ b/.changes/1.9.0/Features-20240430-185650.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: Add tests for cross-database `cast` macro
+time: 2024-04-30T18:56:50.238027-06:00
+custom:
+  Author: dbeatty10
+  Issue: "1214"
diff --git a/.changes/1.9.0/Features-20240501-151902.yaml b/.changes/1.9.0/Features-20240501-151902.yaml
new file mode 100644
index 000000000..1522e9775
--- /dev/null
+++ b/.changes/1.9.0/Features-20240501-151902.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: Cross-database `date` macro
+time: 2024-05-01T15:19:02.929676-06:00
+custom:
+  Author: dbeatty10
+  Issue: 1221
diff --git a/.changes/1.9.0/Features-20240516-125735.yaml b/.changes/1.9.0/Features-20240516-125735.yaml
new file mode 100644
index 000000000..d84b098b2
--- /dev/null
+++ b/.changes/1.9.0/Features-20240516-125735.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: Add support for base 64 encoded json keyfile credentials
+time: 2024-05-16T12:57:35.383416-07:00
+custom:
+  Author: robeleb1
+  Issue: "923"
diff --git a/.changes/1.9.0/Features-20240730-135911.yaml b/.changes/1.9.0/Features-20240730-135911.yaml
new file mode 100644
index 000000000..52868c2ee
--- /dev/null
+++ b/.changes/1.9.0/Features-20240730-135911.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: Add support for cancelling queries on keyboard
interrupt +time: 2024-07-30T13:59:11.585452-07:00 +custom: + Author: d-cole MichelleArk colin-rogers-dbt + Issue: "917" diff --git a/.changes/1.9.0/Features-20240925-232238.yaml b/.changes/1.9.0/Features-20240925-232238.yaml new file mode 100644 index 000000000..903884196 --- /dev/null +++ b/.changes/1.9.0/Features-20240925-232238.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Add Microbatch Strategy to dbt-spark +time: 2024-09-25T23:22:38.216277+01:00 +custom: + Author: michelleark + Issue: "1354" diff --git a/.changes/1.9.0/Fixes-20240120-180818.yaml b/.changes/1.9.0/Fixes-20240120-180818.yaml new file mode 100644 index 000000000..0d0740361 --- /dev/null +++ b/.changes/1.9.0/Fixes-20240120-180818.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Drop intermediate objects created in BigQuery for incremental models +time: 2024-01-20T18:08:18.817915-06:00 +custom: + Author: vinit2107 + Issue: "1036" diff --git a/.changes/1.9.0/Fixes-20240201-145323.yaml b/.changes/1.9.0/Fixes-20240201-145323.yaml new file mode 100644 index 000000000..ea198e54a --- /dev/null +++ b/.changes/1.9.0/Fixes-20240201-145323.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Fix null column index issue during `dbt docs generate` for external tables +time: 2024-02-01T14:53:23.434624-05:00 +custom: + Author: mikealfare + Issue: "1079" diff --git a/.changes/1.9.0/Fixes-20240226-233024.yaml b/.changes/1.9.0/Fixes-20240226-233024.yaml new file mode 100644 index 000000000..efb1b077c --- /dev/null +++ b/.changes/1.9.0/Fixes-20240226-233024.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: make seed delimiter configurable via `field_delimeter` in model config +time: 2024-02-26T23:30:24.141213+01:00 +custom: + Author: salimmoulouel + Issue: "1119" diff --git a/.changes/1.9.0/Fixes-20240426-105224.yaml b/.changes/1.9.0/Fixes-20240426-105224.yaml new file mode 100644 index 000000000..624006ba5 --- /dev/null +++ b/.changes/1.9.0/Fixes-20240426-105224.yaml @@ -0,0 +1,7 @@ +kind: Fixes +body: Default `enableListInference` to `True` for python models to support nested + lists +time: 2024-04-26T10:52:24.827314-04:00 +custom: + Author: mikealfare + Issue: 1047 1114 diff --git a/.changes/1.9.0/Fixes-20241001-193207.yaml b/.changes/1.9.0/Fixes-20241001-193207.yaml new file mode 100644 index 000000000..584445a5b --- /dev/null +++ b/.changes/1.9.0/Fixes-20241001-193207.yaml @@ -0,0 +1,7 @@ +kind: Fixes +body: Catch additional database error exception, NotFound, as a DbtDatabaseError instead + of defaulting to a DbtRuntimeError +time: 2024-10-01T19:32:07.304353-04:00 +custom: + Author: mikealfare + Issue: "1360" diff --git a/.changes/1.9.0/Under the Hood-20240331-101418.yaml b/.changes/1.9.0/Under the Hood-20240331-101418.yaml new file mode 100644 index 000000000..baea00248 --- /dev/null +++ b/.changes/1.9.0/Under the Hood-20240331-101418.yaml @@ -0,0 +1,6 @@ +kind: Under the Hood +body: Lazy load `agate` +time: 2024-03-31T10:14:18.260074-04:00 +custom: + Author: dwreeves + Issue: "1162" diff --git a/.changes/1.9.0/Under the Hood-20240718-193206.yaml b/.changes/1.9.0/Under the Hood-20240718-193206.yaml new file mode 100644 index 000000000..32b3084f5 --- /dev/null +++ b/.changes/1.9.0/Under the Hood-20240718-193206.yaml @@ -0,0 +1,6 @@ +kind: Under the Hood +body: Simplify linting environment and dev dependencies +time: 2024-07-18T19:32:06.044016-04:00 +custom: + Author: mikealfare + Issue: "1291" diff --git a/.changes/unreleased/Breaking Changes-20241016-185117.yaml b/.changes/unreleased/Breaking Changes-20241016-185117.yaml new file mode 100644 index 
000000000..55bb37461 --- /dev/null +++ b/.changes/unreleased/Breaking Changes-20241016-185117.yaml @@ -0,0 +1,6 @@ +kind: Breaking Changes +body: Drop support for Python 3.8 +time: 2024-10-16T18:51:17.581547-04:00 +custom: + Author: mikealfare + Issue: "1373" diff --git a/.changes/unreleased/Dependencies-20231002-164012.yaml b/.changes/unreleased/Dependencies-20231002-164012.yaml deleted file mode 100644 index 344aeb148..000000000 --- a/.changes/unreleased/Dependencies-20231002-164012.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update ddtrace requirement from ~=1.19 to ~=1.20" -time: 2023-10-02T16:40:12.00000Z -custom: - Author: dependabot[bot] - PR: 948 diff --git a/.changes/unreleased/Dependencies-20231009-005842.yaml b/.changes/unreleased/Dependencies-20231009-005842.yaml deleted file mode 100644 index acedd8d52..000000000 --- a/.changes/unreleased/Dependencies-20231009-005842.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update pre-commit-hooks requirement from ~=4.4 to ~=4.5" -time: 2023-10-09T00:58:42.00000Z -custom: - Author: dependabot[bot] - PR: 960 diff --git a/.changes/unreleased/Dependencies-20231011-002031.yaml b/.changes/unreleased/Dependencies-20231011-002031.yaml deleted file mode 100644 index 5cc3c36a1..000000000 --- a/.changes/unreleased/Dependencies-20231011-002031.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Bump mypy from 1.5.1 to 1.6.0" -time: 2023-10-11T00:20:31.00000Z -custom: - Author: dependabot[bot] - PR: 963 diff --git a/.changes/unreleased/Dependencies-20231016-002928.yaml b/.changes/unreleased/Dependencies-20231016-002928.yaml deleted file mode 100644 index ac9470de7..000000000 --- a/.changes/unreleased/Dependencies-20231016-002928.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update pre-commit requirement from ~=3.4 to ~=3.5" -time: 2023-10-16T00:29:28.00000Z -custom: - Author: dependabot[bot] - PR: 969 diff --git a/.changes/unreleased/Dependencies-20231018-010429.yaml b/.changes/unreleased/Dependencies-20231018-010429.yaml deleted file mode 100644 index fec345104..000000000 --- a/.changes/unreleased/Dependencies-20231018-010429.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update black requirement from ~=23.9 to ~=23.10" -time: 2023-10-18T01:04:29.00000Z -custom: - Author: dependabot[bot] - PR: 973 diff --git a/.changes/unreleased/Dependencies-20231027-132742.yaml b/.changes/unreleased/Dependencies-20231027-132742.yaml deleted file mode 100644 index d72ac124b..000000000 --- a/.changes/unreleased/Dependencies-20231027-132742.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Bump mypy from 1.6.0 to 1.6.1" -time: 2023-10-27T13:27:42.00000Z -custom: - Author: dependabot[bot] - PR: 985 diff --git a/.changes/unreleased/Dependencies-20231027-201709.yaml b/.changes/unreleased/Dependencies-20231027-201709.yaml deleted file mode 100644 index e9d2a1adb..000000000 --- a/.changes/unreleased/Dependencies-20231027-201709.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update ddtrace requirement from ~=1.20 to ~=2.1" -time: 2023-10-27T20:17:09.00000Z -custom: - Author: dependabot[bot] - PR: 989 diff --git a/.changes/unreleased/Dependencies-20231109-005623.yaml b/.changes/unreleased/Dependencies-20231109-005623.yaml deleted file mode 100644 index bc4ca2342..000000000 --- a/.changes/unreleased/Dependencies-20231109-005623.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update black requirement from ~=23.10 to ~=23.11" -time: 
2023-11-09T00:56:23.00000Z -custom: - Author: dependabot[bot] - PR: 1013 diff --git a/.changes/unreleased/Dependencies-20231113-002529.yaml b/.changes/unreleased/Dependencies-20231113-002529.yaml deleted file mode 100644 index ad7272882..000000000 --- a/.changes/unreleased/Dependencies-20231113-002529.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update pytest-xdist requirement from ~=3.3 to ~=3.4" -time: 2023-11-13T00:25:29.00000Z -custom: - Author: dependabot[bot] - PR: 1022 diff --git a/.changes/unreleased/Dependencies-20231113-002621.yaml b/.changes/unreleased/Dependencies-20231113-002621.yaml deleted file mode 100644 index d889da51e..000000000 --- a/.changes/unreleased/Dependencies-20231113-002621.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Bump mypy from 1.6.1 to 1.7.0" -time: 2023-11-13T00:26:21.00000Z -custom: - Author: dependabot[bot] - PR: 1023 diff --git a/.changes/unreleased/Dependencies-20231116-001342.yaml b/.changes/unreleased/Dependencies-20231116-001342.yaml deleted file mode 100644 index 6af15169f..000000000 --- a/.changes/unreleased/Dependencies-20231116-001342.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update ddtrace requirement from ~=2.1 to ~=2.2" -time: 2023-11-16T00:13:42.00000Z -custom: - Author: dependabot[bot] - PR: 1028 diff --git a/.changes/unreleased/Dependencies-20231127-004827.yaml b/.changes/unreleased/Dependencies-20231127-004827.yaml deleted file mode 100644 index a78708328..000000000 --- a/.changes/unreleased/Dependencies-20231127-004827.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update wheel requirement from ~=0.41 to ~=0.42" -time: 2023-11-27T00:48:27.00000Z -custom: - Author: dependabot[bot] - PR: 1033 diff --git a/.changes/unreleased/Dependencies-20231128-005012.yaml b/.changes/unreleased/Dependencies-20231128-005012.yaml deleted file mode 100644 index 236ca1e52..000000000 --- a/.changes/unreleased/Dependencies-20231128-005012.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Bump mypy from 1.7.0 to 1.7.1" -time: 2023-11-28T00:50:12.00000Z -custom: - Author: dependabot[bot] - PR: 1034 diff --git a/.changes/unreleased/Dependencies-20231128-005103.yaml b/.changes/unreleased/Dependencies-20231128-005103.yaml deleted file mode 100644 index 205ecd1d8..000000000 --- a/.changes/unreleased/Dependencies-20231128-005103.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update ddtrace requirement from ~=2.2 to ~=2.3" -time: 2023-11-28T00:51:03.00000Z -custom: - Author: dependabot[bot] - PR: 1035 diff --git a/.changes/unreleased/Dependencies-20231129-001523.yaml b/.changes/unreleased/Dependencies-20231129-001523.yaml deleted file mode 100644 index e1c145ced..000000000 --- a/.changes/unreleased/Dependencies-20231129-001523.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update pytest-xdist requirement from ~=3.4 to ~=3.5" -time: 2023-11-29T00:15:23.00000Z -custom: - Author: dependabot[bot] - PR: 1037 diff --git a/.changes/unreleased/Dependencies-20231204-003807.yaml b/.changes/unreleased/Dependencies-20231204-003807.yaml deleted file mode 100644 index 18b9bb618..000000000 --- a/.changes/unreleased/Dependencies-20231204-003807.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update freezegun requirement from ~=1.2 to ~=1.3" -time: 2023-12-04T00:38:07.00000Z -custom: - Author: dependabot[bot] - PR: 1040 diff --git a/.changes/unreleased/Dependencies-20231213-003845.yaml 
b/.changes/unreleased/Dependencies-20231213-003845.yaml deleted file mode 100644 index 93618b5e0..000000000 --- a/.changes/unreleased/Dependencies-20231213-003845.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: "Dependencies" -body: "Update black requirement from ~=23.11 to ~=23.12" -time: 2023-12-13T00:38:45.00000Z -custom: - Author: dependabot[bot] - PR: 1056 diff --git a/.changes/unreleased/Dependencies-20240124-120321.yaml b/.changes/unreleased/Dependencies-20240124-120321.yaml deleted file mode 100644 index ef725de67..000000000 --- a/.changes/unreleased/Dependencies-20240124-120321.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Dependencies -body: get dbt-tests-adapters from dbt-adapters repo -time: 2024-01-24T12:03:21.523295-08:00 -custom: - Author: colin-rogers-dbt - PR: "1077" diff --git a/.changes/unreleased/Dependencies-20240724-040744.yaml b/.changes/unreleased/Dependencies-20240724-040744.yaml new file mode 100644 index 000000000..fd713788e --- /dev/null +++ b/.changes/unreleased/Dependencies-20240724-040744.yaml @@ -0,0 +1,6 @@ +kind: "Dependencies" +body: "Update wheel requirement from ~=0.42 to ~=0.43" +time: 2024-07-24T04:07:44.00000Z +custom: + Author: dependabot[bot] + PR: 1304 diff --git a/.changes/unreleased/Features-20231218-155409.yaml b/.changes/unreleased/Features-20231218-155409.yaml deleted file mode 100644 index bc965b06f..000000000 --- a/.changes/unreleased/Features-20231218-155409.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Features -body: Add support for checking table-last-modified by metadata -time: 2023-12-18T15:54:09.69635-05:00 -custom: - Author: mikealfare - Issue: "938" diff --git a/.changes/unreleased/Features-20231219-201203.yaml b/.changes/unreleased/Features-20231219-201203.yaml deleted file mode 100644 index eee3f1026..000000000 --- a/.changes/unreleased/Features-20231219-201203.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Features -body: Support limiting get_catalog by object name -time: 2023-12-19T20:12:03.990725-05:00 -custom: - Author: mikealfare - Issue: "950" diff --git a/.changes/unreleased/Features-20240102-152030.yaml b/.changes/unreleased/Features-20240102-152030.yaml deleted file mode 100644 index 81c683de0..000000000 --- a/.changes/unreleased/Features-20240102-152030.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Features -body: Update base adapter references as part of decoupling migration -time: 2024-01-02T15:20:30.038221-08:00 -custom: - Author: colin-rogers-dbt - Issue: "1067" diff --git a/.changes/unreleased/Features-20240205-174614.yaml b/.changes/unreleased/Features-20240205-174614.yaml deleted file mode 100644 index 192273d3d..000000000 --- a/.changes/unreleased/Features-20240205-174614.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Features -body: Support all types for unit testing in dbt-bigquery, expand coverage of - safe_cast macro -time: 2024-02-05T17:46:14.505597-05:00 -custom: - Author: michelleark - Issue: "1090" diff --git a/.changes/unreleased/Features-20240505-011838.yaml b/.changes/unreleased/Features-20240505-011838.yaml new file mode 100644 index 000000000..66411853f --- /dev/null +++ b/.changes/unreleased/Features-20240505-011838.yaml @@ -0,0 +1,6 @@ +kind: Features +body: add is_retryable test case when raise ServiceUnavailable +time: 2024-05-05T01:18:38.737882+09:00 +custom: + Author: jx2lee + Issue: "682" diff --git a/.changes/unreleased/Features-20240911-234859.yaml b/.changes/unreleased/Features-20240911-234859.yaml new file mode 100644 index 000000000..5351c3315 --- /dev/null +++ b/.changes/unreleased/Features-20240911-234859.yaml @@ 
-0,0 +1,6 @@ +kind: Features +body: Adds the ability to set optional `quota_project` in profile +time: 2024-09-11T23:48:59.767649+01:00 +custom: + Author: jcarpenter12 + Issue: 1343 1344 diff --git a/.changes/unreleased/Fixes-20231023-082312.yaml b/.changes/unreleased/Fixes-20231023-082312.yaml deleted file mode 100644 index 368c58e95..000000000 --- a/.changes/unreleased/Fixes-20231023-082312.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: Patch for json inline --show -time: 2023-10-23T08:23:12.245223-06:00 -custom: - Author: matt-winkler - Issue: "972" diff --git a/.changes/unreleased/Fixes-20231025-131907.yaml b/.changes/unreleased/Fixes-20231025-131907.yaml deleted file mode 100644 index 9a3b8d8a8..000000000 --- a/.changes/unreleased/Fixes-20231025-131907.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: Lower bound of `2.11.0` for `google-api-core` -time: 2023-10-25T13:19:07.580826-06:00 -custom: - Author: gmyrianthous dbeatty10 - Issue: "979" diff --git a/.changes/unreleased/Fixes-20231025-223003.yaml b/.changes/unreleased/Fixes-20231025-223003.yaml deleted file mode 100644 index ebec94a30..000000000 --- a/.changes/unreleased/Fixes-20231025-223003.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: Fix refresh syntax, config comparison with empty labels -time: 2023-10-25T22:30:03.0034-04:00 -custom: - Author: mikealfare - Issue: "983" diff --git a/.changes/unreleased/Fixes-20231030-222134.yaml b/.changes/unreleased/Fixes-20231030-222134.yaml deleted file mode 100644 index 62bfc5f27..000000000 --- a/.changes/unreleased/Fixes-20231030-222134.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: Assign the correct relation type to materialized views in catalog queries -time: 2023-10-30T22:21:34.401675-04:00 -custom: - Author: mikealfare - Issue: "995" diff --git a/.changes/unreleased/Fixes-20231105-125740.yaml b/.changes/unreleased/Fixes-20231105-125740.yaml deleted file mode 100644 index 928fbb302..000000000 --- a/.changes/unreleased/Fixes-20231105-125740.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: Fix inline comments (--) on the last line of an incremental model -time: 2023-11-05T12:57:40.289399+09:00 -custom: - Author: tnk-ysk - Issue: "896" diff --git a/.changes/unreleased/Fixes-20231105-143145.yaml b/.changes/unreleased/Fixes-20231105-143145.yaml deleted file mode 100644 index fd404e395..000000000 --- a/.changes/unreleased/Fixes-20231105-143145.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: In incremental models, add dummy merge condition on source partition column when partition is required -time: 2023-11-05T14:31:45.869783+09:00 -custom: - Author: tnk-ysk - Issue: "792" diff --git a/.changes/unreleased/Fixes-20231107-100905.yaml b/.changes/unreleased/Fixes-20231107-100905.yaml deleted file mode 100644 index 942298ed9..000000000 --- a/.changes/unreleased/Fixes-20231107-100905.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: Support agate Integer type, test with empty seed -time: 2023-11-07T10:09:05.723451-05:00 -custom: - Author: gshank - Issue: "1003" diff --git a/.changes/unreleased/Fixes-20231107-174352.yaml b/.changes/unreleased/Fixes-20231107-174352.yaml deleted file mode 100644 index 80592758d..000000000 --- a/.changes/unreleased/Fixes-20231107-174352.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Fixes -body: Fixed issue where materialized views were failing on re-run with minimal config - parameters -time: 2023-11-07T17:43:52.972135-05:00 -custom: - Author: "mikealfare" - Issue: "1007" diff --git 
a/.changes/unreleased/Fixes-20231108-171128.yaml b/.changes/unreleased/Fixes-20231108-171128.yaml deleted file mode 100644 index 116ff00d2..000000000 --- a/.changes/unreleased/Fixes-20231108-171128.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: Fix broken partition config granularity and batch_id being set to None -time: 2023-11-08T17:11:28.819877-08:00 -custom: - Author: colin-rogers-dbt - Issue: "1006" diff --git a/.changes/unreleased/Fixes-20231111-150959.yaml b/.changes/unreleased/Fixes-20231111-150959.yaml deleted file mode 100644 index 3d9f245a6..000000000 --- a/.changes/unreleased/Fixes-20231111-150959.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: replace deterministic batch_id with uuid -time: 2023-11-11T15:09:59.243797-08:00 -custom: - Author: colin-rogers-dbt - Issue: "1006" diff --git a/.changes/unreleased/Fixes-20231219-153446.yaml b/.changes/unreleased/Fixes-20231219-153446.yaml deleted file mode 100644 index 44a858eb8..000000000 --- a/.changes/unreleased/Fixes-20231219-153446.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: remove json patch to leverage bigquery-python improvement -time: 2023-12-19T15:34:46.843931-08:00 -custom: - Author: colin-rogers-dbt - Issue: "1055" diff --git a/.changes/unreleased/Fixes-20240216-133319.yaml b/.changes/unreleased/Fixes-20240216-133319.yaml deleted file mode 100644 index cb8a2156e..000000000 --- a/.changes/unreleased/Fixes-20240216-133319.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: remove `token` field from connection keys -time: 2024-02-16T13:33:19.524482-08:00 -custom: - Author: versusfacit - Issue: "1105" diff --git a/.changes/unreleased/Fixes-20240219-103324.yaml b/.changes/unreleased/Fixes-20240219-103324.yaml deleted file mode 100644 index 16906db85..000000000 --- a/.changes/unreleased/Fixes-20240219-103324.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Fixes -body: Remove custom query job async timeout logic as it has been fixed in bigquery-python -time: 2024-02-19T10:33:24.3385-08:00 -custom: - Author: colin-rogers-dbt - Issue: "1081" diff --git a/.changes/unreleased/Fixes-20241028-172719.yaml b/.changes/unreleased/Fixes-20241028-172719.yaml new file mode 100644 index 000000000..87ee2c25d --- /dev/null +++ b/.changes/unreleased/Fixes-20241028-172719.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: use "direct" write for non-partitioned python model materializations +time: 2024-10-28T17:27:19.306348-07:00 +custom: + Author: colin-rogers-dbt + Issue: "1318" diff --git a/.changes/unreleased/Under the Hood-20231109-095012.yaml b/.changes/unreleased/Under the Hood-20231109-095012.yaml deleted file mode 100644 index a93215e8f..000000000 --- a/.changes/unreleased/Under the Hood-20231109-095012.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Under the Hood -body: Upgrade spark-bigquery Java deps for serverless to 2.13-0.34.0 -time: 2023-11-09T09:50:12.252774-08:00 -custom: - Author: colin-rogers-dbt - Issue: "1006" diff --git a/.changes/unreleased/Under the Hood-20231116-062142.yaml b/.changes/unreleased/Under the Hood-20231116-062142.yaml deleted file mode 100644 index c28270898..000000000 --- a/.changes/unreleased/Under the Hood-20231116-062142.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Under the Hood -body: Primary and foreign key constraints are not enforced in BigQuery -time: 2023-11-16T06:21:42.935367-08:00 -custom: - Author: dbeatty10 - Issue: "1018" diff --git a/.changes/unreleased/Under the Hood-20231117-121214.yaml b/.changes/unreleased/Under the Hood-20231117-121214.yaml deleted file mode 100644 index 
61b0617ad..000000000 --- a/.changes/unreleased/Under the Hood-20231117-121214.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Under the Hood -body: Add tests for --empty flag -time: 2023-11-17T12:12:14.643365-05:00 -custom: - Author: michelleark - Issue: "1029" diff --git a/.changes/unreleased/Under the Hood-20240116-154305.yaml b/.changes/unreleased/Under the Hood-20240116-154305.yaml deleted file mode 100644 index bb115abd6..000000000 --- a/.changes/unreleased/Under the Hood-20240116-154305.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Under the Hood -body: Migrate to dbt-common and dbt-adapters package -time: 2024-01-16T15:43:05.046735-08:00 -custom: - Author: colin-rogers-dbt - Issue: "1071" diff --git a/.changes/unreleased/Under the Hood-20240910-212052.yaml b/.changes/unreleased/Under the Hood-20240910-212052.yaml new file mode 100644 index 000000000..3e4885dcd --- /dev/null +++ b/.changes/unreleased/Under the Hood-20240910-212052.yaml @@ -0,0 +1,6 @@ +kind: Under the Hood +body: Isolating distribution testing +time: 2024-09-10T21:20:52.574204-04:00 +custom: + Author: leahwicz + Issue: "1290" diff --git a/.changes/unreleased/Under the Hood-20241104-173815.yaml b/.changes/unreleased/Under the Hood-20241104-173815.yaml new file mode 100644 index 000000000..e3e81dec1 --- /dev/null +++ b/.changes/unreleased/Under the Hood-20241104-173815.yaml @@ -0,0 +1,7 @@ +kind: Under the Hood +body: Separate credentials functionality into its own module for reuse in retry and + python submissions +time: 2024-11-04T17:38:15.940962-05:00 +custom: + Author: mikealfare + Issue: "1391" diff --git a/.changes/unreleased/Under the Hood-20241107-143856.yaml b/.changes/unreleased/Under the Hood-20241107-143856.yaml new file mode 100644 index 000000000..db8557bf0 --- /dev/null +++ b/.changes/unreleased/Under the Hood-20241107-143856.yaml @@ -0,0 +1,6 @@ +kind: Under the Hood +body: Create a retry factory to simplify retry strategies across dbt-bigquery +time: 2024-11-07T14:38:56.210445-05:00 +custom: + Author: mikealfare osalama + Issue: "1395" diff --git a/.flake8 b/.flake8 deleted file mode 100644 index da7e039fd..000000000 --- a/.flake8 +++ /dev/null @@ -1,16 +0,0 @@ -[flake8] -select = - E - W - F -ignore = - # makes Flake8 work like black - W503, - W504, - # makes Flake8 work like black - E203, - E741, - E501, -exclude = tests -per-file-ignores = - */__init__.py: F401 diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index f6283d123..02ed72d45 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,3 +1,3 @@ # This codeowners file is used to ensure all PRs require reviews from the adapters team -* @dbt-labs/core-adapters +* @dbt-labs/adapters diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 2a6f34492..746dcae22 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -1,8 +1,29 @@ version: 2 updates: - # python dependencies - package-ecosystem: "pip" directory: "/" schedule: interval: "daily" rebase-strategy: "disabled" + ignore: + - dependency-name: "*" + update-types: + - version-update:semver-patch + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" + rebase-strategy: "disabled" + ignore: + - dependency-name: "*" + update-types: + - version-update:semver-patch + - package-ecosystem: "docker" + directory: "/docker" + schedule: + interval: "weekly" + rebase-strategy: "disabled" + ignore: + - dependency-name: "*" + update-types: + - version-update:semver-patch diff --git a/.github/scripts/integration-test-matrix.js 
b/.github/scripts/integration-test-matrix.js index bf7fd2ef7..bebe08569 100644 --- a/.github/scripts/integration-test-matrix.js +++ b/.github/scripts/integration-test-matrix.js @@ -1,6 +1,6 @@ module.exports = ({ context }) => { - const defaultPythonVersion = "3.8"; - const supportedPythonVersions = ["3.8", "3.9", "3.10", "3.11"]; + const defaultPythonVersion = "3.9"; + const supportedPythonVersions = ["3.9", "3.10", "3.11", "3.12"]; const supportedAdapters = ["bigquery"]; // if PR, generate matrix based on files changed and PR labels @@ -44,7 +44,7 @@ module.exports = ({ context }) => { if (labels.includes("test macos") || testAllLabel) { include.push({ - os: "macos-latest", + os: "macos-14", adapter, "python-version": pythonVersion, }); @@ -78,7 +78,7 @@ module.exports = ({ context }) => { // additionally include runs for all adapters, on macos and windows, // but only for the default python version for (const adapter of supportedAdapters) { - for (const operatingSystem of ["windows-latest", "macos-latest"]) { + for (const operatingSystem of ["windows-latest", "macos-14"]) { include.push({ os: operatingSystem, adapter: adapter, diff --git a/.github/scripts/update_dbt_core_branch.sh b/.github/scripts/update_dbt_core_branch.sh deleted file mode 100755 index d28a40c35..000000000 --- a/.github/scripts/update_dbt_core_branch.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -e -set -e - -git_branch=$1 -target_req_file="dev-requirements.txt" -core_req_sed_pattern="s|dbt-core.git.*#egg=dbt-core|dbt-core.git@${git_branch}#egg=dbt-core|g" -postgres_req_sed_pattern="s|dbt-core.git.*#egg=dbt-postgres|dbt-core.git@${git_branch}#egg=dbt-postgres|g" -tests_req_sed_pattern="s|dbt-core.git.*#egg=dbt-tests|dbt-core.git@${git_branch}#egg=dbt-tests|g" -if [[ "$OSTYPE" == darwin* ]]; then - # mac ships with a different version of sed that requires a delimiter arg - sed -i "" "$core_req_sed_pattern" $target_req_file - sed -i "" "$postgres_req_sed_pattern" $target_req_file - sed -i "" "$tests_req_sed_pattern" $target_req_file -else - sed -i "$core_req_sed_pattern" $target_req_file - sed -i "$postgres_req_sed_pattern" $target_req_file - sed -i "$tests_req_sed_pattern" $target_req_file -fi -core_version=$(curl "https://raw.githubusercontent.com/dbt-labs/dbt-core/${git_branch}/core/dbt/version.py" | grep "__version__ = *"|cut -d'=' -f2) -bumpversion --allow-dirty --new-version "$core_version" major diff --git a/.github/scripts/update_dev_dependency_branches.sh b/.github/scripts/update_dev_dependency_branches.sh new file mode 100755 index 000000000..022df6a8a --- /dev/null +++ b/.github/scripts/update_dev_dependency_branches.sh @@ -0,0 +1,21 @@ +#!/bin/bash -e +set -e + + +dbt_adapters_branch=$1 +dbt_core_branch=$2 +dbt_common_branch=$3 +target_req_file="dev-requirements.txt" +core_req_sed_pattern="s|dbt-core.git.*#egg=dbt-core|dbt-core.git@${dbt_core_branch}#egg=dbt-core|g" +adapters_req_sed_pattern="s|dbt-adapters.git|dbt-adapters.git@${dbt_adapters_branch}|g" +common_req_sed_pattern="s|dbt-common.git|dbt-common.git@${dbt_common_branch}|g" +if [[ "$OSTYPE" == darwin* ]]; then + # mac ships with a different version of sed that requires a delimiter arg + sed -i "" "$adapters_req_sed_pattern" $target_req_file + sed -i "" "$core_req_sed_pattern" $target_req_file + sed -i "" "$common_req_sed_pattern" $target_req_file +else + sed -i "$adapters_req_sed_pattern" $target_req_file + sed -i "$core_req_sed_pattern" $target_req_file + sed -i "$common_req_sed_pattern" $target_req_file +fi diff --git 
a/.github/workflows/docs-issue.yml b/.github/workflows/docs-issue.yml index 00a098df8..f49cf517c 100644 --- a/.github/workflows/docs-issue.yml +++ b/.github/workflows/docs-issue.yml @@ -1,19 +1,18 @@ # **what?** -# Open an issue in docs.getdbt.com when a PR is labeled `user docs` +# Open an issue in docs.getdbt.com when an issue is labeled `user docs` and closed as completed # **why?** # To reduce barriers for keeping docs up to date # **when?** -# When a PR is labeled `user docs` and is merged. Runs on pull_request_target to run off the workflow already merged, -# not the workflow that existed on the PR branch. This allows old PRs to get comments. +# When an issue is labeled `user docs` and is closed as completed. Can be labeled before or after the issue is closed. -name: Open issues in docs.getdbt.com repo when a PR is labeled -run-name: "Open an issue in docs.getdbt.com for PR #${{ github.event.pull_request.number }}" +name: Open issues in docs.getdbt.com repo when an issue is labeled +run-name: "Open an issue in docs.getdbt.com for issue #${{ github.event.issue.number }}" on: - pull_request_target: + issues: types: [labeled, closed] defaults: @@ -21,23 +20,22 @@ defaults: shell: bash permissions: - issues: write # opens new issues - pull-requests: write # comments on PRs - + issues: write # comments on issues jobs: open_issues: - # we only want to run this when the PR has been merged or the label in the labeled event is `user docs`. Otherwise it runs the + # we only want to run this when the issue is closed as completed and the label `user docs` has been assigned. + # If this logic does not exist in this workflow, it runs the # risk of duplicaton of issues being created due to merge and label both triggering this workflow to run and neither having # generating the comment before the other runs. This lives here instead of the shared workflow because this is where we # decide if it should run or not. if: | - (github.event.pull_request.merged == true) && - ((github.event.action == 'closed' && contains( github.event.pull_request.labels.*.name, 'user docs')) || + (github.event.issue.state == 'closed' && github.event.issue.state_reason == 'completed') && ( + (github.event.action == 'closed' && contains(github.event.issue.labels.*.name, 'user docs')) || (github.event.action == 'labeled' && github.event.label.name == 'user docs')) uses: dbt-labs/actions/.github/workflows/open-issue-in-repo.yml@main with: issue_repository: "dbt-labs/docs.getdbt.com" - issue_title: "Docs Changes Needed from ${{ github.event.repository.name }} PR #${{ github.event.pull_request.number }}" + issue_title: "Docs Changes Needed from ${{ github.event.repository.name }} Issue #${{ github.event.issue.number }}" issue_body: "At a minimum, update body to include a link to the page on docs.getdbt.com requiring updates and what part(s) of the page you would like to see updated." 
secrets: inherit diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 7df6973a8..a9179f9ce 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -20,6 +20,8 @@ name: Adapter Integration Tests +run-name: "${{ (contains(github.event_name, 'workflow_') && inputs.name) || github.event_name }}: ${{ (contains(github.event_name, 'workflow_') && inputs.adapter_branch) || github.ref_name }} by @${{ github.actor }}" + on: # pushes to release branches push: @@ -31,20 +33,42 @@ on: # all PRs, important to note that `pull_request_target` workflows # will run in the context of the target branch of a PR pull_request_target: + types: [opened, reopened, synchronize, labeled] # manual trigger workflow_dispatch: inputs: - dbt-core-branch: - description: "branch of dbt-core to use in dev-requirements.txt" + name: + description: "Name to associate with run (example: 'dbt-adapters-242')" + required: false + type: string + default: "Adapter Integration Tests" + adapter_branch: + description: "The branch of this adapter repository to use" + type: string required: false + default: "main" + dbt_adapters_branch: + description: "The branch of dbt-adapters to use" type: string + required: false + default: "main" + dbt_core_branch: + description: "The branch of dbt-core to use" + type: string + required: false + default: "main" + dbt_common_branch: + description: "The branch of dbt-common to use" + type: string + required: false + default: "main" # explicitly turn off permissions for `GITHUB_TOKEN` permissions: read-all # will cancel previous workflows triggered by the same event and for the same ref for PRs or same SHA otherwise concurrency: - group: ${{ github.workflow }}-${{ github.event_name }}-${{ contains(github.event_name, 'pull_request') && github.event.pull_request.head.ref || github.sha }} + group: ${{ github.workflow }}-${{ github.event_name }}-${{ contains(github.event_name, 'pull_request') && github.event.pull_request.head.ref || github.sha }}-${{ github.actor }} cancel-in-progress: true # sets default shell to bash, for all operating systems @@ -67,15 +91,22 @@ jobs: run-python-tests: ${{ steps.filter.outputs.bigquery-python }} steps: - - name: Check out the repository (non-PR) - if: github.event_name != 'pull_request_target' - uses: actions/checkout@v3 + - name: Check out the repository (push) + if: github.event_name == 'push' + uses: actions/checkout@v4 + with: + persist-credentials: false + + - name: Check out the repository (workflow_dispatch) + if: github.event_name == 'workflow_dispatch' + uses: actions/checkout@v4 with: persist-credentials: false + ref: ${{ inputs.adapter_branch }} - name: Check out the repository (PR) if: github.event_name == 'pull_request_target' - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: persist-credentials: false ref: ${{ github.event.pull_request.head.sha }} @@ -88,7 +119,7 @@ jobs: # 'false' - if none of changed files matches any of filter rules # also, returns: # `changes` - JSON array with names of all filters matching any of the changed files - uses: dorny/paths-filter@v2 + uses: dorny/paths-filter@v3 id: get-changes with: token: ${{ secrets.GITHUB_TOKEN }} @@ -97,10 +128,12 @@ jobs: - 'dbt/**' - 'tests/**' - 'dev-requirements.txt' + - '.github/**' + - '*.py' - name: Generate integration test matrix id: generate-matrix - uses: actions/github-script@v6 + uses: actions/github-script@v7 env: CHANGES: ${{ steps.get-changes.outputs.changes }} with: @@ -142,22 +175,29 @@ jobs: steps: - 
name: Check out the repository - if: github.event_name != 'pull_request_target' - uses: actions/checkout@v3 + if: github.event_name != 'pull_request_target' && github.event_name != 'workflow_dispatch' + uses: actions/checkout@v4 + with: + persist-credentials: false + + - name: Check out the repository (workflow_dispatch) + if: github.event_name == 'workflow_dispatch' + uses: actions/checkout@v4 with: persist-credentials: false + ref: ${{ inputs.adapter_branch }} - # explicity checkout the branch for the PR, + # explicitly checkout the branch for the PR, # this is necessary for the `pull_request_target` event - name: Check out the repository (PR) if: github.event_name == 'pull_request_target' - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: persist-credentials: false ref: ${{ github.event.pull_request.head.sha }} - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} @@ -168,11 +208,14 @@ jobs: python -m pip --version tox --version - - name: Update dev_requirements.txt - if: inputs.dbt-core-branch != '' + - name: Update Adapters and Core branches (update dev_requirements.txt) + if: github.event_name == 'workflow_dispatch' run: | - pip install bumpversion - ./.github/scripts/update_dbt_core_branch.sh ${{ inputs.dbt-core-branch }} + ./.github/scripts/update_dev_dependency_branches.sh \ + ${{ inputs.dbt_adapters_branch }} \ + ${{ inputs.dbt_core_branch }} \ + ${{ inputs.dbt_common_branch }} + cat dev-requirements.txt - name: Run tox (bigquery) if: matrix.adapter == 'bigquery' @@ -188,23 +231,25 @@ jobs: GCS_BUCKET: dbt-ci run: tox -- --ddtrace - - uses: actions/upload-artifact@v3 - if: always() - with: - name: logs - path: ./logs - - name: Get current date if: always() id: date run: | echo "date=$(date +'%Y-%m-%dT%H_%M_%S')" >> $GITHUB_OUTPUT #no colons allowed for artifacts - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 + if: always() + with: + name: logs_${{ matrix.python-version }}_${{ matrix.os }}_${{ matrix.adapter }}-${{ steps.date.outputs.date }} + path: ./logs + overwrite: true + + - uses: actions/upload-artifact@v4 if: always() with: name: integration_results_${{ matrix.python-version }}_${{ matrix.os }}_${{ matrix.adapter }}-${{ steps.date.outputs.date }}.csv path: integration_results.csv + overwrite: true # python integration tests are slow so we only run them seperately and for a single OS / python version test-python: @@ -223,7 +268,7 @@ jobs: steps: - name: Check out the repository if: github.event_name != 'pull_request_target' - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: persist-credentials: false @@ -231,15 +276,15 @@ jobs: # this is necessary for the `pull_request_target` event - name: Check out the repository (PR) if: github.event_name == 'pull_request_target' - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: persist-credentials: false ref: ${{ github.event.pull_request.head.sha }} - - name: Set up Python 3.8 - uses: actions/setup-python@v4 + - name: Set up Python 3.9 + uses: actions/setup-python@v5 with: - python-version: "3.8" + python-version: "3.9" - name: Install python dependencies run: | diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b13c53e9f..7b82f3e0f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -43,14 +43,14 @@ jobs: steps: - name: Check out the repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: 
persist-credentials: false - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: - python-version: '3.8' + python-version: '3.9' - name: Install python dependencies run: | @@ -58,7 +58,6 @@ jobs: python -m pip install -r dev-requirements.txt python -m pip --version pre-commit --version - mypy --version dbt --version - name: Run pre-comit hooks run: pre-commit run --all-files --show-diff-on-failure @@ -71,7 +70,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.8', '3.9', '3.10', '3.11'] + python-version: ['3.9', '3.10', '3.11', '3.12'] env: TOXENV: "unit" @@ -79,12 +78,12 @@ jobs: steps: - name: Check out the repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: persist-credentials: false - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} @@ -104,11 +103,12 @@ jobs: run: | echo "date=$(date +'%Y-%m-%dT%H_%M_%S')" >> $GITHUB_OUTPUT #no colons allowed for artifacts - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 if: always() with: name: unit_results_${{ matrix.python-version }}-${{ steps.date.outputs.date }}.csv path: unit_results.csv + overwrite: true build: name: build packages @@ -120,14 +120,14 @@ jobs: steps: - name: Check out the repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: persist-credentials: false - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: - python-version: '3.8' + python-version: '3.9' - name: Install python dependencies run: | @@ -156,13 +156,14 @@ jobs: if [[ "$(ls -lh dist/)" == *"a1"* ]]; then export is_alpha=1; fi echo "is_alpha=$is_alpha" >> $GITHUB_OUTPUT - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: dist path: dist/ + overwrite: true test-build: - name: verify packages / python ${{ matrix.python-version }} / ${{ matrix.os }} + name: verify packages / python ${{ matrix.python-version }} / ${{ matrix.os }} / ${{ matrix.dist-type }} if: needs.build.outputs.is_alpha == 0 @@ -173,20 +174,23 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, macos-latest, windows-latest] - python-version: ['3.8', '3.9', '3.10', '3.11'] + os: [ubuntu-latest, macos-14, windows-latest] + python-version: ['3.9', '3.10', '3.11', '3.12'] + dist-type: ["whl", "gz"] steps: - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} + - name: Install python dependencies run: | python -m pip install --user --upgrade pip - python -m pip install --upgrade wheel setuptools twine check-wheel-contents + python -m pip install --upgrade wheel python -m pip --version - - uses: actions/download-artifact@v3 + + - uses: actions/download-artifact@v4 with: name: dist path: dist/ @@ -194,15 +198,10 @@ jobs: - name: Show distributions run: ls -lh dist/ - - name: Install wheel distributions + - name: Install ${{ matrix.dist-type }} distributions run: | - find ./dist/*.whl -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/ - - name: Check wheel distributions - run: | - dbt --version - - name: Install source distributions - run: | - find ./dist/*.gz -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/ - - name: Check source distributions + find ./dist/*.${{ matrix.dist-type }} -maxdepth 1 -type f | xargs python 
-m pip install --force-reinstall --find-links=dist/ + + - name: Check ${{ matrix.dist-type }} distributions run: | - dbt --version + python -c "import dbt.adapters.bigquery" diff --git a/.github/workflows/nightly-release.yml b/.github/workflows/nightly-release.yml index ef210dacd..1dc396154 100644 --- a/.github/workflows/nightly-release.yml +++ b/.github/workflows/nightly-release.yml @@ -20,6 +20,7 @@ on: permissions: contents: write # this is the permission that allows creating a new release + packages: write # allows creating a Docker release as a GitHub package on GHCR defaults: run: @@ -39,7 +40,7 @@ jobs: steps: - name: "Checkout ${{ github.repository }} Branch ${{ env.RELEASE_BRANCH }}" - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: ${{ env.RELEASE_BRANCH }} @@ -57,7 +58,7 @@ jobs: - name: "Audit Version And Parse Into Parts" id: semver - uses: dbt-labs/actions/parse-semver@v1.1.0 + uses: dbt-labs/actions/parse-semver@v1.1.1 with: version: ${{ steps.version-number-sources.outputs.current_version }} @@ -79,7 +80,7 @@ jobs: echo "number=$number" >> $GITHUB_OUTPUT - name: "Audit Nightly Release Version And Parse Into Parts" - uses: dbt-labs/actions/parse-semver@v1.1.0 + uses: dbt-labs/actions/parse-semver@v1.1.1 with: version: ${{ steps.nightly-release-version.outputs.number }} diff --git a/.github/workflows/release-internal.yml b/.github/workflows/release-internal.yml new file mode 100644 index 000000000..dbc740909 --- /dev/null +++ b/.github/workflows/release-internal.yml @@ -0,0 +1,51 @@ +# What? +# +# Tag and release an arbitrary ref. Uploads to an internal archive for further processing. +# +# How? +# +# After checking out and testing the provided ref, the image is built and uploaded. +# +# When? +# +# Manual trigger. + +name: "Release to Cloud" +run-name: "Release to Cloud off of ${{ inputs.ref }}" + +on: + workflow_dispatch: + inputs: + ref: + description: "The ref (sha or branch name) to use" + type: string + default: "main" + required: true + package_test_command: + description: "Package test command" + type: string + default: "python -c \"import dbt.adapters.bigquery\"" + required: true + skip_tests: + description: "Should the tests be skipped? 
(default to false)" + type: boolean + required: true + default: false + +defaults: + run: + shell: bash + +jobs: + invoke-reusable-workflow: + name: "Build and Release Internally" + + uses: "dbt-labs/dbt-release/.github/workflows/internal-archive-release.yml@main" + + with: + package_test_command: "${{ inputs.package_test_command }}" + dbms_name: "bigquery" + ref: "${{ inputs.ref }}" + skip_tests: "${{ inputs.skip_tests }}" + + secrets: "inherit" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 88942e251..ad7cf76b4 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -13,7 +13,8 @@ # # **when?** # This workflow can be run manually on demand or can be called by other workflows -name: Release to GitHub and PyPI +name: "Release to GitHub, PyPI, and Docker" +run-name: "Release ${{ inputs.version_number }} to GitHub, PyPI, and Docker" on: workflow_dispatch: @@ -60,6 +61,11 @@ on: type: boolean default: false required: false + only_docker: + description: "Only release Docker image, skip GitHub & PyPI" + type: boolean + default: false + required: false workflow_call: inputs: sha: @@ -128,12 +134,11 @@ jobs: echo Package test command: ${{ inputs.package_test_command }} echo Test run: ${{ inputs.test_run }} echo Nightly release: ${{ inputs.nightly_release }} + echo Only Docker: ${{ inputs.only_docker }} bump-version-generate-changelog: name: Bump package version, Generate changelog - uses: dbt-labs/dbt-release/.github/workflows/release-prep.yml@main - with: sha: ${{ inputs.sha }} version_number: ${{ inputs.version_number }} @@ -141,17 +146,13 @@ jobs: env_setup_script_path: ${{ inputs.env_setup_script_path }} test_run: ${{ inputs.test_run }} nightly_release: ${{ inputs.nightly_release }} - secrets: inherit log-outputs-bump-version-generate-changelog: name: "[Log output] Bump package version, Generate changelog" - if: ${{ !failure() && !cancelled() }} - + if: ${{ !failure() && !cancelled() && !inputs.only_docker }} needs: [bump-version-generate-changelog] - runs-on: ubuntu-latest - steps: - name: Print variables run: | @@ -160,11 +161,9 @@ jobs: build-test-package: name: Build, Test, Package - if: ${{ !failure() && !cancelled() }} + if: ${{ !failure() && !cancelled() && !inputs.only_docker }} needs: [bump-version-generate-changelog] - uses: dbt-labs/dbt-release/.github/workflows/build.yml@main - with: sha: ${{ needs.bump-version-generate-changelog.outputs.final_sha }} version_number: ${{ inputs.version_number }} @@ -174,19 +173,15 @@ jobs: package_test_command: ${{ inputs.package_test_command }} test_run: ${{ inputs.test_run }} nightly_release: ${{ inputs.nightly_release }} - secrets: AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} github-release: name: GitHub Release - if: ${{ !failure() && !cancelled() }} - + if: ${{ !failure() && !cancelled() && !inputs.only_docker }} needs: [bump-version-generate-changelog, build-test-package] - uses: dbt-labs/dbt-release/.github/workflows/github-release.yml@main - with: sha: ${{ needs.bump-version-generate-changelog.outputs.final_sha }} version_number: ${{ inputs.version_number }} @@ -195,34 +190,41 @@ jobs: pypi-release: name: PyPI Release - - needs: [github-release] - + if: ${{ !failure() && !cancelled() && !inputs.only_docker }} + needs: [bump-version-generate-changelog, build-test-package] uses: dbt-labs/dbt-release/.github/workflows/pypi-release.yml@main - with: version_number: ${{ inputs.version_number }} test_run: ${{ 
inputs.test_run }} - secrets: PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }} TEST_PYPI_API_TOKEN: ${{ secrets.TEST_PYPI_API_TOKEN }} + docker-release: + name: "Docker Release" + # We cannot release to docker on a test run because it uses the tag in GitHub as + # what we need to release but draft releases don't actually tag the commit so it + # finds nothing to release + if: ${{ !failure() && !cancelled() && (!inputs.test_run || inputs.only_docker) }} + needs: [bump-version-generate-changelog, build-test-package, github-release] + permissions: + packages: write + uses: dbt-labs/dbt-release/.github/workflows/release-docker.yml@main + with: + version_number: ${{ inputs.version_number }} + test_run: ${{ inputs.test_run }} + slack-notification: name: Slack Notification if: ${{ failure() && (!inputs.test_run || inputs.nightly_release) }} - needs: [ - bump-version-generate-changelog, - build-test-package, github-release, pypi-release, + docker-release, ] - uses: dbt-labs/dbt-release/.github/workflows/slack-post-notification.yml@main with: status: "failure" - secrets: SLACK_WEBHOOK_URL: ${{ secrets.SLACK_DEV_ADAPTER_ALERTS }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a98ba0417..16760bf07 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,62 +1,58 @@ # For more on configuring pre-commit hooks (see https://pre-commit.com/) - default_language_version: - python: python3 + python: python3 repos: -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 - hooks: - - id: check-yaml - args: [--unsafe] - - id: check-json - - id: end-of-file-fixer - - id: trailing-whitespace - - id: check-case-conflict -- repo: https://github.com/psf/black - rev: 23.1.0 - hooks: - - id: black - additional_dependencies: ['click~=8.1'] - args: - - "--line-length=99" - - "--target-version=py38" - - id: black - alias: black-check - stages: [manual] - additional_dependencies: ['click~=8.1'] - args: - - "--line-length=99" - - "--target-version=py38" - - "--check" - - "--diff" -- repo: https://github.com/pycqa/flake8 - rev: 6.0.0 - hooks: - - id: flake8 - - id: flake8 - alias: flake8-check - stages: [manual] -- repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.1.1 - hooks: - - id: mypy - # N.B.: Mypy is... a bit fragile. - # - # By using `language: system` we run this hook in the local - # environment instead of a pre-commit isolated one. This is needed - # to ensure mypy correctly parses the project. +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: check-yaml + args: [--unsafe] + - id: check-json + - id: end-of-file-fixer + - id: trailing-whitespace + - id: check-case-conflict + +- repo: https://github.com/dbt-labs/pre-commit-hooks + rev: v0.1.0a1 + hooks: + - id: dbt-core-in-adapters-check + +- repo: https://github.com/psf/black + rev: 24.4.2 + hooks: + - id: black + args: + - --line-length=99 + - --target-version=py39 + - --target-version=py310 + - --target-version=py311 + - --target-version=py312 + additional_dependencies: [flaky] + +- repo: https://github.com/pycqa/flake8 + rev: 7.0.0 + hooks: + - id: flake8 + exclude: tests/ + args: + - --max-line-length=99 + - --select=E,F,W + - --ignore=E203,E501,E741,W503,W504 + - --per-file-ignores=*/__init__.py:F401 - # It may cause trouble in that it adds environmental variables out - # of our control to the mix. Unfortunately, there's nothing we can - # do about per pre-commit's author. - # See https://github.com/pre-commit/pre-commit/issues/730 for details. 
- args: [--show-error-codes, --ignore-missing-imports, --explicit-package-bases] - files: ^dbt/adapters/.* - language: system - - id: mypy - alias: mypy-check - stages: [manual] - args: [--show-error-codes, --pretty, --ignore-missing-imports, --explicit-package-bases] - files: ^dbt/adapters - language: system +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.10.0 + hooks: + - id: mypy + args: + - --explicit-package-bases + - --ignore-missing-imports + - --pretty + - --show-error-codes + - --warn-unused-ignores + files: ^dbt/adapters/bigquery + additional_dependencies: + - types-protobuf + - types-pytz + - types-requests diff --git a/CHANGELOG.md b/CHANGELOG.md index ade60b8f6..b9bda350a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,52 @@ - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version. - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-bigquery/blob/main/CONTRIBUTING.md#adding-changelog-entry) +## dbt-bigquery 1.9.0-b1 - October 02, 2024 + +### Features + +- Add configuration options `enable_list_inference` and `intermediate_format` for python models ([#1047](https://github.com/dbt-labs/dbt-bigquery/issues/1047), [#1114](https://github.com/dbt-labs/dbt-bigquery/issues/1114)) +- Add tests for cross-database `cast` macro ([#1214](https://github.com/dbt-labs/dbt-bigquery/issues/1214)) +- Cross-database `date` macro ([#1221](https://github.com/dbt-labs/dbt-bigquery/issues/1221)) +- Add support for base 64 encoded json keyfile credentials ([#923](https://github.com/dbt-labs/dbt-bigquery/issues/923)) +- Add support for cancelling queries on keyboard interrupt ([#917](https://github.com/dbt-labs/dbt-bigquery/issues/917)) +- Add Microbatch Strategy to dbt-spark ([#1354](https://github.com/dbt-labs/dbt-bigquery/issues/1354)) + +### Fixes + +- Drop intermediate objects created in BigQuery for incremental models ([#1036](https://github.com/dbt-labs/dbt-bigquery/issues/1036)) +- Fix null column index issue during `dbt docs generate` for external tables ([#1079](https://github.com/dbt-labs/dbt-bigquery/issues/1079)) +- make seed delimiter configurable via `field_delimeter` in model config ([#1119](https://github.com/dbt-labs/dbt-bigquery/issues/1119)) +- Default `enableListInference` to `True` for python models to support nested lists ([#1047](https://github.com/dbt-labs/dbt-bigquery/issues/1047), [#1114](https://github.com/dbt-labs/dbt-bigquery/issues/1114)) +- Catch additional database error exception, NotFound, as a DbtDatabaseError instead of defaulting to a DbtRuntimeError ([#1360](https://github.com/dbt-labs/dbt-bigquery/issues/1360)) + +### Under the Hood + +- Lazy load `agate` ([#1162](https://github.com/dbt-labs/dbt-bigquery/issues/1162)) +- Simplify linting environment and dev dependencies ([#1291](https://github.com/dbt-labs/dbt-bigquery/issues/1291)) + +### Dependencies + +- Update pre-commit requirement from ~=3.5 to ~=3.7 ([#1052](https://github.com/dbt-labs/dbt-bigquery/pull/1052)) +- Update freezegun requirement from ~=1.3 to ~=1.4 ([#1062](https://github.com/dbt-labs/dbt-bigquery/pull/1062)) +- Bump mypy from 1.7.1 to 1.8.0 ([#1064](https://github.com/dbt-labs/dbt-bigquery/pull/1064)) +- Update flake8 requirement from ~=6.1 to ~=7.0 ([#1069](https://github.com/dbt-labs/dbt-bigquery/pull/1069)) +- Bump 
actions/download-artifact from 3 to 4 ([#1209](https://github.com/dbt-labs/dbt-bigquery/pull/1209)) +- Bump actions/upload-artifact from 3 to 4 ([#1210](https://github.com/dbt-labs/dbt-bigquery/pull/1210)) +- Bump ubuntu from 22.04 to 24.04 in /docker ([#1247](https://github.com/dbt-labs/dbt-bigquery/pull/1247)) +- Update pre-commit-hooks requirement from ~=4.5 to ~=4.6 ([#1281](https://github.com/dbt-labs/dbt-bigquery/pull/1281)) +- Update pytest-xdist requirement from ~=3.5 to ~=3.6 ([#1282](https://github.com/dbt-labs/dbt-bigquery/pull/1282)) +- Update flaky requirement from ~=3.7 to ~=3.8 ([#1283](https://github.com/dbt-labs/dbt-bigquery/pull/1283)) +- Update twine requirement from ~=4.0 to ~=5.1 ([#1293](https://github.com/dbt-labs/dbt-bigquery/pull/1293)) + +### Contributors +- [@d-cole](https://github.com/d-cole) ([#917](https://github.com/dbt-labs/dbt-bigquery/issues/917)) +- [@dwreeves](https://github.com/dwreeves) ([#1162](https://github.com/dbt-labs/dbt-bigquery/issues/1162)) +- [@robeleb1](https://github.com/robeleb1) ([#923](https://github.com/dbt-labs/dbt-bigquery/issues/923)) +- [@salimmoulouel](https://github.com/salimmoulouel) ([#1119](https://github.com/dbt-labs/dbt-bigquery/issues/1119)) +- [@vinit2107](https://github.com/vinit2107) ([#1036](https://github.com/dbt-labs/dbt-bigquery/issues/1036)) + + ## Previous Releases For information on prior major and minor releases, see their changelogs: - [1.6](https://github.com/dbt-labs/dbt-bigquery/blob/1.6.latest/CHANGELOG.md) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e9432d363..f915af713 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -54,7 +54,7 @@ To confirm you have the correct version of `dbt-core` installed please run `dbt ### Initial Setup -`dbt-bigquery` contains [unit](https://github.com/dbt-labs/dbt-bigquery/tree/main/tests/unit) and [integration](https://github.com/dbt-labs/dbt-bigquery/tree/main/tests/integration) tests. Integration tests require testing against an actual BigQuery warehouse. We have CI set up to test against a BigQuery warehouse. In order to run integration tests locally, you will need a `test.env` file in the root of the repository that contains credentials for BigQuery. +`dbt-bigquery` contains [unit](https://github.com/dbt-labs/dbt-bigquery/tree/main/tests/unit) and [functional](https://github.com/dbt-labs/dbt-bigquery/tree/main/tests/functional) tests. functional tests require testing against an actual BigQuery warehouse. We have CI set up to test against a BigQuery warehouse. In order to run functional tests locally, you will need a `test.env` file in the root of the repository that contains credentials for BigQuery. Note: This `test.env` file is git-ignored, but please be _extra_ careful to never check in credentials or other sensitive information when developing. To create your `test.env` file, copy the provided example file, then supply your relevant credentials. @@ -67,7 +67,7 @@ $EDITOR test.env There are a few methods for running tests locally. #### `tox` -`tox` takes care of managing Python virtualenvs and installing dependencies in order to run tests. You can also run tests in parallel, for example you can run unit tests for Python 3.8, Python 3.9, and `flake8` checks in parallel with `tox -p`. Also, you can run unit tests for specific python versions with `tox -e py38`. The configuration of these tests are located in `tox.ini`. +`tox` takes care of managing Python virtualenvs and installing dependencies in order to run tests. 
You can also run tests in parallel, for example, you can run unit tests for Python 3.9, Python 3.10, and Python 3.11 in parallel with `tox -p`. Also, you can run unit tests for specific Python versions with `tox -e py39`. The configuration of these tests is located in `tox.ini`. #### `pytest` Finally, you can also run a specific test or group of tests using `pytest` directly. With a Python virtualenv active and dev dependencies installed you can do things like: @@ -104,6 +104,6 @@ You don't need to worry about which `dbt-bigquery` version your change will go i dbt Labs provides a CI environment to test changes to the `dbt-bigquery` adapter and periodic checks against the development version of `dbt-core` through Github Actions. -A `dbt-bigquery` maintainer will review your PR. They may suggest code revision for style or clarity, or request that you add unit or integration test(s). These are good things! We believe that, with a little bit of help, anyone can contribute high-quality code. +A `dbt-bigquery` maintainer will review your PR. They may suggest code revision for style or clarity, or request that you add unit or functional test(s). These are good things! We believe that, with a little bit of help, anyone can contribute high-quality code. Once all tests are passing, you have updated the changelog to reflect and tag your issue/pr for reference with a small description of the change, and your PR has been approved, a `dbt-bigquery` maintainer will merge your changes into the active development branch. And that's it! Happy developing :tada: diff --git a/Makefile b/Makefile index fc6536f98..bdacb538b 100644 --- a/Makefile +++ b/Makefile @@ -11,17 +11,11 @@ dev-uninstall: ## Uninstalls all packages while maintaining the virtual environm pip freeze | grep -v "^-e" | cut -d "@" -f1 | xargs pip uninstall -y pip uninstall -y dbt-bigquery -.PHONY: ubuntu-py311 -ubuntu-py311: ## Builds and runs an Ubuntu Python 3.11 development container - docker build -f docker_dev/ubuntu.Dockerfile -t dbt-bigquery-ubuntu-py311 . - docker run --rm -it --name dbt-bigquery-ubuntu-py311 -v $(shell pwd):/opt/code dbt-bigquery-ubuntu-py311 +.PHONY: docker-dev +docker-dev: + docker build -f docker/dev.Dockerfile -t dbt-bigquery-dev . + docker run --rm -it --name dbt-bigquery-dev -v $(shell pwd):/opt/code dbt-bigquery-dev -.PHONY: ubuntu-py39 -ubuntu-py39: ## Builds and runs an Ubuntu Python 3.9 development container - docker build -f docker_dev/ubuntu.Dockerfile -t dbt-bigquery-ubuntu-py39 . --build-arg version=3.9 - docker run --rm -it --name dbt-bigquery-ubuntu-py39 -v $(shell pwd):/opt/code dbt-bigquery-ubuntu-py39 - -.PHONY: ubuntu-py38 -ubuntu-py38: ## Builds and runs an Ubuntu Python 3.8 development container - docker build -f docker_dev/ubuntu.Dockerfile -t dbt-bigquery-ubuntu-py38 . --build-arg version=3.8 - docker run --rm -it --name dbt-bigquery-ubuntu-py38 -v $(shell pwd):/opt/code dbt-bigquery-ubuntu-py38 +.PHONY: docker-prod +docker-prod: + docker build -f docker/Dockerfile -t dbt-bigquery .
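The changelog above lists new support for base64-encoded JSON keyfile credentials; the decoding side lands later in this diff as `_is_base64` and `_base64_to_string` in `dbt/adapters/bigquery/credentials.py`. As a minimal sketch of the encoding side (not part of the patch; the helper name and file path are illustrative only), a service-account keyfile could be prepared for that code path roughly like this:

```python
import base64
import json
import pathlib


def encode_keyfile(path: str) -> str:
    """Hypothetical helper: base64-encode a service-account keyfile so it can be
    supplied where the adapter accepts base64-encoded keyfile JSON."""
    raw = pathlib.Path(path).read_bytes()
    json.loads(raw)  # fail fast if the file is not valid JSON
    return base64.b64encode(raw).decode("ascii")


# Example (the filename is illustrative):
# encoded = encode_keyfile("service-account.json")
```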
diff --git a/dbt/adapters/bigquery/__init__.py b/dbt/adapters/bigquery/__init__.py index adbd67590..74fa17cda 100644 --- a/dbt/adapters/bigquery/__init__.py +++ b/dbt/adapters/bigquery/__init__.py @@ -1,12 +1,12 @@ -from dbt.adapters.bigquery.connections import BigQueryConnectionManager # noqa -from dbt.adapters.bigquery.connections import BigQueryCredentials -from dbt.adapters.bigquery.relation import BigQueryRelation # noqa -from dbt.adapters.bigquery.column import BigQueryColumn # noqa -from dbt.adapters.bigquery.impl import BigQueryAdapter, GrantTarget, PartitionConfig # noqa +from dbt.adapters.bigquery.column import BigQueryColumn +from dbt.adapters.bigquery.connections import BigQueryConnectionManager +from dbt.adapters.bigquery.credentials import BigQueryCredentials +from dbt.adapters.bigquery.impl import BigQueryAdapter, GrantTarget, PartitionConfig +from dbt.adapters.bigquery.relation import BigQueryRelation from dbt.adapters.base import AdapterPlugin from dbt.include import bigquery Plugin = AdapterPlugin( - adapter=BigQueryAdapter, credentials=BigQueryCredentials, include_path=bigquery.PACKAGE_PATH # type: ignore[arg-type] + adapter=BigQueryAdapter, credentials=BigQueryCredentials, include_path=bigquery.PACKAGE_PATH ) diff --git a/dbt/adapters/bigquery/__version__.py b/dbt/adapters/bigquery/__version__.py index f15b401d1..a4077fff2 100644 --- a/dbt/adapters/bigquery/__version__.py +++ b/dbt/adapters/bigquery/__version__.py @@ -1 +1 @@ -version = "1.8.0a1" +version = "1.9.0b1" diff --git a/dbt/adapters/bigquery/clients.py b/dbt/adapters/bigquery/clients.py new file mode 100644 index 000000000..18c59fc12 --- /dev/null +++ b/dbt/adapters/bigquery/clients.py @@ -0,0 +1,69 @@ +from google.api_core.client_info import ClientInfo +from google.api_core.client_options import ClientOptions +from google.api_core.retry import Retry +from google.auth.exceptions import DefaultCredentialsError +from google.cloud.bigquery import Client as BigQueryClient +from google.cloud.dataproc_v1 import BatchControllerClient, JobControllerClient +from google.cloud.storage import Client as StorageClient + +from dbt.adapters.events.logging import AdapterLogger + +import dbt.adapters.bigquery.__version__ as dbt_version +from dbt.adapters.bigquery.credentials import ( + BigQueryCredentials, + create_google_credentials, + set_default_credentials, +) + + +_logger = AdapterLogger("BigQuery") + + +def create_bigquery_client(credentials: BigQueryCredentials) -> BigQueryClient: + try: + return _create_bigquery_client(credentials) + except DefaultCredentialsError: + _logger.info("Please log into GCP to continue") + set_default_credentials() + return _create_bigquery_client(credentials) + + +@Retry() # google decorator. retries on transient errors with exponential backoff +def create_gcs_client(credentials: BigQueryCredentials) -> StorageClient: + return StorageClient( + project=credentials.execution_project, + credentials=create_google_credentials(credentials), + ) + + +@Retry() # google decorator. retries on transient errors with exponential backoff +def create_dataproc_job_controller_client(credentials: BigQueryCredentials) -> JobControllerClient: + return JobControllerClient( + credentials=create_google_credentials(credentials), + client_options=ClientOptions(api_endpoint=_dataproc_endpoint(credentials)), + ) + + +@Retry() # google decorator. 
retries on transient errors with exponential backoff +def create_dataproc_batch_controller_client( + credentials: BigQueryCredentials, +) -> BatchControllerClient: + return BatchControllerClient( + credentials=create_google_credentials(credentials), + client_options=ClientOptions(api_endpoint=_dataproc_endpoint(credentials)), + ) + + +@Retry() # google decorator. retries on transient errors with exponential backoff +def _create_bigquery_client(credentials: BigQueryCredentials) -> BigQueryClient: + return BigQueryClient( + credentials.execution_project, + create_google_credentials(credentials), + location=getattr(credentials, "location", None), + client_info=ClientInfo(user_agent=f"dbt-bigquery-{dbt_version.version}"), + client_options=ClientOptions(quota_project_id=credentials.quota_project), + ) + + +def _dataproc_endpoint(credentials: BigQueryCredentials) -> str: + return f"{credentials.dataproc_region}-dataproc.googleapis.com:443" diff --git a/dbt/adapters/bigquery/column.py b/dbt/adapters/bigquery/column.py index 1bdf4323d..a676fef4b 100644 --- a/dbt/adapters/bigquery/column.py +++ b/dbt/adapters/bigquery/column.py @@ -1,9 +1,10 @@ from dataclasses import dataclass -from typing import Optional, List, TypeVar, Iterable, Type, Any, Dict, Union +from typing import Any, Dict, Iterable, List, Optional, Type, TypeVar, Union + +from google.cloud.bigquery import SchemaField from dbt.adapters.base.column import Column -from google.cloud.bigquery import SchemaField _PARENT_DATA_TYPE_KEY = "__parent_data_type" @@ -18,7 +19,7 @@ class BigQueryColumn(Column): "INTEGER": "INT64", } fields: List[Self] # type: ignore - mode: str # type: ignore + mode: str def __init__( self, @@ -110,7 +111,7 @@ def is_numeric(self) -> bool: def is_float(self): return self.dtype.lower() == "float64" - def can_expand_to(self: Self, other_column: Self) -> bool: # type: ignore + def can_expand_to(self: Self, other_column: Self) -> bool: """returns True if both columns are strings""" return self.is_string() and other_column.is_string() @@ -124,7 +125,7 @@ def column_to_bq_schema(self) -> SchemaField: fields = [field.column_to_bq_schema() for field in self.fields] # type: ignore[attr-defined] kwargs = {"fields": fields} - return SchemaField(self.name, self.dtype, self.mode, **kwargs) # type: ignore[arg-type] + return SchemaField(self.name, self.dtype, self.mode, **kwargs) def get_nested_column_data_types( diff --git a/dbt/adapters/bigquery/connections.py b/dbt/adapters/bigquery/connections.py index 1e4708f0b..61fa87d40 100644 --- a/dbt/adapters/bigquery/connections.py +++ b/dbt/adapters/bigquery/connections.py @@ -1,92 +1,56 @@ +from collections import defaultdict +from concurrent.futures import TimeoutError +from contextlib import contextmanager +from dataclasses import dataclass import json +from multiprocessing.context import SpawnContext import re -from contextlib import contextmanager -from dataclasses import dataclass, field - -from dbt_common.invocation import get_invocation_id - -from dbt_common.events.contextvars import get_node_info -from mashumaro.helper import pass_through - -from functools import lru_cache -import agate -from requests.exceptions import ConnectionError -from typing import Optional, Any, Dict, Tuple - -import google.auth -import google.auth.exceptions -import google.cloud.bigquery -import google.cloud.exceptions -from google.api_core import retry, client_info -from google.auth import impersonated_credentials -from google.oauth2 import ( - credentials as GoogleCredentials, - service_account as 
GoogleServiceAccountCredentials, -) - -from dbt.adapters.bigquery import gcloud -from dbt_common.clients import agate_helper -from dbt.adapters.contracts.connection import ConnectionState, AdapterResponse, Credentials -from dbt_common.exceptions import ( - DbtRuntimeError, - DbtConfigError, +from typing import Dict, Hashable, List, Optional, Tuple, TYPE_CHECKING +import uuid + +from google.auth.exceptions import RefreshError +from google.cloud.bigquery import ( + Client, + CopyJobConfig, + Dataset, + DatasetReference, + LoadJobConfig, + QueryJobConfig, + QueryPriority, + SchemaField, + Table, + TableReference, ) +from google.cloud.exceptions import BadRequest, Forbidden, NotFound -from dbt_common.exceptions import DbtDatabaseError -from dbt.adapters.exceptions.connection import FailedToConnectError +from dbt_common.events.contextvars import get_node_info +from dbt_common.events.functions import fire_event +from dbt_common.exceptions import DbtDatabaseError, DbtRuntimeError +from dbt_common.invocation import get_invocation_id from dbt.adapters.base import BaseConnectionManager +from dbt.adapters.contracts.connection import ( + AdapterRequiredConfig, + AdapterResponse, + ConnectionState, +) from dbt.adapters.events.logging import AdapterLogger from dbt.adapters.events.types import SQLQuery -from dbt_common.events.functions import fire_event -from dbt.adapters.bigquery import __version__ as dbt_version - -from dbt_common.dataclass_schema import ExtensibleDbtClassMixin, StrEnum - -logger = AdapterLogger("BigQuery") - -BQ_QUERY_JOB_SPLIT = "-----Query Job SQL Follows-----" - -WRITE_TRUNCATE = google.cloud.bigquery.job.WriteDisposition.WRITE_TRUNCATE - -REOPENABLE_ERRORS = ( - ConnectionResetError, - ConnectionError, -) - -RETRYABLE_ERRORS = ( - google.cloud.exceptions.ServerError, - google.cloud.exceptions.BadRequest, - google.cloud.exceptions.BadGateway, - ConnectionResetError, - ConnectionError, -) - +from dbt.adapters.exceptions.connection import FailedToConnectError -@lru_cache() -def get_bigquery_defaults(scopes=None) -> Tuple[Any, Optional[str]]: - """ - Returns (credentials, project_id) +from dbt.adapters.bigquery.clients import create_bigquery_client +from dbt.adapters.bigquery.credentials import Priority +from dbt.adapters.bigquery.retry import RetryFactory - project_id is returned available from the environment; otherwise None - """ - # Cached, because the underlying implementation shells out, taking ~1s - try: - credentials, _ = google.auth.default(scopes=scopes) - return credentials, _ - except google.auth.exceptions.DefaultCredentialsError as e: - raise DbtConfigError(f"Failed to authenticate with supplied credentials\nerror:\n{e}") +if TYPE_CHECKING: + # Indirectly imported via agate_helper, which is lazy loaded further downfile. + # Used by mypy for earlier type hints. 
+ import agate -class Priority(StrEnum): - Interactive = "interactive" - Batch = "batch" +logger = AdapterLogger("BigQuery") -class BigQueryConnectionMethod(StrEnum): - OAUTH = "oauth" - SERVICE_ACCOUNT = "service-account" - SERVICE_ACCOUNT_JSON = "service-account-json" - OAUTH_SECRETS = "oauth-secrets" +BQ_QUERY_JOB_SPLIT = "-----Query Job SQL Follows-----" @dataclass @@ -99,133 +63,13 @@ class BigQueryAdapterResponse(AdapterResponse): slot_ms: Optional[int] = None -@dataclass -class DataprocBatchConfig(ExtensibleDbtClassMixin): - def __init__(self, batch_config): - self.batch_config = batch_config - - -@dataclass -class BigQueryCredentials(Credentials): - method: BigQueryConnectionMethod = None # type: ignore - - # BigQuery allows an empty database / project, where it defers to the - # environment for the project - database: Optional[str] = None # type: ignore - schema: Optional[str] = None # type: ignore - execution_project: Optional[str] = None - location: Optional[str] = None - priority: Optional[Priority] = None - maximum_bytes_billed: Optional[int] = None - impersonate_service_account: Optional[str] = None - - job_retry_deadline_seconds: Optional[int] = None - job_retries: Optional[int] = 1 - job_creation_timeout_seconds: Optional[int] = None - job_execution_timeout_seconds: Optional[int] = None - - # Keyfile json creds - keyfile: Optional[str] = None - keyfile_json: Optional[Dict[str, Any]] = None - - # oauth-secrets - token: Optional[str] = None - refresh_token: Optional[str] = None - client_id: Optional[str] = None - client_secret: Optional[str] = None - token_uri: Optional[str] = None - - dataproc_region: Optional[str] = None - dataproc_cluster_name: Optional[str] = None - gcs_bucket: Optional[str] = None - - dataproc_batch: Optional[DataprocBatchConfig] = field( - metadata={ - "serialization_strategy": pass_through, - }, - default=None, - ) - - scopes: Optional[Tuple[str, ...]] = ( - "https://www.googleapis.com/auth/bigquery", - "https://www.googleapis.com/auth/cloud-platform", - "https://www.googleapis.com/auth/drive", - ) - - _ALIASES = { - # 'legacy_name': 'current_name' - "project": "database", - "dataset": "schema", - "target_project": "target_database", - "target_dataset": "target_schema", - "retries": "job_retries", - "timeout_seconds": "job_execution_timeout_seconds", - } - - def __post_init__(self): - if self.keyfile_json and "private_key" in self.keyfile_json: - self.keyfile_json["private_key"] = self.keyfile_json["private_key"].replace( - "\\n", "\n" - ) - if not self.method: - raise DbtRuntimeError("Must specify authentication method") - - if not self.schema: - raise DbtRuntimeError("Must specify schema") - - @property - def type(self): - return "bigquery" - - @property - def unique_field(self): - return self.database - - def _connection_keys(self): - return ( - "method", - "database", - "execution_project", - "schema", - "location", - "priority", - "maximum_bytes_billed", - "impersonate_service_account", - "job_retry_deadline_seconds", - "job_retries", - "job_creation_timeout_seconds", - "job_execution_timeout_seconds", - "keyfile", - "timeout_seconds", - "client_id", - "token_uri", - "dataproc_region", - "dataproc_cluster_name", - "gcs_bucket", - "dataproc_batch", - ) - - @classmethod - def __pre_deserialize__(cls, d: Dict[Any, Any]) -> Dict[Any, Any]: - # We need to inject the correct value of the database (aka project) at - # this stage, ref - # https://github.com/dbt-labs/dbt/pull/2908#discussion_r532927436. 
- - # `database` is an alias of `project` in BigQuery - if "database" not in d: - _, database = get_bigquery_defaults() - d["database"] = database - # `execution_project` default to dataset/project - if "execution_project" not in d: - d["execution_project"] = d["database"] - return d - - class BigQueryConnectionManager(BaseConnectionManager): TYPE = "bigquery" - DEFAULT_INITIAL_DELAY = 1.0 # Seconds - DEFAULT_MAXIMUM_DELAY = 3.0 # Seconds + def __init__(self, profile: AdapterRequiredConfig, mp_context: SpawnContext): + super().__init__(profile, mp_context) + self.jobs_by_thread: Dict[Hashable, List[str]] = defaultdict(list) + self._retry = RetryFactory(profile.credentials) @classmethod def handle_error(cls, error, message): @@ -246,15 +90,19 @@ def exception_handler(self, sql): try: yield - except google.cloud.exceptions.BadRequest as e: + except BadRequest as e: message = "Bad request while running query" self.handle_error(e, message) - except google.cloud.exceptions.Forbidden as e: + except Forbidden as e: message = "Access denied while running query" self.handle_error(e, message) - except google.auth.exceptions.RefreshError as e: + except NotFound as e: + message = "Not found while running query" + self.handle_error(e, message) + + except RefreshError as e: message = ( "Unable to generate access token, if you're using " "impersonate_service_account, make sure your " @@ -280,11 +128,31 @@ def exception_handler(self, sql): exc_message = exc_message.split(BQ_QUERY_JOB_SPLIT)[0].strip() raise DbtRuntimeError(exc_message) - def cancel_open(self) -> None: - pass + def cancel_open(self): + names = [] + this_connection = self.get_if_exists() + with self.lock: + for thread_id, connection in self.thread_connections.items(): + if connection is this_connection: + continue + + if connection.handle is not None and connection.state == ConnectionState.OPEN: + client: Client = connection.handle + for job_id in self.jobs_by_thread.get(thread_id, []): + with self.exception_handler(f"Cancel job: {job_id}"): + client.cancel_job( + job_id, + retry=self._retry.create_reopen_with_deadline(connection), + ) + self.close(connection) + + if connection.name is not None: + names.append(connection.name) + return names @classmethod def close(cls, connection): + connection.handle.close() connection.state = ConnectionState.CLOSED return connection @@ -317,118 +185,27 @@ def format_rows_number(self, rows_number): rows_number *= 1000.0 return f"{rows_number:3.1f}{unit}".strip() - @classmethod - def get_google_credentials(cls, profile_credentials) -> GoogleCredentials: - method = profile_credentials.method - creds = GoogleServiceAccountCredentials.Credentials - - if method == BigQueryConnectionMethod.OAUTH: - credentials, _ = get_bigquery_defaults(scopes=profile_credentials.scopes) - return credentials - - elif method == BigQueryConnectionMethod.SERVICE_ACCOUNT: - keyfile = profile_credentials.keyfile - return creds.from_service_account_file(keyfile, scopes=profile_credentials.scopes) - - elif method == BigQueryConnectionMethod.SERVICE_ACCOUNT_JSON: - details = profile_credentials.keyfile_json - return creds.from_service_account_info(details, scopes=profile_credentials.scopes) - - elif method == BigQueryConnectionMethod.OAUTH_SECRETS: - return GoogleCredentials.Credentials( - token=profile_credentials.token, - refresh_token=profile_credentials.refresh_token, - client_id=profile_credentials.client_id, - client_secret=profile_credentials.client_secret, - token_uri=profile_credentials.token_uri, - 
scopes=profile_credentials.scopes, - ) - - error = 'Invalid `method` in profile: "{}"'.format(method) - raise FailedToConnectError(error) - - @classmethod - def get_impersonated_credentials(cls, profile_credentials): - source_credentials = cls.get_google_credentials(profile_credentials) - return impersonated_credentials.Credentials( - source_credentials=source_credentials, - target_principal=profile_credentials.impersonate_service_account, - target_scopes=list(profile_credentials.scopes), - ) - - @classmethod - def get_credentials(cls, profile_credentials): - if profile_credentials.impersonate_service_account: - return cls.get_impersonated_credentials(profile_credentials) - else: - return cls.get_google_credentials(profile_credentials) - - @classmethod - @retry.Retry() # google decorator. retries on transient errors with exponential backoff - def get_bigquery_client(cls, profile_credentials): - creds = cls.get_credentials(profile_credentials) - execution_project = profile_credentials.execution_project - location = getattr(profile_credentials, "location", None) - - info = client_info.ClientInfo(user_agent=f"dbt-bigquery-{dbt_version.version}") - return google.cloud.bigquery.Client( - execution_project, - creds, - location=location, - client_info=info, - ) - @classmethod def open(cls, connection): - if connection.state == "open": + if connection.state == ConnectionState.OPEN: logger.debug("Connection is already open, skipping open.") return connection try: - handle = cls.get_bigquery_client(connection.credentials) - - except google.auth.exceptions.DefaultCredentialsError: - logger.info("Please log into GCP to continue") - gcloud.setup_default_credentials() - - handle = cls.get_bigquery_client(connection.credentials) + connection.handle = create_bigquery_client(connection.credentials) + connection.state = ConnectionState.OPEN + return connection except Exception as e: - logger.debug( - "Got an error when attempting to create a bigquery " "client: '{}'".format(e) - ) - + logger.debug(f"Got an error when attempting to create a bigquery client: '{e}'") connection.handle = None - connection.state = "fail" - + connection.state = ConnectionState.FAIL raise FailedToConnectError(str(e)) - connection.handle = handle - connection.state = "open" - return connection - - @classmethod - def get_job_execution_timeout_seconds(cls, conn): - credentials = conn.credentials - return credentials.job_execution_timeout_seconds - - @classmethod - def get_job_retries(cls, conn) -> int: - credentials = conn.credentials - return credentials.job_retries - @classmethod - def get_job_creation_timeout_seconds(cls, conn): - credentials = conn.credentials - return credentials.job_creation_timeout_seconds + def get_table_from_response(cls, resp) -> "agate.Table": + from dbt_common.clients import agate_helper - @classmethod - def get_job_retry_deadline_seconds(cls, conn): - credentials = conn.credentials - return credentials.job_retry_deadline_seconds - - @classmethod - def get_table_from_response(cls, resp): column_names = [field.name for field in resp.schema] return agate_helper.table_from_data_flat(resp, column_names) @@ -444,6 +221,18 @@ def get_labels_from_query_comment(cls): return {} + def generate_job_id(self) -> str: + # Generating a fresh job_id for every _query_and_results call to avoid job_id reuse. + # Generating a job id instead of persisting a BigQuery-generated one after client.query is called.
+ # Using BigQuery's job_id can lead to a race condition if a job has been started and a termination + # is sent before the job_id was stored, leading to a failure to cancel the job. + # By predetermining job_ids (uuid4), we can persist the job_id before the job has been kicked off. + # Doing this, the race condition only leads to attempting to cancel a job that doesn't exist. + job_id = str(uuid.uuid4()) + thread_id = self.get_thread_identifier() + self.jobs_by_thread[thread_id].append(job_id) + return job_id + def raw_execute( self, sql, @@ -452,7 +241,6 @@ def raw_execute( dry_run: bool = False, ): conn = self.get_thread_connection() - client = conn.handle fire_event(SQLQuery(conn_name=conn.name, sql=sql, node_info=get_node_info())) @@ -468,34 +256,28 @@ def raw_execute( priority = conn.credentials.priority if priority == Priority.Batch: - job_params["priority"] = google.cloud.bigquery.QueryPriority.BATCH + job_params["priority"] = QueryPriority.BATCH else: - job_params["priority"] = google.cloud.bigquery.QueryPriority.INTERACTIVE + job_params["priority"] = QueryPriority.INTERACTIVE maximum_bytes_billed = conn.credentials.maximum_bytes_billed if maximum_bytes_billed is not None and maximum_bytes_billed != 0: job_params["maximum_bytes_billed"] = maximum_bytes_billed - job_creation_timeout = self.get_job_creation_timeout_seconds(conn) - job_execution_timeout = self.get_job_execution_timeout_seconds(conn) + with self.exception_handler(sql): + job_id = self.generate_job_id() - def fn(): return self._query_and_results( - client, + conn, sql, job_params, - job_creation_timeout=job_creation_timeout, - job_execution_timeout=job_execution_timeout, + job_id, limit=limit, ) - query_job, iterator = self._retry_and_handle(msg=sql, conn=conn, fn=fn) - - return query_job, iterator - def execute( self, sql, auto_begin=False, fetch=None, limit: Optional[int] = None - ) -> Tuple[BigQueryAdapterResponse, agate.Table]: + ) -> Tuple[BigQueryAdapterResponse, "agate.Table"]: sql = self._add_query_comment(sql) # auto_begin is ignored on bigquery, and only included for consistency query_job, iterator = self.raw_execute(sql, limit=limit) @@ -503,6 +285,8 @@ def execute( if fetch: table = self.get_table_from_response(iterator) else: + from dbt_common.clients import agate_helper + table = agate_helper.empty_table() message = "OK" @@ -558,7 +342,7 @@ def execute( else: message = f"{code}" - response = BigQueryAdapterResponse( # type: ignore[call-arg] + response = BigQueryAdapterResponse( _message=message, rows_affected=num_rows, code=code, @@ -618,9 +402,9 @@ def standard_to_legacy(table): _, iterator = self.raw_execute(sql, use_legacy_sql=True) return self.get_table_from_response(iterator) - def copy_bq_table(self, source, destination, write_disposition): + def copy_bq_table(self, source, destination, write_disposition) -> None: conn = self.get_thread_connection() - client = conn.handle + client: Client = conn.handle # ------------------------------------------------------------------------------- # BigQuery allows to use copy API using two different formats: @@ -648,87 +432,150 @@ def copy_bq_table(self, source, destination, write_disposition): write_disposition, ) - def copy_and_results(): - job_config = google.cloud.bigquery.CopyJobConfig(write_disposition=write_disposition) - copy_job = client.copy_table(source_ref_array, destination_ref, job_config=job_config) - timeout = self.get_job_execution_timeout_seconds(conn) or 300 - iterator = copy_job.result(timeout=timeout) - return copy_job, iterator - - 
self._retry_and_handle( - msg='copy table "{}" to "{}"'.format( - ", ".join(source_ref.path for source_ref in source_ref_array), - destination_ref.path, - ), - conn=conn, - fn=copy_and_results, + msg = 'copy table "{}" to "{}"'.format( + ", ".join(source_ref.path for source_ref in source_ref_array), + destination_ref.path, + ) + with self.exception_handler(msg): + copy_job = client.copy_table( + source_ref_array, + destination_ref, + job_config=CopyJobConfig(write_disposition=write_disposition), + retry=self._retry.create_reopen_with_deadline(conn), + ) + copy_job.result(timeout=self._retry.create_job_execution_timeout(fallback=300)) + + def write_dataframe_to_table( + self, + client: Client, + file_path: str, + database: str, + schema: str, + identifier: str, + table_schema: List[SchemaField], + field_delimiter: str, + fallback_timeout: Optional[float] = None, + ) -> None: + load_config = LoadJobConfig( + skip_leading_rows=1, + schema=table_schema, + field_delimiter=field_delimiter, ) + table = self.table_ref(database, schema, identifier) + self._write_file_to_table(client, file_path, table, load_config, fallback_timeout) + + def write_file_to_table( + self, + client: Client, + file_path: str, + database: str, + schema: str, + identifier: str, + fallback_timeout: Optional[float] = None, + **kwargs, + ) -> None: + config = kwargs["kwargs"] + if "schema" in config: + config["schema"] = json.load(config["schema"]) + load_config = LoadJobConfig(**config) + table = self.table_ref(database, schema, identifier) + self._write_file_to_table(client, file_path, table, load_config, fallback_timeout) + + def _write_file_to_table( + self, + client: Client, + file_path: str, + table: TableReference, + config: LoadJobConfig, + fallback_timeout: Optional[float] = None, + ) -> None: + + with self.exception_handler("LOAD TABLE"): + with open(file_path, "rb") as f: + job = client.load_table_from_file(f, table, rewind=True, job_config=config) + + response = job.result(retry=self._retry.create_retry(fallback=fallback_timeout)) + + if response.state != "DONE": + raise DbtRuntimeError("BigQuery Timeout Exceeded") + + elif response.error_result: + message = "\n".join(error["message"].strip() for error in response.errors) + raise DbtRuntimeError(message) @staticmethod def dataset_ref(database, schema): - return google.cloud.bigquery.DatasetReference(project=database, dataset_id=schema) + return DatasetReference(project=database, dataset_id=schema) @staticmethod def table_ref(database, schema, table_name): - dataset_ref = google.cloud.bigquery.DatasetReference(database, schema) - return google.cloud.bigquery.TableReference(dataset_ref, table_name) + dataset_ref = DatasetReference(database, schema) + return TableReference(dataset_ref, table_name) - def get_bq_table(self, database, schema, identifier): + def get_bq_table(self, database, schema, identifier) -> Table: """Get a bigquery table for a schema/model.""" conn = self.get_thread_connection() + client: Client = conn.handle # backwards compatibility: fill in with defaults if not specified database = database or conn.credentials.database schema = schema or conn.credentials.schema - table_ref = self.table_ref(database, schema, identifier) - return conn.handle.get_table(table_ref) + return client.get_table( + table=self.table_ref(database, schema, identifier), + retry=self._retry.create_reopen_with_deadline(conn), + ) - def drop_dataset(self, database, schema): + def drop_dataset(self, database, schema) -> None: conn = self.get_thread_connection() - dataset_ref = 
self.dataset_ref(database, schema) - client = conn.handle - - def fn(): - return client.delete_dataset(dataset_ref, delete_contents=True, not_found_ok=True) - - self._retry_and_handle(msg="drop dataset", conn=conn, fn=fn) + client: Client = conn.handle + with self.exception_handler("drop dataset"): + client.delete_dataset( + dataset=self.dataset_ref(database, schema), + delete_contents=True, + not_found_ok=True, + retry=self._retry.create_reopen_with_deadline(conn), + ) - def create_dataset(self, database, schema): + def create_dataset(self, database, schema) -> Dataset: conn = self.get_thread_connection() - client = conn.handle - dataset_ref = self.dataset_ref(database, schema) - - def fn(): - return client.create_dataset(dataset_ref, exists_ok=True) - - self._retry_and_handle(msg="create dataset", conn=conn, fn=fn) + client: Client = conn.handle + with self.exception_handler("create dataset"): + return client.create_dataset( + dataset=self.dataset_ref(database, schema), + exists_ok=True, + retry=self._retry.create_reopen_with_deadline(conn), + ) def list_dataset(self, database: str): - # the database string we get here is potentially quoted. Strip that off - # for the API call. - database = database.strip("`") + # The database string we get here is potentially quoted. + # Strip that off for the API call. conn = self.get_thread_connection() - client = conn.handle - - def query_schemas(): + client: Client = conn.handle + with self.exception_handler("list dataset"): # this is similar to how we have to deal with listing tables - all_datasets = client.list_datasets(project=database, max_results=10000) + all_datasets = client.list_datasets( + project=database.strip("`"), + max_results=10000, + retry=self._retry.create_reopen_with_deadline(conn), + ) return [ds.dataset_id for ds in all_datasets] - return self._retry_and_handle(msg="list dataset", conn=conn, fn=query_schemas) - def _query_and_results( self, - client, + conn, sql, job_params, - job_creation_timeout=None, - job_execution_timeout=None, + job_id, limit: Optional[int] = None, ): + client: Client = conn.handle """Query the client and wait for results.""" # Cannot reuse job_config if destination is set and ddl is used - job_config = google.cloud.bigquery.QueryJobConfig(**job_params) - query_job = client.query(query=sql, job_config=job_config, timeout=job_creation_timeout) + query_job = client.query( + query=sql, + job_config=QueryJobConfig(**job_params), + job_id=job_id, # note, this disables retry since the job_id will have been used + timeout=self._retry.create_job_creation_timeout(), + ) if ( query_job.location is not None and query_job.job_id is not None @@ -738,34 +585,14 @@ def _query_and_results( self._bq_job_link(query_job.location, query_job.project, query_job.job_id) ) - iterator = query_job.result(max_results=limit, timeout=job_execution_timeout) + timeout = self._retry.create_job_execution_timeout() + try: + iterator = query_job.result(max_results=limit, timeout=timeout) + except TimeoutError: + exc = f"Operation did not complete within the designated timeout of {timeout} seconds." 
+ raise TimeoutError(exc) return query_job, iterator - def _retry_and_handle(self, msg, conn, fn): - """retry a function call within the context of exception_handler.""" - - def reopen_conn_on_error(error): - if isinstance(error, REOPENABLE_ERRORS): - logger.warning("Reopening connection after {!r}".format(error)) - self.close(conn) - self.open(conn) - return - - with self.exception_handler(msg): - return retry.retry_target( - target=fn, - predicate=_ErrorCounter(self.get_job_retries(conn)).count_error, - sleep_generator=self._retry_generator(), - deadline=self.get_job_retry_deadline_seconds(conn), - on_error=reopen_conn_on_error, - ) - - def _retry_generator(self): - """Generates retry intervals that exponentially back off.""" - return retry.exponential_sleep_generator( - initial=self.DEFAULT_INITIAL_DELAY, maximum=self.DEFAULT_MAXIMUM_DELAY - ) - def _labels_from_query_comment(self, comment: str) -> Dict: try: comment_labels = json.loads(comment) @@ -777,39 +604,6 @@ def _labels_from_query_comment(self, comment: str) -> Dict: } -class _ErrorCounter(object): - """Counts errors seen up to a threshold then raises the next error.""" - - def __init__(self, retries): - self.retries = retries - self.error_count = 0 - - def count_error(self, error): - if self.retries == 0: - return False # Don't log - self.error_count += 1 - if _is_retryable(error) and self.error_count <= self.retries: - logger.debug( - "Retry attempt {} of {} after error: {}".format( - self.error_count, self.retries, repr(error) - ) - ) - return True - else: - return False - - -def _is_retryable(error): - """Return true for errors that are unlikely to occur again if retried.""" - if isinstance(error, RETRYABLE_ERRORS): - return True - elif isinstance(error, google.api_core.exceptions.Forbidden) and any( - e["reason"] == "rateLimitExceeded" for e in error.errors - ): - return True - return False - - _SANITIZE_LABEL_PATTERN = re.compile(r"[^a-z0-9_-]") _VALIDATE_LABEL_LENGTH_LIMIT = 63 diff --git a/dbt/adapters/bigquery/credentials.py b/dbt/adapters/bigquery/credentials.py new file mode 100644 index 000000000..94d70a931 --- /dev/null +++ b/dbt/adapters/bigquery/credentials.py @@ -0,0 +1,269 @@ +import base64 +import binascii +from dataclasses import dataclass, field +from functools import lru_cache +from typing import Any, Dict, Iterable, Optional, Tuple, Union + +from google.auth import default +from google.auth.exceptions import DefaultCredentialsError +from google.auth.impersonated_credentials import Credentials as ImpersonatedCredentials +from google.oauth2.credentials import Credentials as GoogleCredentials +from google.oauth2.service_account import Credentials as ServiceAccountCredentials +from mashumaro import pass_through + +from dbt_common.clients.system import run_cmd +from dbt_common.dataclass_schema import ExtensibleDbtClassMixin, StrEnum +from dbt_common.exceptions import DbtConfigError, DbtRuntimeError +from dbt.adapters.contracts.connection import Credentials +from dbt.adapters.events.logging import AdapterLogger +from dbt.adapters.exceptions.connection import FailedToConnectError + + +_logger = AdapterLogger("BigQuery") + + +class Priority(StrEnum): + Interactive = "interactive" + Batch = "batch" + + +@dataclass +class DataprocBatchConfig(ExtensibleDbtClassMixin): + def __init__(self, batch_config): + self.batch_config = batch_config + + +class _BigQueryConnectionMethod(StrEnum): + OAUTH = "oauth" + OAUTH_SECRETS = "oauth-secrets" + SERVICE_ACCOUNT = "service-account" + SERVICE_ACCOUNT_JSON = 
"service-account-json" + + +@dataclass +class BigQueryCredentials(Credentials): + method: _BigQueryConnectionMethod = None # type: ignore + + # BigQuery allows an empty database / project, where it defers to the + # environment for the project + database: Optional[str] = None + schema: Optional[str] = None + execution_project: Optional[str] = None + quota_project: Optional[str] = None + location: Optional[str] = None + priority: Optional[Priority] = None + maximum_bytes_billed: Optional[int] = None + impersonate_service_account: Optional[str] = None + + job_retry_deadline_seconds: Optional[int] = None + job_retries: Optional[int] = 1 + job_creation_timeout_seconds: Optional[int] = None + job_execution_timeout_seconds: Optional[int] = None + + # Keyfile json creds (unicode or base 64 encoded) + keyfile: Optional[str] = None + keyfile_json: Optional[Dict[str, Any]] = None + + # oauth-secrets + token: Optional[str] = None + refresh_token: Optional[str] = None + client_id: Optional[str] = None + client_secret: Optional[str] = None + token_uri: Optional[str] = None + + dataproc_region: Optional[str] = None + dataproc_cluster_name: Optional[str] = None + gcs_bucket: Optional[str] = None + + dataproc_batch: Optional[DataprocBatchConfig] = field( + metadata={ + "serialization_strategy": pass_through, + }, + default=None, + ) + + scopes: Optional[Tuple[str, ...]] = ( + "https://www.googleapis.com/auth/bigquery", + "https://www.googleapis.com/auth/cloud-platform", + "https://www.googleapis.com/auth/drive", + ) + + _ALIASES = { + # 'legacy_name': 'current_name' + "project": "database", + "dataset": "schema", + "target_project": "target_database", + "target_dataset": "target_schema", + "retries": "job_retries", + "timeout_seconds": "job_execution_timeout_seconds", + } + + def __post_init__(self): + if self.keyfile_json and "private_key" in self.keyfile_json: + self.keyfile_json["private_key"] = self.keyfile_json["private_key"].replace( + "\\n", "\n" + ) + if not self.method: + raise DbtRuntimeError("Must specify authentication method") + + if not self.schema: + raise DbtRuntimeError("Must specify schema") + + @property + def type(self): + return "bigquery" + + @property + def unique_field(self): + return self.database + + def _connection_keys(self): + return ( + "method", + "database", + "execution_project", + "schema", + "location", + "priority", + "maximum_bytes_billed", + "impersonate_service_account", + "job_retry_deadline_seconds", + "job_retries", + "job_creation_timeout_seconds", + "job_execution_timeout_seconds", + "timeout_seconds", + "client_id", + "token_uri", + "dataproc_region", + "dataproc_cluster_name", + "gcs_bucket", + "dataproc_batch", + ) + + @classmethod + def __pre_deserialize__(cls, d: Dict[Any, Any]) -> Dict[Any, Any]: + # We need to inject the correct value of the database (aka project) at + # this stage, ref + # https://github.com/dbt-labs/dbt/pull/2908#discussion_r532927436. + + # `database` is an alias of `project` in BigQuery + if "database" not in d: + _, database = _create_bigquery_defaults() + d["database"] = database + # `execution_project` default to dataset/project + if "execution_project" not in d: + d["execution_project"] = d["database"] + return d + + +def set_default_credentials() -> None: + try: + run_cmd(".", ["gcloud", "--version"]) + except OSError as e: + _logger.debug(e) + msg = """ + dbt requires the gcloud SDK to be installed to authenticate with BigQuery. + Please download and install the SDK, or use a Service Account instead. 
+ + https://cloud.google.com/sdk/ + """ + raise DbtRuntimeError(msg) + + run_cmd(".", ["gcloud", "auth", "application-default", "login"]) + + +def create_google_credentials(credentials: BigQueryCredentials) -> GoogleCredentials: + if credentials.impersonate_service_account: + return _create_impersonated_credentials(credentials) + return _create_google_credentials(credentials) + + +def _create_impersonated_credentials(credentials: BigQueryCredentials) -> ImpersonatedCredentials: + if credentials.scopes and isinstance(credentials.scopes, Iterable): + target_scopes = list(credentials.scopes) + else: + target_scopes = [] + + return ImpersonatedCredentials( + source_credentials=_create_google_credentials(credentials), + target_principal=credentials.impersonate_service_account, + target_scopes=target_scopes, + ) + + +def _create_google_credentials(credentials: BigQueryCredentials) -> GoogleCredentials: + + if credentials.method == _BigQueryConnectionMethod.OAUTH: + creds, _ = _create_bigquery_defaults(scopes=credentials.scopes) + + elif credentials.method == _BigQueryConnectionMethod.SERVICE_ACCOUNT: + creds = ServiceAccountCredentials.from_service_account_file( + credentials.keyfile, scopes=credentials.scopes + ) + + elif credentials.method == _BigQueryConnectionMethod.SERVICE_ACCOUNT_JSON: + details = credentials.keyfile_json + if _is_base64(details): # type:ignore + details = _base64_to_string(details) + creds = ServiceAccountCredentials.from_service_account_info( + details, scopes=credentials.scopes + ) + + elif credentials.method == _BigQueryConnectionMethod.OAUTH_SECRETS: + creds = GoogleCredentials( + token=credentials.token, + refresh_token=credentials.refresh_token, + client_id=credentials.client_id, + client_secret=credentials.client_secret, + token_uri=credentials.token_uri, + scopes=credentials.scopes, + ) + + else: + raise FailedToConnectError(f"Invalid `method` in profile: '{credentials.method}'") + + return creds + + +@lru_cache() +def _create_bigquery_defaults(scopes=None) -> Tuple[Any, Optional[str]]: + """ + Returns (credentials, project_id) + + project_id is returned available from the environment; otherwise None + """ + # Cached, because the underlying implementation shells out, taking ~1s + try: + return default(scopes=scopes) + except DefaultCredentialsError as e: + raise DbtConfigError(f"Failed to authenticate with supplied credentials\nerror:\n{e}") + + +def _is_base64(s: Union[str, bytes]) -> bool: + """ + Checks if the given string or bytes object is valid Base64 encoded. + + Args: + s: The string or bytes object to check. + + Returns: + True if the input is valid Base64, False otherwise. 
+ """ + + if isinstance(s, str): + # For strings, ensure they consist only of valid Base64 characters + if not s.isascii(): + return False + # Convert to bytes for decoding + s = s.encode("ascii") + + try: + # Use the 'validate' parameter to enforce strict Base64 decoding rules + base64.b64decode(s, validate=True) + return True + except (TypeError, binascii.Error): + return False + + +def _base64_to_string(b): + return base64.b64decode(b).decode("utf-8") diff --git a/dbt/adapters/bigquery/dataproc/batch.py b/dbt/adapters/bigquery/dataproc/batch.py deleted file mode 100644 index 61dc3c18b..000000000 --- a/dbt/adapters/bigquery/dataproc/batch.py +++ /dev/null @@ -1,67 +0,0 @@ -from typing import Union, Dict - -import time -from datetime import datetime -from google.cloud.dataproc_v1 import ( - CreateBatchRequest, - BatchControllerClient, - Batch, - GetBatchRequest, -) -from google.protobuf.json_format import ParseDict - -from dbt.adapters.bigquery.connections import DataprocBatchConfig - -_BATCH_RUNNING_STATES = [Batch.State.PENDING, Batch.State.RUNNING] -DEFAULT_JAR_FILE_URI = "gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.13-0.34.0.jar" - - -def create_batch_request( - batch: Batch, batch_id: str, project: str, region: str -) -> CreateBatchRequest: - return CreateBatchRequest( - parent=f"projects/{project}/locations/{region}", # type: ignore - batch_id=batch_id, # type: ignore - batch=batch, # type: ignore - ) - - -def poll_batch_job( - parent: str, batch_id: str, job_client: BatchControllerClient, timeout: int -) -> Batch: - batch_name = "".join([parent, "/batches/", batch_id]) - state = Batch.State.PENDING - response = None - run_time = 0 - while state in _BATCH_RUNNING_STATES and run_time < timeout: - time.sleep(1) - response = job_client.get_batch( # type: ignore - request=GetBatchRequest(name=batch_name), # type: ignore - ) - run_time = datetime.now().timestamp() - response.create_time.timestamp() # type: ignore - state = response.state - if not response: - raise ValueError("No response from Dataproc") - if state != Batch.State.SUCCEEDED: - if run_time >= timeout: - raise ValueError( - f"Operation did not complete within the designated timeout of {timeout} seconds." - ) - else: - raise ValueError(response.state_message) - return response - - -def update_batch_from_config(config_dict: Union[Dict, DataprocBatchConfig], target: Batch): - try: - # updates in place - ParseDict(config_dict, target._pb) - except Exception as e: - docurl = ( - "https://cloud.google.com/dataproc-serverless/docs/reference/rpc/google.cloud.dataproc.v1" - "#google.cloud.dataproc.v1.Batch" - ) - raise ValueError( - f"Unable to parse dataproc_batch as valid batch specification. See {docurl}. 
{str(e)}" - ) from e - return target diff --git a/dbt/adapters/bigquery/dataset.py b/dbt/adapters/bigquery/dataset.py index 4ecd6daa5..a4504294a 100644 --- a/dbt/adapters/bigquery/dataset.py +++ b/dbt/adapters/bigquery/dataset.py @@ -1,8 +1,10 @@ from typing import List -from google.cloud.bigquery import Dataset, AccessEntry + +from google.cloud.bigquery import AccessEntry, Dataset from dbt.adapters.events.logging import AdapterLogger + logger = AdapterLogger("BigQuery") diff --git a/dbt/adapters/bigquery/gcloud.py b/dbt/adapters/bigquery/gcloud.py deleted file mode 100644 index ea1f644ba..000000000 --- a/dbt/adapters/bigquery/gcloud.py +++ /dev/null @@ -1,29 +0,0 @@ -from dbt_common.exceptions import DbtRuntimeError - -from dbt.adapters.events.logging import AdapterLogger -from dbt_common.clients.system import run_cmd - -NOT_INSTALLED_MSG = """ -dbt requires the gcloud SDK to be installed to authenticate with BigQuery. -Please download and install the SDK, or use a Service Account instead. - -https://cloud.google.com/sdk/ -""" - -logger = AdapterLogger("BigQuery") - - -def gcloud_installed(): - try: - run_cmd(".", ["gcloud", "--version"]) - return True - except OSError as e: - logger.debug(e) - return False - - -def setup_default_credentials(): - if gcloud_installed(): - run_cmd(".", ["gcloud", "auth", "application-default", "login"]) - else: - raise DbtRuntimeError(NOT_INSTALLED_MSG) diff --git a/dbt/adapters/bigquery/impl.py b/dbt/adapters/bigquery/impl.py index 2df35bc65..51c457129 100644 --- a/dbt/adapters/bigquery/impl.py +++ b/dbt/adapters/bigquery/impl.py @@ -1,17 +1,40 @@ from dataclasses import dataclass from datetime import datetime -import json -import threading from multiprocessing.context import SpawnContext +import threading +from typing import ( + Any, + Dict, + FrozenSet, + Iterable, + List, + Optional, + Tuple, + TYPE_CHECKING, + Type, + Set, + Union, +) -import time -from typing import Any, Dict, List, Optional, Type, Set, Union, FrozenSet, Tuple, Iterable - -import agate -from dbt.adapters.contracts.relation import RelationConfig +import google.api_core +import google.auth +import google.oauth2 +import google.cloud.bigquery +from google.cloud.bigquery import AccessEntry, Client, SchemaField, Table as BigQueryTable +import google.cloud.exceptions +import pytz +from dbt_common.contracts.constraints import ( + ColumnLevelConstraint, + ConstraintType, + ModelLevelConstraint, +) +from dbt_common.dataclass_schema import dbtClassMixin +from dbt_common.events.functions import fire_event +import dbt_common.exceptions import dbt_common.exceptions.base -from dbt.adapters.base import ( # type: ignore +from dbt_common.utils import filter_null_values +from dbt.adapters.base import ( AdapterConfig, BaseAdapter, BaseRelation, @@ -22,29 +45,16 @@ available, ) from dbt.adapters.base.impl import FreshnessResponse -from dbt.adapters.cache import _make_ref_key_dict # type: ignore +from dbt.adapters.cache import _make_ref_key_dict from dbt.adapters.capability import Capability, CapabilityDict, CapabilitySupport, Support -import dbt_common.clients.agate_helper from dbt.adapters.contracts.connection import AdapterResponse from dbt.adapters.contracts.macros import MacroResolverProtocol -from dbt_common.contracts.constraints import ColumnLevelConstraint, ConstraintType, ModelLevelConstraint # type: ignore -from dbt_common.dataclass_schema import dbtClassMixin +from dbt.adapters.contracts.relation import RelationConfig from dbt.adapters.events.logging import AdapterLogger -from 
dbt_common.events.functions import fire_event from dbt.adapters.events.types import SchemaCreation, SchemaDrop -import dbt_common.exceptions -from dbt_common.utils import filter_null_values -import google.api_core -import google.auth -import google.oauth2 -import google.cloud.bigquery -from google.cloud.bigquery import AccessEntry, SchemaField, Table as BigQueryTable -import google.cloud.exceptions -import pytz -from dbt.adapters.bigquery import BigQueryColumn, BigQueryConnectionManager -from dbt.adapters.bigquery.column import get_nested_column_data_types -from dbt.adapters.bigquery.connections import BigQueryAdapterResponse +from dbt.adapters.bigquery.column import BigQueryColumn, get_nested_column_data_types +from dbt.adapters.bigquery.connections import BigQueryAdapterResponse, BigQueryConnectionManager from dbt.adapters.bigquery.dataset import add_access_entry_to_dataset, is_access_entry_in_dataset from dbt.adapters.bigquery.python_submissions import ( ClusterDataprocHelper, @@ -58,6 +68,11 @@ ) from dbt.adapters.bigquery.utility import sql_escape +if TYPE_CHECKING: + # Indirectly imported via agate_helper, which is lazy loaded further downfile. + # Used by mypy for earlier type hints. + import agate + logger = AdapterLogger("BigQuery") @@ -78,12 +93,6 @@ def render(self): return f"{self.project}.{self.dataset}" -def _stub_relation(*args, **kwargs): - return BigQueryRelation.create( - database="", schema="", identifier="", quote_policy={}, type=BigQueryRelation.Table - ) - - @dataclass class BigqueryConfig(AdapterConfig): cluster_by: Optional[Union[List[str], str]] = None @@ -99,6 +108,8 @@ class BigqueryConfig(AdapterConfig): enable_refresh: Optional[bool] = None refresh_interval_minutes: Optional[int] = None max_staleness: Optional[str] = None + enable_list_inference: Optional[bool] = None + intermediate_format: Optional[str] = None class BigQueryAdapter(BaseAdapter): @@ -144,10 +155,10 @@ def date_function(cls) -> str: @classmethod def is_cancelable(cls) -> bool: - return False + return True def drop_relation(self, relation: BigQueryRelation) -> None: - is_cached = self._schema_is_cached(relation.database, relation.schema) # type: ignore[arg-type] + is_cached = self._schema_is_cached(relation.database, relation.schema) if is_cached: self.cache_dropped(relation) @@ -242,7 +253,7 @@ def add_time_ingestion_partition_column(self, partition_by, columns) -> List[Big ) return columns - def expand_column_types(self, goal: BigQueryRelation, current: BigQueryRelation) -> None: # type: ignore[override] + def expand_column_types(self, goal: BigQueryRelation, current: BigQueryRelation) -> None: # This is a no-op on BigQuery pass @@ -307,7 +318,7 @@ def get_relation( # TODO: the code below is copy-pasted from SQLAdapter.create_schema. Is there a better way? 
def create_schema(self, relation: BigQueryRelation) -> None: # use SQL 'create schema' - relation = relation.without_identifier() # type: ignore + relation = relation.without_identifier() fire_event(SchemaCreation(relation=_make_ref_key_dict(relation))) kwargs = { @@ -332,32 +343,34 @@ def quote(cls, identifier: str) -> str: return "`{}`".format(identifier) @classmethod - def convert_text_type(cls, agate_table: agate.Table, col_idx: int) -> str: + def convert_text_type(cls, agate_table: "agate.Table", col_idx: int) -> str: return "string" @classmethod - def convert_number_type(cls, agate_table: agate.Table, col_idx: int) -> str: + def convert_number_type(cls, agate_table: "agate.Table", col_idx: int) -> str: + import agate + decimals = agate_table.aggregate(agate.MaxPrecision(col_idx)) # type: ignore[attr-defined] return "float64" if decimals else "int64" @classmethod - def convert_integer_type(cls, agate_table: agate.Table, col_idx: int) -> str: + def convert_integer_type(cls, agate_table: "agate.Table", col_idx: int) -> str: return "int64" @classmethod - def convert_boolean_type(cls, agate_table: agate.Table, col_idx: int) -> str: + def convert_boolean_type(cls, agate_table: "agate.Table", col_idx: int) -> str: return "bool" @classmethod - def convert_datetime_type(cls, agate_table: agate.Table, col_idx: int) -> str: + def convert_datetime_type(cls, agate_table: "agate.Table", col_idx: int) -> str: return "datetime" @classmethod - def convert_date_type(cls, agate_table: agate.Table, col_idx: int) -> str: + def convert_date_type(cls, agate_table: "agate.Table", col_idx: int) -> str: return "date" @classmethod - def convert_time_type(cls, agate_table: agate.Table, col_idx: int) -> str: + def convert_time_type(cls, agate_table: "agate.Table", col_idx: int) -> str: return "time" ### @@ -385,14 +398,14 @@ def _get_dbt_columns_from_bq_table(self, table) -> List[BigQueryColumn]: return columns def _agate_to_schema( - self, agate_table: agate.Table, column_override: Dict[str, str] + self, agate_table: "agate.Table", column_override: Dict[str, str] ) -> List[SchemaField]: """Convert agate.Table with column names to a list of bigquery schemas.""" bq_schema = [] for idx, col_name in enumerate(agate_table.column_names): inferred_type = self.convert_agate_type(agate_table, idx) type_ = column_override.get(col_name, inferred_type) - bq_schema.append(SchemaField(col_name, type_)) # type: ignore[arg-type] + bq_schema.append(SchemaField(col_name, type_)) return bq_schema @available.parse(lambda *a, **k: "") @@ -439,22 +452,6 @@ def get_columns_in_select_sql(self, select_sql: str) -> List[BigQueryColumn]: logger.debug("get_columns_in_select_sql error: {}".format(e)) return [] - @classmethod - def poll_until_job_completes(cls, job, timeout): - retry_count = timeout - - while retry_count > 0 and job.state != "DONE": - retry_count -= 1 - time.sleep(1) - job.reload() - - if job.state != "DONE": - raise dbt_common.exceptions.DbtRuntimeError("BigQuery Timeout Exceeded") - - elif job.error_result: - message = "\n".join(error["message"].strip() for error in job.errors) - raise dbt_common.exceptions.DbtRuntimeError(message) - def _bq_table_to_relation(self, bq_table) -> Union[BigQueryRelation, None]: if bq_table is None: return None @@ -652,51 +649,57 @@ def alter_table_add_columns(self, relation, columns): client.update_table(new_table, ["schema"]) @available.parse_none - def load_dataframe(self, database, schema, table_name, agate_table, column_override): - bq_schema = self._agate_to_schema(agate_table, 
column_override) - conn = self.connections.get_thread_connection() - client = conn.handle - - table_ref = self.connections.table_ref(database, schema, table_name) - - load_config = google.cloud.bigquery.LoadJobConfig() - load_config.skip_leading_rows = 1 - load_config.schema = bq_schema - - with open(agate_table.original_abspath, "rb") as f: - job = client.load_table_from_file(f, table_ref, rewind=True, job_config=load_config) - - timeout = self.connections.get_job_execution_timeout_seconds(conn) or 300 - with self.connections.exception_handler("LOAD TABLE"): - self.poll_until_job_completes(job, timeout) + def load_dataframe( + self, + database: str, + schema: str, + table_name: str, + agate_table: "agate.Table", + column_override: Dict[str, str], + field_delimiter: str, + ) -> None: + connection = self.connections.get_thread_connection() + client: Client = connection.handle + table_schema = self._agate_to_schema(agate_table, column_override) + file_path = agate_table.original_abspath # type: ignore + + self.connections.write_dataframe_to_table( + client, + file_path, + database, + schema, + table_name, + table_schema, + field_delimiter, + fallback_timeout=300, + ) @available.parse_none def upload_file( - self, local_file_path: str, database: str, table_schema: str, table_name: str, **kwargs + self, + local_file_path: str, + database: str, + table_schema: str, + table_name: str, + **kwargs, ) -> None: - conn = self.connections.get_thread_connection() - client = conn.handle - - table_ref = self.connections.table_ref(database, table_schema, table_name) - - load_config = google.cloud.bigquery.LoadJobConfig() - for k, v in kwargs["kwargs"].items(): - if k == "schema": - setattr(load_config, k, json.loads(v)) - else: - setattr(load_config, k, v) - - with open(local_file_path, "rb") as f: - job = client.load_table_from_file(f, table_ref, rewind=True, job_config=load_config) - - timeout = self.connections.get_job_execution_timeout_seconds(conn) or 300 - with self.connections.exception_handler("LOAD TABLE"): - self.poll_until_job_completes(job, timeout) + connection = self.connections.get_thread_connection() + client: Client = connection.handle + + self.connections.write_file_to_table( + client, + local_file_path, + database, + table_schema, + table_name, + fallback_timeout=300, + **kwargs, + ) @classmethod def _catalog_filter_table( - cls, table: agate.Table, used_schemas: FrozenSet[Tuple[str, str]] - ) -> agate.Table: + cls, table: "agate.Table", used_schemas: FrozenSet[Tuple[str, str]] + ) -> "agate.Table": table = table.rename( column_names={col.name: col.name.replace("__", ":") for col in table.columns} ) @@ -710,8 +713,8 @@ def _get_catalog_schemas(self, relation_config: Iterable[RelationConfig]) -> Sch for candidate, schemas in candidates.items(): database = candidate.database if database not in db_schemas: - db_schemas[database] = set(self.list_schemas(database)) # type: ignore[index] - if candidate.schema in db_schemas[database]: # type: ignore[index] + db_schemas[database] = set(self.list_schemas(database)) + if candidate.schema in db_schemas[database]: result[candidate] = schemas else: logger.debug( @@ -727,7 +730,7 @@ def calculate_freshness_from_metadata( macro_resolver: Optional[MacroResolverProtocol] = None, ) -> Tuple[Optional[AdapterResponse], FreshnessResponse]: conn = self.connections.get_thread_connection() - client: google.cloud.bigquery.Client = conn.handle + client: Client = conn.handle table_ref = self.get_table_ref_from_relation(source) table = client.get_table(table_ref) 
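
For context on the `load_dataframe` / `upload_file` refactor above: the adapter no longer builds a `LoadJobConfig` and polls the job itself; it hands the file, schema, and the seed's `field_delimiter` over to the connection manager (`write_dataframe_to_table` / `write_file_to_table`, which live in connections.py and are not shown in this hunk). For readers who want the underlying client call, here is a minimal standalone sketch of the equivalent google-cloud-bigquery load, reconstructed from the deleted adapter code; the project, dataset, table, schema, file path, and delimiter below are placeholders, not values from this PR.

```python
# Standalone sketch of the load the new connection-manager helpers are expected to
# perform for seeds; all identifiers below are placeholders, not values from this PR.
from google.cloud import bigquery

client = bigquery.Client(project="my-gcp-project")

job_config = bigquery.LoadJobConfig(
    skip_leading_rows=1,          # seed CSVs include a header row
    field_delimiter="|",          # now driven by the seed's `delimiter` config
    schema=[
        bigquery.SchemaField("seed_id", "INT64"),
        bigquery.SchemaField("stuff", "STRING"),
    ],
)

with open("/path/to/seed_enabled.csv", "rb") as f:
    job = client.load_table_from_file(
        f,
        destination="my-gcp-project.my_dataset.seed_enabled",
        rewind=True,
        job_config=job_config,
    )

job.result(timeout=300)  # block until the load completes, mirroring fallback_timeout=300
```

The seed materialization passes `model['config']['delimiter']` through `adapter.load_dataframe` (see the seed.sql change later in this diff), which is what makes the delimiter configurable end to end.
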
@@ -818,7 +821,7 @@ def describe_relation( return None @available.parse_none - def grant_access_to(self, entity, entity_type, role, grant_target_dict): + def grant_access_to(self, entity, entity_type, role, grant_target_dict) -> None: """ Given an entity, grants it access to a dataset. """ @@ -847,7 +850,7 @@ def get_dataset_location(self, relation): dataset = client.get_dataset(dataset_ref) return dataset.location - def get_rows_different_sql( # type: ignore[override] + def get_rows_different_sql( self, relation_a: BigQueryRelation, relation_b: BigQueryRelation, @@ -895,7 +898,7 @@ def run_sql_for_tests(self, sql, fetch, conn=None): return list(res) def generate_python_submission_response(self, submission_result) -> BigQueryAdapterResponse: - return BigQueryAdapterResponse(_message="OK") # type: ignore[call-arg] + return BigQueryAdapterResponse(_message="OK") @property def default_python_submission_method(self) -> str: @@ -935,7 +938,7 @@ def render_raw_columns_constraints(cls, raw_columns: Dict[str, Dict[str, Any]]) @classmethod def render_column_constraint(cls, constraint: ColumnLevelConstraint) -> Optional[str]: - c = super().render_column_constraint(constraint) # type: ignore + c = super().render_column_constraint(constraint) if ( constraint.type == ConstraintType.primary_key or constraint.type == ConstraintType.foreign_key @@ -945,7 +948,7 @@ def render_column_constraint(cls, constraint: ColumnLevelConstraint) -> Optional @classmethod def render_model_constraint(cls, constraint: ModelLevelConstraint) -> Optional[str]: - c = super().render_model_constraint(constraint) # type: ignore + c = super().render_model_constraint(constraint) if ( constraint.type == ConstraintType.primary_key or constraint.type == ConstraintType.foreign_key diff --git a/dbt/adapters/bigquery/python_submissions.py b/dbt/adapters/bigquery/python_submissions.py index 065c65d8b..cd7f7d86f 100644 --- a/dbt/adapters/bigquery/python_submissions.py +++ b/dbt/adapters/bigquery/python_submissions.py @@ -1,187 +1,165 @@ -import uuid from typing import Dict, Union +import uuid -from dbt.adapters.events.logging import AdapterLogger +from google.cloud.dataproc_v1 import Batch, CreateBatchRequest, Job, RuntimeConfig from dbt.adapters.base import PythonJobHelper -from google.api_core.future.polling import POLLING_PREDICATE - -from dbt.adapters.bigquery import BigQueryConnectionManager, BigQueryCredentials -from google.api_core import retry -from google.api_core.client_options import ClientOptions -from google.cloud import storage, dataproc_v1 # type: ignore -from google.cloud.dataproc_v1.types.batches import Batch - -from dbt.adapters.bigquery.dataproc.batch import ( - create_batch_request, - poll_batch_job, - DEFAULT_JAR_FILE_URI, - update_batch_from_config, +from dbt.adapters.events.logging import AdapterLogger +from google.protobuf.json_format import ParseDict + +from dbt.adapters.bigquery.credentials import BigQueryCredentials, DataprocBatchConfig +from dbt.adapters.bigquery.clients import ( + create_dataproc_batch_controller_client, + create_dataproc_job_controller_client, + create_gcs_client, ) +from dbt.adapters.bigquery.retry import RetryFactory + -OPERATION_RETRY_TIME = 10 -logger = AdapterLogger("BigQuery") +_logger = AdapterLogger("BigQuery") -class BaseDataProcHelper(PythonJobHelper): - def __init__(self, parsed_model: Dict, credential: BigQueryCredentials) -> None: - """_summary_ +_DEFAULT_JAR_FILE_URI = "gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.13-0.34.0.jar" - Args: - credential (_type_): 
_description_ - """ + +class _BaseDataProcHelper(PythonJobHelper): + def __init__(self, parsed_model: Dict, credentials: BigQueryCredentials) -> None: # validate all additional stuff for python is set - schema = parsed_model["schema"] - identifier = parsed_model["alias"] - self.parsed_model = parsed_model - python_required_configs = [ - "dataproc_region", - "gcs_bucket", - ] - for required_config in python_required_configs: - if not getattr(credential, required_config): + for required_config in ["dataproc_region", "gcs_bucket"]: + if not getattr(credentials, required_config): raise ValueError( f"Need to supply {required_config} in profile to submit python job" ) - self.model_file_name = f"{schema}/{identifier}.py" - self.credential = credential - self.GoogleCredentials = BigQueryConnectionManager.get_credentials(credential) - self.storage_client = storage.Client( - project=self.credential.execution_project, credentials=self.GoogleCredentials - ) - self.gcs_location = "gs://{}/{}".format(self.credential.gcs_bucket, self.model_file_name) + + self._storage_client = create_gcs_client(credentials) + self._project = credentials.execution_project + self._region = credentials.dataproc_region + + schema = parsed_model["schema"] + identifier = parsed_model["alias"] + self._model_file_name = f"{schema}/{identifier}.py" + self._gcs_bucket = credentials.gcs_bucket + self._gcs_path = f"gs://{credentials.gcs_bucket}/{self._model_file_name}" # set retry policy, default to timeout after 24 hours - self.timeout = self.parsed_model["config"].get( - "timeout", self.credential.job_execution_timeout_seconds or 60 * 60 * 24 - ) - self.result_polling_policy = retry.Retry( - predicate=POLLING_PREDICATE, maximum=10.0, timeout=self.timeout - ) - self.client_options = ClientOptions( - api_endpoint="{}-dataproc.googleapis.com:443".format(self.credential.dataproc_region) + retry = RetryFactory(credentials) + self._polling_retry = retry.create_polling( + model_timeout=parsed_model["config"].get("timeout") ) - self.job_client = self._get_job_client() - def _upload_to_gcs(self, filename: str, compiled_code: str) -> None: - bucket = self.storage_client.get_bucket(self.credential.gcs_bucket) - blob = bucket.blob(filename) + def _write_to_gcs(self, compiled_code: str) -> None: + bucket = self._storage_client.get_bucket(self._gcs_bucket) + blob = bucket.blob(self._model_file_name) blob.upload_from_string(compiled_code) - def submit(self, compiled_code: str) -> dataproc_v1.types.jobs.Job: - # upload python file to GCS - self._upload_to_gcs(self.model_file_name, compiled_code) - # submit dataproc job - return self._submit_dataproc_job() - - def _get_job_client( - self, - ) -> Union[dataproc_v1.JobControllerClient, dataproc_v1.BatchControllerClient]: - raise NotImplementedError("_get_job_client not implemented") - - def _submit_dataproc_job(self) -> dataproc_v1.types.jobs.Job: - raise NotImplementedError("_submit_dataproc_job not implemented") +class ClusterDataprocHelper(_BaseDataProcHelper): + def __init__(self, parsed_model: Dict, credentials: BigQueryCredentials) -> None: + super().__init__(parsed_model, credentials) + self._job_controller_client = create_dataproc_job_controller_client(credentials) + self._cluster_name = parsed_model["config"].get( + "dataproc_cluster_name", credentials.dataproc_cluster_name + ) -class ClusterDataprocHelper(BaseDataProcHelper): - def _get_job_client(self) -> dataproc_v1.JobControllerClient: - if not self._get_cluster_name(): + if not self._cluster_name: raise ValueError( "Need to supply 
dataproc_cluster_name in profile or config to submit python job with cluster submission method" ) - return dataproc_v1.JobControllerClient( # type: ignore - client_options=self.client_options, credentials=self.GoogleCredentials - ) - def _get_cluster_name(self) -> str: - return self.parsed_model["config"].get( - "dataproc_cluster_name", self.credential.dataproc_cluster_name - ) + def submit(self, compiled_code: str) -> Job: + _logger.debug(f"Submitting cluster job to: {self._cluster_name}") - def _submit_dataproc_job(self) -> dataproc_v1.types.jobs.Job: - job = { - "placement": {"cluster_name": self._get_cluster_name()}, - "pyspark_job": { - "main_python_file_uri": self.gcs_location, + self._write_to_gcs(compiled_code) + + request = { + "project_id": self._project, + "region": self._region, + "job": { + "placement": {"cluster_name": self._cluster_name}, + "pyspark_job": { + "main_python_file_uri": self._gcs_path, + }, }, } - operation = self.job_client.submit_job_as_operation( # type: ignore - request={ - "project_id": self.credential.execution_project, - "region": self.credential.dataproc_region, - "job": job, - } - ) - # check if job failed - response = operation.result(polling=self.result_polling_policy) + + # submit the job + operation = self._job_controller_client.submit_job_as_operation(request) + + # wait for the job to complete + response: Job = operation.result(polling=self._polling_retry) + if response.status.state == 6: raise ValueError(response.status.details) + return response -class ServerlessDataProcHelper(BaseDataProcHelper): - def _get_job_client(self) -> dataproc_v1.BatchControllerClient: - return dataproc_v1.BatchControllerClient( - client_options=self.client_options, credentials=self.GoogleCredentials - ) +class ServerlessDataProcHelper(_BaseDataProcHelper): + def __init__(self, parsed_model: Dict, credentials: BigQueryCredentials) -> None: + super().__init__(parsed_model, credentials) + self._batch_controller_client = create_dataproc_batch_controller_client(credentials) + self._batch_id = parsed_model["config"].get("batch_id", str(uuid.uuid4())) + self._jar_file_uri = parsed_model["config"].get("jar_file_uri", _DEFAULT_JAR_FILE_URI) + self._dataproc_batch = credentials.dataproc_batch + + def submit(self, compiled_code: str) -> Batch: + _logger.debug(f"Submitting batch job with id: {self._batch_id}") - def _get_batch_id(self) -> str: - model = self.parsed_model - default_batch_id = str(uuid.uuid4()) - return model["config"].get("batch_id", default_batch_id) - - def _submit_dataproc_job(self) -> Batch: - batch_id = self._get_batch_id() - logger.info(f"Submitting batch job with id: {batch_id}") - request = create_batch_request( - batch=self._configure_batch(), - batch_id=batch_id, - region=self.credential.dataproc_region, # type: ignore - project=self.credential.execution_project, # type: ignore - ) # type: ignore - # make the request - self.job_client.create_batch(request=request) # type: ignore - return poll_batch_job( - parent=request.parent, - batch_id=batch_id, - job_client=self.job_client, # type: ignore - timeout=self.timeout, + self._write_to_gcs(compiled_code) + + request = CreateBatchRequest( + parent=f"projects/{self._project}/locations/{self._region}", + batch=self._create_batch(), + batch_id=self._batch_id, ) - # there might be useful results here that we can parse and return - # Dataproc job output is saved to the Cloud Storage bucket - # allocated to the job. Use regex to obtain the bucket and blob info. 
- # matches = re.match("gs://(.*?)/(.*)", response.driver_output_resource_uri) - # output = ( - # self.storage_client - # .get_bucket(matches.group(1)) - # .blob(f"{matches.group(2)}.000000000") - # .download_as_string() - # ) - - def _configure_batch(self): + + # submit the batch + operation = self._batch_controller_client.create_batch(request) + + # wait for the batch to complete + response: Batch = operation.result(polling=self._polling_retry) + + return response + + def _create_batch(self) -> Batch: # create the Dataproc Serverless job config # need to pin dataproc version to 1.1 as it now defaults to 2.0 # https://cloud.google.com/dataproc-serverless/docs/concepts/properties # https://cloud.google.com/dataproc-serverless/docs/reference/rest/v1/projects.locations.batches#runtimeconfig - batch = dataproc_v1.Batch( + batch = Batch( { - "runtime_config": dataproc_v1.RuntimeConfig( + "runtime_config": RuntimeConfig( version="1.1", properties={ "spark.executor.instances": "2", }, - ) + ), + "pyspark_batch": { + "main_python_file_uri": self._gcs_path, + "jar_file_uris": [self._jar_file_uri], + }, } ) - # Apply defaults - batch.pyspark_batch.main_python_file_uri = self.gcs_location - jar_file_uri = self.parsed_model["config"].get( - "jar_file_uri", - DEFAULT_JAR_FILE_URI, - ) - batch.pyspark_batch.jar_file_uris = [jar_file_uri] # Apply configuration from dataproc_batch key, possibly overriding defaults. - if self.credential.dataproc_batch: - batch = update_batch_from_config(self.credential.dataproc_batch, batch) + if self._dataproc_batch: + batch = _update_batch_from_config(self._dataproc_batch, batch) + return batch + + +def _update_batch_from_config( + config_dict: Union[Dict, DataprocBatchConfig], target: Batch +) -> Batch: + try: + # updates in place + ParseDict(config_dict, target._pb) + except Exception as e: + docurl = ( + "https://cloud.google.com/dataproc-serverless/docs/reference/rpc/google.cloud.dataproc.v1" + "#google.cloud.dataproc.v1.Batch" + ) + raise ValueError( + f"Unable to parse dataproc_batch as valid batch specification. See {docurl}. 
{str(e)}" + ) from e + return target diff --git a/dbt/adapters/bigquery/relation.py b/dbt/adapters/bigquery/relation.py index 8abda577b..4edc8d7ac 100644 --- a/dbt/adapters/bigquery/relation.py +++ b/dbt/adapters/bigquery/relation.py @@ -1,9 +1,13 @@ -from dataclasses import dataclass +from dataclasses import dataclass, field +from itertools import chain, islice from typing import FrozenSet, Optional, TypeVar -from itertools import chain, islice +from dbt_common.exceptions import CompilationError +from dbt_common.utils.dict import filter_null_values from dbt.adapters.base.relation import BaseRelation, ComponentName, InformationSchema +from dbt.adapters.contracts.relation import RelationConfig, RelationType from dbt.adapters.relation_configs import RelationConfigChangeAction + from dbt.adapters.bigquery.relation_configs import ( BigQueryClusterConfigChange, BigQueryMaterializedViewConfig, @@ -11,9 +15,6 @@ BigQueryOptionsConfigChange, BigQueryPartitionConfigChange, ) -from dbt.adapters.contracts.relation import RelationType, RelationConfig -from dbt_common.exceptions import CompilationError -from dbt_common.utils.dict import filter_null_values Self = TypeVar("Self", bound="BigQueryRelation") @@ -23,9 +24,23 @@ class BigQueryRelation(BaseRelation): quote_character: str = "`" location: Optional[str] = None - renameable_relations: FrozenSet[RelationType] = frozenset({RelationType.Table}) - replaceable_relations: FrozenSet[RelationType] = frozenset( - {RelationType.Table, RelationType.View} + require_alias: bool = False + + renameable_relations: FrozenSet[RelationType] = field( + default_factory=lambda: frozenset( + { + RelationType.Table, + } + ) + ) + + replaceable_relations: FrozenSet[RelationType] = field( + default_factory=lambda: frozenset( + { + RelationType.View, + RelationType.Table, + } + ) ) def matches( @@ -64,7 +79,7 @@ def dataset(self): def materialized_view_from_relation_config( cls, relation_config: RelationConfig ) -> BigQueryMaterializedViewConfig: - return BigQueryMaterializedViewConfig.from_relation_config(relation_config) # type: ignore + return BigQueryMaterializedViewConfig.from_relation_config(relation_config) @classmethod def materialized_view_config_changeset( diff --git a/dbt/adapters/bigquery/relation_configs/_base.py b/dbt/adapters/bigquery/relation_configs/_base.py index be34a08ac..8bc861587 100644 --- a/dbt/adapters/bigquery/relation_configs/_base.py +++ b/dbt/adapters/bigquery/relation_configs/_base.py @@ -1,7 +1,6 @@ from dataclasses import dataclass -from typing import Optional, Dict +from typing import Optional, Dict, TYPE_CHECKING -import agate from dbt.adapters.base.relation import Policy from dbt.adapters.relation_configs import RelationConfigBase from google.cloud.bigquery import Table as BigQueryTable @@ -13,6 +12,11 @@ ) from dbt.adapters.contracts.relation import ComponentName, RelationConfig +if TYPE_CHECKING: + # Indirectly imported via agate_helper, which is lazy loaded further downfile. + # Used by mypy for earlier type hints. 
+ import agate + @dataclass(frozen=True, eq=True, unsafe_hash=True) class BigQueryBaseRelationConfig(RelationConfigBase): @@ -28,7 +32,7 @@ def quote_policy(cls) -> Policy: def from_relation_config(cls, relation_config: RelationConfig) -> Self: relation_config_dict = cls.parse_relation_config(relation_config) relation = cls.from_dict(relation_config_dict) - return relation # type: ignore + return relation @classmethod def parse_relation_config(cls, relation_config: RelationConfig) -> Dict: @@ -40,7 +44,7 @@ def parse_relation_config(cls, relation_config: RelationConfig) -> Dict: def from_bq_table(cls, table: BigQueryTable) -> Self: relation_config = cls.parse_bq_table(table) relation = cls.from_dict(relation_config) - return relation # type: ignore + return relation @classmethod def parse_bq_table(cls, table: BigQueryTable) -> Dict: @@ -55,8 +59,10 @@ def _render_part(cls, component: ComponentName, value: Optional[str]) -> Optiona return None @classmethod - def _get_first_row(cls, results: agate.Table) -> agate.Row: + def _get_first_row(cls, results: "agate.Table") -> "agate.Row": try: return results.rows[0] except IndexError: + import agate + return agate.Row(values=set()) diff --git a/dbt/adapters/bigquery/relation_configs/_cluster.py b/dbt/adapters/bigquery/relation_configs/_cluster.py index 53092cb35..b3dbaf2e9 100644 --- a/dbt/adapters/bigquery/relation_configs/_cluster.py +++ b/dbt/adapters/bigquery/relation_configs/_cluster.py @@ -25,13 +25,13 @@ class BigQueryClusterConfig(BigQueryBaseRelationConfig): @classmethod def from_dict(cls, config_dict: Dict[str, Any]) -> Self: kwargs_dict = {"fields": config_dict.get("fields")} - return super().from_dict(kwargs_dict) # type: ignore + return super().from_dict(kwargs_dict) @classmethod def parse_relation_config(cls, relation_config: RelationConfig) -> Dict[str, Any]: config_dict = {} - if cluster_by := relation_config.config.extra.get("cluster_by"): # type: ignore + if cluster_by := relation_config.config.extra.get("cluster_by"): # users may input a single field as a string if isinstance(cluster_by, str): cluster_by = [cluster_by] @@ -40,7 +40,7 @@ def parse_relation_config(cls, relation_config: RelationConfig) -> Dict[str, Any return config_dict @classmethod - def parse_bq_table(cls, table: BigQueryTable) -> Dict[str, Any]: # type: ignore + def parse_bq_table(cls, table: BigQueryTable) -> Dict[str, Any]: config_dict = {"fields": frozenset(table.clustering_fields)} return config_dict diff --git a/dbt/adapters/bigquery/relation_configs/_materialized_view.py b/dbt/adapters/bigquery/relation_configs/_materialized_view.py index 81ca6b3de..7c63ba3bc 100644 --- a/dbt/adapters/bigquery/relation_configs/_materialized_view.py +++ b/dbt/adapters/bigquery/relation_configs/_materialized_view.py @@ -61,7 +61,7 @@ def from_dict(cls, config_dict: Dict[str, Any]) -> "BigQueryMaterializedViewConf if cluster := config_dict.get("cluster"): kwargs_dict.update({"cluster": BigQueryClusterConfig.from_dict(cluster)}) - materialized_view: "BigQueryMaterializedViewConfig" = super().from_dict(kwargs_dict) # type: ignore + materialized_view: "BigQueryMaterializedViewConfig" = super().from_dict(kwargs_dict) return materialized_view @classmethod diff --git a/dbt/adapters/bigquery/relation_configs/_options.py b/dbt/adapters/bigquery/relation_configs/_options.py index dbbbc8f68..a0e2de861 100644 --- a/dbt/adapters/bigquery/relation_configs/_options.py +++ b/dbt/adapters/bigquery/relation_configs/_options.py @@ -128,13 +128,13 @@ def formatted_setting(name: str) -> Any: } 
) - options: Self = super().from_dict(kwargs_dict) # type: ignore + options: Self = super().from_dict(kwargs_dict) return options @classmethod def parse_relation_config(cls, relation_config: RelationConfig) -> Dict[str, Any]: config_dict = { - option: relation_config.config.extra.get(option) # type: ignore + option: relation_config.config.extra.get(option) for option in [ "enable_refresh", "refresh_interval_minutes", @@ -148,13 +148,11 @@ def parse_relation_config(cls, relation_config: RelationConfig) -> Dict[str, Any } # update dbt-specific versions of these settings - if hours_to_expiration := relation_config.config.extra.get( # type: ignore - "hours_to_expiration" - ): # type: ignore + if hours_to_expiration := relation_config.config.extra.get("hours_to_expiration"): config_dict.update( {"expiration_timestamp": datetime.now() + timedelta(hours=hours_to_expiration)} ) - if not relation_config.config.persist_docs: # type: ignore + if not relation_config.config.persist_docs: del config_dict["description"] return config_dict diff --git a/dbt/adapters/bigquery/relation_configs/_partition.py b/dbt/adapters/bigquery/relation_configs/_partition.py index 555aa3664..e1a5ac171 100644 --- a/dbt/adapters/bigquery/relation_configs/_partition.py +++ b/dbt/adapters/bigquery/relation_configs/_partition.py @@ -111,7 +111,7 @@ def parse_model_node(cls, relation_config: RelationConfig) -> Dict[str, Any]: This doesn't currently collect `time_ingestion_partitioning` and `copy_partitions` because this was built for materialized views, which do not support those settings. """ - config_dict: Dict[str, Any] = relation_config.config.extra.get("partition_by") # type: ignore + config_dict: Dict[str, Any] = relation_config.config.extra.get("partition_by") if "time_ingestion_partitioning" in config_dict: del config_dict["time_ingestion_partitioning"] if "copy_partitions" in config_dict: diff --git a/dbt/adapters/bigquery/retry.py b/dbt/adapters/bigquery/retry.py new file mode 100644 index 000000000..391c00e46 --- /dev/null +++ b/dbt/adapters/bigquery/retry.py @@ -0,0 +1,128 @@ +from typing import Callable, Optional + +from google.api_core.exceptions import Forbidden +from google.api_core.future.polling import DEFAULT_POLLING +from google.api_core.retry import Retry +from google.cloud.bigquery.retry import DEFAULT_RETRY +from google.cloud.exceptions import BadGateway, BadRequest, ServerError +from requests.exceptions import ConnectionError + +from dbt.adapters.contracts.connection import Connection, ConnectionState +from dbt.adapters.events.logging import AdapterLogger +from dbt.adapters.exceptions.connection import FailedToConnectError + +from dbt.adapters.bigquery.clients import create_bigquery_client +from dbt.adapters.bigquery.credentials import BigQueryCredentials + + +_logger = AdapterLogger("BigQuery") + + +_SECOND = 1.0 +_MINUTE = 60 * _SECOND +_HOUR = 60 * _MINUTE +_DAY = 24 * _HOUR +_DEFAULT_INITIAL_DELAY = _SECOND +_DEFAULT_MAXIMUM_DELAY = 3 * _SECOND +_DEFAULT_POLLING_MAXIMUM_DELAY = 10 * _SECOND + + +class RetryFactory: + + def __init__(self, credentials: BigQueryCredentials) -> None: + self._retries = credentials.job_retries or 0 + self._job_creation_timeout = credentials.job_creation_timeout_seconds + self._job_execution_timeout = credentials.job_execution_timeout_seconds + self._job_deadline = credentials.job_retry_deadline_seconds + + def create_job_creation_timeout(self, fallback: float = _MINUTE) -> float: + return ( + self._job_creation_timeout or fallback + ) # keep _MINUTE here so it's not 
overridden by passing fallback=None + + def create_job_execution_timeout(self, fallback: float = _DAY) -> float: + return ( + self._job_execution_timeout or fallback + ) # keep _DAY here so it's not overridden by passing fallback=None + + def create_retry(self, fallback: Optional[float] = None) -> Retry: + return DEFAULT_RETRY.with_timeout(self._job_execution_timeout or fallback or _DAY) + + def create_polling(self, model_timeout: Optional[float] = None) -> Retry: + return DEFAULT_POLLING.with_timeout(model_timeout or self._job_execution_timeout or _DAY) + + def create_reopen_with_deadline(self, connection: Connection) -> Retry: + """ + This strategy mimics what was accomplished with _retry_and_handle + """ + return Retry( + predicate=_DeferredException(self._retries), + initial=_DEFAULT_INITIAL_DELAY, + maximum=_DEFAULT_MAXIMUM_DELAY, + deadline=self._job_deadline, + on_error=_create_reopen_on_error(connection), + ) + + +class _DeferredException: + """ + Count ALL errors, not just retryable errors, up to a threshold. + Raise the next error, regardless of whether it is retryable. + """ + + def __init__(self, retries: int) -> None: + self._retries: int = retries + self._error_count = 0 + + def __call__(self, error: Exception) -> bool: + # exit immediately if the user does not want retries + if self._retries == 0: + return False + + # count all errors + self._error_count += 1 + + # if the error is retryable, and we haven't breached the threshold, log and continue + if _is_retryable(error) and self._error_count <= self._retries: + _logger.debug( + f"Retry attempt {self._error_count} of {self._retries} after error: {repr(error)}" + ) + return True + + # otherwise raise + return False + + +def _create_reopen_on_error(connection: Connection) -> Callable[[Exception], None]: + + def on_error(error: Exception): + if isinstance(error, (ConnectionResetError, ConnectionError)): + _logger.warning("Reopening connection after {!r}".format(error)) + connection.handle.close() + + try: + connection.handle = create_bigquery_client(connection.credentials) + connection.state = ConnectionState.OPEN + + except Exception as e: + _logger.debug( + f"""Got an error when attempting to create a bigquery " "client: '{e}'""" + ) + connection.handle = None + connection.state = ConnectionState.FAIL + raise FailedToConnectError(str(e)) + + return on_error + + +def _is_retryable(error: Exception) -> bool: + """Return true for errors that are unlikely to occur again if retried.""" + if isinstance( + error, (BadGateway, BadRequest, ConnectionError, ConnectionResetError, ServerError) + ): + return True + elif isinstance(error, Forbidden) and any( + e["reason"] == "rateLimitExceeded" for e in error.errors + ): + return True + return False diff --git a/dbt/include/bigquery/macros/catalog/catalog.sql b/dbt/include/bigquery/macros/catalog/catalog.sql index de16f82bf..268debc5f 100644 --- a/dbt/include/bigquery/macros/catalog/catalog.sql +++ b/dbt/include/bigquery/macros/catalog/catalog.sql @@ -121,10 +121,12 @@ end as table_name, tables.table_type, tables.table_comment, - columns.column_name, - columns.column_index, - columns.column_type, - columns.column_comment, + -- coalesce column metadata fields to ensure they are non-null for catalog generation + -- external table columns are not present in COLUMN_FIELD_PATHS + coalesce(columns.column_name, '') as column_name, + coalesce(columns.column_index, 1) as column_index, + coalesce(columns.column_type, '') as column_type, + coalesce(columns.column_comment, '') as column_comment, 
'Shard count' as `stats__date_shards__label`, table_stats.shard_count as `stats__date_shards__value`, diff --git a/dbt/include/bigquery/macros/materializations/incremental.sql b/dbt/include/bigquery/macros/materializations/incremental.sql index 2cbb14d9b..935280d63 100644 --- a/dbt/include/bigquery/macros/materializations/incremental.sql +++ b/dbt/include/bigquery/macros/materializations/incremental.sql @@ -4,12 +4,16 @@ {% set invalid_strategy_msg -%} Invalid incremental strategy provided: {{ strategy }} - Expected one of: 'merge', 'insert_overwrite' + Expected one of: 'merge', 'insert_overwrite', 'microbatch' {%- endset %} - {% if strategy not in ['merge', 'insert_overwrite'] %} + {% if strategy not in ['merge', 'insert_overwrite', 'microbatch'] %} {% do exceptions.raise_compiler_error(invalid_strategy_msg) %} {% endif %} + {% if strategy == 'microbatch' %} + {% do bq_validate_microbatch_config(config) %} + {% endif %} + {% do return(strategy) %} {% endmacro %} @@ -48,8 +52,13 @@ tmp_relation, target_relation, sql, unique_key, partition_by, partitions, dest_columns, tmp_relation_exists, copy_partitions ) %} - {% else %} {# strategy == 'merge' #} + {% elif strategy == 'microbatch' %} + + {% set build_sql = bq_generate_microbatch_build_sql( + tmp_relation, target_relation, sql, unique_key, partition_by, partitions, dest_columns, tmp_relation_exists, copy_partitions + ) %} + {% else %} {# strategy == 'merge' #} {% set build_sql = bq_generate_incremental_merge_build_sql( tmp_relation, target_relation, sql, unique_key, partition_by, dest_columns, tmp_relation_exists, incremental_predicates ) %} @@ -151,10 +160,6 @@ {{ build_sql }} {% endcall %} - {%- if language == 'python' and tmp_relation -%} - {{ adapter.drop_relation(tmp_relation) }} - {%- endif -%} - {% endif %} {{ run_hooks(post_hooks) }} @@ -166,6 +171,10 @@ {% do persist_docs(target_relation, model) %} + {%- if tmp_relation_exists -%} + {{ adapter.drop_relation(tmp_relation) }} + {%- endif -%} + {{ return({'relations': [target_relation]}) }} {%- endmaterialization %} diff --git a/dbt/include/bigquery/macros/materializations/incremental_strategy/microbatch.sql b/dbt/include/bigquery/macros/materializations/incremental_strategy/microbatch.sql new file mode 100644 index 000000000..d4c4b7453 --- /dev/null +++ b/dbt/include/bigquery/macros/materializations/incremental_strategy/microbatch.sql @@ -0,0 +1,28 @@ +{% macro bq_validate_microbatch_config(config) %} + {% if config.get("partition_by") is none %} + {% set missing_partition_msg -%} + The 'microbatch' strategy requires a `partition_by` config. + {%- endset %} + {% do exceptions.raise_compiler_error(missing_partition_msg) %} + {% endif %} + + {% if config.get("partition_by").granularity != config.get('batch_size') %} + {% set invalid_partition_by_granularity_msg -%} + The 'microbatch' strategy requires a `partition_by` config with the same granularity as its configured `batch_size`. 
+ Got: + `batch_size`: {{ config.get('batch_size') }} + `partition_by.granularity`: {{ config.get("partition_by").granularity }} + {%- endset %} + {% do exceptions.raise_compiler_error(invalid_partition_by_granularity_msg) %} + {% endif %} +{% endmacro %} + +{% macro bq_generate_microbatch_build_sql( + tmp_relation, target_relation, sql, unique_key, partition_by, partitions, dest_columns, tmp_relation_exists, copy_partitions +) %} + {% set build_sql = bq_insert_overwrite_sql( + tmp_relation, target_relation, sql, unique_key, partition_by, partitions, dest_columns, tmp_relation_exists, copy_partitions + ) %} + + {{ return(build_sql) }} +{% endmacro %} diff --git a/dbt/include/bigquery/macros/materializations/seed.sql b/dbt/include/bigquery/macros/materializations/seed.sql index 6ac7337f3..c89d00598 100644 --- a/dbt/include/bigquery/macros/materializations/seed.sql +++ b/dbt/include/bigquery/macros/materializations/seed.sql @@ -11,7 +11,7 @@ {%- set column_override = model['config'].get('column_types', {}) -%} {{ adapter.load_dataframe(model['database'], model['schema'], model['alias'], - agate_table, column_override) }} + agate_table, column_override, model['config']['delimiter']) }} {% call statement() %} alter table {{ this.render() }} set {{ bigquery_table_options(config, model) }} diff --git a/dbt/include/bigquery/macros/materializations/table.sql b/dbt/include/bigquery/macros/materializations/table.sql index 68117b06a..41bb69770 100644 --- a/dbt/include/bigquery/macros/materializations/table.sql +++ b/dbt/include/bigquery/macros/materializations/table.sql @@ -49,12 +49,19 @@ from pyspark.sql import SparkSession {%- set raw_partition_by = config.get('partition_by', none) -%} {%- set raw_cluster_by = config.get('cluster_by', none) -%} +{%- set enable_list_inference = config.get('enable_list_inference', true) -%} +{%- set intermediate_format = config.get('intermediate_format', none) -%} + {%- set partition_config = adapter.parse_partition_by(raw_partition_by) %} spark = SparkSession.builder.appName('smallTest').getOrCreate() spark.conf.set("viewsEnabled","true") spark.conf.set("temporaryGcsBucket","{{target.gcs_bucket}}") +spark.conf.set("enableListInference", "{{ enable_list_inference }}") +{% if intermediate_format %} +spark.conf.set("intermediateFormat", "{{ intermediate_format }}") +{% endif %} {{ compiled_code }} dbt = dbtObj(spark.read.format("bigquery").load) @@ -106,10 +113,19 @@ else: msg = f"{type(df)} is not a supported type for dbt Python materialization" raise Exception(msg) +# For writeMethod we need to use "indirect" if materializing a partitioned table +# otherwise we can use "direct". Note that indirect will fail if the GCS bucket has a retention policy set on it. 
+{%- if partition_config %} + {%- set write_method = 'indirect' -%} +{%- else %} + {% set write_method = 'direct' -%} +{%- endif %} + df.write \ .mode("overwrite") \ .format("bigquery") \ - .option("writeMethod", "indirect").option("writeDisposition", 'WRITE_TRUNCATE') \ + .option("writeMethod", "{{ write_method }}") \ + .option("writeDisposition", 'WRITE_TRUNCATE') \ {%- if partition_config is not none %} {%- if partition_config.data_type | lower in ('date','timestamp','datetime') %} .option("partitionField", "{{- partition_config.field -}}") \ diff --git a/dbt/include/bigquery/macros/utils/date.sql b/dbt/include/bigquery/macros/utils/date.sql new file mode 100644 index 000000000..0f3b85aca --- /dev/null +++ b/dbt/include/bigquery/macros/utils/date.sql @@ -0,0 +1,3 @@ +{% macro bigquery__date(year, month, day) -%} + date({{ year }}, {{ month }}, {{ day }}) +{%- endmacro %} diff --git a/dbt/include/bigquery/macros/utils/string_literal.sql b/dbt/include/bigquery/macros/utils/string_literal.sql new file mode 100644 index 000000000..07e67319a --- /dev/null +++ b/dbt/include/bigquery/macros/utils/string_literal.sql @@ -0,0 +1,3 @@ +{%- macro bigquery__string_literal(value) -%} + '''{{ value }}''' +{%- endmacro -%} diff --git a/dev-requirements.txt b/dev-requirements.txt index 0af563a7d..2c0134110 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,32 +1,20 @@ # install latest changes in dbt-core -# TODO: how to automate switching from develop to version branches? -git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core -git+https://github.com/dbt-labs/dbt-common.git git+https://github.com/dbt-labs/dbt-adapters.git git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter -# if version 1.x or greater -> pin to major version -# if version 0.x -> pin to minor -black~=23.12 -bumpversion~=0.6.0 -click~=8.1 -ddtrace~=2.3 -flake8~=6.1 -flaky~=3.7 -freezegun~=1.3 -ipdb~=0.13.13 -mypy==1.7.1 # patch updates have historically introduced breaking changes -pip-tools~=7.3 -pre-commit~=3.5 -pre-commit-hooks~=4.5 +git+https://github.com/dbt-labs/dbt-common.git +git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core + +# dev +ddtrace==2.3.0 +pre-commit~=3.7.0 pytest~=7.4 pytest-csv~=3.0 pytest-dotenv~=0.5.2 pytest-logbook~=1.2 -pytest-xdist~=3.5 -pytz~=2023.3 +pytest-xdist~=3.6 tox~=4.11 -types-pytz~=2023.3 -types-protobuf~=4.24 -types-requests~=2.31 -twine~=4.0 -wheel~=0.42 + +# build +bumpversion~=0.6.0 +twine~=5.1 +wheel~=0.43 diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 000000000..bda507dc5 --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,37 @@ +# this image gets published to GHCR for production use +ARG py_version=3.11.2 + +FROM python:$py_version-slim-bullseye AS base + +RUN apt-get update \ + && apt-get dist-upgrade -y \ + && apt-get install -y --no-install-recommends \ + build-essential=12.9 \ + ca-certificates=20210119 \ + git=1:2.30.2-1+deb11u2 \ + libpq-dev=13.14-0+deb11u1 \ + make=4.3-4.1 \ + openssh-client=1:8.4p1-5+deb11u3 \ + software-properties-common=0.96.20.2-2.1 \ + && apt-get clean \ + && rm -rf \ + /var/lib/apt/lists/* \ + /tmp/* \ + /var/tmp/* + +ENV PYTHONIOENCODING=utf-8 +ENV LANG=C.UTF-8 + +RUN python -m pip install --upgrade "pip==24.0" "setuptools==69.2.0" "wheel==0.43.0" --no-cache-dir + + +FROM base AS dbt-bigquery + +ARG commit_ref=main + +HEALTHCHECK CMD dbt --version || exit 1 + +WORKDIR /usr/app/dbt/ +ENTRYPOINT ["dbt"] + +RUN python -m pip install --no-cache-dir 
"dbt-bigquery @ git+https://github.com/dbt-labs/dbt-bigquery@${commit_ref}" diff --git a/docker/README.md b/docker/README.md new file mode 100644 index 000000000..8c60deaa3 --- /dev/null +++ b/docker/README.md @@ -0,0 +1,58 @@ +# Docker for dbt +This docker file is suitable for building dbt Docker images locally or using with CI/CD to automate populating a container registry. + + +## Building an image: +This Dockerfile can create images for the following target: `dbt-bigquery` + +In order to build a new image, run the following docker command. +```shell +docker build --tag --target dbt-bigquery +``` +--- +> **Note:** Docker must be configured to use [BuildKit](https://docs.docker.com/develop/develop-images/build_enhancements/) in order for images to build properly! + +--- + +By default the image will be populated with the latest version of `dbt-bigquery` on `main`. +If you need to use a different version you can specify it by git ref using the `--build-arg` flag: +```shell +docker build --tag \ + --target dbt-bigquery \ + --build-arg commit_ref= \ + +``` + +### Examples: +To build an image named "my-dbt" that supports Snowflake using the latest releases: +```shell +cd dbt-core/docker +docker build --tag my-dbt --target dbt-bigquery . +``` + +To build an image named "my-other-dbt" that supports Snowflake using the adapter version 1.0.0b1: +```shell +cd dbt-core/docker +docker build \ + --tag my-other-dbt \ + --target dbt-bigquery \ + --build-arg commit_ref=v1.0.0b1 \ + . +``` + +## Running an image in a container: +The `ENTRYPOINT` for this Dockerfile is the command `dbt` so you can bind-mount your project to `/usr/app` and use dbt as normal: +```shell +docker run \ + --network=host \ + --mount type=bind,source=path/to/project,target=/usr/app \ + --mount type=bind,source=path/to/profiles.yml,target=/root/.dbt/profiles.yml \ + my-dbt \ + ls +``` +--- +**Notes:** +* Bind-mount sources _must_ be an absolute path +* You may need to make adjustments to the docker networking setting depending on the specifics of your data warehouse/database host. 
+ +--- diff --git a/docker/dev.Dockerfile b/docker/dev.Dockerfile new file mode 100644 index 000000000..f122f5343 --- /dev/null +++ b/docker/dev.Dockerfile @@ -0,0 +1,50 @@ +# this image does not get published, it is intended for local development only, see `Makefile` for usage +FROM ubuntu:24.04 AS base + +# prevent python installation from asking for time zone region +ARG DEBIAN_FRONTEND=noninteractive + +# add python repository +RUN apt-get update \ + && apt-get install -y software-properties-common=0.99.48 \ + && add-apt-repository -y ppa:deadsnakes/ppa \ + && apt-get clean \ + && rm -rf \ + /var/lib/apt/lists/* \ + /tmp/* \ + /var/tmp/* + +# install python +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + build-essential=12.10ubuntu1 \ + git-all=1:2.43.0-1ubuntu7.1 \ + python3.9=3.9.20-1+noble1 \ + python3.9-dev=3.9.20-1+noble1 \ + python3.9-distutils=3.9.20-1+noble1 \ + python3.9-venv=3.9.20-1+noble1 \ + python3-pip=24.0+dfsg-1ubuntu1 \ + python3-wheel=0.42.0-2 \ + && apt-get clean \ + && rm -rf \ + /var/lib/apt/lists/* \ + /tmp/* \ + /var/tmp/* + +# update the default system interpreter to the newly installed version +RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.9 1 + + +FROM base AS dbt-bigquery-dev + +HEALTHCHECK CMD python --version || exit 1 + +# send stdout/stderr to terminal +ENV PYTHONUNBUFFERED=1 + +# setup mount for local code +WORKDIR /opt/code +VOLUME /opt/code + +# create a virtual environment +RUN python3 -m venv /opt/venv diff --git a/docker_dev/README.md b/docker_dev/README.md deleted file mode 100644 index dd487fea7..000000000 --- a/docker_dev/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Docker Dev Images - -These images are solely for development purposes. They are -saved here for convenience. There should be no expectation -of stability or maintenance. diff --git a/docker_dev/ubuntu.Dockerfile b/docker_dev/ubuntu.Dockerfile deleted file mode 100644 index bac3f5993..000000000 --- a/docker_dev/ubuntu.Dockerfile +++ /dev/null @@ -1,50 +0,0 @@ -FROM ubuntu:latest - -# default to py3.11, this can be overridden at build, e.g. `docker build ... 
--build-arg version=3.10` -ARG version=3.11 - -# prevent python installation from asking for time zone region -ARG DEBIAN_FRONTEND=noninteractive - -# get add-apt-repository -RUN apt-get update && \ - apt-get install -y software-properties-common - -# add the python repository -RUN apt-get update && \ - add-apt-repository -y ppa:deadsnakes/ppa - -# install python and git (for installing dbt-core) -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - python$version \ - python$version-dev \ - python$version-distutils \ - python$version-venv \ - python3-pip \ - python3-wheel \ - build-essential \ - git-all - -# clean up -RUN apt-get clean && \ - rm -rf \ - /var/lib/apt/lists/* \ - /tmp/* \ - /var/tmp/* - -# update the default system interpreter to the newly installed version -RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python$version 1 - -# setup mount for our code -WORKDIR /opt/code -VOLUME /opt/code - -# install tox in the system interpreter (it creates it's own virtual environments) -RUN pip install tox - -# explicitly create a virtual environment as well for interactive testing -RUN python3 -m venv /opt/venv - -# send stdout/stderr to terminal -ENV PYTHONUNBUFFERED=1 diff --git a/mypy.ini b/mypy.ini index b111482fc..247a47fec 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,4 +1,2 @@ [mypy] mypy_path = third-party-stubs/ -namespace_packages = True -exclude = third-party-stubs/* diff --git a/setup.py b/setup.py index 2e969e246..79f6025ea 100644 --- a/setup.py +++ b/setup.py @@ -2,9 +2,9 @@ import sys # require a supported version of Python -if sys.version_info < (3, 8): +if sys.version_info < (3, 9): print("Error: dbt does not support this version of Python.") - print("Please upgrade to Python 3.8 or higher.") + print("Please upgrade to Python 3.9 or higher.") sys.exit(1) try: @@ -36,7 +36,6 @@ def _dbt_bigquery_version() -> str: package_name = "dbt-bigquery" -package_version = "1.8.0a1" description = """The BigQuery adapter plugin for dbt""" setup( @@ -51,14 +50,17 @@ def _dbt_bigquery_version() -> str: packages=find_namespace_packages(include=["dbt", "dbt.*"]), include_package_data=True, install_requires=[ - "dbt-common<1.0", - "dbt-adapters~=0.1.0a1", - "google-cloud-bigquery~=3.0", + "dbt-common>=1.10,<2.0", + "dbt-adapters>=1.7,<2.0", + # 3.20 introduced pyarrow>=3.0 under the `pandas` extra + "google-cloud-bigquery[pandas]>=3.0,<4.0", "google-cloud-storage~=2.4", "google-cloud-dataproc~=5.0", # ---- # Expect compatibility with all new versions of these packages, so lower bounds only. 
"google-api-core>=2.11.0", + # add dbt-core to ensure backwards compatibility of installation, this is not a functional dependency + "dbt-core>=1.8.0", ], zip_safe=False, classifiers=[ @@ -67,10 +69,10 @@ def _dbt_bigquery_version() -> str: "Operating System :: Microsoft :: Windows", "Operating System :: MacOS :: MacOS X", "Operating System :: POSIX :: Linux", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ], - python_requires=">=3.8", + python_requires=">=3.9", ) diff --git a/tests/conftest.py b/tests/conftest.py index 78f3d82e1..33f7f9d17 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,8 +1,9 @@ import pytest import os import json +from dbt.adapters.bigquery.credentials import _is_base64, _base64_to_string -# Import the fuctional fixtures as a plugin +# Import the functional fixtures as a plugin # Note: fixtures with session scope need to be local pytest_plugins = ["dbt.tests.fixtures.project"] @@ -38,6 +39,8 @@ def oauth_target(): def service_account_target(): credentials_json_str = os.getenv("BIGQUERY_TEST_SERVICE_ACCOUNT_JSON").replace("'", "") + if _is_base64(credentials_json_str): + credentials_json_str = _base64_to_string(credentials_json_str) credentials = json.loads(credentials_json_str) project_id = credentials.get("project_id") return { diff --git a/tests/functional/adapter/dbt_show/test_dbt_show.py b/tests/functional/adapter/dbt_show/test_dbt_show.py index acb54cc47..6794547a5 100644 --- a/tests/functional/adapter/dbt_show/test_dbt_show.py +++ b/tests/functional/adapter/dbt_show/test_dbt_show.py @@ -1,5 +1,9 @@ import pytest -from dbt.tests.adapter.dbt_show.test_dbt_show import BaseShowSqlHeader, BaseShowLimit +from dbt.tests.adapter.dbt_show.test_dbt_show import ( + BaseShowSqlHeader, + BaseShowLimit, + BaseShowDoesNotHandleDoubleLimit, +) from dbt.tests.util import run_dbt @@ -20,8 +24,7 @@ ] ) as v - ) as model_limit_subq - limit 5 + ) """ model_with_null_json_struct = """ @@ -48,7 +51,11 @@ def models(self): } def test_sql_header(self, project): - run_dbt(["show", "--select", "json_struct_model"]) + run_dbt(["show", "--select", "json_struct_model", "-d"]) def test_show_with_null_json_struct(self, project): run_dbt(["show", "--select", "null_json_struct_model"]) + + +class TestBigQueryShowDoesNotHandleDoubleLimit(BaseShowDoesNotHandleDoubleLimit): + DATABASE_ERROR_MESSAGE = "Syntax error: Expected end of input but got keyword LIMIT" diff --git a/tests/functional/adapter/empty/test_empty.py b/tests/functional/adapter/empty/test_empty.py index c224c51df..3bf47f35d 100644 --- a/tests/functional/adapter/empty/test_empty.py +++ b/tests/functional/adapter/empty/test_empty.py @@ -1,5 +1,9 @@ -from dbt.tests.adapter.empty.test_empty import BaseTestEmpty +from dbt.tests.adapter.empty.test_empty import BaseTestEmpty, BaseTestEmptyInlineSourceRef class TestBigQueryEmpty(BaseTestEmpty): pass + + +class TestBigQueryEmptyInlineSourceRef(BaseTestEmptyInlineSourceRef): + pass diff --git a/tests/functional/adapter/incremental/incremental_strategy_fixtures.py b/tests/functional/adapter/incremental/incremental_strategy_fixtures.py index 17391b48d..02efbb6c2 100644 --- a/tests/functional/adapter/incremental/incremental_strategy_fixtures.py +++ b/tests/functional/adapter/incremental/incremental_strategy_fixtures.py @@ -555,3 +555,59 @@ select * from data """.lstrip() + +microbatch_model_no_unique_id_sql = """ +{{ 
config( + materialized='incremental', + incremental_strategy='microbatch', + partition_by={ + 'field': 'event_time', + 'data_type': 'timestamp', + 'granularity': 'day' + }, + event_time='event_time', + batch_size='day', + begin=modules.datetime.datetime(2020, 1, 1, 0, 0, 0) + ) +}} +select * from {{ ref('input_model') }} +""" + +microbatch_input_sql = """ +{{ config(materialized='table', event_time='event_time') }} +select 1 as id, TIMESTAMP '2020-01-01 00:00:00-0' as event_time +union all +select 2 as id, TIMESTAMP '2020-01-02 00:00:00-0' as event_time +union all +select 3 as id, TIMESTAMP '2020-01-03 00:00:00-0' as event_time +""" + +microbatch_model_no_partition_by_sql = """ +{{ config( + materialized='incremental', + incremental_strategy='microbatch', + event_time='event_time', + batch_size='day', + begin=modules.datetime.datetime(2020, 1, 1, 0, 0, 0) + ) +}} +select * from {{ ref('input_model') }} +""" + + +microbatch_model_invalid_partition_by_sql = """ +{{ config( + materialized='incremental', + incremental_strategy='microbatch', + event_time='event_time', + batch_size='day', + begin=modules.datetime.datetime(2020, 1, 1, 0, 0, 0), + partition_by={ + 'field': 'event_time', + 'data_type': 'timestamp', + 'granularity': 'hour' + } + ) +}} +select * from {{ ref('input_model') }} +""" diff --git a/tests/functional/adapter/incremental/test_incremental_microbatch.py b/tests/functional/adapter/incremental/test_incremental_microbatch.py new file mode 100644 index 000000000..d1bbbcea3 --- /dev/null +++ b/tests/functional/adapter/incremental/test_incremental_microbatch.py @@ -0,0 +1,55 @@ +import os +import pytest +from unittest import mock + +from dbt.tests.util import run_dbt_and_capture +from dbt.tests.adapter.incremental.test_incremental_microbatch import ( + BaseMicrobatch, + patch_microbatch_end_time, +) + +from tests.functional.adapter.incremental.incremental_strategy_fixtures import ( + microbatch_model_no_unique_id_sql, + microbatch_input_sql, + microbatch_model_no_partition_by_sql, + microbatch_model_invalid_partition_by_sql, +) + + +class TestBigQueryMicrobatch(BaseMicrobatch): + @pytest.fixture(scope="class") + def microbatch_model_sql(self) -> str: + return microbatch_model_no_unique_id_sql + + +class TestBigQueryMicrobatchMissingPartitionBy: + @pytest.fixture(scope="class") + def models(self) -> str: + return { + "microbatch.sql": microbatch_model_no_partition_by_sql, + "input_model.sql": microbatch_input_sql, + } + + @mock.patch.dict(os.environ, {"DBT_EXPERIMENTAL_MICROBATCH": "True"}) + def test_execution_failure_no_partition_by(self, project): + with patch_microbatch_end_time("2020-01-03 13:57:00"): + _, stdout = run_dbt_and_capture(["run"], expect_pass=False) + assert "The 'microbatch' strategy requires a `partition_by` config" in stdout + + +class TestBigQueryMicrobatchInvalidPartitionByGranularity: + @pytest.fixture(scope="class") + def models(self) -> str: + return { + "microbatch.sql": microbatch_model_invalid_partition_by_sql, + "input_model.sql": microbatch_input_sql, + } + + @mock.patch.dict(os.environ, {"DBT_EXPERIMENTAL_MICROBATCH": "True"}) + def test_execution_failure_no_partition_by(self, project): + with patch_microbatch_end_time("2020-01-03 13:57:00"): + _, stdout = run_dbt_and_capture(["run"], expect_pass=False) + assert ( + "The 'microbatch' strategy requires a `partition_by` config with the same granularity as its configured `batch_size`" + in stdout + ) diff --git a/tests/functional/adapter/test_json_keyfile.py b/tests/functional/adapter/test_json_keyfile.py 
new file mode 100644
index 000000000..a5caaebdf
--- /dev/null
+++ b/tests/functional/adapter/test_json_keyfile.py
@@ -0,0 +1,87 @@
+import base64
+import json
+import pytest
+from dbt.adapters.bigquery.credentials import _is_base64
+
+
+def string_to_base64(s):
+    return base64.b64encode(s.encode("utf-8"))
+
+
+@pytest.fixture
+def example_json_keyfile():
+    keyfile = json.dumps(
+        {
+            "type": "service_account",
+            "project_id": "",
+            "private_key_id": "",
+            "private_key": "-----BEGIN PRIVATE KEY----------END PRIVATE KEY-----\n",
+            "client_email": "",
+            "client_id": "",
+            "auth_uri": "https://accounts.google.com/o/oauth2/auth",
+            "token_uri": "https://oauth2.googleapis.com/token",
+            "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
+            "client_x509_cert_url": "",
+        }
+    )
+
+    return keyfile
+
+
+@pytest.fixture
+def example_json_keyfile_b64():
+    keyfile = json.dumps(
+        {
+            "type": "service_account",
+            "project_id": "",
+            "private_key_id": "",
+            "private_key": "-----BEGIN PRIVATE KEY----------END PRIVATE KEY-----\n",
+            "client_email": "",
+            "client_id": "",
+            "auth_uri": "https://accounts.google.com/o/oauth2/auth",
+            "token_uri": "https://oauth2.googleapis.com/token",
+            "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
+            "client_x509_cert_url": "",
+        }
+    )
+
+    return string_to_base64(keyfile)
+
+
+def test_valid_base64_strings(example_json_keyfile_b64):
+    valid_strings = [
+        "SGVsbG8gV29ybGQh",  # "Hello World!"
+        "Zm9vYmFy",  # "foobar"
+        "QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVowMTIzNDU2Nzg5",  # A long string
+        "",  # Empty string
+        example_json_keyfile_b64.decode("utf-8"),
+    ]
+
+    for s in valid_strings:
+        assert _is_base64(s) is True
+
+
+def test_valid_base64_bytes(example_json_keyfile_b64):
+    valid_bytes = [
+        b"SGVsbG8gV29ybGQh",  # "Hello World!"
+ b"Zm9vYmFy", # "foobar" + b"QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVowMTIzNDU2Nzg5", # A long string + b"", # Empty bytes + example_json_keyfile_b64, + ] + for s in valid_bytes: + assert _is_base64(s) is True + + +def test_invalid_base64(example_json_keyfile): + invalid_inputs = [ + "This is not Base64", + "SGVsbG8gV29ybGQ", # Incorrect padding + "Invalid#Base64", + 12345, # Not a string or bytes + b"Invalid#Base64", + "H\xffGVsbG8gV29ybGQh", # Contains invalid character \xff + example_json_keyfile, + ] + for s in invalid_inputs: + assert _is_base64(s) is False diff --git a/tests/functional/adapter/test_simple_seed.py b/tests/functional/adapter/test_simple_seed.py index b01f99346..5ec19d420 100644 --- a/tests/functional/adapter/test_simple_seed.py +++ b/tests/functional/adapter/test_simple_seed.py @@ -5,7 +5,6 @@ from dbt.tests.adapter.simple_seed.test_seed import BaseTestEmptySeed from dbt.tests.adapter.utils.base_utils import run_dbt - _SEED_CONFIGS_CSV = """ seed_id,stuff 1,a @@ -156,3 +155,38 @@ def test__bigquery_seed_table_with_labels_config_bigquery(self, project): class TestBigQueryEmptySeed(BaseTestEmptySeed): pass + + +class TestBigQuerySeedWithUniqueDelimiter(TestSimpleSeedConfigs): + @pytest.fixture(scope="class") + def seeds(self): + return { + "seed_enabled.csv": seeds__enabled_in_config_csv.replace(",", "|"), + "seed_tricky.csv": seeds__tricky_csv.replace(",", "\t"), + "seed_configs.csv": _SEED_CONFIGS_CSV, + } + + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "config-version": 2, + "seeds": { + "test": { + "enabled": False, + "quote_columns": True, + "seed_enabled": { + "enabled": True, + "+column_types": self.seed_enabled_types(), + "delimiter": "|", + }, + "seed_tricky": { + "enabled": True, + "+column_types": self.seed_tricky_types(), + "delimiter": "\t", + }, + "seed_configs": { + "enabled": True, + }, + }, + }, + } diff --git a/tests/functional/adapter/test_string_literal_macro.py b/tests/functional/adapter/test_string_literal_macro.py new file mode 100644 index 000000000..d67f4be71 --- /dev/null +++ b/tests/functional/adapter/test_string_literal_macro.py @@ -0,0 +1,17 @@ +import pytest +from dbt.tests.util import run_dbt + + +_MODEL_SQL = """ +select {{ dbt.string_literal('my multiline +string') }} as test +""" + + +class TestStringLiteralQuoting: + @pytest.fixture(scope="class") + def models(self): + return {"my_model.sql": _MODEL_SQL} + + def test_string_literal_quoting(self, project): + run_dbt() diff --git a/tests/functional/adapter/utils/test_utils.py b/tests/functional/adapter/utils/test_utils.py index 58cecdc7e..384b17108 100644 --- a/tests/functional/adapter/utils/test_utils.py +++ b/tests/functional/adapter/utils/test_utils.py @@ -8,9 +8,11 @@ from dbt.tests.adapter.utils.test_array_construct import BaseArrayConstruct from dbt.tests.adapter.utils.test_any_value import BaseAnyValue from dbt.tests.adapter.utils.test_bool_or import BaseBoolOr +from dbt.tests.adapter.utils.test_cast import BaseCast from dbt.tests.adapter.utils.test_cast_bool_to_text import BaseCastBoolToText from dbt.tests.adapter.utils.test_concat import BaseConcat from dbt.tests.adapter.utils.test_current_timestamp import BaseCurrentTimestampAware +from dbt.tests.adapter.utils.test_date import BaseDate from dbt.tests.adapter.utils.test_dateadd import BaseDateAdd from dbt.tests.adapter.utils.test_datediff import BaseDateDiff from dbt.tests.adapter.utils.test_date_spine import BaseDateSpine @@ -104,6 +106,10 @@ class TestBoolOr(BaseBoolOr): pass +class 
TestCast(BaseCast): + pass + + class TestCastBoolToText(BaseCastBoolToText): pass @@ -117,6 +123,10 @@ class TestCurrentTimestamp(BaseCurrentTimestampAware): pass +class TestDate(BaseDate): + pass + + class TestDateAdd(BaseDateAdd): pass diff --git a/dbt/adapters/bigquery/dataproc/__init__.py b/tests/functional/python_model_tests/__init__.py similarity index 100% rename from dbt/adapters/bigquery/dataproc/__init__.py rename to tests/functional/python_model_tests/__init__.py diff --git a/tests/functional/python_model_tests/files.py b/tests/functional/python_model_tests/files.py new file mode 100644 index 000000000..1cb95602a --- /dev/null +++ b/tests/functional/python_model_tests/files.py @@ -0,0 +1,125 @@ +SINGLE_RECORD = """ +import pandas as pd + +def model(dbt, session): + + dbt.config( + submission_method="serverless", + materialized="table" + ) + + df = pd.DataFrame( + [ + {"column_name": {"name": "hello", "my_list": ["h", "e", "l", "l", "o"]}}, + ] + ) + + return df +""" + + +MULTI_RECORD = """ +import pandas as pd + +def model(dbt, session): + + dbt.config( + submission_method="serverless", + materialized="table", + ) + + df = pd.DataFrame( + [ + {"column_name": [{"name": "hello", "my_list": ["h", "e", "l", "l", "o"]}]}, + ] + ) + + return df +""" + + +ORC_FORMAT = """ +import pandas as pd + +def model(dbt, session): + + dbt.config( + submission_method="serverless", + materialized="table", + intermediate_format="orc", + ) + + df = pd.DataFrame( + [ + {"column_name": [{"name": "hello", "my_list": ["h", "e", "l", "l", "o"]}]}, + ] + ) + + return df +""" + + +ENABLE_LIST_INFERENCE = """ +import pandas as pd + +def model(dbt, session): + + dbt.config( + submission_method="serverless", + materialized="table", + enable_list_inference="true", + ) + + df = pd.DataFrame( + [ + {"column_name": [{"name": "hello", "my_list": ["h", "e", "l", "l", "o"]}]}, + ] + ) + + return df +""" + + +ENABLE_LIST_INFERENCE_PARQUET_FORMAT = """ +import pandas as pd + +def model(dbt, session): + + dbt.config( + submission_method="serverless", + materialized="table", + enable_list_inference="true", + intermediate_format="parquet", + ) + + df = pd.DataFrame( + [ + {"column_name": [{"name": "hello", "my_list": ["h", "e", "l", "l", "o"]}]}, + ] + ) + + return df +""" + + +DISABLE_LIST_INFERENCE_ORC_FORMAT = """ +import pandas as pd + +def model(dbt, session): + + dbt.config( + submission_method="serverless", + materialized="table", + enable_list_inference="false", + intermediate_format="orc", + ) + + df = pd.DataFrame( + [ + {"column_name": [{"name": "hello", "my_list": ["h", "e", "l", "l", "o"]}]}, + ] + ) + + return df + +""" diff --git a/tests/functional/python_model_tests/test_list_inference.py b/tests/functional/python_model_tests/test_list_inference.py new file mode 100644 index 000000000..88b1c4fa5 --- /dev/null +++ b/tests/functional/python_model_tests/test_list_inference.py @@ -0,0 +1,37 @@ +""" +This test case addresses this regression: https://github.com/dbt-labs/dbt-bigquery/issues/1047 + +As the comments point out, the issue appears when the settings are: + - list inference: off + - intermediate format: parquet + +Adjusting either of these alleviates the issue. + +When the regression was first reported, `files.MULTI_RECORD` failed while the other models passed. 
+""" + +from dbt.tests.util import run_dbt_and_capture +import pytest + +from tests.functional.python_model_tests import files + + +class TestPythonListInference: + @pytest.fixture(scope="class") + def models(self): + return { + # this is what worked prior to this issue + "single_record.py": files.SINGLE_RECORD, + # this is the model that initially failed for this issue + "multi_record.py": files.MULTI_RECORD, + # these are explicit versions of the default settings + "enable_list_inference.py": files.ENABLE_LIST_INFERENCE, + "enable_list_inference_parquet_format.py": files.ENABLE_LIST_INFERENCE_PARQUET_FORMAT, + # orc format also resolves the issue, regardless of list inference + "orc_format.py": files.ORC_FORMAT, + "disable_list_inference_orc_format.py": files.DISABLE_LIST_INFERENCE_ORC_FORMAT, + } + + def test_models_success(self, project, models): + result, output = run_dbt_and_capture(["run"]) + assert len(result) == len(models) diff --git a/tests/functional/test_cancel.py b/tests/functional/test_cancel.py new file mode 100644 index 000000000..823687b52 --- /dev/null +++ b/tests/functional/test_cancel.py @@ -0,0 +1,134 @@ +import platform + +import time + +import os +import signal +import subprocess + +import pytest + +from dbt.tests.util import get_connection + +_SEED_CSV = """ +id, name, astrological_sign, moral_alignment +1, Alice, Aries, Lawful Good +2, Bob, Taurus, Neutral Good +3, Thaddeus, Gemini, Chaotic Neutral +4, Zebulon, Cancer, Lawful Evil +5, Yorick, Leo, True Neutral +6, Xavier, Virgo, Chaotic Evil +7, Wanda, Libra, Lawful Neutral +""" + +_LONG_RUNNING_MODEL_SQL = """ + {{ config(materialized='table') }} + with array_1 as ( + select generated_ids from UNNEST(GENERATE_ARRAY(1, 200000)) AS generated_ids + ), + array_2 as ( + select generated_ids from UNNEST(GENERATE_ARRAY(2, 200000)) AS generated_ids + ) + + SELECT array_1.generated_ids + FROM array_1 + LEFT JOIN array_1 as jnd on 1=1 + LEFT JOIN array_2 as jnd2 on 1=1 + LEFT JOIN array_1 as jnd3 on jnd3.generated_ids >= jnd2.generated_ids +""" + + +def _get_info_schema_jobs_query(project_id, dataset_id, table_id): + """ + Running this query requires roles/bigquery.resourceViewer on the project, + see: https://cloud.google.com/bigquery/docs/information-schema-jobs#required_role + :param project_id: + :param dataset_id: + :param table_id: + :return: a single job id that matches the model we tried to create and was cancelled + """ + return f""" + SELECT job_id + FROM `region-us`.`INFORMATION_SCHEMA.JOBS_BY_PROJECT` + WHERE creation_time > TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 5 HOUR) + AND statement_type = 'CREATE_TABLE_AS_SELECT' + AND state = 'DONE' + AND job_id IS NOT NULL + AND project_id = '{project_id}' + AND error_result.reason = 'stopped' + AND error_result.message = 'Job execution was cancelled: User requested cancellation' + AND destination_table.table_id = '{table_id}' + AND destination_table.dataset_id = '{dataset_id}' + """ + + +def _run_dbt_in_subprocess(project, dbt_command): + + run_dbt_process = subprocess.Popen( + [ + "dbt", + dbt_command, + "--profiles-dir", + project.profiles_dir, + "--project-dir", + project.project_root, + ], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + shell=False, + env=os.environ.copy(), + ) + std_out_log = "" + while True: + std_out_line = run_dbt_process.stdout.readline().decode("utf-8") + std_out_log += std_out_line + if std_out_line != "": + print(std_out_line) + if "1 of 1 START" in std_out_line: + time.sleep(1) + run_dbt_process.send_signal(signal.SIGINT) + + if 
run_dbt_process.poll(): + break + + return std_out_log + + +def _get_job_id(project, table_name): + # Because we run this in a subprocess we have to actually call Bigquery and look up the job id + with get_connection(project.adapter): + job_id = project.run_sql( + _get_info_schema_jobs_query(project.database, project.test_schema, table_name) + ) + + return job_id + + +@pytest.mark.skipif( + platform.system() == "Windows", reason="running signt is unsupported on Windows." +) +class TestBigqueryCancelsQueriesOnKeyboardInterrupt: + @pytest.fixture(scope="class", autouse=True) + def models(self): + return { + "model.sql": _LONG_RUNNING_MODEL_SQL, + } + + @pytest.fixture(scope="class", autouse=True) + def seeds(self): + return { + "seed.csv": _SEED_CSV, + } + + def test_bigquery_cancels_queries_for_model_on_keyboard_interrupt(self, project): + std_out_log = _run_dbt_in_subprocess(project, "run") + + assert "CANCEL query model.test.model" in std_out_log + assert len(_get_job_id(project, "model")) == 1 + + @pytest.mark.skip(reason="cannot reliably cancel seed queries in time") + def test_bigquery_cancels_queries_for_seed_on_keyboard_interrupt(self, project): + std_out_log = _run_dbt_in_subprocess(project, "seed") + + assert "CANCEL query seed.test.seed" in std_out_log + # we can't assert the job id since we can't kill the seed process fast enough to cancel it diff --git a/tests/functional/test_drop_temp_relation.py b/tests/functional/test_drop_temp_relation.py new file mode 100644 index 000000000..4cdfaedae --- /dev/null +++ b/tests/functional/test_drop_temp_relation.py @@ -0,0 +1,60 @@ +import pytest +from google.api_core.exceptions import NotFound +from dbt.adapters.bigquery.relation import BigQueryRelation +from dbt.tests.util import run_dbt, get_connection, relation_from_name + + +_INCREMENTAL_MODEL = """ +{{ + config( + materialized="incremental", + on_schema_change="sync_all_columns" + ) +}} + select 20 as id, cast('2020-01-01 01:00:00' as datetime) as date_hour union all + select 40 as id, cast('2020-01-01 02:00:00' as datetime) as date_hour +""" + +_INCREMENTAL_MODEL_YAML = """version: 2 +models: +- name: test_drop_relation + columns: + - name: id + type: int64 + - name: date_hour + type: datetime +""" + + +class BaseIncrementalModelConfig: + @pytest.fixture(scope="class") + def models(self): + return { + "test_drop_relation.sql": _INCREMENTAL_MODEL, + "schema.yml": _INCREMENTAL_MODEL_YAML, + } + + +class TestIncrementalModel(BaseIncrementalModelConfig): + def test_incremental_model_succeeds(self, project): + """ + Steps: + 1. Create the model + 2. Merge into the model using __dbt_tmp table + 3. 
+        3. Assert that fetching the __dbt_tmp relation afterwards raises a NotFound exception
+        """
+        results = run_dbt(["run"])
+        assert len(results) == 1
+        results = run_dbt(["run"])
+        assert len(results) == 1
+        relation: BigQueryRelation = relation_from_name(
+            project.adapter, "test_drop_relation__dbt_tmp"
+        )
+        adapter = project.adapter
+        with pytest.raises(NotFound):
+            with get_connection(project.adapter) as conn:
+                conn.handle.get_table(
+                    adapter.connections.get_bq_table(
+                        relation.database, relation.schema, relation.table
+                    )
+                )
diff --git a/tests/functional/test_quota_project.py b/tests/functional/test_quota_project.py
new file mode 100644
index 000000000..0b4bb90c4
--- /dev/null
+++ b/tests/functional/test_quota_project.py
@@ -0,0 +1,27 @@
+import os
+
+import pytest
+
+from dbt.tests.util import run_dbt
+
+_QUOTA_PROJECT = os.getenv("BIGQUERY_TEST_ALT_DATABASE")
+
+
+class TestNoQuotaProject:
+    def test_no_quota_project(self, project):
+        results = run_dbt()
+        for result in results:
+            assert None == result.adapter_response["quota_project"]
+
+
+class TestQuotaProjectOption:
+    @pytest.fixture(scope="class")
+    def profiles_config_update(self, dbt_profile_target):
+        outputs = {"default": dbt_profile_target}
+        outputs["default"]["quota_project"] = _QUOTA_PROJECT
+        yield
+
+    def test_quota_project_option(self, project):
+        results = run_dbt()
+        for result in results:
+            assert _QUOTA_PROJECT == result.adapter_response["quota_project"]
diff --git a/tests/unit/test_bigquery_adapter.py b/tests/unit/test_bigquery_adapter.py
index 34abd0caf..e57db9a62 100644
--- a/tests/unit/test_bigquery_adapter.py
+++ b/tests/unit/test_bigquery_adapter.py
@@ -20,10 +20,9 @@ from dbt.adapters.bigquery.connections import _sanitize_label, _VALIDATE_LABEL_LENGTH_LIMIT
 from dbt_common.clients import agate_helper
 import dbt_common.exceptions
-from dbt.context.manifest import generate_query_header_context
+from dbt.context.query_header import generate_query_header_context
 from dbt.contracts.files import FileHash
 from dbt.contracts.graph.manifest import ManifestStateCheck
-from dbt.logger import GLOBAL_LOGGER as logger  # noqa
 from dbt.context.providers import RuntimeConfigObject, generate_runtime_macro_context

 from google.cloud.bigquery import AccessEntry
@@ -33,6 +32,7 @@
     inject_adapter,
     TestAdapterConversions,
     load_internal_manifest_macros,
+    mock_connection,
 )

@@ -203,7 +203,7 @@ def get_adapter(self, target) -> BigQueryAdapter:
 class TestBigQueryAdapterAcquire(BaseTestBigQueryAdapter):
     @patch(
-        "dbt.adapters.bigquery.connections.get_bigquery_defaults",
+        "dbt.adapters.bigquery.credentials._create_bigquery_defaults",
         return_value=("credentials", "project_id"),
     )
     @patch("dbt.adapters.bigquery.BigQueryConnectionManager.open", return_value=_bq_conn())
@@ -244,10 +244,12 @@ def test_acquire_connection_oauth_validations(self, mock_open_connection):
         mock_open_connection.assert_called_once()

     @patch(
-        "dbt.adapters.bigquery.connections.get_bigquery_defaults",
+        "dbt.adapters.bigquery.credentials._create_bigquery_defaults",
         return_value=("credentials", "project_id"),
     )
-    @patch("dbt.adapters.bigquery.BigQueryConnectionManager.open", return_value=_bq_conn())
+    @patch(
+        "dbt.adapters.bigquery.connections.BigQueryConnectionManager.open", return_value=_bq_conn()
+    )
     def test_acquire_connection_dataproc_serverless(
         self, mock_open_connection, mock_get_bigquery_defaults
     ):
@@ -369,41 +371,42 @@ def test_acquire_connection_maximum_bytes_billed(self, mock_open_connection):

     def test_cancel_open_connections_empty(self):
         adapter = self.get_adapter("oauth")
-        self.assertEqual(adapter.cancel_open_connections(), None)
+        self.assertEqual(len(list(adapter.cancel_open_connections())), 0)

     def test_cancel_open_connections_master(self):
         adapter = self.get_adapter("oauth")
-        adapter.connections.thread_connections[0] = object()
-        self.assertEqual(adapter.cancel_open_connections(), None)
+        key = adapter.connections.get_thread_identifier()
+        adapter.connections.thread_connections[key] = mock_connection("master")
+        self.assertEqual(len(list(adapter.cancel_open_connections())), 0)

     def test_cancel_open_connections_single(self):
         adapter = self.get_adapter("oauth")
-        adapter.connections.thread_connections.update(
-            {
-                0: object(),
-                1: object(),
-            }
-        )
-        # actually does nothing
-        self.assertEqual(adapter.cancel_open_connections(), None)
+        master = mock_connection("master")
+        model = mock_connection("model")
+        key = adapter.connections.get_thread_identifier()
+
+        adapter.connections.thread_connections.update({key: master, 1: model})
+        self.assertEqual(len(list(adapter.cancel_open_connections())), 1)

-    @patch("dbt.adapters.bigquery.impl.google.auth.default")
-    @patch("dbt.adapters.bigquery.impl.google.cloud.bigquery")
-    def test_location_user_agent(self, mock_bq, mock_auth_default):
+    @patch("dbt.adapters.bigquery.clients.ClientOptions")
+    @patch("dbt.adapters.bigquery.credentials.default")
+    @patch("dbt.adapters.bigquery.clients.BigQueryClient")
+    def test_location_user_agent(self, MockClient, mock_auth_default, MockClientOptions):
         creds = MagicMock()
         mock_auth_default.return_value = (creds, MagicMock())
         adapter = self.get_adapter("loc")

         connection = adapter.acquire_connection("dummy")
-        mock_client = mock_bq.Client
+        mock_client_options = MockClientOptions.return_value

-        mock_client.assert_not_called()
+        MockClient.assert_not_called()
         connection.handle
-        mock_client.assert_called_once_with(
+        MockClient.assert_called_once_with(
             "dbt-unit-000000",
             creds,
             location="Luna Station",
             client_info=HasUserAgent(),
+            client_options=mock_client_options,
         )
diff --git a/tests/unit/test_bigquery_connection_manager.py b/tests/unit/test_bigquery_connection_manager.py
index 564601b2f..d4c95792e 100644
--- a/tests/unit/test_bigquery_connection_manager.py
+++ b/tests/unit/test_bigquery_connection_manager.py
@@ -1,82 +1,59 @@
 import json
 import unittest
-from contextlib import contextmanager
 from requests.exceptions import ConnectionError
 from unittest.mock import patch, MagicMock, Mock, ANY

 import dbt.adapters
+import google.cloud.bigquery

 from dbt.adapters.bigquery import BigQueryCredentials
 from dbt.adapters.bigquery import BigQueryRelation
 from dbt.adapters.bigquery.connections import BigQueryConnectionManager
-from dbt.logger import GLOBAL_LOGGER as logger  # noqa
+from dbt.adapters.bigquery.retry import RetryFactory


 class TestBigQueryConnectionManager(unittest.TestCase):
     def setUp(self):
-        credentials = Mock(BigQueryCredentials)
-        profile = Mock(query_comment=None, credentials=credentials)
-        self.connections = BigQueryConnectionManager(profile=profile, mp_context=Mock())
+        self.credentials = Mock(BigQueryCredentials)
+        self.credentials.method = "oauth"
+        self.credentials.job_retries = 1
+        self.credentials.job_retry_deadline_seconds = 1
+        self.credentials.scopes = tuple()

-        self.mock_client = Mock(dbt.adapters.bigquery.impl.google.cloud.bigquery.Client)
-        self.mock_connection = MagicMock()
+        self.mock_client = Mock(google.cloud.bigquery.Client)
+        self.mock_connection = MagicMock()
         self.mock_connection.handle = self.mock_client
+        self.mock_connection.credentials = self.credentials
+        self.connections = BigQueryConnectionManager(
+            profile=Mock(credentials=self.credentials, query_comment=None),
+            mp_context=Mock(),
+        )
         self.connections.get_thread_connection = lambda: self.mock_connection
-        self.connections.get_job_retry_deadline_seconds = lambda x: None
-        self.connections.get_job_retries = lambda x: 1
-
-    @patch("dbt.adapters.bigquery.connections._is_retryable", return_value=True)
-    def test_retry_and_handle(self, is_retryable):
-        self.connections.DEFAULT_MAXIMUM_DELAY = 2.0
-
-        @contextmanager
-        def dummy_handler(msg):
-            yield
-
-        self.connections.exception_handler = dummy_handler
-
-        class DummyException(Exception):
-            """Count how many times this exception is raised"""
-
-            count = 0
+    @patch(
+        "dbt.adapters.bigquery.retry.create_bigquery_client",
+        return_value=Mock(google.cloud.bigquery.Client),
+    )
+    def test_retry_connection_reset(self, mock_client_factory):
+        new_mock_client = mock_client_factory.return_value

-            def __init__(self):
-                DummyException.count += 1
+        @self.connections._retry.create_reopen_with_deadline(self.mock_connection)
+        def generate_connection_reset_error():
+            raise ConnectionResetError

-        def raiseDummyException():
-            raise DummyException()
+        assert self.mock_connection.handle is self.mock_client

-        with self.assertRaises(DummyException):
-            self.connections._retry_and_handle(
-                "some sql", Mock(credentials=Mock(retries=8)), raiseDummyException
-            )
-        self.assertEqual(DummyException.count, 9)
-
-    @patch("dbt.adapters.bigquery.connections._is_retryable", return_value=True)
-    def test_retry_connection_reset(self, is_retryable):
-        self.connections.open = MagicMock()
-        self.connections.close = MagicMock()
-        self.connections.DEFAULT_MAXIMUM_DELAY = 2.0
-
-        @contextmanager
-        def dummy_handler(msg):
-            yield
-
-        self.connections.exception_handler = dummy_handler
-
-        def raiseConnectionResetError():
-            raise ConnectionResetError("Connection broke")
-
-        mock_conn = Mock(credentials=Mock(retries=1))
         with self.assertRaises(ConnectionResetError):
-            self.connections._retry_and_handle("some sql", mock_conn, raiseConnectionResetError)
-        self.connections.close.assert_called_once_with(mock_conn)
-        self.connections.open.assert_called_once_with(mock_conn)
+            # this will always raise the error, we just want to test that the connection was reopening in between
+            generate_connection_reset_error()
+
+        assert self.mock_connection.handle is new_mock_client
+        assert new_mock_client is not self.mock_client

     def test_is_retryable(self):
-        _is_retryable = dbt.adapters.bigquery.connections._is_retryable
+        _is_retryable = dbt.adapters.bigquery.retry._is_retryable
         exceptions = dbt.adapters.bigquery.impl.google.cloud.exceptions
         internal_server_error = exceptions.InternalServerError("code broke")
         bad_request_error = exceptions.BadRequest("code broke")
@@ -85,12 +62,14 @@ def test_is_retryable(self):
         rate_limit_error = exceptions.Forbidden(
             "code broke", errors=[{"reason": "rateLimitExceeded"}]
         )
+        service_unavailable_error = exceptions.ServiceUnavailable("service is unavailable")

         self.assertTrue(_is_retryable(internal_server_error))
         self.assertTrue(_is_retryable(bad_request_error))
         self.assertTrue(_is_retryable(connection_error))
         self.assertFalse(_is_retryable(client_error))
         self.assertTrue(_is_retryable(rate_limit_error))
+        self.assertTrue(_is_retryable(service_unavailable_error))

     def test_drop_dataset(self):
         mock_table = Mock()
@@ -103,29 +82,30 @@ def test_drop_dataset(self):
         self.mock_client.delete_table.assert_not_called()
         self.mock_client.delete_dataset.assert_called_once()

-    @patch("dbt.adapters.bigquery.impl.google.cloud.bigquery")
-    def test_query_and_results(self, mock_bq):
-        self.mock_client.query = Mock(return_value=Mock(state="DONE"))
+    @patch("dbt.adapters.bigquery.connections.QueryJobConfig")
+    def test_query_and_results(self, MockQueryJobConfig):
         self.connections._query_and_results(
-            self.mock_client,
+            self.mock_connection,
             "sql",
-            {"job_param_1": "blah"},
-            job_creation_timeout=15,
-            job_execution_timeout=3,
+            {"dry_run": True},
+            job_id=1,
         )

-        mock_bq.QueryJobConfig.assert_called_once()
+        MockQueryJobConfig.assert_called_once()
         self.mock_client.query.assert_called_once_with(
-            query="sql", job_config=mock_bq.QueryJobConfig(), timeout=15
+            query="sql",
+            job_config=MockQueryJobConfig(),
+            job_id=1,
+            timeout=self.credentials.job_creation_timeout_seconds,
         )

     def test_copy_bq_table_appends(self):
         self._copy_table(write_disposition=dbt.adapters.bigquery.impl.WRITE_APPEND)
-        args, kwargs = self.mock_client.copy_table.call_args
         self.mock_client.copy_table.assert_called_once_with(
             [self._table_ref("project", "dataset", "table1")],
             self._table_ref("project", "dataset", "table2"),
             job_config=ANY,
+            retry=ANY,
         )
         args, kwargs = self.mock_client.copy_table.call_args
         self.assertEqual(
@@ -139,6 +119,7 @@ def test_copy_bq_table_truncates(self):
             [self._table_ref("project", "dataset", "table1")],
             self._table_ref("project", "dataset", "table2"),
             job_config=ANY,
+            retry=ANY,
         )
         args, kwargs = self.mock_client.copy_table.call_args
         self.assertEqual(
@@ -160,7 +141,7 @@ def test_list_dataset_correctly_calls_lists_datasets(self):
         self.mock_client.list_datasets = mock_list_dataset
         result = self.connections.list_dataset("project")
         self.mock_client.list_datasets.assert_called_once_with(
-            project="project", max_results=10000
+            project="project", max_results=10000, retry=ANY
         )
         assert result == ["d1"]
diff --git a/tests/unit/test_configure_dataproc_batch.py b/tests/unit/test_configure_dataproc_batch.py
index 94cb28efb..6e5757589 100644
--- a/tests/unit/test_configure_dataproc_batch.py
+++ b/tests/unit/test_configure_dataproc_batch.py
@@ -1,6 +1,6 @@
 from unittest.mock import patch

-from dbt.adapters.bigquery.dataproc.batch import update_batch_from_config
+from dbt.adapters.bigquery.python_submissions import _update_batch_from_config
 from google.cloud import dataproc_v1

 from .test_bigquery_adapter import BaseTestBigQueryAdapter
@@ -12,7 +12,7 @@
 # parsed credentials
 class TestConfigureDataprocBatch(BaseTestBigQueryAdapter):
     @patch(
-        "dbt.adapters.bigquery.connections.get_bigquery_defaults",
+        "dbt.adapters.bigquery.credentials._create_bigquery_defaults",
         return_value=("credentials", "project_id"),
     )
     def test_update_dataproc_serverless_batch(self, mock_get_bigquery_defaults):
@@ -39,7 +39,7 @@ def test_update_dataproc_serverless_batch(self, mock_get_bigquery_defaults):

         batch = dataproc_v1.Batch()

-        batch = update_batch_from_config(raw_batch_config, batch)
+        batch = _update_batch_from_config(raw_batch_config, batch)

         def to_str_values(d):
             """google's protobuf types expose maps as dict[str, str]"""
@@ -64,7 +64,7 @@ def to_str_values(d):
         )

     @patch(
-        "dbt.adapters.bigquery.connections.get_bigquery_defaults",
+        "dbt.adapters.bigquery.credentials._create_bigquery_defaults",
         return_value=("credentials", "project_id"),
     )
     def test_default_dataproc_serverless_batch(self, mock_get_bigquery_defaults):
diff --git a/tests/unit/test_renamed_relations.py b/tests/unit/test_renamed_relations.py
new file mode 100644
index 000000000..8e787e6a3
--- /dev/null
+++ b/tests/unit/test_renamed_relations.py
@@ -0,0 +1,16 @@
+from dbt.adapters.bigquery.relation import BigQueryRelation
+from dbt.adapters.contracts.relation import RelationType
+
+
+def test_renameable_relation():
+    relation = BigQueryRelation.create(
+        database="my_db",
+        schema="my_schema",
+        identifier="my_table",
+        type=RelationType.Table,
+    )
+    assert relation.renameable_relations == frozenset(
+        {
+            RelationType.Table,
+        }
+    )
diff --git a/tests/unit/utils.py b/tests/unit/utils.py
index 88b09ce60..633b6d565 100644
--- a/tests/unit/utils.py
+++ b/tests/unit/utils.py
@@ -2,6 +2,7 @@
 Note that all imports should be inside the functions to avoid import/mocking issues.
 """
+
 import string
 import os
 from unittest import mock
diff --git a/tox.ini b/tox.ini
index 30e3c5816..240d85e34 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,8 +1,8 @@
 [tox]
 skipsdist = True
-envlist = py38,py39,py310,py311
+envlist = py39,py310,py311,py312

-[testenv:{unit,py38,py39,py310,py311,py}]
+[testenv:{unit,py39,py310,py311,py312,py}]
 description = unit testing
 skip_install = true
 passenv =
@@ -13,7 +13,7 @@ deps =
   -rdev-requirements.txt
   -e.

-[testenv:{integration,py38,py39,py310,py311,py}-{bigquery}]
+[testenv:{integration,py39,py310,py311,py312,py}-{bigquery}]
 description = adapter plugin integration testing
 skip_install = true
 passenv =
@@ -31,9 +31,9 @@ commands =
   bigquery: {envpython} -m pytest -n auto {posargs} -vv tests/functional -k "not TestPython" --profile service_account
 deps =
   -rdev-requirements.txt
-  -e.
+  .

-[testenv:{python-tests,py38,py39,py310,py311,py}]
+[testenv:{python-tests,py39,py310,py311,py312,py}]
 description = python integration testing
 skip_install = true
 passenv =