From 540bd7cb0708af980e75b0d1a55ef6fe2d072837 Mon Sep 17 00:00:00 2001 From: Yu Ishikawa Date: Fri, 19 Aug 2022 23:29:14 +0900 Subject: [PATCH 01/24] Fix a typo in `changelog-existence.md` (#276) --- .github/workflows/changelog-existence.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/changelog-existence.yml b/.github/workflows/changelog-existence.yml index 1284bfe46..02bce4582 100644 --- a/.github/workflows/changelog-existence.yml +++ b/.github/workflows/changelog-existence.yml @@ -36,6 +36,6 @@ jobs: changelog: uses: dbt-labs/actions/.github/workflows/changelog-existence.yml@main with: - changelog_comment: 'Thank you for your pull request! We could not find a changelog entry for this change. For details on how to document a change, see the [dbt-bigquery contributing guide](https://github.com/dbt-labs/dbt-bigquery/blob/main/CONTRIBUTING.MD).' + changelog_comment: 'Thank you for your pull request! We could not find a changelog entry for this change. For details on how to document a change, see the [dbt-bigquery contributing guide](https://github.com/dbt-labs/dbt-bigquery/blob/main/CONTRIBUTING.md).' skip_label: 'Skip Changelog' secrets: inherit # this is only acceptable because we own the action we're calling From 56a95e27d9b249c4f3d3a2e75c719306c1da5fb3 Mon Sep 17 00:00:00 2001 From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com> Date: Wed, 24 Aug 2022 09:40:03 -0500 Subject: [PATCH 02/24] version bump, changie. and backports (#282) --- .github/workflows/backport.yml | 40 +++++++++++++ .github/workflows/version-bump.yml | 96 ++++-------------------------- 2 files changed, 51 insertions(+), 85 deletions(-) create mode 100644 .github/workflows/backport.yml diff --git a/.github/workflows/backport.yml b/.github/workflows/backport.yml new file mode 100644 index 000000000..d5c7fffed --- /dev/null +++ b/.github/workflows/backport.yml @@ -0,0 +1,40 @@ +# **what?** +# When a PR is merged, if it has the backport label, it will create +# a new PR to backport those changes to the given branch. If it can't +# cleanly do a backport, it will comment on the merged PR of the failure. +# +# Label naming convention: "backport " +# Example: backport 1.0.latest +# +# You MUST "Squash and merge" the original PR or this won't work. + +# **why?** +# Changes sometimes need to be backported to release branches. +# This automates the backporting process + +# **when?** +# Once a PR is "Squash and merge"'d, by adding a backport label, this is triggered + +name: Backport +on: + pull_request: + types: + - labeled + +permissions: + contents: write + pull-requests: write + +jobs: + backport: + name: Backport + runs-on: ubuntu-latest + # Only react to merged PRs for security reasons. + # See https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#pull_request_target. + if: > + github.event.pull_request.merged + && contains(github.event.label.name, 'backport') + steps: + - uses: tibdex/backport@v2.0.2 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/version-bump.yml b/.github/workflows/version-bump.yml index d9df91c52..bde34d683 100644 --- a/.github/workflows/version-bump.yml +++ b/.github/workflows/version-bump.yml @@ -1,18 +1,15 @@ # **what?** -# This workflow will take a version number and a dry run flag. With that +# This workflow will take the new version number to bump to. With that # it will run versionbump to update the version number everywhere in the -# code base and then generate an update Docker requirements file. If this -# is a dry run, a draft PR will open with the changes. If this isn't a dry -# run, the changes will be committed to the branch this is run on. +# code base and then run changie to create the corresponding changelog. +# A PR will be created with the changes that can be reviewed before committing. # **why?** # This is to aid in releasing dbt and making sure we have updated -# the versions and Docker requirements in all places. +# the version in all places and generated the changelog. # **when?** -# This is triggered either manually OR -# from the repository_dispatch event "version-bump" which is sent from -# the dbt-release repo Action +# This is triggered manually name: Version Bump @@ -20,83 +17,12 @@ on: workflow_dispatch: inputs: version_number: - description: 'The version number to bump to' + description: 'The version number to bump to (ex. 1.2.0, 1.3.0b1)' required: true - is_dry_run: - description: 'Creates a draft PR to allow testing instead of committing to a branch' - required: true - default: 'true' - repository_dispatch: - types: [version-bump] jobs: - bump: - runs-on: ubuntu-latest - steps: - - name: Check out the repository - uses: actions/checkout@v2 - - - name: Set version and dry run values - id: variables - env: - VERSION_NUMBER: "${{ github.event.client_payload.version_number == '' && github.event.inputs.version_number || github.event.client_payload.version_number }}" - IS_DRY_RUN: "${{ github.event.client_payload.is_dry_run == '' && github.event.inputs.is_dry_run || github.event.client_payload.is_dry_run }}" - run: | - echo Repository dispatch event version: ${{ github.event.client_payload.version_number }} - echo Repository dispatch event dry run: ${{ github.event.client_payload.is_dry_run }} - echo Workflow dispatch event version: ${{ github.event.inputs.version_number }} - echo Workflow dispatch event dry run: ${{ github.event.inputs.is_dry_run }} - echo ::set-output name=VERSION_NUMBER::$VERSION_NUMBER - echo ::set-output name=IS_DRY_RUN::$IS_DRY_RUN - - - uses: actions/setup-python@v2 - with: - python-version: "3.8" - - - name: Install python dependencies - run: | - python3 -m venv env - source env/bin/activate - python -m pip install --upgrade pip - - - name: Create PR branch - if: ${{ steps.variables.outputs.IS_DRY_RUN == 'true' }} - run: | - git checkout -b bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_$GITHUB_RUN_ID - git push origin bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_$GITHUB_RUN_ID - git branch --set-upstream-to=origin/bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_$GITHUB_RUN_ID bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_$GITHUB_RUN_ID - - - name: Bumping version - run: | - source env/bin/activate - python -m pip install -r dev-requirements.txt - env/bin/bumpversion --allow-dirty --new-version ${{steps.variables.outputs.VERSION_NUMBER}} major - git status - - - name: Commit version bump directly - uses: EndBug/add-and-commit@v7 - if: ${{ steps.variables.outputs.IS_DRY_RUN == 'false' }} - with: - author_name: 'Github Build Bot' - author_email: 'buildbot@fishtownanalytics.com' - message: 'Bumping version to ${{steps.variables.outputs.VERSION_NUMBER}}' - - - name: Commit version bump to branch - uses: EndBug/add-and-commit@v7 - if: ${{ steps.variables.outputs.IS_DRY_RUN == 'true' }} - with: - author_name: 'Github Build Bot' - author_email: 'buildbot@fishtownanalytics.com' - message: 'Bumping version to ${{steps.variables.outputs.VERSION_NUMBER}}' - branch: 'bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_${{GITHUB.RUN_ID}}' - push: 'origin origin/bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_${{GITHUB.RUN_ID}}' - - - name: Create Pull Request - uses: peter-evans/create-pull-request@v3 - if: ${{ steps.variables.outputs.IS_DRY_RUN == 'true' }} - with: - author: 'Github Build Bot ' - draft: true - base: ${{github.ref}} - title: 'Bumping version to ${{steps.variables.outputs.VERSION_NUMBER}}' - branch: 'bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_${{GITHUB.RUN_ID}}' + version_bump_and_changie: + uses: dbt-labs/actions/.github/workflows/version-bump.yml@main + with: + version_number: ${{ inputs.version_number }} + secrets: inherit # ok since what we are calling is internally maintained From 8b47eb8efe69b114b3c63f05dd1f7cf470954204 Mon Sep 17 00:00:00 2001 From: Chenyu Li Date: Thu, 25 Aug 2022 09:18:23 -0700 Subject: [PATCH 03/24] add supported language (#290) * add supported language * add changelog --- .changes/unreleased/Under the Hood-20220825-073235.yaml | 7 +++++++ .../bigquery/macros/materializations/incremental.sql | 2 +- dbt/include/bigquery/macros/materializations/table.sql | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 .changes/unreleased/Under the Hood-20220825-073235.yaml diff --git a/.changes/unreleased/Under the Hood-20220825-073235.yaml b/.changes/unreleased/Under the Hood-20220825-073235.yaml new file mode 100644 index 000000000..f33d77173 --- /dev/null +++ b/.changes/unreleased/Under the Hood-20220825-073235.yaml @@ -0,0 +1,7 @@ +kind: Under the Hood +body: specify supported_languages for materialization that support python models +time: 2022-08-25T07:32:35.820396-07:00 +custom: + Author: ChenyuLInx + Issue: "288" + PR: "290" diff --git a/dbt/include/bigquery/macros/materializations/incremental.sql b/dbt/include/bigquery/macros/materializations/incremental.sql index c699e816e..a594e993e 100644 --- a/dbt/include/bigquery/macros/materializations/incremental.sql +++ b/dbt/include/bigquery/macros/materializations/incremental.sql @@ -136,7 +136,7 @@ {% endmacro %} -{% materialization incremental, adapter='bigquery' -%} +{% materialization incremental, adapter='bigquery', supported_languages=['sql', 'python'] -%} {%- set unique_key = config.get('unique_key') -%} {%- set full_refresh_mode = (should_full_refresh()) -%} diff --git a/dbt/include/bigquery/macros/materializations/table.sql b/dbt/include/bigquery/macros/materializations/table.sql index 886ba3fa9..75751f331 100644 --- a/dbt/include/bigquery/macros/materializations/table.sql +++ b/dbt/include/bigquery/macros/materializations/table.sql @@ -1,4 +1,4 @@ -{% materialization table, adapter='bigquery' -%} +{% materialization table, adapter='bigquery', supported_languages=['sql', 'python']-%} {%- set language = model['language'] -%} {%- set identifier = model['alias'] -%} From ad885cd6db8b3a26278cb16d452038b061aa88e8 Mon Sep 17 00:00:00 2001 From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com> Date: Thu, 25 Aug 2022 15:51:23 -0500 Subject: [PATCH 04/24] Add `slot_ms` to `BigQueryAdapterResponse` (#271) * Add `slot_millis` to `BigQueryAdapterResponse` * Update CHANGELOG.md * Rename `slot_millis` to `slot_ms` * Rename the tile of the pull request * add assert check of slot_ms to test__bigquery_adapter_functions Co-authored-by: Yu ISHIKAWA --- .changes/unreleased/Under the Hood-20220817-154151.yaml | 7 +++++++ dbt/adapters/bigquery/connections.py | 4 ++++ .../bigquery_test/test_bigquery_adapter_functions.py | 1 + 3 files changed, 12 insertions(+) create mode 100644 .changes/unreleased/Under the Hood-20220817-154151.yaml diff --git a/.changes/unreleased/Under the Hood-20220817-154151.yaml b/.changes/unreleased/Under the Hood-20220817-154151.yaml new file mode 100644 index 000000000..d3de5a992 --- /dev/null +++ b/.changes/unreleased/Under the Hood-20220817-154151.yaml @@ -0,0 +1,7 @@ +kind: Under the Hood +body: Adding `slot_ms` go `BigQueryAdapterResponse` +time: 2022-08-17T15:41:51.080936-05:00 +custom: + Author: yu-iskw + Issue: "194" + PR: "195" diff --git a/dbt/adapters/bigquery/connections.py b/dbt/adapters/bigquery/connections.py index 502a7b9dd..50437622b 100644 --- a/dbt/adapters/bigquery/connections.py +++ b/dbt/adapters/bigquery/connections.py @@ -89,6 +89,7 @@ class BigQueryAdapterResponse(AdapterResponse): location: Optional[str] = None project_id: Optional[str] = None job_id: Optional[str] = None + slot_ms: Optional[int] = None @dataclass @@ -460,6 +461,7 @@ def execute( project_id = None num_rows_formatted = None processed_bytes = None + slot_ms = None if query_job.statement_type == "CREATE_VIEW": code = "CREATE VIEW" @@ -488,6 +490,7 @@ def execute( # set common attributes bytes_processed = query_job.total_bytes_processed + slot_ms = query_job.slot_millis processed_bytes = self.format_bytes(bytes_processed) location = query_job.location job_id = query_job.job_id @@ -511,6 +514,7 @@ def execute( location=location, project_id=project_id, job_id=job_id, + slot_ms=slot_ms, ) return response, table diff --git a/tests/integration/bigquery_test/test_bigquery_adapter_functions.py b/tests/integration/bigquery_test/test_bigquery_adapter_functions.py index 8ad4a27fc..427470f1f 100644 --- a/tests/integration/bigquery_test/test_bigquery_adapter_functions.py +++ b/tests/integration/bigquery_test/test_bigquery_adapter_functions.py @@ -26,6 +26,7 @@ def test__bigquery_adapter_functions(self): assert result.adapter_response["location"] is not None assert result.adapter_response["project_id"] is not None assert result.adapter_response["job_id"] is not None + assert result.adapter_response["slot_ms"] is not None test_results = self.run_dbt(['test']) From 170c65e08250017e1413dfb3f14df4bcbb11c113 Mon Sep 17 00:00:00 2001 From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com> Date: Thu, 25 Aug 2022 23:34:12 -0500 Subject: [PATCH 05/24] expanding test env example (#120) * expanding test env example * minor changes after mila review * minor wording changes * updating * update after feedback from @VersusFacit * updating based on suggestions from @kwigley * updating test.env.example file * reformat descriptions * changing : to = in example so that users can c/p without making any changes --- test.env.example | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/test.env.example b/test.env.example index d49649307..58893f5af 100644 --- a/test.env.example +++ b/test.env.example @@ -1,6 +1,12 @@ -BIGQUERY_TEST_ALT_DATABASE= +# Note: These values will come from your BigQuery account and GCP projects. + +# Test Environment field definitions +# Name of a GCP project you don't have access to query. BIGQUERY_TEST_NO_ACCESS_DATABASE= +# Authentication method required to hookup to BigQuery via client library. BIGQUERY_TEST_SERVICE_ACCOUNT_JSON='{}' + +# tests for local ci/cd DBT_TEST_USER_1="group:buildbot@dbtlabs.com" DBT_TEST_USER_2="group:dev-core@dbtlabs.com" DBT_TEST_USER_3="serviceAccount:dbt-integration-test-user@dbt-test-env.iam.gserviceaccount.com" From 597aaa4667320a2f0e21abdf6ac10d523757183e Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 29 Aug 2022 17:05:49 -0400 Subject: [PATCH 06/24] Bumping version to 1.3.0b2 and generate CHANGELOG (#292) Co-authored-by: Github Build Bot --- .bumpversion.cfg | 2 +- .changes/1.3.0-b2.md | 10 ++++++++++ .../Under the Hood-20220804-155719.yaml | 0 .../Under the Hood-20220806-142912.yaml | 0 .../Under the Hood-20220817-154151.yaml | 0 .../Under the Hood-20220825-073235.yaml | 0 CHANGELOG.md | 10 +++++++++- dbt/adapters/bigquery/__version__.py | 2 +- setup.py | 2 +- 9 files changed, 22 insertions(+), 4 deletions(-) create mode 100644 .changes/1.3.0-b2.md rename .changes/{unreleased => 1.3.0}/Under the Hood-20220804-155719.yaml (100%) rename .changes/{unreleased => 1.3.0}/Under the Hood-20220806-142912.yaml (100%) rename .changes/{unreleased => 1.3.0}/Under the Hood-20220817-154151.yaml (100%) rename .changes/{unreleased => 1.3.0}/Under the Hood-20220825-073235.yaml (100%) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index f0da053eb..d3aa82250 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.3.0b1 +current_version = 1.3.0b2 parse = (?P\d+) \.(?P\d+) \.(?P\d+) diff --git a/.changes/1.3.0-b2.md b/.changes/1.3.0-b2.md new file mode 100644 index 000000000..a62c278cc --- /dev/null +++ b/.changes/1.3.0-b2.md @@ -0,0 +1,10 @@ +## dbt-bigquery 1.3.0-b2 - August 29, 2022 +### Under the Hood +- Add changie to dbt-bigquery ([#254](https://github.com/dbt-labs/dbt-bigquery/issues/254), [#253](https://github.com/dbt-labs/dbt-bigquery/pull/253)) +- Add location/job_id/project_id to adapter response to enable easy job linking ([#92](https://github.com/dbt-labs/dbt-bigquery/issues/92), [#250](https://github.com/dbt-labs/dbt-bigquery/pull/250)) +- Adding `slot_ms` go `BigQueryAdapterResponse` ([#194](https://github.com/dbt-labs/dbt-bigquery/issues/194), [#195](https://github.com/dbt-labs/dbt-bigquery/pull/195)) +- specify supported_languages for materialization that support python models ([#288](https://github.com/dbt-labs/dbt-bigquery/issues/288), [#290](https://github.com/dbt-labs/dbt-bigquery/pull/290)) + +### Contributors +- [@Kayrnt](https://github.com/Kayrnt) ([#250](https://github.com/dbt-labs/dbt-bigquery/pull/250)) +- [@yu-iskw](https://github.com/yu-iskw) ([#195](https://github.com/dbt-labs/dbt-bigquery/pull/195)) diff --git a/.changes/unreleased/Under the Hood-20220804-155719.yaml b/.changes/1.3.0/Under the Hood-20220804-155719.yaml similarity index 100% rename from .changes/unreleased/Under the Hood-20220804-155719.yaml rename to .changes/1.3.0/Under the Hood-20220804-155719.yaml diff --git a/.changes/unreleased/Under the Hood-20220806-142912.yaml b/.changes/1.3.0/Under the Hood-20220806-142912.yaml similarity index 100% rename from .changes/unreleased/Under the Hood-20220806-142912.yaml rename to .changes/1.3.0/Under the Hood-20220806-142912.yaml diff --git a/.changes/unreleased/Under the Hood-20220817-154151.yaml b/.changes/1.3.0/Under the Hood-20220817-154151.yaml similarity index 100% rename from .changes/unreleased/Under the Hood-20220817-154151.yaml rename to .changes/1.3.0/Under the Hood-20220817-154151.yaml diff --git a/.changes/unreleased/Under the Hood-20220825-073235.yaml b/.changes/1.3.0/Under the Hood-20220825-073235.yaml similarity index 100% rename from .changes/unreleased/Under the Hood-20220825-073235.yaml rename to .changes/1.3.0/Under the Hood-20220825-073235.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index 93ea208a1..a9038a76d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,14 +4,22 @@ - Changes are listed under the (pre)release in which they first appear. Subsequent releases include changes from previous releases. - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version. - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-bigquery/blob/main/CONTRIBUTING.md#adding-changelog-entry) +## dbt-bigquery 1.3.0-b2 - August 29, 2022 +### Under the Hood +- Add changie to dbt-bigquery ([#254](https://github.com/dbt-labs/dbt-bigquery/issues/254), [#253](https://github.com/dbt-labs/dbt-bigquery/pull/253)) +- Add location/job_id/project_id to adapter response to enable easy job linking ([#92](https://github.com/dbt-labs/dbt-bigquery/issues/92), [#250](https://github.com/dbt-labs/dbt-bigquery/pull/250)) +- Adding `slot_ms` go `BigQueryAdapterResponse` ([#194](https://github.com/dbt-labs/dbt-bigquery/issues/194), [#195](https://github.com/dbt-labs/dbt-bigquery/pull/195)) +- specify supported_languages for materialization that support python models ([#288](https://github.com/dbt-labs/dbt-bigquery/issues/288), [#290](https://github.com/dbt-labs/dbt-bigquery/pull/290)) +### Contributors +- [@Kayrnt](https://github.com/Kayrnt) ([#250](https://github.com/dbt-labs/dbt-bigquery/pull/250)) +- [@yu-iskw](https://github.com/yu-iskw) ([#195](https://github.com/dbt-labs/dbt-bigquery/pull/195)) ## dbt-bigquery 1.3.0-b1 - August 04, 2022 ### Features - Implement `create_schema` via SQL, instead of Python method, allowing users to override if desired. drop_schema remains a Python method for the time being. ([#182](https://github.com/dbt-labs/dbt-bigquery/issues/182), [#183](https://github.com/dbt-labs/dbt-bigquery/pull/183)) - Added table and incrementail materializations for python models via DataProc. ([#209](https://github.com/dbt-labs/dbt-bigquery/issues/209), [#226](https://github.com/dbt-labs/dbt-bigquery/pull/226)) ### Under the Hood - Implement minimal changes to support dbt Core incremental materialization refactor. ([#232](https://github.com/dbt-labs/dbt-bigquery/issues/232), [#223](https://github.com/dbt-labs/dbt-bigquery/pull/223)) - ## Previous Releases For information on prior major and minor releases, see their changelogs: - [1.2](https://github.com/dbt-labs/dbt-bigquery/blob/1.2.latest/CHANGELOG.md) diff --git a/dbt/adapters/bigquery/__version__.py b/dbt/adapters/bigquery/__version__.py index 4b49b750d..e2c1a233c 100644 --- a/dbt/adapters/bigquery/__version__.py +++ b/dbt/adapters/bigquery/__version__.py @@ -1 +1 @@ -version = "1.3.0b1" +version = "1.3.0b2" diff --git a/setup.py b/setup.py index 175fa2f1a..69919fe1b 100644 --- a/setup.py +++ b/setup.py @@ -50,7 +50,7 @@ def _get_dbt_core_version(): package_name = "dbt-bigquery" -package_version = "1.3.0b1" +package_version = "1.3.0b2" dbt_core_version = _get_dbt_core_version() description = """The BigQuery adapter plugin for dbt""" From 53f8b907915cf0d45a4265d08c655b1568014792 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 2 Sep 2022 11:36:13 -0400 Subject: [PATCH 07/24] Bump black from 22.6.0 to 22.8.0 (#294) * Bump black from 22.6.0 to 22.8.0 Bumps [black](https://github.com/psf/black) from 22.6.0 to 22.8.0. - [Release notes](https://github.com/psf/black/releases) - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) - [Commits](https://github.com/psf/black/compare/22.6.0...22.8.0) --- updated-dependencies: - dependency-name: black dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] * Add automated changelog yaml from template for bot PR * Remove newline Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Github Build Bot Co-authored-by: leahwicz <60146280+leahwicz@users.noreply.github.com> --- .changes/unreleased/Dependency-20220902-152810.yaml | 7 +++++++ dev-requirements.txt | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 .changes/unreleased/Dependency-20220902-152810.yaml diff --git a/.changes/unreleased/Dependency-20220902-152810.yaml b/.changes/unreleased/Dependency-20220902-152810.yaml new file mode 100644 index 000000000..2d7fb11ef --- /dev/null +++ b/.changes/unreleased/Dependency-20220902-152810.yaml @@ -0,0 +1,7 @@ +kind: Dependency +body: "Bump black from 22.6.0 to 22.8.0" +time: 2022-09-02T15:28:10.00000Z +custom: + Author: dependabot[bot] + Issue: 254 + PR: 294 diff --git a/dev-requirements.txt b/dev-requirements.txt index 5ff54ac51..129dbbe64 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -3,7 +3,7 @@ git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter -black==22.6.0 +black==22.8.0 bumpversion flake8 flaky From 84c20fcef86d9777b6ce6a4745b6415ec288ed93 Mon Sep 17 00:00:00 2001 From: Chenyu Li Date: Mon, 12 Sep 2022 11:33:56 -0700 Subject: [PATCH 08/24] Enhancement/enable serverless (#303) * Experiment with Dataproc Serverless * add serverless as another submission method * add changelog and run tests against target core branch * fix syntax * fix schema overwrite * use 0.21 version of connector Co-authored-by: Jeremy Cohen --- .../unreleased/Features-20220909-122924.yaml | 7 + dbt/adapters/bigquery/impl.py | 128 +++------------ dbt/adapters/bigquery/python_submissions.py | 152 ++++++++++++++++++ .../macros/materializations/table.sql | 2 +- dev-requirements.txt | 4 +- tests/conftest.py | 2 +- tests/functional/adapter/test_python_model.py | 2 +- 7 files changed, 187 insertions(+), 110 deletions(-) create mode 100644 .changes/unreleased/Features-20220909-122924.yaml create mode 100644 dbt/adapters/bigquery/python_submissions.py diff --git a/.changes/unreleased/Features-20220909-122924.yaml b/.changes/unreleased/Features-20220909-122924.yaml new file mode 100644 index 000000000..cde9bbb43 --- /dev/null +++ b/.changes/unreleased/Features-20220909-122924.yaml @@ -0,0 +1,7 @@ +kind: Features +body: Add support for Dataproc Serverless +time: 2022-09-09T12:29:24.993388-07:00 +custom: + Author: ChenyuLInx + Issue: "248" + PR: "303" diff --git a/dbt/adapters/bigquery/impl.py b/dbt/adapters/bigquery/impl.py index dde6f865d..602aafd19 100644 --- a/dbt/adapters/bigquery/impl.py +++ b/dbt/adapters/bigquery/impl.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import Dict, List, Optional, Any, Set, Union +from typing import Dict, List, Optional, Any, Set, Union, Type from dbt.dataclass_schema import dbtClassMixin, ValidationError import dbt.deprecations @@ -7,14 +7,24 @@ import dbt.clients.agate_helper from dbt import ui # type: ignore -from dbt.adapters.base import BaseAdapter, available, RelationType, SchemaSearchMap, AdapterConfig -from dbt.adapters.base.impl import log_code_execution +from dbt.adapters.base import ( + BaseAdapter, + available, + RelationType, + SchemaSearchMap, + AdapterConfig, + PythonJobHelper, +) from dbt.adapters.cache import _make_key from dbt.adapters.bigquery.relation import BigQueryRelation from dbt.adapters.bigquery import BigQueryColumn from dbt.adapters.bigquery import BigQueryConnectionManager +from dbt.adapters.bigquery.python_submissions import ( + ClusterDataprocHelper, + ServerlessDataProcHelper, +) from dbt.adapters.bigquery.connections import BigQueryAdapterResponse from dbt.contracts.graph.manifest import Manifest from dbt.events import AdapterLogger @@ -835,108 +845,16 @@ def run_sql_for_tests(self, sql, fetch, conn=None): else: return list(res) - @available.parse_none - @log_code_execution - def submit_python_job(self, parsed_model: dict, compiled_code: str): - # TODO improve the typing here. N.B. Jinja returns a `jinja2.runtime.Undefined` instead - # of `None` which evaluates to True! - - # TODO limit this function to run only when doing the materialization of python nodes - # TODO should we also to timeout here? - - # validate all additional stuff for python is set - schema = getattr(parsed_model, "schema", self.config.credentials.schema) - identifier = parsed_model["alias"] - python_required_configs = [ - "dataproc_region", - "dataproc_cluster_name", - "gcs_bucket", - ] - for required_config in python_required_configs: - if not getattr(self.connections.profile.credentials, required_config): - raise ValueError( - f"Need to supply {required_config} in profile to submit python job" - ) - if not hasattr(self, "dataproc_helper"): - self.dataproc_helper = DataProcHelper(self.connections.profile.credentials) - model_file_name = f"{schema}/{identifier}.py" - # upload python file to GCS - self.dataproc_helper.upload_to_gcs(model_file_name, compiled_code) - # submit dataproc job - self.dataproc_helper.submit_dataproc_job(model_file_name) - - # TODO proper result for this - message = "OK" - code = None - num_rows = None - bytes_processed = None - return BigQueryAdapterResponse( # type: ignore[call-arg] - _message=message, - rows_affected=num_rows, - code=code, - bytes_processed=bytes_processed, - ) - + def generate_python_submission_response(self, submission_result) -> BigQueryAdapterResponse: + return BigQueryAdapterResponse(_message="OK") # type: ignore[call-arg] -class DataProcHelper: - def __init__(self, credential): - """_summary_ + @property + def default_python_submission_method(self) -> str: + return "serverless" - Args: - credential (_type_): _description_ - """ - try: - # Library only needed for python models - from google.cloud import dataproc_v1 - from google.cloud import storage - except ImportError: - raise RuntimeError( - "You need to install [dataproc] extras to run python model in dbt-bigquery" - ) - self.credential = credential - self.GoogleCredentials = BigQueryConnectionManager.get_credentials(credential) - self.storage_client = storage.Client( - project=self.credential.database, credentials=self.GoogleCredentials - ) - self.job_client = dataproc_v1.JobControllerClient( - client_options={ - "api_endpoint": "{}-dataproc.googleapis.com:443".format( - self.credential.dataproc_region - ) - }, - credentials=self.GoogleCredentials, - ) - - def upload_to_gcs(self, filename: str, compiled_code: str): - bucket = self.storage_client.get_bucket(self.credential.gcs_bucket) - blob = bucket.blob(filename) - blob.upload_from_string(compiled_code) - - def submit_dataproc_job(self, filename: str): - # Create the job config. - job = { - "placement": {"cluster_name": self.credential.dataproc_cluster_name}, - "pyspark_job": { - "main_python_file_uri": "gs://{}/{}".format(self.credential.gcs_bucket, filename) - }, + @property + def python_submission_helpers(self) -> Dict[str, Type[PythonJobHelper]]: + return { + "cluster": ClusterDataprocHelper, + "serverless": ServerlessDataProcHelper, } - operation = self.job_client.submit_job_as_operation( - request={ - "project_id": self.credential.database, - "region": self.credential.dataproc_region, - "job": job, - } - ) - response = operation.result() - return response - - # TODO: there might be useful results here that we can parse and return - # Dataproc job output is saved to the Cloud Storage bucket - # allocated to the job. Use regex to obtain the bucket and blob info. - # matches = re.match("gs://(.*?)/(.*)", response.driver_output_resource_uri) - # output = ( - # self.storage_client - # .get_bucket(matches.group(1)) - # .blob(f"{matches.group(2)}.000000000") - # .download_as_string() - # ) diff --git a/dbt/adapters/bigquery/python_submissions.py b/dbt/adapters/bigquery/python_submissions.py new file mode 100644 index 000000000..408984c2f --- /dev/null +++ b/dbt/adapters/bigquery/python_submissions.py @@ -0,0 +1,152 @@ +from typing import Dict, Union + +from dbt.adapters.base import PythonJobHelper +from dbt.adapters.bigquery import BigQueryConnectionManager, BigQueryCredentials +from google.api_core import retry +from google.api_core.client_options import ClientOptions + +try: + # library only needed for python models + from google.cloud import storage, dataproc_v1 # type: ignore +except ImportError: + _has_dataproc_lib = False +else: + _has_dataproc_lib = True + + +class BaseDataProcHelper(PythonJobHelper): + def __init__(self, parsed_model: Dict, credential: BigQueryCredentials) -> None: + """_summary_ + + Args: + credential (_type_): _description_ + """ + if not _has_dataproc_lib: + raise RuntimeError( + "You need to install [dataproc] extras to run python model in dbt-bigquery" + ) + # validate all additional stuff for python is set + schema = parsed_model["schema"] + identifier = parsed_model["alias"] + self.parsed_model = parsed_model + python_required_configs = [ + "dataproc_region", + "gcs_bucket", + ] + for required_config in python_required_configs: + if not getattr(credential, required_config): + raise ValueError( + f"Need to supply {required_config} in profile to submit python job" + ) + self.model_file_name = f"{schema}/{identifier}.py" + self.credential = credential + self.GoogleCredentials = BigQueryConnectionManager.get_credentials(credential) + self.storage_client = storage.Client( + project=self.credential.database, credentials=self.GoogleCredentials + ) + self.gcs_location = "gs://{}/{}".format(self.credential.gcs_bucket, self.model_file_name) + + # set retry policy, default to timeout after 24 hours + self.timeout = self.parsed_model["config"].get( + "timeout", self.credential.job_execution_timeout_seconds or 60 * 60 * 24 + ) + self.retry = retry.Retry(maximum=10.0, deadline=self.timeout) + self.client_options = ClientOptions( + api_endpoint="{}-dataproc.googleapis.com:443".format(self.credential.dataproc_region) + ) + self.job_client = self._get_job_client() + + def _upload_to_gcs(self, filename: str, compiled_code: str) -> None: + bucket = self.storage_client.get_bucket(self.credential.gcs_bucket) + blob = bucket.blob(filename) + blob.upload_from_string(compiled_code) + + def submit(self, compiled_code: str) -> dataproc_v1.types.jobs.Job: + # upload python file to GCS + self._upload_to_gcs(self.model_file_name, compiled_code) + # submit dataproc job + return self._submit_dataproc_job() + + def _get_job_client( + self, + ) -> Union[dataproc_v1.JobControllerClient, dataproc_v1.BatchControllerClient]: + raise NotImplementedError("_get_job_client not implemented") + + def _submit_dataproc_job(self) -> dataproc_v1.types.jobs.Job: + raise NotImplementedError("_submit_dataproc_job not implemented") + + +class ClusterDataprocHelper(BaseDataProcHelper): + def _get_job_client(self) -> dataproc_v1.JobControllerClient: + if not self._get_cluster_name(): + raise ValueError( + "Need to supply dataproc_cluster_name in profile or config to submit python job with cluster submission method" + ) + return dataproc_v1.JobControllerClient( # type: ignore + client_options=self.client_options, credentials=self.GoogleCredentials + ) + + def _get_cluster_name(self) -> str: + return self.parsed_model["config"].get( + "dataproc_cluster_name", self.credential.dataproc_cluster_name + ) + + def _submit_dataproc_job(self) -> dataproc_v1.types.jobs.Job: + job = { + "placement": {"cluster_name": self._get_cluster_name()}, + "pyspark_job": { + "main_python_file_uri": self.gcs_location, + }, + } + operation = self.job_client.submit_job_as_operation( # type: ignore + request={ + "project_id": self.credential.database, + "region": self.credential.dataproc_region, + "job": job, + } + ) + response = operation.result(retry=self.retry) + return response + + +class ServerlessDataProcHelper(BaseDataProcHelper): + def _get_job_client(self) -> dataproc_v1.BatchControllerClient: + return dataproc_v1.BatchControllerClient( + client_options=self.client_options, credentials=self.GoogleCredentials + ) + + def _submit_dataproc_job(self) -> dataproc_v1.types.jobs.Job: + # create the Dataproc Serverless job config + batch = dataproc_v1.Batch() + batch.pyspark_batch.main_python_file_uri = self.gcs_location + # how to keep this up to date? + # we should probably also open this up to be configurable + batch.pyspark_batch.jar_file_uris = [ + "gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.12-0.21.1.jar" + ] + # should we make all of these spark/dataproc properties configurable? + # https://cloud.google.com/dataproc-serverless/docs/concepts/properties + # https://cloud.google.com/dataproc-serverless/docs/reference/rest/v1/projects.locations.batches#runtimeconfig + batch.runtime_config.properties = { + "spark.executor.instances": "2", + } + parent = f"projects/{self.credential.database}/locations/{self.credential.dataproc_region}" + request = dataproc_v1.CreateBatchRequest( + parent=parent, + batch=batch, + ) + # make the request + operation = self.job_client.create_batch(request=request) # type: ignore + # this takes quite a while, waiting on GCP response to resolve + response = operation.result(retry=self.retry) + return response + # there might be useful results here that we can parse and return + # Dataproc job output is saved to the Cloud Storage bucket + # allocated to the job. Use regex to obtain the bucket and blob info. + # matches = re.match("gs://(.*?)/(.*)", response.driver_output_resource_uri) + # output = ( + # self.storage_client + # .get_bucket(matches.group(1)) + # .blob(f"{matches.group(2)}.000000000") + # .download_as_string() + # ) diff --git a/dbt/include/bigquery/macros/materializations/table.sql b/dbt/include/bigquery/macros/materializations/table.sql index 75751f331..5ca735aa1 100644 --- a/dbt/include/bigquery/macros/materializations/table.sql +++ b/dbt/include/bigquery/macros/materializations/table.sql @@ -67,6 +67,6 @@ df = model(dbt, spark) df.write \ .mode("overwrite") \ .format("bigquery") \ - .option("writeMethod", "direct") \ + .option("writeMethod", "direct").option("writeDisposition", 'WRITE_TRUNCATE') \ .save("{{target_relation}}") {% endmacro %} diff --git a/dev-requirements.txt b/dev-requirements.txt index 129dbbe64..23418bf3f 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,7 +1,7 @@ # install latest changes in dbt-core # TODO: how to automate switching from develop to version branches? -git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core -git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter +git+https://github.com/dbt-labs/dbt-core.git@enhancement/python_submission_helper#egg=dbt-core&subdirectory=core +git+https://github.com/dbt-labs/dbt-core.git@enhancement/python_submission_helper#egg=dbt-tests-adapter&subdirectory=tests/adapter black==22.8.0 bumpversion diff --git a/tests/conftest.py b/tests/conftest.py index 7b0c69fc3..e74fa424b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -44,6 +44,6 @@ def service_account_target(): 'keyfile_json': credentials, # following 3 for python model 'dataproc_region': os.getenv("DATAPROC_REGION"), - 'dataproc_cluster_name': os.getenv("DATAPROC_CLUSTER_NAME"), + 'dataproc_cluster_name': os.getenv("DATAPROC_CLUSTER_NAME"), # only needed for cluster submission method 'gcs_bucket': os.getenv("GCS_BUCKET") } diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py index 03ad871e2..68bb90e68 100644 --- a/tests/functional/adapter/test_python_model.py +++ b/tests/functional/adapter/test_python_model.py @@ -3,7 +3,7 @@ from dbt.tests.util import run_dbt, write_file import dbt.tests.adapter.python_model.test_python_model as dbt_tests -@pytest.skip("cluster unstable", allow_module_level=True) + class TestPythonIncrementalMatsDataproc(dbt_tests.BasePythonIncrementalTests): pass From 7daebe845d62d7f9e5c286e9ece50cab25b44a24 Mon Sep 17 00:00:00 2001 From: Chenyu Li Date: Mon, 12 Sep 2022 14:02:24 -0700 Subject: [PATCH 09/24] fix dev-req (#304) --- dbt/adapters/bigquery/python_submissions.py | 8 +++++--- dev-requirements.txt | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/dbt/adapters/bigquery/python_submissions.py b/dbt/adapters/bigquery/python_submissions.py index 408984c2f..1cdbf438f 100644 --- a/dbt/adapters/bigquery/python_submissions.py +++ b/dbt/adapters/bigquery/python_submissions.py @@ -121,9 +121,11 @@ def _submit_dataproc_job(self) -> dataproc_v1.types.jobs.Job: batch.pyspark_batch.main_python_file_uri = self.gcs_location # how to keep this up to date? # we should probably also open this up to be configurable - batch.pyspark_batch.jar_file_uris = [ - "gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.12-0.21.1.jar" - ] + jar_file_uri = self.parsed_model["config"].get( + "jar_file_uri", + "gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.12-0.21.1.jar", + ) + batch.pyspark_batch.jar_file_uris = [jar_file_uri] # should we make all of these spark/dataproc properties configurable? # https://cloud.google.com/dataproc-serverless/docs/concepts/properties # https://cloud.google.com/dataproc-serverless/docs/reference/rest/v1/projects.locations.batches#runtimeconfig diff --git a/dev-requirements.txt b/dev-requirements.txt index 23418bf3f..129dbbe64 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,7 +1,7 @@ # install latest changes in dbt-core # TODO: how to automate switching from develop to version branches? -git+https://github.com/dbt-labs/dbt-core.git@enhancement/python_submission_helper#egg=dbt-core&subdirectory=core -git+https://github.com/dbt-labs/dbt-core.git@enhancement/python_submission_helper#egg=dbt-tests-adapter&subdirectory=tests/adapter +git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core +git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter black==22.8.0 bumpversion From 6f4fd0165d33ec79f4e71f0a9b6d977cddcfac25 Mon Sep 17 00:00:00 2001 From: Chenyu Li Date: Mon, 12 Sep 2022 16:01:05 -0700 Subject: [PATCH 10/24] Make storage and dataproc required Lib and disable unstable test (#305) * mock optional library * skip python tests again and remove type hint for optional lib * add dataproc and storage as required * remove try except for import --- dbt/adapters/bigquery/python_submissions.py | 13 +------------ setup.py | 8 ++------ tests/functional/adapter/test_python_model.py | 2 +- 3 files changed, 4 insertions(+), 19 deletions(-) diff --git a/dbt/adapters/bigquery/python_submissions.py b/dbt/adapters/bigquery/python_submissions.py index 1cdbf438f..8d12fa2cc 100644 --- a/dbt/adapters/bigquery/python_submissions.py +++ b/dbt/adapters/bigquery/python_submissions.py @@ -4,14 +4,7 @@ from dbt.adapters.bigquery import BigQueryConnectionManager, BigQueryCredentials from google.api_core import retry from google.api_core.client_options import ClientOptions - -try: - # library only needed for python models - from google.cloud import storage, dataproc_v1 # type: ignore -except ImportError: - _has_dataproc_lib = False -else: - _has_dataproc_lib = True +from google.cloud import storage, dataproc_v1 # type: ignore class BaseDataProcHelper(PythonJobHelper): @@ -21,10 +14,6 @@ def __init__(self, parsed_model: Dict, credential: BigQueryCredentials) -> None: Args: credential (_type_): _description_ """ - if not _has_dataproc_lib: - raise RuntimeError( - "You need to install [dataproc] extras to run python model in dbt-bigquery" - ) # validate all additional stuff for python is set schema = parsed_model["schema"] identifier = parsed_model["alias"] diff --git a/setup.py b/setup.py index 69919fe1b..81a267711 100644 --- a/setup.py +++ b/setup.py @@ -72,13 +72,9 @@ def _get_dbt_core_version(): "google-cloud-bigquery>=1.25.0,<3", "google-api-core>=1.16.0,<3", "googleapis-common-protos>=1.6.0,<2", + "google-cloud-storage>=2.4.0", + "google-cloud-dataproc>=4.0.3", ], - extras_require={ - "dataproc": [ - "google-cloud-storage[dataproc]>=2.4.0", - "google-cloud-dataproc[dataproc]>=4.0.3", - ], - }, zip_safe=False, classifiers=[ "Development Status :: 5 - Production/Stable", diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py index 68bb90e68..03ad871e2 100644 --- a/tests/functional/adapter/test_python_model.py +++ b/tests/functional/adapter/test_python_model.py @@ -3,7 +3,7 @@ from dbt.tests.util import run_dbt, write_file import dbt.tests.adapter.python_model.test_python_model as dbt_tests - +@pytest.skip("cluster unstable", allow_module_level=True) class TestPythonIncrementalMatsDataproc(dbt_tests.BasePythonIncrementalTests): pass From b4cc9b3ec1cddfba2b2d58d08af5a84f6d72b1b6 Mon Sep 17 00:00:00 2001 From: leahwicz <60146280+leahwicz@users.noreply.github.com> Date: Tue, 13 Sep 2022 18:43:09 -0400 Subject: [PATCH 11/24] Update repo templates (#295) --- .github/ISSUE_TEMPLATE/bug-report.yml | 84 ++++++++++++++++++++ .github/ISSUE_TEMPLATE/bug_report.md | 33 -------- .github/ISSUE_TEMPLATE/config.yml | 14 ++++ .github/ISSUE_TEMPLATE/feature-request.yml | 59 ++++++++++++++ .github/ISSUE_TEMPLATE/feature_request.md | 23 ------ .github/ISSUE_TEMPLATE/regression-report.yml | 82 +++++++++++++++++++ .github/ISSUE_TEMPLATE/release.md | 10 --- .github/pull_request_template.md | 7 +- 8 files changed, 245 insertions(+), 67 deletions(-) create mode 100644 .github/ISSUE_TEMPLATE/bug-report.yml delete mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/config.yml create mode 100644 .github/ISSUE_TEMPLATE/feature-request.yml delete mode 100644 .github/ISSUE_TEMPLATE/feature_request.md create mode 100644 .github/ISSUE_TEMPLATE/regression-report.yml delete mode 100644 .github/ISSUE_TEMPLATE/release.md diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml new file mode 100644 index 000000000..b785e189b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -0,0 +1,84 @@ +name: 🐞 Bug +description: Report a bug or an issue you've found with dbt-bigquery +title: "[Bug] " +labels: ["bug", "triage"] +body: + - type: markdown + attributes: + value: | + Thanks for taking the time to fill out this bug report! + - type: checkboxes + attributes: + label: Is this a new bug in dbt-bigquery? + description: > + In other words, is this an error, flaw, failure or fault in our software? + + If this is a bug that broke existing functionality that used to work, please open a regression issue. + If this is a bug in the dbt-core logic, please open an issue in the dbt-core repository. + If this is a bug experienced while using dbt Cloud, please report to [support](mailto:support@getdbt.com). + If this is a request for help or troubleshooting code in your own dbt project, please join our [dbt Community Slack](https://www.getdbt.com/community/join-the-community/) or open a [Discussion question](https://github.com/dbt-labs/docs.getdbt.com/discussions). + + Please search to see if an issue already exists for the bug you encountered. + options: + - label: I believe this is a new bug in dbt-bigquery + required: true + - label: I have searched the existing issues, and I could not find an existing issue for this bug + required: true + - type: textarea + attributes: + label: Current Behavior + description: A concise description of what you're experiencing. + validations: + required: true + - type: textarea + attributes: + label: Expected Behavior + description: A concise description of what you expected to happen. + validations: + required: true + - type: textarea + attributes: + label: Steps To Reproduce + description: Steps to reproduce the behavior. + placeholder: | + 1. In this environment... + 2. With this config... + 3. Run '...' + 4. See error... + validations: + required: true + - type: textarea + id: logs + attributes: + label: Relevant log output + description: | + If applicable, log output to help explain your problem. + render: shell + validations: + required: false + - type: textarea + attributes: + label: Environment + description: | + examples: + - **OS**: Ubuntu 20.04 + - **Python**: 3.9.12 (`python3 --version`) + - **dbt-core**: 1.1.1 (`dbt --version`) + - **dbt-bigquery**: 1.1.0 (`dbt --version`) + value: | + - OS: + - Python: + - dbt-core: + - dbt-bigquery: + render: markdown + validations: + required: false + - type: textarea + attributes: + label: Additional Context + description: | + Links? References? Anything that will give us more context about the issue you are encountering! + + Tip: You can attach images or log files by clicking this area to highlight it and then dragging files in. + validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md deleted file mode 100644 index c2cb2a061..000000000 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ /dev/null @@ -1,33 +0,0 @@ ---- -name: Bug report -about: Report a bug or an issue you've found with dbt-bigquery -title: '' -labels: bug, triage -assignees: '' - ---- - -### Describe the bug -A clear and concise description of what the bug is. What command did you run? What happened? - -### Steps To Reproduce -In as much detail as possible, please provide steps to reproduce the issue. Sample data that triggers the issue, example model code, etc is all very helpful here. - -### Expected behavior -A clear and concise description of what you expected to happen. - -### Screenshots and log output -If applicable, add screenshots or log output to help explain your problem. - -### System information -**The output of `dbt --version`:** -``` -<output goes here> -``` - -**The operating system you're using:** - -**The output of `python --version`:** - -### Additional context -Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 000000000..afd2ea18c --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,14 @@ +blank_issues_enabled: false +contact_links: + - name: Ask the community for help + url: https://github.com/dbt-labs/docs.getdbt.com/discussions + about: Need help troubleshooting? Check out our guide on how to ask + - name: Contact dbt Cloud support + url: mailto:support@getdbt.com + about: Are you using dbt Cloud? Contact our support team for help! + - name: Participate in Discussions + url: https://github.com/dbt-labs/dbt-bigquery/discussions + about: Do you have a Big Idea for dbt-bigquery? Read open discussions, or start a new one + - name: Create an issue for dbt-core + url: https://github.com/dbt-labs/dbt-core/issues/new/choose + about: Report a bug or request a feature for dbt-core diff --git a/.github/ISSUE_TEMPLATE/feature-request.yml b/.github/ISSUE_TEMPLATE/feature-request.yml new file mode 100644 index 000000000..3cd55868b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature-request.yml @@ -0,0 +1,59 @@ +name: ✨ Feature +description: Propose a straightforward extension of dbt-bigquery functionality +title: "[Feature] <title>" +labels: ["enhancement", "triage"] +body: + - type: markdown + attributes: + value: | + Thanks for taking the time to fill out this feature request! + - type: checkboxes + attributes: + label: Is this your first time submitting a feature request? + description: > + We want to make sure that features are distinct and discoverable, + so that other members of the community can find them and offer their thoughts. + + Issues are the right place to request straightforward extensions of existing dbt-bigquery functionality. + For "big ideas" about future capabilities of dbt-bigquery, we ask that you open a + [discussion](https://github.com/dbt-labs/dbt-bigquery/discussions) in the "Ideas" category instead. + options: + - label: I have read the [expectations for open source contributors](https://docs.getdbt.com/docs/contributing/oss-expectations) + required: true + - label: I have searched the existing issues, and I could not find an existing issue for this feature + required: true + - label: I am requesting a straightforward extension of existing dbt-bigquery functionality, rather than a Big Idea better suited to a discussion + required: true + - type: textarea + attributes: + label: Describe the feature + description: A clear and concise description of what you want to happen. + validations: + required: true + - type: textarea + attributes: + label: Describe alternatives you've considered + description: | + A clear and concise description of any alternative solutions or features you've considered. + validations: + required: false + - type: textarea + attributes: + label: Who will this benefit? + description: | + What kind of use case will this feature be useful for? Please be specific and provide examples, this will help us prioritize properly. + validations: + required: false + - type: input + attributes: + label: Are you interested in contributing this feature? + description: Let us know if you want to write some code, and how we can help. + validations: + required: false + - type: textarea + attributes: + label: Anything else? + description: | + Links? References? Anything that will give us more context about the feature you are suggesting! + validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md deleted file mode 100644 index 5c0a9545f..000000000 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -name: Feature request -about: Suggest an idea for dbt-bigquery -title: '' -labels: enhancement, triage -assignees: '' - ---- - -### Describe the feature -A clear and concise description of what you want to happen. - -### Describe alternatives you've considered -A clear and concise description of any alternative solutions or features you've considered. - -### Additional context -Please include any other relevant context here. - -### Who will this benefit? -What kind of use case will this feature be useful for? Please be specific and provide examples, this will help us prioritize properly. - -### Are you interested in contributing this feature? -Let us know if you want to write some code, and how we can help. diff --git a/.github/ISSUE_TEMPLATE/regression-report.yml b/.github/ISSUE_TEMPLATE/regression-report.yml new file mode 100644 index 000000000..c9f4853cd --- /dev/null +++ b/.github/ISSUE_TEMPLATE/regression-report.yml @@ -0,0 +1,82 @@ +name: ☣️ Regression +description: Report a regression you've observed in a newer version of dbt-bigquery +title: "[Regression] <title>" +labels: ["bug", "regression", "triage"] +body: + - type: markdown + attributes: + value: | + Thanks for taking the time to fill out this regression report! + - type: checkboxes + attributes: + label: Is this a regression in a recent version of dbt-bigquery? + description: > + A regression is when documented functionality works as expected in an older version of dbt-bigquery, + and no longer works after upgrading to a newer version of dbt-bigquery + options: + - label: I believe this is a regression in dbt-bigquery functionality + required: true + - label: I have searched the existing issues, and I could not find an existing issue for this regression + required: true + - type: textarea + attributes: + label: Current Behavior + description: A concise description of what you're experiencing. + validations: + required: true + - type: textarea + attributes: + label: Expected/Previous Behavior + description: A concise description of what you expected to happen. + validations: + required: true + - type: textarea + attributes: + label: Steps To Reproduce + description: Steps to reproduce the behavior. + placeholder: | + 1. In this environment... + 2. With this config... + 3. Run '...' + 4. See error... + validations: + required: true + - type: textarea + id: logs + attributes: + label: Relevant log output + description: | + If applicable, log output to help explain your problem. + render: shell + validations: + required: false + - type: textarea + attributes: + label: Environment + description: | + examples: + - **OS**: Ubuntu 20.04 + - **Python**: 3.9.12 (`python3 --version`) + - **dbt-core (working version)**: 1.1.1 (`dbt --version`) + - **dbt-bigquery (working version)**: 1.1.0 (`dbt --version`) + - **dbt-core (regression version)**: 1.2.0 (`dbt --version`) + - **dbt-bigquery (regression version)**: 1.2.0 (`dbt --version`) + value: | + - OS: + - Python: + - dbt-core (working version): + - dbt-bigquery (working version): + - dbt-core (regression version): + - dbt-bigquery (regression version): + render: markdown + validations: + required: true + - type: textarea + attributes: + label: Additional Context + description: | + Links? References? Anything that will give us more context about the issue you are encountering! + + Tip: You can attach images or log files by clicking this area to highlight it and then dragging files in. + validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/release.md b/.github/ISSUE_TEMPLATE/release.md deleted file mode 100644 index c3cb05c3c..000000000 --- a/.github/ISSUE_TEMPLATE/release.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -name: Release -about: Release a new version of dbt-bigquery -title: '' -labels: release -assignees: '' - ---- - -### TBD diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 5d8734bd5..33a1e4538 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -11,11 +11,16 @@ resolves # ### Description -<!--- Describe the Pull Request here --> +<!--- + Describe the Pull Request here. Add any references and info to help reviewers + understand your changes. Include any tradeoffs you considered. +--> ### Checklist +- [ ] I have read [the contributing guide](https://github.com/dbt-labs/dbt-bigquery/blob/main/CONTRIBUTING.md) and understand what's expected of me - [ ] I have signed the [CLA](https://docs.getdbt.com/docs/contributor-license-agreements) - [ ] I have run this code in development and it appears to resolve the stated issue - [ ] This PR includes tests, or tests are not required/relevant for this PR +- [ ] I have [opened an issue to add/update docs](https://github.com/dbt-labs/docs.getdbt.com/issues/new/choose), or docs changes are not required/relevant for this PR - [ ] I have run `changie new` to [create a changelog entry](https://github.com/dbt-labs/dbt-bigquery/blob/main/CONTRIBUTING.md#Adding-CHANGELOG-Entry) From 61ed3b02a999995fe547dcc263ad1e0fd7e575f8 Mon Sep 17 00:00:00 2001 From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com> Date: Fri, 16 Sep 2022 12:00:28 -0700 Subject: [PATCH 12/24] update retry settings experiment (#310) * update retry settings * changelog entry --- .changes/unreleased/Under the Hood-20220915-145212.yaml | 7 +++++++ dbt/adapters/bigquery/connections.py | 4 +--- tests/conftest.py | 2 ++ 3 files changed, 10 insertions(+), 3 deletions(-) create mode 100644 .changes/unreleased/Under the Hood-20220915-145212.yaml diff --git a/.changes/unreleased/Under the Hood-20220915-145212.yaml b/.changes/unreleased/Under the Hood-20220915-145212.yaml new file mode 100644 index 000000000..6ab035256 --- /dev/null +++ b/.changes/unreleased/Under the Hood-20220915-145212.yaml @@ -0,0 +1,7 @@ +kind: Under the Hood +body: Update BQ job and call retry settings +time: 2022-09-15T14:52:12.902965-07:00 +custom: + Author: colin-rogers-dbt + Issue: "311" + PR: "310" diff --git a/dbt/adapters/bigquery/connections.py b/dbt/adapters/bigquery/connections.py index 50437622b..336db3fd1 100644 --- a/dbt/adapters/bigquery/connections.py +++ b/dbt/adapters/bigquery/connections.py @@ -184,10 +184,8 @@ def __pre_deserialize__(cls, d: Dict[Any, Any]) -> Dict[Any, Any]: class BigQueryConnectionManager(BaseConnectionManager): TYPE = "bigquery" - QUERY_TIMEOUT = 300 - RETRIES = 1 DEFAULT_INITIAL_DELAY = 1.0 # Seconds - DEFAULT_MAXIMUM_DELAY = 1.0 # Seconds + DEFAULT_MAXIMUM_DELAY = 3.0 # Seconds @classmethod def handle_error(cls, error, message): diff --git a/tests/conftest.py b/tests/conftest.py index e74fa424b..6ca033f8d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -29,6 +29,7 @@ def oauth_target(): 'type': 'bigquery', 'method': 'oauth', 'threads': 1, + 'job_retries': 2, } @@ -40,6 +41,7 @@ def service_account_target(): 'type': 'bigquery', 'method': 'service-account-json', 'threads': 1, + 'job_retries': 2, 'project': project_id, 'keyfile_json': credentials, # following 3 for python model From 9699a48c3efd923f95d1fc13ea77827fa222d7db Mon Sep 17 00:00:00 2001 From: Matteo Ferrando <matteo.ferrando2@gmail.com> Date: Mon, 19 Sep 2022 12:46:58 -0400 Subject: [PATCH 13/24] Convert df to pyspark DataFrame if it is pandas before writing (#301) --- .changes/unreleased/Under the Hood-20220908-143218.yaml | 7 +++++++ dbt/include/bigquery/macros/materializations/table.sql | 8 ++++++++ 2 files changed, 15 insertions(+) create mode 100644 .changes/unreleased/Under the Hood-20220908-143218.yaml diff --git a/.changes/unreleased/Under the Hood-20220908-143218.yaml b/.changes/unreleased/Under the Hood-20220908-143218.yaml new file mode 100644 index 000000000..7053f96b6 --- /dev/null +++ b/.changes/unreleased/Under the Hood-20220908-143218.yaml @@ -0,0 +1,7 @@ +kind: Under the Hood +body: Convert df to pyspark DataFrame if it is pandas before writing +time: 2022-09-08T14:32:18.243412-04:00 +custom: + Author: chamini2 + Issue: "301" + PR: "301" diff --git a/dbt/include/bigquery/macros/materializations/table.sql b/dbt/include/bigquery/macros/materializations/table.sql index 5ca735aa1..519b8f994 100644 --- a/dbt/include/bigquery/macros/materializations/table.sql +++ b/dbt/include/bigquery/macros/materializations/table.sql @@ -63,6 +63,14 @@ df = model(dbt, spark) # COMMAND ---------- # this is materialization code dbt generated, please do not modify +# make sure pandas exists +import importlib.util +package_name = 'pandas' +if importlib.util.find_spec(package_name): + import pandas + if isinstance(df, pandas.core.frame.DataFrame): + # convert to pyspark.DataFrame + df = spark.createDataFrame(df) df.write \ .mode("overwrite") \ From 353e724b4666fa4201e525439c4efcdff3fe9464 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 20 Sep 2022 16:41:56 -0400 Subject: [PATCH 14/24] Bump mypy from 0.942 to 0.971 (#296) * Bump mypy from 0.942 to 0.971 Bumps [mypy](https://github.com/python/mypy) from 0.942 to 0.971. - [Release notes](https://github.com/python/mypy/releases) - [Commits](https://github.com/python/mypy/compare/v0.942...v0.971) --- updated-dependencies: - dependency-name: mypy dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com> * Add automated changelog yaml from template for bot PR * Remove newline Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com> Co-authored-by: leahwicz <60146280+leahwicz@users.noreply.github.com> --- .changes/unreleased/Dependency-20220920-200205.yaml | 7 +++++++ dev-requirements.txt | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 .changes/unreleased/Dependency-20220920-200205.yaml diff --git a/.changes/unreleased/Dependency-20220920-200205.yaml b/.changes/unreleased/Dependency-20220920-200205.yaml new file mode 100644 index 000000000..6dcfa5924 --- /dev/null +++ b/.changes/unreleased/Dependency-20220920-200205.yaml @@ -0,0 +1,7 @@ +kind: Dependency +body: "Bump mypy from 0.942 to 0.971" +time: 2022-09-20T20:02:05.00000Z +custom: + Author: dependabot[bot] + Issue: 254 + PR: 296 diff --git a/dev-requirements.txt b/dev-requirements.txt index 129dbbe64..676703d3e 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -9,7 +9,7 @@ flake8 flaky freezegun==1.1.0 ipdb -mypy==0.942 +mypy==0.971 pip-tools pre-commit pytest From 1e6558126e3d2cd761d8114804cd5e00ae3b6f93 Mon Sep 17 00:00:00 2001 From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> Date: Wed, 21 Sep 2022 12:13:41 -0600 Subject: [PATCH 15/24] Convert df to Spark DataFrame if it is a pandas or pandas-on-Spark DataFrame before writing (#317) * Convert df to Spark DataFrame if it is a pandas or pandas-on-Spark DataFrame before writing * Changelog entry --- .../Under the Hood-20220920-151057.yaml | 7 ++++ .../macros/materializations/table.sql | 37 +++++++++++++++---- 2 files changed, 37 insertions(+), 7 deletions(-) create mode 100644 .changes/unreleased/Under the Hood-20220920-151057.yaml diff --git a/.changes/unreleased/Under the Hood-20220920-151057.yaml b/.changes/unreleased/Under the Hood-20220920-151057.yaml new file mode 100644 index 000000000..86914c4f3 --- /dev/null +++ b/.changes/unreleased/Under the Hood-20220920-151057.yaml @@ -0,0 +1,7 @@ +kind: Under the Hood +body: Enable pandas-on-Spark DataFrames for dbt python models +time: 2022-09-20T15:10:57.712169-06:00 +custom: + Author: dbeatty10 + Issue: "316" + PR: "317" diff --git a/dbt/include/bigquery/macros/materializations/table.sql b/dbt/include/bigquery/macros/materializations/table.sql index 519b8f994..3ecdf37a5 100644 --- a/dbt/include/bigquery/macros/materializations/table.sql +++ b/dbt/include/bigquery/macros/materializations/table.sql @@ -63,14 +63,37 @@ df = model(dbt, spark) # COMMAND ---------- # this is materialization code dbt generated, please do not modify -# make sure pandas exists import importlib.util -package_name = 'pandas' -if importlib.util.find_spec(package_name): - import pandas - if isinstance(df, pandas.core.frame.DataFrame): - # convert to pyspark.DataFrame - df = spark.createDataFrame(df) + +pandas_available = False +pyspark_available = False + +# make sure pandas exists before using it +if importlib.util.find_spec("pandas"): + import pandas + pandas_available = True + +# make sure pyspark.pandas exists before using it +if importlib.util.find_spec("pyspark.pandas"): + import pyspark.pandas + pyspark_available = True + +# preferentially convert pandas DataFrames to pandas-on-Spark DataFrames first +# since they know how to convert pandas DataFrames better than `spark.createDataFrame(df)` +# and converting from pandas-on-Spark to Spark DataFrame has no overhead +if pyspark_available and pandas_available and isinstance(df, pandas.core.frame.DataFrame): + df = pyspark.pandas.frame.DataFrame(df) + +# convert to pyspark.sql.dataframe.DataFrame +if isinstance(df, pyspark.sql.dataframe.DataFrame): + pass # since it is already a Spark DataFrame +elif pyspark_available and isinstance(df, pyspark.pandas.frame.DataFrame): + df = df.to_spark() +elif pandas_available and isinstance(df, pandas.core.frame.DataFrame): + df = spark.createDataFrame(df) +else: + msg = f"{type(df)} is not a supported type for dbt Python materialization" + raise Exception(msg) df.write \ .mode("overwrite") \ From 24a787fae36aa30a6c5dc29c9944a91a671fb78a Mon Sep 17 00:00:00 2001 From: James McNeill <55981540+jpmmcneill@users.noreply.github.com> Date: Thu, 22 Sep 2022 14:15:51 +0100 Subject: [PATCH 16/24] add type boolean (#313) * add type boolean * changie result * dont pull main Co-authored-by: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> Co-authored-by: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> --- .changes/unreleased/Features-20220919-232721.yaml | 7 +++++++ dbt/adapters/bigquery/column.py | 1 + tests/functional/adapter/utils/test_data_types.py | 5 +++++ 3 files changed, 13 insertions(+) create mode 100644 .changes/unreleased/Features-20220919-232721.yaml diff --git a/.changes/unreleased/Features-20220919-232721.yaml b/.changes/unreleased/Features-20220919-232721.yaml new file mode 100644 index 000000000..4f5c90dd0 --- /dev/null +++ b/.changes/unreleased/Features-20220919-232721.yaml @@ -0,0 +1,7 @@ +kind: Features +body: add type_boolean as a data type macro +time: 2022-09-19T23:27:21.864912+01:00 +custom: + Author: jpmmcneill + Issue: "315" + PR: "313" diff --git a/dbt/adapters/bigquery/column.py b/dbt/adapters/bigquery/column.py index 1a6d042b6..0f9269283 100644 --- a/dbt/adapters/bigquery/column.py +++ b/dbt/adapters/bigquery/column.py @@ -15,6 +15,7 @@ class BigQueryColumn(Column): "TIMESTAMP": "TIMESTAMP", "FLOAT": "FLOAT64", "INTEGER": "INT64", + "BOOLEAN": "BOOLEAN", "RECORD": "RECORD", } fields: List[Self] # type: ignore diff --git a/tests/functional/adapter/utils/test_data_types.py b/tests/functional/adapter/utils/test_data_types.py index af084a0ef..e1669910c 100644 --- a/tests/functional/adapter/utils/test_data_types.py +++ b/tests/functional/adapter/utils/test_data_types.py @@ -5,6 +5,7 @@ from dbt.tests.adapter.utils.data_types.test_type_numeric import BaseTypeNumeric from dbt.tests.adapter.utils.data_types.test_type_string import BaseTypeString from dbt.tests.adapter.utils.data_types.test_type_timestamp import BaseTypeTimestamp +from dbt.tests.adapter.utils.data_types.test_type_boolean import BaseTypeBoolean class TestTypeBigInt(BaseTypeBigInt): @@ -31,4 +32,8 @@ class TestTypeString(BaseTypeString): class TestTypeTimestamp(BaseTypeTimestamp): pass + +class TestTypeBoolean(BaseTypeBoolean): + pass + \ No newline at end of file From e110bd6124e6be21f5603d196b7438958a976352 Mon Sep 17 00:00:00 2001 From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> Date: Mon, 26 Sep 2022 14:29:00 -0600 Subject: [PATCH 17/24] Tests for `current_timestamp` (#322) * Test failure will confirm that it is not a timezone-naive data type * Temporarily update dev-requirements.text * Tests for current_timestamp * Restore original dev-requirements.txt --- tests/functional/adapter/utils/test_utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/functional/adapter/utils/test_utils.py b/tests/functional/adapter/utils/test_utils.py index 2a37ac6b7..ae1aadcbc 100644 --- a/tests/functional/adapter/utils/test_utils.py +++ b/tests/functional/adapter/utils/test_utils.py @@ -4,6 +4,7 @@ from dbt.tests.adapter.utils.test_bool_or import BaseBoolOr from dbt.tests.adapter.utils.test_cast_bool_to_text import BaseCastBoolToText from dbt.tests.adapter.utils.test_concat import BaseConcat +from dbt.tests.adapter.utils.test_current_timestamp import BaseCurrentTimestampAware from dbt.tests.adapter.utils.test_dateadd import BaseDateAdd from dbt.tests.adapter.utils.test_datediff import BaseDateDiff from dbt.tests.adapter.utils.test_date_trunc import BaseDateTrunc @@ -39,6 +40,11 @@ class TestConcat(BaseConcat): pass +# Use either BaseCurrentTimestampAware or BaseCurrentTimestampNaive but not both +class TestCurrentTimestamp(BaseCurrentTimestampAware): + pass + + class TestDateAdd(BaseDateAdd): pass From 164478c20cfff851c40351f3f9e0ab0a67936099 Mon Sep 17 00:00:00 2001 From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> Date: Mon, 26 Sep 2022 16:49:46 -0600 Subject: [PATCH 18/24] Array macros (#308) * Temporary dev-requirements * Changelog entry * Implementations and tests for array macros * Remove `cast_array_to_string` macro * ARRAY in BigQuery does not support set operation comparisons using EXCEPT * Restore original dev-requirements.txt --- .../unreleased/Features-20220913-084836.yaml | 7 +++ .../bigquery/macros/utils/array_append.sql | 3 + .../bigquery/macros/utils/array_concat.sql | 3 + .../bigquery/macros/utils/array_construct.sql | 7 +++ .../adapter/utils/fixture_array_append.py | 13 ++++ .../adapter/utils/fixture_array_concat.py | 15 +++++ .../adapter/utils/fixture_array_construct.py | 20 ++++++ tests/functional/adapter/utils/test_utils.py | 63 ++++++++++++++++++- 8 files changed, 130 insertions(+), 1 deletion(-) create mode 100644 .changes/unreleased/Features-20220913-084836.yaml create mode 100644 dbt/include/bigquery/macros/utils/array_append.sql create mode 100644 dbt/include/bigquery/macros/utils/array_concat.sql create mode 100644 dbt/include/bigquery/macros/utils/array_construct.sql create mode 100644 tests/functional/adapter/utils/fixture_array_append.py create mode 100644 tests/functional/adapter/utils/fixture_array_concat.py create mode 100644 tests/functional/adapter/utils/fixture_array_construct.py diff --git a/.changes/unreleased/Features-20220913-084836.yaml b/.changes/unreleased/Features-20220913-084836.yaml new file mode 100644 index 000000000..0a4bd275d --- /dev/null +++ b/.changes/unreleased/Features-20220913-084836.yaml @@ -0,0 +1,7 @@ +kind: Features +body: Array macros +time: 2022-09-13T08:48:36.255365-06:00 +custom: + Author: graciegoheen dbeatty10 + Issue: "307" + PR: "308" diff --git a/dbt/include/bigquery/macros/utils/array_append.sql b/dbt/include/bigquery/macros/utils/array_append.sql new file mode 100644 index 000000000..78bd5cc43 --- /dev/null +++ b/dbt/include/bigquery/macros/utils/array_append.sql @@ -0,0 +1,3 @@ +{% macro bigquery__array_append(array, new_element) -%} + {{ array_concat(array, array_construct([new_element])) }} +{%- endmacro %} diff --git a/dbt/include/bigquery/macros/utils/array_concat.sql b/dbt/include/bigquery/macros/utils/array_concat.sql new file mode 100644 index 000000000..eff8f524a --- /dev/null +++ b/dbt/include/bigquery/macros/utils/array_concat.sql @@ -0,0 +1,3 @@ +{% macro bigquery__array_concat(array_1, array_2) -%} + array_concat({{ array_1 }}, {{ array_2 }}) +{%- endmacro %} diff --git a/dbt/include/bigquery/macros/utils/array_construct.sql b/dbt/include/bigquery/macros/utils/array_construct.sql new file mode 100644 index 000000000..270b1f785 --- /dev/null +++ b/dbt/include/bigquery/macros/utils/array_construct.sql @@ -0,0 +1,7 @@ +{% macro bigquery__array_construct(inputs, data_type) -%} + {% if inputs|length > 0 %} + [ {{ inputs|join(' , ') }} ] + {% else %} + ARRAY<{{data_type}}>[] + {% endif %} +{%- endmacro %} diff --git a/tests/functional/adapter/utils/fixture_array_append.py b/tests/functional/adapter/utils/fixture_array_append.py new file mode 100644 index 000000000..0558d66e1 --- /dev/null +++ b/tests/functional/adapter/utils/fixture_array_append.py @@ -0,0 +1,13 @@ +# array_append + +# EXCEPT can't be used with ARRAYs in BigQuery, so convert to a string +models__array_append_expected_sql = """ +select 1 as id, {{ array_to_string(array_construct([1,2,3,4])) }} as array_col union all +select 2 as id, {{ array_to_string(array_construct([4])) }} as array_col +""" + + +models__array_append_actual_sql = """ +select 1 as id, {{ array_to_string(array_append(array_construct([1,2,3]), 4)) }} as array_col union all +select 2 as id, {{ array_to_string(array_append(array_construct([]), 4)) }} as array_col +""" diff --git a/tests/functional/adapter/utils/fixture_array_concat.py b/tests/functional/adapter/utils/fixture_array_concat.py new file mode 100644 index 000000000..51af8bf12 --- /dev/null +++ b/tests/functional/adapter/utils/fixture_array_concat.py @@ -0,0 +1,15 @@ +# array_concat + +# EXCEPT can't be used with ARRAYs in BigQuery, so convert to a string +models__array_concat_expected_sql = """ +select 1 as id, {{ array_to_string(array_construct([1,2,3,4,5,6])) }} as array_col union all +select 2 as id, {{ array_to_string(array_construct([2])) }} as array_col union all +select 3 as id, {{ array_to_string(array_construct([3])) }} as array_col +""" + + +models__array_concat_actual_sql = """ +select 1 as id, {{ array_to_string(array_concat(array_construct([1,2,3]), array_construct([4,5,6]))) }} as array_col union all +select 2 as id, {{ array_to_string(array_concat(array_construct([]), array_construct([2]))) }} as array_col union all +select 3 as id, {{ array_to_string(array_concat(array_construct([3]), array_construct([]))) }} as array_col +""" diff --git a/tests/functional/adapter/utils/fixture_array_construct.py b/tests/functional/adapter/utils/fixture_array_construct.py new file mode 100644 index 000000000..13d0bb2f3 --- /dev/null +++ b/tests/functional/adapter/utils/fixture_array_construct.py @@ -0,0 +1,20 @@ +# array_construct + +# EXCEPT can't be used with ARRAYs in BigQuery, so convert to a string +models__array_construct_expected_sql = """ +select 1 as id, {{ array_to_string(array_construct([1,2,3])) }} as array_col union all +select 2 as id, {{ array_to_string(array_construct([])) }} as array_col +""" + + +models__array_construct_actual_sql = """ +select 1 as id, {{ array_to_string(array_construct([1,2,3])) }} as array_col union all +select 2 as id, {{ array_to_string(array_construct([])) }} as array_col +""" + + +macros__array_to_string_sql = """ +{% macro array_to_string(array) %} + (select string_agg(cast(element as string), ',') from unnest({{ array }}) element) +{% endmacro %} +""" diff --git a/tests/functional/adapter/utils/test_utils.py b/tests/functional/adapter/utils/test_utils.py index ae1aadcbc..5f5c5d83b 100644 --- a/tests/functional/adapter/utils/test_utils.py +++ b/tests/functional/adapter/utils/test_utils.py @@ -1,5 +1,8 @@ import pytest -from dbt.tests.adapter.utils.base_utils import BaseUtils + +from dbt.tests.adapter.utils.test_array_append import BaseArrayAppend +from dbt.tests.adapter.utils.test_array_concat import BaseArrayConcat +from dbt.tests.adapter.utils.test_array_construct import BaseArrayConstruct from dbt.tests.adapter.utils.test_any_value import BaseAnyValue from dbt.tests.adapter.utils.test_bool_or import BaseBoolOr from dbt.tests.adapter.utils.test_cast_bool_to_text import BaseCastBoolToText @@ -22,12 +25,70 @@ from dbt.tests.adapter.utils.test_safe_cast import BaseSafeCast from dbt.tests.adapter.utils.test_split_part import BaseSplitPart from dbt.tests.adapter.utils.test_string_literal import BaseStringLiteral +from tests.functional.adapter.utils.fixture_array_append import ( + models__array_append_actual_sql, + models__array_append_expected_sql, +) +from tests.functional.adapter.utils.fixture_array_concat import ( + models__array_concat_actual_sql, + models__array_concat_expected_sql, +) +from tests.functional.adapter.utils.fixture_array_construct import ( + models__array_construct_actual_sql, + models__array_construct_expected_sql, + macros__array_to_string_sql, +) class TestAnyValue(BaseAnyValue): pass +class TestArrayAppend(BaseArrayAppend): + @pytest.fixture(scope="class") + def models(self): + return { + "actual.sql": models__array_append_actual_sql, + "expected.sql": models__array_append_expected_sql, + } + + @pytest.fixture(scope="class") + def macros(self): + return { + "array_to_string.sql": macros__array_to_string_sql, + } + + +class TestArrayConcat(BaseArrayConcat): + @pytest.fixture(scope="class") + def models(self): + return { + "actual.sql": models__array_concat_actual_sql, + "expected.sql": models__array_concat_expected_sql, + } + + @pytest.fixture(scope="class") + def macros(self): + return { + "array_to_string.sql": macros__array_to_string_sql, + } + + +class TestArrayConstruct(BaseArrayConstruct): + @pytest.fixture(scope="class") + def models(self): + return { + "actual.sql": models__array_construct_actual_sql, + "expected.sql": models__array_construct_expected_sql, + } + + @pytest.fixture(scope="class") + def macros(self): + return { + "array_to_string.sql": macros__array_to_string_sql, + } + + class TestBoolOr(BaseBoolOr): pass From 8d0c3bbedf647ef7e4110e47ac3626cfd9b5ae9c Mon Sep 17 00:00:00 2001 From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> Date: Tue, 27 Sep 2022 06:00:14 -0600 Subject: [PATCH 19/24] Convert df to pyspark DataFrame if it is koalas before writing (#321) * Convert df to pyspark DataFrame if it is koalas before writing * Changelog entry * Preferentially convert Koalas DataFrames to pandas-on-Spark DataFrames first * Update changelog entry * Fix explanation Co-authored-by: Chenyu Li <chenyu.li@dbtlabs.com> --- .../unreleased/Under the Hood-20220924-134858.yaml | 7 +++++++ .../bigquery/macros/materializations/table.sql | 12 +++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 .changes/unreleased/Under the Hood-20220924-134858.yaml diff --git a/.changes/unreleased/Under the Hood-20220924-134858.yaml b/.changes/unreleased/Under the Hood-20220924-134858.yaml new file mode 100644 index 000000000..a4fbceb3f --- /dev/null +++ b/.changes/unreleased/Under the Hood-20220924-134858.yaml @@ -0,0 +1,7 @@ +kind: Under the Hood +body: Convert df to pyspark DataFrame if it is koalas before writing +time: 2022-09-24T13:48:58.614746-06:00 +custom: + Author: dbeatty10 ueshin + Issue: "320" + PR: "321" diff --git a/dbt/include/bigquery/macros/materializations/table.sql b/dbt/include/bigquery/macros/materializations/table.sql index 3ecdf37a5..1bea3f487 100644 --- a/dbt/include/bigquery/macros/materializations/table.sql +++ b/dbt/include/bigquery/macros/materializations/table.sql @@ -67,6 +67,7 @@ import importlib.util pandas_available = False pyspark_available = False +koalas_available = False # make sure pandas exists before using it if importlib.util.find_spec("pandas"): @@ -78,17 +79,26 @@ if importlib.util.find_spec("pyspark.pandas"): import pyspark.pandas pyspark_available = True -# preferentially convert pandas DataFrames to pandas-on-Spark DataFrames first +# make sure databricks.koalas exists before using it +if importlib.util.find_spec("databricks.koalas"): + import databricks.koalas + koalas_available = True + +# preferentially convert pandas DataFrames to pandas-on-Spark or Koalas DataFrames first # since they know how to convert pandas DataFrames better than `spark.createDataFrame(df)` # and converting from pandas-on-Spark to Spark DataFrame has no overhead if pyspark_available and pandas_available and isinstance(df, pandas.core.frame.DataFrame): df = pyspark.pandas.frame.DataFrame(df) +elif koalas_available and pandas_available and isinstance(df, pandas.core.frame.DataFrame): + df = databricks.koalas.frame.DataFrame(df) # convert to pyspark.sql.dataframe.DataFrame if isinstance(df, pyspark.sql.dataframe.DataFrame): pass # since it is already a Spark DataFrame elif pyspark_available and isinstance(df, pyspark.pandas.frame.DataFrame): df = df.to_spark() +elif koalas_available and isinstance(df, databricks.koalas.frame.DataFrame): + df = df.to_spark() elif pandas_available and isinstance(df, pandas.core.frame.DataFrame): df = spark.createDataFrame(df) else: From 8b354f9f8d6d7cf47ac6f9deb16204394316254a Mon Sep 17 00:00:00 2001 From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com> Date: Wed, 28 Sep 2022 11:52:10 -0500 Subject: [PATCH 20/24] CT-1166 bigquery conversion of test_alias (#318) * init pr, ipmorting test from core * referencing new Base class in core instead of tests * readding ref to MACROS__EXPECT_VALUE_SQL as seems to be overwritten when adding new bigquery macro * remove old integration test * swapping schema ref to database ref * minor import change, rechanging back to uique_schema version of test pulled in from core * rolling back test to unforked state will document thoughts on what problem is to raise as this may tie into other tests having hanging tables * change core pointer * change core pointer Co-authored-by: Chenyu Li <chenyu.li@dbtlabs.com> --- tests/functional/adapter/test_aliases.py | 48 +++++++++++++ .../integration/aliases_test/macros/cast.sql | 13 ---- .../aliases_test/macros/expect_value.sql | 10 --- .../models-dupe-custom-database/README.md | 2 - .../models-dupe-custom-database/model_a.sql | 1 - .../models-dupe-custom-database/model_b.sql | 1 - .../models-dupe-custom-database/schema.yml | 12 ---- .../aliases_test/models/alias_in_project.sql | 2 - .../models/alias_in_project_with_override.sql | 4 -- .../aliases_test/models/foo_alias.sql | 9 --- .../aliases_test/models/ref_foo_alias.sql | 16 ----- .../aliases_test/models/schema.yml | 22 ------ .../integration/aliases_test/test_aliases.py | 70 ------------------- 13 files changed, 48 insertions(+), 162 deletions(-) create mode 100644 tests/functional/adapter/test_aliases.py delete mode 100644 tests/integration/aliases_test/macros/cast.sql delete mode 100644 tests/integration/aliases_test/macros/expect_value.sql delete mode 100644 tests/integration/aliases_test/models-dupe-custom-database/README.md delete mode 100644 tests/integration/aliases_test/models-dupe-custom-database/model_a.sql delete mode 100644 tests/integration/aliases_test/models-dupe-custom-database/model_b.sql delete mode 100644 tests/integration/aliases_test/models-dupe-custom-database/schema.yml delete mode 100644 tests/integration/aliases_test/models/alias_in_project.sql delete mode 100644 tests/integration/aliases_test/models/alias_in_project_with_override.sql delete mode 100644 tests/integration/aliases_test/models/foo_alias.sql delete mode 100644 tests/integration/aliases_test/models/ref_foo_alias.sql delete mode 100644 tests/integration/aliases_test/models/schema.yml delete mode 100644 tests/integration/aliases_test/test_aliases.py diff --git a/tests/functional/adapter/test_aliases.py b/tests/functional/adapter/test_aliases.py new file mode 100644 index 000000000..2b4cdd381 --- /dev/null +++ b/tests/functional/adapter/test_aliases.py @@ -0,0 +1,48 @@ +import pytest +import os +from dbt.tests.adapter.aliases.test_aliases import BaseAliases, BaseSameAliasDifferentDatabases + +MACROS__BIGQUERY_CAST_SQL = """ + +{% macro bigquery__string_literal(s) %} + cast('{{ s }}' as string) +{% endmacro %} + +""" + +MACROS__EXPECT_VALUE_SQL = """ + +-- cross-db compatible test, similar to accepted_values + +{% test expect_value(model, field, value) %} + +select * +from {{ model }} +where {{ field }} != '{{ value }}' + +{% endtest %} + +""" + +class TestAliasesBigQuery(BaseAliases): + @pytest.fixture(scope="class") + def macros(self): + return {"bigquery_cast.sql": MACROS__BIGQUERY_CAST_SQL, "expect_value.sql": MACROS__EXPECT_VALUE_SQL} + + +class TestSameTestSameAliasDifferentDatabasesBigQuery(BaseSameAliasDifferentDatabases): + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "config-version": 2, + "macro-paths": ["macros"], + "models": { + "test": { + "alias": "duped_alias", + "model_b": {"database": os.getenv("BIGQUERY_TEST_ALT_DATABASE")}, + }, + }, + } + @pytest.fixture(scope="class") + def macros(self): + return {"bigquery_cast.sql": MACROS__BIGQUERY_CAST_SQL, "expect_value.sql": MACROS__EXPECT_VALUE_SQL} diff --git a/tests/integration/aliases_test/macros/cast.sql b/tests/integration/aliases_test/macros/cast.sql deleted file mode 100644 index 584492077..000000000 --- a/tests/integration/aliases_test/macros/cast.sql +++ /dev/null @@ -1,13 +0,0 @@ - - -{% macro string_literal(s) -%} - {{ adapter.dispatch('string_literal', macro_namespace='test')(s) }} -{%- endmacro %} - -{% macro default__string_literal(s) %} - '{{ s }}'::text -{% endmacro %} - -{% macro bigquery__string_literal(s) %} - cast('{{ s }}' as string) -{% endmacro %} diff --git a/tests/integration/aliases_test/macros/expect_value.sql b/tests/integration/aliases_test/macros/expect_value.sql deleted file mode 100644 index 0ee66151f..000000000 --- a/tests/integration/aliases_test/macros/expect_value.sql +++ /dev/null @@ -1,10 +0,0 @@ - --- cross-db compatible test, similar to accepted_values - -{% test expect_value(model, field, value) %} - -select * -from {{ model }} -where {{ field }} != '{{ value }}' - -{% endtest %} diff --git a/tests/integration/aliases_test/models-dupe-custom-database/README.md b/tests/integration/aliases_test/models-dupe-custom-database/README.md deleted file mode 100644 index 7d0add229..000000000 --- a/tests/integration/aliases_test/models-dupe-custom-database/README.md +++ /dev/null @@ -1,2 +0,0 @@ -these should succeed, as both models have the same alias, -but they are configured to be built in _different_ schemas diff --git a/tests/integration/aliases_test/models-dupe-custom-database/model_a.sql b/tests/integration/aliases_test/models-dupe-custom-database/model_a.sql deleted file mode 100644 index 9ce91382c..000000000 --- a/tests/integration/aliases_test/models-dupe-custom-database/model_a.sql +++ /dev/null @@ -1 +0,0 @@ -select {{ string_literal(this.name) }} as tablename diff --git a/tests/integration/aliases_test/models-dupe-custom-database/model_b.sql b/tests/integration/aliases_test/models-dupe-custom-database/model_b.sql deleted file mode 100644 index 9ce91382c..000000000 --- a/tests/integration/aliases_test/models-dupe-custom-database/model_b.sql +++ /dev/null @@ -1 +0,0 @@ -select {{ string_literal(this.name) }} as tablename diff --git a/tests/integration/aliases_test/models-dupe-custom-database/schema.yml b/tests/integration/aliases_test/models-dupe-custom-database/schema.yml deleted file mode 100644 index a7613882c..000000000 --- a/tests/integration/aliases_test/models-dupe-custom-database/schema.yml +++ /dev/null @@ -1,12 +0,0 @@ -version: 2 -models: -- name: model_a - tests: - - expect_value: - field: tablename - value: duped_alias -- name: model_b - tests: - - expect_value: - field: tablename - value: duped_alias diff --git a/tests/integration/aliases_test/models/alias_in_project.sql b/tests/integration/aliases_test/models/alias_in_project.sql deleted file mode 100644 index aa9ecd0bf..000000000 --- a/tests/integration/aliases_test/models/alias_in_project.sql +++ /dev/null @@ -1,2 +0,0 @@ - -select {{ string_literal(this.name) }} as tablename diff --git a/tests/integration/aliases_test/models/alias_in_project_with_override.sql b/tests/integration/aliases_test/models/alias_in_project_with_override.sql deleted file mode 100644 index 67cb93d93..000000000 --- a/tests/integration/aliases_test/models/alias_in_project_with_override.sql +++ /dev/null @@ -1,4 +0,0 @@ - -{{ config(alias='override_alias') }} - -select {{ string_literal(this.name) }} as tablename diff --git a/tests/integration/aliases_test/models/foo_alias.sql b/tests/integration/aliases_test/models/foo_alias.sql deleted file mode 100644 index 3f7d16467..000000000 --- a/tests/integration/aliases_test/models/foo_alias.sql +++ /dev/null @@ -1,9 +0,0 @@ - -{{ - config( - alias='foo', - materialized='table' - ) -}} - -select {{ string_literal(this.name) }} as tablename diff --git a/tests/integration/aliases_test/models/ref_foo_alias.sql b/tests/integration/aliases_test/models/ref_foo_alias.sql deleted file mode 100644 index e01463bb7..000000000 --- a/tests/integration/aliases_test/models/ref_foo_alias.sql +++ /dev/null @@ -1,16 +0,0 @@ - -{{ - config( - materialized='table' - ) -}} - -with trigger_ref as ( - - -- we should still be able to ref a model by its filepath - select * from {{ ref('foo_alias') }} - -) - --- this name should still be the filename -select {{ string_literal(this.name) }} as tablename diff --git a/tests/integration/aliases_test/models/schema.yml b/tests/integration/aliases_test/models/schema.yml deleted file mode 100644 index b3a82faad..000000000 --- a/tests/integration/aliases_test/models/schema.yml +++ /dev/null @@ -1,22 +0,0 @@ -version: 2 -models: -- name: foo_alias - tests: - - expect_value: - field: tablename - value: foo -- name: ref_foo_alias - tests: - - expect_value: - field: tablename - value: ref_foo_alias -- name: alias_in_project - tests: - - expect_value: - field: tablename - value: project_alias -- name: alias_in_project_with_override - tests: - - expect_value: - field: tablename - value: override_alias diff --git a/tests/integration/aliases_test/test_aliases.py b/tests/integration/aliases_test/test_aliases.py deleted file mode 100644 index 404c89499..000000000 --- a/tests/integration/aliases_test/test_aliases.py +++ /dev/null @@ -1,70 +0,0 @@ -from tests.integration.base import DBTIntegrationTest, use_profile - - -class TestAliases(DBTIntegrationTest): - @property - def schema(self): - return "aliases" - - @property - def models(self): - return "models" - - @property - def project_config(self): - return { - 'config-version': 2, - "macro-paths": ['macros'], - "models": { - "test": { - "alias_in_project": { - "alias": 'project_alias', - }, - "alias_in_project_with_override": { - "alias": 'project_alias', - }, - } - } - } - - @use_profile('bigquery') - def test__alias_model_name_bigquery(self): - results = self.run_dbt(['run']) - self.assertEqual(len(results), 4) - self.run_dbt(['test']) - - -class TestSameAliasDifferentDatabases(DBTIntegrationTest): - setup_alternate_db = True - - @property - def schema(self): - return "aliases_026" - - @property - def models(self): - return "models-dupe-custom-database" - - @property - def project_config(self): - return { - 'config-version': 2, - "macro-paths": ['macros'], - 'models': { - 'test': { - 'alias': 'duped_alias', - 'model_b': { - 'database': self.alternative_database, - }, - }, - } - } - - @use_profile('bigquery') - def test__bigquery_same_alias_succeeds_in_different_schemas(self): - results = self.run_dbt(['run']) - self.assertEqual(len(results), 2) - res = self.run_dbt(['test']) - - # Make extra sure the tests ran - self.assertTrue(len(res) > 0) From 4d4ef84661fb570a75ae3a893f813972c9f90b94 Mon Sep 17 00:00:00 2001 From: Christophe Oudar <kayrnt@gmail.com> Date: Wed, 28 Sep 2022 19:40:55 +0200 Subject: [PATCH 21/24] Update Readme to link to the repository CONTRIBUTING.md (#252) * Update Readme to link to the repository CONTRIBUTING.md * review changes Co-authored-by: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com> Co-authored-by: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com> Co-authored-by: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com> --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 976bdf918..6ec77bd2e 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ more information on using dbt with BigQuery, consult [the docs](https://docs.get ## Reporting bugs and contributing code - Want to report a bug or request a feature? Let us know on [Slack](http://community.getdbt.com/), or open [an issue](https://github.com/dbt-labs/dbt-bigquery/issues/new) -- Want to help us build dbt? Check out the [Contributing Guide](https://github.com/dbt-labs/dbt/blob/HEAD/CONTRIBUTING.md) +- Want to help us build dbt? Check out the [Contributing Guide](https://github.com/dbt-labs/dbt-bigquery/blob/HEAD/CONTRIBUTING.md) ## Code of Conduct From ac4cee86a7217952c7ce864267890a99534f6f09 Mon Sep 17 00:00:00 2001 From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com> Date: Wed, 28 Sep 2022 14:36:57 -0600 Subject: [PATCH 22/24] try/except rather than find_spec for optional imports (#328) * try/except rather than find_spec for optional imports * Remove unused import --- .../bigquery/macros/materializations/table.sql | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/dbt/include/bigquery/macros/materializations/table.sql b/dbt/include/bigquery/macros/materializations/table.sql index 1bea3f487..441b97d28 100644 --- a/dbt/include/bigquery/macros/materializations/table.sql +++ b/dbt/include/bigquery/macros/materializations/table.sql @@ -63,26 +63,26 @@ df = model(dbt, spark) # COMMAND ---------- # this is materialization code dbt generated, please do not modify -import importlib.util - -pandas_available = False -pyspark_available = False -koalas_available = False - # make sure pandas exists before using it -if importlib.util.find_spec("pandas"): +try: import pandas pandas_available = True +except ImportError: + pandas_available = False # make sure pyspark.pandas exists before using it -if importlib.util.find_spec("pyspark.pandas"): +try: import pyspark.pandas pyspark_available = True +except ImportError: + pyspark_available = False # make sure databricks.koalas exists before using it -if importlib.util.find_spec("databricks.koalas"): +try: import databricks.koalas koalas_available = True +except ImportError: + koalas_available = False # preferentially convert pandas DataFrames to pandas-on-Spark or Koalas DataFrames first # since they know how to convert pandas DataFrames better than `spark.createDataFrame(df)` From e7658fda1b9de15d6269715e5282f66b7c3d914f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 29 Sep 2022 09:18:14 -0400 Subject: [PATCH 23/24] Bumping version to 1.4.0a1 and generate changelog (#331) * Bumping version to 1.4.0a1 and generate CHANGELOG * Fix up changelog Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com> Co-authored-by: Leah Antkiewicz <leah.antkiewicz@fishtownanalytics.com> --- .bumpversion.cfg | 2 +- .changes/1.3.0-b1.md | 6 ------ .changes/1.3.0-b2.md | 10 ---------- .changes/1.3.0/Features-20220804-154944.yaml | 8 -------- .changes/1.3.0/Features-20220804-155057.yaml | 7 ------- .../1.3.0/Under the Hood-20220804-155219.yaml | 7 ------- .../1.3.0/Under the Hood-20220804-155719.yaml | 7 ------- .../1.3.0/Under the Hood-20220806-142912.yaml | 7 ------- .../1.3.0/Under the Hood-20220817-154151.yaml | 7 ------- .../1.3.0/Under the Hood-20220825-073235.yaml | 7 ------- .../unreleased/Dependency-20220902-152810.yaml | 7 ------- .../unreleased/Dependency-20220920-200205.yaml | 7 ------- .../unreleased/Features-20220909-122924.yaml | 7 ------- .../unreleased/Features-20220913-084836.yaml | 7 ------- .../unreleased/Features-20220919-232721.yaml | 7 ------- .../Under the Hood-20220908-143218.yaml | 7 ------- .../Under the Hood-20220915-145212.yaml | 7 ------- .../Under the Hood-20220920-151057.yaml | 7 ------- .../Under the Hood-20220924-134858.yaml | 7 ------- CHANGELOG.md | 16 +--------------- dbt/adapters/bigquery/__version__.py | 2 +- setup.py | 2 +- 22 files changed, 4 insertions(+), 147 deletions(-) delete mode 100644 .changes/1.3.0-b1.md delete mode 100644 .changes/1.3.0-b2.md delete mode 100644 .changes/1.3.0/Features-20220804-154944.yaml delete mode 100644 .changes/1.3.0/Features-20220804-155057.yaml delete mode 100644 .changes/1.3.0/Under the Hood-20220804-155219.yaml delete mode 100644 .changes/1.3.0/Under the Hood-20220804-155719.yaml delete mode 100644 .changes/1.3.0/Under the Hood-20220806-142912.yaml delete mode 100644 .changes/1.3.0/Under the Hood-20220817-154151.yaml delete mode 100644 .changes/1.3.0/Under the Hood-20220825-073235.yaml delete mode 100644 .changes/unreleased/Dependency-20220902-152810.yaml delete mode 100644 .changes/unreleased/Dependency-20220920-200205.yaml delete mode 100644 .changes/unreleased/Features-20220909-122924.yaml delete mode 100644 .changes/unreleased/Features-20220913-084836.yaml delete mode 100644 .changes/unreleased/Features-20220919-232721.yaml delete mode 100644 .changes/unreleased/Under the Hood-20220908-143218.yaml delete mode 100644 .changes/unreleased/Under the Hood-20220915-145212.yaml delete mode 100644 .changes/unreleased/Under the Hood-20220920-151057.yaml delete mode 100644 .changes/unreleased/Under the Hood-20220924-134858.yaml diff --git a/.bumpversion.cfg b/.bumpversion.cfg index d3aa82250..047a5cd7d 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.3.0b2 +current_version = 1.4.0a1 parse = (?P<major>\d+) \.(?P<minor>\d+) \.(?P<patch>\d+) diff --git a/.changes/1.3.0-b1.md b/.changes/1.3.0-b1.md deleted file mode 100644 index 9d296a9bb..000000000 --- a/.changes/1.3.0-b1.md +++ /dev/null @@ -1,6 +0,0 @@ -## dbt-bigquery 1.3.0-b1 - August 04, 2022 -### Features -- Implement `create_schema` via SQL, instead of Python method, allowing users to override if desired. drop_schema remains a Python method for the time being. ([#182](https://github.com/dbt-labs/dbt-bigquery/issues/182), [#183](https://github.com/dbt-labs/dbt-bigquery/pull/183)) -- Added table and incrementail materializations for python models via DataProc. ([#209](https://github.com/dbt-labs/dbt-bigquery/issues/209), [#226](https://github.com/dbt-labs/dbt-bigquery/pull/226)) -### Under the Hood -- Implement minimal changes to support dbt Core incremental materialization refactor. ([#232](https://github.com/dbt-labs/dbt-bigquery/issues/232), [#223](https://github.com/dbt-labs/dbt-bigquery/pull/223)) diff --git a/.changes/1.3.0-b2.md b/.changes/1.3.0-b2.md deleted file mode 100644 index a62c278cc..000000000 --- a/.changes/1.3.0-b2.md +++ /dev/null @@ -1,10 +0,0 @@ -## dbt-bigquery 1.3.0-b2 - August 29, 2022 -### Under the Hood -- Add changie to dbt-bigquery ([#254](https://github.com/dbt-labs/dbt-bigquery/issues/254), [#253](https://github.com/dbt-labs/dbt-bigquery/pull/253)) -- Add location/job_id/project_id to adapter response to enable easy job linking ([#92](https://github.com/dbt-labs/dbt-bigquery/issues/92), [#250](https://github.com/dbt-labs/dbt-bigquery/pull/250)) -- Adding `slot_ms` go `BigQueryAdapterResponse` ([#194](https://github.com/dbt-labs/dbt-bigquery/issues/194), [#195](https://github.com/dbt-labs/dbt-bigquery/pull/195)) -- specify supported_languages for materialization that support python models ([#288](https://github.com/dbt-labs/dbt-bigquery/issues/288), [#290](https://github.com/dbt-labs/dbt-bigquery/pull/290)) - -### Contributors -- [@Kayrnt](https://github.com/Kayrnt) ([#250](https://github.com/dbt-labs/dbt-bigquery/pull/250)) -- [@yu-iskw](https://github.com/yu-iskw) ([#195](https://github.com/dbt-labs/dbt-bigquery/pull/195)) diff --git a/.changes/1.3.0/Features-20220804-154944.yaml b/.changes/1.3.0/Features-20220804-154944.yaml deleted file mode 100644 index 65bd2b32e..000000000 --- a/.changes/1.3.0/Features-20220804-154944.yaml +++ /dev/null @@ -1,8 +0,0 @@ -kind: Features -body: 'Implement `create_schema` via SQL, instead of Python method, allowing users - to override if desired. drop_schema remains a Python method for the time being. ' -time: 2022-08-04T15:49:44.409354-05:00 -custom: - Author: jtcohen6 - Issue: "182" - PR: "183" diff --git a/.changes/1.3.0/Features-20220804-155057.yaml b/.changes/1.3.0/Features-20220804-155057.yaml deleted file mode 100644 index 39bd6ad45..000000000 --- a/.changes/1.3.0/Features-20220804-155057.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Features -body: Added table and incrementail materializations for python models via DataProc. -time: 2022-08-04T15:50:57.234209-05:00 -custom: - Author: stu-k - Issue: "209" - PR: "226" diff --git a/.changes/1.3.0/Under the Hood-20220804-155219.yaml b/.changes/1.3.0/Under the Hood-20220804-155219.yaml deleted file mode 100644 index f5e6b2d9c..000000000 --- a/.changes/1.3.0/Under the Hood-20220804-155219.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Under the Hood -body: Implement minimal changes to support dbt Core incremental materialization refactor. -time: 2022-08-04T15:52:19.342843-05:00 -custom: - Author: gshank - Issue: "232" - PR: "223" diff --git a/.changes/1.3.0/Under the Hood-20220804-155719.yaml b/.changes/1.3.0/Under the Hood-20220804-155719.yaml deleted file mode 100644 index 631333c02..000000000 --- a/.changes/1.3.0/Under the Hood-20220804-155719.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Under the Hood -body: Add changie to dbt-bigquery -time: 2022-08-04T15:57:19.060995-05:00 -custom: - Author: mcknight-42 - Issue: "254" - PR: "253" diff --git a/.changes/1.3.0/Under the Hood-20220806-142912.yaml b/.changes/1.3.0/Under the Hood-20220806-142912.yaml deleted file mode 100644 index 6581f8e22..000000000 --- a/.changes/1.3.0/Under the Hood-20220806-142912.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Under the Hood -body: Add location/job_id/project_id to adapter response to enable easy job linking -time: 2022-08-06T14:29:12.271054+02:00 -custom: - Author: Kayrnt - Issue: "92" - PR: "250" diff --git a/.changes/1.3.0/Under the Hood-20220817-154151.yaml b/.changes/1.3.0/Under the Hood-20220817-154151.yaml deleted file mode 100644 index d3de5a992..000000000 --- a/.changes/1.3.0/Under the Hood-20220817-154151.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Under the Hood -body: Adding `slot_ms` go `BigQueryAdapterResponse` -time: 2022-08-17T15:41:51.080936-05:00 -custom: - Author: yu-iskw - Issue: "194" - PR: "195" diff --git a/.changes/1.3.0/Under the Hood-20220825-073235.yaml b/.changes/1.3.0/Under the Hood-20220825-073235.yaml deleted file mode 100644 index f33d77173..000000000 --- a/.changes/1.3.0/Under the Hood-20220825-073235.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Under the Hood -body: specify supported_languages for materialization that support python models -time: 2022-08-25T07:32:35.820396-07:00 -custom: - Author: ChenyuLInx - Issue: "288" - PR: "290" diff --git a/.changes/unreleased/Dependency-20220902-152810.yaml b/.changes/unreleased/Dependency-20220902-152810.yaml deleted file mode 100644 index 2d7fb11ef..000000000 --- a/.changes/unreleased/Dependency-20220902-152810.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Dependency -body: "Bump black from 22.6.0 to 22.8.0" -time: 2022-09-02T15:28:10.00000Z -custom: - Author: dependabot[bot] - Issue: 254 - PR: 294 diff --git a/.changes/unreleased/Dependency-20220920-200205.yaml b/.changes/unreleased/Dependency-20220920-200205.yaml deleted file mode 100644 index 6dcfa5924..000000000 --- a/.changes/unreleased/Dependency-20220920-200205.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Dependency -body: "Bump mypy from 0.942 to 0.971" -time: 2022-09-20T20:02:05.00000Z -custom: - Author: dependabot[bot] - Issue: 254 - PR: 296 diff --git a/.changes/unreleased/Features-20220909-122924.yaml b/.changes/unreleased/Features-20220909-122924.yaml deleted file mode 100644 index cde9bbb43..000000000 --- a/.changes/unreleased/Features-20220909-122924.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Features -body: Add support for Dataproc Serverless -time: 2022-09-09T12:29:24.993388-07:00 -custom: - Author: ChenyuLInx - Issue: "248" - PR: "303" diff --git a/.changes/unreleased/Features-20220913-084836.yaml b/.changes/unreleased/Features-20220913-084836.yaml deleted file mode 100644 index 0a4bd275d..000000000 --- a/.changes/unreleased/Features-20220913-084836.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Features -body: Array macros -time: 2022-09-13T08:48:36.255365-06:00 -custom: - Author: graciegoheen dbeatty10 - Issue: "307" - PR: "308" diff --git a/.changes/unreleased/Features-20220919-232721.yaml b/.changes/unreleased/Features-20220919-232721.yaml deleted file mode 100644 index 4f5c90dd0..000000000 --- a/.changes/unreleased/Features-20220919-232721.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Features -body: add type_boolean as a data type macro -time: 2022-09-19T23:27:21.864912+01:00 -custom: - Author: jpmmcneill - Issue: "315" - PR: "313" diff --git a/.changes/unreleased/Under the Hood-20220908-143218.yaml b/.changes/unreleased/Under the Hood-20220908-143218.yaml deleted file mode 100644 index 7053f96b6..000000000 --- a/.changes/unreleased/Under the Hood-20220908-143218.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Under the Hood -body: Convert df to pyspark DataFrame if it is pandas before writing -time: 2022-09-08T14:32:18.243412-04:00 -custom: - Author: chamini2 - Issue: "301" - PR: "301" diff --git a/.changes/unreleased/Under the Hood-20220915-145212.yaml b/.changes/unreleased/Under the Hood-20220915-145212.yaml deleted file mode 100644 index 6ab035256..000000000 --- a/.changes/unreleased/Under the Hood-20220915-145212.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Under the Hood -body: Update BQ job and call retry settings -time: 2022-09-15T14:52:12.902965-07:00 -custom: - Author: colin-rogers-dbt - Issue: "311" - PR: "310" diff --git a/.changes/unreleased/Under the Hood-20220920-151057.yaml b/.changes/unreleased/Under the Hood-20220920-151057.yaml deleted file mode 100644 index 86914c4f3..000000000 --- a/.changes/unreleased/Under the Hood-20220920-151057.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Under the Hood -body: Enable pandas-on-Spark DataFrames for dbt python models -time: 2022-09-20T15:10:57.712169-06:00 -custom: - Author: dbeatty10 - Issue: "316" - PR: "317" diff --git a/.changes/unreleased/Under the Hood-20220924-134858.yaml b/.changes/unreleased/Under the Hood-20220924-134858.yaml deleted file mode 100644 index a4fbceb3f..000000000 --- a/.changes/unreleased/Under the Hood-20220924-134858.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Under the Hood -body: Convert df to pyspark DataFrame if it is koalas before writing -time: 2022-09-24T13:48:58.614746-06:00 -custom: - Author: dbeatty10 ueshin - Issue: "320" - PR: "321" diff --git a/CHANGELOG.md b/CHANGELOG.md index a9038a76d..73a284e72 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,24 +4,10 @@ - Changes are listed under the (pre)release in which they first appear. Subsequent releases include changes from previous releases. - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version. - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-bigquery/blob/main/CONTRIBUTING.md#adding-changelog-entry) -## dbt-bigquery 1.3.0-b2 - August 29, 2022 -### Under the Hood -- Add changie to dbt-bigquery ([#254](https://github.com/dbt-labs/dbt-bigquery/issues/254), [#253](https://github.com/dbt-labs/dbt-bigquery/pull/253)) -- Add location/job_id/project_id to adapter response to enable easy job linking ([#92](https://github.com/dbt-labs/dbt-bigquery/issues/92), [#250](https://github.com/dbt-labs/dbt-bigquery/pull/250)) -- Adding `slot_ms` go `BigQueryAdapterResponse` ([#194](https://github.com/dbt-labs/dbt-bigquery/issues/194), [#195](https://github.com/dbt-labs/dbt-bigquery/pull/195)) -- specify supported_languages for materialization that support python models ([#288](https://github.com/dbt-labs/dbt-bigquery/issues/288), [#290](https://github.com/dbt-labs/dbt-bigquery/pull/290)) -### Contributors -- [@Kayrnt](https://github.com/Kayrnt) ([#250](https://github.com/dbt-labs/dbt-bigquery/pull/250)) -- [@yu-iskw](https://github.com/yu-iskw) ([#195](https://github.com/dbt-labs/dbt-bigquery/pull/195)) -## dbt-bigquery 1.3.0-b1 - August 04, 2022 -### Features -- Implement `create_schema` via SQL, instead of Python method, allowing users to override if desired. drop_schema remains a Python method for the time being. ([#182](https://github.com/dbt-labs/dbt-bigquery/issues/182), [#183](https://github.com/dbt-labs/dbt-bigquery/pull/183)) -- Added table and incrementail materializations for python models via DataProc. ([#209](https://github.com/dbt-labs/dbt-bigquery/issues/209), [#226](https://github.com/dbt-labs/dbt-bigquery/pull/226)) -### Under the Hood -- Implement minimal changes to support dbt Core incremental materialization refactor. ([#232](https://github.com/dbt-labs/dbt-bigquery/issues/232), [#223](https://github.com/dbt-labs/dbt-bigquery/pull/223)) ## Previous Releases For information on prior major and minor releases, see their changelogs: +- [1.3](https://github.com/dbt-labs/dbt-bigquery/blob/1.3.latest/CHANGELOG.md) - [1.2](https://github.com/dbt-labs/dbt-bigquery/blob/1.2.latest/CHANGELOG.md) - [1.1](https://github.com/dbt-labs/dbt-bigquery/blob/1.1.latest/CHANGELOG.md) - [1.0](https://github.com/dbt-labs/dbt-bigquery/blob/1.0.latest/CHANGELOG.md) diff --git a/dbt/adapters/bigquery/__version__.py b/dbt/adapters/bigquery/__version__.py index e2c1a233c..70ba273f5 100644 --- a/dbt/adapters/bigquery/__version__.py +++ b/dbt/adapters/bigquery/__version__.py @@ -1 +1 @@ -version = "1.3.0b2" +version = "1.4.0a1" diff --git a/setup.py b/setup.py index 81a267711..3345dbc04 100644 --- a/setup.py +++ b/setup.py @@ -50,7 +50,7 @@ def _get_dbt_core_version(): package_name = "dbt-bigquery" -package_version = "1.3.0b2" +package_version = "1.4.0a1" dbt_core_version = _get_dbt_core_version() description = """The BigQuery adapter plugin for dbt""" From 86d5dbe2c3d5ee863ef0b97a47f276cf26518c83 Mon Sep 17 00:00:00 2001 From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com> Date: Fri, 30 Sep 2022 16:37:13 -0700 Subject: [PATCH 24/24] consolidate timestamps (#323) * update retry settings * changelog entry * consolidate timestamps * add changie * whitespace fix * fix macro name * update expected test fixture * Update Features-20220926-105700.yaml * add backcompat to test fixture * add backcompat * update dev-requirements * Update change log body * update test class name * lowercase timestamps --- .../unreleased/Features-20220926-105700.yaml | 7 +++++++ dbt/include/bigquery/macros/adapters.sql | 11 ----------- .../bigquery/macros/utils/timestamps.sql | 12 ++++++++++++ .../adapter/utils/test_timestamps.py | 18 ++++++++++++++++++ 4 files changed, 37 insertions(+), 11 deletions(-) create mode 100644 .changes/unreleased/Features-20220926-105700.yaml create mode 100644 dbt/include/bigquery/macros/utils/timestamps.sql create mode 100644 tests/functional/adapter/utils/test_timestamps.py diff --git a/.changes/unreleased/Features-20220926-105700.yaml b/.changes/unreleased/Features-20220926-105700.yaml new file mode 100644 index 000000000..61e0ac741 --- /dev/null +++ b/.changes/unreleased/Features-20220926-105700.yaml @@ -0,0 +1,7 @@ +kind: Features +body: Migrate dbt-utils current_timestamp macros into core + adapters +time: 2022-09-26T10:57:00.942765-07:00 +custom: + Author: colin-rogers-dbt + Issue: "324" + PR: "323" diff --git a/dbt/include/bigquery/macros/adapters.sql b/dbt/include/bigquery/macros/adapters.sql index cbfba2627..07cf3c3e5 100644 --- a/dbt/include/bigquery/macros/adapters.sql +++ b/dbt/include/bigquery/macros/adapters.sql @@ -109,17 +109,6 @@ {%- endmacro %} -{% macro bigquery__current_timestamp() -%} - CURRENT_TIMESTAMP() -{%- endmacro %} - - -{% macro bigquery__snapshot_string_as_time(timestamp) -%} - {%- set result = 'TIMESTAMP("' ~ timestamp ~ '")' -%} - {{ return(result) }} -{%- endmacro %} - - {% macro bigquery__list_schemas(database) -%} {{ return(adapter.list_schemas(database)) }} {% endmacro %} diff --git a/dbt/include/bigquery/macros/utils/timestamps.sql b/dbt/include/bigquery/macros/utils/timestamps.sql new file mode 100644 index 000000000..cdcbfd51e --- /dev/null +++ b/dbt/include/bigquery/macros/utils/timestamps.sql @@ -0,0 +1,12 @@ +{% macro bigquery__current_timestamp() -%} + current_timestamp() +{%- endmacro %} + +{% macro bigquery__snapshot_string_as_time(timestamp) -%} + {%- set result = 'TIMESTAMP("' ~ timestamp ~ '")' -%} + {{ return(result) }} +{%- endmacro %} + +{% macro bigquery__current_timestamp_backcompat() -%} + current_timestamp +{%- endmacro %} diff --git a/tests/functional/adapter/utils/test_timestamps.py b/tests/functional/adapter/utils/test_timestamps.py new file mode 100644 index 000000000..2f35e40ee --- /dev/null +++ b/tests/functional/adapter/utils/test_timestamps.py @@ -0,0 +1,18 @@ +import pytest +from dbt.tests.adapter.utils.test_timestamps import BaseCurrentTimestamps + + +class TestCurrentTimestampBigQuery(BaseCurrentTimestamps): + @pytest.fixture(scope="class") + def expected_schema(self): + return { + "current_timestamp": "TIMESTAMP", + "current_timestamp_in_utc_backcompat": "TIMESTAMP", + "current_timestamp_backcompat": "TIMESTAMP", + } + + @pytest.fixture(scope="class") + def expected_sql(self): + return """select current_timestamp() as current_timestamp, + current_timestamp as current_timestamp_in_utc_backcompat, + current_timestamp as current_timestamp_backcompat""" \ No newline at end of file