From f03d217ebb004bb889eff80d61da14bc72983370 Mon Sep 17 00:00:00 2001 From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com> Date: Wed, 4 May 2022 15:31:21 -0500 Subject: [PATCH 01/54] rename dev_requirements.txt -> dev-requirements.txt to match dbt-core (#344) * rename dev_requirements.txt -> dev-requirements.txt to match dbt-core * add changelog entry --- .github/workflows/release.yml | 34 ++++++++++---------- .github/workflows/version-bump.yml | 20 ++++++------ CHANGELOG.md | 1 + dev_requirements.txt => dev-requirements.txt | 0 tox.ini | 14 ++++---- 5 files changed, 35 insertions(+), 34 deletions(-) rename dev_requirements.txt => dev-requirements.txt (100%) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index b25ea884e..b40371e8a 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -3,28 +3,28 @@ name: Build and Release on: workflow_dispatch: - + # Release version number that must be updated for each release env: version_number: '0.20.0rc2' -jobs: +jobs: Test: runs-on: ubuntu-latest steps: - name: Setup Python uses: actions/setup-python@v2.2.2 - with: + with: python-version: '3.8' - + - uses: actions/checkout@v2 - - name: Test release + - name: Test release run: | python3 -m venv env source env/bin/activate sudo apt-get install libsasl2-dev - pip install -r dev_requirements.txt + pip install -r dev-requirements.txt pip install twine wheel setuptools python setup.py sdist bdist_wheel pip install dist/dbt-spark-*.tar.gz @@ -38,9 +38,9 @@ jobs: steps: - name: Setup Python uses: actions/setup-python@v2.2.2 - with: + with: python-version: '3.8' - + - uses: actions/checkout@v2 - name: Bumping version @@ -48,7 +48,7 @@ jobs: python3 -m venv env source env/bin/activate sudo apt-get install libsasl2-dev - pip install -r dev_requirements.txt + pip install -r dev-requirements.txt bumpversion --config-file .bumpversion-dbt.cfg patch --new-version ${{env.version_number}} bumpversion --config-file .bumpversion.cfg patch --new-version ${{env.version_number}} --allow-dirty git status @@ -60,7 +60,7 @@ jobs: author_email: 'leah.antkiewicz@dbtlabs.com' message: 'Bumping version to ${{env.version_number}}' tag: v${{env.version_number}} - + # Need to set an output variable because env variables can't be taken as input # This is needed for the next step with releasing to GitHub - name: Find release type @@ -69,7 +69,7 @@ jobs: IS_PRERELEASE: ${{ contains(env.version_number, 'rc') || contains(env.version_number, 'b') }} run: | echo ::set-output name=isPrerelease::$IS_PRERELEASE - + - name: Create GitHub release uses: actions/create-release@v1 env: @@ -88,7 +88,7 @@ jobs: # or $ pip install "dbt-spark[PyHive]==${{env.version_number}}" ``` - + PypiRelease: name: Pypi release runs-on: ubuntu-latest @@ -97,13 +97,13 @@ jobs: steps: - name: Setup Python uses: actions/setup-python@v2.2.2 - with: + with: python-version: '3.8' - + - uses: actions/checkout@v2 with: ref: v${{env.version_number}} - + - name: Release to pypi env: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} @@ -112,8 +112,8 @@ jobs: python3 -m venv env source env/bin/activate sudo apt-get install libsasl2-dev - pip install -r dev_requirements.txt + pip install -r dev-requirements.txt pip install twine wheel setuptools python setup.py sdist bdist_wheel twine upload --non-interactive dist/dbt_spark-${{env.version_number}}-py3-none-any.whl dist/dbt-spark-${{env.version_number}}.tar.gz - + diff --git a/.github/workflows/version-bump.yml b/.github/workflows/version-bump.yml index 
7fb8bb6eb..a8b3236ce 100644 --- a/.github/workflows/version-bump.yml +++ b/.github/workflows/version-bump.yml @@ -1,16 +1,16 @@ # **what?** # This workflow will take a version number and a dry run flag. With that -# it will run versionbump to update the version number everywhere in the +# it will run versionbump to update the version number everywhere in the # code base and then generate an update Docker requirements file. If this # is a dry run, a draft PR will open with the changes. If this isn't a dry # run, the changes will be committed to the branch this is run on. # **why?** -# This is to aid in releasing dbt and making sure we have updated +# This is to aid in releasing dbt and making sure we have updated # the versions and Docker requirements in all places. # **when?** -# This is triggered either manually OR +# This is triggered either manually OR # from the repository_dispatch event "version-bump" which is sent from # the dbt-release repo Action @@ -25,11 +25,11 @@ on: is_dry_run: description: 'Creates a draft PR to allow testing instead of committing to a branch' required: true - default: 'true' + default: 'true' repository_dispatch: types: [version-bump] -jobs: +jobs: bump: runs-on: ubuntu-latest steps: @@ -58,19 +58,19 @@ jobs: sudo apt-get install libsasl2-dev python3 -m venv env source env/bin/activate - pip install --upgrade pip - + pip install --upgrade pip + - name: Create PR branch if: ${{ steps.variables.outputs.IS_DRY_RUN == 'true' }} run: | git checkout -b bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_$GITHUB_RUN_ID git push origin bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_$GITHUB_RUN_ID git branch --set-upstream-to=origin/bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_$GITHUB_RUN_ID bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_$GITHUB_RUN_ID - + - name: Bumping version run: | source env/bin/activate - pip install -r dev_requirements.txt + pip install -r dev-requirements.txt env/bin/bumpversion --allow-dirty --new-version ${{steps.variables.outputs.VERSION_NUMBER}} major git status @@ -100,4 +100,4 @@ jobs: draft: true base: ${{github.ref}} title: 'Bumping version to ${{steps.variables.outputs.VERSION_NUMBER}}' - branch: 'bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_${{GITHUB.RUN_ID}}' + branch: 'bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_${{GITHUB.RUN_ID}}' diff --git a/CHANGELOG.md b/CHANGELOG.md index f9a094942..5ad68a5ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ### Features - Add session connection method ([#272](https://github.com/dbt-labs/dbt-spark/issues/272), [#279](https://github.com/dbt-labs/dbt-spark/pull/279)) +- rename file to match reference to dbt-core ([#344](https://github.com/dbt-labs/dbt-spark/pull/344)) ### Under the hood - Use dbt.tests.adapter.basic in test suite ([#298](https://github.com/dbt-labs/dbt-spark/issues/298), [#299](https://github.com/dbt-labs/dbt-spark/pull/299)) diff --git a/dev_requirements.txt b/dev-requirements.txt similarity index 100% rename from dev_requirements.txt rename to dev-requirements.txt diff --git a/tox.ini b/tox.ini index 1e0e2b8b6..59b931dca 100644 --- a/tox.ini +++ b/tox.ini @@ -8,7 +8,7 @@ basepython = python3.8 commands = /bin/bash -c '$(which flake8) --max-line-length 99 --select=E,W,F --ignore=W504 dbt/' passenv = DBT_* PYTEST_ADDOPTS deps = - -r{toxinidir}/dev_requirements.txt + -r{toxinidir}/dev-requirements.txt [testenv:unit] basepython = python3.8 @@ -16,7 +16,7 @@ commands = /bin/bash -c 
'{envpython} -m pytest -v {posargs} tests/unit' passenv = DBT_* PYTEST_ADDOPTS deps = -r{toxinidir}/requirements.txt - -r{toxinidir}/dev_requirements.txt + -r{toxinidir}/dev-requirements.txt [testenv:integration-spark-databricks-http] basepython = python3.8 @@ -24,7 +24,7 @@ commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_http_clus passenv = DBT_* PYTEST_ADDOPTS deps = -r{toxinidir}/requirements.txt - -r{toxinidir}/dev_requirements.txt + -r{toxinidir}/dev-requirements.txt -e. [testenv:integration-spark-databricks-odbc-cluster] @@ -34,7 +34,7 @@ commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_cluster { passenv = DBT_* PYTEST_ADDOPTS ODBC_DRIVER deps = -r{toxinidir}/requirements.txt - -r{toxinidir}/dev_requirements.txt + -r{toxinidir}/dev-requirements.txt -e. [testenv:integration-spark-databricks-odbc-sql-endpoint] @@ -44,7 +44,7 @@ commands = /bin/bash -c '{envpython} -m pytest -v --profile databricks_sql_endpo passenv = DBT_* PYTEST_ADDOPTS ODBC_DRIVER deps = -r{toxinidir}/requirements.txt - -r{toxinidir}/dev_requirements.txt + -r{toxinidir}/dev-requirements.txt -e. @@ -55,7 +55,7 @@ commands = /bin/bash -c '{envpython} -m pytest -v --profile apache_spark {posarg passenv = DBT_* PYTEST_ADDOPTS deps = -r{toxinidir}/requirements.txt - -r{toxinidir}/dev_requirements.txt + -r{toxinidir}/dev-requirements.txt -e. [testenv:integration-spark-session] @@ -67,5 +67,5 @@ passenv = PIP_CACHE_DIR deps = -r{toxinidir}/requirements.txt - -r{toxinidir}/dev_requirements.txt + -r{toxinidir}/dev-requirements.txt -e.[session] From 5ff1c42560fbf4265d9cc36eb18f18fff0f91877 Mon Sep 17 00:00:00 2001 From: Mila Page <67295367+VersusFacit@users.noreply.github.com> Date: Fri, 13 May 2022 09:06:43 -0700 Subject: [PATCH 02/54] Add commit hook tooling. (#356) * Add commit hook tooling. (#346) * Make pre commit code changes (#345) * Refactor exception function using accurate type specs. * Upgrade the mypy version in order to use TypeAlias-es. * Upgrade the mypy version in the worfklow. Fix bug introduced by using str's instead of Optional[str]'s * Address code review comments: Remove integration test command since there are multiple ways we can run tests in spark. * Add changelog entry * Altering names of dev_requirements references. 
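The mypy/TypeAlias refactor described in the bullets above lands in dbt/adapters/spark/impl.py further down in this patch. As a minimal illustrative sketch of that pattern (not part of the patch itself; the placeholder classes stand in for dbt's real BaseRelation and SQLAdapter base classes):

```python
from typing_extensions import TypeAlias


class BaseRelation:  # placeholder for dbt's BaseRelation
    pass


class SparkRelation(BaseRelation):
    pass


class SQLAdapter:  # placeholder for dbt's SQLAdapter base class
    Relation = BaseRelation


class SparkAdapter(SQLAdapter):
    # PEP 613: annotating the class-level override as a TypeAlias marks it as a
    # type reference rather than an ordinary value, which is how this patch
    # spells the Relation/Column/ConnectionManager overrides for mypy 0.950.
    Relation: TypeAlias = SparkRelation
```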
--- .bumpversion.cfg | 5 +- .flake8 | 12 + .github/ISSUE_TEMPLATE/dependabot.yml | 2 +- .github/ISSUE_TEMPLATE/release.md | 2 +- .github/pull_request_template.md | 2 +- .github/workflows/jira-creation.yml | 2 +- .github/workflows/jira-label.yml | 3 +- .github/workflows/jira-transition.yml | 2 +- .github/workflows/main.yml | 26 +- .github/workflows/release.yml | 1 - .github/workflows/stale.yml | 2 - .gitignore | 53 +++- .pre-commit-config.yaml | 66 +++++ CHANGELOG.md | 1 + MANIFEST.in | 2 +- Makefile | 56 ++++ dbt/adapters/spark/__init__.py | 5 +- dbt/adapters/spark/column.py | 25 +- dbt/adapters/spark/connections.py | 210 +++++++-------- dbt/adapters/spark/impl.py | 240 ++++++++---------- dbt/adapters/spark/relation.py | 10 +- dbt/adapters/spark/session.py | 22 +- dbt/include/spark/__init__.py | 1 + dbt/include/spark/macros/adapters.sql | 14 +- .../incremental/incremental.sql | 10 +- .../incremental/strategies.sql | 10 +- .../materializations/incremental/validate.sql | 4 +- .../macros/materializations/snapshot.sql | 2 +- .../spark/macros/materializations/table.sql | 2 +- dev-requirements.txt | 20 +- scripts/build-dist.sh | 2 +- setup.py | 72 +++--- tox.ini | 8 - 33 files changed, 489 insertions(+), 405 deletions(-) create mode 100644 .flake8 create mode 100644 .pre-commit-config.yaml create mode 100644 Makefile diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 744284849..4de02c345 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -4,7 +4,7 @@ parse = (?P\d+) \.(?P\d+) \.(?P\d+) ((?Pa|b|rc)(?P\d+))? -serialize = +serialize = {major}.{minor}.{patch}{prerelease}{num} {major}.{minor}.{patch} commit = False @@ -13,7 +13,7 @@ tag = False [bumpversion:part:prerelease] first_value = a optional_value = final -values = +values = a b rc @@ -25,4 +25,3 @@ first_value = 1 [bumpversion:file:setup.py] [bumpversion:file:dbt/adapters/spark/__version__.py] - diff --git a/.flake8 b/.flake8 new file mode 100644 index 000000000..f39d154c0 --- /dev/null +++ b/.flake8 @@ -0,0 +1,12 @@ +[flake8] +select = + E + W + F +ignore = + W503 # makes Flake8 work like black + W504 + E203 # makes Flake8 work like black + E741 + E501 +exclude = test diff --git a/.github/ISSUE_TEMPLATE/dependabot.yml b/.github/ISSUE_TEMPLATE/dependabot.yml index 8a8c85b9f..2a6f34492 100644 --- a/.github/ISSUE_TEMPLATE/dependabot.yml +++ b/.github/ISSUE_TEMPLATE/dependabot.yml @@ -5,4 +5,4 @@ updates: directory: "/" schedule: interval: "daily" - rebase-strategy: "disabled" \ No newline at end of file + rebase-strategy: "disabled" diff --git a/.github/ISSUE_TEMPLATE/release.md b/.github/ISSUE_TEMPLATE/release.md index ac28792a3..a69349f54 100644 --- a/.github/ISSUE_TEMPLATE/release.md +++ b/.github/ISSUE_TEMPLATE/release.md @@ -7,4 +7,4 @@ assignees: '' --- -### TBD \ No newline at end of file +### TBD diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 60e12779b..5928b1cbf 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -18,4 +18,4 @@ resolves # - [ ] I have signed the [CLA](https://docs.getdbt.com/docs/contributor-license-agreements) - [ ] I have run this code in development and it appears to resolve the stated issue - [ ] This PR includes tests, or tests are not required/relevant for this PR -- [ ] I have updated the `CHANGELOG.md` and added information about my change to the "dbt-spark next" section. \ No newline at end of file +- [ ] I have updated the `CHANGELOG.md` and added information about my change to the "dbt-spark next" section. 
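For reference on the .bumpversion.cfg hunk above: the `parse` regex and the `{major}.{minor}.{patch}{prerelease}{num}` `serialize` template are two halves of the same round trip. A small illustrative check against the `0.20.0rc2` version number used in release.yml (not part of the patch; the named groups are inferred from the serialize template):

```python
import re

# Named groups mirror the {major}.{minor}.{patch}{prerelease}{num} serialize template.
VERSION_RE = re.compile(
    r"(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)"
    r"((?P<prerelease>a|b|rc)(?P<num>\d+))?"
)

match = VERSION_RE.match("0.20.0rc2")
assert match is not None
print(match.groupdict())
# {'major': '0', 'minor': '20', 'patch': '0', 'prerelease': 'rc', 'num': '2'}
```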
diff --git a/.github/workflows/jira-creation.yml b/.github/workflows/jira-creation.yml index c84e106a7..b4016befc 100644 --- a/.github/workflows/jira-creation.yml +++ b/.github/workflows/jira-creation.yml @@ -13,7 +13,7 @@ name: Jira Issue Creation on: issues: types: [opened, labeled] - + permissions: issues: write diff --git a/.github/workflows/jira-label.yml b/.github/workflows/jira-label.yml index fd533a170..3da2e3a38 100644 --- a/.github/workflows/jira-label.yml +++ b/.github/workflows/jira-label.yml @@ -13,7 +13,7 @@ name: Jira Label Mirroring on: issues: types: [labeled, unlabeled] - + permissions: issues: read @@ -24,4 +24,3 @@ jobs: JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }} JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }} JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }} - diff --git a/.github/workflows/jira-transition.yml b/.github/workflows/jira-transition.yml index 71273c7a9..ed9f9cd4f 100644 --- a/.github/workflows/jira-transition.yml +++ b/.github/workflows/jira-transition.yml @@ -21,4 +21,4 @@ jobs: secrets: JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }} JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }} - JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }} \ No newline at end of file + JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }} diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index fbdbbbaae..56685bfc6 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -37,19 +37,10 @@ defaults: jobs: code-quality: - name: ${{ matrix.toxenv }} + name: code-quality runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - toxenv: [flake8] - - env: - TOXENV: ${{ matrix.toxenv }} - PYTEST_ADDOPTS: "-v --color=yes" - steps: - name: Check out the repository uses: actions/checkout@v2 @@ -58,18 +49,19 @@ jobs: - name: Set up Python uses: actions/setup-python@v2 - with: + with: python-version: '3.8' - name: Install python dependencies run: | sudo apt-get install libsasl2-dev pip install --user --upgrade pip - pip install tox - pip --version - tox --version - - name: Run tox - run: tox + pip install -r dev-requirements.txt + pre-commit --version + mypy --version + dbt --version + - name: pre-commit hooks + run: pre-commit run --all-files --show-diff-on-failure unit: name: unit test / python ${{ matrix.python-version }} @@ -153,7 +145,7 @@ jobs: - name: Check wheel contents run: | check-wheel-contents dist/*.whl --ignore W007,W008 - + - name: Check if this is an alpha version id: check-is-alpha run: | diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index b40371e8a..554e13a8d 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -116,4 +116,3 @@ jobs: pip install twine wheel setuptools python setup.py sdist bdist_wheel twine upload --non-interactive dist/dbt_spark-${{env.version_number}}-py3-none-any.whl dist/dbt-spark-${{env.version_number}}.tar.gz - diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index 2848ce8f7..a56455d55 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -13,5 +13,3 @@ jobs: stale-pr-message: "This PR has been marked as Stale because it has been open for 180 days with no activity. If you would like the PR to remain open, please remove the stale label or comment on the PR, or it will be closed in 7 days." 
# mark issues/PRs stale when they haven't seen activity in 180 days days-before-stale: 180 - # ignore checking issues with the following labels - exempt-issue-labels: "epic, discussion" \ No newline at end of file diff --git a/.gitignore b/.gitignore index cc586f5fe..189589cf4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,18 +1,47 @@ -.hive-metastore/ -.spark-warehouse/ -*.egg-info -env/ -*.pyc +# Byte-compiled / optimized / DLL files __pycache__ +*.py[cod] +*$py.class + +# Distribution / packaging +.Python +build/ +env*/ +dbt_env/ +dist/ +*.egg-info +logs/ + + +# Unit test .tox/ .env +test.env + + +# Django stuff +*.log + +# Mypy +*.pytest_cache/ + +# Vim +*.sw* + +# Pyenv +.python-version + +# pycharm .idea/ -build/ -dist/ -dbt-integration-tests -test/integration/.user.yml + +# MacOS .DS_Store -test.env + +# vscode .vscode -*.log -logs/ \ No newline at end of file + +# other +.hive-metastore/ +.spark-warehouse/ +dbt-integration-tests +test/integration/.user.yml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..e70156dcd --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,66 @@ +# For more on configuring pre-commit hooks (see https://pre-commit.com/) + +# TODO: remove global exclusion of tests when testing overhaul is complete +exclude: '^tests/.*' + +# Force all unspecified python hooks to run python 3.8 +default_language_version: + python: python3.8 + +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.2.0 + hooks: + - id: check-yaml + args: [--unsafe] + - id: check-json + - id: end-of-file-fixer + - id: trailing-whitespace + - id: check-case-conflict +- repo: https://github.com/psf/black + rev: 21.12b0 + hooks: + - id: black + additional_dependencies: ['click==8.0.4'] + args: + - "--line-length=99" + - "--target-version=py38" + - id: black + alias: black-check + stages: [manual] + additional_dependencies: ['click==8.0.4'] + args: + - "--line-length=99" + - "--target-version=py38" + - "--check" + - "--diff" +- repo: https://gitlab.com/pycqa/flake8 + rev: 4.0.1 + hooks: + - id: flake8 + - id: flake8 + alias: flake8-check + stages: [manual] +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v0.950 + hooks: + - id: mypy + # N.B.: Mypy is... a bit fragile. + # + # By using `language: system` we run this hook in the local + # environment instead of a pre-commit isolated one. This is needed + # to ensure mypy correctly parses the project. + + # It may cause trouble in that it adds environmental variables out + # of our control to the mix. Unfortunately, there's nothing we can + # do about per pre-commit's author. + # See https://github.com/pre-commit/pre-commit/issues/730 for details. 
+ args: [--show-error-codes, --ignore-missing-imports] + files: ^dbt/adapters/.* + language: system + - id: mypy + alias: mypy-check + stages: [manual] + args: [--show-error-codes, --pretty, --ignore-missing-imports] + files: ^dbt/adapters + language: system diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ad68a5ce..77eb72581 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ - rename file to match reference to dbt-core ([#344](https://github.com/dbt-labs/dbt-spark/pull/344)) ### Under the hood +- Add precommit tooling to this repo ([#356](https://github.com/dbt-labs/dbt-spark/pull/356)) - Use dbt.tests.adapter.basic in test suite ([#298](https://github.com/dbt-labs/dbt-spark/issues/298), [#299](https://github.com/dbt-labs/dbt-spark/pull/299)) - Make internal macros use macro dispatch to be overridable in child adapters ([#319](https://github.com/dbt-labs/dbt-spark/issues/319), [#320](https://github.com/dbt-labs/dbt-spark/pull/320)) - Override adapter method 'run_sql_for_tests' ([#323](https://github.com/dbt-labs/dbt-spark/issues/323), [#324](https://github.com/dbt-labs/dbt-spark/pull/324)) diff --git a/MANIFEST.in b/MANIFEST.in index 78412d5b8..cfbc714ed 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1 @@ -recursive-include dbt/include *.sql *.yml *.md \ No newline at end of file +recursive-include dbt/include *.sql *.yml *.md diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..a520c425f --- /dev/null +++ b/Makefile @@ -0,0 +1,56 @@ +.DEFAULT_GOAL:=help + +.PHONY: dev +dev: ## Installs adapter in develop mode along with development depedencies + @\ + pip install -r dev-requirements.txt && pre-commit install + +.PHONY: mypy +mypy: ## Runs mypy against staged changes for static type checking. + @\ + pre-commit run --hook-stage manual mypy-check | grep -v "INFO" + +.PHONY: flake8 +flake8: ## Runs flake8 against staged changes to enforce style guide. + @\ + pre-commit run --hook-stage manual flake8-check | grep -v "INFO" + +.PHONY: black +black: ## Runs black against staged changes to enforce style guide. + @\ + pre-commit run --hook-stage manual black-check -v | grep -v "INFO" + +.PHONY: lint +lint: ## Runs flake8 and mypy code checks against staged changes. + @\ + pre-commit run flake8-check --hook-stage manual | grep -v "INFO"; \ + pre-commit run mypy-check --hook-stage manual | grep -v "INFO" + +.PHONY: linecheck +linecheck: ## Checks for all Python lines 100 characters or more + @\ + find dbt -type f -name "*.py" -exec grep -I -r -n '.\{100\}' {} \; + +.PHONY: unit +unit: ## Runs unit tests with py38. + @\ + tox -e py38 + +.PHONY: test +test: ## Runs unit tests with py38 and code checks against staged changes. + @\ + tox -p -e py38; \ + pre-commit run black-check --hook-stage manual | grep -v "INFO"; \ + pre-commit run flake8-check --hook-stage manual | grep -v "INFO"; \ + pre-commit run mypy-check --hook-stage manual | grep -v "INFO" + +.PHONY: clean + @echo "cleaning repo" + @git clean -f -X + +.PHONY: help +help: ## Show this help message. 
+ @echo 'usage: make [target]' + @echo + @echo 'targets:' + @grep -E '^[7+a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' diff --git a/dbt/adapters/spark/__init__.py b/dbt/adapters/spark/__init__.py index 469e202b9..6ecc5eccf 100644 --- a/dbt/adapters/spark/__init__.py +++ b/dbt/adapters/spark/__init__.py @@ -8,6 +8,5 @@ from dbt.include import spark Plugin = AdapterPlugin( - adapter=SparkAdapter, - credentials=SparkCredentials, - include_path=spark.PACKAGE_PATH) + adapter=SparkAdapter, credentials=SparkCredentials, include_path=spark.PACKAGE_PATH +) diff --git a/dbt/adapters/spark/column.py b/dbt/adapters/spark/column.py index fd377ad15..4df6b301b 100644 --- a/dbt/adapters/spark/column.py +++ b/dbt/adapters/spark/column.py @@ -1,11 +1,11 @@ from dataclasses import dataclass -from typing import TypeVar, Optional, Dict, Any +from typing import Any, Dict, Optional, TypeVar, Union from dbt.adapters.base.column import Column from dbt.dataclass_schema import dbtClassMixin from hologram import JsonDict -Self = TypeVar('Self', bound='SparkColumn') +Self = TypeVar("Self", bound="SparkColumn") @dataclass @@ -31,7 +31,7 @@ def literal(self, value): @property def quoted(self) -> str: - return '`{}`'.format(self.column) + return "`{}`".format(self.column) @property def data_type(self) -> str: @@ -42,26 +42,23 @@ def __repr__(self) -> str: @staticmethod def convert_table_stats(raw_stats: Optional[str]) -> Dict[str, Any]: - table_stats = {} + table_stats: Dict[str, Union[int, str, bool]] = {} if raw_stats: # format: 1109049927 bytes, 14093476 rows stats = { - stats.split(" ")[1]: int(stats.split(" ")[0]) - for stats in raw_stats.split(', ') + stats.split(" ")[1]: int(stats.split(" ")[0]) for stats in raw_stats.split(", ") } for key, val in stats.items(): - table_stats[f'stats:{key}:label'] = key - table_stats[f'stats:{key}:value'] = val - table_stats[f'stats:{key}:description'] = '' - table_stats[f'stats:{key}:include'] = True + table_stats[f"stats:{key}:label"] = key + table_stats[f"stats:{key}:value"] = val + table_stats[f"stats:{key}:description"] = "" + table_stats[f"stats:{key}:include"] = True return table_stats - def to_column_dict( - self, omit_none: bool = True, validate: bool = False - ) -> JsonDict: + def to_column_dict(self, omit_none: bool = True, validate: bool = False) -> JsonDict: original_dict = self.to_dict(omit_none=omit_none) # If there are stats, merge them into the root of the dict - original_stats = original_dict.pop('table_stats', None) + original_stats = original_dict.pop("table_stats", None) if original_stats: original_dict.update(original_stats) return original_dict diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index 11163ccf0..59ceb9dd8 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -26,6 +26,7 @@ from hologram.helpers import StrEnum from dataclasses import dataclass, field from typing import Any, Dict, Optional + try: from thrift.transport.TSSLSocket import TSSLSocket import thrift @@ -33,11 +34,7 @@ import sasl import thrift_sasl except ImportError: - TSSLSocket = None - thrift = None - ssl = None - sasl = None - thrift_sasl = None + pass # done deliberately: setting modules to None explicitly violates MyPy contracts by degrading type semantics import base64 import time @@ -52,10 +49,10 @@ def _build_odbc_connnection_string(**kwargs) -> str: class SparkConnectionMethod(StrEnum): - THRIFT = 'thrift' - HTTP = 'http' - ODBC = 
'odbc' - SESSION = 'session' + THRIFT = "thrift" + HTTP = "http" + ODBC = "odbc" + SESSION = "session" @dataclass @@ -71,7 +68,7 @@ class SparkCredentials(Credentials): port: int = 443 auth: Optional[str] = None kerberos_service_name: Optional[str] = None - organization: str = '0' + organization: str = "0" connect_retries: int = 0 connect_timeout: int = 10 use_ssl: bool = False @@ -81,27 +78,24 @@ class SparkCredentials(Credentials): @classmethod def __pre_deserialize__(cls, data): data = super().__pre_deserialize__(data) - if 'database' not in data: - data['database'] = None + if "database" not in data: + data["database"] = None return data def __post_init__(self): # spark classifies database and schema as the same thing - if ( - self.database is not None and - self.database != self.schema - ): + if self.database is not None and self.database != self.schema: raise dbt.exceptions.RuntimeException( - f' schema: {self.schema} \n' - f' database: {self.database} \n' - f'On Spark, database must be omitted or have the same value as' - f' schema.' + f" schema: {self.schema} \n" + f" database: {self.database} \n" + f"On Spark, database must be omitted or have the same value as" + f" schema." ) self.database = None if self.method == SparkConnectionMethod.ODBC: try: - import pyodbc # noqa: F401 + import pyodbc # noqa: F401 except ImportError as e: raise dbt.exceptions.RuntimeException( f"{self.method} connection method requires " @@ -111,22 +105,16 @@ def __post_init__(self): f"ImportError({e.msg})" ) from e - if ( - self.method == SparkConnectionMethod.ODBC and - self.cluster and - self.endpoint - ): + if self.method == SparkConnectionMethod.ODBC and self.cluster and self.endpoint: raise dbt.exceptions.RuntimeException( "`cluster` and `endpoint` cannot both be set when" f" using {self.method} method to connect to Spark" ) if ( - self.method == SparkConnectionMethod.HTTP or - self.method == SparkConnectionMethod.THRIFT - ) and not ( - ThriftState and THttpClient and hive - ): + self.method == SparkConnectionMethod.HTTP + or self.method == SparkConnectionMethod.THRIFT + ) and not (ThriftState and THttpClient and hive): raise dbt.exceptions.RuntimeException( f"{self.method} connection method requires " "additional dependencies. 
\n" @@ -148,19 +136,19 @@ def __post_init__(self): @property def type(self): - return 'spark' + return "spark" @property def unique_field(self): return self.host def _connection_keys(self): - return ('host', 'port', 'cluster', - 'endpoint', 'schema', 'organization') + return ("host", "port", "cluster", "endpoint", "schema", "organization") class PyhiveConnectionWrapper(object): """Wrap a Spark connection in a way that no-ops transactions""" + # https://forums.databricks.com/questions/2157/in-apache-spark-sql-can-we-roll-back-the-transacti.html # noqa def __init__(self, handle): @@ -178,9 +166,7 @@ def cancel(self): try: self._cursor.cancel() except EnvironmentError as exc: - logger.debug( - "Exception while cancelling query: {}".format(exc) - ) + logger.debug("Exception while cancelling query: {}".format(exc)) def close(self): if self._cursor: @@ -189,9 +175,7 @@ def close(self): try: self._cursor.close() except EnvironmentError as exc: - logger.debug( - "Exception while closing cursor: {}".format(exc) - ) + logger.debug("Exception while closing cursor: {}".format(exc)) self.handle.close() def rollback(self, *args, **kwargs): @@ -247,23 +231,20 @@ def execute(self, sql, bindings=None): dbt.exceptions.raise_database_error(poll_state.errorMessage) elif state not in STATE_SUCCESS: - status_type = ThriftState._VALUES_TO_NAMES.get( - state, - 'Unknown<{!r}>'.format(state)) + status_type = ThriftState._VALUES_TO_NAMES.get(state, "Unknown<{!r}>".format(state)) - dbt.exceptions.raise_database_error( - "Query failed with status: {}".format(status_type)) + dbt.exceptions.raise_database_error("Query failed with status: {}".format(status_type)) logger.debug("Poll status: {}, query complete".format(state)) @classmethod def _fix_binding(cls, value): """Convert complex datatypes to primitives that can be loaded by - the Spark driver""" + the Spark driver""" if isinstance(value, NUMBERS): return float(value) elif isinstance(value, datetime): - return value.strftime('%Y-%m-%d %H:%M:%S.%f')[:-3] + return value.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3] else: return value @@ -273,7 +254,6 @@ def description(self): class PyodbcConnectionWrapper(PyhiveConnectionWrapper): - def execute(self, sql, bindings=None): if sql.strip().endswith(";"): sql = sql.strip()[:-1] @@ -282,19 +262,17 @@ def execute(self, sql, bindings=None): self._cursor.execute(sql) else: # pyodbc only supports `qmark` sql params! 
- query = sqlparams.SQLParams('format', 'qmark') + query = sqlparams.SQLParams("format", "qmark") sql, bindings = query.format(sql, bindings) self._cursor.execute(sql, *bindings) class SparkConnectionManager(SQLConnectionManager): - TYPE = 'spark' + TYPE = "spark" SPARK_CLUSTER_HTTP_PATH = "/sql/protocolv1/o/{organization}/{cluster}" SPARK_SQL_ENDPOINT_HTTP_PATH = "/sql/1.0/endpoints/{endpoint}" - SPARK_CONNECTION_URL = ( - "{host}:{port}" + SPARK_CLUSTER_HTTP_PATH - ) + SPARK_CONNECTION_URL = "{host}:{port}" + SPARK_CLUSTER_HTTP_PATH @contextmanager def exception_handler(self, sql): @@ -308,7 +286,7 @@ def exception_handler(self, sql): raise thrift_resp = exc.args[0] - if hasattr(thrift_resp, 'status'): + if hasattr(thrift_resp, "status"): msg = thrift_resp.status.errorMessage raise dbt.exceptions.RuntimeException(msg) else: @@ -320,10 +298,8 @@ def cancel(self, connection): @classmethod def get_response(cls, cursor) -> AdapterResponse: # https://github.com/dbt-labs/dbt-spark/issues/142 - message = 'OK' - return AdapterResponse( - _message=message - ) + message = "OK" + return AdapterResponse(_message=message) # No transactions on Spark.... def add_begin_query(self, *args, **kwargs): @@ -346,12 +322,13 @@ def validate_creds(cls, creds, required): if not hasattr(creds, key): raise dbt.exceptions.DbtProfileError( "The config '{}' is required when using the {} method" - " to connect to Spark".format(key, method)) + " to connect to Spark".format(key, method) + ) @classmethod def open(cls, connection): if connection.state == ConnectionState.OPEN: - logger.debug('Connection is already open, skipping open.') + logger.debug("Connection is already open, skipping open.") return connection creds = connection.credentials @@ -360,19 +337,18 @@ def open(cls, connection): for i in range(1 + creds.connect_retries): try: if creds.method == SparkConnectionMethod.HTTP: - cls.validate_creds(creds, ['token', 'host', 'port', - 'cluster', 'organization']) + cls.validate_creds(creds, ["token", "host", "port", "cluster", "organization"]) # Prepend https:// if it is missing host = creds.host - if not host.startswith('https://'): - host = 'https://' + creds.host + if not host.startswith("https://"): + host = "https://" + creds.host conn_url = cls.SPARK_CONNECTION_URL.format( host=host, port=creds.port, organization=creds.organization, - cluster=creds.cluster + cluster=creds.cluster, ) logger.debug("connection url: {}".format(conn_url)) @@ -381,15 +357,12 @@ def open(cls, connection): raw_token = "token:{}".format(creds.token).encode() token = base64.standard_b64encode(raw_token).decode() - transport.setCustomHeaders({ - 'Authorization': 'Basic {}'.format(token) - }) + transport.setCustomHeaders({"Authorization": "Basic {}".format(token)}) conn = hive.connect(thrift_transport=transport) handle = PyhiveConnectionWrapper(conn) elif creds.method == SparkConnectionMethod.THRIFT: - cls.validate_creds(creds, - ['host', 'port', 'user', 'schema']) + cls.validate_creds(creds, ["host", "port", "user", "schema"]) if creds.use_ssl: transport = build_ssl_transport( @@ -397,26 +370,33 @@ def open(cls, connection): port=creds.port, username=creds.user, auth=creds.auth, - kerberos_service_name=creds.kerberos_service_name) + kerberos_service_name=creds.kerberos_service_name, + ) conn = hive.connect(thrift_transport=transport) else: - conn = hive.connect(host=creds.host, - port=creds.port, - username=creds.user, - auth=creds.auth, - kerberos_service_name=creds.kerberos_service_name) # noqa + conn = hive.connect( + host=creds.host, + 
port=creds.port, + username=creds.user, + auth=creds.auth, + kerberos_service_name=creds.kerberos_service_name, + ) # noqa handle = PyhiveConnectionWrapper(conn) elif creds.method == SparkConnectionMethod.ODBC: if creds.cluster is not None: - required_fields = ['driver', 'host', 'port', 'token', - 'organization', 'cluster'] + required_fields = [ + "driver", + "host", + "port", + "token", + "organization", + "cluster", + ] http_path = cls.SPARK_CLUSTER_HTTP_PATH.format( - organization=creds.organization, - cluster=creds.cluster + organization=creds.organization, cluster=creds.cluster ) elif creds.endpoint is not None: - required_fields = ['driver', 'host', 'port', 'token', - 'endpoint'] + required_fields = ["driver", "host", "port", "token", "endpoint"] http_path = cls.SPARK_SQL_ENDPOINT_HTTP_PATH.format( endpoint=creds.endpoint ) @@ -429,13 +409,12 @@ def open(cls, connection): cls.validate_creds(creds, required_fields) dbt_spark_version = __version__.version - user_agent_entry = f"dbt-labs-dbt-spark/{dbt_spark_version} (Databricks)" # noqa + user_agent_entry = ( + f"dbt-labs-dbt-spark/{dbt_spark_version} (Databricks)" # noqa + ) # http://simba.wpengine.com/products/Spark/doc/ODBC_InstallGuide/unix/content/odbc/hi/configuring/serverside.htm - ssp = { - f"SSP_{k}": f"{{{v}}}" - for k, v in creds.server_side_parameters.items() - } + ssp = {f"SSP_{k}": f"{{{v}}}" for k, v in creds.server_side_parameters.items()} # https://www.simba.com/products/Spark/doc/v2/ODBC_InstallGuide/unix/content/odbc/options/driver.htm connection_str = _build_odbc_connnection_string( @@ -461,6 +440,7 @@ def open(cls, connection): Connection, SessionConnectionWrapper, ) + handle = SessionConnectionWrapper(Connection()) else: raise dbt.exceptions.DbtProfileError( @@ -472,9 +452,9 @@ def open(cls, connection): if isinstance(e, EOFError): # The user almost certainly has invalid credentials. # Perhaps a token expired, or something - msg = 'Failed to connect' + msg = "Failed to connect" if creds.token is not None: - msg += ', is your token valid?' + msg += ", is your token valid?" 
raise dbt.exceptions.FailedToConnectException(msg) from e retryable_message = _is_retryable_error(e) if retryable_message and creds.connect_retries > 0: @@ -496,9 +476,7 @@ def open(cls, connection): logger.warning(msg) time.sleep(creds.connect_timeout) else: - raise dbt.exceptions.FailedToConnectException( - 'failed to connect' - ) from e + raise dbt.exceptions.FailedToConnectException("failed to connect") from e else: raise exc @@ -507,56 +485,50 @@ def open(cls, connection): return connection -def build_ssl_transport(host, port, username, auth, - kerberos_service_name, password=None): +def build_ssl_transport(host, port, username, auth, kerberos_service_name, password=None): transport = None if port is None: port = 10000 if auth is None: - auth = 'NONE' + auth = "NONE" socket = TSSLSocket(host, port, cert_reqs=ssl.CERT_NONE) - if auth == 'NOSASL': + if auth == "NOSASL": # NOSASL corresponds to hive.server2.authentication=NOSASL # in hive-site.xml transport = thrift.transport.TTransport.TBufferedTransport(socket) - elif auth in ('LDAP', 'KERBEROS', 'NONE', 'CUSTOM'): + elif auth in ("LDAP", "KERBEROS", "NONE", "CUSTOM"): # Defer import so package dependency is optional - if auth == 'KERBEROS': + if auth == "KERBEROS": # KERBEROS mode in hive.server2.authentication is GSSAPI # in sasl library - sasl_auth = 'GSSAPI' + sasl_auth = "GSSAPI" else: - sasl_auth = 'PLAIN' + sasl_auth = "PLAIN" if password is None: # Password doesn't matter in NONE mode, just needs # to be nonempty. - password = 'x' + password = "x" def sasl_factory(): sasl_client = sasl.Client() - sasl_client.setAttr('host', host) - if sasl_auth == 'GSSAPI': - sasl_client.setAttr('service', kerberos_service_name) - elif sasl_auth == 'PLAIN': - sasl_client.setAttr('username', username) - sasl_client.setAttr('password', password) + sasl_client.setAttr("host", host) + if sasl_auth == "GSSAPI": + sasl_client.setAttr("service", kerberos_service_name) + elif sasl_auth == "PLAIN": + sasl_client.setAttr("username", username) + sasl_client.setAttr("password", password) else: raise AssertionError sasl_client.init() return sasl_client - transport = thrift_sasl.TSaslClientTransport(sasl_factory, - sasl_auth, socket) + transport = thrift_sasl.TSaslClientTransport(sasl_factory, sasl_auth, socket) return transport -def _is_retryable_error(exc: Exception) -> Optional[str]: - message = getattr(exc, 'message', None) - if message is None: - return None - message = message.lower() - if 'pending' in message: - return exc.message - if 'temporarily_unavailable' in message: - return exc.message - return None +def _is_retryable_error(exc: Exception) -> str: + message = str(exc).lower() + if "pending" in message or "temporarily_unavailable" in message: + return str(exc) + else: + return "" diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index eb001fbc9..dd090a23b 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -1,7 +1,9 @@ import re from concurrent.futures import Future from dataclasses import dataclass -from typing import Optional, List, Dict, Any, Union, Iterable +from typing import Any, Dict, Iterable, List, Optional, Union +from typing_extensions import TypeAlias + import agate from dbt.contracts.relation import RelationType @@ -21,19 +23,19 @@ logger = AdapterLogger("Spark") -GET_COLUMNS_IN_RELATION_MACRO_NAME = 'get_columns_in_relation' -LIST_SCHEMAS_MACRO_NAME = 'list_schemas' -LIST_RELATIONS_MACRO_NAME = 'list_relations_without_caching' -DROP_RELATION_MACRO_NAME = 'drop_relation' 
-FETCH_TBL_PROPERTIES_MACRO_NAME = 'fetch_tbl_properties' +GET_COLUMNS_IN_RELATION_MACRO_NAME = "get_columns_in_relation" +LIST_SCHEMAS_MACRO_NAME = "list_schemas" +LIST_RELATIONS_MACRO_NAME = "list_relations_without_caching" +DROP_RELATION_MACRO_NAME = "drop_relation" +FETCH_TBL_PROPERTIES_MACRO_NAME = "fetch_tbl_properties" -KEY_TABLE_OWNER = 'Owner' -KEY_TABLE_STATISTICS = 'Statistics' +KEY_TABLE_OWNER = "Owner" +KEY_TABLE_STATISTICS = "Statistics" @dataclass class SparkConfig(AdapterConfig): - file_format: str = 'parquet' + file_format: str = "parquet" location_root: Optional[str] = None partition_by: Optional[Union[List[str], str]] = None clustered_by: Optional[Union[List[str], str]] = None @@ -44,48 +46,44 @@ class SparkConfig(AdapterConfig): class SparkAdapter(SQLAdapter): COLUMN_NAMES = ( - 'table_database', - 'table_schema', - 'table_name', - 'table_type', - 'table_comment', - 'table_owner', - 'column_name', - 'column_index', - 'column_type', - 'column_comment', - - 'stats:bytes:label', - 'stats:bytes:value', - 'stats:bytes:description', - 'stats:bytes:include', - - 'stats:rows:label', - 'stats:rows:value', - 'stats:rows:description', - 'stats:rows:include', + "table_database", + "table_schema", + "table_name", + "table_type", + "table_comment", + "table_owner", + "column_name", + "column_index", + "column_type", + "column_comment", + "stats:bytes:label", + "stats:bytes:value", + "stats:bytes:description", + "stats:bytes:include", + "stats:rows:label", + "stats:rows:value", + "stats:rows:description", + "stats:rows:include", ) - INFORMATION_COLUMNS_REGEX = re.compile( - r"^ \|-- (.*): (.*) \(nullable = (.*)\b", re.MULTILINE) + INFORMATION_COLUMNS_REGEX = re.compile(r"^ \|-- (.*): (.*) \(nullable = (.*)\b", re.MULTILINE) INFORMATION_OWNER_REGEX = re.compile(r"^Owner: (.*)$", re.MULTILINE) - INFORMATION_STATISTICS_REGEX = re.compile( - r"^Statistics: (.*)$", re.MULTILINE) + INFORMATION_STATISTICS_REGEX = re.compile(r"^Statistics: (.*)$", re.MULTILINE) HUDI_METADATA_COLUMNS = [ - '_hoodie_commit_time', - '_hoodie_commit_seqno', - '_hoodie_record_key', - '_hoodie_partition_path', - '_hoodie_file_name' + "_hoodie_commit_time", + "_hoodie_commit_seqno", + "_hoodie_record_key", + "_hoodie_partition_path", + "_hoodie_file_name", ] - Relation = SparkRelation - Column = SparkColumn - ConnectionManager = SparkConnectionManager - AdapterSpecificConfigs = SparkConfig + Relation: TypeAlias = SparkRelation + Column: TypeAlias = SparkColumn + ConnectionManager: TypeAlias = SparkConnectionManager + AdapterSpecificConfigs: TypeAlias = SparkConfig @classmethod def date_function(cls) -> str: - return 'current_timestamp()' + return "current_timestamp()" @classmethod def convert_text_type(cls, agate_table, col_idx): @@ -109,31 +107,28 @@ def convert_datetime_type(cls, agate_table, col_idx): return "timestamp" def quote(self, identifier): - return '`{}`'.format(identifier) + return "`{}`".format(identifier) def add_schema_to_cache(self, schema) -> str: """Cache a new schema in dbt. 
It will show up in `list relations`.""" if schema is None: name = self.nice_connection_name() dbt.exceptions.raise_compiler_error( - 'Attempted to cache a null schema for {}'.format(name) + "Attempted to cache a null schema for {}".format(name) ) if dbt.flags.USE_CACHE: self.cache.add_schema(None, schema) # so jinja doesn't render things - return '' + return "" def list_relations_without_caching( self, schema_relation: SparkRelation ) -> List[SparkRelation]: - kwargs = {'schema_relation': schema_relation} + kwargs = {"schema_relation": schema_relation} try: - results = self.execute_macro( - LIST_RELATIONS_MACRO_NAME, - kwargs=kwargs - ) + results = self.execute_macro(LIST_RELATIONS_MACRO_NAME, kwargs=kwargs) except dbt.exceptions.RuntimeException as e: - errmsg = getattr(e, 'msg', '') + errmsg = getattr(e, "msg", "") if f"Database '{schema_relation}' not found" in errmsg: return [] else: @@ -146,13 +141,12 @@ def list_relations_without_caching( if len(row) != 4: raise dbt.exceptions.RuntimeException( f'Invalid value from "show table extended ...", ' - f'got {len(row)} values, expected 4' + f"got {len(row)} values, expected 4" ) _schema, name, _, information = row - rel_type = RelationType.View \ - if 'Type: VIEW' in information else RelationType.Table - is_delta = 'Provider: delta' in information - is_hudi = 'Provider: hudi' in information + rel_type = RelationType.View if "Type: VIEW" in information else RelationType.Table + is_delta = "Provider: delta" in information + is_hudi = "Provider: hudi" in information relation = self.Relation.create( schema=_schema, identifier=name, @@ -166,7 +160,7 @@ def list_relations_without_caching( return relations def get_relation( - self, database: str, schema: str, identifier: str + self, database: Optional[str], schema: str, identifier: str ) -> Optional[BaseRelation]: if not self.Relation.include_policy.database: database = None @@ -174,9 +168,7 @@ def get_relation( return super().get_relation(database, schema, identifier) def parse_describe_extended( - self, - relation: Relation, - raw_rows: List[agate.Row] + self, relation: Relation, raw_rows: List[agate.Row] ) -> List[SparkColumn]: # Convert the Row to a dict dict_rows = [dict(zip(row._keys, row._values)) for row in raw_rows] @@ -185,44 +177,45 @@ def parse_describe_extended( pos = self.find_table_information_separator(dict_rows) # Remove rows that start with a hash, they are comments - rows = [ - row for row in raw_rows[0:pos] - if not row['col_name'].startswith('#') - ] - metadata = { - col['col_name']: col['data_type'] for col in raw_rows[pos + 1:] - } + rows = [row for row in raw_rows[0:pos] if not row["col_name"].startswith("#")] + metadata = {col["col_name"]: col["data_type"] for col in raw_rows[pos + 1 :]} raw_table_stats = metadata.get(KEY_TABLE_STATISTICS) table_stats = SparkColumn.convert_table_stats(raw_table_stats) - return [SparkColumn( - table_database=None, - table_schema=relation.schema, - table_name=relation.name, - table_type=relation.type, - table_owner=str(metadata.get(KEY_TABLE_OWNER)), - table_stats=table_stats, - column=column['col_name'], - column_index=idx, - dtype=column['data_type'], - ) for idx, column in enumerate(rows)] + return [ + SparkColumn( + table_database=None, + table_schema=relation.schema, + table_name=relation.name, + table_type=relation.type, + table_owner=str(metadata.get(KEY_TABLE_OWNER)), + table_stats=table_stats, + column=column["col_name"], + column_index=idx, + dtype=column["data_type"], + ) + for idx, column in enumerate(rows) + ] @staticmethod def 
find_table_information_separator(rows: List[dict]) -> int: pos = 0 for row in rows: - if not row['col_name'] or row['col_name'].startswith('#'): + if not row["col_name"] or row["col_name"].startswith("#"): break pos += 1 return pos def get_columns_in_relation(self, relation: Relation) -> List[SparkColumn]: - cached_relations = self.cache.get_relations( - relation.database, relation.schema) - cached_relation = next((cached_relation - for cached_relation in cached_relations - if str(cached_relation) == str(relation)), - None) + cached_relations = self.cache.get_relations(relation.database, relation.schema) + cached_relation = next( + ( + cached_relation + for cached_relation in cached_relations + if str(cached_relation) == str(relation) + ), + None, + ) columns = [] if cached_relation and cached_relation.information: columns = self.parse_columns_from_information(cached_relation) @@ -238,30 +231,21 @@ def get_columns_in_relation(self, relation: Relation) -> List[SparkColumn]: # spark would throw error when table doesn't exist, where other # CDW would just return and empty list, normalizing the behavior here errmsg = getattr(e, "msg", "") - if ( - "Table or view not found" in errmsg or - "NoSuchTableException" in errmsg - ): + if "Table or view not found" in errmsg or "NoSuchTableException" in errmsg: pass else: raise e # strip hudi metadata columns. - columns = [x for x in columns - if x.name not in self.HUDI_METADATA_COLUMNS] + columns = [x for x in columns if x.name not in self.HUDI_METADATA_COLUMNS] return columns - def parse_columns_from_information( - self, relation: SparkRelation - ) -> List[SparkColumn]: - owner_match = re.findall( - self.INFORMATION_OWNER_REGEX, relation.information) + def parse_columns_from_information(self, relation: SparkRelation) -> List[SparkColumn]: + owner_match = re.findall(self.INFORMATION_OWNER_REGEX, relation.information) owner = owner_match[0] if owner_match else None - matches = re.finditer( - self.INFORMATION_COLUMNS_REGEX, relation.information) + matches = re.finditer(self.INFORMATION_COLUMNS_REGEX, relation.information) columns = [] - stats_match = re.findall( - self.INFORMATION_STATISTICS_REGEX, relation.information) + stats_match = re.findall(self.INFORMATION_STATISTICS_REGEX, relation.information) raw_table_stats = stats_match[0] if stats_match else None table_stats = SparkColumn.convert_table_stats(raw_table_stats) for match_num, match in enumerate(matches): @@ -275,28 +259,25 @@ def parse_columns_from_information( table_owner=owner, column=column_name, dtype=column_type, - table_stats=table_stats + table_stats=table_stats, ) columns.append(column) return columns - def _get_columns_for_catalog( - self, relation: SparkRelation - ) -> Iterable[Dict[str, Any]]: + def _get_columns_for_catalog(self, relation: SparkRelation) -> Iterable[Dict[str, Any]]: columns = self.parse_columns_from_information(relation) for column in columns: # convert SparkColumns into catalog dicts as_dict = column.to_column_dict() - as_dict['column_name'] = as_dict.pop('column', None) - as_dict['column_type'] = as_dict.pop('dtype') - as_dict['table_database'] = None + as_dict["column_name"] = as_dict.pop("column", None) + as_dict["column_type"] = as_dict.pop("dtype") + as_dict["table_database"] = None yield as_dict def get_properties(self, relation: Relation) -> Dict[str, str]: properties = self.execute_macro( - FETCH_TBL_PROPERTIES_MACRO_NAME, - kwargs={'relation': relation} + FETCH_TBL_PROPERTIES_MACRO_NAME, kwargs={"relation": relation} ) return dict(properties) @@ -304,28 
+285,30 @@ def get_catalog(self, manifest): schema_map = self._get_catalog_schemas(manifest) if len(schema_map) > 1: dbt.exceptions.raise_compiler_error( - f'Expected only one database in get_catalog, found ' - f'{list(schema_map)}' + f"Expected only one database in get_catalog, found " f"{list(schema_map)}" ) with executor(self.config) as tpe: futures: List[Future[agate.Table]] = [] for info, schemas in schema_map.items(): for schema in schemas: - futures.append(tpe.submit_connected( - self, schema, - self._get_one_catalog, info, [schema], manifest - )) + futures.append( + tpe.submit_connected( + self, schema, self._get_one_catalog, info, [schema], manifest + ) + ) catalogs, exceptions = catch_as_completed(futures) return catalogs, exceptions def _get_one_catalog( - self, information_schema, schemas, manifest, + self, + information_schema, + schemas, + manifest, ) -> agate.Table: if len(schemas) != 1: dbt.exceptions.raise_compiler_error( - f'Expected only one schema in spark _get_one_catalog, found ' - f'{schemas}' + f"Expected only one schema in spark _get_one_catalog, found " f"{schemas}" ) database = information_schema.database @@ -335,15 +318,10 @@ def _get_one_catalog( for relation in self.list_relations(database, schema): logger.debug("Getting table schema for relation {}", relation) columns.extend(self._get_columns_for_catalog(relation)) - return agate.Table.from_object( - columns, column_types=DEFAULT_TYPE_TESTER - ) + return agate.Table.from_object(columns, column_types=DEFAULT_TYPE_TESTER) def check_schema_exists(self, database, schema): - results = self.execute_macro( - LIST_SCHEMAS_MACRO_NAME, - kwargs={'database': database} - ) + results = self.execute_macro(LIST_SCHEMAS_MACRO_NAME, kwargs={"database": database}) exists = True if schema in [row[0] for row in results] else False return exists @@ -353,7 +331,7 @@ def get_rows_different_sql( relation_a: BaseRelation, relation_b: BaseRelation, column_names: Optional[List[str]] = None, - except_operator: str = 'EXCEPT', + except_operator: str = "EXCEPT", ) -> str: """Generate SQL for a query that returns a single row with a two columns: the number of rows that are different between the two @@ -366,7 +344,7 @@ def get_rows_different_sql( names = sorted((self.quote(c.name) for c in columns)) else: names = sorted((self.quote(n) for n in column_names)) - columns_csv = ', '.join(names) + columns_csv = ", ".join(names) sql = COLUMNS_EQUAL_SQL.format( columns=columns_csv, @@ -384,7 +362,7 @@ def run_sql_for_tests(self, sql, fetch, conn): try: cursor.execute(sql) if fetch == "one": - if hasattr(cursor, 'fetchone'): + if hasattr(cursor, "fetchone"): return cursor.fetchone() else: # AttributeError: 'PyhiveConnectionWrapper' object has no attribute 'fetchone' @@ -406,7 +384,7 @@ def run_sql_for_tests(self, sql, fetch, conn): # "trivial". Which is true, though it seems like an unreasonable cause for # failure! It also doesn't like the `from foo, bar` syntax as opposed to # `from foo cross join bar`. 
-COLUMNS_EQUAL_SQL = ''' +COLUMNS_EQUAL_SQL = """ with diff_count as ( SELECT 1 as id, @@ -433,4 +411,4 @@ def run_sql_for_tests(self, sql, fetch, conn): diff_count.num_missing as num_mismatched from row_count_diff cross join diff_count -'''.strip() +""".strip() diff --git a/dbt/adapters/spark/relation.py b/dbt/adapters/spark/relation.py index 043cabfa0..249caf0d7 100644 --- a/dbt/adapters/spark/relation.py +++ b/dbt/adapters/spark/relation.py @@ -24,19 +24,19 @@ class SparkIncludePolicy(Policy): class SparkRelation(BaseRelation): quote_policy: SparkQuotePolicy = SparkQuotePolicy() include_policy: SparkIncludePolicy = SparkIncludePolicy() - quote_character: str = '`' + quote_character: str = "`" is_delta: Optional[bool] = None is_hudi: Optional[bool] = None - information: str = None + information: Optional[str] = None def __post_init__(self): if self.database != self.schema and self.database: - raise RuntimeException('Cannot set database in spark!') + raise RuntimeException("Cannot set database in spark!") def render(self): if self.include_policy.database and self.include_policy.schema: raise RuntimeException( - 'Got a spark relation with schema and database set to ' - 'include, but only one can be set' + "Got a spark relation with schema and database set to " + "include, but only one can be set" ) return super().render() diff --git a/dbt/adapters/spark/session.py b/dbt/adapters/spark/session.py index 6010df920..beb77d548 100644 --- a/dbt/adapters/spark/session.py +++ b/dbt/adapters/spark/session.py @@ -4,7 +4,7 @@ import datetime as dt from types import TracebackType -from typing import Any +from typing import Any, List, Optional, Tuple from dbt.events import AdapterLogger from dbt.utils import DECIMALS @@ -25,17 +25,17 @@ class Cursor: """ def __init__(self) -> None: - self._df: DataFrame | None = None - self._rows: list[Row] | None = None + self._df: Optional[DataFrame] = None + self._rows: Optional[List[Row]] = None def __enter__(self) -> Cursor: return self def __exit__( self, - exc_type: type[BaseException] | None, - exc_val: Exception | None, - exc_tb: TracebackType | None, + exc_type: Optional[BaseException], + exc_val: Optional[Exception], + exc_tb: Optional[TracebackType], ) -> bool: self.close() return True @@ -43,13 +43,13 @@ def __exit__( @property def description( self, - ) -> list[tuple[str, str, None, None, None, None, bool]]: + ) -> List[Tuple[str, str, None, None, None, None, bool]]: """ Get the description. Returns ------- - out : list[tuple[str, str, None, None, None, None, bool]] + out : List[Tuple[str, str, None, None, None, None, bool]] The description. Source @@ -109,13 +109,13 @@ def execute(self, sql: str, *parameters: Any) -> None: spark_session = SparkSession.builder.enableHiveSupport().getOrCreate() self._df = spark_session.sql(sql) - def fetchall(self) -> list[Row] | None: + def fetchall(self) -> Optional[List[Row]]: """ Fetch all data. Returns ------- - out : list[Row] | None + out : Optional[List[Row]] The rows. Source @@ -126,7 +126,7 @@ def fetchall(self) -> list[Row] | None: self._rows = self._df.collect() return self._rows - def fetchone(self) -> Row | None: + def fetchone(self) -> Optional[Row]: """ Fetch the first output. 
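A side note on the session.py typing changes just above (illustrative, not part of the patch): the `Optional[...]`/`List[...]` spellings evaluate at runtime on every Python version setup.py still allows, whereas the PEP 604/585 forms they replace (for example `list[Row] | None`) only evaluate on Python 3.10+ unless the module opts into `from __future__ import annotations`.

```python
from typing import List, Optional


def fetchone(rows: Optional[List[str]]) -> Optional[str]:
    # Works on Python 3.7+. The pre-change spelling, `rows: list[str] | None`,
    # raises a TypeError at definition time before Python 3.10 unless
    # `from __future__ import annotations` is in effect.
    return rows[0] if rows else None


print(fetchone(["a", "b"]))  # a
print(fetchone(None))        # None
```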
diff --git a/dbt/include/spark/__init__.py b/dbt/include/spark/__init__.py index 564a3d1e8..b177e5d49 100644 --- a/dbt/include/spark/__init__.py +++ b/dbt/include/spark/__init__.py @@ -1,2 +1,3 @@ import os + PACKAGE_PATH = os.path.dirname(__file__) diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index e96501c45..22381d9ea 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -231,7 +231,7 @@ {% set comment = column_dict[column_name]['description'] %} {% set escaped_comment = comment | replace('\'', '\\\'') %} {% set comment_query %} - alter table {{ relation }} change column + alter table {{ relation }} change column {{ adapter.quote(column_name) if column_dict[column_name]['quote'] else column_name }} comment '{{ escaped_comment }}'; {% endset %} @@ -260,25 +260,25 @@ {% macro spark__alter_relation_add_remove_columns(relation, add_columns, remove_columns) %} - + {% if remove_columns %} {% set platform_name = 'Delta Lake' if relation.is_delta else 'Apache Spark' %} {{ exceptions.raise_compiler_error(platform_name + ' does not support dropping columns from tables') }} {% endif %} - + {% if add_columns is none %} {% set add_columns = [] %} {% endif %} - + {% set sql -%} - + alter {{ relation.type }} {{ relation }} - + {% if add_columns %} add columns {% endif %} {% for column in add_columns %} {{ column.name }} {{ column.data_type }}{{ ',' if not loop.last }} {% endfor %} - + {%- endset -%} {% do run_query(sql) %} diff --git a/dbt/include/spark/macros/materializations/incremental/incremental.sql b/dbt/include/spark/macros/materializations/incremental/incremental.sql index d0b6e89ba..8d8e69d93 100644 --- a/dbt/include/spark/macros/materializations/incremental/incremental.sql +++ b/dbt/include/spark/macros/materializations/incremental/incremental.sql @@ -1,17 +1,17 @@ {% materialization incremental, adapter='spark' -%} - + {#-- Validate early so we don't run SQL if the file_format + strategy combo is invalid --#} {%- set raw_file_format = config.get('file_format', default='parquet') -%} {%- set raw_strategy = config.get('incremental_strategy', default='append') -%} - + {%- set file_format = dbt_spark_validate_get_file_format(raw_file_format) -%} {%- set strategy = dbt_spark_validate_get_incremental_strategy(raw_strategy, file_format) -%} - + {%- set unique_key = config.get('unique_key', none) -%} {%- set partition_by = config.get('partition_by', none) -%} {%- set full_refresh_mode = (should_full_refresh()) -%} - + {% set on_schema_change = incremental_validate_on_schema_change(config.get('on_schema_change'), default='ignore') %} {% set target_relation = this %} @@ -42,7 +42,7 @@ {%- endcall -%} {% do persist_docs(target_relation, model) %} - + {{ run_hooks(post_hooks) }} {{ return({'relations': [target_relation]}) }} diff --git a/dbt/include/spark/macros/materializations/incremental/strategies.sql b/dbt/include/spark/macros/materializations/incremental/strategies.sql index 215b5f3f9..28b8f2001 100644 --- a/dbt/include/spark/macros/materializations/incremental/strategies.sql +++ b/dbt/include/spark/macros/materializations/incremental/strategies.sql @@ -1,5 +1,5 @@ {% macro get_insert_overwrite_sql(source_relation, target_relation) %} - + {%- set dest_columns = adapter.get_columns_in_relation(target_relation) -%} {%- set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') -%} insert overwrite table {{ target_relation }} @@ -41,20 +41,20 @@ {% else %} {% do predicates.append('FALSE') %} {% 
endif %} - + {{ sql_header if sql_header is not none }} - + merge into {{ target }} as DBT_INTERNAL_DEST using {{ source.include(schema=false) }} as DBT_INTERNAL_SOURCE on {{ predicates | join(' and ') }} - + when matched then update set {% if update_columns -%}{%- for column_name in update_columns %} {{ column_name }} = DBT_INTERNAL_SOURCE.{{ column_name }} {%- if not loop.last %}, {%- endif %} {%- endfor %} {%- else %} * {% endif %} - + when not matched then insert * {% endmacro %} diff --git a/dbt/include/spark/macros/materializations/incremental/validate.sql b/dbt/include/spark/macros/materializations/incremental/validate.sql index 3e9de359b..ffd56f106 100644 --- a/dbt/include/spark/macros/materializations/incremental/validate.sql +++ b/dbt/include/spark/macros/materializations/incremental/validate.sql @@ -28,13 +28,13 @@ Invalid incremental strategy provided: {{ raw_strategy }} You can only choose this strategy when file_format is set to 'delta' or 'hudi' {%- endset %} - + {% set invalid_insert_overwrite_delta_msg -%} Invalid incremental strategy provided: {{ raw_strategy }} You cannot use this strategy when file_format is set to 'delta' Use the 'append' or 'merge' strategy instead {%- endset %} - + {% set invalid_insert_overwrite_endpoint_msg -%} Invalid incremental strategy provided: {{ raw_strategy }} You cannot use this strategy when connecting via endpoint diff --git a/dbt/include/spark/macros/materializations/snapshot.sql b/dbt/include/spark/macros/materializations/snapshot.sql index 82d186ce2..9c891ef04 100644 --- a/dbt/include/spark/macros/materializations/snapshot.sql +++ b/dbt/include/spark/macros/materializations/snapshot.sql @@ -32,7 +32,7 @@ {% macro spark_build_snapshot_staging_table(strategy, sql, target_relation) %} {% set tmp_identifier = target_relation.identifier ~ '__dbt_tmp' %} - + {%- set tmp_relation = api.Relation.create(identifier=tmp_identifier, schema=target_relation.schema, database=none, diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql index 3ae2df973..2eeb806fd 100644 --- a/dbt/include/spark/macros/materializations/table.sql +++ b/dbt/include/spark/macros/materializations/table.sql @@ -21,7 +21,7 @@ {% call statement('main') -%} {{ create_table_as(False, target_relation, sql) }} {%- endcall %} - + {% do persist_docs(target_relation, model) %} {{ run_hooks(post_hooks) }} diff --git a/dev-requirements.txt b/dev-requirements.txt index 0f84cbd5d..b94cb8b6b 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -3,18 +3,22 @@ git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter +black==22.3.0 +bumpversion +click~=8.0.4 +flake8 +flaky freezegun==0.3.9 -pytest>=6.0.2 +ipdb mock>=1.3.0 -flake8 +mypy==0.950 +pre-commit +pytest-csv +pytest-dotenv +pytest-xdist +pytest>=6.0.2 pytz -bumpversion tox>=3.2.0 -ipdb -pytest-xdist -pytest-dotenv -pytest-csv -flaky # Test requirements sasl>=0.2.1 diff --git a/scripts/build-dist.sh b/scripts/build-dist.sh index 65e6dbc97..3c3808399 100755 --- a/scripts/build-dist.sh +++ b/scripts/build-dist.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/bash set -eo pipefail diff --git a/setup.py b/setup.py index 12ecbacde..836aeed43 100644 --- a/setup.py +++ b/setup.py @@ -5,41 +5,39 @@ # require python 3.7 or newer if sys.version_info < (3, 7): - print('Error: dbt does not support this version of Python.') - print('Please upgrade to Python 3.7 or 
higher.') + print("Error: dbt does not support this version of Python.") + print("Please upgrade to Python 3.7 or higher.") sys.exit(1) # require version of setuptools that supports find_namespace_packages from setuptools import setup + try: from setuptools import find_namespace_packages except ImportError: # the user has a downlevel version of setuptools. - print('Error: dbt requires setuptools v40.1.0 or higher.') - print('Please upgrade setuptools with "pip install --upgrade setuptools" ' - 'and try again') + print("Error: dbt requires setuptools v40.1.0 or higher.") + print('Please upgrade setuptools with "pip install --upgrade setuptools" ' "and try again") sys.exit(1) # pull long description from README this_directory = os.path.abspath(os.path.dirname(__file__)) -with open(os.path.join(this_directory, 'README.md'), 'r', encoding='utf8') as f: +with open(os.path.join(this_directory, "README.md"), "r", encoding="utf8") as f: long_description = f.read() # get this package's version from dbt/adapters//__version__.py def _get_plugin_version_dict(): - _version_path = os.path.join( - this_directory, 'dbt', 'adapters', 'spark', '__version__.py' - ) - _semver = r'''(?P\d+)\.(?P\d+)\.(?P\d+)''' - _pre = r'''((?Pa|b|rc)(?P
\d+))?'''
-    _version_pattern = fr'''version\s*=\s*["']{_semver}{_pre}["']'''
+    _version_path = os.path.join(this_directory, "dbt", "adapters", "spark", "__version__.py")
+    _semver = r"""(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)"""
+    _pre = r"""((?P<prekind>a|b|rc)(?P<pre>\d+))?"""
+    _version_pattern = fr"""version\s*=\s*["']{_semver}{_pre}["']"""
     with open(_version_path) as f:
         match = re.search(_version_pattern, f.read().strip())
         if match is None:
-            raise ValueError(f'invalid version at {_version_path}')
+            raise ValueError(f"invalid version at {_version_path}")
         return match.groupdict()
 
 
@@ -47,7 +45,7 @@ def _get_plugin_version_dict():
 def _get_dbt_core_version():
     parts = _get_plugin_version_dict()
     minor = "{major}.{minor}.0".format(**parts)
-    pre = (parts["prekind"]+"1" if parts["prekind"] else "")
+    pre = parts["prekind"] + "1" if parts["prekind"] else ""
     return f"{minor}{pre}"
 
 
@@ -56,33 +54,28 @@ def _get_dbt_core_version():
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 
-odbc_extras = ['pyodbc>=4.0.30']
+odbc_extras = ["pyodbc>=4.0.30"]
 pyhive_extras = [
-    'PyHive[hive]>=0.6.0,<0.7.0',
-    'thrift>=0.11.0,<0.16.0',
-]
-session_extras = [
-    "pyspark>=3.0.0,<4.0.0"
+    "PyHive[hive]>=0.6.0,<0.7.0",
+    "thrift>=0.11.0,<0.16.0",
 ]
+session_extras = ["pyspark>=3.0.0,<4.0.0"]
 all_extras = odbc_extras + pyhive_extras + session_extras
 
 setup(
     name=package_name,
     version=package_version,
-
     description=description,
     long_description=long_description,
-    long_description_content_type='text/markdown',
-
-    author='dbt Labs',
-    author_email='info@dbtlabs.com',
-    url='https://github.com/dbt-labs/dbt-spark',
-
-    packages=find_namespace_packages(include=['dbt', 'dbt.*']),
+    long_description_content_type="text/markdown",
+    author="dbt Labs",
+    author_email="info@dbtlabs.com",
+    url="https://github.com/dbt-labs/dbt-spark",
+    packages=find_namespace_packages(include=["dbt", "dbt.*"]),
     include_package_data=True,
     install_requires=[
-        'dbt-core~={}'.format(dbt_core_version),
-        'sqlparams>=3.0.0',
+        "dbt-core~={}".format(dbt_core_version),
+        "sqlparams>=3.0.0",
     ],
     extras_require={
         "ODBC": odbc_extras,
@@ -92,17 +85,14 @@ def _get_dbt_core_version():
     },
     zip_safe=False,
     classifiers=[
-        'Development Status :: 5 - Production/Stable',
-        
-        'License :: OSI Approved :: Apache Software License',
-        
-        'Operating System :: Microsoft :: Windows',
-        'Operating System :: MacOS :: MacOS X',
-        'Operating System :: POSIX :: Linux',
-
-        'Programming Language :: Python :: 3.7',
-        'Programming Language :: Python :: 3.8',
-        'Programming Language :: Python :: 3.9',
+        "Development Status :: 5 - Production/Stable",
+        "License :: OSI Approved :: Apache Software License",
+        "Operating System :: Microsoft :: Windows",
+        "Operating System :: MacOS :: MacOS X",
+        "Operating System :: POSIX :: Linux",
+        "Programming Language :: Python :: 3.7",
+        "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
     ],
     python_requires=">=3.7",
 )
diff --git a/tox.ini b/tox.ini
index 59b931dca..a75e2a26a 100644
--- a/tox.ini
+++ b/tox.ini
@@ -2,14 +2,6 @@
 skipsdist = True
 envlist = unit, flake8, integration-spark-thrift
 
-
-[testenv:flake8]
-basepython = python3.8
-commands = /bin/bash -c '$(which flake8) --max-line-length 99 --select=E,W,F --ignore=W504 dbt/'
-passenv = DBT_* PYTEST_ADDOPTS
-deps =
-     -r{toxinidir}/dev-requirements.txt
-
 [testenv:unit]
 basepython = python3.8
 commands = /bin/bash -c '{envpython} -m pytest -v {posargs} tests/unit'

From fad79e31d37b027d28ba1430e980e684bb7c2556 Mon Sep 17 00:00:00 2001
From: Jeremy Cohen 
Date: Mon, 16 May 2022 14:37:02 +0200
Subject: [PATCH 03/54] More consistent results from get_columns_in_relation
 (#355)

* More consistent results from get_columns_in_relation

* Not dispatched, full name

* Add changelog entry
---
 CHANGELOG.md                          |  7 ++++++-
 dbt/adapters/spark/impl.py            |  6 ++++--
 dbt/include/spark/macros/adapters.sql | 10 +++++++---
 3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 77eb72581..961fe01a3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,9 @@
-## dbt-spark 1.1.0 (TBD)
+## dbt-spark 1.2.0 (April 28, 2022)
+
+### Fixes
+- `adapter.get_columns_in_relation` (method) and `get_columns_in_relation` (macro) now return identical responses. The previous behavior of `get_columns_in_relation` (macro) is now represented by a new macro, `get_columns_in_relation_raw` ([#354](https://github.com/dbt-labs/dbt-spark/issues/354), [#355](https://github.com/dbt-labs/dbt-spark/pull/355))
+
+## dbt-spark 1.1.0 (April 28, 2022)
 
 ### Features
 - Add session connection method ([#272](https://github.com/dbt-labs/dbt-spark/issues/272), [#279](https://github.com/dbt-labs/dbt-spark/pull/279))
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index dd090a23b..4f7b9d4cc 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -23,7 +23,7 @@
 
 logger = AdapterLogger("Spark")
 
-GET_COLUMNS_IN_RELATION_MACRO_NAME = "get_columns_in_relation"
+GET_COLUMNS_IN_RELATION_RAW_MACRO_NAME = "spark__get_columns_in_relation_raw"
 LIST_SCHEMAS_MACRO_NAME = "list_schemas"
 LIST_RELATIONS_MACRO_NAME = "list_relations_without_caching"
 DROP_RELATION_MACRO_NAME = "drop_relation"
@@ -225,7 +225,9 @@ def get_columns_in_relation(self, relation: Relation) -> List[SparkColumn]:
             # use get_columns_in_relation spark macro
             # which would execute 'describe extended tablename' query
             try:
-                rows: List[agate.Row] = super().get_columns_in_relation(relation)
+                rows: List[agate.Row] = self.execute_macro(
+                    GET_COLUMNS_IN_RELATION_RAW_MACRO_NAME, kwargs={"relation": relation}
+                )
                 columns = self.parse_describe_extended(relation, rows)
             except dbt.exceptions.RuntimeException as e:
                 # spark would throw error when table doesn't exist, where other
diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql
index 22381d9ea..5322597ff 100644
--- a/dbt/include/spark/macros/adapters.sql
+++ b/dbt/include/spark/macros/adapters.sql
@@ -168,11 +168,15 @@
   {%- endcall -%}
 {% endmacro %}
 
-{% macro spark__get_columns_in_relation(relation) -%}
-  {% call statement('get_columns_in_relation', fetch_result=True) %}
+{% macro spark__get_columns_in_relation_raw(relation) -%}
+  {% call statement('get_columns_in_relation_raw', fetch_result=True) %}
       describe extended {{ relation.include(schema=(schema is not none)) }}
   {% endcall %}
-  {% do return(load_result('get_columns_in_relation').table) %}
+  {% do return(load_result('get_columns_in_relation_raw').table) %}
+{% endmacro %}
+
+{% macro spark__get_columns_in_relation(relation) -%}
+  {{ return(adapter.get_columns_in_relation(relation)) }}
 {% endmacro %}
 
 {% macro spark__list_relations_without_caching(relation) %}

From 8744cf1faa0b57fe9e797a32a109ba4e7a056e76 Mon Sep 17 00:00:00 2001
From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com>
Date: Tue, 17 May 2022 12:43:31 -0600
Subject: [PATCH 04/54] Fix misnomers within the comment (#352)

---
 tests/conftest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 7ba95d47b..0c624713c 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -8,7 +8,7 @@ def pytest_addoption(parser):
     parser.addoption("--profile", action="store", default="apache_spark", type=str)
 
 
-# Using @pytest.mark.skip_adapter('apache_spark') uses the 'skip_by_adapter_type'
+# Using @pytest.mark.skip_profile('apache_spark') uses the 'skip_by_profile_type'
 # autouse fixture below
 def pytest_configure(config):
     config.addinivalue_line(

From 4c41d9e3686250827da958bc56623bb7995d7566 Mon Sep 17 00:00:00 2001
From: Takuya UESHIN 
Date: Tue, 31 May 2022 11:53:26 -0700
Subject: [PATCH 05/54] Use dispatch pattern for get_columns_in_relation_raw
 macro. (#365)

---
 dbt/adapters/spark/impl.py            | 2 +-
 dbt/include/spark/macros/adapters.sql | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 4f7b9d4cc..699eca9d2 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -23,7 +23,7 @@
 
 logger = AdapterLogger("Spark")
 
-GET_COLUMNS_IN_RELATION_RAW_MACRO_NAME = "spark__get_columns_in_relation_raw"
+GET_COLUMNS_IN_RELATION_RAW_MACRO_NAME = "get_columns_in_relation_raw"
 LIST_SCHEMAS_MACRO_NAME = "list_schemas"
 LIST_RELATIONS_MACRO_NAME = "list_relations_without_caching"
 DROP_RELATION_MACRO_NAME = "drop_relation"
diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql
index 5322597ff..abdeacb7f 100644
--- a/dbt/include/spark/macros/adapters.sql
+++ b/dbt/include/spark/macros/adapters.sql
@@ -168,6 +168,10 @@
   {%- endcall -%}
 {% endmacro %}
 
+{% macro get_columns_in_relation_raw(relation) -%}
+  {{ return(adapter.dispatch('get_columns_in_relation_raw', 'dbt')(relation)) }}
+{%- endmacro -%}
+
 {% macro spark__get_columns_in_relation_raw(relation) -%}
   {% call statement('get_columns_in_relation_raw', fetch_result=True) %}
       describe extended {{ relation.include(schema=(schema is not none)) }}

From bc9fc0baaacce28706d0542391704d9ea971cee4 Mon Sep 17 00:00:00 2001
From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
Date: Wed, 1 Jun 2022 11:30:39 -0500
Subject: [PATCH 06/54] First draft of adding contributing.md to each adapter
 repo (#276)

* First draft of adding contributing.md to each adapter repo

* updates after kyle review, and minor changes regarding review process and CI as spark still uses CircleCI and not GHA

* minor addition

* add test.env.example

* fix eof black errors

* added example for functional tests
---
 CONTRIBUTING.MD  | 101 +++++++++++++++++++++++++++++++++++++++++++++++
 test.env.example |  10 +++++
 2 files changed, 111 insertions(+)
 create mode 100644 CONTRIBUTING.MD
 create mode 100644 test.env.example

diff --git a/CONTRIBUTING.MD b/CONTRIBUTING.MD
new file mode 100644
index 000000000..c0d9bb3d2
--- /dev/null
+++ b/CONTRIBUTING.MD
@@ -0,0 +1,101 @@
+# Contributing to `dbt-spark`
+
+1. [About this document](#about-this-document)
+2. [Getting the code](#getting-the-code)
+3. [Running `dbt-spark` in development](#running-dbt-spark-in-development)
+4. [Testing](#testing)
+5. [Updating Docs](#updating-docs)
+6. [Submitting a Pull Request](#submitting-a-pull-request)
+
+## About this document
+This document is a guide intended for folks interested in contributing to `dbt-spark`. Below, we document the process by which members of the community should create issues and submit pull requests (PRs) in this repository. It is not intended as a guide for using `dbt-spark`, and it assumes a certain level of familiarity with Python concepts such as virtualenvs, `pip`, Python modules, and so on. This guide assumes you are using macOS or Linux and are comfortable with the command line.
+
+For those wishing to contribute we highly suggest reading the dbt-core's [contribution guide](https://github.com/dbt-labs/dbt-core/blob/HEAD/CONTRIBUTING.md) if you haven't already. Almost all of the information there is applicable to contributing here, too!
+
+### Signing the CLA
+
+Please note that all contributors to `dbt-spark` must sign the [Contributor License Agreement](https://docs.getdbt.com/docs/contributor-license-agreements) to have their Pull Request merged into the `dbt-spark` codebase. If you are unable to sign the CLA, the `dbt-spark` maintainers will unfortunately be unable to merge your Pull Request. You are, however, welcome to open issues and comment on existing ones.
+
+
+## Getting the code
+
+You will need `git` in order to download and modify the `dbt-spark` source code. You can find directions [here](https://github.com/git-guides/install-git) on how to install `git`.
+
+### External contributors
+
+If you are not a member of the `dbt-labs` GitHub organization, you can contribute to `dbt-spark` by forking the `dbt-spark` repository. For a detailed overview on forking, check out the [GitHub docs on forking](https://help.github.com/en/articles/fork-a-repo). In short, you will need to:
+
+1. fork the `dbt-spark` repository
+2. clone your fork locally
+3. check out a new branch for your proposed changes
+4. push changes to your fork
+5. open a pull request against `dbt-labs/dbt-spark` from your forked repository
+
+### dbt Labs contributors
+
+If you are a member of the `dbt Labs` GitHub organization, you will have push access to the `dbt-spark` repo. Rather than forking `dbt-spark` to make your changes, just clone the repository, check out a new branch, and push directly to that branch.
+
+
+## Running `dbt-spark` in development
+
+### Installation
+
+First, make sure that you set up your `virtualenv` as described in [Setting up an environment](https://github.com/dbt-labs/dbt-core/blob/HEAD/CONTRIBUTING.md#setting-up-an-environment). Ensure you have the latest version of pip installed with `pip install --upgrade pip`. Next, install `dbt-spark` and its development dependencies:
+
+```sh
+pip install -e . -r dev-requirements.txt
+```
+
+When `dbt-spark` is installed this way, any changes you make to the `dbt-spark` source code will be reflected immediately in your next `dbt-spark` run.
+
+To confirm you have the correct version of `dbt-core` installed, please run `dbt --version` and `which dbt`.
+
+
+## Testing
+
+### Initial Setup
+
+`dbt-spark` uses test credentials specified in a `test.env` file in the root of the repository. This `test.env` file is git-ignored, but please be _extra_ careful to never check in credentials or other sensitive information when developing. To create your `test.env` file, copy the provided example file, then supply your relevant credentials.
+
+```
+cp test.env.example test.env
+$EDITOR test.env
+```
+
+### Test commands
+There are a few methods for running tests locally.
+
+#### `tox`
+`tox` takes care of managing Python virtualenvs and installing dependencies in order to run tests. You can also run tests in parallel; for example, you can run unit tests for Python 3.7, Python 3.8, and Python 3.9, plus `flake8` checks, in parallel with `tox -p`. You can also run unit tests for a specific Python version with `tox -e py37`. The configuration of these tests is located in `tox.ini`.
+
+#### `pytest`
+Finally, you can also run a specific test or group of tests using `pytest` directly. With a Python virtualenv active and dev dependencies installed you can do things like:
+
+```sh
+# run specific spark integration tests
+python -m pytest -m profile_spark tests/integration/get_columns_in_relation
+# run specific functional tests
+python -m pytest --profile databricks_sql_endpoint tests/functional/adapter/test_basic.py
+# run all unit tests in a file
+python -m pytest tests/unit/test_adapter.py
+# run a specific unit test
+python -m pytest tests/unit/test_adapter.py::TestSparkAdapter::test_profile_with_database
+```
+## Updating Docs
+
+Many changes will require an update to the `dbt-spark` docs. Here are some useful resources:
+
+- Docs are [here](https://docs.getdbt.com/).
+- The docs repo for making changes is located [here]( https://github.com/dbt-labs/docs.getdbt.com).
+- The changes made are likely to impact one or both of the [Spark Profile](https://docs.getdbt.com/reference/warehouse-profiles/spark-profile) and [Spark Configs](https://docs.getdbt.com/reference/resource-configs/spark-configs) pages.
+- We ask every community member who makes a user-facing change to open an issue or PR regarding doc changes.
+
+## Submitting a Pull Request
+
+dbt Labs provides a CI environment to test changes to the `dbt-spark` adapter, as well as periodic checks against the development version of `dbt-core`, through GitHub Actions.
+
+A `dbt-spark` maintainer will review your PR. They may suggest code revision for style or clarity, or request that you add unit or integration test(s). These are good things! We believe that, with a little bit of help, anyone can contribute high-quality code.
+
+Once all review comments and requests have been addressed, the `dbt-spark` maintainer can trigger CI testing.
+
+Once all tests are passing and your PR has been approved, a `dbt-spark` maintainer will merge your changes into the active development branch. And that's it! Happy developing :tada:
diff --git a/test.env.example b/test.env.example
new file mode 100644
index 000000000..bf4cf2eee
--- /dev/null
+++ b/test.env.example
@@ -0,0 +1,10 @@
+# Cluster ID
+DBT_DATABRICKS_CLUSTER_NAME=
+# SQL Endpoint
+DBT_DATABRICKS_ENDPOINT=
+# Server Hostname value
+DBT_DATABRICKS_HOST_NAME=
+# personal token
+DBT_DATABRICKS_TOKEN=
+# file path to local ODBC driver
+ODBC_DRIVER=

From ca1b5b6df849068b0151dd8e7166623256b83c67 Mon Sep 17 00:00:00 2001
From: Jeremy Cohen 
Date: Wed, 8 Jun 2022 09:33:36 +0200
Subject: [PATCH 07/54] Add invocation env to user agent string (#367)

* Add invocation env to user agent string

* Consistency + fixups

* Changelog entry

* Try diff pattern
---
 CHANGELOG.md                      | 3 +++
 dbt/adapters/spark/connections.py | 8 +++++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 961fe01a3..49e030aba 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,9 @@
 ### Fixes
 - `adapter.get_columns_in_relation` (method) and `get_columns_in_relation` (macro) now return identical responses. The previous behavior of `get_columns_in_relation` (macro) is now represented by a new macro, `get_columns_in_relation_raw` ([#354](https://github.com/dbt-labs/dbt-spark/issues/354), [#355](https://github.com/dbt-labs/dbt-spark/pull/355))
 
+### Under the hood
+- Add `DBT_INVOCATION_ENV` environment variable to ODBC user agent string ([#366](https://github.com/dbt-labs/dbt-spark/pull/366))
+
 ## dbt-spark 1.1.0 (April 28, 2022)
 
 ### Features
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 59ceb9dd8..135463eb3 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -1,3 +1,5 @@
+import os
+
 from contextlib import contextmanager
 
 import dbt.exceptions
@@ -7,6 +9,7 @@
 from dbt.events import AdapterLogger
 from dbt.utils import DECIMALS
 from dbt.adapters.spark import __version__
+from dbt.tracking import DBT_INVOCATION_ENV
 
 try:
     from TCLIService.ttypes import TOperationState as ThriftState
@@ -409,9 +412,8 @@ def open(cls, connection):
                     cls.validate_creds(creds, required_fields)
 
                     dbt_spark_version = __version__.version
-                    user_agent_entry = (
-                        f"dbt-labs-dbt-spark/{dbt_spark_version} (Databricks)"  # noqa
-                    )
+                    dbt_invocation_env = os.getenv(DBT_INVOCATION_ENV) or "manual"
+                    user_agent_entry = f"dbt-labs-dbt-spark/{dbt_spark_version} (Databricks, {dbt_invocation_env})"  # noqa
 
                     # http://simba.wpengine.com/products/Spark/doc/ODBC_InstallGuide/unix/content/odbc/hi/configuring/serverside.htm
                     ssp = {f"SSP_{k}": f"{{{v}}}" for k, v in creds.server_side_parameters.items()}

From 9614bca5b471089692bd1df73760b23e83d537bb Mon Sep 17 00:00:00 2001
From: Jeremy Cohen 
Date: Fri, 17 Jun 2022 16:06:39 +0200
Subject: [PATCH 08/54] Initialize lift + shift for cross-db macros (#359)

* Initialize lift + shift, dateadd + datediff

* Fixups

* More fixups

* Next round of utilities

* Reorgnanize, skip, max for bool_or

* fail -> skip_profile

* Rm branch names
---
 dbt/include/spark/macros/utils/any_value.sql  |   5 +
 .../spark/macros/utils/assert_not_null.sql    |   9 ++
 dbt/include/spark/macros/utils/bool_or.sql    |  11 ++
 dbt/include/spark/macros/utils/concat.sql     |   3 +
 dbt/include/spark/macros/utils/dateadd.sql    |  62 +++++++++
 dbt/include/spark/macros/utils/datediff.sql   | 107 ++++++++++++++++
 dbt/include/spark/macros/utils/listagg.sql    |  17 +++
 dbt/include/spark/macros/utils/split_part.sql |  23 ++++
 tests/functional/adapter/test_basic.py        |   4 +-
 .../adapter/utils/fixture_listagg.py          |  61 +++++++++
 tests/functional/adapter/utils/test_utils.py  | 121 ++++++++++++++++++
 11 files changed, 422 insertions(+), 1 deletion(-)
 create mode 100644 dbt/include/spark/macros/utils/any_value.sql
 create mode 100644 dbt/include/spark/macros/utils/assert_not_null.sql
 create mode 100644 dbt/include/spark/macros/utils/bool_or.sql
 create mode 100644 dbt/include/spark/macros/utils/concat.sql
 create mode 100644 dbt/include/spark/macros/utils/dateadd.sql
 create mode 100644 dbt/include/spark/macros/utils/datediff.sql
 create mode 100644 dbt/include/spark/macros/utils/listagg.sql
 create mode 100644 dbt/include/spark/macros/utils/split_part.sql
 create mode 100644 tests/functional/adapter/utils/fixture_listagg.py
 create mode 100644 tests/functional/adapter/utils/test_utils.py

diff --git a/dbt/include/spark/macros/utils/any_value.sql b/dbt/include/spark/macros/utils/any_value.sql
new file mode 100644
index 000000000..eb0a019b3
--- /dev/null
+++ b/dbt/include/spark/macros/utils/any_value.sql
@@ -0,0 +1,5 @@
+{% macro spark__any_value(expression) -%}
+    {#-- return any value (non-deterministic)  --#}
+    first({{ expression }})
+
+{%- endmacro %}
diff --git a/dbt/include/spark/macros/utils/assert_not_null.sql b/dbt/include/spark/macros/utils/assert_not_null.sql
new file mode 100644
index 000000000..e5454bce9
--- /dev/null
+++ b/dbt/include/spark/macros/utils/assert_not_null.sql
@@ -0,0 +1,9 @@
+{% macro assert_not_null(function, arg) -%}
+  {{ return(adapter.dispatch('assert_not_null', 'dbt')(function, arg)) }}
+{%- endmacro %}
+
+{% macro spark__assert_not_null(function, arg) %}
+
+    coalesce({{function}}({{arg}}), nvl2({{function}}({{arg}}), assert_true({{function}}({{arg}}) is not null), null))
+
+{% endmacro %}
diff --git a/dbt/include/spark/macros/utils/bool_or.sql b/dbt/include/spark/macros/utils/bool_or.sql
new file mode 100644
index 000000000..60d705eb3
--- /dev/null
+++ b/dbt/include/spark/macros/utils/bool_or.sql
@@ -0,0 +1,11 @@
+{#-- Spark v3 supports 'bool_or' and 'any', but Spark v2 needs to use 'max' for this
+  -- https://spark.apache.org/docs/latest/api/sql/index.html#any
+  -- https://spark.apache.org/docs/latest/api/sql/index.html#bool_or
+  -- https://spark.apache.org/docs/latest/api/sql/index.html#max
+#}
+
+{% macro spark__bool_or(expression) -%}
+
+    max({{ expression }})
+
+{%- endmacro %}
diff --git a/dbt/include/spark/macros/utils/concat.sql b/dbt/include/spark/macros/utils/concat.sql
new file mode 100644
index 000000000..30f1a420e
--- /dev/null
+++ b/dbt/include/spark/macros/utils/concat.sql
@@ -0,0 +1,3 @@
+{% macro spark__concat(fields) -%}
+    concat({{ fields|join(', ') }})
+{%- endmacro %}
diff --git a/dbt/include/spark/macros/utils/dateadd.sql b/dbt/include/spark/macros/utils/dateadd.sql
new file mode 100644
index 000000000..e2a20d0f2
--- /dev/null
+++ b/dbt/include/spark/macros/utils/dateadd.sql
@@ -0,0 +1,62 @@
+{% macro spark__dateadd(datepart, interval, from_date_or_timestamp) %}
+
+    {%- set clock_component -%}
+        {# make sure the dates + timestamps are real, otherwise raise an error asap #}
+        to_unix_timestamp({{ assert_not_null('to_timestamp', from_date_or_timestamp) }})
+        - to_unix_timestamp({{ assert_not_null('date', from_date_or_timestamp) }})
+    {%- endset -%}
+
+    {%- if datepart in ['day', 'week'] -%}
+
+        {%- set multiplier = 7 if datepart == 'week' else 1 -%}
+
+        to_timestamp(
+            to_unix_timestamp(
+                date_add(
+                    {{ assert_not_null('date', from_date_or_timestamp) }},
+                    cast({{interval}} * {{multiplier}} as int)
+                )
+            ) + {{clock_component}}
+        )
+
+    {%- elif datepart in ['month', 'quarter', 'year'] -%}
+
+        {%- set multiplier -%}
+            {%- if datepart == 'month' -%} 1
+            {%- elif datepart == 'quarter' -%} 3
+            {%- elif datepart == 'year' -%} 12
+            {%- endif -%}
+        {%- endset -%}
+
+        to_timestamp(
+            to_unix_timestamp(
+                add_months(
+                    {{ assert_not_null('date', from_date_or_timestamp) }},
+                    cast({{interval}} * {{multiplier}} as int)
+                )
+            ) + {{clock_component}}
+        )
+
+    {%- elif datepart in ('hour', 'minute', 'second', 'millisecond', 'microsecond') -%}
+
+        {%- set multiplier -%}
+            {%- if datepart == 'hour' -%} 3600
+            {%- elif datepart == 'minute' -%} 60
+            {%- elif datepart == 'second' -%} 1
+            {%- elif datepart == 'millisecond' -%} (1/1000000)
+            {%- elif datepart == 'microsecond' -%} (1/1000000)
+            {%- endif -%}
+        {%- endset -%}
+
+        to_timestamp(
+            {{ assert_not_null('to_unix_timestamp', from_date_or_timestamp) }}
+            + cast({{interval}} * {{multiplier}} as int)
+        )
+
+    {%- else -%}
+
+        {{ exceptions.raise_compiler_error("macro dateadd not implemented for datepart ~ '" ~ datepart ~ "' ~ on Spark") }}
+
+    {%- endif -%}
+
+{% endmacro %}
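Each branch of `spark__dateadd` follows the same idea: shift only the date part (via `date_add` or `add_months`), then add back the "clock component" (the seconds between the full timestamp and its date) so the time of day survives the shift. A rough plain-Python restatement of the `day`/`week` branch, for intuition only (hypothetical helper, not adapter code):

```python
from datetime import datetime, timedelta

def dateadd_days(ts: datetime, interval: int, datepart: str = "day") -> datetime:
    multiplier = 7 if datepart == "week" else 1
    midnight = ts.replace(hour=0, minute=0, second=0, microsecond=0)
    clock_component = ts - midnight  # time of day, preserved across the shift
    return midnight + timedelta(days=interval * multiplier) + clock_component

print(dateadd_days(datetime(2022, 1, 1, 13, 30), 2, "week"))  # 2022-01-15 13:30:00
```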
diff --git a/dbt/include/spark/macros/utils/datediff.sql b/dbt/include/spark/macros/utils/datediff.sql
new file mode 100644
index 000000000..d0e684c47
--- /dev/null
+++ b/dbt/include/spark/macros/utils/datediff.sql
@@ -0,0 +1,107 @@
+{% macro spark__datediff(first_date, second_date, datepart) %}
+
+    {%- if datepart in ['day', 'week', 'month', 'quarter', 'year'] -%}
+
+        {# make sure the dates are real, otherwise raise an error asap #}
+        {% set first_date = assert_not_null('date', first_date) %}
+        {% set second_date = assert_not_null('date', second_date) %}
+
+    {%- endif -%}
+
+    {%- if datepart == 'day' -%}
+
+        datediff({{second_date}}, {{first_date}})
+
+    {%- elif datepart == 'week' -%}
+
+        case when {{first_date}} < {{second_date}}
+            then floor(datediff({{second_date}}, {{first_date}})/7)
+            else ceil(datediff({{second_date}}, {{first_date}})/7)
+            end
+
+        -- did we cross a week boundary (Sunday)?
+        + case
+            when {{first_date}} < {{second_date}} and dayofweek({{second_date}}) < dayofweek({{first_date}}) then 1
+            when {{first_date}} > {{second_date}} and dayofweek({{second_date}}) > dayofweek({{first_date}}) then -1
+            else 0 end
+
+    {%- elif datepart == 'month' -%}
+
+        case when {{first_date}} < {{second_date}}
+            then floor(months_between(date({{second_date}}), date({{first_date}})))
+            else ceil(months_between(date({{second_date}}), date({{first_date}})))
+            end
+
+        -- did we cross a month boundary?
+        + case
+            when {{first_date}} < {{second_date}} and dayofmonth({{second_date}}) < dayofmonth({{first_date}}) then 1
+            when {{first_date}} > {{second_date}} and dayofmonth({{second_date}}) > dayofmonth({{first_date}}) then -1
+            else 0 end
+
+    {%- elif datepart == 'quarter' -%}
+
+        case when {{first_date}} < {{second_date}}
+            then floor(months_between(date({{second_date}}), date({{first_date}}))/3)
+            else ceil(months_between(date({{second_date}}), date({{first_date}}))/3)
+            end
+
+        -- did we cross a quarter boundary?
+        + case
+            when {{first_date}} < {{second_date}} and (
+                (dayofyear({{second_date}}) - (quarter({{second_date}}) * 365/4))
+                < (dayofyear({{first_date}}) - (quarter({{first_date}}) * 365/4))
+            ) then 1
+            when {{first_date}} > {{second_date}} and (
+                (dayofyear({{second_date}}) - (quarter({{second_date}}) * 365/4))
+                > (dayofyear({{first_date}}) - (quarter({{first_date}}) * 365/4))
+            ) then -1
+            else 0 end
+
+    {%- elif datepart == 'year' -%}
+
+        year({{second_date}}) - year({{first_date}})
+
+    {%- elif datepart in ('hour', 'minute', 'second', 'millisecond', 'microsecond') -%}
+
+        {%- set divisor -%}
+            {%- if datepart == 'hour' -%} 3600
+            {%- elif datepart == 'minute' -%} 60
+            {%- elif datepart == 'second' -%} 1
+            {%- elif datepart == 'millisecond' -%} (1/1000)
+            {%- elif datepart == 'microsecond' -%} (1/1000000)
+            {%- endif -%}
+        {%- endset -%}
+
+        case when {{first_date}} < {{second_date}}
+            then ceil((
+                {# make sure the timestamps are real, otherwise raise an error asap #}
+                {{ assert_not_null('to_unix_timestamp', assert_not_null('to_timestamp', second_date)) }}
+                - {{ assert_not_null('to_unix_timestamp', assert_not_null('to_timestamp', first_date)) }}
+            ) / {{divisor}})
+            else floor((
+                {{ assert_not_null('to_unix_timestamp', assert_not_null('to_timestamp', second_date)) }}
+                - {{ assert_not_null('to_unix_timestamp', assert_not_null('to_timestamp', first_date)) }}
+            ) / {{divisor}})
+            end
+
+            {% if datepart == 'millisecond' %}
+                + cast(date_format({{second_date}}, 'SSS') as int)
+                - cast(date_format({{first_date}}, 'SSS') as int)
+            {% endif %}
+
+            {% if datepart == 'microsecond' %}
+                {% set capture_str = '[0-9]{4}-[0-9]{2}-[0-9]{2}.[0-9]{2}:[0-9]{2}:[0-9]{2}.([0-9]{6})' %}
+                -- Spark doesn't really support microseconds, so this is a massive hack!
+                -- It will only work if the timestamp-string is of the format
+                -- 'yyyy-MM-dd-HH mm.ss.SSSSSS'
+                + cast(regexp_extract({{second_date}}, '{{capture_str}}', 1) as int)
+                - cast(regexp_extract({{first_date}}, '{{capture_str}}', 1) as int)
+            {% endif %}
+
+    {%- else -%}
+
+        {{ exceptions.raise_compiler_error("macro datediff not implemented for datepart ~ '" ~ datepart ~ "' ~ on Spark") }}
+
+    {%- endif -%}
+
+{% endmacro %}
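The `week` branch above counts whole 7-day spans and then corrects by one when the interval crosses a Sunday boundary (Spark's `dayofweek` returns Sunday = 1 through Saturday = 7). A rough plain-Python restatement, for intuition only:

```python
import math
from datetime import date

def spark_dayofweek(d: date) -> int:
    return d.isoweekday() % 7 + 1  # Sunday = 1 ... Saturday = 7

def week_diff(first: date, second: date) -> int:
    days = (second - first).days  # Spark: datediff(second_date, first_date)
    weeks = math.floor(days / 7) if first < second else math.ceil(days / 7)
    if first < second and spark_dayofweek(second) < spark_dayofweek(first):
        weeks += 1
    elif first > second and spark_dayofweek(second) > spark_dayofweek(first):
        weeks -= 1
    return weeks

# Saturday -> Monday is only 2 calendar days, but it crosses a Sunday boundary: 1 week.
print(week_diff(date(2022, 1, 1), date(2022, 1, 3)))  # 1
```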
diff --git a/dbt/include/spark/macros/utils/listagg.sql b/dbt/include/spark/macros/utils/listagg.sql
new file mode 100644
index 000000000..3577edb71
--- /dev/null
+++ b/dbt/include/spark/macros/utils/listagg.sql
@@ -0,0 +1,17 @@
+{% macro spark__listagg(measure, delimiter_text, order_by_clause, limit_num) -%}
+
+  {% if order_by_clause %}
+    {{ exceptions.warn("order_by_clause is not supported for listagg on Spark/Databricks") }}
+  {% endif %}
+
+  {% set collect_list %} collect_list({{ measure }}) {% endset %}
+
+  {% set limited %} slice({{ collect_list }}, 1, {{ limit_num }}) {% endset %}
+
+  {% set collected = limited if limit_num else collect_list %}
+
+  {% set final %} array_join({{ collected }}, {{ delimiter_text }}) {% endset %}
+
+  {% do return(final) %}
+
+{%- endmacro %}
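Because SparkSQL has no native `listagg`, the macro assembles one from `collect_list`, an optional `slice` for the limit, and `array_join` for the delimiter; any `order_by_clause` is warned about and ignored. A rough Python analogue of that pipeline (hypothetical helper):

```python
from typing import List, Optional

def listagg(values: List[str], delimiter_text: str = "", limit_num: Optional[int] = None) -> str:
    collected = values[:limit_num] if limit_num else values  # slice(collect_list, 1, limit_num)
    return delimiter_text.join(collected)                    # array_join(collected, delimiter)

print(listagg(["a", "b", "c"], ", ", limit_num=2))  # a, b
```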
diff --git a/dbt/include/spark/macros/utils/split_part.sql b/dbt/include/spark/macros/utils/split_part.sql
new file mode 100644
index 000000000..d5ae30924
--- /dev/null
+++ b/dbt/include/spark/macros/utils/split_part.sql
@@ -0,0 +1,23 @@
+{% macro spark__split_part(string_text, delimiter_text, part_number) %}
+
+    {% set delimiter_expr %}
+
+        -- escape if starts with a special character
+        case when regexp_extract({{ delimiter_text }}, '([^A-Za-z0-9])(.*)', 1) != '_'
+            then concat('\\', {{ delimiter_text }})
+            else {{ delimiter_text }} end
+
+    {% endset %}
+
+    {% set split_part_expr %}
+
+    split(
+        {{ string_text }},
+        {{ delimiter_expr }}
+        )[({{ part_number - 1 }})]
+
+    {% endset %}
+
+    {{ return(split_part_expr) }}
+
+{% endmacro %}
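The rendered expression splits on the (escaped) delimiter and indexes with `part_number - 1`, because `split_part` is 1-indexed while Spark arrays are 0-indexed. A rough Python analogue of that contract (hypothetical helper):

```python
def split_part(string_text: str, delimiter_text: str, part_number: int) -> str:
    # 1-indexed, like the macro; the underlying array index is part_number - 1
    return string_text.split(delimiter_text)[part_number - 1]

print(split_part("a|b|c", "|", 2))  # b
```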
diff --git a/tests/functional/adapter/test_basic.py b/tests/functional/adapter/test_basic.py
index 70f3267a4..e0cf2f7fe 100644
--- a/tests/functional/adapter/test_basic.py
+++ b/tests/functional/adapter/test_basic.py
@@ -64,7 +64,7 @@ def project_config_update(self):
         }
 
 
-#hese tests were not enabled in the dbtspec files, so skipping here.
+# These tests were not enabled in the dbtspec files, so skipping here.
 # Error encountered was: Error running query: java.lang.ClassNotFoundException: delta.DefaultSource
 @pytest.mark.skip_profile('apache_spark', 'spark_session')
 class TestSnapshotTimestampSpark(BaseSnapshotTimestamp):
@@ -79,5 +79,7 @@ def project_config_update(self):
             }
         }
 
+
+@pytest.mark.skip_profile('spark_session')
 class TestBaseAdapterMethod(BaseAdapterMethod):
     pass
\ No newline at end of file
diff --git a/tests/functional/adapter/utils/fixture_listagg.py b/tests/functional/adapter/utils/fixture_listagg.py
new file mode 100644
index 000000000..0262ca234
--- /dev/null
+++ b/tests/functional/adapter/utils/fixture_listagg.py
@@ -0,0 +1,61 @@
+# SparkSQL does not support 'order by' for its 'listagg' equivalent
+# the argument is ignored, so let's ignore those fields when checking equivalency
+
+models__test_listagg_no_order_by_sql = """
+with data as (
+    select * from {{ ref('data_listagg') }}
+),
+data_output as (
+    select * from {{ ref('data_listagg_output') }}
+),
+calculate as (
+/*
+
+    select
+        group_col,
+        {{ listagg('string_text', "'_|_'", "order by order_col") }} as actual,
+        'bottom_ordered' as version
+    from data
+    group by group_col
+    union all
+    select
+        group_col,
+        {{ listagg('string_text', "'_|_'", "order by order_col", 2) }} as actual,
+        'bottom_ordered_limited' as version
+    from data
+    group by group_col
+    union all
+
+*/
+    select
+        group_col,
+        {{ listagg('string_text', "', '") }} as actual,
+        'comma_whitespace_unordered' as version
+    from data
+    where group_col = 3
+    group by group_col
+    union all
+    select
+        group_col,
+        {{ listagg('DISTINCT string_text', "','") }} as actual,
+        'distinct_comma' as version
+    from data
+    where group_col = 3
+    group by group_col
+    union all
+    select
+        group_col,
+        {{ listagg('string_text') }} as actual,
+        'no_params' as version
+    from data
+    where group_col = 3
+    group by group_col
+)
+select
+    calculate.actual,
+    data_output.expected
+from calculate
+left join data_output
+on calculate.group_col = data_output.group_col
+and calculate.version = data_output.version
+"""
diff --git a/tests/functional/adapter/utils/test_utils.py b/tests/functional/adapter/utils/test_utils.py
new file mode 100644
index 000000000..9137c2f75
--- /dev/null
+++ b/tests/functional/adapter/utils/test_utils.py
@@ -0,0 +1,121 @@
+import pytest
+
+from dbt.tests.adapter.utils.test_any_value import BaseAnyValue
+from dbt.tests.adapter.utils.test_bool_or import BaseBoolOr
+from dbt.tests.adapter.utils.test_cast_bool_to_text import BaseCastBoolToText
+from dbt.tests.adapter.utils.test_concat import BaseConcat
+from dbt.tests.adapter.utils.test_dateadd import BaseDateAdd
+from dbt.tests.adapter.utils.test_datediff import BaseDateDiff
+from dbt.tests.adapter.utils.test_date_trunc import BaseDateTrunc
+from dbt.tests.adapter.utils.test_escape_single_quotes import BaseEscapeSingleQuotesQuote
+from dbt.tests.adapter.utils.test_escape_single_quotes import BaseEscapeSingleQuotesBackslash
+from dbt.tests.adapter.utils.test_except import BaseExcept
+from dbt.tests.adapter.utils.test_hash import BaseHash
+from dbt.tests.adapter.utils.test_intersect import BaseIntersect
+from dbt.tests.adapter.utils.test_last_day import BaseLastDay
+from dbt.tests.adapter.utils.test_length import BaseLength
+from dbt.tests.adapter.utils.test_position import BasePosition
+from dbt.tests.adapter.utils.test_replace import BaseReplace
+from dbt.tests.adapter.utils.test_right import BaseRight
+from dbt.tests.adapter.utils.test_safe_cast import BaseSafeCast
+from dbt.tests.adapter.utils.test_split_part import BaseSplitPart
+from dbt.tests.adapter.utils.test_string_literal import BaseStringLiteral
+
+# requires modification
+from dbt.tests.adapter.utils.test_listagg import BaseListagg
+from dbt.tests.adapter.utils.fixture_listagg import models__test_listagg_yml
+from tests.functional.adapter.utils.fixture_listagg import models__test_listagg_no_order_by_sql
+
+
+class TestAnyValue(BaseAnyValue):
+    pass
+
+
+class TestBoolOr(BaseBoolOr):
+    pass
+
+
+class TestCastBoolToText(BaseCastBoolToText):
+    pass
+
+
+@pytest.mark.skip_profile('spark_session')
+class TestConcat(BaseConcat):
+    pass
+
+
+class TestDateAdd(BaseDateAdd):
+    pass
+
+
+@pytest.mark.skip_profile('spark_session')
+class TestDateDiff(BaseDateDiff):
+    pass
+
+
+class TestDateTrunc(BaseDateTrunc):
+    pass
+
+
+class TestEscapeSingleQuotes(BaseEscapeSingleQuotesQuote):
+    pass
+
+
+class TestExcept(BaseExcept):
+    pass
+
+
+@pytest.mark.skip_profile('spark_session')
+class TestHash(BaseHash):
+    pass
+
+
+class TestIntersect(BaseIntersect):
+    pass
+
+
+class TestLastDay(BaseLastDay):
+    pass
+
+
+class TestLength(BaseLength):
+    pass
+
+
+# SparkSQL does not support 'order by' for its 'listagg' equivalent
+# the argument is ignored, so let's ignore those fields when checking equivalency
+class TestListagg(BaseListagg):
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "test_listagg.yml": models__test_listagg_yml,
+            "test_listagg.sql": self.interpolate_macro_namespace(
+                models__test_listagg_no_order_by_sql, "listagg"
+            ),
+        }
+
+
+class TestPosition(BasePosition):
+    pass
+
+
+@pytest.mark.skip_profile('spark_session')
+class TestReplace(BaseReplace):
+    pass
+
+
+@pytest.mark.skip_profile('spark_session')
+class TestRight(BaseRight):
+    pass
+
+
+class TestSafeCast(BaseSafeCast):
+    pass
+
+
+class TestSplitPart(BaseSplitPart):
+    pass
+
+
+class TestStringLiteral(BaseStringLiteral):
+    pass

From 120ec42d7f848cd84e9e88512eb10c63ac8f88bc Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Fri, 24 Jun 2022 14:32:58 -0400
Subject: [PATCH 09/54] Bumping version to 1.2.0b1 (#374)

* Bumping version to 1.2.0b1

* Remove whitespace

* Update CHANGELOG.md

Co-authored-by: Github Build Bot 
Co-authored-by: leahwicz <60146280+leahwicz@users.noreply.github.com>
---
 .bumpversion.cfg                  | 2 +-
 CHANGELOG.md                      | 9 ++++++++-
 dbt/adapters/spark/__version__.py | 2 +-
 setup.py                          | 2 +-
 4 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 4de02c345..0a892fdc4 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.2.0a1
+current_version = 1.2.0b1
 parse = (?P<major>\d+)
 	\.(?P<minor>\d+)
 	\.(?P<patch>\d+)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 49e030aba..0a3b400f6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,10 +1,17 @@
-## dbt-spark 1.2.0 (April 28, 2022)
+## dbt-spark 1.2.0b1 (June 24, 2022)
 
 ### Fixes
 - `adapter.get_columns_in_relation` (method) and `get_columns_in_relation` (macro) now return identical responses. The previous behavior of `get_columns_in_relation` (macro) is now represented by a new macro, `get_columns_in_relation_raw` ([#354](https://github.com/dbt-labs/dbt-spark/issues/354), [#355](https://github.com/dbt-labs/dbt-spark/pull/355))
 
 ### Under the hood
 - Add `DBT_INVOCATION_ENV` environment variable to ODBC user agent string ([#366](https://github.com/dbt-labs/dbt-spark/pull/366))
+- Initialize lift + shift for cross-db macros ([#359](https://github.com/dbt-labs/dbt-spark/pull/359))
+- Add invocation env to user agent string ([#367](https://github.com/dbt-labs/dbt-spark/pull/367))
+- Use dispatch pattern for get_columns_in_relation_raw macro ([#365](https://github.com/dbt-labs/dbt-spark/pull/365))
+
+### Contributors
+- [@ueshin](https://github.com/dbt-labs/dbt-spark/commits?author=ueshin) ([#365](https://github.com/dbt-labs/dbt-spark/pull/365))
+- [@dbeatty10](https://github.com/dbeatty10) ([#359](https://github.com/dbt-labs/dbt-spark/pull/359))
 
 ## dbt-spark 1.1.0 (April 28, 2022)
 
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index a6b977228..6e8eee929 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.2.0a1"
+version = "1.2.0b1"
diff --git a/setup.py b/setup.py
index 836aeed43..aba51b34f 100644
--- a/setup.py
+++ b/setup.py
@@ -50,7 +50,7 @@ def _get_dbt_core_version():
 
 
 package_name = "dbt-spark"
-package_version = "1.2.0a1"
+package_version = "1.2.0b1"
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 

From 0082e7372d27f7c81019d0b9bb6193baad564e15 Mon Sep 17 00:00:00 2001
From: Neelesh Salian 
Date: Tue, 28 Jun 2022 08:46:09 -0700
Subject: [PATCH 10/54] Upgrade to Spark 3.1.1 with testing (#349)

* Testing Spark3 upgrade.WIP

* Skip tests.WIP

* update readme and setup for pyspark.WIP

* Fix circle ci version and bump mem value

* Bump memory, fix nit, bump pyhive version

* Pyhive version change

* enabled sasl for metastore

* Explicit server2 host port

* Try showing debug-level logs

* Rm -n4

* move to godatadriven lates spark image

* restore to 2 to check output

* Restore debug and parallelized to check output

* Revert to 3.0

* Revert to normal state

* open source spark image

* Change to pyspark image

* Testing with gdd spark 3.0 for thrift

* Switch back to dbt user pass

* Spark 3.1.1 gdd image without configs

* Clean up

* Skip session test

* Clean up for review

* Update to CHANGELOG

Co-authored-by: Jeremy Cohen 
---
 .circleci/config.yml                   | 19 +------------------
 CHANGELOG.md                           |  2 ++
 README.md                              |  2 +-
 docker-compose.yml                     |  4 ++--
 docker/spark-defaults.conf             |  4 +++-
 tests/functional/adapter/test_basic.py |  2 +-
 6 files changed, 10 insertions(+), 23 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 34e449acf..0a1a3e1b2 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -33,29 +33,12 @@ jobs:
       DBT_INVOCATION_ENV: circle
     docker:
       - image: fishtownanalytics/test-container:10
-      - image: godatadriven/spark:2
+      - image: godatadriven/spark:3.1.1
         environment:
           WAIT_FOR: localhost:5432
         command: >
           --class org.apache.spark.sql.hive.thriftserver.HiveThriftServer2
           --name Thrift JDBC/ODBC Server
-          --conf spark.hadoop.javax.jdo.option.ConnectionURL=jdbc:postgresql://localhost/metastore
-          --conf spark.hadoop.javax.jdo.option.ConnectionUserName=dbt
-          --conf spark.hadoop.javax.jdo.option.ConnectionPassword=dbt
-          --conf spark.hadoop.javax.jdo.option.ConnectionDriverName=org.postgresql.Driver
-          --conf spark.serializer=org.apache.spark.serializer.KryoSerializer
-          --conf spark.jars.packages=org.apache.hudi:hudi-spark-bundle_2.11:0.9.0
-          --conf spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension
-          --conf spark.driver.userClassPathFirst=true
-          --conf spark.hadoop.datanucleus.autoCreateTables=true
-          --conf spark.hadoop.datanucleus.schema.autoCreateTables=true
-          --conf spark.hadoop.datanucleus.fixedDatastore=false
-          --conf spark.sql.hive.convertMetastoreParquet=false
-          --hiveconf hoodie.datasource.hive_sync.use_jdbc=false
-          --hiveconf hoodie.datasource.hive_sync.mode=hms
-          --hiveconf datanucleus.schema.autoCreateAll=true
-          --hiveconf hive.metastore.schema.verification=false
-
       - image: postgres:9.6.17-alpine
         environment:
           POSTGRES_USER: dbt
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0a3b400f6..0ab9f4e8f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,6 +18,7 @@
 ### Features
 - Add session connection method ([#272](https://github.com/dbt-labs/dbt-spark/issues/272), [#279](https://github.com/dbt-labs/dbt-spark/pull/279))
 - rename file to match reference to dbt-core ([#344](https://github.com/dbt-labs/dbt-spark/pull/344))
+- Upgrade Spark version to 3.1.1 ([#348](https://github.com/dbt-labs/dbt-spark/issues/348), [#349](https://github.com/dbt-labs/dbt-spark/pull/349))
 
 ### Under the hood
 - Add precommit tooling to this repo ([#356](https://github.com/dbt-labs/dbt-spark/pull/356))
@@ -29,6 +30,7 @@
 ### Contributors
 - [@JCZuurmond](https://github.com/dbt-labs/dbt-spark/pull/279) ( [#279](https://github.com/dbt-labs/dbt-spark/pull/279))
 - [@ueshin](https://github.com/ueshin) ([#320](https://github.com/dbt-labs/dbt-spark/pull/320))
+- [@nssalian](https://github.com/nssalian) ([#349](https://github.com/dbt-labs/dbt-spark/pull/349))
 
 ## dbt-spark 1.1.0b1 (March 23, 2022)
 
diff --git a/README.md b/README.md
index 037a49895..241d869d7 100644
--- a/README.md
+++ b/README.md
@@ -26,7 +26,7 @@ more information, consult [the docs](https://docs.getdbt.com/docs/profile-spark)
 
 ## Running locally
 A `docker-compose` environment starts a Spark Thrift server and a Postgres database as a Hive Metastore backend.
-Note that this is spark 2 not spark 3 so some functionalities might not be available.
+Note: dbt-spark now supports Spark 3.1.1 (formerly on Spark 2.x).
 
 The following command would start two docker containers
 ```
diff --git a/docker-compose.yml b/docker-compose.yml
index 8054dfd75..9bc9e509c 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,8 +1,8 @@
 version: "3.7"
 services:
 
-  dbt-spark2-thrift:
-    image: godatadriven/spark:3.0
+  dbt-spark3-thrift:
+    image: godatadriven/spark:3.1.1
     ports:
       - "10000:10000"
       - "4040:4040"
diff --git a/docker/spark-defaults.conf b/docker/spark-defaults.conf
index 48a0501c2..30ec59591 100644
--- a/docker/spark-defaults.conf
+++ b/docker/spark-defaults.conf
@@ -1,7 +1,9 @@
+spark.driver.memory 2g
+spark.executor.memory 2g
 spark.hadoop.datanucleus.autoCreateTables	true
 spark.hadoop.datanucleus.schema.autoCreateTables	true
 spark.hadoop.datanucleus.fixedDatastore 	false
 spark.serializer	org.apache.spark.serializer.KryoSerializer
-spark.jars.packages	org.apache.hudi:hudi-spark3-bundle_2.12:0.9.0
+spark.jars.packages	org.apache.hudi:hudi-spark3-bundle_2.12:0.10.0
 spark.sql.extensions	org.apache.spark.sql.hudi.HoodieSparkSessionExtension
 spark.driver.userClassPathFirst true
diff --git a/tests/functional/adapter/test_basic.py b/tests/functional/adapter/test_basic.py
index e0cf2f7fe..e1a57fd3f 100644
--- a/tests/functional/adapter/test_basic.py
+++ b/tests/functional/adapter/test_basic.py
@@ -82,4 +82,4 @@ def project_config_update(self):
 
 @pytest.mark.skip_profile('spark_session')
 class TestBaseAdapterMethod(BaseAdapterMethod):
-    pass
\ No newline at end of file
+    pass

From cc2daed76627203d7e97d4394b607b21ee6abed8 Mon Sep 17 00:00:00 2001
From: Jeremy Cohen 
Date: Wed, 29 Jun 2022 15:23:43 +0200
Subject: [PATCH 11/54] Disable TestDateDiff on apache_spark (#375)

* Disable TestDateDiff on apache_spark

* i dont really know what im doing
---
 tests/functional/adapter/utils/test_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/functional/adapter/utils/test_utils.py b/tests/functional/adapter/utils/test_utils.py
index 9137c2f75..c71161e65 100644
--- a/tests/functional/adapter/utils/test_utils.py
+++ b/tests/functional/adapter/utils/test_utils.py
@@ -48,7 +48,8 @@ class TestDateAdd(BaseDateAdd):
     pass
 
 
-@pytest.mark.skip_profile('spark_session')
+# this generates too much SQL to run successfully in our testing environments :(
+@pytest.mark.skip_profile('apache_spark', 'spark_session')
 class TestDateDiff(BaseDateDiff):
     pass
 

From 48e1989888bc1a858d58d93344fe777d7bca887c Mon Sep 17 00:00:00 2001
From: Jeremy Cohen 
Date: Thu, 30 Jun 2022 12:23:29 +0200
Subject: [PATCH 12/54] Revert #367 (#378)

---
 CHANGELOG.md                      | 3 +--
 dbt/adapters/spark/connections.py | 8 +++-----
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0ab9f4e8f..f99b14c76 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,13 +4,12 @@
 - `adapter.get_columns_in_relation` (method) and `get_columns_in_relation` (macro) now return identical responses. The previous behavior of `get_columns_in_relation` (macro) is now represented by a new macro, `get_columns_in_relation_raw` ([#354](https://github.com/dbt-labs/dbt-spark/issues/354), [#355](https://github.com/dbt-labs/dbt-spark/pull/355))
 
 ### Under the hood
-- Add `DBT_INVOCATION_ENV` environment variable to ODBC user agent string ([#366](https://github.com/dbt-labs/dbt-spark/pull/366))
 - Initialize lift + shift for cross-db macros ([#359](https://github.com/dbt-labs/dbt-spark/pull/359))
 - Add invocation env to user agent string ([#367](https://github.com/dbt-labs/dbt-spark/pull/367))
 - Use dispatch pattern for get_columns_in_relation_raw macro ([#365](https://github.com/dbt-labs/dbt-spark/pull/365))
 
 ### Contributors
-- [@ueshin](https://github.com/dbt-labs/dbt-spark/commits?author=ueshin) ([#365](https://github.com/dbt-labs/dbt-spark/pull/365))
+- [@ueshin](https://github.com/ueshin) ([#365](https://github.com/dbt-labs/dbt-spark/pull/365))
 - [@dbeatty10](https://github.com/dbeatty10) ([#359](https://github.com/dbt-labs/dbt-spark/pull/359))
 
 ## dbt-spark 1.1.0 (April 28, 2022)
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 135463eb3..59ceb9dd8 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -1,5 +1,3 @@
-import os
-
 from contextlib import contextmanager
 
 import dbt.exceptions
@@ -9,7 +7,6 @@
 from dbt.events import AdapterLogger
 from dbt.utils import DECIMALS
 from dbt.adapters.spark import __version__
-from dbt.tracking import DBT_INVOCATION_ENV
 
 try:
     from TCLIService.ttypes import TOperationState as ThriftState
@@ -412,8 +409,9 @@ def open(cls, connection):
                     cls.validate_creds(creds, required_fields)
 
                     dbt_spark_version = __version__.version
-                    dbt_invocation_env = os.getenv(DBT_INVOCATION_ENV) or "manual"
-                    user_agent_entry = f"dbt-labs-dbt-spark/{dbt_spark_version} (Databricks, {dbt_invocation_env})"  # noqa
+                    user_agent_entry = (
+                        f"dbt-labs-dbt-spark/{dbt_spark_version} (Databricks)"  # noqa
+                    )
 
                     # http://simba.wpengine.com/products/Spark/doc/ODBC_InstallGuide/unix/content/odbc/hi/configuring/serverside.htm
                     ssp = {f"SSP_{k}": f"{{{v}}}" for k, v in creds.server_side_parameters.items()}

From ed8a17036be15ce929a068e2daeaf7433573778a Mon Sep 17 00:00:00 2001
From: Sindre Grindheim 
Date: Tue, 5 Jul 2022 10:25:44 +0200
Subject: [PATCH 13/54] Not dropping table for incremental full refresh with
 delta (#287)

* Not dropping table for incremental full refresh with delta

* Updated changelog

* Simplified conditional logic according to suggestion

* Updated changelog

* Only drop table if not delta table

Co-authored-by: Jeremy Cohen 

* Update changelog, trigger CircleCI tests

Co-authored-by: Jeremy Cohen 
Co-authored-by: Jeremy Cohen 
---
 CHANGELOG.md                                               | 7 +++++++
 .../macros/materializations/incremental/incremental.sql    | 6 +++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f99b14c76..f7164641c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,10 @@
+## dbt-spark 1.2.0rc1 (Release TBD)
+
+- Incremental materialization updated to not drop table first if full refresh for delta lake format, as it already runs _create or replace table_ ([#286](https://github.com/dbt-labs/dbt-spark/issues/286), [#287](https://github.com/dbt-labs/dbt-spark/pull/287/))
+
+### Contributors
+- [@grindheim](https://github.com/grindheim) ([#287](https://github.com/dbt-labs/dbt-spark/pull/287/))
+
 ## dbt-spark 1.2.0b1 (June 24, 2022)
 
 ### Fixes
diff --git a/dbt/include/spark/macros/materializations/incremental/incremental.sql b/dbt/include/spark/macros/materializations/incremental/incremental.sql
index 8d8e69d93..99cd31db1 100644
--- a/dbt/include/spark/macros/materializations/incremental/incremental.sql
+++ b/dbt/include/spark/macros/materializations/incremental/incremental.sql
@@ -26,10 +26,14 @@
 
   {{ run_hooks(pre_hooks) }}
 
+  {% set is_delta = (file_format == 'delta' and existing_relation.is_delta) %}
+
   {% if existing_relation is none %}
     {% set build_sql = create_table_as(False, target_relation, sql) %}
   {% elif existing_relation.is_view or full_refresh_mode %}
-    {% do adapter.drop_relation(existing_relation) %}
+    {% if not is_delta %} {#-- If Delta, we will `create or replace` below, so no need to drop --#}
+      {% do adapter.drop_relation(existing_relation) %}
+    {% endif %}
     {% set build_sql = create_table_as(False, target_relation, sql) %}
   {% else %}
     {% do run_query(create_table_as(True, tmp_relation, sql)) %}

From f284cde4c44010ddf3fdfa420492e6190937e81c Mon Sep 17 00:00:00 2001
From: Jeremy Cohen 
Date: Wed, 6 Jul 2022 13:42:23 +0200
Subject: [PATCH 14/54] Data type macros (#380)

* Run tests for data type macros. Fine tune numeric_type

* Hard code seed loading types for float + int

* Repoint, fixup, changelog entry
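
A standalone sketch of the behavior the new `SparkColumn.numeric_type` classmethod is meant to have, restated outside the class for illustration (see the column.py diff below). SparkSQL only understands `decimal`, never `numeric` or `number`:

```python
def numeric_type(dtype: str, precision, scale) -> str:
    # `dtype` mirrors the classmethod signature but is ignored here:
    # without precision/scale we fall back to bare `decimal`,
    # otherwise render e.g. decimal(28,6).
    if precision is None or scale is None:
        return "decimal"
    return f"decimal({precision},{scale})"


assert numeric_type("numeric", None, None) == "decimal"
assert numeric_type("numeric", 28, 6) == "decimal(28,6)"
```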
---
 CHANGELOG.md                                  |  4 ++
 dbt/adapters/spark/column.py                  |  8 +++
 .../adapter/utils/test_data_types.py          | 67 +++++++++++++++++++
 3 files changed, 79 insertions(+)
 create mode 100644 tests/functional/adapter/utils/test_data_types.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f7164641c..81b890f22 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,7 +1,11 @@
 ## dbt-spark 1.2.0rc1 (Release TBD)
 
+### Fixes
 - Incremental materialization updated to not drop table first if full refresh for delta lake format, as it already runs _create or replace table_ ([#286](https://github.com/dbt-labs/dbt-spark/issues/286), [#287](https://github.com/dbt-labs/dbt-spark/pull/287/))
 
+### Under the hood
+- Update `SparkColumn.numeric_type` to return `decimal` instead of `numeric`, since SparkSQL exclusively supports the former ([#380](https://github.com/dbt-labs/dbt-spark/pull/380))
+
 ### Contributors
 - [@grindheim](https://github.com/grindheim) ([#287](https://github.com/dbt-labs/dbt-spark/pull/287/))
 
diff --git a/dbt/adapters/spark/column.py b/dbt/adapters/spark/column.py
index 4df6b301b..dcf7590e9 100644
--- a/dbt/adapters/spark/column.py
+++ b/dbt/adapters/spark/column.py
@@ -37,6 +37,14 @@ def quoted(self) -> str:
     def data_type(self) -> str:
         return self.dtype
 
+    @classmethod
+    def numeric_type(cls, dtype: str, precision: Any, scale: Any) -> str:
+        # SparkSQL does not support 'numeric' or 'number', only 'decimal'
+        if precision is None or scale is None:
+            return "decimal"
+        else:
+            return "{}({},{})".format("decimal", precision, scale)
+
     def __repr__(self) -> str:
         return "".format(self.name, self.data_type)
 
diff --git a/tests/functional/adapter/utils/test_data_types.py b/tests/functional/adapter/utils/test_data_types.py
new file mode 100644
index 000000000..65a24a3a9
--- /dev/null
+++ b/tests/functional/adapter/utils/test_data_types.py
@@ -0,0 +1,67 @@
+import pytest
+from dbt.tests.adapter.utils.data_types.test_type_bigint import BaseTypeBigInt
+from dbt.tests.adapter.utils.data_types.test_type_float import (
+    BaseTypeFloat, seeds__expected_csv as seeds__float_expected_csv
+)
+from dbt.tests.adapter.utils.data_types.test_type_int import (
+    BaseTypeInt, seeds__expected_csv as seeds__int_expected_csv
+)
+from dbt.tests.adapter.utils.data_types.test_type_numeric import BaseTypeNumeric
+from dbt.tests.adapter.utils.data_types.test_type_string import BaseTypeString
+from dbt.tests.adapter.utils.data_types.test_type_timestamp import BaseTypeTimestamp
+
+
+class TestTypeBigInt(BaseTypeBigInt):
+    pass
+
+
+# need to explicitly cast this to avoid it being inferred/loaded as a DOUBLE on Spark
+# in SparkSQL, the two are equivalent for `=` comparison, but distinct for EXCEPT comparison
+seeds__float_expected_yml = """
+version: 2
+seeds:
+  - name: expected
+    config:
+      column_types:
+        float_col: float
+"""
+
+class TestTypeFloat(BaseTypeFloat):
+    @pytest.fixture(scope="class")
+    def seeds(self):
+        return {
+            "expected.csv": seeds__float_expected_csv,
+            "expected.yml": seeds__float_expected_yml,
+        }
+
+
+# need to explicitly cast this to avoid it being inferred/loaded as a BIGINT on Spark
+seeds__int_expected_yml = """
+version: 2
+seeds:
+  - name: expected
+    config:
+      column_types:
+        int_col: int
+"""
+
+class TestTypeInt(BaseTypeInt):
+    @pytest.fixture(scope="class")
+    def seeds(self):
+        return {
+            "expected.csv": seeds__int_expected_csv,
+            "expected.yml": seeds__int_expected_yml,
+        }
+
+    
+class TestTypeNumeric(BaseTypeNumeric):
+    def numeric_fixture_type(self):
+        return "decimal(28,6)"
+
+    
+class TestTypeString(BaseTypeString):
+    pass
+
+    
+class TestTypeTimestamp(BaseTypeTimestamp):
+    pass

From 75d2933665bb3110d8392b35f60c4f0bb039c8c9 Mon Sep 17 00:00:00 2001
From: Neelesh Salian 
Date: Fri, 8 Jul 2022 03:25:53 -0700
Subject: [PATCH 15/54] Fix changelog for spark upgrade feature (#385)

---
 CHANGELOG.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 81b890f22..36958eff3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,12 +2,14 @@
 
 ### Fixes
 - Incremental materialization updated to not drop table first if full refresh for delta lake format, as it already runs _create or replace table_ ([#286](https://github.com/dbt-labs/dbt-spark/issues/286), [#287](https://github.com/dbt-labs/dbt-spark/pull/287/))
+- Apache Spark version upgraded to 3.1.1 ([#348](https://github.com/dbt-labs/dbt-spark/issues/348), [#349](https://github.com/dbt-labs/dbt-spark/pull/349))
 
 ### Under the hood
 - Update `SparkColumn.numeric_type` to return `decimal` instead of `numeric`, since SparkSQL exclusively supports the former ([#380](https://github.com/dbt-labs/dbt-spark/pull/380))
 
 ### Contributors
 - [@grindheim](https://github.com/grindheim) ([#287](https://github.com/dbt-labs/dbt-spark/pull/287/))
+- [@nssalian](https://github.com/nssalian) ([#349](https://github.com/dbt-labs/dbt-spark/pull/349))
 
 ## dbt-spark 1.2.0b1 (June 24, 2022)
 
@@ -28,7 +30,6 @@
 ### Features
 - Add session connection method ([#272](https://github.com/dbt-labs/dbt-spark/issues/272), [#279](https://github.com/dbt-labs/dbt-spark/pull/279))
 - rename file to match reference to dbt-core ([#344](https://github.com/dbt-labs/dbt-spark/pull/344))
-- Upgrade Spark version to 3.1.1 ([#348](https://github.com/dbt-labs/dbt-spark/issues/348), [#349](https://github.com/dbt-labs/dbt-spark/pull/349))
 
 ### Under the hood
 - Add precommit tooling to this repo ([#356](https://github.com/dbt-labs/dbt-spark/pull/356))
@@ -40,7 +41,6 @@
 ### Contributors
 - [@JCZuurmond](https://github.com/dbt-labs/dbt-spark/pull/279) ( [#279](https://github.com/dbt-labs/dbt-spark/pull/279))
 - [@ueshin](https://github.com/ueshin) ([#320](https://github.com/dbt-labs/dbt-spark/pull/320))
-- [@nssalian](https://github.com/nssalian) ([#349](https://github.com/dbt-labs/dbt-spark/pull/349))
 
 ## dbt-spark 1.1.0b1 (March 23, 2022)
 

From 27504209ae37823af560f4952452c559791b1e23 Mon Sep 17 00:00:00 2001
From: Jacek Laskowski 
Date: Mon, 11 Jul 2022 23:42:29 +0200
Subject: [PATCH 16/54] Use lowercase file ext for CONTRIBUTING.md (#384)

---
 CONTRIBUTING.MD => CONTRIBUTING.md | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename CONTRIBUTING.MD => CONTRIBUTING.md (100%)

diff --git a/CONTRIBUTING.MD b/CONTRIBUTING.md
similarity index 100%
rename from CONTRIBUTING.MD
rename to CONTRIBUTING.md

From 9109fe1babaab92cbe1c58868977c7a9c998c2a8 Mon Sep 17 00:00:00 2001
From: Gerda Shank 
Date: Tue, 12 Jul 2022 10:14:11 -0400
Subject: [PATCH 17/54] Add apply_grants call to materialization macros (#381)

* Add apply_grants call to materialization macros

* add standardize_grants_dict

* Working grant macros

* Initialize tests in CI

* Refactor to account for core macro changes. Passing tests

* Fix code checks

* Try default__reset_csv_table

* Code checks

* Revert "Try default__reset_csv_table"

This reverts commit 8bd41451249afee1f2884f24c292b81a0b1da82c.

* Account for refactor in dbt-labs/dbt-core@c763601

* Account for test changes in dbt-labs/dbt-core@debc867

* add changelog

* Empty-Commit

* rerun ci

* rerun ci

* readd a persist_docs call to snapshot.sql

* fix whitespace

Co-authored-by: Jeremy Cohen 
Co-authored-by: Matthew McKnight 
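
To make the new `standardize_grants_dict` easier to follow, here is a plain-Python sketch of the same normalization applied to rows shaped like Spark's grants output (the grantee names are made up, and the adapter actually receives an `agate.Table` rather than a list of dicts):

```python
rows = [
    {"Principal": "user_a@example.com", "ActionType": "SELECT", "ObjectType": "TABLE"},
    {"Principal": "user_a@example.com", "ActionType": "OWN", "ObjectType": "TABLE"},
    {"Principal": "user_b@example.com", "ActionType": "MODIFY", "ObjectType": "TABLE"},
    {"Principal": "user_b@example.com", "ActionType": "SELECT", "ObjectType": "DATABASE"},
]

grants = {}
for row in rows:
    # Only grants on the relation itself (views and tables both report 'TABLE'),
    # and never the OWN privilege, which dbt does not manage.
    if row["ObjectType"] == "TABLE" and row["ActionType"] != "OWN":
        grants.setdefault(row["ActionType"], []).append(row["Principal"])

print(grants)  # {'SELECT': ['user_a@example.com'], 'MODIFY': ['user_b@example.com']}
```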
---
 .circleci/config.yml                          |  6 ++
 CHANGELOG.md                                  |  3 +
 dbt/adapters/spark/impl.py                    | 17 ++++++
 dbt/include/spark/macros/apply_grants.sql     | 39 ++++++++++++
 .../incremental/incremental.sql               |  4 ++
 .../macros/materializations/snapshot.sql      |  4 ++
 .../spark/macros/materializations/table.sql   |  4 ++
 test.env.example                              |  5 ++
 tests/conftest.py                             |  2 +-
 tests/functional/adapter/test_grants.py       | 60 +++++++++++++++++++
 10 files changed, 143 insertions(+), 1 deletion(-)
 create mode 100644 dbt/include/spark/macros/apply_grants.sql
 create mode 100644 tests/functional/adapter/test_grants.py

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 0a1a3e1b2..8f0afa6ce 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -63,6 +63,9 @@ jobs:
     environment:
       DBT_INVOCATION_ENV: circle
       DBT_DATABRICKS_RETRY_ALL: True
+      DBT_TEST_USER_1: "buildbot+dbt_test_user_1@dbtlabs.com"
+      DBT_TEST_USER_2: "buildbot+dbt_test_user_2@dbtlabs.com"
+      DBT_TEST_USER_3: "buildbot+dbt_test_user_3@dbtlabs.com"
     docker:
       - image: fishtownanalytics/test-container:10
     steps:
@@ -78,6 +81,9 @@ jobs:
     environment:
       DBT_INVOCATION_ENV: circle
       ODBC_DRIVER: Simba # TODO: move env var to Docker image
+      DBT_TEST_USER_1: "buildbot+dbt_test_user_1@dbtlabs.com"
+      DBT_TEST_USER_2: "buildbot+dbt_test_user_2@dbtlabs.com"
+      DBT_TEST_USER_3: "buildbot+dbt_test_user_3@dbtlabs.com"
     docker:
       # image based on `fishtownanalytics/test-container` w/ Simba ODBC Spark driver installed
       - image: 828731156495.dkr.ecr.us-east-1.amazonaws.com/dbt-spark-odbc-test-container:latest
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 36958eff3..1abed6ec9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,9 @@
 - Incremental materialization updated to not drop table first if full refresh for delta lake format, as it already runs _create or replace table_ ([#286](https://github.com/dbt-labs/dbt-spark/issues/286), [#287](https://github.com/dbt-labs/dbt-spark/pull/287/))
 - Apache Spark version upgraded to 3.1.1 ([#348](https://github.com/dbt-labs/dbt-spark/issues/348), [#349](https://github.com/dbt-labs/dbt-spark/pull/349))
 
+### Features
+- Add grants to materializations ([#366](https://github.com/dbt-labs/dbt-spark/issues/366), [#381](https://github.com/dbt-labs/dbt-spark/pull/381))
+
 ### Under the hood
 - Update `SparkColumn.numeric_type` to return `decimal` instead of `numeric`, since SparkSQL exclusively supports the former ([#380](https://github.com/dbt-labs/dbt-spark/pull/380))
 
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 699eca9d2..3fb9978d8 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -380,6 +380,23 @@ def run_sql_for_tests(self, sql, fetch, conn):
         finally:
             conn.transaction_open = False
 
+    def standardize_grants_dict(self, grants_table: agate.Table) -> dict:
+        grants_dict: Dict[str, List[str]] = {}
+        for row in grants_table:
+            grantee = row["Principal"]
+            privilege = row["ActionType"]
+            object_type = row["ObjectType"]
+
+            # we only want to consider grants on this object
+            # (view or table both appear as 'TABLE')
+            # and we don't want to consider the OWN privilege
+            if object_type == "TABLE" and privilege != "OWN":
+                if privilege in grants_dict.keys():
+                    grants_dict[privilege].append(grantee)
+                else:
+                    grants_dict.update({privilege: [grantee]})
+        return grants_dict
+
 
 # spark does something interesting with joins when both tables have the same
 # static values for the join condition and complains that the join condition is
diff --git a/dbt/include/spark/macros/apply_grants.sql b/dbt/include/spark/macros/apply_grants.sql
new file mode 100644
index 000000000..49dae95dc
--- /dev/null
+++ b/dbt/include/spark/macros/apply_grants.sql
@@ -0,0 +1,39 @@
+{% macro spark__copy_grants() %}
+
+    {% if config.materialized == 'view' %}
+        {#-- Spark views don't copy grants when they're replaced --#}
+        {{ return(False) }}
+
+    {% else %}
+      {#-- This depends on how we're replacing the table, which depends on its file format
+        -- Just play it safe by assuming that grants have been copied over, and need to be checked / possibly revoked
+        -- We can make this more efficient in the future
+      #}
+        {{ return(True) }}
+
+    {% endif %}
+{% endmacro %}
+
+
+{%- macro spark__get_grant_sql(relation, privilege, grantees) -%}
+    grant {{ privilege }} on {{ relation }} to {{ adapter.quote(grantees[0]) }}
+{%- endmacro %}
+
+
+{%- macro spark__get_revoke_sql(relation, privilege, grantees) -%}
+    revoke {{ privilege }} on {{ relation }} from {{ adapter.quote(grantees[0]) }}
+{%- endmacro %}
+
+
+{%- macro spark__support_multiple_grantees_per_dcl_statement() -%}
+    {{ return(False) }}
+{%- endmacro -%}
+
+
+{% macro spark__call_dcl_statements(dcl_statement_list) %}
+    {% for dcl_statement in dcl_statement_list %}
+        {% call statement('grant_or_revoke') %}
+            {{ dcl_statement }}
+        {% endcall %}
+    {% endfor %}
+{% endmacro %}
diff --git a/dbt/include/spark/macros/materializations/incremental/incremental.sql b/dbt/include/spark/macros/materializations/incremental/incremental.sql
index 99cd31db1..b80510b71 100644
--- a/dbt/include/spark/macros/materializations/incremental/incremental.sql
+++ b/dbt/include/spark/macros/materializations/incremental/incremental.sql
@@ -3,6 +3,7 @@
   {#-- Validate early so we don't run SQL if the file_format + strategy combo is invalid --#}
   {%- set raw_file_format = config.get('file_format', default='parquet') -%}
   {%- set raw_strategy = config.get('incremental_strategy', default='append') -%}
+  {%- set grant_config = config.get('grants') -%}
 
   {%- set file_format = dbt_spark_validate_get_file_format(raw_file_format) -%}
   {%- set strategy = dbt_spark_validate_get_incremental_strategy(raw_strategy, file_format) -%}
@@ -45,6 +46,9 @@
     {{ build_sql }}
   {%- endcall -%}
 
+  {% set should_revoke = should_revoke(existing_relation, full_refresh_mode) %}
+  {% do apply_grants(target_relation, grant_config, should_revoke) %}
+
   {% do persist_docs(target_relation, model) %}
 
   {{ run_hooks(post_hooks) }}
diff --git a/dbt/include/spark/macros/materializations/snapshot.sql b/dbt/include/spark/macros/materializations/snapshot.sql
index 9c891ef04..a5304682e 100644
--- a/dbt/include/spark/macros/materializations/snapshot.sql
+++ b/dbt/include/spark/macros/materializations/snapshot.sql
@@ -75,6 +75,7 @@
   {%- set strategy_name = config.get('strategy') -%}
   {%- set unique_key = config.get('unique_key') %}
   {%- set file_format = config.get('file_format', 'parquet') -%}
+  {%- set grant_config = config.get('grants') -%}
 
   {% set target_relation_exists, target_relation = get_or_create_relation(
           database=none,
@@ -163,6 +164,9 @@
       {{ final_sql }}
   {% endcall %}
 
+  {% set should_revoke = should_revoke(target_relation_exists, full_refresh_mode) %}
+  {% do apply_grants(target_relation, grant_config, should_revoke) %}
+
   {% do persist_docs(target_relation, model) %}
 
   {{ run_hooks(post_hooks, inside_transaction=True) }}
diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql
index 2eeb806fd..3462d3332 100644
--- a/dbt/include/spark/macros/materializations/table.sql
+++ b/dbt/include/spark/macros/materializations/table.sql
@@ -1,6 +1,7 @@
 {% materialization table, adapter = 'spark' %}
 
   {%- set identifier = model['alias'] -%}
+  {%- set grant_config = config.get('grants') -%}
 
   {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%}
   {%- set target_relation = api.Relation.create(identifier=identifier,
@@ -22,6 +23,9 @@
     {{ create_table_as(False, target_relation, sql) }}
   {%- endcall %}
 
+  {% set should_revoke = should_revoke(old_relation, full_refresh_mode=True) %}
+  {% do apply_grants(target_relation, grant_config, should_revoke) %}
+
   {% do persist_docs(target_relation, model) %}
 
   {{ run_hooks(post_hooks) }}
diff --git a/test.env.example b/test.env.example
index bf4cf2eee..e69f700b7 100644
--- a/test.env.example
+++ b/test.env.example
@@ -8,3 +8,8 @@ DBT_DATABRICKS_HOST_NAME=
 DBT_DATABRICKS_TOKEN=
 # file path to local ODBC driver
 ODBC_DRIVER=
+
+# users for testing 'grants' functionality
+DBT_TEST_USER_1=
+DBT_TEST_USER_2=
+DBT_TEST_USER_3=
diff --git a/tests/conftest.py b/tests/conftest.py
index 0c624713c..0771566b7 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -108,4 +108,4 @@ def skip_by_profile_type(request):
     if request.node.get_closest_marker("skip_profile"):
         for skip_profile_type in request.node.get_closest_marker("skip_profile").args:
             if skip_profile_type == profile_type:
-                pytest.skip("skipped on '{profile_type}' profile")
+                pytest.skip(f"skipped on '{profile_type}' profile")
diff --git a/tests/functional/adapter/test_grants.py b/tests/functional/adapter/test_grants.py
new file mode 100644
index 000000000..8e0341df6
--- /dev/null
+++ b/tests/functional/adapter/test_grants.py
@@ -0,0 +1,60 @@
+import pytest
+from dbt.tests.adapter.grants.test_model_grants import BaseModelGrants
+from dbt.tests.adapter.grants.test_incremental_grants import BaseIncrementalGrants
+from dbt.tests.adapter.grants.test_invalid_grants import BaseInvalidGrants
+from dbt.tests.adapter.grants.test_seed_grants import BaseSeedGrants
+from dbt.tests.adapter.grants.test_snapshot_grants import BaseSnapshotGrants
+
+
+@pytest.mark.skip_profile("apache_spark", "spark_session")
+class TestModelGrantsSpark(BaseModelGrants):
+    def privilege_grantee_name_overrides(self):
+        # insert --> modify
+        return {
+            "select": "select",
+            "insert": "modify",
+            "fake_privilege": "fake_privilege",
+            "invalid_user": "invalid_user",
+        }
+
+
+@pytest.mark.skip_profile("apache_spark", "spark_session")
+class TestIncrementalGrantsSpark(BaseIncrementalGrants):
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "models": {
+                "+file_format": "delta",
+                "+incremental_strategy": "merge",
+            }
+        }
+
+
+@pytest.mark.skip_profile("apache_spark", "spark_session")
+class TestSeedGrantsSpark(BaseSeedGrants):
+    # seeds in dbt-spark are currently "full refreshed," in such a way that
+    # the grants are not carried over
+    # see https://github.com/dbt-labs/dbt-spark/issues/388
+    def seeds_support_partial_refresh(self):
+        return False
+
+
+@pytest.mark.skip_profile("apache_spark", "spark_session")
+class TestSnapshotGrantsSpark(BaseSnapshotGrants):
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "snapshots": {
+                "+file_format": "delta",
+                "+incremental_strategy": "merge",
+            }
+        }
+
+
+@pytest.mark.skip_profile("apache_spark", "spark_session")
+class TestInvalidGrantsSpark(BaseInvalidGrants):
+    def grantee_does_not_exist_error(self):
+        return "RESOURCE_DOES_NOT_EXIST"
+        
+    def privilege_does_not_exist_error(self):
+        return "Action Unknown"

From 244742ac0ee35db9774d9830a96d5796bf738448 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Tue, 12 Jul 2022 14:21:59 -0400
Subject: [PATCH 18/54] Bumping version to 1.3.0a1 (#393)

* Bumping version to 1.3.0a1

* Update CHANGELOG.md

* Fix whitespace

Co-authored-by: Github Build Bot 
Co-authored-by: leahwicz <60146280+leahwicz@users.noreply.github.com>
---
 .bumpversion.cfg                  | 2 +-
 CHANGELOG.md                      | 5 ++++-
 dbt/adapters/spark/__version__.py | 2 +-
 setup.py                          | 2 +-
 4 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 0a892fdc4..605b6f378 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.2.0b1
+current_version = 1.3.0a1
 parse = (?P<major>\d+)
 	\.(?P<minor>\d+)
 	\.(?P<patch>\d+)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1abed6ec9..276e67598 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,7 @@
-## dbt-spark 1.2.0rc1 (Release TBD)
+## dbt-spark 1.3.0b1 (Release TBD)
+
+
+## dbt-spark 1.2.0rc1 (July 12, 2022)
 
 ### Fixes
 - Incremental materialization updated to not drop table first if full refresh for delta lake format, as it already runs _create or replace table_ ([#286](https://github.com/dbt-labs/dbt-spark/issues/286), [#287](https://github.com/dbt-labs/dbt-spark/pull/287/))
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index 6e8eee929..a9fe3c3ee 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.2.0b1"
+version = "1.3.0a1"
diff --git a/setup.py b/setup.py
index aba51b34f..cb0c40aec 100644
--- a/setup.py
+++ b/setup.py
@@ -50,7 +50,7 @@ def _get_dbt_core_version():
 
 
 package_name = "dbt-spark"
-package_version = "1.2.0b1"
+package_version = "1.3.0a1"
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 

From 80c1775a9b6283fcd7cf2f1f2ce63ce2a1da8460 Mon Sep 17 00:00:00 2001
From: Scott Barber <74067474+barberscott@users.noreply.github.com>
Date: Thu, 21 Jul 2022 10:48:36 -0500
Subject: [PATCH 19/54] [CT-868] Pin pyodbc in dbt-spark (#398)

* [CT-868] Pin pyodbc in dbt-spark

Pin pyodbc to a known-working version until pyodbc>=4.0.35 releases.

* Update CHANGELOG.md

* Update CHANGELOG.md

* Update CHANGELOG.md
---
 CHANGELOG.md     | 5 +++++
 requirements.txt | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 276e67598..8e018f429 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,10 @@
 ## dbt-spark 1.3.0b1 (Release TBD)
 
+### Fixes
+- Pin `pyodbc` to version 4.0.32 to prevent overwriting `libodbc.so` and `libltdl.so` on Linux ([#397](https://github.com/dbt-labs/dbt-spark/issues/397/), [#398](https://github.com/dbt-labs/dbt-spark/pull/398/))
+
+### Contributors
+- [@barberscott](https://github.com/barberscott)  ([#398](https://github.com/dbt-labs/dbt-spark/pull/398/))
 
 ## dbt-spark 1.2.0rc1 (July 12, 2022)
 
diff --git a/requirements.txt b/requirements.txt
index e03320a41..c64512aeb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 PyHive[hive]>=0.6.0,<0.7.0
-pyodbc>=4.0.30
+pyodbc==4.0.32
 sqlparams>=3.0.0
 thrift>=0.13.0
 sqlparse>=0.4.2 # not directly required, pinned by Snyk to avoid a vulnerability

From eac5614a3ad25e9a268ab8d71aaa91a9803e2039 Mon Sep 17 00:00:00 2001
From: leahwicz <60146280+leahwicz@users.noreply.github.com>
Date: Fri, 22 Jul 2022 12:20:37 -0400
Subject: [PATCH 20/54] Updating CI pip and py10 (#403)

---
 .github/workflows/main.yml | 39 ++++++++++++++++++++------------------
 1 file changed, 21 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 56685bfc6..4166756c3 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -18,7 +18,6 @@ on:
   push:
     branches:
       - "main"
-      - "develop"
       - "*.latest"
       - "releases/*"
   pull_request:
@@ -40,6 +39,7 @@ jobs:
     name: code-quality
 
     runs-on: ubuntu-latest
+    timeout-minutes: 10
 
     steps:
       - name: Check out the repository
@@ -55,23 +55,29 @@ jobs:
       - name: Install python dependencies
         run: |
           sudo apt-get install libsasl2-dev
-          pip install --user --upgrade pip
-          pip install -r dev-requirements.txt
+          python -m pip install --user --upgrade pip
+          python -m pip --version
+          python -m pip install pre-commit
           pre-commit --version
+          python -m pip install mypy==0.942
           mypy --version
+          python -m pip install -r requirements.txt
+          python -m pip install -r dev-requirements.txt
           dbt --version
-      - name: pre-commit hooks
+
+      - name: Run pre-commit hooks
         run: pre-commit run --all-files --show-diff-on-failure
 
   unit:
     name: unit test / python ${{ matrix.python-version }}
 
     runs-on: ubuntu-latest
+    timeout-minutes: 10
 
     strategy:
       fail-fast: false
       matrix:
-        python-version: [3.7, 3.8] # TODO: support unit testing for python 3.9 (https://github.com/dbt-labs/dbt/issues/3689)
+        python-version: ["3.7", "3.8", "3.9", "3.10"]
 
     env:
       TOXENV: "unit"
@@ -80,8 +86,6 @@ jobs:
     steps:
       - name: Check out the repository
         uses: actions/checkout@v2
-        with:
-          persist-credentials: false
 
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v2
@@ -91,9 +95,9 @@ jobs:
       - name: Install python dependencies
         run: |
           sudo apt-get install libsasl2-dev
-          pip install --user --upgrade pip
-          pip install tox
-          pip --version
+          python -m pip install --user --upgrade pip
+          python -m pip --version
+          python -m pip install tox
           tox --version
       - name: Run tox
         run: tox
@@ -120,8 +124,6 @@ jobs:
     steps:
       - name: Check out the repository
         uses: actions/checkout@v2
-        with:
-          persist-credentials: false
 
       - name: Set up Python
         uses: actions/setup-python@v2
@@ -130,9 +132,10 @@ jobs:
 
       - name: Install python dependencies
         run: |
-          pip install --user --upgrade pip
-          pip install --upgrade setuptools wheel twine check-wheel-contents
-          pip --version
+          python -m pip install --user --upgrade pip
+          python -m pip install --upgrade setuptools wheel twine check-wheel-contents
+          python -m pip --version
+
       - name: Build distributions
         run: ./scripts/build-dist.sh
 
@@ -171,7 +174,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: [3.7, 3.8, 3.9]
+        python-version: ["3.7", "3.8", "3.9", "3.10"]
 
     steps:
       - name: Set up Python ${{ matrix.python-version }}
@@ -194,13 +197,13 @@ jobs:
 
       - name: Install wheel distributions
         run: |
-          find ./dist/*.whl -maxdepth 1 -type f | xargs pip install --force-reinstall --find-links=dist/
+          find ./dist/*.whl -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/
       - name: Check wheel distributions
         run: |
           dbt --version
       - name: Install source distributions
         run: |
-          find ./dist/*.gz -maxdepth 1 -type f | xargs pip install --force-reinstall --find-links=dist/
+          find ./dist/*.gz -maxdepth 1 -type f | xargs python -m pip install --force-reinstall --find-links=dist/
       - name: Check source distributions
         run: |
           dbt --version

From 3a292d4e04519ad58d5ae660348f2b924f82f052 Mon Sep 17 00:00:00 2001
From: leahwicz <60146280+leahwicz@users.noreply.github.com>
Date: Fri, 22 Jul 2022 12:51:05 -0400
Subject: [PATCH 21/54] Fixing one more pip reference in CI (#405)

---
 .github/workflows/main.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 4166756c3..b45f93776 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -184,9 +184,9 @@ jobs:
 
       - name: Install python dependencies
         run: |
-          pip install --user --upgrade pip
-          pip install --upgrade wheel
-          pip --version
+          python -m pip install --user --upgrade pip
+          python -m pip install --upgrade wheel
+          python -m pip --version
       - uses: actions/download-artifact@v2
         with:
           name: dist

From 16d05e2e8d60baeda3b4a4abb82034dc3fd0d776 Mon Sep 17 00:00:00 2001
From: Gerda Shank 
Date: Mon, 25 Jul 2022 11:57:13 -0400
Subject: [PATCH 22/54] Change to support core incremental refactor (#394)

---
 CHANGELOG.md                                                    | 1 +
 .../spark/macros/materializations/incremental/incremental.sql   | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8e018f429..28f7e138b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,7 @@
 
 ### Under the hood
 - Update `SparkColumn.numeric_type` to return `decimal` instead of `numeric`, since SparkSQL exclusively supports the former ([#380](https://github.com/dbt-labs/dbt-spark/pull/380))
+- Make minimal changes to support dbt Core incremental materialization refactor ([#402](https://github.com/dbt-labs/dbt-spark/issues/402), [#394](https://github.com/dbt-labs/dbt-spark/pull/394))
 
 ### Contributors
 - [@grindheim](https://github.com/grindheim) ([#287](https://github.com/dbt-labs/dbt-spark/pull/287/))
diff --git a/dbt/include/spark/macros/materializations/incremental/incremental.sql b/dbt/include/spark/macros/materializations/incremental/incremental.sql
index b80510b71..1ca2c149a 100644
--- a/dbt/include/spark/macros/materializations/incremental/incremental.sql
+++ b/dbt/include/spark/macros/materializations/incremental/incremental.sql
@@ -2,7 +2,7 @@
 
   {#-- Validate early so we don't run SQL if the file_format + strategy combo is invalid --#}
   {%- set raw_file_format = config.get('file_format', default='parquet') -%}
-  {%- set raw_strategy = config.get('incremental_strategy', default='append') -%}
+  {%- set raw_strategy = config.get('incremental_strategy') or 'append' -%}
   {%- set grant_config = config.get('grants') -%}
 
   {%- set file_format = dbt_spark_validate_get_file_format(raw_file_format) -%}
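
The one-line change above is subtle: `config.get('incremental_strategy', default='append')` only falls back when the key is absent, while `config.get('incremental_strategy') or 'append'` also falls back when the key is present but unset. A plain-dict illustration (assuming `config.get` behaves like `dict.get` in this respect):

```python
config = {"incremental_strategy": None}  # key exists, but no strategy was configured

print(config.get("incremental_strategy", "append"))    # None   -> default only applies if the key is missing
print(config.get("incremental_strategy") or "append")  # append -> falsy values also fall back
```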

From f58fc233fbbc7b9289299313807e706c57360613 Mon Sep 17 00:00:00 2001
From: Chenyu Li 
Date: Thu, 28 Jul 2022 13:52:18 -0700
Subject: [PATCH 23/54] Feature/python model beta (#377)

Co-authored-by: Jeremy Cohen 
Co-authored-by: Ian Knox 
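
For context, this is the shape of a dbt Python model exercised by this change, taken from the new functional test below (`dbt` and `spark` are injected by the runtime, so the file is not run standalone). The table materialization then appends autogenerated code that writes the returned DataFrame with `saveAsTable`, as the `py_write_table` macro in the diff shows:

```python
import pandas  # mirrors the test fixture; not used directly in this model


def model(dbt, spark):
    dbt.config(
        materialized="table",
    )
    data = [[1, 2]] * 10
    return spark.createDataFrame(data, schema=["test", "test2"])
```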
---
 .github/workflows/main.yml                    |   1 +
 CHANGELOG.md                                  |   3 +
 dbt/adapters/spark/impl.py                    | 127 +++++++++++++++++-
 dbt/include/spark/macros/adapters.sql         |  61 +++++----
 .../incremental/incremental.sql               |  77 +++++++----
 .../macros/materializations/snapshot.sql      |   2 +-
 .../spark/macros/materializations/table.sql   |  24 +++-
 dev-requirements.txt                          |   2 +
 requirements.txt                              |   2 +
 tests/conftest.py                             |   2 +
 tests/functional/adapter/test_basic.py        |   1 -
 tests/functional/adapter/test_python_model.py |  59 ++++++++
 .../test_incremental_strategies.py            |   2 +
 13 files changed, 297 insertions(+), 66 deletions(-)
 create mode 100644 tests/functional/adapter/test_python_model.py

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index b45f93776..bf607c379 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -60,6 +60,7 @@ jobs:
           python -m pip install pre-commit
           pre-commit --version
           python -m pip install mypy==0.942
+          python -m pip install types-requests
           mypy --version
           python -m pip install -r requirements.txt
           python -m pip install -r dev-requirements.txt
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 28f7e138b..d015a26c7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,8 @@
 ## dbt-spark 1.3.0b1 (Release TBD)
 
+### Features
+- support python model through notebook, currently supported materializations are table and incremental. ([#377](https://github.com/dbt-labs/dbt-spark/pull/377))
+
 ### Fixes
 - Pin `pyodbc` to version 4.0.32 to prevent overwriting `libodbc.so` and `libltdl.so` on Linux ([#397](https://github.com/dbt-labs/dbt-spark/issues/397/), [#398](https://github.com/dbt-labs/dbt-spark/pull/398/))
 
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 3fb9978d8..12c42ab98 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -1,4 +1,7 @@
 import re
+import requests
+import time
+import base64
 from concurrent.futures import Future
 from dataclasses import dataclass
 from typing import Any, Dict, Iterable, List, Optional, Union
@@ -11,7 +14,8 @@
 import dbt.exceptions
 
 from dbt.adapters.base import AdapterConfig
-from dbt.adapters.base.impl import catch_as_completed
+from dbt.adapters.base.impl import catch_as_completed, log_code_execution
+from dbt.adapters.base.meta import available
 from dbt.adapters.sql import SQLAdapter
 from dbt.adapters.spark import SparkConnectionManager
 from dbt.adapters.spark import SparkRelation
@@ -159,11 +163,9 @@ def list_relations_without_caching(
 
         return relations
 
-    def get_relation(
-        self, database: Optional[str], schema: str, identifier: str
-    ) -> Optional[BaseRelation]:
+    def get_relation(self, database: str, schema: str, identifier: str) -> Optional[BaseRelation]:
         if not self.Relation.include_policy.database:
-            database = None
+            database = None  # type: ignore
 
         return super().get_relation(database, schema, identifier)
 
@@ -296,7 +298,12 @@ def get_catalog(self, manifest):
                 for schema in schemas:
                     futures.append(
                         tpe.submit_connected(
-                            self, schema, self._get_one_catalog, info, [schema], manifest
+                            self,
+                            schema,
+                            self._get_one_catalog,
+                            info,
+                            [schema],
+                            manifest,
                         )
                     )
             catalogs, exceptions = catch_as_completed(futures)
@@ -380,6 +387,114 @@ def run_sql_for_tests(self, sql, fetch, conn):
         finally:
             conn.transaction_open = False
 
+    @available.parse_none
+    @log_code_execution
+    def submit_python_job(self, parsed_model: dict, compiled_code: str, timeout=None):
+        # TODO improve the typing here.  N.B. Jinja returns a `jinja2.runtime.Undefined` instead
+        # of `None` which evaluates to True!
+
+        # TODO limit this function to run only when doing the materialization of python nodes
+
+        # assuming that for a python job running over 1 day, the user would manually override this
+        schema = getattr(parsed_model, "schema", self.config.credentials.schema)
+        identifier = parsed_model["alias"]
+        if not timeout:
+            timeout = 60 * 60 * 24
+        if timeout <= 0:
+            raise ValueError("Timeout must be larger than 0")
+
+        auth_header = {"Authorization": f"Bearer {self.connections.profile.credentials.token}"}
+
+        # create new dir
+        if not self.connections.profile.credentials.user:
+            raise ValueError("Need to supply user in profile to submit python job")
+        # it is safe to call mkdirs even if dir already exists and have content inside
+        work_dir = f"/Users/{self.connections.profile.credentials.user}/{schema}"
+        response = requests.post(
+            f"https://{self.connections.profile.credentials.host}/api/2.0/workspace/mkdirs",
+            headers=auth_header,
+            json={
+                "path": work_dir,
+            },
+        )
+        if response.status_code != 200:
+            raise dbt.exceptions.RuntimeException(
+                f"Error creating work_dir for python notebooks\n {response.content!r}"
+            )
+
+        # add notebook
+        b64_encoded_content = base64.b64encode(compiled_code.encode()).decode()
+        response = requests.post(
+            f"https://{self.connections.profile.credentials.host}/api/2.0/workspace/import",
+            headers=auth_header,
+            json={
+                "path": f"{work_dir}/{identifier}",
+                "content": b64_encoded_content,
+                "language": "PYTHON",
+                "overwrite": True,
+                "format": "SOURCE",
+            },
+        )
+        if response.status_code != 200:
+            raise dbt.exceptions.RuntimeException(
+                f"Error creating python notebook.\n {response.content!r}"
+            )
+
+        # submit job
+        submit_response = requests.post(
+            f"https://{self.connections.profile.credentials.host}/api/2.1/jobs/runs/submit",
+            headers=auth_header,
+            json={
+                "run_name": "debug task",
+                "existing_cluster_id": self.connections.profile.credentials.cluster,
+                "notebook_task": {
+                    "notebook_path": f"{work_dir}/{identifier}",
+                },
+            },
+        )
+        if submit_response.status_code != 200:
+            raise dbt.exceptions.RuntimeException(
+                f"Error creating python run.\n {response.content!r}"
+            )
+
+        # poll until job finish
+        state = None
+        start = time.time()
+        run_id = submit_response.json()["run_id"]
+        terminal_states = ["TERMINATED", "SKIPPED", "INTERNAL_ERROR"]
+        while state not in terminal_states and time.time() - start < timeout:
+            time.sleep(1)
+            resp = requests.get(
+                f"https://{self.connections.profile.credentials.host}"
+                f"/api/2.1/jobs/runs/get?run_id={run_id}",
+                headers=auth_header,
+            )
+            json_resp = resp.json()
+            state = json_resp["state"]["life_cycle_state"]
+            # logger.debug(f"Polling.... in state: {state}")
+        if state != "TERMINATED":
+            raise dbt.exceptions.RuntimeException(
+                "python model run ended in state"
+                f"{state} with state_message\n{json_resp['state']['state_message']}"
+            )
+
+        # get end state to return to user
+        run_output = requests.get(
+            f"https://{self.connections.profile.credentials.host}"
+            f"/api/2.1/jobs/runs/get-output?run_id={run_id}",
+            headers=auth_header,
+        )
+        json_run_output = run_output.json()
+        result_state = json_run_output["metadata"]["state"]["result_state"]
+        if result_state != "SUCCESS":
+            raise dbt.exceptions.RuntimeException(
+                "Python model failed with traceback as:\n"
+                "(Note that the line number here does not "
+                "match the line number in your code due to dbt templating)\n"
+                f"{json_run_output['error_trace']}"
+            )
+        return self.connections.get_response(None)
+
     def standardize_grants_dict(self, grants_table: agate.Table) -> dict:
         grants_dict: Dict[str, List[str]] = {}
         for row in grants_table:
diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql
index abdeacb7f..05630ede5 100644
--- a/dbt/include/spark/macros/adapters.sql
+++ b/dbt/include/spark/macros/adapters.sql
@@ -117,35 +117,46 @@
 {%- endmacro %}
 
 
-{% macro create_temporary_view(relation, sql) -%}
-  {{ return(adapter.dispatch('create_temporary_view', 'dbt')(relation, sql)) }}
+{% macro create_temporary_view(relation, compiled_code) -%}
+  {{ return(adapter.dispatch('create_temporary_view', 'dbt')(relation, compiled_code)) }}
 {%- endmacro -%}
 
-{#-- We can't use temporary tables with `create ... as ()` syntax #}
-{% macro spark__create_temporary_view(relation, sql) -%}
-  create temporary view {{ relation.include(schema=false) }} as
-    {{ sql }}
-{% endmacro %}
+{#-- We can't use temporary tables with `create ... as ()` syntax --#}
+{% macro spark__create_temporary_view(relation, compiled_code) -%}
+    create temporary view {{ relation.include(schema=false) }} as
+      {{ compiled_code }}
+{%- endmacro -%}
 
 
-{% macro spark__create_table_as(temporary, relation, sql) -%}
-  {% if temporary -%}
-    {{ create_temporary_view(relation, sql) }}
-  {%- else -%}
-    {% if config.get('file_format', validator=validation.any[basestring]) == 'delta' %}
-      create or replace table {{ relation }}
-    {% else %}
-      create table {{ relation }}
-    {% endif %}
-    {{ file_format_clause() }}
-    {{ options_clause() }}
-    {{ partition_cols(label="partitioned by") }}
-    {{ clustered_cols(label="clustered by") }}
-    {{ location_clause() }}
-    {{ comment_clause() }}
-    as
-      {{ sql }}
-  {%- endif %}
+{%- macro spark__create_table_as(temporary, relation, compiled_code, language='sql') -%}
+  {%- if language == 'sql' -%}
+    {%- if temporary -%}
+      {{ create_temporary_view(relation, compiled_code) }}
+    {%- else -%}
+      {% if config.get('file_format', validator=validation.any[basestring]) == 'delta' %}
+        create or replace table {{ relation }}
+      {% else %}
+        create table {{ relation }}
+      {% endif %}
+      {{ file_format_clause() }}
+      {{ options_clause() }}
+      {{ partition_cols(label="partitioned by") }}
+      {{ clustered_cols(label="clustered by") }}
+      {{ location_clause() }}
+      {{ comment_clause() }}
+      as
+      {{ compiled_code }}
+    {%- endif -%}
+  {%- elif language == 'python' -%}
+    {#--
+    N.B. Python models _can_ write to temp views HOWEVER they use a different session
+    and have already expired by the time they need to be used (I.E. in merges for incremental models)
+
+    TODO: Deep dive into spark sessions to see if we can reuse a single session for an entire
+    dbt invocation.
+     --#}
+    {{ py_write_table(compiled_code=compiled_code, target_relation=relation) }}
+  {%- endif -%}
 {%- endmacro -%}
 
 
diff --git a/dbt/include/spark/macros/materializations/incremental/incremental.sql b/dbt/include/spark/macros/materializations/incremental/incremental.sql
index 1ca2c149a..91cba9e5f 100644
--- a/dbt/include/spark/macros/materializations/incremental/incremental.sql
+++ b/dbt/include/spark/macros/materializations/incremental/incremental.sql
@@ -1,5 +1,4 @@
 {% materialization incremental, adapter='spark' -%}
-
   {#-- Validate early so we don't run SQL if the file_format + strategy combo is invalid --#}
   {%- set raw_file_format = config.get('file_format', default='parquet') -%}
   {%- set raw_strategy = config.get('incremental_strategy') or 'append' -%}
@@ -8,43 +7,63 @@
   {%- set file_format = dbt_spark_validate_get_file_format(raw_file_format) -%}
   {%- set strategy = dbt_spark_validate_get_incremental_strategy(raw_strategy, file_format) -%}
 
+  {#-- Set vars --#}
+
   {%- set unique_key = config.get('unique_key', none) -%}
   {%- set partition_by = config.get('partition_by', none) -%}
-
-  {%- set full_refresh_mode = (should_full_refresh()) -%}
-
-  {% set on_schema_change = incremental_validate_on_schema_change(config.get('on_schema_change'), default='ignore') %}
-
-  {% set target_relation = this %}
-  {% set existing_relation = load_relation(this) %}
-  {% set tmp_relation = make_temp_relation(this) %}
-
-  {% if strategy == 'insert_overwrite' and partition_by %}
-    {% call statement() %}
+  {%- set language = model['language'] -%}
+  {%- set on_schema_change = incremental_validate_on_schema_change(config.get('on_schema_change'), default='ignore') -%}
+  {%- set target_relation = this -%}
+  {%- set existing_relation = load_relation(this) -%}
+  {%- set tmp_relation = make_temp_relation(this) -%}
+
+  {#-- Set Overwrite Mode --#}
+  {%- if strategy == 'insert_overwrite' and partition_by -%}
+    {%- call statement() -%}
       set spark.sql.sources.partitionOverwriteMode = DYNAMIC
-    {% endcall %}
-  {% endif %}
+    {%- endcall -%}
+  {%- endif -%}
 
+  {#-- Run pre-hooks --#}
   {{ run_hooks(pre_hooks) }}
 
-  {% set is_delta = (file_format == 'delta' and existing_relation.is_delta) %}
-
-  {% if existing_relation is none %}
-    {% set build_sql = create_table_as(False, target_relation, sql) %}
-  {% elif existing_relation.is_view or full_refresh_mode %}
+  {#-- Incremental run logic --#}
+  {%- if existing_relation is none -%}
+    {#-- Relation must be created --#}
+    {%- call statement('main', language=language) -%}
+      {{ create_table_as(False, target_relation, compiled_code, language) }}
+    {%- endcall -%}
+  {%- elif existing_relation.is_view or should_full_refresh() -%}
+    {#-- Relation must be dropped & recreated --#}
+    {% set is_delta = (file_format == 'delta' and existing_relation.is_delta) %}
     {% if not is_delta %} {#-- If Delta, we will `create or replace` below, so no need to drop --#}
       {% do adapter.drop_relation(existing_relation) %}
     {% endif %}
-    {% set build_sql = create_table_as(False, target_relation, sql) %}
-  {% else %}
-    {% do run_query(create_table_as(True, tmp_relation, sql)) %}
-    {% do process_schema_changes(on_schema_change, tmp_relation, existing_relation) %}
-    {% set build_sql = dbt_spark_get_incremental_sql(strategy, tmp_relation, target_relation, unique_key) %}
-  {% endif %}
-
-  {%- call statement('main') -%}
-    {{ build_sql }}
-  {%- endcall -%}
+    {%- call statement('main', language=language) -%}
+      {{ create_table_as(False, target_relation, compiled_code, language) }}
+    {%- endcall -%}
+  {%- else -%}
+    {#-- Relation must be merged --#}
+    {%- call statement('create_tmp_relation', language=language) -%}
+      {{ create_table_as(True, tmp_relation, compiled_code, language) }}
+    {%- endcall -%}
+    {%- do process_schema_changes(on_schema_change, tmp_relation, existing_relation) -%}
+    {%- call statement('main') -%}
+      {{ dbt_spark_get_incremental_sql(strategy, tmp_relation, target_relation, unique_key) }}
+    {%- endcall -%}
+    {%- if language == 'python' -%}
+      {#--
+      This is yucky.
+      See note in dbt-spark/dbt/include/spark/macros/adapters.sql
+      re: python models and temporary views.
+
+      Also, why doesn't either drop_relation or adapter.drop_relation work here?!
+      --#}
+      {% call statement('drop_relation') -%}
+        drop table if exists {{ tmp_relation }}
+      {%- endcall %}
+    {%- endif -%}
+  {%- endif -%}
 
   {% set should_revoke = should_revoke(existing_relation, full_refresh_mode) %}
   {% do apply_grants(target_relation, grant_config, should_revoke) %}
diff --git a/dbt/include/spark/macros/materializations/snapshot.sql b/dbt/include/spark/macros/materializations/snapshot.sql
index a5304682e..6cf2358fe 100644
--- a/dbt/include/spark/macros/materializations/snapshot.sql
+++ b/dbt/include/spark/macros/materializations/snapshot.sql
@@ -117,7 +117,7 @@
 
   {% if not target_relation_exists %}
 
-      {% set build_sql = build_snapshot_table(strategy, model['compiled_sql']) %}
+      {% set build_sql = build_snapshot_table(strategy, model['compiled_code']) %}
       {% set final_sql = create_table_as(False, target_relation, build_sql) %}
 
   {% else %}
diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql
index 3462d3332..6a02ea164 100644
--- a/dbt/include/spark/macros/materializations/table.sql
+++ b/dbt/include/spark/macros/materializations/table.sql
@@ -1,5 +1,5 @@
 {% materialization table, adapter = 'spark' %}
-
+  {%- set language = model['language'] -%}
   {%- set identifier = model['alias'] -%}
   {%- set grant_config = config.get('grants') -%}
 
@@ -19,9 +19,10 @@
   {%- endif %}
 
   -- build model
-  {% call statement('main') -%}
-    {{ create_table_as(False, target_relation, sql) }}
-  {%- endcall %}
+
+  {%- call statement('main', language=language) -%}
+    {{ create_table_as(False, target_relation, compiled_code, language) }}
+  {%- endcall -%}
 
   {% set should_revoke = should_revoke(old_relation, full_refresh_mode=True) %}
   {% do apply_grants(target_relation, grant_config, should_revoke) %}
@@ -33,3 +34,18 @@
   {{ return({'relations': [target_relation]})}}
 
 {% endmaterialization %}
+
+
+{% macro py_write_table(compiled_code, target_relation) %}
+{{ compiled_code }}
+# --- Autogenerated dbt materialization code. --- #
+dbt = dbtObj(spark.table)
+df = model(dbt, spark)
+df.write.mode("overwrite").format("delta").saveAsTable("{{ target_relation }}")
+{%- endmacro -%}
+
+{%macro py_script_comment()%}
+# how to execute python model in notebook
+# dbt = dbtObj(spark.table)
+# df = model(dbt, spark)
+{%endmacro%}
diff --git a/dev-requirements.txt b/dev-requirements.txt
index b94cb8b6b..5b29e5e9d 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -3,6 +3,8 @@
 git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core
 git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter
 
+
+
 black==22.3.0
 bumpversion
 click~=8.0.4
diff --git a/requirements.txt b/requirements.txt
index c64512aeb..5d774e4f7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,6 @@
 PyHive[hive]>=0.6.0,<0.7.0
+requests[python]>=2.28.1
+
 pyodbc==4.0.32
 sqlparams>=3.0.0
 thrift>=0.13.0
diff --git a/tests/conftest.py b/tests/conftest.py
index 0771566b7..2fa50d6c7 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -60,6 +60,7 @@ def databricks_cluster_target():
         "connect_retries": 3,
         "connect_timeout": 5,
         "retry_all": True,
+        "user": os.getenv('DBT_DATABRICKS_USER'),
     }
 
 
@@ -91,6 +92,7 @@ def databricks_http_cluster_target():
         "connect_retries": 5,
         "connect_timeout": 60, 
         "retry_all": bool(os.getenv('DBT_DATABRICKS_RETRY_ALL', False)),
+        "user": os.getenv('DBT_DATABRICKS_USER'),
     }
 
 
diff --git a/tests/functional/adapter/test_basic.py b/tests/functional/adapter/test_basic.py
index e1a57fd3f..bdccf169d 100644
--- a/tests/functional/adapter/test_basic.py
+++ b/tests/functional/adapter/test_basic.py
@@ -79,7 +79,6 @@ def project_config_update(self):
             }
         }
 
-
 @pytest.mark.skip_profile('spark_session')
 class TestBaseAdapterMethod(BaseAdapterMethod):
     pass
diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py
new file mode 100644
index 000000000..059412f10
--- /dev/null
+++ b/tests/functional/adapter/test_python_model.py
@@ -0,0 +1,59 @@
+import os
+import pytest
+from dbt.tests.util import run_dbt, write_file, run_dbt_and_capture
+from dbt.tests.adapter.python_model.test_python_model import BasePythonModelTests, BasePythonIncrementalTests
+
+@pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
+class TestPythonModelSpark(BasePythonModelTests):
+    pass
+
+@pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
+class TestPythonIncrementalModelSpark(BasePythonIncrementalTests):
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {}
+
+
+models__simple_python_model = """
+import pandas
+
+def model(dbt, spark):
+    dbt.config(
+        materialized='table',
+    )
+    data = [[1,2]] * 10
+    return spark.createDataFrame(data, schema=['test', 'test2'])
+"""
+models__simple_python_model_v2 = """
+import pandas
+
+def model(dbt, spark):
+    dbt.config(
+        materialized='table',
+    )
+    data = [[1,2]] * 10
+    return spark.createDataFrame(data, schema=['test1', 'test3'])
+"""
+
+
+@pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
+class TestChangingSchemaSpark:
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {"simple_python_model.py": models__simple_python_model}
+
+    def test_changing_schema_with_log_validation(self, project, logs_dir):
+        run_dbt(["run"])
+        write_file(
+            models__simple_python_model_v2,
+            project.project_root + "/models",
+            "simple_python_model.py",
+        )
+        run_dbt(["run"])
+        log_file = os.path.join(logs_dir, "dbt.log")
+        with open(log_file, "r") as f:
+            log = f.read()
+            # validate #5510 log_code_execution works
+            assert "On model.test.simple_python_model:" in log
+            assert "spark.createDataFrame(data, schema=['test1', 'test3'])" in log
+            assert "Execution status: OK in" in log
diff --git a/tests/integration/incremental_strategies/test_incremental_strategies.py b/tests/integration/incremental_strategies/test_incremental_strategies.py
index 839f167e6..3848d11ae 100644
--- a/tests/integration/incremental_strategies/test_incremental_strategies.py
+++ b/tests/integration/incremental_strategies/test_incremental_strategies.py
@@ -60,6 +60,8 @@ def run_and_test(self):
     def test_insert_overwrite_apache_spark(self):
         self.run_and_test()
 
+    # This test requires settings on the test cluster
+    # more info at https://docs.getdbt.com/reference/resource-configs/spark-configs#the-insert_overwrite-strategy
     @use_profile("databricks_cluster")
     def test_insert_overwrite_databricks_cluster(self):
         self.run_and_test()

From 7f6cffecf38b7c41aa441eb020d464ba1e20bf9e Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Fri, 29 Jul 2022 15:27:56 -0400
Subject: [PATCH 24/54] Bumping version to 1.3.0b1 (#412)

* Bumping version to 1.3.0b1

* Update CHANGELOG.md

* Fix whitespace

* Fixing whitespace

Co-authored-by: Github Build Bot 
Co-authored-by: leahwicz <60146280+leahwicz@users.noreply.github.com>
---
 .bumpversion.cfg                  | 2 +-
 CHANGELOG.md                      | 9 +++++++--
 dbt/adapters/spark/__version__.py | 2 +-
 setup.py                          | 2 +-
 4 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 605b6f378..ef3954f4c 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.3.0a1
+current_version = 1.3.0b1
 parse = (?P<major>\d+)
 	\.(?P<minor>\d+)
 	\.(?P<patch>\d+)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d015a26c7..5948429a7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,11 +1,16 @@
-## dbt-spark 1.3.0b1 (Release TBD)
+## dbt-spark 1.3.0b2 (Release TBD)
+
+## dbt-spark 1.3.0b1 (July 29, 2022)
 
 ### Features
-- support python model through notebook, currently supported materializations are table and incremental. ([#377](https://github.com/dbt-labs/dbt-spark/pull/377))
+- Support python model through notebook, currently supported materializations are table and incremental. ([#377](https://github.com/dbt-labs/dbt-spark/pull/377))
 
 ### Fixes
 - Pin `pyodbc` to version 4.0.32 to prevent overwriting `libodbc.so` and `libltdl.so` on Linux ([#397](https://github.com/dbt-labs/dbt-spark/issues/397/), [#398](https://github.com/dbt-labs/dbt-spark/pull/398/))
 
+### Under the hood
+- Support core incremental refactor ([#394](https://github.com/dbt-labs/dbt-spark/issues/394))
+
 ### Contributors
 - [@barberscott](https://github.com/barberscott)  ([#398](https://github.com/dbt-labs/dbt-spark/pull/398/))
 
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index a9fe3c3ee..4b49b750d 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.3.0a1"
+version = "1.3.0b1"
diff --git a/setup.py b/setup.py
index cb0c40aec..229e89a17 100644
--- a/setup.py
+++ b/setup.py
@@ -50,7 +50,7 @@ def _get_dbt_core_version():
 
 
 package_name = "dbt-spark"
-package_version = "1.3.0a1"
+package_version = "1.3.0b1"
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 

From 9b00895a51d14745d896ac17d08e6c2423a4703a Mon Sep 17 00:00:00 2001
From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
Date: Tue, 9 Aug 2022 15:34:57 -0500
Subject: [PATCH 25/54] init pr push for ct-1005 (#418)

* init pr push for ct-1005

* add changelog

* change pointer to spark

* eof fix

* remove ref to dbt-core in changelog existence

* typo fix

* typo and change of ref

* add conditional logic for kinds security and dependency and change NO ISSUE ref to spark issue #417
---
 .changes/0.0.0.md                             |   5 +
 .changes/1.3.0-b1.md                          |  11 +
 .changes/1.3.0/Features-20220808-141141.yaml  |   8 +
 .changes/1.3.0/Fixes-20220808-141623.yaml     |   8 +
 .../1.3.0/Under the Hood-20220808-141320.yaml |   7 +
 .changes/README.md                            |   3 +
 .changes/header.tpl.md                        |   6 +
 .changes/unreleased/.gitkeep                  |   0
 .../unreleased/Features-20220808-142118.yaml  |   7 +
 .changie.yaml                                 |  62 ++++
 .github/pull_request_template.md              |   2 +-
 .github/workflows/bot-changelog.yml           |  61 ++++
 .github/workflows/changelog-existence.yml     |  41 +++
 CHANGELOG.md                                  | 275 +-----------------
 CONTRIBUTING.md                               |  10 +
 15 files changed, 246 insertions(+), 260 deletions(-)
 create mode 100644 .changes/0.0.0.md
 create mode 100644 .changes/1.3.0-b1.md
 create mode 100644 .changes/1.3.0/Features-20220808-141141.yaml
 create mode 100644 .changes/1.3.0/Fixes-20220808-141623.yaml
 create mode 100644 .changes/1.3.0/Under the Hood-20220808-141320.yaml
 create mode 100644 .changes/README.md
 create mode 100644 .changes/header.tpl.md
 create mode 100644 .changes/unreleased/.gitkeep
 create mode 100644 .changes/unreleased/Features-20220808-142118.yaml
 create mode 100644 .changie.yaml
 create mode 100644 .github/workflows/bot-changelog.yml
 create mode 100644 .github/workflows/changelog-existence.yml

diff --git a/.changes/0.0.0.md b/.changes/0.0.0.md
new file mode 100644
index 000000000..5acfb3dbc
--- /dev/null
+++ b/.changes/0.0.0.md
@@ -0,0 +1,5 @@
+## Previous Releases
+For information on prior major and minor releases, see their changelogs:
+- [1.2](https://github.com/dbt-labs/dbt-spark/blob/1.2.latest/CHANGELOG.md)
+- [1.1](https://github.com/dbt-labs/dbt-spark/blob/1.1.latest/CHANGELOG.md)
+- [1.0](https://github.com/dbt-labs/dbt-spark/blob/1.0.latest/CHANGELOG.md)
diff --git a/.changes/1.3.0-b1.md b/.changes/1.3.0-b1.md
new file mode 100644
index 000000000..ef64f4395
--- /dev/null
+++ b/.changes/1.3.0-b1.md
@@ -0,0 +1,11 @@
+## dbt-spark 1.3.0-b1 - July 29, 2022
+
+### Features
+- Support python model through notebook, currently supported materializations are table and incremental ([#417](https://github.com/dbt-labs/dbt-spark/issues/417), [#377](https://github.com/dbt-labs/dbt-spark/pull/377))
+### Fixes
+- Pin pyodbc to version 4.0.32 to prevent overwriting libodbc.so and libltdl.so on Linux ([#397](https://github.com/dbt-labs/dbt-spark/issues/397), [#398](https://github.com/dbt-labs/dbt-spark/pull/398))
+### Under the Hood
+- Support core incremental refactor ([#4402](https://github.com/dbt-labs/dbt-spark/issues/4402), [#394](https://github.com/dbt-labs/dbt-spark/pull/394))
+
+### Contributors
+- [@barberscot](https://github.com/barberscot) ([#398](https://github.com/dbt-labs/dbt-spark/pull/398))
diff --git a/.changes/1.3.0/Features-20220808-141141.yaml b/.changes/1.3.0/Features-20220808-141141.yaml
new file mode 100644
index 000000000..444a3062b
--- /dev/null
+++ b/.changes/1.3.0/Features-20220808-141141.yaml
@@ -0,0 +1,8 @@
+kind: Features
+body: Support python model through notebook, currently supported materializations
+  are table and incremental
+time: 2022-08-08T14:11:41.906131-05:00
+custom:
+  Author: ChenyuLInx
+  Issue: "417"
+  PR: "377"
diff --git a/.changes/1.3.0/Fixes-20220808-141623.yaml b/.changes/1.3.0/Fixes-20220808-141623.yaml
new file mode 100644
index 000000000..793e3e5b2
--- /dev/null
+++ b/.changes/1.3.0/Fixes-20220808-141623.yaml
@@ -0,0 +1,8 @@
+kind: Fixes
+body: Pin pyodbc to version 4.0.32 to prevent overwriting libodbc.so and libltdl.so
+  on Linux
+time: 2022-08-08T14:16:23.846876-05:00
+custom:
+  Author: barberscot
+  Issue: "397"
+  PR: "398"
diff --git a/.changes/1.3.0/Under the Hood-20220808-141320.yaml b/.changes/1.3.0/Under the Hood-20220808-141320.yaml
new file mode 100644
index 000000000..82535f926
--- /dev/null
+++ b/.changes/1.3.0/Under the Hood-20220808-141320.yaml	
@@ -0,0 +1,7 @@
+kind: Under the Hood
+body: Support core incremental refactor
+time: 2022-08-08T14:13:20.576155-05:00
+custom:
+  Author: gshank
+  Issue: "4402"
+  PR: "394"
diff --git a/.changes/README.md b/.changes/README.md
new file mode 100644
index 000000000..dc6106dfe
--- /dev/null
+++ b/.changes/README.md
@@ -0,0 +1,3 @@
+# CHANGELOG
+
+To view information about how the changelog is maintained, we suggest reading this [README](https://github.com/dbt-labs/dbt-spark/blob/main/.changes/README.md) found in `dbt-spark`.
diff --git a/.changes/header.tpl.md b/.changes/header.tpl.md
new file mode 100644
index 000000000..251ea5d51
--- /dev/null
+++ b/.changes/header.tpl.md
@@ -0,0 +1,6 @@
+# dbt-spark Changelog
+
+- This file provides a full account of all changes to `dbt-spark`.
+- Changes are listed under the (pre)release in which they first appear. Subsequent releases include changes from previous releases.
+- "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version.
+- Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry)
diff --git a/.changes/unreleased/.gitkeep b/.changes/unreleased/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/.changes/unreleased/Features-20220808-142118.yaml b/.changes/unreleased/Features-20220808-142118.yaml
new file mode 100644
index 000000000..9c110e937
--- /dev/null
+++ b/.changes/unreleased/Features-20220808-142118.yaml
@@ -0,0 +1,7 @@
+kind: Features
+body: Add changie to dbt-spark
+time: 2022-08-08T14:21:18.569756-05:00
+custom:
+  Author: mcknight-42
+  Issue: "416"
+  PR: "418"
diff --git a/.changie.yaml b/.changie.yaml
new file mode 100644
index 000000000..f5800f324
--- /dev/null
+++ b/.changie.yaml
@@ -0,0 +1,62 @@
+changesDir: .changes
+unreleasedDir: unreleased
+headerPath: header.tpl.md
+versionHeaderPath: ""
+changelogPath: CHANGELOG.md
+versionExt: md
+versionFormat: '## dbt-spark {{.Version}} - {{.Time.Format "January 02, 2006"}}'
+kindFormat: '### {{.Kind}}'
+changeFormat: '- {{.Body}} ([#{{.Custom.Issue}}](https://github.com/dbt-labs/dbt-spark/issues/{{.Custom.Issue}}), [#{{.Custom.PR}}](https://github.com/dbt-labs/dbt-spark/pull/{{.Custom.PR}}))'
+kinds:
+- label: Breaking Changes
+- label: Features
+- label: Fixes
+- label: Under the Hood
+- label: Dependencies
+  changeFormat: '- {{.Body}} ({{if ne .Custom.Issue ""}}[#{{.Custom.Issue}}](https://github.com/dbt-labs/dbt-spark/issues/{{.Custom.Issue}}), {{end}}[#{{.Custom.PR}}](https://github.com/dbt-labs/dbt-spark/pull/{{.Custom.PR}}))'
+- label: Security
+  changeFormat: '- {{.Body}} ({{if ne .Custom.Issue ""}}[#{{.Custom.Issue}}](https://github.com/dbt-labs/dbt-spark/issues/{{.Custom.Issue}}), {{end}}[#{{.Custom.PR}}](https://github.com/dbt-labs/dbt-spark/pull/{{.Custom.PR}}))'
+custom:
+- key: Author
+  label: GitHub Username(s) (separated by a single space if multiple)
+  type: string
+  minLength: 3
+- key: Issue
+  label: GitHub Issue Number
+  type: int
+  minLength: 4
+- key: PR
+  label: GitHub Pull Request Number
+  type: int
+  minLength: 4
+footerFormat: |
+  {{- $contributorDict := dict }}
+  {{- /* any names added to this list should be all lowercase for later matching purposes */}}
+  {{- $core_team := list "emmyoop" "nathaniel-may" "gshank" "leahwicz" "chenyulinx" "stu-k" "iknox-fa" "versusfacit" "mcknight-42" "jtcohen6" "dependabot[bot]" "snyk-bot" }}
+  {{- range $change := .Changes }}
+    {{- $authorList := splitList " " $change.Custom.Author }}
+    {{- /* loop through all authors for a PR */}}
+    {{- range $author := $authorList }}
+      {{- $authorLower := lower $author }}
+      {{- /* we only want to include non-core team contributors */}}
+      {{- if not (has $authorLower $core_team)}}
+        {{- $pr := $change.Custom.PR }}
+        {{- /* check if this contributor has other PRs associated with them already */}}
+        {{- if hasKey $contributorDict $author }}
+          {{- $prList := get $contributorDict $author }}
+          {{- $prList = append $prList $pr  }}
+          {{- $contributorDict := set $contributorDict $author $prList }}
+        {{- else }}
+          {{- $prList := list $change.Custom.PR }}
+          {{- $contributorDict := set $contributorDict $author $prList }}
+        {{- end }}
+      {{- end}}
+    {{- end}}
+  {{- end }}
+  {{- /* no indentation here for formatting so the final markdown doesn't have unneeded indentations */}}
+  {{- if $contributorDict}}
+  ### Contributors
+  {{- range $k,$v := $contributorDict }}
+  - [@{{$k}}](https://github.com/{{$k}}) ({{ range $index, $element := $v }}{{if $index}}, {{end}}[#{{$element}}](https://github.com/dbt-labs/dbt-spark/pull/{{$element}}){{end}})
+  {{- end }}
+  {{- end }}
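Note: the Go-template `footerFormat` above groups PR numbers by non-core-team author before rendering the Contributors section. A rough Python equivalent of what the template computes (field names borrowed from the changie entries above; this is only an illustration, not the rendering code changie actually runs):

```
CORE_TEAM = {
    "emmyoop", "nathaniel-may", "gshank", "leahwicz", "chenyulinx", "stu-k",
    "iknox-fa", "versusfacit", "mcknight-42", "jtcohen6", "dependabot[bot]", "snyk-bot",
}

def contributors(changes):
    """Group PR numbers by external author, mirroring the footerFormat template."""
    grouped = {}
    for change in changes:
        # a change looks like {"Author": "user1 user2", "PR": "398"}
        for author in change["Author"].split(" "):
            if author.lower() in CORE_TEAM:
                continue  # core team members are not listed as contributors
            grouped.setdefault(author, []).append(change["PR"])
    return grouped

# usage: contributors([{"Author": "barberscot", "PR": "398"}])
# -> {"barberscot": ["398"]}, which renders as the "### Contributors" list
```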
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index 5928b1cbf..c4a5c53b4 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -18,4 +18,4 @@ resolves #
 - [ ] I have signed the [CLA](https://docs.getdbt.com/docs/contributor-license-agreements)
 - [ ] I have run this code in development and it appears to resolve the stated issue
 - [ ] This PR includes tests, or tests are not required/relevant for this PR
-- [ ] I have updated the `CHANGELOG.md` and added information about my change to the "dbt-spark next" section.
+- [ ] I have run `changie new` to [create a changelog entry](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#Adding-CHANGELOG-Entry)
diff --git a/.github/workflows/bot-changelog.yml b/.github/workflows/bot-changelog.yml
new file mode 100644
index 000000000..d8056efe4
--- /dev/null
+++ b/.github/workflows/bot-changelog.yml
@@ -0,0 +1,61 @@
+# **what?**
+# When bots create a PR, this action will add a corresponding changie yaml file to that
+# PR when a specific label is added.
+#
+# The file is created off a template:
+#
+# kind: <changie kind from the matrix below>
+# body: <PR title>
+# time: <current timestamp>
+# custom:
+#   Author: <PR author's login>
+#   Issue: 4904
+#   PR: <PR number>
+#
+# **why?**
+# Automate changelog generation for more visibility with automated bot PRs.
+#
+# **when?**
+# Once a PR is created, the label should be added to the PR either at creation or afterward. You can also
+#  manually trigger this by adding the appropriate label at any time.
+#
+# **how to add another bot?**
+# Add the label and changie kind to the include matrix.  That's it!
+#
+
+name: Bot Changelog
+
+on:
+  pull_request:
+    # catch when the PR is opened with the label or when the label is added
+    types: [opened, labeled]
+
+permissions:
+  contents: write
+  pull-requests: read
+
+jobs:
+  generate_changelog:
+    strategy:
+      matrix:
+        include:
+          - label: "dependencies"
+            changie_kind: "Dependency"
+          - label: "snyk"
+            changie_kind: "Security"
+    runs-on: ubuntu-latest
+
+    steps:
+
+    - name: Create and commit changelog on bot PR
+      if: "contains(github.event.pull_request.labels.*.name, ${{ matrix.label }})"
+      id: bot_changelog
+      uses: emmyoop/changie_bot@v1.0
+      with:
+        GITHUB_TOKEN: ${{ secrets.FISHTOWN_BOT_PAT }}
+        commit_author_name: "Github Build Bot"
+        commit_author_email: ""
+        commit_message: "Add automated changelog yaml from template for bot PR"
+        changie_kind: ${{ matrix.changie_kind }}
+        label: ${{ matrix.label }}
+        custom_changelog_string: "custom:\n  Author: ${{ github.event.pull_request.user.login }}\n  Issue: 417\n  PR: ${{ github.event.pull_request.number }}\n"
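Note: the `custom_changelog_string` above is an escaped YAML fragment. A small Python illustration of how it expands once the `\n` sequences and expressions are substituted (the author and PR number below are hypothetical example values):

```
# hypothetical example values showing how the escaped template above expands
pr_author = "dependabot[bot]"
pr_number = 431
custom_changelog_string = (
    f"custom:\n  Author: {pr_author}\n  Issue: 417\n  PR: {pr_number}\n"
)
print(custom_changelog_string)
# custom:
#   Author: dependabot[bot]
#   Issue: 417
#   PR: 431
```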
diff --git a/.github/workflows/changelog-existence.yml b/.github/workflows/changelog-existence.yml
new file mode 100644
index 000000000..6e51e8afc
--- /dev/null
+++ b/.github/workflows/changelog-existence.yml
@@ -0,0 +1,41 @@
+# **what?**
+# Checks that a file has been committed under the /.changes directory
+# as a new CHANGELOG entry.  Cannot check for a specific filename as
+# it is dynamically generated by change type and timestamp.
+# This workflow should not require any secrets since it runs for PRs
+# from forked repos.
+# By default, secrets are not passed to workflows running from
+# a forked repo.
+
+# **why?**
+# Ensure code change gets reflected in the CHANGELOG.
+
+# **when?**
+# This will run for all PRs going into main and *.latest.  It will
+# run when they are opened, reopened, when any label is added or removed
+# and when new code is pushed to the branch.  The action will then get
+# skipped if the 'Skip Changelog' label is present on the PR.
+
+name: Check Changelog Entry
+
+on:
+  pull_request:
+    types: [opened, reopened, labeled, unlabeled, synchronize]
+  workflow_dispatch:
+
+defaults:
+  run:
+    shell: bash
+
+permissions:
+  contents: read
+  pull-requests: write
+
+
+jobs:
+  changelog:
+    uses: dbt-labs/actions/.github/workflows/changelog-existence.yml@main
+    with:
+      changelog_comment: 'Thank you for your pull request! We could not find a changelog entry for this change. For details on how to document a change, see the [dbt-spark contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.MD).'
+      skip_label: 'Skip Changelog'
+    secrets: inherit # this is only acceptable because we own the action we're calling
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5948429a7..4f187e31e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,266 +1,23 @@
-## dbt-spark 1.3.0b2 (Release TBD)
+# dbt-spark Changelog
 
-## dbt-spark 1.3.0b1 (July 29, 2022)
+- This file provides a full account of all changes to `dbt-spark`.
+- Changes are listed under the (pre)release in which they first appear. Subsequent releases include changes from previous releases.
+- "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version.
+- Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry)
 
-### Features
-- Support python model through notebook, currently supported materializations are table and incremental. ([#377](https://github.com/dbt-labs/dbt-spark/pull/377))
-
-### Fixes
-- Pin `pyodbc` to version 4.0.32 to prevent overwriting `libodbc.so` and `libltdl.so` on Linux ([#397](https://github.com/dbt-labs/dbt-spark/issues/397/), [#398](https://github.com/dbt-labs/dbt-spark/pull/398/))
-
-### Under the hood
-- Support core incremental refactor ([#394](https://github.com/dbt-labs/dbt-spark/issues/394))
-
-### Contributors
-- [@barberscott](https://github.com/barberscott)  ([#398](https://github.com/dbt-labs/dbt-spark/pull/398/))
-
-## dbt-spark 1.2.0rc1 (July 12, 2022)
-
-### Fixes
-- Incremental materialization updated to not drop table first if full refresh for delta lake format, as it already runs _create or replace table_ ([#286](https://github.com/dbt-labs/dbt-spark/issues/286), [#287](https://github.com/dbt-labs/dbt-spark/pull/287/))
-- Apache Spark version upgraded to 3.1.1 ([#348](https://github.com/dbt-labs/dbt-spark/issues/348), [#349](https://github.com/dbt-labs/dbt-spark/pull/349))
-
-### Features
-- Add grants to materializations ([#366](https://github.com/dbt-labs/dbt-spark/issues/366), [#381](https://github.com/dbt-labs/dbt-spark/pull/381))
-
-### Under the hood
-- Update `SparkColumn.numeric_type` to return `decimal` instead of `numeric`, since SparkSQL exclusively supports the former ([#380](https://github.com/dbt-labs/dbt-spark/pull/380))
-- Make minimal changes to support dbt Core incremental materialization refactor ([#402](https://github.com/dbt-labs/dbt-spark/issue/402), [#394](httpe://github.com/dbt-labs/dbt-spark/pull/394))
-
-### Contributors
-- [@grindheim](https://github.com/grindheim) ([#287](https://github.com/dbt-labs/dbt-spark/pull/287/))
-- [@nssalian](https://github.com/nssalian) ([#349](https://github.com/dbt-labs/dbt-spark/pull/349))
-
-## dbt-spark 1.2.0b1 (June 24, 2022)
-
-### Fixes
-- `adapter.get_columns_in_relation` (method) and `get_columns_in_relation` (macro) now return identical responses. The previous behavior of `get_columns_in_relation` (macro) is now represented by a new macro, `get_columns_in_relation_raw` ([#354](https://github.com/dbt-labs/dbt-spark/issues/354), [#355](https://github.com/dbt-labs/dbt-spark/pull/355))
-
-### Under the hood
-- Initialize lift + shift for cross-db macros ([#359](https://github.com/dbt-labs/dbt-spark/pull/359))
-- Add invocation env to user agent string ([#367](https://github.com/dbt-labs/dbt-spark/pull/367))
-- Use dispatch pattern for get_columns_in_relation_raw macro ([#365](https://github.com/dbt-labs/dbt-spark/pull/365))
-
-### Contributors
-- [@ueshin](https://github.com/ueshin) ([#365](https://github.com/dbt-labs/dbt-spark/pull/365))
-- [@dbeatty10](https://github.com/dbeatty10) ([#359](https://github.com/dbt-labs/dbt-spark/pull/359))
-
-## dbt-spark 1.1.0 (April 28, 2022)
-
-### Features
-- Add session connection method ([#272](https://github.com/dbt-labs/dbt-spark/issues/272), [#279](https://github.com/dbt-labs/dbt-spark/pull/279))
-- rename file to match reference to dbt-core ([#344](https://github.com/dbt-labs/dbt-spark/pull/344))
-
-### Under the hood
-- Add precommit tooling to this repo ([#356](https://github.com/dbt-labs/dbt-spark/pull/356))
-- Use dbt.tests.adapter.basic in test suite ([#298](https://github.com/dbt-labs/dbt-spark/issues/298), [#299](https://github.com/dbt-labs/dbt-spark/pull/299))
-- Make internal macros use macro dispatch to be overridable in child adapters ([#319](https://github.com/dbt-labs/dbt-spark/issues/319), [#320](https://github.com/dbt-labs/dbt-spark/pull/320))
-- Override adapter method 'run_sql_for_tests' ([#323](https://github.com/dbt-labs/dbt-spark/issues/323), [#324](https://github.com/dbt-labs/dbt-spark/pull/324))
-- when a table or view doesn't exist, 'adapter.get_columns_in_relation' will return empty list instead of fail ([#328]https://github.com/dbt-labs/dbt-spark/pull/328)
-
-### Contributors
-- [@JCZuurmond](https://github.com/dbt-labs/dbt-spark/pull/279) ( [#279](https://github.com/dbt-labs/dbt-spark/pull/279))
-- [@ueshin](https://github.com/ueshin) ([#320](https://github.com/dbt-labs/dbt-spark/pull/320))
-
-## dbt-spark 1.1.0b1 (March 23, 2022)
-
-### Features
-- Adds new integration test to check against new ability to allow unique_key to be a list. ([#282](https://github.com/dbt-labs/dbt-spark/issues/282)), [#291](https://github.com/dbt-labs/dbt-spark/pull/291))
-
-### Fixes
-- Closes the connection properly ([#280](https://github.com/dbt-labs/dbt-spark/issues/280), [#285](https://github.com/dbt-labs/dbt-spark/pull/285))
-
-### Under the hood
-- get_response -> AdapterResponse ([#265](https://github.com/dbt-labs/dbt-spark/pull/265))
-- Adding stale Actions workflow ([#275](https://github.com/dbt-labs/dbt-spark/pull/275))
-- Update plugin author name (`fishtown-analytics` → `dbt-labs`) in ODBC user agent ([#288](https://github.com/dbt-labs/dbt-spark/pull/288))
-- Configure insert_overwrite models to use parquet ([#301](https://github.com/dbt-labs/dbt-spark/pull/301))
-
-### Contributors
-- [@amychen1776](https://github.com/amychen1776) ([#288](https://github.com/dbt-labs/dbt-spark/pull/288))
-- [@ueshin](https://github.com/ueshin) ([#285](https://github.com/dbt-labs/dbt-spark/pull/285))
-
-## dbt-spark 1.0.1rc0 (Release TBD)
-
-### Fixes
-- Closes the connection properly ([#280](https://github.com/dbt-labs/dbt-spark/issues/280), [#285](https://github.com/dbt-labs/dbt-spark/pull/285))
-
-### Contributors
-- [@ueshin](https://github.com/ueshin) ([#285](https://github.com/dbt-labs/dbt-spark/pull/285))
-
-## dbt-spark 1.0.0 (December 3, 2021)
-
-### Fixes
-- Incremental materialization corrected to respect `full_refresh` config, by using `should_full_refresh()` macro ([#260](https://github.com/dbt-labs/dbt-spark/issues/260), [#262](https://github.com/dbt-labs/dbt-spark/pull/262/))
-
-### Contributors
-- [@grindheim](https://github.com/grindheim) ([#262](https://github.com/dbt-labs/dbt-spark/pull/262/))
-
-## dbt-spark 1.0.0rc2 (November 24, 2021)
-
-### Features
-- Add support for Apache Hudi (hudi file format) which supports incremental merge strategies ([#187](https://github.com/dbt-labs/dbt-spark/issues/187), [#210](https://github.com/dbt-labs/dbt-spark/pull/210))
-
-### Under the hood
-- Refactor seed macros: remove duplicated code from dbt-core, and provide clearer logging of SQL parameters that differ by connection method ([#249](https://github.com/dbt-labs/dbt-spark/issues/249), [#250](https://github.com/dbt-labs/dbt-snowflake/pull/250))
-- Replace `sample_profiles.yml` with `profile_template.yml`, for use with new `dbt init` ([#247](https://github.com/dbt-labs/dbt-spark/pull/247))
-
-### Contributors
-- [@vingov](https://github.com/vingov) ([#210](https://github.com/dbt-labs/dbt-spark/pull/210))
-
-## dbt-spark 1.0.0rc1 (November 10, 2021)
-
-### Under the hood
-- Remove official support for python 3.6, which is reaching end of life on December 23, 2021 ([dbt-core#4134](https://github.com/dbt-labs/dbt-core/issues/4134), [#253](https://github.com/dbt-labs/dbt-snowflake/pull/253))
-- Add support for structured logging ([#251](https://github.com/dbt-labs/dbt-spark/pull/251))
-
-## dbt-spark 0.21.1 (Release TBD)
-
-## dbt-spark 0.21.1rc1 (November 3, 2021)
-
-### Fixes
-- Fix `--store-failures` for tests, by suppressing irrelevant error in `comment_clause()` macro ([#232](https://github.com/dbt-labs/dbt-spark/issues/232), [#233](https://github.com/dbt-labs/dbt-spark/pull/233))
-- Add support for `on_schema_change` config in incremental models: `ignore`, `fail`, `append_new_columns`. For `sync_all_columns`, removing columns is not supported by Apache Spark or Delta Lake ([#198](https://github.com/dbt-labs/dbt-spark/issues/198), [#226](https://github.com/dbt-labs/dbt-spark/issues/226), [#229](https://github.com/dbt-labs/dbt-spark/pull/229))
-- Add `persist_docs` call to incremental model ([#224](https://github.com/dbt-labs/dbt-spark/issues/224), [#234](https://github.com/dbt-labs/dbt-spark/pull/234))
-
-### Contributors
-- [@binhnefits](https://github.com/binhnefits) ([#234](https://github.com/dbt-labs/dbt-spark/pull/234))
-
-## dbt-spark 0.21.0 (October 4, 2021)
-
-### Fixes
-- Enhanced get_columns_in_relation method to handle a bug in open source deltalake which doesnt return schema details in `show table extended in databasename like '*'` query output. This impacts dbt snapshots if file format is open source deltalake ([#207](https://github.com/dbt-labs/dbt-spark/pull/207))
-- Parse properly columns when there are struct fields to avoid considering inner fields: Issue ([#202](https://github.com/dbt-labs/dbt-spark/issues/202))
-
-### Under the hood
-- Add `unique_field` to better understand adapter adoption in anonymous usage tracking ([#211](https://github.com/dbt-labs/dbt-spark/pull/211))
-
-### Contributors
-- [@harryharanb](https://github.com/harryharanb) ([#207](https://github.com/dbt-labs/dbt-spark/pull/207))
-- [@SCouto](https://github.com/Scouto) ([#204](https://github.com/dbt-labs/dbt-spark/pull/204))
-
-## dbt-spark 0.21.0b2 (August 20, 2021)
-
-### Fixes
-- Add pyodbc import error message to dbt.exceptions.RuntimeException to get more detailed information when running `dbt debug` ([#192](https://github.com/dbt-labs/dbt-spark/pull/192))
-- Add support for ODBC Server Side Parameters, allowing options that need to be set with the `SET` statement to be used ([#201](https://github.com/dbt-labs/dbt-spark/pull/201))
-- Add `retry_all` configuration setting to retry all connection issues, not just when the `_is_retryable_error` function determines ([#194](https://github.com/dbt-labs/dbt-spark/pull/194))
-
-### Contributors
-- [@JCZuurmond](https://github.com/JCZuurmond) ([#192](https://github.com/fishtown-analytics/dbt-spark/pull/192))
-- [@jethron](https://github.com/jethron) ([#201](https://github.com/fishtown-analytics/dbt-spark/pull/201))
-- [@gregingenii](https://github.com/gregingenii) ([#194](https://github.com/dbt-labs/dbt-spark/pull/194))
-
-## dbt-spark 0.21.0b1 (August 3, 2021)
-
-## dbt-spark 0.20.1 (August 2, 2021)
-
-## dbt-spark 0.20.1rc1 (August 2, 2021)
-
-### Fixes
-- Fix `get_columns_in_relation` when called on models created in the same run ([#196](https://github.com/dbt-labs/dbt-spark/pull/196), [#197](https://github.com/dbt-labs/dbt-spark/pull/197))
-
-### Contributors
-- [@ali-tny](https://github.com/ali-tny) ([#197](https://github.com/fishtown-analytics/dbt-spark/pull/197))
-
-
-## dbt-spark 0.20.0 (July 12, 2021)
-
-## dbt-spark 0.20.0rc2 (July 7, 2021)
+## dbt-spark 1.3.0-b1 - July 29, 2022
 
 ### Features
-
-- Add support for `merge_update_columns` config in `merge`-strategy incremental models ([#183](https://github.com/fishtown-analytics/dbt-spark/pull/183), [#184](https://github.com/fishtown-analytics/dbt-spark/pull/184))
-
+- Support python model through notebook, currently supported materializations are table and incremental ([#417](https://github.com/dbt-labs/dbt-spark/issues/417), [#377](https://github.com/dbt-labs/dbt-spark/pull/377))
 ### Fixes
-
-- Fix column-level `persist_docs` on Delta tables, add tests ([#180](https://github.com/fishtown-analytics/dbt-spark/pull/180))
-
-## dbt-spark 0.20.0rc1 (June 8, 2021)
-
-### Features
-
-- Allow user to specify `use_ssl` ([#169](https://github.com/fishtown-analytics/dbt-spark/pull/169))
-- Allow setting table `OPTIONS` using `config` ([#171](https://github.com/fishtown-analytics/dbt-spark/pull/171))
-- Add support for column-level `persist_docs` on Delta tables ([#84](https://github.com/fishtown-analytics/dbt-spark/pull/84), [#170](https://github.com/fishtown-analytics/dbt-spark/pull/170))
-
-### Fixes
-- Cast `table_owner` to string to avoid errors generating docs ([#158](https://github.com/fishtown-analytics/dbt-spark/pull/158), [#159](https://github.com/fishtown-analytics/dbt-spark/pull/159))
-- Explicitly cast column types when inserting seeds ([#139](https://github.com/fishtown-analytics/dbt-spark/pull/139), [#166](https://github.com/fishtown-analytics/dbt-spark/pull/166))
-
-### Under the hood
-- Parse information returned by `list_relations_without_caching` macro to speed up catalog generation ([#93](https://github.com/fishtown-analytics/dbt-spark/issues/93), [#160](https://github.com/fishtown-analytics/dbt-spark/pull/160))
-- More flexible host passing, https:// can be omitted ([#153](https://github.com/fishtown-analytics/dbt-spark/issues/153))
+- Pin pyodbc to version 4.0.32 to prevent overwriting libodbc.so and libltdl.so on Linux ([#397](https://github.com/dbt-labs/dbt-spark/issues/397), [#398](https://github.com/dbt-labs/dbt-spark/pull/398))
+### Under the Hood
+- Support core incremental refactor ([#4402](https://github.com/dbt-labs/dbt-spark/issues/4402), [#394](https://github.com/dbt-labs/dbt-spark/pull/394))
 
 ### Contributors
-- [@friendofasquid](https://github.com/friendofasquid) ([#159](https://github.com/fishtown-analytics/dbt-spark/pull/159))
-- [@franloza](https://github.com/franloza) ([#160](https://github.com/fishtown-analytics/dbt-spark/pull/160))
-- [@Fokko](https://github.com/Fokko) ([#165](https://github.com/fishtown-analytics/dbt-spark/pull/165))
-- [@rahulgoyal2987](https://github.com/rahulgoyal2987) ([#169](https://github.com/fishtown-analytics/dbt-spark/pull/169))
-- [@JCZuurmond](https://github.com/JCZuurmond) ([#171](https://github.com/fishtown-analytics/dbt-spark/pull/171))
-- [@cristianoperez](https://github.com/cristianoperez) ([#170](https://github.com/fishtown-analytics/dbt-spark/pull/170))
-
-
-## dbt-spark 0.19.1 (April 2, 2021)
-
-## dbt-spark 0.19.1b2 (February 26, 2021)
-
-### Under the hood
-- Update serialization calls to use new API in dbt-core `0.19.1b2` ([#150](https://github.com/fishtown-analytics/dbt-spark/pull/150))
-
-## dbt-spark 0.19.0.1 (February 26, 2021)
-
-### Fixes
-- Fix package distribution to include incremental model materializations ([#151](https://github.com/fishtown-analytics/dbt-spark/pull/151), [#152](https://github.com/fishtown-analytics/dbt-spark/issues/152))
-
-## dbt-spark 0.19.0 (February 21, 2021)
-
-### Breaking changes
-- Incremental models have `incremental_strategy: append` by default. This strategy adds new records without updating or overwriting existing records. For that, use `merge` or `insert_overwrite` instead, depending on the file format, connection method, and attributes of your underlying data. dbt will try to raise a helpful error if you configure a strategy that is not supported for a given file format or connection. ([#140](https://github.com/fishtown-analytics/dbt-spark/pull/140), [#141](https://github.com/fishtown-analytics/dbt-spark/pull/141))
-
-### Fixes
-- Capture hard-deleted records in snapshot merge, when `invalidate_hard_deletes` config is set ([#109](https://github.com/fishtown-analytics/dbt-spark/pull/143), [#126](https://github.com/fishtown-analytics/dbt-spark/pull/144))
-
-## dbt-spark 0.19.0rc1 (January 8, 2021)
-
-### Breaking changes
-- Users of the `http` and `thrift` connection methods need to install extra requirements: `pip install dbt-spark[PyHive]` ([#109](https://github.com/fishtown-analytics/dbt-spark/pull/109), [#126](https://github.com/fishtown-analytics/dbt-spark/pull/126))
-
-### Under the hood
-- Enable `CREATE OR REPLACE` support when using Delta. Instead of dropping and recreating the table, it will keep the existing table, and add a new version as supported by Delta. This will ensure that the table stays available when running the pipeline, and you can track the history.
-- Add changelog, issue templates ([#119](https://github.com/fishtown-analytics/dbt-spark/pull/119), [#120](https://github.com/fishtown-analytics/dbt-spark/pull/120))
-
-### Fixes
-- Handle case of 0 retries better for HTTP Spark Connections ([#132](https://github.com/fishtown-analytics/dbt-spark/pull/132))
-
-### Contributors
-- [@danielvdende](https://github.com/danielvdende) ([#132](https://github.com/fishtown-analytics/dbt-spark/pull/132))
-- [@Fokko](https://github.com/Fokko) ([#125](https://github.com/fishtown-analytics/dbt-spark/pull/125))
-
-## dbt-spark 0.18.1.1 (November 13, 2020)
-
-### Fixes
-- Fix `extras_require` typo to enable `pip install dbt-spark[ODBC]` (([#121](https://github.com/fishtown-analytics/dbt-spark/pull/121)), ([#122](https://github.com/fishtown-analytics/dbt-spark/pull/122)))
-
-## dbt-spark 0.18.1 (November 6, 2020)
-
-### Features
-- Allows users to specify `auth` and `kerberos_service_name` ([#107](https://github.com/fishtown-analytics/dbt-spark/pull/107))
-- Add support for ODBC driver connections to Databricks clusters and endpoints ([#116](https://github.com/fishtown-analytics/dbt-spark/pull/116))
-
-### Under the hood
-- Updated README links ([#115](https://github.com/fishtown-analytics/dbt-spark/pull/115))
-- Support complete atomic overwrite of non-partitioned incremental models ([#117](https://github.com/fishtown-analytics/dbt-spark/pull/117))
-- Update to support dbt-core 0.18.1 ([#110](https://github.com/fishtown-analytics/dbt-spark/pull/110), [#118](https://github.com/fishtown-analytics/dbt-spark/pull/118))
-
-### Contributors
-- [@danielhstahl](https://github.com/danielhstahl) ([#107](https://github.com/fishtown-analytics/dbt-spark/pull/107))
-- [@collinprather](https://github.com/collinprather) ([#115](https://github.com/fishtown-analytics/dbt-spark/pull/115))
-- [@charlottevdscheun](https://github.com/charlottevdscheun) ([#117](https://github.com/fishtown-analytics/dbt-spark/pull/117))
-- [@Fokko](https://github.com/Fokko) ([#117](https://github.com/fishtown-analytics/dbt-spark/pull/117))
-
-## dbt-spark 0.18.0 (September 18, 2020)
-
-### Under the hood
-- Make a number of changes to support dbt-adapter-tests ([#103](https://github.com/fishtown-analytics/dbt-spark/pull/103))
-- Update to support dbt-core 0.18.0. Run CI tests against local Spark, Databricks ([#105](https://github.com/fishtown-analytics/dbt-spark/pull/105))
+- [@barberscot](https://github.com/barberscot) ([#398](https://github.com/dbt-labs/dbt-spark/pull/398))
+## Previous Releases
+For information on prior major and minor releases, see their changelogs:
+- [1.2](https://github.com/dbt-labs/dbt-spark/blob/1.2.latest/CHANGELOG.md)
+- [1.1](https://github.com/dbt-labs/dbt-spark/blob/1.1.latest/CHANGELOG.md)
+- [1.0](https://github.com/dbt-labs/dbt-spark/blob/1.0.latest/CHANGELOG.md)
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index c0d9bb3d2..1d6e76d31 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -90,6 +90,16 @@ Many changes will require and update to the `dbt-spark` docs here are some usefu
 - The changes made are likely to impact one or both of [Spark Profile](https://docs.getdbt.com/reference/warehouse-profiles/spark-profile), or [Spark Configs](https://docs.getdbt.com/reference/resource-configs/spark-configs).
 - We ask every community member who makes a user-facing change to open an issue or PR regarding doc changes.
 
+## Adding CHANGELOG Entry
+
+We use [changie](https://changie.dev) to generate `CHANGELOG` entries. **Note:** Do not edit the `CHANGELOG.md` directly. Your modifications will be lost.
+
+Follow the steps to [install `changie`](https://changie.dev/guide/installation/) for your system.
+
+Once changie is installed and your PR is created, simply run `changie new` and changie will walk you through the process of creating a changelog entry.  Commit the file that's created and your changelog entry is complete!
+
+You don't need to worry about which `dbt-spark` version your change will go into. Just create the changelog entry with `changie`, and open your PR against the `main` branch. All merged changes will be included in the next minor version of `dbt-spark`. The Core maintainers _may_ choose to "backport" specific changes in order to patch older minor versions. In that case, a maintainer will take care of that backport after merging your PR, before releasing the new version of `dbt-spark`.
+
 ## Submitting a Pull Request
 
 dbt Labs provides a CI environment to test changes to the `dbt-spark` adapter, and periodic checks against the development version of `dbt-core` through Github Actions.

From 24e796d52d0201bdb4c45fac2e99a2a848cbe853 Mon Sep 17 00:00:00 2001
From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
Date: Mon, 22 Aug 2022 10:23:05 -0500
Subject: [PATCH 26/54] Add ref to pre 1.0.0 in 0.0.0.md (#428)

* init pr push for ct-1005

* add changelog

* change pointer to spark

* eof fix

* remove ref to dbt-core in changelog existence

* typo fix

* typo and change of ref

* add conditional logic for kinds security and dependency and change NO ISSUE ref to spark issue #417

* add ref to pre 1.0.0 changes

* add ref to pre 1.0.0 changes

* fix eof fail on test

* fix eof fail on test

* expand out ref to past 1.0.0

* run changie merge

* repush changes

* remove excess spacing
---
 .changes/0.0.0.md | 3 +++
 CHANGELOG.md      | 6 ++++++
 2 files changed, 9 insertions(+)

diff --git a/.changes/0.0.0.md b/.changes/0.0.0.md
index 5acfb3dbc..14c2cf9e7 100644
--- a/.changes/0.0.0.md
+++ b/.changes/0.0.0.md
@@ -3,3 +3,6 @@ For information on prior major and minor releases, see their changelogs:
 - [1.2](https://github.com/dbt-labs/dbt-spark/blob/1.2.latest/CHANGELOG.md)
 - [1.1](https://github.com/dbt-labs/dbt-spark/blob/1.1.latest/CHANGELOG.md)
 - [1.0](https://github.com/dbt-labs/dbt-spark/blob/1.0.latest/CHANGELOG.md)
+- [0.21](https://github.com/dbt-labs/dbt-spark/blob/0.21.latest/CHANGELOG.md)
+- [0.20](https://github.com/dbt-labs/dbt-spark/blob/0.20.latest/CHANGELOG.md)
+- [0.19 and earlier](https://github.com/dbt-labs/dbt-spark/blob/0.19.latest/CHANGELOG.md)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4f187e31e..0491a7b5f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,15 +9,21 @@
 
 ### Features
 - Support python model through notebook, currently supported materializations are table and incremental ([#417](https://github.com/dbt-labs/dbt-spark/issues/417), [#377](https://github.com/dbt-labs/dbt-spark/pull/377))
+
 ### Fixes
 - Pin pyodbc to version 4.0.32 to prevent overwriting libodbc.so and libltdl.so on Linux ([#397](https://github.com/dbt-labs/dbt-spark/issues/397), [#398](https://github.com/dbt-labs/dbt-spark/pull/398))
+
 ### Under the Hood
 - Support core incremental refactor ([#4402](https://github.com/dbt-labs/dbt-spark/issues/4402), [#394](https://github.com/dbt-labs/dbt-spark/pull/394))
 
 ### Contributors
 - [@barberscot](https://github.com/barberscot) ([#398](https://github.com/dbt-labs/dbt-spark/pull/398))
+
 ## Previous Releases
 For information on prior major and minor releases, see their changelogs:
 - [1.2](https://github.com/dbt-labs/dbt-spark/blob/1.2.latest/CHANGELOG.md)
 - [1.1](https://github.com/dbt-labs/dbt-spark/blob/1.1.latest/CHANGELOG.md)
 - [1.0](https://github.com/dbt-labs/dbt-spark/blob/1.0.latest/CHANGELOG.md)
+- [0.21](https://github.com/dbt-labs/dbt-spark/blob/0.21.latest/CHANGELOG.md)
+- [0.20](https://github.com/dbt-labs/dbt-spark/blob/0.20.latest/CHANGELOG.md)
+- [0.19 and earlier](https://github.com/dbt-labs/dbt-spark/blob/0.19.latest/CHANGELOG.md)

From c9698f62118b9c5408b53bb8cc3be03ae5d3d8a4 Mon Sep 17 00:00:00 2001
From: Chenyu Li 
Date: Thu, 25 Aug 2022 10:02:50 -0700
Subject: [PATCH 27/54] add supported language (#440)

* add supported language

* add changelog
---
 .changes/unreleased/Under the Hood-20220825-073413.yaml    | 7 +++++++
 .../macros/materializations/incremental/incremental.sql    | 2 +-
 dbt/include/spark/macros/materializations/table.sql        | 2 +-
 3 files changed, 9 insertions(+), 2 deletions(-)
 create mode 100644 .changes/unreleased/Under the Hood-20220825-073413.yaml

diff --git a/.changes/unreleased/Under the Hood-20220825-073413.yaml b/.changes/unreleased/Under the Hood-20220825-073413.yaml
new file mode 100644
index 000000000..71e187ca7
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20220825-073413.yaml	
@@ -0,0 +1,7 @@
+kind: Under the Hood
+body: specify supported_languages for materialization that support python models
+time: 2022-08-25T07:34:13.397367-07:00
+custom:
+  Author: ChenyuLInx
+  Issue: "437"
+  PR: "440"
diff --git a/dbt/include/spark/macros/materializations/incremental/incremental.sql b/dbt/include/spark/macros/materializations/incremental/incremental.sql
index 91cba9e5f..1a92351ce 100644
--- a/dbt/include/spark/macros/materializations/incremental/incremental.sql
+++ b/dbt/include/spark/macros/materializations/incremental/incremental.sql
@@ -1,4 +1,4 @@
-{% materialization incremental, adapter='spark' -%}
+{% materialization incremental, adapter='spark', supported_languages=['sql', 'python'] -%}
   {#-- Validate early so we don't run SQL if the file_format + strategy combo is invalid --#}
   {%- set raw_file_format = config.get('file_format', default='parquet') -%}
   {%- set raw_strategy = config.get('incremental_strategy') or 'append' -%}
diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql
index 6a02ea164..d39ba0b44 100644
--- a/dbt/include/spark/macros/materializations/table.sql
+++ b/dbt/include/spark/macros/materializations/table.sql
@@ -1,4 +1,4 @@
-{% materialization table, adapter = 'spark' %}
+{% materialization table, adapter = 'spark', supported_languages=['sql', 'python'] %}
   {%- set language = model['language'] -%}
   {%- set identifier = model['alias'] -%}
   {%- set grant_config = config.get('grants') -%}
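Note: with `supported_languages=['sql', 'python']` declared, both the incremental and table materializations accept Python models as well as SQL ones. A hedged sketch of a Python model targeting the incremental materialization (config values and the upstream model name are illustrative; valid strategies and file formats are the ones dbt-spark already documents):

```
def model(dbt, session):
    # illustrative config: an incremental Python model on a Delta table
    dbt.config(
        materialized="incremental",
        file_format="delta",
        incremental_strategy="merge",
        unique_key="id",
    )
    # dbt.ref() returns the upstream relation as a Spark DataFrame;
    # "upstream_model" is a hypothetical model name
    return dbt.ref("upstream_model")
```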

From 5297b9225263fb33338fc54f004365ec1ad47104 Mon Sep 17 00:00:00 2001
From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
Date: Thu, 25 Aug 2022 15:27:29 -0500
Subject: [PATCH 28/54] version bump, changie. and backports (#434)

---
 .github/workflows/backport.yml     | 42 +++++++++++++
 .github/workflows/version-bump.yml | 97 ++++--------------------------
 2 files changed, 53 insertions(+), 86 deletions(-)
 create mode 100644 .github/workflows/backport.yml

diff --git a/.github/workflows/backport.yml b/.github/workflows/backport.yml
new file mode 100644
index 000000000..8c0355bda
--- /dev/null
+++ b/.github/workflows/backport.yml
@@ -0,0 +1,42 @@
+
+
+# **what?**
+# When a PR is merged, if it has the backport label, it will create
+# a new PR to backport those changes to the given branch. If it can't
+# cleanly do a backport, it will comment on the merged PR of the failure.
+#
+# Label naming convention: "backport <branch name>"
+# Example: backport 1.0.latest
+#
+# You MUST "Squash and merge" the original PR or this won't work.
+
+# **why?**
+# Changes sometimes need to be backported to release branches.
+# This automates the backporting process
+
+# **when?**
+# Once a PR is "Squash and merge"'d, adding a backport label triggers this workflow
+
+name: Backport
+on:
+  pull_request:
+    types:
+      - labeled
+
+permissions:
+  contents: write
+  pull-requests: write
+
+jobs:
+  backport:
+    name: Backport
+    runs-on: ubuntu-latest
+    # Only react to merged PRs for security reasons.
+    # See https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#pull_request_target.
+    if: >
+      github.event.pull_request.merged
+      && contains(github.event.label.name, 'backport')
+    steps:
+      - uses: tibdex/backport@v2.0.2
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/version-bump.yml b/.github/workflows/version-bump.yml
index a8b3236ce..bde34d683 100644
--- a/.github/workflows/version-bump.yml
+++ b/.github/workflows/version-bump.yml
@@ -1,18 +1,15 @@
 # **what?**
-# This workflow will take a version number and a dry run flag. With that
+# This workflow will take the new version number to bump to. With that
 # it will run versionbump to update the version number everywhere in the
-# code base and then generate an update Docker requirements file. If this
-# is a dry run, a draft PR will open with the changes. If this isn't a dry
-# run, the changes will be committed to the branch this is run on.
+# code base and then run changie to create the corresponding changelog.
+# A PR will be created with the changes that can be reviewed before committing.
 
 # **why?**
 # This is to aid in releasing dbt and making sure we have updated
-# the versions and Docker requirements in all places.
+# the version in all places and generated the changelog.
 
 # **when?**
-# This is triggered either manually OR
-# from the repository_dispatch event "version-bump" which is sent from
-# the dbt-release repo Action
+# This is triggered manually
 
 name: Version Bump
 
@@ -20,84 +17,12 @@ on:
   workflow_dispatch:
     inputs:
       version_number:
-       description: 'The version number to bump to'
+       description: 'The version number to bump to (ex. 1.2.0, 1.3.0b1)'
        required: true
-      is_dry_run:
-       description: 'Creates a draft PR to allow testing instead of committing to a branch'
-       required: true
-       default: 'true'
-  repository_dispatch:
-    types: [version-bump]
 
 jobs:
-  bump:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Check out the repository
-        uses: actions/checkout@v2
-
-      - name: Set version and dry run values
-        id: variables
-        env:
-          VERSION_NUMBER: "${{ github.event.client_payload.version_number == '' && github.event.inputs.version_number || github.event.client_payload.version_number }}"
-          IS_DRY_RUN: "${{ github.event.client_payload.is_dry_run == '' && github.event.inputs.is_dry_run || github.event.client_payload.is_dry_run }}"
-        run: |
-          echo Repository dispatch event version: ${{ github.event.client_payload.version_number }}
-          echo Repository dispatch event dry run: ${{ github.event.client_payload.is_dry_run }}
-          echo Workflow dispatch event version: ${{ github.event.inputs.version_number }}
-          echo Workflow dispatch event dry run: ${{ github.event.inputs.is_dry_run }}
-          echo ::set-output name=VERSION_NUMBER::$VERSION_NUMBER
-          echo ::set-output name=IS_DRY_RUN::$IS_DRY_RUN
-
-      - uses: actions/setup-python@v2
-        with:
-          python-version: "3.8"
-
-      - name: Install python dependencies
-        run: |
-          sudo apt-get install libsasl2-dev
-          python3 -m venv env
-          source env/bin/activate
-          pip install --upgrade pip
-
-      - name: Create PR branch
-        if: ${{ steps.variables.outputs.IS_DRY_RUN  == 'true' }}
-        run: |
-          git checkout -b bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_$GITHUB_RUN_ID
-          git push origin bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_$GITHUB_RUN_ID
-          git branch --set-upstream-to=origin/bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_$GITHUB_RUN_ID bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_$GITHUB_RUN_ID
-
-      - name: Bumping version
-        run: |
-          source env/bin/activate
-          pip install -r dev-requirements.txt
-          env/bin/bumpversion --allow-dirty --new-version ${{steps.variables.outputs.VERSION_NUMBER}} major
-          git status
-
-      - name: Commit version bump directly
-        uses: EndBug/add-and-commit@v7
-        if: ${{ steps.variables.outputs.IS_DRY_RUN == 'false' }}
-        with:
-          author_name: 'Github Build Bot'
-          author_email: 'buildbot@fishtownanalytics.com'
-          message: 'Bumping version to ${{steps.variables.outputs.VERSION_NUMBER}}'
-
-      - name: Commit version bump to branch
-        uses: EndBug/add-and-commit@v7
-        if: ${{ steps.variables.outputs.IS_DRY_RUN == 'true' }}
-        with:
-          author_name: 'Github Build Bot'
-          author_email: 'buildbot@fishtownanalytics.com'
-          message: 'Bumping version to ${{steps.variables.outputs.VERSION_NUMBER}}'
-          branch: 'bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_${{GITHUB.RUN_ID}}'
-          push: 'origin origin/bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_${{GITHUB.RUN_ID}}'
-
-      - name: Create Pull Request
-        uses: peter-evans/create-pull-request@v3
-        if: ${{ steps.variables.outputs.IS_DRY_RUN == 'true' }}
-        with:
-          author: 'Github Build Bot '
-          draft: true
-          base: ${{github.ref}}
-          title: 'Bumping version to ${{steps.variables.outputs.VERSION_NUMBER}}'
-          branch: 'bumping-version/${{steps.variables.outputs.VERSION_NUMBER}}_${{GITHUB.RUN_ID}}'
+  version_bump_and_changie:
+    uses: dbt-labs/actions/.github/workflows/version-bump.yml@main
+    with:
+      version_number: ${{ inputs.version_number }}
+    secrets: inherit  # ok since what we are calling is internally maintained

From 224cc28004122f478a965acb9f5deff788bbdd72 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Tue, 30 Aug 2022 11:20:35 -0400
Subject: [PATCH 29/54] Bumping version to 1.3.0b2 and generate changelog
 (#443)

* Bumping version to 1.3.0b2 and generate CHANGELOG

* Remove newline

Co-authored-by: Github Build Bot 
Co-authored-by: leahwicz <60146280+leahwicz@users.noreply.github.com>
---
 .bumpversion.cfg                                          | 2 +-
 .changes/1.3.0-b2.md                                      | 5 +++++
 .../{unreleased => 1.3.0}/Features-20220808-142118.yaml   | 0
 .../Under the Hood-20220825-073413.yaml                   | 0
 CHANGELOG.md                                              | 8 +++++---
 dbt/adapters/spark/__version__.py                         | 2 +-
 setup.py                                                  | 2 +-
 7 files changed, 13 insertions(+), 6 deletions(-)
 create mode 100644 .changes/1.3.0-b2.md
 rename .changes/{unreleased => 1.3.0}/Features-20220808-142118.yaml (100%)
 rename .changes/{unreleased => 1.3.0}/Under the Hood-20220825-073413.yaml (100%)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index ef3954f4c..f93a02ae6 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.3.0b1
+current_version = 1.3.0b2
 parse = (?P<major>\d+)
 	\.(?P<minor>\d+)
 	\.(?P<patch>\d+)
diff --git a/.changes/1.3.0-b2.md b/.changes/1.3.0-b2.md
new file mode 100644
index 000000000..8f7ea1e62
--- /dev/null
+++ b/.changes/1.3.0-b2.md
@@ -0,0 +1,5 @@
+## dbt-spark 1.3.0-b2 - August 30, 2022
+### Features
+- Add changie to dbt-spark ([#416](https://github.com/dbt-labs/dbt-spark/issues/416), [#418](https://github.com/dbt-labs/dbt-spark/pull/418))
+### Under the Hood
+- specify supported_languages for materialization that support python models ([#437](https://github.com/dbt-labs/dbt-spark/issues/437), [#440](https://github.com/dbt-labs/dbt-spark/pull/440))
diff --git a/.changes/unreleased/Features-20220808-142118.yaml b/.changes/1.3.0/Features-20220808-142118.yaml
similarity index 100%
rename from .changes/unreleased/Features-20220808-142118.yaml
rename to .changes/1.3.0/Features-20220808-142118.yaml
diff --git a/.changes/unreleased/Under the Hood-20220825-073413.yaml b/.changes/1.3.0/Under the Hood-20220825-073413.yaml
similarity index 100%
rename from .changes/unreleased/Under the Hood-20220825-073413.yaml
rename to .changes/1.3.0/Under the Hood-20220825-073413.yaml
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0491a7b5f..de20a0738 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,21 +4,23 @@
 - Changes are listed under the (pre)release in which they first appear. Subsequent releases include changes from previous releases.
 - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version.
 - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry)
+## dbt-spark 1.3.0-b2 - August 30, 2022
+### Features
+- Add changie to dbt-spark ([#416](https://github.com/dbt-labs/dbt-spark/issues/416), [#418](https://github.com/dbt-labs/dbt-spark/pull/418))
+### Under the Hood
+- specify supported_languages for materialization that support python models ([#437](https://github.com/dbt-labs/dbt-spark/issues/437), [#440](https://github.com/dbt-labs/dbt-spark/pull/440))
 
 ## dbt-spark 1.3.0-b1 - July 29, 2022
 
 ### Features
 - Support python model through notebook, currently supported materializations are table and incremental ([#417](https://github.com/dbt-labs/dbt-spark/issues/417), [#377](https://github.com/dbt-labs/dbt-spark/pull/377))
-
 ### Fixes
 - Pin pyodbc to version 4.0.32 to prevent overwriting libodbc.so and libltdl.so on Linux ([#397](https://github.com/dbt-labs/dbt-spark/issues/397), [#398](https://github.com/dbt-labs/dbt-spark/pull/398))
-
 ### Under the Hood
 - Support core incremental refactor ([#4402](https://github.com/dbt-labs/dbt-spark/issues/4402), [#394](https://github.com/dbt-labs/dbt-spark/pull/394))
 
 ### Contributors
 - [@barberscot](https://github.com/barberscot) ([#398](https://github.com/dbt-labs/dbt-spark/pull/398))
-
 ## Previous Releases
 For information on prior major and minor releases, see their changelogs:
 - [1.2](https://github.com/dbt-labs/dbt-spark/blob/1.2.latest/CHANGELOG.md)
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index 4b49b750d..e2c1a233c 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.3.0b1"
+version = "1.3.0b2"
diff --git a/setup.py b/setup.py
index 229e89a17..05e814490 100644
--- a/setup.py
+++ b/setup.py
@@ -50,7 +50,7 @@ def _get_dbt_core_version():
 
 
 package_name = "dbt-spark"
-package_version = "1.3.0b1"
+package_version = "1.3.0b2"
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 

From cef098f5181c51e9a6ae06c157ec6863852bcd22 Mon Sep 17 00:00:00 2001
From: Chenyu Li 
Date: Tue, 30 Aug 2022 16:49:12 -0700
Subject: [PATCH 30/54] refactor submission method and add command API as
 default (#442)

* refactor submission method and add command API as default

* update run_name and add changelog

* fix format

* pr feedback
---
 .../Under the Hood-20220829-164426.yaml       |   7 +
 dbt/adapters/spark/impl.py                    | 108 +------
 dbt/adapters/spark/python_submissions.py      | 284 ++++++++++++++++++
 3 files changed, 300 insertions(+), 99 deletions(-)
 create mode 100644 .changes/unreleased/Under the Hood-20220829-164426.yaml
 create mode 100644 dbt/adapters/spark/python_submissions.py

diff --git a/.changes/unreleased/Under the Hood-20220829-164426.yaml b/.changes/unreleased/Under the Hood-20220829-164426.yaml
new file mode 100644
index 000000000..bf58971f2
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20220829-164426.yaml	
@@ -0,0 +1,7 @@
+kind: Under the Hood
+body: Submit python model with Command API by default. Adjusted run name
+time: 2022-08-29T16:44:26.509138-07:00
+custom:
+  Author: ChenyuLInx
+  Issue: "424"
+  PR: "442"
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 12c42ab98..6e97ce1f5 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -1,7 +1,4 @@
 import re
-import requests
-import time
-import base64
 from concurrent.futures import Future
 from dataclasses import dataclass
 from typing import Any, Dict, Iterable, List, Optional, Union
@@ -20,6 +17,7 @@
 from dbt.adapters.spark import SparkConnectionManager
 from dbt.adapters.spark import SparkRelation
 from dbt.adapters.spark import SparkColumn
+from dbt.adapters.spark.python_submissions import PYTHON_SUBMISSION_HELPERS
 from dbt.adapters.base import BaseRelation
 from dbt.clients.agate_helper import DEFAULT_TYPE_TESTER
 from dbt.events import AdapterLogger
@@ -394,105 +392,17 @@ def submit_python_job(self, parsed_model: dict, compiled_code: str, timeout=None
         # of `None` which evaluates to True!
 
         # TODO limit this function to run only when doing the materialization of python nodes
-
         # assuming that for python job running over 1 day user would manually overwrite this
-        schema = getattr(parsed_model, "schema", self.config.credentials.schema)
-        identifier = parsed_model["alias"]
-        if not timeout:
-            timeout = 60 * 60 * 24
-        if timeout <= 0:
-            raise ValueError("Timeout must larger than 0")
-
-        auth_header = {"Authorization": f"Bearer {self.connections.profile.credentials.token}"}
-
-        # create new dir
-        if not self.connections.profile.credentials.user:
-            raise ValueError("Need to supply user in profile to submit python job")
-        # it is safe to call mkdirs even if dir already exists and have content inside
-        work_dir = f"/Users/{self.connections.profile.credentials.user}/{schema}"
-        response = requests.post(
-            f"https://{self.connections.profile.credentials.host}/api/2.0/workspace/mkdirs",
-            headers=auth_header,
-            json={
-                "path": work_dir,
-            },
-        )
-        if response.status_code != 200:
-            raise dbt.exceptions.RuntimeException(
-                f"Error creating work_dir for python notebooks\n {response.content!r}"
+        submission_method = parsed_model["config"].get("submission_method", "commands")
+        if submission_method not in PYTHON_SUBMISSION_HELPERS:
+            raise NotImplementedError(
+                "Submission method {} is not supported".format(submission_method)
             )
-
-        # add notebook
-        b64_encoded_content = base64.b64encode(compiled_code.encode()).decode()
-        response = requests.post(
-            f"https://{self.connections.profile.credentials.host}/api/2.0/workspace/import",
-            headers=auth_header,
-            json={
-                "path": f"{work_dir}/{identifier}",
-                "content": b64_encoded_content,
-                "language": "PYTHON",
-                "overwrite": True,
-                "format": "SOURCE",
-            },
+        job_helper = PYTHON_SUBMISSION_HELPERS[submission_method](
+            parsed_model, self.connections.profile.credentials
         )
-        if response.status_code != 200:
-            raise dbt.exceptions.RuntimeException(
-                f"Error creating python notebook.\n {response.content!r}"
-            )
-
-        # submit job
-        submit_response = requests.post(
-            f"https://{self.connections.profile.credentials.host}/api/2.1/jobs/runs/submit",
-            headers=auth_header,
-            json={
-                "run_name": "debug task",
-                "existing_cluster_id": self.connections.profile.credentials.cluster,
-                "notebook_task": {
-                    "notebook_path": f"{work_dir}/{identifier}",
-                },
-            },
-        )
-        if submit_response.status_code != 200:
-            raise dbt.exceptions.RuntimeException(
-                f"Error creating python run.\n {response.content!r}"
-            )
-
-        # poll until job finish
-        state = None
-        start = time.time()
-        run_id = submit_response.json()["run_id"]
-        terminal_states = ["TERMINATED", "SKIPPED", "INTERNAL_ERROR"]
-        while state not in terminal_states and time.time() - start < timeout:
-            time.sleep(1)
-            resp = requests.get(
-                f"https://{self.connections.profile.credentials.host}"
-                f"/api/2.1/jobs/runs/get?run_id={run_id}",
-                headers=auth_header,
-            )
-            json_resp = resp.json()
-            state = json_resp["state"]["life_cycle_state"]
-            # logger.debug(f"Polling.... in state: {state}")
-        if state != "TERMINATED":
-            raise dbt.exceptions.RuntimeException(
-                "python model run ended in state"
-                f"{state} with state_message\n{json_resp['state']['state_message']}"
-            )
-
-        # get end state to return to user
-        run_output = requests.get(
-            f"https://{self.connections.profile.credentials.host}"
-            f"/api/2.1/jobs/runs/get-output?run_id={run_id}",
-            headers=auth_header,
-        )
-        json_run_output = run_output.json()
-        result_state = json_run_output["metadata"]["state"]["result_state"]
-        if result_state != "SUCCESS":
-            raise dbt.exceptions.RuntimeException(
-                "Python model failed with traceback as:\n"
-                "(Note that the line number here does not "
-                "match the line number in your code due to dbt templating)\n"
-                f"{json_run_output['error_trace']}"
-            )
+        job_helper.submit(compiled_code)
+        # we don't really get any useful information back from the job submission other than success
         return self.connections.get_response(None)
 
     def standardize_grants_dict(self, grants_table: agate.Table) -> dict:
diff --git a/dbt/adapters/spark/python_submissions.py b/dbt/adapters/spark/python_submissions.py
new file mode 100644
index 000000000..ea172ef03
--- /dev/null
+++ b/dbt/adapters/spark/python_submissions.py
@@ -0,0 +1,284 @@
+import base64
+import time
+import requests
+from typing import Any, Dict
+import uuid
+
+import dbt.exceptions
+
+DEFAULT_POLLING_INTERVAL = 3
+SUBMISSION_LANGUAGE = "python"
+DEFAULT_TIMEOUT = 60 * 60 * 24
+
+
+class BasePythonJobHelper:
+    def __init__(self, parsed_model, credentials):
+        self.check_credentials(credentials)
+        self.credentials = credentials
+        self.identifier = parsed_model["alias"]
+        self.schema = getattr(parsed_model, "schema", self.credentials.schema)
+        self.parsed_model = parsed_model
+        self.timeout = self.get_timeout()
+        self.polling_interval = DEFAULT_POLLING_INTERVAL
+
+    def get_timeout(self):
+        timeout = self.parsed_model["config"].get("timeout", DEFAULT_TIMEOUT)
+        if timeout <= 0:
+            raise ValueError("Timeout must be a positive integer")
+        return timeout
+
+    def check_credentials(self, credentials):
+        raise NotImplementedError(
+            "Overwrite this method to check specific requirement for current submission method"
+        )
+
+    def submit(self, compiled_code):
+        raise NotImplementedError(
+            "BasePythonJobHelper is an abstract class and you should implement submit method."
+        )
+
+    def polling(
+        self,
+        status_func,
+        status_func_kwargs,
+        get_state_func,
+        terminal_states,
+        expected_end_state,
+        get_state_msg_func,
+    ):
+        state = None
+        start = time.time()
+        exceeded_timeout = False
+        response = {}
+        while state not in terminal_states:
+            if time.time() - start > self.timeout:
+                exceeded_timeout = True
+                break
+            # TODO should we do exponential backoff?
+            time.sleep(self.polling_interval)
+            response = status_func(**status_func_kwargs)
+            state = get_state_func(response)
+        if exceeded_timeout:
+            raise dbt.exceptions.RuntimeException("python model run timed out")
+        if state != expected_end_state:
+            raise dbt.exceptions.RuntimeException(
+                "python model run ended in state"
+                f"{state} with state_message\n{get_state_msg_func(response)}"
+            )
+        return response
+
+
+class DBNotebookPythonJobHelper(BasePythonJobHelper):
+    def __init__(self, parsed_model, credentials):
+        super().__init__(parsed_model, credentials)
+        self.auth_header = {"Authorization": f"Bearer {self.credentials.token}"}
+
+    def check_credentials(self, credentials):
+        if not credentials.user:
+            raise ValueError("Databricks user is required for notebook submission method.")
+
+    def _create_work_dir(self, path):
+        response = requests.post(
+            f"https://{self.credentials.host}/api/2.0/workspace/mkdirs",
+            headers=self.auth_header,
+            json={
+                "path": path,
+            },
+        )
+        if response.status_code != 200:
+            raise dbt.exceptions.RuntimeException(
+                f"Error creating work_dir for python notebooks\n {response.content!r}"
+            )
+
+    def _upload_notebook(self, path, compiled_code):
+        b64_encoded_content = base64.b64encode(compiled_code.encode()).decode()
+        response = requests.post(
+            f"https://{self.credentials.host}/api/2.0/workspace/import",
+            headers=self.auth_header,
+            json={
+                "path": path,
+                "content": b64_encoded_content,
+                "language": "PYTHON",
+                "overwrite": True,
+                "format": "SOURCE",
+            },
+        )
+        if response.status_code != 200:
+            raise dbt.exceptions.RuntimeException(
+                f"Error creating python notebook.\n {response.content!r}"
+            )
+
+    def _submit_notebook(self, path):
+        submit_response = requests.post(
+            f"https://{self.credentials.host}/api/2.1/jobs/runs/submit",
+            headers=self.auth_header,
+            json={
+                "run_name": f"{self.schema}-{self.identifier}-{uuid.uuid4()}",
+                "existing_cluster_id": self.credentials.cluster,
+                "notebook_task": {
+                    "notebook_path": path,
+                },
+            },
+        )
+        if submit_response.status_code != 200:
+            raise dbt.exceptions.RuntimeException(
+                f"Error creating python run.\n {submit_response.content!r}"
+            )
+        return submit_response.json()["run_id"]
+
+    def submit(self, compiled_code):
+        # it is safe to call mkdirs even if dir already exists and have content inside
+        work_dir = f"/Users/{self.credentials.user}/{self.schema}/"
+        self._create_work_dir(work_dir)
+
+        # add notebook
+        whole_file_path = f"{work_dir}{self.identifier}"
+        self._upload_notebook(whole_file_path, compiled_code)
+
+        # submit job
+        run_id = self._submit_notebook(whole_file_path)
+
+        self.polling(
+            status_func=requests.get,
+            status_func_kwargs={
+                "url": f"https://{self.credentials.host}/api/2.1/jobs/runs/get?run_id={run_id}",
+                "headers": self.auth_header,
+            },
+            get_state_func=lambda response: response.json()["state"]["life_cycle_state"],
+            terminal_states=("TERMINATED", "SKIPPED", "INTERNAL_ERROR"),
+            expected_end_state="TERMINATED",
+            get_state_msg_func=lambda response: response.json()["state"]["state_message"],
+        )
+
+        # get end state to return to user
+        run_output = requests.get(
+            f"https://{self.credentials.host}" f"/api/2.1/jobs/runs/get-output?run_id={run_id}",
+            headers=self.auth_header,
+        )
+        json_run_output = run_output.json()
+        result_state = json_run_output["metadata"]["state"]["result_state"]
+        if result_state != "SUCCESS":
+            raise dbt.exceptions.RuntimeException(
+                "Python model failed with traceback as:\n"
+                "(Note that the line number here does not "
+                "match the line number in your code due to dbt templating)\n"
+                f"{json_run_output['error_trace']}"
+            )
+
+
+class DBContext:
+    def __init__(self, credentials):
+        self.auth_header = {"Authorization": f"Bearer {credentials.token}"}
+        self.cluster = credentials.cluster
+        self.host = credentials.host
+
+    def create(self) -> str:
+        # https://docs.databricks.com/dev-tools/api/1.2/index.html#create-an-execution-context
+        response = requests.post(
+            f"https://{self.host}/api/1.2/contexts/create",
+            headers=self.auth_header,
+            json={
+                "clusterId": self.cluster,
+                "language": SUBMISSION_LANGUAGE,
+            },
+        )
+        if response.status_code != 200:
+            raise dbt.exceptions.RuntimeException(
+                f"Error creating an execution context.\n {response.content!r}"
+            )
+        return response.json()["id"]
+
+    def destroy(self, context_id: str) -> str:
+        # https://docs.databricks.com/dev-tools/api/1.2/index.html#delete-an-execution-context
+        response = requests.post(
+            f"https://{self.host}/api/1.2/contexts/destroy",
+            headers=self.auth_header,
+            json={
+                "clusterId": self.cluster,
+                "contextId": context_id,
+            },
+        )
+        if response.status_code != 200:
+            raise dbt.exceptions.RuntimeException(
+                f"Error deleting an execution context.\n {response.content!r}"
+            )
+        return response.json()["id"]
+
+
+class DBCommand:
+    def __init__(self, credentials):
+        self.auth_header = {"Authorization": f"Bearer {credentials.token}"}
+        self.cluster = credentials.cluster
+        self.host = credentials.host
+
+    def execute(self, context_id: str, command: str) -> str:
+        # https://docs.databricks.com/dev-tools/api/1.2/index.html#run-a-command
+        response = requests.post(
+            f"https://{self.host}/api/1.2/commands/execute",
+            headers=self.auth_header,
+            json={
+                "clusterId": self.cluster,
+                "contextId": context_id,
+                "language": SUBMISSION_LANGUAGE,
+                "command": command,
+            },
+        )
+        if response.status_code != 200:
+            raise dbt.exceptions.RuntimeException(
+                f"Error creating a command.\n {response.content!r}"
+            )
+        return response.json()["id"]
+
+    def status(self, context_id: str, command_id: str) -> Dict[str, Any]:
+        # https://docs.databricks.com/dev-tools/api/1.2/index.html#get-information-about-a-command
+        response = requests.get(
+            f"https://{self.host}/api/1.2/commands/status",
+            headers=self.auth_header,
+            params={
+                "clusterId": self.cluster,
+                "contextId": context_id,
+                "commandId": command_id,
+            },
+        )
+        if response.status_code != 200:
+            raise dbt.exceptions.RuntimeException(
+                f"Error getting status of command.\n {response.content!r}"
+            )
+        return response.json()
+
+
+class DBCommandsApiPythonJobHelper(BasePythonJobHelper):
+    def check_credentials(self, credentials):
+        if not credentials.cluster:
+            raise ValueError("Databricks cluster is required for commands submission method.")
+
+    def submit(self, compiled_code):
+        context = DBContext(self.credentials)
+        command = DBCommand(self.credentials)
+        context_id = context.create()
+        try:
+            command_id = command.execute(context_id, compiled_code)
+            # poll until job finish
+            response = self.polling(
+                status_func=command.status,
+                status_func_kwargs={
+                    "context_id": context_id,
+                    "command_id": command_id,
+                },
+                get_state_func=lambda response: response["status"],
+                terminal_states=("Cancelled", "Error", "Finished"),
+                expected_end_state="Finished",
+                get_state_msg_func=lambda response: response.json()["results"]["data"],
+            )
+            if response["results"]["resultType"] == "error":
+                raise dbt.exceptions.RuntimeException(
+                    f"Python model failed with traceback as:\n" f"{response['results']['cause']}"
+                )
+        finally:
+            context.destroy(context_id)
+
+
+PYTHON_SUBMISSION_HELPERS = {
+    "notebook": DBNotebookPythonJobHelper,
+    "commands": DBCommandsApiPythonJobHelper,
+}

From ebd011ea71ba533e065cb167f6c8213753fa6e9e Mon Sep 17 00:00:00 2001
From: Chenyu Li 
Date: Wed, 31 Aug 2022 07:21:14 -0700
Subject: [PATCH 31/54] set tmp relation with proper schema (#445)

* set tmp relation with proper schema

* add changelog
---
 .changes/unreleased/Fixes-20220830-140224.yaml             | 7 +++++++
 dbt/include/spark/macros/adapters.sql                      | 7 +++----
 .../macros/materializations/incremental/incremental.sql    | 5 +++++
 .../macros/materializations/incremental/strategies.sql     | 6 +++---
 4 files changed, 18 insertions(+), 7 deletions(-)
 create mode 100644 .changes/unreleased/Fixes-20220830-140224.yaml

diff --git a/.changes/unreleased/Fixes-20220830-140224.yaml b/.changes/unreleased/Fixes-20220830-140224.yaml
new file mode 100644
index 000000000..9e3da3ea6
--- /dev/null
+++ b/.changes/unreleased/Fixes-20220830-140224.yaml
@@ -0,0 +1,7 @@
+kind: Fixes
+body: python incremental model tmp table using correct schema
+time: 2022-08-30T14:02:24.603033-07:00
+custom:
+  Author: ChenyuLInx
+  Issue: "441"
+  PR: "445"
diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql
index 05630ede5..88190cc04 100644
--- a/dbt/include/spark/macros/adapters.sql
+++ b/dbt/include/spark/macros/adapters.sql
@@ -123,7 +123,7 @@
 
 {#-- We can't use temporary tables with `create ... as ()` syntax --#}
 {% macro spark__create_temporary_view(relation, compiled_code) -%}
-    create temporary view {{ relation.include(schema=false) }} as
+    create temporary view {{ relation }} as
       {{ compiled_code }}
 {%- endmacro -%}
 
@@ -185,7 +185,7 @@
 
 {% macro spark__get_columns_in_relation_raw(relation) -%}
   {% call statement('get_columns_in_relation_raw', fetch_result=True) %}
-      describe extended {{ relation.include(schema=(schema is not none)) }}
+      describe extended {{ relation }}
   {% endcall %}
   {% do return(load_result('get_columns_in_relation_raw').table) %}
 {% endmacro %}
@@ -263,8 +263,7 @@
 {% macro spark__make_temp_relation(base_relation, suffix) %}
     {% set tmp_identifier = base_relation.identifier ~ suffix %}
     {% set tmp_relation = base_relation.incorporate(path = {
-        "identifier": tmp_identifier,
-        "schema": None
+        "identifier": tmp_identifier
     }) -%}
 
     {% do return(tmp_relation) %}
diff --git a/dbt/include/spark/macros/materializations/incremental/incremental.sql b/dbt/include/spark/macros/materializations/incremental/incremental.sql
index 1a92351ce..e293441b8 100644
--- a/dbt/include/spark/macros/materializations/incremental/incremental.sql
+++ b/dbt/include/spark/macros/materializations/incremental/incremental.sql
@@ -17,6 +17,11 @@
   {%- set existing_relation = load_relation(this) -%}
   {%- set tmp_relation = make_temp_relation(this) -%}
 
+  {#-- for SQL model we will create temp view that doesn't have database and schema --#}
+  {%- if language == 'sql'-%}
+    {%- set tmp_relation = tmp_relation.include(database=false, schema=false) -%}
+  {%- endif -%}
+
   {#-- Set Overwrite Mode --#}
   {%- if strategy == 'insert_overwrite' and partition_by -%}
     {%- call statement() -%}
diff --git a/dbt/include/spark/macros/materializations/incremental/strategies.sql b/dbt/include/spark/macros/materializations/incremental/strategies.sql
index 28b8f2001..d98e1f692 100644
--- a/dbt/include/spark/macros/materializations/incremental/strategies.sql
+++ b/dbt/include/spark/macros/materializations/incremental/strategies.sql
@@ -4,7 +4,7 @@
     {%- set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') -%}
     insert overwrite table {{ target_relation }}
     {{ partition_cols(label="partition") }}
-    select {{dest_cols_csv}} from {{ source_relation.include(database=false, schema=false) }}
+    select {{dest_cols_csv}} from {{ source_relation }}
 
 {% endmacro %}
 
@@ -14,7 +14,7 @@
     {%- set dest_columns = adapter.get_columns_in_relation(target_relation) -%}
     {%- set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') -%}
     insert into table {{ target_relation }}
-    select {{dest_cols_csv}} from {{ source_relation.include(database=false, schema=false) }}
+    select {{dest_cols_csv}} from {{ source_relation }}
 
 {% endmacro %}
 
@@ -45,7 +45,7 @@
   {{ sql_header if sql_header is not none }}
 
   merge into {{ target }} as DBT_INTERNAL_DEST
-      using {{ source.include(schema=false) }} as DBT_INTERNAL_SOURCE
+      using {{ source }} as DBT_INTERNAL_SOURCE
       on {{ predicates | join(' and ') }}
 
       when matched then update set

From a9c1d8c256956d40e5b6d25e6f0e7b7f7c9b5700 Mon Sep 17 00:00:00 2001
From: leahwicz <60146280+leahwicz@users.noreply.github.com>
Date: Tue, 13 Sep 2022 15:44:11 -0400
Subject: [PATCH 32/54] Update repo templates (#448)

---
 .github/ISSUE_TEMPLATE/bug-report.yml        | 84 ++++++++++++++++++++
 .github/ISSUE_TEMPLATE/bug_report.md         | 33 --------
 .github/ISSUE_TEMPLATE/config.yml            | 14 ++++
 .github/ISSUE_TEMPLATE/feature-request.yml   | 59 ++++++++++++++
 .github/ISSUE_TEMPLATE/feature_request.md    | 23 ------
 .github/ISSUE_TEMPLATE/regression-report.yml | 82 +++++++++++++++++++
 .github/ISSUE_TEMPLATE/release.md            | 10 ---
 .github/{ISSUE_TEMPLATE => }/dependabot.yml  |  0
 .github/pull_request_template.md             |  7 +-
 9 files changed, 245 insertions(+), 67 deletions(-)
 create mode 100644 .github/ISSUE_TEMPLATE/bug-report.yml
 delete mode 100644 .github/ISSUE_TEMPLATE/bug_report.md
 create mode 100644 .github/ISSUE_TEMPLATE/config.yml
 create mode 100644 .github/ISSUE_TEMPLATE/feature-request.yml
 delete mode 100644 .github/ISSUE_TEMPLATE/feature_request.md
 create mode 100644 .github/ISSUE_TEMPLATE/regression-report.yml
 delete mode 100644 .github/ISSUE_TEMPLATE/release.md
 rename .github/{ISSUE_TEMPLATE => }/dependabot.yml (100%)

diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml
new file mode 100644
index 000000000..f5494b313
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug-report.yml
@@ -0,0 +1,84 @@
+name: 🐞 Bug
+description: Report a bug or an issue you've found with dbt-spark
+title: "[Bug] "
+labels: ["bug", "triage"]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Thanks for taking the time to fill out this bug report!
+  - type: checkboxes
+    attributes:
+      label: Is this a new bug in dbt-spark?
+      description: >
+        In other words, is this an error, flaw, failure or fault in our software?
+
+        If this is a bug that broke existing functionality that used to work, please open a regression issue.
+        If this is a bug in the dbt-core logic, please open an issue in the dbt-core repository.
+        If this is a bug experienced while using dbt Cloud, please report to [support](mailto:support@getdbt.com).
+        If this is a request for help or troubleshooting code in your own dbt project, please join our [dbt Community Slack](https://www.getdbt.com/community/join-the-community/) or open a [Discussion question](https://github.com/dbt-labs/docs.getdbt.com/discussions).
+
+        Please search to see if an issue already exists for the bug you encountered.
+      options:
+        - label: I believe this is a new bug in dbt-spark
+          required: true
+        - label: I have searched the existing issues, and I could not find an existing issue for this bug
+          required: true
+  - type: textarea
+    attributes:
+      label: Current Behavior
+      description: A concise description of what you're experiencing.
+    validations:
+      required: true
+  - type: textarea
+    attributes:
+      label: Expected Behavior
+      description: A concise description of what you expected to happen.
+    validations:
+      required: true
+  - type: textarea
+    attributes:
+      label: Steps To Reproduce
+      description: Steps to reproduce the behavior.
+      placeholder: |
+        1. In this environment...
+        2. With this config...
+        3. Run '...'
+        4. See error...
+    validations:
+      required: true
+  - type: textarea
+    id: logs
+    attributes:
+      label: Relevant log output
+      description: |
+        If applicable, log output to help explain your problem.
+      render: shell
+    validations:
+      required: false
+  - type: textarea
+    attributes:
+      label: Environment
+      description: |
+        examples:
+          - **OS**: Ubuntu 20.04
+          - **Python**: 3.9.12 (`python3 --version`)
+          - **dbt-core**: 1.1.1 (`dbt --version`)
+          - **dbt-spark**: 1.1.0 (`dbt --version`)
+      value: |
+        - OS:
+        - Python:
+        - dbt-core:
+        - dbt-spark:
+      render: markdown
+    validations:
+      required: false
+  - type: textarea
+    attributes:
+      label: Additional Context
+      description: |
+        Links? References? Anything that will give us more context about the issue you are encountering!
+
+        Tip: You can attach images or log files by clicking this area to highlight it and then dragging files in.
+    validations:
+      required: false
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
deleted file mode 100644
index 43f19a154..000000000
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ /dev/null
@@ -1,33 +0,0 @@
----
-name: Bug report
-about: Report a bug or an issue you've found with dbt-spark
-title: ''
-labels: bug, triage
-assignees: ''
-
----
-
-### Describe the bug
-A clear and concise description of what the bug is. What command did you run? What happened?
-
-### Steps To Reproduce
-In as much detail as possible, please provide steps to reproduce the issue. Sample data that triggers the issue, example model code, etc is all very helpful here.
-
-### Expected behavior
-A clear and concise description of what you expected to happen.
-
-### Screenshots and log output
-If applicable, add screenshots or log output to help explain your problem.
-
-### System information
-**The output of `dbt --version`:**
-```
-<output goes here>
-```
-
-**The operating system you're using:**
-
-**The output of `python --version`:**
-
-### Additional context
-Add any other context about the problem here.
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 000000000..129ea7779
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,14 @@
+blank_issues_enabled: false
+contact_links:
+  - name: Ask the community for help
+    url: https://github.com/dbt-labs/docs.getdbt.com/discussions
+    about: Need help troubleshooting? Check out our guide on how to ask
+  - name: Contact dbt Cloud support
+    url: mailto:support@getdbt.com
+    about: Are you using dbt Cloud? Contact our support team for help!
+  - name: Participate in Discussions
+    url: https://github.com/dbt-labs/dbt-spark/discussions
+    about: Do you have a Big Idea for dbt-spark? Read open discussions, or start a new one
+  - name: Create an issue for dbt-core
+    url: https://github.com/dbt-labs/dbt-core/issues/new/choose
+    about: Report a bug or request a feature for dbt-core
diff --git a/.github/ISSUE_TEMPLATE/feature-request.yml b/.github/ISSUE_TEMPLATE/feature-request.yml
new file mode 100644
index 000000000..8c123ba51
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature-request.yml
@@ -0,0 +1,59 @@
+name: ✨ Feature
+description: Propose a straightforward extension of dbt-spark functionality
+title: "[Feature] <title>"
+labels: ["enhancement", "triage"]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Thanks for taking the time to fill out this feature request!
+  - type: checkboxes
+    attributes:
+      label: Is this your first time submitting a feature request?
+      description: >
+        We want to make sure that features are distinct and discoverable,
+        so that other members of the community can find them and offer their thoughts.
+
+        Issues are the right place to request straightforward extensions of existing dbt-spark functionality.
+        For "big ideas" about future capabilities of dbt-spark, we ask that you open a
+        [discussion](https://github.com/dbt-labs/dbt-spark/discussions) in the "Ideas" category instead.
+      options:
+        - label: I have read the [expectations for open source contributors](https://docs.getdbt.com/docs/contributing/oss-expectations)
+          required: true
+        - label: I have searched the existing issues, and I could not find an existing issue for this feature
+          required: true
+        - label: I am requesting a straightforward extension of existing dbt-spark functionality, rather than a Big Idea better suited to a discussion
+          required: true
+  - type: textarea
+    attributes:
+      label: Describe the feature
+      description: A clear and concise description of what you want to happen.
+    validations:
+      required: true
+  - type: textarea
+    attributes:
+      label: Describe alternatives you've considered
+      description: |
+        A clear and concise description of any alternative solutions or features you've considered.
+    validations:
+      required: false
+  - type: textarea
+    attributes:
+      label: Who will this benefit?
+      description: |
+        What kind of use case will this feature be useful for? Please be specific and provide examples, this will help us prioritize properly.
+    validations:
+      required: false
+  - type: input
+    attributes:
+      label: Are you interested in contributing this feature?
+      description: Let us know if you want to write some code, and how we can help.
+    validations:
+      required: false
+  - type: textarea
+    attributes:
+      label: Anything else?
+      description: |
+        Links? References? Anything that will give us more context about the feature you are suggesting!
+    validations:
+      required: false
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
deleted file mode 100644
index 5edc9f6ca..000000000
--- a/.github/ISSUE_TEMPLATE/feature_request.md
+++ /dev/null
@@ -1,23 +0,0 @@
----
-name: Feature request
-about: Suggest an idea for dbt-spark
-title: ''
-labels: enhancement, triage
-assignees: ''
-
----
-
-### Describe the feature
-A clear and concise description of what you want to happen.
-
-### Describe alternatives you've considered
-A clear and concise description of any alternative solutions or features you've considered.
-
-### Additional context
-Please include any other relevant context here.
-
-### Who will this benefit?
-What kind of use case will this feature be useful for? Please be specific and provide examples, this will help us prioritize properly.
-
-### Are you interested in contributing this feature?
-Let us know if you want to write some code, and how we can help.
diff --git a/.github/ISSUE_TEMPLATE/regression-report.yml b/.github/ISSUE_TEMPLATE/regression-report.yml
new file mode 100644
index 000000000..8b65d6a26
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/regression-report.yml
@@ -0,0 +1,82 @@
+name: ☣️ Regression
+description: Report a regression you've observed in a newer version of dbt-spark
+title: "[Regression] <title>"
+labels: ["bug", "regression", "triage"]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Thanks for taking the time to fill out this regression report!
+  - type: checkboxes
+    attributes:
+      label: Is this a regression in a recent version of dbt-spark?
+      description: >
+        A regression is when documented functionality works as expected in an older version of dbt-spark,
+        and no longer works after upgrading to a newer version of dbt-spark
+      options:
+        - label: I believe this is a regression in dbt-spark functionality
+          required: true
+        - label: I have searched the existing issues, and I could not find an existing issue for this regression
+          required: true
+  - type: textarea
+    attributes:
+      label: Current Behavior
+      description: A concise description of what you're experiencing.
+    validations:
+      required: true
+  - type: textarea
+    attributes:
+      label: Expected/Previous Behavior
+      description: A concise description of what you expected to happen.
+    validations:
+      required: true
+  - type: textarea
+    attributes:
+      label: Steps To Reproduce
+      description: Steps to reproduce the behavior.
+      placeholder: |
+        1. In this environment...
+        2. With this config...
+        3. Run '...'
+        4. See error...
+    validations:
+      required: true
+  - type: textarea
+    id: logs
+    attributes:
+      label: Relevant log output
+      description: |
+        If applicable, log output to help explain your problem.
+      render: shell
+    validations:
+      required: false
+  - type: textarea
+    attributes:
+      label: Environment
+      description: |
+        examples:
+          - **OS**: Ubuntu 20.04
+          - **Python**: 3.9.12 (`python3 --version`)
+          - **dbt-core (working version)**: 1.1.1 (`dbt --version`)
+          - **dbt-spark (working version)**: 1.1.0 (`dbt --version`)
+          - **dbt-core (regression version)**: 1.2.0 (`dbt --version`)
+          - **dbt-spark (regression version)**: 1.2.0 (`dbt --version`)
+      value: |
+        - OS:
+        - Python:
+        - dbt-core (working version):
+        - dbt-spark (working version):
+        - dbt-core (regression version):
+        - dbt-spark (regression version):
+      render: markdown
+    validations:
+      required: true
+  - type: textarea
+    attributes:
+      label: Additional Context
+      description: |
+        Links? References? Anything that will give us more context about the issue you are encountering!
+
+        Tip: You can attach images or log files by clicking this area to highlight it and then dragging files in.
+    validations:
+      required: false
diff --git a/.github/ISSUE_TEMPLATE/release.md b/.github/ISSUE_TEMPLATE/release.md
deleted file mode 100644
index a69349f54..000000000
--- a/.github/ISSUE_TEMPLATE/release.md
+++ /dev/null
@@ -1,10 +0,0 @@
----
-name: Release
-about: Release a new version of dbt-spark
-title: ''
-labels: release
-assignees: ''
-
----
-
-### TBD
diff --git a/.github/ISSUE_TEMPLATE/dependabot.yml b/.github/dependabot.yml
similarity index 100%
rename from .github/ISSUE_TEMPLATE/dependabot.yml
rename to .github/dependabot.yml
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index c4a5c53b4..11381456a 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -11,11 +11,16 @@ resolves #
 
 ### Description
 
-<!--- Describe the Pull Request here -->
+<!---
+  Describe the Pull Request here. Add any references and info to help reviewers
+  understand your changes. Include any tradeoffs you considered.
+-->
 
 ### Checklist
 
+- [ ] I have read [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md) and understand what's expected of me
 - [ ] I have signed the [CLA](https://docs.getdbt.com/docs/contributor-license-agreements)
 - [ ] I have run this code in development and it appears to resolve the stated issue
 - [ ] This PR includes tests, or tests are not required/relevant for this PR
+- [ ] I have [opened an issue to add/update docs](https://github.com/dbt-labs/docs.getdbt.com/issues/new/choose), or docs changes are not required/relevant for this PR
 - [ ] I have run `changie new` to [create a changelog entry](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#Adding-CHANGELOG-Entry)

From b310c4c81a8aeedc6ed188854d92c36410ba70ba Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Tue, 13 Sep 2022 15:30:30 -0700
Subject: [PATCH 33/54] add mypy ignore to column, connections and init (#462)

* add mypy ignore to column, connections and init

* changie file
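
As a standalone illustration of the pattern (not code from this repo), a trailing
`# type: ignore` comment silences mypy on just that line, which is how these modules keep
overriding dbt-core attributes with looser types:

```python
from typing import Optional


class Base:
    database: str  # upstream declares a required str


class Child(Base):
    # mypy flags the incompatible override; the inline ignore mirrors what this
    # patch does for SparkCredentials.database and the SparkColumn annotations
    database: Optional[str] = None  # type: ignore
```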
---
 .changes/unreleased/Under the Hood-20220913-152004.yaml | 7 +++++++
 dbt/adapters/spark/__init__.py                          | 2 +-
 dbt/adapters/spark/column.py                            | 4 ++--
 dbt/adapters/spark/connections.py                       | 2 +-
 dbt/adapters/spark/impl.py                              | 2 +-
 5 files changed, 12 insertions(+), 5 deletions(-)
 create mode 100644 .changes/unreleased/Under the Hood-20220913-152004.yaml

diff --git a/.changes/unreleased/Under the Hood-20220913-152004.yaml b/.changes/unreleased/Under the Hood-20220913-152004.yaml
new file mode 100644
index 000000000..4c372db01
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20220913-152004.yaml	
@@ -0,0 +1,7 @@
+kind: Under the Hood
+body: ignore mypy typing issues
+time: 2022-09-13T15:20:04.459783-07:00
+custom:
+  Author: colin-rogers-dbt
+  Issue: "461"
+  PR: "462"
diff --git a/dbt/adapters/spark/__init__.py b/dbt/adapters/spark/__init__.py
index 6ecc5eccf..91ad54768 100644
--- a/dbt/adapters/spark/__init__.py
+++ b/dbt/adapters/spark/__init__.py
@@ -5,7 +5,7 @@
 from dbt.adapters.spark.impl import SparkAdapter
 
 from dbt.adapters.base import AdapterPlugin
-from dbt.include import spark
+from dbt.include import spark  # type: ignore
 
 Plugin = AdapterPlugin(
     adapter=SparkAdapter, credentials=SparkCredentials, include_path=spark.PACKAGE_PATH
diff --git a/dbt/adapters/spark/column.py b/dbt/adapters/spark/column.py
index dcf7590e9..8100fa450 100644
--- a/dbt/adapters/spark/column.py
+++ b/dbt/adapters/spark/column.py
@@ -9,7 +9,7 @@
 
 
 @dataclass
-class SparkColumn(dbtClassMixin, Column):
+class SparkColumn(dbtClassMixin, Column):  # type: ignore
     table_database: Optional[str] = None
     table_schema: Optional[str] = None
     table_name: Optional[str] = None
@@ -22,7 +22,7 @@ class SparkColumn(dbtClassMixin, Column):
     def translate_type(cls, dtype: str) -> str:
         return dtype
 
-    def can_expand_to(self: Self, other_column: Self) -> bool:
+    def can_expand_to(self: Self, other_column: Self) -> bool:  # type: ignore
         """returns True if both columns are strings"""
         return self.is_string() and other_column.is_string()
 
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 59ceb9dd8..80e014a2f 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -59,7 +59,7 @@ class SparkConnectionMethod(StrEnum):
 class SparkCredentials(Credentials):
     host: str
     method: SparkConnectionMethod
-    database: Optional[str]
+    database: Optional[str]  # type: ignore
     driver: Optional[str] = None
     cluster: Optional[str] = None
     endpoint: Optional[str] = None
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 6e97ce1f5..b89793805 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -118,7 +118,7 @@ def add_schema_to_cache(self, schema) -> str:
             dbt.exceptions.raise_compiler_error(
                 "Attempted to cache a null schema for {}".format(name)
             )
-        if dbt.flags.USE_CACHE:
+        if dbt.flags.USE_CACHE:  # type: ignore
             self.cache.add_schema(None, schema)
         # so jinja doesn't render things
         return ""

From 571a6ef43763d0ae37d84e2a6eba7c32028e21dd Mon Sep 17 00:00:00 2001
From: leahwicz <60146280+leahwicz@users.noreply.github.com>
Date: Wed, 14 Sep 2022 10:23:53 -0400
Subject: [PATCH 34/54] Update changelog bot (#463)

* Update changelog bot

* Updating correct issue number
---
 .github/workflows/bot-changelog.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/bot-changelog.yml b/.github/workflows/bot-changelog.yml
index d8056efe4..39eacf9a6 100644
--- a/.github/workflows/bot-changelog.yml
+++ b/.github/workflows/bot-changelog.yml
@@ -28,7 +28,7 @@ name: Bot Changelog
 on:
   pull_request:
     # catch when the PR is opened with the label or when the label is added
-    types: [opened, labeled]
+    types: [labeled]
 
 permissions:
   contents: write
@@ -48,9 +48,9 @@ jobs:
     steps:
 
     - name: Create and commit changelog on bot PR
-      if: "contains(github.event.pull_request.labels.*.name, ${{ matrix.label }})"
+      if: ${{ contains(github.event.pull_request.labels.*.name, matrix.label) }}
       id: bot_changelog
-      uses: emmyoop/changie_bot@v1.0
+      uses: emmyoop/changie_bot@v1.0.1
       with:
         GITHUB_TOKEN: ${{ secrets.FISHTOWN_BOT_PAT }}
         commit_author_name: "Github Build Bot"
@@ -58,4 +58,4 @@ jobs:
         commit_message: "Add automated changelog yaml from template for bot PR"
         changie_kind: ${{ matrix.changie_kind }}
         label: ${{ matrix.label }}
-        custom_changelog_string: "custom:\n  Author: ${{ github.event.pull_request.user.login }}\n  Issue: 417\n  PR: ${{ github.event.pull_request.number }}\n"
+        custom_changelog_string: "custom:\n  Author: ${{ github.event.pull_request.user.login }}\n  Issue: 417\n  PR: ${{ github.event.pull_request.number }}"

From 60f47d5acf7e2788725529e4bf349120551eb66b Mon Sep 17 00:00:00 2001
From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com>
Date: Thu, 15 Sep 2022 09:37:05 -0500
Subject: [PATCH 35/54] [CT-1114] remove Cache call from
 get_columns_in_relation (#451)

* initial change to get_columns_in_relation to fix cache inconsistencies during on_schema_change

* trying to clear mypy issues

* changelog

* add ref to columns before called on by macro
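
The new behavior always queries the warehouse and tolerates a missing relation instead of
trusting a possibly stale cache entry. A standalone sketch of that pattern, with a placeholder
executor and exception class rather than the real dbt-spark APIs:

```python
from typing import Callable, List


class RuntimeException(Exception):
    """Stand-in for dbt.exceptions.RuntimeException."""


def get_columns(describe_extended: Callable[[str], List[dict]], relation: str) -> List[dict]:
    try:
        # e.g. runs `describe extended <relation>` via the adapter macro
        return [row for row in describe_extended(relation) if row.get("col_name")]
    except RuntimeException as exc:
        msg = getattr(exc, "msg", str(exc))
        if "Table or view not found" in msg or "NoSuchTableException" in msg:
            return []  # normalize a missing relation to an empty column list
        raise


def _missing(relation: str) -> List[dict]:
    raise RuntimeException(f"Table or view not found: {relation}")


print(get_columns(_missing, "analytics.my_incremental_model"))  # -> []
```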
---
 .../unreleased/Fixes-20220914-010520.yaml     |  8 ++++
 .pre-commit-config.yaml                       |  2 +-
 dbt/adapters/spark/impl.py                    | 42 ++++++-------------
 3 files changed, 22 insertions(+), 30 deletions(-)
 create mode 100644 .changes/unreleased/Fixes-20220914-010520.yaml

diff --git a/.changes/unreleased/Fixes-20220914-010520.yaml b/.changes/unreleased/Fixes-20220914-010520.yaml
new file mode 100644
index 000000000..f8584f05f
--- /dev/null
+++ b/.changes/unreleased/Fixes-20220914-010520.yaml
@@ -0,0 +1,8 @@
+kind: Fixes
+body: change to get_columns_in_relation to fix cache inconsistencies in incremental
+  models that caused failures with on_schema_change
+time: 2022-09-14T01:05:20.312981-05:00
+custom:
+  Author: McKnight-42
+  Issue: "447"
+  PR: "451"
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index e70156dcd..e85b1dc8b 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -5,7 +5,7 @@ exclude: '^tests/.*'
 
 # Force all unspecified python hooks to run python 3.8
 default_language_version:
-  python: python3.8
+  python: python3
 
 repos:
 - repo: https://github.com/pre-commit/pre-commit-hooks
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index b89793805..c228fc03d 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -207,36 +207,20 @@ def find_table_information_separator(rows: List[dict]) -> int:
         return pos
 
     def get_columns_in_relation(self, relation: Relation) -> List[SparkColumn]:
-        cached_relations = self.cache.get_relations(relation.database, relation.schema)
-        cached_relation = next(
-            (
-                cached_relation
-                for cached_relation in cached_relations
-                if str(cached_relation) == str(relation)
-            ),
-            None,
-        )
         columns = []
-        if cached_relation and cached_relation.information:
-            columns = self.parse_columns_from_information(cached_relation)
-        if not columns:
-            # in open source delta 'show table extended' query output doesnt
-            # return relation's schema. if columns are empty from cache,
-            # use get_columns_in_relation spark macro
-            # which would execute 'describe extended tablename' query
-            try:
-                rows: List[agate.Row] = self.execute_macro(
-                    GET_COLUMNS_IN_RELATION_RAW_MACRO_NAME, kwargs={"relation": relation}
-                )
-                columns = self.parse_describe_extended(relation, rows)
-            except dbt.exceptions.RuntimeException as e:
-                # spark would throw error when table doesn't exist, where other
-                # CDW would just return and empty list, normalizing the behavior here
-                errmsg = getattr(e, "msg", "")
-                if "Table or view not found" in errmsg or "NoSuchTableException" in errmsg:
-                    pass
-                else:
-                    raise e
+        try:
+            rows: List[agate.Row] = self.execute_macro(
+                GET_COLUMNS_IN_RELATION_RAW_MACRO_NAME, kwargs={"relation": relation}
+            )
+            columns = self.parse_describe_extended(relation, rows)
+        except dbt.exceptions.RuntimeException as e:
+            # spark would throw an error when table doesn't exist, where other
+            # CDWs would just return an empty list, normalizing the behavior here
+            errmsg = getattr(e, "msg", "")
+            if "Table or view not found" in errmsg or "NoSuchTableException" in errmsg:
+                pass
+            else:
+                raise e
 
         # strip hudi metadata columns.
         columns = [x for x in columns if x.name not in self.HUDI_METADATA_COLUMNS]

From 36bbe0de5ec6069384c9a754ada85588fe032511 Mon Sep 17 00:00:00 2001
From: Chenyu Li <chenyu.li@dbtlabs.com>
Date: Thu, 15 Sep 2022 08:20:42 -0700
Subject: [PATCH 36/54] Enhancement/refactor python submission (#452)

* refactor and move common logic to core
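
With the shared polling and dispatch logic now living in dbt-core, the adapter only declares
which helper classes it supports and which one is the default. A hedged sketch of how an extra
method could be plugged in under this interface (the class and the "print_only" key are
hypothetical; only `PythonJobHelper` and the property names come from this refactor):

```python
from typing import Any, Dict, Type

from dbt.adapters.base import PythonJobHelper


class PrintOnlyJobHelper(PythonJobHelper):
    """Toy helper that only reports what it would submit."""

    def __init__(self, parsed_model: Dict[str, Any], credentials: Any) -> None:
        self.identifier = parsed_model["alias"]

    def submit(self, compiled_code: str) -> None:
        print(f"would submit python model {self.identifier} ({len(compiled_code)} chars)")


# An adapter subclass would then expose it roughly like this:
#
#     @property
#     def default_python_submission_method(self) -> str:
#         return "commands"
#
#     @property
#     def python_submission_helpers(self) -> Dict[str, Type[PythonJobHelper]]:
#         return {"commands": DBCommandsApiPythonJobHelper, "print_only": PrintOnlyJobHelper}
```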
---
 .../Under the Hood-20220912-104517.yaml       |  7 +++
 dbt/adapters/spark/impl.py                    | 43 ++++++++---------
 dbt/adapters/spark/python_submissions.py      | 48 +++++++++----------
 3 files changed, 49 insertions(+), 49 deletions(-)
 create mode 100644 .changes/unreleased/Under the Hood-20220912-104517.yaml

diff --git a/.changes/unreleased/Under the Hood-20220912-104517.yaml b/.changes/unreleased/Under the Hood-20220912-104517.yaml
new file mode 100644
index 000000000..e45c97bf0
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20220912-104517.yaml	
@@ -0,0 +1,7 @@
+kind: Under the Hood
+body: Better interface for python submission
+time: 2022-09-12T10:45:17.226481-07:00
+custom:
+  Author: ChenyuLInx
+  Issue: "452"
+  PR: "452"
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index c228fc03d..77b1e4b5a 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -1,7 +1,7 @@
 import re
 from concurrent.futures import Future
 from dataclasses import dataclass
-from typing import Any, Dict, Iterable, List, Optional, Union
+from typing import Any, Dict, Iterable, List, Optional, Union, Type
 from typing_extensions import TypeAlias
 
 import agate
@@ -10,14 +10,17 @@
 import dbt
 import dbt.exceptions
 
-from dbt.adapters.base import AdapterConfig
-from dbt.adapters.base.impl import catch_as_completed, log_code_execution
-from dbt.adapters.base.meta import available
+from dbt.adapters.base import AdapterConfig, PythonJobHelper
+from dbt.adapters.base.impl import catch_as_completed
+from dbt.contracts.connection import AdapterResponse
 from dbt.adapters.sql import SQLAdapter
 from dbt.adapters.spark import SparkConnectionManager
 from dbt.adapters.spark import SparkRelation
 from dbt.adapters.spark import SparkColumn
-from dbt.adapters.spark.python_submissions import PYTHON_SUBMISSION_HELPERS
+from dbt.adapters.spark.python_submissions import (
+    DBNotebookPythonJobHelper,
+    DBCommandsApiPythonJobHelper,
+)
 from dbt.adapters.base import BaseRelation
 from dbt.clients.agate_helper import DEFAULT_TYPE_TESTER
 from dbt.events import AdapterLogger
@@ -369,26 +372,20 @@ def run_sql_for_tests(self, sql, fetch, conn):
         finally:
             conn.transaction_open = False
 
-    @available.parse_none
-    @log_code_execution
-    def submit_python_job(self, parsed_model: dict, compiled_code: str, timeout=None):
-        # TODO improve the typing here.  N.B. Jinja returns a `jinja2.runtime.Undefined` instead
-        # of `None` which evaluates to True!
-
-        # TODO limit this function to run only when doing the materialization of python nodes
-        # assuming that for python job running over 1 day user would manually overwrite this
-        submission_method = parsed_model["config"].get("submission_method", "commands")
-        if submission_method not in PYTHON_SUBMISSION_HELPERS:
-            raise NotImplementedError(
-                "Submission method {} is not supported".format(submission_method)
-            )
-        job_helper = PYTHON_SUBMISSION_HELPERS[submission_method](
-            parsed_model, self.connections.profile.credentials
-        )
-        job_helper.submit(compiled_code)
-        # we don't really get any useful information back from the job submission other than success
+    def generate_python_submission_response(self, submission_result: Any) -> AdapterResponse:
         return self.connections.get_response(None)
 
+    @property
+    def default_python_submission_method(self) -> str:
+        return "commands"
+
+    @property
+    def python_submission_helpers(self) -> Dict[str, Type[PythonJobHelper]]:
+        return {
+            "notebook": DBNotebookPythonJobHelper,
+            "commands": DBCommandsApiPythonJobHelper,
+        }
+
     def standardize_grants_dict(self, grants_table: agate.Table) -> dict:
         grants_dict: Dict[str, List[str]] = {}
         for row in grants_table:
diff --git a/dbt/adapters/spark/python_submissions.py b/dbt/adapters/spark/python_submissions.py
index ea172ef03..5ee4adb18 100644
--- a/dbt/adapters/spark/python_submissions.py
+++ b/dbt/adapters/spark/python_submissions.py
@@ -5,14 +5,16 @@
 import uuid
 
 import dbt.exceptions
+from dbt.adapters.base import PythonJobHelper
+from dbt.adapters.spark import SparkCredentials
 
-DEFAULT_POLLING_INTERVAL = 3
+DEFAULT_POLLING_INTERVAL = 5
 SUBMISSION_LANGUAGE = "python"
 DEFAULT_TIMEOUT = 60 * 60 * 24
 
 
-class BasePythonJobHelper:
-    def __init__(self, parsed_model, credentials):
+class BaseDatabricksHelper(PythonJobHelper):
+    def __init__(self, parsed_model: Dict, credentials: SparkCredentials) -> None:
         self.check_credentials(credentials)
         self.credentials = credentials
         self.identifier = parsed_model["alias"]
@@ -21,18 +23,18 @@ def __init__(self, parsed_model, credentials):
         self.timeout = self.get_timeout()
         self.polling_interval = DEFAULT_POLLING_INTERVAL
 
-    def get_timeout(self):
+    def get_timeout(self) -> int:
         timeout = self.parsed_model["config"].get("timeout", DEFAULT_TIMEOUT)
         if timeout <= 0:
             raise ValueError("Timeout must be a positive integer")
         return timeout
 
-    def check_credentials(self, credentials):
+    def check_credentials(self, credentials: SparkCredentials) -> None:
         raise NotImplementedError(
             "Overwrite this method to check specific requirement for current submission method"
         )
 
-    def submit(self, compiled_code):
+    def submit(self, compiled_code: str) -> None:
         raise NotImplementedError(
             "BasePythonJobHelper is an abstract class and you should implement submit method."
         )
@@ -45,7 +47,7 @@ def polling(
         terminal_states,
         expected_end_state,
         get_state_msg_func,
-    ):
+    ) -> Dict:
         state = None
         start = time.time()
         exceeded_timeout = False
@@ -54,7 +56,7 @@ def polling(
             if time.time() - start > self.timeout:
                 exceeded_timeout = True
                 break
-            # TODO should we do exponential backoff?
+            # should we do exponential backoff?
             time.sleep(self.polling_interval)
             response = status_func(**status_func_kwargs)
             state = get_state_func(response)
@@ -68,16 +70,16 @@ def polling(
         return response
 
 
-class DBNotebookPythonJobHelper(BasePythonJobHelper):
-    def __init__(self, parsed_model, credentials):
+class DBNotebookPythonJobHelper(BaseDatabricksHelper):
+    def __init__(self, parsed_model: Dict, credentials: SparkCredentials) -> None:
         super().__init__(parsed_model, credentials)
         self.auth_header = {"Authorization": f"Bearer {self.credentials.token}"}
 
-    def check_credentials(self, credentials):
+    def check_credentials(self, credentials) -> None:
         if not credentials.user:
             raise ValueError("Databricks user is required for notebook submission method.")
 
-    def _create_work_dir(self, path):
+    def _create_work_dir(self, path: str) -> None:
         response = requests.post(
             f"https://{self.credentials.host}/api/2.0/workspace/mkdirs",
             headers=self.auth_header,
@@ -90,7 +92,7 @@ def _create_work_dir(self, path):
                 f"Error creating work_dir for python notebooks\n {response.content!r}"
             )
 
-    def _upload_notebook(self, path, compiled_code):
+    def _upload_notebook(self, path: str, compiled_code: str) -> None:
         b64_encoded_content = base64.b64encode(compiled_code.encode()).decode()
         response = requests.post(
             f"https://{self.credentials.host}/api/2.0/workspace/import",
@@ -108,7 +110,7 @@ def _upload_notebook(self, path, compiled_code):
                 f"Error creating python notebook.\n {response.content!r}"
             )
 
-    def _submit_notebook(self, path):
+    def _submit_notebook(self, path: str) -> str:
         submit_response = requests.post(
             f"https://{self.credentials.host}/api/2.1/jobs/runs/submit",
             headers=self.auth_header,
@@ -126,7 +128,7 @@ def _submit_notebook(self, path):
             )
         return submit_response.json()["run_id"]
 
-    def submit(self, compiled_code):
+    def submit(self, compiled_code: str) -> None:
         # it is safe to call mkdirs even if dir already exists and have content inside
         work_dir = f"/Users/{self.credentials.user}/{self.schema}/"
         self._create_work_dir(work_dir)
@@ -167,7 +169,7 @@ def submit(self, compiled_code):
 
 
 class DBContext:
-    def __init__(self, credentials):
+    def __init__(self, credentials: SparkCredentials) -> None:
         self.auth_header = {"Authorization": f"Bearer {credentials.token}"}
         self.cluster = credentials.cluster
         self.host = credentials.host
@@ -206,7 +208,7 @@ def destroy(self, context_id: str) -> str:
 
 
 class DBCommand:
-    def __init__(self, credentials):
+    def __init__(self, credentials: SparkCredentials) -> None:
         self.auth_header = {"Authorization": f"Bearer {credentials.token}"}
         self.cluster = credentials.cluster
         self.host = credentials.host
@@ -247,12 +249,12 @@ def status(self, context_id: str, command_id: str) -> Dict[str, Any]:
         return response.json()
 
 
-class DBCommandsApiPythonJobHelper(BasePythonJobHelper):
-    def check_credentials(self, credentials):
+class DBCommandsApiPythonJobHelper(BaseDatabricksHelper):
+    def check_credentials(self, credentials: SparkCredentials) -> None:
         if not credentials.cluster:
             raise ValueError("Databricks cluster is required for commands submission method.")
 
-    def submit(self, compiled_code):
+    def submit(self, compiled_code: str) -> None:
         context = DBContext(self.credentials)
         command = DBCommand(self.credentials)
         context_id = context.create()
@@ -276,9 +278,3 @@ def submit(self, compiled_code):
                 )
         finally:
             context.destroy(context_id)
-
-
-PYTHON_SUBMISSION_HELPERS = {
-    "notebook": DBNotebookPythonJobHelper,
-    "commands": DBCommandsApiPythonJobHelper,
-}

From 4c88e4ab6af5b9ccec8710bba6354aa8ca53dac9 Mon Sep 17 00:00:00 2001
From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com>
Date: Tue, 20 Sep 2022 09:46:51 -0600
Subject: [PATCH 37/54] Convert df to pyspark DataFrame if it is pandas before
 writing (#469)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Convert df to pyspark DataFrame if it is pandas before writing

* Changelog entry

* Use `overwriteSchema` option like dbt-databricks

* Upstream `py_write_table` macro from dbt-databricks

* Convert df to a PySpark DataFrame if it's a Pandas-on-Spark DataFrame before writing

* Separate conversion logic from import logic

* Raise exception if not able to convert to a Spark DataFrame

* Prefer pandas → pandas-on-Spark → Spark over direct pandas → Spark
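
In practice a Python model can now simply return a pandas (or pandas-on-Spark) DataFrame and
the generated `py_write_table` code converts it before `saveAsTable`. A hypothetical model
exercising that path:

```python
# models/orders_summary.py -- illustrative only; the conversion chain
# (pandas -> pandas-on-Spark -> Spark) is applied by the materialization, not here.
import pandas as pd


def model(dbt, session):
    dbt.config(materialized="table")
    return pd.DataFrame({"id": [1, 2, 3], "status": ["new", "active", "churned"]})
```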
---
 .../Under the Hood-20220916-125706.yaml       |  7 ++++
 .../spark/macros/materializations/table.sql   | 35 ++++++++++++++++++-
 2 files changed, 41 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Under the Hood-20220916-125706.yaml

diff --git a/.changes/unreleased/Under the Hood-20220916-125706.yaml b/.changes/unreleased/Under the Hood-20220916-125706.yaml
new file mode 100644
index 000000000..54b82eb55
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20220916-125706.yaml	
@@ -0,0 +1,7 @@
+kind: Under the Hood
+body: Enable Pandas and Pandas-on-Spark DataFrames for dbt python models
+time: 2022-09-16T12:57:06.846297-06:00
+custom:
+  Author: chamini2 dbeatty10
+  Issue: "468"
+  PR: "469"
diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql
index d39ba0b44..25d70c722 100644
--- a/dbt/include/spark/macros/materializations/table.sql
+++ b/dbt/include/spark/macros/materializations/table.sql
@@ -41,7 +41,40 @@
 # --- Autogenerated dbt materialization code. --- #
 dbt = dbtObj(spark.table)
 df = model(dbt, spark)
-df.write.mode("overwrite").format("delta").saveAsTable("{{ target_relation }}")
+
+import importlib.util
+
+pandas_available = False
+pyspark_available = False
+
+# make sure pandas exists before using it
+if importlib.util.find_spec("pandas"):
+  import pandas
+  pandas_available = True
+
+# make sure pyspark.pandas exists before using it
+if importlib.util.find_spec("pyspark.pandas"):
+  import pyspark.pandas
+  pyspark_available = True
+
+# preferentially convert pandas DataFrames to pandas-on-Spark DataFrames first
+# since they know how to convert pandas DataFrames better than `spark.createDataFrame(df)`
+# and converting from pandas-on-Spark to Spark DataFrame has no overhead
+if pyspark_available and pandas_available and isinstance(df, pandas.core.frame.DataFrame):
+  df = pyspark.pandas.frame.DataFrame(df)
+
+# convert to pyspark.sql.dataframe.DataFrame
+if isinstance(df, pyspark.sql.dataframe.DataFrame):
+  pass  # since it is already a Spark DataFrame
+elif pyspark_available and isinstance(df, pyspark.pandas.frame.DataFrame):
+  df = df.to_spark()
+elif pandas_available and isinstance(df, pandas.core.frame.DataFrame):
+  df = spark.createDataFrame(df)
+else:
+  msg = f"{type(df)} is not a supported type for dbt Python materialization"
+  raise Exception(msg)
+
+df.write.mode("overwrite").format("delta").option("overwriteSchema", "true").saveAsTable("{{ target_relation }}")
 {%- endmacro -%}
 
 {%macro py_script_comment()%}

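In effect, a dbt Python model can now return a plain pandas DataFrame and the generated materialization code will promote it to a Spark DataFrame before writing. A minimal sketch of such a model, assuming pandas is installed on the cluster; the column names and values are illustrative and not taken from the patch:

```python
import pandas


def model(dbt, session):
    # "table" keeps the sketch simple; incremental models flow through the same code path
    dbt.config(materialized="table")

    # return a plain pandas DataFrame; the autogenerated materialization code above
    # converts it (pandas -> pandas-on-Spark -> Spark when pyspark.pandas is available,
    # otherwise pandas -> Spark via spark.createDataFrame) before saveAsTable
    return pandas.DataFrame({"id": [1, 2, 3], "status": ["new", "new", "done"]})
```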
From c46fff986ab4ef0ff62e2c955bc380e6c8b110f9 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 20 Sep 2022 16:02:36 -0400
Subject: [PATCH 38/54] Bump black from 22.3.0 to 22.8.0 (#458)

* Bump black from 22.3.0 to 22.8.0

Bumps [black](https://github.com/psf/black) from 22.3.0 to 22.8.0.
- [Release notes](https://github.com/psf/black/releases)
- [Changelog](https://github.com/psf/black/blob/main/CHANGES.md)
- [Commits](https://github.com/psf/black/compare/22.3.0...22.8.0)

---
updated-dependencies:
- dependency-name: black
  dependency-type: direct:development
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .changes/unreleased/Dependency-20220914-191910.yaml | 7 +++++++
 dev-requirements.txt                                | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependency-20220914-191910.yaml

diff --git a/.changes/unreleased/Dependency-20220914-191910.yaml b/.changes/unreleased/Dependency-20220914-191910.yaml
new file mode 100644
index 000000000..ad2534c16
--- /dev/null
+++ b/.changes/unreleased/Dependency-20220914-191910.yaml
@@ -0,0 +1,7 @@
+kind: "Dependency"
+body: "Bump black from 22.3.0 to 22.8.0"
+time: 2022-09-14T19:19:10.00000Z
+custom:
+  Author: dependabot[bot]
+  Issue: 417
+  PR: 458
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 5b29e5e9d..81f95a6df 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -5,7 +5,7 @@ git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=
 
 
 
-black==22.3.0
+black==22.8.0
 bumpversion
 click~=8.0.4
 flake8

From f32f9fd723c3879e007c5d87df3c61fa79f2ac77 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 20 Sep 2022 16:03:35 -0400
Subject: [PATCH 39/54] Bump mypy from 0.950 to 0.971 (#456)

* Bump mypy from 0.950 to 0.971

Bumps [mypy](https://github.com/python/mypy) from 0.950 to 0.971.
- [Release notes](https://github.com/python/mypy/releases)
- [Commits](https://github.com/python/mypy/compare/v0.950...v0.971)

---
updated-dependencies:
- dependency-name: mypy
  dependency-type: direct:development
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .changes/unreleased/Dependency-20220914-192102.yaml | 7 +++++++
 dev-requirements.txt                                | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependency-20220914-192102.yaml

diff --git a/.changes/unreleased/Dependency-20220914-192102.yaml b/.changes/unreleased/Dependency-20220914-192102.yaml
new file mode 100644
index 000000000..f13fd6c45
--- /dev/null
+++ b/.changes/unreleased/Dependency-20220914-192102.yaml
@@ -0,0 +1,7 @@
+kind: "Dependency"
+body: "Bump mypy from 0.950 to 0.971"
+time: 2022-09-14T19:21:02.00000Z
+custom:
+  Author: dependabot[bot]
+  Issue: 417
+  PR: 456
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 81f95a6df..87ca93da7 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -13,7 +13,7 @@ flaky
 freezegun==0.3.9
 ipdb
 mock>=1.3.0
-mypy==0.950
+mypy==0.971
 pre-commit
 pytest-csv
 pytest-dotenv

From e918e2671b044753b36b0ea14a02d22ae66345f8 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 21 Sep 2022 10:08:10 -0400
Subject: [PATCH 40/54] Update click requirement from ~=8.0.4 to ~=8.1.3 (#457)

* Update click requirement from ~=8.0.4 to ~=8.1.3

Updates the requirements on [click](https://github.com/pallets/click) to permit the latest version.
- [Release notes](https://github.com/pallets/click/releases)
- [Changelog](https://github.com/pallets/click/blob/main/CHANGES.rst)
- [Commits](https://github.com/pallets/click/compare/8.0.4...8.1.3)

---
updated-dependencies:
- dependency-name: click
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .changes/unreleased/Dependency-20220914-192027.yaml | 7 +++++++
 dev-requirements.txt                                | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependency-20220914-192027.yaml

diff --git a/.changes/unreleased/Dependency-20220914-192027.yaml b/.changes/unreleased/Dependency-20220914-192027.yaml
new file mode 100644
index 000000000..1863b52fc
--- /dev/null
+++ b/.changes/unreleased/Dependency-20220914-192027.yaml
@@ -0,0 +1,7 @@
+kind: "Dependency"
+body: "Update click requirement from ~=8.0.4 to ~=8.1.3"
+time: 2022-09-14T19:20:27.00000Z
+custom:
+  Author: dependabot[bot]
+  Issue: 417
+  PR: 457
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 87ca93da7..8959df95d 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -7,7 +7,7 @@ git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=
 
 black==22.8.0
 bumpversion
-click~=8.0.4
+click~=8.1.3
 flake8
 flaky
 freezegun==0.3.9

From d2dfcdf38858df878ac5bc2253336160c00a6bca Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 21 Sep 2022 11:58:02 -0400
Subject: [PATCH 41/54] Bump pyodbc from 4.0.32 to 4.0.34 (#459)

* Bump pyodbc from 4.0.32 to 4.0.34

Bumps [pyodbc](https://github.com/mkleehammer/pyodbc) from 4.0.32 to 4.0.34.
- [Release notes](https://github.com/mkleehammer/pyodbc/releases)
- [Commits](https://github.com/mkleehammer/pyodbc/compare/4.0.32...4.0.34)

---
updated-dependencies:
- dependency-name: pyodbc
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

* Remove newline

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: leahwicz <60146280+leahwicz@users.noreply.github.com>
---
 .changes/unreleased/Dependency-20220913-225328.yaml | 7 +++++++
 requirements.txt                                    | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependency-20220913-225328.yaml

diff --git a/.changes/unreleased/Dependency-20220913-225328.yaml b/.changes/unreleased/Dependency-20220913-225328.yaml
new file mode 100644
index 000000000..b934c08c7
--- /dev/null
+++ b/.changes/unreleased/Dependency-20220913-225328.yaml
@@ -0,0 +1,7 @@
+kind: Dependency
+body: "Bump pyodbc from 4.0.32 to 4.0.34"
+time: 2022-09-13T22:53:28.00000Z
+custom:
+  Author: dependabot[bot]
+  Issue: 417
+  PR: 459
diff --git a/requirements.txt b/requirements.txt
index 5d774e4f7..14b36b723 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 PyHive[hive]>=0.6.0,<0.7.0
 requests[python]>=2.28.1
 
-pyodbc==4.0.32
+pyodbc==4.0.34
 sqlparams>=3.0.0
 thrift>=0.13.0
 sqlparse>=0.4.2 # not directly required, pinned by Snyk to avoid a vulnerability

From e07b8a20fa409e24e4c9959739c3eaefaf2eeb5a Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 21 Sep 2022 15:04:33 -0400
Subject: [PATCH 42/54] Bump thrift-sasl from 0.4.1 to 0.4.3 (#455)

* Bump thrift-sasl from 0.4.1 to 0.4.3

Bumps [thrift-sasl](https://github.com/cloudera/thrift_sasl) from 0.4.1 to 0.4.3.
- [Release notes](https://github.com/cloudera/thrift_sasl/releases)
- [Changelog](https://github.com/cloudera/thrift_sasl/blob/master/CHANGELOG.md)
- [Commits](https://github.com/cloudera/thrift_sasl/compare/v0.4.1...v0.4.3)

---
updated-dependencies:
- dependency-name: thrift-sasl
  dependency-type: direct:development
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Add automated changelog yaml from template for bot PR

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
---
 .changes/unreleased/Dependency-20220914-192125.yaml | 7 +++++++
 dev-requirements.txt                                | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Dependency-20220914-192125.yaml

diff --git a/.changes/unreleased/Dependency-20220914-192125.yaml b/.changes/unreleased/Dependency-20220914-192125.yaml
new file mode 100644
index 000000000..78234be80
--- /dev/null
+++ b/.changes/unreleased/Dependency-20220914-192125.yaml
@@ -0,0 +1,7 @@
+kind: "Dependency"
+body: "Bump thrift-sasl from 0.4.1 to 0.4.3"
+time: 2022-09-14T19:21:25.00000Z
+custom:
+  Author: dependabot[bot]
+  Issue: 417
+  PR: 455
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 8959df95d..e93c1b41a 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -24,4 +24,4 @@ tox>=3.2.0
 
 # Test requirements
 sasl>=0.2.1
-thrift_sasl==0.4.1
+thrift_sasl==0.4.3

From 8c921d071e925524c958b2f3fdea1405761c1a97 Mon Sep 17 00:00:00 2001
From: James McNeill <55981540+jpmmcneill@users.noreply.github.com>
Date: Thu, 22 Sep 2022 14:16:00 +0100
Subject: [PATCH 43/54] Jpmmcneill/spark type boolean (#471)

* implement type boolean test spark

* changie result
---
 .changes/unreleased/Features-20220920-000814.yaml | 7 +++++++
 tests/functional/adapter/utils/test_data_types.py | 5 +++++
 2 files changed, 12 insertions(+)
 create mode 100644 .changes/unreleased/Features-20220920-000814.yaml

diff --git a/.changes/unreleased/Features-20220920-000814.yaml b/.changes/unreleased/Features-20220920-000814.yaml
new file mode 100644
index 000000000..96ba63648
--- /dev/null
+++ b/.changes/unreleased/Features-20220920-000814.yaml
@@ -0,0 +1,7 @@
+kind: Features
+body: implement testing for type_boolean in spark
+time: 2022-09-20T00:08:14.15447+01:00
+custom:
+  Author: jpmmcneill
+  Issue: "470"
+  PR: "471"
diff --git a/tests/functional/adapter/utils/test_data_types.py b/tests/functional/adapter/utils/test_data_types.py
index 65a24a3a9..ce6085803 100644
--- a/tests/functional/adapter/utils/test_data_types.py
+++ b/tests/functional/adapter/utils/test_data_types.py
@@ -9,6 +9,7 @@
 from dbt.tests.adapter.utils.data_types.test_type_numeric import BaseTypeNumeric
 from dbt.tests.adapter.utils.data_types.test_type_string import BaseTypeString
 from dbt.tests.adapter.utils.data_types.test_type_timestamp import BaseTypeTimestamp
+from dbt.tests.adapter.utils.data_types.test_type_boolean import BaseTypeBoolean
 
 
 class TestTypeBigInt(BaseTypeBigInt):
@@ -65,3 +66,7 @@ class TestTypeString(BaseTypeString):
     
 class TestTypeTimestamp(BaseTypeTimestamp):
     pass
+
+
+class TestTypeBoolean(BaseTypeBoolean):
+    pass

From f20aecd275c211726c6225dc9abe1484fc90e9e6 Mon Sep 17 00:00:00 2001
From: Chenyu Li <chenyu.li@dbtlabs.com>
Date: Fri, 23 Sep 2022 15:59:41 -0700
Subject: [PATCH 44/54] More flexible cluster configuration (#467)

* update config hierarchy for python submission method
---
 .../unreleased/Features-20220923-101248.yaml  |   8 +
 dbt/adapters/spark/connections.py             |   4 +
 dbt/adapters/spark/impl.py                    |  10 +-
 dbt/adapters/spark/python_submissions.py      | 210 ++++++++++--------
 tests/functional/adapter/test_python_model.py |   6 +-
 5 files changed, 135 insertions(+), 103 deletions(-)
 create mode 100644 .changes/unreleased/Features-20220923-101248.yaml

diff --git a/.changes/unreleased/Features-20220923-101248.yaml b/.changes/unreleased/Features-20220923-101248.yaml
new file mode 100644
index 000000000..e46b2b105
--- /dev/null
+++ b/.changes/unreleased/Features-20220923-101248.yaml
@@ -0,0 +1,8 @@
+kind: Features
+body: Support job cluster in notebook submission method, remove requirement for user
+  for python model submission
+time: 2022-09-23T10:12:48.288911-07:00
+custom:
+  Author: ChenyuLInx
+  Issue: "444"
+  PR: "467"
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 80e014a2f..951e8ed70 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -82,6 +82,10 @@ def __pre_deserialize__(cls, data):
             data["database"] = None
         return data
 
+    @property
+    def cluster_id(self):
+        return self.cluster
+
     def __post_init__(self):
         # spark classifies database and schema as the same thing
         if self.database is not None and self.database != self.schema:
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index 77b1e4b5a..6eff652eb 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -18,8 +18,8 @@
 from dbt.adapters.spark import SparkRelation
 from dbt.adapters.spark import SparkColumn
 from dbt.adapters.spark.python_submissions import (
-    DBNotebookPythonJobHelper,
-    DBCommandsApiPythonJobHelper,
+    JobClusterPythonJobHelper,
+    AllPurposeClusterPythonJobHelper,
 )
 from dbt.adapters.base import BaseRelation
 from dbt.clients.agate_helper import DEFAULT_TYPE_TESTER
@@ -377,13 +377,13 @@ def generate_python_submission_response(self, submission_result: Any) -> Adapter
 
     @property
     def default_python_submission_method(self) -> str:
-        return "commands"
+        return "all_purpose_cluster"
 
     @property
     def python_submission_helpers(self) -> Dict[str, Type[PythonJobHelper]]:
         return {
-            "notebook": DBNotebookPythonJobHelper,
-            "commands": DBCommandsApiPythonJobHelper,
+            "job_cluster": JobClusterPythonJobHelper,
+            "all_purpose_cluster": AllPurposeClusterPythonJobHelper,
         }
 
     def standardize_grants_dict(self, grants_table: agate.Table) -> dict:
diff --git a/dbt/adapters/spark/python_submissions.py b/dbt/adapters/spark/python_submissions.py
index 5ee4adb18..c6341abed 100644
--- a/dbt/adapters/spark/python_submissions.py
+++ b/dbt/adapters/spark/python_submissions.py
@@ -7,21 +7,31 @@
 import dbt.exceptions
 from dbt.adapters.base import PythonJobHelper
 from dbt.adapters.spark import SparkCredentials
+from dbt.adapters.spark import __version__
 
-DEFAULT_POLLING_INTERVAL = 5
+DEFAULT_POLLING_INTERVAL = 10
 SUBMISSION_LANGUAGE = "python"
 DEFAULT_TIMEOUT = 60 * 60 * 24
+DBT_SPARK_VERSION = __version__.version
 
 
 class BaseDatabricksHelper(PythonJobHelper):
     def __init__(self, parsed_model: Dict, credentials: SparkCredentials) -> None:
-        self.check_credentials(credentials)
         self.credentials = credentials
         self.identifier = parsed_model["alias"]
-        self.schema = getattr(parsed_model, "schema", self.credentials.schema)
+        self.schema = parsed_model["schema"]
         self.parsed_model = parsed_model
         self.timeout = self.get_timeout()
         self.polling_interval = DEFAULT_POLLING_INTERVAL
+        self.check_credentials()
+        self.auth_header = {
+            "Authorization": f"Bearer {self.credentials.token}",
+            "User-Agent": f"dbt-labs-dbt-spark/{DBT_SPARK_VERSION} (Databricks)",
+        }
+
+    @property
+    def cluster_id(self) -> str:
+        return self.parsed_model.get("cluster_id", self.credentials.cluster_id)
 
     def get_timeout(self) -> int:
         timeout = self.parsed_model["config"].get("timeout", DEFAULT_TIMEOUT)
@@ -29,56 +39,11 @@ def get_timeout(self) -> int:
             raise ValueError("Timeout must be a positive integer")
         return timeout
 
-    def check_credentials(self, credentials: SparkCredentials) -> None:
+    def check_credentials(self) -> None:
         raise NotImplementedError(
             "Overwrite this method to check specific requirement for current submission method"
         )
 
-    def submit(self, compiled_code: str) -> None:
-        raise NotImplementedError(
-            "BasePythonJobHelper is an abstract class and you should implement submit method."
-        )
-
-    def polling(
-        self,
-        status_func,
-        status_func_kwargs,
-        get_state_func,
-        terminal_states,
-        expected_end_state,
-        get_state_msg_func,
-    ) -> Dict:
-        state = None
-        start = time.time()
-        exceeded_timeout = False
-        response = {}
-        while state not in terminal_states:
-            if time.time() - start > self.timeout:
-                exceeded_timeout = True
-                break
-            # should we do exponential backoff?
-            time.sleep(self.polling_interval)
-            response = status_func(**status_func_kwargs)
-            state = get_state_func(response)
-        if exceeded_timeout:
-            raise dbt.exceptions.RuntimeException("python model run timed out")
-        if state != expected_end_state:
-            raise dbt.exceptions.RuntimeException(
-                "python model run ended in state"
-                f"{state} with state_message\n{get_state_msg_func(response)}"
-            )
-        return response
-
-
-class DBNotebookPythonJobHelper(BaseDatabricksHelper):
-    def __init__(self, parsed_model: Dict, credentials: SparkCredentials) -> None:
-        super().__init__(parsed_model, credentials)
-        self.auth_header = {"Authorization": f"Bearer {self.credentials.token}"}
-
-    def check_credentials(self, credentials) -> None:
-        if not credentials.user:
-            raise ValueError("Databricks user is required for notebook submission method.")
-
     def _create_work_dir(self, path: str) -> None:
         response = requests.post(
             f"https://{self.credentials.host}/api/2.0/workspace/mkdirs",
@@ -110,17 +75,18 @@ def _upload_notebook(self, path: str, compiled_code: str) -> None:
                 f"Error creating python notebook.\n {response.content!r}"
             )
 
-    def _submit_notebook(self, path: str) -> str:
+    def _submit_job(self, path: str, cluster_spec: dict) -> str:
+        job_spec = {
+            "run_name": f"{self.schema}-{self.identifier}-{uuid.uuid4()}",
+            "notebook_task": {
+                "notebook_path": path,
+            },
+        }
+        job_spec.update(cluster_spec)
         submit_response = requests.post(
             f"https://{self.credentials.host}/api/2.1/jobs/runs/submit",
             headers=self.auth_header,
-            json={
-                "run_name": f"{self.schema}-{self.identifier}-{uuid.uuid4()}",
-                "existing_cluster_id": self.credentials.cluster,
-                "notebook_task": {
-                    "notebook_path": path,
-                },
-            },
+            json=job_spec,
         )
         if submit_response.status_code != 200:
             raise dbt.exceptions.RuntimeException(
@@ -128,17 +94,16 @@ def _submit_notebook(self, path: str) -> str:
             )
         return submit_response.json()["run_id"]
 
-    def submit(self, compiled_code: str) -> None:
+    def _submit_through_notebook(self, compiled_code: str, cluster_spec: dict) -> None:
         # it is safe to call mkdirs even if dir already exists and have content inside
-        work_dir = f"/Users/{self.credentials.user}/{self.schema}/"
+        work_dir = f"/dbt_python_model/{self.schema}/"
         self._create_work_dir(work_dir)
-
         # add notebook
         whole_file_path = f"{work_dir}{self.identifier}"
         self._upload_notebook(whole_file_path, compiled_code)
 
         # submit job
-        run_id = self._submit_notebook(whole_file_path)
+        run_id = self._submit_job(whole_file_path, cluster_spec)
 
         self.polling(
             status_func=requests.get,
@@ -167,11 +132,56 @@ def submit(self, compiled_code: str) -> None:
                 f"{json_run_output['error_trace']}"
             )
 
+    def submit(self, compiled_code: str) -> None:
+        raise NotImplementedError(
+            "BaseDatabricksHelper is an abstract class and you should implement a submit method."
+        )
+
+    def polling(
+        self,
+        status_func,
+        status_func_kwargs,
+        get_state_func,
+        terminal_states,
+        expected_end_state,
+        get_state_msg_func,
+    ) -> Dict:
+        state = None
+        start = time.time()
+        exceeded_timeout = False
+        response = {}
+        while state not in terminal_states:
+            if time.time() - start > self.timeout:
+                exceeded_timeout = True
+                break
+            # should we do exponential backoff?
+            time.sleep(self.polling_interval)
+            response = status_func(**status_func_kwargs)
+            state = get_state_func(response)
+        if exceeded_timeout:
+            raise dbt.exceptions.RuntimeException("python model run timed out")
+        if state != expected_end_state:
+            raise dbt.exceptions.RuntimeException(
+                "python model run ended in state "
+                f"{state} with state_message\n{get_state_msg_func(response)}"
+            )
+        return response
+
+
+class JobClusterPythonJobHelper(BaseDatabricksHelper):
+    def check_credentials(self) -> None:
+        if not self.parsed_model["config"].get("job_cluster_config", None):
+            raise ValueError("job_cluster_config is required for job_cluster submission method.")
+
+    def submit(self, compiled_code: str) -> None:
+        cluster_spec = {"new_cluster": self.parsed_model["config"]["job_cluster_config"]}
+        self._submit_through_notebook(compiled_code, cluster_spec)
+
 
 class DBContext:
-    def __init__(self, credentials: SparkCredentials) -> None:
-        self.auth_header = {"Authorization": f"Bearer {credentials.token}"}
-        self.cluster = credentials.cluster
+    def __init__(self, credentials: SparkCredentials, cluster_id: str, auth_header: dict) -> None:
+        self.auth_header = auth_header
+        self.cluster_id = cluster_id
         self.host = credentials.host
 
     def create(self) -> str:
@@ -180,7 +190,7 @@ def create(self) -> str:
             f"https://{self.host}/api/1.2/contexts/create",
             headers=self.auth_header,
             json={
-                "clusterId": self.cluster,
+                "clusterId": self.cluster_id,
                 "language": SUBMISSION_LANGUAGE,
             },
         )
@@ -196,7 +206,7 @@ def destroy(self, context_id: str) -> str:
             f"https://{self.host}/api/1.2/contexts/destroy",
             headers=self.auth_header,
             json={
-                "clusterId": self.cluster,
+                "clusterId": self.cluster_id,
                 "contextId": context_id,
             },
         )
@@ -208,9 +218,9 @@ def destroy(self, context_id: str) -> str:
 
 
 class DBCommand:
-    def __init__(self, credentials: SparkCredentials) -> None:
-        self.auth_header = {"Authorization": f"Bearer {credentials.token}"}
-        self.cluster = credentials.cluster
+    def __init__(self, credentials: SparkCredentials, cluster_id: str, auth_header: dict) -> None:
+        self.auth_header = auth_header
+        self.cluster_id = cluster_id
         self.host = credentials.host
 
     def execute(self, context_id: str, command: str) -> str:
@@ -219,7 +229,7 @@ def execute(self, context_id: str, command: str) -> str:
             f"https://{self.host}/api/1.2/commands/execute",
             headers=self.auth_header,
             json={
-                "clusterId": self.cluster,
+                "clusterId": self.cluster_id,
                 "contextId": context_id,
                 "language": SUBMISSION_LANGUAGE,
                 "command": command,
@@ -237,7 +247,7 @@ def status(self, context_id: str, command_id: str) -> Dict[str, Any]:
             f"https://{self.host}/api/1.2/commands/status",
             headers=self.auth_header,
             params={
-                "clusterId": self.cluster,
+                "clusterId": self.cluster_id,
                 "contextId": context_id,
                 "commandId": command_id,
             },
@@ -249,32 +259,38 @@ def status(self, context_id: str, command_id: str) -> Dict[str, Any]:
         return response.json()
 
 
-class DBCommandsApiPythonJobHelper(BaseDatabricksHelper):
-    def check_credentials(self, credentials: SparkCredentials) -> None:
-        if not credentials.cluster:
-            raise ValueError("Databricks cluster is required for commands submission method.")
+class AllPurposeClusterPythonJobHelper(BaseDatabricksHelper):
+    def check_credentials(self) -> None:
+        if not self.cluster_id:
+            raise ValueError(
+                "Databricks cluster_id is required for the all_purpose_cluster submission method."
+            )
 
     def submit(self, compiled_code: str) -> None:
-        context = DBContext(self.credentials)
-        command = DBCommand(self.credentials)
-        context_id = context.create()
-        try:
-            command_id = command.execute(context_id, compiled_code)
-            # poll until job finish
-            response = self.polling(
-                status_func=command.status,
-                status_func_kwargs={
-                    "context_id": context_id,
-                    "command_id": command_id,
-                },
-                get_state_func=lambda response: response["status"],
-                terminal_states=("Cancelled", "Error", "Finished"),
-                expected_end_state="Finished",
-                get_state_msg_func=lambda response: response.json()["results"]["data"],
-            )
-            if response["results"]["resultType"] == "error":
-                raise dbt.exceptions.RuntimeException(
-                    f"Python model failed with traceback as:\n" f"{response['results']['cause']}"
+        if self.parsed_model["config"].get("create_notebook", False):
+            self._submit_through_notebook(compiled_code, {"existing_cluster_id": self.cluster_id})
+        else:
+            context = DBContext(self.credentials, self.cluster_id, self.auth_header)
+            command = DBCommand(self.credentials, self.cluster_id, self.auth_header)
+            context_id = context.create()
+            try:
+                command_id = command.execute(context_id, compiled_code)
+                # poll until job finish
+                response = self.polling(
+                    status_func=command.status,
+                    status_func_kwargs={
+                        "context_id": context_id,
+                        "command_id": command_id,
+                    },
+                    get_state_func=lambda response: response["status"],
+                    terminal_states=("Cancelled", "Error", "Finished"),
+                    expected_end_state="Finished",
+                    get_state_msg_func=lambda response: response["results"]["data"],
                 )
-        finally:
-            context.destroy(context_id)
+                if response["results"]["resultType"] == "error":
+                    raise dbt.exceptions.RuntimeException(
+                        f"Python model failed with traceback as:\n"
+                        f"{response['results']['cause']}"
+                    )
+            finally:
+                context.destroy(context_id)
diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py
index 059412f10..ed6185b61 100644
--- a/tests/functional/adapter/test_python_model.py
+++ b/tests/functional/adapter/test_python_model.py
@@ -2,11 +2,15 @@
 import pytest
 from dbt.tests.util import run_dbt, write_file, run_dbt_and_capture
 from dbt.tests.adapter.python_model.test_python_model import BasePythonModelTests, BasePythonIncrementalTests
-
+from dbt.tests.adapter.python_model.test_spark import BasePySparkTests
 @pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
 class TestPythonModelSpark(BasePythonModelTests):
     pass
 
+@pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
+class TestPySpark(BasePySparkTests):
+    pass
+
 @pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
 class TestPythonIncrementalModelSpark(BasePythonIncrementalTests):
     @pytest.fixture(scope="class")

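With this change the cluster details come from the model config rather than only from the connection credentials. A hedged sketch of a dbt Python model selecting the new job_cluster method; the `submission_method` key is assumed to be the standard dbt config hook for choosing a helper, and the cluster sizing values are purely illustrative:

```python
def model(dbt, session):
    # job_cluster_config is passed through as the "new_cluster" spec of the
    # Jobs runs/submit payload built by JobClusterPythonJobHelper
    dbt.config(
        materialized="table",
        submission_method="job_cluster",   # assumed config key, not shown in this patch
        job_cluster_config={               # illustrative sizing only
            "spark_version": "12.2.x-scala2.12",
            "node_type_id": "i3.xlarge",
            "num_workers": 2,
        },
        timeout=3600,                      # read by get_timeout(); defaults to 24h
    )
    return session.table("my_schema.my_source")  # hypothetical source table
```

For the all_purpose_cluster method, the analogous knobs shown in the patch are `cluster_id` (falling back to the `cluster` credential) and the optional `create_notebook` flag, which routes execution through the notebook path instead of the Commands API.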
From 31104dff7ba2f63cd6c3adafeb2ac633b225e043 Mon Sep 17 00:00:00 2001
From: dave-connors-3 <73915542+dave-connors-3@users.noreply.github.com>
Date: Mon, 26 Sep 2022 13:49:28 -0500
Subject: [PATCH 45/54] merge exclude columns spark (#390)

* merge exclude columns spark

* reformat

* integration test for merge exclude columns

* pin core branch for integration tests

* missing symbol

* changie

* add dest columns
---
 .../unreleased/Features-20220826-133818.yaml  |  7 ++++++
 .../incremental/column_helpers.sql            | 23 +++++++++++++++++++
 .../incremental/strategies.sql                |  7 ++++--
 dev-requirements.txt                          |  2 +-
 .../models_delta/merge_exclude_columns.sql    | 22 ++++++++++++++++++
 .../seeds/expected_exclude_upsert.csv         |  4 ++++
 .../test_incremental_strategies.py            |  1 +
 7 files changed, 63 insertions(+), 3 deletions(-)
 create mode 100644 .changes/unreleased/Features-20220826-133818.yaml
 create mode 100644 dbt/include/spark/macros/materializations/incremental/column_helpers.sql
 create mode 100644 tests/integration/incremental_strategies/models_delta/merge_exclude_columns.sql
 create mode 100644 tests/integration/incremental_strategies/seeds/expected_exclude_upsert.csv

diff --git a/.changes/unreleased/Features-20220826-133818.yaml b/.changes/unreleased/Features-20220826-133818.yaml
new file mode 100644
index 000000000..f67718092
--- /dev/null
+++ b/.changes/unreleased/Features-20220826-133818.yaml
@@ -0,0 +1,7 @@
+kind: Features
+body: merge exclude columns for spark models
+time: 2022-08-26T13:38:18.75458-05:00
+custom:
+  Author: dave-connors-3
+  Issue: "5260"
+  PR: "390"
diff --git a/dbt/include/spark/macros/materializations/incremental/column_helpers.sql b/dbt/include/spark/macros/materializations/incremental/column_helpers.sql
new file mode 100644
index 000000000..3eec968d5
--- /dev/null
+++ b/dbt/include/spark/macros/materializations/incremental/column_helpers.sql
@@ -0,0 +1,23 @@
+{% macro spark__get_merge_update_columns(merge_update_columns, merge_exclude_columns, dest_columns) %}
+  {%- set default_cols = None -%}
+
+  {%- if merge_update_columns and merge_exclude_columns -%}
+    {{ exceptions.raise_compiler_error(
+        'Model cannot specify merge_update_columns and merge_exclude_columns. Please update model to use only one config'
+    )}}
+  {%- elif merge_update_columns -%}
+    {%- set update_columns = merge_update_columns -%}
+  {%- elif merge_exclude_columns -%}
+    {%- set update_columns = [] -%}
+    {%- for column in dest_columns -%}
+      {% if column.column | lower not in merge_exclude_columns | map("lower") | list %}
+        {%- do update_columns.append(column.quoted) -%}
+      {% endif %}
+    {%- endfor -%}
+  {%- else -%}
+    {%- set update_columns = default_cols -%}
+  {%- endif -%}
+
+  {{ return(update_columns) }}
+
+{% endmacro %}
diff --git a/dbt/include/spark/macros/materializations/incremental/strategies.sql b/dbt/include/spark/macros/materializations/incremental/strategies.sql
index d98e1f692..17196e85d 100644
--- a/dbt/include/spark/macros/materializations/incremental/strategies.sql
+++ b/dbt/include/spark/macros/materializations/incremental/strategies.sql
@@ -20,9 +20,12 @@
 
 
 {% macro spark__get_merge_sql(target, source, unique_key, dest_columns, predicates=none) %}
-  {# skip dest_columns, use merge_update_columns config if provided, otherwise use "*" #}
+  {# need dest_columns for merge_exclude_columns, default to use "*" #}
   {%- set predicates = [] if predicates is none else [] + predicates -%}
-  {%- set update_columns = config.get("merge_update_columns") -%}
+  {%- set dest_columns = adapter.get_columns_in_relation(target) -%}
+  {%- set merge_update_columns = config.get('merge_update_columns') -%}
+  {%- set merge_exclude_columns = config.get('merge_exclude_columns') -%}
+  {%- set update_columns = get_merge_update_columns(merge_update_columns, merge_exclude_columns, dest_columns) -%}
 
   {% if unique_key %}
       {% if unique_key is sequence and unique_key is not mapping and unique_key is not string %}
diff --git a/dev-requirements.txt b/dev-requirements.txt
index e93c1b41a..4148a759c 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,6 +1,6 @@
 # install latest changes in dbt-core
 # TODO: how to automate switching from develop to version branches?
-git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core
+git+https://github.com/dbt-labs/dbt-core.git@feature/merge_exclude_columns#egg=dbt-core&subdirectory=core
 git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter
 
 
diff --git a/tests/integration/incremental_strategies/models_delta/merge_exclude_columns.sql b/tests/integration/incremental_strategies/models_delta/merge_exclude_columns.sql
new file mode 100644
index 000000000..815f46b1d
--- /dev/null
+++ b/tests/integration/incremental_strategies/models_delta/merge_exclude_columns.sql
@@ -0,0 +1,22 @@
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'merge',
+    file_format = 'delta',
+    unique_key = 'id',
+    merge_exclude_columns = ['msg'],
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg, 'blue' as color
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg, 'red' as color
+
+{% else %}
+
+-- msg will be ignored, color will be updated
+select cast(2 as bigint) as id, 'yo' as msg, 'green' as color
+union all
+select cast(3 as bigint) as id, 'anyway' as msg, 'purple' as color
+
+{% endif %}
\ No newline at end of file
diff --git a/tests/integration/incremental_strategies/seeds/expected_exclude_upsert.csv b/tests/integration/incremental_strategies/seeds/expected_exclude_upsert.csv
new file mode 100644
index 000000000..a0f1a6526
--- /dev/null
+++ b/tests/integration/incremental_strategies/seeds/expected_exclude_upsert.csv
@@ -0,0 +1,4 @@
+id,msg,color
+1,hello,blue
+2,goodbye,green
+3,anyway,purple
\ No newline at end of file
diff --git a/tests/integration/incremental_strategies/test_incremental_strategies.py b/tests/integration/incremental_strategies/test_incremental_strategies.py
index 3848d11ae..73bb6ba2b 100644
--- a/tests/integration/incremental_strategies/test_incremental_strategies.py
+++ b/tests/integration/incremental_strategies/test_incremental_strategies.py
@@ -78,6 +78,7 @@ def run_and_test(self):
         self.assertTablesEqual("merge_no_key", "expected_append")
         self.assertTablesEqual("merge_unique_key", "expected_upsert")
         self.assertTablesEqual("merge_update_columns", "expected_partial_upsert")
+        self.assertTablesEqual("merge_exclude_columns", "expected_exclude_upsert")
 
     @use_profile("databricks_cluster")
     def test_delta_strategies_databricks_cluster(self):

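The new `spark__get_merge_update_columns` macro picks the columns to update case-insensitively. The same selection logic, sketched in Python for illustration; plain strings stand in for dbt's column objects, which expose `.column` and `.quoted`:

```python
def get_merge_update_columns(merge_update_columns, merge_exclude_columns, dest_columns):
    # mirrors the Jinja macro: an explicit include list wins; otherwise drop the
    # excluded columns case-insensitively; otherwise return None so the merge
    # falls back to `update set *`
    if merge_update_columns and merge_exclude_columns:
        raise ValueError("use only one of merge_update_columns / merge_exclude_columns")
    if merge_update_columns:
        return merge_update_columns
    if merge_exclude_columns:
        excluded = {c.lower() for c in merge_exclude_columns}
        return [c for c in dest_columns if c.lower() not in excluded]
    return None


print(get_merge_update_columns(None, ["msg"], ["id", "msg", "color"]))  # ['id', 'color']
```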
From ceab5abdd73999f34a34328735d71b6a47fb7594 Mon Sep 17 00:00:00 2001
From: Gerda Shank <gerda@dbtlabs.com>
Date: Mon, 26 Sep 2022 15:35:31 -0400
Subject: [PATCH 46/54] Remove branch ref from dev-requirements.txt (#477)

---
 dev-requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev-requirements.txt b/dev-requirements.txt
index 4148a759c..e93c1b41a 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,6 +1,6 @@
 # install latest changes in dbt-core
 # TODO: how to automate switching from develop to version branches?
-git+https://github.com/dbt-labs/dbt-core.git@feature/merge_exclude_columns#egg=dbt-core&subdirectory=core
+git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core
 git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter
 
 

From 32ddd224f64097e8178c442d80ded8f55ba1ba92 Mon Sep 17 00:00:00 2001
From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com>
Date: Mon, 26 Sep 2022 15:30:18 -0600
Subject: [PATCH 47/54] Array macros (#454)

* Temporary dev-requirements

* Changelog entry

* Implementations and tests for array macros

* Remove `cast_array_to_string` macro

* Restore original dev-requirements.txt
---
 .changes/unreleased/Features-20220913-084852.yaml |  7 +++++++
 dbt/include/spark/macros/utils/array_append.sql   |  3 +++
 dbt/include/spark/macros/utils/array_concat.sql   |  3 +++
 .../spark/macros/utils/array_construct.sql        |  3 +++
 tests/functional/adapter/utils/test_utils.py      | 15 +++++++++++++++
 5 files changed, 31 insertions(+)
 create mode 100644 .changes/unreleased/Features-20220913-084852.yaml
 create mode 100644 dbt/include/spark/macros/utils/array_append.sql
 create mode 100644 dbt/include/spark/macros/utils/array_concat.sql
 create mode 100644 dbt/include/spark/macros/utils/array_construct.sql

diff --git a/.changes/unreleased/Features-20220913-084852.yaml b/.changes/unreleased/Features-20220913-084852.yaml
new file mode 100644
index 000000000..014a598a3
--- /dev/null
+++ b/.changes/unreleased/Features-20220913-084852.yaml
@@ -0,0 +1,7 @@
+kind: Features
+body: Array macros
+time: 2022-09-13T08:48:52.684985-06:00
+custom:
+  Author: graciegoheen dbeatty10
+  Issue: "453"
+  PR: "454"
diff --git a/dbt/include/spark/macros/utils/array_append.sql b/dbt/include/spark/macros/utils/array_append.sql
new file mode 100644
index 000000000..efe39e7ab
--- /dev/null
+++ b/dbt/include/spark/macros/utils/array_append.sql
@@ -0,0 +1,3 @@
+{% macro spark__array_append(array, new_element) -%}
+    {{ array_concat(array, array_construct([new_element])) }}
+{%- endmacro %}
diff --git a/dbt/include/spark/macros/utils/array_concat.sql b/dbt/include/spark/macros/utils/array_concat.sql
new file mode 100644
index 000000000..1441618c8
--- /dev/null
+++ b/dbt/include/spark/macros/utils/array_concat.sql
@@ -0,0 +1,3 @@
+{% macro spark__array_concat(array_1, array_2) -%}
+    concat({{ array_1 }}, {{ array_2 }})
+{%- endmacro %}
diff --git a/dbt/include/spark/macros/utils/array_construct.sql b/dbt/include/spark/macros/utils/array_construct.sql
new file mode 100644
index 000000000..a4e5e0c7c
--- /dev/null
+++ b/dbt/include/spark/macros/utils/array_construct.sql
@@ -0,0 +1,3 @@
+{% macro spark__array_construct(inputs, data_type) -%}
+    array( {{ inputs|join(' , ') }} )
+{%- endmacro %}
diff --git a/tests/functional/adapter/utils/test_utils.py b/tests/functional/adapter/utils/test_utils.py
index c71161e65..eab99b498 100644
--- a/tests/functional/adapter/utils/test_utils.py
+++ b/tests/functional/adapter/utils/test_utils.py
@@ -1,5 +1,8 @@
 import pytest
 
+from dbt.tests.adapter.utils.test_array_append import BaseArrayAppend
+from dbt.tests.adapter.utils.test_array_concat import BaseArrayConcat
+from dbt.tests.adapter.utils.test_array_construct import BaseArrayConstruct
 from dbt.tests.adapter.utils.test_any_value import BaseAnyValue
 from dbt.tests.adapter.utils.test_bool_or import BaseBoolOr
 from dbt.tests.adapter.utils.test_cast_bool_to_text import BaseCastBoolToText
@@ -31,6 +34,18 @@ class TestAnyValue(BaseAnyValue):
     pass
 
 
+class TestArrayAppend(BaseArrayAppend):
+    pass
+
+
+class TestArrayConcat(BaseArrayConcat):
+    pass
+
+
+class TestArrayConstruct(BaseArrayConstruct):
+    pass
+
+
 class TestBoolOr(BaseBoolOr):
     pass
 

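The three array macros are thin wrappers over Spark SQL's `array` and `concat` functions. One way to sanity-check the SQL they render to, assuming a local PySpark installation; the literal values are arbitrary:

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").appName("array-macro-check").getOrCreate()

# spark__array_construct([1, 2], ...) renders to: array( 1 , 2 )
# spark__array_concat(a, b)           renders to: concat(a, b)
# spark__array_append(a, 3)           composes both: concat(a, array( 3 ))
spark.sql("select concat(array(1, 2), array(3)) as appended").show()  # [1, 2, 3]

spark.stop()
```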
From 23d17a068d6b2de26eee3b6830c8ab8d16a3797d Mon Sep 17 00:00:00 2001
From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com>
Date: Mon, 26 Sep 2022 19:52:58 -0600
Subject: [PATCH 48/54] Tests for `current_timestamp` (#475)

* Test failure will confirm that it is not a timezone-naive data type

* Temporarily update dev-requirements.txt

* Tests for current_timestamp

* Restore original dev-requirements.txt

* Tests that the Python driver for dbt-spark returns a timezone-naive datetime for `{{ dbt.current_timestamp() }}`
---
 tests/functional/adapter/utils/test_utils.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/functional/adapter/utils/test_utils.py b/tests/functional/adapter/utils/test_utils.py
index eab99b498..102df731a 100644
--- a/tests/functional/adapter/utils/test_utils.py
+++ b/tests/functional/adapter/utils/test_utils.py
@@ -7,6 +7,7 @@
 from dbt.tests.adapter.utils.test_bool_or import BaseBoolOr
 from dbt.tests.adapter.utils.test_cast_bool_to_text import BaseCastBoolToText
 from dbt.tests.adapter.utils.test_concat import BaseConcat
+from dbt.tests.adapter.utils.test_current_timestamp import BaseCurrentTimestampNaive
 from dbt.tests.adapter.utils.test_dateadd import BaseDateAdd
 from dbt.tests.adapter.utils.test_datediff import BaseDateDiff
 from dbt.tests.adapter.utils.test_date_trunc import BaseDateTrunc
@@ -59,6 +60,11 @@ class TestConcat(BaseConcat):
     pass
 
 
+# Use either BaseCurrentTimestampAware or BaseCurrentTimestampNaive but not both
+class TestCurrentTimestamp(BaseCurrentTimestampNaive):
+    pass
+
+
 class TestDateAdd(BaseDateAdd):
     pass
 

From 80dc02992c7626fa294537d6f8b1491a06b589a0 Mon Sep 17 00:00:00 2001
From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com>
Date: Tue, 27 Sep 2022 06:02:20 -0600
Subject: [PATCH 49/54] Convert df to pyspark DataFrame if it is koalas before
 writing (#474)

* Temporarily update dev-requirements.txt

* Changelog entry

* Temporarily update dev-requirements.txt

* Convert df to pyspark DataFrame if it is koalas before writing

* Restore original version of dev-requirements.txt

* Preferentially convert Koalas DataFrames to pandas-on-Spark DataFrames first

* Fix explanation

Co-authored-by: Takuya UESHIN <ueshin@databricks.com>
---
 .../unreleased/Under the Hood-20220924-143713.yaml   |  7 +++++++
 dbt/include/spark/macros/materializations/table.sql  | 12 +++++++++++-
 2 files changed, 18 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Under the Hood-20220924-143713.yaml

diff --git a/.changes/unreleased/Under the Hood-20220924-143713.yaml b/.changes/unreleased/Under the Hood-20220924-143713.yaml
new file mode 100644
index 000000000..c537a9395
--- /dev/null
+++ b/.changes/unreleased/Under the Hood-20220924-143713.yaml	
@@ -0,0 +1,7 @@
+kind: Under the Hood
+body: Convert df to pyspark DataFrame if it is koalas before writing
+time: 2022-09-24T14:37:13.100404-06:00
+custom:
+  Author: dbeatty10 ueshin
+  Issue: "473"
+  PR: "474"
diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql
index 25d70c722..5721bd25e 100644
--- a/dbt/include/spark/macros/materializations/table.sql
+++ b/dbt/include/spark/macros/materializations/table.sql
@@ -46,6 +46,7 @@ import importlib.util
 
 pandas_available = False
 pyspark_available = False
+koalas_available = False
 
 # make sure pandas exists before using it
 if importlib.util.find_spec("pandas"):
@@ -57,17 +58,26 @@ if importlib.util.find_spec("pyspark.pandas"):
   import pyspark.pandas
   pyspark_available = True
 
-# preferentially convert pandas DataFrames to pandas-on-Spark DataFrames first
+# make sure databricks.koalas exists before using it
+if importlib.util.find_spec("databricks.koalas"):
+  import databricks.koalas
+  koalas_available = True
+
+# preferentially convert pandas DataFrames to pandas-on-Spark or Koalas DataFrames first
 # since they know how to convert pandas DataFrames better than `spark.createDataFrame(df)`
 # and converting from pandas-on-Spark to Spark DataFrame has no overhead
 if pyspark_available and pandas_available and isinstance(df, pandas.core.frame.DataFrame):
   df = pyspark.pandas.frame.DataFrame(df)
+elif koalas_available and pandas_available and isinstance(df, pandas.core.frame.DataFrame):
+  df = databricks.koalas.frame.DataFrame(df)
 
 # convert to pyspark.sql.dataframe.DataFrame
 if isinstance(df, pyspark.sql.dataframe.DataFrame):
   pass  # since it is already a Spark DataFrame
 elif pyspark_available and isinstance(df, pyspark.pandas.frame.DataFrame):
   df = df.to_spark()
+elif koalas_available and isinstance(df, databricks.koalas.frame.DataFrame):
+  df = df.to_spark()
 elif pandas_available and isinstance(df, pandas.core.frame.DataFrame):
   df = spark.createDataFrame(df)
 else:

From d462afcd78258e962b642068bd58ec049fef4d65 Mon Sep 17 00:00:00 2001
From: Doug Beatty <44704949+dbeatty10@users.noreply.github.com>
Date: Wed, 28 Sep 2022 14:33:44 -0600
Subject: [PATCH 50/54] try/except rather than find_spec for optional imports
 (#489)

* try/except rather than find_spec for optional imports

* Remove unused import
---
 .../spark/macros/materializations/table.sql    | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql
index 5721bd25e..808089105 100644
--- a/dbt/include/spark/macros/materializations/table.sql
+++ b/dbt/include/spark/macros/materializations/table.sql
@@ -42,26 +42,26 @@
 dbt = dbtObj(spark.table)
 df = model(dbt, spark)
 
-import importlib.util
-
-pandas_available = False
-pyspark_available = False
-koalas_available = False
-
 # make sure pandas exists before using it
-if importlib.util.find_spec("pandas"):
+try:
   import pandas
   pandas_available = True
+except ImportError:
+  pandas_available = False
 
 # make sure pyspark.pandas exists before using it
-if importlib.util.find_spec("pyspark.pandas"):
+try:
   import pyspark.pandas
   pyspark_available = True
+except ImportError:
+  pyspark_available = False
 
 # make sure databricks.koalas exists before using it
-if importlib.util.find_spec("databricks.koalas"):
+try:
   import databricks.koalas
   koalas_available = True
+except ImportError:
+  koalas_available = False
 
 # preferentially convert pandas DataFrames to pandas-on-Spark or Koalas DataFrames first
 # since they know how to convert pandas DataFrames better than `spark.createDataFrame(df)`

From 7f233b16ce1ae6683eb62741cb32c99ff2867d70 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 29 Sep 2022 09:16:07 -0400
Subject: [PATCH 51/54] Bumping version to 1.4.0a1 and generate changelog
 (#492)

* Bumping version to 1.4.0a1 and generate CHANGELOG

* Updated changelog

Co-authored-by: Github Build Bot <buildbot@fishtownanalytics.com>
Co-authored-by: Leah Antkiewicz <leah.antkiewicz@fishtownanalytics.com>
---
 .bumpversion.cfg                                |  2 +-
 .changes/1.3.0-b1.md                            | 11 -----------
 .changes/1.3.0-b2.md                            |  5 -----
 .changes/1.3.0/Features-20220808-141141.yaml    |  8 --------
 .changes/1.3.0/Features-20220808-142118.yaml    |  7 -------
 .changes/1.3.0/Fixes-20220808-141623.yaml       |  8 --------
 .../1.3.0/Under the Hood-20220808-141320.yaml   |  7 -------
 .../1.3.0/Under the Hood-20220825-073413.yaml   |  7 -------
 .../unreleased/Dependency-20220913-225328.yaml  |  7 -------
 .../unreleased/Dependency-20220914-191910.yaml  |  7 -------
 .../unreleased/Dependency-20220914-192027.yaml  |  7 -------
 .../unreleased/Dependency-20220914-192102.yaml  |  7 -------
 .../unreleased/Dependency-20220914-192125.yaml  |  7 -------
 .../unreleased/Features-20220826-133818.yaml    |  7 -------
 .../unreleased/Features-20220913-084852.yaml    |  7 -------
 .../unreleased/Features-20220920-000814.yaml    |  7 -------
 .../unreleased/Features-20220923-101248.yaml    |  8 --------
 .changes/unreleased/Fixes-20220830-140224.yaml  |  7 -------
 .changes/unreleased/Fixes-20220914-010520.yaml  |  8 --------
 .../Under the Hood-20220829-164426.yaml         |  7 -------
 .../Under the Hood-20220912-104517.yaml         |  7 -------
 .../Under the Hood-20220913-152004.yaml         |  7 -------
 .../Under the Hood-20220916-125706.yaml         |  7 -------
 .../Under the Hood-20220924-143713.yaml         |  7 -------
 CHANGELOG.md                                    | 17 +----------------
 dbt/adapters/spark/__version__.py               |  2 +-
 setup.py                                        |  2 +-
 27 files changed, 4 insertions(+), 186 deletions(-)
 delete mode 100644 .changes/1.3.0-b1.md
 delete mode 100644 .changes/1.3.0-b2.md
 delete mode 100644 .changes/1.3.0/Features-20220808-141141.yaml
 delete mode 100644 .changes/1.3.0/Features-20220808-142118.yaml
 delete mode 100644 .changes/1.3.0/Fixes-20220808-141623.yaml
 delete mode 100644 .changes/1.3.0/Under the Hood-20220808-141320.yaml
 delete mode 100644 .changes/1.3.0/Under the Hood-20220825-073413.yaml
 delete mode 100644 .changes/unreleased/Dependency-20220913-225328.yaml
 delete mode 100644 .changes/unreleased/Dependency-20220914-191910.yaml
 delete mode 100644 .changes/unreleased/Dependency-20220914-192027.yaml
 delete mode 100644 .changes/unreleased/Dependency-20220914-192102.yaml
 delete mode 100644 .changes/unreleased/Dependency-20220914-192125.yaml
 delete mode 100644 .changes/unreleased/Features-20220826-133818.yaml
 delete mode 100644 .changes/unreleased/Features-20220913-084852.yaml
 delete mode 100644 .changes/unreleased/Features-20220920-000814.yaml
 delete mode 100644 .changes/unreleased/Features-20220923-101248.yaml
 delete mode 100644 .changes/unreleased/Fixes-20220830-140224.yaml
 delete mode 100644 .changes/unreleased/Fixes-20220914-010520.yaml
 delete mode 100644 .changes/unreleased/Under the Hood-20220829-164426.yaml
 delete mode 100644 .changes/unreleased/Under the Hood-20220912-104517.yaml
 delete mode 100644 .changes/unreleased/Under the Hood-20220913-152004.yaml
 delete mode 100644 .changes/unreleased/Under the Hood-20220916-125706.yaml
 delete mode 100644 .changes/unreleased/Under the Hood-20220924-143713.yaml

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index f93a02ae6..5fa558e9f 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.3.0b2
+current_version = 1.4.0a1
 parse = (?P<major>\d+)
 	\.(?P<minor>\d+)
 	\.(?P<patch>\d+)
diff --git a/.changes/1.3.0-b1.md b/.changes/1.3.0-b1.md
deleted file mode 100644
index ef64f4395..000000000
--- a/.changes/1.3.0-b1.md
+++ /dev/null
@@ -1,11 +0,0 @@
-## dbt-spark 1.3.0-b1 - July 29, 2022
-
-### Features
-- Support python model through notebook, currently supported materializations are table and incremental ([#417](https://github.com/dbt-labs/dbt-spark/issues/417), [#377](https://github.com/dbt-labs/dbt-spark/pull/377))
-### Fixes
-- Pin pyodbc to version 4.0.32 to prevent overwriting libodbc.so and libltdl.so on Linux ([#397](https://github.com/dbt-labs/dbt-spark/issues/397), [#398](https://github.com/dbt-labs/dbt-spark/pull/398))
-### Under the Hood
-- Support core incremental refactor ([#4402](https://github.com/dbt-labs/dbt-spark/issues/4402), [#394](https://github.com/dbt-labs/dbt-spark/pull/394))
-
-### Contributors
-- [@barberscot](https://github.com/barberscot) ([#398](https://github.com/dbt-labs/dbt-spark/pull/398))
diff --git a/.changes/1.3.0-b2.md b/.changes/1.3.0-b2.md
deleted file mode 100644
index 8f7ea1e62..000000000
--- a/.changes/1.3.0-b2.md
+++ /dev/null
@@ -1,5 +0,0 @@
-## dbt-spark 1.3.0-b2 - August 30, 2022
-### Features
-- Add changie to dbt-spark ([#416](https://github.com/dbt-labs/dbt-spark/issues/416), [#418](https://github.com/dbt-labs/dbt-spark/pull/418))
-### Under the Hood
-- specify supported_languages for materialization that support python models ([#437](https://github.com/dbt-labs/dbt-spark/issues/437), [#440](https://github.com/dbt-labs/dbt-spark/pull/440))
diff --git a/.changes/1.3.0/Features-20220808-141141.yaml b/.changes/1.3.0/Features-20220808-141141.yaml
deleted file mode 100644
index 444a3062b..000000000
--- a/.changes/1.3.0/Features-20220808-141141.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-kind: Features
-body: Support python model through notebook, currently supported materializations
-  are table and incremental
-time: 2022-08-08T14:11:41.906131-05:00
-custom:
-  Author: ChenyuLInx
-  Issue: "417"
-  PR: "377"
diff --git a/.changes/1.3.0/Features-20220808-142118.yaml b/.changes/1.3.0/Features-20220808-142118.yaml
deleted file mode 100644
index 9c110e937..000000000
--- a/.changes/1.3.0/Features-20220808-142118.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Features
-body: Add changie to dbt-spark
-time: 2022-08-08T14:21:18.569756-05:00
-custom:
-  Author: mcknight-42
-  Issue: "416"
-  PR: "418"
diff --git a/.changes/1.3.0/Fixes-20220808-141623.yaml b/.changes/1.3.0/Fixes-20220808-141623.yaml
deleted file mode 100644
index 793e3e5b2..000000000
--- a/.changes/1.3.0/Fixes-20220808-141623.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-kind: Fixes
-body: Pin pyodbc to version 4.0.32 to prevent overwriting libodbc.so and libltdl.so
-  on Linux
-time: 2022-08-08T14:16:23.846876-05:00
-custom:
-  Author: barberscot
-  Issue: "397"
-  PR: "398"
diff --git a/.changes/1.3.0/Under the Hood-20220808-141320.yaml b/.changes/1.3.0/Under the Hood-20220808-141320.yaml
deleted file mode 100644
index 82535f926..000000000
--- a/.changes/1.3.0/Under the Hood-20220808-141320.yaml	
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Under the Hood
-body: Support core incremental refactor
-time: 2022-08-08T14:13:20.576155-05:00
-custom:
-  Author: gshank
-  Issue: "4402"
-  PR: "394"
diff --git a/.changes/1.3.0/Under the Hood-20220825-073413.yaml b/.changes/1.3.0/Under the Hood-20220825-073413.yaml
deleted file mode 100644
index 71e187ca7..000000000
--- a/.changes/1.3.0/Under the Hood-20220825-073413.yaml	
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Under the Hood
-body: specify supported_languages for materialization that support python models
-time: 2022-08-25T07:34:13.397367-07:00
-custom:
-  Author: ChenyuLInx
-  Issue: "437"
-  PR: "440"
diff --git a/.changes/unreleased/Dependency-20220913-225328.yaml b/.changes/unreleased/Dependency-20220913-225328.yaml
deleted file mode 100644
index b934c08c7..000000000
--- a/.changes/unreleased/Dependency-20220913-225328.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Dependency
-body: "Bump pyodbc from 4.0.32 to 4.0.34"
-time: 2022-09-13T22:53:28.00000Z
-custom:
-  Author: dependabot[bot]
-  Issue: 417
-  PR: 459
diff --git a/.changes/unreleased/Dependency-20220914-191910.yaml b/.changes/unreleased/Dependency-20220914-191910.yaml
deleted file mode 100644
index ad2534c16..000000000
--- a/.changes/unreleased/Dependency-20220914-191910.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: "Dependency"
-body: "Bump black from 22.3.0 to 22.8.0"
-time: 2022-09-14T19:19:10.00000Z
-custom:
-  Author: dependabot[bot]
-  Issue: 417
-  PR: 458
diff --git a/.changes/unreleased/Dependency-20220914-192027.yaml b/.changes/unreleased/Dependency-20220914-192027.yaml
deleted file mode 100644
index 1863b52fc..000000000
--- a/.changes/unreleased/Dependency-20220914-192027.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: "Dependency"
-body: "Update click requirement from ~=8.0.4 to ~=8.1.3"
-time: 2022-09-14T19:20:27.00000Z
-custom:
-  Author: dependabot[bot]
-  Issue: 417
-  PR: 457
diff --git a/.changes/unreleased/Dependency-20220914-192102.yaml b/.changes/unreleased/Dependency-20220914-192102.yaml
deleted file mode 100644
index f13fd6c45..000000000
--- a/.changes/unreleased/Dependency-20220914-192102.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: "Dependency"
-body: "Bump mypy from 0.950 to 0.971"
-time: 2022-09-14T19:21:02.00000Z
-custom:
-  Author: dependabot[bot]
-  Issue: 417
-  PR: 456
diff --git a/.changes/unreleased/Dependency-20220914-192125.yaml b/.changes/unreleased/Dependency-20220914-192125.yaml
deleted file mode 100644
index 78234be80..000000000
--- a/.changes/unreleased/Dependency-20220914-192125.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: "Dependency"
-body: "Bump thrift-sasl from 0.4.1 to 0.4.3"
-time: 2022-09-14T19:21:25.00000Z
-custom:
-  Author: dependabot[bot]
-  Issue: 417
-  PR: 455
diff --git a/.changes/unreleased/Features-20220826-133818.yaml b/.changes/unreleased/Features-20220826-133818.yaml
deleted file mode 100644
index f67718092..000000000
--- a/.changes/unreleased/Features-20220826-133818.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Features
-body: merge exclude columns for spark models
-time: 2022-08-26T13:38:18.75458-05:00
-custom:
-  Author: dave-connors-3
-  Issue: "5260"
-  PR: "390"
diff --git a/.changes/unreleased/Features-20220913-084852.yaml b/.changes/unreleased/Features-20220913-084852.yaml
deleted file mode 100644
index 014a598a3..000000000
--- a/.changes/unreleased/Features-20220913-084852.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Features
-body: Array macros
-time: 2022-09-13T08:48:52.684985-06:00
-custom:
-  Author: graciegoheen dbeatty10
-  Issue: "453"
-  PR: "454"
diff --git a/.changes/unreleased/Features-20220920-000814.yaml b/.changes/unreleased/Features-20220920-000814.yaml
deleted file mode 100644
index 96ba63648..000000000
--- a/.changes/unreleased/Features-20220920-000814.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Features
-body: implement testing for type_boolean in spark
-time: 2022-09-20T00:08:14.15447+01:00
-custom:
-  Author: jpmmcneill
-  Issue: "470"
-  PR: "471"
diff --git a/.changes/unreleased/Features-20220923-101248.yaml b/.changes/unreleased/Features-20220923-101248.yaml
deleted file mode 100644
index e46b2b105..000000000
--- a/.changes/unreleased/Features-20220923-101248.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-kind: Features
-body: Support job cluster in notebook submission method, remove requirement for user
-  for python model submission
-time: 2022-09-23T10:12:48.288911-07:00
-custom:
-  Author: ChenyuLInx
-  Issue: "444"
-  PR: "467"
diff --git a/.changes/unreleased/Fixes-20220830-140224.yaml b/.changes/unreleased/Fixes-20220830-140224.yaml
deleted file mode 100644
index 9e3da3ea6..000000000
--- a/.changes/unreleased/Fixes-20220830-140224.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Fixes
-body: python incremental model tmp table using correct schema
-time: 2022-08-30T14:02:24.603033-07:00
-custom:
-  Author: ChenyuLInx
-  Issue: "441"
-  PR: "445"
diff --git a/.changes/unreleased/Fixes-20220914-010520.yaml b/.changes/unreleased/Fixes-20220914-010520.yaml
deleted file mode 100644
index f8584f05f..000000000
--- a/.changes/unreleased/Fixes-20220914-010520.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-kind: Fixes
-body: change to get_columns_in_relation to fix cache inconsistencies to fix cache
-  issues in incremental models causing failure on on_schema_change
-time: 2022-09-14T01:05:20.312981-05:00
-custom:
-  Author: McKnight-42
-  Issue: "447"
-  PR: "451"
diff --git a/.changes/unreleased/Under the Hood-20220829-164426.yaml b/.changes/unreleased/Under the Hood-20220829-164426.yaml
deleted file mode 100644
index bf58971f2..000000000
--- a/.changes/unreleased/Under the Hood-20220829-164426.yaml	
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Under the Hood
-body: Submit python model with Command API by default. Adjusted run name
-time: 2022-08-29T16:44:26.509138-07:00
-custom:
-  Author: ChenyuLInx
-  Issue: "424"
-  PR: "442"
diff --git a/.changes/unreleased/Under the Hood-20220912-104517.yaml b/.changes/unreleased/Under the Hood-20220912-104517.yaml
deleted file mode 100644
index e45c97bf0..000000000
--- a/.changes/unreleased/Under the Hood-20220912-104517.yaml	
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Under the Hood
-body: Better interface for python submission
-time: 2022-09-12T10:45:17.226481-07:00
-custom:
-  Author: ChenyuLInx
-  Issue: "452"
-  PR: "452"
diff --git a/.changes/unreleased/Under the Hood-20220913-152004.yaml b/.changes/unreleased/Under the Hood-20220913-152004.yaml
deleted file mode 100644
index 4c372db01..000000000
--- a/.changes/unreleased/Under the Hood-20220913-152004.yaml	
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Under the Hood
-body: ignore mypy typing issues
-time: 2022-09-13T15:20:04.459783-07:00
-custom:
-  Author: colin-rogers-dbt
-  Issue: "461"
-  PR: "462"
diff --git a/.changes/unreleased/Under the Hood-20220916-125706.yaml b/.changes/unreleased/Under the Hood-20220916-125706.yaml
deleted file mode 100644
index 54b82eb55..000000000
--- a/.changes/unreleased/Under the Hood-20220916-125706.yaml	
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Under the Hood
-body: Enable Pandas and Pandas-on-Spark DataFrames for dbt python models
-time: 2022-09-16T12:57:06.846297-06:00
-custom:
-  Author: chamini2 dbeatty10
-  Issue: "468"
-  PR: "469"
diff --git a/.changes/unreleased/Under the Hood-20220924-143713.yaml b/.changes/unreleased/Under the Hood-20220924-143713.yaml
deleted file mode 100644
index c537a9395..000000000
--- a/.changes/unreleased/Under the Hood-20220924-143713.yaml	
+++ /dev/null
@@ -1,7 +0,0 @@
-kind: Under the Hood
-body: Convert df to pyspark DataFrame if it is koalas before writing
-time: 2022-09-24T14:37:13.100404-06:00
-custom:
-  Author: dbeatty10 ueshin
-  Issue: "473"
-  PR: "474"
diff --git a/CHANGELOG.md b/CHANGELOG.md
index de20a0738..6dd49494b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,25 +4,10 @@
 - Changes are listed under the (pre)release in which they first appear. Subsequent releases include changes from previous releases.
 - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version.
 - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry)
-## dbt-spark 1.3.0-b2 - August 30, 2022
-### Features
-- Add changie to dbt-spark ([#416](https://github.com/dbt-labs/dbt-spark/issues/416), [#418](https://github.com/dbt-labs/dbt-spark/pull/418))
-### Under the Hood
-- specify supported_languages for materialization that support python models ([#437](https://github.com/dbt-labs/dbt-spark/issues/437), [#440](https://github.com/dbt-labs/dbt-spark/pull/440))
 
-## dbt-spark 1.3.0-b1 - July 29, 2022
-
-### Features
-- Support python model through notebook, currently supported materializations are table and incremental ([#417](https://github.com/dbt-labs/dbt-spark/issues/417), [#377](https://github.com/dbt-labs/dbt-spark/pull/377))
-### Fixes
-- Pin pyodbc to version 4.0.32 to prevent overwriting libodbc.so and libltdl.so on Linux ([#397](https://github.com/dbt-labs/dbt-spark/issues/397), [#398](https://github.com/dbt-labs/dbt-spark/pull/398))
-### Under the Hood
-- Support core incremental refactor ([#4402](https://github.com/dbt-labs/dbt-spark/issues/4402), [#394](https://github.com/dbt-labs/dbt-spark/pull/394))
-
-### Contributors
-- [@barberscot](https://github.com/barberscot) ([#398](https://github.com/dbt-labs/dbt-spark/pull/398))
 ## Previous Releases
 For information on prior major and minor releases, see their changelogs:
+- [1.3](https://github.com/dbt-labs/dbt-spark/blob/1.3.latest/CHANGELOG.md)
 - [1.2](https://github.com/dbt-labs/dbt-spark/blob/1.2.latest/CHANGELOG.md)
 - [1.1](https://github.com/dbt-labs/dbt-spark/blob/1.1.latest/CHANGELOG.md)
 - [1.0](https://github.com/dbt-labs/dbt-spark/blob/1.0.latest/CHANGELOG.md)
diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py
index e2c1a233c..70ba273f5 100644
--- a/dbt/adapters/spark/__version__.py
+++ b/dbt/adapters/spark/__version__.py
@@ -1 +1 @@
-version = "1.3.0b2"
+version = "1.4.0a1"
diff --git a/setup.py b/setup.py
index 05e814490..9d6c1367e 100644
--- a/setup.py
+++ b/setup.py
@@ -50,7 +50,7 @@ def _get_dbt_core_version():
 
 
 package_name = "dbt-spark"
-package_version = "1.3.0b2"
+package_version = "1.4.0a1"
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 

From 0cb958275fa3184dd6cee451f2f8b7719b39e380 Mon Sep 17 00:00:00 2001
From: Chenyu Li <chenyu.li@dbtlabs.com>
Date: Fri, 30 Sep 2022 13:42:32 -0700
Subject: [PATCH 52/54] various improvements (#493)

Co-authored-by: Jeremy Cohen <jeremy@dbtlabs.com>
---
 dbt/adapters/spark/python_submissions.py         | 16 +++++++++++++---
 .../spark/macros/materializations/table.sql      | 10 ++++++----
 tests/functional/adapter/test_python_model.py    | 16 ++++++++++++++++
 3 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/dbt/adapters/spark/python_submissions.py b/dbt/adapters/spark/python_submissions.py
index c6341abed..1e81c572a 100644
--- a/dbt/adapters/spark/python_submissions.py
+++ b/dbt/adapters/spark/python_submissions.py
@@ -31,7 +31,7 @@ def __init__(self, parsed_model: Dict, credentials: SparkCredentials) -> None:
 
     @property
     def cluster_id(self) -> str:
-        return self.parsed_model.get("cluster_id", self.credentials.cluster_id)
+        return self.parsed_model["config"].get("cluster_id", self.credentials.cluster_id)
 
     def get_timeout(self) -> int:
         timeout = self.parsed_model["config"].get("timeout", DEFAULT_TIMEOUT)
@@ -82,7 +82,17 @@ def _submit_job(self, path: str, cluster_spec: dict) -> str:
                 "notebook_path": path,
             },
         }
-        job_spec.update(cluster_spec)
+        job_spec.update(cluster_spec)  # updates 'new_cluster' config
+        # PyPI packages from the model config
+        packages = self.parsed_model["config"].get("packages", [])
+        # library specs in other Databricks formats (e.g. maven, jar), passed through as-is
+        additional_libs = self.parsed_model["config"].get("additional_libs", [])
+        libraries = []
+        for package in packages:
+            libraries.append({"pypi": {"package": package}})
+        for lib in additional_libs:
+            libraries.append(lib)
+        job_spec.update({"libraries": libraries})  # type: ignore
         submit_response = requests.post(
             f"https://{self.credentials.host}/api/2.1/jobs/runs/submit",
             headers=self.auth_header,
@@ -96,7 +106,7 @@ def _submit_job(self, path: str, cluster_spec: dict) -> str:
 
     def _submit_through_notebook(self, compiled_code: str, cluster_spec: dict) -> None:
         # it is safe to call mkdirs even if dir already exists and have content inside
-        work_dir = f"/dbt_python_model/{self.schema}/"
+        work_dir = f"/Shared/dbt_python_model/{self.schema}/"
         self._create_work_dir(work_dir)
         # add notebook
         whole_file_path = f"{work_dir}{self.identifier}"
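
Note on the hunk above: it wires per-model dependencies into the Databricks `runs/submit` payload. Plain PyPI names from the `packages` config become `{"pypi": {"package": ...}}` entries, and any dicts in `additional_libs` are appended untouched. A minimal sketch of that assembly follows; the config values (including the maven coordinate) are hypothetical and only for illustration.

```python
# Sketch only: how the new lines fold model-level dependencies into the job spec.
# The config values here are hypothetical.
config = {
    "packages": ["spacy", "torch"],  # plain PyPI package names
    "additional_libs": [             # full Databricks library specs, appended as-is
        {"maven": {"coordinates": "com.databricks:spark-xml_2.12:0.15.0"}},
    ],
}

libraries = [{"pypi": {"package": p}} for p in config.get("packages", [])]
libraries.extend(config.get("additional_libs", []))

job_spec = {"libraries": libraries}
print(job_spec)
# {'libraries': [{'pypi': {'package': 'spacy'}},
#                {'pypi': {'package': 'torch'}},
#                {'maven': {'coordinates': 'com.databricks:spark-xml_2.12:0.15.0'}}]}
```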
diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql
index 808089105..c82e27e9c 100644
--- a/dbt/include/spark/macros/materializations/table.sql
+++ b/dbt/include/spark/macros/materializations/table.sql
@@ -42,6 +42,8 @@
 dbt = dbtObj(spark.table)
 df = model(dbt, spark)
 
+# make sure pyspark exists in the namespace; on 7.3.x-scala2.12 it is not present by default
+import pyspark
 # make sure pandas exists before using it
 try:
   import pandas
@@ -52,9 +54,9 @@ except ImportError:
 # make sure pyspark.pandas exists before using it
 try:
   import pyspark.pandas
-  pyspark_available = True
+  pyspark_pandas_api_available = True
 except ImportError:
-  pyspark_available = False
+  pyspark_pandas_api_available = False
 
 # make sure databricks.koalas exists before using it
 try:
@@ -66,7 +68,7 @@ except ImportError:
 # preferentially convert pandas DataFrames to pandas-on-Spark or Koalas DataFrames first
 # since they know how to convert pandas DataFrames better than `spark.createDataFrame(df)`
 # and converting from pandas-on-Spark to Spark DataFrame has no overhead
-if pyspark_available and pandas_available and isinstance(df, pandas.core.frame.DataFrame):
+if pyspark_pandas_api_available and pandas_available and isinstance(df, pandas.core.frame.DataFrame):
   df = pyspark.pandas.frame.DataFrame(df)
 elif koalas_available and pandas_available and isinstance(df, pandas.core.frame.DataFrame):
   df = databricks.koalas.frame.DataFrame(df)
@@ -74,7 +76,7 @@ elif koalas_available and pandas_available and isinstance(df, pandas.core.frame.
 # convert to pyspark.sql.dataframe.DataFrame
 if isinstance(df, pyspark.sql.dataframe.DataFrame):
   pass  # since it is already a Spark DataFrame
-elif pyspark_available and isinstance(df, pyspark.pandas.frame.DataFrame):
+elif pyspark_pandas_api_available and isinstance(df, pyspark.pandas.frame.DataFrame):
   df = df.to_spark()
 elif koalas_available and isinstance(df, databricks.koalas.frame.DataFrame):
   df = df.to_spark()
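
The rename from `pyspark_available` to `pyspark_pandas_api_available` separates "pyspark is importable" from "the pandas-on-Spark API is importable", which matters on older runtimes such as 7.3.x-scala2.12. Below is a standalone sketch of the same normalization cascade the template performs; it mirrors the template logic but is not generated code, and the helper name is made up for illustration.

```python
# Sketch of the conversion cascade in the materialization above, written as a
# plain function for readability.
def to_spark_dataframe(df, spark):
    import pyspark       # explicit import: not always in the namespace on older runtimes
    import pyspark.sql   # ensure the submodule attribute is available outside a notebook

    try:
        import pandas
        pandas_available = True
    except ImportError:
        pandas_available = False
    try:
        import pyspark.pandas
        pyspark_pandas_api_available = True
    except ImportError:
        pyspark_pandas_api_available = False
    try:
        import databricks.koalas
        koalas_available = True
    except ImportError:
        koalas_available = False

    # pandas input is converted through pandas-on-Spark or Koalas first, since
    # they convert pandas DataFrames better than spark.createDataFrame(df) and
    # going from there to a Spark DataFrame has no overhead
    if pandas_available and isinstance(df, pandas.core.frame.DataFrame):
        if pyspark_pandas_api_available:
            df = pyspark.pandas.frame.DataFrame(df)
        elif koalas_available:
            df = databricks.koalas.frame.DataFrame(df)

    if isinstance(df, pyspark.sql.dataframe.DataFrame):
        return df
    if pyspark_pandas_api_available and isinstance(df, pyspark.pandas.frame.DataFrame):
        return df.to_spark()
    if koalas_available and isinstance(df, databricks.koalas.frame.DataFrame):
        return df.to_spark()
    if pandas_available and isinstance(df, pandas.core.frame.DataFrame):
        return spark.createDataFrame(df)
    raise TypeError(f"Unsupported model return type: {type(df)}")
```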
diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py
index ed6185b61..140f41621 100644
--- a/tests/functional/adapter/test_python_model.py
+++ b/tests/functional/adapter/test_python_model.py
@@ -20,10 +20,26 @@ def project_config_update(self):
 
 models__simple_python_model = """
 import pandas
+import torch
+import spacy
 
 def model(dbt, spark):
     dbt.config(
         materialized='table',
+        submission_method='job_cluster',
+        job_cluster_config={
+            "spark_version": "7.3.x-scala2.12",
+            "node_type_id": "i3.xlarge",
+            "num_workers": 0,
+            "spark_conf": {
+                "spark.databricks.cluster.profile": "singleNode",
+                "spark.master": "local[*, 4]"
+            },
+            "custom_tags": {
+                "ResourceClass": "SingleNode"
+            }
+        },
+        packages=['spacy', 'torch']
     )
     data = [[1,2]] * 10
     return spark.createDataFrame(data, schema=['test', 'test2'])
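
For reference, the updated fixture exercises the `job_cluster` path end to end: `job_cluster_config` becomes the `new_cluster` spec and `packages` becomes pypi `libraries` in the submitted run. Roughly the payload shape this produces is sketched below; it is an approximation for illustration, not captured output, and the notebook path depends on the target schema and model name.

```python
# Approximate runs/submit payload for the fixture above (illustration only;
# fields such as run_name and timeouts are set by the adapter and omitted here).
approximate_job_spec = {
    "notebook_task": {
        "notebook_path": "/Shared/dbt_python_model/<schema>/<model>",
    },
    "new_cluster": {
        "spark_version": "7.3.x-scala2.12",
        "node_type_id": "i3.xlarge",
        "num_workers": 0,
        "spark_conf": {
            "spark.databricks.cluster.profile": "singleNode",
            "spark.master": "local[*, 4]",
        },
        "custom_tags": {"ResourceClass": "SingleNode"},
    },
    "libraries": [
        {"pypi": {"package": "spacy"}},
        {"pypi": {"package": "torch"}},
    ],
}
```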

From b65e4f366bce7dc98c2e698667fb197faa7cbe48 Mon Sep 17 00:00:00 2001
From: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com>
Date: Mon, 3 Oct 2022 08:04:34 -0700
Subject: [PATCH 53/54] consolidate timestamp macros (#480)

* add mypy ignore to column, connections and init

* consolidate timestamp macros

* add changie

* Update Features-20220926-123609.yaml

* add backcompat to test fixture

* remove current_timestamp_in_utc

* add expected_sql

* remove backcompat from test_timestamps.py

* update dev-requirements

* Update change log body

* lower case timestamps
---
 .../unreleased/Features-20220926-123609.yaml   |  7 +++++++
 dbt/include/spark/macros/adapters.sql          |  4 ----
 dbt/include/spark/macros/utils/timestamps.sql  |  3 +++
 .../adapter/utils/test_timestamps.py           | 18 ++++++++++++++++++
 4 files changed, 28 insertions(+), 4 deletions(-)
 create mode 100644 .changes/unreleased/Features-20220926-123609.yaml
 create mode 100644 dbt/include/spark/macros/utils/timestamps.sql
 create mode 100644 tests/functional/adapter/utils/test_timestamps.py

diff --git a/.changes/unreleased/Features-20220926-123609.yaml b/.changes/unreleased/Features-20220926-123609.yaml
new file mode 100644
index 000000000..b80b3730d
--- /dev/null
+++ b/.changes/unreleased/Features-20220926-123609.yaml
@@ -0,0 +1,7 @@
+kind: Features
+body: Migrate dbt-utils current_timestamp macros into core + adapters
+time: 2022-09-26T12:36:09.319981-07:00
+custom:
+  Author: colin-rogers-dbt
+  Issue: "483"
+  PR: "480"
diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql
index 88190cc04..abc7a0ba3 100644
--- a/dbt/include/spark/macros/adapters.sql
+++ b/dbt/include/spark/macros/adapters.sql
@@ -209,10 +209,6 @@
   {{ return(load_result('list_schemas').table) }}
 {% endmacro %}
 
-{% macro spark__current_timestamp() -%}
-  current_timestamp()
-{%- endmacro %}
-
 {% macro spark__rename_relation(from_relation, to_relation) -%}
   {% call statement('rename_relation') -%}
     {% if not from_relation.type %}
diff --git a/dbt/include/spark/macros/utils/timestamps.sql b/dbt/include/spark/macros/utils/timestamps.sql
new file mode 100644
index 000000000..68d6f6884
--- /dev/null
+++ b/dbt/include/spark/macros/utils/timestamps.sql
@@ -0,0 +1,3 @@
+{% macro spark__current_timestamp() -%}
+    current_timestamp()
+{%- endmacro %}
diff --git a/tests/functional/adapter/utils/test_timestamps.py b/tests/functional/adapter/utils/test_timestamps.py
new file mode 100644
index 000000000..8507c0a6b
--- /dev/null
+++ b/tests/functional/adapter/utils/test_timestamps.py
@@ -0,0 +1,18 @@
+import pytest
+from dbt.tests.adapter.utils.test_timestamps import BaseCurrentTimestamps
+
+
+class TestCurrentTimestampSpark(BaseCurrentTimestamps):
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {"get_current_timestamp.sql": "select {{ current_timestamp() }} as current_timestamp"}
+
+    @pytest.fixture(scope="class")
+    def expected_schema(self):
+        return {
+            "current_timestamp": "timestamp"
+        }
+
+    @pytest.fixture(scope="class")
+    def expected_sql(self):
+        return """select current_timestamp() as current_timestamp"""

From 37dcfe3061c2d7c50621ae1591062fc4fed5e995 Mon Sep 17 00:00:00 2001
From: "V.Shkaberda" <V.Shkaberda@gmail.com>
Date: Wed, 12 Oct 2022 00:56:13 +0300
Subject: [PATCH 54/54] Fix/ldap password (#396)

* Fix password not being passed for thrift + LDAP.

* Add password to tests.

* Add CHANGELOG entry.

* Fixing up changelog entry

Co-authored-by: leahwicz <60146280+leahwicz@users.noreply.github.com>
Co-authored-by: Leah Antkiewicz <leah.antkiewicz@fishtownanalytics.com>
---
 .changes/unreleased/Fixes-20220926-112857.yaml | 7 +++++++
 dbt/adapters/spark/connections.py              | 3 +++
 tests/unit/test_adapter.py                     | 6 ++++--
 3 files changed, 14 insertions(+), 2 deletions(-)
 create mode 100644 .changes/unreleased/Fixes-20220926-112857.yaml

diff --git a/.changes/unreleased/Fixes-20220926-112857.yaml b/.changes/unreleased/Fixes-20220926-112857.yaml
new file mode 100644
index 000000000..2a18f13ac
--- /dev/null
+++ b/.changes/unreleased/Fixes-20220926-112857.yaml
@@ -0,0 +1,7 @@
+kind: Fixes
+body: Password is not passed to the server when using an LDAP connection via thrift (#310)
+time: 2022-09-26T11:28:57.306285-04:00
+custom:
+  Author: VShkaberda
+  Issue: "310"
+  PR: "396"
diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py
index 951e8ed70..66ca93d30 100644
--- a/dbt/adapters/spark/connections.py
+++ b/dbt/adapters/spark/connections.py
@@ -65,6 +65,7 @@ class SparkCredentials(Credentials):
     endpoint: Optional[str] = None
     token: Optional[str] = None
     user: Optional[str] = None
+    password: Optional[str] = None
     port: int = 443
     auth: Optional[str] = None
     kerberos_service_name: Optional[str] = None
@@ -375,6 +376,7 @@ def open(cls, connection):
                             username=creds.user,
                             auth=creds.auth,
                             kerberos_service_name=creds.kerberos_service_name,
+                            password=creds.password,
                         )
                         conn = hive.connect(thrift_transport=transport)
                     else:
@@ -384,6 +386,7 @@ def open(cls, connection):
                             username=creds.user,
                             auth=creds.auth,
                             kerberos_service_name=creds.kerberos_service_name,
+                            password=creds.password,
                         )  # noqa
                     handle = PyhiveConnectionWrapper(conn)
                 elif creds.method == SparkConnectionMethod.ODBC:
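
With `password` now part of `SparkCredentials` and forwarded on both thrift code paths, an LDAP profile can actually authenticate. As a rough illustration only, this mirrors the PyHive call the thrift path ends up making; all values are placeholders, not adapter code.

```python
# Illustration only: roughly the PyHive call made when a thrift profile
# supplies user + password with LDAP auth. All values are placeholders.
from pyhive import hive

conn = hive.connect(
    host="myorg.sparkhost.com",
    port=10001,
    username="dbt",
    auth="LDAP",                 # LDAP/CUSTOM auth requires a password
    kerberos_service_name=None,
    password="<ldap-password>",  # previously dropped; now passed through
)
cursor = conn.cursor()
cursor.execute("select 1")
```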
diff --git a/tests/unit/test_adapter.py b/tests/unit/test_adapter.py
index f87a89b2b..53b95f731 100644
--- a/tests/unit/test_adapter.py
+++ b/tests/unit/test_adapter.py
@@ -154,12 +154,13 @@ def test_thrift_connection(self):
         config = self._get_target_thrift(self.project_cfg)
         adapter = SparkAdapter(config)
 
-        def hive_thrift_connect(host, port, username, auth, kerberos_service_name):
+        def hive_thrift_connect(host, port, username, auth, kerberos_service_name, password):
             self.assertEqual(host, 'myorg.sparkhost.com')
             self.assertEqual(port, 10001)
             self.assertEqual(username, 'dbt')
             self.assertIsNone(auth)
             self.assertIsNone(kerberos_service_name)
+            self.assertIsNone(password)
 
         with mock.patch.object(hive, 'connect', new=hive_thrift_connect):
             connection = adapter.acquire_connection('dummy')
@@ -193,12 +194,13 @@ def test_thrift_connection_kerberos(self):
         config = self._get_target_thrift_kerberos(self.project_cfg)
         adapter = SparkAdapter(config)
 
-        def hive_thrift_connect(host, port, username, auth, kerberos_service_name):
+        def hive_thrift_connect(host, port, username, auth, kerberos_service_name, password):
             self.assertEqual(host, 'myorg.sparkhost.com')
             self.assertEqual(port, 10001)
             self.assertEqual(username, 'dbt')
             self.assertEqual(auth, 'KERBEROS')
             self.assertEqual(kerberos_service_name, 'hive')
+            self.assertIsNone(password)
 
         with mock.patch.object(hive, 'connect', new=hive_thrift_connect):
             connection = adapter.acquire_connection('dummy')