From 31cb05e7d7dc6e5e63b3027a66428f22d40f86ce Mon Sep 17 00:00:00 2001
From: Colin
Date: Wed, 18 Oct 2023 16:54:42 -0700
Subject: [PATCH] add databricks and PR execution protections

---
Review notes (text between "---" and the first "diff --git" is not part of
the commit message):

- The post-image blob ids on the "index" lines were not regenerated after the
  review fixes in this patch; regenerate with `git format-patch` if they matter.
- test-metadata `outputs.matrix` reads `steps.generate-matrix`, but no step with
  that id is added by this patch -- TODO confirm or define it.
- The job-level `if:` guards test for `pull_request_target`, while the trigger
  visible in the hunk context is `pull_request:` -- TODO confirm the intended
  trigger.

 .github/scripts/update_dbt_core_branch.sh |  20 +++
 .github/scripts/update_release_branch.sh  |  11 ++
 .github/workflows/integration.yml         | 193 +++++++++++++++++++++-
 3 files changed, 215 insertions(+), 9 deletions(-)
 create mode 100755 .github/scripts/update_dbt_core_branch.sh
 create mode 100644 .github/scripts/update_release_branch.sh

diff --git a/.github/scripts/update_dbt_core_branch.sh b/.github/scripts/update_dbt_core_branch.sh
new file mode 100755
index 000000000..d28a40c35
--- /dev/null
+++ b/.github/scripts/update_dbt_core_branch.sh
@@ -0,0 +1,20 @@
+#!/bin/bash -e
+set -e
+
+git_branch=$1
+target_req_file="dev-requirements.txt"
+core_req_sed_pattern="s|dbt-core.git.*#egg=dbt-core|dbt-core.git@${git_branch}#egg=dbt-core|g"
+postgres_req_sed_pattern="s|dbt-core.git.*#egg=dbt-postgres|dbt-core.git@${git_branch}#egg=dbt-postgres|g"
+tests_req_sed_pattern="s|dbt-core.git.*#egg=dbt-tests|dbt-core.git@${git_branch}#egg=dbt-tests|g"
+if [[ "$OSTYPE" == darwin* ]]; then
+  # macOS ships BSD sed, whose -i flag requires an explicit (here empty) backup-suffix argument
+  sed -i "" "$core_req_sed_pattern" $target_req_file
+  sed -i "" "$postgres_req_sed_pattern" $target_req_file
+  sed -i "" "$tests_req_sed_pattern" $target_req_file
+else
+  sed -i "$core_req_sed_pattern" $target_req_file
+  sed -i "$postgres_req_sed_pattern" $target_req_file
+  sed -i "$tests_req_sed_pattern" $target_req_file
+fi
+core_version=$(curl "https://raw.githubusercontent.com/dbt-labs/dbt-core/${git_branch}/core/dbt/version.py" | grep "__version__ = *"|cut -d'=' -f2)
+bumpversion --allow-dirty --new-version "$core_version" major
diff --git a/.github/scripts/update_release_branch.sh b/.github/scripts/update_release_branch.sh
new file mode 100644
index 000000000..75b9ccef6
--- /dev/null
+++ b/.github/scripts/update_release_branch.sh
@@ -0,0 +1,11 @@
+#!/bin/bash -e
+set -e
+
+release_branch=$1
+target_req_file=".github/workflows/nightly-release.yml"
+if [[ "$OSTYPE" == darwin* ]]; then
+  # macOS ships BSD sed, whose -i flag requires an explicit (here empty) backup-suffix argument
+  sed -i "" "s|[0-9]\.[0-9]\.latest|$release_branch|" $target_req_file
+else
+  sed -i "s|[0-9]\.[0-9]\.latest|$release_branch|" $target_req_file
+fi
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index f4c34c5fb..684bcfab5 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -18,6 +18,11 @@ on:
       - "releases/*"
   pull_request:
   workflow_dispatch:
+    inputs:
+      dbt-core-branch:
+        description: "branch of dbt-core to use in dev-requirements.txt"
+        required: false
+        type: string
 
 # explicitly turn off permissions for `GITHUB_TOKEN`
 permissions: read-all
@@ -32,8 +37,60 @@ defaults:
     shell: bash
 
 jobs:
-  tests:
-    name: test with python ${{ matrix.python-version }}
+  # generate test metadata about what files changed and the testing matrix to use
+  test-metadata:
+    # run if not a PR from a forked repository or has a label to mark as safe to test
+    if: >-
+      github.event_name != 'pull_request_target' ||
+      github.event.pull_request.head.repo.full_name == github.repository ||
+      contains(github.event.pull_request.labels.*.name, 'ok to test')
+    runs-on: ubuntu-latest
+
+    outputs:
+      matrix: ${{ steps.generate-matrix.outputs.result }}
+      run-python-tests: ${{ steps.get-changes.outputs.spark }}
+
+    steps:
+      - name: Check out the repository (non-PR)
+        if: github.event_name != 'pull_request_target'
+        uses: actions/checkout@v3
+        with:
+          persist-credentials: false
+
+      - name: Check out the repository (PR)
+        if: github.event_name == 'pull_request_target'
+        uses: actions/checkout@v3
+        with:
+          persist-credentials: false
+          ref: ${{ github.event.pull_request.head.sha }}
+      - name: Check if relevant files changed
+        if: github.event_name == 'pull_request_target'
+        # https://github.com/marketplace/actions/paths-changes-filter
+        # For each filter, it sets output variable named by the filter to the text:
+        # 'true' - if any of changed files matches any of filter rules
+        # 'false' - if none of changed files matches any of filter rules
+        # also, returns:
+        # `changes` - JSON array with names of all filters matching any of the changed files
+        uses: dorny/paths-filter@v2
+        id: get-changes
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          filters: |
+            spark:
+              - 'dbt/**'
+              - 'tests/**'
+              - 'dev-requirements.txt'
+  local-tests:
+    name: test spark local against python ${{ matrix.python-version }}
+
+    # run if not a PR from a forked repository or has a label to mark as safe to test
+    # also checks that the matrix generated is not empty
+    if: >-
+      (
+        github.event_name != 'pull_request_target' ||
+        github.event.pull_request.head.repo.full_name == github.repository ||
+        contains(github.event.pull_request.labels.*.name, 'ok to test')
+      )
 
     runs-on: ubuntu-latest
 
@@ -45,31 +102,149 @@ jobs:
           - "3.9"
           - "3.10"
           - "3.11"
+        test:
+          - "spark-thrift"
+          - "spark-session"
 
     env:
-      TOXENV: "unit"
       PYTEST_ADDOPTS: "-v --color=yes --csv test_results.csv"
+      DBT_INVOCATION_ENV: github-actions
+      DD_CIVISIBILITY_AGENTLESS_ENABLED: true
+      DD_API_KEY: ${{ secrets.DATADOG_API_KEY }}
+      DD_SITE: datadoghq.com
+      DD_ENV: ci
+      DD_SERVICE: ${{ github.event.repository.name }}
 
     steps:
       - name: Check out the repository
+        if: github.event_name != 'pull_request_target'
         uses: actions/checkout@v3
+        with:
+          persist-credentials: false
+
+      # explicitly checkout the branch for the PR,
+      # this is necessary for the `pull_request_target` event
+      - name: Check out the repository (PR)
+        if: github.event_name == 'pull_request_target'
+        uses: actions/checkout@v3
+        with:
+          persist-credentials: false
+          ref: ${{ github.event.pull_request.head.sha }}
 
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
 
+      - name: Install python dependencies
+        run: |
+          python -m pip install --user --upgrade pip
+          python -m pip install tox
+          python -m pip --version
+          tox --version
+
+      - name: Update dev-requirements.txt
+        if: inputs.dbt-core-branch != ''
+        run: |
+          pip install bumpversion
+          ./.github/scripts/update_dbt_core_branch.sh ${{ inputs.dbt-core-branch }}
+
       - uses: isbang/compose-action@v1.5.1
+        if: ${{ matrix.test == 'spark-thrift'}}
         with:
           compose-file: "./docker-compose.yml"
 
-      - name: Install tox
+      - name: Run tox for Spark ${{ matrix.test }}
+        run: tox -e integration-${{ matrix.test }}
+
+  databricks-tests:
+    name: test spark databricks against python ${{ matrix.python-version }}
+    # run if not a PR from a forked repository or has a label to mark as safe to test
+    # also checks that the matrix generated is not empty
+    if: >-
+      (
+        github.event_name != 'pull_request_target' ||
+        github.event.pull_request.head.repo.full_name == github.repository ||
+        contains(github.event.pull_request.labels.*.name, 'ok to test')
+      )
+
+    runs-on: ubuntu-latest
+    container:
+      image: "fishtownanalytics/test-container:10"
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version:
+          - "3.8"
+          - "3.9"
+          - "3.10"
+          - "3.11"
+        test:
+          - "databricks-odbc-sql-endpoint"
+          - "databricks-odbc-cluster"
+          - "spark-databricks-http"
+
+    env:
+      PYTEST_ADDOPTS: "-v --color=yes --csv test_results.csv"
+      DBT_INVOCATION_ENV: github-actions
+      DD_CIVISIBILITY_AGENTLESS_ENABLED: true
+      DD_API_KEY: ${{ secrets.DATADOG_API_KEY }}
+      DD_SITE: datadoghq.com
+      DD_ENV: ci
+      DD_SERVICE: ${{ github.event.repository.name }}
+      DBT_DATABRICKS_CLUSTER_NAME: ${{ secrets.DBT_DATABRICKS_CLUSTER_NAME }}
+      DBT_DATABRICKS_HOSTNAME: ${{ secrets.DBT_DATABRICKS_HOST }}
+      DBT_DATABRICKS_ENDPOINT: ${{ secrets.DBT_DATABRICKS_ENDPOINT }}
+      DBT_DATABRICKS_TOKEN: ${{ secrets.DBT_DATABRICKS_TOKEN }}
+      DBT_DATABRICKS_USER: ${{ secrets.DBT_DATABRICKS_USER }}
+
+    steps:
+      - name: Check out the repository
+        if: github.event_name != 'pull_request_target'
+        uses: actions/checkout@v3
+        with:
+          persist-credentials: false
+
+      # explicitly checkout the branch for the PR,
+      # this is necessary for the `pull_request_target` event
+      - name: Check out the repository (PR)
+        if: github.event_name == 'pull_request_target'
+        uses: actions/checkout@v3
+        with:
+          persist-credentials: false
+          ref: ${{ github.event.pull_request.head.sha }}
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install python dependencies
         run: |
-          python -m pip install --upgrade pip
+          python -m pip install --user --upgrade pip
           python -m pip install tox
+          python -m pip --version
+          tox --version
 
-      - name: Run tox for Spark session
-        run: tox -e integration-spark-session
+      - name: Update dev-requirements.txt
+        if: inputs.dbt-core-branch != ''
+        run: |
+          pip install bumpversion
+          ./.github/scripts/update_dbt_core_branch.sh ${{ inputs.dbt-core-branch }}
 
-      - name: Run tox for Spark thrift
-        run: tox -e integration-spark-thrift
+      - name: Configure ODBC
+        if: ${{ matrix.test != 'spark-databricks-http' }}
+        run: |
+          apt-get update && apt-get install -y --no-install-recommends \
+            g++ \
+            unixodbc-dev \
+            unzip
+
+          unzip /tmp/simba_odbc.zip -d /tmp/ \
+            && dpkg -i /tmp/SimbaSparkODBC-*/*.deb \
+            && printf '[Simba]\nDriver = /opt/simba/spark/lib/64/libsparkodbc_sb64.so\n' >> /etc/odbcinst.ini \
+            && rm /tmp/simba_odbc.zip \
+            && rm -rf /tmp/SimbaSparkODBC*
+
+      - name: Run tox for Spark ${{ matrix.test }}
+        run: tox -e integration-${{ matrix.test }}