Skip to content

Add Tests and best practices #51

Add Tests and best practices

Add Tests and best practices #51

name: 🔄 dbt Transform & Governance Checks"
on: # yamllint disable-line rule:truthy
pull_request:
paths:
- transform/*
- transform/**/*
# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:
# This cancels a run if another change is pushed to the same branch
concurrency:
group: transform-${{ github.ref }}
cancel-in-progress: true
jobs:
dbt:
name: Pull Request dbt Tests
runs-on: ubuntu-latest
# Set environment variables in
# https://github.com//<your org>/<your repo>/settings/variables/actions
#
# Alternatively, You can define multiple ENV for different workflows.
# https://github.com/<org>/<repo>/settings/environments
# environment: PR_ENV
# most people should use this one
container: datacoves/ci-basic-dbt-snowflake:3.2
defaults:
run:
working-directory: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/transform
env:
DBT_PROFILES_DIR: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/automate/dbt
DATACOVES__DBT_HOME: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/transform
DATACOVES__YAML_DAGS_FOLDER: /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}/schedule
DATACOVES__MAIN__ACCOUNT: ${{ vars.DATACOVES__MAIN__ACCOUNT }}
DATACOVES__MAIN__DATABASE: ${{ vars.DATACOVES__MAIN__DATABASE }}_PR_${{ github.event.number }}
DATACOVES__MAIN__SCHEMA: ${{ vars.DATACOVES__MAIN__SCHEMA }}
DATACOVES__MAIN__ROLE: ${{ vars.DATACOVES__MAIN__ROLE }}
DATACOVES__MAIN__WAREHOUSE: ${{ vars.DATACOVES__MAIN__WAREHOUSE }}
DATACOVES__MAIN__USER: ${{ vars.DATACOVES__MAIN__USER }}
DATACOVES__MAIN__PASSWORD: ${{ secrets.DATACOVES__MAIN__PASSWORD }}
# This is used by datacoves to drop the test database if permissions
# cannot be applied when using the Datacoves permifrost security model.
DATACOVES__DROP_DB_ON_FAIL: ${{ vars.DATACOVES__DROP_DB_ON_FAIL }}
steps:
- name: Checkout branch
uses: actions/[email protected]
with:
fetch-depth: 0
ref: ${{ github.event.pull_request.head.sha }}
- name: Set Secure Directory
run: git config --global --add safe.directory /__w/${{ github.event.repository.name }}/${{ github.event.repository.name }}
- name: List of files changed
run: "git diff origin/${{ github.event.pull_request.base.ref }} HEAD --name-status"
- name: Install dbt packages
run: "dbt deps"
- name: Create PR database
run: "dbt --no-write-json run-operation create_database"
- name: Get prod manifest
id: prod_manifest
run: "../automate/dbt/get_artifacts.sh"
##### Governance Checks
# this first runs dbt but creates enpty tables, this is enough to then run the hooks and fail fast
# There is an issue with --empty and dynamic tables so need to exclude them
- name: Governance run of dynamic tables
run: "dbt build --fail-fast -s config.materialized:dynamic_table --defer --state logs"
# There is an issue with --empty and dynamic tables so need to exclude them
- name: Governance run of dbt with EMPTY models using slim mode
if: ${{ steps.prod_manifest.outputs.manifest_found == 'true' && contains(github.event.pull_request.labels.*.name, 'full-refresh') != true }}
run: "dbt build --fail-fast --defer --state logs --select state:modified+ --empty --exclude config.materialized:dynamic_table"
# There is an issue with --empty and dynamic tables so need to exclude
- name: Governance run of dbt with EMPTY models using full run
if: ${{ steps.prod_manifest.outputs.manifest_found == 'false' || contains(github.event.pull_request.labels.*.name, 'full-refresh') }}
run: "dbt build --fail-fast --empty --exclude config.materialized:dynamic_table"
- name: Generate Docs Combining Prod and branch catalog.json
run: "dbt-coves generate docs --merge-deferred --state logs"
- name: Run governance checks
run: "pre-commit run --from-ref origin/${{ github.event.pull_request.base.ref }} --to-ref HEAD"
##### Real dbt run given that we passed governance checks
- name: Run dbt build slim mode
if: ${{ steps.prod_manifest.outputs.manifest_found == 'true' && contains(github.event.pull_request.labels.*.name, 'full-refresh') != true }}
run: "dbt build --fail-fast --defer --state logs --select state:modified+"
- name: Run dbt build full run
if: ${{ steps.prod_manifest.outputs.manifest_found == 'false' || contains(github.event.pull_request.labels.*.name, 'full-refresh') }}
run: "dbt build --fail-fast"
- name: Grant access to PR database
id: grant-access-to-database
run: "dbt --no-write-json run-operation grant_access_to_pr_database"
# We drop the database when there is a failure to grant access to the db because
# most likely the schema was not set properly in dbt_project.yml so models built to default schema
- name: Drop PR database on Failure to grant security access
if: always() && (env.DATACOVES__DROP_DB_ON_FAIL == 'true') && (steps.grant-access-to-database.outcome == 'failure')
run: "dbt --no-write-json run-operation drop_recreate_db --args '{db_name: ${{env.DATACOVES__MAIN__DATABASE}}, recreate: False}'" # yamllint disable-line rule:line-length