From 40725a39ab80202a7e83343a7bc1e6e9a3dd0397 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arthur=20Gusm=C3=A3o?= Date: Mon, 13 May 2024 18:48:14 -0300 Subject: [PATCH 1/9] feat: add run_elementary_models step --- .github/workflows/elementary.yaml | 21 +++ .../scripts/triggers_flow_execution.py | 166 ++++++++++++++++++ 2 files changed, 187 insertions(+) create mode 100644 .github/workflows/scripts/triggers_flow_execution.py diff --git a/.github/workflows/elementary.yaml b/.github/workflows/elementary.yaml index 594155d1..c0a05ab7 100644 --- a/.github/workflows/elementary.yaml +++ b/.github/workflows/elementary.yaml @@ -34,3 +34,24 @@ jobs: with: name: edr.log path: edr.log + run_elementary_models: + needs: elementary + name: Run elementary model + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + ref: ${{ github.head_ref }} + - name: Set up poetry + run: pipx install poetry + - name: Set up python + uses: actions/setup-python@v4 + with: + cache: poetry + python-version: '3.9' + - name: Install requirements + run: poetry install --only=dev + - name: Run script to test DBT model + run: |- + poetry run python .github/workflows/scripts/triggers_flow_execution.py --dataset-id elementary --graphql-url ${{ secrets.BACKEND_GRAPHQL_URL }} --prefect-backend-token ${{ secrets.PREFECT_BACKEND_TOKEN }} diff --git a/.github/workflows/scripts/triggers_flow_execution.py b/.github/workflows/scripts/triggers_flow_execution.py new file mode 100644 index 00000000..b34295c5 --- /dev/null +++ b/.github/workflows/scripts/triggers_flow_execution.py @@ -0,0 +1,166 @@ +from argparse import ArgumentParser +from time import sleep +import re +from backend import Backend +from utils import expand_alls, get_datasets_tables_from_modified_files +from scripts.table_test import get_flow_run_state, get_materialization_flow_id, get_flow_status_logs + +if __name__ == "__main__": + # Start argument parser + arg_parser = ArgumentParser() + + # Add GraphQL URL argument + arg_parser.add_argument( + "--graphql-url", + type=str, + required=True, + help="URL of the GraphQL endpoint.", + ) + + # Add list of modified files argument + arg_parser.add_argument( + "--dataset-id", + type=str, + required=True, + help="Table id", + ) + arg_parser.add_argument( + "--table-id", + type=str, + required=False, + default="", + help="Table id", + ) + + arg_parser.add_argument( + "--alias", + type=str, + required=False, + default="False", + help="DBT alias", + ) + + # Add Prefect backend URL argument + arg_parser.add_argument( + "--prefect-backend-url", + type=str, + required=False, + default="https://prefect.basedosdados.org/api", + help="Prefect backend URL.", + ) + + # Add prefect base URL argument + arg_parser.add_argument( + "--prefect-base-url", + type=str, + required=False, + default="https://prefect.basedosdados.org", + help="Prefect base URL.", + ) + + # Add Prefect API token argument + arg_parser.add_argument( + "--prefect-backend-token", + type=str, + required=True, + help="Prefect backend token.", + ) + + # Add materialization mode argument + arg_parser.add_argument( + "--materialization-mode", + type=str, + required=False, + default="prod", + help="Materialization mode.", + ) + + # Add materialization label argument + arg_parser.add_argument( + "--materialization-label", + type=str, + required=False, + default="basedosdados", + help="Materialization label.", + ) + + # Add dbt command label argument + arg_parser.add_argument( + "--dbt-command", + type=str, + required=False, + default = "run", + help="Materialization label.", + ) + + # Get arguments + args = arg_parser.parse_args() + # Expand `__all__` tables + backend = Backend(args.graphql_url) + + # Launch materialization flows + backend = Backend(args.prefect_backend_url) + flow_id = get_materialization_flow_id(backend, args.prefect_backend_token) + launched_flow_run_ids = [] + print( + f"Launching materialization flow for {args.dataset_id}.{args.table_id}" + ) + parameters = { + "dataset_id": args.dataset_id, + "dbt_alias": args.alias, + "mode": args.materialization_mode, + "table_id": args.table_id, + "dbt_command": args.dbt_command, + "disable_elementary": False, + } + + mutation = """ + mutation ($flow_id: UUID, $parameters: JSON, $label: String!) { + create_flow_run (input: { + flow_id: $flow_id, + parameters: $parameters, + labels: [$label], + }) { + id + } + } + """ + variables = { + "flow_id": flow_id, + "parameters": parameters, + "label": args.materialization_label, + } + + response = backend._execute_query( + mutation, + variables, + headers={"Authorization": f"Bearer {args.prefect_backend_token}"}, + ) + + flow_run_id = response["create_flow_run"]["id"] + launched_flow_run_ids.append(flow_run_id) + flow_run_url = f"{args.prefect_base_url}/flow-run/{flow_run_id}" + print(f" - Materialization flow run launched: {flow_run_url}") + + # Keep monitoring the launched flow runs until they are finished + for launched_flow_run_id in launched_flow_run_ids: + print(f"Monitoring flow run {launched_flow_run_id}...") + flow_run_state = get_flow_run_state( + flow_run_id=launched_flow_run_id, + backend=backend, + auth_token=args.prefect_backend_token, + ) + while flow_run_state not in ["Success", "Failed", "Cancelled"]: + sleep(5) + flow_run_state = get_flow_run_state( + flow_run_id=launched_flow_run_id, + backend=backend, + auth_token=args.prefect_backend_token, + ) + if flow_run_state != "Success": + raise Exception( + f'Flow run {launched_flow_run_id} finished with state "{flow_run_state}". ' + f"Check the logs at {args.prefect_base_url}/flow-run/{launched_flow_run_id}" + ) + else: + print("Congrats! Everything seems fine!") From d68b540e355923b08a672302a9d4a8af6760bd2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arthur=20Gusm=C3=A3o?= Date: Mon, 13 May 2024 18:53:04 -0300 Subject: [PATCH 2/9] feat: add 90 days-back parameter --- .github/workflows/elementary.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/elementary.yaml b/.github/workflows/elementary.yaml index c0a05ab7..53bd9370 100644 --- a/.github/workflows/elementary.yaml +++ b/.github/workflows/elementary.yaml @@ -18,8 +18,8 @@ jobs: warehouse-type: bigquery adapter-version: 1.5.9 profiles-yml: ${{ secrets.ELEMENTARY_PROFILES_YML }} - edr-command: edr report --file-path "report.html" && edr send-report --google-service-account-path - "/tmp/gcs_keyfile.json" --gcs-bucket-name "basedosdados" --update-bucket-website + edr-command: edr report --file-path "report.html" --days-back 90 && edr send-report --google-service-account-path + "/tmp/gcs_keyfile.json" --gcs-bucket-name "basedosdados" --update-bucket-website --days-back 90 "true" bigquery-keyfile: ${{ secrets.BIGQUERY_KEYFILE }} gcs-keyfile: ${{ secrets.GCS_KEYFILE }} From 67881aa5496077af72c7ec3a4ab85bdd0b79439e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arthur=20Gusm=C3=A3o?= Date: Tue, 14 May 2024 17:11:23 -0300 Subject: [PATCH 3/9] feat: divides the action into two --- .github/workflows/elementary.yaml | 23 +------------- .../workflows/triggers_elementary_model.yml | 31 +++++++++++++++++++ 2 files changed, 32 insertions(+), 22 deletions(-) create mode 100644 .github/workflows/triggers_elementary_model.yml diff --git a/.github/workflows/elementary.yaml b/.github/workflows/elementary.yaml index 53bd9370..5ce9da90 100644 --- a/.github/workflows/elementary.yaml +++ b/.github/workflows/elementary.yaml @@ -33,25 +33,4 @@ jobs: uses: actions/upload-artifact@v3 with: name: edr.log - path: edr.log - run_elementary_models: - needs: elementary - name: Run elementary model - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v3 - with: - ref: ${{ github.head_ref }} - - name: Set up poetry - run: pipx install poetry - - name: Set up python - uses: actions/setup-python@v4 - with: - cache: poetry - python-version: '3.9' - - name: Install requirements - run: poetry install --only=dev - - name: Run script to test DBT model - run: |- - poetry run python .github/workflows/scripts/triggers_flow_execution.py --dataset-id elementary --graphql-url ${{ secrets.BACKEND_GRAPHQL_URL }} --prefect-backend-token ${{ secrets.PREFECT_BACKEND_TOKEN }} + path: edr.log \ No newline at end of file diff --git a/.github/workflows/triggers_elementary_model.yml b/.github/workflows/triggers_elementary_model.yml new file mode 100644 index 00000000..7d904816 --- /dev/null +++ b/.github/workflows/triggers_elementary_model.yml @@ -0,0 +1,31 @@ + --- +name: Triggers Elementary Models +on: + push: + branches: [main, master] + schedule: + - cron: 00 23 * * 1 + workflow_dispatch: + +jobs: + run_elementary_models: + needs: elementary + name: Run elementary model + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + ref: ${{ github.head_ref }} + - name: Set up poetry + run: pipx install poetry + - name: Set up python + uses: actions/setup-python@v4 + with: + cache: poetry + python-version: '3.9' + - name: Install requirements + run: poetry install --only=dev + - name: Run script to test DBT model + run: |- + poetry run python .github/workflows/scripts/triggers_flow_execution.py --dataset-id elementary --graphql-url ${{ secrets.BACKEND_GRAPHQL_URL }} --prefect-backend-token ${{ secrets.PREFECT_BACKEND_TOKEN }} From 4b235914c04e8fcebc4d938a37bc64a3d5f4f846 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arthur=20Gusm=C3=A3o?= Date: Tue, 14 May 2024 17:13:09 -0300 Subject: [PATCH 4/9] fix: typo --- .github/workflows/elementary.yaml | 2 +- .github/workflows/triggers_elementary_model.yml | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/elementary.yaml b/.github/workflows/elementary.yaml index 5ce9da90..eaa03bac 100644 --- a/.github/workflows/elementary.yaml +++ b/.github/workflows/elementary.yaml @@ -33,4 +33,4 @@ jobs: uses: actions/upload-artifact@v3 with: name: edr.log - path: edr.log \ No newline at end of file + path: edr.log diff --git a/.github/workflows/triggers_elementary_model.yml b/.github/workflows/triggers_elementary_model.yml index 7d904816..6b716bc0 100644 --- a/.github/workflows/triggers_elementary_model.yml +++ b/.github/workflows/triggers_elementary_model.yml @@ -9,7 +9,6 @@ on: jobs: run_elementary_models: - needs: elementary name: Run elementary model runs-on: ubuntu-latest steps: From b116bce212ce1027772c17c36c31b527d12e40d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arthur=20Gusm=C3=A3o?= Date: Tue, 14 May 2024 17:21:10 -0300 Subject: [PATCH 5/9] fix: typo --- .github/workflows/triggers_elementary_model.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/triggers_elementary_model.yml b/.github/workflows/triggers_elementary_model.yml index 6b716bc0..f03c051f 100644 --- a/.github/workflows/triggers_elementary_model.yml +++ b/.github/workflows/triggers_elementary_model.yml @@ -1,4 +1,4 @@ - --- +--- name: Triggers Elementary Models on: push: From c8b1f717dab84699f5bd511b126765abb09ada63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arthur=20Gusm=C3=A3o?= Date: Tue, 14 May 2024 17:27:21 -0300 Subject: [PATCH 6/9] fix: typo --- ...iggers_elementary_model.yml => triggers-elementary-model.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{triggers_elementary_model.yml => triggers-elementary-model.yaml} (100%) diff --git a/.github/workflows/triggers_elementary_model.yml b/.github/workflows/triggers-elementary-model.yaml similarity index 100% rename from .github/workflows/triggers_elementary_model.yml rename to .github/workflows/triggers-elementary-model.yaml From f2756d0c51f896237dbb6a4199a35bbc315f1e5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arthur=20Gusm=C3=A3o?= Date: Tue, 14 May 2024 17:30:03 -0300 Subject: [PATCH 7/9] feat: add pr trigger --- .github/workflows/triggers-elementary-model.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/triggers-elementary-model.yaml b/.github/workflows/triggers-elementary-model.yaml index f03c051f..3571db27 100644 --- a/.github/workflows/triggers-elementary-model.yaml +++ b/.github/workflows/triggers-elementary-model.yaml @@ -3,6 +3,8 @@ name: Triggers Elementary Models on: push: branches: [main, master] + pull_request: + branches: [main, master] schedule: - cron: 00 23 * * 1 workflow_dispatch: From 6e3c12014c0391689468e5c7240080b03bf56354 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arthur=20Gusm=C3=A3o?= Date: Tue, 14 May 2024 17:32:07 -0300 Subject: [PATCH 8/9] fix: fix module error --- .github/workflows/scripts/triggers_flow_execution.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scripts/triggers_flow_execution.py b/.github/workflows/scripts/triggers_flow_execution.py index b34295c5..8d08ad76 100644 --- a/.github/workflows/scripts/triggers_flow_execution.py +++ b/.github/workflows/scripts/triggers_flow_execution.py @@ -3,7 +3,7 @@ import re from backend import Backend from utils import expand_alls, get_datasets_tables_from_modified_files -from scripts.table_test import get_flow_run_state, get_materialization_flow_id, get_flow_status_logs +from table_test import get_flow_run_state, get_materialization_flow_id, get_flow_status_logs if __name__ == "__main__": # Start argument parser From c5f31ef4d0ceabbb3ccadaccc7e6ec67ff23b496 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arthur=20Gusm=C3=A3o?= Date: Tue, 14 May 2024 17:36:28 -0300 Subject: [PATCH 9/9] feat: rm pr trigger --- .github/workflows/triggers-elementary-model.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/triggers-elementary-model.yaml b/.github/workflows/triggers-elementary-model.yaml index 3571db27..f03c051f 100644 --- a/.github/workflows/triggers-elementary-model.yaml +++ b/.github/workflows/triggers-elementary-model.yaml @@ -3,8 +3,6 @@ name: Triggers Elementary Models on: push: branches: [main, master] - pull_request: - branches: [main, master] schedule: - cron: 00 23 * * 1 workflow_dispatch: