From 48ac47ca95718adb398c747b05bc12e634d3bdf4 Mon Sep 17 00:00:00 2001 From: Laura Amaral Date: Thu, 7 Mar 2024 11:31:10 -0300 Subject: [PATCH 1/4] fix: parquet header files in table_approve --- .github/workflows/scripts/table_approve.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/scripts/table_approve.py b/.github/workflows/scripts/table_approve.py index 6c6fe347..3c18c133 100644 --- a/.github/workflows/scripts/table_approve.py +++ b/.github/workflows/scripts/table_approve.py @@ -111,7 +111,7 @@ def push_table_to_bq( Dataset(dataset_id).update(mode="prod") delete_storage_path = file_path.replace("./downloaded_data/", "") print( - f"DELETE HEADER FILE FROM basedosdados/staing/{dataset_id}_staging/{table_id}/{delete_storage_path}" + f"DELETE HEADER FILE FROM basedosdados/staging/{dataset_id}_staging/{table_id}/{delete_storage_path}" ) st = Storage(dataset_id=dataset_id, table_id=table_id) st.delete_file(filename=delete_storage_path, mode="staging") @@ -146,14 +146,6 @@ def save_header_files(dataset_id, table_id): print("Found blob: ", str(blob.name)) print("Renamed blob: ", blob_path) break - ### save table header in storage - - print(f"DOWNLOAD HEADER FILE FROM basedosdados-dev.{dataset_id}_staging.{table_id}") - query = f""" - SELECT * FROM `basedosdados-dev.{dataset_id}_staging.{table_id}` LIMIT 1 - """ - df = bd.read_sql(query, billing_project_id="basedosdados", from_file=True) - df = df.drop(columns=partitions) file_name = blob_path.split("/")[-1] file_type = file_name.split(".")[-1] @@ -161,12 +153,20 @@ def save_header_files(dataset_id, table_id): path = Path(blob_path.replace(f"/{file_name}", "")) path.mkdir(parents=True, exist_ok=True) + ### save table header in storage if file_type == "csv": + print(f"DOWNLOAD HEADER FILE FROM basedosdados-dev.{dataset_id}_staging.{table_id}") + query = f""" + SELECT * FROM `basedosdados-dev.{dataset_id}_staging.{table_id}` LIMIT 1 + """ + df = bd.read_sql(query, billing_project_id="basedosdados", from_file=True) + df = df.drop(columns=partitions) + file_path = f"./{path}/table_approve_temp_file_271828.csv" df.to_csv(file_path, index=False) elif file_type == "parquet": file_path = f"./{path}/table_approve_temp_file_271828.parquet" - df.to_parquet(file_path) + blob.download_to_filename(file_path) print("SAVE HEADER FILE: ", file_path) return file_path From 053ed23498db5d307c092efb83b3c47f30924194 Mon Sep 17 00:00:00 2001 From: Laura Amaral Date: Thu, 7 Mar 2024 11:38:38 -0300 Subject: [PATCH 2/4] test: new table-approve --- .github/workflows/cd.yaml | 6 ++---- .../br_cgu_dados_abertos/br_cgu_dados_abertos__conjunto.sql | 1 - .../br_cgu_dados_abertos__organizacao.sql | 1 - .../br_cgu_dados_abertos/br_cgu_dados_abertos__recurso.sql | 1 - 4 files changed, 2 insertions(+), 7 deletions(-) diff --git a/.github/workflows/cd.yaml b/.github/workflows/cd.yaml index 91effb9c..6eb8a50a 100644 --- a/.github/workflows/cd.yaml +++ b/.github/workflows/cd.yaml @@ -2,7 +2,7 @@ name: CD on: pull_request: - types: [closed] + types: types: [labeled,closed] env: GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }} GCP_SA_KEY: ${{ secrets.GCP_SA_KEY }} @@ -15,9 +15,7 @@ env: IMAGE_NAME: ghcr.io/basedosdados/queries-basedosdados jobs: build-container: - if: | - github.event.pull_request.merged == true - && github.event.pull_request.base.ref == 'main' + if: contains(github.event.pull_request.labels.*.name, 'table-approve') name: Deployment runs-on: ubuntu-latest steps: diff --git a/models/br_cgu_dados_abertos/br_cgu_dados_abertos__conjunto.sql b/models/br_cgu_dados_abertos/br_cgu_dados_abertos__conjunto.sql index 7b5dd2cf..7e457b5a 100644 --- a/models/br_cgu_dados_abertos/br_cgu_dados_abertos__conjunto.sql +++ b/models/br_cgu_dados_abertos/br_cgu_dados_abertos__conjunto.sql @@ -1,5 +1,4 @@ {{ config(alias="conjunto", schema="br_cgu_dados_abertos") }} --- select safe_cast(nullif(id, "") as string) id, safe_cast(nullif(titulo, "") as string) nome, diff --git a/models/br_cgu_dados_abertos/br_cgu_dados_abertos__organizacao.sql b/models/br_cgu_dados_abertos/br_cgu_dados_abertos__organizacao.sql index 7e85af0f..2a772e6e 100644 --- a/models/br_cgu_dados_abertos/br_cgu_dados_abertos__organizacao.sql +++ b/models/br_cgu_dados_abertos/br_cgu_dados_abertos__organizacao.sql @@ -1,5 +1,4 @@ {{ config(alias="organizacao", schema="br_cgu_dados_abertos") }} --- select safe_cast(nullif(o.id, "") as string) id, safe_cast(nullif(o.titulo, "") as string) nome, diff --git a/models/br_cgu_dados_abertos/br_cgu_dados_abertos__recurso.sql b/models/br_cgu_dados_abertos/br_cgu_dados_abertos__recurso.sql index ceafefa4..a2835f64 100644 --- a/models/br_cgu_dados_abertos/br_cgu_dados_abertos__recurso.sql +++ b/models/br_cgu_dados_abertos/br_cgu_dados_abertos__recurso.sql @@ -1,5 +1,4 @@ {{ config(alias="recurso", schema="br_cgu_dados_abertos") }} --- select safe_cast(nullif(id, "") as string) id, safe_cast(nullif(id_conjunto, "") as string) id_conjunto, From 74c083cab5e1a7cca991be4a2956947274e3dd8e Mon Sep 17 00:00:00 2001 From: Laura Amaral Date: Thu, 7 Mar 2024 11:42:01 -0300 Subject: [PATCH 3/4] fix: typo --- .github/workflows/cd.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cd.yaml b/.github/workflows/cd.yaml index 6eb8a50a..9112a70c 100644 --- a/.github/workflows/cd.yaml +++ b/.github/workflows/cd.yaml @@ -2,7 +2,7 @@ name: CD on: pull_request: - types: types: [labeled,closed] + types: [labeled,closed] env: GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }} GCP_SA_KEY: ${{ secrets.GCP_SA_KEY }} From aedd92457127cb04cbd61f38da6869a52443cb02 Mon Sep 17 00:00:00 2001 From: Laura Amaral Date: Thu, 7 Mar 2024 15:08:27 -0300 Subject: [PATCH 4/4] fix: return to original config --- .github/workflows/cd.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cd.yaml b/.github/workflows/cd.yaml index 9112a70c..91effb9c 100644 --- a/.github/workflows/cd.yaml +++ b/.github/workflows/cd.yaml @@ -2,7 +2,7 @@ name: CD on: pull_request: - types: [labeled,closed] + types: [closed] env: GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }} GCP_SA_KEY: ${{ secrets.GCP_SA_KEY }} @@ -15,7 +15,9 @@ env: IMAGE_NAME: ghcr.io/basedosdados/queries-basedosdados jobs: build-container: - if: contains(github.event.pull_request.labels.*.name, 'table-approve') + if: | + github.event.pull_request.merged == true + && github.event.pull_request.base.ref == 'main' name: Deployment runs-on: ubuntu-latest steps: