Merge branch 'master' into staging/br_b3_cotacoes

basedosdados · Sep 15, 2023 · 182669e · 182669e
2 parents a3e4701 + 4ebcd84
commit 182669e
Show file tree

Hide file tree

Showing 141 changed files with 31,109 additions and 1,981 deletions.
diff --git a/.github/workflows/scripts/sync_dbt_schema.py b/.github/workflows/scripts/sync_dbt_schema.py
@@ -259,15 +259,19 @@ def update_metadata_json(
         json.dump(metadata, f, indent=4, ensure_ascii=False)
 
 
-def update_schema_yaml_files():
+def update_schema_yaml_files_for_modified_datasets(existing_datasets_tables):
     """
     Reads the current `metadata.json` file and generates the corresponding `schema.yml` file for
     each dataset that appears in `existing_datasets_tables` (the modified datasets).
     """
     # Read the metadata file
     with open("metadata.json", "r", encoding="utf-8") as f:
-        metadata = json.load(f)
+        original_metadata = json.load(f)
 
+    # Get metadata only for datasets that are in the list of modified datasets
+    metadata = {}
+    for dataset_id, _ in existing_datasets_tables:
+        metadata[dataset_id] = original_metadata.get(dataset_id, {})
     # Instantiate the YAML object
     ruamel = load_ruamel()
 
@@ -340,7 +344,8 @@ def update_schema_yaml_files():
     datasets_tables = get_datasets_tables_from_modified_files(
         modified_files, show_details=True
     )
-
+    print(datasets_tables)
+    # raise (Exception("STOP"))
     # Split deleted datasets and tables
     deleted_datasets_tables = []
     existing_datasets_tables = []
@@ -362,10 +367,11 @@ def update_schema_yaml_files():
         metadatas.extend(metadata)
 
     # Merge metadatas
+
     final_metadata = merge_metadatas(metadatas)
 
     # Update metadata.json file
     update_metadata_json(final_metadata, deleted_datasets_tables)
 
     # Update `schema.yml` files
-    update_schema_yaml_files()
+    update_schema_yaml_files_for_modified_datasets(existing_datasets_tables)
diff --git a/.github/workflows/scripts/table_approve.py b/.github/workflows/scripts/table_approve.py
@@ -135,13 +135,17 @@ def save_header_files(dataset_id, table_id):
     ## only needs the first blob
     partitions = []
     for blob in blobs:
-        blob_path = str(blob.name).replace(
-            f"staging/{dataset_id}/{table_id}/", "./downloaded_data/"
-        )
-        for folder in blob.name.split("/"):
-            if "=" in folder:
-                partitions.append(folder.split("=")[0])
-        break
+        blob_name = str(blob.name)
+        if blob_name.endswith((".csv", ".parquet")):
+            blob_path = blob_name.replace(
+                f"staging/{dataset_id}/{table_id}/", "./downloaded_data/"
+            )
+            for folder in blob.name.split("/"):
+                if "=" in folder:
+                    partitions.append(folder.split("=")[0])
+            print("Found blob: ", str(blob.name))
+            print("Renamed blob: ", blob_path)
+            break
     ### save table header in storage
 
     print(f"DOWNLOAD HEADER FILE FROM basedosdados-dev.{dataset_id}_staging.{table_id}")

diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,4 @@
+**/test.py
 target/
 dbt_modules/
 logs/

diff --git a/dbt_project.yml b/dbt_project.yml
@@ -70,6 +70,18 @@ models:
     br_bcb_estban:
       +materialized: table
       +schema: br_bcb_estban 
+    br_bcb_taxa_cambio:
+      +materialized: table
+      +schema: br_bcb_taxa_cambio 
+      +post-hook: 
+        - 'REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers"'
+        - 'GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:[email protected]"'
+    br_bcb_taxa_selic:
+      +materialized: table
+      +schema: br_bcb_taxa_selic 
+      +post-hook: 
+        - 'REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers"'
+        - 'GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:[email protected]"'
     br_bd_diretorios_brasil:
       +materialized: table
       +schema: br_bd_diretorios_brasil
@@ -82,6 +94,12 @@ models:
     br_bd_metadados:
       +materialized: table
       +schema: br_bd_metadados
+    br_ce_fortaleza_sefin_iptu:
+      +materialized: table
+      +schema: br_ce_fortaleza_sefin_iptu
+      +post-hook: 
+        - 'REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers"'
+        - 'GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:[email protected]"'
     br_cgu_pessoal_executivo_federal:
       +materialized: table
       +schema: br_cgu_pessoal_executivo_federal
@@ -115,6 +133,12 @@ models:
     br_ibge_pnadc:
       +materialized: table
       +schema: br_ibge_pnadc
+    br_inep_censo_educacao_superior:
+      +materialized: table
+      +schema: br_inep_censo_educacao_superior
+    br_inep_enem:
+      +materialized: table
+      +schema: br_inep_enem
     br_inep_saeb:
       +materialized: table
       +schema: br_inep_saeb
@@ -139,6 +163,12 @@ models:
       +post-hook: 
         - 'REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers"'
         - 'GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:[email protected]"'
+    br_mg_belohorizonte_smfa_iptu:
+      +materialized: table
+      +schema: br_mg_belohorizonte_smfa_iptu
+      +post-hook: 
+        - 'REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers"'
+        - 'GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:[email protected]"'
     br_mp_pep:
       +materialized: table
       +schema: br_mp_pep
@@ -184,6 +214,9 @@ models:
     br_tse_eleicoes_2022:
       +materialized: table
       +schema: br_tse_eleicoes_2022
+    br_rf_cafir:
+      +materialized: table
+      +schema: br_rf_cafir
     fundacao_lemann:
       +materialized: table
       +schema: fundacao_lemann
@@ -193,6 +226,9 @@ models:
     world_iea_pirls:
       +materialized: table
       +schema: world_iea_pirls
+    world_wb_mides:
+      +materialized: table
+      +schema: world_wb_mides
     test_dataset:
       +materialized: table
       +schema: test_dataset