Skip to content

Commit

Permalink
Merge branch 'master' into staging/br_b3_cotacoes
Browse files Browse the repository at this point in the history
  • Loading branch information
tricktx authored Sep 15, 2023
2 parents a3e4701 + 4ebcd84 commit 182669e
Show file tree
Hide file tree
Showing 141 changed files with 31,109 additions and 1,981 deletions.
14 changes: 10 additions & 4 deletions .github/workflows/scripts/sync_dbt_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,15 +259,19 @@ def update_metadata_json(
json.dump(metadata, f, indent=4, ensure_ascii=False)


def update_schema_yaml_files():
def update_schema_yaml_files_for_modified_datasets(existing_datasets_tables):
"""
Reads the current `metadata.json` file and generates the corresponding `schema.yml` file for
each dataset listed in `existing_datasets_tables` (i.e. only the modified datasets).
"""
# Read the metadata file
with open("metadata.json", "r", encoding="utf-8") as f:
metadata = json.load(f)
original_metadata = json.load(f)

# Get metadata only for datasets that are in the list of modified datasets
metadata = {}
for dataset_id, _ in existing_datasets_tables:
metadata[dataset_id] = original_metadata.get(dataset_id, {})
# Instantiate the YAML object
ruamel = load_ruamel()

Expand Down Expand Up @@ -340,7 +344,8 @@ def update_schema_yaml_files():
datasets_tables = get_datasets_tables_from_modified_files(
modified_files, show_details=True
)

print(datasets_tables)
# raise (Exception("STOP"))
# Split deleted datasets and tables
deleted_datasets_tables = []
existing_datasets_tables = []
Expand All @@ -362,10 +367,11 @@ def update_schema_yaml_files():
metadatas.extend(metadata)

# Merge metadatas

final_metadata = merge_metadatas(metadatas)

# Update metadata.json file
update_metadata_json(final_metadata, deleted_datasets_tables)

# Update `schema.yml` files
update_schema_yaml_files()
update_schema_yaml_files_for_modified_datasets(existing_datasets_tables)
18 changes: 11 additions & 7 deletions .github/workflows/scripts/table_approve.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,13 +135,17 @@ def save_header_files(dataset_id, table_id):
## only needs the first blob
partitions = []
for blob in blobs:
blob_path = str(blob.name).replace(
f"staging/{dataset_id}/{table_id}/", "./downloaded_data/"
)
for folder in blob.name.split("/"):
if "=" in folder:
partitions.append(folder.split("=")[0])
break
blob_name = str(blob.name)
if blob_name.endswith((".csv", ".parquet")):
blob_path = blob_name.replace(
f"staging/{dataset_id}/{table_id}/", "./downloaded_data/"
)
for folder in blob.name.split("/"):
if "=" in folder:
partitions.append(folder.split("=")[0])
print("Found blob: ", str(blob.name))
print("Renamed blob: ", blob_path)
break
### save table header in storage

print(f"DOWNLOAD HEADER FILE FROM basedosdados-dev.{dataset_id}_staging.{table_id}")
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
**/test.py
target/
dbt_modules/
logs/
Expand Down
36 changes: 36 additions & 0 deletions dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,18 @@ models:
br_bcb_estban:
+materialized: table
+schema: br_bcb_estban
br_bcb_taxa_cambio:
+materialized: table
+schema: br_bcb_taxa_cambio
+post-hook:
- 'REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers"'
- 'GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:[email protected]"'
br_bcb_taxa_selic:
+materialized: table
+schema: br_bcb_taxa_selic
+post-hook:
- 'REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers"'
- 'GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:[email protected]"'
br_bd_diretorios_brasil:
+materialized: table
+schema: br_bd_diretorios_brasil
Expand All @@ -82,6 +94,12 @@ models:
br_bd_metadados:
+materialized: table
+schema: br_bd_metadados
br_ce_fortaleza_sefin_iptu:
+materialized: table
+schema: br_ce_fortaleza_sefin_iptu
+post-hook:
- 'REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers"'
- 'GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:[email protected]"'
br_cgu_pessoal_executivo_federal:
+materialized: table
+schema: br_cgu_pessoal_executivo_federal
Expand Down Expand Up @@ -115,6 +133,12 @@ models:
br_ibge_pnadc:
+materialized: table
+schema: br_ibge_pnadc
br_inep_censo_educacao_superior:
+materialized: table
+schema: br_inep_censo_educacao_superior
br_inep_enem:
+materialized: table
+schema: br_inep_enem
br_inep_saeb:
+materialized: table
+schema: br_inep_saeb
Expand All @@ -139,6 +163,12 @@ models:
+post-hook:
- 'REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers"'
- 'GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:[email protected]"'
br_mg_belohorizonte_smfa_iptu:
+materialized: table
+schema: br_mg_belohorizonte_smfa_iptu
+post-hook:
- 'REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers"'
- 'GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:[email protected]"'
br_mp_pep:
+materialized: table
+schema: br_mp_pep
Expand Down Expand Up @@ -184,6 +214,9 @@ models:
br_tse_eleicoes_2022:
+materialized: table
+schema: br_tse_eleicoes_2022
br_rf_cafir:
+materialized: table
+schema: br_rf_cafir
fundacao_lemann:
+materialized: table
+schema: fundacao_lemann
Expand All @@ -193,6 +226,9 @@ models:
world_iea_pirls:
+materialized: table
+schema: world_iea_pirls
world_wb_mides:
+materialized: table
+schema: world_wb_mides
test_dataset:
+materialized: table
+schema: test_dataset
Loading

0 comments on commit 182669e

Please sign in to comment.