Skip to content

Commit

Permalink
Merge branch 'main' into staging/cnes
Browse files Browse the repository at this point in the history
  • Loading branch information
folhesgabriel authored Mar 19, 2024
2 parents 5fc5b88 + 4df886e commit 8924b38
Show file tree
Hide file tree
Showing 118 changed files with 63,660 additions and 3,569 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/cd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -98,4 +98,4 @@ jobs:
run: poetry install --only=dev
- name: Run script for changing metadata status
run: |-
python .github/workflows/scripts/change_metadata_status.py --modified-files ${{ steps.changed-files.outputs.all_modified_files }} --graphql-url ${{ secrets.BACKEND_GRAPHQL_URL }} --status published --email ${{ secrets.BACKEND_EMAIL }} --password ${{ secrets.BACKEND_PASSWORD }}
poetry run python .github/workflows/scripts/change_metadata_status.py --modified-files ${{ steps.changed-files.outputs.all_modified_files }} --graphql-url ${{ secrets.BACKEND_GRAPHQL_URL }} --status published --email ${{ secrets.BACKEND_EMAIL }} --password ${{ secrets.BACKEND_PASSWORD }}
20 changes: 10 additions & 10 deletions .github/workflows/scripts/table_approve.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def push_table_to_bq(
Dataset(dataset_id).update(mode="prod")
delete_storage_path = file_path.replace("./downloaded_data/", "")
print(
f"DELETE HEADER FILE FROM basedosdados/staing/{dataset_id}_staging/{table_id}/{delete_storage_path}"
f"DELETE HEADER FILE FROM basedosdados/staging/{dataset_id}_staging/{table_id}/{delete_storage_path}"
)
st = Storage(dataset_id=dataset_id, table_id=table_id)
st.delete_file(filename=delete_storage_path, mode="staging")
Expand Down Expand Up @@ -146,27 +146,27 @@ def save_header_files(dataset_id, table_id):
print("Found blob: ", str(blob.name))
print("Renamed blob: ", blob_path)
break
### save table header in storage

print(f"DOWNLOAD HEADER FILE FROM basedosdados-dev.{dataset_id}_staging.{table_id}")
query = f"""
SELECT * FROM `basedosdados-dev.{dataset_id}_staging.{table_id}` LIMIT 1
"""
df = bd.read_sql(query, billing_project_id="basedosdados", from_file=True)
df = df.drop(columns=partitions)

file_name = blob_path.split("/")[-1]
file_type = file_name.split(".")[-1]

path = Path(blob_path.replace(f"/{file_name}", ""))
path.mkdir(parents=True, exist_ok=True)

### save table header in storage
if file_type == "csv":
print(f"DOWNLOAD HEADER FILE FROM basedosdados-dev.{dataset_id}_staging.{table_id}")
query = f"""
SELECT * FROM `basedosdados-dev.{dataset_id}_staging.{table_id}` LIMIT 1
"""
df = bd.read_sql(query, billing_project_id="basedosdados", from_file=True)
df = df.drop(columns=partitions)

file_path = f"./{path}/table_approve_temp_file_271828.csv"
df.to_csv(file_path, index=False)
elif file_type == "parquet":
file_path = f"./{path}/table_approve_temp_file_271828.parquet"
df.to_parquet(file_path)
blob.download_to_filename(file_path)
print("SAVE HEADER FILE: ", file_path)
return file_path

Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@ models/*/tmp/
models/*/input/
models/*/output/
models/*/extra/
venv*
19 changes: 14 additions & 5 deletions dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,12 @@ config-version: 2

# This setting configures which "profile" dbt uses for this project.
profile: default

# Options to disable elementary models
vars:
disable_run_results: false
disable_tests_results: false
disable_dbt_artifacts_autoupload: false
disable_dbt_invocation_autoupload: false

# These configurations specify where dbt should look for different types of files.
# The `source-paths` config, for example, states that models in this project can be
# found in the "models/" directory. You probably won't need to change these!
Expand All @@ -29,7 +27,6 @@ target-path: target # directory which will store compiled SQL files
clean-targets: # directories to be removed by `dbt clean`
- target
- dbt_modules

# Grant access
# bq data control: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-control-language
# dbt grant statements use https://discourse.getdbt.com/t/the-exact-grant-statements-we-use-in-a-dbt-project/430
Expand Down Expand Up @@ -114,12 +111,18 @@ models:
br_cgu_beneficios_cidadao:
+materialized: table
+schema: br_cgu_beneficios_cidadao
br_cgu_servidores_executivo_federal:
br_cgu_dados_abertos:
+materialized: table
+schema: br_cgu_servidores_executivo_federal
+schema: br_cgu_dados_abertos
br_cgu_pessoal_executivo_federal:
+materialized: table
+schema: br_cgu_pessoal_executivo_federal
br_cgu_servidores_executivo_federal:
+materialized: table
+schema: br_cgu_servidores_executivo_federal
br_cnj_improbidade_administrativa:
+materialized: table
+schema: br_cnj_improbidade_administrativa
br_cnpq_bolsas:
+materialized: table
+schema: br_cnpq_bolsas
Expand Down Expand Up @@ -177,6 +180,9 @@ models:
br_inep_censo_educacao_superior:
+materialized: table
+schema: br_inep_censo_educacao_superior
br_inep_censo_escolar:
+materialized: table
+schema: br_inep_censo_escolar
br_inep_enem:
+materialized: table
+schema: br_inep_enem
Expand Down Expand Up @@ -288,6 +294,9 @@ models:
mundo_transfermarkt_competicoes_internacionais:
+materialized: table
+schema: mundo_transfermarkt_competicoes_internacionais
world_ampas_oscar:
+materialized: table
+schema: world_ampas_oscar
world_iea_pirls:
+materialized: table
+schema: world_iea_pirls
Expand Down
2 changes: 1 addition & 1 deletion models/br_ans_beneficiario/informacao_consolidada.sql
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
partition_by={
"field": "ano",
"data_type": "int64",
"range": {"start": 2014, "end": 2023, "interval": 1},
"range": {"start": 2014, "end": 2024, "interval": 1},
},
cluster_by=["id_municipio", "mes", "sigla_uf"],
labels={"project_id": "basedosdados"},
Expand Down
11 changes: 2 additions & 9 deletions models/br_bcb_agencia/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,31 +7,26 @@ models:
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns: [ano, mes, cnpj]
- not_null_proportion_multiple_columns:
at_least: 0.05
columns:
- name: ano
description: Ano
- name: mes
description: Mês
- name: sigla_uf
description: Sigla da Unidade da Federação
tests:
- dbt_utils.not_null_proportion:
at_least: 0.05
- name: id_municipio
description: ID Município - IBGE 7 Dígitos
tests:
- relationships:
to: ref('br_bd_diretorios_brasil__municipio')
field: id_municipio
- dbt_utils.not_null_proportion:
at_least: 0.05
- name: data_inicio
description: Data de criação da agência
- name: cnpj
description: Cadastro Nacional de Pessoa Jurídica (CNPJ)
tests:
- dbt_utils.not_null_proportion:
at_least: 0.05
- relationships:
to: ref('br_bd_diretorios_brasil__empresa')
field: cnpj
Expand All @@ -53,8 +48,6 @@ models:
- relationships:
to: ref('br_bd_diretorios_brasil__cep')
field: cep.cep
- dbt_utils.not_null_proportion:
at_least: 0.05
- name: endereco
description: endereço da agência
- name: complemento
Expand Down
24 changes: 4 additions & 20 deletions models/br_bcb_estban/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,30 +14,25 @@ models:
- id_municipio
- cnpj_basico
- id_verbete
- not_null_proportion_multiple_columns:
at_least: 0.05
columns:
- name: ano
description: Ano
- name: mes
description: Mês
- name: sigla_uf
description: Sigla da Unidade da Federação
tests:
- dbt_utils.not_null_proportion:
at_least: 0.05
- name: id_municipio
description: ID Município - IBGE 7 Dígitos
tests:
- relationships:
to: ref('br_bd_diretorios_brasil__municipio')
field: id_municipio
- dbt_utils.not_null_proportion:
at_least: 0.05
- name: cnpj_basico
description: Cadastro Nacional de Pessoa Jurídica (CNPJ) básico (8 primeiros
dígitos)
tests:
- dbt_utils.not_null_proportion:
at_least: 0.05
- relationships:
to: ref('br_bd_diretorios_brasil__empresa')
field: cnpj_basico
Expand All @@ -49,9 +44,6 @@ models:
description: Quantidade de agências processadas da IF no município
- name: id_verbete
description: Código do verbete
tests:
- dbt_utils.not_null_proportion:
at_least: 0.05
- name: valor
description: Valores
- name: br_bcb_estban__agencia
Expand All @@ -67,24 +59,21 @@ models:
- id_municipio
- cnpj_agencia
- id_verbete
- not_null_proportion_multiple_columns:
at_least: 0.05
columns:
- name: ano
description: Ano
- name: mes
description: Mês
- name: sigla_uf
description: Sigla da Unidade da Federação
tests:
- dbt_utils.not_null_proportion:
at_least: 0.05
- name: id_municipio
description: ID Município - IBGE 7 Dígitos
tests:
- relationships:
to: ref('br_bd_diretorios_brasil__municipio')
field: id_municipio
- dbt_utils.not_null_proportion:
at_least: 0.05
- name: cnpj_basico
description: Cadastro Nacional de Pessoa Jurídica (CNPJ) básico (8 primeiros
dígitos)
Expand All @@ -93,15 +82,10 @@ models:
- name: cnpj_agencia
description: CNPJ da agência bancária
tests:
- dbt_utils.not_null_proportion:
at_least: 0.05
- relationships:
to: ref('br_bd_diretorios_brasil__empresa')
field: cnpj
- name: id_verbete
description: Código do verbete
tests:
- dbt_utils.not_null_proportion:
at_least: 0.05
- name: valor
description: Valores
Loading

0 comments on commit 8924b38

Please sign in to comment.