Commit 7472544

Merge branch 'main' into br_ibge_criacao_municipios

laura-l-amaral authored Nov 9, 2023
2 parents 7c20e81 + 1a7eaed commit 7472544
Showing 319 changed files with 60,179 additions and 1,479 deletions.
1 change: 1 addition & 0 deletions .dbtignore
@@ -0,0 +1 @@
**.py
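(A note on this one-line file: `.dbtignore` accepts gitignore-style patterns, so `**.py` keeps dbt from trying to parse the repository's Python files — such as the workflow scripts below — as project files.)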
2 changes: 1 addition & 1 deletion .github/workflows/deploy.yaml
@@ -18,7 +18,7 @@ env:

jobs:
  build-container:
-    if: (github.event.pull_request.merged == true && github.event.pull_request.base.ref == 'master')
+    if: (github.event.pull_request.merged == true && github.event.pull_request.base.ref == 'main')
    name: Deployment
    runs-on: ubuntu-latest
    steps:
2 changes: 1 addition & 1 deletion .github/workflows/docs.yaml
@@ -9,7 +9,7 @@ env:

jobs:
  docs:
-    if: github.event.pull_request.merged == true && github.event.pull_request.base.ref == 'master'
+    if: github.event.pull_request.merged == true && github.event.pull_request.base.ref == 'main'
    name: Deploy docs
    runs-on: ubuntu-latest
    steps:
14 changes: 10 additions & 4 deletions .github/workflows/scripts/sync_dbt_schema.py
@@ -259,15 +259,19 @@ def update_metadata_json(
        json.dump(metadata, f, indent=4, ensure_ascii=False)


-def update_schema_yaml_files():
+def update_schema_yaml_files_for_modified_datasets(existing_datasets_tables):
    """
    Reads the current `metadata.json` file and generates the corresponding `schema.yml` file for
    each dataset.
    """
    # Read the metadata file
    with open("metadata.json", "r", encoding="utf-8") as f:
-        metadata = json.load(f)
+        original_metadata = json.load(f)

+    # Get metadata only for datasets that are in the list of modified datasets
+    metadata = {}
+    for dataset_id, _ in existing_datasets_tables:
+        metadata[dataset_id] = original_metadata.get(dataset_id, {})
    # Instantiate the YAML object
    ruamel = load_ruamel()

@@ -340,7 +344,8 @@ def update_schema_yaml_files():
    datasets_tables = get_datasets_tables_from_modified_files(
        modified_files, show_details=True
    )
-
+    print(datasets_tables)
+    # raise (Exception("STOP"))
    # Split deleted datasets and tables
    deleted_datasets_tables = []
    existing_datasets_tables = []
@@ -362,10 +367,11 @@ def update_schema_yaml_files():
        metadatas.extend(metadata)

    # Merge metadatas
+
    final_metadata = merge_metadatas(metadatas)

    # Update metadata.json file
    update_metadata_json(final_metadata, deleted_datasets_tables)

    # Update `schema.yml` files
-    update_schema_yaml_files()
+    update_schema_yaml_files_for_modified_datasets(existing_datasets_tables)
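The net effect of this refactor: instead of regenerating `schema.yml` for every dataset in `metadata.json`, the script now only touches datasets that appear among the PR's modified files. A minimal, self-contained sketch of that filtering step (the function name and sample data below are hypothetical; the real script reads `metadata.json` and the modified-files list inside the workflow):

```python
import json


def filter_metadata_to_modified(metadata_path, existing_datasets_tables):
    # Load the full metadata file once...
    with open(metadata_path, "r", encoding="utf-8") as f:
        original_metadata = json.load(f)
    # ...then keep only entries whose dataset_id shows up in the
    # modified (dataset_id, table_id) pairs; unknown ids map to {}.
    return {
        dataset_id: original_metadata.get(dataset_id, {})
        for dataset_id, _ in existing_datasets_tables
    }


if __name__ == "__main__":
    # Hypothetical usage: only this dataset's schema.yml would be regenerated.
    modified = [("br_ibge_criacao_municipios", "municipio")]
    print(filter_metadata_to_modified("metadata.json", modified))
```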
18 changes: 11 additions & 7 deletions .github/workflows/scripts/table_approve.py
@@ -135,13 +135,17 @@ def save_header_files(dataset_id, table_id):
    ## only needs the first bloob
    partitions = []
    for blob in blobs:
-        blob_path = str(blob.name).replace(
-            f"staging/{dataset_id}/{table_id}/", "./downloaded_data/"
-        )
-        for folder in blob.name.split("/"):
-            if "=" in folder:
-                partitions.append(folder.split("=")[0])
-        break
+        blob_name = str(blob.name)
+        if blob_name.endswith((".csv", ".parquet")):
+            blob_path = blob_name.replace(
+                f"staging/{dataset_id}/{table_id}/", "./downloaded_data/"
+            )
+            for folder in blob.name.split("/"):
+                if "=" in folder:
+                    partitions.append(folder.split("=")[0])
+            print("Found blob: ", str(blob.name))
+            print("Renamed blob: ", blob_path)
+            break
    ### save table header in storage

    print(f"DOWNLOAD HEADER FILE FROM basedosdados-dev.{dataset_id}_staging.{table_id}")
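The guard above makes `save_header_files` skip non-data blobs: only the first `.csv` or `.parquet` blob is used as the header file, and Hive-style `key=value` folders along its path are recorded as partition columns. A standalone sketch of just the path logic (no GCS client involved; the blob name below is hypothetical):

```python
def parse_staging_blob(blob_name, dataset_id, table_id):
    # Only CSV and Parquet blobs qualify as header files.
    if not blob_name.endswith((".csv", ".parquet")):
        return None
    # Rewrite the staging prefix into the local download directory.
    blob_path = blob_name.replace(
        f"staging/{dataset_id}/{table_id}/", "./downloaded_data/"
    )
    # Hive-style partition folders look like "ano=2020"; keep only the keys.
    partitions = [part.split("=")[0] for part in blob_name.split("/") if "=" in part]
    return blob_path, partitions


print(
    parse_staging_blob(
        "staging/br_ibge_criacao_municipios/municipio/ano=2020/data.csv",
        "br_ibge_criacao_municipios",
        "municipio",
    )
)  # -> ('./downloaded_data/ano=2020/data.csv', ['ano'])
```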
2 changes: 1 addition & 1 deletion .github/workflows/sync-dbt-schema.yaml
@@ -4,7 +4,7 @@ on:
  pull_request:
    types: [labeled, opened]
    branches:
-      - master
+      - main
    paths:
      - 'models/**'
      - '.github/workflows/sync-dbt-schema.yaml'
15 changes: 10 additions & 5 deletions .gitignore
@@ -1,13 +1,18 @@
-target/
-dbt_modules/
-logs/
+# Generic
.vscode/
.env
.venv
.DS_store
-pyproject.toml
+logs/
+# dbt
+target/
+dbt_modules/
+dbt_packages/
+# Python
**/test.py
__pycache__/

+pyproject.toml
+#
models/*/tmp/
models/*/input/
models/*/output/
41 changes: 15 additions & 26 deletions .kubernetes/deployment.yaml
@@ -30,19 +30,13 @@ spec:
            - name: credentials-prod
              mountPath: /credentials-prod/
              readOnly: true
-          command:
-            - "dbt"
-            - "rpc"
-            - "--profiles-dir"
-            - "."
-            - "--profile"
-            - "default"
-            - "--target"
-            - "dev"
-            - "--host"
-            - "0.0.0.0"
-            - "--port"
-            - "8580"
+          command:
+            - "/bin/sh"
+            - "-c"
+          args:
+            - >
+              dbt deps;
+              dbt-rpc serve --profiles-dir . --profile default --target dev --host 0.0.0.0 --port 8580
      volumes:
        - name: credentials-dev
          secret:
@@ -84,19 +78,14 @@ spec:
            - name: credentials-prod
              mountPath: /credentials-prod/
              readOnly: true
-          command:
-            - "dbt"
-            - "rpc"
-            - "--profiles-dir"
-            - "."
-            - "--profile"
-            - "default"
-            - "--target"
-            - "prod"
-            - "--host"
-            - "0.0.0.0"
-            - "--port"
-            - "8580"
-
+          command:
+            - "/bin/sh"
+            - "-c"
+          args:
+            - >
+              dbt deps;
+              dbt-rpc serve --profiles-dir . --profile default --target prod --host 0.0.0.0 --port 8580
      volumes:
        - name: credentials-dev
          secret:
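Two things change in both Deployments here: the container now runs `dbt deps` at startup, so package dependencies are installed before the server comes up, and the server is launched with the standalone `dbt-rpc serve` command rather than the old built-in `dbt rpc` subcommand (the RPC server was split out of dbt core into the separate `dbt-rpc` package around dbt 1.0).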
7 changes: 3 additions & 4 deletions Dockerfile
@@ -8,12 +8,11 @@ ENV PATH="$VIRTUAL_ENV/bin:$PATH"
# Copy and install dependencies
WORKDIR /tmp
COPY requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt && \
-    rm requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt && rm requirements.txt

# Copy dbt project and profiles
WORKDIR /dbt
COPY . .

-# Run dbt rpc
-CMD ["dbt", "rpc", "--profiles-dir", ".", "--host", "0.0.0.0", "--port", "8580"]
+# Run dbt deps and dbt rpc
+CMD ["/dbt/start-server.sh"]
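The image no longer hard-codes the server invocation; `CMD` now delegates to a `start-server.sh` script shipped in the repo, which presumably wraps the same `dbt deps; dbt-rpc serve ...` sequence used by the Kubernetes manifests above.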
23 changes: 23 additions & 0 deletions Makefile
@@ -0,0 +1,23 @@
.PHONY: run
run:
	docker-compose up --build --force-recreate --detach

.PHONY: stop
stop:
	docker-compose stop

.PHONY: clean
clean:
	docker-compose down --volumes

.PHONY: shell
shell:
	docker-compose exec dbt bash

.PHONY: logs
logs:
	docker-compose logs --tail=500 -f

.PHONY: status
status:
	docker-compose ps
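With this Makefile the local loop becomes: `make run` to build and start the container in the background, `make logs` to tail its output, `make shell` for a bash session inside the `dbt` container, `make stop` to pause it, and `make clean` to tear everything down including volumes.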
2 changes: 1 addition & 1 deletion README.md
@@ -16,7 +16,7 @@ This is a template for a DBT package to be imported into the [main repository
- Open the `profiles.yml` file and pay attention to the comments; they indicate which fields must be changed.
- Using the credential files, create the `credentials-dev` and `credentials-prod` secrets [using the `--from-file` flag](https://cloud.google.com/kubernetes-engine/docs/concepts/secret#creating_secrets_from_files).
- Push the changes you made to your repository.
-- Whenever code changes on the `master` branch, an updated instance of the DBT RPC server will be created in your cluster, in the proper namespace.
+- Whenever code changes on the `main` branch, an updated instance of the DBT RPC server will be created in your cluster, in the proper namespace.

### Resources:

64 changes: 59 additions & 5 deletions dbt_project.yml
@@ -11,10 +11,10 @@ profile: "default"
# These configurations specify where dbt should look for different types of files.
# The `source-paths` config, for example, states that models in this project can be
# found in the "models/" directory. You probably won't need to change these!
-source-paths: ["models"]
+model-paths: ["models"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
-data-paths: ["data"]
+seed-paths: ["data"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]

@@ -69,7 +69,10 @@ models:
      +schema: br_bcb_agencia
    br_bcb_estban:
      +materialized: table
-      +schema: br_bcb_estban
+      +schema: br_bcb_estban
+    br_bcb_sicor:
+      +materialized: table
+      +schema: br_bcb_sicor
    br_bcb_taxa_cambio:
      +materialized: table
      +schema: br_bcb_taxa_cambio
@@ -94,12 +97,18 @@ models:
    br_bd_metadados:
      +materialized: table
      +schema: br_bd_metadados
+    br_camara_dados_abertos:
+      +materialized: table
+      +schema: br_camara_dados_abertos
    br_ce_fortaleza_sefin_iptu:
      +materialized: table
      +schema: br_ce_fortaleza_sefin_iptu
      +post-hook:
        - 'REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers"'
        - 'GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:[email protected]"'
+    br_cgu_servidores_executivo_federal:
+      +materialized: table
+      +schema: br_cgu_servidores_executivo_federal
    br_cgu_pessoal_executivo_federal:
      +materialized: table
      +schema: br_cgu_pessoal_executivo_federal
@@ -118,9 +127,18 @@ models:
    br_cvm_oferta_publica_distribuicao:
      +materialized: table
      +schema: br_cvm_oferta_publica_distribuicao
+    br_fbsp_absp:
+      +materialized: table
+      +schema: br_fbsp_absp
    br_fgv_igp:
      +materialized: table
      +schema: br_fgv_igp
+    br_geobr_mapas:
+      +materialized: table
+      +schema: br_geobr_mapas
+    br_ibge_censo_demografico:
+      +materialized: table
+      +schema: br_ibge_censo_demografico
    br_ibge_criacao_municipios:
      +materialized: table
      +schema: br_ibge_criacao_municipios
@@ -136,12 +154,24 @@ models:
    br_ibge_pnadc:
      +materialized: table
      +schema: br_ibge_pnadc
+    br_inep_censo_educacao_superior:
+      +materialized: table
+      +schema: br_inep_censo_educacao_superior
    br_inep_enem:
      +materialized: table
      +schema: br_inep_enem
+    br_inep_saeb:
+      +materialized: table
+      +schema: br_inep_saeb
    br_inmet_bdmep:
      +materialized: table
-      +schema: br_inmet_bdmep
+      +schema: br_inmet_bdmep
+    br_inpe_prodes:
+      +materialized: table
+      +schema: br_inpe_prodes
+    br_inpe_queimadas:
+      +materialized: table
+      +schema: br_inpe_queimadas
    br_jota:
      +materialized: table
      +schema: br_jota
@@ -160,6 +190,12 @@ models:
      +post-hook:
        - 'REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers"'
        - 'GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:[email protected]"'
+    br_mg_belohorizonte_smfa_iptu:
+      +materialized: table
+      +schema: br_mg_belohorizonte_smfa_iptu
+      +post-hook:
+        - 'REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers"'
+        - 'GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:[email protected]"'
    br_mp_pep:
      +materialized: table
      +schema: br_mp_pep
@@ -171,7 +207,10 @@ models:
      +schema: br_ms_sim
    br_ms_sinan:
      +materialized: table
-      +schema: br_ms_sinan
+      +schema: br_ms_sinan
+    br_ms_sinasc:
+      +materialized: table
+      +schema: br_ms_sinasc
    br_ons_avaliacao_operacao:
      +materialized: table
      +schema: br_ons_avaliacao_operacao
@@ -199,21 +238,36 @@ models:
      +post-hook:
        - 'REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers"'
        - 'GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:[email protected]"'
+    br_stf_corte_aberta:
+      +materialized: table
+      +schema: br_stf_corte_aberta
    br_tse_eleicoes:
      +materialized: table
      +schema: br_tse_eleicoes
    br_tse_eleicoes_2022:
      +materialized: table
      +schema: br_tse_eleicoes_2022
    br_rf_cafir:
      +materialized: table
      +schema: br_rf_cafir
    fundacao_lemann:
      +materialized: table
      +schema: fundacao_lemann
    mundo_transfermarkt_competicoes:
      +materialized: table
      +schema: mundo_transfermarkt_competicoes
+    mundo_transfermarkt_competicoes_internacionais:
+      +materialized: table
+      +schema: mundo_transfermarkt_competicoes_internacionais
+    world_iea_pirls:
+      +materialized: table
+      +schema: world_iea_pirls
    world_wb_mides:
      +materialized: table
      +schema: world_wb_mides
+    world_oceanos_mapeamento:
+      +materialized: table
+      +schema: world_oceanos_mapeamento
+    test_dataset:
+      +materialized: table
+      +schema: test_dataset
11 changes: 11 additions & 0 deletions docker-compose.yaml
@@ -0,0 +1,11 @@
version: '3'
services:
  dbt:
    container_name: dbt
    build:
      context: .
      dockerfile: Dockerfile
    volumes:
      - .:/dbt
    environment:
      - DBT_PROFILES_DIR=/dbt
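Mounting the repository at `/dbt` means local edits are picked up by the running container without a rebuild, and `DBT_PROFILES_DIR=/dbt` points dbt at the `profiles.yml` kept in the project root.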