Skip to content

Commit

Permalink
Merge branch 'main' into create_dict
Browse files Browse the repository at this point in the history
  • Loading branch information
laura-l-amaral authored May 7, 2024
2 parents 3f356cf + 2c7f204 commit 4914b54
Show file tree
Hide file tree
Showing 24 changed files with 1,422 additions and 44 deletions.
6 changes: 3 additions & 3 deletions dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,6 @@ models:
br_b3_cotacoes:
+materialized: table
+schema: br_b3_cotacoes
+post-hook:
- REVOKE `roles/bigquery.dataViewer` ON TABLE {{ this }} FROM "specialGroup:allUsers"
- GRANT `roles/bigquery.dataViewer` ON TABLE {{ this }} TO "group:[email protected]"
br_bcb_agencia:
+materialized: table
+schema: br_bcb_agencia
Expand Down Expand Up @@ -189,6 +186,9 @@ models:
br_ibge_pof:
+materialized: table
+schema: br_ibge_pof
br_ibge_ppm:
+materialized: table
+schema: br_ibge_ppm
br_inep_ana:
+materialized: table
+schema: br_inep_ana
Expand Down
70 changes: 53 additions & 17 deletions macros/custom_get_where_subquery.sql
Original file line number Diff line number Diff line change
@@ -1,26 +1,58 @@
-- This macro is used to get a subquery with a where clause that can be used in a test
-- to filter the data to be tested. The macro looks for a where clause in the model's
-- config (schema.yml) and replaces the placeholder "__most_recent_year_month__" with
-- the maximum
-- year and month found in the relation. The macro returns a subquery with the where
-- clause that is used
-- to filter the data to be tested.
{% macro get_where_subquery(relation) -%}
-- https://github.com/basedosdados/pipelines/wiki/Incluindo-testes-no-seu-modelo#where--__most_recent_year_month__--__most_recent_date__--__most_recent_year__
{% macro get_where_subquery(relation) %}
{% set where = config.get("where", "") %}

{% if where %}
{% set max_year_query = "" %}
{% set max_date_query = "" %}
{% set max_year = "" %}
{% set max_date = "" %}

{# This block looks for __most_recent_year__ placeholder #}
{% if "__most_recent_year__" in where %}
{% set max_year_query = (
"select max(cast(ano as int64)) as max_year from " ~ relation
) %}
{% set max_year_result = run_query(max_year_query) %}
{% if execute and max_year_result.rows[0][0] %}
{% set max_year = max_year_result.rows[0][0] %}
{% set where = where | replace(
"__most_recent_year__", "ano = '" ~ max_year ~ "'"
) %}
{% do log(
"The test will filter by the most recent year: "
~ max_year,
info=True,
) %}
{% endif %}
{% endif %}

{# This block looks for __most_recent_date__ placeholder #}
{% if "__most_recent_date__" in where %}
{% set max_date_query = "select max(data) as max_date from " ~ relation %}
{% set max_date_result = run_query(max_date_query) %}
{% if execute and max_date_result.rows[0][0] %}
{% set max_date = max_date_result.rows[0][0] %}
{% set where = where | replace(
"__most_recent_date__", "data = '" ~ max_date ~ "'"
) %}
{% do log(
"The test will filter by the most recent date: "
~ max_date,
info=True,
) %}
{% endif %}
{% endif %}

{# This block looks for __most_recent_year_month__ placeholder #}
{% if "__most_recent_year_month__" in where %}
{# Construct a query to find the maximum date using ano and mes columns #}
{% set max_date_query = (
"select format_date('%Y-%m', max(date(cast(ano as int64), cast(mes as int64), 1))) as max_date from "
~ relation
) %}
{% set max_date_result = run_query(max_date_query) %}

{% if execute %}
{# % do log(max_date_query, info=True) %#}
{# % do log(max_date_result, info=True) %#}
{# Extract the maximum year and month from the max_date #}
{% set max_date = max_date_result.rows[0][0] %}
{% set max_year = max_date[:4] %}
{% set max_month = max_date[5:7] %}
Expand All @@ -37,11 +69,15 @@
{% endif %}
{% endif %}

{%- set filtered -%}
(select * from {{ relation }} where {{ where }}) dbt_subquery
{%- endset -%}

{# Return the filtered subquery #}
{% set filtered = (
"(select * from "
~ relation
~ " where "
~ where
~ ") dbt_subquery"
) %}
{% do return(filtered) %}
{% else %} {% do return(relation) %}
{% endif %}
{%- endmacro %}
{% endmacro %}
91 changes: 67 additions & 24 deletions models/br_cvm_fi/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,30 @@
version: 2
models:
- name: documentos_balancete
description: O balancete é um documento gerado mensalmente pela contabilidade
do fundo.
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- ano
- mes
- cnpj
- data_competencia
- plano_contabil_balancete
- codigo_conta
- cnpj
- cnpj_basico
- ano
- mes
- saldo_conta
description: O balancete é um documento gerado mensalmente pela contabilidade
do fundo.
config:
where: __most_recent_year_month__
columns:
- name: ano
description: Ano
tests:
- not_null:
config:
where: __most_recent_year_month__
- name: mes
description: Mês
- name: cnpj
description: CNPJ do fundo
- name: cnpj_basico
description: CNPJ Básico do fundo
- name: data_competencia
description: Data de competência
- name: plano_contabil_balancete
Expand All @@ -38,16 +40,26 @@ models:
columns:
- name: ano
description: Ano
tests:
- not_null:
config:
where: __most_recent_year_month__
- name: mes
description: Mês
- name: id_fundo
description: Tipo do Fundo
tests:
- not_null:
config:
where: __most_recent_year_month__
- name: bloco
description: Bloco
- name: cnpj
description: CNPJ do Fundo
- name: cnpj_basico
description: CNPJ Básico do Fundo
tests:
- not_null:
config:
where: __most_recent_year_month__
- name: denominacao_social
description: Denominação Social
- name: data_competencia
Expand Down Expand Up @@ -89,8 +101,6 @@ models:
description: Data de vencimento
- name: cnpj_fundo_investido
description: CNPJ do fundo investido
- name: cnpj_basico_fundo_investido
description: CNPJ Básico do fundo investido
- name: denominacao_social_fundo_investido
description: Denominação social do fundo investido
- name: codigo_swap
Expand All @@ -107,8 +117,6 @@ models:
description: Data fim da vigência
- name: cnpj_emissor
description: CNPJ do emissor
- name: cnpj_basico_emissor
description: CNPJ Básico do emissor
- name: nome_emissor
description: Nome do Emissor
- name: indicador_titulo_pos_fixado
Expand Down Expand Up @@ -171,12 +179,18 @@ models:
columns:
- name: ano
description: Ano
tests:
- not_null:
config:
where: __most_recent_year_month__
- name: mes
description: Mês
- name: cnpj
description: CNPJ do Fundo
- name: cnpj_basico
description: CNPJ Básico do Fundo
tests:
- not_null:
config:
where: __most_recent_year_month__
- name: denominacao_social
description: Denominação Social
- name: data_competencia
Expand Down Expand Up @@ -498,8 +512,16 @@ models:
columns:
- name: id_fundo
description: Tipo do Fundo
tests:
- not_null:
config:
where: __most_recent_year_month__
- name: cnpj
description: CNPJ do Fundo
tests:
- not_null:
config:
where: __most_recent_year_month__
- name: cnpj_basico
description: CNPJ Básico do Fundo
- name: denominacao_social
Expand Down Expand Up @@ -588,26 +610,37 @@ models:
- name: indicador_aplicacao_total_recursos_exterior
description: Indica se o fundo pode aplicar 100% dos recursos no exterior
- name: documentos_informe_diario
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns: [data_competencia, id_fundo, cnpj]
description: O Informe Diário é um demonstrativo que contém as seguintes informações
do fundo, relativas à data de competência - Valor total da carteira do fundo;
Patrimônio líquido; Valor da cota; Captações realizadas no dia; Resgates pagos
no dia; Número de cotistas.
columns:
- name: ano
description: Ano
tests:
- not_null:
config:
where: __most_recent_year_month__
- name: mes
description: Mês
- name: id_fundo
description: Tipo do Fundo
tests:
- not_null:
config:
where: __most_recent_year_month__
- name: cnpj
description: CNPJ do Fundo
- name: cnpj_basico
description: CNPJ Básico do Fundo
tests:
- not_null:
config:
where: __most_recent_year_month__
- name: data_competencia
description: Data de Competência
tests:
- not_null:
config:
where: __most_recent_year_month__
- name: valor_total
description: Valor total da carteira
- name: valor_cota
Expand All @@ -626,16 +659,26 @@ models:
columns:
- name: ano
description: Ano
tests:
- not_null:
config:
where: __most_recent_year_month__
- name: mes
description: Mês
- name: cnpj
description: CNPJ do fundo
- name: cnpj_basico
description: CNPJ Básico do fundo
tests:
- not_null:
config:
where: __most_recent_year_month__
- name: denominacao_social
description: Denominacao Social
- name: data_competencia
description: Data de competência do documento
tests:
- not_null:
config:
where: __most_recent_year_month__
- name: versao
description: Versão do documento
- name: quantidade_clientes_cotitstas_pessoa_fisica_private_banking
Expand Down
20 changes: 20 additions & 0 deletions models/br_ibge_ppm/br_ibge_ppm__efetivo_rebanhos.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{#
    Builds the `efetivo_rebanhos` table of the br_ibge_ppm schema: casts the
    staging columns to their target types and keeps only the rows whose source
    `quantidade` is not null.

    Partitioning note: BigQuery integer-range partitioning treats `end` as
    exclusive, so it is set to 2023 to give ano = 2022 its own partition
    instead of sending those rows to __UNPARTITIONED__. Bump `end` whenever a
    newer year is added to the staging table.
#}
{{
    config(
        alias="efetivo_rebanhos",
        schema="br_ibge_ppm",
        partition_by={
            "field": "ano",
            "data_type": "int64",
            "range": {"start": 1974, "end": 2023, "interval": 1},
        },
        cluster_by=["sigla_uf"],
    )
}}
select
    safe_cast(ano as int64) as ano,
    safe_cast(sigla_uf as string) as sigla_uf,
    safe_cast(id_municipio as string) as id_municipio,
    safe_cast(tipo_rebanho as string) as tipo_rebanho,
    safe_cast(quantidade as int64) as quantidade
from `basedosdados-staging.br_ibge_ppm_staging.efetivo_rebanhos`
-- select aliases are not visible in where, so this tests the raw staging column
where quantidade is not null
21 changes: 21 additions & 0 deletions models/br_ibge_ppm/br_ibge_ppm__producao_aquicultura.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{#
    Builds the `producao_aquicultura` table of the br_ibge_ppm schema: casts
    the staging columns to their target types and keeps only the rows whose
    source `quantidade` is not null.

    Partitioning note: BigQuery integer-range partitioning treats `end` as
    exclusive, so it is set to 2023 to give ano = 2022 its own partition
    instead of sending those rows to __UNPARTITIONED__. Bump `end` whenever a
    newer year is added to the staging table.
#}
{{
    config(
        alias="producao_aquicultura",
        schema="br_ibge_ppm",
        partition_by={
            "field": "ano",
            "data_type": "int64",
            "range": {"start": 2013, "end": 2023, "interval": 1},
        },
        cluster_by=["sigla_uf"],
    )
}}
select
    safe_cast(ano as int64) as ano,
    safe_cast(sigla_uf as string) as sigla_uf,
    safe_cast(id_municipio as string) as id_municipio,
    safe_cast(produto as string) as produto,
    safe_cast(quantidade as int64) as quantidade,
    safe_cast(valor as int64) as valor
from `basedosdados-staging.br_ibge_ppm_staging.producao_aquicultura`
-- select aliases are not visible in where, so this tests the raw staging column
where quantidade is not null
22 changes: 22 additions & 0 deletions models/br_ibge_ppm/br_ibge_ppm__producao_origem_animal.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{#
    Builds the `producao_origem_animal` table of the br_ibge_ppm schema: casts
    the staging columns to their target types and keeps only the rows whose
    source `quantidade` is not null.

    Partitioning note: BigQuery integer-range partitioning treats `end` as
    exclusive, so it is set to 2023 to give ano = 2022 its own partition
    instead of sending those rows to __UNPARTITIONED__. Bump `end` whenever a
    newer year is added to the staging table.
#}
{{
    config(
        alias="producao_origem_animal",
        schema="br_ibge_ppm",
        partition_by={
            "field": "ano",
            "data_type": "int64",
            "range": {"start": 1974, "end": 2023, "interval": 1},
        },
        cluster_by=["sigla_uf"],
    )
}}
select
    safe_cast(ano as int64) as ano,
    safe_cast(sigla_uf as string) as sigla_uf,
    safe_cast(id_municipio as string) as id_municipio,
    safe_cast(produto as string) as produto,
    safe_cast(unidade as string) as unidade,
    safe_cast(quantidade as int64) as quantidade,
    safe_cast(valor as int64) as valor
from `basedosdados-staging.br_ibge_ppm_staging.producao_origem_animal`
-- select aliases are not visible in where, so this tests the raw staging column
where quantidade is not null
20 changes: 20 additions & 0 deletions models/br_ibge_ppm/br_ibge_ppm__producao_pecuaria.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{#
    Builds the `producao_pecuaria` table of the br_ibge_ppm schema: casts the
    staging columns to their target types and keeps the rows where at least
    one of the source measures (`ovinos_tosquiados`, `vacas_ordenhadas`) is
    not null.

    Partitioning note: BigQuery integer-range partitioning treats `end` as
    exclusive, so it is set to 2023 to give ano = 2022 its own partition
    instead of sending those rows to __UNPARTITIONED__. Bump `end` whenever a
    newer year is added to the staging table.
#}
{{
    config(
        alias="producao_pecuaria",
        schema="br_ibge_ppm",
        partition_by={
            "field": "ano",
            "data_type": "int64",
            "range": {"start": 1974, "end": 2023, "interval": 1},
        },
        cluster_by=["sigla_uf"],
    )
}}
select
    safe_cast(ano as int64) as ano,
    safe_cast(sigla_uf as string) as sigla_uf,
    safe_cast(id_municipio as string) as id_municipio,
    safe_cast(ovinos_tosquiados as int64) as ovinos_tosquiados,
    safe_cast(vacas_ordenhadas as int64) as vacas_ordenhadas
from `basedosdados-staging.br_ibge_ppm_staging.producao_pecuaria`
-- select aliases are not visible in where, so this tests the raw staging columns
where ovinos_tosquiados is not null or vacas_ordenhadas is not null
Loading

0 comments on commit 4914b54

Please sign in to comment.