Skip to content

Commit

Permalink
Merge branch 'main' into br_ibge_ppm
Browse files Browse the repository at this point in the history
  • Loading branch information
laura-l-amaral authored May 6, 2024
2 parents 4dcdce5 + 5b4be8e commit 2e486f3
Show file tree
Hide file tree
Showing 7 changed files with 305 additions and 41 deletions.
70 changes: 53 additions & 17 deletions macros/custom_get_where_subquery.sql
Original file line number Diff line number Diff line change
@@ -1,26 +1,58 @@
-- This macro is used to get a subquery with a where clause that can be used in a test
-- to filter the data to be tested. The macro looks for a where clause in the model's
-- config (schema.yml) and replaces the placeholder "__most_recent_year_month__" with
-- the maximum
-- year and month found in the relation. The macro returns a subquery with the where
-- clause that is used
-- to filter the data to be tested
{% macro get_where_subquery(relation) -%}
-- https://github.com/basedosdados/pipelines/wiki/Incluindo-testes-no-seu-modelo#where--__most_recent_year_month__--__most_recent_date__--__most_recent_year__
{% macro get_where_subquery(relation) %}
{% set where = config.get("where", "") %}

{% if where %}
{% set max_year_query = "" %}
{% set max_date_query = "" %}
{% set max_year = "" %}
{% set max_date = "" %}

{# This block looks for __most_recent_year__ placeholder #}
{% if "__most_recent_year__" in where %}
{% set max_year_query = (
"select max(cast(ano as int64)) as max_year from " ~ relation
) %}
{% set max_year_result = run_query(max_year_query) %}
{% if execute and max_year_result.rows[0][0] %}
{% set max_year = max_year_result.rows[0][0] %}
{% set where = where | replace(
"__most_recent_year__", "ano = '" ~ max_year ~ "'"
) %}
{% do log(
"The test will filter by the most recent year: "
~ max_year,
info=True,
) %}
{% endif %}
{% endif %}

{# This block looks for __most_recent_date__ placeholder #}
{% if "__most_recent_date__" in where %}
{% set max_date_query = "select max(data) as max_date from " ~ relation %}
{% set max_date_result = run_query(max_date_query) %}
{% if execute and max_date_result.rows[0][0] %}
{% set max_date = max_date_result.rows[0][0] %}
{% set where = where | replace(
"__most_recent_date__", "data = '" ~ max_date ~ "'"
) %}
{% do log(
"The test will filter by the most recent date: "
~ max_date,
info=True,
) %}
{% endif %}
{% endif %}

{# This block looks for __most_recent_year_month__ placeholder #}
{% if "__most_recent_year_month__" in where %}
{# Construct a query to find the maximum date using ano and mes columns #}
{% set max_date_query = (
"select format_date('%Y-%m', max(date(cast(ano as int64), cast(mes as int64), 1))) as max_date from "
~ relation
) %}
{% set max_date_result = run_query(max_date_query) %}

{% if execute %}
{# % do log(max_date_query, info=True) %#}
{# % do log(max_date_result, info=True) %#}
{# Extract the maximum year and month from the max_date #}
{% set max_date = max_date_result.rows[0][0] %}
{% set max_year = max_date[:4] %}
{% set max_month = max_date[5:7] %}
Expand All @@ -37,11 +69,15 @@
{% endif %}
{% endif %}

{%- set filtered -%}
(select * from {{ relation }} where {{ where }}) dbt_subquery
{%- endset -%}

{# Return the filtered subquery #}
{% set filtered = (
"(select * from "
~ relation
~ " where "
~ where
~ ") dbt_subquery"
) %}
{% do return(filtered) %}
{% else %} {% do return(relation) %}
{% endif %}
{%- endmacro %}
{% endmacro %}
91 changes: 67 additions & 24 deletions models/br_cvm_fi/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,30 @@
version: 2
models:
- name: documentos_balancete
description: O balancete é um documento gerado mensalmente pela contabilidade
do fundo.
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- ano
- mes
- cnpj
- data_competencia
- plano_contabil_balancete
- codigo_conta
- cnpj
- cnpj_basico
- ano
- mes
- saldo_conta
description: O balancete é um documento gerado mensalmente pela contabilidade
do fundo.
config:
where: __most_recent_year_month__
columns:
- name: ano
description: Ano
tests:
- not_null:
config:
where: __most_recent_year_month__
- name: mes
description: Mês
- name: cnpj
description: CNPJ do fundo
- name: cnpj_basico
description: CNPJ Básico do fundo
- name: data_competencia
description: Data de competência
- name: plano_contabil_balancete
Expand All @@ -38,16 +40,26 @@ models:
columns:
- name: ano
description: Ano
tests:
- not_null:
config:
where: __most_recent_year_month__
- name: mes
description: Mês
- name: id_fundo
description: Tipo do Fundo
tests:
- not_null:
config:
where: __most_recent_year_month__
- name: bloco
description: Bloco
- name: cnpj
description: CNPJ do Fundo
- name: cnpj_basico
description: CNPJ Básico do Fundo
tests:
- not_null:
config:
where: __most_recent_year_month__
- name: denominacao_social
description: Denominação Social
- name: data_competencia
Expand Down Expand Up @@ -89,8 +101,6 @@ models:
description: Data de vencimento
- name: cnpj_fundo_investido
description: CNPJ do fundo investido
- name: cnpj_basico_fundo_investido
description: CNPJ Básico do fundo investido
- name: denominacao_social_fundo_investido
description: Denominação social do fundo investido
- name: codigo_swap
Expand All @@ -107,8 +117,6 @@ models:
description: Data fim da vigência
- name: cnpj_emissor
description: CNPJ do emissor
- name: cnpj_basico_emissor
description: CNPJ Básico do emissor
- name: nome_emissor
description: Nome do Emissor
- name: indicador_titulo_pos_fixado
Expand Down Expand Up @@ -171,12 +179,18 @@ models:
columns:
- name: ano
description: Ano
tests:
- not_null:
config:
where: __most_recent_year_month__
- name: mes
description: Mês
- name: cnpj
description: CNPJ do Fundo
- name: cnpj_basico
description: CNPJ Básico do Fundo
tests:
- not_null:
config:
where: __most_recent_year_month__
- name: denominacao_social
description: Denominação Social
- name: data_competencia
Expand Down Expand Up @@ -498,8 +512,16 @@ models:
columns:
- name: id_fundo
description: Tipo do Fundo
tests:
- not_null:
config:
where: __most_recent_year_month__
- name: cnpj
description: CNPJ do Fundo
tests:
- not_null:
config:
where: __most_recent_year_month__
- name: cnpj_basico
description: CNPJ Básico do Fundo
- name: denominacao_social
Expand Down Expand Up @@ -588,26 +610,37 @@ models:
- name: indicador_aplicacao_total_recursos_exterior
description: Indica se o fundo pode aplicar 100% dos recursos no exterior
- name: documentos_informe_diario
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns: [data_competencia, id_fundo, cnpj]
description: O Informe Diário é um demonstrativo que contém as seguintes informações
do fundo, relativas à data de competência - Valor total da carteira do fundo;
Patrimônio líquido; Valor da cota; Captações realizadas no dia; Resgates pagos
no dia; Número de cotistas.
columns:
- name: ano
description: Ano
tests:
- not_null:
config:
where: __most_recent_year_month__
- name: mes
description: Mês
- name: id_fundo
description: Tipo do Fundo
tests:
- not_null:
config:
where: __most_recent_year_month__
- name: cnpj
description: CNPJ do Fundo
- name: cnpj_basico
description: CNPJ Básico do Fundo
tests:
- not_null:
config:
where: __most_recent_year_month__
- name: data_competencia
description: Data de Competência
tests:
- not_null:
config:
where: __most_recent_year_month__
- name: valor_total
description: Valor total da carteira
- name: valor_cota
Expand All @@ -626,16 +659,26 @@ models:
columns:
- name: ano
description: Ano
tests:
- not_null:
config:
where: __most_recent_year_month__
- name: mes
description: Mês
- name: cnpj
description: CNPJ do fundo
- name: cnpj_basico
description: CNPJ Básico do fundo
tests:
- not_null:
config:
where: __most_recent_year_month__
- name: denominacao_social
description: Denominacao Social
- name: data_competencia
description: Data de competência do documento
tests:
- not_null:
config:
where: __most_recent_year_month__
- name: versao
description: Versão do documento
- name: quantidade_clientes_cotitstas_pessoa_fisica_private_banking
Expand Down
8 changes: 8 additions & 0 deletions models/br_ms_sih/br_ms_sih__dicionario.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{{ config(alias="dicionario", schema="br_ms_sih") }}
-- Dictionary table for the br_ms_sih dataset: every staging column is exposed
-- as a STRING. safe_cast yields NULL instead of raising when a value cannot be
-- converted.
select
    safe_cast(id_tabela as string) as id_tabela,
    safe_cast(nome_coluna as string) as nome_coluna,
    safe_cast(chave as string) as chave,
    safe_cast(cobertura_temporal as string) as cobertura_temporal,
    safe_cast(valor as string) as valor
from `basedosdados-staging.br_ms_sih_staging.dicionario`
24 changes: 24 additions & 0 deletions models/br_ms_sih/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -135,3 +135,27 @@ models:
description: Código sequencial
- name: nome_remessa
description: Nome da remessa
- name: br_ms_sih__dicionario
description: Dicionário para tradução dos códigos das tabelas do conjunto SIH/SUS.
Para códigos definidos por outras instituições, como id_municipio ou cnaes,
buscar por diretórios.
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- id_tabela
- nome_coluna
- chave
- cobertura_temporal
- not_null_proportion_multiple_columns:
at_least: 0.95
columns:
- name: id_tabela
description: ID Tabela
- name: nome_coluna
description: Nome da coluna
- name: chave
description: Chave
- name: cobertura_temporal
description: Cobertura Temporal
- name: valor
description: Valor
50 changes: 50 additions & 0 deletions tests/generic/custom_dictionaries.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
-- https://github.com/basedosdados/pipelines/wiki/Incluindo-testes-no-seu-modelo#dicionários
{% test custom_dictionaries(
    model, dictionary_model_name, table_id, columns_covered_by_dictionary
) %}
    {#
        Generic test that checks that every non-null value of the listed columns
        has a matching key in a dictionary table.

        Arguments:
          model: relation (or subquery) under test.
          dictionary_model_name: relation holding the dictionary; it is read
            through its id_tabela, nome_coluna, chave and valor columns.
          table_id: value matched against the dictionary's id_tabela column.
          columns_covered_by_dictionary: list of column names of `model` to check.

        Returns a query that yields one row (failed_column, missing_value) for
        each distinct value without a dictionary entry whose valor is not null;
        the query returns no rows when every value is covered.

        NOTE(review): the join compares the raw column value to `chave` without
        any cast, so the covered columns presumably share chave's type - confirm.
    #}
    {{ config(severity="error") }}

    {# An empty column list would render "with select distinct ... from" with no
       CTEs, which is invalid SQL; fail at compile time with a clear message. #}
    {%- if not columns_covered_by_dictionary -%}
        {%- do exceptions.raise_compiler_error(
            "custom_dictionaries: 'columns_covered_by_dictionary' must list at least one column."
        ) -%}
    {%- endif -%}

    {%- set combined_query_parts = [] -%}
    {%- set union_parts = [] -%}

    {# Build three CTEs per column; loop.index keeps the CTE names unique. #}
    {%- for column_name in columns_covered_by_dictionary %}
        {% set subquery_name = "exceptions_" ~ loop.index %}
        {% set left_table_name = "data_table_" ~ loop.index %}
        {% set right_table_name = "dictionary_table_" ~ loop.index %}

        {% set subquery %}
            {{ left_table_name }} as (
                select {{ column_name }} as id
                from {{ model }}
                where {{ column_name }} is not null
            ),
            {{ right_table_name }} as (
                select chave
                from {{ dictionary_model_name }}
                where valor is not null
                and id_tabela = '{{ table_id }}'
                and nome_coluna = '{{ column_name }}'
            ),
            {{ subquery_name }} as (
                select '{{ column_name }}' as failed_column, id as missing_value
                from {{ left_table_name }}
                left join {{ right_table_name }} on {{ left_table_name }}.id = {{ right_table_name }}.chave
                where {{ right_table_name }}.chave is null
            )
        {% endset %}

        {%- do combined_query_parts.append(subquery) -%}
        {%- do union_parts.append(subquery_name) -%}
    {%- endfor %}

    {# Combine all CTEs into a single WITH clause and then union all results #}
    {% set final_query %}
        with
        {{ combined_query_parts | join(', ') }}

        select distinct failed_column, missing_value from {{ union_parts | join(' union all select distinct failed_column, missing_value from ') }}
    {% endset %}

    {{ return(final_query) }}

{% endtest %}
Loading

0 comments on commit 2e486f3

Please sign in to comment.