diff --git a/macros/custom_get_where_subquery.sql b/macros/custom_get_where_subquery.sql index 1dac0fd3..8b33dddc 100644 --- a/macros/custom_get_where_subquery.sql +++ b/macros/custom_get_where_subquery.sql @@ -1,16 +1,51 @@ --- This macro is used to get a subquery with a where clause that can be used in a test --- to filter the data to be tested. The macro looks for a where clause in the model's --- config (schema.yml) and replaces the placeholder "__most_recent_year_month__" with --- the maximum --- year and month found in the relation. The macro returns a subquery with the where --- thats used --- to filter the data to be tested -{% macro get_where_subquery(relation) -%} +-- https://github.com/basedosdados/pipelines/wiki/Incluindo-testes-no-seu-modelo#where--__most_recent_year_month__--__most_recent_date__--__most_recent_year__ +{% macro get_where_subquery(relation) %} {% set where = config.get("where", "") %} {% if where %} + {% set max_year_query = "" %} + {% set max_date_query = "" %} + {% set max_year = "" %} + {% set max_date = "" %} + + {# This block looks for __most_recent_year__ placeholder #} + {% if "__most_recent_year__" in where %} + {% set max_year_query = ( + "select max(cast(ano as int64)) as max_year from " ~ relation + ) %} + {% set max_year_result = run_query(max_year_query) %} + {% if execute and max_year_result.rows[0][0] %} + {% set max_year = max_year_result.rows[0][0] %} + {% set where = where | replace( + "__most_recent_year__", "ano = '" ~ max_year ~ "'" + ) %} + {% do log( + "The test will filter by the most recent year: " + ~ max_year, + info=True, + ) %} + {% endif %} + {% endif %} + + {# This block looks for __most_recent_date__ placeholder #} + {% if "__most_recent_date__" in where %} + {% set max_date_query = "select max(data) as max_date from " ~ relation %} + {% set max_date_result = run_query(max_date_query) %} + {% if execute and max_date_result.rows[0][0] %} + {% set max_date = max_date_result.rows[0][0] %} + {% set where = 
where | replace( + "__most_recent_date__", "data = '" ~ max_date ~ "'" + ) %} + {% do log( + "The test will filter by the most recent date: " + ~ max_date, + info=True, + ) %} + {% endif %} + {% endif %} + + {# This block looks for __most_recent_year_month__ placeholder #} {% if "__most_recent_year_month__" in where %} - {# Construct a query to find the maximum date using ano and mes columns #} {% set max_date_query = ( "select format_date('%Y-%m', max(date(cast(ano as int64), cast(mes as int64), 1))) as max_date from " ~ relation @@ -18,9 +53,6 @@ {% set max_date_result = run_query(max_date_query) %} {% if execute %} - {# % do log(max_date_query, info=True) %#} - {# % do log(max_date_result, info=True) %#} - {# Extract the maximum year and month from the max_date #} {% set max_date = max_date_result.rows[0][0] %} {% set max_year = max_date[:4] %} {% set max_month = max_date[5:7] %} @@ -37,11 +69,15 @@ {% endif %} {% endif %} - {%- set filtered -%} - (select * from {{ relation }} where {{ where }}) dbt_subquery - {%- endset -%} - + {# Return the filtered subquery #} + {% set filtered = ( + "(select * from " + ~ relation + ~ " where " + ~ where + ~ ") dbt_subquery" + ) %} {% do return(filtered) %} {% else %} {% do return(relation) %} {% endif %} -{%- endmacro %} +{% endmacro %} diff --git a/models/br_cvm_fi/schema.yml b/models/br_cvm_fi/schema.yml index e6d3249e..11d4d2fc 100644 --- a/models/br_cvm_fi/schema.yml +++ b/models/br_cvm_fi/schema.yml @@ -2,28 +2,30 @@ version: 2 models: - name: documentos_balancete + description: O balancete é um documento gerado mensalmente pela contabilidade + do fundo. tests: - dbt_utils.unique_combination_of_columns: combination_of_columns: + - ano + - mes + - cnpj - data_competencia - plano_contabil_balancete - codigo_conta - - cnpj - - cnpj_basico - - ano - - mes - - saldo_conta - description: O balancete é um documento gerado mensalmente pela contabilidade - do fundo. 
+ config: + where: __most_recent_year_month__ columns: - name: ano description: Ano + tests: + - not_null: + config: + where: __most_recent_year_month__ - name: mes description: Mês - name: cnpj description: CNPJ do fundo - - name: cnpj_basico - description: CNPJ Básico do fundo - name: data_competencia description: Data de competência - name: plano_contabil_balancete @@ -38,16 +40,26 @@ models: columns: - name: ano description: Ano + tests: + - not_null: + config: + where: __most_recent_year_month__ - name: mes description: Mês - name: id_fundo description: Tipo do Fundo + tests: + - not_null: + config: + where: __most_recent_year_month__ - name: bloco description: Bloco - name: cnpj description: CNPJ do Fundo - - name: cnpj_basico - description: CNPJ Básico do Fundo + tests: + - not_null: + config: + where: __most_recent_year_month__ - name: denominacao_social description: Denominação Social - name: data_competencia @@ -89,8 +101,6 @@ models: description: Data de vencimento - name: cnpj_fundo_investido description: CNPJ do fundo investido - - name: cnpj_basico_fundo_investido - description: CNPJ Básico do fundo investido - name: denominacao_social_fundo_investido description: Denominação social do fundo investido - name: codigo_swap @@ -107,8 +117,6 @@ models: description: Data fim da vigência - name: cnpj_emissor description: CNPJ do emissor - - name: cnpj_basico_emissor - description: CNPJ Básico do emissor - name: nome_emissor description: Nome do Emissor - name: indicador_titulo_pos_fixado @@ -171,12 +179,18 @@ models: columns: - name: ano description: Ano + tests: + - not_null: + config: + where: __most_recent_year_month__ - name: mes description: Mês - name: cnpj description: CNPJ do Fundo - - name: cnpj_basico - description: CNPJ Básico do Fundo + tests: + - not_null: + config: + where: __most_recent_year_month__ - name: denominacao_social description: Denominação Social - name: data_competencia @@ -498,8 +512,16 @@ models: columns: - name: id_fundo 
description: Tipo do Fundo + tests: + - not_null: + config: + where: __most_recent_year_month__ - name: cnpj description: CNPJ do Fundo + tests: + - not_null: + config: + where: __most_recent_year_month__ - name: cnpj_basico description: CNPJ Básico do Fundo - name: denominacao_social @@ -588,9 +610,6 @@ models: - name: indicador_aplicacao_total_recursos_exterior description: Indica se o fundo pode aplicar 100% dos recursos no exterior - name: documentos_informe_diario - tests: - - dbt_utils.unique_combination_of_columns: - combination_of_columns: [data_competencia, id_fundo, cnpj] description: O Informe Diário é um demonstrativo que contém as seguintes informações do fundo, relativas à data de competência - Valor total da carteira do fundo; Patrimônio líquido; Valor da cota; Captações realizadas no dia; Resgates pagos @@ -598,16 +617,30 @@ models: columns: - name: ano description: Ano + tests: + - not_null: + config: + where: __most_recent_year_month__ - name: mes description: Mês - name: id_fundo description: Tipo do Fundo + tests: + - not_null: + config: + where: __most_recent_year_month__ - name: cnpj description: CNPJ do Fundo - - name: cnpj_basico - description: CNPJ Básico do Fundo + tests: + - not_null: + config: + where: __most_recent_year_month__ - name: data_competencia description: Data de Competência + tests: + - not_null: + config: + where: __most_recent_year_month__ - name: valor_total description: Valor total da carteira - name: valor_cota @@ -626,16 +659,26 @@ models: columns: - name: ano description: Ano + tests: + - not_null: + config: + where: __most_recent_year_month__ - name: mes description: Mês - name: cnpj description: CNPJ do fundo - - name: cnpj_basico - description: CNPJ Básico do fundo + tests: + - not_null: + config: + where: __most_recent_year_month__ - name: denominacao_social description: Denominacao Social - name: data_competencia description: Data de competência do documento + tests: + - not_null: + config: + where: 
__most_recent_year_month__ - name: versao description: Versão do documento - name: quantidade_clientes_cotitstas_pessoa_fisica_private_banking diff --git a/models/br_ms_sih/br_ms_sih__dicionario.sql b/models/br_ms_sih/br_ms_sih__dicionario.sql new file mode 100644 index 00000000..5bbe47b5 --- /dev/null +++ b/models/br_ms_sih/br_ms_sih__dicionario.sql @@ -0,0 +1,8 @@ +{{ config(alias="dicionario", schema="br_ms_sih") }} +select + safe_cast(id_tabela as string) id_tabela, + safe_cast(nome_coluna as string) nome_coluna, + safe_cast(chave as string) chave, + safe_cast(cobertura_temporal as string) cobertura_temporal, + safe_cast(valor as string) valor +from `basedosdados-staging.br_ms_sih_staging.dicionario` diff --git a/models/br_ms_sih/schema.yml b/models/br_ms_sih/schema.yml index 8501a6f2..d55d8d44 100644 --- a/models/br_ms_sih/schema.yml +++ b/models/br_ms_sih/schema.yml @@ -135,3 +135,27 @@ models: description: Código sequencial - name: nome_remessa description: Nome da remessa + - name: br_ms_sih__dicionario + description: Dicionário para tradução dos códigos das tabelas do conjunto SIH/SUS. + Para códigos definidos por outras instituições, como id_municipio ou cnaes, + buscar por diretórios.
+    tests:
+      - dbt_utils.unique_combination_of_columns:
+          combination_of_columns:
+            - id_tabela
+            - nome_coluna
+            - chave
+            - cobertura_temporal
+      - not_null_proportion_multiple_columns:
+          at_least: 0.95
+    columns:
+      - name: id_tabela
+        description: ID Tabela
+      - name: nome_coluna
+        description: Nome da coluna
+      - name: chave
+        description: Chave
+      - name: cobertura_temporal
+        description: Cobertura Temporal
+      - name: valor
+        description: Valor
diff --git a/tests/generic/custom_dictionaries.sql b/tests/generic/custom_dictionaries.sql
new file mode 100644
index 00000000..b60b1a10
--- /dev/null
+++ b/tests/generic/custom_dictionaries.sql
@@ -0,0 +1,77 @@
+-- https://github.com/basedosdados/pipelines/wiki/Incluindo-testes-no-seu-modelo#dicionários
+{#
+    Generic test: checks that every non-null value of each listed column has a
+    matching "chave" in the dictionary relation for the given table and column.
+
+    Args:
+        model: relation under test.
+        dictionary_model_name: relation queried for id_tabela, nome_coluna,
+            chave and valor.
+        table_id: value compared against the dictionary's id_tabela.
+        columns_covered_by_dictionary: list of column names to validate.
+
+    Returns the query text; it yields one (failed_column, missing_value) row per
+    distinct value without a dictionary entry, so zero rows means success.
+#}
+{% test custom_dictionaries(
+    model, dictionary_model_name, table_id, columns_covered_by_dictionary
+) %}
+    {{ config(severity="error") }}
+
+    {%- set combined_query_parts = [] -%}
+    {%- set union_parts = [] -%}
+
+    {%- for column_name in columns_covered_by_dictionary %}
+        {% set subquery_name = "exceptions_" ~ loop.index %}
+        {% set left_table_name = "data_table_" ~ loop.index %}
+        {% set right_table_name = "dictionary_table_" ~ loop.index %}
+
+        {# Values are cast to string so non-string columns can be compared with
+           the dictionary key and all union branches share one column type. #}
+        {% set subquery %}
+            {{ left_table_name }} as (
+                select safe_cast({{ column_name }} as string) as id
+                from {{ model }}
+                where {{ column_name }} is not null
+            ),
+            {{ right_table_name }} as (
+                select safe_cast(chave as string) as chave
+                from {{ dictionary_model_name }}
+                where valor is not null
+                and id_tabela = '{{ table_id }}'
+                and nome_coluna = '{{ column_name }}'
+            ),
+            {{ subquery_name }} as (
+                select '{{ column_name }}' as failed_column, id as missing_value
+                from {{ left_table_name }}
+                left join {{ right_table_name }} on {{ left_table_name }}.id = {{ right_table_name }}.chave
+                where {{ right_table_name }}.chave is null
+            )
+        {% endset %}
+
+        {%- do combined_query_parts.append(subquery) -%}
+        {%- do union_parts.append(subquery_name) -%}
+    {%- endfor %}
+
+    {% if union_parts | length == 0 %}
+        {# No columns were given: return an empty result instead of invalid SQL. #}
+        {% set final_query %}
+            select
+                cast(null as string) as failed_column,
+                cast(null as string) as missing_value
+            from (select 1) as no_columns
+            where false
+        {% endset %}
+    {% else %}
+        {# Combine all CTEs into a single WITH clause and then union all results #}
+        {% set final_query %}
+            with
+            {{ combined_query_parts | join(', ') }}
+
+            select distinct failed_column, missing_value from {{ union_parts | join(' union all select distinct failed_column, missing_value from ') }}
+        {% endset %}
+    {% endif %}
+
+    {{ return(final_query) }}
+
+{% endtest %}
diff --git a/tests/generic/custom_relationships.sql b/tests/generic/custom_relationships.sql
new file mode 100644
index 00000000..65af9f4e
--- /dev/null
+++ b/tests/generic/custom_relationships.sql
@@ -0,0 +1,70 @@
+{#
+    Generic test: relationships check that tolerates a proportion of orphans.
+
+    Args:
+        model: relation under test (child).
+        column_name: child column holding the foreign key.
+        to: parent relation.
+        field: parent column the child values must match.
+        ignore_values: optional list of child values excluded from the check;
+            they are rendered as quoted string literals.
+        proportion_allowed_failures: maximum tolerated share (0-1) of child rows
+            without a parent; defaults to 0.05.
+
+    Returns a single row only when the share of orphan rows exceeds the
+    threshold, so zero rows means success.
+#}
+{% test custom_relationships(
+    model,
+    column_name,
+    to,
+    field,
+    ignore_values=None,
+    proportion_allowed_failures=0.05
+) %}
+
+    {{ config(severity="error") }}
+
+    with
+        {# NOTE(review): "not in" also drops null child values, while without
+           ignore_values nulls stay in and count as missing - confirm intent. #}
+        child as (
+            select {{ column_name }} as child_value
+            from {{ model }}
+            {% if ignore_values %}
+                where {{ column_name }} not in ('{{ ignore_values | join("', '") }}')
+            {% endif %}
+        ),
+        parent as (select {{ field }} as parent_value from {{ to }}),
+        validation as (
+            select child.child_value
+            from child
+            left join parent on child.child_value = parent.parent_value
+            where parent.parent_value is null
+        ),
+        {# safe_divide yields null for an empty child set, so the test passes
+           instead of raising a division-by-zero error. #}
+        summary as (
+            select
+                count(*) as total_missing,
+                (select count(*) from child) as total_child_records,
+                safe_divide(
+                    count(*), (select count(*) from child)
+                ) as raw_failure_rate
+            from validation
+        )
+
+    {# The threshold is compared with the unrounded rate; rounding is only for
+       display. Numbers are cast explicitly because || needs string operands. #}
+    select
+        total_missing,
+        total_child_records,
+        round(raw_failure_rate, 4) as failure_rate,
+        'Test failed: Failure rate of '
+        || cast(round(raw_failure_rate, 4) as string)
+        || ' exceeds allowed proportion of '
+        || '{{ proportion_allowed_failures }}' as result_message
+    from summary
+    where raw_failure_rate > {{ proportion_allowed_failures }}
+
+{% endtest %}
diff --git a/tests/generic/custom_unique_combination_of_columns.sql b/tests/generic/custom_unique_combination_of_columns.sql
new file mode 100644
index 00000000..05c9a364
--- /dev/null
+++ 
b/tests/generic/custom_unique_combination_of_columns.sql
@@ -0,0 +1,58 @@
+{#
+    Generic test: uniqueness check that tolerates a proportion of duplicates.
+
+    Args:
+        model: relation under test.
+        combination_of_columns: list of column names that should be jointly
+            unique.
+        proportion_allowed_failures: maximum tolerated share (0-1) of rows that
+            belong to a repeated combination; defaults to 0.05.
+
+    Returns a single row only when the share of rows in repeated combinations
+    exceeds the threshold, so zero rows means success.
+#}
+{% test custom_unique_combinations_of_columns(
+    model, combination_of_columns, proportion_allowed_failures=0.05
+) %}
+
+    {{ config(severity="error") }}
+
+    {%- set columns_csv = combination_of_columns | join(", ") %}
+
+    with
+        duplicated_groups as (
+            select {{ columns_csv }}, count(*) as group_row_count
+            from {{ model }}
+            group by {{ columns_csv }}
+            having count(*) > 1
+        ),
+        {# Add up the rows of every repeated combination so the rate describes
+           the whole table rather than one group at a time. #}
+        summary as (
+            select
+                coalesce(sum(group_row_count), 0) as duplicates_count,
+                (select count(*) from {{ model }}) as total_rows
+            from duplicated_groups
+        ),
+        final_summary as (
+            select
+                duplicates_count,
+                total_rows,
+                safe_divide(duplicates_count, total_rows) as raw_failure_rate
+            from summary
+        )
+
+    {# The threshold is compared with the unrounded rate; rounding is only for
+       display. Numbers are cast explicitly because || needs string operands. #}
+    select
+        duplicates_count,
+        total_rows,
+        round(raw_failure_rate, 4) as failure_rate,
+        'Test failed: Proportion of non-unique '
+        || cast(round(raw_failure_rate, 4) as string)
+        || ' exceeds allowed proportion '
+        || '{{ proportion_allowed_failures }}' as log_message
+    from final_summary
+    where raw_failure_rate > {{ proportion_allowed_failures }}
+
+{% endtest %}