Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[dbt] add macro and modify tests #550

Merged
merged 4 commits into from
Apr 18, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions macros/custom_get_where_subquery.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
-- This macro is used to get a subquery with a where clause that can be used in a test
-- to filter the data to be tested. The macro looks for a where clause in the model's
-- config (schema.yml) and replaces the placeholder "__most_recent_year_month__" with
-- the maximum
-- year and month found in the relation. The macro returns a subquery with the where
-- thats used
-- to filter the data to be tested
{% macro get_where_subquery(relation) -%}
{% set where = config.get("where", "") %}

{% if where %}
{% if "__most_recent_year_month__" in where %}
{# Construct a query to find the maximum date using ano and mes columns #}
{% set max_date_query = (
"select format_date('%Y-%m', max(date(cast(ano as int64), cast(mes as int64), 1))) as max_date from "
~ relation
) %}
{% set max_date_result = run_query(max_date_query) %}

{% if execute %}
{# % do log(max_date_query, info=True) %#}
{# % do log(max_date_result, info=True) %#}
{# Extract the maximum year and month from the max_date #}
{% set max_date = max_date_result.rows[0][0] %}
{% set max_year = max_date[:4] %}
{% set max_month = max_date[5:7] %}

{# Replace placeholder in the where config with actual maximum year and month #}
{% set where = where | replace(
"__most_recent_year_month__",
"ano = " ~ max_year ~ " and mes = " ~ max_month,
) %}
{% do log(
"----- The test will be performed for: " ~ where, info=True
) %}

{% endif %}
{% endif %}

{%- set filtered -%}
(select * from {{ relation }} where {{ where }}) dbt_subquery
{%- endset -%}

{% do return(filtered) %}
{% else %} {% do return(relation) %}
{% endif %}
{%- endmacro %}
52 changes: 48 additions & 4 deletions models/br_bcb_estban/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,28 +14,45 @@ models:
- id_municipio
- cnpj_basico
- id_verbete
- not_null_proportion_multiple_columns:
at_least: 0.05
config:
where: __most_recent_year_month__
columns:
- name: ano
description: Ano
- name: mes
description: Mês
- name: sigla_uf
description: Sigla da Unidade da Federação
tests:
- dbt_utils.not_null_proportion:
at_least: 0.05
laura-l-amaral marked this conversation as resolved.
Show resolved Hide resolved
config:
where: __most_recent_year_month__
- name: id_municipio
description: ID Município - IBGE 7 Dígitos
tests:
- relationships:
to: ref('br_bd_diretorios_brasil__municipio')
field: id_municipio
config:
where: __most_recent_year_month__
- dbt_utils.not_null_proportion:
at_least: 0.05
config:
where: __most_recent_year_month__
- name: cnpj_basico
description: Cadastro Nacional de Pessoa Jurídica (CNPJ) básico (8 primeiros
dígitos)
tests:
- dbt_utils.not_null_proportion:
at_least: 0.05
config:
where: __most_recent_year_month__
- relationships:
to: ref('br_bd_diretorios_brasil__empresa')
field: cnpj_basico
config:
where: __most_recent_year_month__
- name: instituicao
description: Nome da Instituição Financeira (IF)
- name: agencias_esperadas
Expand All @@ -44,6 +61,11 @@ models:
description: Quantidade de agências processadas da IF no município
- name: id_verbete
description: Código do verbete
tests:
- dbt_utils.not_null_proportion:
at_least: 0.05
config:
where: __most_recent_year_month__
- name: valor
description: Valores
- name: br_bcb_estban__agencia
Expand All @@ -59,21 +81,32 @@ models:
- id_municipio
- cnpj_agencia
- id_verbete
- not_null_proportion_multiple_columns:
at_least: 0.05
config:
where: __most_recent_year_month__
columns:
- name: ano
description: Ano
- name: mes
description: Mês
- name: sigla_uf
description: Sigla da Unidade da Federação
tests:
- dbt_utils.not_null_proportion:
at_least: 0.05
config:
where: __most_recent_year_month__
- name: id_municipio
description: ID Município - IBGE 7 Dígitos
tests:
- relationships:
to: ref('br_bd_diretorios_brasil__municipio')
field: id_municipio
config:
where: __most_recent_year_month__
- dbt_utils.not_null_proportion:
at_least: 0.05
config:
where: __most_recent_year_month__
- name: cnpj_basico
description: Cadastro Nacional de Pessoa Jurídica (CNPJ) básico (8 primeiros
dígitos)
Expand All @@ -82,10 +115,21 @@ models:
- name: cnpj_agencia
description: CNPJ da agência bancária
tests:
- dbt_utils.not_null_proportion:
at_least: 0.05
config:
where: __most_recent_year_month__
- relationships:
to: ref('br_bd_diretorios_brasil__empresa')
field: cnpj
config:
where: __most_recent_year_month__
- name: id_verbete
description: Código do verbete
tests:
- dbt_utils.not_null_proportion:
at_least: 0.05
config:
where: __most_recent_year_month__
- name: valor
description: Valores
12 changes: 2 additions & 10 deletions models/br_ibge_pam/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,7 @@ models:
e valor da produção das lavouras temporárias
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- ano
- sigla_uf
- id_municipio
- produto
combination_of_columns: [ano, sigla_uf, id_municipio, produto]
- not_null_proportion_multiple_columns:
at_least: 0.05
columns:
Expand Down Expand Up @@ -49,11 +45,7 @@ models:
médio e valor da produção das lavouras permanentes
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- ano
- sigla_uf
- id_municipio
- produto
combination_of_columns: [ano, sigla_uf, id_municipio, produto]
- not_null_proportion_multiple_columns:
at_least: 0.05
columns:
Expand Down
7 changes: 7 additions & 0 deletions models/br_ms_sia/br_ms_sia__producao_ambulatorial.sql
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,13 @@ with
from `basedosdados.br_bd_diretorios_brasil.municipio`
) as mun
on producao_ambulatorial.pa_ufmun = mun.id_municipio_6
{% if is_incremental() %}
where
date(cast(ano as int64), cast(mes as int64), 1) not in (
select distinct (date(cast(ano as int64), cast(mes as int64), 1))
from {{ this }}
)
{% endif %}
),

sia as (
Expand Down
16 changes: 11 additions & 5 deletions models/br_ms_sia/br_ms_sia__psicossocial.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
config(
alias="psicossocial",
schema="br_ms_sia",
materialized="incremental",
materialized="table",
partition_by={
"field": "ano",
"data_type": "int64",
Expand Down Expand Up @@ -33,7 +33,13 @@ with
from `basedosdados.br_bd_diretorios_brasil.municipio`
) as mun_res
on psicossocial.munpac = mun_res.id_municipio_6

{% if is_incremental() %}
where
date(cast(ano as int64), cast(mes as int64), 1) > (
select max(date(cast(ano as int64), cast(mes as int64), 1))
from {{ this }}
)
{% endif %}
)

select
Expand All @@ -51,7 +57,7 @@ select
-- safe_cast(tippre__ as string) tipo_prestador,
-- safe_cast(mn_ind as string) tipo_mantenedor_estabelecimento,
-- safe_cast(cnpjcpf as string) cnpj_estabelecimento_executante,
-- safe_cast(cnpjmnt as string) cnpj_mantenedora_estabalecimento,
-- safe_cast(cnpjmnt as string) cnpj_mantenedora_estabelecimento,
-- safe_cast(nat_jur as string) natureza_juridica_estabelecimento,
safe_cast(pa_proc_id as string) id_procedimento_ambulatorial,
safe_cast(pa_srv as string) id_servico_especializado,
Expand Down Expand Up @@ -99,7 +105,7 @@ select
length(trim(cidpri)) = 3
and cidpri in (
select subcategoria
from `basedosdados-dev.br_bd_diretorios_brasil.cid_10`
from `basedosdados.br_bd_diretorios_brasil.cid_10`
where length(subcategoria) = 3
)
then cidpri
Expand All @@ -119,7 +125,7 @@ select
length(trim(cidassoc)) = 3
and cidassoc in (
select subcategoria
from `basedosdados-dev.br_bd_diretorios_brasil.cid_10`
from `basedosdados.br_bd_diretorios_brasil.cid_10`
where length(subcategoria) = 3
)
then cidassoc
Expand Down
Loading
Loading