-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #570 from basedosdados/staging/add_custom_tests
[dbt] add custom tests
- Loading branch information
Showing
4 changed files
with
206 additions
and
17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
-- https://github.com/basedosdados/pipelines/wiki/Incluindo-testes-no-seu-modelo#dicionários | ||
{% test custom_dictionaries(
    model, dictionary_model_name, table_id, columns_covered_by_dictionary
) %}
    {#
        Generic test: for each listed column, every non-null value found in
        `model` must appear as a `chave` in the dictionary relation, restricted
        to dictionary rows where `valor` is not null, `id_tabela` equals
        `table_id` and `nome_coluna` equals the column name.

        Args:
            model: relation under test.
            dictionary_model_name: relation that holds the dictionary rows.
            table_id: value compared against the dictionary's `id_tabela`.
            columns_covered_by_dictionary: list of column names of `model`
                to validate. An empty (or none) list yields a zero-row query.

        Returns:
            SQL producing one row per distinct (failed_column, missing_value)
            pair that has no dictionary match. Zero rows means nothing failed.
    #}
    {{ config(severity="error") }}

    {%- set cte_definitions = [] -%}
    {%- set failure_selects = [] -%}

    {%- for column_name in (columns_covered_by_dictionary or []) %}
        {# Suffix every CTE with the loop index so names stay unique per column. #}
        {% set data_cte = "data_table_" ~ loop.index %}
        {% set dictionary_cte = "dictionary_table_" ~ loop.index %}
        {% set exceptions_cte = "exceptions_" ~ loop.index %}

        {% set column_ctes %}
            {{ data_cte }} as (
                select {{ column_name }} as id
                from {{ model }}
                where {{ column_name }} is not null
            ),
            {{ dictionary_cte }} as (
                select chave
                from {{ dictionary_model_name }}
                where valor is not null
                and id_tabela = '{{ table_id }}'
                and nome_coluna = '{{ column_name }}'
            ),
            {{ exceptions_cte }} as (
                select '{{ column_name }}' as failed_column, id as missing_value
                from {{ data_cte }}
                left join {{ dictionary_cte }} on {{ data_cte }}.id = {{ dictionary_cte }}.chave
                where {{ dictionary_cte }}.chave is null
            )
        {% endset %}

        {%- do cte_definitions.append(column_ctes) -%}
        {%- do failure_selects.append(
            "select distinct failed_column, missing_value from " ~ exceptions_cte
        ) -%}
    {%- endfor %}

    {% if cte_definitions | length == 0 %}
        {#
            Nothing to validate. Without this guard the template would render a
            bare `with` followed by a dangling `from`, which is invalid SQL.
            Emit a well-formed query with the same columns and no rows instead.
        #}
        {% set final_query %}
            select failed_column, missing_value
            from (select '' as failed_column, '' as missing_value) as no_columns
            where 1 = 0
        {% endset %}
    {% else %}
        {# One WITH clause holding every column's CTEs, then the failures stacked. #}
        {% set final_query %}
            with
                {{ cte_definitions | join(', ') }}

            {{ failure_selects | join(' union all ') }}
        {% endset %}
    {% endif %}

    {{ return(final_query) }}

{% endtest %}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
{% test custom_relationships(
    model,
    column_name,
    to,
    field,
    ignore_values=None,
    proportion_allowed_failures=0.05
) %}
    {#
        Generic test: the share of `column_name` values in `model` that have no
        matching `field` value in `to` must not exceed
        `proportion_allowed_failures`.

        Args:
            model: child relation under test.
            column_name: column of `model` holding the reference.
            to: parent relation.
            field: column of `to` that `column_name` should match.
            ignore_values: optional list of values of `column_name` to leave
                out of the check. They are rendered as quoted string literals.
            proportion_allowed_failures: tolerated fraction (0 to 1) of child
                rows without a parent. Defaults to 0.05.

        Returns:
            A single row (total_missing, total_child_records, failure_rate,
            result_message) when the tolerance is exceeded, otherwise no rows.
    #}

    {{ config(severity="error") }}

    with
        child as (
            select {{ column_name }} as child_value
            from {{ model }}
            {# NULL references are excluded on both paths: `not in` already
               dropped them whenever ignore_values was set, so counting them
               as failures only when it was unset was inconsistent. #}
            where {{ column_name }} is not null
            {% if ignore_values %}
                and {{ column_name }} not in ('{{ ignore_values | join("', '") }}')
            {% endif %}
        ),
        parent as (select {{ field }} as parent_value from {{ to }}),
        validation as (
            select child.child_value
            from child
            left join parent on child.child_value = parent.parent_value
            where parent.parent_value is null
        ),
        summary as (
            select
                count(*) as total_missing,
                (select count(*) from child) as total_child_records
            from validation
        ),
        rated as (
            select
                total_missing,
                total_child_records,
                {# nullif avoids a division-by-zero error when there are no
                   child rows; the rate is then NULL and the row is filtered
                   out below. `* 1.0` keeps the division non-integer. #}
                total_missing * 1.0 / nullif(total_child_records, 0) as exact_failure_rate
            from summary
        )

    {# The threshold is compared against the unrounded rate. Rounding first
       would let rates just above the limit (e.g. 0.004 with a limit of 0) pass.
       Only failing rows are returned, so no "passed" message is needed. #}
    select
        total_missing,
        total_child_records,
        round(exact_failure_rate, 2) as failure_rate,
        'Test failed: Failure rate of '
        || round(exact_failure_rate, 2)
        || ' exceeds allowed proportion of '
        || '{{ proportion_allowed_failures }}' as result_message
    from rated
    where exact_failure_rate > {{ proportion_allowed_failures }}

{% endtest %}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
{% test custom_unique_combinations_of_columns(
    model, combination_of_columns, proportion_allowed_failures=0.05
) %}
    {#
        Generic test: the share of rows in `model` whose combination of
        `combination_of_columns` values occurs more than once must not exceed
        `proportion_allowed_failures`.

        Args:
            model: relation under test.
            combination_of_columns: list of column names expected to be
                unique together.
            proportion_allowed_failures: tolerated fraction (0 to 1) of rows
                that belong to a duplicated combination. Defaults to 0.05.

        Returns:
            A single row (duplicates_count, total_rows, failure_rate,
            log_message) when the tolerance is exceeded, otherwise no rows.
    #}

    {{ config(severity="error") }}

    {%- set columns_csv = combination_of_columns | join(", ") %}

    with
        validation_data as (
            select {{ columns_csv }}, count(*) as duplicates_count
            from {{ model }}
            group by {{ columns_csv }}
            having count(*) > 1
        ),
        summary as (
            {# Add up the rows of every duplicated combination. Rating each
               combination on its own would let a table made entirely of
               small duplicate groups pass. #}
            select
                coalesce(sum(duplicates_count), 0) as duplicates_count,
                (select count(*) from {{ model }}) as total_rows
            from validation_data
        ),
        final_summary as (
            select
                duplicates_count,
                total_rows,
                {# nullif avoids a division-by-zero error on an empty model;
                   the rate is then NULL and the row is filtered out below. #}
                duplicates_count * 1.0 / nullif(total_rows, 0) as exact_failure_rate
            from summary
        )

    {# The threshold is compared against the unrounded rate so that small but
       non-zero proportions are not rounded down to a pass. Only failing rows
       are returned, so no "passed" message is needed. #}
    select
        duplicates_count,
        total_rows,
        round(exact_failure_rate, 2) as failure_rate,
        'Test failed: Proportion of non-unique '
        || round(exact_failure_rate, 2)
        || ' exceeds allowed proportion '
        || '{{ proportion_allowed_failures }}' as log_message
    from final_summary
    where exact_failure_rate > {{ proportion_allowed_failures }}

{% endtest %}