From 8b1b157e1df644f2f18b7af5ad27fbfe2b86c9fb Mon Sep 17 00:00:00 2001
From: folhesgabriel <pisagabriel09@gmail.com>
Date: Thu, 2 May 2024 15:00:21 -0300
Subject: [PATCH 1/8] feat: add customs dbt tests

---
 tests/generic/custom_relationships.sql        | 56 +++++++++++++++++++
 .../custom_unique_combination_of_columns.sql  | 49 ++++++++++++++++
 2 files changed, 105 insertions(+)
 create mode 100644 tests/generic/custom_relationships.sql
 create mode 100644 tests/generic/custom_unique_combination_of_columns.sql

diff --git a/tests/generic/custom_relationships.sql b/tests/generic/custom_relationships.sql
new file mode 100644
index 00000000..618e4a30
--- /dev/null
+++ b/tests/generic/custom_relationships.sql
@@ -0,0 +1,56 @@
+{% test custom_relationships(
+    model,
+    column_name,
+    to,
+    field,
+    ignore_values=None,
+    proportion_allowed_failures=0
+) %}
+
+    {{ config(severity="error") }}
+
+    with
+        child as (
+            select {{ column_name }} as child_value
+            from {{ model }}
+            {% if ignore_values %}
+                where {{ column_name }} not in ({{ ignore_values | join(", ") }})
+            {% endif %}
+        ),
+        parent as (select {{ field }} as parent_value from {{ to }}),
+        validation as (
+            select child.child_value
+            from child
+            left join parent on child.child_value = parent.parent_value
+            where parent.parent_value is null
+        ),
+        summary as (
+            select
+                count(*) as total_missing,
+                (select count(*) from child) as total_child_records,
+                round(
+                    ((count(*) * 100) / (select count(*) from child)), 2
+                ) as failure_rate
+            from validation
+        )
+
+    select
+        total_missing,
+        total_child_records,
+        failure_rate,
+        case
+            when failure_rate > {{ proportion_allowed_failures }}
+            then
+                'Test failed: Failure rate of '
+                || failure_rate
+                || '% exceeds allowed proportion of '
+                || '{{ proportion_allowed_failures }}%'
+            else
+                'Test passed: Failure rate of '
+                || failure_rate
+                || '% within acceptable limits'
+        end as result_message
+    from summary
+    where failure_rate > {{ proportion_allowed_failures }}
+
+{% endtest %}
diff --git a/tests/generic/custom_unique_combination_of_columns.sql b/tests/generic/custom_unique_combination_of_columns.sql
new file mode 100644
index 00000000..32b6c339
--- /dev/null
+++ b/tests/generic/custom_unique_combination_of_columns.sql
@@ -0,0 +1,49 @@
+{% test custom_unique_combinations_of_columns(
+    model, combination_of_columns, proportion_allowed_failures=5
+) %}
+
+    {{ config(severity="error") }}
+
+    {%- set column_list = combination_of_columns %}
+    {%- set columns_csv = column_list | join(", ") %}
+
+    with
+        validation_data as (
+            select {{ columns_csv }}, count(*) as duplicates_count
+            from {{ model }}
+            group by {{ columns_csv }}
+            having count(*) > 1
+        ),
+        summary as (
+            select duplicates_count, (select count(*) from {{ model }}) as total_rows
+            from validation_data
+        ),
+
+        final_summary as (
+            select
+                duplicates_count,
+                total_rows,
+                round(((duplicates_count * 100) / total_rows), 2) as proportion
+            from summary
+        )
+
+    select
+        duplicates_count,
+        total_rows,
+        proportion,
+        case
+            when proportion > {{ proportion_allowed_failures }}
+            then
+                'Test failed: Proportion of non-unique '
+                || proportion
+                || '% exceeds allowed proportion '
+                || '{{ proportion_allowed_failures }}%'
+            else
+                'Test passed: Proportion of non-unique '
+                || proportion
+                || '% within acceptable limits'
+        end as log_message
+    from final_summary
+    where proportion > {{ proportion_allowed_failures }}
+
+{% endtest %}

From 35adfb2b25480b681945087db186490162ae9c78 Mon Sep 17 00:00:00 2001
From: folhesgabriel <pisagabriel09@gmail.com>
Date: Thu, 2 May 2024 16:27:30 -0300
Subject: [PATCH 2/8] =?UTF-8?q?deixa=20a=20propor=C3=A7=C3=A3o=20como=20um?=
 =?UTF-8?q?=20n=C3=BAmero=20entre=200=20e=201?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/generic/custom_relationships.sql                 | 2 +-
 tests/generic/custom_unique_combination_of_columns.sql | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/generic/custom_relationships.sql b/tests/generic/custom_relationships.sql
index 618e4a30..32697dee 100644
--- a/tests/generic/custom_relationships.sql
+++ b/tests/generic/custom_relationships.sql
@@ -29,7 +29,7 @@
                 count(*) as total_missing,
                 (select count(*) from child) as total_child_records,
                 round(
-                    ((count(*) * 100) / (select count(*) from child)), 2
+                    ((count(*)) / (select count(*) from child)), 2
                 ) as failure_rate
             from validation
         )
diff --git a/tests/generic/custom_unique_combination_of_columns.sql b/tests/generic/custom_unique_combination_of_columns.sql
index 32b6c339..e823f9ee 100644
--- a/tests/generic/custom_unique_combination_of_columns.sql
+++ b/tests/generic/custom_unique_combination_of_columns.sql
@@ -23,7 +23,7 @@
             select
                 duplicates_count,
                 total_rows,
-                round(((duplicates_count * 100) / total_rows), 2) as proportion
+                round(((duplicates_count) / total_rows), 2) as proportion
             from summary
         )
 

From 80d99c815726d9998f5b55ff9515ce7b2e42a734 Mon Sep 17 00:00:00 2001
From: folhesgabriel <pisagabriel09@gmail.com>
Date: Fri, 3 May 2024 07:43:00 -0300
Subject: [PATCH 3/8] feat: enhance custom_get_where_subquery macro with new
 place holders

---
 macros/custom_get_where_subquery.sql | 70 +++++++++++++++++++++-------
 1 file changed, 53 insertions(+), 17 deletions(-)

diff --git a/macros/custom_get_where_subquery.sql b/macros/custom_get_where_subquery.sql
index 1dac0fd3..8b33dddc 100644
--- a/macros/custom_get_where_subquery.sql
+++ b/macros/custom_get_where_subquery.sql
@@ -1,16 +1,51 @@
--- This macro is used to get a subquery with a where clause that can be used in a test
--- to filter the data to be tested. The macro looks for a where clause in the model's
--- config (schema.yml) and replaces the placeholder "__most_recent_year_month__" with
--- the maximum
--- year and month found in the relation. The macro returns a subquery with the where
--- thats used
--- to filter the data to be tested
-{% macro get_where_subquery(relation) -%}
+-- https://github.com/basedosdados/pipelines/wiki/Incluindo-testes-no-seu-modelo#where--__most_recent_year_month__--__most_recent_date__--__most_recent_year__
+{% macro get_where_subquery(relation) %}
     {% set where = config.get("where", "") %}
 
     {% if where %}
+        {% set max_year_query = "" %}
+        {% set max_date_query = "" %}
+        {% set max_year = "" %}
+        {% set max_date = "" %}
+
+        {# This block looks for __most_recent_year__  placeholder #}
+        {% if "__most_recent_year__" in where %}
+            {% set max_year_query = (
+                "select max(cast(ano as int64)) as max_year from " ~ relation
+            ) %}
+            {% set max_year_result = run_query(max_year_query) %}
+            {% if execute and max_year_result.rows[0][0] %}
+                {% set max_year = max_year_result.rows[0][0] %}
+                {% set where = where | replace(
+                    "__most_recent_year__", "ano = '" ~ max_year ~ "'"
+                ) %}
+                {% do log(
+                    "The test will filter by the most recent year: "
+                    ~ max_year,
+                    info=True,
+                ) %}
+            {% endif %}
+        {% endif %}
+
+        {# This block looks for __most_recent_date__  placeholder #}
+        {% if "__most_recent_date__" in where %}
+            {% set max_date_query = "select max(data) as max_date from " ~ relation %}
+            {% set max_date_result = run_query(max_date_query) %}
+            {% if execute and max_date_result.rows[0][0] %}
+                {% set max_date = max_date_result.rows[0][0] %}
+                {% set where = where | replace(
+                    "__most_recent_date__", "data = '" ~ max_date ~ "'"
+                ) %}
+                {% do log(
+                    "The test will filter by the most recent date: "
+                    ~ max_date,
+                    info=True,
+                ) %}
+            {% endif %}
+        {% endif %}
+
+        {# This block looks for __most_recent_year_month__  placeholder #}
         {% if "__most_recent_year_month__" in where %}
-            {# Construct a query to find the maximum date using ano and mes columns #}
             {% set max_date_query = (
                 "select format_date('%Y-%m', max(date(cast(ano as int64), cast(mes as int64), 1))) as max_date from "
                 ~ relation
@@ -18,9 +53,6 @@
             {% set max_date_result = run_query(max_date_query) %}
 
             {% if execute %}
-                {# % do log(max_date_query, info=True) %#}
-                {# % do log(max_date_result, info=True) %#}
-                {# Extract the maximum year and month from the max_date #}
                 {% set max_date = max_date_result.rows[0][0] %}
                 {% set max_year = max_date[:4] %}
                 {% set max_month = max_date[5:7] %}
@@ -37,11 +69,15 @@
             {% endif %}
         {% endif %}
 
-        {%- set filtered -%}
-            (select * from {{ relation }} where {{ where }}) dbt_subquery
-        {%- endset -%}
-
+        {# Return the filtered subquery #}
+        {% set filtered = (
+            "(select * from "
+            ~ relation
+            ~ " where "
+            ~ where
+            ~ ") dbt_subquery"
+        ) %}
         {% do return(filtered) %}
     {% else %} {% do return(relation) %}
     {% endif %}
-{%- endmacro %}
+{% endmacro %}

From f53f40e8ba692249f0ca911dda6d1cd5e940607a Mon Sep 17 00:00:00 2001
From: folhesgabriel <pisagabriel09@gmail.com>
Date: Fri, 3 May 2024 07:44:10 -0300
Subject: [PATCH 4/8] feat: refactor names and proportion formating of custom
 tests

---
 tests/generic/custom_relationships.sql             |  8 +++-----
 .../custom_unique_combination_of_columns.sql       | 14 +++++++-------
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/tests/generic/custom_relationships.sql b/tests/generic/custom_relationships.sql
index 32697dee..65af9f4e 100644
--- a/tests/generic/custom_relationships.sql
+++ b/tests/generic/custom_relationships.sql
@@ -4,7 +4,7 @@
     to,
     field,
     ignore_values=None,
-    proportion_allowed_failures=0
+    proportion_allowed_failures=0.05
 ) %}
 
     {{ config(severity="error") }}
@@ -14,7 +14,7 @@
             select {{ column_name }} as child_value
             from {{ model }}
             {% if ignore_values %}
-                where {{ column_name }} not in ({{ ignore_values | join(", ") }})
+                where {{ column_name }} not in ('{{ ignore_values | join("', '") }}')
             {% endif %}
         ),
         parent as (select {{ field }} as parent_value from {{ to }}),
@@ -28,9 +28,7 @@
             select
                 count(*) as total_missing,
                 (select count(*) from child) as total_child_records,
-                round(
-                    ((count(*)) / (select count(*) from child)), 2
-                ) as failure_rate
+                round(count(*) / (select count(*) from child), 2) as failure_rate
             from validation
         )
 
diff --git a/tests/generic/custom_unique_combination_of_columns.sql b/tests/generic/custom_unique_combination_of_columns.sql
index e823f9ee..05c9a364 100644
--- a/tests/generic/custom_unique_combination_of_columns.sql
+++ b/tests/generic/custom_unique_combination_of_columns.sql
@@ -1,5 +1,5 @@
 {% test custom_unique_combinations_of_columns(
-    model, combination_of_columns, proportion_allowed_failures=5
+    model, combination_of_columns, proportion_allowed_failures=0.05
 ) %}
 
     {{ config(severity="error") }}
@@ -23,27 +23,27 @@
             select
                 duplicates_count,
                 total_rows,
-                round(((duplicates_count) / total_rows), 2) as proportion
+                round(duplicates_count / total_rows, 2) as failure_rate
             from summary
         )
 
     select
         duplicates_count,
         total_rows,
-        proportion,
+        failure_rate,
         case
-            when proportion > {{ proportion_allowed_failures }}
+            when failure_rate > {{ proportion_allowed_failures }}
             then
                 'Test failed: Proportion of non-unique '
-                || proportion
+                || failure_rate
                 || '% exceeds allowed proportion '
                 || '{{ proportion_allowed_failures }}%'
             else
                 'Test passed: Proportion of non-unique '
-                || proportion
+                || failure_rate
                 || '% within acceptable limits'
         end as log_message
     from final_summary
-    where proportion > {{ proportion_allowed_failures }}
+    where failure_rate > {{ proportion_allowed_failures }}
 
 {% endtest %}

From a39bef47c405cc6ee730e4cb6cfae5a1ed6a6f12 Mon Sep 17 00:00:00 2001
From: folhesgabriel <pisagabriel09@gmail.com>
Date: Fri, 3 May 2024 08:49:28 -0300
Subject: [PATCH 5/8] feat: add beta custom_dicionaries test

---
 tests/generic/custom_dictionaries.sql | 42 +++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 tests/generic/custom_dictionaries.sql

diff --git a/tests/generic/custom_dictionaries.sql b/tests/generic/custom_dictionaries.sql
new file mode 100644
index 00000000..1f574f8d
--- /dev/null
+++ b/tests/generic/custom_dictionaries.sql
@@ -0,0 +1,42 @@
+-- o objetivo do teste é verificar se todos os valores que estão presentes em colunas
+-- de tabelas com dicionário também estão nos dicionários
+{% test custom_dictionaries(
+    model, table_id, dictionary_model_name, columns_covered_by_dictionary
+) %}
+
+    {{ config(severity="error") }}
+
+    {% for column_name in columns_covered_by_dictionary %}
+
+        with
+            left_table as (
+                select {{ column_name }} as id
+                from {{ model }}
+                where {{ column_name }} is not null
+            ),
+
+            right_table as (
+                select chave as id
+                from {{ dictionary_model_name }}
+                where
+                    valor is not null
+                    and id_tabela = '{{ table_id }}'
+                    and nome_coluna = '{{ column_name }}'
+            ),
+
+            exceptions as (
+                select left_table.id
+                from left_table
+                left join right_table on left_table.id = right_table.id
+                where right_table.id is null
+            )
+
+        select '{{ column_name }}' as failed_column, id as missing_value
+        from exceptions
+        {% if not loop.last %}
+            union all
+        {% endif %}
+
+    {% endfor %}
+
+{% endtest %}

From c45b815a3794aec050a4ee68b1bbc33476a41642 Mon Sep 17 00:00:00 2001
From: folhesgabriel <pisagabriel09@gmail.com>
Date: Fri, 3 May 2024 11:08:11 -0300
Subject: [PATCH 6/8] feat: add custom_dicionaries test

---
 tests/generic/custom_dictionaries.sql | 62 ++++++++++++++++-----------
 1 file changed, 36 insertions(+), 26 deletions(-)

diff --git a/tests/generic/custom_dictionaries.sql b/tests/generic/custom_dictionaries.sql
index 1f574f8d..5e648328 100644
--- a/tests/generic/custom_dictionaries.sql
+++ b/tests/generic/custom_dictionaries.sql
@@ -1,42 +1,52 @@
--- o objetivo do teste é verificar se todos os valores que estão presentes em colunas
--- de tabelas com dicionário também estão nos dicionários
+-- https://github.com/basedosdados/pipelines/wiki/Incluindo-testes-no-seu-modelo#dicionários
 {% test custom_dictionaries(
-    model, table_id, dictionary_model_name, columns_covered_by_dictionary
+    model, dictionary_model_name, table_id, columns_covered_by_dictionary
 ) %}
-
     {{ config(severity="error") }}
 
-    {% for column_name in columns_covered_by_dictionary %}
+    {%- set combined_query_parts = [] -%}
+    {%- set union_parts = [] -%}
 
-        with
-            left_table as (
+    {%- for column_name in columns_covered_by_dictionary %}
+        {% set subquery_name = "exceptions_" ~ loop.index %}
+        {% set left_table_name = "left_table_" ~ loop.index %}
+        {% set right_table_name = "right_table_" ~ loop.index %}
+
+        {% set subquery %}
+            {{ left_table_name }} as (
                 select {{ column_name }} as id
                 from {{ model }}
                 where {{ column_name }} is not null
             ),
-
-            right_table as (
-                select chave as id
+            {{ right_table_name }} as (
+                select chave
                 from {{ dictionary_model_name }}
-                where
-                    valor is not null
-                    and id_tabela = '{{ table_id }}'
-                    and nome_coluna = '{{ column_name }}'
+                where valor is not null
+                and id_tabela = '{{ table_id }}'
+                and nome_coluna = '{{ column_name }}'
             ),
-
-            exceptions as (
-                select left_table.id
-                from left_table
-                left join right_table on left_table.id = right_table.id
-                where right_table.id is null
+            {{ subquery_name }} as (
+                select '{{ column_name }}' as failed_column, id as missing_value
+                from {{ left_table_name }}
+                left join {{ right_table_name }} on {{ left_table_name }}.id = {{ right_table_name }}.chave
+                where {{ right_table_name }}.chave is null
             )
+        {% endset %}
+
+        {%- do combined_query_parts.append(subquery) -%}
+        {%- do union_parts.append(subquery_name) -%}
+    {%- endfor %}
+
+    {# Combine all CTEs into a single WITH clause and then union all results #}
+    {% set final_query %}
+        with
+        {{ combined_query_parts | join(', ') }}
+
+        select distinct failed_column, missing_value from {{ union_parts | join(' union all select distinct failed_column, missing_value from ') }}
+    {% endset %}
 
-        select '{{ column_name }}' as failed_column, id as missing_value
-        from exceptions
-        {% if not loop.last %}
-            union all
-        {% endif %}
+    {% do log(final_query, info=True) %}
 
-    {% endfor %}
+    {{ return(final_query) }}
 
 {% endtest %}

From 8c5953d61f8a548f697a4ca3fadd9018b0ba5451 Mon Sep 17 00:00:00 2001
From: folhesgabriel <pisagabriel09@gmail.com>
Date: Fri, 3 May 2024 13:41:00 -0300
Subject: [PATCH 7/8] feat: improve names of custom_dictionaries macro

---
 tests/generic/custom_dictionaries.sql | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/generic/custom_dictionaries.sql b/tests/generic/custom_dictionaries.sql
index 5e648328..50b62c53 100644
--- a/tests/generic/custom_dictionaries.sql
+++ b/tests/generic/custom_dictionaries.sql
@@ -9,8 +9,8 @@
 
     {%- for column_name in columns_covered_by_dictionary %}
         {% set subquery_name = "exceptions_" ~ loop.index %}
-        {% set left_table_name = "left_table_" ~ loop.index %}
-        {% set right_table_name = "right_table_" ~ loop.index %}
+        {% set left_table_name = "data_table_" ~ loop.index %}
+        {% set right_table_name = "dictionary_table_" ~ loop.index %}
 
         {% set subquery %}
             {{ left_table_name }} as (

From c1318e5a59b654e7f7337274727977f7c33d7974 Mon Sep 17 00:00:00 2001
From: folhesgabriel <pisagabriel09@gmail.com>
Date: Fri, 3 May 2024 13:49:24 -0300
Subject: [PATCH 8/8] remove query log from custom_dictionaries

---
 tests/generic/custom_dictionaries.sql | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/generic/custom_dictionaries.sql b/tests/generic/custom_dictionaries.sql
index 50b62c53..b60b1a10 100644
--- a/tests/generic/custom_dictionaries.sql
+++ b/tests/generic/custom_dictionaries.sql
@@ -45,8 +45,6 @@
         select distinct failed_column, missing_value from {{ union_parts | join(' union all select distinct failed_column, missing_value from ') }}
     {% endset %}
 
-    {% do log(final_query, info=True) %}
-
     {{ return(final_query) }}
 
 {% endtest %}