diff --git a/.changes/unreleased/Fixes-20240626-163930.yaml b/.changes/unreleased/Fixes-20240626-163930.yaml
new file mode 100644
index 00000000..37fcc56f
--- /dev/null
+++ b/.changes/unreleased/Fixes-20240626-163930.yaml
@@ -0,0 +1,6 @@
+kind: Fixes
+body: Fix `persist_docs` for `materialized_view` materializations. Previously, using this configuration with materialized view models would lead to an error.
+time: 2024-06-26T16:39:30.455995+02:00
+custom:
+  Author: morsapaes
+  Issue: "120"
diff --git a/dbt/include/postgres/macros/adapters.sql b/dbt/include/postgres/macros/adapters.sql
index 294443be..1d20e6b3 100644
--- a/dbt/include/postgres/macros/adapters.sql
+++ b/dbt/include/postgres/macros/adapters.sql
@@ -196,7 +196,12 @@
 {% macro postgres__alter_relation_comment(relation, comment) %}
   {% set escaped_comment = postgres_escape_comment(comment) %}
-  comment on {{ relation.type }} {{ relation }} is {{ escaped_comment }};
+  {% if relation.type == 'materialized_view' -%}
+    {% set relation_type = "materialized view" %}
+  {%- else -%}
+    {%- set relation_type = relation.type -%}
+  {%- endif -%}
+  comment on {{ relation_type }} {{ relation }} is {{ escaped_comment }};
 {% endmacro %}
diff --git a/tests/functional/shared_tests/__init__.py b/tests/functional/adapter/__init__.py
similarity index 100%
rename from tests/functional/shared_tests/__init__.py
rename to tests/functional/adapter/__init__.py
diff --git a/tests/functional/shared_tests/test_aliases.py b/tests/functional/adapter/test_aliases.py
similarity index 100%
rename from tests/functional/shared_tests/test_aliases.py
rename to tests/functional/adapter/test_aliases.py
diff --git a/tests/functional/shared_tests/test_basic.py b/tests/functional/adapter/test_basic.py
similarity index 100%
rename from tests/functional/shared_tests/test_basic.py
rename to tests/functional/adapter/test_basic.py
diff --git a/tests/functional/shared_tests/test_caching.py b/tests/functional/adapter/test_caching.py
similarity index 100%
rename from tests/functional/shared_tests/test_caching.py
rename to tests/functional/adapter/test_caching.py
diff --git a/tests/functional/shared_tests/test_clone.py b/tests/functional/adapter/test_clone.py
similarity index 100%
rename from tests/functional/shared_tests/test_clone.py
rename to tests/functional/adapter/test_clone.py
diff --git a/tests/functional/shared_tests/test_column_types.py b/tests/functional/adapter/test_column_types.py
similarity index 100%
rename from tests/functional/shared_tests/test_column_types.py
rename to tests/functional/adapter/test_column_types.py
diff --git a/tests/functional/shared_tests/test_concurrency.py b/tests/functional/adapter/test_concurrency.py
similarity index 100%
rename from tests/functional/shared_tests/test_concurrency.py
rename to tests/functional/adapter/test_concurrency.py
diff --git a/tests/functional/shared_tests/test_constraints.py b/tests/functional/adapter/test_constraints.py
similarity index 100%
rename from tests/functional/shared_tests/test_constraints.py
rename to tests/functional/adapter/test_constraints.py
diff --git a/tests/functional/shared_tests/test_data_types.py b/tests/functional/adapter/test_data_types.py
similarity index 100%
rename from tests/functional/shared_tests/test_data_types.py
rename to tests/functional/adapter/test_data_types.py
diff --git a/tests/functional/shared_tests/test_debug.py b/tests/functional/adapter/test_debug.py
similarity index 100%
rename from tests/functional/shared_tests/test_debug.py
rename to tests/functional/adapter/test_debug.py
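For illustration, a minimal sketch of the behavior the `postgres__alter_relation_comment` change above enables; the model, schema, and description below are hypothetical and are not part of this patch.

-- models/my_mv.sql: a materialized view model with persist_docs enabled (illustrative only)
{{ config(materialized='materialized_view', persist_docs={'relation': true}) }}
select 1 as id

-- Approximate comment DDL the patched macro now renders for such a relation:
comment on materialized view "analytics"."public"."my_mv" is 'materialized view model description';
-- Before this fix, relation.type was interpolated directly, producing
-- `comment on materialized_view ...`, which Postgres rejects.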
diff --git a/tests/functional/shared_tests/test_empty.py b/tests/functional/adapter/test_empty.py
similarity index 100%
rename from tests/functional/shared_tests/test_empty.py
rename to tests/functional/adapter/test_empty.py
diff --git a/tests/functional/shared_tests/test_ephemeral.py b/tests/functional/adapter/test_ephemeral.py
similarity index 100%
rename from tests/functional/shared_tests/test_ephemeral.py
rename to tests/functional/adapter/test_ephemeral.py
diff --git a/tests/functional/shared_tests/test_grants.py b/tests/functional/adapter/test_grants.py
similarity index 100%
rename from tests/functional/shared_tests/test_grants.py
rename to tests/functional/adapter/test_grants.py
diff --git a/tests/functional/shared_tests/test_hooks/data/seed_model.sql b/tests/functional/adapter/test_hooks/data/seed_model.sql
similarity index 100%
rename from tests/functional/shared_tests/test_hooks/data/seed_model.sql
rename to tests/functional/adapter/test_hooks/data/seed_model.sql
diff --git a/tests/functional/shared_tests/test_hooks/data/seed_run.sql b/tests/functional/adapter/test_hooks/data/seed_run.sql
similarity index 100%
rename from tests/functional/shared_tests/test_hooks/data/seed_run.sql
rename to tests/functional/adapter/test_hooks/data/seed_run.sql
diff --git a/tests/functional/shared_tests/test_hooks/test_hooks.py b/tests/functional/adapter/test_hooks/test_hooks.py
similarity index 100%
rename from tests/functional/shared_tests/test_hooks/test_hooks.py
rename to tests/functional/adapter/test_hooks/test_hooks.py
diff --git a/tests/functional/shared_tests/test_incremental.py b/tests/functional/adapter/test_incremental.py
similarity index 100%
rename from tests/functional/shared_tests/test_incremental.py
rename to tests/functional/adapter/test_incremental.py
diff --git a/tests/functional/adapter/test_persist_docs.py b/tests/functional/adapter/test_persist_docs.py
new file mode 100644
index 00000000..f1ad342b
--- /dev/null
+++ b/tests/functional/adapter/test_persist_docs.py
@@ -0,0 +1,65 @@
+import pytest
+import json
+
+from dbt.tests.adapter.materialized_view import files
+from dbt.tests.adapter.persist_docs.test_persist_docs import (
+    BasePersistDocs,
+    BasePersistDocsColumnMissing,
+    BasePersistDocsCommentOnQuotedColumn,
+)
+from tests.functional.utils import run_dbt
+
+_MATERIALIZED_VIEW_PROPERTIES__SCHEMA_YML = """
+version: 2
+
+models:
+  - name: my_materialized_view
+    description: |
+      Materialized view model description "with double quotes"
+      and with 'single quotes' as welll as other;
+      '''abc123'''
+      reserved -- characters
+      80% of statistics are made up on the spot
+      --
+      /* comment */
+      Some $lbl$ labeled $lbl$ and $$ unlabeled $$ dollar-quoting
+"""
+
+
+class TestPersistDocs(BasePersistDocs):
+    pass
+
+
+class TestPersistDocsColumnMissing(BasePersistDocsColumnMissing):
+    pass
+
+
+class TestPersistDocsCommentOnQuotedColumn(BasePersistDocsCommentOnQuotedColumn):
+    pass
+
+
+class TestPersistDocsWithMaterializedView(BasePersistDocs):
+    @pytest.fixture(scope="class", autouse=True)
+    def seeds(self):
+        return {"my_seed.csv": files.MY_SEED}
+
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "my_materialized_view.sql": files.MY_MATERIALIZED_VIEW,
+        }
+
+    @pytest.fixture(scope="class")
+    def properties(self):
+        return {
+            "schema.yml": _MATERIALIZED_VIEW_PROPERTIES__SCHEMA_YML,
+        }
+
+    def test_has_comments_pglike(self, project):
+        run_dbt(["docs", "generate"])
+        with open("target/catalog.json") as fp:
+            catalog_data = json.load(fp)
+        assert "nodes" in catalog_data
+        assert len(catalog_data["nodes"]) == 2
+        view_node = catalog_data["nodes"]["model.test.my_materialized_view"]
+        assert view_node["metadata"]["comment"].startswith("Materialized view model description")
diff --git a/tests/functional/shared_tests/test_query_comment.py b/tests/functional/adapter/test_query_comment.py
similarity index 100%
rename from tests/functional/shared_tests/test_query_comment.py
rename to tests/functional/adapter/test_query_comment.py
diff --git a/tests/functional/shared_tests/test_relations.py b/tests/functional/adapter/test_relations.py
similarity index 100%
rename from tests/functional/shared_tests/test_relations.py
rename to tests/functional/adapter/test_relations.py
diff --git a/tests/functional/shared_tests/test_show.py b/tests/functional/adapter/test_show.py
similarity index 62%
rename from tests/functional/shared_tests/test_show.py
rename to tests/functional/adapter/test_show.py
index 47974a04..7f3da925 100644
--- a/tests/functional/shared_tests/test_show.py
+++ b/tests/functional/adapter/test_show.py
@@ -1,6 +1,7 @@
 from dbt.tests.adapter.dbt_show.test_dbt_show import (
     BaseShowLimit,
     BaseShowSqlHeader,
+    BaseShowDoesNotHandleDoubleLimit,
 )
@@ -10,3 +11,7 @@ class TestPostgresShowSqlHeader(BaseShowSqlHeader):
     pass

 class TestPostgresShowLimit(BaseShowLimit):
     pass
+
+
+class TestPostgresShowDoesNotHandleDoubleLimit(BaseShowDoesNotHandleDoubleLimit):
+    pass
diff --git a/tests/functional/shared_tests/test_simple_copy.py b/tests/functional/adapter/test_simple_copy.py
similarity index 100%
rename from tests/functional/shared_tests/test_simple_copy.py
rename to tests/functional/adapter/test_simple_copy.py
diff --git a/tests/functional/shared_tests/test_simple_seed/seed_bom.csv b/tests/functional/adapter/test_simple_seed/seed_bom.csv
similarity index 100%
rename from tests/functional/shared_tests/test_simple_seed/seed_bom.csv
rename to tests/functional/adapter/test_simple_seed/seed_bom.csv
diff --git a/tests/functional/shared_tests/test_simple_seed/test_simple_seed.py b/tests/functional/adapter/test_simple_seed/test_simple_seed.py
similarity index 100%
rename from tests/functional/shared_tests/test_simple_seed/test_simple_seed.py
rename to tests/functional/adapter/test_simple_seed/test_simple_seed.py
diff --git a/tests/functional/shared_tests/test_simple_snapshot.py b/tests/functional/adapter/test_simple_snapshot.py
similarity index 100%
rename from tests/functional/shared_tests/test_simple_snapshot.py
rename to tests/functional/adapter/test_simple_snapshot.py
diff --git a/tests/functional/shared_tests/test_store_test_failures.py b/tests/functional/adapter/test_store_test_failures.py
similarity index 100%
rename from tests/functional/shared_tests/test_store_test_failures.py
rename to tests/functional/adapter/test_store_test_failures.py
diff --git a/tests/functional/shared_tests/test_unit_testing.py b/tests/functional/adapter/test_unit_testing.py
similarity index 100%
rename from tests/functional/shared_tests/test_unit_testing.py
rename to tests/functional/adapter/test_unit_testing.py
diff --git a/tests/functional/shared_tests/test_utils.py b/tests/functional/adapter/test_utils.py
similarity index 100%
rename from tests/functional/shared_tests/test_utils.py
rename to tests/functional/adapter/test_utils.py
diff --git a/tests/functional/shared_tests/test_persist_docs.py b/tests/functional/shared_tests/test_persist_docs.py
deleted file mode 100644
index 2653ca4a..00000000
--- a/tests/functional/shared_tests/test_persist_docs.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from dbt.tests.adapter.persist_docs.test_persist_docs import ( - BasePersistDocs, - BasePersistDocsColumnMissing, - BasePersistDocsCommentOnQuotedColumn, -) - - -class TestPersistDocs(BasePersistDocs): - pass - - -class TestPersistDocsColumnMissing(BasePersistDocsColumnMissing): - pass - - -class TestPersistDocsCommentOnQuotedColumn(BasePersistDocsCommentOnQuotedColumn): - pass diff --git a/tests/functional/simple_snapshot/data/invalidate_postgres.sql b/tests/functional/simple_snapshot/data/invalidate_postgres.sql deleted file mode 100644 index b0bef3c6..00000000 --- a/tests/functional/simple_snapshot/data/invalidate_postgres.sql +++ /dev/null @@ -1,27 +0,0 @@ - --- update records 11 - 21. Change email and updated_at field -update {schema}.seed set - updated_at = updated_at + interval '1 hour', - email = case when id = 20 then 'pfoxj@creativecommons.org' else 'new_' || email end -where id >= 10 and id <= 20; - - --- invalidate records 11 - 21 -update {schema}.snapshot_expected set - dbt_valid_to = updated_at + interval '1 hour' -where id >= 10 and id <= 20; - - -update {schema}.snapshot_castillo_expected set - dbt_valid_to = "1-updated_at" + interval '1 hour' -where id >= 10 and id <= 20; - - -update {schema}.snapshot_alvarez_expected set - dbt_valid_to = updated_at + interval '1 hour' -where id >= 10 and id <= 20; - - -update {schema}.snapshot_kelly_expected set - dbt_valid_to = updated_at + interval '1 hour' -where id >= 10 and id <= 20; diff --git a/tests/functional/simple_snapshot/data/seed_pg.sql b/tests/functional/simple_snapshot/data/seed_pg.sql deleted file mode 100644 index a22a2359..00000000 --- a/tests/functional/simple_snapshot/data/seed_pg.sql +++ /dev/null @@ -1,223 +0,0 @@ - create table {database}.{schema}.seed ( - id INTEGER, - first_name VARCHAR(50), - last_name VARCHAR(50), - email VARCHAR(50), - gender VARCHAR(50), - ip_address VARCHAR(20), - updated_at TIMESTAMP WITHOUT TIME ZONE -); - -create table {database}.{schema}.snapshot_expected ( - id INTEGER, - first_name VARCHAR(50), - last_name VARCHAR(50), - email VARCHAR(50), - gender VARCHAR(50), - ip_address VARCHAR(20), - - -- snapshotting fields - updated_at TIMESTAMP WITHOUT TIME ZONE, - dbt_valid_from TIMESTAMP WITHOUT TIME ZONE, - dbt_valid_to TIMESTAMP WITHOUT TIME ZONE, - dbt_scd_id TEXT, - dbt_updated_at TIMESTAMP WITHOUT TIME ZONE -); - - --- seed inserts --- use the same email for two users to verify that duplicated check_cols values --- are handled appropriately -insert into {database}.{schema}.seed (id, first_name, last_name, email, gender, ip_address, updated_at) values -(1, 'Judith', 'Kennedy', '(not provided)', 'Female', '54.60.24.128', '2015-12-24 12:19:28'), -(2, 'Arthur', 'Kelly', '(not provided)', 'Male', '62.56.24.215', '2015-10-28 16:22:15'), -(3, 'Rachel', 'Moreno', 'rmoreno2@msu.edu', 'Female', '31.222.249.23', '2016-04-05 02:05:30'), -(4, 'Ralph', 'Turner', 'rturner3@hp.com', 'Male', '157.83.76.114', '2016-08-08 00:06:51'), -(5, 'Laura', 'Gonzales', 'lgonzales4@howstuffworks.com', 'Female', '30.54.105.168', '2016-09-01 08:25:38'), -(6, 'Katherine', 'Lopez', 'klopez5@yahoo.co.jp', 'Female', '169.138.46.89', '2016-08-30 18:52:11'), -(7, 'Jeremy', 'Hamilton', 'jhamilton6@mozilla.org', 'Male', '231.189.13.133', '2016-07-17 02:09:46'), -(8, 'Heather', 'Rose', 'hrose7@goodreads.com', 'Female', '87.165.201.65', '2015-12-29 22:03:56'), -(9, 'Gregory', 'Kelly', 'gkelly8@trellian.com', 'Male', '154.209.99.7', '2016-03-24 21:18:16'), -(10, 'Rachel', 'Lopez', 'rlopez9@themeforest.net', 'Female', 
'237.165.82.71', '2016-08-20 15:44:49'), -(11, 'Donna', 'Welch', 'dwelcha@shutterfly.com', 'Female', '103.33.110.138', '2016-02-27 01:41:48'), -(12, 'Russell', 'Lawrence', 'rlawrenceb@qq.com', 'Male', '189.115.73.4', '2016-06-11 03:07:09'), -(13, 'Michelle', 'Montgomery', 'mmontgomeryc@scientificamerican.com', 'Female', '243.220.95.82', '2016-06-18 16:27:19'), -(14, 'Walter', 'Castillo', 'wcastillod@pagesperso-orange.fr', 'Male', '71.159.238.196', '2016-10-06 01:55:44'), -(15, 'Robin', 'Mills', 'rmillse@vkontakte.ru', 'Female', '172.190.5.50', '2016-10-31 11:41:21'), -(16, 'Raymond', 'Holmes', 'rholmesf@usgs.gov', 'Male', '148.153.166.95', '2016-10-03 08:16:38'), -(17, 'Gary', 'Bishop', 'gbishopg@plala.or.jp', 'Male', '161.108.182.13', '2016-08-29 19:35:20'), -(18, 'Anna', 'Riley', 'arileyh@nasa.gov', 'Female', '253.31.108.22', '2015-12-11 04:34:27'), -(19, 'Sarah', 'Knight', 'sknighti@foxnews.com', 'Female', '222.220.3.177', '2016-09-26 00:49:06'), -(20, 'Phyllis', 'Fox', null, 'Female', '163.191.232.95', '2016-08-21 10:35:19'); - - --- populate snapshot table -insert into {database}.{schema}.snapshot_expected ( - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - dbt_valid_from, - dbt_valid_to, - dbt_updated_at, - dbt_scd_id -) - -select - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - -- fields added by snapshotting - updated_at as dbt_valid_from, - null::timestamp as dbt_valid_to, - updated_at as dbt_updated_at, - md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id -from {database}.{schema}.seed; - - - -create table {database}.{schema}.snapshot_castillo_expected ( - id INTEGER, - first_name VARCHAR(50), - last_name VARCHAR(50), - email VARCHAR(50), - gender VARCHAR(50), - ip_address VARCHAR(20), - - -- snapshotting fields - "1-updated_at" TIMESTAMP WITHOUT TIME ZONE, - dbt_valid_from TIMESTAMP WITHOUT TIME ZONE, - dbt_valid_to TIMESTAMP WITHOUT TIME ZONE, - dbt_scd_id TEXT, - dbt_updated_at TIMESTAMP WITHOUT TIME ZONE -); - --- one entry -insert into {database}.{schema}.snapshot_castillo_expected ( - id, - first_name, - last_name, - email, - gender, - ip_address, - "1-updated_at", - dbt_valid_from, - dbt_valid_to, - dbt_updated_at, - dbt_scd_id -) - -select - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - -- fields added by snapshotting - updated_at as dbt_valid_from, - null::timestamp as dbt_valid_to, - updated_at as dbt_updated_at, - md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id -from {database}.{schema}.seed where last_name = 'Castillo'; - -create table {database}.{schema}.snapshot_alvarez_expected ( - id INTEGER, - first_name VARCHAR(50), - last_name VARCHAR(50), - email VARCHAR(50), - gender VARCHAR(50), - ip_address VARCHAR(20), - - -- snapshotting fields - updated_at TIMESTAMP WITHOUT TIME ZONE, - dbt_valid_from TIMESTAMP WITHOUT TIME ZONE, - dbt_valid_to TIMESTAMP WITHOUT TIME ZONE, - dbt_scd_id TEXT, - dbt_updated_at TIMESTAMP WITHOUT TIME ZONE -); - --- 0 entries -insert into {database}.{schema}.snapshot_alvarez_expected ( - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - dbt_valid_from, - dbt_valid_to, - dbt_updated_at, - dbt_scd_id -) - -select - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - -- fields added by snapshotting - updated_at as dbt_valid_from, - null::timestamp as dbt_valid_to, - updated_at as dbt_updated_at, - md5(id || '-' || first_name || '|' || 
updated_at::text) as dbt_scd_id -from {database}.{schema}.seed where last_name = 'Alvarez'; - -create table {database}.{schema}.snapshot_kelly_expected ( - id INTEGER, - first_name VARCHAR(50), - last_name VARCHAR(50), - email VARCHAR(50), - gender VARCHAR(50), - ip_address VARCHAR(20), - - -- snapshotting fields - updated_at TIMESTAMP WITHOUT TIME ZONE, - dbt_valid_from TIMESTAMP WITHOUT TIME ZONE, - dbt_valid_to TIMESTAMP WITHOUT TIME ZONE, - dbt_scd_id TEXT, - dbt_updated_at TIMESTAMP WITHOUT TIME ZONE -); - - --- 2 entries -insert into {database}.{schema}.snapshot_kelly_expected ( - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - dbt_valid_from, - dbt_valid_to, - dbt_updated_at, - dbt_scd_id -) - -select - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - -- fields added by snapshotting - updated_at as dbt_valid_from, - null::timestamp as dbt_valid_to, - updated_at as dbt_updated_at, - md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id -from {database}.{schema}.seed where last_name = 'Kelly'; diff --git a/tests/functional/simple_snapshot/data/shared_macros.sql b/tests/functional/simple_snapshot/data/shared_macros.sql deleted file mode 100644 index 9bdfdd26..00000000 --- a/tests/functional/simple_snapshot/data/shared_macros.sql +++ /dev/null @@ -1,80 +0,0 @@ -{% macro get_snapshot_unique_id() -%} - {{ return(adapter.dispatch('get_snapshot_unique_id')()) }} -{%- endmacro %} - -{% macro default__get_snapshot_unique_id() -%} - {% do return("id || '-' || first_name") %} -{%- endmacro %} - -{# - mostly copy+pasted from dbt_utils, but I removed some parameters and added - a query that calls get_snapshot_unique_id -#} -{% test mutually_exclusive_ranges(model) %} - -with base as ( - select {{ get_snapshot_unique_id() }} as dbt_unique_id, - * - from {{ model }} -), -window_functions as ( - - select - dbt_valid_from as lower_bound, - coalesce(dbt_valid_to, '2099-1-1T00:00:01') as upper_bound, - - lead(dbt_valid_from) over ( - partition by dbt_unique_id - order by dbt_valid_from - ) as next_lower_bound, - - row_number() over ( - partition by dbt_unique_id - order by dbt_valid_from desc - ) = 1 as is_last_record - - from base - -), - -calc as ( - -- We want to return records where one of our assumptions fails, so we'll use - -- the `not` function with `and` statements so we can write our assumptions nore cleanly - select - *, - - -- For each record: lower_bound should be < upper_bound. - -- Coalesce it to return an error on the null case (implicit assumption - -- these columns are not_null) - coalesce( - lower_bound < upper_bound, - is_last_record - ) as lower_bound_less_than_upper_bound, - - -- For each record: upper_bound {{ allow_gaps_operator }} the next lower_bound. - -- Coalesce it to handle null cases for the last record. 
- coalesce( - upper_bound = next_lower_bound, - is_last_record, - false - ) as upper_bound_equal_to_next_lower_bound - - from window_functions - -), - -validation_errors as ( - - select - * - from calc - - where not( - -- THE FOLLOWING SHOULD BE TRUE -- - lower_bound_less_than_upper_bound - and upper_bound_equal_to_next_lower_bound - ) -) - -select * from validation_errors -{% endtest %} diff --git a/tests/functional/simple_snapshot/data/update.sql b/tests/functional/simple_snapshot/data/update.sql deleted file mode 100644 index 890959f3..00000000 --- a/tests/functional/simple_snapshot/data/update.sql +++ /dev/null @@ -1,261 +0,0 @@ --- insert v2 of the 11 - 21 records - -insert into {database}.{schema}.snapshot_expected ( - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - dbt_valid_from, - dbt_valid_to, - dbt_updated_at, - dbt_scd_id -) - -select - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - -- fields added by snapshotting - updated_at as dbt_valid_from, - null::timestamp as dbt_valid_to, - updated_at as dbt_updated_at, - md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id -from {database}.{schema}.seed -where id >= 10 and id <= 20; - - -insert into {database}.{schema}.snapshot_castillo_expected ( - id, - first_name, - last_name, - email, - gender, - ip_address, - "1-updated_at", - dbt_valid_from, - dbt_valid_to, - dbt_updated_at, - dbt_scd_id -) - -select - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - -- fields added by snapshotting - updated_at as dbt_valid_from, - null::timestamp as dbt_valid_to, - updated_at as dbt_updated_at, - md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id -from {database}.{schema}.seed -where id >= 10 and id <= 20 and last_name = 'Castillo'; - - -insert into {database}.{schema}.snapshot_alvarez_expected ( - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - dbt_valid_from, - dbt_valid_to, - dbt_updated_at, - dbt_scd_id -) - -select - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - -- fields added by snapshotting - updated_at as dbt_valid_from, - null::timestamp as dbt_valid_to, - updated_at as dbt_updated_at, - md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id -from {database}.{schema}.seed -where id >= 10 and id <= 20 and last_name = 'Alvarez'; - - -insert into {database}.{schema}.snapshot_kelly_expected ( - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - dbt_valid_from, - dbt_valid_to, - dbt_updated_at, - dbt_scd_id -) - -select - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - -- fields added by snapshotting - updated_at as dbt_valid_from, - null::timestamp as dbt_valid_to, - updated_at as dbt_updated_at, - md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id -from {database}.{schema}.seed -where id >= 10 and id <= 20 and last_name = 'Kelly'; - --- insert 10 new records -insert into {database}.{schema}.seed (id, first_name, last_name, email, gender, ip_address, updated_at) values -(21, 'Judy', 'Robinson', 'jrobinsonk@blogs.com', 'Female', '208.21.192.232', '2016-09-18 08:27:38'), -(22, 'Kevin', 'Alvarez', 'kalvarezl@buzzfeed.com', 'Male', '228.106.146.9', '2016-07-29 03:07:37'), -(23, 'Barbara', 'Carr', 'bcarrm@pen.io', 'Female', '106.165.140.17', '2015-09-24 13:27:23'), -(24, 'William', 'Watkins', 'wwatkinsn@guardian.co.uk', 'Male', '78.155.84.6', '2016-03-08 
19:13:08'), -(25, 'Judy', 'Cooper', 'jcoopero@google.com.au', 'Female', '24.149.123.184', '2016-10-05 20:49:33'), -(26, 'Shirley', 'Castillo', 'scastillop@samsung.com', 'Female', '129.252.181.12', '2016-06-20 21:12:21'), -(27, 'Justin', 'Harper', 'jharperq@opera.com', 'Male', '131.172.103.218', '2016-05-21 22:56:46'), -(28, 'Marie', 'Medina', 'mmedinar@nhs.uk', 'Female', '188.119.125.67', '2015-10-08 13:44:33'), -(29, 'Kelly', 'Edwards', 'kedwardss@phoca.cz', 'Female', '47.121.157.66', '2015-09-15 06:33:37'), -(30, 'Carl', 'Coleman', 'ccolemant@wikipedia.org', 'Male', '82.227.154.83', '2016-05-26 16:46:40'); - - --- add these new records to the snapshot table -insert into {database}.{schema}.snapshot_expected ( - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - dbt_valid_from, - dbt_valid_to, - dbt_updated_at, - dbt_scd_id -) - -select - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - -- fields added by snapshotting - updated_at as dbt_valid_from, - null::timestamp as dbt_valid_to, - updated_at as dbt_updated_at, - md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id -from {database}.{schema}.seed -where id > 20; - - --- add these new records to the snapshot table -insert into {database}.{schema}.snapshot_castillo_expected ( - id, - first_name, - last_name, - email, - gender, - ip_address, - "1-updated_at", - dbt_valid_from, - dbt_valid_to, - dbt_updated_at, - dbt_scd_id -) - -select - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - -- fields added by snapshotting - updated_at as dbt_valid_from, - null::timestamp as dbt_valid_to, - updated_at as dbt_updated_at, - md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id -from {database}.{schema}.seed -where id > 20 and last_name = 'Castillo'; - -insert into {database}.{schema}.snapshot_alvarez_expected ( - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - dbt_valid_from, - dbt_valid_to, - dbt_updated_at, - dbt_scd_id -) - -select - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - -- fields added by snapshotting - updated_at as dbt_valid_from, - null::timestamp as dbt_valid_to, - updated_at as dbt_updated_at, - md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id -from {database}.{schema}.seed -where id > 20 and last_name = 'Alvarez'; - -insert into {database}.{schema}.snapshot_kelly_expected ( - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - dbt_valid_from, - dbt_valid_to, - dbt_updated_at, - dbt_scd_id -) - -select - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - -- fields added by snapshotting - updated_at as dbt_valid_from, - null::timestamp as dbt_valid_to, - updated_at as dbt_updated_at, - md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id -from {database}.{schema}.seed -where id > 20 and last_name = 'Kelly'; diff --git a/tests/functional/simple_snapshot/fixtures.py b/tests/functional/simple_snapshot/fixtures.py deleted file mode 100644 index 04e4905d..00000000 --- a/tests/functional/simple_snapshot/fixtures.py +++ /dev/null @@ -1,389 +0,0 @@ -snapshots_select__snapshot_sql = """ -{% snapshot snapshot_castillo %} - - {{ - config( - target_database=var('target_database', database), - target_schema=schema, - unique_key='id || ' ~ "'-'" ~ ' || first_name', - strategy='timestamp', - updated_at='"1-updated_at"', - ) - }} - select 
id,first_name,last_name,email,gender,ip_address,updated_at as "1-updated_at" from {{target.database}}.{{schema}}.seed where last_name = 'Castillo' - -{% endsnapshot %} - -{% snapshot snapshot_alvarez %} - - {{ - config( - target_database=var('target_database', database), - target_schema=schema, - unique_key='id || ' ~ "'-'" ~ ' || first_name', - strategy='timestamp', - updated_at='updated_at', - ) - }} - select * from {{target.database}}.{{schema}}.seed where last_name = 'Alvarez' - -{% endsnapshot %} - - -{% snapshot snapshot_kelly %} - {# This has no target_database set, which is allowed! #} - {{ - config( - target_schema=schema, - unique_key='id || ' ~ "'-'" ~ ' || first_name', - strategy='timestamp', - updated_at='updated_at', - ) - }} - select * from {{target.database}}.{{schema}}.seed where last_name = 'Kelly' - -{% endsnapshot %} -""" - -snapshots_pg_custom__snapshot_sql = """ -{% snapshot snapshot_actual %} - - {{ - config( - target_database=var('target_database', database), - target_schema=var('target_schema', schema), - unique_key='id || ' ~ "'-'" ~ ' || first_name', - strategy='custom', - updated_at='updated_at', - ) - }} - select * from {{target.database}}.{{target.schema}}.seed - -{% endsnapshot %} -""" - - -macros_custom_snapshot__custom_sql = """ -{# A "custom" strategy that's really just the timestamp one #} -{% macro snapshot_custom_strategy(node, snapshotted_rel, current_rel, config, target_exists) %} - {% set primary_key = config['unique_key'] %} - {% set updated_at = config['updated_at'] %} - - {% set row_changed_expr -%} - ({{ snapshotted_rel }}.{{ updated_at }} < {{ current_rel }}.{{ updated_at }}) - {%- endset %} - - {% set scd_id_expr = snapshot_hash_arguments([primary_key, updated_at]) %} - - {% do return({ - "unique_key": primary_key, - "updated_at": updated_at, - "row_changed": row_changed_expr, - "scd_id": scd_id_expr - }) %} -{% endmacro %} -""" - - -models__schema_yml = """ -version: 2 -snapshots: - - name: snapshot_actual - data_tests: - - mutually_exclusive_ranges - config: - meta: - owner: 'a_owner' -""" - -models__schema_with_target_schema_yml = """ -version: 2 -snapshots: - - name: snapshot_actual - data_tests: - - mutually_exclusive_ranges - config: - meta: - owner: 'a_owner' - target_schema: schema_from_schema_yml -""" - -models__ref_snapshot_sql = """ -select * from {{ ref('snapshot_actual') }} -""" - -macros__test_no_overlaps_sql = """ -{% macro get_snapshot_unique_id() -%} - {{ return(adapter.dispatch('get_snapshot_unique_id')()) }} -{%- endmacro %} - -{% macro default__get_snapshot_unique_id() -%} - {% do return("id || '-' || first_name") %} -{%- endmacro %} - -{# - mostly copy+pasted from dbt_utils, but I removed some parameters and added - a query that calls get_snapshot_unique_id -#} -{% test mutually_exclusive_ranges(model) %} - -with base as ( - select {{ get_snapshot_unique_id() }} as dbt_unique_id, - * - from {{ model }} -), -window_functions as ( - - select - dbt_valid_from as lower_bound, - coalesce(dbt_valid_to, '2099-1-1T00:00:01') as upper_bound, - - lead(dbt_valid_from) over ( - partition by dbt_unique_id - order by dbt_valid_from - ) as next_lower_bound, - - row_number() over ( - partition by dbt_unique_id - order by dbt_valid_from desc - ) = 1 as is_last_record - - from base - -), - -calc as ( - -- We want to return records where one of our assumptions fails, so we'll use - -- the `not` function with `and` statements so we can write our assumptions nore cleanly - select - *, - - -- For each record: lower_bound should be < upper_bound. 
- -- Coalesce it to return an error on the null case (implicit assumption - -- these columns are not_null) - coalesce( - lower_bound < upper_bound, - is_last_record - ) as lower_bound_less_than_upper_bound, - - -- For each record: upper_bound {{ allow_gaps_operator }} the next lower_bound. - -- Coalesce it to handle null cases for the last record. - coalesce( - upper_bound = next_lower_bound, - is_last_record, - false - ) as upper_bound_equal_to_next_lower_bound - - from window_functions - -), - -validation_errors as ( - - select - * - from calc - - where not( - -- THE FOLLOWING SHOULD BE TRUE -- - lower_bound_less_than_upper_bound - and upper_bound_equal_to_next_lower_bound - ) -) - -select * from validation_errors -{% endtest %} -""" - - -snapshots_select_noconfig__snapshot_sql = """ -{% snapshot snapshot_actual %} - - {{ - config( - target_database=var('target_database', database), - target_schema=var('target_schema', schema), - ) - }} - select * from {{target.database}}.{{target.schema}}.seed - -{% endsnapshot %} - -{% snapshot snapshot_castillo %} - - {{ - config( - target_database=var('target_database', database), - updated_at='"1-updated_at"', - ) - }} - select id,first_name,last_name,email,gender,ip_address,updated_at as "1-updated_at" from {{target.database}}.{{schema}}.seed where last_name = 'Castillo' - -{% endsnapshot %} - -{% snapshot snapshot_alvarez %} - - {{ - config( - target_database=var('target_database', database), - ) - }} - select * from {{target.database}}.{{schema}}.seed where last_name = 'Alvarez' - -{% endsnapshot %} - - -{% snapshot snapshot_kelly %} - {# This has no target_database set, which is allowed! #} - select * from {{target.database}}.{{schema}}.seed where last_name = 'Kelly' - -{% endsnapshot %} -""" - - -seeds__seed_newcol_csv = """id,first_name,last_name -1,Judith,Kennedy -2,Arthur,Kelly -3,Rachel,Moreno -""" - -seeds__seed_csv = """id,first_name -1,Judith -2,Arthur -3,Rachel -""" - - -snapshots_pg_custom_namespaced__snapshot_sql = """ -{% snapshot snapshot_actual %} - - {{ - config( - target_database=var('target_database', database), - target_schema=var('target_schema', schema), - unique_key='id || ' ~ "'-'" ~ ' || first_name', - strategy='test.custom', - updated_at='updated_at', - ) - }} - select * from {{target.database}}.{{target.schema}}.seed - -{% endsnapshot %} -""" - -snapshots_pg__snapshot_sql = """ -{% snapshot snapshot_actual %} - - {{ - config( - target_database=var('target_database', database), - target_schema=var('target_schema', schema), - unique_key='id || ' ~ "'-'" ~ ' || first_name', - strategy='timestamp', - updated_at='updated_at', - ) - }} - - {% if var('invalidate_hard_deletes', 'false') | as_bool %} - {{ config(invalidate_hard_deletes=True) }} - {% endif %} - - select * from {{target.database}}.{{target.schema}}.seed - -{% endsnapshot %} -""" - -snapshots_pg__snapshot_no_target_schema_sql = """ -{% snapshot snapshot_actual %} - - {{ - config( - target_database=var('target_database', database), - unique_key='id || ' ~ "'-'" ~ ' || first_name', - strategy='timestamp', - updated_at='updated_at', - ) - }} - - {% if var('invalidate_hard_deletes', 'false') | as_bool %} - {{ config(invalidate_hard_deletes=True) }} - {% endif %} - - select * from {{target.database}}.{{target.schema}}.seed - -{% endsnapshot %} -""" - -models_slow__gen_sql = """ - -{{ config(materialized='ephemeral') }} - - -/* - Generates 50 rows that "appear" to update every - second to a query-er. 
- - 1 2020-04-21 20:44:00-04 0 - 2 2020-04-21 20:43:59-04 59 - 3 2020-04-21 20:43:58-04 58 - 4 2020-04-21 20:43:57-04 57 - - .... 1 second later .... - - 1 2020-04-21 20:44:01-04 1 - 2 2020-04-21 20:44:00-04 0 - 3 2020-04-21 20:43:59-04 59 - 4 2020-04-21 20:43:58-04 58 - - This view uses pg_sleep(2) to make queries against - the view take a non-trivial amount of time - - Use statement_timestamp() as it changes during a transactions. - If we used now() or current_time or similar, then the timestamp - of the start of the transaction would be returned instead. -*/ - -with gen as ( - - select - id, - date_trunc('second', statement_timestamp()) - (interval '1 second' * id) as updated_at - - from generate_series(1, 10) id - -) - -select - id, - updated_at, - extract(seconds from updated_at)::int as seconds - -from gen, pg_sleep(2) -""" - -snapshots_longtext__snapshot_sql = """ -{% snapshot snapshot_actual %} - {{ - config( - target_database=var('target_database', database), - target_schema=schema, - unique_key='id', - strategy='timestamp', - updated_at='updated_at', - ) - }} - select * from {{target.database}}.{{schema}}.super_long -{% endsnapshot %} -""" - -snapshots_check_col_noconfig__snapshot_sql = """ -{% snapshot snapshot_actual %} - select * from {{target.database}}.{{schema}}.seed -{% endsnapshot %} - -{# This should be exactly the same #} -{% snapshot snapshot_checkall %} - {{ config(check_cols='all') }} - select * from {{target.database}}.{{schema}}.seed -{% endsnapshot %} -""" diff --git a/tests/functional/simple_snapshot/test_basic_snapshot.py b/tests/functional/simple_snapshot/test_basic_snapshot.py deleted file mode 100644 index 6165e8e1..00000000 --- a/tests/functional/simple_snapshot/test_basic_snapshot.py +++ /dev/null @@ -1,373 +0,0 @@ -from datetime import datetime -import os - -from dbt.tests.util import ( - check_relations_equal, - relation_from_name, - run_dbt, - write_file, -) -import pytest -import pytz - -from tests.functional.simple_snapshot.fixtures import ( - macros__test_no_overlaps_sql, - macros_custom_snapshot__custom_sql, - models__ref_snapshot_sql, - models__schema_with_target_schema_yml, - models__schema_yml, - seeds__seed_csv, - seeds__seed_newcol_csv, - snapshots_pg__snapshot_no_target_schema_sql, - snapshots_pg__snapshot_sql, - snapshots_pg_custom__snapshot_sql, - snapshots_pg_custom_namespaced__snapshot_sql, -) - - -snapshots_check_col__snapshot_sql = """ -{% snapshot snapshot_actual %} - - {{ - config( - target_database=var('target_database', database), - target_schema=schema, - unique_key='id || ' ~ "'-'" ~ ' || first_name', - strategy='check', - check_cols=['email'], - ) - }} - select * from {{target.database}}.{{schema}}.seed - -{% endsnapshot %} - -{# This should be exactly the same #} -{% snapshot snapshot_checkall %} - {{ - config( - target_database=var('target_database', database), - target_schema=schema, - unique_key='id || ' ~ "'-'" ~ ' || first_name', - strategy='check', - check_cols='all', - ) - }} - select * from {{target.database}}.{{schema}}.seed -{% endsnapshot %} -""" - - -snapshots_check_col_noconfig__snapshot_sql = """ -{% snapshot snapshot_actual %} - select * from {{target.database}}.{{schema}}.seed -{% endsnapshot %} - -{# This should be exactly the same #} -{% snapshot snapshot_checkall %} - {{ config(check_cols='all') }} - select * from {{target.database}}.{{schema}}.seed -{% endsnapshot %} -""" - - -def snapshot_setup(project, num_snapshot_models=1): - path = os.path.join(project.test_data_dir, "seed_pg.sql") - 
project.run_sql_file(path) - results = run_dbt(["snapshot"]) - assert len(results) == num_snapshot_models - - run_dbt(["test"]) - check_relations_equal(project.adapter, ["snapshot_actual", "snapshot_expected"]) - - path = os.path.join(project.test_data_dir, "invalidate_postgres.sql") - project.run_sql_file(path) - - path = os.path.join(project.test_data_dir, "update.sql") - project.run_sql_file(path) - - results = run_dbt(["snapshot"]) - assert len(results) == num_snapshot_models - - run_dbt(["test"]) - check_relations_equal(project.adapter, ["snapshot_actual", "snapshot_expected"]) - - -def ref_setup(project, num_snapshot_models=1): - path = os.path.join(project.test_data_dir, "seed_pg.sql") - project.run_sql_file(path) - results = run_dbt(["snapshot"]) - assert len(results) == num_snapshot_models - - results = run_dbt(["run"]) - assert len(results) == 1 - - -class Basic: - @pytest.fixture(scope="class") - def snapshots(self): - return {"snapshot.sql": snapshots_pg__snapshot_sql} - - @pytest.fixture(scope="class") - def models(self): - return { - "schema.yml": models__schema_yml, - "ref_snapshot.sql": models__ref_snapshot_sql, - } - - @pytest.fixture(scope="class") - def macros(self): - return {"test_no_overlaps.sql": macros__test_no_overlaps_sql} - - @pytest.fixture(scope="class") - def seeds(self): - return {"seed_newcol.csv": seeds__seed_newcol_csv, "seed.csv": seeds__seed_csv} - - -class TestBasicSnapshot(Basic): - def test_basic_snapshot(self, project): - snapshot_setup(project, num_snapshot_models=1) - - -class TestBasicRef(Basic): - def test_basic_ref(self, project): - ref_setup(project, num_snapshot_models=1) - - -class TestBasicTargetSchemaConfig(Basic): - @pytest.fixture(scope="class") - def snapshots(self): - return {"snapshot.sql": snapshots_pg__snapshot_no_target_schema_sql} - - @pytest.fixture(scope="class") - def project_config_update(self, unique_schema): - return { - "snapshots": { - "test": { - "target_schema": unique_schema + "_alt", - } - } - } - - def test_target_schema(self, project): - manifest = run_dbt(["parse"]) - assert len(manifest.nodes) == 5 - # ensure that the schema in the snapshot node is the same as target_schema - snapshot_id = "snapshot.test.snapshot_actual" - snapshot_node = manifest.nodes[snapshot_id] - assert snapshot_node.schema == f"{project.test_schema}_alt" - assert ( - snapshot_node.relation_name - == f'"{project.database}"."{project.test_schema}_alt"."snapshot_actual"' - ) - assert snapshot_node.meta == {"owner": "a_owner"} - - # write out schema.yml file and check again - write_file(models__schema_with_target_schema_yml, "models", "schema.yml") - manifest = run_dbt(["parse"]) - snapshot_node = manifest.nodes[snapshot_id] - assert snapshot_node.schema == "schema_from_schema_yml" - - -class CustomNamespace: - @pytest.fixture(scope="class") - def snapshots(self): - return {"snapshot.sql": snapshots_pg_custom_namespaced__snapshot_sql} - - @pytest.fixture(scope="class") - def macros(self): - return { - "test_no_overlaps.sql": macros__test_no_overlaps_sql, - "custom.sql": macros_custom_snapshot__custom_sql, - } - - @pytest.fixture(scope="class") - def models(self): - return { - "schema.yml": models__schema_yml, - "ref_snapshot.sql": models__ref_snapshot_sql, - } - - @pytest.fixture(scope="class") - def seeds(self): - return {"seed_newcol.csv": seeds__seed_newcol_csv, "seed.csv": seeds__seed_csv} - - -class TestBasicCustomNamespace(CustomNamespace): - def test_custom_namespace_snapshot(self, project): - snapshot_setup(project, num_snapshot_models=1) 
- - -class TestRefCustomNamespace(CustomNamespace): - def test_custom_namespace_ref(self, project): - ref_setup(project, num_snapshot_models=1) - - -class CustomSnapshot: - @pytest.fixture(scope="class") - def snapshots(self): - return {"snapshot.sql": snapshots_pg_custom__snapshot_sql} - - @pytest.fixture(scope="class") - def macros(self): - return { - "test_no_overlaps.sql": macros__test_no_overlaps_sql, - "custom.sql": macros_custom_snapshot__custom_sql, - } - - @pytest.fixture(scope="class") - def models(self): - return { - "schema.yml": models__schema_yml, - "ref_snapshot.sql": models__ref_snapshot_sql, - } - - @pytest.fixture(scope="class") - def seeds(self): - return {"seed_newcol.csv": seeds__seed_newcol_csv, "seed.csv": seeds__seed_csv} - - -class TestBasicCustomSnapshot(CustomSnapshot): - def test_custom_snapshot(self, project): - snapshot_setup(project, num_snapshot_models=1) - - -class TestRefCustomSnapshot(CustomSnapshot): - def test_custom_ref(self, project): - ref_setup(project, num_snapshot_models=1) - - -class CheckCols: - @pytest.fixture(scope="class") - def snapshots(self): - return {"snapshot.sql": snapshots_check_col__snapshot_sql} - - @pytest.fixture(scope="class") - def models(self): - return { - "schema.yml": models__schema_yml, - "ref_snapshot.sql": models__ref_snapshot_sql, - } - - @pytest.fixture(scope="class") - def macros(self): - return {"test_no_overlaps.sql": macros__test_no_overlaps_sql} - - @pytest.fixture(scope="class") - def seeds(self): - return {"seed_newcol.csv": seeds__seed_newcol_csv, "seed.csv": seeds__seed_csv} - - -class TestBasicCheckCols(CheckCols): - def test_basic_snapshot(self, project): - snapshot_setup(project, num_snapshot_models=2) - - -class TestRefCheckCols(CheckCols): - def test_check_cols_ref(self, project): - ref_setup(project, num_snapshot_models=2) - - -class ConfiguredCheckCols: - @pytest.fixture(scope="class") - def snapshots(self): - return {"snapshot.sql": snapshots_check_col_noconfig__snapshot_sql} - - @pytest.fixture(scope="class") - def models(self): - return { - "schema.yml": models__schema_yml, - "ref_snapshot.sql": models__ref_snapshot_sql, - } - - @pytest.fixture(scope="class") - def macros(self): - return {"test_no_overlaps.sql": macros__test_no_overlaps_sql} - - @pytest.fixture(scope="class") - def seeds(self): - return {"seed_newcol.csv": seeds__seed_newcol_csv, "seed.csv": seeds__seed_csv} - - @pytest.fixture(scope="class") - def project_config_update(self): - snapshot_config = { - "snapshots": { - "test": { - "target_schema": "{{ target.schema }}", - "unique_key": "id || '-' || first_name", - "strategy": "check", - "check_cols": ["email"], - } - } - } - return snapshot_config - - -class TestBasicConfiguredCheckCols(ConfiguredCheckCols): - def test_configured_snapshot(self, project): - snapshot_setup(project, num_snapshot_models=2) - - -class TestRefConfiguredCheckCols(ConfiguredCheckCols): - def test_configured_ref(self, project): - ref_setup(project, num_snapshot_models=2) - - -class UpdatedAtCheckCols: - @pytest.fixture(scope="class") - def snapshots(self): - return {"snapshot.sql": snapshots_check_col_noconfig__snapshot_sql} - - @pytest.fixture(scope="class") - def models(self): - return { - "schema.yml": models__schema_yml, - "ref_snapshot.sql": models__ref_snapshot_sql, - } - - @pytest.fixture(scope="class") - def macros(self): - return {"test_no_overlaps.sql": macros__test_no_overlaps_sql} - - @pytest.fixture(scope="class") - def seeds(self): - return {"seed_newcol.csv": seeds__seed_newcol_csv, "seed.csv": 
seeds__seed_csv} - - @pytest.fixture(scope="class") - def project_config_update(self): - snapshot_config = { - "snapshots": { - "test": { - "target_schema": "{{ target.schema }}", - "unique_key": "id || '-' || first_name", - "strategy": "check", - "check_cols": "all", - "updated_at": "updated_at", - } - } - } - return snapshot_config - - -class TestBasicUpdatedAtCheckCols(UpdatedAtCheckCols): - def test_updated_at_snapshot(self, project): - snapshot_setup(project, num_snapshot_models=2) - - snapshot_expected_relation = relation_from_name(project.adapter, "snapshot_expected") - revived_records = project.run_sql( - """ - select id, updated_at, dbt_valid_from from {} - """.format( - snapshot_expected_relation - ), - fetch="all", - ) - for result in revived_records: - # result is a tuple, the updated_at is second and dbt_valid_from is latest - assert isinstance(result[1], datetime) - assert isinstance(result[2], datetime) - assert result[1].replace(tzinfo=pytz.UTC) == result[2].replace(tzinfo=pytz.UTC) - - -class TestRefUpdatedAtCheckCols(UpdatedAtCheckCols): - def test_updated_at_ref(self, project): - ref_setup(project, num_snapshot_models=2) diff --git a/tests/functional/simple_snapshot/test_changing_check_cols_snapshot.py b/tests/functional/simple_snapshot/test_changing_check_cols_snapshot.py deleted file mode 100644 index d5333536..00000000 --- a/tests/functional/simple_snapshot/test_changing_check_cols_snapshot.py +++ /dev/null @@ -1,127 +0,0 @@ -from dbt.tests.util import check_relations_equal, run_dbt -import pytest - - -snapshot_sql = """ -{% snapshot snapshot_check_cols_new_column %} - {{ - config( - target_database=database, - target_schema=schema, - strategy='check', - unique_key='id', - check_cols=var("check_cols", ['name']), - updated_at="'" ~ var("updated_at") ~ "'::timestamp", - ) - }} - - {% if var('version') == 1 %} - - select 1 as id, 'foo' as name - - {% else %} - - select 1 as id, 'foo' as name, 'bar' as other - - {% endif %} - -{% endsnapshot %} -""" - -expected_csv = """ -id,name,other,dbt_scd_id,dbt_updated_at,dbt_valid_from,dbt_valid_to -1,foo,NULL,0d73ad1b216ad884c9f7395d799c912c,2016-07-01 00:00:00.000,2016-07-01 00:00:00.000,2016-07-02 00:00:00.000 -1,foo,bar,7df3783934a6a707d51254859260b9ff,2016-07-02 00:00:00.000,2016-07-02 00:00:00.000, -""".lstrip() - - -@pytest.fixture(scope="class") -def snapshots(): - return {"snapshot_check_cols_new_column.sql": snapshot_sql} - - -@pytest.fixture(scope="class") -def seeds(): - return {"snapshot_check_cols_new_column_expected.csv": expected_csv} - - -@pytest.fixture(scope="class") -def project_config_update(): - return { - "seeds": { - "quote_columns": False, - "test": { - "snapshot_check_cols_new_column_expected": { - "+column_types": { - "dbt_updated_at": "timestamp without time zone", - "dbt_valid_from": "timestamp without time zone", - "dbt_valid_to": "timestamp without time zone", - }, - }, - }, - }, - } - - -def run_check_cols_snapshot_with_schema_change(project, check_cols_override=None): - """ - Test that snapshots using the "check" strategy and explicit check_cols support adding columns. - - Approach: - 1. Take a snapshot that checks a single non-id column - 2. Add a new column to the data - 3. Take a snapshot that checks the new non-id column too - - As long as no error is thrown, then the snapshot was successful - """ - - check_cols = check_cols_override or ["name", "other"] - - # 1. 
Create a table that represents the expected data after a series of snapshots - vars_dict = {"version": 1, "updated_at": "2016-07-01"} - results = run_dbt(["seed", "--show", "--vars", str(vars_dict)]) - assert len(results) == 1 - - # Snapshot 1 - # Use only 'name' for check_cols - vars_dict = {"version": 1, "check_cols": [check_cols[0]], "updated_at": "2016-07-01"} - results = run_dbt(["snapshot", "--vars", str(vars_dict)]) - assert len(results) == 1 - - # Snapshot 2 - # Use both 'name' and 'other' for check_cols - vars_dict = {"version": 2, "check_cols": check_cols, "updated_at": "2016-07-02"} - results = run_dbt(["snapshot", "--vars", str(vars_dict)]) - assert len(results) == 1 - - check_relations_equal( - project.adapter, - ["snapshot_check_cols_new_column", "snapshot_check_cols_new_column_expected"], - compare_snapshot_cols=True, - ) - - # Snapshot 3 - # Run it again. Nothing has changed — ensure we don't detect changes - vars_dict = {"version": 2, "check_cols": check_cols, "updated_at": "2016-07-02"} - results = run_dbt(["snapshot", "--vars", str(vars_dict)]) - assert len(results) == 1 - - check_relations_equal( - project.adapter, - ["snapshot_check_cols_new_column", "snapshot_check_cols_new_column_expected"], - compare_snapshot_cols=True, - ) - - -def test_check_cols_snapshot_with_schema_change(project): - run_check_cols_snapshot_with_schema_change(project) - - -def test_check_cols_snapshot_with_schema_change_and_mismatched_casing(project): - """ - Test that this still works if the database-stored version of 'name' + 'other' - differs from the user-configured 'NAME' and 'OTHER' - """ - run_check_cols_snapshot_with_schema_change( - project=project, check_cols_override=["NAME", "OTHER"] - ) diff --git a/tests/functional/simple_snapshot/test_changing_strategy_snapshot.py b/tests/functional/simple_snapshot/test_changing_strategy_snapshot.py deleted file mode 100644 index 5540eee5..00000000 --- a/tests/functional/simple_snapshot/test_changing_strategy_snapshot.py +++ /dev/null @@ -1,128 +0,0 @@ -from dbt.tests.util import run_dbt -import pytest - -from tests.functional.simple_snapshot.fixtures import models_slow__gen_sql - - -test_snapshots_changing_strategy__test_snapshot_sql = """ - -{# /* - Given the repro case for the snapshot build, we'd - expect to see both records have color='pink' - in their most recent rows. -*/ #} - -with expected as ( - - select 1 as id, 'pink' as color union all - select 2 as id, 'pink' as color - -), - -actual as ( - - select id, color - from {{ ref('my_snapshot') }} - where color = 'pink' - and dbt_valid_to is null - -) - -select * from expected -except -select * from actual - -union all - -select * from actual -except -select * from expected -""" - - -snapshots_changing_strategy__snapshot_sql = """ - -{# - REPRO: - 1. Run with check strategy - 2. Add a new ts column and run with check strategy - 3. 
Run with timestamp strategy on new ts column - - Expect: new entry is added for changed rows in (3) -#} - - -{% snapshot my_snapshot %} - - {#--------------- Configuration ------------ #} - - {{ config( - target_schema=schema, - unique_key='id' - ) }} - - {% if var('strategy') == 'timestamp' %} - {{ config(strategy='timestamp', updated_at='updated_at') }} - {% else %} - {{ config(strategy='check', check_cols=['color']) }} - {% endif %} - - {#--------------- Test setup ------------ #} - - {% if var('step') == 1 %} - - select 1 as id, 'blue' as color - union all - select 2 as id, 'red' as color - - {% elif var('step') == 2 %} - - -- change id=1 color from blue to green - -- id=2 is unchanged when using the check strategy - select 1 as id, 'green' as color, '2020-01-01'::date as updated_at - union all - select 2 as id, 'red' as color, '2020-01-01'::date as updated_at - - {% elif var('step') == 3 %} - - -- bump timestamp for both records. Expect that after this runs - -- using the timestamp strategy, both ids should have the color - -- 'pink' in the database. This should be in the future b/c we're - -- going to compare to the check timestamp, which will be _now_ - select 1 as id, 'pink' as color, (now() + interval '1 day')::date as updated_at - union all - select 2 as id, 'pink' as color, (now() + interval '1 day')::date as updated_at - - {% endif %} - -{% endsnapshot %} -""" - - -@pytest.fixture(scope="class") -def models(): - return {"gen.sql": models_slow__gen_sql} - - -@pytest.fixture(scope="class") -def snapshots(): - return {"snapshot.sql": snapshots_changing_strategy__snapshot_sql} - - -@pytest.fixture(scope="class") -def tests(): - return {"test_snapshot.sql": test_snapshots_changing_strategy__test_snapshot_sql} - - -def test_changing_strategy(project): - results = run_dbt(["snapshot", "--vars", "{strategy: check, step: 1}"]) - assert len(results) == 1 - - results = run_dbt(["snapshot", "--vars", "{strategy: check, step: 2}"]) - assert len(results) == 1 - - results = run_dbt(["snapshot", "--vars", "{strategy: timestamp, step: 3}"]) - assert len(results) == 1 - - results = run_dbt(["test"]) - assert len(results) == 1 diff --git a/tests/functional/simple_snapshot/test_check_cols_snapshot.py b/tests/functional/simple_snapshot/test_check_cols_snapshot.py deleted file mode 100644 index 2b2673df..00000000 --- a/tests/functional/simple_snapshot/test_check_cols_snapshot.py +++ /dev/null @@ -1,113 +0,0 @@ -from dbt.tests.util import run_dbt -import pytest - - -snapshot_sql = """ -{% snapshot check_cols_cycle %} - - {{ - config( - target_database=database, - target_schema=schema, - unique_key='id', - strategy='check', - check_cols=['color'] - ) - }} - - {% if var('version') == 1 %} - - select 1 as id, 'red' as color union all - select 2 as id, 'green' as color - - {% elif var('version') == 2 %} - - select 1 as id, 'blue' as color union all - select 2 as id, 'green' as color - - {% elif var('version') == 3 %} - - select 1 as id, 'red' as color union all - select 2 as id, 'pink' as color - - {% else %} - {% do exceptions.raise_compiler_error("Got bad version: " ~ var('version')) %} - {% endif %} - -{% endsnapshot %} -""" - -snapshot_test_sql = """ -with query as ( - - -- check that the current value for id=1 is red - select case when ( - select count(*) - from {{ ref('check_cols_cycle') }} - where id = 1 and color = 'red' and dbt_valid_to is null - ) = 1 then 0 else 1 end as failures - - union all - - -- check that the previous 'red' value for id=1 is invalidated - select case when ( - select 
count(*) - from {{ ref('check_cols_cycle') }} - where id = 1 and color = 'red' and dbt_valid_to is not null - ) = 1 then 0 else 1 end as failures - - union all - - -- check that there's only one current record for id=2 - select case when ( - select count(*) - from {{ ref('check_cols_cycle') }} - where id = 2 and color = 'pink' and dbt_valid_to is null - ) = 1 then 0 else 1 end as failures - - union all - - -- check that the previous value for id=2 is represented - select case when ( - select count(*) - from {{ ref('check_cols_cycle') }} - where id = 2 and color = 'green' and dbt_valid_to is not null - ) = 1 then 0 else 1 end as failures - - union all - - -- check that there are 5 records total in the table - select case when ( - select count(*) - from {{ ref('check_cols_cycle') }} - ) = 5 then 0 else 1 end as failures - -) - -select * -from query -where failures = 1 -""" - - -@pytest.fixture(scope="class") -def snapshots(): - return {"my_snapshot.sql": snapshot_sql} - - -@pytest.fixture(scope="class") -def tests(): - return {"my_test.sql": snapshot_test_sql} - - -def test_simple_snapshot(project): - results = run_dbt(["snapshot", "--vars", "version: 1"]) - assert len(results) == 1 - - results = run_dbt(["snapshot", "--vars", "version: 2"]) - assert len(results) == 1 - - results = run_dbt(["snapshot", "--vars", "version: 3"]) - assert len(results) == 1 - - run_dbt(["test", "--select", "test_type:singular", "--vars", "version: 3"]) diff --git a/tests/functional/simple_snapshot/test_check_cols_updated_at_snapshot.py b/tests/functional/simple_snapshot/test_check_cols_updated_at_snapshot.py deleted file mode 100644 index 0c99d85e..00000000 --- a/tests/functional/simple_snapshot/test_check_cols_updated_at_snapshot.py +++ /dev/null @@ -1,114 +0,0 @@ -from dbt.tests.util import check_relations_equal, run_dbt -import pytest - - -snapshot_sql = """ -{% snapshot snapshot_check_cols_updated_at_actual %} - {{ - config( - target_database=database, - target_schema=schema, - unique_key='id', - strategy='check', - check_cols='all', - updated_at="'" ~ var("updated_at") ~ "'::timestamp", - ) - }} - - {% if var('version') == 1 %} - - select 'a' as id, 10 as counter, '2016-01-01T00:00:00Z'::timestamp as timestamp_col union all - select 'b' as id, 20 as counter, '2016-01-01T00:00:00Z'::timestamp as timestamp_col - - {% elif var('version') == 2 %} - - select 'a' as id, 30 as counter, '2016-01-02T00:00:00Z'::timestamp as timestamp_col union all - select 'b' as id, 20 as counter, '2016-01-01T00:00:00Z'::timestamp as timestamp_col union all - select 'c' as id, 40 as counter, '2016-01-02T00:00:00Z'::timestamp as timestamp_col - - {% else %} - - select 'a' as id, 30 as counter, '2016-01-02T00:00:00Z'::timestamp as timestamp_col union all - select 'c' as id, 40 as counter, '2016-01-02T00:00:00Z'::timestamp as timestamp_col - - {% endif %} - -{% endsnapshot %} -""" - -expected_csv = """ -id,counter,timestamp_col,dbt_scd_id,dbt_updated_at,dbt_valid_from,dbt_valid_to -a,10,2016-01-01 00:00:00.000,927354aa091feffd9437ead0bdae7ae1,2016-07-01 00:00:00.000,2016-07-01 00:00:00.000,2016-07-02 00:00:00.000 -b,20,2016-01-01 00:00:00.000,40ace4cbf8629f1720ec8a529ed76f8c,2016-07-01 00:00:00.000,2016-07-01 00:00:00.000, -a,30,2016-01-02 00:00:00.000,e9133f2b302c50e36f43e770944cec9b,2016-07-02 00:00:00.000,2016-07-02 00:00:00.000, -c,40,2016-01-02 00:00:00.000,09d33d35101e788c152f65d0530b6837,2016-07-02 00:00:00.000,2016-07-02 00:00:00.000, -""".lstrip() - - -@pytest.fixture(scope="class") -def snapshots(): - return 
{"snapshot_check_cols_updated_at_actual.sql": snapshot_sql} - - -@pytest.fixture(scope="class") -def seeds(): - return {"snapshot_check_cols_updated_at_expected.csv": expected_csv} - - -@pytest.fixture(scope="class") -def project_config_update(): - return { - "seeds": { - "quote_columns": False, - "test": { - "snapshot_check_cols_updated_at_expected": { - "+column_types": { - "timestamp_col": "timestamp without time zone", - "dbt_updated_at": "timestamp without time zone", - "dbt_valid_from": "timestamp without time zone", - "dbt_valid_to": "timestamp without time zone", - }, - }, - }, - }, - } - - -def test_simple_snapshot(project): - """ - Test that the `dbt_updated_at` column reflects the `updated_at` timestamp expression in the config. - - Approach: - 1. Create a table that represents the expected data after a series of snapshots - - Use dbt seed to create the expected relation (`snapshot_check_cols_updated_at_expected`) - 2. Execute a series of snapshots to create the data - - Use a series of (3) dbt snapshot commands to create the actual relation (`snapshot_check_cols_updated_at_actual`) - - The logic can switch between 3 different versions of the data (depending on the `version` number) - - The `updated_at` value is passed in via `--vars` and cast to a timestamp in the snapshot config - 3. Compare the two relations for equality - """ - - # 1. Create a table that represents the expected data after a series of snapshots - results = run_dbt(["seed", "--show", "--vars", "{version: 1, updated_at: 2016-07-01}"]) - assert len(results) == 1 - - # 2. Execute a series of snapshots to create the data - - # Snapshot day 1 - results = run_dbt(["snapshot", "--vars", "{version: 1, updated_at: 2016-07-01}"]) - assert len(results) == 1 - - # Snapshot day 2 - results = run_dbt(["snapshot", "--vars", "{version: 2, updated_at: 2016-07-02}"]) - assert len(results) == 1 - - # Snapshot day 3 - results = run_dbt(["snapshot", "--vars", "{version: 3, updated_at: 2016-07-03}"]) - assert len(results) == 1 - - # 3. Compare the two relations for equality - check_relations_equal( - project.adapter, - ["snapshot_check_cols_updated_at_actual", "snapshot_check_cols_updated_at_expected"], - compare_snapshot_cols=True, - ) diff --git a/tests/functional/simple_snapshot/test_comment_ending_snapshot.py b/tests/functional/simple_snapshot/test_comment_ending_snapshot.py deleted file mode 100644 index ab21b641..00000000 --- a/tests/functional/simple_snapshot/test_comment_ending_snapshot.py +++ /dev/null @@ -1,36 +0,0 @@ -import os - -from dbt.tests.util import run_dbt -import pytest - - -snapshots_with_comment_at_end__snapshot_sql = """ -{% snapshot snapshot_actual %} - {{ - config( - target_database=var('target_database', database), - target_schema=schema, - unique_key='id', - strategy='check', - check_cols=['email'], - ) - }} - select * from {{target.database}}.{{schema}}.seed - -- Test comment to prevent recurrence of https://github.com/dbt-labs/dbt-core/issues/6781 -{% endsnapshot %} -""" - - -class TestSnapshotsWithCommentAtEnd: - @pytest.fixture(scope="class") - def snapshots(self): - return {"snapshot.sql": snapshots_with_comment_at_end__snapshot_sql} - - def test_comment_ending(self, project): - path = os.path.join(project.test_data_dir, "seed_pg.sql") - project.run_sql_file(path) - # N.B. 
Snapshot is run twice to ensure snapshot_check_all_get_existing_columns is fully run - # (it exits early if the table doesn't already exist) - run_dbt(["snapshot"]) - results = run_dbt(["snapshot"]) - assert len(results) == 1 diff --git a/tests/functional/simple_snapshot/test_cross_schema_snapshot.py b/tests/functional/simple_snapshot/test_cross_schema_snapshot.py deleted file mode 100644 index 1072a5aa..00000000 --- a/tests/functional/simple_snapshot/test_cross_schema_snapshot.py +++ /dev/null @@ -1,48 +0,0 @@ -import os - -from dbt.tests.util import run_dbt -import pytest - -from tests.functional.simple_snapshot.fixtures import ( - macros__test_no_overlaps_sql, - models__ref_snapshot_sql, - models__schema_yml, - snapshots_pg__snapshot_sql, -) - - -NUM_SNAPSHOT_MODELS = 1 - - -@pytest.fixture(scope="class") -def snapshots(): - return {"snapshot.sql": snapshots_pg__snapshot_sql} - - -@pytest.fixture(scope="class") -def models(): - return { - "schema.yml": models__schema_yml, - "ref_snapshot.sql": models__ref_snapshot_sql, - } - - -@pytest.fixture(scope="class") -def macros(): - return {"test_no_overlaps.sql": macros__test_no_overlaps_sql} - - -def test_cross_schema_snapshot(project): - # populate seed and snapshot tables - path = os.path.join(project.test_data_dir, "seed_pg.sql") - project.run_sql_file(path) - - target_schema = "{}_snapshotted".format(project.test_schema) - - # create a snapshot using the new schema - results = run_dbt(["snapshot", "--vars", '{{"target_schema": "{}"}}'.format(target_schema)]) - assert len(results) == NUM_SNAPSHOT_MODELS - - # run dbt from test_schema with a ref to to new target_schema - results = run_dbt(["run", "--vars", '{{"target_schema": {}}}'.format(target_schema)]) - assert len(results) == 1 diff --git a/tests/functional/simple_snapshot/test_hard_delete_snapshot.py b/tests/functional/simple_snapshot/test_hard_delete_snapshot.py deleted file mode 100644 index ab25bbfa..00000000 --- a/tests/functional/simple_snapshot/test_hard_delete_snapshot.py +++ /dev/null @@ -1,192 +0,0 @@ -from datetime import datetime, timedelta -import os - -from dbt.tests.adapter.utils.test_current_timestamp import is_aware -from dbt.tests.util import run_dbt, check_relations_equal -import pytest -import pytz - -from tests.functional.simple_snapshot.fixtures import ( - macros__test_no_overlaps_sql, - models__ref_snapshot_sql, - models__schema_yml, - snapshots_pg__snapshot_sql, -) - - -# These tests uses the same seed data, containing 20 records of which we hard delete the last 10. -# These deleted records set the dbt_valid_to to time the snapshot was ran. - - -def convert_to_aware(d: datetime) -> datetime: - # There are two types of datetime objects in Python: naive and aware - # Assume any dbt snapshot timestamp that is naive is meant to represent UTC - if d is None: - return d - elif is_aware(d): - return d - else: - return d.replace(tzinfo=pytz.UTC) - - -def is_close_datetime( - dt1: datetime, dt2: datetime, atol: timedelta = timedelta(microseconds=1) -) -> bool: - # Similar to pytest.approx, math.isclose, and numpy.isclose - # Use an absolute tolerance to compare datetimes that may not be perfectly equal. - # Two None values will compare as equal. 
- if dt1 is None and dt2 is None: - return True - elif dt1 is not None and dt2 is not None: - return (dt1 > (dt2 - atol)) and (dt1 < (dt2 + atol)) - else: - return False - - -def datetime_snapshot(): - NUM_SNAPSHOT_MODELS = 1 - begin_snapshot_datetime = datetime.now(pytz.UTC) - results = run_dbt(["snapshot", "--vars", "{invalidate_hard_deletes: true}"]) - assert len(results) == NUM_SNAPSHOT_MODELS - - return begin_snapshot_datetime - - -@pytest.fixture(scope="class", autouse=True) -def setUp(project): - path = os.path.join(project.test_data_dir, "seed_pg.sql") - project.run_sql_file(path) - - -@pytest.fixture(scope="class") -def snapshots(): - return {"snapshot.sql": snapshots_pg__snapshot_sql} - - -@pytest.fixture(scope="class") -def models(): - return { - "schema.yml": models__schema_yml, - "ref_snapshot.sql": models__ref_snapshot_sql, - } - - -@pytest.fixture(scope="class") -def macros(): - return {"test_no_overlaps.sql": macros__test_no_overlaps_sql} - - -def test_snapshot_hard_delete(project): - # run the first snapshot - datetime_snapshot() - - check_relations_equal(project.adapter, ["snapshot_expected", "snapshot_actual"]) - - invalidated_snapshot_datetime = None - revived_snapshot_datetime = None - - # hard delete last 10 records - project.run_sql( - "delete from {}.{}.seed where id >= 10;".format(project.database, project.test_schema) - ) - - # snapshot and assert invalidated - invalidated_snapshot_datetime = datetime_snapshot() - - snapshotted = project.run_sql( - """ - select - id, - dbt_valid_to - from {}.{}.snapshot_actual - order by id - """.format( - project.database, project.test_schema - ), - fetch="all", - ) - - assert len(snapshotted) == 20 - for result in snapshotted[10:]: - # result is a tuple, the dbt_valid_to column is the latest - assert isinstance(result[-1], datetime) - dbt_valid_to = convert_to_aware(result[-1]) - - # Plenty of wiggle room if clocks aren't perfectly sync'd, etc - assert is_close_datetime( - dbt_valid_to, invalidated_snapshot_datetime, timedelta(minutes=1) - ), f"SQL timestamp {dbt_valid_to.isoformat()} is not close enough to Python UTC {invalidated_snapshot_datetime.isoformat()}" - - # revive records - # Timestamp must have microseconds for tests below to be meaningful - # Assume `updated_at` is TIMESTAMP WITHOUT TIME ZONE that implicitly represents UTC - revival_timestamp = datetime.now(pytz.UTC).strftime("%Y-%m-%d %H:%M:%S.%f") - project.run_sql( - """ - insert into {}.{}.seed (id, first_name, last_name, email, gender, ip_address, updated_at) values - (10, 'Rachel', 'Lopez', 'rlopez9@themeforest.net', 'Female', '237.165.82.71', '{}'), - (11, 'Donna', 'Welch', 'dwelcha@shutterfly.com', 'Female', '103.33.110.138', '{}') - """.format( - project.database, project.test_schema, revival_timestamp, revival_timestamp - ) - ) - - # snapshot and assert records were revived - # Note: the revived_snapshot_datetime here is later than the revival_timestamp above - revived_snapshot_datetime = datetime_snapshot() - - # records which weren't revived (id != 10, 11) - # dbt_valid_to is not null - invalidated_records = project.run_sql( - """ - select - id, - dbt_valid_to - from {}.{}.snapshot_actual - where dbt_valid_to is not null - order by id - """.format( - project.database, project.test_schema - ), - fetch="all", - ) - - assert len(invalidated_records) == 11 - for result in invalidated_records: - # result is a tuple, the dbt_valid_to column is the latest - assert isinstance(result[1], datetime) - dbt_valid_to = convert_to_aware(result[1]) - - # Plenty of 
wiggle room if clocks aren't perfectly sync'd, etc - assert is_close_datetime( - dbt_valid_to, invalidated_snapshot_datetime, timedelta(minutes=1) - ), f"SQL timestamp {dbt_valid_to.isoformat()} is not close enough to Python UTC {invalidated_snapshot_datetime.isoformat()}" - - # records which were revived (id = 10, 11) - # dbt_valid_to is null - revived_records = project.run_sql( - """ - select - id, - dbt_valid_from, - dbt_valid_to - from {}.{}.snapshot_actual - where dbt_valid_to is null - and id IN (10, 11) - """.format( - project.database, project.test_schema - ), - fetch="all", - ) - - assert len(revived_records) == 2 - for result in revived_records: - # result is a tuple, the dbt_valid_from is second and dbt_valid_to is latest - # dbt_valid_from is the same as the 'updated_at' added in the revived_rows - # dbt_valid_to is null - assert isinstance(result[1], datetime) - dbt_valid_from = convert_to_aware(result[1]) - dbt_valid_to = result[2] - - assert dbt_valid_from <= revived_snapshot_datetime - assert dbt_valid_to is None diff --git a/tests/functional/simple_snapshot/test_invalid_namespace_snapshot.py b/tests/functional/simple_snapshot/test_invalid_namespace_snapshot.py deleted file mode 100644 index 1ee8fa40..00000000 --- a/tests/functional/simple_snapshot/test_invalid_namespace_snapshot.py +++ /dev/null @@ -1,67 +0,0 @@ -import os - -from dbt.tests.util import run_dbt -import pytest - -from tests.functional.simple_snapshot.fixtures import ( - macros__test_no_overlaps_sql, - macros_custom_snapshot__custom_sql, - models__ref_snapshot_sql, - models__schema_yml, - seeds__seed_csv, - seeds__seed_newcol_csv, -) - - -NUM_SNAPSHOT_MODELS = 1 - - -snapshots_pg_custom_invalid__snapshot_sql = """ -{% snapshot snapshot_actual %} - {# this custom strategy does not exist in the 'dbt' package #} - {{ - config( - target_database=var('target_database', database), - target_schema=var('target_schema', schema), - unique_key='id || ' ~ "'-'" ~ ' || first_name', - strategy='dbt.custom', - updated_at='updated_at', - ) - }} - select * from {{target.database}}.{{target.schema}}.seed - -{% endsnapshot %} -""" - - -@pytest.fixture(scope="class") -def snapshots(): - return {"snapshots.sql": snapshots_pg_custom_invalid__snapshot_sql} - - -@pytest.fixture(scope="class") -def macros(): - return { - "test_no_overlaps.sql": macros__test_no_overlaps_sql, - "custom.sql": macros_custom_snapshot__custom_sql, - } - - -@pytest.fixture(scope="class") -def models(): - return { - "schema.yml": models__schema_yml, - "ref_snapshot.sql": models__ref_snapshot_sql, - } - - -@pytest.fixture(scope="class") -def seeds(): - return {"seed_newcol.csv": seeds__seed_newcol_csv, "seed.csv": seeds__seed_csv} - - -def test_custom_snapshot_invalid_namespace(project): - path = os.path.join(project.test_data_dir, "seed_pg.sql") - project.run_sql_file(path) - results = run_dbt(["snapshot"], expect_pass=False) - assert len(results) == NUM_SNAPSHOT_MODELS diff --git a/tests/functional/simple_snapshot/test_long_text_snapshot.py b/tests/functional/simple_snapshot/test_long_text_snapshot.py deleted file mode 100644 index 0793a3fc..00000000 --- a/tests/functional/simple_snapshot/test_long_text_snapshot.py +++ /dev/null @@ -1,70 +0,0 @@ -from dbt.tests.util import run_dbt -import pytest - -from tests.functional.simple_snapshot.fixtures import ( - macros__test_no_overlaps_sql, - models__ref_snapshot_sql, - models__schema_yml, -) - - -seed_longtext_sql = """ -create table {database}.{schema}.super_long ( - id INTEGER, - longstring TEXT, - updated_at 
TIMESTAMP WITHOUT TIME ZONE -); - -insert into {database}.{schema}.super_long (id, longstring, updated_at) VALUES -(1, 'short', current_timestamp), -(2, repeat('a', 500), current_timestamp); -""" - -snapshots_longtext__snapshot_sql = """ -{% snapshot snapshot_actual %} - {{ - config( - target_database=var('target_database', database), - target_schema=schema, - unique_key='id', - strategy='timestamp', - updated_at='updated_at', - ) - }} - select * from {{target.database}}.{{schema}}.super_long -{% endsnapshot %} -""" - - -@pytest.fixture(scope="class") -def snapshots(): - return {"snapshot.sql": snapshots_longtext__snapshot_sql} - - -@pytest.fixture(scope="class") -def models(): - return { - "schema.yml": models__schema_yml, - "ref_snapshot.sql": models__ref_snapshot_sql, - } - - -@pytest.fixture(scope="class") -def macros(): - return {"test_no_overlaps.sql": macros__test_no_overlaps_sql} - - -def test_long_text(project): - project.run_sql(seed_longtext_sql) - - results = run_dbt(["snapshot"]) - assert len(results) == 1 - - with project.adapter.connection_named("test"): - status, results = project.adapter.execute( - "select * from {}.{}.snapshot_actual".format(project.database, project.test_schema), - fetch=True, - ) - assert len(results) == 2 - got_names = set(r.get("longstring") for r in results) - assert got_names == {"a" * 500, "short"} diff --git a/tests/functional/simple_snapshot/test_renamed_source_snapshot.py b/tests/functional/simple_snapshot/test_renamed_source_snapshot.py deleted file mode 100644 index 23db614b..00000000 --- a/tests/functional/simple_snapshot/test_renamed_source_snapshot.py +++ /dev/null @@ -1,74 +0,0 @@ -from dbt.tests.util import run_dbt -import pytest - -from tests.functional.simple_snapshot.fixtures import ( - macros__test_no_overlaps_sql, - macros_custom_snapshot__custom_sql, - seeds__seed_csv, - seeds__seed_newcol_csv, -) - - -snapshots_checkall__snapshot_sql = """ -{% snapshot my_snapshot %} - {{ config(check_cols='all', unique_key='id', strategy='check', target_database=database, target_schema=schema) }} - select * from {{ ref(var('seed_name', 'seed')) }} -{% endsnapshot %} -""" - - -@pytest.fixture(scope="class") -def snapshots(): - return {"snapshot.sql": snapshots_checkall__snapshot_sql} - - -@pytest.fixture(scope="class") -def macros(): - return { - "test_no_overlaps.sql": macros__test_no_overlaps_sql, - "custom.sql": macros_custom_snapshot__custom_sql, - } - - -@pytest.fixture(scope="class") -def seeds(): - return {"seed_newcol.csv": seeds__seed_newcol_csv, "seed.csv": seeds__seed_csv} - - -def test_renamed_source(project): - run_dbt(["seed"]) - run_dbt(["snapshot"]) - database = project.database - results = project.run_sql( - "select * from {}.{}.my_snapshot".format(database, project.test_schema), - fetch="all", - ) - assert len(results) == 3 - for result in results: - assert len(result) == 6 - - # over ride the ref var in the snapshot definition to use a seed with an additional column, last_name - run_dbt(["snapshot", "--vars", "{seed_name: seed_newcol}"]) - results = project.run_sql( - "select * from {}.{}.my_snapshot where last_name is not NULL".format( - database, project.test_schema - ), - fetch="all", - ) - assert len(results) == 3 - - for result in results: - # new column - assert len(result) == 7 - assert result[-1] is not None - - results = project.run_sql( - "select * from {}.{}.my_snapshot where last_name is NULL".format( - database, project.test_schema - ), - fetch="all", - ) - assert len(results) == 3 - for result in results: - # new 
column - assert len(result) == 7 diff --git a/tests/functional/simple_snapshot/test_select_exclude_snapshot.py b/tests/functional/simple_snapshot/test_select_exclude_snapshot.py deleted file mode 100644 index ac2b4bc9..00000000 --- a/tests/functional/simple_snapshot/test_select_exclude_snapshot.py +++ /dev/null @@ -1,161 +0,0 @@ -import os - -from dbt.tests.util import ( - check_relations_equal, - check_table_does_not_exist, - run_dbt, -) -import pytest - -from tests.functional.simple_snapshot.fixtures import ( - macros__test_no_overlaps_sql, - models__ref_snapshot_sql, - models__schema_yml, - seeds__seed_csv, - seeds__seed_newcol_csv, - snapshots_pg__snapshot_sql, - snapshots_select__snapshot_sql, - snapshots_select_noconfig__snapshot_sql, -) - - -def all_snapshots(project): - path = os.path.join(project.test_data_dir, "seed_pg.sql") - project.run_sql_file(path) - - results = run_dbt(["snapshot"]) - assert len(results) == 4 - - check_relations_equal(project.adapter, ["snapshot_castillo", "snapshot_castillo_expected"]) - check_relations_equal(project.adapter, ["snapshot_alvarez", "snapshot_alvarez_expected"]) - check_relations_equal(project.adapter, ["snapshot_kelly", "snapshot_kelly_expected"]) - check_relations_equal(project.adapter, ["snapshot_actual", "snapshot_expected"]) - - path = os.path.join(project.test_data_dir, "invalidate_postgres.sql") - project.run_sql_file(path) - - path = os.path.join(project.test_data_dir, "update.sql") - project.run_sql_file(path) - - results = run_dbt(["snapshot"]) - assert len(results) == 4 - check_relations_equal(project.adapter, ["snapshot_castillo", "snapshot_castillo_expected"]) - check_relations_equal(project.adapter, ["snapshot_alvarez", "snapshot_alvarez_expected"]) - check_relations_equal(project.adapter, ["snapshot_kelly", "snapshot_kelly_expected"]) - check_relations_equal(project.adapter, ["snapshot_actual", "snapshot_expected"]) - - -def exclude_snapshots(project): - path = os.path.join(project.test_data_dir, "seed_pg.sql") - project.run_sql_file(path) - results = run_dbt(["snapshot", "--exclude", "snapshot_castillo"]) - assert len(results) == 3 - - check_table_does_not_exist(project.adapter, "snapshot_castillo") - check_relations_equal(project.adapter, ["snapshot_alvarez", "snapshot_alvarez_expected"]) - check_relations_equal(project.adapter, ["snapshot_kelly", "snapshot_kelly_expected"]) - check_relations_equal(project.adapter, ["snapshot_actual", "snapshot_expected"]) - - -def select_snapshots(project): - path = os.path.join(project.test_data_dir, "seed_pg.sql") - project.run_sql_file(path) - results = run_dbt(["snapshot", "--select", "snapshot_castillo"]) - assert len(results) == 1 - - check_relations_equal(project.adapter, ["snapshot_castillo", "snapshot_castillo_expected"]) - check_table_does_not_exist(project.adapter, "snapshot_alvarez") - check_table_does_not_exist(project.adapter, "snapshot_kelly") - check_table_does_not_exist(project.adapter, "snapshot_actual") - - -# all of the tests below use one of both of the above tests with -# various combinations of snapshots and macros -class SelectBasicSetup: - @pytest.fixture(scope="class") - def snapshots(self): - return { - "snapshot.sql": snapshots_pg__snapshot_sql, - "snapshot_select.sql": snapshots_select__snapshot_sql, - } - - @pytest.fixture(scope="class") - def seeds(self): - return {"seed_newcol.csv": seeds__seed_newcol_csv, "seed.csv": seeds__seed_csv} - - @pytest.fixture(scope="class") - def models(self): - return { - "schema.yml": models__schema_yml, - "ref_snapshot.sql": 
models__ref_snapshot_sql, - } - - @pytest.fixture(scope="class") - def macros(self): - return {"test_no_overlaps.sql": macros__test_no_overlaps_sql} - - -class TestAllBasic(SelectBasicSetup): - def test_all_snapshots(self, project): - all_snapshots(project) - - -class TestExcludeBasic(SelectBasicSetup): - def test_exclude_snapshots(self, project): - exclude_snapshots(project) - - -class TestSelectBasic(SelectBasicSetup): - def test_select_snapshots(self, project): - select_snapshots(project) - - -class SelectConfiguredSetup: - @pytest.fixture(scope="class") - def snapshots(self): - return {"snapshot.sql": snapshots_select_noconfig__snapshot_sql} - - @pytest.fixture(scope="class") - def seeds(self): - return {"seed_newcol.csv": seeds__seed_newcol_csv, "seed.csv": seeds__seed_csv} - - @pytest.fixture(scope="class") - def models(self): - return { - "schema.yml": models__schema_yml, - "ref_snapshot.sql": models__ref_snapshot_sql, - } - - @pytest.fixture(scope="class") - def macros(self): - return {"test_no_overlaps.sql": macros__test_no_overlaps_sql} - - # TODO: don't have access to project here so this breaks - @pytest.fixture(scope="class") - def project_config_update(self): - snapshot_config = { - "snapshots": { - "test": { - "target_schema": "{{ target.schema }}", - "unique_key": "id || '-' || first_name", - "strategy": "timestamp", - "updated_at": "updated_at", - } - } - } - return snapshot_config - - -class TestConfigured(SelectConfiguredSetup): - def test_all_configured_snapshots(self, project): - all_snapshots(project) - - -class TestConfiguredExclude(SelectConfiguredSetup): - def test_exclude_configured_snapshots(self, project): - exclude_snapshots(project) - - -class TestConfiguredSelect(SelectConfiguredSetup): - def test_select_configured_snapshots(self, project): - select_snapshots(project) diff --git a/tests/functional/simple_snapshot/test_slow_query_snapshot.py b/tests/functional/simple_snapshot/test_slow_query_snapshot.py deleted file mode 100644 index a65b6cb3..00000000 --- a/tests/functional/simple_snapshot/test_slow_query_snapshot.py +++ /dev/null @@ -1,82 +0,0 @@ -from dbt.tests.util import run_dbt -import pytest - -from tests.functional.simple_snapshot.fixtures import models_slow__gen_sql - - -snapshots_slow__snapshot_sql = """ - -{% snapshot my_slow_snapshot %} - - {{ - config( - target_database=var('target_database', database), - target_schema=schema, - unique_key='id', - strategy='timestamp', - updated_at='updated_at' - ) - }} - - select - id, - updated_at, - seconds - - from {{ ref('gen') }} - -{% endsnapshot %} -""" - - -test_snapshots_slow__test_timestamps_sql = """ - -/* - Assert that the dbt_valid_from of the latest record - is equal to the dbt_valid_to of the previous record -*/ - -with snapshot as ( - - select * from {{ ref('my_slow_snapshot') }} - -) - -select - snap1.id, - snap1.dbt_valid_from as new_valid_from, - snap2.dbt_valid_from as old_valid_from, - snap2.dbt_valid_to as old_valid_to - -from snapshot as snap1 -join snapshot as snap2 on snap1.id = snap2.id -where snap1.dbt_valid_to is null - and snap2.dbt_valid_to is not null - and snap1.dbt_valid_from != snap2.dbt_valid_to -""" - - -@pytest.fixture(scope="class") -def models(): - return {"gen.sql": models_slow__gen_sql} - - -@pytest.fixture(scope="class") -def snapshots(): - return {"snapshot.sql": snapshots_slow__snapshot_sql} - - -@pytest.fixture(scope="class") -def tests(): - return {"test_timestamps.sql": test_snapshots_slow__test_timestamps_sql} - - -def test_slow(project): - results = 
run_dbt(["snapshot"]) - assert len(results) == 1 - - results = run_dbt(["snapshot"]) - assert len(results) == 1 - - results = run_dbt(["test"]) - assert len(results) == 1