From 13cb49b668eb84c28ff8af0bc9cf88ea494d5f0e Mon Sep 17 00:00:00 2001 From: VersusFacit <67295367+VersusFacit@users.noreply.github.com> Date: Mon, 4 Nov 2024 17:15:24 -0800 Subject: [PATCH 1/7] Reproduce. Test fail. Fix. Test green. --- dbt/adapters/snowflake/impl.py | 9 +++++++++ dbt/include/snowflake/macros/adapters.sql | 7 +++++-- tests/functional/iceberg/test_table_basic.py | 5 +++++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/dbt/adapters/snowflake/impl.py b/dbt/adapters/snowflake/impl.py index 89c21f531..1abe50ba4 100644 --- a/dbt/adapters/snowflake/impl.py +++ b/dbt/adapters/snowflake/impl.py @@ -263,6 +263,15 @@ def list_relations_without_caching( # this can be collapsed once Snowflake adds is_iceberg to show objects columns = ["database_name", "schema_name", "name", "kind", "is_dynamic"] if self.behavior.enable_iceberg_materializations.no_warn: + # The QUOTED_IDENTIFIERS_IGNORE_CASE setting impacts column names like + # is_iceberg which is created by dbt, but it does not affect the case + # of column values in Snowflake's SHOW OBJECTS query! This normalization + # step ensures consistent scanning of the is_iceberg column as a + # lowercase name, treating it as if it were a system-provided field. + # + # This along with the behavior flag in general may be removed when + # is_iceberg comes on show objects. + schema_objects = schema_objects.rename(column_names={"IS_ICEBERG": "is_iceberg"}) columns.append("is_iceberg") return [self._parse_list_relations_result(obj) for obj in schema_objects.select(columns)] diff --git a/dbt/include/snowflake/macros/adapters.sql b/dbt/include/snowflake/macros/adapters.sql index b60cea0b0..e780b9765 100644 --- a/dbt/include/snowflake/macros/adapters.sql +++ b/dbt/include/snowflake/macros/adapters.sql @@ -145,9 +145,12 @@ {% endif -%} {# -- Gated for performance reason. If you don't want Iceberg, you shouldn't pay the - -- latency penalty. #} + -- latency penalty. + -- + -- Note: The capitalization of is_iceberg is handled in Python due to + -- unpredictable quoting behavior based on QUOTED_IDENTIFIERS_IGNORE_CASE. #} {% if adapter.behavior.enable_iceberg_materializations.no_warn %} - select all_objects.*, is_iceberg as "is_iceberg" + select all_objects.*, is_iceberg as {{ adapter.quote('is_iceberg') }} from table(result_scan(last_query_id(-1))) all_objects left join INFORMATION_SCHEMA.tables as all_tables on all_tables.table_name = all_objects."name" diff --git a/tests/functional/iceberg/test_table_basic.py b/tests/functional/iceberg/test_table_basic.py index e835a5fce..b7af60852 100644 --- a/tests/functional/iceberg/test_table_basic.py +++ b/tests/functional/iceberg/test_table_basic.py @@ -35,6 +35,11 @@ def test_iceberg_tables_build_and_can_be_referred(self, project): run_results = run_dbt() assert len(run_results) == 5 + def test_iceberg_tables_handle_quoting_ignore_flag(self, project): + project.run_sql("ALTER SESSION SET QUOTED_IDENTIFIERS_IGNORE_CASE = true;") + run_results = run_dbt() + assert len(run_results) == 5 + class TestIcebergTableTypeBuildsOnExistingTable: @pytest.fixture(scope="class") From 074ea9cc8de124cb1dde38ed735a5cb45a406ad9 Mon Sep 17 00:00:00 2001 From: VersusFacit <67295367+VersusFacit@users.noreply.github.com> Date: Mon, 4 Nov 2024 17:24:02 -0800 Subject: [PATCH 2/7] Add changelog. --- .changes/unreleased/Fixes-20241104-172349.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changes/unreleased/Fixes-20241104-172349.yaml diff --git a/.changes/unreleased/Fixes-20241104-172349.yaml b/.changes/unreleased/Fixes-20241104-172349.yaml new file mode 100644 index 000000000..07c90d93c --- /dev/null +++ b/.changes/unreleased/Fixes-20241104-172349.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Iceberg quoting ignore fix. +time: 2024-11-04T17:23:49.706297-08:00 +custom: + Author: versusfacit + Issue: "1227" From 96e9eabfcac6fcc1f16e1929cf867d9c820f4312 Mon Sep 17 00:00:00 2001 From: VersusFacit <67295367+VersusFacit@users.noreply.github.com> Date: Mon, 4 Nov 2024 22:31:15 -0800 Subject: [PATCH 3/7] Make a test that tests the actual behavior without hardcoding the quoted flag. --- dbt/include/snowflake/macros/adapters.sql | 8 ++- .../list_relations_tests/test_show_objects.py | 72 +++++++++++++++---- tests/functional/iceberg/test_table_basic.py | 5 -- 3 files changed, 67 insertions(+), 18 deletions(-) diff --git a/dbt/include/snowflake/macros/adapters.sql b/dbt/include/snowflake/macros/adapters.sql index e780b9765..66274d51c 100644 --- a/dbt/include/snowflake/macros/adapters.sql +++ b/dbt/include/snowflake/macros/adapters.sql @@ -134,10 +134,16 @@ {% endmacro %} -{% macro snowflake__list_relations_without_caching(schema_relation, max_iter=10, max_results_per_iter=10000) %} +{% macro snowflake__list_relations_without_caching(schema_relation, max_iter=10, max_results_per_iter=10000, query_pre_hook=None) %} + {# -- Use query_pre_hook for optional configurations prior to fetching relations. For sake of + -- consistent use, place ;s in the query_pre_hook definition, not this macro. #} {%- set max_total_results = max_results_per_iter * max_iter -%} {%- set sql -%} + {% if query_pre_hook %} + {{ query_pre_hook }} + {% endif -%} + {% if schema_relation is string %} show objects in {{ schema_relation }} limit {{ max_results_per_iter }}; {% else %} diff --git a/tests/functional/adapter/list_relations_tests/test_show_objects.py b/tests/functional/adapter/list_relations_tests/test_show_objects.py index e5eee39d9..822f9cafa 100644 --- a/tests/functional/adapter/list_relations_tests/test_show_objects.py +++ b/tests/functional/adapter/list_relations_tests/test_show_objects.py @@ -3,10 +3,12 @@ import pytest +from pathlib import Path + from dbt.adapters.factory import get_adapter_by_type from dbt.adapters.snowflake import SnowflakeRelation -from dbt.tests.util import run_dbt, get_connection +from dbt.tests.util import run_dbt, get_connection, write_file SEED = """ @@ -41,8 +43,40 @@ """ ) +_MODEL_ICEBERG = """ +{{ + config( + materialized = "table", + table_format="iceberg", + external_volume="s3_iceberg_snow", + ) +}} + +select 1 +""" + +macro_template_for_iceberg_quoted_identifiers_flag_test = """ +{{% macro list_relations_without_caching(schema_relation={}) -%}} + {{%- set pre_hook = 'ALTER SESSION SET QUOTED_IDENTIFIERS_IGNORE_CASE = true;' -%}} + {{%- set result = snowflake__list_relations_without_caching(schema_relation=schema_relation, query_pre_hook=pre_hook) -%}} + {{%- do return(result) -%}} +{{% endmacro %}} +""" + -class TestShowObjects: +class ShowObjectsBase: + @staticmethod + def list_relations_without_caching(project) -> List[SnowflakeRelation]: + my_adapter = get_adapter_by_type("snowflake") + schema = my_adapter.Relation.create( + database=project.database, schema=project.test_schema, identifier="" + ) + with get_connection(my_adapter): + relations = my_adapter.list_relations_without_caching(schema) + return relations + + +class TestShowObjects(ShowObjectsBase): views: int = 10 tables: int = 10 dynamic_tables: int = 10 @@ -66,16 +100,6 @@ def setup(self, project): run_dbt(["seed"]) run_dbt(["run"]) - @staticmethod - def list_relations_without_caching(project) -> List[SnowflakeRelation]: - my_adapter = get_adapter_by_type("snowflake") - schema = my_adapter.Relation.create( - database=project.database, schema=project.test_schema, identifier="" - ) - with get_connection(my_adapter): - relations = my_adapter.list_relations_without_caching(schema) - return relations - def test_list_relations_without_caching(self, project): relations = self.list_relations_without_caching(project) assert len([relation for relation in relations if relation.is_view]) == self.views @@ -87,3 +111,27 @@ def test_list_relations_without_caching(self, project): len([relation for relation in relations if relation.is_dynamic_table]) == self.dynamic_tables ) + + +class TestShowIcebergObjects(ShowObjectsBase): + @pytest.fixture(scope="class") + def project_config_update(self): + return {"flags": {"enable_iceberg_materializations": True}} + + @pytest.fixture(scope="class") + def models(self): + return {"my_model.sql": _MODEL_ICEBERG} + + def test_quoting_ignore_flag_doesnt_break_iceberg_metadata(self, project): + """We inject the QUOTED_IDENTIFIERS_IGNORE_CASE into the underlying query that fetches + objects which will fail without proper normalization within the python function after + the list relations macro returns.""" + macro_file = project.project_root / Path("macros") / Path("macros_for_this_test.sql") + write_file( + macro_template_for_iceberg_quoted_identifiers_flag_test.format(project.test_schema), + macro_file, + ) + + run_dbt(["run"]) + + self.list_relations_without_caching(project) diff --git a/tests/functional/iceberg/test_table_basic.py b/tests/functional/iceberg/test_table_basic.py index b7af60852..e835a5fce 100644 --- a/tests/functional/iceberg/test_table_basic.py +++ b/tests/functional/iceberg/test_table_basic.py @@ -35,11 +35,6 @@ def test_iceberg_tables_build_and_can_be_referred(self, project): run_results = run_dbt() assert len(run_results) == 5 - def test_iceberg_tables_handle_quoting_ignore_flag(self, project): - project.run_sql("ALTER SESSION SET QUOTED_IDENTIFIERS_IGNORE_CASE = true;") - run_results = run_dbt() - assert len(run_results) == 5 - class TestIcebergTableTypeBuildsOnExistingTable: @pytest.fixture(scope="class") From 0fdef5de694d05cbc64f1a71f50999fccf8262e1 Mon Sep 17 00:00:00 2001 From: VersusFacit <67295367+VersusFacit@users.noreply.github.com> Date: Tue, 5 Nov 2024 14:57:46 -0800 Subject: [PATCH 4/7] I can't believe how elegant a solution this is. Use a session-only configuration. --- dbt/adapters/snowflake/impl.py | 9 --------- dbt/include/snowflake/macros/adapters.sql | 19 +++++++++++-------- .../list_relations_tests/test_show_objects.py | 14 -------------- 3 files changed, 11 insertions(+), 31 deletions(-) diff --git a/dbt/adapters/snowflake/impl.py b/dbt/adapters/snowflake/impl.py index 1abe50ba4..89c21f531 100644 --- a/dbt/adapters/snowflake/impl.py +++ b/dbt/adapters/snowflake/impl.py @@ -263,15 +263,6 @@ def list_relations_without_caching( # this can be collapsed once Snowflake adds is_iceberg to show objects columns = ["database_name", "schema_name", "name", "kind", "is_dynamic"] if self.behavior.enable_iceberg_materializations.no_warn: - # The QUOTED_IDENTIFIERS_IGNORE_CASE setting impacts column names like - # is_iceberg which is created by dbt, but it does not affect the case - # of column values in Snowflake's SHOW OBJECTS query! This normalization - # step ensures consistent scanning of the is_iceberg column as a - # lowercase name, treating it as if it were a system-provided field. - # - # This along with the behavior flag in general may be removed when - # is_iceberg comes on show objects. - schema_objects = schema_objects.rename(column_names={"IS_ICEBERG": "is_iceberg"}) columns.append("is_iceberg") return [self._parse_list_relations_result(obj) for obj in schema_objects.select(columns)] diff --git a/dbt/include/snowflake/macros/adapters.sql b/dbt/include/snowflake/macros/adapters.sql index 66274d51c..db150ff77 100644 --- a/dbt/include/snowflake/macros/adapters.sql +++ b/dbt/include/snowflake/macros/adapters.sql @@ -134,15 +134,21 @@ {% endmacro %} -{% macro snowflake__list_relations_without_caching(schema_relation, max_iter=10, max_results_per_iter=10000, query_pre_hook=None) %} +{% macro snowflake__list_relations_without_caching(schema_relation, max_iter=10, max_results_per_iter=10000) %} {# -- Use query_pre_hook for optional configurations prior to fetching relations. For sake of -- consistent use, place ;s in the query_pre_hook definition, not this macro. #} {%- set max_total_results = max_results_per_iter * max_iter -%} {%- set sql -%} - {% if query_pre_hook %} - {{ query_pre_hook }} - {% endif -%} + {# -- The QUOTED_IDENTIFIERS_IGNORE_CASE setting impacts column names like + -- is_iceberg which is created by dbt, but it does not affect the case + -- of column values in Snowflake's SHOW OBJECTS query! This session-only + -- alteration ensures the is_iceberg column is lowercase, juast if it + -- were a system-provided field. + -- + -- This along with the behavior flag in general may be removed when + -- is_iceberg comes on show objects. #} + alter session set QUOTED_IDENTIFIERS_IGNORE_CASE = false; {% if schema_relation is string %} show objects in {{ schema_relation }} limit {{ max_results_per_iter }}; @@ -151,10 +157,7 @@ {% endif -%} {# -- Gated for performance reason. If you don't want Iceberg, you shouldn't pay the - -- latency penalty. - -- - -- Note: The capitalization of is_iceberg is handled in Python due to - -- unpredictable quoting behavior based on QUOTED_IDENTIFIERS_IGNORE_CASE. #} + -- latency penalty. #} {% if adapter.behavior.enable_iceberg_materializations.no_warn %} select all_objects.*, is_iceberg as {{ adapter.quote('is_iceberg') }} from table(result_scan(last_query_id(-1))) all_objects diff --git a/tests/functional/adapter/list_relations_tests/test_show_objects.py b/tests/functional/adapter/list_relations_tests/test_show_objects.py index 822f9cafa..edb0944fe 100644 --- a/tests/functional/adapter/list_relations_tests/test_show_objects.py +++ b/tests/functional/adapter/list_relations_tests/test_show_objects.py @@ -55,14 +55,6 @@ select 1 """ -macro_template_for_iceberg_quoted_identifiers_flag_test = """ -{{% macro list_relations_without_caching(schema_relation={}) -%}} - {{%- set pre_hook = 'ALTER SESSION SET QUOTED_IDENTIFIERS_IGNORE_CASE = true;' -%}} - {{%- set result = snowflake__list_relations_without_caching(schema_relation=schema_relation, query_pre_hook=pre_hook) -%}} - {{%- do return(result) -%}} -{{% endmacro %}} -""" - class ShowObjectsBase: @staticmethod @@ -126,12 +118,6 @@ def test_quoting_ignore_flag_doesnt_break_iceberg_metadata(self, project): """We inject the QUOTED_IDENTIFIERS_IGNORE_CASE into the underlying query that fetches objects which will fail without proper normalization within the python function after the list relations macro returns.""" - macro_file = project.project_root / Path("macros") / Path("macros_for_this_test.sql") - write_file( - macro_template_for_iceberg_quoted_identifiers_flag_test.format(project.test_schema), - macro_file, - ) - run_dbt(["run"]) self.list_relations_without_caching(project) From d515335860f63b83f295437e5ad9b890d92c67c8 Mon Sep 17 00:00:00 2001 From: VersusFacit <67295367+VersusFacit@users.noreply.github.com> Date: Tue, 5 Nov 2024 15:06:49 -0800 Subject: [PATCH 5/7] Fix test. --- .../adapter/list_relations_tests/test_show_objects.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/functional/adapter/list_relations_tests/test_show_objects.py b/tests/functional/adapter/list_relations_tests/test_show_objects.py index edb0944fe..a167874d3 100644 --- a/tests/functional/adapter/list_relations_tests/test_show_objects.py +++ b/tests/functional/adapter/list_relations_tests/test_show_objects.py @@ -8,7 +8,7 @@ from dbt.adapters.factory import get_adapter_by_type from dbt.adapters.snowflake import SnowflakeRelation -from dbt.tests.util import run_dbt, get_connection, write_file +from dbt.tests.util import run_dbt, get_connection SEED = """ From 2570641bb08491c664b8095441b4f08f45300ebb Mon Sep 17 00:00:00 2001 From: VersusFacit <67295367+VersusFacit@users.noreply.github.com> Date: Tue, 5 Nov 2024 16:12:22 -0800 Subject: [PATCH 6/7] Simplify logic for this by moving this away from a quoting situation. Columns that unquoted are capitalized regardless of the quoting ignore flag. So we let it be uppercase and then normalize it down to lowercase in Python. --- dbt/adapters/snowflake/impl.py | 5 +++++ dbt/include/snowflake/macros/adapters.sql | 12 +----------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/dbt/adapters/snowflake/impl.py b/dbt/adapters/snowflake/impl.py index 89c21f531..dc0926d93 100644 --- a/dbt/adapters/snowflake/impl.py +++ b/dbt/adapters/snowflake/impl.py @@ -263,6 +263,11 @@ def list_relations_without_caching( # this can be collapsed once Snowflake adds is_iceberg to show objects columns = ["database_name", "schema_name", "name", "kind", "is_dynamic"] if self.behavior.enable_iceberg_materializations.no_warn: + # The QUOTED_IDENTIFIERS_IGNORE_CASE setting impacts column names like + # is_iceberg which is created by dbt, but it does not affect the case + # of column values in Snowflake's SHOW OBJECTS query! This + # normalization step ensures metadata queries are handled consistently. + schema_objects = schema_objects.rename(column_names={"IS_ICEBERG": "is_iceberg"}) columns.append("is_iceberg") return [self._parse_list_relations_result(obj) for obj in schema_objects.select(columns)] diff --git a/dbt/include/snowflake/macros/adapters.sql b/dbt/include/snowflake/macros/adapters.sql index db150ff77..da1f93877 100644 --- a/dbt/include/snowflake/macros/adapters.sql +++ b/dbt/include/snowflake/macros/adapters.sql @@ -140,16 +140,6 @@ {%- set max_total_results = max_results_per_iter * max_iter -%} {%- set sql -%} - {# -- The QUOTED_IDENTIFIERS_IGNORE_CASE setting impacts column names like - -- is_iceberg which is created by dbt, but it does not affect the case - -- of column values in Snowflake's SHOW OBJECTS query! This session-only - -- alteration ensures the is_iceberg column is lowercase, juast if it - -- were a system-provided field. - -- - -- This along with the behavior flag in general may be removed when - -- is_iceberg comes on show objects. #} - alter session set QUOTED_IDENTIFIERS_IGNORE_CASE = false; - {% if schema_relation is string %} show objects in {{ schema_relation }} limit {{ max_results_per_iter }}; {% else %} @@ -159,7 +149,7 @@ {# -- Gated for performance reason. If you don't want Iceberg, you shouldn't pay the -- latency penalty. #} {% if adapter.behavior.enable_iceberg_materializations.no_warn %} - select all_objects.*, is_iceberg as {{ adapter.quote('is_iceberg') }} + select all_objects.*, is_iceberg from table(result_scan(last_query_id(-1))) all_objects left join INFORMATION_SCHEMA.tables as all_tables on all_tables.table_name = all_objects."name" From a5b39dfb3802dadb31697c79b36aa6c7914f73c8 Mon Sep 17 00:00:00 2001 From: VersusFacit <67295367+VersusFacit@users.noreply.github.com> Date: Tue, 5 Nov 2024 17:33:13 -0800 Subject: [PATCH 7/7] edit comments. --- dbt/include/snowflake/macros/adapters.sql | 3 --- .../adapter/list_relations_tests/test_show_objects.py | 10 +++++++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/dbt/include/snowflake/macros/adapters.sql b/dbt/include/snowflake/macros/adapters.sql index da1f93877..0ca756c6c 100644 --- a/dbt/include/snowflake/macros/adapters.sql +++ b/dbt/include/snowflake/macros/adapters.sql @@ -135,9 +135,6 @@ {% endmacro %} {% macro snowflake__list_relations_without_caching(schema_relation, max_iter=10, max_results_per_iter=10000) %} - {# -- Use query_pre_hook for optional configurations prior to fetching relations. For sake of - -- consistent use, place ;s in the query_pre_hook definition, not this macro. #} - {%- set max_total_results = max_results_per_iter * max_iter -%} {%- set sql -%} {% if schema_relation is string %} diff --git a/tests/functional/adapter/list_relations_tests/test_show_objects.py b/tests/functional/adapter/list_relations_tests/test_show_objects.py index a167874d3..91fb94f79 100644 --- a/tests/functional/adapter/list_relations_tests/test_show_objects.py +++ b/tests/functional/adapter/list_relations_tests/test_show_objects.py @@ -115,9 +115,13 @@ def models(self): return {"my_model.sql": _MODEL_ICEBERG} def test_quoting_ignore_flag_doesnt_break_iceberg_metadata(self, project): - """We inject the QUOTED_IDENTIFIERS_IGNORE_CASE into the underlying query that fetches - objects which will fail without proper normalization within the python function after - the list relations macro returns.""" + """https://github.com/dbt-labs/dbt-snowflake/issues/1227 + + The list relations function involves a metadata sub-query. Regardless of + QUOTED_IDENTIFIERS_IGNORE_CASE, this function will fail without proper + normalization within the encapsulating python function after the macro invocation + returns. This test verifies that normalization is working. + """ run_dbt(["run"]) self.list_relations_without_caching(project)