Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Normalize is_iceberg Field for Consistency in Snowflake with QUOTED_IDENTIFIERS_IGNORE_CASE #1229

Merged
merged 9 commits into from
Nov 10, 2024
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20241104-172349.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Iceberg quoting ignore fix.
time: 2024-11-04T17:23:49.706297-08:00
custom:
Author: versusfacit
Issue: "1227"
9 changes: 9 additions & 0 deletions dbt/adapters/snowflake/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,15 @@ def list_relations_without_caching(
# this can be collapsed once Snowflake adds is_iceberg to show objects
columns = ["database_name", "schema_name", "name", "kind", "is_dynamic"]
if self.behavior.enable_iceberg_materializations.no_warn:
# The QUOTED_IDENTIFIERS_IGNORE_CASE setting impacts column names like
# is_iceberg which is created by dbt, but it does not affect the case
# of column values in Snowflake's SHOW OBJECTS query! This normalization
# step ensures consistent scanning of the is_iceberg column as a
# lowercase name, treating it as if it were a system-provided field.
#
# This along with the behavior flag in general may be removed when
# is_iceberg comes on show objects.
schema_objects = schema_objects.rename(column_names={"IS_ICEBERG": "is_iceberg"})
mikealfare marked this conversation as resolved.
Show resolved Hide resolved
columns.append("is_iceberg")

return [self._parse_list_relations_result(obj) for obj in schema_objects.select(columns)]
Expand Down
15 changes: 12 additions & 3 deletions dbt/include/snowflake/macros/adapters.sql
Original file line number Diff line number Diff line change
Expand Up @@ -134,20 +134,29 @@

{% endmacro %}

{% macro snowflake__list_relations_without_caching(schema_relation, max_iter=10, max_results_per_iter=10000) %}
{% macro snowflake__list_relations_without_caching(schema_relation, max_iter=10, max_results_per_iter=10000, query_pre_hook=None) %}
VersusFacit marked this conversation as resolved.
Show resolved Hide resolved
{# -- Use query_pre_hook for optional configurations prior to fetching relations. For sake of
VersusFacit marked this conversation as resolved.
Show resolved Hide resolved
-- consistent use, place ;s in the query_pre_hook definition, not this macro. #}

{%- set max_total_results = max_results_per_iter * max_iter -%}
{%- set sql -%}
{% if query_pre_hook %}
VersusFacit marked this conversation as resolved.
Show resolved Hide resolved
{{ query_pre_hook }}
{% endif -%}

{% if schema_relation is string %}
show objects in {{ schema_relation }} limit {{ max_results_per_iter }};
{% else %}
show objects in {{ schema_relation.include(identifier=False) }} limit {{ max_results_per_iter }};
{% endif -%}

{# -- Gated for performance reason. If you don't want Iceberg, you shouldn't pay the
-- latency penalty. #}
-- latency penalty.
--
-- Note: The capitalization of is_iceberg is handled in Python due to
-- unpredictable quoting behavior based on QUOTED_IDENTIFIERS_IGNORE_CASE. #}
{% if adapter.behavior.enable_iceberg_materializations.no_warn %}
select all_objects.*, is_iceberg as "is_iceberg"
select all_objects.*, is_iceberg as {{ adapter.quote('is_iceberg') }}
VersusFacit marked this conversation as resolved.
Show resolved Hide resolved
from table(result_scan(last_query_id(-1))) all_objects
left join INFORMATION_SCHEMA.tables as all_tables
on all_tables.table_name = all_objects."name"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@

import pytest

from pathlib import Path

from dbt.adapters.factory import get_adapter_by_type
from dbt.adapters.snowflake import SnowflakeRelation

from dbt.tests.util import run_dbt, get_connection
from dbt.tests.util import run_dbt, get_connection, write_file


SEED = """
Expand Down Expand Up @@ -41,8 +43,40 @@
"""
)

_MODEL_ICEBERG = """
{{
config(
materialized = "table",
table_format="iceberg",
external_volume="s3_iceberg_snow",
)
}}

select 1
"""

macro_template_for_iceberg_quoted_identifiers_flag_test = """
{{% macro list_relations_without_caching(schema_relation={}) -%}}
{{%- set pre_hook = 'ALTER SESSION SET QUOTED_IDENTIFIERS_IGNORE_CASE = true;' -%}}
{{%- set result = snowflake__list_relations_without_caching(schema_relation=schema_relation, query_pre_hook=pre_hook) -%}}
{{%- do return(result) -%}}
{{% endmacro %}}
"""


class TestShowObjects:
class ShowObjectsBase:
@staticmethod
def list_relations_without_caching(project) -> List[SnowflakeRelation]:
my_adapter = get_adapter_by_type("snowflake")
schema = my_adapter.Relation.create(
database=project.database, schema=project.test_schema, identifier=""
)
with get_connection(my_adapter):
relations = my_adapter.list_relations_without_caching(schema)
return relations


class TestShowObjects(ShowObjectsBase):
views: int = 10
tables: int = 10
dynamic_tables: int = 10
Expand All @@ -66,16 +100,6 @@ def setup(self, project):
run_dbt(["seed"])
run_dbt(["run"])

@staticmethod
def list_relations_without_caching(project) -> List[SnowflakeRelation]:
my_adapter = get_adapter_by_type("snowflake")
schema = my_adapter.Relation.create(
database=project.database, schema=project.test_schema, identifier=""
)
with get_connection(my_adapter):
relations = my_adapter.list_relations_without_caching(schema)
return relations

def test_list_relations_without_caching(self, project):
relations = self.list_relations_without_caching(project)
assert len([relation for relation in relations if relation.is_view]) == self.views
Expand All @@ -87,3 +111,27 @@ def test_list_relations_without_caching(self, project):
len([relation for relation in relations if relation.is_dynamic_table])
== self.dynamic_tables
)


class TestShowIcebergObjects(ShowObjectsBase):
@pytest.fixture(scope="class")
def project_config_update(self):
return {"flags": {"enable_iceberg_materializations": True}}

@pytest.fixture(scope="class")
def models(self):
return {"my_model.sql": _MODEL_ICEBERG}

def test_quoting_ignore_flag_doesnt_break_iceberg_metadata(self, project):
"""We inject the QUOTED_IDENTIFIERS_IGNORE_CASE into the underlying query that fetches
VersusFacit marked this conversation as resolved.
Show resolved Hide resolved
objects which will fail without proper normalization within the python function after
the list relations macro returns."""
macro_file = project.project_root / Path("macros") / Path("macros_for_this_test.sql")
write_file(
macro_template_for_iceberg_quoted_identifiers_flag_test.format(project.test_schema),
macro_file,
)

run_dbt(["run"])

self.list_relations_without_caching(project)
Loading