From 8bbdbf266a6eec73c6793f9bd2c8f10276f119fd Mon Sep 17 00:00:00 2001 From: Luciano Vitti Date: Fri, 9 Aug 2024 19:34:57 -0300 Subject: [PATCH] Fix config change detection not working for multiple sortkey in materialized views (#841) * fix config change detection not working for multiple sortkey * add test * changelog * fix sort_key_position for non sort_key column --------- Co-authored-by: Colin Rogers <111200756+colin-rogers-dbt@users.noreply.github.com> Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com> --- .../unreleased/Fixes-20240610-104114.yaml | 6 + .../relation_configs/materialized_view.py | 18 ++- .../redshift/relation_configs/sort.py | 37 +++-- .../relations/materialized_view/describe.sql | 20 ++- tests/unit/test_materialized_view.py | 55 +++++++ tests/unit/test_relation.py | 142 +++++++++++++++++- 6 files changed, 256 insertions(+), 22 deletions(-) create mode 100644 .changes/unreleased/Fixes-20240610-104114.yaml diff --git a/.changes/unreleased/Fixes-20240610-104114.yaml b/.changes/unreleased/Fixes-20240610-104114.yaml new file mode 100644 index 000000000..d220288f8 --- /dev/null +++ b/.changes/unreleased/Fixes-20240610-104114.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Fix config change detection not working for multiple sortkey in materialized views +time: 2024-06-10T10:41:14.721411-03:00 +custom: + Author: lvitti + Issue: "838" diff --git a/dbt/adapters/redshift/relation_configs/materialized_view.py b/dbt/adapters/redshift/relation_configs/materialized_view.py index 48c04b554..a01185f22 100644 --- a/dbt/adapters/redshift/relation_configs/materialized_view.py +++ b/dbt/adapters/redshift/relation_configs/materialized_view.py @@ -169,6 +169,13 @@ def parse_relation_results(cls, relation_results: RelationResults) -> Dict: "query": agate.Table( agate.Row({"definition": "")} ), + "columns": agate.Table( + agate.Row({ + "column": "", + "sort_key_position": , + "is_dist_key: any(true, false), + }) + ), } Additional columns in either value is fine, as long as `sortkey` and `sortstyle` are available. @@ -199,11 +206,12 @@ def parse_relation_results(cls, relation_results: RelationResults) -> Dict: {"dist": RedshiftDistConfig.parse_relation_results(materialized_view)} ) - # TODO: this only shows the first column in the sort key - if materialized_view.get("sortkey1"): - config_dict.update( - {"sort": RedshiftSortConfig.parse_relation_results(materialized_view)} - ) + if columns := relation_results.get("columns"): + sort_columns = [row for row in columns.rows if row.get("sort_key_position", 0) > 0] + if sort_columns: + config_dict.update( + {"sort": RedshiftSortConfig.parse_relation_results(sort_columns)} + ) return config_dict diff --git a/dbt/adapters/redshift/relation_configs/sort.py b/dbt/adapters/redshift/relation_configs/sort.py index be5b3627d..f38d5a1e1 100644 --- a/dbt/adapters/redshift/relation_configs/sort.py +++ b/dbt/adapters/redshift/relation_configs/sort.py @@ -1,6 +1,6 @@ from dataclasses import dataclass from dbt.adapters.contracts.relation import RelationConfig -from typing import Optional, FrozenSet, Set, Dict, Any, TYPE_CHECKING +from typing import Optional, Set, Dict, Any, TYPE_CHECKING, Tuple from dbt.adapters.relation_configs import ( RelationConfigChange, @@ -46,7 +46,7 @@ class RedshiftSortConfig(RedshiftRelationConfigBase, RelationConfigValidationMix """ sortstyle: Optional[RedshiftSortStyle] = None - sortkey: Optional[FrozenSet[str]] = None + sortkey: Optional[Tuple[str]] = None def __post_init__(self): # maintains `frozen=True` while allowing for a variable default on `sort_type` @@ -103,7 +103,7 @@ def validation_rules(self) -> Set[RelationConfigValidationRule]: def from_dict(cls, config_dict) -> Self: kwargs_dict = { "sortstyle": config_dict.get("sortstyle"), - "sortkey": frozenset(column for column in config_dict.get("sortkey", {})), + "sortkey": tuple(column for column in config_dict.get("sortkey", {})), } sort: Self = super().from_dict(kwargs_dict) # type: ignore return sort # type: ignore @@ -133,12 +133,12 @@ def parse_relation_config(cls, relation_config: RelationConfig) -> Dict[str, Any if isinstance(sortkey, str): sortkey = [sortkey] - config_dict.update({"sortkey": set(sortkey)}) + config_dict.update({"sortkey": tuple(sortkey)}) return config_dict @classmethod - def parse_relation_results(cls, relation_results_entry: "agate.Row") -> dict: + def parse_relation_results(cls, relation_results_entry: "agate.MappedSequence") -> dict: """ Translate agate objects from the database into a standard dictionary. @@ -147,19 +147,26 @@ def parse_relation_results(cls, relation_results_entry: "agate.Row") -> dict: Processing of `sortstyle` has been omitted here, which means it's the default (compound). Args: - relation_results_entry: the description of the sortkey and sortstyle from the database in this format: - - agate.Row({ - ..., - "sortkey1": "", - ... - }) + relation_results_entry: The list of rows that contains the sortkey in this format: + [ + agate.Row({ + ..., + "column": "", + "sort_key_position": , + ... + }), + ] Returns: a standard dictionary describing this `RedshiftSortConfig` instance """ - if sortkey := relation_results_entry.get("sortkey1"): - return {"sortkey": {sortkey}} - return {} + sort_config = [] + + sorted_columns = sorted(relation_results_entry, key=lambda x: x["sort_key_position"]) + for column in sorted_columns: + if column.get("sort_key_position"): + sort_config.append(column.get("column")) + + return {"sortkey": sort_config} @dataclass(frozen=True, eq=True, unsafe_hash=True) diff --git a/dbt/include/redshift/macros/relations/materialized_view/describe.sql b/dbt/include/redshift/macros/relations/materialized_view/describe.sql index a56d0756e..3f038b9ad 100644 --- a/dbt/include/redshift/macros/relations/materialized_view/describe.sql +++ b/dbt/include/redshift/macros/relations/materialized_view/describe.sql @@ -24,6 +24,20 @@ {%- endset %} {% set _materialized_view = run_query(_materialized_view_sql) %} + {%- set _column_descriptor_sql -%} + SELECT + a.attname as column, + a.attisdistkey as is_dist_key, + a.attsortkeyord as sort_key_position + FROM pg_class c + JOIN pg_namespace n ON n.oid = c.relnamespace + JOIN pg_attribute a ON a.attrelid = c.oid + WHERE + n.nspname ilike '{{ relation.schema }}' + AND c.relname LIKE 'mv_tbl__{{ relation.identifier }}__%' + {%- endset %} + {% set _column_descriptor = run_query(_column_descriptor_sql) %} + {%- set _query_sql -%} select vw.definition @@ -34,6 +48,10 @@ {%- endset %} {% set _query = run_query(_query_sql) %} - {% do return({'materialized_view': _materialized_view, 'query': _query}) %} + {% do return({ + 'materialized_view': _materialized_view, + 'query': _query, + 'columns': _column_descriptor, + })%} {% endmacro %} diff --git a/tests/unit/test_materialized_view.py b/tests/unit/test_materialized_view.py index 8e4f6ca3e..322b134f8 100644 --- a/tests/unit/test_materialized_view.py +++ b/tests/unit/test_materialized_view.py @@ -1,5 +1,6 @@ from unittest.mock import Mock +import agate import pytest from dbt.adapters.redshift.relation_configs import RedshiftMaterializedViewConfig @@ -53,3 +54,57 @@ def test_redshift_materialized_view_config_throws_expected_exception_with_invali ) with pytest.raises(ValueError): config.parse_relation_config(model_node) + + +def test_redshift_materialized_view_parse_relation_results_handles_multiples_sort_key(): + materialized_view = agate.Table.from_object( + [], + [ + "database", + "schema", + "table", + "diststyle", + "sortkey1", + "autorefresh", + ], + ) + + column_descriptor = agate.Table.from_object( + [ + { + "column": "my_column", + "is_dist_key": True, + "sort_key_position": 1, + }, + { + "column": "my_column2", + "is_dist_key": True, + "sort_key_position": 2, + }, + { + "column": "my_column5", + "is_dist_key": False, + "sort_key_position": 0, + }, + ], + ) + + query = agate.Table.from_object( + [ + { + "definition": "create materialized view my_view as (select 1 as my_column, 'value' as my_column2)" + } + ] + ) + + relation_results = { + "materialized_view": materialized_view, + "columns": column_descriptor, + "query": query, + } + + config_dict = RedshiftMaterializedViewConfig.parse_relation_results(relation_results) + + assert isinstance(config_dict["sort"], dict) + assert config_dict["sort"]["sortkey"][0] == "my_column" + assert config_dict["sort"]["sortkey"][1] == "my_column2" diff --git a/tests/unit/test_relation.py b/tests/unit/test_relation.py index 4817ab100..0985c62c8 100644 --- a/tests/unit/test_relation.py +++ b/tests/unit/test_relation.py @@ -1,5 +1,15 @@ +from unittest.mock import Mock + +import agate +import pytest + from dbt.adapters.redshift.relation import RedshiftRelation -from dbt.adapters.contracts.relation import RelationType +from dbt.adapters.contracts.relation import ( + RelationType, + RelationConfig, +) + +from dbt.adapters.redshift.relation_configs.sort import RedshiftSortStyle def test_renameable_relation(): @@ -15,3 +25,133 @@ def test_renameable_relation(): RelationType.Table, } ) + + +@pytest.fixture +def materialized_view_without_sort_key_from_db(): + materialized_view = agate.Table.from_object( + [ + { + "database": "my_db", + "schema": "my_schema", + "table": "my_table", + } + ], + ) + + column_descriptor = agate.Table.from_object([]) + + query = agate.Table.from_object( + [ + { + "definition": "create materialized view my_view as (select 1 as my_column, 'value' as my_column2)" + } + ] + ) + + relation_results = { + "materialized_view": materialized_view, + "columns": column_descriptor, + "query": query, + } + return relation_results + + +@pytest.fixture +def materialized_view_without_sort_key_config(): + relation_config = Mock(spec=RelationConfig) + + relation_config.database = "my_db" + relation_config.identifier = "my_table" + relation_config.schema = "my_schema" + relation_config.config = Mock() + relation_config.config.extra = {} + relation_config.config.sort = "" + relation_config.compiled_code = ( + "create materialized view my_view as (select 1 as my_column, 'value' as my_column2)" + ) + return relation_config + + +@pytest.fixture +def materialized_view_multiple_sort_key_from_db(materialized_view_without_sort_key_from_db): + materialized_view_without_sort_key_from_db["columns"] = agate.Table.from_object( + [ + { + "column": "my_column", + "is_dist_key": True, + "sort_key_position": 1, + }, + { + "column": "my_column2", + "is_dist_key": True, + "sort_key_position": 2, + }, + ], + ) + return materialized_view_without_sort_key_from_db + + +@pytest.fixture +def materialized_view_multiple_sort_key_config(materialized_view_without_sort_key_config): + materialized_view_without_sort_key_config.config.extra = { + "sort_type": RedshiftSortStyle.compound, + "sort": ["my_column", "my_column2"], + } + + return materialized_view_without_sort_key_config + + +def test_materialized_view_config_changeset_without_sort_key_empty_changes( + materialized_view_without_sort_key_from_db, + materialized_view_without_sort_key_config, +): + change_set = RedshiftRelation.materialized_view_config_changeset( + materialized_view_without_sort_key_from_db, + materialized_view_without_sort_key_config, + ) + + assert change_set is None + + +def test_materialized_view_config_changeset_multiple_sort_key_without_changes( + materialized_view_multiple_sort_key_from_db, + materialized_view_multiple_sort_key_config, +): + + change_set = RedshiftRelation.materialized_view_config_changeset( + materialized_view_multiple_sort_key_from_db, + materialized_view_multiple_sort_key_config, + ) + + assert change_set is None + + +def test_materialized_view_config_changeset_multiple_sort_key_with_changes( + materialized_view_multiple_sort_key_from_db, + materialized_view_multiple_sort_key_config, +): + materialized_view_multiple_sort_key_config.config.extra["sort"].append("my_column3") + + change_set = RedshiftRelation.materialized_view_config_changeset( + materialized_view_multiple_sort_key_from_db, + materialized_view_multiple_sort_key_config, + ) + + assert change_set is not None + assert change_set.sort.context.sortkey == ("my_column", "my_column2", "my_column3") + + +def test_materialized_view_config_changeset_multiple_sort_key_with_changes_in_order_column( + materialized_view_multiple_sort_key_from_db, + materialized_view_multiple_sort_key_config, +): + materialized_view_multiple_sort_key_config.config.extra["sort"] = ["my_column2", "my_column"] + + change_set = RedshiftRelation.materialized_view_config_changeset( + materialized_view_multiple_sort_key_from_db, + materialized_view_multiple_sort_key_config, + ) + + assert change_set is not None + assert change_set.sort.context.sortkey == ("my_column2", "my_column")