From 16b683e7a5245e827a69faa1d0c4fd290fd8cc71 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Wed, 18 Sep 2024 16:22:51 -0400 Subject: [PATCH] Add "object" mergebehavior to MergeBehavior processing (#192) * Add MergeBehavior.Object * change name to snapshot_meta_column_names * Update empty dictionary in merge_config_dicts * Fix test * Changie --- .../unreleased/Features-20240918-114903.yaml | 6 + dbt_common/contracts/config/base.py | 90 +++++++++++++-- tests/unit/test_model_config.py | 108 ++++++++++++++++-- 3 files changed, 185 insertions(+), 19 deletions(-) create mode 100644 .changes/unreleased/Features-20240918-114903.yaml diff --git a/.changes/unreleased/Features-20240918-114903.yaml b/.changes/unreleased/Features-20240918-114903.yaml new file mode 100644 index 00000000..f3bfb7fa --- /dev/null +++ b/.changes/unreleased/Features-20240918-114903.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Add MergeBehavior.Object for snapshot column feature +time: 2024-09-18T11:49:03.257411-04:00 +custom: + Author: gshank + Issue: "191" diff --git a/dbt_common/contracts/config/base.py b/dbt_common/contracts/config/base.py index df8afb24..b571cb2a 100644 --- a/dbt_common/contracts/config/base.py +++ b/dbt_common/contracts/config/base.py @@ -96,10 +96,14 @@ def same_contents(cls, unrendered: Dict[str, Any], other: Dict[str, Any]) -> boo return False return True - # This is used in 'add_config_call' to create the combined config_call_dict. - # 'meta' moved here from node + # This is used in 'merge_config_dicts' to create the combined orig_dict. + # Note: "clobber" fields aren't defined, because that's the default. + # "access" is currently the only Clobber field. + # This shouldn't really be defined here. It would be better to have it + # associated with the config definitions, but at the point we use it, we + # don't know which config we're dealing with. mergebehavior = { - "append": ["pre-hook", "pre_hook", "post-hook", "post_hook", "tags"], + "append": ["pre-hook", "pre_hook", "post-hook", "post_hook", "tags", "packages"], "update": [ "quoting", "column_types", @@ -108,6 +112,7 @@ def same_contents(cls, unrendered: Dict[str, Any], other: Dict[str, Any]) -> boo "contract", ], "dict_key_append": ["grants"], + "object": ["snapshot_meta_column_names"], } @classmethod @@ -180,6 +185,7 @@ class MergeBehavior(Metadata): Update = 2 Clobber = 3 DictKeyAppend = 4 + Object = 5 @classmethod def default_field(cls) -> "MergeBehavior": @@ -215,8 +221,10 @@ def _listify(value: Any) -> List[Any]: # There are two versions of this code. The one here is for config -# objects, the one in _add_config_call in core context_config.py is for -# config_call_dict dictionaries. +# objects which can get the "MergeBehavior" from the field in the class, +# the one below in 'merge_config_dicts' (formerly in +# _add_config_call in core context_config.py) is for config_call dictionaries +# where we need to get the MergeBehavior from someplace else. def _merge_field_value( merge_behavior: MergeBehavior, self_value: Any, @@ -225,7 +233,8 @@ def _merge_field_value( if merge_behavior == MergeBehavior.Clobber: return other_value elif merge_behavior == MergeBehavior.Append: - return _listify(self_value) + _listify(other_value) + new_value = _listify(self_value) + _listify(other_value) + return new_value elif merge_behavior == MergeBehavior.Update: if not isinstance(self_value, dict): raise DbtInternalError(f"expected dict, got {self_value}") @@ -258,6 +267,73 @@ def _merge_field_value( # clobber the list new_dict[new_key] = _listify(other_value[key]) return new_dict - + elif merge_behavior == MergeBehavior.Object: + # All fields in classes with MergeBehavior.Object should have a default of None + if not type(self_value).__name__ == type(other_value).__name__: + raise DbtInternalError( + f"got conflicting types: {type(self_value).__name__} and {type(other_value).__name__}" + ) + new_value = self_value.copy() + new_value.update(other_value) + return new_value else: raise DbtInternalError(f"Got an invalid merge_behavior: {merge_behavior}") + + +# This is used in ContextConfig._add_config_call. It updates the orig_dict in place. +def merge_config_dicts(orig_dict: Dict[str, Any], new_dict: Dict[str, Any]) -> None: + # orig_dict is already encountered configs, new_dict is new + # This mirrors code in _merge_field_value in model_config.py which is similar but + # operates on config objects. + if orig_dict == {}: + orig_dict.update(new_dict) + return + for k, v in new_dict.items(): + # MergeBehavior for post-hook and pre-hook is to collect all + # values, instead of overwriting + if k in BaseConfig.mergebehavior["append"]: + if k in orig_dict: # should always be a list here + orig_dict[k] = _listify(orig_dict[k]) + _listify(v) + else: + orig_dict[k] = _listify(v) + elif k in BaseConfig.mergebehavior["update"]: + if not isinstance(v, dict): + raise DbtInternalError(f"expected dict, got {v}") + if k in orig_dict and isinstance(orig_dict[k], dict): + orig_dict[k].update(v) + else: + orig_dict[k] = v + elif k in BaseConfig.mergebehavior["dict_key_append"]: + if not isinstance(v, dict): + raise DbtInternalError(f"expected dict, got {v}") + if k in orig_dict: # should always be a dict + for key in orig_dict[k].keys(): + orig_dict[k][key] = _listify(orig_dict[k][key]) + for key, value in v.items(): + extend = False + # This might start with a +, to indicate we should extend the list + # instead of just clobbering it. We don't want to remove the + here + # (like in the other method) because we want it preserved + if key.startswith("+"): + extend = True + if key in orig_dict[k] and extend: + # extend the list + orig_dict[k][key].extend(_listify(value)) + else: + # clobber the list + orig_dict[k][key] = _listify(value) + else: + # This is always a dictionary + orig_dict[k] = v + # listify everything + for key, value in orig_dict[k].items(): + orig_dict[k][key] = _listify(value) + elif k in BaseConfig.mergebehavior["object"]: + if not isinstance(v, dict): + raise DbtInternalError(f"expected dict, got {v}") + if k not in orig_dict: + orig_dict[k] = {} + for obj_k, obj_v in v.items(): + orig_dict[k][obj_k] = obj_v + else: # Clobber + orig_dict[k] = v diff --git a/tests/unit/test_model_config.py b/tests/unit/test_model_config.py index 57a14438..9d1de1b5 100644 --- a/tests/unit/test_model_config.py +++ b/tests/unit/test_model_config.py @@ -1,17 +1,39 @@ from dataclasses import dataclass, field from dbt_common.dataclass_schema import dbtClassMixin -from typing import List, Dict +from typing import List, Dict, Optional, Any from dbt_common.contracts.config.metadata import ShowBehavior -from dbt_common.contracts.config.base import MergeBehavior, CompareBehavior +from dbt_common.contracts.config.base import ( + MergeBehavior, + CompareBehavior, + BaseConfig, + merge_config_dicts, +) @dataclass -class ThingWithMergeBehavior(dbtClassMixin): - default_behavior: int - appended: List[str] = field(metadata={"merge": MergeBehavior.Append}) - updated: Dict[str, int] = field(metadata={"merge": MergeBehavior.Update}) - clobbered: str = field(metadata={"merge": MergeBehavior.Clobber}) - keysappended: Dict[str, int] = field(metadata={"merge": MergeBehavior.DictKeyAppend}) +class TableColumnNames(dbtClassMixin): + first_column: Optional[str] = None + second_column: Optional[str] = None + third_column: Optional[str] = None + + +@dataclass +class SubstituteAdapterConfig(BaseConfig): + pass + + +@dataclass +class ThingWithMergeBehavior(BaseConfig): + default_behavior: Optional[int] = None + tags: List[str] = field(metadata={"merge": MergeBehavior.Append}, default_factory=list) + meta: Dict[str, int] = field(metadata={"merge": MergeBehavior.Update}, default_factory=dict) + clobbered: Optional[str] = field(metadata={"merge": MergeBehavior.Clobber}, default=None) + grants: Dict[str, Any] = field( + metadata={"merge": MergeBehavior.DictKeyAppend}, default_factory=dict + ) + snapshot_table_column_names: TableColumnNames = field( + metadata={"merge": MergeBehavior.Object}, default_factory=TableColumnNames + ) def test_merge_behavior_meta() -> None: @@ -22,6 +44,7 @@ def test_merge_behavior_meta() -> None: MergeBehavior.Update, MergeBehavior.Clobber, MergeBehavior.DictKeyAppend, + MergeBehavior.Object, } for behavior in MergeBehavior: assert behavior.meta() == {"merge": behavior} @@ -31,12 +54,73 @@ def test_merge_behavior_meta() -> None: def test_merge_behavior_from_field() -> None: fields2 = {name: f for f, name in ThingWithMergeBehavior._get_fields()} - assert set(fields2) == {"default_behavior", "appended", "updated", "clobbered", "keysappended"} + assert set(fields2) == { + "default_behavior", + "tags", + "meta", + "clobbered", + "grants", + "snapshot_table_column_names", + } assert MergeBehavior.from_field(fields2["default_behavior"]) == MergeBehavior.Clobber - assert MergeBehavior.from_field(fields2["appended"]) == MergeBehavior.Append - assert MergeBehavior.from_field(fields2["updated"]) == MergeBehavior.Update + assert MergeBehavior.from_field(fields2["tags"]) == MergeBehavior.Append + assert MergeBehavior.from_field(fields2["meta"]) == MergeBehavior.Update assert MergeBehavior.from_field(fields2["clobbered"]) == MergeBehavior.Clobber - assert MergeBehavior.from_field(fields2["keysappended"]) == MergeBehavior.DictKeyAppend + assert MergeBehavior.from_field(fields2["grants"]) == MergeBehavior.DictKeyAppend + assert MergeBehavior.from_field(fields2["snapshot_table_column_names"]) == MergeBehavior.Object + + +def test_update_from() -> None: + initial_dct = { + "default_behavior": 4, + "tags": ["one", "two"], + "meta": {"one": 1, "two": 2}, + "clobbered": "initial", + "grants": {"one": "alt"}, + "snapshot_table_column_names": {"second_column": "dbt_something"}, + } + initial_obj = ThingWithMergeBehavior.from_dict(initial_dct.copy()) + assert initial_obj + assert isinstance(initial_obj.snapshot_table_column_names, TableColumnNames) + update_dct = { + "default_behavior": 3, + "tags": ["five"], + "meta": {"two": 2, "three": 3}, + "clobbered": "later", + "grants": {"two": "fine", "+one": "some"}, + "snapshot_table_column_names": {"first_column": "dbt_ack", "second_column": "dbt_more"}, + } + updated_obj = initial_obj.update_from(update_dct.copy(), SubstituteAdapterConfig) + + assert updated_obj.default_behavior == 3 + assert updated_obj.tags == ["one", "two", "five"] + assert updated_obj.meta == {"one": 1, "two": 2, "three": 3} + assert updated_obj.clobbered == "later" + assert updated_obj.grants == {"one": ["alt", "some"], "two": ["fine"]} + assert updated_obj.snapshot_table_column_names.to_dict() == { + "first_column": "dbt_ack", + "second_column": "dbt_more", + "third_column": None, + } + assert updated_obj.snapshot_table_column_names.first_column == "dbt_ack" + assert updated_obj.snapshot_table_column_names.second_column == "dbt_more" + assert updated_obj.snapshot_table_column_names.third_column is None + + # If we want to merge both grants, they need to both have a "+" + initial_dct["grants"] = {"+one": "alt"} + + merge_config_dicts(initial_dct, update_dct) + + expected = { + "default_behavior": 3, + "tags": ["one", "two", "five"], + "meta": {"one": 1, "two": 2, "three": 3}, + "clobbered": "later", + "grants": {"+one": ["alt", "some"], "two": ["fine"]}, + "snapshot_table_column_names": {"first_column": "dbt_ack", "second_column": "dbt_more"}, + } + + assert initial_dct == expected @dataclass