From 261e1245a88d2fd282eef5ec8d87d64c0d1e64e0 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Mon, 25 Nov 2024 18:01:24 -0500 Subject: [PATCH 01/42] Fix NodeVersion definition, make_semantic_model utility --- core/dbt/artifacts/resources/v1/components.py | 2 +- tests/unit/utils/manifest.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/dbt/artifacts/resources/v1/components.py b/core/dbt/artifacts/resources/v1/components.py index 8eb43f35d8e..5d138c9bfde 100644 --- a/core/dbt/artifacts/resources/v1/components.py +++ b/core/dbt/artifacts/resources/v1/components.py @@ -12,7 +12,7 @@ from dbt_common.dataclass_schema import ExtensibleDbtClassMixin, dbtClassMixin from dbt_semantic_interfaces.type_enums import TimeGranularity -NodeVersion = Union[str, float] +NodeVersion = Union[int, float, str] @dataclass diff --git a/tests/unit/utils/manifest.py b/tests/unit/utils/manifest.py index 0950f68ebb5..26b564209be 100644 --- a/tests/unit/utils/manifest.py +++ b/tests/unit/utils/manifest.py @@ -470,7 +470,7 @@ def make_semantic_model( return SemanticModel( name=name, resource_type=NodeType.SemanticModel, - model=model, + model=model.name, node_relation=NodeRelation( alias=model.alias, schema_name="dbt", From 37be156f4226039dd9c26f93ca53a8982c757cad Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Mon, 25 Nov 2024 18:03:14 -0500 Subject: [PATCH 02/42] Bump up mashumaro version again --- core/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/setup.py b/core/setup.py index 7da702f0dda..d27da9a2fbe 100644 --- a/core/setup.py +++ b/core/setup.py @@ -51,7 +51,7 @@ # Pin to the patch or minor version, and bump in each new minor version of dbt-core. "agate>=1.7.0,<1.10", "Jinja2>=3.1.3,<4", - "mashumaro[msgpack]>=3.9,<3.15", + "mashumaro[msgpack]>=3.9,<4.0", # ---- # dbt-core uses these packages in standard ways. Pin to the major version, and check compatibility # with major versions in each new minor version of dbt-core. From 52fa7c0956d973819b517975d5468aef51cc2e37 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Mon, 25 Nov 2024 21:41:41 -0500 Subject: [PATCH 03/42] Try pinning to >=3.15,<4.0 --- core/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/setup.py b/core/setup.py index d27da9a2fbe..c8fa182d318 100644 --- a/core/setup.py +++ b/core/setup.py @@ -51,7 +51,7 @@ # Pin to the patch or minor version, and bump in each new minor version of dbt-core. "agate>=1.7.0,<1.10", "Jinja2>=3.1.3,<4", - "mashumaro[msgpack]>=3.9,<4.0", + "mashumaro[msgpack]>=3.15,<4.0", # ---- # dbt-core uses these packages in standard ways. Pin to the major version, and check compatibility # with major versions in each new minor version of dbt-core. 
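The NodeVersion change in patch 01, the expected-manifest fix later in the series, and the mashumaro pin churn above all trace back to how mashumaro resolves Union members in declaration order: with Union[str, float], an integer model version such as 2 can be coerced to the string "2" during deserialization, so int is now listed first. A minimal standalone sketch of that behavior, assuming only mashumaro's DataClassDictMixin; the class and field names below are invented for illustration and are not part of the patch:

from dataclasses import dataclass
from typing import Union

from mashumaro import DataClassDictMixin

# Old ordering: str is tried before the numeric types.
NodeVersionOld = Union[str, float]
# New ordering from patch 01: int and float are tried before str.
NodeVersionNew = Union[int, float, str]


@dataclass
class RefOld(DataClassDictMixin):
    version: NodeVersionOld


@dataclass
class RefNew(DataClassDictMixin):
    version: NodeVersionNew


if __name__ == "__main__":
    # Depending on the installed mashumaro version, the old ordering may
    # round-trip the integer 2 as the string "2"; the new ordering keeps it
    # as an int, matching the expected_manifest fix later in this series.
    print(repr(RefOld.from_dict({"version": 2}).version))
    print(repr(RefNew.from_dict({"version": 2}).version))
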
From 354af6e7ff33d9dffe5ee2c981d920173840c888 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Mon, 25 Nov 2024 21:52:30 -0500 Subject: [PATCH 04/42] specify dbt-adapters branch --- dev-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 5f393349744..860763a177e 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,4 +1,4 @@ -git+https://github.com/dbt-labs/dbt-adapters.git@main +git+https://github.com/dbt-labs/dbt-adapters.git@mashumaro_fixes git+https://github.com/dbt-labs/dbt-adapters.git@main#subdirectory=dbt-tests-adapter git+https://github.com/dbt-labs/dbt-common.git@main git+https://github.com/dbt-labs/dbt-postgres.git@main From 7876edb7dc40162510b778a9b11b2cce0c5187ea Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Mon, 25 Nov 2024 21:57:49 -0500 Subject: [PATCH 05/42] Update v12.json schema --- schemas/dbt/manifest/v12.json | 162 +++++++++++++++++++++++++++------- 1 file changed, 129 insertions(+), 33 deletions(-) diff --git a/schemas/dbt/manifest/v12.json b/schemas/dbt/manifest/v12.json index 892c0014e41..8336d983f1d 100644 --- a/schemas/dbt/manifest/v12.json +++ b/schemas/dbt/manifest/v12.json @@ -13,7 +13,7 @@ }, "dbt_version": { "type": "string", - "default": "1.9.0b4" + "default": "1.10.0a1" }, "generated_at": { "type": "string" @@ -1802,11 +1802,14 @@ "version": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "number" }, + { + "type": "string" + }, { "type": "null" } @@ -2456,11 +2459,14 @@ "version": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "number" }, + { + "type": "string" + }, { "type": "null" } @@ -3250,11 +3256,14 @@ "version": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "number" }, + { + "type": "string" + }, { "type": "null" } @@ -4063,11 +4072,14 @@ "version": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "number" }, + { + "type": "string" + }, { "type": "null" } @@ -4292,11 +4304,14 @@ "version": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "number" }, + { + "type": "string" + }, { "type": "null" } @@ -4306,11 +4321,14 @@ "latest_version": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "number" }, + { + "type": "string" + }, { "type": "null" } @@ -5430,11 +5448,14 @@ "version": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "number" }, + { + "type": "string" + }, { "type": "null" } @@ -6084,11 +6105,14 @@ "version": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "number" }, + { + "type": "string" + }, { "type": "null" } @@ -7073,11 +7097,14 @@ "version": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "number" }, + { + "type": "string" + }, { "type": "null" } @@ -8632,11 +8659,14 @@ "version": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "number" }, + { + "type": "string" + }, { "type": "null" } @@ -9882,11 +9912,14 @@ "version": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "number" }, + { + "type": "string" + }, { "type": "null" } @@ -11731,11 +11764,14 @@ "version": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "number" }, + { + "type": "string" + }, { "type": "null" } @@ -12385,11 +12421,14 @@ "version": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "number" }, + { + "type": "string" + }, { "type": "null" } @@ -13179,11 +13218,14 @@ "version": { "anyOf": [ { - "type": "string" + "type": "integer" 
}, { "type": "number" }, + { + "type": "string" + }, { "type": "null" } @@ -13992,11 +14034,14 @@ "version": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "number" }, + { + "type": "string" + }, { "type": "null" } @@ -14221,11 +14266,14 @@ "version": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "number" }, + { + "type": "string" + }, { "type": "null" } @@ -14235,11 +14283,14 @@ "latest_version": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "number" }, + { + "type": "string" + }, { "type": "null" } @@ -15359,11 +15410,14 @@ "version": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "number" }, + { + "type": "string" + }, { "type": "null" } @@ -16013,11 +16067,14 @@ "version": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "number" }, + { + "type": "string" + }, { "type": "null" } @@ -17002,11 +17059,14 @@ "version": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "number" }, + { + "type": "string" + }, { "type": "null" } @@ -18350,11 +18410,14 @@ "version": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "number" }, + { + "type": "string" + }, { "type": "null" } @@ -19593,11 +19656,14 @@ "version": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "number" }, + { + "type": "string" + }, { "type": "null" } @@ -20028,11 +20094,14 @@ "version": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "number" }, + { + "type": "string" + }, { "type": "null" } @@ -20679,11 +20748,14 @@ "version": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "number" }, + { + "type": "string" + }, { "type": "null" } @@ -21057,10 +21129,13 @@ "items": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "number" + }, + { + "type": "string" } ] } @@ -21078,10 +21153,13 @@ "items": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "number" + }, + { + "type": "string" } ] } @@ -21104,11 +21182,14 @@ "version": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "number" }, + { + "type": "string" + }, { "type": "null" } @@ -21583,11 +21664,14 @@ "version": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "number" }, + { + "type": "string" + }, { "type": "null" } @@ -22241,11 +22325,14 @@ "version": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "number" }, + { + "type": "string" + }, { "type": "null" } @@ -22626,10 +22713,13 @@ "items": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "number" + }, + { + "type": "string" } ] } @@ -22647,10 +22737,13 @@ "items": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "number" + }, + { + "type": "string" } ] } @@ -22673,11 +22766,14 @@ "version": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "number" }, + { + "type": "string" + }, { "type": "null" } From c0ea26e14362e9a32743226240786736674d904f Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 26 Nov 2024 13:34:24 -0500 Subject: [PATCH 06/42] validate in "update_from" --- core/dbt/context/context_config.py | 3 ++- tests/functional/configs/test_disabled_model.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/core/dbt/context/context_config.py b/core/dbt/context/context_config.py index b1aace475c7..8a4812196eb 100644 --- a/core/dbt/context/context_config.py +++ b/core/dbt/context/context_config.py @@ -202,13 +202,14 @@ def initial_result(self, resource_type: NodeType, base: 
bool) -> C: result = config_cls.from_dict({}) return result - def _update_from_config(self, result: C, partial: Dict[str, Any], validate: bool = False) -> C: + def _update_from_config(self, result: C, partial: Dict[str, Any], validate: bool = True) -> C: translated = self._active_project.credentials.translate_aliases(partial) translated = self.translate_hook_names(translated) adapter_type = self._active_project.credentials.type adapter_config_cls = get_config_class_by_name(adapter_type) + # The "update_from" method in BaseConfig merges dictionaries and does a from_dict. updated = result.update_from(translated, adapter_config_cls, validate=validate) return updated diff --git a/tests/functional/configs/test_disabled_model.py b/tests/functional/configs/test_disabled_model.py index a918067ac15..5af1a7a617f 100644 --- a/tests/functional/configs/test_disabled_model.py +++ b/tests/functional/configs/test_disabled_model.py @@ -1,6 +1,7 @@ import pytest from dbt.exceptions import CompilationError, ParsingError, SchemaConfigError +from dbt_common.dataclass_schema import ValidationError from dbt.tests.util import get_manifest, run_dbt from tests.functional.configs.fixtures import ( my_model, @@ -394,7 +395,7 @@ def models(self): } def test_invalid_config(self, project): - with pytest.raises(SchemaConfigError) as exc: + with pytest.raises(ValidationError) as exc: run_dbt(["parse"]) exc_str = " ".join(str(exc.value).split()) # flatten all whitespace expected_msg = "'True and False' is not of type 'boolean'" From dfca4e572a1c69b667d1acc0481407ab02d0bc6a Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 26 Nov 2024 13:41:39 -0500 Subject: [PATCH 07/42] error classes --- tests/functional/configs/test_disabled_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/functional/configs/test_disabled_model.py b/tests/functional/configs/test_disabled_model.py index 5af1a7a617f..1f83f48e6c6 100644 --- a/tests/functional/configs/test_disabled_model.py +++ b/tests/functional/configs/test_disabled_model.py @@ -1,8 +1,8 @@ import pytest -from dbt.exceptions import CompilationError, ParsingError, SchemaConfigError -from dbt_common.dataclass_schema import ValidationError +from dbt.exceptions import CompilationError, ParsingError from dbt.tests.util import get_manifest, run_dbt +from dbt_common.dataclass_schema import ValidationError from tests.functional.configs.fixtures import ( my_model, my_model_2, From b81632e0ae66735abfd5fbe565f4bc50b37c46f2 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 26 Nov 2024 21:01:29 -0500 Subject: [PATCH 08/42] Comments --- core/dbt/context/context_config.py | 43 +++++++++++++++++------------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/core/dbt/context/context_config.py b/core/dbt/context/context_config.py index 8a4812196eb..684dda5efd8 100644 --- a/core/dbt/context/context_config.py +++ b/core/dbt/context/context_config.py @@ -139,6 +139,7 @@ def _update_from_config( @abstractmethod def initial_result(self, resource_type: NodeType, base: bool) -> T: ... 
+ # BaseContextConfigGenerator def calculate_node_config( self, config_call_dict: Dict[str, Any], @@ -150,29 +151,28 @@ def calculate_node_config( ) -> BaseConfig: own_config = self.get_node_project(project_name) - result = self.initial_result(resource_type=resource_type, base=base) + # creates "default" config object ("cls.from_dict({})") + config_obj = self.initial_result(resource_type=resource_type, base=base) project_configs = self._project_configs(own_config, fqn, resource_type) for fqn_config in project_configs: - result = self._update_from_config(result, fqn_config) + config_obj = self._update_from_config(config_obj, fqn_config) # When schema files patch config, it has lower precedence than # config in the models (config_call_dict), so we add the patch_config_dict # before the config_call_dict if patch_config_dict: - result = self._update_from_config(result, patch_config_dict) + config_obj = self._update_from_config(config_obj, patch_config_dict) # config_calls are created in the 'experimental' model parser and # the ParseConfigObject (via add_config_call) - result = self._update_from_config(result, config_call_dict) + config_obj = self._update_from_config(config_obj, config_call_dict) if own_config.project_name != self._active_project.project_name: for fqn_config in self._active_project_configs(fqn, resource_type): - result = self._update_from_config(result, fqn_config) + config_obj = self._update_from_config(config_obj, fqn_config) - # this is mostly impactful in the snapshot config case - # TODO CT-211 - return result # type: ignore[return-value] + return config_obj # type: ignore[return-value] @abstractmethod def calculate_node_config_dict( @@ -223,6 +223,7 @@ def translate_hook_names(self, project_dict): project_dict["post-hook"] = project_dict.pop("post_hook") return project_dict + # ContextConfigGenerator def calculate_node_config_dict( self, config_call_dict: Dict[str, Any], @@ -232,7 +233,9 @@ def calculate_node_config_dict( base: bool, patch_config_dict: Optional[dict] = None, ) -> Dict[str, Any]: - config = self.calculate_node_config( + + # calls BaseContextConfigGenerator.calculate_node_config + config_obj = self.calculate_node_config( config_call_dict=config_call_dict, fqn=fqn, resource_type=resource_type, @@ -241,17 +244,20 @@ def calculate_node_config_dict( patch_config_dict=patch_config_dict, ) try: - finalized = config.finalize_and_validate() + # Call "finalize_and_validate" on the config obj + finalized = config_obj.finalize_and_validate() + # THEN return a dictionary!!! Why!! 
return finalized.to_dict(omit_none=True) except ValidationError as exc: # we got a ValidationError - probably bad types in config() - raise SchemaConfigError(exc, node=config) from exc + raise SchemaConfigError(exc, node=config_obj) from exc class UnrenderedConfigGenerator(BaseContextConfigGenerator[Dict[str, Any]]): def get_config_source(self, project: Project) -> ConfigSource: return UnrenderedConfig(project) + # UnrenderedConfigGenerator def calculate_node_config_dict( self, config_call_dict: Dict[str, Any], @@ -261,7 +267,8 @@ def calculate_node_config_dict( base: bool, patch_config_dict: Optional[dict] = None, ) -> Dict[str, Any]: - # TODO CT-211 + + # calls BaseContextConfigGenerator.calculate_node_config return self.calculate_node_config( config_call_dict=config_call_dict, fqn=fqn, @@ -270,6 +277,7 @@ def calculate_node_config_dict( base=base, patch_config_dict=patch_config_dict, ) # type: ignore[return-value] + # Note: this returns a config_obj, NOT a dictionary def initial_result(self, resource_type: NodeType, base: bool) -> Dict[str, Any]: return {} @@ -308,6 +316,7 @@ def add_unrendered_config_call(self, opts: Dict[str, Any]) -> None: # Cannot perform complex merge behaviours on unrendered configs as they may not be appropriate types. self._unrendered_config_call_dict.update(opts) + # ContextConfig def build_config_dict( self, base: bool = False, @@ -316,12 +325,10 @@ def build_config_dict( patch_config_dict: Optional[dict] = None, ) -> Dict[str, Any]: if rendered: - # TODO CT-211 - src = ContextConfigGenerator(self._active_project) # type: ignore[var-annotated] + config_generator = ContextConfigGenerator(self._active_project) # type: ignore[var-annotated] config_call_dict = self._config_call_dict - else: - # TODO CT-211 - src = UnrenderedConfigGenerator(self._active_project) # type: ignore[assignment] + else: # unrendered + config_generator = UnrenderedConfigGenerator(self._active_project) # type: ignore[assignment] # preserve legacy behaviour - using unreliable (potentially rendered) _config_call_dict if get_flags().state_modified_compare_more_unrendered_values is False: @@ -334,7 +341,7 @@ def build_config_dict( else: config_call_dict = self._unrendered_config_call_dict - return src.calculate_node_config_dict( + return config_generator.calculate_node_config_dict( config_call_dict=config_call_dict, fqn=self._fqn, resource_type=self._resource_type, From e47bed34fa908bb8703d52e24177fe4c18d15ca0 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 26 Nov 2024 21:03:20 -0500 Subject: [PATCH 09/42] Put back test_disabled_model.py --- tests/functional/configs/test_disabled_model.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/functional/configs/test_disabled_model.py b/tests/functional/configs/test_disabled_model.py index 1f83f48e6c6..a918067ac15 100644 --- a/tests/functional/configs/test_disabled_model.py +++ b/tests/functional/configs/test_disabled_model.py @@ -1,8 +1,7 @@ import pytest -from dbt.exceptions import CompilationError, ParsingError +from dbt.exceptions import CompilationError, ParsingError, SchemaConfigError from dbt.tests.util import get_manifest, run_dbt -from dbt_common.dataclass_schema import ValidationError from tests.functional.configs.fixtures import ( my_model, my_model_2, @@ -395,7 +394,7 @@ def models(self): } def test_invalid_config(self, project): - with pytest.raises(ValidationError) as exc: + with pytest.raises(SchemaConfigError) as exc: run_dbt(["parse"]) exc_str = " ".join(str(exc.value).split()) # flatten all 
whitespace expected_msg = "'True and False' is not of type 'boolean'" From 9d6daa188438e50fda705aab0d4a9fb5716bfd6c Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 26 Nov 2024 21:04:14 -0500 Subject: [PATCH 10/42] Use older mashumaro --- core/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/setup.py b/core/setup.py index c8fa182d318..ae4d8306eb0 100644 --- a/core/setup.py +++ b/core/setup.py @@ -51,7 +51,7 @@ # Pin to the patch or minor version, and bump in each new minor version of dbt-core. "agate>=1.7.0,<1.10", "Jinja2>=3.1.3,<4", - "mashumaro[msgpack]>=3.15,<4.0", + "mashumaro[msgpack]>=3.0,<3.15", # ---- # dbt-core uses these packages in standard ways. Pin to the major version, and check compatibility # with major versions in each new minor version of dbt-core. From baf42c4a0aeb36f2577fce261e26fb369cdde5ff Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 26 Nov 2024 21:29:07 -0500 Subject: [PATCH 11/42] Again --- core/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/setup.py b/core/setup.py index ae4d8306eb0..7da702f0dda 100644 --- a/core/setup.py +++ b/core/setup.py @@ -51,7 +51,7 @@ # Pin to the patch or minor version, and bump in each new minor version of dbt-core. "agate>=1.7.0,<1.10", "Jinja2>=3.1.3,<4", - "mashumaro[msgpack]>=3.0,<3.15", + "mashumaro[msgpack]>=3.9,<3.15", # ---- # dbt-core uses these packages in standard ways. Pin to the major version, and check compatibility # with major versions in each new minor version of dbt-core. From 9df05a87b52218cc8f07a4e547c1b8c3c77d5298 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 26 Nov 2024 21:52:36 -0500 Subject: [PATCH 12/42] Put back default for validate in _update_from_config --- core/dbt/context/context_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/dbt/context/context_config.py b/core/dbt/context/context_config.py index 684dda5efd8..40673b74857 100644 --- a/core/dbt/context/context_config.py +++ b/core/dbt/context/context_config.py @@ -202,7 +202,7 @@ def initial_result(self, resource_type: NodeType, base: bool) -> C: result = config_cls.from_dict({}) return result - def _update_from_config(self, result: C, partial: Dict[str, Any], validate: bool = True) -> C: + def _update_from_config(self, result: C, partial: Dict[str, Any], validate: bool = False) -> C: translated = self._active_project.credentials.translate_aliases(partial) translated = self.translate_hook_names(translated) From 524ce54d82d017a9a75bcce7e1e96063ae9d040c Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 26 Nov 2024 22:25:46 -0500 Subject: [PATCH 13/42] Fix NodeVersion in artifacts test --- tests/functional/artifacts/expected_manifest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/functional/artifacts/expected_manifest.py b/tests/functional/artifacts/expected_manifest.py index c7deb2e8ea8..cb1c751fed8 100644 --- a/tests/functional/artifacts/expected_manifest.py +++ b/tests/functional/artifacts/expected_manifest.py @@ -1712,7 +1712,7 @@ def expected_versions_manifest(project): "language": "sql", "refs": [ {"name": "versioned_model", "package": None, "version": 2}, - {"name": "versioned_model", "package": None, "version": "2"}, + {"name": "versioned_model", "package": None, "version": 2}, {"name": "versioned_model", "package": None, "version": 2}, {"name": "versioned_model", "package": None, "version": None}, {"name": "versioned_model", "package": None, "version": 1}, From df23c7d9820f151c8f803298a96f8d42ef272551 
Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Sat, 30 Nov 2024 13:49:51 -0500 Subject: [PATCH 14/42] Skip a couple of tests until mash 3.15 --- tests/functional/graph_selection/test_version_selection.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/functional/graph_selection/test_version_selection.py b/tests/functional/graph_selection/test_version_selection.py index 335fad25270..6b2b769eee7 100644 --- a/tests/functional/graph_selection/test_version_selection.py +++ b/tests/functional/graph_selection/test_version_selection.py @@ -54,7 +54,12 @@ def seeds(self, test_data_dir): def selectors(self): return selectors_yml + @pytest.mark.skip('broken until mash 3.15') def test_select_none_versions(self, project): + manifest = run_dbt(["parse"]) + print(f"--- nodes.keys(): {manifest.nodes.keys()}") + # This wrongly includes test.versioned_v4.5 + # This is fixed by mashumaro 3.15 results = run_dbt(["ls", "--select", "version:none"]) assert sorted(results) == [ "test.base_users", @@ -72,6 +77,7 @@ def test_select_old_versions(self, project): results = run_dbt(["ls", "--select", "version:old"]) assert sorted(results) == ["test.versioned.v1"] + @pytest.mark.skip('broken until mash 3.15') def test_select_prerelease_versions(self, project): results = run_dbt(["ls", "--select", "version:prerelease"]) assert sorted(results) == [ From 42a1afb0d91b385893064fe42830381dd8ce149c Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Sat, 30 Nov 2024 14:34:02 -0500 Subject: [PATCH 15/42] Make specialized 'calculate_node_config' methods --- core/dbt/context/context_config.py | 120 +++++++++++++----- .../graph_selection/test_version_selection.py | 4 +- 2 files changed, 90 insertions(+), 34 deletions(-) diff --git a/core/dbt/context/context_config.py b/core/dbt/context/context_config.py index 40673b74857..07f4bfef81a 100644 --- a/core/dbt/context/context_config.py +++ b/core/dbt/context/context_config.py @@ -100,7 +100,7 @@ def __init__(self, active_project: RuntimeConfig): def get_config_source(self, project: Project) -> ConfigSource: return RenderedConfig(project) - def get_node_project(self, project_name: str): + def get_node_project_config(self, project_name: str): if project_name == self._active_project.project_name: return self._active_project dependencies = self._active_project.load_dependencies() @@ -131,6 +131,17 @@ def _active_project_configs( ) -> Iterator[Dict[str, Any]]: return self._project_configs(self._active_project, fqn, resource_type) + @abstractmethod + def calculate_node_config( + self, + config_call_dict: Dict[str, Any], + fqn: List[str], + resource_type: NodeType, + project_name: str, + base: bool, + patch_config_dict: Optional[Dict[str, Any]] = None, + ) -> T: ... + @abstractmethod def _update_from_config( self, result: T, partial: Dict[str, Any], validate: bool = False @@ -140,6 +151,25 @@ def _update_from_config( def initial_result(self, resource_type: NodeType, base: bool) -> T: ... # BaseContextConfigGenerator + @abstractmethod + def calculate_node_config_dict( + self, + config_call_dict: Dict[str, Any], + fqn: List[str], + resource_type: NodeType, + project_name: str, + base: bool, + patch_config_dict: Optional[Dict[str, Any]] = None, + ) -> Dict[str, Any]: ... 
+ + +class ContextConfigGenerator(BaseContextConfigGenerator[C]): + def __init__(self, active_project: RuntimeConfig): + self._active_project = active_project + + def get_config_source(self, project: Project) -> ConfigSource: + return RenderedConfig(project) + def calculate_node_config( self, config_call_dict: Dict[str, Any], @@ -148,13 +178,18 @@ def calculate_node_config( project_name: str, base: bool, patch_config_dict: Optional[Dict[str, Any]] = None, - ) -> BaseConfig: - own_config = self.get_node_project(project_name) + ) -> C: + # Note: This method returns a BaseConfig object. This is a duplicate of + # of UnrenderedConfigGenerator.calculate_node_config, but calls methods + # that deal with config objects instead of dictionaries. + # Additions to one method, should probably also go in the other. + + project_config = self.get_node_project_config(project_name) # creates "default" config object ("cls.from_dict({})") config_obj = self.initial_result(resource_type=resource_type, base=base) - project_configs = self._project_configs(own_config, fqn, resource_type) + project_configs = self._project_configs(project_config, fqn, resource_type) for fqn_config in project_configs: config_obj = self._update_from_config(config_obj, fqn_config) @@ -168,33 +203,14 @@ def calculate_node_config( # the ParseConfigObject (via add_config_call) config_obj = self._update_from_config(config_obj, config_call_dict) - if own_config.project_name != self._active_project.project_name: + if project_config.project_name != self._active_project.project_name: for fqn_config in self._active_project_configs(fqn, resource_type): config_obj = self._update_from_config(config_obj, fqn_config) - return config_obj # type: ignore[return-value] - - @abstractmethod - def calculate_node_config_dict( - self, - config_call_dict: Dict[str, Any], - fqn: List[str], - resource_type: NodeType, - project_name: str, - base: bool, - patch_config_dict: Optional[Dict[str, Any]] = None, - ) -> Dict[str, Any]: ... - - -class ContextConfigGenerator(BaseContextConfigGenerator[C]): - def __init__(self, active_project: RuntimeConfig): - self._active_project = active_project - - def get_config_source(self, project: Project) -> ConfigSource: - return RenderedConfig(project) + return config_obj def initial_result(self, resource_type: NodeType, base: bool) -> C: - # defaults, own_config, config calls, active_config (if != own_config) + # defaults, project_config, config calls, active_config (if != project_config) config_cls = get_config_for(resource_type, base=base) # Calculate the defaults. We don't want to validate the defaults, # because it might be invalid in the case of required config members @@ -234,7 +250,7 @@ def calculate_node_config_dict( patch_config_dict: Optional[dict] = None, ) -> Dict[str, Any]: - # calls BaseContextConfigGenerator.calculate_node_config + # returns a config object config_obj = self.calculate_node_config( config_call_dict=config_call_dict, fqn=fqn, @@ -257,6 +273,45 @@ class UnrenderedConfigGenerator(BaseContextConfigGenerator[Dict[str, Any]]): def get_config_source(self, project: Project) -> ConfigSource: return UnrenderedConfig(project) + def calculate_node_config( + self, + config_call_dict: Dict[str, Any], + fqn: List[str], + resource_type: NodeType, + project_name: str, + base: bool, + patch_config_dict: Optional[Dict[str, Any]] = None, + ) -> Dict[str, Any]: + # Note: This method returns a Dict[str, Any]. 
This is a duplicate of + # of ContextConfigGenerator.calculate_node_config, but calls methods + # that deal with dictionaries instead of config object. + # Additions to one method, should probably also go in the other. + + project_config = self.get_node_project_config(project_name) + + # creates "default" config object ({}) + config_dict = self.initial_result(resource_type=resource_type, base=base) + + project_configs = self._project_configs(project_config, fqn, resource_type) + for fqn_config in project_configs: + config_dict = self._update_from_config(config_dict, fqn_config) + + # When schema files patch config, it has lower precedence than + # config in the models (config_call_dict), so we add the patch_config_dict + # before the config_call_dict + if patch_config_dict: + config_dict = self._update_from_config(config_dict, patch_config_dict) + + # config_calls are created in the 'experimental' model parser and + # the ParseConfigObject (via add_config_call) + config_dict = self._update_from_config(config_dict, config_call_dict) + + if project_config.project_name != self._active_project.project_name: + for fqn_config in self._active_project_configs(fqn, resource_type): + config_dict = self._update_from_config(config_dict, fqn_config) + + return config_dict + # UnrenderedConfigGenerator def calculate_node_config_dict( self, @@ -267,17 +322,18 @@ def calculate_node_config_dict( base: bool, patch_config_dict: Optional[dict] = None, ) -> Dict[str, Any]: - - # calls BaseContextConfigGenerator.calculate_node_config - return self.calculate_node_config( + # Just call UnrenderedConfigGenerator.calculate_node_config, which + # will return a config dictionary + result = self.calculate_node_config( config_call_dict=config_call_dict, fqn=fqn, resource_type=resource_type, project_name=project_name, base=base, patch_config_dict=patch_config_dict, - ) # type: ignore[return-value] - # Note: this returns a config_obj, NOT a dictionary + ) + # Note: this returns a dictionary + return result def initial_result(self, resource_type: NodeType, base: bool) -> Dict[str, Any]: return {} diff --git a/tests/functional/graph_selection/test_version_selection.py b/tests/functional/graph_selection/test_version_selection.py index 6b2b769eee7..e55115464ad 100644 --- a/tests/functional/graph_selection/test_version_selection.py +++ b/tests/functional/graph_selection/test_version_selection.py @@ -54,7 +54,7 @@ def seeds(self, test_data_dir): def selectors(self): return selectors_yml - @pytest.mark.skip('broken until mash 3.15') + @pytest.mark.skip("broken until mash 3.15") def test_select_none_versions(self, project): manifest = run_dbt(["parse"]) print(f"--- nodes.keys(): {manifest.nodes.keys()}") @@ -77,7 +77,7 @@ def test_select_old_versions(self, project): results = run_dbt(["ls", "--select", "version:old"]) assert sorted(results) == ["test.versioned.v1"] - @pytest.mark.skip('broken until mash 3.15') + @pytest.mark.skip("broken until mash 3.15") def test_select_prerelease_versions(self, project): results = run_dbt(["ls", "--select", "version:prerelease"]) assert sorted(results) == [ From 9c6883b7078ea1dffa1804458dcd58d6365299fc Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Mon, 2 Dec 2024 11:45:38 -0500 Subject: [PATCH 16/42] Change order of Union in source definitions partitions --- core/dbt/artifacts/resources/v1/source_definition.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/dbt/artifacts/resources/v1/source_definition.py b/core/dbt/artifacts/resources/v1/source_definition.py index 
9044307563e..96289b13b42 100644 --- a/core/dbt/artifacts/resources/v1/source_definition.py +++ b/core/dbt/artifacts/resources/v1/source_definition.py @@ -40,7 +40,7 @@ class ExternalTable(AdditionalPropertiesAllowed, Mergeable): file_format: Optional[str] = None row_format: Optional[str] = None tbl_properties: Optional[str] = None - partitions: Optional[Union[List[str], List[ExternalPartition]]] = None + partitions: Optional[Union[List[ExternalPartition], List[str]]] = None def __bool__(self): return self.location is not None From 25899ca69571991e65d0158cd26ffcc22083e28e Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Mon, 2 Dec 2024 11:47:16 -0500 Subject: [PATCH 17/42] Change order of external.partitions schema elements --- schemas/dbt/manifest/v12.json | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/schemas/dbt/manifest/v12.json b/schemas/dbt/manifest/v12.json index 8336d983f1d..4c66e128fe2 100644 --- a/schemas/dbt/manifest/v12.json +++ b/schemas/dbt/manifest/v12.json @@ -7943,12 +7943,6 @@ }, "partitions": { "anyOf": [ - { - "type": "array", - "items": { - "type": "string" - } - }, { "type": "array", "items": { @@ -7983,6 +7977,12 @@ "additionalProperties": true } }, + { + "type": "array", + "items": { + "type": "string" + } + }, { "type": "null" } @@ -17896,12 +17896,6 @@ }, "partitions": { "anyOf": [ - { - "type": "array", - "items": { - "type": "string" - } - }, { "type": "array", "items": { @@ -17936,6 +17930,12 @@ "additionalProperties": true } }, + { + "type": "array", + "items": { + "type": "string" + } + }, { "type": "null" } From 5b4af1741eab1dd7505cfa38091d580a92722c01 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Mon, 2 Dec 2024 11:54:50 -0500 Subject: [PATCH 18/42] Changie --- .changes/unreleased/Under the Hood-20241202-115445.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changes/unreleased/Under the Hood-20241202-115445.yaml diff --git a/.changes/unreleased/Under the Hood-20241202-115445.yaml b/.changes/unreleased/Under the Hood-20241202-115445.yaml new file mode 100644 index 00000000000..e6bd65eea49 --- /dev/null +++ b/.changes/unreleased/Under the Hood-20241202-115445.yaml @@ -0,0 +1,6 @@ +kind: Under the Hood +body: Support upgrading mashumaro to 3.15 +time: 2024-12-02T11:54:45.103325-05:00 +custom: + Author: gshank + Issue: "11044" From 86791e0ff5e146ba61978acd35f30b4e0f22f15e Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Mon, 2 Dec 2024 12:51:38 -0500 Subject: [PATCH 19/42] Remove unnecessary "base" parameter --- core/dbt/context/context_config.py | 22 ++++++---------------- core/dbt/contracts/graph/model_config.py | 11 +---------- core/dbt/parser/base.py | 2 +- core/dbt/parser/schema_yaml_readers.py | 4 ---- core/dbt/parser/sources.py | 1 - 5 files changed, 8 insertions(+), 32 deletions(-) diff --git a/core/dbt/context/context_config.py b/core/dbt/context/context_config.py index 07f4bfef81a..48486a20a95 100644 --- a/core/dbt/context/context_config.py +++ b/core/dbt/context/context_config.py @@ -138,7 +138,6 @@ def calculate_node_config( fqn: List[str], resource_type: NodeType, project_name: str, - base: bool, patch_config_dict: Optional[Dict[str, Any]] = None, ) -> T: ... @@ -148,7 +147,7 @@ def _update_from_config( ) -> T: ... @abstractmethod - def initial_result(self, resource_type: NodeType, base: bool) -> T: ... + def initial_result(self, resource_type: NodeType) -> T: ... 
# BaseContextConfigGenerator @abstractmethod @@ -158,7 +157,6 @@ def calculate_node_config_dict( fqn: List[str], resource_type: NodeType, project_name: str, - base: bool, patch_config_dict: Optional[Dict[str, Any]] = None, ) -> Dict[str, Any]: ... @@ -176,7 +174,6 @@ def calculate_node_config( fqn: List[str], resource_type: NodeType, project_name: str, - base: bool, patch_config_dict: Optional[Dict[str, Any]] = None, ) -> C: # Note: This method returns a BaseConfig object. This is a duplicate of @@ -187,7 +184,7 @@ def calculate_node_config( project_config = self.get_node_project_config(project_name) # creates "default" config object ("cls.from_dict({})") - config_obj = self.initial_result(resource_type=resource_type, base=base) + config_obj = self.initial_result(resource_type=resource_type) project_configs = self._project_configs(project_config, fqn, resource_type) for fqn_config in project_configs: @@ -209,9 +206,9 @@ def calculate_node_config( return config_obj - def initial_result(self, resource_type: NodeType, base: bool) -> C: + def initial_result(self, resource_type: NodeType) -> C: # defaults, project_config, config calls, active_config (if != project_config) - config_cls = get_config_for(resource_type, base=base) + config_cls = get_config_for(resource_type) # Calculate the defaults. We don't want to validate the defaults, # because it might be invalid in the case of required config members # (such as on snapshots!) @@ -246,7 +243,6 @@ def calculate_node_config_dict( fqn: List[str], resource_type: NodeType, project_name: str, - base: bool, patch_config_dict: Optional[dict] = None, ) -> Dict[str, Any]: @@ -256,7 +252,6 @@ def calculate_node_config_dict( fqn=fqn, resource_type=resource_type, project_name=project_name, - base=base, patch_config_dict=patch_config_dict, ) try: @@ -279,7 +274,6 @@ def calculate_node_config( fqn: List[str], resource_type: NodeType, project_name: str, - base: bool, patch_config_dict: Optional[Dict[str, Any]] = None, ) -> Dict[str, Any]: # Note: This method returns a Dict[str, Any]. 
This is a duplicate of @@ -290,7 +284,7 @@ def calculate_node_config( project_config = self.get_node_project_config(project_name) # creates "default" config object ({}) - config_dict = self.initial_result(resource_type=resource_type, base=base) + config_dict = self.initial_result(resource_type=resource_type) project_configs = self._project_configs(project_config, fqn, resource_type) for fqn_config in project_configs: @@ -319,7 +313,6 @@ def calculate_node_config_dict( fqn: List[str], resource_type: NodeType, project_name: str, - base: bool, patch_config_dict: Optional[dict] = None, ) -> Dict[str, Any]: # Just call UnrenderedConfigGenerator.calculate_node_config, which @@ -329,13 +322,12 @@ def calculate_node_config_dict( fqn=fqn, resource_type=resource_type, project_name=project_name, - base=base, patch_config_dict=patch_config_dict, ) # Note: this returns a dictionary return result - def initial_result(self, resource_type: NodeType, base: bool) -> Dict[str, Any]: + def initial_result(self, resource_type: NodeType) -> Dict[str, Any]: return {} def _update_from_config( @@ -375,7 +367,6 @@ def add_unrendered_config_call(self, opts: Dict[str, Any]) -> None: # ContextConfig def build_config_dict( self, - base: bool = False, *, rendered: bool = True, patch_config_dict: Optional[dict] = None, @@ -402,6 +393,5 @@ def build_config_dict( fqn=self._fqn, resource_type=self._resource_type, project_name=self._project_name, - base=base, patch_config_dict=patch_config_dict, ) diff --git a/core/dbt/contracts/graph/model_config.py b/core/dbt/contracts/graph/model_config.py index b3d5952e268..72c87705fd6 100644 --- a/core/dbt/contracts/graph/model_config.py +++ b/core/dbt/contracts/graph/model_config.py @@ -53,14 +53,5 @@ class UnitTestNodeConfig(NodeConfig): } -# base resource types are like resource types, except nothing has mandatory -# configs. 
-BASE_RESOURCE_TYPES: Dict[NodeType, Type[BaseConfig]] = RESOURCE_TYPES.copy() - - def get_config_for(resource_type: NodeType, base=False) -> Type[BaseConfig]: - if base: - lookup = BASE_RESOURCE_TYPES - else: - lookup = RESOURCE_TYPES - return lookup.get(resource_type, NodeConfig) + return RESOURCE_TYPES.get(resource_type, NodeConfig) diff --git a/core/dbt/parser/base.py b/core/dbt/parser/base.py index 1d27947a25f..607c563fc6d 100644 --- a/core/dbt/parser/base.py +++ b/core/dbt/parser/base.py @@ -443,7 +443,7 @@ def config_dict( self, config: ContextConfig, ) -> Dict[str, Any]: - config_dict = config.build_config_dict(base=True) + config_dict = config.build_config_dict() self._mangle_hooks(config_dict) return config_dict diff --git a/core/dbt/parser/schema_yaml_readers.py b/core/dbt/parser/schema_yaml_readers.py index 9b4a550b5d3..a22e71d73e3 100644 --- a/core/dbt/parser/schema_yaml_readers.py +++ b/core/dbt/parser/schema_yaml_readers.py @@ -169,7 +169,6 @@ def _generate_exposure_config( fqn=fqn, resource_type=NodeType.Exposure, project_name=package_name, - base=False, patch_config_dict=precedence_configs, ) @@ -472,7 +471,6 @@ def _generate_metric_config( fqn=fqn, resource_type=NodeType.Metric, project_name=package_name, - base=False, patch_config_dict=precedence_configs, ) return config @@ -641,7 +639,6 @@ def _generate_semantic_model_config( fqn=fqn, resource_type=NodeType.SemanticModel, project_name=package_name, - base=False, patch_config_dict=precedence_configs, ) @@ -756,7 +753,6 @@ def _generate_saved_query_config( fqn=fqn, resource_type=NodeType.SavedQuery, project_name=package_name, - base=False, patch_config_dict=precedence_configs, ) diff --git a/core/dbt/parser/sources.py b/core/dbt/parser/sources.py index 0fe882750ae..97dac31d313 100644 --- a/core/dbt/parser/sources.py +++ b/core/dbt/parser/sources.py @@ -307,7 +307,6 @@ def _generate_source_config(self, target: UnpatchedSourceDefinition, rendered: b fqn=target.fqn, resource_type=NodeType.Source, project_name=target.package_name, - base=False, patch_config_dict=precedence_configs, ) From d5616de14bb647247528df3630fab0f6502b0622 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Mon, 2 Dec 2024 17:14:35 -0500 Subject: [PATCH 20/42] passes unit tests --- core/dbt/context/context_config.py | 98 ++++++++++++++------------ core/dbt/parser/base.py | 12 ++-- core/dbt/parser/schema_yaml_readers.py | 8 +-- core/dbt/parser/sources.py | 2 +- 4 files changed, 64 insertions(+), 56 deletions(-) diff --git a/core/dbt/context/context_config.py b/core/dbt/context/context_config.py index 48486a20a95..8de6d11ba8a 100644 --- a/core/dbt/context/context_config.py +++ b/core/dbt/context/context_config.py @@ -1,7 +1,7 @@ from abc import abstractmethod from copy import deepcopy from dataclasses import dataclass -from typing import Any, Dict, Generic, Iterator, List, Optional, TypeVar +from typing import Any, Dict, Generic, Iterator, List, Optional, Type, TypeVar from dbt.adapters.factory import get_config_class_by_name from dbt.config import IsFQNResource, Project, RuntimeConfig @@ -132,26 +132,25 @@ def _active_project_configs( return self._project_configs(self._active_project, fqn, resource_type) @abstractmethod - def calculate_node_config( + def merge_config_dicts( self, config_call_dict: Dict[str, Any], fqn: List[str], resource_type: NodeType, project_name: str, patch_config_dict: Optional[Dict[str, Any]] = None, - ) -> T: ... + ) -> Dict[str, Any]: ... 
@abstractmethod def _update_from_config( - self, result: T, partial: Dict[str, Any], validate: bool = False - ) -> T: ... + self, config_cls: Type[BaseConfig], result_dict: Dict[str, Any], partial: Dict[str, Any] + ) -> Dict[str, Any]: ... @abstractmethod - def initial_result(self, resource_type: NodeType) -> T: ... + def initial_result(self, config_cls: Type[BaseConfig]) -> Dict[str, Any]: ... - # BaseContextConfigGenerator @abstractmethod - def calculate_node_config_dict( + def generate_node_config( self, config_call_dict: Dict[str, Any], fqn: List[str], @@ -168,62 +167,63 @@ def __init__(self, active_project: RuntimeConfig): def get_config_source(self, project: Project) -> ConfigSource: return RenderedConfig(project) - def calculate_node_config( + def merge_config_dicts( self, config_call_dict: Dict[str, Any], fqn: List[str], resource_type: NodeType, project_name: str, patch_config_dict: Optional[Dict[str, Any]] = None, - ) -> C: + ) -> Dict[str, Any]: # Note: This method returns a BaseConfig object. This is a duplicate of - # of UnrenderedConfigGenerator.calculate_node_config, but calls methods + # of UnrenderedConfigGenerator.generate_node_config, but calls methods # that deal with config objects instead of dictionaries. # Additions to one method, should probably also go in the other. project_config = self.get_node_project_config(project_name) + config_cls = get_config_for(resource_type) # creates "default" config object ("cls.from_dict({})") - config_obj = self.initial_result(resource_type=resource_type) + config_dict = self.initial_result(config_cls) project_configs = self._project_configs(project_config, fqn, resource_type) for fqn_config in project_configs: - config_obj = self._update_from_config(config_obj, fqn_config) + config_dict = self._update_from_config(config_cls, config_dict, fqn_config) # When schema files patch config, it has lower precedence than # config in the models (config_call_dict), so we add the patch_config_dict # before the config_call_dict if patch_config_dict: - config_obj = self._update_from_config(config_obj, patch_config_dict) + config_dict = self._update_from_config(config_cls, config_dict, patch_config_dict) # config_calls are created in the 'experimental' model parser and # the ParseConfigObject (via add_config_call) - config_obj = self._update_from_config(config_obj, config_call_dict) + config_dict = self._update_from_config(config_cls, config_dict, config_call_dict) if project_config.project_name != self._active_project.project_name: for fqn_config in self._active_project_configs(fqn, resource_type): - config_obj = self._update_from_config(config_obj, fqn_config) + config_dict = self._update_from_config(config_cls, config_dict, fqn_config) - return config_obj + return config_dict - def initial_result(self, resource_type: NodeType) -> C: - # defaults, project_config, config calls, active_config (if != project_config) - config_cls = get_config_for(resource_type) + def initial_result(self, config_cls: Type[BaseConfig]) -> Dict[str, Any]: # Calculate the defaults. We don't want to validate the defaults, # because it might be invalid in the case of required config members # (such as on snapshots!) 
- result = config_cls.from_dict({}) + result = config_cls.from_dict({}).to_dict() return result - def _update_from_config(self, result: C, partial: Dict[str, Any], validate: bool = False) -> C: + def _update_from_config( + self, config_cls: Type[BaseConfig], result_dict: Dict[str, Any], partial: Dict[str, Any] + ) -> Dict[str, Any]: translated = self._active_project.credentials.translate_aliases(partial) translated = self.translate_hook_names(translated) adapter_type = self._active_project.credentials.type adapter_config_cls = get_config_class_by_name(adapter_type) - # The "update_from" method in BaseConfig merges dictionaries and does a from_dict. - updated = result.update_from(translated, adapter_config_cls, validate=validate) + # The "update_from" method in BaseConfig merges dictionaries using MergeBehavior + updated = config_cls.update_from(result_dict, translated, adapter_config_cls) return updated def translate_hook_names(self, project_dict): @@ -237,7 +237,7 @@ def translate_hook_names(self, project_dict): return project_dict # ContextConfigGenerator - def calculate_node_config_dict( + def generate_node_config( self, config_call_dict: Dict[str, Any], fqn: List[str], @@ -246,8 +246,9 @@ def calculate_node_config_dict( patch_config_dict: Optional[dict] = None, ) -> Dict[str, Any]: + config_cls = get_config_for(resource_type) # returns a config object - config_obj = self.calculate_node_config( + config_dict = self.merge_config_dicts( config_call_dict=config_call_dict, fqn=fqn, resource_type=resource_type, @@ -256,11 +257,13 @@ def calculate_node_config_dict( ) try: # Call "finalize_and_validate" on the config obj - finalized = config_obj.finalize_and_validate() - # THEN return a dictionary!!! Why!! - return finalized.to_dict(omit_none=True) + config_cls.validate(config_dict) + # return finalized.to_dict(omit_none=True) + config_obj = config_cls.from_dict(config_dict) + return config_obj except ValidationError as exc: # we got a ValidationError - probably bad types in config() + config_obj = config_cls.from_dict(config_dict) raise SchemaConfigError(exc, node=config_obj) from exc @@ -268,7 +271,7 @@ class UnrenderedConfigGenerator(BaseContextConfigGenerator[Dict[str, Any]]): def get_config_source(self, project: Project) -> ConfigSource: return UnrenderedConfig(project) - def calculate_node_config( + def merge_config_dicts( self, config_call_dict: Dict[str, Any], fqn: List[str], @@ -277,37 +280,38 @@ def calculate_node_config( patch_config_dict: Optional[Dict[str, Any]] = None, ) -> Dict[str, Any]: # Note: This method returns a Dict[str, Any]. This is a duplicate of - # of ContextConfigGenerator.calculate_node_config, but calls methods + # of ContextConfigGenerator.generate_node_config, but calls methods # that deal with dictionaries instead of config object. # Additions to one method, should probably also go in the other. 
project_config = self.get_node_project_config(project_name) + config_cls = get_config_for(resource_type) # creates "default" config object ({}) - config_dict = self.initial_result(resource_type=resource_type) + config_dict = self.initial_result(config_cls) project_configs = self._project_configs(project_config, fqn, resource_type) for fqn_config in project_configs: - config_dict = self._update_from_config(config_dict, fqn_config) + config_dict = self._update_from_config(config_cls, config_dict, fqn_config) # When schema files patch config, it has lower precedence than # config in the models (config_call_dict), so we add the patch_config_dict # before the config_call_dict if patch_config_dict: - config_dict = self._update_from_config(config_dict, patch_config_dict) + config_dict = self._update_from_config(config_cls, config_dict, patch_config_dict) # config_calls are created in the 'experimental' model parser and # the ParseConfigObject (via add_config_call) - config_dict = self._update_from_config(config_dict, config_call_dict) + config_dict = self._update_from_config(config_cls, config_dict, config_call_dict) if project_config.project_name != self._active_project.project_name: for fqn_config in self._active_project_configs(fqn, resource_type): - config_dict = self._update_from_config(config_dict, fqn_config) + config_dict = self._update_from_config(config_cls, config_dict, fqn_config) return config_dict # UnrenderedConfigGenerator - def calculate_node_config_dict( + def generate_node_config( self, config_call_dict: Dict[str, Any], fqn: List[str], @@ -315,9 +319,9 @@ def calculate_node_config_dict( project_name: str, patch_config_dict: Optional[dict] = None, ) -> Dict[str, Any]: - # Just call UnrenderedConfigGenerator.calculate_node_config, which + # Just call UnrenderedConfigGenerator.merge_config_dicts, which # will return a config dictionary - result = self.calculate_node_config( + result = self.merge_config_dicts( config_call_dict=config_call_dict, fqn=fqn, resource_type=resource_type, @@ -327,18 +331,18 @@ def calculate_node_config_dict( # Note: this returns a dictionary return result - def initial_result(self, resource_type: NodeType) -> Dict[str, Any]: + def initial_result(self, config_cls: Type[BaseConfig]) -> Dict[str, Any]: return {} def _update_from_config( self, - result: Dict[str, Any], + config_cls: Type[BaseConfig], + result_dict: Dict[str, Any], partial: Dict[str, Any], - validate: bool = False, ) -> Dict[str, Any]: translated = self._active_project.credentials.translate_aliases(partial) - result.update(translated) - return result + result_dict.update(translated) + return result_dict class ContextConfig: @@ -388,10 +392,14 @@ def build_config_dict( else: config_call_dict = self._unrendered_config_call_dict - return config_generator.calculate_node_config_dict( + config = config_generator.generate_node_config( config_call_dict=config_call_dict, fqn=self._fqn, resource_type=self._resource_type, project_name=self._project_name, patch_config_dict=patch_config_dict, ) + if isinstance(config, BaseConfig): + return config.to_dict(omit_none=True) + else: + return config diff --git a/core/dbt/parser/base.py b/core/dbt/parser/base.py index 607c563fc6d..b2158ce7b0f 100644 --- a/core/dbt/parser/base.py +++ b/core/dbt/parser/base.py @@ -308,7 +308,7 @@ def update_parsed_node_relation_names( def update_parsed_node_config( self, parsed_node: FinalNode, - config: ContextConfig, + context_config: ContextConfig, context=None, patch_config_dict=None, patch_file_id=None, @@ -330,7 +330,7 @@ def 
update_parsed_node_config( ): if "materialized" not in patch_config_dict: patch_config_dict["materialized"] = "table" - config_dict = config.build_config_dict(patch_config_dict=patch_config_dict) + config_dict = context_config.build_config_dict(patch_config_dict=patch_config_dict) # Set tags on node provided in config blocks. Tags are additive, so even if # config has been built before, we don't have to reset tags in the parsed_node. @@ -396,12 +396,12 @@ def update_parsed_node_config( # unrendered_config is used to compare the original database/schema/alias # values and to handle 'same_config' and 'same_contents' calls - parsed_node.unrendered_config = config.build_config_dict( + parsed_node.unrendered_config = context_config.build_config_dict( rendered=False, patch_config_dict=patch_config_dict ) - parsed_node.config_call_dict = config._config_call_dict - parsed_node.unrendered_config_call_dict = config._unrendered_config_call_dict + parsed_node.config_call_dict = context_config._config_call_dict + parsed_node.unrendered_config_call_dict = context_config._unrendered_config_call_dict # do this once before we parse the node database/schema/alias, so # parsed_node.config is what it would be if they did nothing @@ -421,7 +421,7 @@ def update_parsed_node_config( if not hooks: return if not context: - context = self._context_for(parsed_node, config) + context = self._context_for(parsed_node, context_config) for hook in hooks: get_rendered(hook.sql, context, parsed_node, capture_macros=True) diff --git a/core/dbt/parser/schema_yaml_readers.py b/core/dbt/parser/schema_yaml_readers.py index a22e71d73e3..51d509cf358 100644 --- a/core/dbt/parser/schema_yaml_readers.py +++ b/core/dbt/parser/schema_yaml_readers.py @@ -164,7 +164,7 @@ def _generate_exposure_config( # apply exposure configs precedence_configs.update(target.config) - return generator.calculate_node_config( + return generator.generate_node_config( config_call_dict={}, fqn=fqn, resource_type=NodeType.Exposure, @@ -466,7 +466,7 @@ def _generate_metric_config( # first apply metric configs precedence_configs.update(target.config) - config = generator.calculate_node_config( + config = generator.generate_node_config( config_call_dict={}, fqn=fqn, resource_type=NodeType.Metric, @@ -634,7 +634,7 @@ def _generate_semantic_model_config( # first apply semantic model configs precedence_configs.update(target.config) - config = generator.calculate_node_config( + config = generator.generate_node_config( config_call_dict={}, fqn=fqn, resource_type=NodeType.SemanticModel, @@ -748,7 +748,7 @@ def _generate_saved_query_config( # first apply semantic model configs precedence_configs.update(target.config) - config = generator.calculate_node_config( + config = generator.generate_node_config( config_call_dict={}, fqn=fqn, resource_type=NodeType.SavedQuery, diff --git a/core/dbt/parser/sources.py b/core/dbt/parser/sources.py index 97dac31d313..228303195e8 100644 --- a/core/dbt/parser/sources.py +++ b/core/dbt/parser/sources.py @@ -302,7 +302,7 @@ def _generate_source_config(self, target: UnpatchedSourceDefinition, rendered: b # it works while source configs can only include `enabled`. 
precedence_configs.update(target.table.config) - return generator.calculate_node_config( + return generator.generate_node_config( config_call_dict={}, fqn=target.fqn, resource_type=NodeType.Source, From 4554eb36a6ed7ba688662483dd6d9511774210ca Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Mon, 2 Dec 2024 17:41:05 -0500 Subject: [PATCH 21/42] comment --- core/dbt/context/context_config.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/core/dbt/context/context_config.py b/core/dbt/context/context_config.py index 8de6d11ba8a..a9967a1a15b 100644 --- a/core/dbt/context/context_config.py +++ b/core/dbt/context/context_config.py @@ -175,10 +175,9 @@ def merge_config_dicts( project_name: str, patch_config_dict: Optional[Dict[str, Any]] = None, ) -> Dict[str, Any]: - # Note: This method returns a BaseConfig object. This is a duplicate of - # of UnrenderedConfigGenerator.generate_node_config, but calls methods - # that deal with config objects instead of dictionaries. - # Additions to one method, should probably also go in the other. + # Note: This method returns a dictionary object. This is a duplicate of + # of UnrenderedConfigGenerator.generate_node_config ... TODO: check to + # see if methods can be combined project_config = self.get_node_project_config(project_name) config_cls = get_config_for(resource_type) From b4f72e403bcba859b8e6cda71591a64a8965214b Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Mon, 2 Dec 2024 20:58:43 -0500 Subject: [PATCH 22/42] Use dbt-common branch --- dev-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 860763a177e..cac7be14f6b 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,6 +1,6 @@ git+https://github.com/dbt-labs/dbt-adapters.git@mashumaro_fixes git+https://github.com/dbt-labs/dbt-adapters.git@main#subdirectory=dbt-tests-adapter -git+https://github.com/dbt-labs/dbt-common.git@main +git+https://github.com/dbt-labs/dbt-common.git@mashumaro_fixes git+https://github.com/dbt-labs/dbt-postgres.git@main # black must match what's in .pre-commit-config.yaml to be sure local env matches CI black==24.3.0 From 54adabe2c7114492b2e5a9e213bd625a68780069 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Mon, 2 Dec 2024 22:08:38 -0500 Subject: [PATCH 23/42] Fix a few tests, remove unnecessary finalize_and_validate calls --- core/dbt/parser/schema_yaml_readers.py | 15 ++++++++++----- core/dbt/parser/sources.py | 4 +++- .../functional/exposures/test_exposure_configs.py | 4 ++-- tests/functional/metrics/test_metric_configs.py | 5 ++--- tests/functional/sources/test_source_configs.py | 4 ++-- 5 files changed, 19 insertions(+), 13 deletions(-) diff --git a/core/dbt/parser/schema_yaml_readers.py b/core/dbt/parser/schema_yaml_readers.py index 51d509cf358..3629a95e6bc 100644 --- a/core/dbt/parser/schema_yaml_readers.py +++ b/core/dbt/parser/schema_yaml_readers.py @@ -101,7 +101,8 @@ def parse_exposure(self, unparsed: UnparsedExposure) -> None: rendered=True, ) - config = config.finalize_and_validate() + # Already validated + # config = config.finalize_and_validate() unrendered_config = self._generate_exposure_config( target=unparsed, @@ -396,6 +397,7 @@ def parse_metric(self, unparsed: UnparsedMetric, generated_from: Optional[str] = fqn = self.schema_parser.get_fqn_prefix(path) fqn.append(unparsed.name) + # Following will validate also config = self._generate_metric_config( target=unparsed, fqn=fqn, @@ -403,7 +405,8 @@ def parse_metric(self, unparsed: 
UnparsedMetric, generated_from: Optional[str] = rendered=True, ) - config = config.finalize_and_validate() + # Already validated + # config = config.finalize_and_validate() unrendered_config = self._generate_metric_config( target=unparsed, @@ -461,7 +464,7 @@ def _generate_metric_config( else: generator = UnrenderedConfigGenerator(self.root_project) - # configs with precendence set + # configs with precedence set precedence_configs = dict() # first apply metric configs precedence_configs.update(target.config) @@ -659,7 +662,8 @@ def parse_semantic_model(self, unparsed: UnparsedSemanticModel) -> None: rendered=True, ) - config = config.finalize_and_validate() + # Already validated + # config = config.finalize_and_validate() unrendered_config = self._generate_semantic_model_config( target=unparsed, @@ -803,7 +807,8 @@ def parse_saved_query(self, unparsed: UnparsedSavedQuery) -> None: rendered=True, ) - config = config.finalize_and_validate() + # Already validated + # config = config.finalize_and_validate() unrendered_config = self._generate_saved_query_config( target=unparsed, diff --git a/core/dbt/parser/sources.py b/core/dbt/parser/sources.py index 228303195e8..cf9618eb058 100644 --- a/core/dbt/parser/sources.py +++ b/core/dbt/parser/sources.py @@ -146,12 +146,14 @@ def parse_source(self, target: UnpatchedSourceDefinition) -> SourceDefinition: # make sure we don't do duplicate tags from source + table tags = sorted(set(itertools.chain(source.tags, table.tags))) + # This will also validate config = self._generate_source_config( target=target, rendered=True, ) - config = config.finalize_and_validate() + # Already validated + # config = config.finalize_and_validate() unrendered_config = self._generate_source_config( target=target, diff --git a/tests/functional/exposures/test_exposure_configs.py b/tests/functional/exposures/test_exposure_configs.py index 2ec309623a7..aec50a8523d 100644 --- a/tests/functional/exposures/test_exposure_configs.py +++ b/tests/functional/exposures/test_exposure_configs.py @@ -1,8 +1,8 @@ import pytest from dbt.artifacts.resources import ExposureConfig +from dbt.exceptions import SchemaConfigError from dbt.tests.util import get_manifest, run_dbt, update_config_file -from dbt_common.dataclass_schema import ValidationError from tests.functional.exposures.fixtures import ( disabled_models_exposure_yml, enabled_yaml_level_exposure_yml, @@ -126,7 +126,7 @@ def models(self): } def test_exposure_config_yaml_level(self, project): - with pytest.raises(ValidationError) as excinfo: + with pytest.raises(SchemaConfigError) as excinfo: run_dbt(["parse"]) expected_msg = "'True and False' is not of type 'boolean'" assert expected_msg in str(excinfo.value) diff --git a/tests/functional/metrics/test_metric_configs.py b/tests/functional/metrics/test_metric_configs.py index 2be68d9e17f..a944c1ea5a4 100644 --- a/tests/functional/metrics/test_metric_configs.py +++ b/tests/functional/metrics/test_metric_configs.py @@ -1,9 +1,8 @@ import pytest from dbt.artifacts.resources import MetricConfig -from dbt.exceptions import CompilationError, ParsingError +from dbt.exceptions import CompilationError, ParsingError, SchemaConfigError from dbt.tests.util import get_manifest, run_dbt, update_config_file -from dbt_common.dataclass_schema import ValidationError from tests.functional.metrics.fixtures import ( disabled_metric_level_schema_yml, enabled_metric_level_schema_yml, @@ -170,7 +169,7 @@ def models(self): } def test_invalid_config_metric(self, project): - with pytest.raises(ValidationError) as 
excinfo: + with pytest.raises(SchemaConfigError) as excinfo: run_dbt(["parse"]) expected_msg = "'True and False' is not of type 'boolean'" assert expected_msg in str(excinfo.value) diff --git a/tests/functional/sources/test_source_configs.py b/tests/functional/sources/test_source_configs.py index 1ceca5d0522..12372c2fbb9 100644 --- a/tests/functional/sources/test_source_configs.py +++ b/tests/functional/sources/test_source_configs.py @@ -1,8 +1,8 @@ import pytest from dbt.artifacts.resources import SourceConfig +from dbt.exceptions import SchemaConfigError from dbt.tests.util import get_manifest, run_dbt, update_config_file -from dbt_common.dataclass_schema import ValidationError from tests.functional.sources.fixtures import ( all_configs_everywhere_schema_yml, all_configs_not_table_schema_yml, @@ -175,7 +175,7 @@ def models(self): } def test_invalid_config_source(self, project): - with pytest.raises(ValidationError) as excinfo: + with pytest.raises(SchemaConfigError) as excinfo: run_dbt(["parse"]) expected_msg = "'True and False' is not of type 'boolean'" assert expected_msg in str(excinfo.value) From bbfc6e6887e4a31c5c91f47efc9cc15ea7243fb9 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 3 Dec 2024 11:26:19 -0500 Subject: [PATCH 24/42] rename config to context_config --- core/dbt/parser/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/dbt/parser/base.py b/core/dbt/parser/base.py index b2158ce7b0f..dd24b3ccda4 100644 --- a/core/dbt/parser/base.py +++ b/core/dbt/parser/base.py @@ -441,9 +441,9 @@ def initial_config(self, fqn: List[str]) -> ContextConfig: def config_dict( self, - config: ContextConfig, + context_config: ContextConfig, ) -> Dict[str, Any]: - config_dict = config.build_config_dict() + config_dict = context_config.build_config_dict() self._mangle_hooks(config_dict) return config_dict From 1afb2bb04f13450f0c05bc1f749da480e245dac7 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 3 Dec 2024 12:35:32 -0500 Subject: [PATCH 25/42] fix hooks before validation --- core/dbt/context/context_config.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/core/dbt/context/context_config.py b/core/dbt/context/context_config.py index a9967a1a15b..cdf5d7e08a4 100644 --- a/core/dbt/context/context_config.py +++ b/core/dbt/context/context_config.py @@ -3,6 +3,7 @@ from dataclasses import dataclass from typing import Any, Dict, Generic, Iterator, List, Optional, Type, TypeVar +from dbt import hooks from dbt.adapters.factory import get_config_class_by_name from dbt.config import IsFQNResource, Project, RuntimeConfig from dbt.contracts.graph.model_config import get_config_for @@ -93,6 +94,16 @@ def get_config_dict(self, resource_type: NodeType) -> Dict[str, Any]: return model_configs +def fix_hooks(config_dict: Dict[str, Any]): + """Given a config dict that may have `pre-hook`/`post-hook` keys, + convert it from the yucky maybe-a-string, maybe-a-dict to a dict. 
+ """ + # Like most of parsing, this is a horrible hack :( + for key in hooks.ModelHookType: + if key in config_dict: + config_dict[key] = [hooks.get_hook_dict(h) for h in config_dict[key]] + + class BaseContextConfigGenerator(Generic[T]): def __init__(self, active_project: RuntimeConfig): self._active_project = active_project @@ -254,6 +265,7 @@ def generate_node_config( project_name=project_name, patch_config_dict=patch_config_dict, ) + fix_hooks(config_dict) try: # Call "finalize_and_validate" on the config obj config_cls.validate(config_dict) From 7d78b475ef67a82b8d593d0a5d3a7ea874e86f40 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 3 Dec 2024 13:45:20 -0500 Subject: [PATCH 26/42] Bump mashumaro again --- core/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/setup.py b/core/setup.py index 7da702f0dda..c8fa182d318 100644 --- a/core/setup.py +++ b/core/setup.py @@ -51,7 +51,7 @@ # Pin to the patch or minor version, and bump in each new minor version of dbt-core. "agate>=1.7.0,<1.10", "Jinja2>=3.1.3,<4", - "mashumaro[msgpack]>=3.9,<3.15", + "mashumaro[msgpack]>=3.15,<4.0", # ---- # dbt-core uses these packages in standard ways. Pin to the major version, and check compatibility # with major versions in each new minor version of dbt-core. From 1c709d2c19211228a615080236453d69245a3b6e Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 3 Dec 2024 14:02:38 -0500 Subject: [PATCH 27/42] Remove skips from tests --- tests/functional/graph_selection/test_version_selection.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/functional/graph_selection/test_version_selection.py b/tests/functional/graph_selection/test_version_selection.py index e55115464ad..335fad25270 100644 --- a/tests/functional/graph_selection/test_version_selection.py +++ b/tests/functional/graph_selection/test_version_selection.py @@ -54,12 +54,7 @@ def seeds(self, test_data_dir): def selectors(self): return selectors_yml - @pytest.mark.skip("broken until mash 3.15") def test_select_none_versions(self, project): - manifest = run_dbt(["parse"]) - print(f"--- nodes.keys(): {manifest.nodes.keys()}") - # This wrongly includes test.versioned_v4.5 - # This is fixed by mashumaro 3.15 results = run_dbt(["ls", "--select", "version:none"]) assert sorted(results) == [ "test.base_users", @@ -77,7 +72,6 @@ def test_select_old_versions(self, project): results = run_dbt(["ls", "--select", "version:old"]) assert sorted(results) == ["test.versioned.v1"] - @pytest.mark.skip("broken until mash 3.15") def test_select_prerelease_versions(self, project): results = run_dbt(["ls", "--select", "version:prerelease"]) assert sorted(results) == [ From 13105cd930694d75c3be5b65ac32c1f367a2032f Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 3 Dec 2024 14:12:08 -0500 Subject: [PATCH 28/42] fix expected_manifest version: "2" --- tests/functional/artifacts/expected_manifest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/functional/artifacts/expected_manifest.py b/tests/functional/artifacts/expected_manifest.py index cb1c751fed8..c7deb2e8ea8 100644 --- a/tests/functional/artifacts/expected_manifest.py +++ b/tests/functional/artifacts/expected_manifest.py @@ -1712,7 +1712,7 @@ def expected_versions_manifest(project): "language": "sql", "refs": [ {"name": "versioned_model", "package": None, "version": 2}, - {"name": "versioned_model", "package": None, "version": 2}, + {"name": "versioned_model", "package": None, "version": "2"}, {"name": "versioned_model", "package": None, 
"version": 2}, {"name": "versioned_model", "package": None, "version": None}, {"name": "versioned_model", "package": None, "version": 1}, From 3f7ee0ed1ec0425052aaf65a08e3d161e8d923d1 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 3 Dec 2024 14:53:09 -0500 Subject: [PATCH 29/42] Remove unnecessary ConfigSource --- core/dbt/context/context_config.py | 128 ++++++++++++----------------- 1 file changed, 52 insertions(+), 76 deletions(-) diff --git a/core/dbt/context/context_config.py b/core/dbt/context/context_config.py index cdf5d7e08a4..f8ed1b873be 100644 --- a/core/dbt/context/context_config.py +++ b/core/dbt/context/context_config.py @@ -27,73 +27,6 @@ class ModelParts(IsFQNResource): C = TypeVar("C", bound=BaseConfig) -class ConfigSource: - def __init__(self, project): - self.project = project - - def get_config_dict(self, resource_type: NodeType): ... - - -class UnrenderedConfig(ConfigSource): - def __init__(self, project: Project): - self.project = project - - def get_config_dict(self, resource_type: NodeType) -> Dict[str, Any]: - unrendered = self.project.unrendered.project_dict - if resource_type == NodeType.Seed: - model_configs = unrendered.get("seeds") - elif resource_type == NodeType.Snapshot: - model_configs = unrendered.get("snapshots") - elif resource_type == NodeType.Source: - model_configs = unrendered.get("sources") - elif resource_type == NodeType.Test: - model_configs = unrendered.get("data_tests") - elif resource_type == NodeType.Metric: - model_configs = unrendered.get("metrics") - elif resource_type == NodeType.SemanticModel: - model_configs = unrendered.get("semantic_models") - elif resource_type == NodeType.SavedQuery: - model_configs = unrendered.get("saved_queries") - elif resource_type == NodeType.Exposure: - model_configs = unrendered.get("exposures") - elif resource_type == NodeType.Unit: - model_configs = unrendered.get("unit_tests") - else: - model_configs = unrendered.get("models") - if model_configs is None: - return {} - else: - return model_configs - - -class RenderedConfig(ConfigSource): - def __init__(self, project: Project): - self.project = project - - def get_config_dict(self, resource_type: NodeType) -> Dict[str, Any]: - if resource_type == NodeType.Seed: - model_configs = self.project.seeds - elif resource_type == NodeType.Snapshot: - model_configs = self.project.snapshots - elif resource_type == NodeType.Source: - model_configs = self.project.sources - elif resource_type == NodeType.Test: - model_configs = self.project.data_tests - elif resource_type == NodeType.Metric: - model_configs = self.project.metrics - elif resource_type == NodeType.SemanticModel: - model_configs = self.project.semantic_models - elif resource_type == NodeType.SavedQuery: - model_configs = self.project.saved_queries - elif resource_type == NodeType.Exposure: - model_configs = self.project.exposures - elif resource_type == NodeType.Unit: - model_configs = self.project.unit_tests - else: - model_configs = self.project.models - return model_configs - - def fix_hooks(config_dict: Dict[str, Any]): """Given a config dict that may have `pre-hook`/`post-hook` keys, convert it from the yucky maybe-a-string, maybe-a-dict to a dict. 
@@ -108,9 +41,6 @@ class BaseContextConfigGenerator(Generic[T]): def __init__(self, active_project: RuntimeConfig): self._active_project = active_project - def get_config_source(self, project: Project) -> ConfigSource: - return RenderedConfig(project) - def get_node_project_config(self, project_name: str): if project_name == self._active_project.project_name: return self._active_project @@ -125,8 +55,7 @@ def get_node_project_config(self, project_name: str): def _project_configs( self, project: Project, fqn: List[str], resource_type: NodeType ) -> Iterator[Dict[str, Any]]: - src = self.get_config_source(project) - model_configs = src.get_config_dict(resource_type) + model_configs = self.get_model_configs(project, resource_type) for level_config in fqn_search(model_configs, fqn): result = {} for key, value in level_config.items(): @@ -142,6 +71,9 @@ def _active_project_configs( ) -> Iterator[Dict[str, Any]]: return self._project_configs(self._active_project, fqn, resource_type) + @abstractmethod + def get_model_configs(self, project: Project, resource_type: NodeType) -> Dict[str, Any]: ... + @abstractmethod def merge_config_dicts( self, @@ -175,8 +107,28 @@ class ContextConfigGenerator(BaseContextConfigGenerator[C]): def __init__(self, active_project: RuntimeConfig): self._active_project = active_project - def get_config_source(self, project: Project) -> ConfigSource: - return RenderedConfig(project) + def get_model_configs(self, project: Project, resource_type: NodeType) -> Dict[str, Any]: + if resource_type == NodeType.Seed: + model_configs = project.seeds + elif resource_type == NodeType.Snapshot: + model_configs = project.snapshots + elif resource_type == NodeType.Source: + model_configs = project.sources + elif resource_type == NodeType.Test: + model_configs = project.data_tests + elif resource_type == NodeType.Metric: + model_configs = project.metrics + elif resource_type == NodeType.SemanticModel: + model_configs = project.semantic_models + elif resource_type == NodeType.SavedQuery: + model_configs = project.saved_queries + elif resource_type == NodeType.Exposure: + model_configs = project.exposures + elif resource_type == NodeType.Unit: + model_configs = project.unit_tests + else: + model_configs = project.models + return model_configs def merge_config_dicts( self, @@ -279,8 +231,32 @@ def generate_node_config( class UnrenderedConfigGenerator(BaseContextConfigGenerator[Dict[str, Any]]): - def get_config_source(self, project: Project) -> ConfigSource: - return UnrenderedConfig(project) + def get_model_configs(self, project: Project, resource_type: NodeType) -> Dict[str, Any]: + unrendered = project.unrendered.project_dict + if resource_type == NodeType.Seed: + model_configs = unrendered.get("seeds") + elif resource_type == NodeType.Snapshot: + model_configs = unrendered.get("snapshots") + elif resource_type == NodeType.Source: + model_configs = unrendered.get("sources") + elif resource_type == NodeType.Test: + model_configs = unrendered.get("data_tests") + elif resource_type == NodeType.Metric: + model_configs = unrendered.get("metrics") + elif resource_type == NodeType.SemanticModel: + model_configs = unrendered.get("semantic_models") + elif resource_type == NodeType.SavedQuery: + model_configs = unrendered.get("saved_queries") + elif resource_type == NodeType.Exposure: + model_configs = unrendered.get("exposures") + elif resource_type == NodeType.Unit: + model_configs = unrendered.get("unit_tests") + else: + model_configs = unrendered.get("models") + if model_configs is None: + 
return {} + else: + return model_configs def merge_config_dicts( self, From c16cbe05e74448ca2ea6c6bf2744e2cfa605e658 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 3 Dec 2024 15:47:38 -0500 Subject: [PATCH 30/42] Rename ContextConfig ConfigBuilder --- core/dbt/artifacts/resources/v1/snapshot.py | 6 -- core/dbt/context/context_config.py | 17 ++--- core/dbt/context/providers.py | 40 +++++------ core/dbt/parser/base.py | 72 ++++++++++--------- core/dbt/parser/hooks.py | 6 +- core/dbt/parser/models.py | 76 ++++++++++++--------- core/dbt/parser/schema_generic_tests.py | 34 ++++----- core/dbt/parser/schema_yaml_readers.py | 37 ++++------ core/dbt/parser/schemas.py | 14 ++-- core/dbt/parser/seeds.py | 4 +- core/dbt/parser/sources.py | 11 ++- core/dbt/parser/unit_tests.py | 6 +- tests/unit/context/test_context.py | 2 +- tests/unit/parser/test_parser.py | 62 +++++++++-------- 14 files changed, 187 insertions(+), 200 deletions(-) diff --git a/core/dbt/artifacts/resources/v1/snapshot.py b/core/dbt/artifacts/resources/v1/snapshot.py index 976c93a4d60..24941c6d54f 100644 --- a/core/dbt/artifacts/resources/v1/snapshot.py +++ b/core/dbt/artifacts/resources/v1/snapshot.py @@ -71,12 +71,6 @@ def final_validate(self): if self.materialized and self.materialized != "snapshot": raise ValidationError("A snapshot must have a materialized value of 'snapshot'") - # Called by "calculate_node_config_dict" in ContextConfigGenerator - def finalize_and_validate(self): - data = self.to_dict(omit_none=True) - self.validate(data) - return self.from_dict(data) - @dataclass class Snapshot(CompiledResource): diff --git a/core/dbt/context/context_config.py b/core/dbt/context/context_config.py index f8ed1b873be..0c19bf21580 100644 --- a/core/dbt/context/context_config.py +++ b/core/dbt/context/context_config.py @@ -37,7 +37,7 @@ def fix_hooks(config_dict: Dict[str, Any]): config_dict[key] = [hooks.get_hook_dict(h) for h in config_dict[key]] -class BaseContextConfigGenerator(Generic[T]): +class BaseConfigGenerator(Generic[T]): def __init__(self, active_project: RuntimeConfig): self._active_project = active_project @@ -103,7 +103,7 @@ def generate_node_config( ) -> Dict[str, Any]: ... -class ContextConfigGenerator(BaseContextConfigGenerator[C]): +class RenderedConfigGenerator(BaseConfigGenerator[C]): def __init__(self, active_project: RuntimeConfig): self._active_project = active_project @@ -198,7 +198,7 @@ def translate_hook_names(self, project_dict): project_dict["post-hook"] = project_dict.pop("post_hook") return project_dict - # ContextConfigGenerator + # RenderedConfigGenerator def generate_node_config( self, config_call_dict: Dict[str, Any], @@ -219,9 +219,7 @@ def generate_node_config( ) fix_hooks(config_dict) try: - # Call "finalize_and_validate" on the config obj config_cls.validate(config_dict) - # return finalized.to_dict(omit_none=True) config_obj = config_cls.from_dict(config_dict) return config_obj except ValidationError as exc: @@ -230,7 +228,7 @@ def generate_node_config( raise SchemaConfigError(exc, node=config_obj) from exc -class UnrenderedConfigGenerator(BaseContextConfigGenerator[Dict[str, Any]]): +class UnrenderedConfigGenerator(BaseConfigGenerator[Dict[str, Any]]): def get_model_configs(self, project: Project, resource_type: NodeType) -> Dict[str, Any]: unrendered = project.unrendered.project_dict if resource_type == NodeType.Seed: @@ -267,7 +265,7 @@ def merge_config_dicts( patch_config_dict: Optional[Dict[str, Any]] = None, ) -> Dict[str, Any]: # Note: This method returns a Dict[str, Any]. 
This is a duplicate of - # of ContextConfigGenerator.generate_node_config, but calls methods + # of RenderedConfigGenerator.generate_node_config, but calls methods # that deal with dictionaries instead of config object. # Additions to one method, should probably also go in the other. @@ -332,7 +330,7 @@ def _update_from_config( return result_dict -class ContextConfig: +class ConfigBuilder: def __init__( self, active_project: RuntimeConfig, @@ -355,7 +353,6 @@ def add_unrendered_config_call(self, opts: Dict[str, Any]) -> None: # Cannot perform complex merge behaviours on unrendered configs as they may not be appropriate types. self._unrendered_config_call_dict.update(opts) - # ContextConfig def build_config_dict( self, *, @@ -363,7 +360,7 @@ def build_config_dict( patch_config_dict: Optional[dict] = None, ) -> Dict[str, Any]: if rendered: - config_generator = ContextConfigGenerator(self._active_project) # type: ignore[var-annotated] + config_generator = RenderedConfigGenerator(self._active_project) # type: ignore[var-annotated] config_call_dict = self._config_call_dict else: # unrendered config_generator = UnrenderedConfigGenerator(self._active_project) # type: ignore[assignment] diff --git a/core/dbt/context/providers.py b/core/dbt/context/providers.py index 188ad5480b0..3b3c718570a 100644 --- a/core/dbt/context/providers.py +++ b/core/dbt/context/providers.py @@ -40,7 +40,7 @@ from dbt.constants import DEFAULT_ENV_PLACEHOLDER from dbt.context.base import Var, contextmember, contextproperty from dbt.context.configured import FQNLookup -from dbt.context.context_config import ContextConfig +from dbt.context.context_config import ConfigBuilder from dbt.context.exceptions_jinja import wrapped_exports from dbt.context.macro_resolver import MacroResolver, TestMacroNamespace from dbt.context.macros import MacroNamespace, MacroNamespaceBuilder @@ -366,14 +366,14 @@ def __call__(self, *args: str) -> MetricReference: class Config(Protocol): - def __init__(self, model, context_config: Optional[ContextConfig]): ... + def __init__(self, model, config_builder: Optional[ConfigBuilder]): ... # Implementation of "config(..)" calls in models class ParseConfigObject(Config): - def __init__(self, model, context_config: Optional[ContextConfig]): + def __init__(self, model, config_builder: Optional[ConfigBuilder]): self.model = model - self.context_config = context_config + self.config_builder = config_builder def _transform_config(self, config): for oldkey in ("pre_hook", "post_hook"): @@ -394,19 +394,19 @@ def __call__(self, *args, **kwargs): opts = self._transform_config(opts) - # it's ok to have a parse context with no context config, but you must + # it's ok to have a parse context with no config builder, but you must # not call it! 
- if self.context_config is None: - raise DbtRuntimeError("At parse time, did not receive a context config") + if self.config_builder is None: + raise DbtRuntimeError("At parse time, did not receive a config builder") # Track unrendered opts to build parsed node unrendered_config later on if get_flags().state_modified_compare_more_unrendered_values: unrendered_config = statically_parse_unrendered_config(self.model.raw_code) if unrendered_config: - self.context_config.add_unrendered_config_call(unrendered_config) + self.config_builder.add_unrendered_config_call(unrendered_config) - # Use rendered opts to populate context_config - self.context_config.add_config_call(opts) + # Use rendered opts to populate config builder + self.config_builder.add_config_call(opts) return "" def set(self, name, value): @@ -426,7 +426,7 @@ def persist_column_docs(self) -> bool: class RuntimeConfigObject(Config): - def __init__(self, model, context_config: Optional[ContextConfig] = None): + def __init__(self, model, config_builder: Optional[ConfigBuilder] = None): self.model = model # we never use or get a config, only the parser cares @@ -886,7 +886,7 @@ def __init__( config: RuntimeConfig, manifest: Manifest, provider: Provider, - context_config: Optional[ContextConfig], + config_builder: Optional[ConfigBuilder], ) -> None: if provider is None: raise DbtInternalError(f"Invalid provider given to context: {provider}") @@ -895,7 +895,7 @@ def __init__( self.model: Union[Macro, ManifestNode] = model super().__init__(config, manifest, model.package_name) self.sql_results: Dict[str, Optional[AttrDict]] = {} - self.context_config: Optional[ContextConfig] = context_config + self.config_builder: Optional[ConfigBuilder] = config_builder self.provider: Provider = provider self.adapter = get_adapter(self.config) # The macro namespace is used in creating the DatabaseWrapper @@ -1164,7 +1164,7 @@ def ctx_config(self) -> Config: {%- set unique_key = config.require('unique_key') -%} ... """ # noqa - return self.provider.Config(self.model, self.context_config) + return self.provider.Config(self.model, self.config_builder) @contextproperty() def execute(self) -> bool: @@ -1688,12 +1688,12 @@ def generate_parser_model_context( model: ManifestNode, config: RuntimeConfig, manifest: Manifest, - context_config: ContextConfig, + config_builder: ConfigBuilder, ) -> Dict[str, Any]: # The __init__ method of ModelContext also initializes # a ManifestContext object which creates a MacroNamespaceBuilder # which adds every macro in the Manifest. - ctx = ModelContext(model, config, manifest, ParseProvider(), context_config) + ctx = ModelContext(model, config, manifest, ParseProvider(), config_builder) # The 'to_dict' method in ManifestContext moves all of the macro names # in the macro 'namespace' up to top level keys return ctx.to_dict() @@ -1895,14 +1895,14 @@ def __init__( config: RuntimeConfig, manifest: Manifest, provider: Provider, - context_config: Optional[ContextConfig], + config_builder: Optional[ConfigBuilder], macro_resolver: MacroResolver, ) -> None: # this must be before super init so that macro_resolver exists for # build_namespace self.macro_resolver = macro_resolver self.thread_ctx = MacroStack() - super().__init__(model, config, manifest, provider, context_config) + super().__init__(model, config, manifest, provider, config_builder) self._build_test_namespace() # We need to rebuild this because it's already been built by # the ProviderContext with the wrong namespace. 
@@ -1974,10 +1974,10 @@ def generate_test_context( model: ManifestNode, config: RuntimeConfig, manifest: Manifest, - context_config: ContextConfig, + config_builder: ConfigBuilder, macro_resolver: MacroResolver, ) -> Dict[str, Any]: - ctx = TestContext(model, config, manifest, ParseProvider(), context_config, macro_resolver) + ctx = TestContext(model, config, manifest, ParseProvider(), config_builder, macro_resolver) # The 'to_dict' method in ManifestContext moves all of the macro names # in the macro 'namespace' up to top level keys return ctx.to_dict() diff --git a/core/dbt/parser/base.py b/core/dbt/parser/base.py index dd24b3ccda4..fe9e1e6bf8f 100644 --- a/core/dbt/parser/base.py +++ b/core/dbt/parser/base.py @@ -8,7 +8,7 @@ from dbt.artifacts.resources import Contract from dbt.clients.jinja import MacroGenerator, get_rendered from dbt.config import RuntimeConfig -from dbt.context.context_config import ContextConfig +from dbt.context.context_config import ConfigBuilder from dbt.context.providers import ( generate_generate_name_macro_context, generate_parser_model_context, @@ -209,7 +209,7 @@ def _create_parsetime_node( self, block: ConfiguredBlockType, path: str, - config: ContextConfig, + config_builder: ConfigBuilder, fqn: List[str], name=None, **kwargs, @@ -239,7 +239,7 @@ def _create_parsetime_node( "raw_code": block.contents, "language": language, "unique_id": self.generate_unique_id(name), - "config": self.config_dict(config), + "config": self.config_dict(config_builder), "checksum": block.file.checksum.to_dict(omit_none=True), } dct.update(kwargs) @@ -257,14 +257,18 @@ def _create_parsetime_node( ) raise DictParseError(exc, node=node) - def _context_for(self, parsed_node: FinalNode, config: ContextConfig) -> Dict[str, Any]: - return generate_parser_model_context(parsed_node, self.root_project, self.manifest, config) + def _context_for( + self, parsed_node: FinalNode, config_builder: ConfigBuilder + ) -> Dict[str, Any]: + return generate_parser_model_context( + parsed_node, self.root_project, self.manifest, config_builder + ) - def render_with_context(self, parsed_node: FinalNode, config: ContextConfig): - # Given the parsed node and a ContextConfig to use during parsing, + def render_with_context(self, parsed_node: FinalNode, config_builder: ConfigBuilder): + # Given the parsed node and a ConfigBuilder to use during parsing, # render the node's sql with macro capture enabled. # Note: this mutates the config object when config calls are rendered. - context = self._context_for(parsed_node, config) + context = self._context_for(parsed_node, config_builder) # this goes through the process of rendering, but just throws away # the rendered result. The "macro capture" is the point? @@ -308,17 +312,17 @@ def update_parsed_node_relation_names( def update_parsed_node_config( self, parsed_node: FinalNode, - context_config: ContextConfig, + config_builder: ConfigBuilder, context=None, patch_config_dict=None, patch_file_id=None, ) -> None: - """Given the ContextConfig used for parsing and the parsed node, + """Given the ConfigBuilder used for parsing and the parsed node, generate and set the true values to use, overriding the temporary parse values set in _build_intermediate_parsed_node. 
""" - # build_config_dict takes the config_call_dict in the ContextConfig object + # build_config_dict takes the config_call_dict in the ConfigBuilder object # and calls calculate_node_config to combine dbt_project configs and # config calls from SQL files, plus patch configs (from schema files) # This normalize the config for a model node due #8520; should be improved latter @@ -330,7 +334,7 @@ def update_parsed_node_config( ): if "materialized" not in patch_config_dict: patch_config_dict["materialized"] = "table" - config_dict = context_config.build_config_dict(patch_config_dict=patch_config_dict) + config_dict = config_builder.build_config_dict(patch_config_dict=patch_config_dict) # Set tags on node provided in config blocks. Tags are additive, so even if # config has been built before, we don't have to reset tags in the parsed_node. @@ -396,12 +400,12 @@ def update_parsed_node_config( # unrendered_config is used to compare the original database/schema/alias # values and to handle 'same_config' and 'same_contents' calls - parsed_node.unrendered_config = context_config.build_config_dict( + parsed_node.unrendered_config = config_builder.build_config_dict( rendered=False, patch_config_dict=patch_config_dict ) - parsed_node.config_call_dict = context_config._config_call_dict - parsed_node.unrendered_config_call_dict = context_config._unrendered_config_call_dict + parsed_node.config_call_dict = config_builder._config_call_dict + parsed_node.unrendered_config_call_dict = config_builder._unrendered_config_call_dict # do this once before we parse the node database/schema/alias, so # parsed_node.config is what it would be if they did nothing @@ -421,36 +425,30 @@ def update_parsed_node_config( if not hooks: return if not context: - context = self._context_for(parsed_node, context_config) + context = self._context_for(parsed_node, config_builder) for hook in hooks: get_rendered(hook.sql, context, parsed_node, capture_macros=True) - def initial_config(self, fqn: List[str]) -> ContextConfig: - config_version = min([self.project.config_version, self.root_project.config_version]) - if config_version == 2: - return ContextConfig( - self.root_project, - fqn, - self.resource_type, - self.project.project_name, - ) - else: - raise DbtInternalError( - f"Got an unexpected project version={config_version}, expected 2" - ) + def initial_config_builder(self, fqn: List[str]) -> ConfigBuilder: + return ConfigBuilder( + self.root_project, + fqn, + self.resource_type, + self.project.project_name, + ) def config_dict( self, - context_config: ContextConfig, + config_builder: ConfigBuilder, ) -> Dict[str, Any]: - config_dict = context_config.build_config_dict() + config_dict = config_builder.build_config_dict() self._mangle_hooks(config_dict) return config_dict - def render_update(self, node: FinalNode, config: ContextConfig) -> None: + def render_update(self, node: FinalNode, config_builder: ConfigBuilder) -> None: try: - context = self.render_with_context(node, config) - self.update_parsed_node_config(node, config, context=context) + context = self.render_with_context(node, config_builder) + self.update_parsed_node_config(node, config_builder, context=context) except ValidationError as exc: # we got a ValidationError - probably bad types in config() raise ConfigUpdateError(exc, node=node) from exc @@ -465,15 +463,15 @@ def parse_node(self, block: ConfiguredBlockType) -> FinalNode: compiled_path: str = self.get_compiled_path(block) fqn = self.get_fqn(compiled_path, block.name) - config: ContextConfig = 
self.initial_config(fqn) + config_builder: ConfigBuilder = self.initial_config_builder(fqn) node = self._create_parsetime_node( block=block, path=compiled_path, - config=config, + config_builder=config_builder, fqn=fqn, ) - self.render_update(node, config) + self.render_update(node, config_builder) self.add_result_node(block, node) return node diff --git a/core/dbt/parser/hooks.py b/core/dbt/parser/hooks.py index bcc25c0d937..f7836b7e57e 100644 --- a/core/dbt/parser/hooks.py +++ b/core/dbt/parser/hooks.py @@ -1,7 +1,7 @@ from dataclasses import dataclass from typing import Iterable, Iterator, List, Tuple, Union -from dbt.context.context_config import ContextConfig +from dbt.context.context_config import ConfigBuilder from dbt.contracts.files import FilePath from dbt.contracts.graph.nodes import HookNode from dbt.node_types import NodeType, RunHookType @@ -92,7 +92,7 @@ def _create_parsetime_node( self, block: HookBlock, path: str, - config: ContextConfig, + config_builder: ConfigBuilder, fqn: List[str], name=None, **kwargs, @@ -101,7 +101,7 @@ def _create_parsetime_node( return super()._create_parsetime_node( block=block, path=path, - config=config, + config_builder=config_builder, fqn=fqn, index=block.index, name=name, diff --git a/core/dbt/parser/models.py b/core/dbt/parser/models.py index 06e11a89649..5084ee03842 100644 --- a/core/dbt/parser/models.py +++ b/core/dbt/parser/models.py @@ -10,7 +10,7 @@ from dbt import utils from dbt.artifacts.resources import RefArgs from dbt.clients.jinja import get_rendered -from dbt.context.context_config import ContextConfig +from dbt.context.context_config import ConfigBuilder from dbt.contracts.graph.nodes import ModelNode from dbt.exceptions import ( ModelConfigError, @@ -233,15 +233,15 @@ def parse_python_model(self, node, config, context): config_keys_defaults=config_keys_defaults, ) - def render_update(self, node: ModelNode, config: ContextConfig) -> None: + def render_update(self, node: ModelNode, config_builder: ConfigBuilder) -> None: self.manifest._parsing_info.static_analysis_path_count += 1 flags = get_flags() if node.language == ModelLanguage.python: try: verify_python_model_code(node) - context = self._context_for(node, config) - self.parse_python_model(node, config, context) - self.update_parsed_node_config(node, config, context=context) + context = self._context_for(node, config_builder) + self.parse_python_model(node, config_builder, context) + self.update_parsed_node_config(node, config_builder, context=context) except ValidationError as exc: # we got a ValidationError - probably bad types in config() @@ -250,7 +250,7 @@ def render_update(self, node: ModelNode, config: ContextConfig) -> None: elif not flags.STATIC_PARSER: # jinja rendering - super().render_update(node, config) + super().render_update(node, config_builder) return # only sample for experimental parser correctness on normal runs, @@ -277,9 +277,9 @@ def render_update(self, node: ModelNode, config: ContextConfig) -> None: statically_parsed: Optional[Union[str, Dict[str, List[Any]]]] = None experimental_sample: Optional[Union[str, Dict[str, List[Any]]]] = None exp_sample_node: Optional[ModelNode] = None - exp_sample_config: Optional[ContextConfig] = None + exp_sample_config_builder: Optional[ConfigBuilder] = None jinja_sample_node: Optional[ModelNode] = None - jinja_sample_config: Optional[ContextConfig] = None + jinja_sample_config_builder: Optional[ConfigBuilder] = None result: List[str] = [] # sample the experimental parser only during a normal run @@ -295,8 +295,10 @@ 
def render_update(self, node: ModelNode, config: ContextConfig) -> None: if isinstance(experimental_sample, dict): model_parser_copy = self.partial_deepcopy() exp_sample_node = deepcopy(node) - exp_sample_config = deepcopy(config) - model_parser_copy.populate(exp_sample_node, exp_sample_config, experimental_sample) + exp_sample_config_builder = deepcopy(config_builder) + model_parser_copy.populate( + exp_sample_node, exp_sample_config_builder, experimental_sample + ) # use the experimental parser exclusively if the flag is on if flags.USE_EXPERIMENTAL_PARSER: statically_parsed = self.run_experimental_parser(node) @@ -317,36 +319,36 @@ def render_update(self, node: ModelNode, config: ContextConfig) -> None: # but we can't really guarantee that going forward. model_parser_copy = self.partial_deepcopy() jinja_sample_node = deepcopy(node) - jinja_sample_config = deepcopy(config) + jinja_sample_config_builder = deepcopy(config_builder) # rendering mutates the node and the config super(ModelParser, model_parser_copy).render_update( - jinja_sample_node, jinja_sample_config + jinja_sample_node, jinja_sample_config_builder ) # update the unrendered config with values from the static parser. # values from yaml files are in there already - self.populate(node, config, statically_parsed) + self.populate(node, config_builder, statically_parsed) # if we took a jinja sample, compare now that the base node has been populated - if jinja_sample_node is not None and jinja_sample_config is not None: + if jinja_sample_node is not None and jinja_sample_config_builder is not None: result = _get_stable_sample_result( - jinja_sample_node, jinja_sample_config, node, config + jinja_sample_node, jinja_sample_config_builder, node, config_builder ) # if we took an experimental sample, compare now that the base node has been populated - if exp_sample_node is not None and exp_sample_config is not None: + if exp_sample_node is not None and exp_sample_config_builder is not None: result = _get_exp_sample_result( exp_sample_node, - exp_sample_config, + exp_sample_config_builder, node, - config, + config_builder, ) self.manifest._parsing_info.static_analysis_parsed_path_count += 1 # if the static parser didn't succeed, fall back to jinja else: # jinja rendering - super().render_update(node, config) + super().render_update(node, config_builder) # if sampling, add the correct messages for tracking if exp_sample and isinstance(experimental_sample, str): @@ -432,13 +434,15 @@ def _has_banned_macro(self, node: ModelNode) -> bool: # this method updates the model node rendered and unrendered config as well # as the node object. Used to populate these values when circumventing jinja # rendering like the static parser. - def populate(self, node: ModelNode, config: ContextConfig, statically_parsed: Dict[str, Any]): + def populate( + self, node: ModelNode, config_builder: ConfigBuilder, statically_parsed: Dict[str, Any] + ): # manually fit configs in - config._config_call_dict = _get_config_call_dict(statically_parsed) + config_builder._config_call_dict = _get_config_call_dict(statically_parsed) # if there are hooks present this, it WILL render jinja. Will need to change # when the experimental parser supports hooks - self.update_parsed_node_config(node, config) + self.update_parsed_node_config(node, config_builder) # update the unrendered config with values from the file. 
# values from yaml files are in there already @@ -488,11 +492,13 @@ def _shift_sources(static_parser_result: Dict[str, List[Any]]) -> Dict[str, List # returns a list of string codes to be sent as a tracking event def _get_exp_sample_result( sample_node: ModelNode, - sample_config: ContextConfig, + sample_config_builder: ConfigBuilder, node: ModelNode, - config: ContextConfig, + config_builder: ConfigBuilder, ) -> List[str]: - result: List[Tuple[int, str]] = _get_sample_result(sample_node, sample_config, node, config) + result: List[Tuple[int, str]] = _get_sample_result( + sample_node, sample_config_builder, node, config_builder + ) def process(codemsg): code, msg = codemsg @@ -504,11 +510,13 @@ def process(codemsg): # returns a list of string codes to be sent as a tracking event def _get_stable_sample_result( sample_node: ModelNode, - sample_config: ContextConfig, + sample_config_builder: ConfigBuilder, node: ModelNode, - config: ContextConfig, + config_builder: ConfigBuilder, ) -> List[str]: - result: List[Tuple[int, str]] = _get_sample_result(sample_node, sample_config, node, config) + result: List[Tuple[int, str]] = _get_sample_result( + sample_node, sample_config_builder, node, config_builder + ) def process(codemsg): code, msg = codemsg @@ -521,20 +529,20 @@ def process(codemsg): # before being sent as a tracking event def _get_sample_result( sample_node: ModelNode, - sample_config: ContextConfig, + sample_config_builder: ConfigBuilder, node: ModelNode, - config: ContextConfig, + config_builder: ConfigBuilder, ) -> List[Tuple[int, str]]: result: List[Tuple[int, str]] = [] # look for false positive configs - for k in sample_config._config_call_dict.keys(): - if k not in config._config_call_dict.keys(): + for k in sample_config_builder._config_call_dict.keys(): + if k not in config_builder._config_call_dict.keys(): result += [(2, "false_positive_config_value")] break # look for missed configs - for k in config._config_call_dict.keys(): - if k not in sample_config._config_call_dict.keys(): + for k in config_builder._config_call_dict.keys(): + if k not in sample_config_builder._config_call_dict.keys(): result += [(3, "missed_config_value")] break diff --git a/core/dbt/parser/schema_generic_tests.py b/core/dbt/parser/schema_generic_tests.py index 58be6dc94be..bdd7228a907 100644 --- a/core/dbt/parser/schema_generic_tests.py +++ b/core/dbt/parser/schema_generic_tests.py @@ -7,7 +7,7 @@ from dbt.artifacts.resources import NodeVersion, RefArgs from dbt.clients.jinja import add_rendered_test_kwargs, get_rendered from dbt.context.configured import SchemaYamlVars, generate_schema_yml_context -from dbt.context.context_config import ContextConfig +from dbt.context.context_config import ConfigBuilder from dbt.context.macro_resolver import MacroResolver from dbt.context.providers import generate_test_context from dbt.contracts.files import FileHash @@ -88,7 +88,7 @@ def create_test_node( self, target: Union[UnpatchedSourceDefinition, UnparsedNodeUpdate], path: str, - config: ContextConfig, + config_builder: ConfigBuilder, tags: List[str], fqn: List[str], name: str, @@ -130,7 +130,7 @@ def get_hashable_md(data: Union[str, int, float, List, Dict]) -> Union[str, List "raw_code": raw_code, "language": "sql", "unique_id": self.generate_unique_id(name, test_hash), - "config": self.config_dict(config), + "config": self.config_dict(config_builder), "test_metadata": test_metadata, "column_name": column_name, "checksum": FileHash.empty().to_dict(omit_none=True), @@ -200,11 +200,11 @@ def parse_generic_test( 
relative_path = str(path.relative_to(*path.parts[:1])) fqn = self.get_fqn(relative_path, builder.fqn_name) - # this is the ContextConfig that is used in render_update - config: ContextConfig = self.initial_config(fqn) - # Adding the builder's config to the ContextConfig + # this is the ConfigBuilder that is used in render_update + config_builder: ConfigBuilder = self.initial_config_builder(fqn) + # Adding the builder's config to the ConfigBuilder # is needed to ensure the config makes it to the pre_model hook which dbt-snowflake needs - config.add_config_call(builder.config) + config_builder.add_config_call(builder.config) # builder.args contains keyword args for the test macro, # not configs which have been separated out in the builder. # The keyword args are not completely rendered until compilation. @@ -223,7 +223,7 @@ def parse_generic_test( node = self.create_test_node( target=target, path=compiled_path, - config=config, + config_builder=config_builder, fqn=fqn, tags=tags, name=builder.fqn_name, @@ -233,7 +233,7 @@ def parse_generic_test( file_key_name=file_key_name, description=builder.description, ) - self.render_test_update(node, config, builder, schema_file_id) + self.render_test_update(node, config_builder, builder, schema_file_id) return node @@ -278,7 +278,7 @@ def store_env_vars(self, target, schema_file_id, env_vars): # In the future we will look at generalizing this # more to handle additional macros or to use static # parsing to avoid jinja overhead. - def render_test_update(self, node, config, builder, schema_file_id): + def render_test_update(self, node, config_builder, builder, schema_file_id): macro_unique_id = self.macro_resolver.get_macro_id( node.package_name, "test_" + builder.name ) @@ -287,9 +287,9 @@ def render_test_update(self, node, config, builder, schema_file_id): node.depends_on.add_macro(macro_unique_id) if macro_unique_id in ["macro.dbt.test_not_null", "macro.dbt.test_unique"]: config_call_dict = builder.get_static_config() - config._config_call_dict = config_call_dict + config_builder._config_call_dict = config_call_dict # This sets the config from dbt_project - self.update_parsed_node_config(node, config) + self.update_parsed_node_config(node, config_builder) # source node tests are processed at patch_source time if isinstance(builder.target, UnpatchedSourceDefinition): sources = [builder.target.fqn[-2], builder.target.fqn[-1]] @@ -303,7 +303,7 @@ def render_test_update(self, node, config, builder, schema_file_id): node, self.root_project, self.manifest, - config, + config_builder, self.macro_resolver, ) # update with rendered test kwargs (which collects any refs) @@ -312,7 +312,7 @@ def render_test_update(self, node, config, builder, schema_file_id): add_rendered_test_kwargs(context, node, capture_macros=True) # the parsed node is not rendered in the native context. 
get_rendered(node.raw_code, context, node, capture_macros=True) - self.update_parsed_node_config(node, config) + self.update_parsed_node_config(node, config_builder) # env_vars should have been updated in the context env_var method except ValidationError as exc: # we got a ValidationError - probably bad types in config() @@ -351,14 +351,14 @@ def add_test_node(self, block: GenericTestBlock, node: GenericTestNode): def render_with_context( self, node: GenericTestNode, - config: ContextConfig, + config_builder: ConfigBuilder, ) -> None: - """Given the parsed node and a ContextConfig to use during + """Given the parsed node and a ConfigBuilder to use during parsing, collect all the refs that might be squirreled away in the test arguments. This includes the implicit "model" argument. """ # make a base context that doesn't have the magic kwargs field - context = self._context_for(node, config) + context = self._context_for(node, config_builder) # update it with the rendered test kwargs (which collects any refs) add_rendered_test_kwargs(context, node, capture_macros=True) diff --git a/core/dbt/parser/schema_yaml_readers.py b/core/dbt/parser/schema_yaml_readers.py index 3629a95e6bc..19141b9499f 100644 --- a/core/dbt/parser/schema_yaml_readers.py +++ b/core/dbt/parser/schema_yaml_readers.py @@ -23,8 +23,8 @@ ) from dbt.clients.jinja import get_rendered from dbt.context.context_config import ( - BaseContextConfigGenerator, - ContextConfigGenerator, + BaseConfigGenerator, + RenderedConfigGenerator, UnrenderedConfigGenerator, ) from dbt.context.providers import ( @@ -94,6 +94,7 @@ def parse_exposure(self, unparsed: UnparsedExposure) -> None: fqn = self.schema_parser.get_fqn_prefix(path) fqn.append(unparsed.name) + # Also validates config = self._generate_exposure_config( target=unparsed, fqn=fqn, @@ -101,9 +102,6 @@ def parse_exposure(self, unparsed: UnparsedExposure) -> None: rendered=True, ) - # Already validated - # config = config.finalize_and_validate() - unrendered_config = self._generate_exposure_config( target=unparsed, fqn=fqn, @@ -154,9 +152,9 @@ def parse_exposure(self, unparsed: UnparsedExposure) -> None: def _generate_exposure_config( self, target: UnparsedExposure, fqn: List[str], package_name: str, rendered: bool ): - generator: BaseContextConfigGenerator + generator: BaseConfigGenerator if rendered: - generator = ContextConfigGenerator(self.root_project) + generator = RenderedConfigGenerator(self.root_project) else: generator = UnrenderedConfigGenerator(self.root_project) @@ -397,7 +395,7 @@ def parse_metric(self, unparsed: UnparsedMetric, generated_from: Optional[str] = fqn = self.schema_parser.get_fqn_prefix(path) fqn.append(unparsed.name) - # Following will validate also + # Also validates config = self._generate_metric_config( target=unparsed, fqn=fqn, @@ -405,9 +403,6 @@ def parse_metric(self, unparsed: UnparsedMetric, generated_from: Optional[str] = rendered=True, ) - # Already validated - # config = config.finalize_and_validate() - unrendered_config = self._generate_metric_config( target=unparsed, fqn=fqn, @@ -458,9 +453,9 @@ def parse_metric(self, unparsed: UnparsedMetric, generated_from: Optional[str] = def _generate_metric_config( self, target: UnparsedMetric, fqn: List[str], package_name: str, rendered: bool ): - generator: BaseContextConfigGenerator + generator: BaseConfigGenerator if rendered: - generator = ContextConfigGenerator(self.root_project) + generator = RenderedConfigGenerator(self.root_project) else: generator = UnrenderedConfigGenerator(self.root_project) @@ 
-626,9 +621,9 @@ def _create_metric( def _generate_semantic_model_config( self, target: UnparsedSemanticModel, fqn: List[str], package_name: str, rendered: bool ): - generator: BaseContextConfigGenerator + generator: BaseConfigGenerator if rendered: - generator = ContextConfigGenerator(self.root_project) + generator = RenderedConfigGenerator(self.root_project) else: generator = UnrenderedConfigGenerator(self.root_project) @@ -655,6 +650,7 @@ def parse_semantic_model(self, unparsed: UnparsedSemanticModel) -> None: fqn = self.schema_parser.get_fqn_prefix(path) fqn.append(unparsed.name) + # Also validates config = self._generate_semantic_model_config( target=unparsed, fqn=fqn, @@ -662,9 +658,6 @@ def parse_semantic_model(self, unparsed: UnparsedSemanticModel) -> None: rendered=True, ) - # Already validated - # config = config.finalize_and_validate() - unrendered_config = self._generate_semantic_model_config( target=unparsed, fqn=fqn, @@ -741,9 +734,9 @@ def __init__(self, schema_parser: SchemaParser, yaml: YamlBlock) -> None: def _generate_saved_query_config( self, target: UnparsedSavedQuery, fqn: List[str], package_name: str, rendered: bool ): - generator: BaseContextConfigGenerator + generator: BaseConfigGenerator if rendered: - generator = ContextConfigGenerator(self.root_project) + generator = RenderedConfigGenerator(self.root_project) else: generator = UnrenderedConfigGenerator(self.root_project) @@ -800,6 +793,7 @@ def parse_saved_query(self, unparsed: UnparsedSavedQuery) -> None: fqn = self.schema_parser.get_fqn_prefix(path) fqn.append(unparsed.name) + # Also validates config = self._generate_saved_query_config( target=unparsed, fqn=fqn, @@ -807,9 +801,6 @@ def parse_saved_query(self, unparsed: UnparsedSavedQuery) -> None: rendered=True, ) - # Already validated - # config = config.finalize_and_validate() - unrendered_config = self._generate_saved_query_config( target=unparsed, fqn=fqn, diff --git a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py index e0c94b8b444..0fd5126a3c8 100644 --- a/core/dbt/parser/schemas.py +++ b/core/dbt/parser/schemas.py @@ -10,7 +10,7 @@ from dbt.clients.yaml_helper import load_yaml_text from dbt.config import RuntimeConfig from dbt.context.configured import SchemaYamlVars, generate_schema_yml_context -from dbt.context.context_config import ContextConfig +from dbt.context.context_config import ConfigBuilder from dbt.contracts.files import SchemaSourceFile, SourceFile from dbt.contracts.graph.manifest import Manifest from dbt.contracts.graph.nodes import ( @@ -292,7 +292,7 @@ def _add_yaml_snapshot_nodes_to_manifest( snapshot_node = parser._create_parsetime_node( block, self.get_compiled_path(block), - parser.initial_config(fqn), + parser.initial_config_builder(fqn), fqn, snapshot["name"], ) @@ -687,20 +687,20 @@ def patch_node_config(self, node, patch) -> None: unique_id=node.unique_id, field_value=patch.config["access"], ) - # Get the ContextConfig that's used in calculating the config + # Get the ConfigBuilder that's used in calculating the config # This must match the model resource_type that's being patched - config = ContextConfig( + config_builder = ConfigBuilder( self.schema_parser.root_project, node.fqn, node.resource_type, self.schema_parser.project.project_name, ) # We need to re-apply the config_call_dict after the patch config - config._config_call_dict = node.config_call_dict - config._unrendered_config_call_dict = node.unrendered_config_call_dict + config_builder._config_call_dict = node.config_call_dict + 
config_builder._unrendered_config_call_dict = node.unrendered_config_call_dict self.schema_parser.update_parsed_node_config( node, - config, + config_builder, patch_config_dict=patch.config, patch_file_id=patch.file_id, ) diff --git a/core/dbt/parser/seeds.py b/core/dbt/parser/seeds.py index 23c77e1ed7c..cebaaa88f4c 100644 --- a/core/dbt/parser/seeds.py +++ b/core/dbt/parser/seeds.py @@ -1,4 +1,4 @@ -from dbt.context.context_config import ContextConfig +from dbt.context.context_config import ConfigBuilder from dbt.contracts.graph.nodes import SeedNode from dbt.node_types import NodeType from dbt.parser.base import SimpleSQLParser @@ -24,5 +24,5 @@ def resource_type(self) -> NodeType: def get_compiled_path(cls, block: FileBlock): return block.path.relative_path - def render_with_context(self, parsed_node: SeedNode, config: ContextConfig) -> None: + def render_with_context(self, parsed_node: SeedNode, config_builder: ConfigBuilder) -> None: """Seeds don't need to do any rendering.""" diff --git a/core/dbt/parser/sources.py b/core/dbt/parser/sources.py index cf9618eb058..0545ce9ee80 100644 --- a/core/dbt/parser/sources.py +++ b/core/dbt/parser/sources.py @@ -8,8 +8,8 @@ from dbt.artifacts.resources import FreshnessThreshold, SourceConfig, Time from dbt.config import RuntimeConfig from dbt.context.context_config import ( - BaseContextConfigGenerator, - ContextConfigGenerator, + BaseConfigGenerator, + RenderedConfigGenerator, UnrenderedConfigGenerator, ) from dbt.contracts.graph.manifest import Manifest, SourceKey @@ -152,9 +152,6 @@ def parse_source(self, target: UnpatchedSourceDefinition) -> SourceDefinition: rendered=True, ) - # Already validated - # config = config.finalize_and_validate() - unrendered_config = self._generate_source_config( target=target, rendered=False, @@ -289,9 +286,9 @@ def parse_source_test( return node def _generate_source_config(self, target: UnpatchedSourceDefinition, rendered: bool): - generator: BaseContextConfigGenerator + generator: BaseConfigGenerator if rendered: - generator = ContextConfigGenerator(self.root_project) + generator = RenderedConfigGenerator(self.root_project) else: generator = UnrenderedConfigGenerator(self.root_project) diff --git a/core/dbt/parser/unit_tests.py b/core/dbt/parser/unit_tests.py index 38a9c81fb3d..5e016d4758c 100644 --- a/core/dbt/parser/unit_tests.py +++ b/core/dbt/parser/unit_tests.py @@ -9,7 +9,7 @@ from dbt import utils from dbt.artifacts.resources import ModelConfig, UnitTestConfig, UnitTestFormat from dbt.config import RuntimeConfig -from dbt.context.context_config import ContextConfig +from dbt.context.context_config import ConfigBuilder from dbt.context.providers import generate_parse_exposure, get_rendered from dbt.contracts.files import FileHash, SchemaSourceFile from dbt.contracts.graph.manifest import Manifest @@ -314,13 +314,13 @@ def _get_unit_test(self, data: Dict[str, Any]) -> UnparsedUnitTest: def _build_unit_test_config( self, unit_test_fqn: List[str], config_dict: Dict[str, Any] ) -> UnitTestConfig: - config = ContextConfig( + config_builder = ConfigBuilder( self.schema_parser.root_project, unit_test_fqn, NodeType.Unit, self.schema_parser.project.project_name, ) - unit_test_config_dict = config.build_config_dict(patch_config_dict=config_dict) + unit_test_config_dict = config_builder.build_config_dict(patch_config_dict=config_dict) unit_test_config_dict = self.render_entry(unit_test_config_dict) return UnitTestConfig.from_dict(unit_test_config_dict) diff --git a/tests/unit/context/test_context.py 
b/tests/unit/context/test_context.py index 10e591093ee..2580f7b692d 100644 --- a/tests/unit/context/test_context.py +++ b/tests/unit/context/test_context.py @@ -473,7 +473,7 @@ def test_model_parse_context(config_postgres, manifest_fx, get_adapter, get_incl model=mock_model(), config=config_postgres, manifest=manifest_fx, - context_config=mock.MagicMock(), + config_builder=mock.MagicMock(), ) assert_has_keys(REQUIRED_MODEL_KEYS, MAYBE_KEYS, ctx) diff --git a/tests/unit/parser/test_parser.py b/tests/unit/parser/test_parser.py index 8894e47ce84..88b8f9414fe 100644 --- a/tests/unit/parser/test_parser.py +++ b/tests/unit/parser/test_parser.py @@ -8,7 +8,7 @@ from dbt import tracking from dbt.artifacts.resources import ModelConfig, RefArgs -from dbt.context.context_config import ContextConfig +from dbt.context.context_config import ConfigBuilder from dbt.contracts.files import FileHash, FilePath, SchemaSourceFile, SourceFile from dbt.contracts.graph.manifest import Manifest from dbt.contracts.graph.model_config import NodeConfig, SnapshotConfig, TestConfig @@ -1285,7 +1285,7 @@ def setUp(self): checksum=None, unrendered_config={"materialized": "table"}, ) - self.example_config = ContextConfig( + self.example_config_builder = ConfigBuilder( self.root_project_config, self.example_node.fqn, self.example_node.resource_type, @@ -1315,90 +1315,92 @@ def test_source_shifting(self): def test_sample_results(self): # --- missed ref --- # node = deepcopy(self.example_node) - config = deepcopy(self.example_config) + config_builder = deepcopy(self.example_config_builder) sample_node = deepcopy(self.example_node) - sample_config = deepcopy(self.example_config) + sample_config_builder = deepcopy(self.example_config_builder) sample_node.refs = [] node.refs = ["myref"] - result = _get_sample_result(sample_node, sample_config, node, config) + result = _get_sample_result(sample_node, sample_config_builder, node, config_builder) self.assertEqual([(7, "missed_ref_value")], result) # --- false positive ref --- # node = deepcopy(self.example_node) - config = deepcopy(self.example_config) + config_builder = deepcopy(self.example_config_builder) sample_node = deepcopy(self.example_node) - sample_config = deepcopy(self.example_config) + sample_config_builder = deepcopy(self.example_config_builder) sample_node.refs = ["myref"] node.refs = [] - result = _get_sample_result(sample_node, sample_config, node, config) + result = _get_sample_result(sample_node, sample_config_builder, node, config_builder) self.assertEqual([(6, "false_positive_ref_value")], result) # --- missed source --- # node = deepcopy(self.example_node) - config = deepcopy(self.example_config) + config_builder = deepcopy(self.example_config_builder) sample_node = deepcopy(self.example_node) - sample_config = deepcopy(self.example_config) + sample_config_builder = deepcopy(self.example_config_builder) sample_node.sources = [] node.sources = [["abc", "def"]] - result = _get_sample_result(sample_node, sample_config, node, config) + result = _get_sample_result(sample_node, sample_config_builder, node, config_builder) self.assertEqual([(5, "missed_source_value")], result) # --- false positive source --- # node = deepcopy(self.example_node) - config = deepcopy(self.example_config) + config_builder = deepcopy(self.example_config_builder) sample_node = deepcopy(self.example_node) - sample_config = deepcopy(self.example_config) + sample_config_builder = deepcopy(self.example_config_builder) sample_node.sources = [["abc", "def"]] node.sources = [] - result = 
_get_sample_result(sample_node, sample_config, node, config) + result = _get_sample_result(sample_node, sample_config_builder, node, config_builder) self.assertEqual([(4, "false_positive_source_value")], result) # --- missed config --- # node = deepcopy(self.example_node) - config = deepcopy(self.example_config) + config_builder = deepcopy(self.example_config_builder) sample_node = deepcopy(self.example_node) - sample_config = deepcopy(self.example_config) + sample_config_builder = deepcopy(self.example_config_builder) - sample_config._config_call_dict = {} - config._config_call_dict = {"key": "value"} + sample_config_builder._config_call_dict = {} + config_builder._config_call_dict = {"key": "value"} - result = _get_sample_result(sample_node, sample_config, node, config) + result = _get_sample_result(sample_node, sample_config_builder, node, config_builder) self.assertEqual([(3, "missed_config_value")], result) # --- false positive config --- # node = deepcopy(self.example_node) - config = deepcopy(self.example_config) + config_builder = deepcopy(self.example_config_builder) sample_node = deepcopy(self.example_node) - sample_config = deepcopy(self.example_config) + sample_config_builder = deepcopy(self.example_config_builder) - sample_config._config_call_dict = {"key": "value"} - config._config_call_dict = {} + sample_config_builder._config_call_dict = {"key": "value"} + config_builder._config_call_dict = {} - result = _get_sample_result(sample_node, sample_config, node, config) + result = _get_sample_result(sample_node, sample_config_builder, node, config_builder) self.assertEqual([(2, "false_positive_config_value")], result) def test_exp_sample_results(self): node = deepcopy(self.example_node) - config = deepcopy(self.example_config) + config_builder = deepcopy(self.example_config_builder) sample_node = deepcopy(self.example_node) - sample_config = deepcopy(self.example_config) - result = _get_exp_sample_result(sample_node, sample_config, node, config) + sample_config_builder = deepcopy(self.example_config_builder) + result = _get_exp_sample_result(sample_node, sample_config_builder, node, config_builder) self.assertEqual(["00_experimental_exact_match"], result) def test_stable_sample_results(self): node = deepcopy(self.example_node) - config = deepcopy(self.example_config) + config_builder = deepcopy(self.example_config_builder) sample_node = deepcopy(self.example_node) - sample_config = deepcopy(self.example_config) - result = _get_stable_sample_result(sample_node, sample_config, node, config) + sample_config_builder = deepcopy(self.example_config_builder) + result = _get_stable_sample_result( + sample_node, sample_config_builder, node, config_builder + ) self.assertEqual(["80_stable_exact_match"], result) From c0fd389341d84ce7ee45f07ba4fa08c034b32900 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 3 Dec 2024 17:10:34 -0500 Subject: [PATCH 31/42] rename model_configs resource_configs --- core/dbt/context/context_config.py | 57 +++++++++++++++--------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/core/dbt/context/context_config.py b/core/dbt/context/context_config.py index 0c19bf21580..bb483f83cb8 100644 --- a/core/dbt/context/context_config.py +++ b/core/dbt/context/context_config.py @@ -55,8 +55,8 @@ def get_node_project_config(self, project_name: str): def _project_configs( self, project: Project, fqn: List[str], resource_type: NodeType ) -> Iterator[Dict[str, Any]]: - model_configs = self.get_model_configs(project, resource_type) - for level_config in 
fqn_search(model_configs, fqn): + resource_configs = self.get_resource_configs(project, resource_type) + for level_config in fqn_search(resource_configs, fqn): result = {} for key, value in level_config.items(): if key.startswith("+"): @@ -72,7 +72,7 @@ def _active_project_configs( return self._project_configs(self._active_project, fqn, resource_type) @abstractmethod - def get_model_configs(self, project: Project, resource_type: NodeType) -> Dict[str, Any]: ... + def get_resource_configs(self, project: Project, resource_type: NodeType) -> Dict[str, Any]: ... @abstractmethod def merge_config_dicts( @@ -107,28 +107,28 @@ class RenderedConfigGenerator(BaseConfigGenerator[C]): def __init__(self, active_project: RuntimeConfig): self._active_project = active_project - def get_model_configs(self, project: Project, resource_type: NodeType) -> Dict[str, Any]: + def get_resource_configs(self, project: Project, resource_type: NodeType) -> Dict[str, Any]: if resource_type == NodeType.Seed: - model_configs = project.seeds + resource_configs = project.seeds elif resource_type == NodeType.Snapshot: - model_configs = project.snapshots + resource_configs = project.snapshots elif resource_type == NodeType.Source: - model_configs = project.sources + resource_configs = project.sources elif resource_type == NodeType.Test: - model_configs = project.data_tests + resource_configs = project.data_tests elif resource_type == NodeType.Metric: - model_configs = project.metrics + resource_configs = project.metrics elif resource_type == NodeType.SemanticModel: - model_configs = project.semantic_models + resource_configs = project.semantic_models elif resource_type == NodeType.SavedQuery: - model_configs = project.saved_queries + resource_configs = project.saved_queries elif resource_type == NodeType.Exposure: - model_configs = project.exposures + resource_configs = project.exposures elif resource_type == NodeType.Unit: - model_configs = project.unit_tests + resource_configs = project.unit_tests else: - model_configs = project.models - return model_configs + resource_configs = project.models + return resource_configs def merge_config_dicts( self, @@ -229,32 +229,33 @@ def generate_node_config( class UnrenderedConfigGenerator(BaseConfigGenerator[Dict[str, Any]]): - def get_model_configs(self, project: Project, resource_type: NodeType) -> Dict[str, Any]: + def get_resource_configs(self, project: Project, resource_type: NodeType) -> Dict[str, Any]: + """ Get configs for this resource_type from the project's unrendered config""" unrendered = project.unrendered.project_dict if resource_type == NodeType.Seed: - model_configs = unrendered.get("seeds") + resource_configs = unrendered.get("seeds") elif resource_type == NodeType.Snapshot: - model_configs = unrendered.get("snapshots") + resource_configs = unrendered.get("snapshots") elif resource_type == NodeType.Source: - model_configs = unrendered.get("sources") + resource_configs = unrendered.get("sources") elif resource_type == NodeType.Test: - model_configs = unrendered.get("data_tests") + resource_configs = unrendered.get("data_tests") elif resource_type == NodeType.Metric: - model_configs = unrendered.get("metrics") + resource_configs = unrendered.get("metrics") elif resource_type == NodeType.SemanticModel: - model_configs = unrendered.get("semantic_models") + resource_configs = unrendered.get("semantic_models") elif resource_type == NodeType.SavedQuery: - model_configs = unrendered.get("saved_queries") + resource_configs = unrendered.get("saved_queries") elif resource_type 
== NodeType.Exposure: - model_configs = unrendered.get("exposures") + resource_configs = unrendered.get("exposures") elif resource_type == NodeType.Unit: - model_configs = unrendered.get("unit_tests") + resource_configs = unrendered.get("unit_tests") else: - model_configs = unrendered.get("models") - if model_configs is None: + resource_configs = unrendered.get("models") + if resource_configs is None: return {} else: - return model_configs + return resource_configs def merge_config_dicts( self, From 049418e436cbda39f7d7217caf709b2ee826d565 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 3 Dec 2024 17:33:11 -0500 Subject: [PATCH 32/42] More cleanup --- core/dbt/context/context_config.py | 135 ++++++++++------------------- 1 file changed, 44 insertions(+), 91 deletions(-) diff --git a/core/dbt/context/context_config.py b/core/dbt/context/context_config.py index bb483f83cb8..9d851adf381 100644 --- a/core/dbt/context/context_config.py +++ b/core/dbt/context/context_config.py @@ -71,17 +71,46 @@ def _active_project_configs( ) -> Iterator[Dict[str, Any]]: return self._project_configs(self._active_project, fqn, resource_type) - @abstractmethod - def get_resource_configs(self, project: Project, resource_type: NodeType) -> Dict[str, Any]: ... - - @abstractmethod - def merge_config_dicts( + def combine_config_dicts( self, config_call_dict: Dict[str, Any], fqn: List[str], resource_type: NodeType, project_name: str, patch_config_dict: Optional[Dict[str, Any]] = None, + ) -> Dict[str, Any]: + """This method takes resource configs from the project, the model (if applicable), + and the patch, and combines them into one config dictionary.""" + + project_config = self.get_node_project_config(project_name) + config_cls = get_config_for(resource_type) + + # creates "default" config object. Unrendered config starts with + # empty dictionary, rendered config starts with to_dict() from empty config object. + config_dict = self.initial_result(config_cls) + + # Update with project configs + project_configs = self._project_configs(project_config, fqn, resource_type) + for fqn_config in project_configs: + config_dict = self._update_from_config(config_cls, config_dict, fqn_config) + + # Update with schema file configs (patches) + if patch_config_dict: + config_dict = self._update_from_config(config_cls, config_dict, patch_config_dict) + + # Update with config dictionary from sql files (config_call_dict) + config_dict = self._update_from_config(config_cls, config_dict, config_call_dict) + + # If this is not the root project, update with configs from root project + if project_config.project_name != self._active_project.project_name: + for fqn_config in self._active_project_configs(fqn, resource_type): + config_dict = self._update_from_config(config_cls, config_dict, fqn_config) + + return config_dict + + @abstractmethod + def get_resource_configs( + self, project: Project, resource_type: NodeType ) -> Dict[str, Any]: ... 
@abstractmethod @@ -104,6 +133,8 @@ def generate_node_config( class RenderedConfigGenerator(BaseConfigGenerator[C]): + """This class produces the config dictionary used to create the resource config.""" + def __init__(self, active_project: RuntimeConfig): self._active_project = active_project @@ -130,44 +161,6 @@ def get_resource_configs(self, project: Project, resource_type: NodeType) -> Dic resource_configs = project.models return resource_configs - def merge_config_dicts( - self, - config_call_dict: Dict[str, Any], - fqn: List[str], - resource_type: NodeType, - project_name: str, - patch_config_dict: Optional[Dict[str, Any]] = None, - ) -> Dict[str, Any]: - # Note: This method returns a dictionary object. This is a duplicate of - # of UnrenderedConfigGenerator.generate_node_config ... TODO: check to - # see if methods can be combined - - project_config = self.get_node_project_config(project_name) - config_cls = get_config_for(resource_type) - - # creates "default" config object ("cls.from_dict({})") - config_dict = self.initial_result(config_cls) - - project_configs = self._project_configs(project_config, fqn, resource_type) - for fqn_config in project_configs: - config_dict = self._update_from_config(config_cls, config_dict, fqn_config) - - # When schema files patch config, it has lower precedence than - # config in the models (config_call_dict), so we add the patch_config_dict - # before the config_call_dict - if patch_config_dict: - config_dict = self._update_from_config(config_cls, config_dict, patch_config_dict) - - # config_calls are created in the 'experimental' model parser and - # the ParseConfigObject (via add_config_call) - config_dict = self._update_from_config(config_cls, config_dict, config_call_dict) - - if project_config.project_name != self._active_project.project_name: - for fqn_config in self._active_project_configs(fqn, resource_type): - config_dict = self._update_from_config(config_cls, config_dict, fqn_config) - - return config_dict - def initial_result(self, config_cls: Type[BaseConfig]) -> Dict[str, Any]: # Calculate the defaults. We don't want to validate the defaults, # because it might be invalid in the case of required config members @@ -198,7 +191,7 @@ def translate_hook_names(self, project_dict): project_dict["post-hook"] = project_dict.pop("post_hook") return project_dict - # RenderedConfigGenerator + # RenderedConfigGenerator. Validation is performed, and a config object is returned. 
def generate_node_config( self, config_call_dict: Dict[str, Any], @@ -210,7 +203,7 @@ def generate_node_config( config_cls = get_config_for(resource_type) # returns a config object - config_dict = self.merge_config_dicts( + config_dict = self.combine_config_dicts( config_call_dict=config_call_dict, fqn=fqn, resource_type=resource_type, @@ -229,8 +222,10 @@ def generate_node_config( class UnrenderedConfigGenerator(BaseConfigGenerator[Dict[str, Any]]): + """This class produces the unrendered_config dictionary in the resource.""" + def get_resource_configs(self, project: Project, resource_type: NodeType) -> Dict[str, Any]: - """ Get configs for this resource_type from the project's unrendered config""" + """Get configs for this resource_type from the project's unrendered config""" unrendered = project.unrendered.project_dict if resource_type == NodeType.Seed: resource_configs = unrendered.get("seeds") @@ -257,46 +252,7 @@ def get_resource_configs(self, project: Project, resource_type: NodeType) -> Dic else: return resource_configs - def merge_config_dicts( - self, - config_call_dict: Dict[str, Any], - fqn: List[str], - resource_type: NodeType, - project_name: str, - patch_config_dict: Optional[Dict[str, Any]] = None, - ) -> Dict[str, Any]: - # Note: This method returns a Dict[str, Any]. This is a duplicate of - # of RenderedConfigGenerator.generate_node_config, but calls methods - # that deal with dictionaries instead of config object. - # Additions to one method, should probably also go in the other. - - project_config = self.get_node_project_config(project_name) - config_cls = get_config_for(resource_type) - - # creates "default" config object ({}) - config_dict = self.initial_result(config_cls) - - project_configs = self._project_configs(project_config, fqn, resource_type) - for fqn_config in project_configs: - config_dict = self._update_from_config(config_cls, config_dict, fqn_config) - - # When schema files patch config, it has lower precedence than - # config in the models (config_call_dict), so we add the patch_config_dict - # before the config_call_dict - if patch_config_dict: - config_dict = self._update_from_config(config_cls, config_dict, patch_config_dict) - - # config_calls are created in the 'experimental' model parser and - # the ParseConfigObject (via add_config_call) - config_dict = self._update_from_config(config_cls, config_dict, config_call_dict) - - if project_config.project_name != self._active_project.project_name: - for fqn_config in self._active_project_configs(fqn, resource_type): - config_dict = self._update_from_config(config_cls, config_dict, fqn_config) - - return config_dict - - # UnrenderedConfigGenerator + # UnrenderedConfigGenerator. No validation is performed and a dictionary is returned. 
def generate_node_config( self, config_call_dict: Dict[str, Any], @@ -305,16 +261,14 @@ def generate_node_config( project_name: str, patch_config_dict: Optional[dict] = None, ) -> Dict[str, Any]: - # Just call UnrenderedConfigGenerator.merge_config_dicts, which - # will return a config dictionary - result = self.merge_config_dicts( + + result = self.combine_config_dicts( config_call_dict=config_call_dict, fqn=fqn, resource_type=resource_type, project_name=project_name, patch_config_dict=patch_config_dict, ) - # Note: this returns a dictionary return result def initial_result(self, config_cls: Type[BaseConfig]) -> Dict[str, Any]: @@ -356,7 +310,6 @@ def add_unrendered_config_call(self, opts: Dict[str, Any]) -> None: def build_config_dict( self, - *, rendered: bool = True, patch_config_dict: Optional[dict] = None, ) -> Dict[str, Any]: From e6fc9b0c903a2d1a01bdc3aecf647dfca2d0a504 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 3 Dec 2024 17:58:05 -0500 Subject: [PATCH 33/42] Remove unnecessary mangle_hooks, other comments and cleanup --- core/dbt/parser/base.py | 41 ++++++++++++++--------------------- core/dbt/parser/unit_tests.py | 4 +++- 2 files changed, 19 insertions(+), 26 deletions(-) diff --git a/core/dbt/parser/base.py b/core/dbt/parser/base.py index fe9e1e6bf8f..71b54021ddb 100644 --- a/core/dbt/parser/base.py +++ b/core/dbt/parser/base.py @@ -3,7 +3,7 @@ import os from typing import Any, Dict, Generic, List, Optional, TypeVar -from dbt import hooks, utils +from dbt import utils from dbt.adapters.factory import get_adapter # noqa: F401 from dbt.artifacts.resources import Contract from dbt.clients.jinja import MacroGenerator, get_rendered @@ -178,15 +178,6 @@ def get_fqn(self, path: str, name: str) -> List[str]: fqn.append(name) return fqn - def _mangle_hooks(self, config): - """Given a config dict that may have `pre-hook`/`post-hook` keys, - convert it from the yucky maybe-a-string, maybe-a-dict to a dict. - """ - # Like most of parsing, this is a horrible hack :( - for key in hooks.ModelHookType: - if key in config: - config[key] = [hooks.get_hook_dict(h) for h in config[key]] - def _create_error_node( self, name: str, path: str, original_file_path: str, raw_code: str, language: str = "sql" ) -> UnparsedNode: @@ -278,14 +269,12 @@ def render_with_context(self, parsed_node: FinalNode, config_builder: ConfigBuil # This is taking the original config for the node, converting it to a dict, # updating the config with new config passed in, then re-creating the # config from the dict in the node. - def update_parsed_node_config_dict( + def clean_and_fix_config_dict( self, parsed_node: FinalNode, config_dict: Dict[str, Any] ) -> None: # Overwrite node config final_config_dict = parsed_node.config.to_dict(omit_none=True) final_config_dict.update({k.strip(): v for (k, v) in config_dict.items()}) - # re-mangle hooks, in case we got new ones - self._mangle_hooks(final_config_dict) parsed_node.config = parsed_node.config.from_dict(final_config_dict) def update_parsed_node_relation_names( @@ -318,23 +307,27 @@ def update_parsed_node_config( patch_file_id=None, ) -> None: """Given the ConfigBuilder used for parsing and the parsed node, - generate and set the true values to use, overriding the temporary parse - values set in _build_intermediate_parsed_node. 
+ generate the final resource config and the unrendered_config """ - # build_config_dict takes the config_call_dict in the ConfigBuilder object - # and calls calculate_node_config to combine dbt_project configs and - # config calls from SQL files, plus patch configs (from schema files) - # This normalize the config for a model node due #8520; should be improved latter if not patch_config_dict: patch_config_dict = {} if ( parsed_node.resource_type == NodeType.Model and parsed_node.language == ModelLanguage.python ): + # This normalize the config for a python model node due #8520; should be improved latter if "materialized" not in patch_config_dict: patch_config_dict["materialized"] = "table" - config_dict = config_builder.build_config_dict(patch_config_dict=patch_config_dict) + + # build_config_dict takes the config_call_dict in the ConfigBuilder object + # and calls generate_node_config to combine dbt_project configs and + # config calls from SQL files, plus patch configs (from schema files). + # Validation is performed when building the rendered config_dict and + # hooks are converted into hook objects for later rendering. + config_dict = config_builder.build_config_dict( + rendered=True, patch_config_dict=patch_config_dict + ) # Set tags on node provided in config blocks. Tags are additive, so even if # config has been built before, we don't have to reset tags in the parsed_node. @@ -409,7 +402,7 @@ def update_parsed_node_config( # do this once before we parse the node database/schema/alias, so # parsed_node.config is what it would be if they did nothing - self.update_parsed_node_config_dict(parsed_node, config_dict) + self.clean_and_fix_config_dict(parsed_node, config_dict) # This updates the node database/schema/alias/relation_name self.update_parsed_node_relation_names(parsed_node, config_dict) @@ -417,8 +410,7 @@ def update_parsed_node_config( if parsed_node.resource_type == NodeType.Test: return - # at this point, we've collected our hooks. Use the node context to - # render each hook and collect refs/sources + # Use the node context to render each hook and collect refs/sources. 
assert hasattr(parsed_node.config, "pre_hook") and hasattr(parsed_node.config, "post_hook") hooks = list(itertools.chain(parsed_node.config.pre_hook, parsed_node.config.post_hook)) # skip context rebuilding if there aren't any hooks @@ -441,8 +433,7 @@ def config_dict( self, config_builder: ConfigBuilder, ) -> Dict[str, Any]: - config_dict = config_builder.build_config_dict() - self._mangle_hooks(config_dict) + config_dict = config_builder.build_config_dict(rendered=True) return config_dict def render_update(self, node: FinalNode, config_builder: ConfigBuilder) -> None: diff --git a/core/dbt/parser/unit_tests.py b/core/dbt/parser/unit_tests.py index 5e016d4758c..d17bc72271a 100644 --- a/core/dbt/parser/unit_tests.py +++ b/core/dbt/parser/unit_tests.py @@ -320,7 +320,9 @@ def _build_unit_test_config( NodeType.Unit, self.schema_parser.project.project_name, ) - unit_test_config_dict = config_builder.build_config_dict(patch_config_dict=config_dict) + unit_test_config_dict = config_builder.build_config_dict( + rendered=True, patch_config_dict=config_dict + ) unit_test_config_dict = self.render_entry(unit_test_config_dict) return UnitTestConfig.from_dict(unit_test_config_dict) From 31ce086116187eed3182cf70142cdc6e154d09a3 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Wed, 4 Dec 2024 11:35:56 -0500 Subject: [PATCH 34/42] More comments --- core/dbt/context/context_config.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/core/dbt/context/context_config.py b/core/dbt/context/context_config.py index 9d851adf381..46fd71dceb2 100644 --- a/core/dbt/context/context_config.py +++ b/core/dbt/context/context_config.py @@ -162,9 +162,7 @@ def get_resource_configs(self, project: Project, resource_type: NodeType) -> Dic return resource_configs def initial_result(self, config_cls: Type[BaseConfig]) -> Dict[str, Any]: - # Calculate the defaults. We don't want to validate the defaults, - # because it might be invalid in the case of required config members - # (such as on snapshots!) + # Produce a dictionary with config defaults. result = config_cls.from_dict({}).to_dict() return result @@ -202,7 +200,6 @@ def generate_node_config( ) -> Dict[str, Any]: config_cls = get_config_for(resource_type) - # returns a config object config_dict = self.combine_config_dicts( config_call_dict=config_call_dict, fqn=fqn, @@ -272,6 +269,8 @@ def generate_node_config( return result def initial_result(self, config_cls: Type[BaseConfig]) -> Dict[str, Any]: + # We don't want the config defaults here, just the configs which have + # actually been set. return {} def _update_from_config( @@ -286,6 +285,11 @@ def _update_from_config( class ConfigBuilder: + """This object is included in various jinja contexts in order to collect the _config_call_dicts + and the _unrendered_config_call dicts from the config calls in sql files. 
+ It is then used to run "build_config_dict" which calls the rendered or unrendered + config generators and returns a config dictionary.""" + def __init__( self, active_project: RuntimeConfig, From 923b8b6341a788478b90a53bb36968c4e343a16b Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Mon, 9 Dec 2024 12:15:52 -0500 Subject: [PATCH 35/42] add test for serialization of ExternalPartitions in ExternalTable --- .../artifacts/test_serialization.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 tests/functional/artifacts/test_serialization.py diff --git a/tests/functional/artifacts/test_serialization.py b/tests/functional/artifacts/test_serialization.py new file mode 100644 index 00000000000..d24979abc8f --- /dev/null +++ b/tests/functional/artifacts/test_serialization.py @@ -0,0 +1,27 @@ +from dbt.artifacts.resources import ExternalPartition, ExternalTable + + +def test_partitions_serialization(): + + part1 = ExternalPartition( + name="partition 1", + description="partition 1", + data_type="string", + ) + + part2 = ExternalPartition( + name="partition 2", + description="partition 2", + data_type="string", + ) + + ext_table = ExternalTable( + location="my_location", + file_format="my file format", + row_format="row format", + partitions=[part1, part2], + ) + + ext_table_dict = ext_table.to_dict() + assert isinstance(ext_table_dict["partitions"][0], dict) + ext_table.validate(ext_table_dict) From 1a30f648bdfae5e6813c1aa1c990bfe1c3f7ff2e Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 10 Dec 2024 11:05:28 -0500 Subject: [PATCH 36/42] Update test_serialization to simplify for mashumaro, add test_graph_serialization.py --- .../artifacts/test_graph_serialization.py | 48 +++++++++++++++++++ .../artifacts/test_serialization.py | 26 +++++++++- 2 files changed, 72 insertions(+), 2 deletions(-) create mode 100644 tests/functional/artifacts/test_graph_serialization.py diff --git a/tests/functional/artifacts/test_graph_serialization.py b/tests/functional/artifacts/test_graph_serialization.py new file mode 100644 index 00000000000..f779f3e54cb --- /dev/null +++ b/tests/functional/artifacts/test_graph_serialization.py @@ -0,0 +1,48 @@ +import pytest +from dbt.tests.util import run_dbt, get_artifact + +sources_yml = """ +sources: +- name: TEST + schema: STAGE + tables: + - name: TABLE + external: + partitions: + - name: dl_partition + data_type: string + expression: split_part(METADATA$FILENAME, '/', 2) +""" + +get_partitions_sql = """ +{% macro get_partitions() -%} + {% set source_nodes = graph.sources.values() if graph.sources else [] %} + {% for node in source_nodes %} + {% if node.external %} + {% if node.external.partitions %} + {{print(node.external.partitions)}} + {% endif %} + {% endif %} + {% endfor %} +{%- endmacro %} +""" + +class TestGraphSerialization(): + @pytest.fixture(scope="class") + def models(self): + return { + "sources.yml": sources_yml, + } + + @pytest.fixture(scope="class") + def macros(self): + return { + "get_partitions.sql": get_partitions_sql + } + + def test_graph_serialization(self, project): + manifest = run_dbt(["parse"]) + assert manifest + assert len(manifest.sources) == 1 + + run_dbt(["run-operation", "get_partitions"]) diff --git a/tests/functional/artifacts/test_serialization.py b/tests/functional/artifacts/test_serialization.py index d24979abc8f..ad8b219ec5f 100644 --- a/tests/functional/artifacts/test_serialization.py +++ b/tests/functional/artifacts/test_serialization.py @@ -1,4 +1,24 @@ -from dbt.artifacts.resources import 
ExternalPartition, ExternalTable +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional, Union + +from mashumaro.mixins.msgpack import DataClassMessagePackMixin + + +@dataclass +class ExternalPartition(DataClassMessagePackMixin): + name: str = "" + description: str = "" + data_type: str = "" + meta: Dict[str, Any] = field(default_factory=dict) + + +@dataclass +class ExternalTable(DataClassMessagePackMixin): + location: Optional[str] = None + file_format: Optional[str] = None + row_format: Optional[str] = None + tbl_properties: Optional[str] = None + partitions: Optional[Union[List[ExternalPartition], List[str]]] = None def test_partitions_serialization(): @@ -24,4 +44,6 @@ def test_partitions_serialization(): ext_table_dict = ext_table.to_dict() assert isinstance(ext_table_dict["partitions"][0], dict) - ext_table.validate(ext_table_dict) + + ext_table_msgpack = ext_table.to_msgpack() + assert ext_table_msgpack From f0117892b92033ad50381d2d1baa74282fcb56c3 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Wed, 11 Dec 2024 17:23:42 -0500 Subject: [PATCH 37/42] Remove base parameter from get_config_for --- core/dbt/contracts/graph/model_config.py | 2 +- tests/functional/artifacts/test_graph_serialization.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/core/dbt/contracts/graph/model_config.py b/core/dbt/contracts/graph/model_config.py index 72c87705fd6..e18caaaa035 100644 --- a/core/dbt/contracts/graph/model_config.py +++ b/core/dbt/contracts/graph/model_config.py @@ -53,5 +53,5 @@ class UnitTestNodeConfig(NodeConfig): } -def get_config_for(resource_type: NodeType, base=False) -> Type[BaseConfig]: +def get_config_for(resource_type: NodeType) -> Type[BaseConfig]: return RESOURCE_TYPES.get(resource_type, NodeConfig) diff --git a/tests/functional/artifacts/test_graph_serialization.py b/tests/functional/artifacts/test_graph_serialization.py index f779f3e54cb..512ec5ee34c 100644 --- a/tests/functional/artifacts/test_graph_serialization.py +++ b/tests/functional/artifacts/test_graph_serialization.py @@ -1,5 +1,6 @@ import pytest -from dbt.tests.util import run_dbt, get_artifact + +from dbt.tests.util import run_dbt sources_yml = """ sources: @@ -27,7 +28,8 @@ {%- endmacro %} """ -class TestGraphSerialization(): + +class TestGraphSerialization: @pytest.fixture(scope="class") def models(self): return { @@ -36,9 +38,7 @@ def models(self): @pytest.fixture(scope="class") def macros(self): - return { - "get_partitions.sql": get_partitions_sql - } + return {"get_partitions.sql": get_partitions_sql} def test_graph_serialization(self, project): manifest = run_dbt(["parse"]) From 0fb57ee7f029667fa43d6367a883b31be8fd2282 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Wed, 11 Dec 2024 18:02:00 -0500 Subject: [PATCH 38/42] Update return type of generate_node_config --- core/dbt/context/context_config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/dbt/context/context_config.py b/core/dbt/context/context_config.py index 46fd71dceb2..87ef1ab0d04 100644 --- a/core/dbt/context/context_config.py +++ b/core/dbt/context/context_config.py @@ -129,7 +129,7 @@ def generate_node_config( resource_type: NodeType, project_name: str, patch_config_dict: Optional[Dict[str, Any]] = None, - ) -> Dict[str, Any]: ... + ): ... 
class RenderedConfigGenerator(BaseConfigGenerator[C]): @@ -197,7 +197,7 @@ def generate_node_config( resource_type: NodeType, project_name: str, patch_config_dict: Optional[dict] = None, - ) -> Dict[str, Any]: + ) -> BaseConfig: config_cls = get_config_for(resource_type) config_dict = self.combine_config_dicts( From ebc2359f1e11bbf847096981b5ebf62f9e135aed Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Thu, 12 Dec 2024 10:16:22 -0500 Subject: [PATCH 39/42] Remove requirements for 3.15 --- core/dbt/artifacts/resources/v1/components.py | 2 +- core/dbt/artifacts/resources/v1/source_definition.py | 2 +- core/setup.py | 2 +- dev-requirements.txt | 2 +- tests/functional/artifacts/test_serialization.py | 2 ++ 5 files changed, 6 insertions(+), 4 deletions(-) diff --git a/core/dbt/artifacts/resources/v1/components.py b/core/dbt/artifacts/resources/v1/components.py index 5d138c9bfde..8eb43f35d8e 100644 --- a/core/dbt/artifacts/resources/v1/components.py +++ b/core/dbt/artifacts/resources/v1/components.py @@ -12,7 +12,7 @@ from dbt_common.dataclass_schema import ExtensibleDbtClassMixin, dbtClassMixin from dbt_semantic_interfaces.type_enums import TimeGranularity -NodeVersion = Union[int, float, str] +NodeVersion = Union[str, float] @dataclass diff --git a/core/dbt/artifacts/resources/v1/source_definition.py b/core/dbt/artifacts/resources/v1/source_definition.py index 96289b13b42..9044307563e 100644 --- a/core/dbt/artifacts/resources/v1/source_definition.py +++ b/core/dbt/artifacts/resources/v1/source_definition.py @@ -40,7 +40,7 @@ class ExternalTable(AdditionalPropertiesAllowed, Mergeable): file_format: Optional[str] = None row_format: Optional[str] = None tbl_properties: Optional[str] = None - partitions: Optional[Union[List[ExternalPartition], List[str]]] = None + partitions: Optional[Union[List[str], List[ExternalPartition]]] = None def __bool__(self): return self.location is not None diff --git a/core/setup.py b/core/setup.py index a78e64c6e6d..be77d1ba73b 100644 --- a/core/setup.py +++ b/core/setup.py @@ -51,7 +51,7 @@ # Pin to the patch or minor version, and bump in each new minor version of dbt-core. "agate>=1.7.0,<1.10", "Jinja2>=3.1.3,<4", - "mashumaro[msgpack]>=3.15,<4.0", + "mashumaro[msgpack]>=3.9,<3.15", # ---- # dbt-core uses these packages in standard ways. Pin to the major version, and check compatibility # with major versions in each new minor version of dbt-core. 
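The hunks above relax the mashumaro pin back to <3.15 and flip the partitions Union so List[str] comes first while the 3.15-dependent behaviour is sorted out. The underlying issue is that mashumaro generally tries Union members in declaration order when deserializing, so which list type "wins" for a list of partition dicts depends on that order. A minimal sketch of the round-trip this series cares about, using hypothetical Part/Table stand-ins (not dbt's actual resource classes) and mashumaro's DataClassDictMixin:

from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Union

from mashumaro import DataClassDictMixin


@dataclass
class Part(DataClassDictMixin):
    # hypothetical stand-in for ExternalPartition
    name: str = ""
    data_type: str = ""
    meta: Dict[str, Any] = field(default_factory=dict)


@dataclass
class Table(DataClassDictMixin):
    # hypothetical stand-in for ExternalTable; the dataclass list is declared
    # first in the Union so it is the first variant tried on deserialization
    location: Optional[str] = None
    partitions: Optional[Union[List[Part], List[str]]] = None


table = Table(location="loc", partitions=[Part(name="p1"), Part(name="p2")])
as_dict = table.to_dict()
assert isinstance(as_dict["partitions"][0], dict)  # nested dataclasses serialize to plain dicts
restored = Table.from_dict(as_dict)
assert isinstance(restored.partitions[0], Part)  # and come back as Part objects, not strings

With the Union members swapped, how the dict elements are handled has varied across mashumaro releases, which appears to be why the pins move around in these commits and why the functional test below is skipped until 3.15.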
diff --git a/dev-requirements.txt b/dev-requirements.txt index cac7be14f6b..33175c9ee4e 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,4 +1,4 @@ -git+https://github.com/dbt-labs/dbt-adapters.git@mashumaro_fixes +git+https://github.com/dbt-labs/dbt-adapters.git@main git+https://github.com/dbt-labs/dbt-adapters.git@main#subdirectory=dbt-tests-adapter git+https://github.com/dbt-labs/dbt-common.git@mashumaro_fixes git+https://github.com/dbt-labs/dbt-postgres.git@main diff --git a/tests/functional/artifacts/test_serialization.py b/tests/functional/artifacts/test_serialization.py index ad8b219ec5f..05b5dfceb42 100644 --- a/tests/functional/artifacts/test_serialization.py +++ b/tests/functional/artifacts/test_serialization.py @@ -1,3 +1,4 @@ +import pytest from dataclasses import dataclass, field from typing import Any, Dict, List, Optional, Union @@ -21,6 +22,7 @@ class ExternalTable(DataClassMessagePackMixin): partitions: Optional[Union[List[ExternalPartition], List[str]]] = None +@pytest.mark.skip("skip until mashumaro 3.15") def test_partitions_serialization(): part1 = ExternalPartition( From 4585bd032be81ebc381dc9b734f73683ecb4b873 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Mon, 16 Dec 2024 15:55:28 -0500 Subject: [PATCH 40/42] formatting --- tests/functional/artifacts/test_serialization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/functional/artifacts/test_serialization.py b/tests/functional/artifacts/test_serialization.py index 05b5dfceb42..1b3ac60df81 100644 --- a/tests/functional/artifacts/test_serialization.py +++ b/tests/functional/artifacts/test_serialization.py @@ -1,7 +1,7 @@ -import pytest from dataclasses import dataclass, field from typing import Any, Dict, List, Optional, Union +import pytest from mashumaro.mixins.msgpack import DataClassMessagePackMixin From 715ad2ff1a4061fadb8fed493007733692a0a7ac Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Thu, 19 Dec 2024 13:19:15 -0500 Subject: [PATCH 41/42] dev-requirements --- core/setup.py | 2 +- dev-requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/setup.py b/core/setup.py index be77d1ba73b..a78e64c6e6d 100644 --- a/core/setup.py +++ b/core/setup.py @@ -51,7 +51,7 @@ # Pin to the patch or minor version, and bump in each new minor version of dbt-core. "agate>=1.7.0,<1.10", "Jinja2>=3.1.3,<4", - "mashumaro[msgpack]>=3.9,<3.15", + "mashumaro[msgpack]>=3.15,<4.0", # ---- # dbt-core uses these packages in standard ways. Pin to the major version, and check compatibility # with major versions in each new minor version of dbt-core. 
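Before re-enabling the skipped serialization test against this branch, it can be worth confirming that the local environment actually picked up a mashumaro that satisfies the new floor set in the hunk above. A small check (a sketch only; it assumes the third-party packaging library is importable, which this diff does not itself guarantee):

from importlib.metadata import version

from packaging.specifiers import SpecifierSet

installed = version("mashumaro")
assert installed in SpecifierSet(">=3.15,<4.0"), (
    f"mashumaro {installed} does not satisfy >=3.15,<4.0"
)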
diff --git a/dev-requirements.txt b/dev-requirements.txt index 33175c9ee4e..cac7be14f6b 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,4 +1,4 @@ -git+https://github.com/dbt-labs/dbt-adapters.git@main +git+https://github.com/dbt-labs/dbt-adapters.git@mashumaro_fixes git+https://github.com/dbt-labs/dbt-adapters.git@main#subdirectory=dbt-tests-adapter git+https://github.com/dbt-labs/dbt-common.git@mashumaro_fixes git+https://github.com/dbt-labs/dbt-postgres.git@main From 5f94de572ce4683da556b61b27196b937b7b7fc7 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Thu, 19 Dec 2024 13:41:29 -0500 Subject: [PATCH 42/42] Put back Union order changes, use dbtClassMixin in test_serialization.py --- core/dbt/artifacts/resources/v1/components.py | 2 +- core/dbt/artifacts/resources/v1/source_definition.py | 2 +- core/dbt/parser/schemas.py | 2 +- tests/functional/artifacts/test_serialization.py | 8 +++----- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/core/dbt/artifacts/resources/v1/components.py b/core/dbt/artifacts/resources/v1/components.py index 8eb43f35d8e..5d138c9bfde 100644 --- a/core/dbt/artifacts/resources/v1/components.py +++ b/core/dbt/artifacts/resources/v1/components.py @@ -12,7 +12,7 @@ from dbt_common.dataclass_schema import ExtensibleDbtClassMixin, dbtClassMixin from dbt_semantic_interfaces.type_enums import TimeGranularity -NodeVersion = Union[str, float] +NodeVersion = Union[int, float, str] @dataclass diff --git a/core/dbt/artifacts/resources/v1/source_definition.py b/core/dbt/artifacts/resources/v1/source_definition.py index 9044307563e..96289b13b42 100644 --- a/core/dbt/artifacts/resources/v1/source_definition.py +++ b/core/dbt/artifacts/resources/v1/source_definition.py @@ -40,7 +40,7 @@ class ExternalTable(AdditionalPropertiesAllowed, Mergeable): file_format: Optional[str] = None row_format: Optional[str] = None tbl_properties: Optional[str] = None - partitions: Optional[Union[List[str], List[ExternalPartition]]] = None + partitions: Optional[Union[List[ExternalPartition], List[str]]] = None def __bool__(self): return self.location is not None diff --git a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py index 818eb71212c..773108ea57b 100644 --- a/core/dbt/parser/schemas.py +++ b/core/dbt/parser/schemas.py @@ -299,7 +299,7 @@ def _add_yaml_snapshot_nodes_to_manifest( snapshot_node = parser._create_parsetime_node( block, compiled_path, - parser.initial_config(fqn), + parser.initial_config_builder(fqn), fqn, snapshot["name"], ) diff --git a/tests/functional/artifacts/test_serialization.py b/tests/functional/artifacts/test_serialization.py index 1b3ac60df81..9fa6d8a8fcb 100644 --- a/tests/functional/artifacts/test_serialization.py +++ b/tests/functional/artifacts/test_serialization.py @@ -1,12 +1,11 @@ from dataclasses import dataclass, field from typing import Any, Dict, List, Optional, Union -import pytest -from mashumaro.mixins.msgpack import DataClassMessagePackMixin +from dbt_common.dataclass_schema import dbtClassMixin @dataclass -class ExternalPartition(DataClassMessagePackMixin): +class ExternalPartition(dbtClassMixin): name: str = "" description: str = "" data_type: str = "" @@ -14,7 +13,7 @@ class ExternalPartition(DataClassMessagePackMixin): @dataclass -class ExternalTable(DataClassMessagePackMixin): +class ExternalTable(dbtClassMixin): location: Optional[str] = None file_format: Optional[str] = None row_format: Optional[str] = None @@ -22,7 +21,6 @@ class ExternalTable(DataClassMessagePackMixin): partitions: 
Optional[Union[List[ExternalPartition], List[str]]] = None


-@pytest.mark.skip("skip until mashumaro 3.15")
 def test_partitions_serialization():

     part1 = ExternalPartition(