From 31c1d4ac334329cf3ca96c440d63b1ef2920b41b Mon Sep 17 00:00:00 2001 From: edgarasnavickas Date: Thu, 19 Oct 2023 16:32:18 +0300 Subject: [PATCH 01/11] resolving issues with the DBT_MANIFEST load method when has_test attribute is not assigned to the node correctly --- cosmos/dbt/graph.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/cosmos/dbt/graph.py b/cosmos/dbt/graph.py index 889948fdd..29e6cc445 100644 --- a/cosmos/dbt/graph.py +++ b/cosmos/dbt/graph.py @@ -330,9 +330,6 @@ def load_from_dbt_manifest(self) -> None: However, since the Manifest does not represent filters, it relies on the Custom Cosmos implementation to filter out the nodes relevant to the user (based on self.exclude and self.select). - Noted that if dbt project contains versioned models, need to use dbt>=1.6.0 instead. Because, as dbt<1.6.0, - dbt cli doesn't support select a specific versioned models as stg_customers_v1, customers_v1, ... - Updates in-place: * self.nodes * self.filtered_nodes @@ -341,6 +338,7 @@ def load_from_dbt_manifest(self) -> None: nodes = {} with open(self.project.manifest_path) as fp: # type: ignore[arg-type] manifest = json.load(fp) + node_dependencies = manifest.get("child_map", {}) resources = {**manifest.get("nodes", {}), **manifest.get("sources", {}), **manifest.get("exposures", {})} for unique_id, node_dict in resources.items(): @@ -361,20 +359,28 @@ def load_from_dbt_manifest(self) -> None: project_dir=self.project.dir, nodes=nodes, select=self.select, exclude=self.exclude ) - self.update_node_dependency() + self.update_node_dependency(node_dependencies=node_dependencies) logger.info("Total nodes: %i", len(self.nodes)) logger.info("Total filtered nodes: %i", len(self.nodes)) - def update_node_dependency(self) -> None: + def update_node_dependency(self, node_dependencies={}) -> None: """ - This will update the property `has_text` if node has `dbt` test + This will update the property `has_test` if node has `dbt` test Updates in-place: * self.filtered_nodes """ - for _, node in self.filtered_nodes.items(): - if node.resource_type == DbtResourceType.TEST: - for node_id in node.depends_on: - if node_id in self.filtered_nodes: - self.filtered_nodes[node_id].has_test = True + if node_dependencies: + for node_id, node in self.filtered_nodes.items(): + if node.resource_type == DbtResourceType.MODEL: + for dependency in node_dependencies[node_id]: + if self.nodes[dependency].resource_type == DbtResourceType.TEST: + self.filtered_nodes[node_id].has_test = True + break + else: + for _, node in self.filtered_nodes.items(): + if node.resource_type == DbtResourceType.TEST: + for node_id in node.depends_on: + if node_id in self.filtered_nodes: + self.filtered_nodes[node_id].has_test = True \ No newline at end of file From b09fb28730f3d20a883491c21e12a18557fe26dd Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 19 Oct 2023 13:39:33 +0000 Subject: [PATCH 02/11] =?UTF-8?q?=F0=9F=8E=A8=20[pre-commit.ci]=20Auto=20f?= =?UTF-8?q?ormat=20from=20pre-commit.com=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cosmos/dbt/graph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cosmos/dbt/graph.py b/cosmos/dbt/graph.py index 29e6cc445..d9003231c 100644 --- a/cosmos/dbt/graph.py +++ b/cosmos/dbt/graph.py @@ -383,4 +383,4 @@ def update_node_dependency(self, node_dependencies={}) -> None: if node.resource_type == DbtResourceType.TEST: for node_id in node.depends_on: if node_id in self.filtered_nodes: - self.filtered_nodes[node_id].has_test = True \ No newline at end of file + self.filtered_nodes[node_id].has_test = True From ce5b99f235cd50d1b9cf5e6660940e720e7ae3cb Mon Sep 17 00:00:00 2001 From: edgarasnavickas Date: Thu, 19 Oct 2023 16:41:50 +0300 Subject: [PATCH 03/11] accidentally removed a part of the docstring. Undoing --- cosmos/dbt/graph.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cosmos/dbt/graph.py b/cosmos/dbt/graph.py index d9003231c..508c1a781 100644 --- a/cosmos/dbt/graph.py +++ b/cosmos/dbt/graph.py @@ -330,6 +330,9 @@ def load_from_dbt_manifest(self) -> None: However, since the Manifest does not represent filters, it relies on the Custom Cosmos implementation to filter out the nodes relevant to the user (based on self.exclude and self.select). + Noted that if dbt project contains versioned models, need to use dbt>=1.6.0 instead. Because, as dbt<1.6.0, + dbt cli doesn't support select a specific versioned models as stg_customers_v1, customers_v1, ... + Updates in-place: * self.nodes * self.filtered_nodes From 81bca50640cdc3be8dede53a8738e37fea3b1787 Mon Sep 17 00:00:00 2001 From: edgarasnavickas Date: Thu, 19 Oct 2023 16:45:45 +0300 Subject: [PATCH 04/11] specifying argument type for the node_dependencies argument --- cosmos/dbt/graph.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cosmos/dbt/graph.py b/cosmos/dbt/graph.py index 508c1a781..74703b275 100644 --- a/cosmos/dbt/graph.py +++ b/cosmos/dbt/graph.py @@ -367,7 +367,8 @@ def load_from_dbt_manifest(self) -> None: logger.info("Total nodes: %i", len(self.nodes)) logger.info("Total filtered nodes: %i", len(self.nodes)) - def update_node_dependency(self, node_dependencies={}) -> None: + def update_node_dependency(self, + node_dependencies: dict[str, Any] = {}) -> None: """ This will update the property `has_test` if node has `dbt` test From 998fafdb361af489d67cae4dbdcf0a69c0679d46 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 19 Oct 2023 13:48:01 +0000 Subject: [PATCH 05/11] =?UTF-8?q?=F0=9F=8E=A8=20[pre-commit.ci]=20Auto=20f?= =?UTF-8?q?ormat=20from=20pre-commit.com=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cosmos/dbt/graph.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cosmos/dbt/graph.py b/cosmos/dbt/graph.py index 74703b275..f09157d32 100644 --- a/cosmos/dbt/graph.py +++ b/cosmos/dbt/graph.py @@ -367,8 +367,7 @@ def load_from_dbt_manifest(self) -> None: logger.info("Total nodes: %i", len(self.nodes)) logger.info("Total filtered nodes: %i", len(self.nodes)) - def update_node_dependency(self, - node_dependencies: dict[str, Any] = {}) -> None: + def update_node_dependency(self, node_dependencies: dict[str, Any] = {}) -> None: """ This will update the property `has_test` if node has `dbt` test From 6526ca4e19fa9bc25c1b6ec8ab1a61584bdad1ac Mon Sep 17 00:00:00 2001 From: edgarasnavickas Date: Mon, 23 Oct 2023 12:50:08 +0300 Subject: [PATCH 06/11] tags are assigned to a test based on the parent model --- cosmos/dbt/graph.py | 25 ++++++++----------------- cosmos/dbt/selector.py | 3 +++ 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/cosmos/dbt/graph.py b/cosmos/dbt/graph.py index f09157d32..5ef5d89e8 100644 --- a/cosmos/dbt/graph.py +++ b/cosmos/dbt/graph.py @@ -341,7 +341,6 @@ def load_from_dbt_manifest(self) -> None: nodes = {} with open(self.project.manifest_path) as fp: # type: ignore[arg-type] manifest = json.load(fp) - node_dependencies = manifest.get("child_map", {}) resources = {**manifest.get("nodes", {}), **manifest.get("sources", {}), **manifest.get("exposures", {})} for unique_id, node_dict in resources.items(): @@ -362,28 +361,20 @@ def load_from_dbt_manifest(self) -> None: project_dir=self.project.dir, nodes=nodes, select=self.select, exclude=self.exclude ) - self.update_node_dependency(node_dependencies=node_dependencies) + self.update_node_dependency() logger.info("Total nodes: %i", len(self.nodes)) logger.info("Total filtered nodes: %i", len(self.nodes)) - def update_node_dependency(self, node_dependencies: dict[str, Any] = {}) -> None: + def update_node_dependency(self) -> None: """ - This will update the property `has_test` if node has `dbt` test + This will update the property `has_text` if node has `dbt` test Updates in-place: * self.filtered_nodes """ - if node_dependencies: - for node_id, node in self.filtered_nodes.items(): - if node.resource_type == DbtResourceType.MODEL: - for dependency in node_dependencies[node_id]: - if self.nodes[dependency].resource_type == DbtResourceType.TEST: - self.filtered_nodes[node_id].has_test = True - break - else: - for _, node in self.filtered_nodes.items(): - if node.resource_type == DbtResourceType.TEST: - for node_id in node.depends_on: - if node_id in self.filtered_nodes: - self.filtered_nodes[node_id].has_test = True + for _, node in self.filtered_nodes.items(): + if node.resource_type == DbtResourceType.TEST: + for node_id in node.depends_on: + if node_id in self.filtered_nodes: + self.filtered_nodes[node_id].has_test = True \ No newline at end of file diff --git a/cosmos/dbt/selector.py b/cosmos/dbt/selector.py index 926cc6b1a..41da794e2 100644 --- a/cosmos/dbt/selector.py +++ b/cosmos/dbt/selector.py @@ -105,6 +105,9 @@ def should_include_node(node_id: str, node: DbtNode) -> bool: visited_nodes.add(node_id) + if node.resource_type == DbtResourceType.TEST: + node.tags = getattr(nodes.get(node.depends_on[0]), "tags", []) + if config.tags: if not (set(config.tags) <= set(node.tags)): return False From 78615d8bd3e0ea6225fba8fba02282ed51954d95 Mon Sep 17 00:00:00 2001 From: edgarasnavickas Date: Mon, 23 Oct 2023 12:52:06 +0300 Subject: [PATCH 07/11] adding newline which I for some reason removed in an earlier commit --- cosmos/dbt/graph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cosmos/dbt/graph.py b/cosmos/dbt/graph.py index 5ef5d89e8..889948fdd 100644 --- a/cosmos/dbt/graph.py +++ b/cosmos/dbt/graph.py @@ -377,4 +377,4 @@ def update_node_dependency(self) -> None: if node.resource_type == DbtResourceType.TEST: for node_id in node.depends_on: if node_id in self.filtered_nodes: - self.filtered_nodes[node_id].has_test = True \ No newline at end of file + self.filtered_nodes[node_id].has_test = True From adaab2c5387607380ff94727323840a6476990a4 Mon Sep 17 00:00:00 2001 From: edgarasnavickas Date: Thu, 26 Oct 2023 13:43:50 +0300 Subject: [PATCH 08/11] adding a test that checks if models that are selected by tags have tests based on the has_test attribute + specifying tag in manifest.json for a couple of models. --- tests/dbt/test_graph.py | 11 +++++++++++ tests/sample/manifest.json | 4 ++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/tests/dbt/test_graph.py b/tests/dbt/test_graph.py index dc4189aca..21c8cf794 100644 --- a/tests/dbt/test_graph.py +++ b/tests/dbt/test_graph.py @@ -418,6 +418,17 @@ def test_update_node_dependency_test_not_exist(): assert nodes.has_test is False +def test_tag_selected_node_test_exist(): + dbt_project = DbtProject(name="jaffle_shop", root_dir=DBT_PROJECTS_ROOT_DIR, manifest_path=SAMPLE_MANIFEST) + dbt_graph = DbtGraph(project=dbt_project, select=["tag:test_tag"]) + dbt_graph.load_from_dbt_manifest() + + for _, node in dbt_graph.filtered_nodes.items(): + assert node.tags == ["test_tag"] + if node.resource_type == DbtResourceType.MODEL: + assert node.has_test is True + + @pytest.mark.integration @pytest.mark.parametrize("load_method", ["load_via_dbt_ls", "load_from_dbt_manifest"]) def test_load_dbt_ls_and_manifest_with_model_version(load_method): diff --git a/tests/sample/manifest.json b/tests/sample/manifest.json index 4150234b8..51a04d476 100644 --- a/tests/sample/manifest.json +++ b/tests/sample/manifest.json @@ -7576,7 +7576,7 @@ "resource_type": "model", "schema": "public", "sources": [], - "tags": [], + "tags": ["test_tag"], "unique_id": "model.jaffle_shop.customers", "unrendered_config": { "materialized": "table" @@ -7754,7 +7754,7 @@ "resource_type": "model", "schema": "public", "sources": [], - "tags": [], + "tags": ["test_tag"], "unique_id": "model.jaffle_shop.orders", "unrendered_config": { "materialized": "table" From 01e870fe942ea856897f68d137cb905319ee2c4a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 26 Oct 2023 10:44:02 +0000 Subject: [PATCH 09/11] =?UTF-8?q?=F0=9F=8E=A8=20[pre-commit.ci]=20Auto=20f?= =?UTF-8?q?ormat=20from=20pre-commit.com=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/sample/manifest.json | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/sample/manifest.json b/tests/sample/manifest.json index 51a04d476..d0b19c7b6 100644 --- a/tests/sample/manifest.json +++ b/tests/sample/manifest.json @@ -7576,7 +7576,9 @@ "resource_type": "model", "schema": "public", "sources": [], - "tags": ["test_tag"], + "tags": [ + "test_tag" + ], "unique_id": "model.jaffle_shop.customers", "unrendered_config": { "materialized": "table" @@ -7754,7 +7756,9 @@ "resource_type": "model", "schema": "public", "sources": [], - "tags": ["test_tag"], + "tags": [ + "test_tag" + ], "unique_id": "model.jaffle_shop.orders", "unrendered_config": { "materialized": "table" From 3a8bf12bca1414ede7f8b21dc682f9f245b20c8d Mon Sep 17 00:00:00 2001 From: edgarasnavickas Date: Thu, 26 Oct 2023 14:00:37 +0300 Subject: [PATCH 10/11] missed the fact that a naming change was made to the DbtProject class --- tests/dbt/test_graph.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/dbt/test_graph.py b/tests/dbt/test_graph.py index 598acfc3d..f626ada42 100644 --- a/tests/dbt/test_graph.py +++ b/tests/dbt/test_graph.py @@ -505,8 +505,15 @@ def test_update_node_dependency_test_not_exist(): def test_tag_selected_node_test_exist(): - dbt_project = DbtProject(name="jaffle_shop", root_dir=DBT_PROJECTS_ROOT_DIR, manifest_path=SAMPLE_MANIFEST) - dbt_graph = DbtGraph(project=dbt_project, select=["tag:test_tag"]) + project_config = ProjectConfig( + dbt_project_path=DBT_PROJECTS_ROOT_DIR / DBT_PROJECT_NAME, manifest_path=SAMPLE_MANIFEST + ) + profile_config = ProfileConfig( + profile_name="test", + target_name="test", + profiles_yml_filepath=DBT_PROJECTS_ROOT_DIR / DBT_PROJECT_NAME / "profiles.yml", + ) + dbt_graph = DbtGraph(project=project_config, profile_config=profile_config, select=["tag:test_tag"]) dbt_graph.load_from_dbt_manifest() for _, node in dbt_graph.filtered_nodes.items(): From bb58d8269ce6aa80eb9b0e9cd967b9b28b3f9470 Mon Sep 17 00:00:00 2001 From: edgarasnavickas Date: Mon, 30 Oct 2023 10:02:41 +0200 Subject: [PATCH 11/11] adding an assert statement to ensure that a list we're looping through isn't empty --- tests/dbt/test_graph.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/dbt/test_graph.py b/tests/dbt/test_graph.py index f626ada42..b36bf92fb 100644 --- a/tests/dbt/test_graph.py +++ b/tests/dbt/test_graph.py @@ -516,6 +516,8 @@ def test_tag_selected_node_test_exist(): dbt_graph = DbtGraph(project=project_config, profile_config=profile_config, select=["tag:test_tag"]) dbt_graph.load_from_dbt_manifest() + assert len(dbt_graph.filtered_nodes) > 0 + for _, node in dbt_graph.filtered_nodes.items(): assert node.tags == ["test_tag"] if node.resource_type == DbtResourceType.MODEL: