From 6deba2240c04744ef302fe7c541de91d7397f742 Mon Sep 17 00:00:00 2001 From: Tatiana Al-Chueyr Date: Tue, 5 Dec 2023 11:19:27 +0000 Subject: [PATCH] Add support to select using graph-operators when using `LoadMode.CUSTOM` or `LoadMode.DBT_MANIFEST` (#728) Add support for the following when using `LoadMode.CUSTOM` or `LoadMode.DBT_MANIFEST`: * Support selection of model by name * Support the selection of models by name & their children (with or without degrees) * Support the selection of models by name & their parents (with or without degrees) * Support intersections and unions involving graph selectors (with or without other supported selectors, eg. tags) Examples of select/exclusion statements that now work regardless of the `LoadMode` being used: ``` model_a +model_b model_c+ +model_d+ 2+model_e model_f+3 model_f+,tag:nightly ``` Related dbt documentation: https://docs.getdbt.com/reference/node-selection/graph-operators https://docs.getdbt.com/reference/node-selection/set-operators Limitations: * The at operator is not supported yet (`@`) * If users opt to use graph selector, it will increase the DAG parsing time and the task execution time when using `LoadMode.CUSTOM` or `LoadMode.DBT_MANIFEST` This PR improves and extends the original implementation proposed by @tseruga in #429. 
Some of the changes that were introduced on top of the original PR: * Add support to descendants (before only precursors were supported) * Add support to different depths/degrees of precursors/descendants * Add support to the union between graph operators and graph/non-graph operators * Add support to the intersection between graph operators and graph/non-graph operators Closes: #684 Co-authored-by: Tyler Seruga --- cosmos/dbt/selector.py | 215 ++++++++++++++++++-- docs/configuration/selecting-excluding.rst | 35 +++- tests/dbt/test_selector.py | 219 ++++++++++++++++++--- 3 files changed, 424 insertions(+), 45 deletions(-) diff --git a/cosmos/dbt/selector.py b/cosmos/dbt/selector.py index c7316dc75..c7eb89307 100644 --- a/cosmos/dbt/selector.py +++ b/cosmos/dbt/selector.py @@ -1,7 +1,9 @@ from __future__ import annotations -from pathlib import Path import copy - +import re +from collections import defaultdict +from dataclasses import dataclass +from pathlib import Path from typing import TYPE_CHECKING, Any from cosmos.constants import DbtResourceType @@ -16,11 +18,154 @@ PATH_SELECTOR = "path:" TAG_SELECTOR = "tag:" CONFIG_SELECTOR = "config." - +PLUS_SELECTOR = "+" +GRAPH_SELECTOR_REGEX = r"^([0-9]*\+)?([^\+]+)(\+[0-9]*)?$|" logger = get_logger(__name__) +@dataclass +class GraphSelector: + """ + Implements dbt graph operator selectors: + model_a + +model_b + model_c+ + +model_d+ + 2+model_e + model_f+3 + + https://docs.getdbt.com/reference/node-selection/graph-operators + """ + + node_name: str + precursors: str | None + descendants: str | None + + @property + def precursors_depth(self) -> int: + """ + Calculates the depth/degrees/generations of precursors (parents). 
+ Return: + -1: if it should return all the generations of precursors + 0: if it shouldn't return any precursors + >0: upperbound number of parent generations + """ + if not self.precursors: + return 0 + if self.precursors == "+": + return -1 + else: + return int(self.precursors[:-1]) + + @property + def descendants_depth(self) -> int: + """ + Calculates the depth/degrees/generations of descendants (children). + Return: + -1: if it should return all the generations of children + 0: if it shouldn't return any children + >0: upperbound of children generations + """ + if not self.descendants: + return 0 + if self.descendants == "+": + return -1 + else: + return int(self.descendants[1:]) + + @staticmethod + def parse(text: str) -> GraphSelector | None: + """ + Parse a string and identify if there are graph selectors, including the desired node name, descendants and + precursors. Return a GraphSelector instance if the pattern matches. + """ + regex_match = re.search(GRAPH_SELECTOR_REGEX, text) + if regex_match: + precursors, node_name, descendants = regex_match.groups() + return GraphSelector(node_name, precursors, descendants) + return None + + def select_node_precursors(self, nodes: dict[str, DbtNode], root_id: str, selected_nodes: set[str]) -> None: + """ + Parse original nodes and add the precursor nodes related to this config to the selected_nodes set. + + :param nodes: Original dbt nodes list + :param root_id: Unique identifier of self.node_name + :param selected_nodes: Set where precursor nodes will be added to. 
+ """ + if self.precursors: + depth = self.precursors_depth + previous_generation = {root_id} + processed_nodes = set() + while depth and previous_generation: + new_generation: set[str] = set() + for node_id in previous_generation: + if node_id not in processed_nodes: + new_generation.update(set(nodes[node_id].depends_on)) + processed_nodes.add(node_id) + selected_nodes.update(new_generation) + previous_generation = new_generation + depth -= 1 + + def select_node_descendants(self, nodes: dict[str, DbtNode], root_id: str, selected_nodes: set[str]) -> None: + """ + Parse original nodes and add the descendant nodes related to this config to the selected_nodes set. + + :param nodes: Original dbt nodes list + :param root_id: Unique identifier of self.node_name + :param selected_nodes: Set where descendant nodes will be added to. + """ + if self.descendants: + children_by_node = defaultdict(set) + # Index nodes by parent id + # We could optimize by doing this only once for the dbt project and giving it + # as a parameter to the GraphSelector + for node_id, node in nodes.items(): + for parent_id in node.depends_on: + children_by_node[parent_id].add(node_id) + + depth = self.descendants_depth + previous_generation = {root_id} + processed_nodes = set() + while depth and previous_generation: + new_generation: set[str] = set() + for node_id in previous_generation: + if node_id not in processed_nodes: + new_generation.update(children_by_node[node_id]) + processed_nodes.add(node_id) + selected_nodes.update(new_generation) + previous_generation = new_generation + depth -= 1 + + def filter_nodes(self, nodes: dict[str, DbtNode]) -> set[str]: + """ + Given a dictionary with the original dbt project nodes, applies the current graph selector to + identify the subset of nodes that matches the selection criteria. 
+ + :param nodes: dbt project nodes + :return: set of node ids that matches current graph selector + """ + selected_nodes: set[str] = set() + + # Index nodes by name, we can improve performance by doing this once + # for multiple GraphSelectors + node_by_name = {} + for node_id, node in nodes.items(): + node_by_name[node.name] = node_id + + if self.node_name in node_by_name: + root_id = node_by_name[self.node_name] + else: + logger.warn(f"Selector {self.node_name} not found.") + return selected_nodes + + selected_nodes.add(root_id) + self.select_node_precursors(nodes, root_id, selected_nodes) + self.select_node_descendants(nodes, root_id, selected_nodes) + return selected_nodes + + class SelectorConfig: """ Represents a select/exclude statement. @@ -43,11 +188,12 @@ def __init__(self, project_dir: Path | None, statement: str): self.tags: list[str] = [] self.config: dict[str, str] = {} self.other: list[str] = [] + self.graph_selectors: list[GraphSelector] = [] self.load_from_statement(statement) @property def is_empty(self) -> bool: - return not (self.paths or self.tags or self.config or self.other) + return not (self.paths or self.tags or self.config or self.graph_selectors or self.other) def load_from_statement(self, statement: str) -> None: """ @@ -61,6 +207,7 @@ def load_from_statement(self, statement: str) -> None: https://docs.getdbt.com/reference/node-selection/yaml-selectors """ items = statement.split(",") + for item in items: if item.startswith(PATH_SELECTOR): index = len(PATH_SELECTOR) @@ -77,11 +224,16 @@ def load_from_statement(self, statement: str) -> None: if key in SUPPORTED_CONFIG: self.config[key] = value else: - self.other.append(item) - logger.warning("Unsupported select statement: %s", item) + if item: + graph_selector = GraphSelector.parse(item) + if graph_selector is not None: + self.graph_selectors.append(graph_selector) + else: + self.other.append(item) + logger.warning("Unsupported select statement: %s", item) def __repr__(self) -> str: - 
return f"SelectorConfig(paths={self.paths}, tags={self.tags}, config={self.config}, other={self.other})" + return f"SelectorConfig(paths={self.paths}, tags={self.tags}, config={self.config}, other={self.other}, graph_selectors={self.graph_selectors})" class NodeSelector: @@ -95,7 +247,9 @@ class NodeSelector: def __init__(self, nodes: dict[str, DbtNode], config: SelectorConfig) -> None: self.nodes = nodes self.config = config + self.selected_nodes: set[str] = set() + @property def select_nodes_ids_by_intersection(self) -> set[str]: """ Return a list of node ids which matches the configuration defined in config. @@ -107,14 +261,19 @@ def select_nodes_ids_by_intersection(self) -> set[str]: if self.config.is_empty: return set(self.nodes.keys()) - self.selected_nodes: set[str] = set() + selected_nodes: set[str] = set() self.visited_nodes: set[str] = set() for node_id, node in self.nodes.items(): if self._should_include_node(node_id, node): - self.selected_nodes.add(node_id) + selected_nodes.add(node_id) + + if self.config.graph_selectors: + nodes_by_graph_selector = self.select_by_graph_operator() + selected_nodes = selected_nodes.intersection(nodes_by_graph_selector) - return self.selected_nodes + self.selected_nodes = selected_nodes + return selected_nodes def _should_include_node(self, node_id: str, node: DbtNode) -> bool: "Checks if a single node should be included. Only runs once per node with caching." @@ -175,6 +334,22 @@ def _is_path_matching(self, node: DbtNode) -> bool: return self._should_include_node(node.depends_on[0], model_node) return False + def select_by_graph_operator(self) -> set[str]: + """ + Return a list of node ids which match the configuration defined in the config. + + Return all nodes that are parents (or parents from parents) of the root defined in the configuration. 
+ + References: + https://docs.getdbt.com/reference/node-selection/syntax + https://docs.getdbt.com/reference/node-selection/yaml-selectors + """ + selected_nodes_by_selector: list[set[str]] = [] + + for graph_selector in self.config.graph_selectors: + selected_nodes_by_selector.append(graph_selector.filter_nodes(self.nodes)) + return set.intersection(*selected_nodes_by_selector) + def retrieve_by_label(statement_list: list[str], label: str) -> set[str]: """ @@ -189,7 +364,7 @@ def retrieve_by_label(statement_list: list[str], label: str) -> set[str]: for statement in statement_list: config = SelectorConfig(Path(), statement) item_values = getattr(config, label) - label_values = label_values.union(item_values) + label_values.update(item_values) return label_values @@ -217,11 +392,14 @@ def select_nodes( filters = [["select", select], ["exclude", exclude]] for filter_type, filter in filters: for filter_parameter in filter: - if filter_parameter.startswith(PATH_SELECTOR) or filter_parameter.startswith(TAG_SELECTOR): + if ( + filter_parameter.startswith(PATH_SELECTOR) + or filter_parameter.startswith(TAG_SELECTOR) + or PLUS_SELECTOR in filter_parameter + or any([filter_parameter.startswith(CONFIG_SELECTOR + config + ":") for config in SUPPORTED_CONFIG]) + ): continue - elif any([filter_parameter.startswith(CONFIG_SELECTOR + config + ":") for config in SUPPORTED_CONFIG]): - continue - else: + elif ":" in filter_parameter: raise CosmosValueError(f"Invalid {filter_type} filter: {filter_parameter}") subset_ids: set[str] = set() @@ -229,8 +407,9 @@ def select_nodes( for statement in select: config = SelectorConfig(project_dir, statement) node_selector = NodeSelector(nodes, config) - select_ids = node_selector.select_nodes_ids_by_intersection() - subset_ids = subset_ids.union(set(select_ids)) + + select_ids = node_selector.select_nodes_ids_by_intersection + subset_ids.update(set(select_ids)) if select: nodes = {id_: nodes[id_] for id_ in subset_ids} @@ -241,7 +420,7 @@ def 
select_nodes( for statement in exclude: config = SelectorConfig(project_dir, statement) node_selector = NodeSelector(nodes, config) - exclude_ids = exclude_ids.union(set(node_selector.select_nodes_ids_by_intersection())) + exclude_ids.update(set(node_selector.select_nodes_ids_by_intersection)) subset_ids = set(nodes_ids) - set(exclude_ids) return {id_: nodes[id_] for id_ in subset_ids} diff --git a/docs/configuration/selecting-excluding.rst b/docs/configuration/selecting-excluding.rst index fadea1485..dfa4a96c5 100644 --- a/docs/configuration/selecting-excluding.rst +++ b/docs/configuration/selecting-excluding.rst @@ -10,7 +10,9 @@ The ``select`` and ``exclude`` parameters are lists, with values like the follow - ``tag:my_tag``: include/exclude models with the tag ``my_tag`` - ``config.materialized:table``: include/exclude models with the config ``materialized: table`` - ``path:analytics/tables``: include/exclude models in the ``analytics/tables`` directory - +- ``+node_name+1`` (graph operators): include/exclude the node with name ``node_name``, all its parents, and its first generation of children (`dbt graph selector docs <https://docs.getdbt.com/reference/node-selection/graph-operators>`_) +- ``tag:my_tag,+node_name`` (intersection): include/exclude ``node_name`` and its parents if they have the tag ``my_tag`` (`dbt set operator docs <https://docs.getdbt.com/reference/node-selection/set-operators>`_) +- ``['tag:first_tag', 'tag:second_tag']`` (union): include/exclude nodes that have either ``tag:first_tag`` or ``tag:second_tag`` .. note:: @@ -51,3 +53,34 @@ Examples: select=["path:analytics/tables"], ) ) + + +.. code-block:: python + + from cosmos import DbtDag, RenderConfig + + jaffle_shop = DbtDag( + render_config=RenderConfig( + select=["tag:include_tag1", "tag:include_tag2"], # union + ) + ) + +.. code-block:: python + + from cosmos import DbtDag, RenderConfig + + jaffle_shop = DbtDag( + render_config=RenderConfig( + select=["tag:include_tag1,tag:include_tag2"], # intersection + ) + ) + +.. 
code-block:: python + + from cosmos import DbtDag, RenderConfig + + jaffle_shop = DbtDag( + render_config=RenderConfig( + exclude=["node_name+"], # node_name and its children + ) + ) diff --git a/tests/dbt/test_selector.py b/tests/dbt/test_selector.py index f7ece6391..1cf987124 100644 --- a/tests/dbt/test_selector.py +++ b/tests/dbt/test_selector.py @@ -46,47 +46,69 @@ def test_is_empty_config(selector_config, paths, tags, config, other, expected): tags=["has_child"], config={"materialized": "view", "tags": ["has_child"]}, ) + +another_grandparent_node = DbtNode( + unique_id=f"{DbtResourceType.MODEL.value}.{SAMPLE_PROJ_PATH.stem}.another_grandparent_node", + resource_type=DbtResourceType.MODEL, + depends_on=[], + file_path=SAMPLE_PROJ_PATH / "gen1/models/another_grandparent_node.sql", + tags=[], + config={}, +) + parent_node = DbtNode( unique_id=f"{DbtResourceType.MODEL.value}.{SAMPLE_PROJ_PATH.stem}.parent", resource_type=DbtResourceType.MODEL, - depends_on=["grandparent"], + depends_on=[grandparent_node.unique_id, another_grandparent_node.unique_id], file_path=SAMPLE_PROJ_PATH / "gen2/models/parent.sql", tags=["has_child", "is_child"], config={"materialized": "view", "tags": ["has_child", "is_child"]}, ) + child_node = DbtNode( unique_id=f"{DbtResourceType.MODEL.value}.{SAMPLE_PROJ_PATH.stem}.child", resource_type=DbtResourceType.MODEL, - depends_on=["parent"], + depends_on=[parent_node.unique_id], file_path=SAMPLE_PROJ_PATH / "gen3/models/child.sql", tags=["nightly", "is_child"], config={"materialized": "table", "tags": ["nightly", "is_child"]}, ) -grandchild_1_test_node = DbtNode( - unique_id=f"{DbtResourceType.MODEL.value}.{SAMPLE_PROJ_PATH.stem}.grandchild_1", +sibling1_node = DbtNode( + unique_id=f"{DbtResourceType.MODEL.value}.{SAMPLE_PROJ_PATH.stem}.sibling1", resource_type=DbtResourceType.MODEL, - depends_on=["parent"], - file_path=SAMPLE_PROJ_PATH / "gen3/models/grandchild_1.sql", + depends_on=[parent_node.unique_id], + file_path=SAMPLE_PROJ_PATH / 
"gen3/models/sibling1.sql", tags=["nightly", "deprecated", "test"], config={"materialized": "table", "tags": ["nightly", "deprecated", "test"]}, ) -grandchild_2_test_node = DbtNode( - unique_id=f"{DbtResourceType.MODEL.value}.{SAMPLE_PROJ_PATH.stem}.grandchild_2", +sibling2_node = DbtNode( + unique_id=f"{DbtResourceType.MODEL.value}.{SAMPLE_PROJ_PATH.stem}.sibling2", resource_type=DbtResourceType.MODEL, - depends_on=["parent"], - file_path=SAMPLE_PROJ_PATH / "gen3/models/grandchild_2.sql", + depends_on=[parent_node.unique_id], + file_path=SAMPLE_PROJ_PATH / "gen3/models/sibling2.sql", tags=["nightly", "deprecated", "test2"], config={"materialized": "table", "tags": ["nightly", "deprecated", "test2"]}, ) +orphaned_node = DbtNode( + unique_id=f"{DbtResourceType.MODEL.value}.{SAMPLE_PROJ_PATH.stem}.orphaned", + resource_type=DbtResourceType.MODEL, + depends_on=[], + file_path=SAMPLE_PROJ_PATH / "gen3/models/orphaned.sql", + tags=[], + config={}, +) + sample_nodes = { grandparent_node.unique_id: grandparent_node, + another_grandparent_node.unique_id: another_grandparent_node, parent_node.unique_id: parent_node, child_node.unique_id: child_node, - grandchild_1_test_node.unique_id: grandchild_1_test_node, - grandchild_2_test_node.unique_id: grandchild_2_test_node, + sibling1_node.unique_id: sibling1_node, + sibling2_node.unique_id: sibling2_node, + orphaned_node.unique_id: orphaned_node, } @@ -100,8 +122,8 @@ def test_select_nodes_by_select_config(): selected = select_nodes(project_dir=SAMPLE_PROJ_PATH, nodes=sample_nodes, select=["config.materialized:table"]) expected = { child_node.unique_id: child_node, - grandchild_1_test_node.unique_id: grandchild_1_test_node, - grandchild_2_test_node.unique_id: grandchild_2_test_node, + sibling1_node.unique_id: sibling1_node, + sibling2_node.unique_id: sibling2_node, } assert selected == expected @@ -136,8 +158,8 @@ def test_select_nodes_by_select_union_config_test_tags(): expected = { grandparent_node.unique_id: grandparent_node, 
parent_node.unique_id: parent_node, - grandchild_1_test_node.unique_id: grandchild_1_test_node, - grandchild_2_test_node.unique_id: grandchild_2_test_node, + sibling1_node.unique_id: sibling1_node, + sibling2_node.unique_id: sibling2_node, } assert selected == expected @@ -176,8 +198,8 @@ def test_select_nodes_by_select_union(): grandparent_node.unique_id: grandparent_node, parent_node.unique_id: parent_node, child_node.unique_id: child_node, - grandchild_1_test_node.unique_id: grandchild_1_test_node, - grandchild_2_test_node.unique_id: grandchild_2_test_node, + sibling1_node.unique_id: sibling1_node, + sibling2_node.unique_id: sibling2_node, } assert selected == expected @@ -191,8 +213,10 @@ def test_select_nodes_by_exclude_tag(): selected = select_nodes(project_dir=SAMPLE_PROJ_PATH, nodes=sample_nodes, exclude=["tag:has_child"]) expected = { child_node.unique_id: child_node, - grandchild_1_test_node.unique_id: grandchild_1_test_node, - grandchild_2_test_node.unique_id: grandchild_2_test_node, + sibling1_node.unique_id: sibling1_node, + sibling2_node.unique_id: sibling2_node, + another_grandparent_node.unique_id: another_grandparent_node, + orphaned_node.unique_id: orphaned_node, } assert selected == expected @@ -217,8 +241,10 @@ def test_select_nodes_by_exclude_union_config_test_tags(): ) expected = { grandparent_node.unique_id: grandparent_node, + another_grandparent_node.unique_id: another_grandparent_node, parent_node.unique_id: parent_node, child_node.unique_id: child_node, + orphaned_node.unique_id: orphaned_node, } assert selected == expected @@ -227,15 +253,156 @@ def test_select_nodes_by_path_dir(): selected = select_nodes(project_dir=SAMPLE_PROJ_PATH, nodes=sample_nodes, select=["path:gen3/models"]) expected = { child_node.unique_id: child_node, - grandchild_1_test_node.unique_id: grandchild_1_test_node, - grandchild_2_test_node.unique_id: grandchild_2_test_node, + sibling1_node.unique_id: sibling1_node, + sibling2_node.unique_id: sibling2_node, + 
orphaned_node.unique_id: orphaned_node, } assert selected == expected def test_select_nodes_by_path_file(): selected = select_nodes(project_dir=SAMPLE_PROJ_PATH, nodes=sample_nodes, select=["path:gen2/models/parent.sql"]) - expected = { - parent_node.unique_id: parent_node, - } - assert selected == expected + expected = [parent_node.unique_id] + assert list(selected.keys()) == expected + + +def test_select_nodes_by_child_and_precursors(): + selected = select_nodes(project_dir=SAMPLE_PROJ_PATH, nodes=sample_nodes, select=["+child"]) + expected = [ + another_grandparent_node.unique_id, + child_node.unique_id, + grandparent_node.unique_id, + parent_node.unique_id, + ] + assert sorted(selected.keys()) == expected + + +def test_select_nodes_by_child_and_precursors_exclude_tags(): + selected = select_nodes( + project_dir=SAMPLE_PROJ_PATH, nodes=sample_nodes, select=["+child"], exclude=["tag:has_child"] + ) + expected = [another_grandparent_node.unique_id, child_node.unique_id] + assert sorted(selected.keys()) == expected + + +def test_select_node_by_child_and_precursors_partial_tree(): + selected = select_nodes(project_dir=SAMPLE_PROJ_PATH, nodes=sample_nodes, select=["+parent"]) + expected = [another_grandparent_node.unique_id, grandparent_node.unique_id, parent_node.unique_id] + assert sorted(selected.keys()) == expected + + +def test_select_node_by_precursors_with_orphaned_node(): + selected = select_nodes(project_dir=SAMPLE_PROJ_PATH, nodes=sample_nodes, select=["+orphaned"]) + expected = [orphaned_node.unique_id] + assert list(selected.keys()) == expected + + +def test_select_nodes_by_child_and_first_degree_precursors(): + selected = select_nodes(project_dir=SAMPLE_PROJ_PATH, nodes=sample_nodes, select=["1+child"]) + expected = [ + child_node.unique_id, + parent_node.unique_id, + ] + assert sorted(selected.keys()) == expected + + +def test_select_nodes_by_child_and_second_degree_precursors(): + selected = select_nodes(project_dir=SAMPLE_PROJ_PATH, 
nodes=sample_nodes, select=["2+child"]) + expected = [ + another_grandparent_node.unique_id, + child_node.unique_id, + grandparent_node.unique_id, + parent_node.unique_id, + ] + assert sorted(selected.keys()) == expected + + +def test_select_node_by_exact_node_name(): + selected = select_nodes(project_dir=SAMPLE_PROJ_PATH, nodes=sample_nodes, select=["child"]) + expected = [child_node.unique_id] + assert list(selected.keys()) == expected + + +def test_select_node_by_child_and_precursors_no_node(): + selected = select_nodes(project_dir=SAMPLE_PROJ_PATH, nodes=sample_nodes, select=["+modelDoesntExist"]) + expected = [] + assert list(selected.keys()) == expected + + +def test_select_node_by_descendants(): + selected = select_nodes(project_dir=SAMPLE_PROJ_PATH, nodes=sample_nodes, select=["grandparent+"]) + expected = [ + "model.dbt-proj.child", + "model.dbt-proj.grandparent", + "model.dbt-proj.parent", + "model.dbt-proj.sibling1", + "model.dbt-proj.sibling2", + ] + assert sorted(selected.keys()) == expected + + +def test_select_node_by_descendants_depth_first_degree(): + selected = select_nodes(project_dir=SAMPLE_PROJ_PATH, nodes=sample_nodes, select=["grandparent+1"]) + expected = [ + "model.dbt-proj.grandparent", + "model.dbt-proj.parent", + ] + assert sorted(selected.keys()) == expected + + +def test_select_node_by_descendants_union(): + selected = select_nodes(project_dir=SAMPLE_PROJ_PATH, nodes=sample_nodes, select=["grandparent+1", "parent+1"]) + expected = [ + "model.dbt-proj.child", + "model.dbt-proj.grandparent", + "model.dbt-proj.parent", + "model.dbt-proj.sibling1", + "model.dbt-proj.sibling2", + ] + assert sorted(selected.keys()) == expected + + +def test_select_node_by_descendants_intersection(): + selected = select_nodes(project_dir=SAMPLE_PROJ_PATH, nodes=sample_nodes, select=["grandparent+1,parent+1"]) + expected = [ + "model.dbt-proj.parent", + ] + assert sorted(selected.keys()) == expected + + +def 
test_select_node_by_descendants_intersection_with_tag(): + selected = select_nodes(project_dir=SAMPLE_PROJ_PATH, nodes=sample_nodes, select=["parent+1,tag:has_child"]) + expected = [ + "model.dbt-proj.parent", + ] + assert sorted(selected.keys()) == expected + + +def test_select_node_by_descendants_and_tag_union(): + selected = select_nodes(project_dir=SAMPLE_PROJ_PATH, nodes=sample_nodes, select=["child", "tag:has_child"]) + expected = [ + "model.dbt-proj.child", + "model.dbt-proj.grandparent", + "model.dbt-proj.parent", + ] + assert sorted(selected.keys()) == expected + + +def test_exclude_by_graph_selector(): + selected = select_nodes(project_dir=SAMPLE_PROJ_PATH, nodes=sample_nodes, exclude=["+parent"]) + expected = [ + "model.dbt-proj.child", + "model.dbt-proj.orphaned", + "model.dbt-proj.sibling1", + "model.dbt-proj.sibling2", + ] + assert sorted(selected.keys()) == expected + + +def test_exclude_by_union_graph_selector_and_tag(): + selected = select_nodes(project_dir=SAMPLE_PROJ_PATH, nodes=sample_nodes, exclude=["+parent", "tag:deprecated"]) + expected = [ + "model.dbt-proj.child", + "model.dbt-proj.orphaned", + ] + assert sorted(selected.keys()) == expected