From bfa9f40bdbf88c37c87b257e70f0e78a39f147a9 Mon Sep 17 00:00:00 2001 From: Nicholas Yager Date: Thu, 28 Dec 2023 13:45:22 -0500 Subject: [PATCH 01/12] feat: Add data structure for tracking Jinja blocks --- dbt_meshify/storage/jinja_blocks.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 dbt_meshify/storage/jinja_blocks.py diff --git a/dbt_meshify/storage/jinja_blocks.py b/dbt_meshify/storage/jinja_blocks.py new file mode 100644 index 0000000..f88221e --- /dev/null +++ b/dbt_meshify/storage/jinja_blocks.py @@ -0,0 +1,14 @@ +from dataclasses import dataclass +from pathlib import Path + + +@dataclass +class JinjaBlock: + """ + A common data structure for tracking blocks of text that represent Jinja blocks. + """ + + path: Path + start: int + end: int + content: str From a2c8d79c029ee36d1f735c675df897e84bde18dc Mon Sep 17 00:00:00 2001 From: Nicholas Yager Date: Thu, 28 Dec 2023 16:30:02 -0500 Subject: [PATCH 02/12] feat: Add basic JinjaBlock parsing and associated unit tests --- dbt_meshify/dbt_projects.py | 15 ++++++ dbt_meshify/storage/jinja_blocks.py | 53 +++++++++++++++++++ test-projects/split/split_proj/models/docs.md | 3 ++ .../split_proj/models/marts/__models.yml | 26 +++++---- .../split_proj/models/staging/__models.yml | 2 +- tests/unit/test_jinja_blocks.py | 39 ++++++++++++++ 6 files changed, 127 insertions(+), 11 deletions(-) create mode 100644 test-projects/split/split_proj/models/docs.md create mode 100644 tests/unit/test_jinja_blocks.py diff --git a/dbt_meshify/dbt_projects.py b/dbt_meshify/dbt_projects.py index 5c34b88..bb22cb9 100644 --- a/dbt_meshify/dbt_projects.py +++ b/dbt_meshify/dbt_projects.py @@ -26,6 +26,7 @@ from dbt_meshify.dbt import Dbt from dbt_meshify.exceptions import FatalMeshifyException +from dbt_meshify.storage.jinja_blocks import JinjaBlock class BaseDbtProject: @@ -311,6 +312,20 @@ def __init__( super().__init__(manifest, project, catalog, name, resources) + self.jinja_blocks: Dict[str, JinjaBlock] = 
self.find_jinja_blocks() + + def find_jinja_blocks(self) -> Dict[str, JinjaBlock]: + """For a given dbt Project, find all Jinja blocks for docs and macros""" + + blocks = {} + + for unique_id, item in self.manifest.docs.items(): + blocks[unique_id] = JinjaBlock.from_file( + path=self.path / item.original_file_path, block_type="docs", name=item.name + ) + + return blocks + def select_resources( self, select: Optional[str] = None, diff --git a/dbt_meshify/storage/jinja_blocks.py b/dbt_meshify/storage/jinja_blocks.py index f88221e..dc78773 100644 --- a/dbt_meshify/storage/jinja_blocks.py +++ b/dbt_meshify/storage/jinja_blocks.py @@ -1,5 +1,7 @@ +import re from dataclasses import dataclass from pathlib import Path +from typing import Tuple @dataclass @@ -9,6 +11,57 @@ class JinjaBlock: """ path: Path + block_type: str + name: str start: int end: int content: str + + @staticmethod + def find_block_range(file_content: str, block_type: str, name: str) -> Tuple[int, int]: + """Find the line number that a block started.""" + start_line = None + end_line = None + + for match in re.finditer( + r"{%\s+" + block_type + r"\s+" + name + r"\s+%}", file_content, re.MULTILINE + ): + start = match.span()[0] # .span() gives tuple (start, end) + start_line = file_content[:start].count("\n") + break + + if start_line is None: + raise Exception(f"Unable to find a {block_type} block with the name {name}.") + + for match in re.finditer(r"{%\s+end" + block_type + r"\s+%}", file_content, re.MULTILINE): + start = match.span()[0] # .span() gives tuple (start, end) + new_end_line = file_content[:start].count("\n") + + if new_end_line >= start_line: + end_line = new_end_line + break + + if end_line is None: + raise Exception(f"Unable to find a the closing end{block_type} block for {name}.") + + return start_line, end_line + + @staticmethod + def isolate_content_from_line_range(file_content: str, start: int, end: int) -> str: + """Given content, a start line number, and an end line number, return 
the content of a Jinja block.""" + print(file_content.split("\n")[start + 1 :]) + return "/n".join(file_content.split("\n")[start + 1 : end]) + + @classmethod + def from_file(cls, path: Path, block_type: str, name: str) -> "JinjaBlock": + """Find a specific Jinja block within a file, based on the block type and the name.""" + + file_content = open(path).read() + start, end = cls.find_block_range(file_content, block_type, name) + content = cls.isolate_content_from_line_range( + file_content=file_content, start=start, end=end + ) + + return cls( + path=path, block_type=block_type, name=name, start=start, end=end, content=content + ) diff --git a/test-projects/split/split_proj/models/docs.md b/test-projects/split/split_proj/models/docs.md new file mode 100644 index 0000000..491d656 --- /dev/null +++ b/test-projects/split/split_proj/models/docs.md @@ -0,0 +1,3 @@ +{% docs customer_id %} +The unique key for each customer. +{% enddocs %} diff --git a/test-projects/split/split_proj/models/marts/__models.yml b/test-projects/split/split_proj/models/marts/__models.yml index a9823fa..685b402 100644 --- a/test-projects/split/split_proj/models/marts/__models.yml +++ b/test-projects/split/split_proj/models/marts/__models.yml @@ -2,11 +2,12 @@ version: 2 models: - name: customers - description: Customer overview data mart, offering key details for each unique + description: + Customer overview data mart, offering key details for each unique customer. One row per customer. columns: - name: customer_id - description: The unique key of the orders mart. + description: "{{ doc('customer_id') }}" tests: - not_null - unique @@ -19,20 +20,24 @@ models: - name: last_ordered_at description: The timestamp of a customer's most recent order. - name: lifetime_spend_pretax - description: The sum of all the pre-tax subtotals of every order a customer + description: + The sum of all the pre-tax subtotals of every order a customer has placed. 
- name: lifetime_spend - description: The sum of all the order totals (including tax) that a customer + description: + The sum of all the order totals (including tax) that a customer has ever placed. - name: customer_type - description: Options are 'new' or 'returning', indicating if a customer has + description: + Options are 'new' or 'returning', indicating if a customer has ordered more than once or has only placed their first order to date. tests: - accepted_values: values: [new, returning] - name: orders - description: Order overview data mart, offering key details for each order inlcluding + description: + Order overview data mart, offering key details for each order inlcluding if it's a customer's first order and a food vs. drink item breakdown. One row per order. tests: @@ -53,7 +58,8 @@ models: to: ref('stg_customers') field: customer_id - name: location_id - description: The foreign key relating to the location the order was placed + description: + The foreign key relating to the location the order was placed at. - name: order_total description: The total amount of the order in USD including tax. @@ -74,14 +80,14 @@ models: - name: order_cost description: The sum of supply expenses to fulfill the order. - name: location_name - description: The full location name of where this order was placed. Denormalized + description: + The full location name of where this order was placed. Denormalized from `stg_locations`. - name: is_food_order description: A boolean indicating if this order included any food items. - name: is_drink_order description: A boolean indicating if this order included any drink items. - - name: leaf_node description: A leaf node model that is not referenced by any other model. columns: @@ -89,4 +95,4 @@ models: description: The unique key of the leaf node. 
tests: - not_null - - unique \ No newline at end of file + - unique diff --git a/test-projects/split/split_proj/models/staging/__models.yml b/test-projects/split/split_proj/models/staging/__models.yml index 1667c52..125edb7 100644 --- a/test-projects/split/split_proj/models/staging/__models.yml +++ b/test-projects/split/split_proj/models/staging/__models.yml @@ -5,7 +5,7 @@ models: description: Customer data with basic cleaning and transformation applied, one row per customer. columns: - name: customer_id - description: The unique key for each customer. + description: "{{ doc('customer_id') }}" tests: - not_null - unique diff --git a/tests/unit/test_jinja_blocks.py b/tests/unit/test_jinja_blocks.py new file mode 100644 index 0000000..d638458 --- /dev/null +++ b/tests/unit/test_jinja_blocks.py @@ -0,0 +1,39 @@ +from dbt_meshify.storage.jinja_blocks import JinjaBlock + +string = """\ + + +{% docs customer_id %} +The unique key for each customer. +{% enddocs %} +""" + +multiple_blocks = """\ + + +{% docs customer_id %} +The unique key for each customer. +{% enddocs %} + +{% docs potato_name %} +The name of the customer's favorite potato dish. +{% enddocs %} +""" + + +class TestJinjaBlock: + def test_from_file_detects_block_range(self): + range = JinjaBlock.find_block_range(string, "docs", "customer_id") + assert range == (2, 4) + + def test_from_file_extracts_content(self): + content = JinjaBlock.isolate_content_from_line_range(string, 2, 4) + assert content == "The unique key for each customer." + + def test_from_file_detects_block_range_in_multiple_blocks(self): + range = JinjaBlock.find_block_range(multiple_blocks, "docs", "potato_name") + assert range == (6, 8) + + def test_from_file_extracts_content_in_files_with_multiple_blocks(self): + content = JinjaBlock.isolate_content_from_line_range(multiple_blocks, 6, 8) + assert content == "The name of the customer's favorite potato dish." 
From 4b1029072b8b90c5f566cfca86afa95ff32a7f98 Mon Sep 17 00:00:00 2001 From: Nicholas Yager Date: Thu, 28 Dec 2023 16:40:35 -0500 Subject: [PATCH 03/12] fix: Migrate parsing code to use character positions and not line numbers --- dbt_meshify/storage/jinja_blocks.py | 15 +++++++++------ tests/unit/test_jinja_blocks.py | 8 ++++---- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/dbt_meshify/storage/jinja_blocks.py b/dbt_meshify/storage/jinja_blocks.py index dc78773..c9d2e5d 100644 --- a/dbt_meshify/storage/jinja_blocks.py +++ b/dbt_meshify/storage/jinja_blocks.py @@ -7,7 +7,7 @@ @dataclass class JinjaBlock: """ - A common data structure for tracking blocks of text that represent Jinja blocks. + A data structure for tracking Jinja blocks of text. Includes the start and end character positions, and the content of the block """ path: Path @@ -27,15 +27,15 @@ def find_block_range(file_content: str, block_type: str, name: str) -> Tuple[int r"{%\s+" + block_type + r"\s+" + name + r"\s+%}", file_content, re.MULTILINE ): start = match.span()[0] # .span() gives tuple (start, end) - start_line = file_content[:start].count("\n") + start_line = start # file_content[:start].count("\n") break if start_line is None: raise Exception(f"Unable to find a {block_type} block with the name {name}.") for match in re.finditer(r"{%\s+end" + block_type + r"\s+%}", file_content, re.MULTILINE): - start = match.span()[0] # .span() gives tuple (start, end) - new_end_line = file_content[:start].count("\n") + end = match.span()[1] # .span() gives tuple (start, end) + new_end_line = end # file_content[:start].count("\n") if new_end_line >= start_line: end_line = new_end_line @@ -49,8 +49,11 @@ def find_block_range(file_content: str, block_type: str, name: str) -> Tuple[int @staticmethod def isolate_content_from_line_range(file_content: str, start: int, end: int) -> str: """Given content, a start line number, and an end line number, return the content of a Jinja block.""" - 
print(file_content.split("\n")[start + 1 :]) - return "/n".join(file_content.split("\n")[start + 1 : end]) + raw_content = file_content[start:end] + match = re.search(r"{%.*%}\n(.*)\n{%.*%}", raw_content) + if match is None: + raise Exception("Unable to find the jinja block within the designated range.") + return match.group(1) @classmethod def from_file(cls, path: Path, block_type: str, name: str) -> "JinjaBlock": diff --git a/tests/unit/test_jinja_blocks.py b/tests/unit/test_jinja_blocks.py index d638458..66f0d5b 100644 --- a/tests/unit/test_jinja_blocks.py +++ b/tests/unit/test_jinja_blocks.py @@ -24,16 +24,16 @@ class TestJinjaBlock: def test_from_file_detects_block_range(self): range = JinjaBlock.find_block_range(string, "docs", "customer_id") - assert range == (2, 4) + assert range == (2, 72) def test_from_file_extracts_content(self): - content = JinjaBlock.isolate_content_from_line_range(string, 2, 4) + content = JinjaBlock.isolate_content_from_line_range(string, 2, 72) assert content == "The unique key for each customer." def test_from_file_detects_block_range_in_multiple_blocks(self): range = JinjaBlock.find_block_range(multiple_blocks, "docs", "potato_name") - assert range == (6, 8) + assert range == (74, 159) def test_from_file_extracts_content_in_files_with_multiple_blocks(self): - content = JinjaBlock.isolate_content_from_line_range(multiple_blocks, 6, 8) + content = JinjaBlock.isolate_content_from_line_range(multiple_blocks, 74, 159) assert content == "The name of the customer's favorite potato dish." 
From 45fef34ce8367c64a18491e818926baffd6c059c Mon Sep 17 00:00:00 2001 From: Nicholas Yager Date: Thu, 28 Dec 2023 17:01:45 -0500 Subject: [PATCH 04/12] fix: Support formatting characters in block range calculations --- dbt_meshify/dbt_projects.py | 11 +++++++++++ dbt_meshify/storage/jinja_blocks.py | 16 ++++++++-------- tests/unit/test_jinja_blocks.py | 9 +++++++-- 3 files changed, 26 insertions(+), 10 deletions(-) diff --git a/dbt_meshify/dbt_projects.py b/dbt_meshify/dbt_projects.py index bb22cb9..716443f 100644 --- a/dbt_meshify/dbt_projects.py +++ b/dbt_meshify/dbt_projects.py @@ -320,10 +320,21 @@ def find_jinja_blocks(self) -> Dict[str, JinjaBlock]: blocks = {} for unique_id, item in self.manifest.docs.items(): + if item.package_name != self.name: + continue + blocks[unique_id] = JinjaBlock.from_file( path=self.path / item.original_file_path, block_type="docs", name=item.name ) + for unique_id, macro in self.manifest.macros.items(): + if macro.package_name != self.name: + continue + + blocks[unique_id] = JinjaBlock.from_file( + path=self.path / macro.original_file_path, block_type="macro", name=macro.name + ) + return blocks def select_resources( diff --git a/dbt_meshify/storage/jinja_blocks.py b/dbt_meshify/storage/jinja_blocks.py index c9d2e5d..7585697 100644 --- a/dbt_meshify/storage/jinja_blocks.py +++ b/dbt_meshify/storage/jinja_blocks.py @@ -24,7 +24,9 @@ def find_block_range(file_content: str, block_type: str, name: str) -> Tuple[int end_line = None for match in re.finditer( - r"{%\s+" + block_type + r"\s+" + name + r"\s+%}", file_content, re.MULTILINE + r"{%-?\s+" + block_type + r"\s+" + name + r"([(a-zA-Z0-9=,_ )]*)\s-?%}", + file_content, + re.MULTILINE, ): start = match.span()[0] # .span() gives tuple (start, end) start_line = start # file_content[:start].count("\n") @@ -33,7 +35,9 @@ def find_block_range(file_content: str, block_type: str, name: str) -> Tuple[int if start_line is None: raise Exception(f"Unable to find a {block_type} block with 
the name {name}.") - for match in re.finditer(r"{%\s+end" + block_type + r"\s+%}", file_content, re.MULTILINE): + for match in re.finditer( + r"{%-?\s+end" + block_type + r"\s+-?%}", file_content, re.MULTILINE + ): end = match.span()[1] # .span() gives tuple (start, end) new_end_line = end # file_content[:start].count("\n") @@ -48,12 +52,8 @@ def find_block_range(file_content: str, block_type: str, name: str) -> Tuple[int @staticmethod def isolate_content_from_line_range(file_content: str, start: int, end: int) -> str: - """Given content, a start line number, and an end line number, return the content of a Jinja block.""" - raw_content = file_content[start:end] - match = re.search(r"{%.*%}\n(.*)\n{%.*%}", raw_content) - if match is None: - raise Exception("Unable to find the jinja block within the designated range.") - return match.group(1) + """Given content, a start position, and an end position, return the content of a Jinja block.""" + return file_content[start:end] @classmethod def from_file(cls, path: Path, block_type: str, name: str) -> "JinjaBlock": diff --git a/tests/unit/test_jinja_blocks.py b/tests/unit/test_jinja_blocks.py index 66f0d5b..020fc0c 100644 --- a/tests/unit/test_jinja_blocks.py +++ b/tests/unit/test_jinja_blocks.py @@ -28,7 +28,9 @@ def test_from_file_detects_block_range(self): def test_from_file_extracts_content(self): content = JinjaBlock.isolate_content_from_line_range(string, 2, 72) - assert content == "The unique key for each customer." 
+ assert ( + content == "{% docs customer_id %}\nThe unique key for each customer.\n{% enddocs %}" + ) def test_from_file_detects_block_range_in_multiple_blocks(self): range = JinjaBlock.find_block_range(multiple_blocks, "docs", "potato_name") @@ -36,4 +38,7 @@ def test_from_file_detects_block_range_in_multiple_blocks(self): def test_from_file_extracts_content_in_files_with_multiple_blocks(self): content = JinjaBlock.isolate_content_from_line_range(multiple_blocks, 74, 159) - assert content == "The name of the customer's favorite potato dish." + assert ( + content + == "{% docs potato_name %}\nThe name of the customer's favorite potato dish.\n{% enddocs %}" + ) From 25c56d6d18389ec72eac0120048b8e9cb89808d1 Mon Sep 17 00:00:00 2001 From: Nicholas Yager Date: Thu, 28 Dec 2023 19:36:34 -0500 Subject: [PATCH 05/12] feat: Add file appending. Wire up basic macro appending into new projects --- dbt_meshify/change.py | 2 ++ dbt_meshify/dbt_projects.py | 1 - dbt_meshify/main.py | 3 ++- dbt_meshify/storage/dbt_project_editors.py | 24 ++++++++++++++++++++- dbt_meshify/storage/file_content_editors.py | 12 +++++++++++ dbt_meshify/storage/file_manager.py | 6 ++++++ 6 files changed, 45 insertions(+), 3 deletions(-) diff --git a/dbt_meshify/change.py b/dbt_meshify/change.py index 429eee8..1871805 100644 --- a/dbt_meshify/change.py +++ b/dbt_meshify/change.py @@ -9,6 +9,7 @@ class Operation(str, Enum): """An operation describes the type of work being performed.""" Add = "add" + Append = "append" Update = "update" Remove = "remove" Copy = "copy" @@ -17,6 +18,7 @@ class Operation(str, Enum): prepositions = { Operation.Add: "to", + Operation.Append: "to", Operation.Move: "to", Operation.Copy: "to", Operation.Update: "in", diff --git a/dbt_meshify/dbt_projects.py b/dbt_meshify/dbt_projects.py index 716443f..913320f 100644 --- a/dbt_meshify/dbt_projects.py +++ b/dbt_meshify/dbt_projects.py @@ -311,7 +311,6 @@ def __init__( resources = self.select_resources(output_key="unique_id") 
super().__init__(manifest, project, catalog, name, resources) - self.jinja_blocks: Dict[str, JinjaBlock] = self.find_jinja_blocks() def find_jinja_blocks(self) -> Dict[str, JinjaBlock]: diff --git a/dbt_meshify/main.py b/dbt_meshify/main.py index 9c3001f..ec9248d 100644 --- a/dbt_meshify/main.py +++ b/dbt_meshify/main.py @@ -246,7 +246,8 @@ def split( change_set = subproject_creator.initialize() return [change_set] - except Exception: + except Exception as e: + logger.debug(e) raise FatalMeshifyException(f"Error creating subproject {subproject.name}") diff --git a/dbt_meshify/storage/dbt_project_editors.py b/dbt_meshify/storage/dbt_project_editors.py index ecea8ba..fa6da1a 100644 --- a/dbt_meshify/storage/dbt_project_editors.py +++ b/dbt_meshify/storage/dbt_project_editors.py @@ -22,6 +22,7 @@ from dbt_meshify.dbt_projects import DbtSubProject from dbt_meshify.storage.file_content_editors import NamedList, filter_empty_dict_items from dbt_meshify.storage.file_manager import YAMLFileManager, yaml +from dbt_meshify.storage.jinja_blocks import JinjaBlock from dbt_meshify.utilities.contractor import Contractor from dbt_meshify.utilities.dependencies import DependenciesUpdater from dbt_meshify.utilities.grouper import ResourceGrouper @@ -185,7 +186,17 @@ def initialize(self) -> ChangeSet: elif resource.resource_type in ["macro", "group"]: if hasattr(resource, "patch_path") and resource.patch_path: change_set.add(self.copy_resource_yml(resource)) - change_set.add(self.copy_resource(resource)) + + if resource.unique_id in self.subproject.parent_project.jinja_blocks: + change_set.add( + self.copy_jinja_block( + resource, + self.subproject.parent_project.jinja_blocks[resource.unique_id], + ) + ) + + else: + change_set.add(self.copy_resource(resource)) else: logger.debug( @@ -237,6 +248,17 @@ def move_resource(self, resource: Resource) -> FileChange: source=self.subproject.parent_project.resolve_file_path(resource), ) + def copy_jinja_block(self, resource: Resource, 
jinja_block: JinjaBlock) -> FileChange: + """Move an existing jinja block to a new project""" + + return FileChange( + operation=Operation.Append, + entity_type=EntityType.Code, + identifier=resource.name, + path=self.subproject.resolve_file_path(resource), + data=jinja_block.content, + ) + def copy_resource(self, resource: Resource) -> FileChange: """ Copy a resource file from one project to another diff --git a/dbt_meshify/storage/file_content_editors.py b/dbt_meshify/storage/file_content_editors.py index ecd9997..5f88c44 100644 --- a/dbt_meshify/storage/file_content_editors.py +++ b/dbt_meshify/storage/file_content_editors.py @@ -107,6 +107,18 @@ def add(change: FileChange): RawFileManager.write_file(path=change.path, content=change.data) + @staticmethod + def append(change: FileChange): + """Append data to an existing file.""" + + if not change.path.parent.exists(): + change.path.parent.mkdir(parents=True, exist_ok=True) + + if change.data is None: + return RawFileManager.touch_file(change.path) + + RawFileManager.append_file(path=change.path, content=change.data) + @staticmethod def update(change: FileChange): """Update data to a new file.""" diff --git a/dbt_meshify/storage/file_manager.py b/dbt_meshify/storage/file_manager.py index 76a1e72..e845dcd 100644 --- a/dbt_meshify/storage/file_manager.py +++ b/dbt_meshify/storage/file_manager.py @@ -62,6 +62,12 @@ def write_file(path: Path, content: str) -> None: """Write a string value to a file in the filesystem""" path.write_text(content) + @staticmethod + def append_file(path: Path, content: str) -> None: + """Append a string value to a file in the filesystem""" + with open(path, "a") as file: + file.write(content) + @staticmethod def copy_file(source_path: Path, target_path: Path) -> None: if not target_path.parent.exists(): From 8a0fd7b3ca775caf7be568579afdf514da3a4b17 Mon Sep 17 00:00:00 2001 From: Nicholas Yager Date: Thu, 28 Dec 2023 19:44:51 -0500 Subject: [PATCH 06/12] docs: Add some TODOs to keep me 
honest --- dbt_meshify/storage/dbt_project_editors.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dbt_meshify/storage/dbt_project_editors.py b/dbt_meshify/storage/dbt_project_editors.py index fa6da1a..3c5840b 100644 --- a/dbt_meshify/storage/dbt_project_editors.py +++ b/dbt_meshify/storage/dbt_project_editors.py @@ -188,6 +188,9 @@ def initialize(self) -> ChangeSet: change_set.add(self.copy_resource_yml(resource)) if resource.unique_id in self.subproject.parent_project.jinja_blocks: + # TODO: How do we know when to copy vs move? + # TODO: What about docs? Docs have no graph, right? + change_set.add( self.copy_jinja_block( resource, From ee6ee20941eb975ce8a4cf296a49a0f80b586838 Mon Sep 17 00:00:00 2001 From: Nicholas Yager Date: Thu, 28 Dec 2023 21:07:27 -0500 Subject: [PATCH 07/12] feat: Add doc reference tracking and support writing docs to destination projects --- dbt_meshify/dbt_projects.py | 37 +++++++++++++++++++++- dbt_meshify/main.py | 2 +- dbt_meshify/storage/dbt_project_editors.py | 9 ++++-- dbt_meshify/storage/jinja_blocks.py | 9 +++++- 4 files changed, 52 insertions(+), 5 deletions(-) diff --git a/dbt_meshify/dbt_projects.py b/dbt_meshify/dbt_projects.py index 913320f..1ba0930 100644 --- a/dbt_meshify/dbt_projects.py +++ b/dbt_meshify/dbt_projects.py @@ -26,7 +26,7 @@ from dbt_meshify.dbt import Dbt from dbt_meshify.exceptions import FatalMeshifyException -from dbt_meshify.storage.jinja_blocks import JinjaBlock +from dbt_meshify.storage.jinja_blocks import JinjaBlock, find_doc_reference class BaseDbtProject: @@ -421,6 +421,8 @@ def __init__( self.groups = self._get_indirect_groups() self._rename_project() + self._referenced_docs: Optional[Set[str]] = None + def _rename_project(self) -> None: """ edits the project yml to take any instance of the parent project name and update it to the subproject name @@ -457,6 +459,39 @@ def _get_custom_macros(self) -> Set[str]: macros_set.update(self._get_macro_dependencies(macro)) return macros_set + @property 
+ def referenced_docs(self) -> Set[str]: + """Return a list of all docs referenced within this SubProject.""" + + if self._referenced_docs: + return self._referenced_docs + + docs = set() + for unique_id in self.resources: + if unique_id.startswith("test."): + continue + + node = self.get_manifest_node(unique_id) + + if node is None: + continue + + if hasattr(node, "raw_code"): + docs.update(find_doc_reference(node.raw_code)) + + if hasattr(node, "patch_path"): + path = self.parent_project.resolve_patch_path(node) + if path.exists(): + with open(path) as file: + docs.update(find_doc_reference(file.read())) + + # Use the search name for the doc to resolve a unique_id for the doc resource. + self._referenced_docs = { + unique_id for unique_id, doc in self.manifest.docs.items() if doc.name in docs + } + + return self._referenced_docs + def _get_indirect_groups(self) -> Set[str]: """ get a set of group unique_ids for all the selected resources diff --git a/dbt_meshify/main.py b/dbt_meshify/main.py index ec9248d..3742e43 100644 --- a/dbt_meshify/main.py +++ b/dbt_meshify/main.py @@ -247,7 +247,7 @@ def split( return [change_set] except Exception as e: - logger.debug(e) + logger.exception(e) # TODO: Remove this line! raise FatalMeshifyException(f"Error creating subproject {subproject.name}") diff --git a/dbt_meshify/storage/dbt_project_editors.py b/dbt_meshify/storage/dbt_project_editors.py index 3c5840b..423bce8 100644 --- a/dbt_meshify/storage/dbt_project_editors.py +++ b/dbt_meshify/storage/dbt_project_editors.py @@ -142,7 +142,12 @@ def initialize(self) -> ChangeSet: f"Identifying operations required to split {subproject.name} from {subproject.parent_project.name}." 
) - for unique_id in subproject.resources | subproject.custom_macros | subproject.groups: + for unique_id in ( + subproject.resources + | subproject.custom_macros + | subproject.groups + | subproject.referenced_docs + ): resource = subproject.get_manifest_node(unique_id) if not resource: raise KeyError(f"Resource {unique_id} not found in manifest") @@ -183,7 +188,7 @@ def initialize(self) -> ChangeSet: ): change_set.extend(reference_updater.update_parent_refs(resource)) - elif resource.resource_type in ["macro", "group"]: + elif resource.resource_type in ["macro", "group", "doc"]: if hasattr(resource, "patch_path") and resource.patch_path: change_set.add(self.copy_resource_yml(resource)) diff --git a/dbt_meshify/storage/jinja_blocks.py b/dbt_meshify/storage/jinja_blocks.py index 7585697..ddf7b17 100644 --- a/dbt_meshify/storage/jinja_blocks.py +++ b/dbt_meshify/storage/jinja_blocks.py @@ -1,7 +1,7 @@ import re from dataclasses import dataclass from pathlib import Path -from typing import Tuple +from typing import Set, Tuple @dataclass @@ -68,3 +68,10 @@ def from_file(cls, path: Path, block_type: str, name: str) -> "JinjaBlock": return cls( path=path, block_type=block_type, name=name, start=start, end=end, content=content ) + + +def find_doc_reference(content: str) -> Set[str]: + """Find all doc block references within a string.""" + matches = re.findall(r"{{\sdoc\(\'?\"?([a-zA-Z0-9_\-\.]+)\'?\"?\)\s}}", content) + + return set(matches) From 688cebbecd4a40e667f762465e4ef8ab641a161b Mon Sep 17 00:00:00 2001 From: Nicholas Yager Date: Thu, 28 Dec 2023 21:25:04 -0500 Subject: [PATCH 08/12] tests: Tweak tests to cover new macro and doc copying behavior --- .../split/split_proj/macros/_macros.yml | 4 +++- .../split/split_proj/macros/cents_to_dollars.sql | 5 +++++ tests/integration/test_split_command.py | 16 ++++++++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/test-projects/split/split_proj/macros/_macros.yml 
b/test-projects/split/split_proj/macros/_macros.yml index 40d3dab..1767960 100644 --- a/test-projects/split/split_proj/macros/_macros.yml +++ b/test-projects/split/split_proj/macros/_macros.yml @@ -1,3 +1,5 @@ macros: - name: cents_to_dollars - description: Converts cents to dollars \ No newline at end of file + description: Converts cents to dollars + - name: dollars_to_cents + description: Converts dollars to cents diff --git a/test-projects/split/split_proj/macros/cents_to_dollars.sql b/test-projects/split/split_proj/macros/cents_to_dollars.sql index efe3361..a48174a 100644 --- a/test-projects/split/split_proj/macros/cents_to_dollars.sql +++ b/test-projects/split/split_proj/macros/cents_to_dollars.sql @@ -3,3 +3,8 @@ {% macro cents_to_dollars(column_name, precision=2) -%} ({{ column_name }} / 100)::{{ type_numeric() }}(16, {{ precision }}) {%- endmacro %} + + +{% macro dollars_to_cents(column_name) -%} + ({{ column_name }} * 100)::{{ type_numeric() }}(16, 0) +{%- endmacro %} diff --git a/tests/integration/test_split_command.py b/tests/integration/test_split_command.py index bd15751..6801624 100644 --- a/tests/integration/test_split_command.py +++ b/tests/integration/test_split_command.py @@ -43,6 +43,13 @@ def test_split_one_model(self, project): ).read_text() assert x_proj_ref in child_sql + # Copied a referenced docs block + assert (Path(dest_project_path) / "my_new_project" / "models" / "docs.md").exists() + assert ( + "customer_id" + in (Path(dest_project_path) / "my_new_project" / "models" / "docs.md").read_text() + ) + def test_split_one_model_one_source(self, project): runner = CliRunner() result = runner.invoke( @@ -111,6 +118,15 @@ def test_split_one_model_one_source_custom_macro(self, project): assert ( Path(dest_project_path) / "my_new_project" / "macros" / "cents_to_dollars.sql" ).exists() + + # Confirm that we did not bring over an unreferenced dollars_to_cents macro + assert ( + "dollars_to_cents" + not in ( + Path(dest_project_path) / 
"my_new_project" / "macros" / "cents_to_dollars.sql" + ).read_text() + ) + # copied custom macro parents too! assert ( Path(dest_project_path) / "my_new_project" / "macros" / "type_numeric.sql" From 1d9008fceede0520d2d83e8ee6f20c367b6830cd Mon Sep 17 00:00:00 2001 From: Nicholas Yager Date: Thu, 28 Dec 2023 21:33:10 -0500 Subject: [PATCH 09/12] style: Remove unnecessary todos --- dbt_meshify/storage/dbt_project_editors.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/dbt_meshify/storage/dbt_project_editors.py b/dbt_meshify/storage/dbt_project_editors.py index 423bce8..2b45c91 100644 --- a/dbt_meshify/storage/dbt_project_editors.py +++ b/dbt_meshify/storage/dbt_project_editors.py @@ -193,9 +193,6 @@ def initialize(self) -> ChangeSet: change_set.add(self.copy_resource_yml(resource)) if resource.unique_id in self.subproject.parent_project.jinja_blocks: - # TODO: How do we know when to copy vs move? - # TODO: What about docs? Docs have no graph, right? - change_set.add( self.copy_jinja_block( resource, From 892dd650c256a512cf064ac5e55cf33c646bc45d Mon Sep 17 00:00:00 2001 From: Nicholas Yager Date: Thu, 28 Dec 2023 21:35:20 -0500 Subject: [PATCH 10/12] refactor: Rename a method in JinjaBlock --- dbt_meshify/storage/jinja_blocks.py | 6 ++---- tests/unit/test_jinja_blocks.py | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/dbt_meshify/storage/jinja_blocks.py b/dbt_meshify/storage/jinja_blocks.py index ddf7b17..dcc4630 100644 --- a/dbt_meshify/storage/jinja_blocks.py +++ b/dbt_meshify/storage/jinja_blocks.py @@ -51,7 +51,7 @@ def find_block_range(file_content: str, block_type: str, name: str) -> Tuple[int return start_line, end_line @staticmethod - def isolate_content_from_line_range(file_content: str, start: int, end: int) -> str: + def isolate_content(file_content: str, start: int, end: int) -> str: """Given content, a start position, and an end position, return the content of a Jinja block.""" return file_content[start:end] @@ -61,9 +61,7 @@ 
def from_file(cls, path: Path, block_type: str, name: str) -> "JinjaBlock": file_content = open(path).read() start, end = cls.find_block_range(file_content, block_type, name) - content = cls.isolate_content_from_line_range( - file_content=file_content, start=start, end=end - ) + content = cls.isolate_content(file_content=file_content, start=start, end=end) return cls( path=path, block_type=block_type, name=name, start=start, end=end, content=content diff --git a/tests/unit/test_jinja_blocks.py b/tests/unit/test_jinja_blocks.py index 020fc0c..6a8e896 100644 --- a/tests/unit/test_jinja_blocks.py +++ b/tests/unit/test_jinja_blocks.py @@ -27,7 +27,7 @@ def test_from_file_detects_block_range(self): assert range == (2, 72) def test_from_file_extracts_content(self): - content = JinjaBlock.isolate_content_from_line_range(string, 2, 72) + content = JinjaBlock.isolate_content(string, 2, 72) assert ( content == "{% docs customer_id %}\nThe unique key for each customer.\n{% enddocs %}" ) @@ -37,7 +37,7 @@ def test_from_file_detects_block_range_in_multiple_blocks(self): assert range == (74, 159) def test_from_file_extracts_content_in_files_with_multiple_blocks(self): - content = JinjaBlock.isolate_content_from_line_range(multiple_blocks, 74, 159) + content = JinjaBlock.isolate_content(multiple_blocks, 74, 159) assert ( content == "{% docs potato_name %}\nThe name of the customer's favorite potato dish.\n{% enddocs %}" From 08246a598ad5472a56a8b5ef11aaaa34a0b6fae0 Mon Sep 17 00:00:00 2001 From: Nicholas Yager Date: Tue, 2 Jan 2024 13:44:36 -0500 Subject: [PATCH 11/12] fix: Convert from open().read() to read_text() --- dbt_meshify/storage/jinja_blocks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt_meshify/storage/jinja_blocks.py b/dbt_meshify/storage/jinja_blocks.py index dcc4630..795f061 100644 --- a/dbt_meshify/storage/jinja_blocks.py +++ b/dbt_meshify/storage/jinja_blocks.py @@ -59,7 +59,7 @@ def isolate_content(file_content: str, start: int, end: 
int) -> str: def from_file(cls, path: Path, block_type: str, name: str) -> "JinjaBlock": """Find a specific Jinja block within a file, based on the block type and the name.""" - file_content = open(path).read() + file_content = path.read_text() start, end = cls.find_block_range(file_content, block_type, name) content = cls.isolate_content(file_content=file_content, start=start, end=end) From 0163c70ffc190aa6f720f1dd8e29ded6680f3aa2 Mon Sep 17 00:00:00 2001 From: Nicholas Yager Date: Tue, 2 Jan 2024 13:49:59 -0500 Subject: [PATCH 12/12] fix: Raise an exception in referenced_docs if a node cannot be found --- dbt_meshify/dbt_projects.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt_meshify/dbt_projects.py b/dbt_meshify/dbt_projects.py index 1ba0930..a0ed37f 100644 --- a/dbt_meshify/dbt_projects.py +++ b/dbt_meshify/dbt_projects.py @@ -474,7 +474,7 @@ def referenced_docs(self) -> Set[str]: node = self.get_manifest_node(unique_id) if node is None: - continue + raise Exception(f"Unable to find referenced node {node}") if hasattr(node, "raw_code"): docs.update(find_doc_reference(node.raw_code))