From da9b1be1a8d899a313fd3664284541d91d25447e Mon Sep 17 00:00:00 2001 From: Kshitij Aranke Date: Tue, 26 Mar 2024 00:30:44 +0000 Subject: [PATCH 1/7] Fix #9608: Unit test fixture compilation --- core/dbt/contracts/graph/nodes.py | 10 ---- core/dbt/parser/unit_tests.py | 7 ++- core/dbt/task/printer.py | 2 +- core/dbt/task/test.py | 26 +++++--- .../unit_testing/test_unit_testing.py | 59 +++++++++++++++++++ 5 files changed, 82 insertions(+), 22 deletions(-) diff --git a/core/dbt/contracts/graph/nodes.py b/core/dbt/contracts/graph/nodes.py index e62bcdafb48..134c272db23 100644 --- a/core/dbt/contracts/graph/nodes.py +++ b/core/dbt/contracts/graph/nodes.py @@ -954,16 +954,6 @@ class UnitTestDefinition(NodeInfoMixin, GraphNode, UnitTestDefinitionResource): def resource_class(cls) -> Type[UnitTestDefinitionResource]: return UnitTestDefinitionResource - @property - def build_path(self): - # TODO: is this actually necessary? - return self.original_file_path - - @property - def compiled_path(self): - # TODO: is this actually necessary? - return self.original_file_path - @property def depends_on_nodes(self): return self.depends_on.nodes diff --git a/core/dbt/parser/unit_tests.py b/core/dbt/parser/unit_tests.py index 1355a29f671..f987f5a512f 100644 --- a/core/dbt/parser/unit_tests.py +++ b/core/dbt/parser/unit_tests.py @@ -123,7 +123,8 @@ def parse_unit_test_case(self, test_case: UnitTestDefinition): common_fields = { "resource_type": NodeType.Model, - "original_file_path": original_input_node.original_file_path, + # root directory for input and output fixtures + "original_file_path": unit_test_node.original_file_path, "config": ModelConfig(materialized="ephemeral"), "database": original_input_node.database, "alias": original_input_node.identifier, @@ -144,7 +145,7 @@ def parse_unit_test_case(self, test_case: UnitTestDefinition): package_name=original_input_node.package_name, unique_id=f"model.{original_input_node.package_name}.{input_name}", name=input_name, - path=original_input_node.path or f"{input_name}.sql", + path=f"{input_name}.sql", ) if ( original_input_node.resource_type == NodeType.Model @@ -162,7 +163,7 @@ def parse_unit_test_case(self, test_case: UnitTestDefinition): package_name=original_input_node.package_name, unique_id=f"model.{original_input_node.package_name}.{input_name}", name=original_input_node.name, # must be the same name for source lookup to work - path=input_name + ".sql", # for writing out compiled_code + path=f"{input_name}.sql", # for writing out compiled_code source_name=original_input_node.source_name, # needed for source lookup ) # Sources need to go in the sources dictionary in order to create the right lookup diff --git a/core/dbt/task/printer.py b/core/dbt/task/printer.py index 8d6db2e35ba..d67f6f00b09 100644 --- a/core/dbt/task/printer.py +++ b/core/dbt/task/printer.py @@ -105,7 +105,7 @@ def print_run_result_error(result, newline: bool = True, is_warning: bool = Fals else: fire_event(RunResultErrorNoMessage(status=result.status)) - if result.node.build_path is not None: + if result.node.compiled_path is not None: with TextOnly(): fire_event(Formatting("")) fire_event(SQLCompiledPath(path=result.node.compiled_path)) diff --git a/core/dbt/task/test.py b/core/dbt/task/test.py index d3b16b88bc0..84b1739c4ce 100644 --- a/core/dbt/task/test.py +++ b/core/dbt/task/test.py @@ -12,7 +12,13 @@ from .compile import CompileRunner from .run import RunTask -from dbt.contracts.graph.nodes import TestNode, UnitTestDefinition, UnitTestNode +from dbt.contracts.graph.nodes import ( + TestNode, + UnitTestDefinition, + UnitTestNode, + GenericTestNode, + SingularTestNode, +) from dbt.contracts.graph.manifest import Manifest from dbt.artifacts.schemas.results import TestStatus from dbt.artifacts.schemas.run import RunResult @@ -180,7 +186,7 @@ def build_unit_test_manifest_from_test( def execute_unit_test( self, unit_test_def: UnitTestDefinition, manifest: Manifest - ) -> UnitTestResultData: + ) -> tuple[UnitTestNode, UnitTestResultData]: unit_test_manifest = self.build_unit_test_manifest_from_test(unit_test_def, manifest) @@ -190,6 +196,7 @@ def execute_unit_test( # Compile the node unit_test_node = self.compiler.compile_node(unit_test_node, unit_test_manifest, {}) + assert isinstance(unit_test_node, UnitTestNode) # generate_runtime_unit_test_context not strictly needed - this is to run the 'unit' # materialization, not compile the node.compiled_code @@ -243,18 +250,21 @@ def execute_unit_test( rendered=rendered, ) - return UnitTestResultData( + unit_test_result_data = UnitTestResultData( diff=diff, should_error=should_error, adapter_response=adapter_response, ) - def execute(self, test: Union[TestNode, UnitTestDefinition], manifest: Manifest): + return unit_test_node, unit_test_result_data + + def execute(self, test: Union[TestNode, UnitTestNode], manifest: Manifest): if isinstance(test, UnitTestDefinition): - unit_test_result = self.execute_unit_test(test, manifest) - return self.build_unit_test_run_result(test, unit_test_result) + unit_test_node, unit_test_result = self.execute_unit_test(test, manifest) + return self.build_unit_test_run_result(unit_test_node, unit_test_result) else: # Note: manifest here is a normal manifest + assert isinstance(test, (SingularTestNode, GenericTestNode)) test_result = self.execute_data_test(test, manifest) return self.build_test_run_result(test, test_result) @@ -293,7 +303,7 @@ def build_test_run_result(self, test: TestNode, result: TestResultData) -> RunRe return run_result def build_unit_test_run_result( - self, test: UnitTestDefinition, result: UnitTestResultData + self, test: UnitTestNode, result: UnitTestResultData ) -> RunResult: thread_id = threading.current_thread().name @@ -306,7 +316,7 @@ def build_unit_test_run_result( failures = 1 return RunResult( - node=test, # type: ignore + node=test, status=status, timing=[], thread_id=thread_id, diff --git a/tests/functional/unit_testing/test_unit_testing.py b/tests/functional/unit_testing/test_unit_testing.py index c5ce83d2eb3..35281e1c86e 100644 --- a/tests/functional/unit_testing/test_unit_testing.py +++ b/tests/functional/unit_testing/test_unit_testing.py @@ -5,6 +5,9 @@ run_dbt, write_file, get_manifest, + run_dbt_and_capture, + read_file, + file_exists, ) from dbt.contracts.results import NodeStatus from dbt.exceptions import DuplicateResourceNameError, ParsingError @@ -442,3 +445,59 @@ def test_unit_test_ext_nodes( run_dbt(["run"], expect_pass=True) results = run_dbt(["test", "--select", "valid_emails"], expect_pass=True) assert len(results) == 1 + + +subfolder_model_a_sql = """ +select + 1 as id, 'blue' as color +""" + +subfolder_model_b_sql = """ +select + id, + color +from {{ ref('model_a') }} +""" + +subfolder_my_model_yml = """ +unit_tests: + - name: my_unit_test + model: model_b + given: + - input: ref('model_a') + rows: + - { id: 1, color: 'blue' } + expect: + rows: + - { id: 1, color: 'red' } +""" + + +class TestUnitTestSubfolderPath: + @pytest.fixture(scope="class") + def models(self): + return { + "subfolder": { + "model_a.sql": subfolder_model_a_sql, + "model_b.sql": subfolder_model_b_sql, + "my_model.yml": subfolder_my_model_yml, + } + } + + def test_subfolder_unit_test(self, project): + results, output = run_dbt_and_capture(["build"], expect_pass=False) + + # Test that input fixture doesn't overwrite the original model + assert ( + read_file("target/compiled/test/models/subfolder/model_a.sql").strip() + == subfolder_model_a_sql.strip() + ) + + # Test that correct path is written in logs + assert ( + "target/compiled/test/models/subfolder/my_model.yml/models/subfolder/my_unit_test.sql" + in output + ) + assert file_exists( + "target/compiled/test/models/subfolder/my_model.yml/models/subfolder/my_unit_test.sql" + ) From 8d185c16cab6bc4161e77800a3191f4a1c8c6e69 Mon Sep 17 00:00:00 2001 From: Kshitij Aranke Date: Tue, 26 Mar 2024 00:34:17 +0000 Subject: [PATCH 2/7] Add changelog entry --- .changes/unreleased/Fixes-20240326-003411.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changes/unreleased/Fixes-20240326-003411.yaml diff --git a/.changes/unreleased/Fixes-20240326-003411.yaml b/.changes/unreleased/Fixes-20240326-003411.yaml new file mode 100644 index 00000000000..f5b5fe9e095 --- /dev/null +++ b/.changes/unreleased/Fixes-20240326-003411.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Unit test path outputs +time: 2024-03-26T00:34:11.162594Z +custom: + Author: aranke + Issue: "9608" From 62b4c2a4b379a641d9e0c8735f38f864b2b66f45 Mon Sep 17 00:00:00 2001 From: Kshitij Aranke Date: Tue, 26 Mar 2024 00:39:25 +0000 Subject: [PATCH 3/7] tuple -> Tuple --- core/dbt/task/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/dbt/task/test.py b/core/dbt/task/test.py index 84b1739c4ce..7f5ed77f7e1 100644 --- a/core/dbt/task/test.py +++ b/core/dbt/task/test.py @@ -7,7 +7,7 @@ from dbt_common.events.format import pluralize from dbt_common.dataclass_schema import dbtClassMixin import threading -from typing import Dict, Any, Optional, Union, List, TYPE_CHECKING +from typing import Dict, Any, Optional, Union, List, TYPE_CHECKING, Tuple from .compile import CompileRunner from .run import RunTask @@ -186,7 +186,7 @@ def build_unit_test_manifest_from_test( def execute_unit_test( self, unit_test_def: UnitTestDefinition, manifest: Manifest - ) -> tuple[UnitTestNode, UnitTestResultData]: + ) -> Tuple[UnitTestNode, UnitTestResultData]: unit_test_manifest = self.build_unit_test_manifest_from_test(unit_test_def, manifest) From 23378d1ada2e96aa2bfe0b049bc2e69813cb0f20 Mon Sep 17 00:00:00 2001 From: Kshitij Aranke Date: Tue, 26 Mar 2024 01:01:25 +0000 Subject: [PATCH 4/7] Use node unique_id instead of name in test_ut_names --- tests/functional/unit_testing/test_ut_names.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/functional/unit_testing/test_ut_names.py b/tests/functional/unit_testing/test_ut_names.py index 27c0a56201c..d1721438576 100644 --- a/tests/functional/unit_testing/test_ut_names.py +++ b/tests/functional/unit_testing/test_ut_names.py @@ -27,33 +27,37 @@ def test_duplicate_test_names_across_models(self, project): # Select duplicate tests results, log_output = run_dbt_and_capture(["test"], expect_pass=True) assert len(results) == 2 - assert ["my_model_a", "my_model_b"] == sorted([result.node.model for result in results]) + assert {"model.test.my_model_a", "model.test.my_model_b"} == { + result.node.tested_node_unique_id for result in results + } assert "my_model_a::my_test_name" in log_output assert "my_model_b::my_test_name" in log_output # Test select duplicates by by test name results = run_dbt(["test", "--select", "test_name:my_test_name"]) assert len(results) == 2 - assert ["my_model_a", "my_model_b"] == sorted([result.node.model for result in results]) + assert {"model.test.my_model_a", "model.test.my_model_b"} == { + result.node.tested_node_unique_id for result in results + } assert "my_model_a::my_test_name" in log_output assert "my_model_b::my_test_name" in log_output results = run_dbt(["test", "--select", "my_model_a,test_name:my_test_name"]) assert len(results) == 1 - assert results[0].node.model == "my_model_a" + assert results[0].node.tested_node_unique_id == "model.test.my_model_a" results = run_dbt(["test", "--select", "my_model_b,test_name:my_test_name"]) assert len(results) == 1 - assert results[0].node.model == "my_model_b" + assert results[0].node.tested_node_unique_id == "model.test.my_model_b" # Test select by model name results = run_dbt(["test", "--select", "my_model_a"]) assert len(results) == 1 - assert results[0].node.model == "my_model_a" + assert results[0].node.tested_node_unique_id == "model.test.my_model_a" results = run_dbt(["test", "--select", "my_model_b"]) assert len(results) == 1 - assert results[0].node.model == "my_model_b" + assert results[0].node.tested_node_unique_id == "model.test.my_model_b" class TestUnitTestDuplicateTestNamesWithinModel: From efec28b25be2dbfda0c416d0728a634484df4df3 Mon Sep 17 00:00:00 2001 From: Kshitij Aranke Date: Tue, 26 Mar 2024 01:14:19 +0000 Subject: [PATCH 5/7] Fix newlines on windows --- tests/functional/unit_testing/test_unit_testing.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/functional/unit_testing/test_unit_testing.py b/tests/functional/unit_testing/test_unit_testing.py index 35281e1c86e..7ac8b4f3493 100644 --- a/tests/functional/unit_testing/test_unit_testing.py +++ b/tests/functional/unit_testing/test_unit_testing.py @@ -447,10 +447,7 @@ def test_unit_test_ext_nodes( assert len(results) == 1 -subfolder_model_a_sql = """ -select - 1 as id, 'blue' as color -""" +subfolder_model_a_sql = """select 1 as id, 'blue' as color""" subfolder_model_b_sql = """ select From 77c91441999fc9f92d83c99416070eeebc34f057 Mon Sep 17 00:00:00 2001 From: Kshitij Aranke Date: Tue, 26 Mar 2024 01:35:17 +0000 Subject: [PATCH 6/7] normalize paths for windows --- tests/functional/unit_testing/test_unit_testing.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/functional/unit_testing/test_unit_testing.py b/tests/functional/unit_testing/test_unit_testing.py index 7ac8b4f3493..887c1907e76 100644 --- a/tests/functional/unit_testing/test_unit_testing.py +++ b/tests/functional/unit_testing/test_unit_testing.py @@ -33,6 +33,7 @@ test_my_model_incremental_yml_wrong_override, test_my_model_incremental_yml_no_this_input, ) +from tests.unit.utils import normalize class TestUnitTests: @@ -492,9 +493,13 @@ def test_subfolder_unit_test(self, project): # Test that correct path is written in logs assert ( - "target/compiled/test/models/subfolder/my_model.yml/models/subfolder/my_unit_test.sql" + normalize( + "target/compiled/test/models/subfolder/my_model.yml/models/subfolder/my_unit_test.sql" + ) in output ) assert file_exists( - "target/compiled/test/models/subfolder/my_model.yml/models/subfolder/my_unit_test.sql" + normalize( + "target/compiled/test/models/subfolder/my_model.yml/models/subfolder/my_unit_test.sql" + ) ) From f2e0b0d2b56aa65602d4d5d89bed2eedadedfd0c Mon Sep 17 00:00:00 2001 From: Kshitij Aranke Date: Tue, 26 Mar 2024 12:50:37 +0000 Subject: [PATCH 7/7] consolidate fields --- core/dbt/parser/unit_tests.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/core/dbt/parser/unit_tests.py b/core/dbt/parser/unit_tests.py index f987f5a512f..763efab44aa 100644 --- a/core/dbt/parser/unit_tests.py +++ b/core/dbt/parser/unit_tests.py @@ -120,6 +120,7 @@ def parse_unit_test_case(self, test_case: UnitTestDefinition): original_input_node = self._get_original_input_node( given.input, tested_node, test_case.name ) + input_name = original_input_node.name common_fields = { "resource_type": NodeType.Model, @@ -132,6 +133,10 @@ def parse_unit_test_case(self, test_case: UnitTestDefinition): "fqn": original_input_node.fqn, "checksum": FileHash.empty(), "raw_code": self._build_fixture_raw_code(given.rows, None), + "package_name": original_input_node.package_name, + "unique_id": f"model.{original_input_node.package_name}.{input_name}", + "name": input_name, + "path": f"{input_name}.sql", } if original_input_node.resource_type in ( @@ -139,14 +144,7 @@ def parse_unit_test_case(self, test_case: UnitTestDefinition): NodeType.Seed, NodeType.Snapshot, ): - input_name = original_input_node.name - input_node = ModelNode( - **common_fields, - package_name=original_input_node.package_name, - unique_id=f"model.{original_input_node.package_name}.{input_name}", - name=input_name, - path=f"{input_name}.sql", - ) + input_node = ModelNode(**common_fields) if ( original_input_node.resource_type == NodeType.Model and original_input_node.version @@ -157,13 +155,8 @@ def parse_unit_test_case(self, test_case: UnitTestDefinition): # We are reusing the database/schema/identifier from the original source, # but that shouldn't matter since this acts as an ephemeral model which just # wraps a CTE around the unit test node. - input_name = original_input_node.name input_node = UnitTestSourceDefinition( **common_fields, - package_name=original_input_node.package_name, - unique_id=f"model.{original_input_node.package_name}.{input_name}", - name=original_input_node.name, # must be the same name for source lookup to work - path=f"{input_name}.sql", # for writing out compiled_code source_name=original_input_node.source_name, # needed for source lookup ) # Sources need to go in the sources dictionary in order to create the right lookup