Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Partial parse yaml snapshots #10907

Merged
merged 3 commits into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20241022-222927.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Implement partial parsing for all-yaml snapshots
time: 2024-10-22T22:29:27.396378-04:00
custom:
Author: gshank
Issue: "10903"
1 change: 1 addition & 0 deletions core/dbt/contracts/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ class SchemaSourceFile(BaseSourceFile):
sources: List[str] = field(default_factory=list)
exposures: List[str] = field(default_factory=list)
metrics: List[str] = field(default_factory=list)
snapshots: List[str] = field(default_factory=list)
# The following field will no longer be used. Leaving
# here to avoid breaking existing projects. To be removed
# later if possible.
Expand Down
9 changes: 6 additions & 3 deletions core/dbt/contracts/graph/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
SeedNode,
SemanticModel,
SingularTestNode,
SnapshotNode,
SourceDefinition,
UnitTestDefinition,
UnitTestFileFixture,
Expand Down Expand Up @@ -1600,12 +1601,14 @@ def add_node(self, source_file: AnySourceFile, node: ManifestNode, test_from=Non
if isinstance(node, GenericTestNode):
assert test_from
source_file.add_test(node.unique_id, test_from)
if isinstance(node, Metric):
elif isinstance(node, Metric):
source_file.metrics.append(node.unique_id)
if isinstance(node, Exposure):
elif isinstance(node, Exposure):
source_file.exposures.append(node.unique_id)
if isinstance(node, Group):
elif isinstance(node, Group):
source_file.groups.append(node.unique_id)
elif isinstance(node, SnapshotNode):
source_file.snapshots.append(node.unique_id)
elif isinstance(source_file, FixtureSourceFile):
pass
else:
Expand Down
21 changes: 21 additions & 0 deletions core/dbt/parser/partial.py
Original file line number Diff line number Diff line change
Expand Up @@ -658,10 +658,14 @@
key_diff = self.get_diff_for(dict_key, saved_yaml_dict, new_yaml_dict)
if key_diff["changed"]:
for elem in key_diff["changed"]:
if dict_key == "snapshots" and "relation" in elem:
self.delete_yaml_snapshot(schema_file, elem)
self.delete_schema_mssa_links(schema_file, dict_key, elem)
self.merge_patch(schema_file, dict_key, elem, True)
if key_diff["deleted"]:
for elem in key_diff["deleted"]:
if dict_key == "snapshots" and "relation" in elem:
self.delete_yaml_snapshot(schema_file, elem)

Check warning on line 668 in core/dbt/parser/partial.py

View check run for this annotation

Codecov / codecov/patch

core/dbt/parser/partial.py#L668

Added line #L668 was not covered by tests
self.delete_schema_mssa_links(schema_file, dict_key, elem)
if key_diff["added"]:
for elem in key_diff["added"]:
Expand All @@ -673,6 +677,8 @@
continue
elem = self.get_schema_element(new_yaml_dict[dict_key], name)
if elem:
if dict_key == "snapshots" and "relation" in elem:
self.delete_yaml_snapshot(schema_file, elem)

Check warning on line 681 in core/dbt/parser/partial.py

View check run for this annotation

Codecov / codecov/patch

core/dbt/parser/partial.py#L681

Added line #L681 was not covered by tests
self.delete_schema_mssa_links(schema_file, dict_key, elem)
self.merge_patch(schema_file, dict_key, elem, True)

Expand Down Expand Up @@ -828,6 +834,8 @@
# remove elem node and remove unique_id from node_patches
for elem_unique_id in elem_unique_ids:
# might have been already removed
# For all-yaml snapshots, we don't do this, since the node
# should have already been removed.
if (
elem_unique_id in self.saved_manifest.nodes
or elem_unique_id in self.saved_manifest.disabled
Expand Down Expand Up @@ -868,6 +876,19 @@
self.saved_manifest.nodes.pop(test_unique_id)
schema_file.remove_tests(dict_key, name)

def delete_yaml_snapshot(self, schema_file, snapshot_dict):
snapshot_name = snapshot_dict["name"]
snapshots = schema_file.snapshots.copy()
for unique_id in snapshots:
if unique_id in self.saved_manifest.nodes:
snapshot = self.saved_manifest.nodes[unique_id]
if snapshot.name == snapshot_name:
self.saved_manifest.nodes.pop(unique_id)
schema_file.snapshots.remove(unique_id)
elif unique_id in self.saved_manifest.disabled:
self.delete_disabled(unique_id, schema_file.file_id)
schema_file.snapshots.remove(unique_id)

Check warning on line 890 in core/dbt/parser/partial.py

View check run for this annotation

Codecov / codecov/patch

core/dbt/parser/partial.py#L888-L890

Added lines #L888 - L890 were not covered by tests

def delete_schema_source(self, schema_file, source_dict):
# both patches, tests, and source nodes
source_name = source_dict["name"]
Expand Down
5 changes: 3 additions & 2 deletions core/dbt/parser/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,8 +309,9 @@ def _add_yaml_snapshot_nodes_to_manifest(
snapshot_node.raw_code = "select * from {{ " + snapshot["relation"] + " }}"

# Add our new node to the manifest, and note that ref lookup collections
# will need to be rebuilt.
self.manifest.add_node_nofile(snapshot_node)
# will need to be rebuilt. This adds the node unique_id to the "snapshots"
# list in the SchemaSourceFile.
self.manifest.add_node(block.file, snapshot_node)
rebuild_refs = True

if rebuild_refs:
Expand Down
13 changes: 12 additions & 1 deletion tests/functional/snapshots/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,6 @@
"""

snapshots_pg__snapshot_yml = """
version: 2
snapshots:
- name: snapshot_actual
relation: "ref('seed')"
Expand All @@ -304,6 +303,18 @@
owner: 'a_owner'
"""

snapshots_pg__snapshot_mod_yml = """
snapshots:
- name: snapshot_actual
relation: "ref('seed')"
config:
unique_key: "id || '-' || first_name"
strategy: timestamp
updated_at: updated_at
meta:
owner: 'b_owner'
"""

snapshots_pg__snapshot_no_target_schema_sql = """
{% snapshot snapshot_actual %}

Expand Down
33 changes: 33 additions & 0 deletions tests/functional/snapshots/test_basic_snapshot.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
check_relations_equal,
relation_from_name,
run_dbt,
update_config_file,
write_file,
)
from tests.functional.snapshots.fixtures import (
Expand All @@ -18,6 +19,7 @@
models__schema_yml,
seeds__seed_csv,
seeds__seed_newcol_csv,
snapshots_pg__snapshot_mod_yml,
snapshots_pg__snapshot_no_target_schema_sql,
snapshots_pg__snapshot_sql,
snapshots_pg__snapshot_yml,
Expand Down Expand Up @@ -394,3 +396,34 @@ def models(self):
class TestBasicSnapshotYaml(BasicYaml):
def test_basic_snapshot_yaml(self, project):
snapshot_setup(project, num_snapshot_models=1)


class TestYamlSnapshotPartialParsing(BasicYaml):
def test_snapshot_partial_parsing(self, project):
manifest = run_dbt(["parse"])
snapshot_id = "snapshot.test.snapshot_actual"
assert snapshot_id in manifest.nodes
snapshot = manifest.nodes[snapshot_id]
assert snapshot.meta["owner"] == "a_owner"

# change snapshot yml file and re-parse
write_file(snapshots_pg__snapshot_mod_yml, "snapshots", "snapshot.yml")
manifest = run_dbt(["parse"])
snapshot = manifest.nodes[snapshot_id]
assert snapshot.meta["owner"] == "b_owner"

# modify dbt_project.yml and re-parse
config_updates = {
"snapshots": {
"test": {
"+snapshot_meta_column_names": {
"dbt_valid_to": "test_valid_to",
"dbt_valid_from": "test_valid_from",
},
}
}
}
update_config_file(config_updates, "dbt_project.yml")
manifest = run_dbt(["parse"])
snapshot = manifest.nodes[snapshot_id]
assert snapshot.config.snapshot_meta_column_names.dbt_valid_to == "test_valid_to"
Loading