Skip to content

Commit

Permalink
Partial parse yaml snapshots (#10907)
Browse files Browse the repository at this point in the history
  • Loading branch information
gshank authored Oct 23, 2024
1 parent f7b7935 commit bdb79e8
Show file tree
Hide file tree
Showing 7 changed files with 82 additions and 6 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20241022-222927.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Implement partial parsing for all-yaml snapshots
time: 2024-10-22T22:29:27.396378-04:00
custom:
Author: gshank
Issue: "10903"
1 change: 1 addition & 0 deletions core/dbt/contracts/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ class SchemaSourceFile(BaseSourceFile):
sources: List[str] = field(default_factory=list)
exposures: List[str] = field(default_factory=list)
metrics: List[str] = field(default_factory=list)
snapshots: List[str] = field(default_factory=list)
# The following field will no longer be used. Leaving
# here to avoid breaking existing projects. To be removed
# later if possible.
Expand Down
9 changes: 6 additions & 3 deletions core/dbt/contracts/graph/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
SeedNode,
SemanticModel,
SingularTestNode,
SnapshotNode,
SourceDefinition,
UnitTestDefinition,
UnitTestFileFixture,
Expand Down Expand Up @@ -1600,12 +1601,14 @@ def add_node(self, source_file: AnySourceFile, node: ManifestNode, test_from=Non
if isinstance(node, GenericTestNode):
assert test_from
source_file.add_test(node.unique_id, test_from)
if isinstance(node, Metric):
elif isinstance(node, Metric):
source_file.metrics.append(node.unique_id)
if isinstance(node, Exposure):
elif isinstance(node, Exposure):
source_file.exposures.append(node.unique_id)
if isinstance(node, Group):
elif isinstance(node, Group):
source_file.groups.append(node.unique_id)
elif isinstance(node, SnapshotNode):
source_file.snapshots.append(node.unique_id)
elif isinstance(source_file, FixtureSourceFile):
pass
else:
Expand Down
21 changes: 21 additions & 0 deletions core/dbt/parser/partial.py
Original file line number Diff line number Diff line change
Expand Up @@ -658,10 +658,14 @@ def handle_schema_file_changes(self, schema_file, saved_yaml_dict, new_yaml_dict
key_diff = self.get_diff_for(dict_key, saved_yaml_dict, new_yaml_dict)
if key_diff["changed"]:
for elem in key_diff["changed"]:
if dict_key == "snapshots" and "relation" in elem:
self.delete_yaml_snapshot(schema_file, elem)
self.delete_schema_mssa_links(schema_file, dict_key, elem)
self.merge_patch(schema_file, dict_key, elem, True)
if key_diff["deleted"]:
for elem in key_diff["deleted"]:
if dict_key == "snapshots" and "relation" in elem:
self.delete_yaml_snapshot(schema_file, elem)
self.delete_schema_mssa_links(schema_file, dict_key, elem)
if key_diff["added"]:
for elem in key_diff["added"]:
Expand All @@ -673,6 +677,8 @@ def handle_schema_file_changes(self, schema_file, saved_yaml_dict, new_yaml_dict
continue
elem = self.get_schema_element(new_yaml_dict[dict_key], name)
if elem:
if dict_key == "snapshots" and "relation" in elem:
self.delete_yaml_snapshot(schema_file, elem)
self.delete_schema_mssa_links(schema_file, dict_key, elem)
self.merge_patch(schema_file, dict_key, elem, True)

Expand Down Expand Up @@ -828,6 +834,8 @@ def delete_schema_mssa_links(self, schema_file, dict_key, elem):
# remove elem node and remove unique_id from node_patches
for elem_unique_id in elem_unique_ids:
# might have been already removed
# For all-yaml snapshots, we don't do this, since the node
# should have already been removed.
if (
elem_unique_id in self.saved_manifest.nodes
or elem_unique_id in self.saved_manifest.disabled
Expand Down Expand Up @@ -868,6 +876,19 @@ def remove_tests(self, schema_file, dict_key, name):
self.saved_manifest.nodes.pop(test_unique_id)
schema_file.remove_tests(dict_key, name)

def delete_yaml_snapshot(self, schema_file, snapshot_dict):
snapshot_name = snapshot_dict["name"]
snapshots = schema_file.snapshots.copy()
for unique_id in snapshots:
if unique_id in self.saved_manifest.nodes:
snapshot = self.saved_manifest.nodes[unique_id]
if snapshot.name == snapshot_name:
self.saved_manifest.nodes.pop(unique_id)
schema_file.snapshots.remove(unique_id)
elif unique_id in self.saved_manifest.disabled:
self.delete_disabled(unique_id, schema_file.file_id)
schema_file.snapshots.remove(unique_id)

def delete_schema_source(self, schema_file, source_dict):
# both patches, tests, and source nodes
source_name = source_dict["name"]
Expand Down
5 changes: 3 additions & 2 deletions core/dbt/parser/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,8 +309,9 @@ def _add_yaml_snapshot_nodes_to_manifest(
snapshot_node.raw_code = "select * from {{ " + snapshot["relation"] + " }}"

# Add our new node to the manifest, and note that ref lookup collections
# will need to be rebuilt.
self.manifest.add_node_nofile(snapshot_node)
# will need to be rebuilt. This adds the node unique_id to the "snapshots"
# list in the SchemaSourceFile.
self.manifest.add_node(block.file, snapshot_node)
rebuild_refs = True

if rebuild_refs:
Expand Down
13 changes: 12 additions & 1 deletion tests/functional/snapshots/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,6 @@
"""

snapshots_pg__snapshot_yml = """
version: 2
snapshots:
- name: snapshot_actual
relation: "ref('seed')"
Expand All @@ -304,6 +303,18 @@
owner: 'a_owner'
"""

snapshots_pg__snapshot_mod_yml = """
snapshots:
- name: snapshot_actual
relation: "ref('seed')"
config:
unique_key: "id || '-' || first_name"
strategy: timestamp
updated_at: updated_at
meta:
owner: 'b_owner'
"""

snapshots_pg__snapshot_no_target_schema_sql = """
{% snapshot snapshot_actual %}
Expand Down
33 changes: 33 additions & 0 deletions tests/functional/snapshots/test_basic_snapshot.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
check_relations_equal,
relation_from_name,
run_dbt,
update_config_file,
write_file,
)
from tests.functional.snapshots.fixtures import (
Expand All @@ -18,6 +19,7 @@
models__schema_yml,
seeds__seed_csv,
seeds__seed_newcol_csv,
snapshots_pg__snapshot_mod_yml,
snapshots_pg__snapshot_no_target_schema_sql,
snapshots_pg__snapshot_sql,
snapshots_pg__snapshot_yml,
Expand Down Expand Up @@ -394,3 +396,34 @@ def models(self):
class TestBasicSnapshotYaml(BasicYaml):
def test_basic_snapshot_yaml(self, project):
snapshot_setup(project, num_snapshot_models=1)


class TestYamlSnapshotPartialParsing(BasicYaml):
def test_snapshot_partial_parsing(self, project):
manifest = run_dbt(["parse"])
snapshot_id = "snapshot.test.snapshot_actual"
assert snapshot_id in manifest.nodes
snapshot = manifest.nodes[snapshot_id]
assert snapshot.meta["owner"] == "a_owner"

# change snapshot yml file and re-parse
write_file(snapshots_pg__snapshot_mod_yml, "snapshots", "snapshot.yml")
manifest = run_dbt(["parse"])
snapshot = manifest.nodes[snapshot_id]
assert snapshot.meta["owner"] == "b_owner"

# modify dbt_project.yml and re-parse
config_updates = {
"snapshots": {
"test": {
"+snapshot_meta_column_names": {
"dbt_valid_to": "test_valid_to",
"dbt_valid_from": "test_valid_from",
},
}
}
}
update_config_file(config_updates, "dbt_project.yml")
manifest = run_dbt(["parse"])
snapshot = manifest.nodes[snapshot_id]
assert snapshot.config.snapshot_meta_column_names.dbt_valid_to == "test_valid_to"

0 comments on commit bdb79e8

Please sign in to comment.