Skip to content

Commit bbe3a74

Browse files
Fix schema file patch collection (#12055) (#12060)
(cherry picked from commit 963251d) Co-authored-by: Gerda Shank <gerda@dbtlabs.com>
1 parent 12cd79d commit bbe3a74

File tree

3 files changed

+312
-1
lines changed

3 files changed

+312
-1
lines changed
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
kind: Fixes
2+
body: Fix errors in partial parsing when working with versioned models
3+
time: 2025-09-25T21:27:28.591187-04:00
4+
custom:
5+
Author: gshank
6+
Issue: "11869"

core/dbt/parser/partial.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
"seeds": "seed",
3838
"snapshots": "snapshot",
3939
"analyses": "analysis",
40+
"sources": "source",
4041
}
4142

4243

@@ -178,6 +179,10 @@ def get_parsing_files(self):
178179
self.add_to_saved(file_id)
179180
# Need to process schema files next, because the dictionaries
180181
# need to be in place for handling SQL file changes
182+
# The reverse sort here is just to ensure that the schema file
183+
# processing order test case works, because otherwise the order
184+
# of processing the schema files is not guaranteed.
185+
self.file_diff["changed_schema_files"].sort(reverse=True)
181186
for file_id in self.file_diff["changed_schema_files"]:
182187
self.processing_file = file_id
183188
self.change_schema_file(file_id)
@@ -244,6 +249,10 @@ def handle_added_schema_file(self, source_file):
244249
# be properly patched
245250
if "overrides" in source:
246251
self.remove_source_override_target(source)
252+
if "models" in source_file.pp_dict:
253+
for model in source_file.pp_dict["models"]:
254+
if "versions" in model:
255+
self.versioned_model_delete_schema_mssa_links(source_file, "models", model)
247256

248257
def delete_disabled(self, unique_id, file_id):
249258
# This node/metric/exposure is disabled. Find it and remove it from disabled dictionary.
@@ -628,7 +637,8 @@ def change_schema_file(self, file_id):
628637
new_schema_file = deepcopy(self.new_files[file_id])
629638
saved_yaml_dict = saved_schema_file.dict_from_yaml
630639
new_yaml_dict = new_schema_file.dict_from_yaml
631-
saved_schema_file.pp_dict = {}
640+
if saved_schema_file.pp_dict is None:
641+
saved_schema_file.pp_dict = {}
632642
self.handle_schema_file_changes(saved_schema_file, saved_yaml_dict, new_yaml_dict)
633643

634644
# copy from new schema_file to saved_schema_file to preserve references
@@ -675,6 +685,8 @@ def handle_schema_file_changes(self, schema_file, saved_yaml_dict, new_yaml_dict
675685
self.delete_schema_mssa_links(schema_file, dict_key, elem)
676686
if key_diff["added"]:
677687
for elem in key_diff["added"]:
688+
if dict_key == "models" and "versions" in elem:
689+
self.versioned_model_delete_schema_mssa_links(schema_file, dict_key, elem)
678690
self.merge_patch(schema_file, dict_key, elem, True)
679691
# Handle schema file updates due to env_var changes
680692
if dict_key in env_var_changes and dict_key in new_yaml_dict:
@@ -837,7 +849,19 @@ def delete_schema_mssa_links(self, schema_file, dict_key, elem) -> None:
837849
elem_name = parts[2]
838850
if elem_name == elem["name"]:
839851
elem_unique_ids.append(unique_id)
852+
self._delete_schema_mssa_links(schema_file, dict_key, elem, elem_unique_ids)
840853

854+
def versioned_model_delete_schema_mssa_links(self, schema_file, dict_key, elem) -> None:
855+
elem_unique_ids = []
856+
# We need to look up possible existing models that this new or modified patch applies to
857+
unique_id = f"model.{schema_file.project_name}.{elem['name']}"
858+
if unique_id in self.saved_manifest.nodes:
859+
elem_unique_ids.append(unique_id)
860+
if not elem_unique_ids:
861+
return
862+
self._delete_schema_mssa_links(schema_file, dict_key, elem, elem_unique_ids)
863+
864+
def _delete_schema_mssa_links(self, schema_file, dict_key, elem, elem_unique_ids):
841865
# remove elem node and remove unique_id from node_patches
842866
for elem_unique_id in elem_unique_ids:
843867
# might have been already removed
@@ -886,6 +910,17 @@ def remove_tests(self, schema_file, dict_key, name):
886910
if test_unique_id in self.saved_manifest.nodes:
887911
self.saved_manifest.nodes.pop(test_unique_id)
888912
schema_file.remove_tests(dict_key, name)
913+
# We also need to remove tests in other schema files that
914+
# reference this node.
915+
unique_id = f"{key_to_prefix[dict_key]}.{schema_file.project_name}.{name}"
916+
if unique_id in self.saved_manifest.child_map:
917+
for child_id in self.saved_manifest.child_map[unique_id]:
918+
if child_id.startswith("test") and child_id in self.saved_manifest.nodes:
919+
child_test = self.saved_manifest.nodes[child_id]
920+
if child_test.attached_node:
921+
if child_test.attached_node in self.saved_manifest.nodes:
922+
attached_node = self.saved_manifest.nodes[child_test.attached_node]
923+
self.update_in_saved(attached_node.file_id)
889924

890925
def delete_yaml_snapshot(self, schema_file, snapshot_dict):
891926
snapshot_name = snapshot_dict["name"]
Lines changed: 270 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,270 @@
1+
import os
2+
3+
import pytest
4+
5+
from dbt.tests.util import get_manifest, rm_file, run_dbt, write_file
6+
7+
os.environ["DBT_PP_TEST"] = "true"
8+
9+
colors_sql = """
10+
select 'green' as first, 'red' as second, 'blue' as third
11+
"""
12+
13+
another_v1_sql = """
14+
select * from {{ ref("colors") }}
15+
"""
16+
17+
another_ref_sql = """
18+
select * from {{ ref("another") }}
19+
"""
20+
21+
colors_yml = """
22+
models:
23+
- name: colors
24+
description: "a list of colors"
25+
- name: another
26+
description: "another model"
27+
versions:
28+
- v: 1
29+
"""
30+
31+
colors_alt_yml = """
32+
models:
33+
- name: colors
34+
description: "a list of colors"
35+
- name: another
36+
description: "YET another model"
37+
versions:
38+
- v: 1
39+
"""
40+
41+
foo_model_sql = """
42+
select 1 as id
43+
"""
44+
45+
another_ref_yml = """
46+
models:
47+
- name: another_ref
48+
description: "model with reference to another ref"
49+
- name: foo_model
50+
description: "some random model"
51+
"""
52+
53+
another_ref_alt_yml = """
54+
models:
55+
- name: another_ref
56+
description: "model with reference to another ref"
57+
- name: foo_model
58+
description: "some random other model"
59+
"""
60+
61+
62+
class TestSchemaFileOrder:
63+
@pytest.fixture(scope="class")
64+
def models(self):
65+
return {
66+
"colors.sql": colors_sql,
67+
"colors.yml": colors_yml,
68+
"another_v1.sql": another_v1_sql,
69+
"another_ref.sql": another_ref_sql,
70+
"foo_model.sql": foo_model_sql,
71+
"another_ref.yml": another_ref_yml,
72+
}
73+
74+
def test_schema_file_order(self, project):
75+
76+
# initial run
77+
results = run_dbt(["run"])
78+
assert len(results) == 4
79+
80+
manifest = get_manifest(project.project_root)
81+
model_id = "model.test.another_ref"
82+
model = manifest.nodes.get(model_id)
83+
assert model.description == "model with reference to another ref"
84+
85+
write_file(colors_alt_yml, project.project_root, "models", "colors.yml")
86+
write_file(another_ref_alt_yml, project.project_root, "models", "another_ref.yml")
87+
results = run_dbt(["--partial-parse", "run"])
88+
assert len(results) == 4
89+
manifest = get_manifest(project.project_root)
90+
model = manifest.nodes.get(model_id)
91+
assert model.name == "another_ref"
92+
# The description here would be '' without the bug fix
93+
assert model.description == "model with reference to another ref"
94+
95+
96+
foo_sql = """
97+
select 1 c
98+
"""
99+
100+
bar_sql = """
101+
select 1 c
102+
"""
103+
104+
bar_with_ref_sql = """
105+
select * from {{ ref('foo') }}
106+
"""
107+
108+
foo_v2_sql = """
109+
select 1 c
110+
"""
111+
112+
schema_yml = """
113+
# models/schema.yml
114+
models:
115+
- name: foo
116+
latest_version: 1
117+
versions:
118+
- v: 1
119+
- v: 2
120+
"""
121+
122+
foo_yml = """
123+
# models/foo.yml
124+
models:
125+
- name: foo
126+
"""
127+
128+
bar_yml = """
129+
# models/bar.yml
130+
models:
131+
- name: bar
132+
columns:
133+
- name: c
134+
tests:
135+
- relationships:
136+
to: ref('foo')
137+
field: c
138+
"""
139+
140+
foo_alt_yml = """
141+
# models/foo.yml
142+
models:
143+
- name: foo
144+
latest_version: 1
145+
versions:
146+
- v: 1
147+
- v: 2
148+
"""
149+
150+
151+
class TestNewVersionedSchemaFile:
152+
@pytest.fixture(scope="class")
153+
def models(self):
154+
return {
155+
"foo.sql": foo_sql,
156+
"bar.sql": bar_with_ref_sql,
157+
}
158+
159+
def test_schema_file_order_new_versions(self, project):
160+
# This tests that a model referring to an existing model, which has
161+
# had a version added in a yaml file, gets re-parsed in order
162+
# to fix its depends_on to point to the correct versioned model
163+
164+
# initial run
165+
results = run_dbt(["compile"])
166+
assert len(results) == 2
167+
168+
write_file(foo_v2_sql, project.project_root, "models", "foo_v2.sql")
169+
write_file(schema_yml, project.project_root, "models", "schema.yml")
170+
171+
results = run_dbt(["compile"])
172+
173+
174+
class TestMoreNewVersionedSchemaFile:
175+
@pytest.fixture(scope="class")
176+
def models(self):
177+
return {
178+
"foo.sql": foo_sql,
179+
"bar.sql": bar_sql,
180+
"foo.yml": foo_yml,
181+
"bar.yml": bar_yml,
182+
}
183+
184+
def test_more_schema_file_new_versions(self, project):
185+
186+
# initial run
187+
results = run_dbt(["compile"])
188+
assert len(results) == 3
189+
190+
rm_file(project.project_root, "models", "foo.sql")
191+
write_file(foo_sql, project.project_root, "models", "foo_v1.sql")
192+
write_file(foo_sql, project.project_root, "models", "foo_v2.sql")
193+
write_file(foo_alt_yml, project.project_root, "models", "foo.yml")
194+
195+
results = run_dbt(["compile"])
196+
197+
198+
sources_yml = """
199+
sources:
200+
- name: top_source
201+
tables:
202+
- name: abcd
203+
- name: efgh
204+
- name: ijkl
205+
"""
206+
207+
abcd_sql = """
208+
select * from {{ source("top_source", "abcd") }}
209+
"""
210+
211+
efgh_sql = """
212+
select * from {{ source("top_source", "efgh") }}
213+
"""
214+
215+
ijkl_sql = """
216+
select * from {{ source("top_source", "ijkl") }}
217+
"""
218+
219+
models_yml = """
220+
models:
221+
- name: abcd
222+
description: "abcd model"
223+
- name: efgh
224+
description: "efgh model"
225+
- name: ijkl
226+
description: "ijkl model"
227+
"""
228+
229+
append_sources_yml = """
230+
- name: mnop
231+
"""
232+
233+
append_models_yml = """
234+
- name: mnop
235+
description: "mnop model"
236+
"""
237+
238+
mnop_sql = """
239+
select * from {{ source("top_source", "mnop") }}
240+
"""
241+
242+
243+
class TestSourcesAndSchemaFiles:
244+
@pytest.fixture(scope="class")
245+
def models(self):
246+
return {
247+
"sources.yml": sources_yml,
248+
"abcd.sql": abcd_sql,
249+
"efgh.sql": efgh_sql,
250+
"ijkl.sql": ijkl_sql,
251+
"_models.yml": models_yml,
252+
}
253+
254+
def test_schema_file_order_new_versions(self, project):
255+
256+
# initial run
257+
manifest = run_dbt(["parse"])
258+
assert len(manifest.nodes) == 3
259+
260+
write_file(models_yml + append_models_yml, project.project_root, "models", "_models.yml")
261+
write_file(mnop_sql, project.project_root, "models", "mnop.sql")
262+
write_file(sources_yml + append_sources_yml, project.project_root, "models", "sources.yml")
263+
264+
manifest = run_dbt(["parse"])
265+
assert len(manifest.nodes) == 4
266+
267+
# Without the fix the three original nodes will all be missing the
268+
# patch updates, including description, so description will be ""
269+
for node in manifest.nodes.values():
270+
assert node.description == f"{node.name} model"

0 commit comments

Comments
 (0)