Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[state:modified] persist unrendered_config from schema.yml, and more reliably compute unrendered_config from .sql files #10487

Merged
merged 29 commits into from
Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
1659c7b
persist schema yml unrendered_config before rendering
MichelleArk Jul 26, 2024
8ad3c03
persist unrendered config on schema file instead of manifest
MichelleArk Jul 26, 2024
4f96f5c
update test_configs_in_schema_files.py
MichelleArk Jul 26, 2024
acd23b0
support versioning in SchemaSourceFile.add_unrendered_config
MichelleArk Jul 26, 2024
9e10428
Merge branch 'main' into state-modified-source-tests
MichelleArk Aug 12, 2024
fe9b572
fix: do not use None for version key in unrendered_configs
MichelleArk Aug 12, 2024
f368cdf
support reliable unrendered_config in .sql files
MichelleArk Aug 12, 2024
57db0bf
more reliably set unrendered_config_call_dict + prefer config_call_di…
MichelleArk Aug 19, 2024
1ce11ec
store + reset unrendered_config_call_dict on node
MichelleArk Aug 20, 2024
bad5723
Merge branch 'main' into state-modified-source-tests
MichelleArk Aug 22, 2024
ce2b150
Merge branch 'main' into state-modified-source-tests
MichelleArk Sep 4, 2024
bad6b92
first pass: put behind legacy flag
MichelleArk Sep 4, 2024
d8bf0e6
fix SnapshotParserTest unit test
MichelleArk Sep 5, 2024
fad1295
Return early to avoid creating jinja environemt if no config call in …
MichelleArk Sep 6, 2024
47b18b1
test var sensitivity with require_config_jinja_insensitivity_for_stat…
MichelleArk Sep 6, 2024
d625a23
cleanup comment
MichelleArk Sep 6, 2024
33e2359
Merge branch 'main' into state-modified-source-tests
MichelleArk Sep 10, 2024
4081627
rename behaviour flag
MichelleArk Sep 10, 2024
897c2e5
Merge branch 'main' into state-modified-source-tests
MichelleArk Sep 25, 2024
ac42ec4
changelog entry
MichelleArk Sep 25, 2024
71bda2a
fix state:modified for jinja if statements in configs
MichelleArk Sep 25, 2024
8677a9a
simplify constructing unrendered_config from jinja
MichelleArk Sep 25, 2024
9adb4b8
update unit tests for unrendered_config_call_dict values
MichelleArk Sep 25, 2024
428df36
clean up debugging print
MichelleArk Sep 25, 2024
40f1a67
put static parsing behind behaviour change flag
MichelleArk Sep 25, 2024
7986726
tests reflect behaviour flag setting
MichelleArk Sep 26, 2024
f4a5c0b
use resource_types_to_schema_file_keys to get patch unrendered configs
MichelleArk Sep 26, 2024
259d6f8
handle versioned unrendered_configs during partial parsing
MichelleArk Sep 26, 2024
5b0bce0
Merge branch 'main' into state-modified-source-tests
MichelleArk Sep 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .changes/unreleased/Fixes-20240925-131028.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Fixes
body: Ignore rendered jinja in configs for state:modified, behind state_modified_compare_more_unrendered_values
behaviour flag
time: 2024-09-25T13:10:28.490042+01:00
custom:
Author: michelleark
Issue: "9564"
3 changes: 3 additions & 0 deletions core/dbt/artifacts/resources/v1/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,13 +194,16 @@ class ParsedResource(ParsedResourceMandatory):
unrendered_config: Dict[str, Any] = field(default_factory=dict)
created_at: float = field(default_factory=lambda: time.time())
config_call_dict: Dict[str, Any] = field(default_factory=dict)
unrendered_config_call_dict: Dict[str, Any] = field(default_factory=dict)
relation_name: Optional[str] = None
raw_code: str = ""

def __post_serialize__(self, dct: Dict, context: Optional[Dict] = None):
dct = super().__post_serialize__(dct, context)
if context and context.get("artifact") and "config_call_dict" in dct:
del dct["config_call_dict"]
if context and context.get("artifact") and "unrendered_config_call_dict" in dct:
del dct["unrendered_config_call_dict"]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

even though this isn't serialized, we still need a schemas.getdbt.com PR, similar to config_call_dict: dbt-labs/schemas.getdbt.com#60

return dct


Expand Down
54 changes: 54 additions & 0 deletions core/dbt/clients/jinja_static.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,3 +191,57 @@
raise ParsingError(f"Invalid ref or source expression: {expression}")

return ref_or_source


def statically_parse_unrendered_config(string: str) -> Optional[Dict[str, Any]]:
"""
Given a string with jinja, extract an unrendered config call.
If no config call is present, returns None.

For example, given:
"{{ config(materialized=env_var('DBT_TEST_STATE_MODIFIED')) }}\nselect 1 as id"
returns: {'materialized': "Keyword(key='materialized', value=Call(node=Name(name='env_var', ctx='load'), args=[Const(value='DBT_TEST_STATE_MODIFIED')], kwargs=[], dyn_args=None, dyn_kwargs=None))"}

No config call:
"select 1 as id"
returns: None
"""
# Return early to avoid creating jinja environemt if no config call in input string
if "config(" not in string:
return None

# set 'capture_macros' to capture undefined
env = get_environment(None, capture_macros=True)

global _TESTING_MACRO_CACHE
if test_caching_enabled() and _TESTING_MACRO_CACHE and string in _TESTING_MACRO_CACHE:
parsed = _TESTING_MACRO_CACHE.get(string, None)
func_calls = getattr(parsed, "_dbt_cached_calls")

Check warning on line 219 in core/dbt/clients/jinja_static.py

View check run for this annotation

Codecov / codecov/patch

core/dbt/clients/jinja_static.py#L218-L219

Added lines #L218 - L219 were not covered by tests
else:
parsed = env.parse(string)
func_calls = tuple(parsed.find_all(jinja2.nodes.Call))

config_func_calls = list(
filter(
lambda f: hasattr(f, "node") and hasattr(f.node, "name") and f.node.name == "config",
func_calls,
)
)
# There should only be one {{ config(...) }} call per input
config_func_call = config_func_calls[0] if config_func_calls else None

if not config_func_call:
return None

unrendered_config = {}
for kwarg in config_func_call.kwargs:
unrendered_config[kwarg.key] = construct_static_kwarg_value(kwarg)

return unrendered_config


def construct_static_kwarg_value(kwarg):
# Instead of trying to re-assemble complex kwarg value, simply stringify the value
# This is still useful to be able to detect changes in unrendered configs, even if it is
# not an exact representation of the user input.
return str(kwarg)
20 changes: 19 additions & 1 deletion core/dbt/context/context_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from dbt.adapters.factory import get_config_class_by_name
from dbt.config import IsFQNResource, Project, RuntimeConfig
from dbt.contracts.graph.model_config import get_config_for
from dbt.flags import get_flags
from dbt.node_types import NodeType
from dbt.utils import fqn_search
from dbt_common.contracts.config.base import BaseConfig, merge_config_dicts
Expand Down Expand Up @@ -286,6 +287,7 @@ def __init__(
project_name: str,
) -> None:
self._config_call_dict: Dict[str, Any] = {}
self._unrendered_config_call_dict: Dict[str, Any] = {}
self._active_project = active_project
self._fqn = fqn
self._resource_type = resource_type
Expand All @@ -295,6 +297,10 @@ def add_config_call(self, opts: Dict[str, Any]) -> None:
dct = self._config_call_dict
merge_config_dicts(dct, opts)

def add_unrendered_config_call(self, opts: Dict[str, Any]) -> None:
# Cannot perform complex merge behaviours on unrendered configs as they may not be appropriate types.
self._unrendered_config_call_dict.update(opts)

def build_config_dict(
self,
base: bool = False,
Expand All @@ -305,12 +311,24 @@ def build_config_dict(
if rendered:
# TODO CT-211
src = ContextConfigGenerator(self._active_project) # type: ignore[var-annotated]
config_call_dict = self._config_call_dict
else:
# TODO CT-211
src = UnrenderedConfigGenerator(self._active_project) # type: ignore[assignment]

# preserve legacy behaviour - using unreliable (potentially rendered) _config_call_dict
if get_flags().state_modified_compare_more_unrendered_values is False:
config_call_dict = self._config_call_dict
else:
# Prefer _config_call_dict if it is available and _unrendered_config_call_dict is not,
# as _unrendered_config_call_dict is unreliable for non-sql nodes (e.g. no jinja config block rendered for python models, etc)
if self._config_call_dict and not self._unrendered_config_call_dict:
config_call_dict = self._config_call_dict
else:
config_call_dict = self._unrendered_config_call_dict

return src.calculate_node_config_dict(
config_call_dict=self._config_call_dict,
config_call_dict=config_call_dict,
fqn=self._fqn,
resource_type=self._resource_type,
project_name=self._project_name,
Expand Down
8 changes: 8 additions & 0 deletions core/dbt/context/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
UnitTestMacroGenerator,
get_rendered,
)
from dbt.clients.jinja_static import statically_parse_unrendered_config
from dbt.config import IsFQNResource, Project, RuntimeConfig
from dbt.constants import DEFAULT_ENV_PLACEHOLDER
from dbt.context.base import Var, contextmember, contextproperty
Expand Down Expand Up @@ -395,6 +396,13 @@ def __call__(self, *args, **kwargs):
# not call it!
if self.context_config is None:
raise DbtRuntimeError("At parse time, did not receive a context config")

# Track unrendered opts to build parsed node unrendered_config later on
MichelleArk marked this conversation as resolved.
Show resolved Hide resolved
unrendered_config = statically_parse_unrendered_config(self.model.raw_code)
if unrendered_config:
self.context_config.add_unrendered_config_call(unrendered_config)

# Use rendered opts to populate context_config
self.context_config.add_config_call(opts)
return ""

Expand Down
28 changes: 28 additions & 0 deletions core/dbt/contracts/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ class SchemaSourceFile(BaseSourceFile):
# created too, but those are in 'sources'
sop: List[SourceKey] = field(default_factory=list)
env_vars: Dict[str, Any] = field(default_factory=dict)
unrendered_configs: Dict[str, Any] = field(default_factory=dict)
pp_dict: Optional[Dict[str, Any]] = None
pp_test_index: Optional[Dict[str, Any]] = None

Expand Down Expand Up @@ -317,6 +318,33 @@ def get_all_test_ids(self):
test_ids.extend(self.data_tests[key][name])
return test_ids

def add_unrendered_config(self, unrendered_config, yaml_key, name, version=None):
versioned_name = f"{name}_v{version}" if version is not None else name

if yaml_key not in self.unrendered_configs:
self.unrendered_configs[yaml_key] = {}

if versioned_name not in self.unrendered_configs[yaml_key]:
self.unrendered_configs[yaml_key][versioned_name] = unrendered_config

def get_unrendered_config(self, yaml_key, name, version=None) -> Optional[Dict[str, Any]]:
versioned_name = f"{name}_v{version}" if version is not None else name

if yaml_key not in self.unrendered_configs:
return None
if versioned_name not in self.unrendered_configs[yaml_key]:
return None

return self.unrendered_configs[yaml_key][versioned_name]

def delete_from_unrendered_configs(self, yaml_key, name):
# We delete all unrendered_configs for this yaml_key/name because the
# entry has been scheduled for reparsing.
if self.get_unrendered_config(yaml_key, name):
del self.unrendered_configs[yaml_key][name]
if not self.unrendered_configs[yaml_key]:
del self.unrendered_configs[yaml_key]

def add_env_var(self, var, yaml_key, name):
if yaml_key not in self.env_vars:
self.env_vars[yaml_key] = {}
Expand Down
4 changes: 3 additions & 1 deletion core/dbt/contracts/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,17 +337,19 @@ class ProjectFlags(ExtensibleDbtClassMixin):
warn_error_options: Optional[Dict[str, Union[str, List[str]]]] = None
write_json: Optional[bool] = None

# legacy behaviors
# legacy behaviors - https://github.com/dbt-labs/dbt-core/blob/main/docs/guides/behavior-change-flags.md
require_explicit_package_overrides_for_builtin_materializations: bool = True
require_resource_names_without_spaces: bool = False
source_freshness_run_project_hooks: bool = False
state_modified_compare_more_unrendered_values: bool = False

@property
def project_only_flags(self) -> Dict[str, Any]:
return {
"require_explicit_package_overrides_for_builtin_materializations": self.require_explicit_package_overrides_for_builtin_materializations,
"require_resource_names_without_spaces": self.require_resource_names_without_spaces,
"source_freshness_run_project_hooks": self.source_freshness_run_project_hooks,
"state_modified_compare_more_unrendered_values": self.state_modified_compare_more_unrendered_values,
}


Expand Down
18 changes: 18 additions & 0 deletions core/dbt/parser/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
generate_generate_name_macro_context,
generate_parser_model_context,
)
from dbt.contracts.files import SchemaSourceFile
from dbt.contracts.graph.manifest import Manifest
from dbt.contracts.graph.nodes import BaseNode, ManifestNode
from dbt.contracts.graph.unparsed import Docs, UnparsedNode
Expand All @@ -22,9 +23,11 @@
DictParseError,
InvalidAccessTypeError,
)
from dbt.flags import get_flags
from dbt.node_types import AccessType, ModelLanguage, NodeType
from dbt.parser.search import FileBlock
from dbt_common.dataclass_schema import ValidationError
from dbt_common.utils import deep_merge

# internally, the parser may store a less-restrictive type that will be
# transformed into the final type. But it will have to be derived from
Expand Down Expand Up @@ -308,6 +311,7 @@ def update_parsed_node_config(
config: ContextConfig,
context=None,
patch_config_dict=None,
patch_file_id=None,
) -> None:
"""Given the ContextConfig used for parsing and the parsed node,
generate and set the true values to use, overriding the temporary parse
Expand Down Expand Up @@ -369,13 +373,27 @@ def update_parsed_node_config(
if hasattr(parsed_node, "contract"):
parsed_node.contract = Contract.from_dict(contract_dct)

if get_flags().state_modified_compare_more_unrendered_values:
# Use the patch_file.unrendered_configs if available to update patch_dict_config,
# as provided patch_config_dict may actuallly already be rendered and thus sensitive to jinja evaluations
if patch_file_id:
patch_file = self.manifest.files.get(patch_file_id, None)
if patch_file and isinstance(patch_file, SchemaSourceFile):
# TODO: do not hardcode "models"
if unrendered_patch_config := patch_file.get_unrendered_config(
"models", parsed_node.name, getattr(parsed_node, "version", None)
):
patch_config_dict = deep_merge(patch_config_dict, unrendered_patch_config)

# unrendered_config is used to compare the original database/schema/alias
# values and to handle 'same_config' and 'same_contents' calls
parsed_node.unrendered_config = config.build_config_dict(
rendered=False, patch_config_dict=patch_config_dict
)
print(f"final unrendered_config: {parsed_node.unrendered_config}")
MichelleArk marked this conversation as resolved.
Show resolved Hide resolved

parsed_node.config_call_dict = config._config_call_dict
parsed_node.unrendered_config_call_dict = config._unrendered_config_call_dict

# do this once before we parse the node database/schema/alias, so
# parsed_node.config is what it would be if they did nothing
Expand Down
1 change: 1 addition & 0 deletions core/dbt/parser/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,7 @@ def render_update(self, node: ModelNode, config: ContextConfig) -> None:
statically_parsed = self.run_experimental_parser(node)
# run the stable static parser unless it is explicitly turned off
else:
# TODO: consider running new parser here?
statically_parsed = self.run_static_parser(node)

# if the static parser succeeded, extract some data in easy-to-compare formats
Expand Down
1 change: 1 addition & 0 deletions core/dbt/parser/partial.py
Original file line number Diff line number Diff line change
Expand Up @@ -808,6 +808,7 @@ def merge_patch(self, schema_file, key, patch, new_patch=False):
pp_dict[key].append(patch)

schema_file.delete_from_env_vars(key, patch["name"])
schema_file.delete_from_unrendered_configs(key, patch["name"])
self.add_to_pp_files(schema_file)

# For model, seed, snapshot, analysis schema dictionary keys,
Expand Down
35 changes: 32 additions & 3 deletions core/dbt/parser/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,13 +400,34 @@ def get_key_dicts(self) -> Iterable[Dict[str, Any]]:
if "name" not in entry and "model" not in entry:
raise ParsingError("Entry did not contain a name")

unrendered_config = {}
if "config" in entry:
unrendered_config = entry["config"]
print(f"unrendered_config: {unrendered_config}")

unrendered_version_configs = {}
if "versions" in entry:
for version in entry["versions"]:
if "v" in version:
unrendered_version_configs[version["v"]] = version.get("config", {})

# Render the data (except for tests, data_tests and descriptions).
# See the SchemaYamlRenderer
entry = self.render_entry(entry)

schema_file = self.yaml.file
assert isinstance(schema_file, SchemaSourceFile)

if unrendered_config:
schema_file.add_unrendered_config(unrendered_config, self.key, entry["name"])

for version, unrendered_version_config in unrendered_version_configs.items():
schema_file.add_unrendered_config(
unrendered_version_config, self.key, entry["name"], version
)

if self.schema_yaml_vars.env_vars:
self.schema_parser.manifest.env_vars.update(self.schema_yaml_vars.env_vars)
schema_file = self.yaml.file
assert isinstance(schema_file, SchemaSourceFile)
for var in self.schema_yaml_vars.env_vars.keys():
schema_file.add_env_var(var, self.key, entry["name"])
self.schema_yaml_vars.env_vars = {}
Expand Down Expand Up @@ -464,6 +485,8 @@ def parse(self) -> ParseResult:
self.manifest.source_patches[key] = patch
source_file.source_patches.append(key)
else:
# TODO: add unrendered_database from manifest.unrendered_source_patch
MichelleArk marked this conversation as resolved.
Show resolved Hide resolved
# self.yaml.path.original_file_path
source = self._target_from_dict(UnparsedSourceDefinition, data)
self.add_source_definitions(source)
return ParseResult()
Expand Down Expand Up @@ -663,7 +686,13 @@ def patch_node_config(self, node, patch) -> None:
)
# We need to re-apply the config_call_dict after the patch config
config._config_call_dict = node.config_call_dict
self.schema_parser.update_parsed_node_config(node, config, patch_config_dict=patch.config)
config._unrendered_config_call_dict = node.unrendered_config_call_dict
self.schema_parser.update_parsed_node_config(
node,
config,
patch_config_dict=patch.config,
patch_file_id=patch.file_id,
)


# Subclasses of NodePatchParser: TestablePatchParser, ModelPatchParser, AnalysisPatchParser,
Expand Down
Loading
Loading