dbt-labs · MichelleArk · Sep 26, 2024 · Jul 26, 2024 · Jul 26, 2024 · Jul 26, 2024
@@ -0,0 +1,7 @@
+kind: Fixes
+body: Ignore rendered jinja in configs for state:modified, behind state_modified_compare_more_unrendered_values
+  behaviour flag
+time: 2024-09-25T13:10:28.490042+01:00
+custom:
+  Author: michelleark
+  Issue: "9564"
@@ -194,13 +194,16 @@ class ParsedResource(ParsedResourceMandatory):
     unrendered_config: Dict[str, Any] = field(default_factory=dict)
     created_at: float = field(default_factory=lambda: time.time())
     config_call_dict: Dict[str, Any] = field(default_factory=dict)
+    unrendered_config_call_dict: Dict[str, Any] = field(default_factory=dict)
     relation_name: Optional[str] = None
     raw_code: str = ""
 
     def __post_serialize__(self, dct: Dict, context: Optional[Dict] = None):
         dct = super().__post_serialize__(dct, context)
         if context and context.get("artifact") and "config_call_dict" in dct:
             del dct["config_call_dict"]
+        if context and context.get("artifact") and "unrendered_config_call_dict" in dct:
+            del dct["unrendered_config_call_dict"]
         return dct
 
 

@@ -191,3 +191,57 @@
         raise ParsingError(f"Invalid ref or source expression: {expression}")
 
     return ref_or_source
+
+
+def statically_parse_unrendered_config(string: str) -> Optional[Dict[str, Any]]:
+    """
+    Given a string with jinja, extract an unrendered config call.
+    If no config call is present, returns None.
+
+    For example, given:
+    "{{ config(materialized=env_var('DBT_TEST_STATE_MODIFIED')) }}\nselect 1 as id"
+    returns: {'materialized': "Keyword(key='materialized', value=Call(node=Name(name='env_var', ctx='load'), args=[Const(value='DBT_TEST_STATE_MODIFIED')], kwargs=[], dyn_args=None, dyn_kwargs=None))"}
+
+    No config call:
+    "select 1 as id"
+    returns: None
+    """
+    # Return early to avoid creating jinja environemt if no config call in input string
+    if "config(" not in string:
+        return None
+
+    # set 'capture_macros' to capture undefined
+    env = get_environment(None, capture_macros=True)
+
+    global _TESTING_MACRO_CACHE
+    if test_caching_enabled() and _TESTING_MACRO_CACHE and string in _TESTING_MACRO_CACHE:
+        parsed = _TESTING_MACRO_CACHE.get(string, None)
+        func_calls = getattr(parsed, "_dbt_cached_calls")
+    else:
+        parsed = env.parse(string)
+        func_calls = tuple(parsed.find_all(jinja2.nodes.Call))
+
+    config_func_calls = list(
+        filter(
+            lambda f: hasattr(f, "node") and hasattr(f.node, "name") and f.node.name == "config",
+            func_calls,
+        )
+    )
+    # There should only be one {{ config(...) }} call per input
+    config_func_call = config_func_calls[0] if config_func_calls else None
+
+    if not config_func_call:
+        return None
+
+    unrendered_config = {}
+    for kwarg in config_func_call.kwargs:
+        unrendered_config[kwarg.key] = construct_static_kwarg_value(kwarg)
+
+    return unrendered_config
+
+
+def construct_static_kwarg_value(kwarg):
+    # Instead of trying to re-assemble complex kwarg value, simply stringify the value
+    # This is still useful to be able to detect changes in unrendered configs, even if it is
+    # not an exact representation of the user input.
+    return str(kwarg)
@@ -6,6 +6,7 @@
 from dbt.adapters.factory import get_config_class_by_name
 from dbt.config import IsFQNResource, Project, RuntimeConfig
 from dbt.contracts.graph.model_config import get_config_for
+from dbt.flags import get_flags
 from dbt.node_types import NodeType
 from dbt.utils import fqn_search
 from dbt_common.contracts.config.base import BaseConfig, merge_config_dicts
@@ -286,6 +287,7 @@ def __init__(
         project_name: str,
     ) -> None:
         self._config_call_dict: Dict[str, Any] = {}
+        self._unrendered_config_call_dict: Dict[str, Any] = {}
         self._active_project = active_project
         self._fqn = fqn
         self._resource_type = resource_type
@@ -295,6 +297,10 @@ def add_config_call(self, opts: Dict[str, Any]) -> None:
         dct = self._config_call_dict
         merge_config_dicts(dct, opts)
 
+    def add_unrendered_config_call(self, opts: Dict[str, Any]) -> None:
+        # Cannot perform complex merge behaviours on unrendered configs as they may not be appropriate types.
+        self._unrendered_config_call_dict.update(opts)
+
     def build_config_dict(
         self,
         base: bool = False,
@@ -305,12 +311,24 @@ def build_config_dict(
         if rendered:
             # TODO CT-211
             src = ContextConfigGenerator(self._active_project)  # type: ignore[var-annotated]
+            config_call_dict = self._config_call_dict
         else:
             # TODO CT-211
             src = UnrenderedConfigGenerator(self._active_project)  # type: ignore[assignment]
 
+            # preserve legacy behaviour - using unreliable (potentially rendered) _config_call_dict
+            if get_flags().state_modified_compare_more_unrendered_values is False:
+                config_call_dict = self._config_call_dict
+            else:
+                # Prefer _config_call_dict if it is available and _unrendered_config_call_dict is not,
+                # as _unrendered_config_call_dict is unreliable for non-sql nodes (e.g. no jinja config block rendered for python models, etc)
+                if self._config_call_dict and not self._unrendered_config_call_dict:
+                    config_call_dict = self._config_call_dict
+                else:
+                    config_call_dict = self._unrendered_config_call_dict
+
         return src.calculate_node_config_dict(
-            config_call_dict=self._config_call_dict,
+            config_call_dict=config_call_dict,
             fqn=self._fqn,
             resource_type=self._resource_type,
             project_name=self._project_name,

@@ -35,6 +35,7 @@
     UnitTestMacroGenerator,
     get_rendered,
 )
+from dbt.clients.jinja_static import statically_parse_unrendered_config
 from dbt.config import IsFQNResource, Project, RuntimeConfig
 from dbt.constants import DEFAULT_ENV_PLACEHOLDER
 from dbt.context.base import Var, contextmember, contextproperty
@@ -395,6 +396,13 @@ def __call__(self, *args, **kwargs):
         # not call it!
         if self.context_config is None:
             raise DbtRuntimeError("At parse time, did not receive a context config")
+
+        # Track unrendered opts to build parsed node unrendered_config later on
+        unrendered_config = statically_parse_unrendered_config(self.model.raw_code)
+        if unrendered_config:
+            self.context_config.add_unrendered_config_call(unrendered_config)
+
+        # Use rendered opts to populate context_config
         self.context_config.add_config_call(opts)
         return ""
 

@@ -212,6 +212,7 @@ class SchemaSourceFile(BaseSourceFile):
     # created too, but those are in 'sources'
     sop: List[SourceKey] = field(default_factory=list)
     env_vars: Dict[str, Any] = field(default_factory=dict)
+    unrendered_configs: Dict[str, Any] = field(default_factory=dict)
     pp_dict: Optional[Dict[str, Any]] = None
     pp_test_index: Optional[Dict[str, Any]] = None
 
@@ -317,6 +318,33 @@ def get_all_test_ids(self):
                 test_ids.extend(self.data_tests[key][name])
         return test_ids
 
+    def add_unrendered_config(self, unrendered_config, yaml_key, name, version=None):
+        versioned_name = f"{name}_v{version}" if version is not None else name
+
+        if yaml_key not in self.unrendered_configs:
+            self.unrendered_configs[yaml_key] = {}
+
+        if versioned_name not in self.unrendered_configs[yaml_key]:
+            self.unrendered_configs[yaml_key][versioned_name] = unrendered_config
+
+    def get_unrendered_config(self, yaml_key, name, version=None) -> Optional[Dict[str, Any]]:
+        versioned_name = f"{name}_v{version}" if version is not None else name
+
+        if yaml_key not in self.unrendered_configs:
+            return None
+        if versioned_name not in self.unrendered_configs[yaml_key]:
+            return None
+
+        return self.unrendered_configs[yaml_key][versioned_name]
+
+    def delete_from_unrendered_configs(self, yaml_key, name):
+        # We delete all unrendered_configs for this yaml_key/name because the
+        # entry has been scheduled for reparsing.
+        if self.get_unrendered_config(yaml_key, name):
+            del self.unrendered_configs[yaml_key][name]
+            if not self.unrendered_configs[yaml_key]:
+                del self.unrendered_configs[yaml_key]
+
     def add_env_var(self, var, yaml_key, name):
         if yaml_key not in self.env_vars:
             self.env_vars[yaml_key] = {}

@@ -337,17 +337,19 @@ class ProjectFlags(ExtensibleDbtClassMixin):
     warn_error_options: Optional[Dict[str, Union[str, List[str]]]] = None
     write_json: Optional[bool] = None
 
-    # legacy behaviors
+    # legacy behaviors - https://github.com/dbt-labs/dbt-core/blob/main/docs/guides/behavior-change-flags.md
     require_explicit_package_overrides_for_builtin_materializations: bool = True
     require_resource_names_without_spaces: bool = False
     source_freshness_run_project_hooks: bool = False
+    state_modified_compare_more_unrendered_values: bool = False
 
     @property
     def project_only_flags(self) -> Dict[str, Any]:
         return {
             "require_explicit_package_overrides_for_builtin_materializations": self.require_explicit_package_overrides_for_builtin_materializations,
             "require_resource_names_without_spaces": self.require_resource_names_without_spaces,
             "source_freshness_run_project_hooks": self.source_freshness_run_project_hooks,
+            "state_modified_compare_more_unrendered_values": self.state_modified_compare_more_unrendered_values,
         }
 
 

@@ -13,6 +13,7 @@
     generate_generate_name_macro_context,
     generate_parser_model_context,
 )
+from dbt.contracts.files import SchemaSourceFile
 from dbt.contracts.graph.manifest import Manifest
 from dbt.contracts.graph.nodes import BaseNode, ManifestNode
 from dbt.contracts.graph.unparsed import Docs, UnparsedNode
@@ -22,9 +23,11 @@
     DictParseError,
     InvalidAccessTypeError,
 )
+from dbt.flags import get_flags
 from dbt.node_types import AccessType, ModelLanguage, NodeType
 from dbt.parser.search import FileBlock
 from dbt_common.dataclass_schema import ValidationError
+from dbt_common.utils import deep_merge
 
 # internally, the parser may store a less-restrictive type that will be
 # transformed into the final type. But it will have to be derived from
@@ -308,6 +311,7 @@ def update_parsed_node_config(
         config: ContextConfig,
         context=None,
         patch_config_dict=None,
+        patch_file_id=None,
     ) -> None:
         """Given the ContextConfig used for parsing and the parsed node,
         generate and set the true values to use, overriding the temporary parse
@@ -369,13 +373,27 @@ def update_parsed_node_config(
             if hasattr(parsed_node, "contract"):
                 parsed_node.contract = Contract.from_dict(contract_dct)
 
+        if get_flags().state_modified_compare_more_unrendered_values:
+            # Use the patch_file.unrendered_configs if available to update patch_dict_config,
+            # as provided patch_config_dict may actuallly already be rendered and thus sensitive to jinja evaluations
+            if patch_file_id:
+                patch_file = self.manifest.files.get(patch_file_id, None)
+                if patch_file and isinstance(patch_file, SchemaSourceFile):
+                    # TODO: do not hardcode "models"
+                    if unrendered_patch_config := patch_file.get_unrendered_config(
+                        "models", parsed_node.name, getattr(parsed_node, "version", None)
+                    ):
+                        patch_config_dict = deep_merge(patch_config_dict, unrendered_patch_config)
+
         # unrendered_config is used to compare the original database/schema/alias
         # values and to handle 'same_config' and 'same_contents' calls
         parsed_node.unrendered_config = config.build_config_dict(
             rendered=False, patch_config_dict=patch_config_dict
         )
+        print(f"final unrendered_config: {parsed_node.unrendered_config}")
 
         parsed_node.config_call_dict = config._config_call_dict
+        parsed_node.unrendered_config_call_dict = config._unrendered_config_call_dict
 
         # do this once before we parse the node database/schema/alias, so
         # parsed_node.config is what it would be if they did nothing

diff --git a/core/dbt/parser/models.py b/core/dbt/parser/models.py
@@ -302,6 +302,7 @@ def render_update(self, node: ModelNode, config: ContextConfig) -> None:
             statically_parsed = self.run_experimental_parser(node)
         # run the stable static parser unless it is explicitly turned off
         else:
+            # TODO: consider running new parser here?
             statically_parsed = self.run_static_parser(node)
 
         # if the static parser succeeded, extract some data in easy-to-compare formats

@@ -808,6 +808,7 @@ def merge_patch(self, schema_file, key, patch, new_patch=False):
                 pp_dict[key].append(patch)
 
         schema_file.delete_from_env_vars(key, patch["name"])
+        schema_file.delete_from_unrendered_configs(key, patch["name"])
         self.add_to_pp_files(schema_file)
 
     # For model, seed, snapshot, analysis schema dictionary keys,

@@ -400,13 +400,34 @@ def get_key_dicts(self) -> Iterable[Dict[str, Any]]:
             if "name" not in entry and "model" not in entry:
                 raise ParsingError("Entry did not contain a name")
 
+            unrendered_config = {}
+            if "config" in entry:
+                unrendered_config = entry["config"]
+                print(f"unrendered_config: {unrendered_config}")
+
+            unrendered_version_configs = {}
+            if "versions" in entry:
+                for version in entry["versions"]:
+                    if "v" in version:
+                        unrendered_version_configs[version["v"]] = version.get("config", {})
+
             # Render the data (except for tests, data_tests and descriptions).
             # See the SchemaYamlRenderer
             entry = self.render_entry(entry)
+
+            schema_file = self.yaml.file
+            assert isinstance(schema_file, SchemaSourceFile)
+
+            if unrendered_config:
+                schema_file.add_unrendered_config(unrendered_config, self.key, entry["name"])
+
+            for version, unrendered_version_config in unrendered_version_configs.items():
+                schema_file.add_unrendered_config(
+                    unrendered_version_config, self.key, entry["name"], version
+                )
+
             if self.schema_yaml_vars.env_vars:
                 self.schema_parser.manifest.env_vars.update(self.schema_yaml_vars.env_vars)
-                schema_file = self.yaml.file
-                assert isinstance(schema_file, SchemaSourceFile)
                 for var in self.schema_yaml_vars.env_vars.keys():
                     schema_file.add_env_var(var, self.key, entry["name"])
                 self.schema_yaml_vars.env_vars = {}
@@ -464,6 +485,8 @@ def parse(self) -> ParseResult:
                 self.manifest.source_patches[key] = patch
                 source_file.source_patches.append(key)
             else:
+                # TODO: add unrendered_database from manifest.unrendered_source_patch
+                # self.yaml.path.original_file_path
                 source = self._target_from_dict(UnparsedSourceDefinition, data)
                 self.add_source_definitions(source)
         return ParseResult()
@@ -663,7 +686,13 @@ def patch_node_config(self, node, patch) -> None:
         )
         # We need to re-apply the config_call_dict after the patch config
         config._config_call_dict = node.config_call_dict
-        self.schema_parser.update_parsed_node_config(node, config, patch_config_dict=patch.config)
+        config._unrendered_config_call_dict = node.unrendered_config_call_dict
+        self.schema_parser.update_parsed_node_config(
+            node,
+            config,
+            patch_config_dict=patch.config,
+            patch_file_id=patch.file_id,
+        )
 
 
 # Subclasses of NodePatchParser: TestablePatchParser, ModelPatchParser, AnalysisPatchParser,