dbt-labs · jtcohen6 · Apr 1, 2023 · Apr 1, 2023 · Apr 2, 2023 · Apr 2, 2023
@@ -0,0 +1,6 @@
+kind: Features
+body: 'New command: ''dbt clone'''
+time: 2023-04-01T19:36:14.622217+02:00
+custom:
+  Author: jtcohen6
+  Issue: "7256"
@@ -9,6 +9,7 @@
 from dbt.events.base_types import EventMsg
 from dbt.task.clean import CleanTask
 from dbt.task.compile import CompileTask
+from dbt.task.clone import CloneTask
 from dbt.task.deps import DepsTask
 from dbt.task.debug import DebugTask
 from dbt.task.run import RunTask
@@ -298,6 +299,42 @@ def compile(ctx, **kwargs):
     return results, success
 
 
+# dbt clone
+@cli.command("clone")
+@click.pass_context
+@p.exclude
+@p.full_refresh
+@p.profile
+@p.profiles_dir
+@p.project_dir
+@p.resource_type
+@p.select
+@p.selector
+@p.state  # required
+@p.target
+@p.target_path
+@p.threads
+@p.vars
+@p.version_check
+@requires.preflight
+@requires.profile
+@requires.project
+@requires.runtime_config
+@requires.manifest
+@requires.postflight
+def clone(ctx, **kwargs):
+    """Create clones of selected nodes based on their location in the manifest provided to --state."""
+    task = CloneTask(
+        ctx.obj["flags"],
+        ctx.obj["runtime_config"],
+        ctx.obj["manifest"],
+    )
+
+    results = task.run()
+    success = task.interpret_results(results)
+    return results, success
+
+
 # dbt debug
 @cli.command("debug")
 @click.pass_context

@@ -1362,6 +1362,20 @@ def this(self) -> Optional[RelationProxy]:
             return None
         return self.db_wrapper.Relation.create_from(self.config, self.model)
 
+    @contextproperty
+    def state_relation(self) -> Optional[RelationProxy]:
+        """
+        For commands which add information about this node's corresponding
+        production version (via a --state artifact), access the Relation
+        object for that stateful other
+        """
+        if getattr(self.model, "state_relation", None):
+            return self.db_wrapper.Relation.create_from_node(
+                self.config, self.model.state_relation  # type: ignore
+            )
+        else:
+            return None
+
 
 # This is called by '_context_for', used in 'render_with_context'
 def generate_parser_model_context(

@@ -35,6 +35,7 @@
     GraphMemberNode,
     ResultNode,
     BaseNode,
+    RelationalNode,
 )
 from dbt.contracts.graph.unparsed import SourcePatch
 from dbt.contracts.files import SourceFile, SchemaSourceFile, FileHash, AnySourceFile
@@ -1032,6 +1033,30 @@ def merge_from_artifact(
         sample = list(islice(merged, 5))
         fire_event(MergedFromState(num_merged=len(merged), sample=sample))
 
+    # Called by CloneTask.defer_to_manifest
+    def add_from_artifact(
+        self,
+        other: "WritableManifest",
+    ) -> None:
+        """Update this manifest by *adding* information about each node's location
+        in the other manifest.
+
+        Only non-ephemeral refable nodes are examined.
+        """
+        refables = set(NodeType.refable())
+        for unique_id, node in other.nodes.items():
+            current = self.nodes.get(unique_id)
+            if current and (node.resource_type in refables and not node.is_ephemeral):
+                other_node = other.nodes[unique_id]
+                state_relation = RelationalNode(
+                    other_node.database, other_node.schema, other_node.alias
+                )
+                self.nodes[unique_id] = current.replace(state_relation=state_relation)
+
+        # Rebuild the flat_graph, which powers the 'graph' context variable,
+        # now that we've deferred some nodes
+        self.build_flat_graph()
+
     # Methods that were formerly in ParseResult
 
     def add_macro(self, source_file: SourceFile, macro: Macro):
@@ -1232,6 +1257,8 @@ def __post_serialize__(self, dct):
         for unique_id, node in dct["nodes"].items():
             if "config_call_dict" in node:
                 del node["config_call_dict"]
+            if "state_relation" in node:
+                del node["state_relation"]
         return dct
 
 

@@ -229,16 +229,20 @@ def add_node(self, value: str):
 
 
 @dataclass
-class ParsedNodeMandatory(GraphNode, HasRelationMetadata, Replaceable):
+class RelationalNode(HasRelationMetadata):
     alias: str
-    checksum: FileHash
-    config: NodeConfig = field(default_factory=NodeConfig)
 
     @property
     def identifier(self):
         return self.alias
 
 
+@dataclass
+class ParsedNodeMandatory(GraphNode, RelationalNode, Replaceable):
+    checksum: FileHash
+    config: NodeConfig = field(default_factory=NodeConfig)
+
+
 # This needs to be in all ManifestNodes and also in SourceDefinition,
 # because of "source freshness"
 @dataclass
@@ -310,7 +314,7 @@ def __post_serialize__(self, dct):
     @classmethod
     def _deserialize(cls, dct: Dict[str, int]):
         # The serialized ParsedNodes do not differ from each other
-        # in fields that would allow 'from_dict' to distinguis
+        # in fields that would allow 'from_dict' to distinguish
         # between them.
         resource_type = dct["resource_type"]
         if resource_type == "model":
@@ -567,6 +571,7 @@ class HookNode(CompiledNode):
 class ModelNode(CompiledNode):
     resource_type: NodeType = field(metadata={"restrict": [NodeType.Model]})
     access: AccessType = AccessType.Protected
+    state_relation: Optional[RelationalNode] = None
 
 
 # TODO: rm?
@@ -593,6 +598,7 @@ class SeedNode(ParsedNode):  # No SQLDefaults!
     # and we need the root_path to load the seed later
     root_path: Optional[str] = None
     depends_on: MacroDependsOn = field(default_factory=MacroDependsOn)
+    state_relation: Optional[RelationalNode] = None
 
     def same_seeds(self, other: "SeedNode") -> bool:
         # for seeds, we check the hashes. If the hashes are different types,
@@ -787,6 +793,7 @@ class IntermediateSnapshotNode(CompiledNode):
 class SnapshotNode(CompiledNode):
     resource_type: NodeType = field(metadata={"restrict": [NodeType.Snapshot]})
     config: SnapshotConfig
+    state_relation: Optional[RelationalNode] = None
 
 
 # ====================================

@@ -0,0 +1,107 @@
+{% macro can_clone_tables() %}
+    {#- Resolve the implementation through adapter.dispatch, then invoke it. -#}
+    {% set implementation = adapter.dispatch('can_clone_tables', 'dbt') %}
+    {% do return(implementation()) %}
+{% endmacro %}
+
+
+{% macro default__can_clone_tables() %}
+    {#- Default implementation: answers False (no table cloning). -#}
+    {% do return(False) %}
+{% endmacro %}
+
+
+{% macro snowflake__can_clone_tables() %}
+    {#- snowflake-prefixed implementation: answers True (tables can be cloned). -#}
+    {% do return(True) %}
+{% endmacro %}
+
+
+{% macro get_pointer_sql(to_relation) %}
+    {#- Resolve the implementation through adapter.dispatch and forward the relation. -#}
+    {% set implementation = adapter.dispatch('get_pointer_sql', 'dbt') %}
+    {% do return(implementation(to_relation)) %}
+{% endmacro %}
+
+
+{% macro default__get_pointer_sql(to_relation) %}
+    {#- Capture a SELECT over to_relation as a string and return it to the caller. -#}
+    {% set select_star_sql %}
+        select * from {{ to_relation }}
+    {% endset %}
+    {% do return(select_star_sql) %}
+{% endmacro %}
+
+
+{% macro get_clone_table_sql(this_relation, state_relation) %}
+    {#- Resolve the implementation through adapter.dispatch and forward both relations. -#}
+    {% set implementation = adapter.dispatch('get_clone_table_sql', 'dbt') %}
+    {% do return(implementation(this_relation, state_relation)) %}
+{% endmacro %}
+
+
+{# Default implementation: renders a single `create or replace table ... clone ...` #}
+{# statement with this_relation as the new table and state_relation as the clone source. #}
+{% macro default__get_clone_table_sql(this_relation, state_relation) %}
+    create or replace table {{ this_relation }} clone {{ state_relation }}
+{% endmacro %}
+
+
+{# snowflake-prefixed implementation of get_clone_table_sql. #}
+{# Emits `transient` unless the `transient` config is falsy (it defaults to true), and #}
+{# appends `copy grants` only when the `copy_grants` config is truthy (it defaults to false). #}
+{% macro snowflake__get_clone_table_sql(this_relation, state_relation) %}
+    create or replace
+      {{ "transient" if config.get("transient", true) }}
+      table {{ this_relation }}
+      clone {{ state_relation }}
+      {{ "copy grants" if config.get("copy_grants", false) }}
+{% endmacro %}
+
+
+{%- materialization clone, default -%}
+
+  {#-
+    Builds `this` as a pointer to `state_relation`:
+      * a cloned table when can_clone_tables() is true and the state relation is a table,
+      * otherwise a view selecting from the state relation (reusing the view materialization).
+    Returns {'relations': [...]}; the list is empty when nothing was built.
+  -#}
+  {%- set relations = {'relations': []} -%}
+
+  {#- Without a counterpart in the --state manifest there is nothing to clone from -#}
+  {%- if not state_relation -%}
+      {{ log("No relation found in state manifest for " ~ model.unique_id) }}
+      {{ return(relations) }}
+  {%- endif -%}
+
+  {%- set existing_relation = load_cached_relation(this) -%}
+
+  {%- if existing_relation and not flags.FULL_REFRESH -%}
+      -- noop!
+      {{ return(relations) }}
+  {%- endif -%}
+
+  {#- Only look up the state relation once we know there is work to do -#}
+  {%- set other_existing_relation = load_cached_relation(state_relation) -%}
+  {#- apply_grants below reads this; it was previously never defined -#}
+  {%- set grant_config = config.get('grants') -%}
+
+  -- If this is a database that can do zero-copy cloning of tables, and the other relation is a table, then this will be a table
+  -- Otherwise, this will be a view
+
+  {% set can_clone_tables = can_clone_tables() %}
+
+  {%- if other_existing_relation and other_existing_relation.type == 'table' and can_clone_tables -%}
+
+      {%- set target_relation = this.incorporate(type='table') -%}
+      {% if existing_relation is not none and not existing_relation.is_table %}
+        {{ log("Dropping relation " ~ existing_relation ~ " because it is of type " ~ existing_relation.type) }}
+        {{ drop_relation_if_exists(existing_relation) }}
+      {% endif %}
+
+      -- as a general rule, data platforms that can clone tables can also do atomic 'create or replace'
+      {% call statement('main') %}
+          {{ get_clone_table_sql(target_relation, state_relation) }}
+      {% endcall %}
+
+      {% set should_revoke = should_revoke(existing_relation, full_refresh_mode=True) %}
+      {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %}
+      {% do persist_docs(target_relation, model) %}
+
+      {{ return({'relations': [target_relation]}) }}
+
+  {%- else -%}
+
+      {%- set target_relation = this.incorporate(type='view') -%}
+
+      -- TODO: this should probably be illegal
+      -- I'm just doing it out of convenience to reuse the 'view' materialization logic
+      {#- Render the pointer SQL once and reuse it for both context keys -#}
+      {%- set pointer_sql = get_pointer_sql(state_relation) -%}
+      {%- do context.update({
+          'sql': pointer_sql,
+          'compiled_code': pointer_sql
+      }) -%}
+
+      -- reuse the view materialization
+      -- TODO: support actual dispatch for materialization macros
+      {% set search_name = "materialization_view_" ~ adapter.type() %}
+      {% if not search_name in context %}
+          {% set search_name = "materialization_view_default" %}
+      {% endif %}
+      {% set materialization_macro = context[search_name] %}
+      {% set relations = materialization_macro() %}
+      {{ return(relations) }}
+
+  {%- endif -%}
+
+{%- endmaterialization -%}