Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dbt clone #7881

Merged
merged 37 commits into from
Jun 29, 2023
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
3056eda
Rebase Jerco's clone code
aranke Jun 15, 2023
f63c379
fix unit tests
aranke Jun 15, 2023
afdada9
Delete .user.yml
aranke Jun 15, 2023
23b4113
Delete profiles.yml
aranke Jun 15, 2023
dafb250
get integration test working
aranke Jun 15, 2023
b78e837
Merge branch 'main' into 7258_dbt_clone
aranke Jun 15, 2023
31870f9
add state_relation to required_model_keys
aranke Jun 15, 2023
692e542
Move macros into models directory
aranke Jun 16, 2023
69a3dbd
Simplify test
aranke Jun 16, 2023
125fb0b
Merge branch 'main' into 7258_dbt_clone
aranke Jun 20, 2023
b5dde06
add no state test, move state_relation to maybe_keys
aranke Jun 20, 2023
5303346
Merge branch 'main' into 7258_dbt_clone
aranke Jun 20, 2023
c936856
rename: state_relation -> defer_relation
aranke Jun 21, 2023
2b0eb25
missed a spot
aranke Jun 21, 2023
95758d1
Merge branch 'main' into 7258_dbt_clone
aranke Jun 21, 2023
bcecced
Move _get_deferred_manifest to GraphRunnableTask
aranke Jun 21, 2023
7327283
Reword error message
aranke Jun 21, 2023
0543303
create adapter zone versions of dbt_clone tests to be inherited by oth…
McKnight-42 Jun 22, 2023
ed66dc0
Merge branch 'main' into 7258_dbt_clone
aranke Jun 22, 2023
5aa4209
Add Matt McKnight to contributors list
aranke Jun 22, 2023
7eace73
Merge branch 'main' into 7258_dbt_clone
aranke Jun 22, 2023
65eaa0a
remove context.update hack
aranke Jun 26, 2023
4bce05f
Merge branch 'main' into 7258_dbt_clone
aranke Jun 26, 2023
824a2fc
Merge branch 'main' of github.com:dbt-labs/dbt into 7258_dbt_clone
McKnight-42 Jun 27, 2023
6a8ecb0
add clean_up method to drop alt schema names after tests run
McKnight-42 Jun 27, 2023
d4f2eca
Merge branch '7258_dbt_clone' of github.com:dbt-labs/dbt into 7258_db…
McKnight-42 Jun 27, 2023
5be28bc
Add context to comments
aranke Jun 27, 2023
18ad7ca
Merge branch 'main' into 7258_dbt_clone
aranke Jun 28, 2023
6efe6e3
Use relation_name instead of constructing string
aranke Jun 28, 2023
48e01e5
Merge branch 'main' into 7258_dbt_clone
aranke Jun 28, 2023
81dad80
Fix add_from_artifact test
aranke Jun 28, 2023
d3d3014
Merge branch 'main' into 7258_dbt_clone
aranke Jun 28, 2023
88a726b
remove node.relation_name check
aranke Jun 28, 2023
020cde5
add if v.relation_name test
aranke Jun 28, 2023
e761aa0
fix seed relation_name bug
aranke Jun 28, 2023
4eb7aa4
Merge branch 'main' into 7258_dbt_clone
aranke Jun 28, 2023
cd78b64
Skip `test_semantic_model_deleted_partial_parsing`
aranke Jun 28, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20230616-104849.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: dbt clone
time: 2023-06-16T10:48:49.079961-05:00
custom:
Author: jtcohen6 aranke
Issue: "7258"
1 change: 1 addition & 0 deletions core/dbt/cli/flags.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,7 @@ def command_args(command: CliCommand) -> ArgsList:
CMD_DICT: Dict[CliCommand, ClickCommand] = {
CliCommand.BUILD: cli.build,
CliCommand.CLEAN: cli.clean,
CliCommand.CLONE: cli.clone,
CliCommand.COMPILE: cli.compile,
CliCommand.DOCS_GENERATE: cli.docs_generate,
CliCommand.DOCS_SERVE: cli.docs_serve,
Expand Down
38 changes: 38 additions & 0 deletions core/dbt/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from dbt.events.base_types import EventMsg
from dbt.task.build import BuildTask
from dbt.task.clean import CleanTask
from dbt.task.clone import CloneTask
from dbt.task.compile import CompileTask
from dbt.task.debug import DebugTask
from dbt.task.deps import DepsTask
Expand Down Expand Up @@ -608,6 +609,43 @@ def retry(ctx, **kwargs):
return results, success


# dbt clone
@cli.command("clone")
@click.pass_context
@p.defer_state
@p.exclude
@p.full_refresh
@p.profile
@p.profiles_dir
@p.project_dir
@p.resource_type
@p.select
@p.selector
@p.state  # required: clone sources every node from the --state manifest
@p.target
@p.target_path
@p.threads
@p.vars
@p.version_check
@requires.preflight
@requires.profile
@requires.project
@requires.runtime_config
@requires.manifest
@requires.postflight
def clone(ctx, **kwargs):
    """Create clones of selected nodes based on their location in the manifest provided to --state."""
    task = CloneTask(
        ctx.obj["flags"],
        ctx.obj["runtime_config"],
        ctx.obj["manifest"],
    )

    results = task.run()
    success = task.interpret_results(results)
    # (results, success) is the return contract shared by all dbtRunner commands.
    return results, success


# dbt run operation
@cli.command("run-operation")
@click.pass_context
Expand Down
1 change: 1 addition & 0 deletions core/dbt/cli/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ class Command(Enum):
BUILD = "build"
CLEAN = "clean"
COMPILE = "compile"
CLONE = "clone"
DOCS_GENERATE = "generate"
DOCS_SERVE = "serve"
DEBUG = "debug"
Expand Down
14 changes: 14 additions & 0 deletions core/dbt/context/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1432,6 +1432,20 @@ def this(self) -> Optional[RelationProxy]:
return None
return self.db_wrapper.Relation.create_from(self.config, self.model)

@contextproperty
def state_relation(self) -> Optional[RelationProxy]:
    """
    For commands which add information about this node's corresponding
    production version (via a --state artifact), access the Relation
    object for that stateful other environment.

    Returns None when the node carries no ``state_relation`` (e.g. the
    node was not present in the --state manifest).
    """
    # Not every node type defines state_relation, hence getattr with a default.
    if getattr(self.model, "state_relation", None):
        return self.db_wrapper.Relation.create_from_node(
            self.config, self.model.state_relation  # type: ignore
        )
    return None


# This is called by '_context_for', used in 'render_with_context'
def generate_parser_model_context(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{# Dispatch entry point: does the current adapter support zero-copy table cloning?
   Adapters override via <adapter>__can_clone_table. #}
{% macro can_clone_table() %}
{{ return(adapter.dispatch('can_clone_table', 'dbt')()) }}
{% endmacro %}

{# Conservative default: assume no clone support, so the clone
   materialization falls back to creating a view. #}
{% macro default__can_clone_table() %}
{{ return(False) }}
{% endmacro %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
{%- materialization clone, default -%}
  {# Copy the node's relation from the --state ("other") environment into this
     one: a zero-copy table clone where the adapter supports it, otherwise a
     pass-through view built via the adapter's view materialization. #}

  {%- set relations = {'relations': []} -%}

  {%- if not state_relation -%}
      -- nothing to do
      {{ log("No relation found in state manifest for " ~ model.unique_id, info=True) }}
      {{ return(relations) }}
  {%- endif -%}

  {%- set existing_relation = load_cached_relation(this) -%}

  {%- if existing_relation and not flags.FULL_REFRESH -%}
      -- noop!
      {{ log("Relation " ~ existing_relation ~ " already exists", info=True) }}
      {{ return(relations) }}
  {%- endif -%}

  {%- set other_existing_relation = load_cached_relation(state_relation) -%}

  -- If this is a database that can do zero-copy cloning of tables, and the other relation is a table, then this will be a table
  -- Otherwise, this will be a view

  {% set can_clone_table = can_clone_table() %}

  {%- if other_existing_relation and other_existing_relation.type == 'table' and can_clone_table -%}

      {%- set target_relation = this.incorporate(type='table') -%}
      {% if existing_relation is not none and not existing_relation.is_table %}
          {{ log("Dropping relation " ~ existing_relation ~ " because it is of type " ~ existing_relation.type) }}
          {{ drop_relation_if_exists(existing_relation) }}
      {% endif %}

      -- as a general rule, data platforms that can clone tables can also do atomic 'create or replace'
      {% call statement('main') %}
          {{ create_or_replace_clone(target_relation, state_relation) }}
      {% endcall %}

      {% set should_revoke = should_revoke(existing_relation, full_refresh_mode=True) %}
      {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %}
      {% do persist_docs(target_relation, model) %}

      {{ return({'relations': [target_relation]}) }}

  {%- else -%}

      {%- set target_relation = this.incorporate(type='view') -%}

      -- TODO: this should probably be illegal
      -- I'm just doing it out of convenience to reuse the 'view' materialization logic
      {# Compute the fallback SQL once, then overwrite the jinja context so the
         reused view materialization picks it up as this model's compiled code.
         NOTE(review): mutating the context this way is hidden and fragile —
         not a pattern to copy elsewhere. #}
      {%- set clone_target_sql = get_clone_target(state_relation) -%}
      {%- do context.update({
          'sql': clone_target_sql,
          'compiled_code': clone_target_sql
      }) -%}

      -- reuse the view materialization
      -- TODO: support actual dispatch for materialization macros
      {# Known limitation: this lookup does not follow adapter inheritance
         (e.g. an adapter reusing another adapter's view materialization). #}
      {% set search_name = "materialization_view_" ~ adapter.type() %}
      {% if not search_name in context %}
          {% set search_name = "materialization_view_default" %}
      {% endif %}
      {% set materialization_macro = context[search_name] %}
      {% set relations = materialization_macro() %}
      {{ return(relations) }}

  {%- endif -%}

{%- endmaterialization -%}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{# Dispatch wrapper: emit the DDL that clones state_relation into this_relation. #}
{% macro create_or_replace_clone(this_relation, state_relation) %}
{{ return(adapter.dispatch('create_or_replace_clone', 'dbt')(this_relation, state_relation)) }}
{% endmacro %}

{# Default implementation uses `create or replace table ... clone ...`,
   the zero-copy clone syntax shared by several data platforms; adapters
   without that syntax should override (or report can_clone_table() = False). #}
{% macro default__create_or_replace_clone(this_relation, state_relation) %}
create or replace table {{ this_relation }} clone {{ state_relation }}
{% endmacro %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{# Dispatch wrapper: SQL used as the body of the fallback view when the
   adapter cannot zero-copy clone a table. #}
{% macro get_clone_target(to_relation) %}
{{ return(adapter.dispatch('get_clone_target', 'dbt')(to_relation)) }}
{% endmacro %}

{# Default: a pass-through select over the relation in the other environment. #}
{% macro default__get_clone_target(to_relation) %}
{% set target_sql %}
select * from {{ to_relation }}
{% endset %}
{{ return(target_sql) }}
{% endmacro %}
183 changes: 183 additions & 0 deletions core/dbt/task/clone.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
import threading
from typing import AbstractSet, Optional, Any, List, Iterable, Set

from dbt.dataclass_schema import dbtClassMixin

from dbt.contracts.graph.manifest import WritableManifest
from dbt.contracts.results import RunStatus, RunResult
from dbt.exceptions import DbtInternalError, DbtRuntimeError, CompilationError
from dbt.graph import ResourceTypeSelector
from dbt.node_types import NodeType
from dbt.parser.manifest import write_manifest
from dbt.task.base import BaseRunner
from dbt.task.runnable import GraphRunnableTask
from dbt.task.run import _validate_materialization_relations_dict
from dbt.adapters.base import BaseRelation
from dbt.clients.jinja import MacroGenerator
from dbt.context.providers import generate_runtime_model_context


class CloneRunner(BaseRunner):
    """Per-node runner for `dbt clone`: invokes the 'clone' materialization
    macro and reports a RunResult for each selected node."""

    def before_execute(self):
        # No per-node preamble needed for cloning.
        pass

    def after_execute(self, result):
        # No per-node postprocessing needed for cloning.
        pass

    def compile(self, manifest):
        # Cloning never compiles SQL; hand the node back untouched.
        return self.node

    def _build_run_model_result(self, model, context):
        """Build a successful RunResult; 'main' may be absent when the
        materialization returned early without running a statement."""
        main_result = context["load_result"]("main")
        message = str(main_result.response) if main_result else "No-op"
        adapter_response = {}
        if main_result and isinstance(main_result.response, dbtClassMixin):
            adapter_response = main_result.response.to_dict(omit_none=True)
        return RunResult(
            node=model,
            status=RunStatus.Success,
            timing=[],
            thread_id=threading.current_thread().name,
            execution_time=0,
            message=message,
            adapter_response=adapter_response,
            failures=None,
        )

    def _materialization_relations(self, result: Any, model) -> List[BaseRelation]:
        """Validate that the materialization returned {'relations': [...]};
        anything else is a compilation error."""
        if isinstance(result, dict):
            return _validate_materialization_relations_dict(result, model)

        if isinstance(result, str):
            msg = (
                'The materialization ("{}") did not explicitly return a '
                "list of relations to add to the cache.".format(str(model.get_materialization()))
            )
            raise CompilationError(msg, node=model)

        msg = (
            "Invalid return value from materialization, expected a dict "
            'with key "relations", got: {}'.format(str(result))
        )
        raise CompilationError(msg, node=model)

    def execute(self, model, manifest):
        model_context = generate_runtime_model_context(model, self.config, manifest)
        materialization_macro = manifest.find_materialization_macro_by_name(
            self.config.project_name, "clone", self.adapter.type()
        )

        if "config" not in model_context:
            raise DbtInternalError(
                "Invalid materialization context generated, missing config: {}".format(
                    model_context
                )
            )

        ctx_config = model_context["config"]

        # Bracket the macro call with adapter hooks; the post hook must run
        # even if the materialization raises.
        hook_ctx = self.adapter.pre_model_hook(ctx_config)
        try:
            result = MacroGenerator(materialization_macro, model_context)()
        finally:
            self.adapter.post_model_hook(ctx_config, hook_ctx)

        # Register every created relation with the adapter cache.
        for relation in self._materialization_relations(result, model):
            self.adapter.cache_added(relation.incorporate(dbt_created=True))

        return self._build_run_model_result(model, model_context)


class CloneTask(GraphRunnableTask):
    """Task behind `dbt clone`: create clones of selected nodes based on
    their counterparts in the manifest supplied via --state."""

    def raise_on_first_error(self):
        # Keep cloning the remaining nodes even if one node fails.
        return False

    def get_model_schemas(self, adapter, selected_uids: Iterable[str]) -> Set[BaseRelation]:
        """Collect the schemas of the selected relational nodes, plus the
        schemas of their --state ("other") counterparts, so both sides can
        be cached before the run."""
        if self.manifest is None:
            raise DbtInternalError("manifest was None in get_model_schemas")
        result: Set[BaseRelation] = set()

        for node in self.manifest.nodes.values():
            if node.unique_id not in selected_uids:
                continue
            if node.is_relational and not node.is_ephemeral:
                relation = adapter.Relation.create_from(self.config, node)
                result.add(relation.without_identifier())

                # cache the 'other' schemas too!
                if node.state_relation:  # type: ignore
                    other_relation = adapter.Relation.create_from_node(
                        self.config, node.state_relation  # type: ignore
                    )
                    result.add(other_relation.without_identifier())

        return result

    def before_run(self, adapter, selected_uids: AbstractSet[str]):
        with adapter.connection_named("master"):
            # unlike in other tasks, we want to add information from the --state manifest *before* caching!
            self.defer_to_manifest(adapter, selected_uids)
            # only create *our* schemas, but cache *other* schemas in addition
            schemas_to_create = super().get_model_schemas(adapter, selected_uids)
            self.create_schemas(adapter, schemas_to_create)
            schemas_to_cache = self.get_model_schemas(adapter, selected_uids)
            self.populate_adapter_cache(adapter, schemas_to_cache)

    @property
    def resource_types(self):
        """Resource types eligible for cloning: the refable node types,
        optionally narrowed by --resource-type."""
        if not self.args.resource_types:
            return NodeType.refable()

        values = set(self.args.resource_types)

        # 'all' expands to every refable type.
        if "all" in values:
            values.remove("all")
            values.update(NodeType.refable())

        # Silently drop any non-refable types the user requested.
        values = [NodeType(val) for val in values if val in NodeType.refable()]

        return list(values)

    def get_node_selector(self) -> ResourceTypeSelector:
        resource_types = self.resource_types

        if self.manifest is None or self.graph is None:
            raise DbtInternalError("manifest and graph must be set to perform node selection")
        return ResourceTypeSelector(
            graph=self.graph,
            manifest=self.manifest,
            previous_state=self.previous_state,
            resource_types=resource_types,
        )

    def get_runner_type(self, _):
        return CloneRunner

    def _get_deferred_manifest(self) -> Optional[WritableManifest]:
        """Load the manifest from --state; clone cannot run without one."""
        state = self.previous_state
        if state is None:
            raise DbtRuntimeError(
                "--state is required for cloning relations from another environment"
            )

        if state.manifest is None:
            raise DbtRuntimeError(f'Could not find manifest in --state path: "{self.args.state}"')
        return state.manifest

    # Note that this is different behavior from --defer with other commands, which *merge*
    # selected nodes from this manifest + unselected nodes from the other manifest
    def defer_to_manifest(self, adapter, selected_uids: AbstractSet[str]):
        deferred_manifest = self._get_deferred_manifest()
        if deferred_manifest is None:
            return
        if self.manifest is None:
            raise DbtInternalError(
                "Expected to defer to manifest, but there is no runtime manifest to defer from!"
            )
        self.manifest.add_from_artifact(other=deferred_manifest)
        # TODO: is it wrong to write the manifest here? I think it's right...
        write_manifest(self.manifest, self.config.target_path)
3 changes: 3 additions & 0 deletions core/dbt/task/retry.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from dbt.graph import GraphQueue
from dbt.task.base import ConfiguredTask
from dbt.task.build import BuildTask
from dbt.task.clone import CloneTask
from dbt.task.compile import CompileTask
from dbt.task.generate import GenerateTask
from dbt.task.run import RunTask
Expand All @@ -22,6 +23,7 @@
TASK_DICT = {
"build": BuildTask,
"compile": CompileTask,
"clone": CloneTask,
"generate": GenerateTask,
"seed": SeedTask,
"snapshot": SnapshotTask,
Expand All @@ -33,6 +35,7 @@
CMD_DICT = {
"build": CliCommand.BUILD,
"compile": CliCommand.COMPILE,
"clone": CliCommand.CLONE,
"generate": CliCommand.DOCS_GENERATE,
"seed": CliCommand.SEED,
"snapshot": CliCommand.SNAPSHOT,
Expand Down
Loading