-
Notifications
You must be signed in to change notification settings - Fork 234
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
165 additions
and
160 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
154 changes: 0 additions & 154 deletions
154
dbt/include/spark/macros/materializations/incremental.sql
This file was deleted.
Oops, something went wrong.
45 changes: 45 additions & 0 deletions
45
dbt/include/spark/macros/materializations/incremental/incremental.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
{% materialization incremental, adapter='spark' -%} | ||
|
||
{#-- Validate early so we don't run SQL if the file_format + strategy combo is invalid --#} | ||
{%- set raw_file_format = config.get('file_format', default='parquet') -%} | ||
{%- set raw_strategy = config.get('incremental_strategy', default='append') -%} | ||
|
||
{%- set file_format = dbt_spark_validate_get_file_format(raw_file_format) -%} | ||
{%- set strategy = dbt_spark_validate_get_incremental_strategy(raw_strategy, file_format) -%} | ||
|
||
{%- set unique_key = config.get('unique_key', none) -%} | ||
{%- set partition_by = config.get('partition_by', none) -%} | ||
|
||
{%- set full_refresh_mode = (flags.FULL_REFRESH == True) -%} | ||
|
||
{% set target_relation = this %} | ||
{% set existing_relation = load_relation(this) %} | ||
{% set tmp_relation = make_temp_relation(this) %} | ||
|
||
{% if strategy == 'insert_overwrite' and partition_by %} | ||
{% call statement() %} | ||
set spark.sql.sources.partitionOverwriteMode = DYNAMIC | ||
{% endcall %} | ||
{% endif %} | ||
|
||
{{ run_hooks(pre_hooks) }} | ||
|
||
{% if existing_relation is none %} | ||
{% set build_sql = create_table_as(False, target_relation, sql) %} | ||
{% elif existing_relation.is_view or full_refresh_mode %} | ||
{% do adapter.drop_relation(existing_relation) %} | ||
{% set build_sql = create_table_as(False, target_relation, sql) %} | ||
{% else %} | ||
{% do run_query(create_table_as(True, tmp_relation, sql)) %} | ||
{% set build_sql = dbt_spark_get_incremental_sql(strategy, tmp_relation, target_relation, unique_key) %} | ||
{% endif %} | ||
|
||
{%- call statement('main') -%} | ||
{{ build_sql }} | ||
{%- endcall -%} | ||
|
||
{{ run_hooks(post_hooks) }} | ||
|
||
{{ return({'relations': [target_relation]}) }} | ||
|
||
{%- endmaterialization %} |
58 changes: 58 additions & 0 deletions
58
dbt/include/spark/macros/materializations/incremental/strategies.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
{% macro get_insert_overwrite_sql(source_relation, target_relation) %} | ||
|
||
{%- set dest_columns = adapter.get_columns_in_relation(target_relation) -%} | ||
{%- set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') -%} | ||
insert overwrite table {{ target_relation }} | ||
{{ partition_cols(label="partition") }} | ||
select {{dest_cols_csv}} from {{ source_relation.include(database=false, schema=false) }} | ||
|
||
{% endmacro %} | ||
|
||
|
||
{% macro get_insert_into_sql(source_relation, target_relation) %} | ||
|
||
{%- set dest_columns = adapter.get_columns_in_relation(target_relation) -%} | ||
{%- set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') -%} | ||
insert into table {{ target_relation }} | ||
select {{dest_cols_csv}} from {{ source_relation.include(database=false, schema=false) }} | ||
|
||
{% endmacro %} | ||
|
||
|
||
{% macro spark__get_merge_sql(target, source, unique_key, dest_columns, predicates=none) %} | ||
{# ignore dest_columns - we will just use `*` #} | ||
|
||
{% set merge_condition %} | ||
{% if unique_key %} | ||
on DBT_INTERNAL_SOURCE.{{ unique_key }} = DBT_INTERNAL_DEST.{{ unique_key }} | ||
{% else %} | ||
on false | ||
{% endif %} | ||
{% endset %} | ||
|
||
merge into {{ target }} as DBT_INTERNAL_DEST | ||
using {{ source.include(schema=false) }} as DBT_INTERNAL_SOURCE | ||
{{ merge_condition }} | ||
when matched then update set * | ||
when not matched then insert * | ||
{% endmacro %} | ||
|
||
|
||
{% macro dbt_spark_get_incremental_sql(strategy, source, target, unique_key) %} | ||
{%- if strategy == 'append' -%} | ||
{#-- insert new records into existing table, without updating or overwriting #} | ||
{{ get_insert_into_sql(source, target) }} | ||
{%- elif strategy == 'insert_overwrite' -%} | ||
{#-- insert statements don't like CTEs, so support them via a temp view #} | ||
{{ get_insert_overwrite_sql(source, target) }} | ||
{%- elif strategy == 'merge' -%} | ||
{#-- merge all columns with databricks delta - schema changes are handled for us #} | ||
{{ get_merge_sql(target, source, unique_key, dest_columns=none, predicates=none) }} | ||
{%- else -%} | ||
{% set no_sql_for_strategy_msg -%} | ||
No known SQL for the incremental strategy provided: {{ strategy }} | ||
{%- endset %} | ||
{%- do exceptions.raise_compiler_error(no_sql_for_strategy_msg) -%} | ||
{%- endif -%} | ||
|
||
{% endmacro %} |
59 changes: 59 additions & 0 deletions
59
dbt/include/spark/macros/materializations/incremental/validate.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
{% macro dbt_spark_validate_get_file_format(raw_file_format) %} | ||
{#-- Validate the file format #} | ||
|
||
{% set accepted_formats = ['text', 'csv', 'json', 'jdbc', 'parquet', 'orc', 'hive', 'delta', 'libsvm'] %} | ||
|
||
{% set invalid_file_format_msg -%} | ||
Invalid file format provided: {{ raw_file_format }} | ||
Expected one of: {{ accepted_formats | join(', ') }} | ||
{%- endset %} | ||
|
||
{% if raw_file_format not in accepted_formats %} | ||
{% do exceptions.raise_compiler_error(invalid_file_format_msg) %} | ||
{% endif %} | ||
|
||
{% do return(raw_file_format) %} | ||
{% endmacro %} | ||
|
||
|
||
{% macro dbt_spark_validate_get_incremental_strategy(raw_strategy, file_format) %} | ||
{#-- Validate the incremental strategy #} | ||
|
||
{% set invalid_strategy_msg -%} | ||
Invalid incremental strategy provided: {{ raw_strategy }} | ||
Expected one of: 'append', 'merge', 'insert_overwrite' | ||
{%- endset %} | ||
|
||
{% set invalid_merge_msg -%} | ||
Invalid incremental strategy provided: {{ raw_strategy }} | ||
You can only choose this strategy when file_format is set to 'delta' | ||
{%- endset %} | ||
|
||
{% set invalid_insert_overwrite_delta_msg -%} | ||
Invalid incremental strategy provided: {{ raw_strategy }} | ||
You cannot use this strategy when file_format is set to 'delta' | ||
Use the 'append' or 'merge' strategy instead | ||
{%- endset %} | ||
|
||
{% set invalid_insert_overwrite_endpoint_msg -%} | ||
Invalid incremental strategy provided: {{ raw_strategy }} | ||
You cannot use this strategy when connecting via endpoint | ||
Use the 'append' or 'merge' strategy instead | ||
{%- endset %} | ||
|
||
{% if raw_strategy not in ['append', 'merge', 'insert_overwrite'] %} | ||
{% do exceptions.raise_compiler_error(invalid_strategy_msg) %} | ||
{%-else %} | ||
{% if raw_strategy == 'merge' and file_format != 'delta' %} | ||
{% do exceptions.raise_compiler_error(invalid_merge_msg) %} | ||
{% endif %} | ||
{% if raw_strategy == 'insert_overwrite' and file_format == 'delta' %} | ||
{% do exceptions.raise_compiler_error(invalid_insert_overwrite_delta_msg) %} | ||
{% endif %} | ||
{% if raw_strategy == 'insert_overwrite' and target.endpoint %} | ||
{% do exceptions.raise_compiler_error(invalid_insert_overwrite_endpoint_msg) %} | ||
{% endif %} | ||
{% endif %} | ||
|
||
{% do return(raw_strategy) %} | ||
{% endmacro %} |