Skip to content

Commit

Permalink
Merge pull request #2391 from fishtown-analytics/fix/snapshot-use-upd…
Browse files Browse the repository at this point in the history
…ated-at-timestamp-for-comparisons

Fix for changing snapshot strategy types
  • Loading branch information
drewbanin authored May 4, 2020
2 parents 8681dd8 + e82e68d commit 24ed84d
Show file tree
Hide file tree
Showing 5 changed files with 135 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
- No longer query the information_schema.schemata view on bigquery ([#2320](https://github.com/fishtown-analytics/dbt/issues/2320), [#2382](https://github.com/fishtown-analytics/dbt/pull/2382))
- Add support for `sql_header` config in incremental models ([#2136](https://github.com/fishtown-analytics/dbt/issues/2136), [#2200](https://github.com/fishtown-analytics/dbt/pull/2200))
- Fix for non-atomic snapshot staging table creation ([#1884](https://github.com/fishtown-analytics/dbt/issues/1884), [#2390](https://github.com/fishtown-analytics/dbt/pull/2390))
- Fix for snapshot errors when strategy changes from `check` to `timestamp` between runs ([#2350](https://github.com/fishtown-analytics/dbt/issues/2350), [#2391](https://github.com/fishtown-analytics/dbt/pull/2391))

### Under the hood
- Added more tests for source inheritance ([#2264](https://github.com/fishtown-analytics/dbt/issues/2264), [#2291](https://github.com/fishtown-analytics/dbt/pull/2291))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,17 @@
{% set primary_key = config['unique_key'] %}
{% set updated_at = config['updated_at'] %}

{#/*
The snapshot relation might not have an {{ updated_at }} value if the
snapshot strategy is changed from `check` to `timestamp`. We
should use a dbt-created column for the comparison in the snapshot
table instead of assuming that the user-supplied {{ updated_at }}
will be present in the historical data.

See https://github.com/fishtown-analytics/dbt/issues/2350
*/ #}
{% set row_changed_expr -%}
({{ snapshotted_rel }}.{{ updated_at }} < {{ current_rel }}.{{ updated_at }})
({{ snapshotted_rel }}.dbt_valid_from < {{ current_rel }}.{{ updated_at }})
{%- endset %}

{% set scd_id_expr = snapshot_hash_arguments([primary_key, updated_at]) %}
Expand Down Expand Up @@ -126,7 +135,7 @@
select {{ snapshot_get_time() }} as snapshot_start
{%- endset %}

{# don't access the column by name, to avoid dealing with casing issues on snowflake #}
{#-- don't access the column by name, to avoid dealing with casing issues on snowflake #}
{%- set now = run_query(select_current_time)[0][0] -%}
{% if now is none or now is undefined -%}
{%- do exceptions.raise_compiler_error('Could not get a snapshot start time from the database') -%}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@

{# /*
Given the repro case for the snapshot build, we'd
expect to see both records have color='pink'
in their most recent rows.
*/ #}
with expected as (
select 1 as id, 'pink' as color union all
select 2 as id, 'pink' as color
),
actual as (
select id, color
from {{ ref('my_snapshot') }}
where color = 'pink'
and dbt_valid_to is null
)
select * from expected
except
select * from actual
union all
select * from actual
except
select * from expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@

{#
REPRO:
1. Run with check strategy
2. Add a new ts column and run with check strategy
3. Run with timestamp strategy on new ts column

Expect: new entry is added for changed rows in (3)
#}


{% snapshot my_snapshot %}

{#--------------- Configuration ------------ #}

{{ config(
target_schema=schema,
unique_key='id'
) }}

{% if var('strategy') == 'timestamp' %}
{{ config(strategy='timestamp', updated_at='updated_at') }}
{% else %}
{{ config(strategy='check', check_cols=['color']) }}
{% endif %}

{#--------------- Test setup ------------ #}

{% if var('step') == 1 %}

select 1 as id, 'blue' as color
union all
select 2 as id, 'red' as color

{% elif var('step') == 2 %}

-- change id=1 color from blue to green
-- id=2 is unchanged when using the check strategy
select 1 as id, 'green' as color, '2020-01-01'::date as updated_at
union all
select 2 as id, 'red' as color, '2020-01-01'::date as updated_at

{% elif var('step') == 3 %}

-- bump timestamp for both records. Expect that after this runs
-- using the timestamp strategy, both ids should have the color
-- 'pink' in the database. This should be in the future b/c we're
-- going to compare to the check timestamp, which will be _now_
select 1 as id, 'pink' as color, (now() + interval '1 day')::date as updated_at
union all
select 2 as id, 'pink' as color, (now() + interval '1 day')::date as updated_at

{% endif %}

{% endsnapshot %}
36 changes: 36 additions & 0 deletions test/integration/004_simple_snapshot_test/test_simple_snapshot.py
Original file line number Diff line number Diff line change
Expand Up @@ -710,3 +710,39 @@ def test__postgres__slow(self):

results = self.run_dbt(['test'])
self.assertEqual(len(results), 1)


class TestChangingStrategy(DBTIntegrationTest):

@property
def schema(self):
return "simple_snapshot_004"

@property
def models(self):
return "models-slow"

def run_snapshot(self):
return self.run_dbt(['snapshot'])

@property
def project_config(self):
return {
"config-version": 2,
"snapshot-paths": ['test-snapshots-changing-strategy'],
"test-paths": ["test-snapshots-changing-strategy-tests"],
}

@use_profile('postgres')
def test__postgres__changing_strategy(self):
results = self.run_dbt(['snapshot', '--vars', '{strategy: check, step: 1}'])
self.assertEqual(len(results), 1)

results = self.run_dbt(['snapshot', '--vars', '{strategy: check, step: 2}'])
self.assertEqual(len(results), 1)

results = self.run_dbt(['snapshot', '--vars', '{strategy: timestamp, step: 3}'])
self.assertEqual(len(results), 1)

results = self.run_dbt(['test'])
self.assertEqual(len(results), 1)

0 comments on commit 24ed84d

Please sign in to comment.