Skip to content

Commit

Permalink
Merge pull request #921 from fishtown-analytics/relationships-tests-w…
Browse files Browse the repository at this point in the history
…ith-nulls

Relationships tests with nulls (#887)
  • Loading branch information
beckjake authored Aug 15, 2018
2 parents a698486 + 6222829 commit e2e2614
Show file tree
Hide file tree
Showing 14 changed files with 186 additions and 34 deletions.
23 changes: 11 additions & 12 deletions dbt/include/global_project/macros/schema_tests/relationships.sql
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@

{% macro test_relationships(model, to, from) %}
{% macro test_relationships(model, to, field) %}

{% set column_name = kwargs.get('column_name', kwargs.get('from')) %}

{% set column_name = kwargs.get('column_name', kwargs.get('field')) %}

select count(*)
from (

select
{{ from }} as id

from {{ model }}
where {{ from }} is not null
and {{ from }} not in (select {{ column_name }}
from {{ to }})

) validation_errors
select {{ column_name }} as id from {{ model }}
) as child
left join (
select {{ field }} as id from {{ to }}
) as parent on parent.id = child.id
where child.id is not null
and parent.id is null

{% endmacro %}

15 changes: 11 additions & 4 deletions test/integration/008_schema_tests_test/models-v1/models/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,17 @@ table_copy:
table_summary:
constraints:
not_null:
- favorite_color
- favorite_color_copy
- count

unique:
- favorite_color
- favorite_color_copy

accepted_values:
- { field: favorite_color, values: ['blue', 'green'] }
- { field: favorite_color_copy, values: ['blue', 'green'] }

relationships:
- { from: favorite_color, to: ref('table_copy'), field: favorite_color }
- { from: favorite_color_copy, to: ref('table_copy'), field: favorite_color }


# all of these constraints will fail
Expand All @@ -64,3 +64,10 @@ table_failure_summary:

relationships:
- { from: favorite_color, to: ref('table_copy'), field: favorite_color }


# all of these constraints will fail
table_failure_null_relation:
constraints:
relationships:
- { from: id, to: ref('table_failure_copy'), field: id }
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- Fixture model, materialized as a table, that emits the constant id 105
-- together with the number of rows found in table_failure_copy.
{{
config(
materialized='table'
)
}}

-- force a foreign key constraint failure here
-- NOTE(review): 105 is presumably absent from table_failure_copy.id, which is
-- what makes the relationships test on this model fail - confirm against the seed data.
select 105 as id, count(*) as count
from {{ ref('table_failure_copy') }}
-- positional reference: groups by the first select item (the constant id),
-- so the result has at most one row
group by 1
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@
)
}}

select favorite_color, count(*) as count
select favorite_color as favorite_color_copy, count(*) as count
from {{ ref('table_copy') }}
group by 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@

-- Ephemeral model that passes every column of the "seed" relation through unchanged.
{{
config(
materialized='ephemeral'
)
}}

-- The seed relation is addressed by schema-qualified name (this model's schema)
-- rather than through ref().
select * from {{ this.schema }}.seed
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Version-2 schema file: column-level descriptions and tests for the
# ephemeral_copy and table_summary models.
version: 2

models:
# tests on the ephemeral copy: id must be present and unique,
# favorite_color must be one of the listed values
- name: ephemeral_copy
description: "An ephemeral copy of the table"
columns:
- name: id
description: "The ID"
tests:
- not_null
- unique
- name: favorite_color
tests:
- accepted_values: { values: ['blue', 'green'] }

# this whole model should pass and run
- name: table_summary
description: "The summary table"
columns:
- name: favorite_color_copy
description: "The favorite color"
tests:
- not_null
- unique
- accepted_values: { values: ['blue', 'green'] }
# child column is favorite_color_copy (this column); parent column is
# favorite_color in ephemeral_copy
- relationships: { field: favorite_color, to: ref('ephemeral_copy') }
- name: count
description: "The number of responses for this favorite color"
tests:
- not_null
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- Summary model, materialized as a table: one row per distinct favorite_color
-- in ephemeral_copy, with the number of rows sharing that value.
{{
config(
materialized='table'
)
}}

-- The grouping column is exposed as favorite_color_copy.
-- NOTE(review): presumably renamed so the relationships test compares
-- differently named child/parent columns - confirm.
select favorite_color as favorite_color_copy, count(*) as count
from {{ ref('ephemeral_copy') }}
-- positional reference to the first select item (favorite_color_copy)
group by 1
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ models:
- not_null
- unique
- accepted_values: { values: ['blue', 'green'] }
- relationships: { from: favorite_color, to: ref('table_copy') }
- relationships: { field: favorite_color, to: ref('table_copy') }
- name: count
description: "The number of responses for this favorite color"
tests:
Expand Down
17 changes: 13 additions & 4 deletions test/integration/008_schema_tests_test/models-v2/models/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,13 @@ models:
- name: table_summary
description: "The summary table"
columns:
- name: favorite_color
- name: favorite_color_copy
description: "The favorite color"
tests:
- not_null
- unique
- accepted_values: { values: ['blue', 'green'] }
- relationships: { from: favorite_color, to: ref('table_copy') }
- relationships: { field: favorite_color, to: ref('table_copy') }
- name: count
description: "The number of responses for this favorite color"
tests:
Expand Down Expand Up @@ -67,7 +67,7 @@ models:
description: "The favorite color"
tests:
- accepted_values: { values: ['red'] }
- relationships: { from: favorite_color, to: ref('table_copy') }
- relationships: { field: favorite_color, to: ref('table_copy') }

# this table is disabled so these tests should be ignored
- name: table_disabled
Expand All @@ -77,4 +77,13 @@ models:
description: "The favorite color"
tests:
- accepted_values: { values: ['red'] }
- relationships: { from: favorite_color, to: ref('table_copy') }
- relationships: { field: favorite_color, to: ref('table_copy') }

# all of these constraints will fail
- name: table_failure_null_relation
description: "A table with a null value where it should be a foreign key"
columns:
- name: id
description: "The user ID"
tests:
- relationships: { field: id, to: ref('table_failure_copy') }
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- Fixture model, materialized as a table, that emits the constant id 105
-- together with the number of rows found in table_failure_copy.
{{
config(
materialized='table'
)
}}

-- force a foreign key constraint failure here
-- NOTE(review): 105 is presumably absent from table_failure_copy.id, which is
-- what makes the relationships test on this model fail - confirm against the seed data.
select 105 as id, count(*) as count
from {{ ref('table_failure_copy') }}
-- positional reference: groups by the first select item (the constant id),
-- so the result has at most one row
group by 1
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@
)
}}

select favorite_color, count(*) as count
select favorite_color as favorite_color_copy, count(*) as count
from {{ ref('table_copy') }}
group by 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
favorite_color,id,first_name,email,ip_address,updated_at
blue,1,Larry,null,69.135.206.194,2008-09-12 19:08:31
blue,2,Larry,null,64.210.133.162,1978-05-09 04:15:14
green,99,Paul,pjohnson2q@umn.edu,183.59.198.197,1991-11-14 12:33:55
green,100,Frank,fgreene2r@blogspot.com,150.143.68.121,2010-06-12 23:55:39
16 changes: 11 additions & 5 deletions test/integration/008_schema_tests_test/test_schema_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,25 +30,31 @@ def run_schema_validations(self):
@attr(type='postgres')
def test_schema_tests(self):
results = self.run_dbt()
self.assertEqual(len(results), 4)
self.assertEqual(len(results), 5)
test_results = self.run_schema_validations()
self.assertEqual(len(test_results), 17)
self.assertEqual(len(test_results), 18)

for result in test_results:
# assert that all deliberately failing tests actually fail
if 'failure' in result.node.get('name'):
self.assertFalse(result.errored)
self.assertFalse(result.skipped)
self.assertTrue(result.status > 0)
self.assertTrue(
result.status > 0,
'test {} did not fail'.format(result.node.get('name'))
)

# assert that actual tests pass
else:
self.assertFalse(result.errored)
self.assertFalse(result.skipped)
# status = # of failing rows
self.assertEqual(result.status, 0)
self.assertEqual(
result.status, 0,
'test {} failed'.format(result.node.get('name'))
)

self.assertEqual(sum(x.status for x in test_results), 5)
self.assertEqual(sum(x.status for x in test_results), 6)


class TestMalformedSchemaTests(DBTIntegrationTest):
Expand Down
71 changes: 65 additions & 6 deletions test/integration/008_schema_tests_test/test_schema_v2_tests.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from nose.plugins.attrib import attr
from test.integration.base import DBTIntegrationTest, FakeArgs
from test.integration.base import DBTIntegrationTest, FakeArgs, use_profile
import os

from dbt.task.test import TestTask
from dbt.project import read_project
Expand Down Expand Up @@ -30,26 +31,32 @@ def run_schema_validations(self):
@attr(type='postgres')
def test_schema_tests(self):
results = self.run_dbt()
self.assertEqual(len(results), 4)
self.assertEqual(len(results), 5)
test_results = self.run_schema_validations()
# If the disabled model's tests ran, there would be 19 of these.
self.assertEqual(len(test_results), 17)
self.assertEqual(len(test_results), 18)

for result in test_results:
# assert that all deliberately failing tests actually fail
if 'failure' in result.node.get('name'):
self.assertFalse(result.errored)
self.assertFalse(result.skipped)
self.assertTrue(result.status > 0)
self.assertTrue(
result.status > 0,
'test {} did not fail'.format(result.node.get('name'))
)

# assert that actual tests pass
else:
self.assertFalse(result.errored)
self.assertFalse(result.skipped)
# status = # of failing rows
self.assertEqual(result.status, 0)
self.assertEqual(
result.status, 0,
'test {} failed'.format(result.node.get('name'))
)

self.assertEqual(sum(x.status for x in test_results), 5)
self.assertEqual(sum(x.status for x in test_results), 6)

class TestMalformedSchemaTests(DBTIntegrationTest):

Expand Down Expand Up @@ -139,3 +146,55 @@ def test_schema_tests(self):
if result.errored:
self.assertTrue(result.node['name'] in expected_failures)
self.assertEqual(sum(x.status for x in test_results), 52)

class TestSchemaTests(DBTIntegrationTest):
    """Seed, run and schema-test the ``models-v2/bq-models`` project on the bigquery profile.

    NOTE(review): an earlier test in this module is not shown with its class
    header; if that class is also named ``TestSchemaTests``, this definition
    would shadow it at import time -- confirm and rename if so.
    """

    @property
    def schema(self):
        # Schema name used for this integration test.
        return "schema_tests_008"

    @property
    def models(self):
        # Directory holding the models exercised by this test.
        return "test/integration/008_schema_tests_test/models-v2/bq-models"

    @staticmethod
    def dir(path):
        """Return ``path`` resolved under the models-v2 test directory, normalized."""
        joined = os.path.join(
            'test/integration/008_schema_tests_test/models-v2', path)
        return os.path.normpath(joined)

    def run_schema_validations(self):
        """Run the dbt test task for ``dbt_project.yml`` and return its results."""
        project = read_project('dbt_project.yml')
        return TestTask(FakeArgs(), project).run()

    @use_profile('bigquery')
    def test_schema_tests(self):
        """Expect 1 seed, 1 model and 8 schema tests, with no failing rows overall."""
        self.use_default_project({'data-paths': [self.dir('seed')]})

        seed_results = self.run_dbt(['seed'])
        self.assertEqual(len(seed_results), 1)

        run_results = self.run_dbt()
        self.assertEqual(len(run_results), 1)

        test_results = self.run_schema_validations()
        self.assertEqual(len(test_results), 8)

        for outcome in test_results:
            name = outcome.node.get('name')
            # tests whose name contains 'failure' are the deliberately failing ones
            expected_to_fail = 'failure' in name

            # no test may error out or be skipped, whichever way it is expected to go
            self.assertFalse(outcome.errored)
            self.assertFalse(outcome.skipped)

            # status = # of failing rows
            if expected_to_fail:
                self.assertTrue(
                    outcome.status > 0,
                    'test {} did not fail'.format(name)
                )
            else:
                self.assertEqual(
                    outcome.status, 0,
                    'test {} failed'.format(name)
                )

        total_failing_rows = sum(r.status for r in test_results)
        self.assertEqual(total_failing_rows, 0)

0 comments on commit e2e2614

Please sign in to comment.