Merge pull request #921 from fishtown-analytics/relationships-tests-with-nulls

Relationships tests with nulls (#887)
dbt-labs · Aug 15, 2018 · e2e2614 · e2e2614
2 parents a698486 + 6222829
commit e2e2614
Show file tree

Hide file tree

Showing 14 changed files with 186 additions and 34 deletions.
diff --git a/dbt/include/global_project/macros/schema_tests/relationships.sql b/dbt/include/global_project/macros/schema_tests/relationships.sql
@@ -1,19 +1,18 @@
 
-{% macro test_relationships(model, to, from) %}
+{% macro test_relationships(model, to, field) %}
+
+{% set column_name = kwargs.get('column_name', kwargs.get('from')) %}
 
-{% set column_name = kwargs.get('column_name', kwargs.get('field')) %}
 
 select count(*)
 from (
-
-    select
-        {{ from }} as id
-
-    from {{ model }}
-    where {{ from }} is not null
-      and {{ from }} not in (select {{ column_name }}
-                             from {{ to }})
-
-) validation_errors
+    select {{ column_name }} as id from {{ model }}
+) as child
+left join (
+    select {{ field }} as id from {{ to }}
+) as parent on parent.id = child.id
+where child.id is not null
+  and parent.id is null
 
 {% endmacro %}
+
diff --git a/test/integration/008_schema_tests_test/models-v1/models/schema.yml b/test/integration/008_schema_tests_test/models-v1/models/schema.yml
@@ -30,17 +30,17 @@ table_copy:
 table_summary:
     constraints:
         not_null:
-            - favorite_color
+            - favorite_color_copy
             - count
 
         unique:
-            - favorite_color 
+            - favorite_color_copy
 
         accepted_values:
-            - { field: favorite_color, values: ['blue', 'green'] }
+            - { field: favorite_color_copy, values: ['blue', 'green'] }
 
         relationships:
-            - { from: favorite_color, to: ref('table_copy'), field: favorite_color }
+            - { from: favorite_color_copy, to: ref('table_copy'), field: favorite_color }
 
 
 # all of these constraints will fail
@@ -64,3 +64,10 @@ table_failure_summary:
 
         relationships:
             - { from: favorite_color, to: ref('table_copy'), field: favorite_color }
+
+
+# all of these constraints will fail
+table_failure_null_relation:
+    constraints:
+        relationships:
+            - { from: id, to: ref('table_failure_copy'), field: id }
diff --git a/test/integration/008_schema_tests_test/models-v1/models/table_failure_null_relation.sql b/test/integration/008_schema_tests_test/models-v1/models/table_failure_null_relation.sql
@@ -0,0 +1,10 @@
+{{
+    config(
+        materialized='table'
+    )
+}}
+
+-- force a foreign key constraint failure here
+select 105 as id, count(*) as count
+from {{ ref('table_failure_copy') }}
+group by 1
diff --git a/test/integration/008_schema_tests_test/models-v1/models/table_summary.sql b/test/integration/008_schema_tests_test/models-v1/models/table_summary.sql
@@ -4,6 +4,6 @@
     )
 }}
 
-select favorite_color, count(*) as count
+select favorite_color as favorite_color_copy, count(*) as count
 from {{ ref('table_copy') }}
 group by 1
diff --git a/test/integration/008_schema_tests_test/models-v2/bq-models/ephemeral_copy.sql b/test/integration/008_schema_tests_test/models-v2/bq-models/ephemeral_copy.sql
@@ -0,0 +1,8 @@
+
+{{
+    config(
+        materialized='ephemeral'
+    )
+}}
+
+select * from {{ this.schema }}.seed
diff --git a/test/integration/008_schema_tests_test/models-v2/bq-models/schema.yml b/test/integration/008_schema_tests_test/models-v2/bq-models/schema.yml
@@ -0,0 +1,30 @@
+version: 2
+
+models:
+  - name: ephemeral_copy
+    description: "An ephemeral copy of the table"
+    columns:
+      - name: id
+        description: "The ID"
+        tests:
+          - not_null
+          - unique
+      - name: favorite_color
+        tests:
+          - accepted_values: { values: ['blue', 'green'] }
+
+  # this whole model should pass and run
+  - name: table_summary
+    description: "The summary table"
+    columns:
+      - name: favorite_color_copy
+        description: "The favorite color"
+        tests:
+          - not_null
+          - unique
+          - accepted_values: { values: ['blue', 'green'] }
+          - relationships: { field: favorite_color, to: ref('ephemeral_copy') }
+      - name: count
+        description: "The number of responses for this favorite color"
+        tests:
+          - not_null
diff --git a/test/integration/008_schema_tests_test/models-v2/bq-models/table_summary.sql b/test/integration/008_schema_tests_test/models-v2/bq-models/table_summary.sql
@@ -0,0 +1,9 @@
+{{
+    config(
+        materialized='table'
+    )
+}}
+
+select favorite_color as favorite_color_copy, count(*) as count
+from {{ ref('ephemeral_copy') }}
+group by 1
diff --git a/test/integration/008_schema_tests_test/models-v2/malformed/schema.yml b/test/integration/008_schema_tests_test/models-v2/malformed/schema.yml
@@ -25,7 +25,7 @@ models:
           - not_null
           - unique
           - accepted_values: { values: ['blue', 'green'] }
-          - relationships: { from: favorite_color, to: ref('table_copy') }
+          - relationships: { field: favorite_color, to: ref('table_copy') }
       - name: count
         description: "The number of responses for this favorite color"
         tests:

diff --git a/test/integration/008_schema_tests_test/models-v2/models/schema.yml b/test/integration/008_schema_tests_test/models-v2/models/schema.yml
@@ -33,13 +33,13 @@ models:
     - name: table_summary
       description: "The summary table"
       columns:
-        - name: favorite_color
+        - name: favorite_color_copy
           description: "The favorite color"
           tests:
             - not_null
             - unique
             - accepted_values: { values: ['blue', 'green'] }
-            - relationships: { from: favorite_color, to: ref('table_copy') }
+            - relationships: { field: favorite_color, to: ref('table_copy') }
         - name: count
           description: "The number of responses for this favorite color"
           tests:
@@ -67,7 +67,7 @@ models:
           description: "The favorite color"
           tests:
             - accepted_values: { values: ['red'] }
-            - relationships: { from: favorite_color, to: ref('table_copy') }
+            - relationships: { field: favorite_color, to: ref('table_copy') }
 
 # this table is disabled so these tests should be ignored
     - name: table_disabled
@@ -77,4 +77,13 @@ models:
           description: "The favorite color"
           tests:
             - accepted_values: { values: ['red'] }
-            - relationships: { from: favorite_color, to: ref('table_copy') }
+            - relationships: { field: favorite_color, to: ref('table_copy') }
+
+# all of these constraints will fail
+    - name: table_failure_null_relation
+      description: "A table with a null value where it should be a foreign key"
+      columns:
+        - name: id
+          description: "The user ID"
+          tests:
+            - relationships: { field: id, to: ref('table_failure_copy') }
diff --git a/test/integration/008_schema_tests_test/models-v2/models/table_failure_null_relation.sql b/test/integration/008_schema_tests_test/models-v2/models/table_failure_null_relation.sql
@@ -0,0 +1,10 @@
+{{
+    config(
+        materialized='table'
+    )
+}}
+
+-- force a foreign key constraint failure here
+select 105 as id, count(*) as count
+from {{ ref('table_failure_copy') }}
+group by 1
diff --git a/test/integration/008_schema_tests_test/models-v2/models/table_summary.sql b/test/integration/008_schema_tests_test/models-v2/models/table_summary.sql
@@ -4,6 +4,6 @@
     )
 }}
 
-select favorite_color, count(*) as count
+select favorite_color as favorite_color_copy, count(*) as count
 from {{ ref('table_copy') }}
 group by 1
diff --git a/test/integration/008_schema_tests_test/models-v2/seed/seed.csv b/test/integration/008_schema_tests_test/models-v2/seed/seed.csv
@@ -0,0 +1,5 @@
+favorite_color,id,first_name,email,ip_address,updated_at
+blue,1,Larry,null,69.135.206.194,2008-09-12 19:08:31
+blue,2,Larry,null,64.210.133.162,1978-05-09 04:15:14
+green,99,Paul,pjohnson2q@umn.edu,183.59.198.197,1991-11-14 12:33:55
+green,100,Frank,fgreene2r@blogspot.com,150.143.68.121,2010-06-12 23:55:39
diff --git a/test/integration/008_schema_tests_test/test_schema_tests.py b/test/integration/008_schema_tests_test/test_schema_tests.py
@@ -30,25 +30,31 @@ def run_schema_validations(self):
     @attr(type='postgres')
     def test_schema_tests(self):
         results = self.run_dbt()
-        self.assertEqual(len(results), 4)
+        self.assertEqual(len(results), 5)
         test_results = self.run_schema_validations()
-        self.assertEqual(len(test_results), 17)
+        self.assertEqual(len(test_results), 18)
 
         for result in test_results:
             # assert that all deliberately failing tests actually fail
             if 'failure' in result.node.get('name'):
                 self.assertFalse(result.errored)
                 self.assertFalse(result.skipped)
-                self.assertTrue(result.status > 0)
+                self.assertTrue(
+                    result.status > 0,
+                    'test {} did not fail'.format(result.node.get('name'))
+                )
 
             # assert that actual tests pass
             else:
                 self.assertFalse(result.errored)
                 self.assertFalse(result.skipped)
                 # status = # of failing rows
-                self.assertEqual(result.status, 0)
+                self.assertEqual(
+                    result.status, 0,
+                    'test {} failed'.format(result.node.get('name'))
+                )
 
-        self.assertEqual(sum(x.status for x in test_results), 5)
+        self.assertEqual(sum(x.status for x in test_results), 6)
 
 
 class TestMalformedSchemaTests(DBTIntegrationTest):

diff --git a/test/integration/008_schema_tests_test/test_schema_v2_tests.py b/test/integration/008_schema_tests_test/test_schema_v2_tests.py
@@ -1,5 +1,6 @@
 from nose.plugins.attrib import attr
-from test.integration.base import DBTIntegrationTest, FakeArgs
+from test.integration.base import DBTIntegrationTest, FakeArgs, use_profile
+import os
 
 from dbt.task.test import TestTask
 from dbt.project import read_project
@@ -30,26 +31,32 @@ def run_schema_validations(self):
     @attr(type='postgres')
     def test_schema_tests(self):
         results = self.run_dbt()
-        self.assertEqual(len(results), 4)
+        self.assertEqual(len(results), 5)
         test_results = self.run_schema_validations()
         # If the disabled model's tests ran, there would be 19 of these.
-        self.assertEqual(len(test_results), 17)
+        self.assertEqual(len(test_results), 18)
 
         for result in test_results:
             # assert that all deliberately failing tests actually fail
             if 'failure' in result.node.get('name'):
                 self.assertFalse(result.errored)
                 self.assertFalse(result.skipped)
-                self.assertTrue(result.status > 0)
+                self.assertTrue(
+                    result.status > 0,
+                    'test {} did not fail'.format(result.node.get('name'))
+                )
 
             # assert that actual tests pass
             else:
                 self.assertFalse(result.errored)
                 self.assertFalse(result.skipped)
                 # status = # of failing rows
-                self.assertEqual(result.status, 0)
+                self.assertEqual(
+                    result.status, 0,
+                    'test {} failed'.format(result.node.get('name'))
+                )
 
-        self.assertEqual(sum(x.status for x in test_results), 5)
+        self.assertEqual(sum(x.status for x in test_results), 6)
 
 class TestMalformedSchemaTests(DBTIntegrationTest):
 
@@ -139,3 +146,55 @@ def test_schema_tests(self):
             if result.errored:
                 self.assertTrue(result.node['name'] in expected_failures)
         self.assertEqual(sum(x.status for x in test_results), 52)
+
+class TestSchemaTests(DBTIntegrationTest):
+    @property
+    def schema(self):
+        return "schema_tests_008"
+
+    @property
+    def models(self):
+        return "test/integration/008_schema_tests_test/models-v2/bq-models"
+
+    @staticmethod
+    def dir(path):
+        return os.path.normpath(
+            os.path.join('test/integration/008_schema_tests_test/models-v2', path))
+
+    def run_schema_validations(self):
+        project = read_project('dbt_project.yml')
+        args = FakeArgs()
+
+        test_task = TestTask(args, project)
+        return test_task.run()
+
+    @use_profile('bigquery')
+    def test_schema_tests(self):
+        self.use_default_project({'data-paths': [self.dir('seed')]})
+        self.assertEqual(len(self.run_dbt(['seed'])), 1)
+        results = self.run_dbt()
+        self.assertEqual(len(results), 1)
+        test_results = self.run_schema_validations()
+        self.assertEqual(len(test_results), 8)
+
+        for result in test_results:
+            # assert that all deliberately failing tests actually fail
+            if 'failure' in result.node.get('name'):
+                self.assertFalse(result.errored)
+                self.assertFalse(result.skipped)
+                self.assertTrue(
+                    result.status > 0,
+                    'test {} did not fail'.format(result.node.get('name'))
+                )
+
+            # assert that actual tests pass
+            else:
+                self.assertFalse(result.errored)
+                self.assertFalse(result.skipped)
+                # status = # of failing rows
+                self.assertEqual(
+                    result.status, 0,
+                    'test {} failed'.format(result.node.get('name'))
+                )
+
+        self.assertEqual(sum(x.status for x in test_results), 0)