Skip to content

Commit

Permalink
Merge pull request #880 from fishtown-analytics/new-schema-yaml-syntax
Browse files Browse the repository at this point in the history
Support new schema.yml syntax (#790)
  • Loading branch information
beckjake authored Jul 31, 2018
2 parents c7c3d09 + 2e1aaac commit 3b3a486
Show file tree
Hide file tree
Showing 42 changed files with 1,368 additions and 308 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
## dbt 0.11.0 (unreleased, codename: Isaac Asimov)

### Features

- Extend the catalog and manifest to also support Snowflake, BigQuery, and Redshift.
- Add a 'generated_at' field to both the manifest and the catalog.
- Add version 2 of the schema.yml syntax, which allows users to create table and column comments that end up in the manifest.

## dbt 0.10.2 (unreleased, codename: Betsy Ross)

### Overview
Expand Down
6 changes: 6 additions & 0 deletions dbt/api/object.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ def __init__(self, **kwargs):
self._contents = deep_merge(self.DEFAULTS, kwargs)
self.validate()

def __str__(self):
    """Human-readable form; identical to repr() for API objects."""
    # Delegate instead of duplicating the format string, so the two
    # representations cannot drift apart.
    return self.__repr__()

def __repr__(self):
    """Unambiguous representation: ClassName(**contents)."""
    return '{}(**{})'.format(self.__class__.__name__, self._contents)

def incorporate(self, **kwargs):
"""
Given a list of kwargs, incorporate these arguments
Expand Down
116 changes: 116 additions & 0 deletions dbt/contracts/graph/parsed.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from dbt.api import APIObject
from dbt.utils import deep_merge, timestring
from dbt.node_types import NodeType
from dbt.exceptions import raise_duplicate_resource_name, \
raise_patch_targets_not_found

import dbt.clients.jinja

Expand Down Expand Up @@ -66,6 +68,25 @@
}


# Note that description must be present, but may be empty.
# JSON-schema fragment for one entry of a node's `columns` list; embedded
# by reference in the parsed-node and patch contracts below.
COLUMN_INFO_CONTRACT = {
    'type': 'object',
    'additionalProperties': False,
    'description': 'Information about a single column in a model',
    'properties': {
        'name': {
            'type': 'string',
            'description': 'The column name',
        },
        'description': {
            'type': 'string',
            'description': 'A description of the column',
        },
    },
    # both keys are mandatory; 'description' may still be the empty string
    'required': ['name', 'description'],
}


PARSED_NODE_CONTRACT = deep_merge(
UNPARSED_NODE_CONTRACT,
{
Expand Down Expand Up @@ -151,6 +172,20 @@
'type': 'string',
}
},
'description': {
'type': 'string',
'description': 'A user-supplied description of the model',
},
'columns': {
'type': 'array',
'items': COLUMN_INFO_CONTRACT,
},
'patch_path': {
'type': 'string',
'description': (
'The path to the patch source if the node was patched'
),
},
},
'required': UNPARSED_NODE_CONTRACT['required'] + [
'unique_id', 'fqn', 'schema', 'refs', 'depends_on', 'empty',
Expand All @@ -160,6 +195,42 @@
)


# The parsed node update is only the 'patch', not the test. The test became a
# regular parsed node. Note that description and columns must be present, but
# may be empty.
PARSED_NODE_PATCH_CONTRACT = {
    'type': 'object',
    'additionalProperties': False,
    'description': 'A collection of values that can be set on a node',
    'properties': {
        'name': {
            'type': 'string',
            'description': 'The name of the node this modifies',
        },
        'description': {
            'type': 'string',
            'description': 'The description of the node to add',
        },
        'original_file_path': {
            'type': 'string',
            'description': (
                'Relative path to the originating file path for the patch '
                'from the project root'
            ),
        },
        # each entry is validated against COLUMN_INFO_CONTRACT
        'columns': {
            'type': 'array',
            'items': COLUMN_INFO_CONTRACT,
        }
    },
    # all four keys are mandatory; see the note above about empty values
    'required': ['name', 'original_file_path', 'description', 'columns'],
}


class ParsedNodePatch(APIObject):
    """A collection of values that can be set on a node, validated
    against PARSED_NODE_PATCH_CONTRACT.
    """
    SCHEMA = PARSED_NODE_PATCH_CONTRACT


PARSED_NODES_CONTRACT = {
'type': 'object',
'additionalProperties': False,
Expand Down Expand Up @@ -308,6 +379,18 @@ def to_dict(self):
ret['agate_table'] = self.agate_table
return ret

def patch(self, patch):
    """Apply a ParsedNodePatch to this node in place.

    Only the patch-owned fields are written, so fields such as the
    model name cannot be clobbered by accident. The node is always
    re-validated afterwards, since a patch changes contract-relevant
    content.
    """
    self._contents['patch_path'] = patch.original_file_path
    self._contents['description'] = patch.description
    self._contents['columns'] = patch.columns
    self.validate()


class ParsedMacro(APIObject):
SCHEMA = PARSED_MACRO_CONTRACT
Expand Down Expand Up @@ -405,6 +488,39 @@ def find_operation_by_name(self, name, package):
return self._find_by_name(name, package, 'macros',
[NodeType.Operation])

def add_nodes(self, new_nodes):
    """Merge the given dict of new nodes into the manifest's nodes.

    Raises a compiler error (via raise_duplicate_resource_name) if any
    incoming unique id is already present.
    """
    existing = self.nodes
    for uid, new_node in new_nodes.items():
        if uid in existing:
            raise_duplicate_resource_name(new_node, existing[uid])
        existing[uid] = new_node

def patch_nodes(self, patches):
    """Patch model nodes with the given dict of patches (keyed by node
    name). Note that this consumes the input: matched entries are
    popped out of ``patches``.
    """
    # There is no mapping from node _names_ to nodes and the patch only
    # carries the name, so scan all nodes once and pull out any patch
    # whose name matches. Using _find_by_name instead would be an
    # O(n*m) search (one nodes scan per patch).
    for node in self.nodes.values():
        if node.resource_type != NodeType.Model:
            continue
        matching = patches.pop(node.name, None)
        if matching:
            node.patch(matching)

    # Anything left over documented a model we never saw. Since patches
    # aren't nodes, the existing target_not_found warning can't be
    # used; log a debug-level warning instead.
    for leftover in patches.values():
        logger.debug((
            'WARNING: Found documentation for model "{}" which was '
            'not found or is disabled').format(leftover.name)
        )

def to_flat_graph(self):
"""Convert the parsed manifest to the 'flat graph' that the compiler
expects.
Expand Down
76 changes: 76 additions & 0 deletions dbt/contracts/graph/unparsed.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,79 @@ class UnparsedMacro(APIObject):

class UnparsedNode(APIObject):
SCHEMA = UNPARSED_NODE_CONTRACT


# Schema for one entry of the `columns` section of a model update in
# schema.yml: the column's name, an optional description, and the list of
# tests to run against it. Only `name` is required.
COLUMN_TEST_CONTRACT = {
    'type': 'object',
    'additionalProperties': False,
    'properties': {
        'name': {
            'type': 'string',
            'description': 'The name of the column this test is for',
        },
        'description': {
            'type': 'string',
            'description': 'The description of this test',
        },
        'tests': {
            'type': 'array',
            'items': {
                "anyOf": [
                    # 'not_null', 'unique', ...
                    {'type': 'string'},
                    # 'relationships: {...}', 'accepted_values: {...}'
                    {'type': 'object', 'additionalProperties': True}
                ],
            },
            'description': 'The list of tests to perform',
        },
    },
    'required': ['name'],
}


UNPARSED_NODE_UPDATE_CONTRACT = {
    'type': 'object',
    'additionalProperties': False,
    'description': (
        'A collection of the unparsed node updates, as provided in the '
        '"models" section of schema.yml'
    ),
    'properties': {
        'name': {
            'type': 'string',
            'description': (
                # fixed: the adjacent literals previously concatenated as
                # "...the name of the model itrefers to" (missing space)
                'The name of this node, which is the name of the model it '
                'refers to'
            ),
            'minLength': 1,
        },
        'description': {
            'type': 'string',
            'description': (
                'The raw string description of the node after parsing the yaml'
            ),
        },
        'columns': {
            'type': 'array',
            'items': COLUMN_TEST_CONTRACT,
        },
        # model-level tests; same shape as the per-column `tests` entries
        'tests': {
            'type': 'array',
            'items': {
                "anyOf": [
                    {'type': 'string'},
                    {'type': 'object', 'additionalProperties': True}
                ],
            },
        },
    },
    'required': ['name'],
}


class UnparsedNodeUpdate(APIObject):
    """An unparsed node update is the blueprint for tests to be added and
    nodes to be updated, referencing a certain node (specifically, a Model).

    Validated against UNPARSED_NODE_UPDATE_CONTRACT.
    """
    SCHEMA = UNPARSED_NODE_UPDATE_CONTRACT
24 changes: 24 additions & 0 deletions dbt/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,3 +357,27 @@ def raise_ambiguous_alias(node_1, node_2):
duped_name,
node_1['unique_id'], node_1['original_file_path'],
node_2['unique_id'], node_2['original_file_path']))


def raise_patch_targets_not_found(patches):
    """Raise a compiler error listing every patch whose target model
    could not be found.

    :param patches: dict of patches, each with `name` and
        `original_file_path` attributes.
    """
    entries = []
    for unmatched in patches.values():
        entries.append('model {} (referenced in path {})'.format(
            unmatched.name, unmatched.original_file_path
        ))
    raise_compiler_error(
        'dbt could not find models for the following patches:\n\t{}'.format(
            '\n\t'.join(entries)
        )
    )


def raise_duplicate_patch_name(name, patch_1, patch_2):
    """Raise a compiler error for two schema.yml entries that describe
    the same model.

    :param name: the duplicated model name
    :param patch_1: where the first entry was specified
    :param patch_2: where the second entry was specified
    """
    message = (
        'dbt found two schema.yml entries for the same model named {}. '
        'The first patch was specified in {} and the second in {}. '
        'Models and their associated columns may only be described a '
        'single time.'
    ).format(name, patch_1, patch_2)
    raise_compiler_error(message)
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@

{% macro test_accepted_values(model, field, values) %}
{% macro test_accepted_values(model, values) %}

{% set column_name = kwargs.get('column_name', kwargs.get('field')) %}

with all_values as (

select distinct
{{ field }} as value_field
{{ column_name }} as value_field

from {{ model }}

Expand Down
6 changes: 4 additions & 2 deletions dbt/include/global_project/macros/schema_tests/not_null.sql
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@

{% macro test_not_null(model) %}

{# Counts rows where the column is null. The diff render left both the old
   and new macro signatures on adjacent lines; this is the new version.
   The column is read from kwargs: the new `column_name` key, falling back
   to the legacy `arg` key for backwards compatibility. #}
{% set column_name = kwargs.get('column_name', kwargs.get('arg')) %}

select count(*)
from {{ model }}
where {{ column_name }} is null

{% endmacro %}

Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@

{% macro test_relationships(model, field, to, from) %}
{% macro test_relationships(model, to, from) %}

{% set column_name = kwargs.get('column_name', kwargs.get('field')) %}

select count(*)
from (
Expand All @@ -10,7 +11,7 @@ from (

from {{ model }}
where {{ from }} is not null
and {{ from }} not in (select {{ field }}
and {{ from }} not in (select {{ column_name }}
from {{ to }})

) validation_errors
Expand Down
10 changes: 6 additions & 4 deletions dbt/include/global_project/macros/schema_tests/unique.sql
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@

{% macro test_unique(model, arg) %}
{% macro test_unique(model) %}

{% set column_name = kwargs.get('column_name', kwargs.get('arg')) %}

select count(*)
from (

select
{{ arg }}
{{ column_name }}

from {{ model }}
where {{ arg }} is not null
group by {{ arg }}
where {{ column_name }} is not null
group by {{ column_name }}
having count(*) > 1

) validation_errors
Expand Down
Loading

0 comments on commit 3b3a486

Please sign in to comment.