Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make seeds ref'able, provide for seed configuration #668

Merged
merged 3 commits into from
Feb 28, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dbt/adapters/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,7 @@ def warning_on_hooks(cls, hook_type):

@classmethod
def add_query(cls, profile, sql, model_name=None, auto_begin=True,
bindings=None):
bindings=None, abridge_sql_log=False):
if model_name in ['on-run-start', 'on-run-end']:
cls.warning_on_hooks(model_name)
else:
Expand Down
7 changes: 5 additions & 2 deletions dbt/adapters/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -524,7 +524,7 @@ def close(cls, connection):

@classmethod
def add_query(cls, profile, sql, model_name=None, auto_begin=True,
bindings=None):
bindings=None, abridge_sql_log=False):
connection = cls.get_connection(profile, model_name)
connection_name = connection.get('name')

Expand All @@ -535,7 +535,10 @@ def add_query(cls, profile, sql, model_name=None, auto_begin=True,
.format(cls.type(), connection_name))

with cls.exception_handler(profile, sql, model_name, connection_name):
logger.debug('On %s: %s', connection_name, sql)
if abridge_sql_log:
logger.debug('On %s: %s....', connection_name, sql[0:512])
else:
logger.debug('On %s: %s', connection_name, sql)
pre = time.time()

cursor = connection.get('handle').cursor()
Expand Down
2 changes: 1 addition & 1 deletion dbt/adapters/postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,4 +235,4 @@ def load_csv_rows(cls, profile, schema, table_name, agate_table):
cols_sql,
",\n".join(placeholders)))

cls.add_query(profile, sql, bindings=bindings)
cls.add_query(profile, sql, bindings=bindings, abridge_sql_log=True)
4 changes: 2 additions & 2 deletions dbt/adapters/snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def check_schema_exists(cls, profile, schema, model_name=None):

@classmethod
def add_query(cls, profile, sql, model_name=None, auto_begin=True,
select_schema=True, bindings=None):
select_schema=True, bindings=None, abridge_sql_log=False):
# snowflake only allows one query per api call.
queries = sql.strip().split(";")
cursor = None
Expand Down Expand Up @@ -211,7 +211,7 @@ def add_query(cls, profile, sql, model_name=None, auto_begin=True,

connection, cursor = super(PostgresAdapter, cls).add_query(
profile, individual_query, model_name, auto_begin,
bindings=bindings)
bindings=bindings, abridge_sql_log=abridge_sql_log)

return connection, cursor

Expand Down
3 changes: 2 additions & 1 deletion dbt/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,8 @@ def load_project(cls, root_project, all_projects, project, project_name,
all_projects=all_projects,
root_dir=project.get('project-root'),
relative_dirs=project.get('data-paths', []),
resource_type=NodeType.Seed)
resource_type=NodeType.Seed,
macros=macros)


# node loaders
Expand Down
9 changes: 7 additions & 2 deletions dbt/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from dbt.compat import basestring

from dbt.utils import split_path, deep_merge, DBTConfigKeys
from dbt.node_types import NodeType


class SourceConfig(object):
Expand All @@ -24,11 +25,12 @@ class SourceConfig(object):
'bind'
]

def __init__(self, active_project, own_project, fqn):
def __init__(self, active_project, own_project, fqn, node_type):
self._config = None
self.active_project = active_project
self.own_project = own_project
self.fqn = fqn
self.node_type = node_type

# the config options defined within the model
self.in_model_config = {}
Expand Down Expand Up @@ -133,7 +135,10 @@ def get_project_config(self, project):
for k in SourceConfig.ExtendDictFields:
config[k] = {}

model_configs = project.get('models')
if self.node_type == NodeType.Seed:
model_configs = project.get('seeds')
else:
model_configs = project.get('models')

if model_configs is None:
return config
Expand Down
8 changes: 4 additions & 4 deletions dbt/node_runners.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,16 +79,16 @@ def raise_on_first_error(self):
return False

@classmethod
def is_model(cls, node):
return node.get('resource_type') == NodeType.Model
def is_refable(cls, node):
return node.get('resource_type') in [NodeType.Model, NodeType.Seed]

@classmethod
def is_ephemeral(cls, node):
return dbt.utils.get_materialization(node) == 'ephemeral'

@classmethod
def is_ephemeral_model(cls, node):
return cls.is_model(node) and cls.is_ephemeral(node)
return cls.is_refable(node) and cls.is_ephemeral(node)

def safe_run(self, flat_graph, existing):
catchable_errors = (dbt.exceptions.CompilationException,
Expand Down Expand Up @@ -175,7 +175,7 @@ def do_skip(self):
def get_model_schemas(cls, flat_graph):
schemas = set()
for node in flat_graph['nodes'].values():
if cls.is_model(node) and not cls.is_ephemeral(node):
if cls.is_refable(node) and not cls.is_ephemeral(node):
schemas.add(node['schema'])

return schemas
Expand Down
13 changes: 8 additions & 5 deletions dbt/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,15 @@ def resolve_ref(flat_graph, target_model_name, target_model_package,
current_project, node_package):

if target_model_package is not None:
return dbt.utils.find_model_by_name(
return dbt.utils.find_refable_by_name(
flat_graph,
target_model_name,
target_model_package)

target_model = None

# first pass: look for models in the current_project
target_model = dbt.utils.find_model_by_name(
target_model = dbt.utils.find_refable_by_name(
flat_graph,
target_model_name,
current_project)
Expand All @@ -56,7 +56,7 @@ def resolve_ref(flat_graph, target_model_name, target_model_package,
return target_model

# second pass: look for models in the node's package
target_model = dbt.utils.find_model_by_name(
target_model = dbt.utils.find_refable_by_name(
flat_graph,
target_model_name,
node_package)
Expand All @@ -67,7 +67,7 @@ def resolve_ref(flat_graph, target_model_name, target_model_package,
# final pass: look for models in any package
# todo: exclude the packages we have already searched. overriding
# a package model in another package doesn't necessarily work atm
return dbt.utils.find_model_by_name(
return dbt.utils.find_refable_by_name(
flat_graph,
target_model_name,
None)
Expand Down Expand Up @@ -201,7 +201,10 @@ def parse_node(node, node_path, root_project_config, package_project_config,
fqn = get_fqn(node.get('path'), package_project_config, fqn_extra)

config = dbt.model.SourceConfig(
root_project_config, package_project_config, fqn)
root_project_config,
package_project_config,
fqn,
node['resource_type'])

node['unique_id'] = node_path
node['empty'] = ('raw_sql' in node and len(node['raw_sql'].strip()) == 0)
Expand Down
8 changes: 4 additions & 4 deletions dbt/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,14 +125,14 @@ def model_immediate_name(model, non_destructive):
return "{}__dbt_tmp".format(model_name)


def find_model_by_name(flat_graph, target_name, target_package):
def find_refable_by_name(flat_graph, target_name, target_package):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

return find_by_name(flat_graph, target_name, target_package,
'nodes', NodeType.Model)
'nodes', [NodeType.Model, NodeType.Seed])


def find_macro_by_name(flat_graph, target_name, target_package):
return find_by_name(flat_graph, target_name, target_package,
'macros', NodeType.Macro)
'macros', [NodeType.Macro])


def find_by_name(flat_graph, target_name, target_package, subgraph,
Expand All @@ -146,7 +146,7 @@ def find_by_name(flat_graph, target_name, target_package, subgraph,

resource_type, package_name, node_name = node_parts

if (resource_type == nodetype and
if (resource_type in nodetype and
((target_name == node_name) and
(target_package is None or
target_package == package_name))):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
id,first_name,email,ip_address,birthday
1,Larry,lking0@miitbeian.gov.cn,69.135.206.194,2008-09-12 19:08:31
2,Larry,lperkins1@toplist.cz,64.210.133.162,1978-05-09 04:15:14
3,Anna,amontgomery2@miitbeian.gov.cn,168.104.64.114,2011-10-16 04:07:57
4,Sandra,sgeorge3@livejournal.com,229.235.252.98,1973-07-19 10:52:43
5,Fred,fwoods4@google.cn,78.229.170.124,2012-09-30 16:38:29
6,Stephen,shanson5@livejournal.com,182.227.157.105,1995-11-07 21:40:50
7,William,wmartinez6@upenn.edu,135.139.249.50,1982-09-05 03:11:59
8,Jessica,jlong7@hao123.com,203.62.178.210,1991-10-16 11:03:15
9,Douglas,dwhite8@tamu.edu,178.187.247.1,1979-10-01 09:49:48
10,Lisa,lcoleman9@nydailynews.com,168.234.128.249,2011-05-26 07:45:49
11,Ralph,rfieldsa@home.pl,55.152.163.149,1972-11-18 19:06:11
12,Louise,lnicholsb@samsung.com,141.116.153.154,2014-11-25 20:56:14
13,Clarence,cduncanc@sfgate.com,81.171.31.133,2011-11-17 07:02:36
14,Daniel,dfranklind@omniture.com,8.204.211.37,1980-09-13 00:09:04
15,Katherine,klanee@auda.org.au,176.96.134.59,1997-08-22 19:36:56
16,Billy,bwardf@wikia.com,214.108.78.85,2003-10-19 02:14:47
17,Annie,agarzag@ocn.ne.jp,190.108.42.70,1988-10-28 15:12:35
18,Shirley,scolemanh@fastcompany.com,109.251.164.84,1988-08-24 10:50:57
19,Roger,rfrazieri@scribd.com,38.145.218.108,1985-12-31 15:17:15
20,Lillian,lstanleyj@goodreads.com,47.57.236.17,1970-06-08 02:09:05
21 changes: 21 additions & 0 deletions test/integration/005_simple_seed_test/data-config/seed_enabled.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
id,first_name,email,ip_address,birthday
1,Larry,lking0@miitbeian.gov.cn,69.135.206.194,2008-09-12 19:08:31
2,Larry,lperkins1@toplist.cz,64.210.133.162,1978-05-09 04:15:14
3,Anna,amontgomery2@miitbeian.gov.cn,168.104.64.114,2011-10-16 04:07:57
4,Sandra,sgeorge3@livejournal.com,229.235.252.98,1973-07-19 10:52:43
5,Fred,fwoods4@google.cn,78.229.170.124,2012-09-30 16:38:29
6,Stephen,shanson5@livejournal.com,182.227.157.105,1995-11-07 21:40:50
7,William,wmartinez6@upenn.edu,135.139.249.50,1982-09-05 03:11:59
8,Jessica,jlong7@hao123.com,203.62.178.210,1991-10-16 11:03:15
9,Douglas,dwhite8@tamu.edu,178.187.247.1,1979-10-01 09:49:48
10,Lisa,lcoleman9@nydailynews.com,168.234.128.249,2011-05-26 07:45:49
11,Ralph,rfieldsa@home.pl,55.152.163.149,1972-11-18 19:06:11
12,Louise,lnicholsb@samsung.com,141.116.153.154,2014-11-25 20:56:14
13,Clarence,cduncanc@sfgate.com,81.171.31.133,2011-11-17 07:02:36
14,Daniel,dfranklind@omniture.com,8.204.211.37,1980-09-13 00:09:04
15,Katherine,klanee@auda.org.au,176.96.134.59,1997-08-22 19:36:56
16,Billy,bwardf@wikia.com,214.108.78.85,2003-10-19 02:14:47
17,Annie,agarzag@ocn.ne.jp,190.108.42.70,1988-10-28 15:12:35
18,Shirley,scolemanh@fastcompany.com,109.251.164.84,1988-08-24 10:50:57
19,Roger,rfrazieri@scribd.com,38.145.218.108,1985-12-31 15:17:15
20,Lillian,lstanleyj@goodreads.com,47.57.236.17,1970-06-08 02:09:05
83 changes: 82 additions & 1 deletion test/integration/005_simple_seed_test/test_simple_seed.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def test_simple_seed(self):
self.run_dbt(["seed"])
self.assertTablesEqual("seed_actual","seed_expected")

# this should truncate the seed_actual table, the re-insert
# this should truncate the seed_actual table, then re-insert
self.run_dbt(["seed"])
self.assertTablesEqual("seed_actual","seed_expected")

Expand All @@ -40,3 +40,84 @@ def test_simple_seed_with_drop(self):
# this should drop the seed table, then re-create
self.run_dbt(["seed", "--drop-existing"])
self.assertTablesEqual("seed_actual","seed_expected")


class TestSimpleSeedCustomSchema(DBTIntegrationTest):

def setUp(self):
DBTIntegrationTest.setUp(self)
self.run_sql_file("test/integration/005_simple_seed_test/seed.sql")

@property
def schema(self):
return "simple_seed_005"

@property
def models(self):
return "test/integration/005_simple_seed_test/models"

@property
def project_config(self):
return {
"data-paths": ['test/integration/005_simple_seed_test/data'],
"seeds": {
"schema": "custom_schema"
}
}

@attr(type='postgres')
def test_simple_seed_with_schema(self):
schema_name = "{}_{}".format(self.unique_schema(), 'custom_schema')

self.run_dbt(["seed"])
self.assertTablesEqual("seed_actual","seed_expected", table_a_schema=schema_name)

# this should truncate the seed_actual table, then re-insert
self.run_dbt(["seed"])
self.assertTablesEqual("seed_actual","seed_expected", table_a_schema=schema_name)


@attr(type='postgres')
def test_simple_seed_with_drop_and_schema(self):
schema_name = "{}_{}".format(self.unique_schema(), 'custom_schema')

self.run_dbt(["seed"])
self.assertTablesEqual("seed_actual","seed_expected", table_a_schema=schema_name)

# this should drop the seed table, then re-create
self.run_dbt(["seed", "--full-refresh"])
self.assertTablesEqual("seed_actual","seed_expected", table_a_schema=schema_name)


class TestSimpleSeedDisabled(DBTIntegrationTest):

@property
def schema(self):
return "simple_seed_005"

@property
def models(self):
return "test/integration/005_simple_seed_test/models"

@property
def project_config(self):
return {
"data-paths": ['test/integration/005_simple_seed_test/data-config'],
"seeds": {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@cmcarthur check it out :)

"test": {
"seed_enabled": {
"enabled": True
},
"seed_disabled": {
"enabled": False
}
}
}
}

@attr(type='postgres')
def test_simple_seed_with_disabled(self):
self.run_dbt(["seed"])
self.assertTableDoesExist('seed_enabled')
self.assertTableDoesNotExist('seed_disabled')