From c4d6b2ed0f9a2f8e7d48795533e7d8c3c2ad31c2 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Fri, 23 Nov 2018 07:44:20 -0700 Subject: [PATCH 1/3] Delete dead code, move SourceConfig into the parser, allow multiple calls --- dbt/parser/base.py | 4 +- dbt/{model.py => parser/source_config.py} | 108 +++++++--------------- dbt/runner.py | 1 - dbt/source.py | 39 -------- test/unit/test_graph.py | 1 - 5 files changed, 33 insertions(+), 120 deletions(-) rename dbt/{model.py => parser/source_config.py} (64%) delete mode 100644 dbt/source.py diff --git a/dbt/parser/base.py b/dbt/parser/base.py index 0d0da33eeba..f4f169bfa96 100644 --- a/dbt/parser/base.py +++ b/dbt/parser/base.py @@ -2,7 +2,6 @@ import dbt.exceptions import dbt.flags -import dbt.model import dbt.utils import dbt.hooks import dbt.clients.jinja @@ -11,6 +10,7 @@ from dbt.utils import coalesce from dbt.logger import GLOBAL_LOGGER as logger from dbt.contracts.graph.parsed import ParsedNode +from dbt.parser.source_config import SourceConfig class BaseParser(object): @@ -71,7 +71,7 @@ def parse_node(cls, node, node_path, root_project_config, fqn = cls.get_fqn(node.get('path'), package_project_config, fqn_extra) - config = dbt.model.SourceConfig( + config = SourceConfig( root_project_config, package_project_config, fqn, diff --git a/dbt/model.py b/dbt/parser/source_config.py similarity index 64% rename from dbt/model.py rename to dbt/parser/source_config.py index 27c0546de72..703e446b8fd 100644 --- a/dbt/model.py +++ b/dbt/parser/source_config.py @@ -1,19 +1,15 @@ -import os.path - import dbt.exceptions -from dbt.compat import basestring - -from dbt.utils import split_path, deep_merge, DBTConfigKeys +from dbt.utils import deep_merge, DBTConfigKeys from dbt.node_types import NodeType class SourceConfig(object): ConfigKeys = DBTConfigKeys - AppendListFields = ['pre-hook', 'post-hook', 'tags'] - ExtendDictFields = ['vars', 'column_types', 'quoting'] - ClobberFields = [ + AppendListFields = {'pre-hook', 'post-hook', 
'tags'} + ExtendDictFields = {'vars', 'column_types', 'quoting'} + ClobberFields = { 'alias', 'schema', 'enabled', @@ -23,8 +19,8 @@ class SourceConfig(object): 'sql_where', 'unique_key', 'sort_type', - 'bind' - ] + 'bind', + } def __init__(self, active_project, own_project, fqn, node_type): self._config = None @@ -37,7 +33,7 @@ def __init__(self, active_project, own_project, fqn, node_type): self.in_model_config = {} # make sure we categorize all configs - all_configs = self.AppendListFields + self.ExtendDictFields + \ + all_configs = self.AppendListFields | self.ExtendDictFields | \ self.ClobberFields for config in self.ConfigKeys: @@ -90,15 +86,23 @@ def config(self): return cfg def update_in_model_config(self, config): - config = config.copy() - - # make sure we're not clobbering an array of hooks with a single hook - # string - for field in self.AppendListFields: - if field in config: - config[field] = self.__get_as_list(config, field) - - self.in_model_config.update(config) + for key, value in config.items(): + if key in self.AppendListFields: + current = self.in_model_config.get(key, []) + if not isinstance(value, (list, tuple)): + value = [value] + current.extend(value) + self.in_model_config[key] = current + elif key in self.ExtendDictFields: + current = self.in_model_config.get(key, {}) + if not isinstance(current, dict): + dbt.exceptions.raise_compiler_error( + 'Invalid config field: "{}" must be a dict'.format(key) + ) + current.update(value) + self.in_model_config[key] = current + else: # key in self.ClobberFields + self.in_model_config[key] = value def __get_as_list(self, relevant_configs, key): if key not in relevant_configs: @@ -116,17 +120,17 @@ def smart_update(self, mutable_config, new_configs): in new_configs if key in self.ConfigKeys } - for key in SourceConfig.AppendListFields: + for key in self.AppendListFields: append_fields = self.__get_as_list(relevant_configs, key) mutable_config[key].extend([ f for f in append_fields if f not in 
mutable_config[key] ]) - for key in SourceConfig.ExtendDictFields: + for key in self.ExtendDictFields: dict_val = relevant_configs.get(key, {}) mutable_config[key].update(dict_val) - for key in SourceConfig.ClobberFields: + for key in self.ClobberFields: if key in relevant_configs: mutable_config[key] = relevant_configs[key] @@ -136,9 +140,9 @@ def get_project_config(self, runtime_config): # most configs are overwritten by a more specific config, but pre/post # hooks are appended! config = {} - for k in SourceConfig.AppendListFields: + for k in self.AppendListFields: config[k] = [] - for k in SourceConfig.ExtendDictFields: + for k in self.ExtendDictFields: config[k] = {} if self.node_type == NodeType.Seed: @@ -163,8 +167,8 @@ def get_project_config(self, runtime_config): clobber_configs = { k: v for (k, v) in relevant_configs.items() - if k not in SourceConfig.AppendListFields and - k not in SourceConfig.ExtendDictFields + if k not in self.AppendListFields and + k not in self.ExtendDictFields } config.update(clobber_configs) @@ -177,53 +181,3 @@ def load_config_from_own_project(self): def load_config_from_active_project(self): return self.get_project_config(self.active_project) - - -class DBTSource(object): - def __init__(self, project, top_dir, rel_filepath, own_project): - self._config = None - self.project = project - self.own_project = own_project - - self.top_dir = top_dir - self.rel_filepath = rel_filepath - self.filepath = os.path.join(top_dir, rel_filepath) - self.name = self.fqn[-1] - - self.source_config = SourceConfig(project, own_project, self.fqn) - - def compile(self): - raise RuntimeError("Not implemented!") - - @property - def config(self): - if self._config is not None: - return self._config - - return self.source_config.config - - @property - def fqn(self): - """ - fully-qualified name for model. 
Includes all subdirs below 'models' - path and the filename - """ - parts = split_path(self.filepath) - name, _ = os.path.splitext(parts[-1]) - return [self.own_project['name']] + parts[1:-1] + [name] - - @property - def nice_name(self): - return "{}.{}".format(self.fqn[0], self.fqn[-1]) - - -class Csv(DBTSource): - def __init__(self, project, target_dir, rel_filepath, own_project): - super(Csv, self).__init__( - project, target_dir, rel_filepath, own_project - ) - - def __repr__(self): - return "<Csv {}.{}: {}>".format( - self.project['name'], self.model_name, self.filepath - ) diff --git a/dbt/runner.py b/dbt/runner.py index d9432f6414c..8231406e5de 100644 --- a/dbt/runner.py +++ b/dbt/runner.py @@ -12,7 +12,6 @@ import dbt.exceptions import dbt.linker import dbt.tracking -import dbt.model import dbt.ui.printer import dbt.utils from dbt.clients.system import write_json diff --git a/dbt/source.py b/dbt/source.py deleted file mode 100644 index d286cb63076..00000000000 --- a/dbt/source.py +++ /dev/null @@ -1,39 +0,0 @@ -from dbt.model import Csv - -import dbt.clients.system - - -class Source(object): - def __init__(self, project, own_project=None): - self.project = project - self.project_root = project['project-root'] - self.project_name = project['name'] - - self.own_project = (own_project if own_project is not None - else self.project) - self.own_project_root = self.own_project['project-root'] - self.own_project_name = self.own_project['name'] - - def build_models_from_file_matches( - self, - to_build, - file_matches, - extra_args=[]): - - build_args = [[self.project, - file_match.get('searched_path'), - file_match.get('relative_path'), - self.own_project] + extra_args - for file_match in file_matches] - - return [to_build(*args) for args in build_args] - - def get_csvs(self, csv_dirs): - file_matches = dbt.clients.system.find_matching( - self.own_project_root, - csv_dirs, - "[!.#~]*.csv") - - return self.build_models_from_file_matches( - Csv, - file_matches) diff --git 
a/test/unit/test_graph.py b/test/unit/test_graph.py index e663d199af8..2b8ffaef119 100644 --- a/test/unit/test_graph.py +++ b/test/unit/test_graph.py @@ -8,7 +8,6 @@ import dbt.exceptions import dbt.flags import dbt.linker -import dbt.model import dbt.config import dbt.templates import dbt.utils From 1fd84ad9d5ee35a9155851d28e46d5c40e5d079a Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Fri, 23 Nov 2018 08:09:57 -0700 Subject: [PATCH 2/3] add unit tests --- test/unit/test_parser.py | 76 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 70 insertions(+), 6 deletions(-) diff --git a/test/unit/test_parser.py b/test/unit/test_parser.py index 431a456c9ab..226aa9a5533 100644 --- a/test/unit/test_parser.py +++ b/test/unit/test_parser.py @@ -7,6 +7,7 @@ import dbt.flags import dbt.parser from dbt.parser import ModelParser, MacroParser, DataTestParser, SchemaParser, ParserUtils +from dbt.parser.source_config import SourceConfig from dbt.utils import timestring from dbt.config import RuntimeConfig @@ -21,12 +22,7 @@ def get_os_path(unix_path): return os.path.normpath(unix_path) -class ParserTest(unittest.TestCase): - - def find_input_by_name(self, models, name): - return next( - (model for model in models if model.get('name') == name), - {}) +class BaseParserTest(unittest.TestCase): def setUp(self): dbt.flags.STRICT_MODE = True @@ -73,6 +69,73 @@ def setUp(self): project=snowplow_project, profile=profile_data ) + + +class SourceConfigTest(BaseParserTest): + def test__source_config_single_call(self): + cfg = SourceConfig(self.root_project_config, self.root_project_config, + ['root', 'x'], NodeType.Model) + cfg.update_in_model_config({ + 'materialized': 'something', + 'sort': 'my sort key', + 'pre-hook': 'my pre run hook', + 'vars': {'a': 1, 'b': 2}, + }) + expect = { + 'column_types': {}, + 'enabled': True, + 'materialized': 'something', + 'post-hook': [], + 'pre-hook': ['my pre run hook'], + 'quoting': {}, + 'sort': 'my sort key', + 'tags': [], + 'vars': {'a': 1, 
'b': 2}, + } + self.assertEqual(cfg.config, expect) + + def test__source_config_multiple_calls(self): + cfg = SourceConfig(self.root_project_config, self.root_project_config, + ['root', 'x'], NodeType.Model) + cfg.update_in_model_config({ + 'materialized': 'something', + 'sort': 'my sort key', + 'pre-hook': 'my pre run hook', + 'vars': {'a': 1, 'b': 2}, + }) + cfg.update_in_model_config({ + 'materialized': 'something else', + 'pre-hook': ['my other pre run hook', 'another pre run hook'], + 'vars': {'a': 4, 'c': 3}, + }) + expect = { + 'column_types': {}, + 'enabled': True, + 'materialized': 'something else', + 'post-hook': [], + 'pre-hook': [ + 'my pre run hook', + 'my other pre run hook', + 'another pre run hook', + ], + 'quoting': {}, + 'sort': 'my sort key', + 'tags': [], + 'vars': {'a': 4, 'b': 2, 'c': 3}, + } + self.assertEqual(cfg.config, expect) + + +class ParserTest(BaseParserTest): + + def find_input_by_name(self, models, name): + return next( + (model for model in models if model.get('name') == name), + {}) + + def setUp(self): + super(ParserTest, self).setUp() + self.macro_manifest = Manifest(macros={}, nodes={}, docs={}, generated_at=timestring(), disabled=[]) @@ -98,6 +161,7 @@ def setUp(self): 'tags': [], } + def test__single_model(self): models = [{ 'name': 'model_one', From 51252b06b9841e9b5cee4d2f0347073838f7bd23 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Fri, 23 Nov 2018 09:05:03 -0700 Subject: [PATCH 3/3] Add a test for overwrite/append --- .../integration/039_config_test/data/seed.csv | 2 + .../039_config_test/models/model.sql | 15 +++++++ .../039_config_test/test_configs.py | 41 +++++++++++++++++++ 3 files changed, 58 insertions(+) create mode 100644 test/integration/039_config_test/data/seed.csv create mode 100644 test/integration/039_config_test/models/model.sql create mode 100644 test/integration/039_config_test/test_configs.py diff --git a/test/integration/039_config_test/data/seed.csv b/test/integration/039_config_test/data/seed.csv 
new file mode 100644 index 00000000000..83f9676727c --- /dev/null +++ b/test/integration/039_config_test/data/seed.csv @@ -0,0 +1,2 @@ +id,value +4,2 diff --git a/test/integration/039_config_test/models/model.sql b/test/integration/039_config_test/models/model.sql new file mode 100644 index 00000000000..43aabe2102c --- /dev/null +++ b/test/integration/039_config_test/models/model.sql @@ -0,0 +1,15 @@ +{{ + config( + materialized='view', + tags=['tag_two'], + ) +}} + +{{ + config( + materialized='table', + tags=['tag_three'], + ) +}} + +select 4 as id, 2 as value diff --git a/test/integration/039_config_test/test_configs.py b/test/integration/039_config_test/test_configs.py new file mode 100644 index 00000000000..d1cac71c85c --- /dev/null +++ b/test/integration/039_config_test/test_configs.py @@ -0,0 +1,41 @@ + +from test.integration.base import DBTIntegrationTest, use_profile + + +class TestConfigs(DBTIntegrationTest): + @property + def schema(self): + return "config_039" + + def unique_schema(self): + return super(TestConfigs, self).unique_schema().upper() + + @property + def project_config(self): + return { + 'data-paths': ['test/integration/039_config_test/data'], + 'models': { + 'test': { + # the model configs will override this + 'materialized': 'invalid', + # the model configs will append to these + 'tags': ['tag_one'], + }, + }, + } + + @property + def models(self): + return "test/integration/039_config_test/models" + + @use_profile('postgres') + def test_postgres_config_layering(self): + self.assertEqual(len(self.run_dbt(['seed'])), 1) + # test the project-level tag, and both config() call tags + self.assertEqual(len(self.run_dbt(['run', '--model', 'tag:tag_one'])), 1) + self.assertEqual(len(self.run_dbt(['run', '--model', 'tag:tag_two'])), 1) + self.assertEqual(len(self.run_dbt(['run', '--model', 'tag:tag_three'])), 1) + self.assertTablesEqual('seed', 'model') + # make sure we overwrote the materialization properly + models = self.get_models_in_schema() + 
self.assertEqual(models['model'], 'table')