def find_matching(root_path,
                  relative_paths_to_search,
                  file_pattern):
    """
    Find files below `root_path` whose file name matches `file_pattern`.

    `root_path` is joined with each entry of `relative_paths_to_search`
    and every resulting directory is walked recursively. A single path
    may be passed instead of a list or tuple; it is treated as a
    one-element list.

    Returns a list with one dict per matching file:

      - 'searched_path': the entry of `relative_paths_to_search` under
        which the file was found
      - 'absolute_path': `root_path` + searched path + path of the file
      - 'relative_path': path of the file relative to the *searched*
        directory (it does not include the searched path itself)

    For example:

    > find_matching('/root/path', ['models'], '*.sql')

      [ { 'searched_path': 'models',
          'absolute_path': '/root/path/models/model_one.sql',
          'relative_path': 'model_one.sql' },
        { 'searched_path': 'models',
          'absolute_path': '/root/path/models/subdirectory/model_two.sql',
          'relative_path': 'subdirectory/model_two.sql' } ]

    The order of the results follows os.walk and is not sorted.
    """
    # A bare string is iterable too: without this guard 'models' would be
    # searched as the directories 'm', 'o', 'd', ... and silently match
    # nothing.
    if not isinstance(relative_paths_to_search, (list, tuple)):
        relative_paths_to_search = [relative_paths_to_search]

    matching = []

    for relative_path_to_search in relative_paths_to_search:
        absolute_path_to_search = os.path.join(
            root_path, relative_path_to_search)

        # os.walk yields nothing for a directory that does not exist, so a
        # missing search path simply contributes no matches.
        for current_path, _, local_files in os.walk(absolute_path_to_search):
            for local_file in local_files:
                # Only the file name is matched, never the directory part.
                if not fnmatch.fnmatch(local_file, file_pattern):
                    continue

                absolute_path = os.path.join(current_path, local_file)

                matching.append({
                    'searched_path': relative_path_to_search,
                    'absolute_path': absolute_path,
                    'relative_path': os.path.relpath(
                        absolute_path, absolute_path_to_search),
                })

    return matching


def load_file_contents(path, strip=True):
    """
    Read the file at `path` and return its contents decoded as UTF-8.

    The file is opened in binary mode and decoded explicitly, so the
    result does not depend on the platform's default encoding. When
    `strip` is true (the default) leading and trailing whitespace is
    removed from the returned text.

    Errors from open() and a UnicodeDecodeError for content that is not
    valid UTF-8 are not caught here.
    """
    with open(path, 'rb') as handle:
        to_return = handle.read().decode('utf-8')

    if strip:
        to_return = to_return.strip()

    return to_return
+273,10 @@ def compile_model(self, linker, model, models, add_dependency=True): fs_loader = jinja2.FileSystemLoader(searchpath=model.root_dir) jinja = jinja2.Environment(loader=fs_loader) - # this is a dumb jinja2 bug -- on windows, forward slashes - # are EXPECTED - posix_filepath = '/'.join(split_path(model.rel_filepath)) - template = jinja.get_template(posix_filepath) + template_contents = dbt.clients.system.load_file_contents( + model.absolute_path) + + template = jinja.from_string(template_contents) context = self.get_context( linker, model, models, add_dependency=add_dependency ) @@ -521,7 +522,7 @@ def compile_archives(self): def get_models(self): all_models = self.model_sources(this_project=self.project) - for project in dependency_projects(self.project): + for project in dbt.utils.dependency_projects(self.project): all_models.extend( self.model_sources( this_project=self.project, own_project=project @@ -536,7 +537,7 @@ def compile(self, limit_to=None): all_models = self.get_models() all_macros = self.get_macros(this_project=self.project) - for project in dependency_projects(self.project): + for project in dbt.utils.dependency_projects(self.project): all_macros.extend( self.get_macros(this_project=self.project, own_project=project) ) diff --git a/dbt/model.py b/dbt/model.py index 1988b983f14..e9535233ebb 100644 --- a/dbt/model.py +++ b/dbt/model.py @@ -1,4 +1,3 @@ - import os.path import yaml import jinja2 @@ -203,6 +202,10 @@ def __init__(self, project, top_dir, rel_filepath, own_project): self.source_config = SourceConfig(project, own_project, self.fqn) + @property + def absolute_path(self): + return os.path.join(self.root_dir, self.rel_filepath) + @property def root_dir(self): return os.path.join(self.own_project['project-root'], self.top_dir) @@ -231,9 +234,7 @@ def serialize(self): @property def contents(self): - filepath = os.path.join(self.root_dir, self.rel_filepath) - with open(filepath) as fh: - return fh.read().strip() + return 
def build_models_from_file_matches(
        self,
        to_build,
        file_matches,
        extra_args=None):
    """
    Build one object per file match.

    `to_build` is called once per entry of `file_matches` as

        to_build(self.project, searched_path, relative_path,
                 self.own_project, *extra_args)

    where `searched_path` and `relative_path` are read from the match
    dict (a missing key is passed as None). `extra_args` is an optional
    sequence of additional trailing positional arguments.

    Returns the list of built objects, in the order of `file_matches`.
    """
    # None instead of a literal [] default: a list default is created once
    # and shared between all calls. Copying also keeps the caller's
    # sequence untouched and accepts tuples.
    trailing_args = [] if extra_args is None else list(extra_args)

    return [
        to_build(
            self.project,
            file_match.get('searched_path'),
            file_match.get('relative_path'),
            self.own_project,
            *trailing_args)
        for file_match in file_matches
    ]


def _find_and_build(self, to_build, search_dirs, file_pattern,
                    extra_args=None):
    """
    Search `search_dirs` of this project for files matching
    `file_pattern` and build a `to_build` object for each one.
    """
    # Looked up through the module at call time, so a replaced
    # dbt.clients.system.find_matching (e.g. in tests) is honoured.
    file_matches = dbt.clients.system.find_matching(
        self.own_project_root,
        search_dirs,
        file_pattern)

    return self.build_models_from_file_matches(
        to_build,
        file_matches,
        extra_args)


def get_models(self, model_dirs, create_template):
    "Get Model objects for the .sql files in model_dirs"
    return self._find_and_build(
        Model, model_dirs, "[!.#~]*.sql", [create_template])


def get_test_models(self, model_dirs, create_template):
    "Get TestModel objects for the .sql files in model_dirs"
    return self._find_and_build(
        TestModel, model_dirs, "[!.#~]*.sql", [create_template])


def get_analyses(self, analysis_dirs):
    "Get Analysis objects for the .sql files in analysis_dirs"
    return self._find_and_build(Analysis, analysis_dirs, "[!.#~]*.sql")


def get_schemas(self, schema_dirs):
    "Get schema.yml files"
    return self._find_and_build(SchemaFile, schema_dirs, "[!.#~]*.yml")


def get_tests(self, test_dirs):
    "Get custom test files"
    return self._find_and_build(DataTest, test_dirs, "[!.#~]*.sql")


def get_csvs(self, csv_dirs):
    "Get CSV files"
    return self._find_and_build(Csv, csv_dirs, "[!.#~]*.csv")


def get_macros(self, macro_dirs):
    "Get Macro files"
    return self._find_and_build(Macro, macro_dirs, "[!.#~]*.sql")
class GraphTest(unittest.TestCase):
    """
    Compiles small in-memory projects and checks the resulting graph.

    File discovery and file reading are replaced by mocks fed from
    `use_models`, and `nx.write_yaml` is replaced by a function that
    stores the graph it is given in `self.graph_result` (presumably the
    compiler writes its graph through that function -- the tests read
    `self.graph_result` right after `compile`).
    """

    def _patch(self, owner, name, replacement):
        """Replace `owner.<name>` for the duration of the current test."""
        # Registered before the attribute is swapped and run by unittest
        # even when setUp itself fails part-way, unlike tearDown.
        self.addCleanup(setattr, owner, name, getattr(owner, name))
        setattr(owner, name, replacement)

    def setUp(self):
        # Imported explicitly (under an alias, so the module-level `dbt`
        # name is not shadowed) instead of relying on another dbt module
        # having imported it as a side effect.
        import dbt.clients.system as system_client

        def mock_write_yaml(graph, outfile):
            self.graph_result = graph

        self.graph_result = None
        self._patch(nx, 'write_yaml', mock_write_yaml)

        self.profiles = {
            'test': {
                'outputs': {
                    'test': {
                        'type': 'postgres',
                        'threads': 4,
                        'host': 'database',
                        'port': 5432,
                        'user': 'root',
                        'pass': 'password',
                        'dbname': 'dbt',
                        'schema': 'dbt_test'
                    }
                },
                'target': 'test'
            }
        }

        self._patch(
            dbt.utils, 'dependency_projects', MagicMock(return_value=[]))

        self.mock_models = []
        self.mock_content = {}

        def mock_find_matching(root_path, relative_paths_to_search,
                               file_pattern):
            # Only .sql lookups under 'models' return the fake models.
            if 'sql' not in file_pattern:
                return []

            to_return = []

            if 'models' in relative_paths_to_search:
                to_return = to_return + self.mock_models

            return to_return

        self._patch(
            system_client, 'find_matching',
            MagicMock(side_effect=mock_find_matching))

        def mock_load_file_contents(path, strip=True):
            # Same signature and stripping as the real function, so a
            # caller passing `strip` does not break the mock.
            contents = self.mock_content[path]
            return contents.strip() if strip else contents

        self._patch(
            system_client, 'load_file_contents',
            MagicMock(side_effect=mock_load_file_contents))

    def get_project(self, extra_cfg=None):
        """Return a validated project; `extra_cfg` overrides the base cfg."""
        if extra_cfg is None:
            extra_cfg = {}

        cfg = {
            'name': 'test_models_compile',
            'version': '0.1',
            'profile': 'test',
            'project-root': os.path.abspath('.'),
        }
        cfg.update(extra_cfg)

        project = dbt.project.Project(
            cfg=cfg,
            profiles=self.profiles,
            profiles_dir=None)

        project.validate()
        return project

    def get_compiler(self, project):
        """Return a Compiler for `project` that reports no macros."""
        compiler = dbt.compilation.Compiler(
            project,
            dbt.templates.BaseCreateTemplate,
            FakeArgs())

        compiler.get_macros = MagicMock(return_value=[])
        return compiler

    def use_models(self, models):
        """Register fake models from a {model name: SQL text} dict."""
        for k, v in models.items():
            path = os.path.abspath('models/{}.sql'.format(k))
            self.mock_models.append({
                'searched_path': 'models',
                'absolute_path': path,
                'relative_path': '{}.sql'.format(k)})
            self.mock_content[path] = v

    def test__single_model(self):
        self.use_models({
            'model_one': 'select * from events',
        })

        compiler = self.get_compiler(self.get_project())
        compiler.compile(limit_to=['models'])

        self.assertEqual(
            self.graph_result.nodes(),
            [('test_models_compile', 'model_one')])

        self.assertEqual(
            self.graph_result.edges(),
            [])

    def test__two_models_simple_ref(self):
        self.use_models({
            'model_one': 'select * from events',
            'model_two': "select * from {{ref('model_one')}}",
        })

        compiler = self.get_compiler(self.get_project())
        compiler.compile(limit_to=['models'])

        six.assertCountEqual(self,
                             self.graph_result.nodes(),
                             [
                                 ('test_models_compile', 'model_one'),
                                 ('test_models_compile', 'model_two')
                             ])

        six.assertCountEqual(self,
                             self.graph_result.edges(),
                             [
                                 (
                                     ('test_models_compile', 'model_one'),
                                     ('test_models_compile', 'model_two')
                                 )
                             ])

    def test__model_materializations(self):
        self.use_models({
            'model_one': 'select * from events',
            'model_two': "select * from {{ref('model_one')}}",
            'model_three': "select * from events",
            'model_four': "select * from events",
        })

        cfg = {
            "models": {
                "materialized": "table",
                "test_models_compile": {
                    "model_one": {"materialized": "table"},
                    "model_two": {"materialized": "view"},
                    "model_three": {"materialized": "ephemeral"}
                }
            }
        }

        compiler = self.get_compiler(self.get_project(cfg))
        compiler.compile(limit_to=['models'])

        # model_four has no entry of its own and is expected to pick up
        # the project-level "table" setting.
        expected_materialization = {
            "model_one": "table",
            "model_two": "view",
            "model_three": "ephemeral",
            "model_four": "table"
        }

        nodes = self.graph_result.node

        for model, expected in expected_materialization.items():
            actual = nodes[("test_models_compile", model)]["materialized"]
            self.assertEqual(actual, expected)

    def test__model_enabled(self):
        self.use_models({
            'model_one': 'select * from events',
            'model_two': "select * from {{ref('model_one')}}",
        })

        cfg = {
            "models": {
                "materialized": "table",
                "test_models_compile": {
                    "model_one": {"enabled": True},
                    "model_two": {"enabled": False},
                }
            }
        }

        compiler = self.get_compiler(self.get_project(cfg))
        compiler.compile(limit_to=['models'])

        six.assertCountEqual(self,
                             self.graph_result.nodes(),
                             [('test_models_compile', 'model_one')])

        six.assertCountEqual(self, self.graph_result.edges(), [])

    def test__model_incremental_without_sql_where_fails(self):
        self.use_models({
            'model_one': 'select * from events'
        })

        cfg = {
            "models": {
                "materialized": "table",
                "test_models_compile": {
                    "model_one": {"materialized": "incremental"},
                }
            }
        }

        compiler = self.get_compiler(self.get_project(cfg))

        with self.assertRaises(RuntimeError):
            compiler.compile(limit_to=['models'])

    def test__model_incremental(self):
        self.use_models({
            'model_one': 'select * from events'
        })

        cfg = {
            "models": {
                "test_models_compile": {
                    "model_one": {
                        "materialized": "incremental",
                        "sql_where": "created_at",
                        "unique_key": "id"
                    },
                }
            }
        }

        compiler = self.get_compiler(self.get_project(cfg))
        compiler.compile(limit_to=['models'])

        node = ('test_models_compile', 'model_one')

        self.assertEqual(self.graph_result.nodes(), [node])
        self.assertEqual(self.graph_result.edges(), [])

        self.assertEqual(
            self.graph_result.node[node]['materialized'],
            'incremental')

    def test__topological_ordering(self):
        self.use_models({
            'model_1': 'select * from events',
            'model_2': 'select * from {{ ref("model_1") }}',
            'model_3': '''
                select * from {{ ref("model_1") }}
                union all
                select * from {{ ref("model_2") }}
            ''',
            'model_4': 'select * from {{ ref("model_3") }}'
        })

        compiler = self.get_compiler(self.get_project({}))
        compiler.compile(limit_to=['models'])

        six.assertCountEqual(self,
                             self.graph_result.nodes(),
                             [
                                 ('test_models_compile', 'model_1'),
                                 ('test_models_compile', 'model_2'),
                                 ('test_models_compile', 'model_3'),
                                 ('test_models_compile', 'model_4')
                             ])

        six.assertCountEqual(self,
                             self.graph_result.edges(),
                             [
                                 (
                                     ('test_models_compile', 'model_1'),
                                     ('test_models_compile', 'model_2')
                                 ),
                                 (
                                     ('test_models_compile', 'model_1'),
                                     ('test_models_compile', 'model_3')
                                 ),
                                 (
                                     ('test_models_compile', 'model_2'),
                                     ('test_models_compile', 'model_3')
                                 ),
                                 (
                                     ('test_models_compile', 'model_3'),
                                     ('test_models_compile', 'model_4')
                                 )
                             ])

        linker = dbt.linker.Linker()
        linker.graph = self.graph_result

        actual_ordering = linker.as_topological_ordering()
        expected_ordering = [
            ('test_models_compile', 'model_1'),
            ('test_models_compile', 'model_2'),
            ('test_models_compile', 'model_3'),
            ('test_models_compile', 'model_4')
        ]

        self.assertEqual(actual_ordering, expected_ordering)

    def test__dependency_list(self):
        self.use_models({
            'model_1': 'select * from events',
            'model_2': 'select * from {{ ref("model_1") }}',
            'model_3': '''
                select * from {{ ref("model_1") }}
                union all
                select * from {{ ref("model_2") }}
            ''',
            'model_4': 'select * from {{ ref("model_3") }}'
        })

        compiler = self.get_compiler(self.get_project({}))
        compiler.compile(limit_to=['models'])

        linker = dbt.linker.Linker()
        linker.graph = self.graph_result

        actual_dep_list = linker.as_dependency_list()
        expected_dep_list = [
            [
                ('test_models_compile', 'model_1')
            ],
            [
                ('test_models_compile', 'model_2')
            ],
            [
                ('test_models_compile', 'model_3')
            ],
            [
                ('test_models_compile', 'model_4'),
            ]
        ]

        self.assertEqual(actual_dep_list, expected_dep_list)