From 434369764f1bb86f2b867471ff41977760c92d92 Mon Sep 17 00:00:00 2001 From: Buck Ryan Date: Tue, 28 Nov 2017 12:25:55 -0500 Subject: [PATCH 01/34] loader for seed data files --- dbt/loader.py | 15 +++++++++++++++ dbt/node_types.py | 4 +++- dbt/parser.py | 40 +++++++++++++++++++++++++++++++++++++++- dbt/runner.py | 2 +- requirements.txt | 1 + setup.py | 1 + 6 files changed, 60 insertions(+), 3 deletions(-) diff --git a/dbt/loader.py b/dbt/loader.py index 3d952f470ff..1d067e04f7e 100644 --- a/dbt/loader.py +++ b/dbt/loader.py @@ -181,6 +181,20 @@ def load_project(cls, root_project, all_projects, macros): macros) +class SeedLoader(ResourceLoader): + + @classmethod + def load_project(cls, root_project, all_projects, project, project_name, + macros): + return dbt.parser.load_and_parse_seeds( + package_name=project_name, + root_project=root_project, + all_projects=all_projects, + root_dir=project.get('project-root'), + relative_dirs=project.get('data-paths', []), + resource_type=NodeType.Seed) + + # node loaders GraphLoader.register(ModelLoader, 'nodes') GraphLoader.register(AnalysisLoader, 'nodes') @@ -188,3 +202,4 @@ def load_project(cls, root_project, all_projects, macros): GraphLoader.register(DataTestLoader, 'nodes') GraphLoader.register(RunHookLoader, 'nodes') GraphLoader.register(ArchiveLoader, 'nodes') +GraphLoader.register(SeedLoader, 'nodes') diff --git a/dbt/node_types.py b/dbt/node_types.py index ae498d3ea47..5d3ef275c83 100644 --- a/dbt/node_types.py +++ b/dbt/node_types.py @@ -7,6 +7,7 @@ class NodeType(object): Archive = 'archive' Macro = 'macro' Operation = 'operation' + Seed = 'seed' @classmethod def executable(cls): @@ -15,7 +16,8 @@ def executable(cls): cls.Test, cls.Archive, cls.Analysis, - cls.Operation + cls.Operation, + cls.Seed, ] diff --git a/dbt/parser.py b/dbt/parser.py index b3c602650df..2e2e050a26d 100644 --- a/dbt/parser.py +++ b/dbt/parser.py @@ -3,6 +3,7 @@ import re import hashlib import collections +import agate import dbt.exceptions import dbt.flags @@ -207,7 +208,7 @@ def parse_node(node, node_path, root_project_config, package_project_config, root_project_config, package_project_config, fqn) node['unique_id'] = node_path - node['empty'] = (len(node.get('raw_sql').strip()) == 0) + node['empty'] = ('raw_sql' in node and len(node['raw_sql'].strip()) == 0) node['fqn'] = fqn node['tags'] = tags node['config_reference'] = config @@ -701,3 +702,40 @@ def parse_archives_from_project(project): }) return archives + + +def parse_seed_file(path, package_name): + logger.debug("Parsing {}".format(path)) + to_return = {} + table_name = os.path.basename(path)[:-4] + return { + 'unique_id': get_path(NodeType.Seed, package_name, table_name), + 'path': path, + 'table_name': table_name, + 'resource_type': NodeType.Seed, + 'package_name': package_name, + 'depends_on': {'nodes': []}, + 'csv_table': agate.Table.from_csv(path), + } + return node + + +def load_and_parse_seeds(package_name, root_project, all_projects, root_dir, + relative_dirs, resource_type, tags=None, macros=None): + extension = "[!.#~]*.csv" + if dbt.flags.STRICT_MODE: + dbt.contracts.project.validate_list(all_projects) + file_matches = dbt.clients.system.find_matching( + root_dir, + relative_dirs, + extension) + result = {} + for file_match in file_matches: + node = parse_seed_file(file_match['absolute_path'], package_name) + node_path = node['unique_id'] + parsed = parse_node(node, node_path, root_project, + all_projects.get(package_name), + all_projects, tags=tags, macros=macros) + # parsed['empty'] = False + result[node_path] = parsed + return result diff --git a/dbt/runner.py b/dbt/runner.py index e18ea22a08a..c88e079580d 100644 --- a/dbt/runner.py +++ b/dbt/runner.py @@ -26,7 +26,7 @@ def __init__(self, project, target_path, args): profile = self.project.run_environment() # TODO validate the number of threads - if self.args.threads is None: + if not getattr(self.args, "threads", None): self.threads = profile.get('threads', 1) else: self.threads = self.args.threads diff --git a/requirements.txt b/requirements.txt index d5947c66764..98ec6af75e6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,3 +13,4 @@ voluptuous==0.10.5 snowflake-connector-python==1.4.9 colorama==0.3.9 google-cloud-bigquery==0.26.0 +agate>=1.6,<2 diff --git a/setup.py b/setup.py index 9c9421c79b2..00d8a365e29 100644 --- a/setup.py +++ b/setup.py @@ -45,5 +45,6 @@ 'snowflake-connector-python>=1.4.9', 'colorama==0.3.9', 'google-cloud-bigquery==0.26.0', + 'agate>=1.6,<2', ] ) From 0bf84894b58ac8bda9ee5346a0dcadf40c695021 Mon Sep 17 00:00:00 2001 From: Buck Ryan Date: Tue, 28 Nov 2017 17:25:22 -0500 Subject: [PATCH 02/34] Functioning rework of seed task --- dbt/adapters/default.py | 22 ++++++++++++++-- dbt/adapters/postgres.py | 30 ++++++++++++++++++++++ dbt/node_runners.py | 54 +++++++++++++++++++++++++++++++++------- dbt/parser.py | 13 +++++----- dbt/task/seed.py | 26 +++++++++++++------ 5 files changed, 121 insertions(+), 24 deletions(-) diff --git a/dbt/adapters/default.py b/dbt/adapters/default.py index a31c73a44fb..0b99dd9429b 100644 --- a/dbt/adapters/default.py +++ b/dbt/adapters/default.py @@ -94,6 +94,23 @@ def cancel_connection(cls, project, connection): raise dbt.exceptions.NotImplementedException( '`cancel_connection` is not implemented for this adapter!') + @classmethod + def create_table(cls, profile, schema, table_name, agate_table): + raise dbt.exceptions.NotImplementedException( + '`create_table` is not implemented for this adapter!') + + @classmethod + def load_csv(cls, profile, schema, table_name, agate_table): + raise dbt.exceptions.NotImplementedException( + '`load_csv` is not implemented for this adapter!') + + @classmethod + def convert_agate_type(cls, agate_type): + for cls, sql in cls.agate_type_conversions: + if isinstance(agate_type, cls): + return sql + return cls.agate_default_type + ### # FUNCTIONS THAT SHOULD BE ABSTRACT ### @@ -501,7 +518,8 @@ def close(cls, connection): return connection @classmethod - def add_query(cls, profile, sql, model_name=None, auto_begin=True): + def add_query(cls, profile, sql, model_name=None, auto_begin=True, + bindings=None): connection = cls.get_connection(profile, model_name) connection_name = connection.get('name') @@ -516,7 +534,7 @@ def add_query(cls, profile, sql, model_name=None, auto_begin=True): pre = time.time() cursor = connection.get('handle').cursor() - cursor.execute(sql) + cursor.execute(sql, (bindings or ())) logger.debug("SQL status: %s in %0.2f seconds", cls.get_status(cursor), (time.time() - pre)) diff --git a/dbt/adapters/postgres.py b/dbt/adapters/postgres.py index 8836f0bca92..7cc7cf7f5eb 100644 --- a/dbt/adapters/postgres.py +++ b/dbt/adapters/postgres.py @@ -1,4 +1,5 @@ import psycopg2 +import agate from contextlib import contextmanager @@ -11,6 +12,15 @@ class PostgresAdapter(dbt.adapters.default.DefaultAdapter): + agate_type_conversions = [ + (agate.Text, "text"), + (agate.Number, "numeric"), + (agate.Boolean, "boolean"), + (agate.DateTime, "timestamp without time zone"), + (agate.Date, "date"), + ] + agate_default_type = "text" + @classmethod @contextmanager def exception_handler(cls, profile, sql, model_name=None, @@ -165,3 +175,23 @@ def cancel_connection(cls, profile, connection): res = cursor.fetchone() logger.debug("Cancel query '{}': {}".format(connection_name, res)) + + @classmethod + def create_table(cls, profile, schema, table_name, agate_table): + col_sqls = [] + for idx, col_name in enumerate(agate_table.column_names): + col_type = agate_table.column_types[idx] + converted_type = cls.convert_agate_type(col_type) + col_sqls.append('"{}" {}'.format(col_name, converted_type)) + sql = 'create table "{}"."{}" ({})'.format(schema, table_name, + ", ".join(col_sqls)) + return cls.add_query(profile, sql) + + @classmethod + def load_csv(cls, profile, schema, table_name, agate_table): + cols_sql = ", ".join(agate_table.column_names) + placeholders = ", ".join("%s" for _ in agate_table.column_names) + sql = ('insert into "{}"."{}" ({}) values ({})' + .format(schema, table_name, cols_sql, placeholders)) + for row in agate_table.rows: + cls.add_query(profile, sql, bindings=row) diff --git a/dbt/node_runners.py b/dbt/node_runners.py index 74c07434bcf..5a11127e57f 100644 --- a/dbt/node_runners.py +++ b/dbt/node_runners.py @@ -263,6 +263,14 @@ def call_table_exists(schema, table): "already_exists": call_table_exists, } + @classmethod + def create_schemas(cls, project, adapter, flat_graph): + profile = project.run_environment() + required_schemas = cls.get_model_schemas(flat_graph) + existing_schemas = set(adapter.get_existing_schemas(profile)) + for schema in (required_schemas - existing_schemas): + adapter.create_schema(profile, schema) + class ModelRunner(CompileRunner): @@ -314,15 +322,6 @@ def safe_run_hooks(cls, project, adapter, flat_graph, hook_type): logger.info("Database error while running {}".format(hook_type)) raise - @classmethod - def create_schemas(cls, project, adapter, flat_graph): - profile = project.run_environment() - required_schemas = cls.get_model_schemas(flat_graph) - existing_schemas = set(adapter.get_existing_schemas(profile)) - - for schema in (required_schemas - existing_schemas): - adapter.create_schema(profile, schema) - @classmethod def before_run(cls, project, adapter, flat_graph): cls.create_schemas(project, adapter, flat_graph) @@ -456,3 +455,40 @@ def describe_node(self): def print_result_line(self, result): dbt.ui.printer.print_archive_result_line(result, self.node_index, self.num_nodes) + + +class SeedRunner(CompileRunner): + + def describe_node(self): + table_name = self.node["table_name"] + return "seed {}".format(table_name) + + @classmethod + def before_run(cls, project, adapter, flat_graph): + cls.create_schemas(project, adapter, flat_graph) + + def before_execute(self): + description = self.describe_node() + dbt.ui.printer.print_start_line(description, self.node_index, + self.num_nodes) + + def execute(self, compiled_node, existing, flat_graph): + existing_tables = [k for k, v in existing.items() if v == "table"] + table_name = compiled_node["table_name"] + existing_type = existing.get(table_name) + if existing_type and existing_type != "table": + raise Exception("table is already a view") # FIXME better exception + adapter = self.adapter # type: dbt.adapters.default.DefaultAdapter + schema = compiled_node["schema"] + table = compiled_node["agate_table"] + if existing_type: + if dbt.flags.NON_DESTRUCTIVE: + adapter.truncate(self.profile, schema, table_name) + else: + adapter.drop_table(self.profile, schema, table_name, None) + adapter.create_table(self.profile, schema, table_name, table) + else: + adapter.create_table(self.profile, schema, table_name, table) + adapter.load_csv(self.profile, schema, table_name, table) + adapter.commit_if_has_connection(self.profile, None) + return RunModelResult(compiled_node) diff --git a/dbt/parser.py b/dbt/parser.py index 2e2e050a26d..972a5fcdb2a 100644 --- a/dbt/parser.py +++ b/dbt/parser.py @@ -704,18 +704,19 @@ def parse_archives_from_project(project): return archives -def parse_seed_file(path, package_name): - logger.debug("Parsing {}".format(path)) +def parse_seed_file(file_match, package_name): + abspath = file_match['absolute_path'] + logger.debug("Parsing {}".format(abspath)) to_return = {} - table_name = os.path.basename(path)[:-4] + table_name = os.path.basename(abspath)[:-4] return { 'unique_id': get_path(NodeType.Seed, package_name, table_name), - 'path': path, + 'path': file_match['relative_path'], 'table_name': table_name, 'resource_type': NodeType.Seed, 'package_name': package_name, 'depends_on': {'nodes': []}, - 'csv_table': agate.Table.from_csv(path), + 'agate_table': agate.Table.from_csv(abspath), } return node @@ -731,7 +732,7 @@ def load_and_parse_seeds(package_name, root_project, all_projects, root_dir, extension) result = {} for file_match in file_matches: - node = parse_seed_file(file_match['absolute_path'], package_name) + node = parse_seed_file(file_match, package_name) node_path = node['unique_id'] parsed = parse_node(node, node_path, root_project, all_projects.get(package_name), diff --git a/dbt/task/seed.py b/dbt/task/seed.py index fabc14eb412..4e7735c7106 100644 --- a/dbt/task/seed.py +++ b/dbt/task/seed.py @@ -1,12 +1,24 @@ import os +from dbt.node_runners import SeedRunner +from dbt.node_types import NodeType +from dbt.runner import RunManager from dbt.seeder import Seeder -from dbt.task.base_task import BaseTask +from dbt.task.base_task import RunnableTask +import dbt.ui.printer -class SeedTask(BaseTask): +class SeedTask(RunnableTask): def run(self): - seeder = Seeder(self.project) - self.success = seeder.seed(self.args.drop_existing) - - def interpret_results(self, results): - return self.success + runner = RunManager( + self.project, + self.project["target-path"], + self.args, + ) + query = { + "include": ["*"], + "exclude": [], + "resource_types": [NodeType.Seed], + } + results = runner.run_flat(query, SeedRunner) + dbt.ui.printer.print_run_end_messages(results) + return results From 5b3cf6f64b81adaeb1cfa3fc06ee94b5d19ddb2f Mon Sep 17 00:00:00 2001 From: Buck Ryan Date: Mon, 4 Dec 2017 10:32:23 -0500 Subject: [PATCH 03/34] Make CompilerRunner fns private and impl. SeedRunner.compile Trying to distinguish between the public/private interface for this class. And the SeedRunner doesn't need the functionality in the compile function, it just needs a compile function to exist for use in the compilation process. --- dbt/node_runners.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/dbt/node_runners.py b/dbt/node_runners.py index 5a11127e57f..1a686aa4962 100644 --- a/dbt/node_runners.py +++ b/dbt/node_runners.py @@ -205,14 +205,14 @@ def execute(self, compiled_node, existing, flat_graph): return RunModelResult(compiled_node) def compile(self, flat_graph): - return self.compile_node(self.adapter, self.project, self.node, - flat_graph) + return self._compile_node(self.adapter, self.project, self.node, + flat_graph) @classmethod - def compile_node(cls, adapter, project, node, flat_graph): + def _compile_node(cls, adapter, project, node, flat_graph): compiler = dbt.compilation.Compiler(project) node = compiler.compile_node(node, flat_graph) - node = cls.inject_runtime_config(adapter, project, node) + node = cls._inject_runtime_config(adapter, project, node) if(node['injected_sql'] is not None and not (dbt.utils.is_type(node, NodeType.Archive))): @@ -230,15 +230,15 @@ def compile_node(cls, adapter, project, node, flat_graph): return node @classmethod - def inject_runtime_config(cls, adapter, project, node): + def _inject_runtime_config(cls, adapter, project, node): wrapped_sql = node.get('wrapped_sql') - context = cls.node_context(adapter, project, node) + context = cls._node_context(adapter, project, node) sql = dbt.clients.jinja.get_rendered(wrapped_sql, context) node['wrapped_sql'] = sql return node @classmethod - def node_context(cls, adapter, project, node): + def _node_context(cls, adapter, project, node): profile = project.run_environment() def call_get_columns_in_table(schema_name, table_name): @@ -295,7 +295,7 @@ def run_hooks(cls, project, adapter, flat_graph, hook_type): compiled_hooks = [] for hook in hooks: - compiled = cls.compile_node(adapter, project, hook, flat_graph) + compiled = cls._compile_node(adapter, project, hook, flat_graph) model_name = compiled.get('name') statement = compiled['wrapped_sql'] @@ -492,3 +492,6 @@ def execute(self, compiled_node, existing, flat_graph): adapter.load_csv(self.profile, schema, table_name, table) adapter.commit_if_has_connection(self.profile, None) return RunModelResult(compiled_node) + + def compile(self, flat_graph): + return self.node From 4899e0b5c59925fc33a297685ab7c35d486bb289 Mon Sep 17 00:00:00 2001 From: Buck Ryan Date: Mon, 4 Dec 2017 14:09:59 -0500 Subject: [PATCH 04/34] Test changes and fixes --- dbt/node_runners.py | 8 +++++--- dbt/parser.py | 11 +++++++++-- test/integration/005_simple_seed_test/seed.sql | 8 ++++---- .../integration/023_exit_codes_test/data-bad/data.csv | 4 ++-- .../023_exit_codes_test/test_exit_codes.py | 7 +++++-- 5 files changed, 25 insertions(+), 13 deletions(-) diff --git a/dbt/node_runners.py b/dbt/node_runners.py index 1a686aa4962..3e9c18a2f38 100644 --- a/dbt/node_runners.py +++ b/dbt/node_runners.py @@ -472,14 +472,16 @@ def before_execute(self): dbt.ui.printer.print_start_line(description, self.node_index, self.num_nodes) - def execute(self, compiled_node, existing, flat_graph): + def execute(self, compiled_node, existing_, flat_graph): + # In testing, existing_ was not correctly set to the existing things + schema = compiled_node["schema"] + adapter = self.adapter # type: dbt.adapters.default.DefaultAdapter + existing = adapter.query_for_existing(self.profile, schema) existing_tables = [k for k, v in existing.items() if v == "table"] table_name = compiled_node["table_name"] existing_type = existing.get(table_name) if existing_type and existing_type != "table": raise Exception("table is already a view") # FIXME better exception - adapter = self.adapter # type: dbt.adapters.default.DefaultAdapter - schema = compiled_node["schema"] table = compiled_node["agate_table"] if existing_type: if dbt.flags.NON_DESTRUCTIVE: diff --git a/dbt/parser.py b/dbt/parser.py index 972a5fcdb2a..b2ca04d10f7 100644 --- a/dbt/parser.py +++ b/dbt/parser.py @@ -709,15 +709,22 @@ def parse_seed_file(file_match, package_name): logger.debug("Parsing {}".format(abspath)) to_return = {} table_name = os.path.basename(abspath)[:-4] - return { + node = { 'unique_id': get_path(NodeType.Seed, package_name, table_name), 'path': file_match['relative_path'], 'table_name': table_name, + 'name': table_name, 'resource_type': NodeType.Seed, 'package_name': package_name, 'depends_on': {'nodes': []}, - 'agate_table': agate.Table.from_csv(abspath), + 'original_file_path': os.path.join(file_match.get('searched_path'), + file_match.get('relative_path')), } + try: + table = agate.Table.from_csv(abspath) + except ValueError as e: + dbt.exceptions.raise_compiler_error(str(e), node) + node['agate_table'] = table return node diff --git a/test/integration/005_simple_seed_test/seed.sql b/test/integration/005_simple_seed_test/seed.sql index 5ad307e67ff..55c45bf6653 100644 --- a/test/integration/005_simple_seed_test/seed.sql +++ b/test/integration/005_simple_seed_test/seed.sql @@ -1,8 +1,8 @@ create table {schema}.seed_expected ( - id INTEGER, - first_name VARCHAR(11), - email VARCHAR(31), - ip_address VARCHAR(15), + id NUMERIC, + first_name TEXT, + email TEXT, + ip_address TEXT, birthday TIMESTAMP WITHOUT TIME ZONE ); diff --git a/test/integration/023_exit_codes_test/data-bad/data.csv b/test/integration/023_exit_codes_test/data-bad/data.csv index 37ff5b7356a..fcc8e001bbd 100644 --- a/test/integration/023_exit_codes_test/data-bad/data.csv +++ b/test/integration/023_exit_codes_test/data-bad/data.csv @@ -1,2 +1,2 @@ -"a,b,c -"1,\2,3,a,a,a +a,b,c +1,\2,3,a,a,a diff --git a/test/integration/023_exit_codes_test/test_exit_codes.py b/test/integration/023_exit_codes_test/test_exit_codes.py index e3640b7e80c..dfc4a9a2931 100644 --- a/test/integration/023_exit_codes_test/test_exit_codes.py +++ b/test/integration/023_exit_codes_test/test_exit_codes.py @@ -214,5 +214,8 @@ def project_config(self): @attr(type='postgres') def test_seed(self): - _, success = self.run_dbt_and_check(['seed']) - self.assertFalse(success) + try: + _, success = self.run_dbt_and_check(['seed']) + self.assertTrue(False) + except dbt.exceptions.CompilationException as e: + pass From 28d2886409f84914f25d6aaeaec9db49afea1b6e Mon Sep 17 00:00:00 2001 From: Buck Ryan Date: Mon, 4 Dec 2017 15:16:14 -0500 Subject: [PATCH 05/34] make the DB setup script usable locally --- test/setup_db.sh | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/test/setup_db.sh b/test/setup_db.sh index e315d76f02f..d373b7195a0 100644 --- a/test/setup_db.sh +++ b/test/setup_db.sh @@ -1,12 +1,19 @@ +#!/bin/bash set -x +# If you want to run this script for your own postgresql (run with +# docker-compose) it will look like this: +# PGHOST=127.0.0.1 PGUSER=root PGPASSWORD=password PGDATABASE=postgres \ +# bash test/setup.sh +PGUSER="${PGUSER:-postgres}" + createdb dbt -psql -c "CREATE ROLE root WITH UNENCRYPTED PASSWORD 'password';" -U postgres -psql -c "ALTER ROLE root WITH LOGIN;" -U postgres -psql -c "GRANT CREATE, CONNECT ON DATABASE dbt TO root;" -U postgres +psql -c "CREATE ROLE root WITH PASSWORD 'password';" +psql -c "ALTER ROLE root WITH LOGIN;" +psql -c "GRANT CREATE, CONNECT ON DATABASE dbt TO root;" -psql -c "CREATE ROLE noaccess WITH UNENCRYPTED PASSWORD 'password' NOSUPERUSER;" -U postgres; -psql -c "ALTER ROLE noaccess WITH LOGIN;" -U postgres -psql -c "GRANT CONNECT ON DATABASE dbt TO noaccess;" -U postgres; +psql -c "CREATE ROLE noaccess WITH PASSWORD 'password' NOSUPERUSER;" +psql -c "ALTER ROLE noaccess WITH LOGIN;" +psql -c "GRANT CONNECT ON DATABASE dbt TO noaccess;" set +x From dbac3c5e6f0a62e9edd1af60d8a1e800fa86159a Mon Sep 17 00:00:00 2001 From: Buck Ryan Date: Mon, 4 Dec 2017 17:09:57 -0500 Subject: [PATCH 06/34] convert simple copy test to use seeed --- .../001_simple_copy_test/data/seed.csv | 101 ++++++++++++++++ .../integration/001_simple_copy_test/seed.sql | 111 ------------------ .../001_simple_copy_test/test_simple_copy.py | 10 +- test/integration/base.py | 4 +- 4 files changed, 111 insertions(+), 115 deletions(-) create mode 100644 test/integration/001_simple_copy_test/data/seed.csv delete mode 100644 test/integration/001_simple_copy_test/seed.sql diff --git a/test/integration/001_simple_copy_test/data/seed.csv b/test/integration/001_simple_copy_test/data/seed.csv new file mode 100644 index 00000000000..640af6c4ee6 --- /dev/null +++ b/test/integration/001_simple_copy_test/data/seed.csv @@ -0,0 +1,101 @@ +id,first_name,last_name,email,gender,ip_address +1,Jack,Hunter,jhunter0@pbs.org,Male,59.80.20.168 +2,Kathryn,Walker,kwalker1@ezinearticles.com,Female,194.121.179.35 +3,Gerald,Ryan,gryan2@com.com,Male,11.3.212.243 +4,Bonnie,Spencer,bspencer3@ameblo.jp,Female,216.32.196.175 +5,Harold,Taylor,htaylor4@people.com.cn,Male,253.10.246.136 +6,Jacqueline,Griffin,jgriffin5@t.co,Female,16.13.192.220 +7,Wanda,Arnold,warnold6@google.nl,Female,232.116.150.64 +8,Craig,Ortiz,cortiz7@sciencedaily.com,Male,199.126.106.13 +9,Gary,Day,gday8@nih.gov,Male,35.81.68.186 +10,Rose,Wright,rwright9@yahoo.co.jp,Female,236.82.178.100 +11,Raymond,Kelley,rkelleya@fc2.com,Male,213.65.166.67 +12,Gerald,Robinson,grobinsonb@disqus.com,Male,72.232.194.193 +13,Mildred,Martinez,mmartinezc@samsung.com,Female,198.29.112.5 +14,Dennis,Arnold,darnoldd@google.com,Male,86.96.3.250 +15,Judy,Gray,jgraye@opensource.org,Female,79.218.162.245 +16,Theresa,Garza,tgarzaf@epa.gov,Female,21.59.100.54 +17,Gerald,Robertson,grobertsong@csmonitor.com,Male,131.134.82.96 +18,Philip,Hernandez,phernandezh@adobe.com,Male,254.196.137.72 +19,Julia,Gonzalez,jgonzalezi@cam.ac.uk,Female,84.240.227.174 +20,Andrew,Davis,adavisj@patch.com,Male,9.255.67.25 +21,Kimberly,Harper,kharperk@foxnews.com,Female,198.208.120.253 +22,Mark,Martin,mmartinl@marketwatch.com,Male,233.138.182.153 +23,Cynthia,Ruiz,cruizm@google.fr,Female,18.178.187.201 +24,Samuel,Carroll,scarrolln@youtu.be,Male,128.113.96.122 +25,Jennifer,Larson,jlarsono@vinaora.com,Female,98.234.85.95 +26,Ashley,Perry,aperryp@rakuten.co.jp,Female,247.173.114.52 +27,Howard,Rodriguez,hrodriguezq@shutterfly.com,Male,231.188.95.26 +28,Amy,Brooks,abrooksr@theatlantic.com,Female,141.199.174.118 +29,Louise,Warren,lwarrens@adobe.com,Female,96.105.158.28 +30,Tina,Watson,twatsont@myspace.com,Female,251.142.118.177 +31,Janice,Kelley,jkelleyu@creativecommons.org,Female,239.167.34.233 +32,Terry,Mccoy,tmccoyv@bravesites.com,Male,117.201.183.203 +33,Jeffrey,Morgan,jmorganw@surveymonkey.com,Male,78.101.78.149 +34,Louis,Harvey,lharveyx@sina.com.cn,Male,51.50.0.167 +35,Philip,Miller,pmillery@samsung.com,Male,103.255.222.110 +36,Willie,Marshall,wmarshallz@ow.ly,Male,149.219.91.68 +37,Patrick,Lopez,plopez10@redcross.org,Male,250.136.229.89 +38,Adam,Jenkins,ajenkins11@harvard.edu,Male,7.36.112.81 +39,Benjamin,Cruz,bcruz12@linkedin.com,Male,32.38.98.15 +40,Ruby,Hawkins,rhawkins13@gmpg.org,Female,135.171.129.255 +41,Carlos,Barnes,cbarnes14@a8.net,Male,240.197.85.140 +42,Ruby,Griffin,rgriffin15@bravesites.com,Female,19.29.135.24 +43,Sean,Mason,smason16@icq.com,Male,159.219.155.249 +44,Anthony,Payne,apayne17@utexas.edu,Male,235.168.199.218 +45,Steve,Cruz,scruz18@pcworld.com,Male,238.201.81.198 +46,Anthony,Garcia,agarcia19@flavors.me,Male,25.85.10.18 +47,Doris,Lopez,dlopez1a@sphinn.com,Female,245.218.51.238 +48,Susan,Nichols,snichols1b@freewebs.com,Female,199.99.9.61 +49,Wanda,Ferguson,wferguson1c@yahoo.co.jp,Female,236.241.135.21 +50,Andrea,Pierce,apierce1d@google.co.uk,Female,132.40.10.209 +51,Lawrence,Phillips,lphillips1e@jugem.jp,Male,72.226.82.87 +52,Judy,Gilbert,jgilbert1f@multiply.com,Female,196.250.15.142 +53,Eric,Williams,ewilliams1g@joomla.org,Male,222.202.73.126 +54,Ralph,Romero,rromero1h@sogou.com,Male,123.184.125.212 +55,Jean,Wilson,jwilson1i@ocn.ne.jp,Female,176.106.32.194 +56,Lori,Reynolds,lreynolds1j@illinois.edu,Female,114.181.203.22 +57,Donald,Moreno,dmoreno1k@bbc.co.uk,Male,233.249.97.60 +58,Steven,Berry,sberry1l@eepurl.com,Male,186.193.50.50 +59,Theresa,Shaw,tshaw1m@people.com.cn,Female,120.37.71.222 +60,John,Stephens,jstephens1n@nationalgeographic.com,Male,191.87.127.115 +61,Richard,Jacobs,rjacobs1o@state.tx.us,Male,66.210.83.155 +62,Andrew,Lawson,alawson1p@over-blog.com,Male,54.98.36.94 +63,Peter,Morgan,pmorgan1q@rambler.ru,Male,14.77.29.106 +64,Nicole,Garrett,ngarrett1r@zimbio.com,Female,21.127.74.68 +65,Joshua,Kim,jkim1s@edublogs.org,Male,57.255.207.41 +66,Ralph,Roberts,rroberts1t@people.com.cn,Male,222.143.131.109 +67,George,Montgomery,gmontgomery1u@smugmug.com,Male,76.75.111.77 +68,Gerald,Alvarez,galvarez1v@flavors.me,Male,58.157.186.194 +69,Donald,Olson,dolson1w@whitehouse.gov,Male,69.65.74.135 +70,Carlos,Morgan,cmorgan1x@pbs.org,Male,96.20.140.87 +71,Aaron,Stanley,astanley1y@webnode.com,Male,163.119.217.44 +72,Virginia,Long,vlong1z@spiegel.de,Female,204.150.194.182 +73,Robert,Berry,rberry20@tripadvisor.com,Male,104.19.48.241 +74,Antonio,Brooks,abrooks21@unesco.org,Male,210.31.7.24 +75,Ruby,Garcia,rgarcia22@ovh.net,Female,233.218.162.214 +76,Jack,Hanson,jhanson23@blogtalkradio.com,Male,31.55.46.199 +77,Kathryn,Nelson,knelson24@walmart.com,Female,14.189.146.41 +78,Jason,Reed,jreed25@printfriendly.com,Male,141.189.89.255 +79,George,Coleman,gcoleman26@people.com.cn,Male,81.189.221.144 +80,Rose,King,rking27@ucoz.com,Female,212.123.168.231 +81,Johnny,Holmes,jholmes28@boston.com,Male,177.3.93.188 +82,Katherine,Gilbert,kgilbert29@altervista.org,Female,199.215.169.61 +83,Joshua,Thomas,jthomas2a@ustream.tv,Male,0.8.205.30 +84,Julie,Perry,jperry2b@opensource.org,Female,60.116.114.192 +85,Richard,Perry,rperry2c@oracle.com,Male,181.125.70.232 +86,Kenneth,Ruiz,kruiz2d@wikimedia.org,Male,189.105.137.109 +87,Jose,Morgan,jmorgan2e@webnode.com,Male,101.134.215.156 +88,Donald,Campbell,dcampbell2f@goo.ne.jp,Male,102.120.215.84 +89,Debra,Collins,dcollins2g@uol.com.br,Female,90.13.153.235 +90,Jesse,Johnson,jjohnson2h@stumbleupon.com,Male,225.178.125.53 +91,Elizabeth,Stone,estone2i@histats.com,Female,123.184.126.221 +92,Angela,Rogers,arogers2j@goodreads.com,Female,98.104.132.187 +93,Emily,Dixon,edixon2k@mlb.com,Female,39.190.75.57 +94,Albert,Scott,ascott2l@tinypic.com,Male,40.209.13.189 +95,Barbara,Peterson,bpeterson2m@ow.ly,Female,75.249.136.180 +96,Adam,Greene,agreene2n@fastcompany.com,Male,184.173.109.144 +97,Earl,Sanders,esanders2o@hc360.com,Male,247.34.90.117 +98,Angela,Brooks,abrooks2p@mtv.com,Female,10.63.249.126 +99,Harold,Foster,hfoster2q@privacy.gov.au,Male,139.214.40.244 +100,Carl,Meyer,cmeyer2r@disqus.com,Male,204.117.7.88 diff --git a/test/integration/001_simple_copy_test/seed.sql b/test/integration/001_simple_copy_test/seed.sql deleted file mode 100644 index 28309b400f8..00000000000 --- a/test/integration/001_simple_copy_test/seed.sql +++ /dev/null @@ -1,111 +0,0 @@ -create table "{schema}"."seed" ( - id BIGSERIAL PRIMARY KEY, - first_name VARCHAR(50), - last_name VARCHAR(50), - email VARCHAR(50), - gender VARCHAR(50), - ip_address VARCHAR(20) -); - - -insert into "{schema}"."seed" (first_name, last_name, email, gender, ip_address) values -('Jack', 'Hunter', 'jhunter0@pbs.org', 'Male', '59.80.20.168'), -('Kathryn', 'Walker', 'kwalker1@ezinearticles.com', 'Female', '194.121.179.35'), -('Gerald', 'Ryan', 'gryan2@com.com', 'Male', '11.3.212.243'), -('Bonnie', 'Spencer', 'bspencer3@ameblo.jp', 'Female', '216.32.196.175'), -('Harold', 'Taylor', 'htaylor4@people.com.cn', 'Male', '253.10.246.136'), -('Jacqueline', 'Griffin', 'jgriffin5@t.co', 'Female', '16.13.192.220'), -('Wanda', 'Arnold', 'warnold6@google.nl', 'Female', '232.116.150.64'), -('Craig', 'Ortiz', 'cortiz7@sciencedaily.com', 'Male', '199.126.106.13'), -('Gary', 'Day', 'gday8@nih.gov', 'Male', '35.81.68.186'), -('Rose', 'Wright', 'rwright9@yahoo.co.jp', 'Female', '236.82.178.100'), -('Raymond', 'Kelley', 'rkelleya@fc2.com', 'Male', '213.65.166.67'), -('Gerald', 'Robinson', 'grobinsonb@disqus.com', 'Male', '72.232.194.193'), -('Mildred', 'Martinez', 'mmartinezc@samsung.com', 'Female', '198.29.112.5'), -('Dennis', 'Arnold', 'darnoldd@google.com', 'Male', '86.96.3.250'), -('Judy', 'Gray', 'jgraye@opensource.org', 'Female', '79.218.162.245'), -('Theresa', 'Garza', 'tgarzaf@epa.gov', 'Female', '21.59.100.54'), -('Gerald', 'Robertson', 'grobertsong@csmonitor.com', 'Male', '131.134.82.96'), -('Philip', 'Hernandez', 'phernandezh@adobe.com', 'Male', '254.196.137.72'), -('Julia', 'Gonzalez', 'jgonzalezi@cam.ac.uk', 'Female', '84.240.227.174'), -('Andrew', 'Davis', 'adavisj@patch.com', 'Male', '9.255.67.25'), -('Kimberly', 'Harper', 'kharperk@foxnews.com', 'Female', '198.208.120.253'), -('Mark', 'Martin', 'mmartinl@marketwatch.com', 'Male', '233.138.182.153'), -('Cynthia', 'Ruiz', 'cruizm@google.fr', 'Female', '18.178.187.201'), -('Samuel', 'Carroll', 'scarrolln@youtu.be', 'Male', '128.113.96.122'), -('Jennifer', 'Larson', 'jlarsono@vinaora.com', 'Female', '98.234.85.95'), -('Ashley', 'Perry', 'aperryp@rakuten.co.jp', 'Female', '247.173.114.52'), -('Howard', 'Rodriguez', 'hrodriguezq@shutterfly.com', 'Male', '231.188.95.26'), -('Amy', 'Brooks', 'abrooksr@theatlantic.com', 'Female', '141.199.174.118'), -('Louise', 'Warren', 'lwarrens@adobe.com', 'Female', '96.105.158.28'), -('Tina', 'Watson', 'twatsont@myspace.com', 'Female', '251.142.118.177'), -('Janice', 'Kelley', 'jkelleyu@creativecommons.org', 'Female', '239.167.34.233'), -('Terry', 'Mccoy', 'tmccoyv@bravesites.com', 'Male', '117.201.183.203'), -('Jeffrey', 'Morgan', 'jmorganw@surveymonkey.com', 'Male', '78.101.78.149'), -('Louis', 'Harvey', 'lharveyx@sina.com.cn', 'Male', '51.50.0.167'), -('Philip', 'Miller', 'pmillery@samsung.com', 'Male', '103.255.222.110'), -('Willie', 'Marshall', 'wmarshallz@ow.ly', 'Male', '149.219.91.68'), -('Patrick', 'Lopez', 'plopez10@redcross.org', 'Male', '250.136.229.89'), -('Adam', 'Jenkins', 'ajenkins11@harvard.edu', 'Male', '7.36.112.81'), -('Benjamin', 'Cruz', 'bcruz12@linkedin.com', 'Male', '32.38.98.15'), -('Ruby', 'Hawkins', 'rhawkins13@gmpg.org', 'Female', '135.171.129.255'), -('Carlos', 'Barnes', 'cbarnes14@a8.net', 'Male', '240.197.85.140'), -('Ruby', 'Griffin', 'rgriffin15@bravesites.com', 'Female', '19.29.135.24'), -('Sean', 'Mason', 'smason16@icq.com', 'Male', '159.219.155.249'), -('Anthony', 'Payne', 'apayne17@utexas.edu', 'Male', '235.168.199.218'), -('Steve', 'Cruz', 'scruz18@pcworld.com', 'Male', '238.201.81.198'), -('Anthony', 'Garcia', 'agarcia19@flavors.me', 'Male', '25.85.10.18'), -('Doris', 'Lopez', 'dlopez1a@sphinn.com', 'Female', '245.218.51.238'), -('Susan', 'Nichols', 'snichols1b@freewebs.com', 'Female', '199.99.9.61'), -('Wanda', 'Ferguson', 'wferguson1c@yahoo.co.jp', 'Female', '236.241.135.21'), -('Andrea', 'Pierce', 'apierce1d@google.co.uk', 'Female', '132.40.10.209'), -('Lawrence', 'Phillips', 'lphillips1e@jugem.jp', 'Male', '72.226.82.87'), -('Judy', 'Gilbert', 'jgilbert1f@multiply.com', 'Female', '196.250.15.142'), -('Eric', 'Williams', 'ewilliams1g@joomla.org', 'Male', '222.202.73.126'), -('Ralph', 'Romero', 'rromero1h@sogou.com', 'Male', '123.184.125.212'), -('Jean', 'Wilson', 'jwilson1i@ocn.ne.jp', 'Female', '176.106.32.194'), -('Lori', 'Reynolds', 'lreynolds1j@illinois.edu', 'Female', '114.181.203.22'), -('Donald', 'Moreno', 'dmoreno1k@bbc.co.uk', 'Male', '233.249.97.60'), -('Steven', 'Berry', 'sberry1l@eepurl.com', 'Male', '186.193.50.50'), -('Theresa', 'Shaw', 'tshaw1m@people.com.cn', 'Female', '120.37.71.222'), -('John', 'Stephens', 'jstephens1n@nationalgeographic.com', 'Male', '191.87.127.115'), -('Richard', 'Jacobs', 'rjacobs1o@state.tx.us', 'Male', '66.210.83.155'), -('Andrew', 'Lawson', 'alawson1p@over-blog.com', 'Male', '54.98.36.94'), -('Peter', 'Morgan', 'pmorgan1q@rambler.ru', 'Male', '14.77.29.106'), -('Nicole', 'Garrett', 'ngarrett1r@zimbio.com', 'Female', '21.127.74.68'), -('Joshua', 'Kim', 'jkim1s@edublogs.org', 'Male', '57.255.207.41'), -('Ralph', 'Roberts', 'rroberts1t@people.com.cn', 'Male', '222.143.131.109'), -('George', 'Montgomery', 'gmontgomery1u@smugmug.com', 'Male', '76.75.111.77'), -('Gerald', 'Alvarez', 'galvarez1v@flavors.me', 'Male', '58.157.186.194'), -('Donald', 'Olson', 'dolson1w@whitehouse.gov', 'Male', '69.65.74.135'), -('Carlos', 'Morgan', 'cmorgan1x@pbs.org', 'Male', '96.20.140.87'), -('Aaron', 'Stanley', 'astanley1y@webnode.com', 'Male', '163.119.217.44'), -('Virginia', 'Long', 'vlong1z@spiegel.de', 'Female', '204.150.194.182'), -('Robert', 'Berry', 'rberry20@tripadvisor.com', 'Male', '104.19.48.241'), -('Antonio', 'Brooks', 'abrooks21@unesco.org', 'Male', '210.31.7.24'), -('Ruby', 'Garcia', 'rgarcia22@ovh.net', 'Female', '233.218.162.214'), -('Jack', 'Hanson', 'jhanson23@blogtalkradio.com', 'Male', '31.55.46.199'), -('Kathryn', 'Nelson', 'knelson24@walmart.com', 'Female', '14.189.146.41'), -('Jason', 'Reed', 'jreed25@printfriendly.com', 'Male', '141.189.89.255'), -('George', 'Coleman', 'gcoleman26@people.com.cn', 'Male', '81.189.221.144'), -('Rose', 'King', 'rking27@ucoz.com', 'Female', '212.123.168.231'), -('Johnny', 'Holmes', 'jholmes28@boston.com', 'Male', '177.3.93.188'), -('Katherine', 'Gilbert', 'kgilbert29@altervista.org', 'Female', '199.215.169.61'), -('Joshua', 'Thomas', 'jthomas2a@ustream.tv', 'Male', '0.8.205.30'), -('Julie', 'Perry', 'jperry2b@opensource.org', 'Female', '60.116.114.192'), -('Richard', 'Perry', 'rperry2c@oracle.com', 'Male', '181.125.70.232'), -('Kenneth', 'Ruiz', 'kruiz2d@wikimedia.org', 'Male', '189.105.137.109'), -('Jose', 'Morgan', 'jmorgan2e@webnode.com', 'Male', '101.134.215.156'), -('Donald', 'Campbell', 'dcampbell2f@goo.ne.jp', 'Male', '102.120.215.84'), -('Debra', 'Collins', 'dcollins2g@uol.com.br', 'Female', '90.13.153.235'), -('Jesse', 'Johnson', 'jjohnson2h@stumbleupon.com', 'Male', '225.178.125.53'), -('Elizabeth', 'Stone', 'estone2i@histats.com', 'Female', '123.184.126.221'), -('Angela', 'Rogers', 'arogers2j@goodreads.com', 'Female', '98.104.132.187'), -('Emily', 'Dixon', 'edixon2k@mlb.com', 'Female', '39.190.75.57'), -('Albert', 'Scott', 'ascott2l@tinypic.com', 'Male', '40.209.13.189'), -('Barbara', 'Peterson', 'bpeterson2m@ow.ly', 'Female', '75.249.136.180'), -('Adam', 'Greene', 'agreene2n@fastcompany.com', 'Male', '184.173.109.144'), -('Earl', 'Sanders', 'esanders2o@hc360.com', 'Male', '247.34.90.117'), -('Angela', 'Brooks', 'abrooks2p@mtv.com', 'Female', '10.63.249.126'), -('Harold', 'Foster', 'hfoster2q@privacy.gov.au', 'Male', '139.214.40.244'), -('Carl', 'Meyer', 'cmeyer2r@disqus.com', 'Male', '204.117.7.88'); diff --git a/test/integration/001_simple_copy_test/test_simple_copy.py b/test/integration/001_simple_copy_test/test_simple_copy.py index 5f67d72eb9d..256e17a5ad3 100644 --- a/test/integration/001_simple_copy_test/test_simple_copy.py +++ b/test/integration/001_simple_copy_test/test_simple_copy.py @@ -15,12 +15,16 @@ def schema(self): def models(self): return "test/integration/001_simple_copy_test/models" + @property + def seeds(self): + return "test/integration/001_simple_copy_test/data" + @attr(type='postgres') def test__postgres__simple_copy(self): self.use_default_project() self.use_profile('postgres') - self.run_sql_file("test/integration/001_simple_copy_test/seed.sql") + self.run_dbt(["seed"]) self.run_dbt() self.assertTablesEqual("seed","view") @@ -39,8 +43,8 @@ def test__postgres__simple_copy(self): def test__postgres__dbt_doesnt_run_empty_models(self): self.use_default_project() self.use_profile('postgres') - self.run_sql_file("test/integration/001_simple_copy_test/seed.sql") + self.run_dbt(["seed"]) self.run_dbt() models = self.get_models_in_schema() @@ -52,8 +56,8 @@ def test__postgres__dbt_doesnt_run_empty_models(self): def test__snowflake__simple_copy(self): self.use_default_project() self.use_profile('snowflake') - self.run_sql_file("test/integration/001_simple_copy_test/seed.sql") + self.run_dbt(["seed"]) self.run_dbt() self.assertTablesEqual("seed","view") diff --git a/test/integration/base.py b/test/integration/base.py index 78aa2729d06..a0b758e8746 100644 --- a/test/integration/base.py +++ b/test/integration/base.py @@ -188,12 +188,14 @@ def setUp(self): def use_default_project(self): # create a dbt_project.yml + data_paths = getattr(self, 'seeds', None) base_project_config = { 'name': 'test', 'version': '1.0', 'test-paths': [], 'source-paths': [self.models], - 'profile': 'test' + 'profile': 'test', + 'data-paths': [data_paths] if data_paths else [], } project_config = {} From 07f283f448b2262d574a940a89d620ed7158ef9f Mon Sep 17 00:00:00 2001 From: Buck Ryan Date: Wed, 6 Dec 2017 13:04:10 -0500 Subject: [PATCH 07/34] Fixes to get Snowflake working --- dbt/adapters/postgres.py | 13 ++++++++++--- dbt/adapters/snowflake.py | 10 ++++++++-- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/dbt/adapters/postgres.py b/dbt/adapters/postgres.py index 7cc7cf7f5eb..b1965728404 100644 --- a/dbt/adapters/postgres.py +++ b/dbt/adapters/postgres.py @@ -43,6 +43,10 @@ def exception_handler(cls, profile, sql, model_name=None, cls.release_connection(profile, connection_name) raise dbt.exceptions.RuntimeException(e) + @staticmethod + def escape_identifier(ident): + return '"{}"'.format(ident.replace('"', '""')) + @classmethod def type(cls): return 'postgres' @@ -189,9 +193,12 @@ def create_table(cls, profile, schema, table_name, agate_table): @classmethod def load_csv(cls, profile, schema, table_name, agate_table): - cols_sql = ", ".join(agate_table.column_names) + cols_sql = ", ".join(cls.escape_identifier(c) + for c in agate_table.column_names) placeholders = ", ".join("%s" for _ in agate_table.column_names) - sql = ('insert into "{}"."{}" ({}) values ({})' - .format(schema, table_name, cols_sql, placeholders)) + sql = ('insert into {}.{} ({}) values ({})' + .format(cls.escape_identifier(schema), + cls.escape_identifier(table_name), + cols_sql, placeholders)) for row in agate_table.rows: cls.add_query(profile, sql, bindings=row) diff --git a/dbt/adapters/snowflake.py b/dbt/adapters/snowflake.py index 801628e2dbc..3ae1363604d 100644 --- a/dbt/adapters/snowflake.py +++ b/dbt/adapters/snowflake.py @@ -177,7 +177,7 @@ def check_schema_exists(cls, profile, schema, model_name=None): @classmethod def add_query(cls, profile, sql, model_name=None, auto_begin=True, - select_schema=True): + select_schema=True, bindings=None): # snowflake only allows one query per api call. queries = sql.strip().split(";") cursor = None @@ -189,6 +189,11 @@ def add_query(cls, profile, sql, model_name=None, auto_begin=True, model_name, auto_begin) + if bindings: + # The snowflake connector is more strict than, eg., psycopg2 - + # which allows any iterable thing to be passed as a binding. + bindings = tuple(bindings) + for individual_query in queries: # hack -- after the last ';', remove comments and don't run # empty queries. this avoids using exceptions as flow control, @@ -201,7 +206,8 @@ def add_query(cls, profile, sql, model_name=None, auto_begin=True, continue connection, cursor = super(PostgresAdapter, cls).add_query( - profile, individual_query, model_name, auto_begin) + profile, individual_query, model_name, auto_begin, + bindings=bindings) return connection, cursor From 935d2a5aba3005ff09a711a4b00d398604819b7e Mon Sep 17 00:00:00 2001 From: Buck Ryan Date: Wed, 6 Dec 2017 14:20:51 -0500 Subject: [PATCH 08/34] New seed flag and make it non-destructive by default --- dbt/main.py | 17 ++++++++++++----- dbt/node_runners.py | 6 +++--- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/dbt/main.py b/dbt/main.py index 8f7c7c892c1..6e01d822e0b 100644 --- a/dbt/main.py +++ b/dbt/main.py @@ -232,7 +232,9 @@ def invoke_dbt(parsed): return None flags.NON_DESTRUCTIVE = getattr(proj.args, 'non_destructive', False) - flags.FULL_REFRESH = getattr(proj.args, 'full_refresh', False) + refresh_flags = ['full_refresh', 'drop_existing'] + if any(getattr(proj.args, attr, False) for attr in refresh_flags): + flags.FULL_REFRESH = True logger.debug("running dbt with arguments %s", parsed) @@ -375,13 +377,18 @@ def parse_args(args): fully-recalculate the incremental table from the model definition. """) - sub = subs.add_parser('seed', parents=[base_subparser]) - sub.add_argument( + seed_sub = subs.add_parser('seed', parents=[base_subparser]) + seed_sub.add_argument( '--drop-existing', action='store_true', - help="Drop existing seed tables and recreate them" + help='(DEPRECATED) Use --full-refresh instead.' + ) + seed_sub.add_argument( + '--full-refresh', + action='store_true', + help='Drop existing seed tables and recreate them' ) - sub.set_defaults(cls=seed_task.SeedTask, which='seed') + seed_sub.set_defaults(cls=seed_task.SeedTask, which='seed') sub = subs.add_parser('test', parents=[base_subparser]) sub.add_argument( diff --git a/dbt/node_runners.py b/dbt/node_runners.py index 3e9c18a2f38..415b594efce 100644 --- a/dbt/node_runners.py +++ b/dbt/node_runners.py @@ -484,11 +484,11 @@ def execute(self, compiled_node, existing_, flat_graph): raise Exception("table is already a view") # FIXME better exception table = compiled_node["agate_table"] if existing_type: - if dbt.flags.NON_DESTRUCTIVE: - adapter.truncate(self.profile, schema, table_name) - else: + if dbt.flags.FULL_REFRESH: adapter.drop_table(self.profile, schema, table_name, None) adapter.create_table(self.profile, schema, table_name, table) + else: + adapter.truncate(self.profile, schema, table_name) else: adapter.create_table(self.profile, schema, table_name, table) adapter.load_csv(self.profile, schema, table_name, table) From e49cab070fa32c92c9784ec682db664433c7530b Mon Sep 17 00:00:00 2001 From: Buck Ryan Date: Wed, 6 Dec 2017 14:21:49 -0500 Subject: [PATCH 09/34] Convert update SQL script to another seed --- .../{data => seed-initial}/seed.csv | 0 .../001_simple_copy_test/seed-update/seed.csv | 201 ++++++++++++++++++ .../001_simple_copy_test/test_simple_copy.py | 40 ++-- .../001_simple_copy_test/update.sql | 101 --------- test/integration/base.py | 5 +- 5 files changed, 223 insertions(+), 124 deletions(-) rename test/integration/001_simple_copy_test/{data => seed-initial}/seed.csv (100%) create mode 100644 test/integration/001_simple_copy_test/seed-update/seed.csv delete mode 100644 test/integration/001_simple_copy_test/update.sql diff --git a/test/integration/001_simple_copy_test/data/seed.csv b/test/integration/001_simple_copy_test/seed-initial/seed.csv similarity index 100% rename from test/integration/001_simple_copy_test/data/seed.csv rename to test/integration/001_simple_copy_test/seed-initial/seed.csv diff --git a/test/integration/001_simple_copy_test/seed-update/seed.csv b/test/integration/001_simple_copy_test/seed-update/seed.csv new file mode 100644 index 00000000000..5b93306a280 --- /dev/null +++ b/test/integration/001_simple_copy_test/seed-update/seed.csv @@ -0,0 +1,201 @@ +id,first_name,last_name,email,gender,ip_address +1,Jack,Hunter,jhunter0@pbs.org,Male,59.80.20.168 +2,Kathryn,Walker,kwalker1@ezinearticles.com,Female,194.121.179.35 +3,Gerald,Ryan,gryan2@com.com,Male,11.3.212.243 +4,Bonnie,Spencer,bspencer3@ameblo.jp,Female,216.32.196.175 +5,Harold,Taylor,htaylor4@people.com.cn,Male,253.10.246.136 +6,Jacqueline,Griffin,jgriffin5@t.co,Female,16.13.192.220 +7,Wanda,Arnold,warnold6@google.nl,Female,232.116.150.64 +8,Craig,Ortiz,cortiz7@sciencedaily.com,Male,199.126.106.13 +9,Gary,Day,gday8@nih.gov,Male,35.81.68.186 +10,Rose,Wright,rwright9@yahoo.co.jp,Female,236.82.178.100 +11,Raymond,Kelley,rkelleya@fc2.com,Male,213.65.166.67 +12,Gerald,Robinson,grobinsonb@disqus.com,Male,72.232.194.193 +13,Mildred,Martinez,mmartinezc@samsung.com,Female,198.29.112.5 +14,Dennis,Arnold,darnoldd@google.com,Male,86.96.3.250 +15,Judy,Gray,jgraye@opensource.org,Female,79.218.162.245 +16,Theresa,Garza,tgarzaf@epa.gov,Female,21.59.100.54 +17,Gerald,Robertson,grobertsong@csmonitor.com,Male,131.134.82.96 +18,Philip,Hernandez,phernandezh@adobe.com,Male,254.196.137.72 +19,Julia,Gonzalez,jgonzalezi@cam.ac.uk,Female,84.240.227.174 +20,Andrew,Davis,adavisj@patch.com,Male,9.255.67.25 +21,Kimberly,Harper,kharperk@foxnews.com,Female,198.208.120.253 +22,Mark,Martin,mmartinl@marketwatch.com,Male,233.138.182.153 +23,Cynthia,Ruiz,cruizm@google.fr,Female,18.178.187.201 +24,Samuel,Carroll,scarrolln@youtu.be,Male,128.113.96.122 +25,Jennifer,Larson,jlarsono@vinaora.com,Female,98.234.85.95 +26,Ashley,Perry,aperryp@rakuten.co.jp,Female,247.173.114.52 +27,Howard,Rodriguez,hrodriguezq@shutterfly.com,Male,231.188.95.26 +28,Amy,Brooks,abrooksr@theatlantic.com,Female,141.199.174.118 +29,Louise,Warren,lwarrens@adobe.com,Female,96.105.158.28 +30,Tina,Watson,twatsont@myspace.com,Female,251.142.118.177 +31,Janice,Kelley,jkelleyu@creativecommons.org,Female,239.167.34.233 +32,Terry,Mccoy,tmccoyv@bravesites.com,Male,117.201.183.203 +33,Jeffrey,Morgan,jmorganw@surveymonkey.com,Male,78.101.78.149 +34,Louis,Harvey,lharveyx@sina.com.cn,Male,51.50.0.167 +35,Philip,Miller,pmillery@samsung.com,Male,103.255.222.110 +36,Willie,Marshall,wmarshallz@ow.ly,Male,149.219.91.68 +37,Patrick,Lopez,plopez10@redcross.org,Male,250.136.229.89 +38,Adam,Jenkins,ajenkins11@harvard.edu,Male,7.36.112.81 +39,Benjamin,Cruz,bcruz12@linkedin.com,Male,32.38.98.15 +40,Ruby,Hawkins,rhawkins13@gmpg.org,Female,135.171.129.255 +41,Carlos,Barnes,cbarnes14@a8.net,Male,240.197.85.140 +42,Ruby,Griffin,rgriffin15@bravesites.com,Female,19.29.135.24 +43,Sean,Mason,smason16@icq.com,Male,159.219.155.249 +44,Anthony,Payne,apayne17@utexas.edu,Male,235.168.199.218 +45,Steve,Cruz,scruz18@pcworld.com,Male,238.201.81.198 +46,Anthony,Garcia,agarcia19@flavors.me,Male,25.85.10.18 +47,Doris,Lopez,dlopez1a@sphinn.com,Female,245.218.51.238 +48,Susan,Nichols,snichols1b@freewebs.com,Female,199.99.9.61 +49,Wanda,Ferguson,wferguson1c@yahoo.co.jp,Female,236.241.135.21 +50,Andrea,Pierce,apierce1d@google.co.uk,Female,132.40.10.209 +51,Lawrence,Phillips,lphillips1e@jugem.jp,Male,72.226.82.87 +52,Judy,Gilbert,jgilbert1f@multiply.com,Female,196.250.15.142 +53,Eric,Williams,ewilliams1g@joomla.org,Male,222.202.73.126 +54,Ralph,Romero,rromero1h@sogou.com,Male,123.184.125.212 +55,Jean,Wilson,jwilson1i@ocn.ne.jp,Female,176.106.32.194 +56,Lori,Reynolds,lreynolds1j@illinois.edu,Female,114.181.203.22 +57,Donald,Moreno,dmoreno1k@bbc.co.uk,Male,233.249.97.60 +58,Steven,Berry,sberry1l@eepurl.com,Male,186.193.50.50 +59,Theresa,Shaw,tshaw1m@people.com.cn,Female,120.37.71.222 +60,John,Stephens,jstephens1n@nationalgeographic.com,Male,191.87.127.115 +61,Richard,Jacobs,rjacobs1o@state.tx.us,Male,66.210.83.155 +62,Andrew,Lawson,alawson1p@over-blog.com,Male,54.98.36.94 +63,Peter,Morgan,pmorgan1q@rambler.ru,Male,14.77.29.106 +64,Nicole,Garrett,ngarrett1r@zimbio.com,Female,21.127.74.68 +65,Joshua,Kim,jkim1s@edublogs.org,Male,57.255.207.41 +66,Ralph,Roberts,rroberts1t@people.com.cn,Male,222.143.131.109 +67,George,Montgomery,gmontgomery1u@smugmug.com,Male,76.75.111.77 +68,Gerald,Alvarez,galvarez1v@flavors.me,Male,58.157.186.194 +69,Donald,Olson,dolson1w@whitehouse.gov,Male,69.65.74.135 +70,Carlos,Morgan,cmorgan1x@pbs.org,Male,96.20.140.87 +71,Aaron,Stanley,astanley1y@webnode.com,Male,163.119.217.44 +72,Virginia,Long,vlong1z@spiegel.de,Female,204.150.194.182 +73,Robert,Berry,rberry20@tripadvisor.com,Male,104.19.48.241 +74,Antonio,Brooks,abrooks21@unesco.org,Male,210.31.7.24 +75,Ruby,Garcia,rgarcia22@ovh.net,Female,233.218.162.214 +76,Jack,Hanson,jhanson23@blogtalkradio.com,Male,31.55.46.199 +77,Kathryn,Nelson,knelson24@walmart.com,Female,14.189.146.41 +78,Jason,Reed,jreed25@printfriendly.com,Male,141.189.89.255 +79,George,Coleman,gcoleman26@people.com.cn,Male,81.189.221.144 +80,Rose,King,rking27@ucoz.com,Female,212.123.168.231 +81,Johnny,Holmes,jholmes28@boston.com,Male,177.3.93.188 +82,Katherine,Gilbert,kgilbert29@altervista.org,Female,199.215.169.61 +83,Joshua,Thomas,jthomas2a@ustream.tv,Male,0.8.205.30 +84,Julie,Perry,jperry2b@opensource.org,Female,60.116.114.192 +85,Richard,Perry,rperry2c@oracle.com,Male,181.125.70.232 +86,Kenneth,Ruiz,kruiz2d@wikimedia.org,Male,189.105.137.109 +87,Jose,Morgan,jmorgan2e@webnode.com,Male,101.134.215.156 +88,Donald,Campbell,dcampbell2f@goo.ne.jp,Male,102.120.215.84 +89,Debra,Collins,dcollins2g@uol.com.br,Female,90.13.153.235 +90,Jesse,Johnson,jjohnson2h@stumbleupon.com,Male,225.178.125.53 +91,Elizabeth,Stone,estone2i@histats.com,Female,123.184.126.221 +92,Angela,Rogers,arogers2j@goodreads.com,Female,98.104.132.187 +93,Emily,Dixon,edixon2k@mlb.com,Female,39.190.75.57 +94,Albert,Scott,ascott2l@tinypic.com,Male,40.209.13.189 +95,Barbara,Peterson,bpeterson2m@ow.ly,Female,75.249.136.180 +96,Adam,Greene,agreene2n@fastcompany.com,Male,184.173.109.144 +97,Earl,Sanders,esanders2o@hc360.com,Male,247.34.90.117 +98,Angela,Brooks,abrooks2p@mtv.com,Female,10.63.249.126 +99,Harold,Foster,hfoster2q@privacy.gov.au,Male,139.214.40.244 +100,Carl,Meyer,cmeyer2r@disqus.com,Male,204.117.7.88 +101,Michael,Perez,mperez0@chronoengine.com,Male,106.239.70.175 +102,Shawn,Mccoy,smccoy1@reddit.com,Male,24.165.76.182 +103,Kathleen,Payne,kpayne2@cargocollective.com,Female,113.207.168.106 +104,Jimmy,Cooper,jcooper3@cargocollective.com,Male,198.24.63.114 +105,Katherine,Rice,krice4@typepad.com,Female,36.97.186.238 +106,Sarah,Ryan,sryan5@gnu.org,Female,119.117.152.40 +107,Martin,Mcdonald,mmcdonald6@opera.com,Male,8.76.38.115 +108,Frank,Robinson,frobinson7@wunderground.com,Male,186.14.64.194 +109,Jennifer,Franklin,jfranklin8@mail.ru,Female,91.216.3.131 +110,Henry,Welch,hwelch9@list-manage.com,Male,176.35.182.168 +111,Fred,Snyder,fsnydera@reddit.com,Male,217.106.196.54 +112,Amy,Dunn,adunnb@nba.com,Female,95.39.163.195 +113,Kathleen,Meyer,kmeyerc@cdc.gov,Female,164.142.188.214 +114,Steve,Ferguson,sfergusond@reverbnation.com,Male,138.22.204.251 +115,Teresa,Hill,thille@dion.ne.jp,Female,82.84.228.235 +116,Amanda,Harper,aharperf@mail.ru,Female,16.123.56.176 +117,Kimberly,Ray,krayg@xing.com,Female,48.66.48.12 +118,Johnny,Knight,jknighth@jalbum.net,Male,99.30.138.123 +119,Virginia,Freeman,vfreemani@tiny.cc,Female,225.172.182.63 +120,Anna,Austin,aaustinj@diigo.com,Female,62.111.227.148 +121,Willie,Hill,whillk@mail.ru,Male,0.86.232.249 +122,Sean,Harris,sharrisl@zdnet.com,Male,117.165.133.249 +123,Mildred,Adams,madamsm@usatoday.com,Female,163.44.97.46 +124,David,Graham,dgrahamn@zimbio.com,Male,78.13.246.202 +125,Victor,Hunter,vhuntero@ehow.com,Male,64.156.179.139 +126,Aaron,Ruiz,aruizp@weebly.com,Male,34.194.68.78 +127,Benjamin,Brooks,bbrooksq@jalbum.net,Male,20.192.189.107 +128,Lisa,Wilson,lwilsonr@japanpost.jp,Female,199.152.130.217 +129,Benjamin,King,bkings@comsenz.com,Male,29.189.189.213 +130,Christina,Williamson,cwilliamsont@boston.com,Female,194.101.52.60 +131,Jane,Gonzalez,jgonzalezu@networksolutions.com,Female,109.119.12.87 +132,Thomas,Owens,towensv@psu.edu,Male,84.168.213.153 +133,Katherine,Moore,kmoorew@naver.com,Female,183.150.65.24 +134,Jennifer,Stewart,jstewartx@yahoo.com,Female,38.41.244.58 +135,Sara,Tucker,stuckery@topsy.com,Female,181.130.59.184 +136,Harold,Ortiz,hortizz@vkontakte.ru,Male,198.231.63.137 +137,Shirley,James,sjames10@yelp.com,Female,83.27.160.104 +138,Dennis,Johnson,djohnson11@slate.com,Male,183.178.246.101 +139,Louise,Weaver,lweaver12@china.com.cn,Female,1.14.110.18 +140,Maria,Armstrong,marmstrong13@prweb.com,Female,181.142.1.249 +141,Gloria,Cruz,gcruz14@odnoklassniki.ru,Female,178.232.140.243 +142,Diana,Spencer,dspencer15@ifeng.com,Female,125.153.138.244 +143,Kelly,Nguyen,knguyen16@altervista.org,Female,170.13.201.119 +144,Jane,Rodriguez,jrodriguez17@biblegateway.com,Female,12.102.249.81 +145,Scott,Brown,sbrown18@geocities.jp,Male,108.174.99.192 +146,Norma,Cruz,ncruz19@si.edu,Female,201.112.156.197 +147,Marie,Peters,mpeters1a@mlb.com,Female,231.121.197.144 +148,Lillian,Carr,lcarr1b@typepad.com,Female,206.179.164.163 +149,Judy,Nichols,jnichols1c@t-online.de,Female,158.190.209.194 +150,Billy,Long,blong1d@yahoo.com,Male,175.20.23.160 +151,Howard,Reid,hreid1e@exblog.jp,Male,118.99.196.20 +152,Laura,Ferguson,lferguson1f@tuttocitta.it,Female,22.77.87.110 +153,Anne,Bailey,abailey1g@geocities.com,Female,58.144.159.245 +154,Rose,Morgan,rmorgan1h@ehow.com,Female,118.127.97.4 +155,Nicholas,Reyes,nreyes1i@google.ru,Male,50.135.10.252 +156,Joshua,Kennedy,jkennedy1j@house.gov,Male,154.6.163.209 +157,Paul,Watkins,pwatkins1k@upenn.edu,Male,177.236.120.87 +158,Kathryn,Kelly,kkelly1l@businessweek.com,Female,70.28.61.86 +159,Adam,Armstrong,aarmstrong1m@techcrunch.com,Male,133.235.24.202 +160,Norma,Wallace,nwallace1n@phoca.cz,Female,241.119.227.128 +161,Timothy,Reyes,treyes1o@google.cn,Male,86.28.23.26 +162,Elizabeth,Patterson,epatterson1p@sun.com,Female,139.97.159.149 +163,Edward,Gomez,egomez1q@google.fr,Male,158.103.108.255 +164,David,Cox,dcox1r@friendfeed.com,Male,206.80.80.58 +165,Brenda,Wood,bwood1s@over-blog.com,Female,217.207.44.179 +166,Adam,Walker,awalker1t@blogs.com,Male,253.211.54.93 +167,Michael,Hart,mhart1u@wix.com,Male,230.206.200.22 +168,Jesse,Ellis,jellis1v@google.co.uk,Male,213.254.162.52 +169,Janet,Powell,jpowell1w@un.org,Female,27.192.194.86 +170,Helen,Ford,hford1x@creativecommons.org,Female,52.160.102.168 +171,Gerald,Carpenter,gcarpenter1y@about.me,Male,36.30.194.218 +172,Kathryn,Oliver,koliver1z@army.mil,Female,202.63.103.69 +173,Alan,Berry,aberry20@gov.uk,Male,246.157.112.211 +174,Harry,Andrews,handrews21@ameblo.jp,Male,195.108.0.12 +175,Andrea,Hall,ahall22@hp.com,Female,149.162.163.28 +176,Barbara,Wells,bwells23@behance.net,Female,224.70.72.1 +177,Anne,Wells,awells24@apache.org,Female,180.168.81.153 +178,Harry,Harper,hharper25@rediff.com,Male,151.87.130.21 +179,Jack,Ray,jray26@wufoo.com,Male,220.109.38.178 +180,Phillip,Hamilton,phamilton27@joomla.org,Male,166.40.47.30 +181,Shirley,Hunter,shunter28@newsvine.com,Female,97.209.140.194 +182,Arthur,Daniels,adaniels29@reuters.com,Male,5.40.240.86 +183,Virginia,Rodriguez,vrodriguez2a@walmart.com,Female,96.80.164.184 +184,Christina,Ryan,cryan2b@hibu.com,Female,56.35.5.52 +185,Theresa,Mendoza,tmendoza2c@vinaora.com,Female,243.42.0.210 +186,Jason,Cole,jcole2d@ycombinator.com,Male,198.248.39.129 +187,Phillip,Bryant,pbryant2e@rediff.com,Male,140.39.116.251 +188,Adam,Torres,atorres2f@sun.com,Male,101.75.187.135 +189,Margaret,Johnston,mjohnston2g@ucsd.edu,Female,159.30.69.149 +190,Paul,Payne,ppayne2h@hhs.gov,Male,199.234.140.220 +191,Todd,Willis,twillis2i@businessweek.com,Male,191.59.136.214 +192,Willie,Oliver,woliver2j@noaa.gov,Male,44.212.35.197 +193,Frances,Robertson,frobertson2k@go.com,Female,31.117.65.136 +194,Gregory,Hawkins,ghawkins2l@joomla.org,Male,91.3.22.49 +195,Lisa,Perkins,lperkins2m@si.edu,Female,145.95.31.186 +196,Jacqueline,Anderson,janderson2n@cargocollective.com,Female,14.176.0.187 +197,Shirley,Diaz,sdiaz2o@ucla.edu,Female,207.12.95.46 +198,Nicole,Meyer,nmeyer2p@flickr.com,Female,231.79.115.13 +199,Mary,Gray,mgray2q@constantcontact.com,Female,210.116.64.253 +200,Jean,Mcdonald,jmcdonald2r@baidu.com,Female,122.239.235.117 diff --git a/test/integration/001_simple_copy_test/test_simple_copy.py b/test/integration/001_simple_copy_test/test_simple_copy.py index 256e17a5ad3..fcf720f7aac 100644 --- a/test/integration/001_simple_copy_test/test_simple_copy.py +++ b/test/integration/001_simple_copy_test/test_simple_copy.py @@ -11,18 +11,18 @@ def setUp(self): def schema(self): return "simple_copy_001" - @property - def models(self): - return "test/integration/001_simple_copy_test/models" + @staticmethod + def dir(path): + return "test/integration/001_simple_copy_test/" + path.lstrip("/") @property - def seeds(self): - return "test/integration/001_simple_copy_test/data" + def models(self): + return self.dir("models") - @attr(type='postgres') + @attr(type="postgres") def test__postgres__simple_copy(self): - self.use_default_project() - self.use_profile('postgres') + self.use_default_project({"data-paths": [self.dir("seed-initial")]}) + self.use_profile("postgres") self.run_dbt(["seed"]) self.run_dbt() @@ -31,31 +31,31 @@ def test__postgres__simple_copy(self): self.assertTablesEqual("seed","incremental") self.assertTablesEqual("seed","materialized") - self.run_sql_file("test/integration/001_simple_copy_test/update.sql") - + self.use_default_project({"data-paths": [self.dir("seed-update")]}) + self.run_dbt(["seed"]) self.run_dbt() self.assertTablesEqual("seed","view") self.assertTablesEqual("seed","incremental") self.assertTablesEqual("seed","materialized") - @attr(type='postgres') + @attr(type="postgres") def test__postgres__dbt_doesnt_run_empty_models(self): - self.use_default_project() - self.use_profile('postgres') + self.use_default_project({"data-paths": [self.dir("seed-initial")]}) + self.use_profile("postgres") self.run_dbt(["seed"]) self.run_dbt() models = self.get_models_in_schema() - self.assertFalse('empty' in models.keys()) - self.assertFalse('disabled' in models.keys()) + self.assertFalse("empty" in models.keys()) + self.assertFalse("disabled" in models.keys()) - @attr(type='snowflake') + @attr(type="snowflake") def test__snowflake__simple_copy(self): - self.use_default_project() - self.use_profile('snowflake') + self.use_default_project({"data-paths": [self.dir("seed-initial")]}) + self.use_profile("snowflake") self.run_dbt(["seed"]) self.run_dbt() @@ -64,8 +64,8 @@ def test__snowflake__simple_copy(self): self.assertTablesEqual("seed","incremental") self.assertTablesEqual("seed","materialized") - self.run_sql_file("test/integration/001_simple_copy_test/update.sql") - + self.use_default_project({"data-paths": [self.dir("seed-update")]}) + self.run_dbt(["seed"]) self.run_dbt() self.assertTablesEqual("seed","view") diff --git a/test/integration/001_simple_copy_test/update.sql b/test/integration/001_simple_copy_test/update.sql deleted file mode 100644 index e78d5d7d6df..00000000000 --- a/test/integration/001_simple_copy_test/update.sql +++ /dev/null @@ -1,101 +0,0 @@ -insert into "{schema}"."seed" (first_name, last_name, email, gender, ip_address) values -('Michael', 'Perez', 'mperez0@chronoengine.com', 'Male', '106.239.70.175'), -('Shawn', 'Mccoy', 'smccoy1@reddit.com', 'Male', '24.165.76.182'), -('Kathleen', 'Payne', 'kpayne2@cargocollective.com', 'Female', '113.207.168.106'), -('Jimmy', 'Cooper', 'jcooper3@cargocollective.com', 'Male', '198.24.63.114'), -('Katherine', 'Rice', 'krice4@typepad.com', 'Female', '36.97.186.238'), -('Sarah', 'Ryan', 'sryan5@gnu.org', 'Female', '119.117.152.40'), -('Martin', 'Mcdonald', 'mmcdonald6@opera.com', 'Male', '8.76.38.115'), -('Frank', 'Robinson', 'frobinson7@wunderground.com', 'Male', '186.14.64.194'), -('Jennifer', 'Franklin', 'jfranklin8@mail.ru', 'Female', '91.216.3.131'), -('Henry', 'Welch', 'hwelch9@list-manage.com', 'Male', '176.35.182.168'), -('Fred', 'Snyder', 'fsnydera@reddit.com', 'Male', '217.106.196.54'), -('Amy', 'Dunn', 'adunnb@nba.com', 'Female', '95.39.163.195'), -('Kathleen', 'Meyer', 'kmeyerc@cdc.gov', 'Female', '164.142.188.214'), -('Steve', 'Ferguson', 'sfergusond@reverbnation.com', 'Male', '138.22.204.251'), -('Teresa', 'Hill', 'thille@dion.ne.jp', 'Female', '82.84.228.235'), -('Amanda', 'Harper', 'aharperf@mail.ru', 'Female', '16.123.56.176'), -('Kimberly', 'Ray', 'krayg@xing.com', 'Female', '48.66.48.12'), -('Johnny', 'Knight', 'jknighth@jalbum.net', 'Male', '99.30.138.123'), -('Virginia', 'Freeman', 'vfreemani@tiny.cc', 'Female', '225.172.182.63'), -('Anna', 'Austin', 'aaustinj@diigo.com', 'Female', '62.111.227.148'), -('Willie', 'Hill', 'whillk@mail.ru', 'Male', '0.86.232.249'), -('Sean', 'Harris', 'sharrisl@zdnet.com', 'Male', '117.165.133.249'), -('Mildred', 'Adams', 'madamsm@usatoday.com', 'Female', '163.44.97.46'), -('David', 'Graham', 'dgrahamn@zimbio.com', 'Male', '78.13.246.202'), -('Victor', 'Hunter', 'vhuntero@ehow.com', 'Male', '64.156.179.139'), -('Aaron', 'Ruiz', 'aruizp@weebly.com', 'Male', '34.194.68.78'), -('Benjamin', 'Brooks', 'bbrooksq@jalbum.net', 'Male', '20.192.189.107'), -('Lisa', 'Wilson', 'lwilsonr@japanpost.jp', 'Female', '199.152.130.217'), -('Benjamin', 'King', 'bkings@comsenz.com', 'Male', '29.189.189.213'), -('Christina', 'Williamson', 'cwilliamsont@boston.com', 'Female', '194.101.52.60'), -('Jane', 'Gonzalez', 'jgonzalezu@networksolutions.com', 'Female', '109.119.12.87'), -('Thomas', 'Owens', 'towensv@psu.edu', 'Male', '84.168.213.153'), -('Katherine', 'Moore', 'kmoorew@naver.com', 'Female', '183.150.65.24'), -('Jennifer', 'Stewart', 'jstewartx@yahoo.com', 'Female', '38.41.244.58'), -('Sara', 'Tucker', 'stuckery@topsy.com', 'Female', '181.130.59.184'), -('Harold', 'Ortiz', 'hortizz@vkontakte.ru', 'Male', '198.231.63.137'), -('Shirley', 'James', 'sjames10@yelp.com', 'Female', '83.27.160.104'), -('Dennis', 'Johnson', 'djohnson11@slate.com', 'Male', '183.178.246.101'), -('Louise', 'Weaver', 'lweaver12@china.com.cn', 'Female', '1.14.110.18'), -('Maria', 'Armstrong', 'marmstrong13@prweb.com', 'Female', '181.142.1.249'), -('Gloria', 'Cruz', 'gcruz14@odnoklassniki.ru', 'Female', '178.232.140.243'), -('Diana', 'Spencer', 'dspencer15@ifeng.com', 'Female', '125.153.138.244'), -('Kelly', 'Nguyen', 'knguyen16@altervista.org', 'Female', '170.13.201.119'), -('Jane', 'Rodriguez', 'jrodriguez17@biblegateway.com', 'Female', '12.102.249.81'), -('Scott', 'Brown', 'sbrown18@geocities.jp', 'Male', '108.174.99.192'), -('Norma', 'Cruz', 'ncruz19@si.edu', 'Female', '201.112.156.197'), -('Marie', 'Peters', 'mpeters1a@mlb.com', 'Female', '231.121.197.144'), -('Lillian', 'Carr', 'lcarr1b@typepad.com', 'Female', '206.179.164.163'), -('Judy', 'Nichols', 'jnichols1c@t-online.de', 'Female', '158.190.209.194'), -('Billy', 'Long', 'blong1d@yahoo.com', 'Male', '175.20.23.160'), -('Howard', 'Reid', 'hreid1e@exblog.jp', 'Male', '118.99.196.20'), -('Laura', 'Ferguson', 'lferguson1f@tuttocitta.it', 'Female', '22.77.87.110'), -('Anne', 'Bailey', 'abailey1g@geocities.com', 'Female', '58.144.159.245'), -('Rose', 'Morgan', 'rmorgan1h@ehow.com', 'Female', '118.127.97.4'), -('Nicholas', 'Reyes', 'nreyes1i@google.ru', 'Male', '50.135.10.252'), -('Joshua', 'Kennedy', 'jkennedy1j@house.gov', 'Male', '154.6.163.209'), -('Paul', 'Watkins', 'pwatkins1k@upenn.edu', 'Male', '177.236.120.87'), -('Kathryn', 'Kelly', 'kkelly1l@businessweek.com', 'Female', '70.28.61.86'), -('Adam', 'Armstrong', 'aarmstrong1m@techcrunch.com', 'Male', '133.235.24.202'), -('Norma', 'Wallace', 'nwallace1n@phoca.cz', 'Female', '241.119.227.128'), -('Timothy', 'Reyes', 'treyes1o@google.cn', 'Male', '86.28.23.26'), -('Elizabeth', 'Patterson', 'epatterson1p@sun.com', 'Female', '139.97.159.149'), -('Edward', 'Gomez', 'egomez1q@google.fr', 'Male', '158.103.108.255'), -('David', 'Cox', 'dcox1r@friendfeed.com', 'Male', '206.80.80.58'), -('Brenda', 'Wood', 'bwood1s@over-blog.com', 'Female', '217.207.44.179'), -('Adam', 'Walker', 'awalker1t@blogs.com', 'Male', '253.211.54.93'), -('Michael', 'Hart', 'mhart1u@wix.com', 'Male', '230.206.200.22'), -('Jesse', 'Ellis', 'jellis1v@google.co.uk', 'Male', '213.254.162.52'), -('Janet', 'Powell', 'jpowell1w@un.org', 'Female', '27.192.194.86'), -('Helen', 'Ford', 'hford1x@creativecommons.org', 'Female', '52.160.102.168'), -('Gerald', 'Carpenter', 'gcarpenter1y@about.me', 'Male', '36.30.194.218'), -('Kathryn', 'Oliver', 'koliver1z@army.mil', 'Female', '202.63.103.69'), -('Alan', 'Berry', 'aberry20@gov.uk', 'Male', '246.157.112.211'), -('Harry', 'Andrews', 'handrews21@ameblo.jp', 'Male', '195.108.0.12'), -('Andrea', 'Hall', 'ahall22@hp.com', 'Female', '149.162.163.28'), -('Barbara', 'Wells', 'bwells23@behance.net', 'Female', '224.70.72.1'), -('Anne', 'Wells', 'awells24@apache.org', 'Female', '180.168.81.153'), -('Harry', 'Harper', 'hharper25@rediff.com', 'Male', '151.87.130.21'), -('Jack', 'Ray', 'jray26@wufoo.com', 'Male', '220.109.38.178'), -('Phillip', 'Hamilton', 'phamilton27@joomla.org', 'Male', '166.40.47.30'), -('Shirley', 'Hunter', 'shunter28@newsvine.com', 'Female', '97.209.140.194'), -('Arthur', 'Daniels', 'adaniels29@reuters.com', 'Male', '5.40.240.86'), -('Virginia', 'Rodriguez', 'vrodriguez2a@walmart.com', 'Female', '96.80.164.184'), -('Christina', 'Ryan', 'cryan2b@hibu.com', 'Female', '56.35.5.52'), -('Theresa', 'Mendoza', 'tmendoza2c@vinaora.com', 'Female', '243.42.0.210'), -('Jason', 'Cole', 'jcole2d@ycombinator.com', 'Male', '198.248.39.129'), -('Phillip', 'Bryant', 'pbryant2e@rediff.com', 'Male', '140.39.116.251'), -('Adam', 'Torres', 'atorres2f@sun.com', 'Male', '101.75.187.135'), -('Margaret', 'Johnston', 'mjohnston2g@ucsd.edu', 'Female', '159.30.69.149'), -('Paul', 'Payne', 'ppayne2h@hhs.gov', 'Male', '199.234.140.220'), -('Todd', 'Willis', 'twillis2i@businessweek.com', 'Male', '191.59.136.214'), -('Willie', 'Oliver', 'woliver2j@noaa.gov', 'Male', '44.212.35.197'), -('Frances', 'Robertson', 'frobertson2k@go.com', 'Female', '31.117.65.136'), -('Gregory', 'Hawkins', 'ghawkins2l@joomla.org', 'Male', '91.3.22.49'), -('Lisa', 'Perkins', 'lperkins2m@si.edu', 'Female', '145.95.31.186'), -('Jacqueline', 'Anderson', 'janderson2n@cargocollective.com', 'Female', '14.176.0.187'), -('Shirley', 'Diaz', 'sdiaz2o@ucla.edu', 'Female', '207.12.95.46'), -('Nicole', 'Meyer', 'nmeyer2p@flickr.com', 'Female', '231.79.115.13'), -('Mary', 'Gray', 'mgray2q@constantcontact.com', 'Female', '210.116.64.253'), -('Jean', 'Mcdonald', 'jmcdonald2r@baidu.com', 'Female', '122.239.235.117'); diff --git a/test/integration/base.py b/test/integration/base.py index a0b758e8746..76747760f80 100644 --- a/test/integration/base.py +++ b/test/integration/base.py @@ -186,21 +186,20 @@ def setUp(self): self.run_sql('DROP SCHEMA IF EXISTS "{}" CASCADE'.format(self.unique_schema())) self.run_sql('CREATE SCHEMA "{}"'.format(self.unique_schema())) - def use_default_project(self): + def use_default_project(self, overrides=None): # create a dbt_project.yml - data_paths = getattr(self, 'seeds', None) base_project_config = { 'name': 'test', 'version': '1.0', 'test-paths': [], 'source-paths': [self.models], 'profile': 'test', - 'data-paths': [data_paths] if data_paths else [], } project_config = {} project_config.update(base_project_config) project_config.update(self.project_config) + project_config.update(overrides or {}) with open("dbt_project.yml", 'w') as f: yaml.safe_dump(project_config, f, default_flow_style=True) From 434e7b78c8562e940fd16b170a77c8c7864f69b9 Mon Sep 17 00:00:00 2001 From: Buck Ryan Date: Wed, 6 Dec 2017 14:52:23 -0500 Subject: [PATCH 10/34] cleanup --- dbt/adapters/bigquery.py | 3 ++- dbt/adapters/postgres.py | 28 +++++++++++++++++++++++----- dbt/node_runners.py | 19 ++----------------- 3 files changed, 27 insertions(+), 23 deletions(-) diff --git a/dbt/adapters/bigquery.py b/dbt/adapters/bigquery.py index 41416bebf09..6e59d7badd2 100644 --- a/dbt/adapters/bigquery.py +++ b/dbt/adapters/bigquery.py @@ -366,7 +366,8 @@ def warning_on_hooks(cls, hook_type): dbt.ui.printer.COLOR_FG_YELLOW) @classmethod - def add_query(cls, profile, sql, model_name=None, auto_begin=True): + def add_query(cls, profile, sql, model_name=None, auto_begin=True, + bindings=None): if model_name in ['on-run-start', 'on-run-end']: cls.warning_on_hooks(model_name) else: diff --git a/dbt/adapters/postgres.py b/dbt/adapters/postgres.py index b1965728404..c4d461475e3 100644 --- a/dbt/adapters/postgres.py +++ b/dbt/adapters/postgres.py @@ -44,7 +44,7 @@ def exception_handler(cls, profile, sql, model_name=None, raise dbt.exceptions.RuntimeException(e) @staticmethod - def escape_identifier(ident): + def quote(ident): return '"{}"'.format(ident.replace('"', '""')) @classmethod @@ -193,12 +193,30 @@ def create_table(cls, profile, schema, table_name, agate_table): @classmethod def load_csv(cls, profile, schema, table_name, agate_table): - cols_sql = ", ".join(cls.escape_identifier(c) - for c in agate_table.column_names) + cols_sql = ", ".join(cls.quote(c) for c in agate_table.column_names) placeholders = ", ".join("%s" for _ in agate_table.column_names) sql = ('insert into {}.{} ({}) values ({})' - .format(cls.escape_identifier(schema), - cls.escape_identifier(table_name), + .format(cls.quote(schema), + cls.quote(table_name), cols_sql, placeholders)) for row in agate_table.rows: cls.add_query(profile, sql, bindings=row) + + @classmethod + def create_seed_table(cls, profile, schema, table_name, agate_table, + full_refresh=False): + existing = cls.query_for_existing(profile, schema) + existing_type = existing.get(table_name) + if existing_type and existing_type != "table": + raise dbt.exceptions.RuntimeException( + "Cannot seed to '{}', it is a view".format(table_name)) + if existing_type: + if full_refresh: + cls.drop_table(profile, schema, table_name, None) + cls.create_table(profile, schema, table_name, agate_table) + else: + cls.truncate(profile, schema, table_name) + else: + cls.create_table(profile, schema, table_name, agate_table) + cls.load_csv(profile, schema, table_name, agate_table) + cls.commit_if_has_connection(profile, None) diff --git a/dbt/node_runners.py b/dbt/node_runners.py index 415b594efce..ef3796d67c8 100644 --- a/dbt/node_runners.py +++ b/dbt/node_runners.py @@ -473,26 +473,11 @@ def before_execute(self): self.num_nodes) def execute(self, compiled_node, existing_, flat_graph): - # In testing, existing_ was not correctly set to the existing things schema = compiled_node["schema"] - adapter = self.adapter # type: dbt.adapters.default.DefaultAdapter - existing = adapter.query_for_existing(self.profile, schema) - existing_tables = [k for k, v in existing.items() if v == "table"] table_name = compiled_node["table_name"] - existing_type = existing.get(table_name) - if existing_type and existing_type != "table": - raise Exception("table is already a view") # FIXME better exception table = compiled_node["agate_table"] - if existing_type: - if dbt.flags.FULL_REFRESH: - adapter.drop_table(self.profile, schema, table_name, None) - adapter.create_table(self.profile, schema, table_name, table) - else: - adapter.truncate(self.profile, schema, table_name) - else: - adapter.create_table(self.profile, schema, table_name, table) - adapter.load_csv(self.profile, schema, table_name, table) - adapter.commit_if_has_connection(self.profile, None) + self.adapter.create_seed_table(self.profile, schema, table_name, table, + full_refresh=dbt.flags.FULL_REFRESH) return RunModelResult(compiled_node) def compile(self, flat_graph): From 23a01ce8135b7417753f457e6eb815b65cc3fc25 Mon Sep 17 00:00:00 2001 From: Buck Ryan Date: Wed, 6 Dec 2017 16:43:12 -0500 Subject: [PATCH 11/34] implement bigquery csv load --- dbt/adapters/bigquery.py | 43 +++++++++++++++++++++++++++++ dbt/adapters/default.py | 18 ++++++++++++- dbt/adapters/postgres.py | 58 +++++++++++++++------------------------- dbt/node_runners.py | 4 +-- 4 files changed, 84 insertions(+), 39 deletions(-) diff --git a/dbt/adapters/bigquery.py b/dbt/adapters/bigquery.py index 6e59d7badd2..1a8a3f8e256 100644 --- a/dbt/adapters/bigquery.py +++ b/dbt/adapters/bigquery.py @@ -15,9 +15,11 @@ import google.oauth2 import google.cloud.exceptions import google.cloud.bigquery +import agate import time import uuid +import six class BigQueryAdapter(PostgresAdapter): @@ -390,3 +392,44 @@ def quote_schema_and_table(cls, profile, schema, table, model_name=None): return '{}.{}.{}'.format(cls.quote(project), cls.quote(schema), cls.quote(table)) + + agate_type_conversions = [ + (agate.Text, "STRING"), + (agate.Number, "FLOAT64"), + (agate.Boolean, "BOOL"), + (agate.DateTime, "DATETIME"), + (agate.Date, "DATE"), + ] + agate_default_type = "STRING" + + @classmethod + def _agate_to_schema(cls, agate_table): + bq_schema = [] + for idx, col_name in enumerate(agate_table.column_names): + col_type = agate_table.column_types[idx] + type_ = cls.convert_agate_type(col_type) + bq_schema.append(google.cloud.bigquery.SchemaField(col_name, type_)) + return bq_schema + + @classmethod + def create_csv_table(cls, profile, schema, table_name, agate_table): + pass + + @classmethod + def reset_csv_table(cls, profile, schema, table_name, agate_table, + full_refresh=False): + cls.drop(profile, schema, table_name, "table") + + @classmethod + def load_csv_rows(cls, profile, schema, table_name, agate_table): + bq_schema = cls._agate_to_schema(agate_table) + dataset = cls.get_dataset(profile, schema, None) + table = dataset.table(table_name, schema=bq_schema) + conn = cls.get_connection(profile, None) + client = conn.get('handle') + in_mem_buffer = six.StringIO() + agate_table.to_csv(in_mem_buffer) + job = table.upload_from_file(in_mem_buffer, "CSV", rewind=True, + client=client, skip_leading_rows=1) + with cls.exception_handler(profile, "LOAD TABLE"): + cls.poll_until_job_completes(job, cls.get_timeout(conn)) diff --git a/dbt/adapters/default.py b/dbt/adapters/default.py index 0b99dd9429b..cddf581f9a7 100644 --- a/dbt/adapters/default.py +++ b/dbt/adapters/default.py @@ -615,9 +615,25 @@ def already_exists(cls, profile, schema, table, model_name=None): @classmethod def quote(cls, identifier): - return '"{}"'.format(identifier) + return '"{}"'.format(identifier.replace('"', '""')) @classmethod def quote_schema_and_table(cls, profile, schema, table, model_name=None): return '{}.{}'.format(cls.quote(schema), cls.quote(table)) + + @classmethod + def handle_csv_table(cls, profile, schema, table_name, agate_table, + full_refresh=False): + existing = cls.query_for_existing(profile, schema) + existing_type = existing.get(table_name) + if existing_type and existing_type != "table": + raise dbt.exceptions.RuntimeException( + "Cannot seed to '{}', it is a view".format(table_name)) + if existing_type: + cls.reset_csv_table(profile, schema, table_name, agate_table, + full_refresh=full_refresh) + else: + cls.create_csv_table(profile, schema, table_name, agate_table) + cls.load_csv_rows(profile, schema, table_name, agate_table) + cls.commit_if_has_connection(profile, None) diff --git a/dbt/adapters/postgres.py b/dbt/adapters/postgres.py index c4d461475e3..34363c9ff4a 100644 --- a/dbt/adapters/postgres.py +++ b/dbt/adapters/postgres.py @@ -12,15 +12,6 @@ class PostgresAdapter(dbt.adapters.default.DefaultAdapter): - agate_type_conversions = [ - (agate.Text, "text"), - (agate.Number, "numeric"), - (agate.Boolean, "boolean"), - (agate.DateTime, "timestamp without time zone"), - (agate.Date, "date"), - ] - agate_default_type = "text" - @classmethod @contextmanager def exception_handler(cls, profile, sql, model_name=None, @@ -43,10 +34,6 @@ def exception_handler(cls, profile, sql, model_name=None, cls.release_connection(profile, connection_name) raise dbt.exceptions.RuntimeException(e) - @staticmethod - def quote(ident): - return '"{}"'.format(ident.replace('"', '""')) - @classmethod def type(cls): return 'postgres' @@ -180,19 +167,37 @@ def cancel_connection(cls, profile, connection): logger.debug("Cancel query '{}': {}".format(connection_name, res)) + agate_type_conversions = [ + (agate.Text, "text"), + (agate.Number, "numeric"), + (agate.Boolean, "boolean"), + (agate.DateTime, "timestamp without time zone"), + (agate.Date, "date"), + ] + agate_default_type = "text" + @classmethod - def create_table(cls, profile, schema, table_name, agate_table): + def create_csv_table(cls, profile, schema, table_name, agate_table): col_sqls = [] for idx, col_name in enumerate(agate_table.column_names): col_type = agate_table.column_types[idx] - converted_type = cls.convert_agate_type(col_type) - col_sqls.append('"{}" {}'.format(col_name, converted_type)) + type_ = cls.convert_agate_type(col_type) + col_sqls.append('"{}" {}'.format(col_name, type_)) sql = 'create table "{}"."{}" ({})'.format(schema, table_name, ", ".join(col_sqls)) return cls.add_query(profile, sql) @classmethod - def load_csv(cls, profile, schema, table_name, agate_table): + def reset_csv_table(cls, profile, schema, table_name, agate_table, + full_refresh=False): + if full_refresh: + cls.drop_table(profile, schema, table_name, None) + cls.create_csv_table(profile, schema, table_name, agate_table) + else: + cls.truncate(profile, schema, table_name) + + @classmethod + def load_csv_rows(cls, profile, schema, table_name, agate_table): cols_sql = ", ".join(cls.quote(c) for c in agate_table.column_names) placeholders = ", ".join("%s" for _ in agate_table.column_names) sql = ('insert into {}.{} ({}) values ({})' @@ -201,22 +206,3 @@ def load_csv(cls, profile, schema, table_name, agate_table): cols_sql, placeholders)) for row in agate_table.rows: cls.add_query(profile, sql, bindings=row) - - @classmethod - def create_seed_table(cls, profile, schema, table_name, agate_table, - full_refresh=False): - existing = cls.query_for_existing(profile, schema) - existing_type = existing.get(table_name) - if existing_type and existing_type != "table": - raise dbt.exceptions.RuntimeException( - "Cannot seed to '{}', it is a view".format(table_name)) - if existing_type: - if full_refresh: - cls.drop_table(profile, schema, table_name, None) - cls.create_table(profile, schema, table_name, agate_table) - else: - cls.truncate(profile, schema, table_name) - else: - cls.create_table(profile, schema, table_name, agate_table) - cls.load_csv(profile, schema, table_name, agate_table) - cls.commit_if_has_connection(profile, None) diff --git a/dbt/node_runners.py b/dbt/node_runners.py index ef3796d67c8..6e852e2687e 100644 --- a/dbt/node_runners.py +++ b/dbt/node_runners.py @@ -476,8 +476,8 @@ def execute(self, compiled_node, existing_, flat_graph): schema = compiled_node["schema"] table_name = compiled_node["table_name"] table = compiled_node["agate_table"] - self.adapter.create_seed_table(self.profile, schema, table_name, table, - full_refresh=dbt.flags.FULL_REFRESH) + self.adapter.handle_csv_table(self.profile, schema, table_name, table, + full_refresh=dbt.flags.FULL_REFRESH) return RunModelResult(compiled_node) def compile(self, flat_graph): From 584578f812d0a7e36abbead32179cc4e4bab84bb Mon Sep 17 00:00:00 2001 From: Buck Ryan Date: Wed, 6 Dec 2017 22:00:32 -0500 Subject: [PATCH 12/34] context handling of StringIO --- dbt/adapters/bigquery.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbt/adapters/bigquery.py b/dbt/adapters/bigquery.py index 1a8a3f8e256..f7a9ac46925 100644 --- a/dbt/adapters/bigquery.py +++ b/dbt/adapters/bigquery.py @@ -427,9 +427,9 @@ def load_csv_rows(cls, profile, schema, table_name, agate_table): table = dataset.table(table_name, schema=bq_schema) conn = cls.get_connection(profile, None) client = conn.get('handle') - in_mem_buffer = six.StringIO() - agate_table.to_csv(in_mem_buffer) - job = table.upload_from_file(in_mem_buffer, "CSV", rewind=True, - client=client, skip_leading_rows=1) + with six.StringIO() as in_mem_buffer: + agate_table.to_csv(in_mem_buffer) + job = table.upload_from_file(in_mem_buffer, "CSV", rewind=True, + client=client, skip_leading_rows=1) with cls.exception_handler(profile, "LOAD TABLE"): cls.poll_until_job_completes(job, cls.get_timeout(conn)) From 936e68ed9d276789c1aa34c2fa3d0b95341a4516 Mon Sep 17 00:00:00 2001 From: Buck Ryan Date: Fri, 8 Dec 2017 17:08:40 -0500 Subject: [PATCH 13/34] Better typing --- dbt/adapters/bigquery.py | 24 ++++++++++++++---------- dbt/adapters/default.py | 22 +++++++++++++++------- dbt/adapters/postgres.py | 38 ++++++++++++++++++++++++++++---------- dbt/adapters/redshift.py | 7 +++++++ dbt/utils.py | 13 +++++++++++++ 5 files changed, 77 insertions(+), 27 deletions(-) diff --git a/dbt/adapters/bigquery.py b/dbt/adapters/bigquery.py index f7a9ac46925..f701846a0be 100644 --- a/dbt/adapters/bigquery.py +++ b/dbt/adapters/bigquery.py @@ -393,21 +393,25 @@ def quote_schema_and_table(cls, profile, schema, table, model_name=None): cls.quote(schema), cls.quote(table)) - agate_type_conversions = [ - (agate.Text, "STRING"), - (agate.Number, "FLOAT64"), - (agate.Boolean, "BOOL"), - (agate.DateTime, "DATETIME"), - (agate.Date, "DATE"), - ] - agate_default_type = "STRING" + @classmethod + def convert_agate_type(cls, agate_table, col_idx): + agate_type = agate_table.column_types[col_idx] + conversions = [ + (agate.Text, "STRING"), + (agate.Number, "FLOAT64"), + (agate.Boolean, "BOOL"), + (agate.DateTime, "DATETIME"), + (agate.Date, "DATE"), + ] + for agate_cls, sql in conversions: + if isinstance(agate_type, agate_cls): + return sql @classmethod def _agate_to_schema(cls, agate_table): bq_schema = [] for idx, col_name in enumerate(agate_table.column_names): - col_type = agate_table.column_types[idx] - type_ = cls.convert_agate_type(col_type) + type_ = cls.convert_agate_type(agate_table, idx) bq_schema.append(google.cloud.bigquery.SchemaField(col_name, type_)) return bq_schema diff --git a/dbt/adapters/default.py b/dbt/adapters/default.py index cddf581f9a7..af7385ca35e 100644 --- a/dbt/adapters/default.py +++ b/dbt/adapters/default.py @@ -2,6 +2,7 @@ import itertools import multiprocessing import time +import agate from contextlib import contextmanager @@ -104,13 +105,6 @@ def load_csv(cls, profile, schema, table_name, agate_table): raise dbt.exceptions.NotImplementedException( '`load_csv` is not implemented for this adapter!') - @classmethod - def convert_agate_type(cls, agate_type): - for cls, sql in cls.agate_type_conversions: - if isinstance(agate_type, cls): - return sql - return cls.agate_default_type - ### # FUNCTIONS THAT SHOULD BE ABSTRACT ### @@ -637,3 +631,17 @@ def handle_csv_table(cls, profile, schema, table_name, agate_table, cls.create_csv_table(profile, schema, table_name, agate_table) cls.load_csv_rows(profile, schema, table_name, agate_table) cls.commit_if_has_connection(profile, None) + + @classmethod + def convert_agate_type(cls, agate_table, col_idx): + agate_type = agate_table.column_types[col_idx] + conversions = [ + (agate.Text, cls.convert_text_type), + (agate.Number, cls.convert_number_type), + (agate.Boolean, cls.convert_boolean_type), + (agate.DateTime, cls.convert_datetime_type), + (agate.Date, cls.convert_date_type), + ] + for agate_cls, func in conversions: + if isinstance(agate_type, agate_cls): + return func(agate_table, col_idx) diff --git a/dbt/adapters/postgres.py b/dbt/adapters/postgres.py index 34363c9ff4a..ab1574de3a0 100644 --- a/dbt/adapters/postgres.py +++ b/dbt/adapters/postgres.py @@ -6,6 +6,7 @@ import dbt.adapters.default import dbt.compat import dbt.exceptions +from dbt.utils import max_digits from dbt.logger import GLOBAL_LOGGER as logger @@ -167,21 +168,38 @@ def cancel_connection(cls, profile, connection): logger.debug("Cancel query '{}': {}".format(connection_name, res)) - agate_type_conversions = [ - (agate.Text, "text"), - (agate.Number, "numeric"), - (agate.Boolean, "boolean"), - (agate.DateTime, "timestamp without time zone"), - (agate.Date, "date"), - ] - agate_default_type = "text" + @classmethod + def convert_text_type(cls, agate_table, col_idx): + return "text" + + @classmethod + def convert_number_type(cls, agate_table, col_idx): + column = agate_table.columns[col_idx] + precision = max_digits(column.values_without_nulls()) + # agate uses the term Precision but in this context, it is really the + # scale - ie. the number of decimal places + scale = agate_table.aggregate(agate.MaxPrecision(col_idx)) + if not scale: + return "integer" + return "numeric({}, {})".format(precision, scale) + + @classmethod + def convert_boolean_type(cls, agate_table, col_idx): + return "boolean" + + @classmethod + def convert_datetime_type(cls, agate_table, col_idx): + return "timestamp without time zone" + + @classmethod + def convert_date_type(cls, agate_table, col_idx): + return "date" @classmethod def create_csv_table(cls, profile, schema, table_name, agate_table): col_sqls = [] for idx, col_name in enumerate(agate_table.column_names): - col_type = agate_table.column_types[idx] - type_ = cls.convert_agate_type(col_type) + type_ = cls.convert_agate_type(agate_table, idx) col_sqls.append('"{}" {}'.format(col_name, type_)) sql = 'create table "{}"."{}" ({})'.format(schema, table_name, ", ".join(col_sqls)) diff --git a/dbt/adapters/redshift.py b/dbt/adapters/redshift.py index e4e0db9d543..ace27f911e9 100644 --- a/dbt/adapters/redshift.py +++ b/dbt/adapters/redshift.py @@ -43,3 +43,10 @@ def drop(cls, profile, schema, relation, relation_type, model_name=None): finally: drop_lock.release() + + @classmethod + def convert_text_type(cls, agate_table, col_idx): + column = agate_table.columns[col_idx] + lens = (len(d.encode("utf-8")) for d in column.values_without_nulls()) + max_len = max(lens) if lens else 64 + return "varchar({})".format(max_len) diff --git a/dbt/utils.py b/dbt/utils.py index 16fb1e585a8..0e84f46716e 100644 --- a/dbt/utils.py +++ b/dbt/utils.py @@ -359,3 +359,16 @@ def get_hashed_contents(model): def flatten_nodes(dep_list): return list(itertools.chain.from_iterable(dep_list)) + + +def max_digits(values): + """Given a series of decimal.Decimal values, find the maximum + number of digits (on both sides of the decimal point) used by the + values.""" + max_ = 0 + for value in values: + if value is None: + continue + sign, digits, exponent = value.normalize().as_tuple() + max_ = max(len(digits), max_) + return max_ From 224174a3fd7655c47e63d7da315bf7aeb8909458 Mon Sep 17 00:00:00 2001 From: Buck Ryan Date: Fri, 8 Dec 2017 17:15:22 -0500 Subject: [PATCH 14/34] strip seeder and csvkit dependency --- dbt/seeder.py | 139 ----------------------------------------------- dbt/task/seed.py | 1 - requirements.txt | 1 - setup.py | 1 - 4 files changed, 142 deletions(-) delete mode 100644 dbt/seeder.py diff --git a/dbt/seeder.py b/dbt/seeder.py deleted file mode 100644 index d30e5f131cc..00000000000 --- a/dbt/seeder.py +++ /dev/null @@ -1,139 +0,0 @@ -import os -import fnmatch -from csvkit import table as csv_table, sql as csv_sql -from sqlalchemy.dialects import postgresql as postgresql_dialect -import psycopg2 - -from dbt.source import Source -from dbt.logger import GLOBAL_LOGGER as logger -from dbt.adapters.factory import get_adapter -import dbt.exceptions - - -class Seeder: - def __init__(self, project): - self.project = project - run_environment = self.project.run_environment() - - def find_csvs(self): - return Source(self.project).get_csvs(self.project['data-paths']) - - def drop_table(self, cursor, schema, table): - sql = 'drop table if exists "{schema}"."{table}" cascade'.format( - schema=schema, table=table - ) - logger.info("Dropping table {}.{}".format(schema, table)) - cursor.execute(sql) - - def truncate_table(self, cursor, schema, table): - sql = 'truncate table "{schema}"."{table}"'.format( - schema=schema, table=table - ) - logger.info("Truncating table {}.{}".format(schema, table)) - cursor.execute(sql) - - def create_table(self, cursor, schema, table, virtual_table): - sql_table = csv_sql.make_table(virtual_table, db_schema=schema) - create_table_sql = csv_sql.make_create_table_statement( - sql_table, dialect='postgresql' - ) - logger.info("Creating table {}.{}".format(schema, table)) - cursor.execute(create_table_sql) - - def insert_into_table(self, cursor, schema, table, virtual_table): - headers = virtual_table.headers() - - header_csv = ", ".join(['"{}"'.format(h) for h in headers]) - base_insert = ('INSERT INTO "{schema}"."{table}" ({header_csv}) ' - 'VALUES '.format( - schema=schema, - table=table, - header_csv=header_csv - )) - records = [] - - def quote_or_null(s): - if s is None: - return 'null' - else: - return "'{}'".format(s) - - for row in virtual_table.to_rows(): - record_csv = ', '.join([quote_or_null(val) for val in row]) - record_csv_wrapped = "({})".format(record_csv) - records.append(record_csv_wrapped) - insert_sql = "{} {}".format(base_insert, ",\n".join(records)) - logger.info("Inserting {} records into table {}.{}" - .format(len(virtual_table.to_rows()), schema, table)) - cursor.execute(insert_sql) - - def existing_tables(self, cursor, schema): - sql = ("select tablename as name from pg_tables where " - "schemaname = '{schema}'".format(schema=schema)) - - cursor.execute(sql) - existing = set([row[0] for row in cursor.fetchall()]) - return existing - - def do_seed(self, schema, cursor, drop_existing): - existing_tables = self.existing_tables(cursor, schema) - - csvs = self.find_csvs() - statuses = [] - for csv in csvs: - - table_name = csv.name - fh = open(csv.filepath) - virtual_table = csv_table.Table.from_csv(fh, table_name) - - if table_name in existing_tables: - if drop_existing: - self.drop_table(cursor, schema, table_name) - self.create_table( - cursor, - schema, - table_name, - virtual_table - ) - else: - self.truncate_table(cursor, schema, table_name) - else: - self.create_table(cursor, schema, table_name, virtual_table) - - try: - self.insert_into_table( - cursor, schema, table_name, virtual_table - ) - statuses.append(True) - - except psycopg2.ProgrammingError as e: - statuses.append(False) - logger.info( - 'Encountered an error while inserting into table "{}"."{}"' - .format(schema, table_name) - ) - logger.info( - 'Check for formatting errors in {}'.format(csv.filepath) - ) - logger.info( - 'Try --drop-existing to delete and recreate the table ' - 'instead' - ) - logger.info(str(e)) - return all(statuses) - - def seed(self, drop_existing=False): - profile = self.project.run_environment() - - if profile.get('type') == 'snowflake': - raise dbt.exceptions.NotImplementedException( - "`seed` operation is not supported for snowflake.") - - adapter = get_adapter(profile) - connection = adapter.get_connection(profile) - - schema = connection.get('credentials', {}).get('schema') - - with connection.get('handle') as handle: - with handle.cursor() as cursor: - return self.do_seed(schema, cursor, drop_existing) diff --git a/dbt/task/seed.py b/dbt/task/seed.py index 4e7735c7106..d4c14818aec 100644 --- a/dbt/task/seed.py +++ b/dbt/task/seed.py @@ -2,7 +2,6 @@ from dbt.node_runners import SeedRunner from dbt.node_types import NodeType from dbt.runner import RunManager -from dbt.seeder import Seeder from dbt.task.base_task import RunnableTask import dbt.ui.printer diff --git a/requirements.txt b/requirements.txt index 98ec6af75e6..0d0e0a8b1a9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,6 @@ PyYAML>=3.11 psycopg2==2.7.1 sqlparse==0.2.3 networkx==1.11 -csvkit==0.9.1 snowplow-tracker==0.7.2 celery==3.1.23 voluptuous==0.10.5 diff --git a/setup.py b/setup.py index 00d8a365e29..71a66417360 100644 --- a/setup.py +++ b/setup.py @@ -38,7 +38,6 @@ 'psycopg2==2.7.1', 'sqlparse==0.2.3', 'networkx==1.11', - 'csvkit==0.9.1', 'snowplow-tracker==0.7.2', 'celery==3.1.23', 'voluptuous==0.10.5', From ce25e782ce61ae67491df6ab5f288d955e3474b5 Mon Sep 17 00:00:00 2001 From: Buck Ryan Date: Fri, 8 Dec 2017 18:11:33 -0500 Subject: [PATCH 15/34] update bigquery to use new data typing and to fix unicode issue --- dbt/adapters/bigquery.py | 49 ++++++++++++++++++++++------------------ dbt/parser.py | 1 + 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/dbt/adapters/bigquery.py b/dbt/adapters/bigquery.py index f701846a0be..9773b7db72e 100644 --- a/dbt/adapters/bigquery.py +++ b/dbt/adapters/bigquery.py @@ -19,7 +19,6 @@ import time import uuid -import six class BigQueryAdapter(PostgresAdapter): @@ -394,26 +393,25 @@ def quote_schema_and_table(cls, profile, schema, table, model_name=None): cls.quote(table)) @classmethod - def convert_agate_type(cls, agate_table, col_idx): - agate_type = agate_table.column_types[col_idx] - conversions = [ - (agate.Text, "STRING"), - (agate.Number, "FLOAT64"), - (agate.Boolean, "BOOL"), - (agate.DateTime, "DATETIME"), - (agate.Date, "DATE"), - ] - for agate_cls, sql in conversions: - if isinstance(agate_type, agate_cls): - return sql + def convert_text_type(cls, agate_table, col_idx): + return "STRING" @classmethod - def _agate_to_schema(cls, agate_table): - bq_schema = [] - for idx, col_name in enumerate(agate_table.column_names): - type_ = cls.convert_agate_type(agate_table, idx) - bq_schema.append(google.cloud.bigquery.SchemaField(col_name, type_)) - return bq_schema + def convert_number_type(cls, agate_table, col_idx): + decimals = agate_table.aggregate(agate.MaxPrecision(col_idx)) + return "FLOAT64" if decimals else "INT64" + + @classmethod + def convert_boolean_type(cls, agate_table, col_idx): + return "BOOL" + + @classmethod + def convert_datetime_type(cls, agate_table, col_idx): + return "DATETIME" + + @classmethod + def convert_date_type(cls, agate_table, col_idx): + return "DATE" @classmethod def create_csv_table(cls, profile, schema, table_name, agate_table): @@ -424,6 +422,14 @@ def reset_csv_table(cls, profile, schema, table_name, agate_table, full_refresh=False): cls.drop(profile, schema, table_name, "table") + @classmethod + def _agate_to_schema(cls, agate_table): + bq_schema = [] + for idx, col_name in enumerate(agate_table.column_names): + type_ = cls.convert_agate_type(agate_table, idx) + bq_schema.append(google.cloud.bigquery.SchemaField(col_name, type_)) + return bq_schema + @classmethod def load_csv_rows(cls, profile, schema, table_name, agate_table): bq_schema = cls._agate_to_schema(agate_table) @@ -431,9 +437,8 @@ def load_csv_rows(cls, profile, schema, table_name, agate_table): table = dataset.table(table_name, schema=bq_schema) conn = cls.get_connection(profile, None) client = conn.get('handle') - with six.StringIO() as in_mem_buffer: - agate_table.to_csv(in_mem_buffer) - job = table.upload_from_file(in_mem_buffer, "CSV", rewind=True, + with open(agate_table.original_abspath, "rb") as f: + job = table.upload_from_file(f, "CSV", rewind=True, client=client, skip_leading_rows=1) with cls.exception_handler(profile, "LOAD TABLE"): cls.poll_until_job_completes(job, cls.get_timeout(conn)) diff --git a/dbt/parser.py b/dbt/parser.py index b2ca04d10f7..60cd08c1b18 100644 --- a/dbt/parser.py +++ b/dbt/parser.py @@ -724,6 +724,7 @@ def parse_seed_file(file_match, package_name): table = agate.Table.from_csv(abspath) except ValueError as e: dbt.exceptions.raise_compiler_error(str(e), node) + table.original_abspath = abspath node['agate_table'] = table return node From 5b97a8663ba43393f00139d5ca97038a0a97280f Mon Sep 17 00:00:00 2001 From: Buck Ryan Date: Mon, 11 Dec 2017 10:45:38 -0500 Subject: [PATCH 16/34] update seed test --- test/integration/005_simple_seed_test/seed.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/005_simple_seed_test/seed.sql b/test/integration/005_simple_seed_test/seed.sql index 55c45bf6653..d4eea3ae495 100644 --- a/test/integration/005_simple_seed_test/seed.sql +++ b/test/integration/005_simple_seed_test/seed.sql @@ -1,5 +1,5 @@ create table {schema}.seed_expected ( - id NUMERIC, + id INTEGER, first_name TEXT, email TEXT, ip_address TEXT, From e9b57953b7fe6121ab9c55fb132e7c3876fcfcd6 Mon Sep 17 00:00:00 2001 From: Buck Ryan Date: Mon, 11 Dec 2017 10:51:43 -0500 Subject: [PATCH 17/34] fix abstract functions in base adapter --- dbt/adapters/default.py | 39 +++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/dbt/adapters/default.py b/dbt/adapters/default.py index af7385ca35e..e261eb25791 100644 --- a/dbt/adapters/default.py +++ b/dbt/adapters/default.py @@ -96,14 +96,20 @@ def cancel_connection(cls, project, connection): '`cancel_connection` is not implemented for this adapter!') @classmethod - def create_table(cls, profile, schema, table_name, agate_table): + def create_csv_table(cls, profile, schema, table_name, agate_table): raise dbt.exceptions.NotImplementedException( - '`create_table` is not implemented for this adapter!') + '`create_csv_table` is not implemented for this adapter!') @classmethod - def load_csv(cls, profile, schema, table_name, agate_table): + def reset_csv_table(cls, profile, schema, table_name, agate_table, + full_refresh=False): raise dbt.exceptions.NotImplementedException( - '`load_csv` is not implemented for this adapter!') + '`reset_csv_table` is not implemented for this adapter!') + + @classmethod + def load_csv_rows(cls, profile, schema, table_name, agate_table): + raise dbt.exceptions.NotImplementedException( + '`load_csv_rows` is not implemented for this adapter!') ### # FUNCTIONS THAT SHOULD BE ABSTRACT @@ -632,6 +638,31 @@ def handle_csv_table(cls, profile, schema, table_name, agate_table, cls.load_csv_rows(profile, schema, table_name, agate_table) cls.commit_if_has_connection(profile, None) + @classmethod + def convert_text_type(cls, agate_table, col_idx): + raise dbt.exceptions.NotImplementedException( + '`convert_text_type` is not implemented for this adapter!') + + @classmethod + def convert_number_type(cls, agate_table, col_idx): + raise dbt.exceptions.NotImplementedException( + '`convert_number_type` is not implemented for this adapter!') + + @classmethod + def convert_boolean_type(cls, agate_table, col_idx): + raise dbt.exceptions.NotImplementedException( + '`convert_boolean_type` is not implemented for this adapter!') + + @classmethod + def convert_datetime_type(cls, agate_table, col_idx): + raise dbt.exceptions.NotImplementedException( + '`convert_datetime_type` is not implemented for this adapter!') + + @classmethod + def convert_date_type(cls, agate_table, col_idx): + raise dbt.exceptions.NotImplementedException( + '`convert_date_type` is not implemented for this adapter!') + @classmethod def convert_agate_type(cls, agate_table, col_idx): agate_type = agate_table.column_types[col_idx] From b86159fd9a628e1c8c763fba9e876ef86a1dbc58 Mon Sep 17 00:00:00 2001 From: Buck Ryan Date: Mon, 11 Dec 2017 11:35:05 -0500 Subject: [PATCH 18/34] support time type --- dbt/adapters/bigquery.py | 12 ++++-------- dbt/adapters/default.py | 6 ++++++ dbt/adapters/postgres.py | 4 ++++ dbt/adapters/redshift.py | 4 ++++ 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/dbt/adapters/bigquery.py b/dbt/adapters/bigquery.py index 9773b7db72e..7d30e393e37 100644 --- a/dbt/adapters/bigquery.py +++ b/dbt/adapters/bigquery.py @@ -394,24 +394,20 @@ def quote_schema_and_table(cls, profile, schema, table, model_name=None): @classmethod def convert_text_type(cls, agate_table, col_idx): - return "STRING" + return "string" @classmethod def convert_number_type(cls, agate_table, col_idx): decimals = agate_table.aggregate(agate.MaxPrecision(col_idx)) - return "FLOAT64" if decimals else "INT64" + return "float64" if decimals else "int64" @classmethod def convert_boolean_type(cls, agate_table, col_idx): - return "BOOL" + return "bool" @classmethod def convert_datetime_type(cls, agate_table, col_idx): - return "DATETIME" - - @classmethod - def convert_date_type(cls, agate_table, col_idx): - return "DATE" + return "datetime" @classmethod def create_csv_table(cls, profile, schema, table_name, agate_table): diff --git a/dbt/adapters/default.py b/dbt/adapters/default.py index e261eb25791..940bf30b561 100644 --- a/dbt/adapters/default.py +++ b/dbt/adapters/default.py @@ -663,6 +663,11 @@ def convert_date_type(cls, agate_table, col_idx): raise dbt.exceptions.NotImplementedException( '`convert_date_type` is not implemented for this adapter!') + @classmethod + def convert_time_type(cls, agate_table, col_idx): + raise dbt.exceptions.NotImplementedException( + '`convert_time_type` is not implemented for this adapter!') + @classmethod def convert_agate_type(cls, agate_table, col_idx): agate_type = agate_table.column_types[col_idx] @@ -672,6 +677,7 @@ def convert_agate_type(cls, agate_table, col_idx): (agate.Boolean, cls.convert_boolean_type), (agate.DateTime, cls.convert_datetime_type), (agate.Date, cls.convert_date_type), + (agate.TimeDelta, cls.convert_time_type), ] for agate_cls, func in conversions: if isinstance(agate_type, agate_cls): diff --git a/dbt/adapters/postgres.py b/dbt/adapters/postgres.py index ab1574de3a0..3f61679c16c 100644 --- a/dbt/adapters/postgres.py +++ b/dbt/adapters/postgres.py @@ -195,6 +195,10 @@ def convert_datetime_type(cls, agate_table, col_idx): def convert_date_type(cls, agate_table, col_idx): return "date" + @classmethod + def convert_time_type(cls, agate_table, col_idx): + return "time" + @classmethod def create_csv_table(cls, profile, schema, table_name, agate_table): col_sqls = [] diff --git a/dbt/adapters/redshift.py b/dbt/adapters/redshift.py index ace27f911e9..af3080c102b 100644 --- a/dbt/adapters/redshift.py +++ b/dbt/adapters/redshift.py @@ -50,3 +50,7 @@ def convert_text_type(cls, agate_table, col_idx): lens = (len(d.encode("utf-8")) for d in column.values_without_nulls()) max_len = max(lens) if lens else 64 return "varchar({})".format(max_len) + + @classmethod + def convert_time_type(cls, agate_table, col_idx): + return "varchar(24)" From 797a966ddf8db87aa54bb6cfebe3eac273ed479f Mon Sep 17 00:00:00 2001 From: Connor McArthur Date: Tue, 12 Dec 2017 15:42:57 -0500 Subject: [PATCH 19/34] try pinning crypto, pyopenssl versions --- requirements.txt | 2 ++ setup.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/requirements.txt b/requirements.txt index 0d0e0a8b1a9..64a5b30adca 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,3 +13,5 @@ snowflake-connector-python==1.4.9 colorama==0.3.9 google-cloud-bigquery==0.26.0 agate>=1.6,<2 +cryptography==2.0.3 +pyOpenSSL==17.3.0 diff --git a/setup.py b/setup.py index 71a66417360..2a80e527e99 100644 --- a/setup.py +++ b/setup.py @@ -45,5 +45,7 @@ 'colorama==0.3.9', 'google-cloud-bigquery==0.26.0', 'agate>=1.6,<2', + 'cryptography==2.0.3', + 'pyOpenSSL==17.3.0', ] ) From 4f67dcaec2f5f6aa1d1b909f2412e38951346262 Mon Sep 17 00:00:00 2001 From: Connor McArthur Date: Tue, 12 Dec 2017 15:58:20 -0500 Subject: [PATCH 20/34] remove unnecessary version pins --- requirements.txt | 2 -- setup.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index 37dd10ddcd4..9c2735f1523 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,5 +13,3 @@ snowflake-connector-python>=1.4.9 colorama==0.3.9 google-cloud-bigquery==0.26.0 agate>=1.6,<2 -cryptography==2.0.3 -pyOpenSSL==17.3.0 diff --git a/setup.py b/setup.py index 2a80e527e99..71a66417360 100644 --- a/setup.py +++ b/setup.py @@ -45,7 +45,5 @@ 'colorama==0.3.9', 'google-cloud-bigquery==0.26.0', 'agate>=1.6,<2', - 'cryptography==2.0.3', - 'pyOpenSSL==17.3.0', ] ) From 58b98f0a5b70cc0eb363714f440d4ed667ae209b Mon Sep 17 00:00:00 2001 From: Connor McArthur Date: Fri, 15 Dec 2017 13:32:58 -0500 Subject: [PATCH 21/34] insert all at once, rather than one query per row --- dbt/adapters/postgres.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/dbt/adapters/postgres.py b/dbt/adapters/postgres.py index 3f61679c16c..8ec9a5c86de 100644 --- a/dbt/adapters/postgres.py +++ b/dbt/adapters/postgres.py @@ -220,11 +220,19 @@ def reset_csv_table(cls, profile, schema, table_name, agate_table, @classmethod def load_csv_rows(cls, profile, schema, table_name, agate_table): + bindings = [] + placeholders = [] cols_sql = ", ".join(cls.quote(c) for c in agate_table.column_names) - placeholders = ", ".join("%s" for _ in agate_table.column_names) - sql = ('insert into {}.{} ({}) values ({})' + + for row in agate_table.rows: + bindings += row + placeholders.append("({})".format( + ", ".join("%s" for _ in agate_table.column_names))) + + sql = ('insert into {}.{} ({}) values {}' .format(cls.quote(schema), cls.quote(table_name), - cols_sql, placeholders)) - for row in agate_table.rows: - cls.add_query(profile, sql, bindings=row) + cols_sql, + ",\n".join(placeholders))) + + cls.add_query(profile, sql, bindings=bindings) From 90380d55ea3836442d806a7b122459d0cbf65e6c Mon Sep 17 00:00:00 2001 From: Connor McArthur Date: Mon, 18 Dec 2017 11:59:45 -0500 Subject: [PATCH 22/34] do not quote field names on creation --- dbt/adapters/postgres.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/adapters/postgres.py b/dbt/adapters/postgres.py index 8ec9a5c86de..60962cb79e5 100644 --- a/dbt/adapters/postgres.py +++ b/dbt/adapters/postgres.py @@ -222,7 +222,7 @@ def reset_csv_table(cls, profile, schema, table_name, agate_table, def load_csv_rows(cls, profile, schema, table_name, agate_table): bindings = [] placeholders = [] - cols_sql = ", ".join(cls.quote(c) for c in agate_table.column_names) + cols_sql = ", ".join(c for c in agate_table.column_names) for row in agate_table.rows: bindings += row From 122fba724252af59e629a95219d01fb9acf4ceea Mon Sep 17 00:00:00 2001 From: Connor McArthur Date: Mon, 18 Dec 2017 12:59:47 -0500 Subject: [PATCH 23/34] bad --- dbt/adapters/postgres.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/adapters/postgres.py b/dbt/adapters/postgres.py index 60962cb79e5..1567d9e451b 100644 --- a/dbt/adapters/postgres.py +++ b/dbt/adapters/postgres.py @@ -204,7 +204,7 @@ def create_csv_table(cls, profile, schema, table_name, agate_table): col_sqls = [] for idx, col_name in enumerate(agate_table.column_names): type_ = cls.convert_agate_type(agate_table, idx) - col_sqls.append('"{}" {}'.format(col_name, type_)) + col_sqls.append('{} {}'.format(col_name, type_)) sql = 'create table "{}"."{}" ({})'.format(schema, table_name, ", ".join(col_sqls)) return cls.add_query(profile, sql) From 3688286ff710f84b9020cbcf5f073a95953540c9 Mon Sep 17 00:00:00 2001 From: Connor McArthur Date: Mon, 18 Dec 2017 13:00:51 -0500 Subject: [PATCH 24/34] quiet down parsedatetime logger --- dbt/logger.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dbt/logger.py b/dbt/logger.py index 2daa036e6c1..7ce14b7ca7e 100644 --- a/dbt/logger.py +++ b/dbt/logger.py @@ -12,6 +12,7 @@ logging.getLogger('urllib3').setLevel(logging.CRITICAL) logging.getLogger('google').setLevel(logging.CRITICAL) logging.getLogger('snowflake.connector').setLevel(logging.CRITICAL) +logging.getLogger('parsedatetime').setLevel(logging.CRITICAL) # Colorama needs some help on windows because we're using logger.info # intead of print(). If the Windows env doesn't have a TERM var set, From 256fca080ad8fee9ed803b39dcfd9a3537cddcd3 Mon Sep 17 00:00:00 2001 From: Connor McArthur Date: Tue, 19 Dec 2017 09:34:30 -0500 Subject: [PATCH 25/34] pep8 --- dbt/adapters/bigquery.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/dbt/adapters/bigquery.py b/dbt/adapters/bigquery.py index 7d30e393e37..185469a7846 100644 --- a/dbt/adapters/bigquery.py +++ b/dbt/adapters/bigquery.py @@ -41,7 +41,9 @@ class BigQueryAdapter(PostgresAdapter): def handle_error(cls, error, message, sql): logger.debug(message.format(sql=sql)) logger.debug(error) - error_msg = "\n".join([error['message'] for error in error.errors]) + error_msg = "\n".join( + [item['message'] for item in error.errors]) + raise dbt.exceptions.DatabaseException(error_msg) @classmethod @@ -423,7 +425,8 @@ def _agate_to_schema(cls, agate_table): bq_schema = [] for idx, col_name in enumerate(agate_table.column_names): type_ = cls.convert_agate_type(agate_table, idx) - bq_schema.append(google.cloud.bigquery.SchemaField(col_name, type_)) + bq_schema.append( + google.cloud.bigquery.SchemaField(col_name, type_)) return bq_schema @classmethod From f15044f177a7c33abea807ef25b9964ab82543db Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Wed, 17 Jan 2018 00:01:06 -0500 Subject: [PATCH 26/34] UI updates + node conformity for seed nodes --- dbt/adapters/default.py | 2 +- dbt/contracts/graph/parsed.py | 5 ++++- dbt/contracts/graph/unparsed.py | 3 ++- dbt/node_runners.py | 23 ++++++++++++++++++----- dbt/parser.py | 11 ++++++++--- dbt/ui/printer.py | 16 ++++++++++++++++ 6 files changed, 49 insertions(+), 11 deletions(-) diff --git a/dbt/adapters/default.py b/dbt/adapters/default.py index 078ffb9f5d8..5a30d2c46d2 100644 --- a/dbt/adapters/default.py +++ b/dbt/adapters/default.py @@ -540,7 +540,7 @@ def add_query(cls, profile, sql, model_name=None, auto_begin=True, pre = time.time() cursor = connection.get('handle').cursor() - cursor.execute(sql, (bindings or ())) + cursor.execute(sql, bindings) logger.debug("SQL status: %s in %0.2f seconds", cls.get_status(cursor), (time.time() - pre)) diff --git a/dbt/contracts/graph/parsed.py b/dbt/contracts/graph/parsed.py index 570b73f25ed..80de0a7319c 100644 --- a/dbt/contracts/graph/parsed.py +++ b/dbt/contracts/graph/parsed.py @@ -1,4 +1,4 @@ -from voluptuous import Schema, Required, All, Any, Length, ALLOW_EXTRA +from voluptuous import Schema, Required, All, Any, Length, ALLOW_EXTRA, Optional import dbt.exceptions @@ -43,6 +43,9 @@ Required('empty'): bool, Required('config'): config_contract, Required('tags'): All(set), + + # For csv files + Optional('agate_table'): object, }) parsed_nodes_contract = Schema({ diff --git a/dbt/contracts/graph/unparsed.py b/dbt/contracts/graph/unparsed.py index 4dfcf8e0db1..fc4daae7103 100644 --- a/dbt/contracts/graph/unparsed.py +++ b/dbt/contracts/graph/unparsed.py @@ -22,7 +22,8 @@ Required('resource_type'): Any(NodeType.Model, NodeType.Test, NodeType.Analysis, - NodeType.Operation) + NodeType.Operation, + NodeType.Seed) }) unparsed_nodes_contract = Schema([unparsed_node_contract]) diff --git a/dbt/node_runners.py b/dbt/node_runners.py index 60f8331c713..d877c887c17 100644 --- a/dbt/node_runners.py +++ b/dbt/node_runners.py @@ -484,11 +484,11 @@ def print_result_line(self, result): self.num_nodes) -class SeedRunner(CompileRunner): +class SeedRunner(ModelRunner): def describe_node(self): - table_name = self.node["table_name"] - return "seed {}".format(table_name) + schema_name = self.node.get('schema') + return "seed file {}.{}".format(schema_name, self.node["name"]) @classmethod def before_run(cls, project, adapter, flat_graph): @@ -501,11 +501,24 @@ def before_execute(self): def execute(self, compiled_node, existing_, flat_graph): schema = compiled_node["schema"] - table_name = compiled_node["table_name"] + table_name = compiled_node["name"] table = compiled_node["agate_table"] self.adapter.handle_csv_table(self.profile, schema, table_name, table, full_refresh=dbt.flags.FULL_REFRESH) - return RunModelResult(compiled_node) + + if dbt.flags.FULL_REFRESH: + status = 'CREATE {}'.format(len(table.rows)) + else: + status = 'INSERT {}'.format(len(table.rows)) + + return RunModelResult(compiled_node, status=status) def compile(self, flat_graph): return self.node + + def print_result_line(self, result): + schema_name = self.node.get('schema') + dbt.ui.printer.print_seed_result_line(result, + schema_name, + self.node_index, + self.num_nodes) diff --git a/dbt/parser.py b/dbt/parser.py index 8b2ce3f218d..145d3990ff0 100644 --- a/dbt/parser.py +++ b/dbt/parser.py @@ -700,7 +700,7 @@ def parse_archives_from_project(project): return archives -def parse_seed_file(file_match, package_name): +def parse_seed_file(file_match, root_dir, package_name): abspath = file_match['absolute_path'] logger.debug("Parsing {}".format(abspath)) to_return = {} @@ -708,9 +708,12 @@ def parse_seed_file(file_match, package_name): node = { 'unique_id': get_path(NodeType.Seed, package_name, table_name), 'path': file_match['relative_path'], - 'table_name': table_name, 'name': table_name, + 'root_path': root_dir, 'resource_type': NodeType.Seed, + # Give this raw_sql so it conforms to the node spec, + # use dummy text so it doesn't look like an empty node + 'raw_sql': '-- csv --', 'package_name': package_name, 'depends_on': {'nodes': []}, 'original_file_path': os.path.join(file_match.get('searched_path'), @@ -736,11 +739,13 @@ def load_and_parse_seeds(package_name, root_project, all_projects, root_dir, extension) result = {} for file_match in file_matches: - node = parse_seed_file(file_match, package_name) + node = parse_seed_file(file_match, root_dir, package_name) node_path = node['unique_id'] parsed = parse_node(node, node_path, root_project, all_projects.get(package_name), all_projects, tags=tags, macros=macros) # parsed['empty'] = False result[node_path] = parsed + + dbt.contracts.graph.parsed.validate_nodes(result) return result diff --git a/dbt/ui/printer.py b/dbt/ui/printer.py index 33904c1c2b4..871bedd2c63 100644 --- a/dbt/ui/printer.py +++ b/dbt/ui/printer.py @@ -178,6 +178,22 @@ def print_archive_result_line(result, index, total): result.execution_time) +def print_seed_result_line(result, schema_name, index, total): + model = result.node + + info, status = get_printable_result(result, 'loaded', 'loading') + + print_fancy_output_line( + "{info} seed file {schema}.{relation}".format( + info=info, + schema=schema_name, + relation=model.get('name')), + status, + index, + total, + result.execution_time) + + def interpret_run_result(result): if result.errored or result.failed: return 'error' From 868d3382b901dd685e8f32d46e0d89874244940f Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Thu, 18 Jan 2018 16:54:44 -0500 Subject: [PATCH 27/34] add seed to list of resource types, cleanup --- dbt/compilation.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/dbt/compilation.py b/dbt/compilation.py index 706009a77ca..39bc4af302c 100644 --- a/dbt/compilation.py +++ b/dbt/compilation.py @@ -34,17 +34,10 @@ def print_compile_stats(stats): NodeType.Analysis: 'analyses', NodeType.Macro: 'macros', NodeType.Operation: 'operations', + NodeType.Seed: 'seed files', } - results = { - NodeType.Model: 0, - NodeType.Test: 0, - NodeType.Archive: 0, - NodeType.Analysis: 0, - NodeType.Macro: 0, - NodeType.Operation: 0, - } - + results = {k: 0 for k in names.keys()} results.update(stats) stat_line = ", ".join( From 0b0c4232d0db62324a2c1f6d0a6eb6a79af10a66 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Fri, 19 Jan 2018 01:42:48 -0500 Subject: [PATCH 28/34] show option for CSVs --- dbt/main.py | 5 +++++ dbt/task/seed.py | 26 +++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/dbt/main.py b/dbt/main.py index 6e01d822e0b..75b12f1f657 100644 --- a/dbt/main.py +++ b/dbt/main.py @@ -388,6 +388,11 @@ def parse_args(args): action='store_true', help='Drop existing seed tables and recreate them' ) + seed_sub.add_argument( + '--show', + action='store_true', + help='Show a sample of the loaded data in the terminal' + ) seed_sub.set_defaults(cls=seed_task.SeedTask, which='seed') sub = subs.add_parser('test', parents=[base_subparser]) diff --git a/dbt/task/seed.py b/dbt/task/seed.py index d4c14818aec..84d62e8855a 100644 --- a/dbt/task/seed.py +++ b/dbt/task/seed.py @@ -1,4 +1,5 @@ -import os +import random +from dbt.logger import GLOBAL_LOGGER as logger from dbt.node_runners import SeedRunner from dbt.node_types import NodeType from dbt.runner import RunManager @@ -19,5 +20,28 @@ def run(self): "resource_types": [NodeType.Seed], } results = runner.run_flat(query, SeedRunner) + + if self.args.show: + self.show_tables(results) + dbt.ui.printer.print_run_end_messages(results) return results + + def show_table(self, result): + table = result.node['agate_table'] + rand_table = table.order_by(lambda x: random.random()) + + schema = result.node['schema'] + name = result.node['name'] + + header = "Random sample of table: {}.{}".format(schema, name) + logger.info("") + ogger.info(header) + logger.info("-" * len(header)) + rand_table.print_table(max_rows=10, max_columns=None) + logger.info("") + + def show_tables(self, results): + for result in results: + if not result.errored: + self.show_table(result) From 6d9af2b7e218a562ef942c64138b385bb466a022 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Fri, 19 Jan 2018 11:30:10 -0500 Subject: [PATCH 29/34] typo --- dbt/task/seed.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/task/seed.py b/dbt/task/seed.py index 84d62e8855a..1f3ac5f7e42 100644 --- a/dbt/task/seed.py +++ b/dbt/task/seed.py @@ -36,7 +36,7 @@ def show_table(self, result): header = "Random sample of table: {}.{}".format(schema, name) logger.info("") - ogger.info(header) + logger.info(header) logger.info("-" * len(header)) rand_table.print_table(max_rows=10, max_columns=None) logger.info("") From 4a49afd9cc3e7b78aff4afd178f5f3cdb87c1cc2 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Tue, 30 Jan 2018 17:02:41 -0500 Subject: [PATCH 30/34] pep8 --- dbt/contracts/graph/parsed.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbt/contracts/graph/parsed.py b/dbt/contracts/graph/parsed.py index 80de0a7319c..e5dd0cba9ab 100644 --- a/dbt/contracts/graph/parsed.py +++ b/dbt/contracts/graph/parsed.py @@ -1,4 +1,5 @@ -from voluptuous import Schema, Required, All, Any, Length, ALLOW_EXTRA, Optional +from voluptuous import Schema, Required, All, Any, Length, ALLOW_EXTRA +from voluptuous import Optional import dbt.exceptions From f51850522a44227357edacadb2848d102db5b76e Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Tue, 30 Jan 2018 19:03:08 -0500 Subject: [PATCH 31/34] move agate import to avoid strange warnings --- dbt/adapters/bigquery.py | 2 +- dbt/adapters/default.py | 2 +- dbt/adapters/postgres.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dbt/adapters/bigquery.py b/dbt/adapters/bigquery.py index addd5d26e4a..4e3664986ea 100644 --- a/dbt/adapters/bigquery.py +++ b/dbt/adapters/bigquery.py @@ -15,7 +15,6 @@ import google.oauth2 import google.cloud.exceptions import google.cloud.bigquery -import agate import time import uuid @@ -406,6 +405,7 @@ def convert_text_type(cls, agate_table, col_idx): @classmethod def convert_number_type(cls, agate_table, col_idx): + import agate decimals = agate_table.aggregate(agate.MaxPrecision(col_idx)) return "float64" if decimals else "int64" diff --git a/dbt/adapters/default.py b/dbt/adapters/default.py index 5a30d2c46d2..a2046e64c17 100644 --- a/dbt/adapters/default.py +++ b/dbt/adapters/default.py @@ -2,7 +2,6 @@ import itertools import multiprocessing import time -import agate from contextlib import contextmanager @@ -676,6 +675,7 @@ def convert_time_type(cls, agate_table, col_idx): @classmethod def convert_agate_type(cls, agate_table, col_idx): + import agate agate_type = agate_table.column_types[col_idx] conversions = [ (agate.Text, cls.convert_text_type), diff --git a/dbt/adapters/postgres.py b/dbt/adapters/postgres.py index 1567d9e451b..5a0b489af6b 100644 --- a/dbt/adapters/postgres.py +++ b/dbt/adapters/postgres.py @@ -1,5 +1,4 @@ import psycopg2 -import agate from contextlib import contextmanager @@ -174,6 +173,7 @@ def convert_text_type(cls, agate_table, col_idx): @classmethod def convert_number_type(cls, agate_table, col_idx): + import agate column = agate_table.columns[col_idx] precision = max_digits(column.values_without_nulls()) # agate uses the term Precision but in this context, it is really the From 9d92eb6d79bf4eb17b63bda9d8836763f3d1fa95 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Tue, 30 Jan 2018 19:26:11 -0500 Subject: [PATCH 32/34] deprecation warning for --drop-existing --- dbt/deprecations.py | 8 ++++++++ dbt/main.py | 11 +++++++++-- dbt/parser.py | 2 +- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/dbt/deprecations.py b/dbt/deprecations.py index a4e1a222934..23566b080af 100644 --- a/dbt/deprecations.py +++ b/dbt/deprecations.py @@ -21,6 +21,13 @@ def show(self, *args, **kwargs): # removed (in favor of 'target') in DBT version 0.7.0""" +class SeedDropExistingDeprecation(DBTDeprecation): + name = 'drop-existing' + description = """The --drop-existing argument has been deprecated. Please + use --full-refresh instead. The --drop-existing option will be removed in a + future version of dbt.""" + + def warn(name, *args, **kwargs): if name not in deprecations: # this should (hopefully) never happen @@ -37,6 +44,7 @@ def warn(name, *args, **kwargs): active_deprecations = set() deprecations_list = [ + SeedDropExistingDeprecation() ] deprecations = {d.name: d for d in deprecations_list} diff --git a/dbt/main.py b/dbt/main.py index 75b12f1f657..bee0e59d39e 100644 --- a/dbt/main.py +++ b/dbt/main.py @@ -22,6 +22,7 @@ import dbt.config as config import dbt.ui.printer import dbt.compat +import dbt.deprecations from dbt.utils import ExitCodes @@ -232,8 +233,14 @@ def invoke_dbt(parsed): return None flags.NON_DESTRUCTIVE = getattr(proj.args, 'non_destructive', False) - refresh_flags = ['full_refresh', 'drop_existing'] - if any(getattr(proj.args, attr, False) for attr in refresh_flags): + + arg_drop_existing = getattr(proj.args, 'drop_existing', False) + arg_full_refresh = getattr(proj.args, 'full_refresh', False) + + if arg_drop_existing: + dbt.deprecations.warn('drop-existing') + flags.FULL_REFRESH = True + elif arg_full_refresh: flags.FULL_REFRESH = True logger.debug("running dbt with arguments %s", parsed) diff --git a/dbt/parser.py b/dbt/parser.py index 145d3990ff0..9eb1fb694aa 100644 --- a/dbt/parser.py +++ b/dbt/parser.py @@ -469,7 +469,7 @@ def parse_schema_tests(tests, root_project, projects, macros=None): for model_name, test_spec in test_yml.items(): if test_spec is None or test_spec.get('constraints') is None: test_path = test.get('original_file_path', '') - logger.warn(no_tests_warning.format(model_name, test_path)) + logger.warning(no_tests_warning.format(model_name, test_path)) continue for test_type, configs in test_spec.get('constraints', {}).items(): From c597803d16973f392f18f2b8c199bab0b2ab5d2c Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Tue, 30 Jan 2018 19:32:34 -0500 Subject: [PATCH 33/34] quote column names in seed files --- dbt/adapters/postgres.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbt/adapters/postgres.py b/dbt/adapters/postgres.py index 5a0b489af6b..987e8bf6863 100644 --- a/dbt/adapters/postgres.py +++ b/dbt/adapters/postgres.py @@ -204,7 +204,7 @@ def create_csv_table(cls, profile, schema, table_name, agate_table): col_sqls = [] for idx, col_name in enumerate(agate_table.column_names): type_ = cls.convert_agate_type(agate_table, idx) - col_sqls.append('{} {}'.format(col_name, type_)) + col_sqls.append('{} {}'.format(cls.quote(col_name), type_)) sql = 'create table "{}"."{}" ({})'.format(schema, table_name, ", ".join(col_sqls)) return cls.add_query(profile, sql) @@ -222,7 +222,7 @@ def reset_csv_table(cls, profile, schema, table_name, agate_table, def load_csv_rows(cls, profile, schema, table_name, agate_table): bindings = [] placeholders = [] - cols_sql = ", ".join(c for c in agate_table.column_names) + cols_sql = ", ".join(cls.quote(c) for c in agate_table.column_names) for row in agate_table.rows: bindings += row From 97b616e6fb76693470f0817a2836779965a72926 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Fri, 9 Feb 2018 21:27:44 -0500 Subject: [PATCH 34/34] revert quoting change (breaks Snowflake). Hush warnings --- dbt/adapters/postgres.py | 4 ++-- test/integration/base.py | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/dbt/adapters/postgres.py b/dbt/adapters/postgres.py index 987e8bf6863..5a0b489af6b 100644 --- a/dbt/adapters/postgres.py +++ b/dbt/adapters/postgres.py @@ -204,7 +204,7 @@ def create_csv_table(cls, profile, schema, table_name, agate_table): col_sqls = [] for idx, col_name in enumerate(agate_table.column_names): type_ = cls.convert_agate_type(agate_table, idx) - col_sqls.append('{} {}'.format(cls.quote(col_name), type_)) + col_sqls.append('{} {}'.format(col_name, type_)) sql = 'create table "{}"."{}" ({})'.format(schema, table_name, ", ".join(col_sqls)) return cls.add_query(profile, sql) @@ -222,7 +222,7 @@ def reset_csv_table(cls, profile, schema, table_name, agate_table, def load_csv_rows(cls, profile, schema, table_name, agate_table): bindings = [] placeholders = [] - cols_sql = ", ".join(cls.quote(c) for c in agate_table.column_names) + cols_sql = ", ".join(c for c in agate_table.column_names) for row in agate_table.rows: bindings += row diff --git a/test/integration/base.py b/test/integration/base.py index 76747760f80..05d48487809 100644 --- a/test/integration/base.py +++ b/test/integration/base.py @@ -432,3 +432,7 @@ def assertTableColumnsEqual(self, table_a, table_b, table_a_schema=None, table_b table_a_result, table_b_result ) + + def assertEquals(self, *args, **kwargs): + # assertEquals is deprecated. This makes the warnings less chatty + self.assertEqual(*args, **kwargs)