-
Notifications
You must be signed in to change notification settings - Fork 1.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Bigquery #455
Bigquery #455
Changes from 35 commits
b2781b9
ba28422
f5b70ac
3e97021
dfdb6e4
c4802db
d82eba2
17b330b
5d3ca1e
f3ce484
ba53a7a
db99614
d449717
e5008bd
b52af47
150e4eb
6b92e1b
1e0e989
79a7759
ddb1296
28f61b8
173a1b7
ac356c3
3bd2e61
1c4ed3c
baa542f
6f3a1a2
b2042ae
a9cf033
a066798
59b827f
75e2ba6
a644e5d
bf10de9
65a872a
29c1f8b
48321d6
8049b17
b5d44e6
fc02cfa
4f8d2bd
9849fa0
349c12e
397dc9c
199cf0e
612eef9
f5aa68a
c222fc2
c2fc1e1
3c943a9
11c831a
a2b7af6
b3065ef
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,300 @@ | ||
from __future__ import absolute_import | ||
|
||
from contextlib import contextmanager | ||
|
||
import dbt.exceptions | ||
import dbt.flags as flags | ||
import dbt.materializers | ||
import dbt.clients.gcloud | ||
|
||
from dbt.adapters.postgres import PostgresAdapter | ||
from dbt.contracts.connection import validate_connection | ||
from dbt.logger import GLOBAL_LOGGER as logger | ||
|
||
|
||
class BigQueryAdapter(PostgresAdapter):
    """dbt adapter for Google BigQuery.

    BigQuery is driven through the google-cloud-bigquery client API rather
    than a DB-API cursor, and it has no transactions, so several of the
    inherited PostgresAdapter methods are overridden as no-ops or raise
    NotImplementedException.
    """

    # Timeout, in milliseconds, for synchronous queries.
    QUERY_TIMEOUT = 60 * 1000
    # Python dependencies this adapter needs (installed on demand).
    requires = {'bigquery': 'google-cloud-bigquery==0.24.0'}

    @classmethod
    def initialize(cls):
        """Lazily import the google libraries and publish the `google`
        package at module scope so the methods below can reference it.

        Deferred so dbt can run without these packages installed when the
        bigquery adapter is not in use.
        """
        google = cls._import('google')
        google.auth = cls._import('google.auth')
        google.oauth2 = cls._import('google.oauth2')

        google.cloud = cls._import('google.cloud')
        google.cloud.bigquery = cls._import('google.cloud.bigquery')
        google.cloud.exceptions = cls._import('google.cloud.exceptions')

        globals()['google'] = google

    @classmethod
    def get_materializer(cls, node, existing):
        """Return a materializer for `node`.

        BigQuery relations are created through the client API, not via DDL
        statements, hence the NonDDLMaterializer.
        """
        materializer = dbt.materializers.NonDDLMaterializer
        return dbt.materializers.make_materializer(materializer,
                                                   cls,
                                                   node,
                                                   existing)

    @classmethod
    def handle_error(cls, error, message, sql):
        """Log a google-cloud API error and re-raise it as a dbt
        RuntimeException whose message joins the individual API errors."""
        logger.debug(message.format(sql=sql))
        logger.debug(error)
        # Use a distinct loop variable: the original comprehension shadowed
        # the `error` parameter it was iterating over.
        error_msg = "\n".join([err['message'] for err in error.errors])
        raise dbt.exceptions.RuntimeException(error_msg)

    @classmethod
    @contextmanager
    def exception_handler(cls, profile, sql, model_name=None,
                          connection_name='master'):
        """Context manager translating google-cloud exceptions raised by the
        wrapped block into dbt RuntimeExceptions."""
        try:
            yield

        except google.cloud.exceptions.BadRequest as e:
            message = "Bad request while running:\n{sql}"
            cls.handle_error(e, message, sql)

        except google.cloud.exceptions.Forbidden as e:
            message = "Access denied while running:\n{sql}"
            cls.handle_error(e, message, sql)

        except Exception as e:
            logger.debug("Unhandled error while running:\n{}".format(sql))
            logger.debug(e)
            raise dbt.exceptions.RuntimeException(e)

    @classmethod
    def type(cls):
        """Adapter type name used in profiles.yml."""
        return 'bigquery'

    @classmethod
    def date_function(cls):
        """SQL expression for the current timestamp in Standard SQL."""
        return 'CURRENT_TIMESTAMP()'

    @classmethod
    def begin(cls, profile, name='master'):
        # BigQuery has no transactions; beginning one is a no-op.
        pass

    @classmethod
    def commit(cls, connection):
        # BigQuery has no transactions; committing is a no-op.
        pass

    @classmethod
    def get_status(cls, cursor):
        # There is no DB-API cursor for BigQuery, so this can never be
        # meaningfully implemented. (Dead cursor-status code removed.)
        raise dbt.exceptions.NotImplementedException(
            '`get_status` is not implemented for this adapter!')

    @classmethod
    def get_bigquery_credentials(cls, config):
        """Build google credentials from the profile `config` dict.

        Returns None for the 'oauth' method so that the bigquery Client
        falls back to the application default credentials flow.

        :raises dbt.exceptions.FailedToConnectException: on an unknown
            `method` value.
        """
        method = config.get('method')
        # lowercase local name -- review flagged the upcased `Creds`
        creds_class = google.oauth2.service_account.Credentials

        if method == 'oauth':
            # Client() will use application default credentials.
            return None

        elif method == 'service-account':
            keyfile = config.get('keyfile')
            return creds_class.from_service_account_file(keyfile)

        elif method == 'service-account-json':
            details = config.get('config')
            return creds_class.from_service_account_info(details)

        error = ('Bad `method` in profile: "{}". Should be "oauth", '
                 '"service-account", or "service-account-json"'.format(method))
        raise dbt.exceptions.FailedToConnectException(error)

    @classmethod
    def get_bigquery_client(cls, config):
        """Construct a bigquery Client for the profile `config`."""
        project_name = config.get('project')
        creds = cls.get_bigquery_credentials(config)

        return google.cloud.bigquery.Client(project=project_name,
                                            credentials=creds)

    @classmethod
    def open_connection(cls, connection):
        """Open `connection`, returning a copy with a live client handle.

        If default credentials are missing, walks the user through the
        gcloud login flow once and retries.
        """
        if connection.get('state') == 'open':
            logger.debug('Connection is already open, skipping open.')
            return connection

        result = connection.copy()
        credentials = connection.get('credentials', {})

        try:
            handle = cls.get_bigquery_client(credentials)

        except google.auth.exceptions.DefaultCredentialsError:
            logger.info("Please log into GCP to continue")
            dbt.clients.gcloud.setup_default_credentials()

            handle = cls.get_bigquery_client(credentials)

        except Exception as e:
            logger.debug("Got an error when attempting to create a bigquery "
                         "client: '{}'".format(e))

            result['handle'] = None
            result['state'] = 'fail'

            raise dbt.exceptions.FailedToConnectException(str(e))

        result['handle'] = handle
        result['state'] = 'open'
        return result

    @classmethod
    def query_for_existing(cls, profile, schema, model_name=None):
        """Return {table_name: 'table' | 'view' | None} for `schema`."""
        dataset = cls.get_dataset(profile, schema, model_name)
        tables = dataset.list_tables()

        relation_type_lookup = {
            'TABLE': 'table',
            'VIEW': 'view'
        }

        existing = [(table.name, relation_type_lookup.get(table.table_type))
                    for table in tables]

        return dict(existing)

    @classmethod
    def drop_view(cls, profile, view_name, model_name):
        """Delete the view `view_name` from the default schema's dataset."""
        schema = cls.get_default_schema(profile)
        dataset = cls.get_dataset(profile, schema, model_name)
        view = dataset.table(view_name)
        view.delete()

    @classmethod
    def rename(cls, profile, from_name, to_name, model_name=None):
        # The BigQuery API offers no rename operation.
        message = 'Cannot rename bigquery relation {} to {}'.format(
            from_name, to_name)
        raise dbt.exceptions.NotImplementedException(message)

    @classmethod
    def format_sql_for_bigquery(cls, sql):
        # Workaround for an API limitation: views created via the API
        # default to Legacy SQL; the #standardSQL pragma on the first line
        # forces Standard SQL instead.
        return "#standardSQL\n{}".format(sql)

    @classmethod
    def execute_model(cls, profile, model):
        """Create `model` as a Standard SQL view and return a status string.

        :raises RuntimeError: if the view reports no creation timestamp
            after the create call.
        """
        connection = cls.get_connection(profile, model.get('name'))

        if flags.STRICT_MODE:
            validate_connection(connection)

        model_name = model.get('name')
        model_sql = cls.format_sql_for_bigquery(model.get('injected_sql'))

        schema = cls.get_default_schema(profile)
        dataset = cls.get_dataset(profile, schema, model_name)

        view = dataset.table(model_name)
        view.view_query = model_sql

        logger.debug("Model SQL ({}):\n{}".format(model_name, model_sql))

        with cls.exception_handler(profile, model_sql, model_name, model_name):
            view.create()

        if view.created is None:
            raise RuntimeError("Error creating view {}".format(model_name))

        return "CREATE VIEW"

    @classmethod
    def fetch_query_results(cls, query):
        """Collect and return every page of results for a completed query.

        Bug fix: the original returned only the final page (`rows`) rather
        than the accumulated `all_rows`.
        """
        all_rows = []

        rows = query.rows
        token = query.page_token

        while True:
            all_rows.extend(rows)
            if token is None:
                break
            rows, total_count, token = query.fetch_data(page_token=token)

        return all_rows

    @classmethod
    def execute_and_fetch(cls, profile, sql, model_name=None, **kwargs):
        """Run `sql` synchronously and return all result rows."""
        conn = cls.get_connection(profile, model_name)
        client = conn.get('handle')

        formatted_sql = cls.format_sql_for_bigquery(sql)
        query = client.run_sync_query(formatted_sql)
        query.timeout_ms = cls.QUERY_TIMEOUT

        debug_message = "Fetching data for query {}:\n{}"
        logger.debug(debug_message.format(model_name, formatted_sql))

        query.run()

        return cls.fetch_query_results(query)

    @classmethod
    def add_begin_query(cls, profile, name):
        # BigQuery has no transactions, so there is no BEGIN to issue.
        # (Dead add_query call after the raise removed.)
        raise dbt.exceptions.NotImplementedException(
            '`add_begin_query` is not implemented for this adapter!')

    @classmethod
    def create_schema(cls, profile, schema, model_name=None):
        """Create `schema` as a BigQuery dataset."""
        logger.debug('Creating schema "%s".', schema)

        dataset = cls.get_dataset(profile, schema, model_name)

        with cls.exception_handler(profile, 'create dataset', model_name):
            dataset.create()

    @classmethod
    def check_schema_exists(cls, profile, schema, model_name=None):
        """Return True if a dataset named `schema` exists in the project."""
        conn = cls.get_connection(profile, model_name)

        client = conn.get('handle')

        with cls.exception_handler(profile, 'create dataset', model_name):
            all_datasets = client.list_datasets()
            return any(ds.name == schema for ds in all_datasets)

    @classmethod
    def get_dataset(cls, profile, dataset_name, model_name=None):
        """Return a Dataset reference for `dataset_name`."""
        conn = cls.get_connection(profile, model_name)

        client = conn.get('handle')
        return client.dataset(dataset_name)

    @classmethod
    def add_query(cls, profile, sql, model_name=None, auto_begin=True):
        # Arbitrary SQL execution goes through execute_and_fetch instead.
        raise dbt.exceptions.NotImplementedException(
            '`add_query` is not implemented for this adapter!')

    @classmethod
    def cancel_connection(cls, profile, connection):
        # Table/view creation requests cannot be cancelled through the API.
        # Running jobs can be (jobs.cancel), which may be supported later.
        # (Dead snowflake-style abort-session code after the raise removed.)
        raise dbt.exceptions.NotImplementedException(
            '`cancel_connection` is not implemented for this adapter!')

    @classmethod
    def quote_schema_and_table(cls, profile, schema, table):
        """Return the fully-qualified, backtick-quoted relation name
        (`project`.`schema`.`table`)."""
        connection = cls.get_connection(profile)
        credentials = connection.get('credentials', {})
        project = credentials.get('project')
        return '`{}`.`{}`.`{}`'.format(project, schema, table)
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,11 +3,13 @@ | |
import re | ||
import time | ||
import yaml | ||
import importlib | ||
|
||
from contextlib import contextmanager | ||
|
||
import dbt.exceptions | ||
import dbt.flags | ||
import dbt.materializers | ||
|
||
from dbt.contracts.connection import validate_connection | ||
from dbt.logger import GLOBAL_LOGGER as logger | ||
|
@@ -21,6 +23,33 @@ | |
|
||
class DefaultAdapter(object): | ||
|
||
requires = {} | ||
|
||
@classmethod | ||
def is_installed(cls): | ||
try: | ||
cls.initialize() | ||
return True | ||
except ImportError: | ||
return False | ||
|
||
@classmethod | ||
def _import(cls, name): | ||
return importlib.import_module(name) | ||
|
||
@classmethod | ||
def install_requires(cls): | ||
from pip import main as pip_main | ||
|
||
for package, require in cls.requires.items(): | ||
logger.info("Installing {}".format(require)) | ||
pip_main(['install', require]) | ||
logger.info("Installed {} successfully!".format(require)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is there a unit test for this? i want to be sure it works on all platforms |
||
|
||
@classmethod | ||
def get_materializer(cls, model, existing): | ||
return dbt.materializers.get_materializer(cls, model, existing) | ||
|
||
### | ||
# ADAPTER-SPECIFIC FUNCTIONS -- each of these must be overridden in | ||
# every adapter | ||
|
@@ -531,10 +560,17 @@ def add_query(cls, profile, sql, model_name=None, auto_begin=True): | |
return connection, cursor | ||
|
||
@classmethod | ||
def execute_one(cls, profile, sql, model_name=None): | ||
def execute_one(cls, profile, sql, model_name=None, auto_begin=False): | ||
cls.get_connection(profile, model_name) | ||
|
||
return cls.add_query(profile, sql, model_name) | ||
return cls.add_query(profile, sql, model_name, auto_begin) | ||
|
||
@classmethod | ||
def execute_and_fetch(cls, profile, sql, model_name=None, | ||
auto_begin=False): | ||
_, cursor = cls.execute_one(profile, sql, model_name, auto_begin) | ||
|
||
return cursor.fetchall() | ||
|
||
@classmethod | ||
def execute_all(cls, profile, sqls, model_name=None): | ||
|
@@ -567,13 +603,13 @@ def table_exists(cls, profile, schema, table, model_name=None): | |
exists = tables.get(table) is not None | ||
return exists | ||
|
||
@classmethod | ||
def check_schema_exists(cls, profile, schema): | ||
return cls.check_schema_exists(profile, schema) | ||
|
||
@classmethod | ||
def already_exists(cls, profile, schema, table, model_name=None): | ||
""" | ||
Alias for `table_exists`. | ||
""" | ||
return cls.table_exists(profile, schema, table, model_name) | ||
|
||
@classmethod | ||
def quote_schema_and_table(cls, profile, schema, table): | ||
return '"{}"."{}"'.format(schema, table) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
NotImplementedException // remove code below
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
good catch 👍