Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

make tracking use the profile directory, and suppress errors (#1180) #1186

Merged
merged 2 commits into from
Dec 14, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 47 additions & 44 deletions dbt/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,22 +86,6 @@ def read_profiles(profiles_dir=None):
return profiles


def read_config(profiles_dir):
    """Return the user ``config`` mapping of the profile in profiles_dir.

    :param profiles_dir: Directory handed to ``read_profile``.
    :returns dict: The profile's ``config`` section, or an empty dict when
        no profile was read, the section is missing, or the section is
        empty/null.
    """
    profile = read_profile(profiles_dir)
    if profile is None:
        return {}
    # ``profile.get('config', {})`` would hand back None when the key is
    # present but has a null value (e.g. a bare ``config:`` line). Callers
    # call ``.get`` on the result, so normalise that case to a dict as well.
    return profile.get('config') or {}


def send_anonymous_usage_stats(config):
    """Return whether anonymous usage stats should be sent.

    :param config: The user config mapping; may be None or empty.
    :returns: The ``send_anonymous_usage_stats`` entry of config, or True
        when config is None/empty or the key is absent.
    """
    # A missing or null config section means "use the default" rather than
    # an AttributeError on ``None.get``.
    if not config:
        return True
    return config.get('send_anonymous_usage_stats', True)


def colorize_output(config):
    """Return whether output should be colorized.

    :param config: The user config mapping; may be None or empty.
    :returns: The ``use_colors`` entry of config, or True when config is
        None/empty or the key is absent.
    """
    # A missing or null config section means "use the default" rather than
    # an AttributeError on ``None.get``.
    if not config:
        return True
    return config.get('use_colors', True)


class ConfigRenderer(object):
"""A renderer provides configuration rendering for a given set of cli
variables and a render type.
Expand Down Expand Up @@ -476,13 +460,48 @@ def warn_for_unused_resource_config_paths(self, resource_fqns, disabled):
logger.info(dbt.ui.printer.yellow(msg))


class UserConfig(object):
    """Holder for the two user-level settings read from a profile's
    ``config`` section: ``send_anonymous_usage_stats`` and ``use_colors``.
    """

    def __init__(self, send_anonymous_usage_stats, use_colors):
        self.use_colors = use_colors
        self.send_anonymous_usage_stats = send_anonymous_usage_stats

    @classmethod
    def from_dict(cls, cfg=None):
        """Build a UserConfig from a mapping, filling in defaults.

        :param cfg: Mapping that may hold 'send_anonymous_usage_stats'
            and/or 'use_colors'. None is treated as an empty mapping.
        :returns UserConfig: The new UserConfig object.
        """
        settings = {} if cfg is None else cfg
        stats_flag = settings.get(
            'send_anonymous_usage_stats',
            DEFAULT_SEND_ANONYMOUS_USAGE_STATS
        )
        colors_flag = settings.get('use_colors', DEFAULT_USE_COLORS)
        return cls(stats_flag, colors_flag)

    def to_dict(self):
        """Return the settings as a plain dict keyed by setting name."""
        return dict(
            send_anonymous_usage_stats=self.send_anonymous_usage_stats,
            use_colors=self.use_colors,
        )

    @classmethod
    def from_directory(cls, directory):
        """Build a UserConfig from the profile read out of directory.

        When ``read_profile`` returns something falsy, the defaults from
        ``from_dict`` are used.

        :param directory: Directory handed to ``read_profile``.
        :returns UserConfig: The new UserConfig object.
        """
        profile = read_profile(directory)
        user_cfg = profile.get('config', {}) if profile else None
        return cls.from_dict(user_cfg)


class Profile(object):
def __init__(self, profile_name, target_name, send_anonymous_usage_stats,
use_colors, threads, credentials):
def __init__(self, profile_name, target_name, config, threads,
credentials):
self.profile_name = profile_name
self.target_name = target_name
self.send_anonymous_usage_stats = send_anonymous_usage_stats
self.use_colors = use_colors
if isinstance(config, dict):
config = UserConfig.from_dict(config)
self.config = config
self.threads = threads
self.credentials = credentials

Expand All @@ -498,8 +517,7 @@ def to_profile_info(self, serialize_credentials=False):
result = {
'profile_name': self.profile_name,
'target_name': self.target_name,
'send_anonymous_usage_stats': self.send_anonymous_usage_stats,
'use_colors': self.use_colors,
'config': self.config.to_dict(),
'threads': self.threads,
'credentials': self.credentials.incorporate(),
}
Expand Down Expand Up @@ -584,21 +602,11 @@ def from_credentials(cls, credentials, threads, profile_name, target_name,
:raises DbtProfileError: If the profile is invalid.
:returns Profile: The new Profile object.
"""
if user_cfg is None:
user_cfg = {}
send_anonymous_usage_stats = user_cfg.get(
'send_anonymous_usage_stats',
DEFAULT_SEND_ANONYMOUS_USAGE_STATS
)
use_colors = user_cfg.get(
'use_colors',
DEFAULT_USE_COLORS
)
config = UserConfig.from_dict(user_cfg)
profile = cls(
profile_name=profile_name,
target_name=target_name,
send_anonymous_usage_stats=send_anonymous_usage_stats,
use_colors=use_colors,
config=config,
threads=threads,
credentials=credentials
)
Expand Down Expand Up @@ -745,10 +753,8 @@ def from_args(cls, args, project_profile_name=None, cli_vars=None):
cli_vars = dbt.utils.parse_cli_vars(getattr(args, 'vars', '{}'))

threads_override = getattr(args, 'threads', None)
# TODO(jeb): is it even possible for this to not be set?
profiles_dir = getattr(args, 'profiles_dir', PROFILES_DIR)
target_override = getattr(args, 'target', None)
raw_profiles = read_profile(profiles_dir)
raw_profiles = read_profile(args.profiles_dir)
profile_name = cls.pick_profile_name(args.profile,
project_profile_name)

Expand Down Expand Up @@ -797,9 +803,8 @@ def __init__(self, project_name, version, project_root, source_paths,
macro_paths, data_paths, test_paths, analysis_paths,
docs_paths, target_path, clean_targets, log_path,
modules_path, quoting, models, on_run_start, on_run_end,
archive, seeds, profile_name, target_name,
send_anonymous_usage_stats, use_colors, threads, credentials,
packages, args):
archive, seeds, profile_name, target_name, config,
threads, credentials, packages, args):
# 'vars'
self.args = args
self.cli_vars = dbt.utils.parse_cli_vars(getattr(args, 'vars', '{}'))
Expand Down Expand Up @@ -833,8 +838,7 @@ def __init__(self, project_name, version, project_root, source_paths,
self,
profile_name=profile_name,
target_name=target_name,
send_anonymous_usage_stats=send_anonymous_usage_stats,
use_colors=use_colors,
config=config,
threads=threads,
credentials=credentials
)
Expand Down Expand Up @@ -877,8 +881,7 @@ def from_parts(cls, project, profile, args):
packages=project.packages,
profile_name=profile.profile_name,
target_name=profile.target_name,
send_anonymous_usage_stats=profile.send_anonymous_usage_stats,
use_colors=profile.use_colors,
config=profile.config,
threads=profile.threads,
credentials=profile.credentials,
args=args
Expand Down
24 changes: 16 additions & 8 deletions dbt/contracts/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,20 @@ class PackageConfig(APIObject):
SCHEMA = PACKAGE_FILE_CONTRACT


# JSON-schema-style description of a profile's user ``config`` mapping.
# Extra keys are allowed; the two known flags must be booleans when given.
USER_CONFIG_CONTRACT = {
    'type': 'object',
    'additionalProperties': True,
    'properties': {
        'send_anonymous_usage_stats': {'type': 'boolean'},
        'use_colors': {'type': 'boolean'},
    },
}


PROFILE_INFO_CONTRACT = {
'type': 'object',
'additionalProperties': False,
Expand All @@ -294,12 +308,7 @@ class PackageConfig(APIObject):
'target_name': {
'type': 'string',
},
'send_anonymous_usage_stats': {
'type': 'boolean',
},
'use_colors': {
'type': 'boolean',
},
'config': USER_CONFIG_CONTRACT,
'threads': {
'type': 'number',
},
Expand All @@ -313,8 +322,7 @@ class PackageConfig(APIObject):
},
},
'required': [
'profile_name', 'target_name', 'send_anonymous_usage_stats',
'use_colors', 'threads', 'credentials'
'profile_name', 'target_name', 'config', 'threads', 'credentials'
],
}

Expand Down
39 changes: 26 additions & 13 deletions dbt/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,9 @@
import dbt.profiler

from dbt.utils import ExitCodes
from dbt.config import Project, RuntimeConfig, DbtProjectError, \
DbtProfileError, PROFILES_DIR, read_config, \
send_anonymous_usage_stats, colorize_output, read_profiles
from dbt.exceptions import DbtProfileError, DbtProfileError, RuntimeException
from dbt.config import Project, UserConfig, RuntimeConfig, PROFILES_DIR, \
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we lost DbtProjectError here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was a double-import that I incorrectly added at some point - see 2 lines down where we import it again from dbt.exceptions! Same goes for DbtProfileError.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dang. I don't know where it is in the code, but I saw this when supplying a --profiles-dir that did not contain a profiles.yml:

$ dbt run --profiles-dir .
Encountered an error:
name 'DbtProjectError' is not defined
Traceback (most recent call last):
  File "/Users/drew/fishtown/dbt/dbt/main.py", line 247, in invoke_dbt
    cfg = RuntimeConfig.from_args(parsed)
  File "/Users/drew/fishtown/dbt/dbt/config.py", line 961, in from_args
    cli_vars=cli_vars
  File "/Users/drew/fishtown/dbt/dbt/config.py", line 766, in from_args
    threads_override=threads_override
  File "/Users/drew/fishtown/dbt/dbt/config.py", line 716, in from_raw_profiles
    "Could not find profile named '{}'".format(profile_name)
dbt.exceptions.DbtProjectError: Runtime Error
  Could not find profile named 'debug'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/drew/fishtown/dbt/dbt/main.py", line 77, in main
    results, succeeded = handle_and_check(args)
  File "/Users/drew/fishtown/dbt/dbt/main.py", line 151, in handle_and_check
    task, res = run_from_args(parsed)
  File "/Users/drew/fishtown/dbt/dbt/main.py", line 191, in run_from_args
    res = invoke_dbt(parsed)
  File "/Users/drew/fishtown/dbt/dbt/main.py", line 248, in invoke_dbt
    except DbtProjectError as e:
NameError: name 'DbtProjectError' is not defined

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(we actually import DbtProjectError twice below)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We actually import DbtProfileError twice in a row, ugh... fixed

read_profiles
from dbt.exceptions import DbtProjectError, DbtProfileError, RuntimeException


PROFILES_HELP_MESSAGE = """
Expand Down Expand Up @@ -111,6 +110,27 @@ def handle(args):
return res


def initialize_config_values(parsed):
    """Apply the user's tracking and color settings for this invocation.

    The settings are read from the profile in ``parsed.profiles_dir``; if
    that raises a RuntimeException, the default UserConfig is used instead.

    The profile ends up being parsed a second time later on. Re-using it
    would be nicer, but dbt's initialization is not structured in a way
    that makes that easy.

    :param parsed: The parsed command-line args; ``profiles_dir`` is read.
    """
    try:
        user_config = UserConfig.from_directory(parsed.profiles_dir)
    except RuntimeException:
        user_config = UserConfig.from_dict(None)

    if not user_config.send_anonymous_usage_stats:
        dbt.tracking.do_not_track()
    else:
        dbt.tracking.initialize_tracking(parsed.profiles_dir)

    if user_config.use_colors:
        dbt.ui.printer.use_colors()


def handle_and_check(args):
parsed = parse_args(args)
profiler_enabled = False
Expand All @@ -122,16 +142,8 @@ def handle_and_check(args):
enable=profiler_enabled,
outfile=parsed.record_timing_info
):
# this needs to happen after args are parsed so we can determine the
# correct profiles.yml file
profile_config = read_config(parsed.profiles_dir)
if not send_anonymous_usage_stats(profile_config):
dbt.tracking.do_not_track()
else:
dbt.tracking.initialize_tracking()

if colorize_output(profile_config):
dbt.ui.printer.use_colors()
initialize_config_values(parsed)

reset_adapters()

Expand Down Expand Up @@ -598,6 +610,7 @@ def parse_args(args):
sys.exit(1)

parsed = p.parse_args(args)
parsed.profiles_dir = os.path.expanduser(parsed.profiles_dir)

if not hasattr(parsed, 'which'):
# the user did not provide a valid subcommand. trigger the help message
Expand Down
31 changes: 19 additions & 12 deletions dbt/tracking.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@
COLLECTOR_URL = "fishtownanalytics.sinter-collect.com"
COLLECTOR_PROTOCOL = "https"

COOKIE_PATH = os.path.join(os.path.expanduser('~'), '.dbt/.user.yml')

INVOCATION_SPEC = 'iglu:com.dbt/invocation/jsonschema/1-0-0'
PLATFORM_SPEC = 'iglu:com.dbt/platform/jsonschema/1-0-0'
RUN_MODEL_SPEC = 'iglu:com.dbt/run_model/jsonschema/1-0-0'
Expand All @@ -35,8 +33,9 @@

class User(object):

def __init__(self):
def __init__(self, cookie_dir):
self.do_not_track = True
self.cookie_dir = cookie_dir

self.id = None
self.invocation_id = str(uuid.uuid4())
Expand All @@ -45,6 +44,10 @@ def __init__(self):
def state(self):
return "do not track" if self.do_not_track else "tracking"

@property
def cookie_path(self):
return os.path.join(self.cookie_dir, '.user.yml')

def initialize(self):
self.do_not_track = False

Expand All @@ -56,21 +59,20 @@ def initialize(self):
tracker.set_subject(subject)

def set_cookie(self):
cookie_dir = os.path.dirname(COOKIE_PATH)
user = {"id": str(uuid.uuid4())}

dbt.clients.system.make_directory(cookie_dir)
dbt.clients.system.make_directory(self.cookie_dir)

with open(COOKIE_PATH, "w") as fh:
with open(self.cookie_path, "w") as fh:
yaml.dump(user, fh)

return user

def get_cookie(self):
if not os.path.isfile(COOKIE_PATH):
if not os.path.isfile(self.cookie_path):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to defer the creation of this cookie until the profile is loaded? With the current implementation, dbt will create a cookie if one doesn't exist in the specified profile directory. That means that if you mistakenly specify an incorrect profile dir, dbt will litter .user.yml files all over the place. This won't affect tracking and I don't think it will cause any problems for the end user, but it's not so polite :)

I figure we could create the cookie only if dbt is able to find and load the profiles.yml file. To be sure: I tested this out, and dbt does not send events if the profiles.yml file is unloadable, which is good. This is mostly a cosmetic/tidiness comment.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's possible, but not easy - I would estimate it at a few days of work, as a ton of main.py would have to be refactored. We start doing tracking things before we load a profile - really before we ever even start inspecting which command was passed, so we don't even know if we will load a profile! And sometimes, in order to load a profile we actually need to load the project first so we can figure out the profile name, etc... Accordingly, per our slack conversation, I'm going to merge this as-is - we should get this fix in to grace-kelly, even if it's a bit less than optimal.

I am very enthusiastic about the idea of refactoring main.py to support this behavior - currently even answering the question "does this command require profile, project, both or neither" requires checking multiple functions and carefully walking through behavior, and I don't like that. We could also handle the issues raised in #1189 when/if we do that.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

totally agree @beckjake, thanks for the color. Ship it!

user = self.set_cookie()
else:
with open(COOKIE_PATH, "r") as fh:
with open(self.cookie_path, "r") as fh:
try:
user = yaml.safe_load(fh)
if user is None:
Expand Down Expand Up @@ -266,10 +268,15 @@ def flush():

def do_not_track():
global active_user
active_user = User()
active_user = User(None)


def initialize_tracking():
def initialize_tracking(cookie_dir):
global active_user
active_user = User()
active_user.initialize()
active_user = User(cookie_dir)
try:
active_user.initialize()
except Exception:
logger.debug('Got an exception trying to initialize tracking',
exc_info=True)
active_user = User(None)
1 change: 1 addition & 0 deletions test/integration/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ class TestArgs(object):
def __init__(self, kwargs):
    """Set the default argument values, then apply the caller's overrides.

    :param kwargs: Attribute names mapped to values; entries here replace
        the defaults for ``which``, ``single_threaded`` and
        ``profiles_dir`` and may add further attributes.
    """
    attributes = {
        'which': 'run',
        'single_threaded': False,
        'profiles_dir': DBT_CONFIG_DIR,
    }
    attributes.update(kwargs)
    self.__dict__.update(attributes)


Expand Down
Loading