Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

First time flow #439

Merged
merged 9 commits into from
May 25, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions dbt/clients/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,13 @@ def run_cmd(cwd, cmd):
return proc.communicate()


def clone(repo, cwd, dirname=None):
    """
    Shallow-clone `repo` by running `git clone --depth 1 <repo>` with `cwd`
    as the working directory.

    When `dirname` is not None it is appended to the command as the final
    argument (the name of the directory to clone into); otherwise no
    directory argument is passed. Calling `clone(repo, cwd)` with two
    arguments therefore behaves exactly like the older two-argument form.

    Returns whatever `run_cmd` returns for the command.
    """
    clone_cmd = ['git', 'clone', '--depth', '1', repo]

    if dirname is not None:
        clone_cmd.append(dirname)

    return run_cmd(cwd, clone_cmd)


def checkout(cwd, branch=None):
Expand All @@ -39,3 +44,7 @@ def get_current_sha(cwd):
out, err = run_cmd(cwd, ['git', 'rev-parse', 'HEAD'])

return out.decode('utf-8')


def remove_remote(cwd):
    """
    Run `git remote rm origin` with `cwd` as the working directory and
    return the result of `run_cmd`.
    """
    remove_origin_cmd = ['git', 'remote', 'rm', 'origin']
    return run_cmd(cwd, remove_origin_cmd)
26 changes: 26 additions & 0 deletions dbt/clients/system.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import fnmatch
import os
import os.path
import sys


def find_matching(root_path,
Expand Down Expand Up @@ -70,3 +71,28 @@ def make_directory(path):
pass
else:
raise e


def make_file(path, contents='', overwrite=False):
    """
    Write `contents` to a file at `path`. The directory that will hold the
    file is assumed to exist already.

    An existing file is left untouched unless `overwrite` is true. Returns
    True when the file was written and False when it was skipped.
    """
    if not overwrite and os.path.exists(path):
        return False

    with open(path, 'w') as out_file:
        out_file.write(contents)

    return True


def open_dir_cmd():
    """
    Return the command name matching the current `sys.platform`: 'start' on
    win32, 'open' on darwin and 'xdg-open' on anything else.
    """
    # https://docs.python.org/2/library/sys.html#sys.platform
    commands_by_platform = {
        'win32': 'start',
        'darwin': 'open',
    }

    return commands_by_platform.get(sys.platform, 'xdg-open')
7 changes: 7 additions & 0 deletions dbt/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,13 @@ def parse_args(args):
)

sub = subs.add_parser('debug', parents=[base_subparser])
sub.add_argument(
'--config-dir',
action='store_true',
help="""
If specified, DBT will show path information for this project
"""
)
sub.set_defaults(cls=debug_task.DebugTask, which='debug')

sub = subs.add_parser('deps', parents=[base_subparser])
Expand Down
29 changes: 26 additions & 3 deletions dbt/task/debug.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,38 @@
import pprint

from dbt.logger import GLOBAL_LOGGER as logger
import dbt.clients.system
import dbt.project

# Template logged by DebugTask.path_info. It is formatted with two named
# fields: `open_cmd` and `profiles_dir`.
PROFILE_DIR_MESSAGE = """To view your profiles.yml file, run:

{open_cmd} {profiles_dir}"""


class DebugTask:
    """
    Task registered for the `debug` sub-command.

    Depending on `args.config_dir` it either logs how to open the default
    profiles directory (`path_info`) or logs the parsed arguments and the
    project (`diag`).
    """

    def __init__(self, args, project):
        self.args = args
        self.project = project

    def path_info(self):
        """
        Log PROFILE_DIR_MESSAGE filled in with the platform's open command
        and the default profiles directory.
        """
        open_cmd = dbt.clients.system.open_dir_cmd()
        profiles_dir = dbt.project.default_profiles_dir

        message = PROFILE_DIR_MESSAGE.format(
            open_cmd=open_cmd,
            profiles_dir=profiles_dir
        )

        logger.info(message)

    def diag(self):
        """
        Log the arguments this task was created with, followed by a
        pretty-printed dump of the project.
        """
        logger.info("args: {}".format(self.args))
        logger.info("project: ")

        # Format with pprint but emit through the logger so the dump ends up
        # in the same place as the lines above instead of raw stdout.
        logger.info(pprint.pformat(self.project))

    def run(self):
        """
        Entry point: show path information when `--config-dir` was given,
        otherwise print diagnostics.
        """
        if self.args.config_dir:
            self.path_info()
        else:
            self.diag()
133 changes: 89 additions & 44 deletions dbt/task/init.py
Original file line number Diff line number Diff line change
@@ -1,66 +1,111 @@
import pprint
import os

# Sample project configuration as YAML text: package name and version, the
# source/analysis/target/test/data path settings, and commented-out examples
# of per-model configs and repository dependencies.
# NOTE(review): the only visible use writes this text to a file named
# 'dbt_project.yml' -- confirm whether anything else still reads it.
SAMPLE_CONFIG = """
name: 'package_name'
version: '1.0'

source-paths: ["models"] # paths with source code to compile
analysis-paths: ["analysis"] # path with analysis files which are compiled, but
# not run
target-path: "target" # path for compiled code
clean-targets: ["target"] # directories removed by the clean task
test-paths: ["test"] # where to store test results
data-paths: ["data"] # load CSVs from this directory with `dbt seed`

# specify per-model configs
#models:
# package_name: # define configs for this package (called
# # "package_name" above)
# pardot: # assuming pardot is listed in models/
# enabled: false # disable all pardot models except where overriden
# pardot_emails: # override the configs for the pardot_emails model
# enabled: true # enable this specific model
# materialized: true # create a table instead of a view

# uncomment below and add real repositories to add dependencies to this project
#repositories:
# - "git@github.com:[your-org]/[some-repo-1]"
# - "git@github.com:[your-org]/[some-repo-2]"
"""
import dbt.project
import dbt.clients.git
import dbt.clients.system

from dbt.logger import GLOBAL_LOGGER as logger

# Repository cloned to create a new project.
STARTER_REPO = 'https://github.com/fishtown-analytics/dbt-starter-project.git'
# Documentation page linked from the completion message.
DOCS_URL = 'https://dbt.readme.io/docs/configure-your-profile'
# Sample profiles.yml linked from the header of the generated starter profile.
SAMPLE_PROFILES_YML_FILE = 'https://github.com/fishtown-analytics/dbt/blob/master/sample.profiles.yml' # noqa

# Message logged once a project has been created. Formatted with the named
# fields `project_name`, `open_cmd`, `profiles_path` and `docs_url`.
ON_COMPLETE_MESSAGE = """
Your new dbt project "{project_name}" was created! If this is your first time
using dbt, you'll need to set up your profiles.yml file -- this file will
tell dbt how to connect to your database. You can find this file by running:

{open_cmd} {profiles_path}

For more information on how to configure the profiles.yml file,
please consult the dbt documentation here:

{docs_url}

One more thing:

Need help? Don't hesitate to reach out to us via GitHub issues or on Slack --
There's a link to our Slack group in the GitHub Readme. Happy modeling!
"""

# Ignore patterns for a generated project's .gitignore. This assignment was
# previously embedded in the middle of ON_COMPLETE_MESSAGE, which cut that
# string short; it is kept as its own constant so the name stays available.
# NOTE(review): confirm whether anything still needs it.
GIT_IGNORE = """
target/
dbt_modules/
"""


# Contents of the profiles.yml created on first run. The nesting matters:
# the `default` profile holds an `outputs` mapping (one entry per target,
# here `dev` and `prod`) and a `target` key naming the output used by
# default. Without the indentation every key would be a duplicated
# top-level key and the file would not describe a usable profile.
STARTER_PROFILE = """
# For more information on how to configure this file, please see:
# {profiles_sample}

default:
  outputs:
    dev:
      type: redshift
      threads: 1
      host: 127.0.0.1
      port: 5439
      user: alice
      pass: pa55word
      dbname: warehouse
      schema: dbt_alice
    prod:
      type: redshift
      threads: 1
      host: 127.0.0.1
      port: 5439
      user: alice
      pass: pa55word
      dbname: warehouse
      schema: analytics
  target: dev
""".format(profiles_sample=SAMPLE_PROFILES_YML_FILE)


class InitTask:
    """
    Task that sets up a new dbt project.

    It makes sure a profiles directory and a starter profiles.yml exist,
    clones the starter repository into a directory named after the project,
    and logs a message explaining what to do next.
    """

    def __init__(self, args, project=None):
        self.args = args
        self.project = project

    def clone_starter_repo(self, project_name):
        """
        Clone STARTER_REPO from the current directory into `project_name`,
        then remove the `origin` remote inside the new checkout.
        """
        dbt.clients.git.clone(STARTER_REPO, '.', project_name)
        dbt.clients.git.remove_remote(project_name)

    def create_profiles_dir(self, profiles_dir):
        """
        Create `profiles_dir` if it does not exist yet. Returns True when the
        directory was created and False when it was already present.
        """
        if not os.path.exists(profiles_dir):
            dbt.clients.system.make_directory(profiles_dir)
            return True
        return False

    def create_profiles_file(self, profiles_file):
        """
        Write STARTER_PROFILE to `profiles_file` if nothing exists at that
        path. Returns True when the file was written, False otherwise, so an
        existing profile is never overwritten.
        """
        if not os.path.exists(profiles_file):
            dbt.clients.system.make_file(profiles_file, STARTER_PROFILE)
            return True
        return False

    def get_addendum(self, project_name, profiles_path):
        """
        Return ON_COMPLETE_MESSAGE filled in with the project name, the
        platform's open command, the profiles path and the docs URL.
        """
        open_cmd = dbt.clients.system.open_dir_cmd()

        return ON_COMPLETE_MESSAGE.format(
            open_cmd=open_cmd,
            project_name=project_name,
            profiles_path=profiles_path,
            docs_url=DOCS_URL
        )

    def run(self):
        """
        Entry point for the task.

        Raises RuntimeError if a path named after the project already
        exists.
        """
        project_dir = self.args.project_name

        profiles_dir = dbt.project.default_profiles_dir
        profiles_file = os.path.join(profiles_dir, 'profiles.yml')

        self.create_profiles_dir(profiles_dir)
        self.create_profiles_file(profiles_file)

        msg = "Creating dbt configuration folder at {}"
        logger.info(msg.format(profiles_dir))

        if os.path.exists(project_dir):
            raise RuntimeError("directory {} already exists!".format(
                project_dir
            ))

        # The project directory is created by the clone itself, so it must
        # not be created (or populated) beforehand.
        self.clone_starter_repo(project_dir)

        addendum = self.get_addendum(project_dir, profiles_dir)
        logger.info(addendum)
21 changes: 17 additions & 4 deletions sample.dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ name: 'your_package_name'

# version: Required. This indicates the current version of your package and
# should conform to semantic versioning. The field is currently unused
version: '0.0.1'
version: '0.1.0'



Expand All @@ -40,7 +40,7 @@ target-path: "target"

# test-paths: Optional. Specify which path(s) dbt should look in to find data
# test definitions.
test-paths: ["test"]
test-paths: ["tests"]

# data-paths: Optional. Specify which path(s) dbt should look in to find CSV
# files. Running `dbt seed` will load these CSVs as tables in your warehouse
Expand Down Expand Up @@ -191,5 +191,18 @@ repositories:
# Archival
#

# Data archival is a powerful feature intended for advanced dbt users. For more
# information, consult: https://dbt.readme.io/reference#archives
# dbt's archival process records snapshots of specified tables so that
# you can analyze how these tables change over time. In the example below,
# the public.users table is configured for archival. When the `updated_at`
# value changes for a given user record (identified by the `id` field), dbt
# will add a new record to the `users_archived` table that reflects the
# changed state of that row. For more information on this command, consult
# the dbt documentation: https://dbt.readme.io/reference#archive
archive:
- source_schema: public
target_schema: public
tables:
- source_table: users
target_table: users_archived
updated_at: updated_at
unique_key: id
Loading