Skip to content

Commit

Permalink
First time flow (#439)
Browse files Browse the repository at this point in the history
* clone sample repo, better output after dbt init

* add default target to initial profiles file

* change starter project to fishtown repo

* pep8

* add `dbt debug --config-dir` cmd

* pep8

* sample file tweaks

* add overwrite check + unit test
  • Loading branch information
drewbanin authored May 25, 2017
1 parent c9dec2e commit d15c0c6
Show file tree
Hide file tree
Showing 9 changed files with 270 additions and 71 deletions.
13 changes: 11 additions & 2 deletions dbt/clients/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,13 @@ def run_cmd(cwd, cmd):
return proc.communicate()


# Shallow-clone `repo` (history depth 1) by handing a `git clone` command to
# run_cmd together with `cwd`; returns whatever run_cmd returns.
# NOTE(review): a three-argument `clone` is defined immediately below and
# would shadow this one -- presumably this is the pre-change side of the diff.
def clone(repo, cwd):
return run_cmd(cwd, ['git', 'clone', '--depth', '1', repo])
def clone(repo, cwd, dirname=None):
    """
    Shallow-clone `repo` (history depth 1) by handing a `git clone` command
    to `run_cmd` together with `cwd`.

    When `dirname` is not None it is appended as the last argument of the
    git command (the directory name to clone into); otherwise git picks the
    name. Returns whatever `run_cmd` returns.
    """
    target = [] if dirname is None else [dirname]

    return run_cmd(cwd, ['git', 'clone', '--depth', '1', repo] + target)


def checkout(cwd, branch=None):
Expand All @@ -39,3 +44,7 @@ def get_current_sha(cwd):
out, err = run_cmd(cwd, ['git', 'rev-parse', 'HEAD'])

return out.decode('utf-8')


def remove_remote(cwd):
    """
    Remove the `origin` remote by passing `git remote rm origin` to
    `run_cmd` along with `cwd`. Returns `run_cmd`'s result unchanged.
    """
    rm_origin = ['git', 'remote', 'rm', 'origin']
    return run_cmd(cwd, rm_origin)
26 changes: 26 additions & 0 deletions dbt/clients/system.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import fnmatch
import os
import os.path
import sys


def find_matching(root_path,
Expand Down Expand Up @@ -70,3 +71,28 @@ def make_directory(path):
pass
else:
raise e


def make_file(path, contents='', overwrite=False):
    """
    Write `contents` to a file at `path`. The parent directory is assumed
    to exist already.

    An existing file is left untouched unless `overwrite` is truthy.
    Returns True when the file was written and False when it was skipped.
    """
    # Guard: never clobber an existing file unless explicitly asked to.
    if not overwrite and os.path.exists(path):
        return False

    with open(path, 'w') as handle:
        handle.write(contents)

    return True


def open_dir_cmd():
    """
    Return the name of the command used to open a directory on the current
    platform: 'start' for win32, 'open' for darwin, and 'xdg-open' for
    every other value of `sys.platform`.
    """
    # https://docs.python.org/2/library/sys.html#sys.platform
    openers = {'win32': 'start', 'darwin': 'open'}

    return openers.get(sys.platform, 'xdg-open')
7 changes: 7 additions & 0 deletions dbt/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,13 @@ def parse_args(args):
)

sub = subs.add_parser('debug', parents=[base_subparser])
sub.add_argument(
'--config-dir',
action='store_true',
help="""
If specified, DBT will show path information for this project
"""
)
sub.set_defaults(cls=debug_task.DebugTask, which='debug')

sub = subs.add_parser('deps', parents=[base_subparser])
Expand Down
29 changes: 26 additions & 3 deletions dbt/task/debug.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,38 @@
import pprint

from dbt.logger import GLOBAL_LOGGER as logger
import dbt.clients.system
import dbt.project

# Template logged by DebugTask.path_info; the `open_cmd` and `profiles_dir`
# placeholders are filled in with str.format.
PROFILE_DIR_MESSAGE = """To view your profiles.yml file, run:
{open_cmd} {profiles_dir}"""


class DebugTask:
def __init__(self, args, project):
self.args = args
self.project = project

def run(self):
def path_info(self):
    """
    Log a hint showing the command that opens the default profiles
    directory (`dbt.project.default_profiles_dir`).
    """
    hint = PROFILE_DIR_MESSAGE.format(
        open_cmd=dbt.clients.system.open_dir_cmd(),
        profiles_dir=dbt.project.default_profiles_dir
    )

    logger.info(hint)

def diag(self):
    """
    Dump diagnostics: log the task's arguments, then pretty-print the
    project object to stdout.
    """
    args_line = "args: {}".format(self.args)
    logger.info(args_line)
    logger.info("project: ")

    # TODO: switch this out for a log statement
    pprint.pprint(self.project)

def run(self):
    """
    Task entry point: show profile path information when
    `args.config_dir` is truthy, otherwise dump diagnostics.
    """
    action = self.path_info if self.args.config_dir else self.diag
    action()
133 changes: 89 additions & 44 deletions dbt/task/init.py
Original file line number Diff line number Diff line change
@@ -1,66 +1,111 @@
import pprint
import os

SAMPLE_CONFIG = """
name: 'package_name'
version: '1.0'
source-paths: ["models"] # paths with source code to compile
analysis-paths: ["analysis"] # path with analysis files which are compiled, but
# not run
target-path: "target" # path for compiled code
clean-targets: ["target"] # directories removed by the clean task
test-paths: ["test"] # where to store test results
data-paths: ["data"] # load CSVs from this directory with `dbt seed`
# specify per-model configs
#models:
# package_name: # define configs for this package (called
# # "package_name" above)
# pardot: # assuming pardot is listed in models/
# enabled: false # disable all pardot models except where overriden
# pardot_emails: # override the configs for the pardot_emails model
# enabled: true # enable this specific model
# materialized: true # create a table instead of a view
# uncomment below and add real repositories to add dependencies to this project
#repositories:
# - "git@github.com:[your-org]/[some-repo-1]"
# - "git@github.com:[your-org]/[some-repo-2]"
"""
import dbt.project
import dbt.clients.git
import dbt.clients.system

from dbt.logger import GLOBAL_LOGGER as logger

# Git repository cloned by InitTask.clone_starter_repo.
STARTER_REPO = 'https://github.com/fishtown-analytics/dbt-starter-project.git'
# Documentation link substituted into ON_COMPLETE_MESSAGE by get_addendum.
DOCS_URL = 'https://dbt.readme.io/docs/configure-your-profile'
# Link to the sample profiles file; interpolated into STARTER_PROFILE.
SAMPLE_PROFILES_YML_FILE = 'https://github.com/fishtown-analytics/dbt/blob/master/sample.profiles.yml' # noqa

ON_COMPLETE_MESSAGE = """
Your new dbt project "{project_name}" was created! If this is your first time
using dbt, you'll need to set up your profiles.yml file -- this file will
tell dbt how to connect to your database. You can find this file by running:
{open_cmd} {profiles_path}
GIT_IGNORE = """
target/
dbt_modules/
For more information on how to configure the profiles.yml file,
please consult the dbt documentation here:
{docs_url}
One more thing:
Need help? Don't hesitate to reach out to us via GitHub issues or on Slack --
There's a link to our Slack group in the GitHub Readme. Happy modeling!
"""


# Contents written to the profiles file by InitTask.create_profiles_file when
# that file does not exist yet. The `{profiles_sample}` placeholder is filled
# with SAMPLE_PROFILES_YML_FILE once, at import time.
# NOTE(review): the connection values look like placeholders for the user to
# edit -- confirm against the dbt documentation.
STARTER_PROFILE = """
# For more information on how to configure this file, please see:
# {profiles_sample}
default:
outputs:
dev:
type: redshift
threads: 1
host: 127.0.0.1
port: 5439
user: alice
pass: pa55word
dbname: warehouse
schema: dbt_alice
prod:
type: redshift
threads: 1
host: 127.0.0.1
port: 5439
user: alice
pass: pa55word
dbname: warehouse
schema: analytics
target: dev
""".format(profiles_sample=SAMPLE_PROFILES_YML_FILE)


class InitTask:
def __init__(self, args, project=None):
self.args = args
self.project = project

def __write(self, path, filename, contents):
file_path = os.path.join(path, filename)
def clone_starter_repo(self, project_name):
    """
    Clone STARTER_REPO into a directory named `project_name` (the clone is
    issued with '.' as the working directory), then remove the `origin`
    remote from the resulting checkout.
    """
    git = dbt.clients.git

    git.clone(STARTER_REPO, '.', project_name)
    # presumably detaches the new project from the starter repository
    git.remove_remote(project_name)

def create_profiles_dir(self, profiles_dir):
    """
    Create `profiles_dir` unless something already exists at that path.

    Returns True when the directory was created by this call and False
    when the path was already present.
    """
    if os.path.exists(profiles_dir):
        return False

    dbt.clients.system.make_directory(profiles_dir)
    return True

def create_profiles_file(self, profiles_file):
    """
    Write STARTER_PROFILE to `profiles_file` unless something already
    exists at that path.

    Returns True when the file was created by this call and False when the
    path was already present (the existing file is left untouched).
    """
    if os.path.exists(profiles_file):
        return False

    dbt.clients.system.make_file(profiles_file, STARTER_PROFILE)
    return True

with open(file_path, 'w') as fh:
fh.write(contents)
def get_addendum(self, project_name, profiles_path):
    """
    Build the message shown once a project has been created by filling
    ON_COMPLETE_MESSAGE with the project name, the profiles path, the
    platform's open command and DOCS_URL. Returns the formatted string.
    """
    fields = {
        'open_cmd': dbt.clients.system.open_dir_cmd(),
        'project_name': project_name,
        'profiles_path': profiles_path,
        'docs_url': DOCS_URL,
    }

    return ON_COMPLETE_MESSAGE.format(**fields)

# Task entry point: make sure the default profiles directory and profiles.yml
# exist, refuse to reuse an existing project directory, set up the project
# named by `args.project_name`, then log the closing message.
# Raises RuntimeError when the project directory already exists.
# NOTE(review): this span appears to interleave removed and added diff lines
# (the os.mkdir / self.__write / `dirs` loop statements next to the
# clone_starter_repo call) -- confirm against the real file before relying
# on the statement order shown here.
def run(self):
project_dir = self.args.project_name

profiles_dir = dbt.project.default_profiles_dir
profiles_file = os.path.join(profiles_dir, 'profiles.yml')

# Both helpers do nothing when their target path already exists.
self.create_profiles_dir(profiles_dir)
self.create_profiles_file(profiles_file)

# Logged unconditionally: the helpers' return values are not consulted.
msg = "Creating dbt configuration folder at {}"
logger.info(msg.format(profiles_dir))

if os.path.exists(project_dir):
raise RuntimeError("directory {} already exists!".format(
project_dir
))

os.mkdir(project_dir)

project_dir = self.args.project_name
self.__write(project_dir, 'dbt_project.yml', SAMPLE_CONFIG)
self.__write(project_dir, '.gitignore', GIT_IGNORE)
self.clone_starter_repo(project_dir)

dirs = ['models', 'analysis', 'tests', 'data']
for dir_name in dirs:
dir_path = os.path.join(project_dir, dir_name)
os.mkdir(dir_path)
addendum = self.get_addendum(project_dir, profiles_dir)
logger.info(addendum)
21 changes: 17 additions & 4 deletions sample.dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ name: 'your_package_name'

# version: Required. This indicates the current version of your package and
# should conform to semantic versioning. The field is currently unused
version: '0.0.1'
version: '0.1.0'



Expand All @@ -40,7 +40,7 @@ target-path: "target"

# test-paths: Optional. Specify which path(s) dbt should look in to find data
# test definitions.
test-paths: ["test"]
test-paths: ["tests"]

# data-paths: Optional. Specify which path(s) dbt should look in to find CSV
# files. Running `dbt seed` will load these CSVs as tables in your warehouse
Expand Down Expand Up @@ -191,5 +191,18 @@ repositories:
# Archival
#

# Data archival is a powerful feature intended for advanced dbt users. For more
# information, consult: https://dbt.readme.io/reference#archives
# dbt's archival process records snapshots of specified tables so that
# you can analyze how these tables change over time. In the example below,
# the public.users table is configured for archival. When the `updated_at`
# value changes for a given user record (identified by the `id` field), dbt
# will insert a new row into the `users_archived` table which reflects the
# changed state of that row. For more information on this command, consult
# the dbt documentation: https://dbt.readme.io/reference#archive
archive:
- source_schema: public
target_schema: public
tables:
- source_table: users
target_table: users_archived
updated_at: updated_at
unique_key: id
Loading

0 comments on commit d15c0c6

Please sign in to comment.