Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

On mostly-duplicate git urls, pick whichever came first (#1084) #1428

Merged
merged 2 commits into from
May 9, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 38 additions & 7 deletions core/dbt/task/deps.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,13 @@ def __init__(self, *args, **kwargs):
self._checkout_name = hashlib.md5(six.b(self.git)).hexdigest()
self.version = self._contents.get('revision')

@property
def other_name(self):
if self.git.endswith('.git'):
return self.git[:-4]
else:
return self.git + '.git'

@property
def name(self):
return self.git
Expand Down Expand Up @@ -368,14 +375,38 @@ def _parse_package(dict_):


class PackageListing(AttrDict):
def __contains__(self, package):
if isinstance(package, basestring):
return super(PackageListing, self).__contains__(package)
elif isinstance(package, GitPackage):
return package.name in self or package.other_name in self
else:
return package.name in self

def __setitem__(self, key, value):
if isinstance(key, basestring):
super(PackageListing, self).__setitem__(key, value)
elif isinstance(key, GitPackage) and key.other_name in self:
self[key.other_name] = value
else:
self[key.name] = value

def __getitem__(self, key):
if isinstance(key, basestring):
return super(PackageListing, self).__getitem__(key)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When i use the following package spec, I see a backtrace on this line:

packages:
  - git: "https://github.com/fishtown-analytics/dbt-utils"
    revision: 0.1.12

  - git: "https://github.com/fishtown-analytics/dbt-event-logging.git"
    revision: 0.1.5
2019-05-07 21:48:28,778 (MainThread): Traceback (most recent call last):
  File "/Users/drew/fishtown/dbt/core/dbt/main.py", line 80, in main
    results, succeeded = handle_and_check(args)
  File "/Users/drew/fishtown/dbt/core/dbt/main.py", line 153, in handle_and_check
    task, res = run_from_args(parsed)
  File "/Users/drew/fishtown/dbt/core/dbt/main.py", line 201, in run_from_args
    results = task.run()
  File "/Users/drew/fishtown/dbt/core/dbt/task/deps.py", line 525, in run
    final_deps[name].resolve_version()
  File "/Users/drew/fishtown/dbt/core/dbt/task/deps.py", line 397, in __getitem__
    return super(PackageListing, self).__getitem__(key)
KeyError: 'https://github.com/fishtown-analytics/dbt-utils.git'

I don't totally understand what's happening here, or why this case is different than the example provided in the integration test in this PR. Can you check it out?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like while evaluating subpackages, dbt was using the chosen canonical key of the package instead of the package itself for the collision check, which bypassed the extra other_name handling. That's no longer valid since the two dicts might have distinct keys that should match!

elif isinstance(key, GitPackage) and key.other_name in self:
return self[key.other_name]
else:
return self[key.name]

def incorporate(self, package):
if not isinstance(package, Package):
package = _parse_package(package)
if package.name not in self:
self[package.name] = package

if package in self:
self[package] = self[package].incorporate(package)
else:
self[package.name] = self[package.name].incorporate(package)
self[package] = package

@classmethod
def create(cls, parsed_yaml):
Expand Down Expand Up @@ -488,16 +519,16 @@ def run(self):

while pending_deps:
sub_deps = PackageListing.create([])
for name, package in pending_deps.items():
for package in pending_deps.values():
final_deps.incorporate(package)
final_deps[name].resolve_version()
target_config = final_deps[name].fetch_metadata(self.config)
final_deps[package].resolve_version()
target_config = final_deps[package].fetch_metadata(self.config)
sub_deps.incorporate_from_yaml(target_config.packages)
pending_deps = sub_deps

self._check_for_duplicate_project_names(final_deps)

for _, package in final_deps.items():
for package in final_deps.values():
logger.info('Installing %s', package)
package.install(self.config)
logger.info(' Installed from %s\n', package.nice_version_name())
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import os
from test.integration.base import DBTIntegrationTest, use_profile


class TestSimpleDependency(DBTIntegrationTest):

def setUp(self):
Expand Down Expand Up @@ -63,6 +65,67 @@ def test_simple_dependency_with_models(self):
self.assertEqual(created_models['view_model'], 'view')
self.assertEqual(created_models['view_summary'], 'view')


class TestSimpleDependencyWithDuplicates(DBTIntegrationTest):
@property
def schema(self):
return "simple_dependency_006"

@property
def models(self):
return "test/integration/006_simple_dependency_test/models"

@property
def packages_config(self):
# dbt should convert these into a single dependency internally
return {
"packages": [
{
'git': 'https://github.com/fishtown-analytics/dbt-integration-project'
},
{
'git': 'https://github.com/fishtown-analytics/dbt-integration-project.git'
}
]
}

@use_profile('postgres')
def test_postgres_simple_dependency_deps(self):
self.run_dbt(["deps"])


class TestRekeyedDependencyWithSubduplicates(DBTIntegrationTest):
@property
def schema(self):
return "simple_dependency_006"

@property
def models(self):
return "test/integration/006_simple_dependency_test/models"

@property
def packages_config(self):
# dbt-event-logging@0.1.5 requires dbt-utils.git@0.1.12, which the
# package config handling should detect
return {
'packages': [
{
'git': 'https://github.com/fishtown-analytics/dbt-utils',
'revision': '0.1.12',
},
{
'git': 'https://github.com/fishtown-analytics/dbt-event-logging.git',
'revision': '0.1.5',
}
]
}

@use_profile('postgres')
def test_postgres_simple_dependency_deps(self):
self.run_dbt(["deps"])
self.assertEqual(len(os.listdir('dbt_modules')), 2)


class TestSimpleDependencyBranch(DBTIntegrationTest):

def setUp(self):
Expand Down