Skip to content

Commit 0dd5647

Browse files
get: handle non-DVC repositories
Allows us to `dvc get` from non-DVC source repositories. Fixes #3089
1 parent 095464d commit 0dd5647

File tree

4 files changed

+27
-31
lines changed

4 files changed

+27
-31
lines changed

dvc/exceptions.py

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -240,11 +240,6 @@ def __init__(self, ignore_dirname):
240240
)
241241

242242

243-
class UrlNotDvcRepoError(DvcException):
244-
def __init__(self, url):
245-
super().__init__("URL '{}' is not a dvc repository.".format(url))
246-
247-
248243
class GitHookAlreadyExistsError(DvcException):
249244
def __init__(self, hook_name):
250245
super().__init__(

dvc/external_repo.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ def external_repo(url=None, rev=None, rev_lock=None, cache_dir=None):
3333
repo.close()
3434

3535

36-
def cached_clone(url, rev=None, **_ignored_kwargs):
36+
def cached_clone(url, rev=None, clone_path=None, **_ignored_kwargs):
3737
"""Clone an external git repo to a temporary directory.
3838
3939
Returns the path to a local temporary directory with the specified
@@ -44,7 +44,7 @@ def cached_clone(url, rev=None, **_ignored_kwargs):
4444
4545
"""
4646

47-
new_path = tempfile.mkdtemp("dvc-erepo")
47+
new_path = clone_path or tempfile.mkdtemp("dvc-erepo")
4848

4949
# Copy and adjust existing clean clone
5050
if (url, None, None) in REPO_CACHE:

dvc/repo/get.py

Lines changed: 22 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -7,10 +7,9 @@
77
DvcException,
88
NotDvcRepoError,
99
OutputNotFoundError,
10-
UrlNotDvcRepoError,
1110
PathMissingError,
1211
)
13-
from dvc.external_repo import external_repo
12+
from dvc.external_repo import cached_clone
1413
from dvc.path_info import PathInfo
1514
from dvc.stage import Stage
1615
from dvc.utils import resolve_output
@@ -28,8 +27,15 @@ def __init__(self):
2827
)
2928

3029

30+
# Dummy exception raised to signal a plain file copy is needed
31+
class _DoPlainCopy(DvcException):
32+
pass
33+
34+
3135
@staticmethod
3236
def get(url, path, out=None, rev=None):
37+
from dvc.repo import Repo
38+
3339
out = resolve_output(path, out)
3440

3541
if Stage.is_valid_filename(out):
@@ -43,7 +49,8 @@ def get(url, path, out=None, rev=None):
4349
dpath = os.path.dirname(os.path.abspath(out))
4450
tmp_dir = os.path.join(dpath, "." + str(shortuuid.uuid()))
4551
try:
46-
with external_repo(cache_dir=tmp_dir, url=url, rev=rev) as repo:
52+
cached_clone(url, rev=rev, clone_path=tmp_dir)
53+
try:
4754
# Try any links possible to avoid data duplication.
4855
#
4956
# Not using symlink, because we need to remove cache after we are
@@ -53,26 +60,24 @@ def get(url, path, out=None, rev=None):
5360
#
5461
# Also, we can't use theoretical "move" link type here, because
5562
# the same cache file might be used a few times in a directory.
63+
repo = Repo(tmp_dir)
5664
repo.cache.local.cache_types = ["reflink", "hardlink", "copy"]
65+
output = repo.find_out_by_relpath(path)
66+
if not output.use_cache:
67+
# Catch this below and go for a plain old fs_copy
68+
raise _DoPlainCopy
69+
_get_cached(repo, output, out)
5770

58-
try:
59-
output = repo.find_out_by_relpath(path)
60-
except OutputNotFoundError:
61-
output = None
62-
63-
if output and output.use_cache:
64-
_get_cached(repo, output, out)
65-
else:
66-
# Either an uncached out with absolute path or a user error
67-
if os.path.isabs(path):
68-
raise FileNotFoundError
71+
except (NotDvcRepoError, OutputNotFoundError, _DoPlainCopy):
72+
# It's an uncached out with absolute path, a non-DVC repo, or a
73+
# user error
74+
if os.path.isabs(path):
75+
raise FileNotFoundError
6976

70-
fs_copy(os.path.join(repo.root_dir, path), out)
77+
fs_copy(os.path.join(tmp_dir, path), out)
7178

7279
except (OutputNotFoundError, FileNotFoundError):
7380
raise PathMissingError(path, url)
74-
except NotDvcRepoError:
75-
raise UrlNotDvcRepoError(url)
7681
finally:
7782
remove(tmp_dir)
7883

tests/func/test_get.py

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55

66
from dvc.cache import Cache
77
from dvc.config import Config
8-
from dvc.exceptions import UrlNotDvcRepoError
98
from dvc.repo.get import GetDVCFileError, PathMissingError
109
from dvc.repo import Repo
1110
from dvc.system import System
@@ -87,9 +86,10 @@ def test_get_repo_rev(tmp_dir, erepo_dir):
8786
def test_get_from_non_dvc_repo(tmp_dir, erepo_dir):
8887
erepo_dir.scm.repo.index.remove([erepo_dir.dvc.dvc_dir], r=True)
8988
erepo_dir.scm.commit("remove dvc")
89+
erepo_dir.scm_gen({"some_file": "contents"}, commit="create file")
9090

91-
with pytest.raises(UrlNotDvcRepoError):
92-
Repo.get(fspath(erepo_dir), "some_file.zip")
91+
Repo.get(fspath(erepo_dir), "some_file", "file_imported")
92+
assert (tmp_dir / "file_imported").read_text() == "contents"
9393

9494

9595
def test_get_a_dvc_file(tmp_dir, erepo_dir):
@@ -164,10 +164,6 @@ def test_get_from_non_dvc_master(tmp_dir, erepo_dir, caplog):
164164
erepo_dir.dvc.scm.repo.index.remove([".dvc"], r=True)
165165
erepo_dir.dvc.scm.commit("remove .dvc")
166166

167-
# sanity check
168-
with pytest.raises(UrlNotDvcRepoError):
169-
Repo.get(fspath(erepo_dir), "some_file")
170-
171167
caplog.clear()
172168
dst = "file_imported"
173169
with caplog.at_level(logging.INFO, logger="dvc"):

0 commit comments

Comments
 (0)