Skip to content

Commit b7f9e73

Browse files
authored
Merge pull request #3097 from fabiosantoscode/feature/3089-get-non-dvc-repositories
get: handle non-DVC repositories
2 parents 9e9f37e + de303d4 commit b7f9e73

File tree

5 files changed

+64
-52
lines changed

5 files changed

+64
-52
lines changed

dvc/command/get.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,9 @@ def run(self):
3131

3232

3333
def add_parser(subparsers, parent_parser):
34-
GET_HELP = "Download/copy files or directories from DVC repository."
34+
GET_HELP = (
35+
"Download a file or directory from any DVC project or Git repository."
36+
)
3537
get_parser = subparsers.add_parser(
3638
"get",
3739
parents=[parent_parser],
@@ -40,18 +42,17 @@ def add_parser(subparsers, parent_parser):
4042
formatter_class=argparse.RawDescriptionHelpFormatter,
4143
)
4244
get_parser.add_argument(
43-
"url", help="URL of Git repository with DVC project to download from."
45+
"url",
46+
help="Location of DVC project or Git repository to download from",
4447
)
4548
get_parser.add_argument(
46-
"path", help="Path to a file or directory within a DVC repository."
49+
"path",
50+
help="Path to a file or directory within the project or repository",
4751
)
4852
get_parser.add_argument(
49-
"-o",
50-
"--out",
51-
nargs="?",
52-
help="Destination path to copy/download files to.",
53+
"-o", "--out", nargs="?", help="Destination path to download files to"
5354
)
5455
get_parser.add_argument(
55-
"--rev", nargs="?", help="DVC repository git revision."
56+
"--rev", nargs="?", help="Git revision (e.g. branch, tag, SHA)"
5657
)
5758
get_parser.set_defaults(func=CmdGet)

dvc/command/imp.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ def run(self):
3030

3131
def add_parser(subparsers, parent_parser):
3232
IMPORT_HELP = (
33-
"Download data from DVC repository and take it under DVC control."
33+
"Download a file or directory from any DVC project or Git repository "
34+
"and take it under DVC control."
3435
)
3536

3637
import_parser = subparsers.add_parser(
@@ -41,15 +42,17 @@ def add_parser(subparsers, parent_parser):
4142
formatter_class=argparse.RawTextHelpFormatter,
4243
)
4344
import_parser.add_argument(
44-
"url", help="URL of Git repository with DVC project to download from."
45+
"url",
46+
help="Location of DVC project or Git repository to download from",
4547
)
4648
import_parser.add_argument(
47-
"path", help="Path to data within DVC repository."
49+
"path",
50+
help="Path to a file or directory within the project or repository",
4851
)
4952
import_parser.add_argument(
50-
"-o", "--out", nargs="?", help="Destination path to put data to."
53+
"-o", "--out", nargs="?", help="Destination path to download files to"
5154
)
5255
import_parser.add_argument(
53-
"--rev", nargs="?", help="DVC repository git revision."
56+
"--rev", nargs="?", help="Git revision (e.g. branch, tag, SHA)"
5457
)
5558
import_parser.set_defaults(func=CmdImport)

dvc/exceptions.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -240,11 +240,6 @@ def __init__(self, ignore_dirname):
240240
)
241241

242242

243-
class UrlNotDvcRepoError(DvcException):
244-
def __init__(self, url):
245-
super().__init__("URL '{}' is not a dvc repository.".format(url))
246-
247-
248243
class GitHookAlreadyExistsError(DvcException):
249244
def __init__(self, hook_name):
250245
super().__init__(

dvc/repo/get.py

Lines changed: 36 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,9 @@
77
DvcException,
88
NotDvcRepoError,
99
OutputNotFoundError,
10-
UrlNotDvcRepoError,
1110
PathMissingError,
1211
)
13-
from dvc.external_repo import external_repo
12+
from dvc.external_repo import external_repo, cached_clone
1413
from dvc.path_info import PathInfo
1514
from dvc.stage import Stage
1615
from dvc.utils import resolve_output
@@ -42,37 +41,47 @@ def get(url, path, out=None, rev=None):
4241
# and won't work with reflink/hardlink.
4342
dpath = os.path.dirname(os.path.abspath(out))
4443
tmp_dir = os.path.join(dpath, "." + str(shortuuid.uuid()))
44+
repo_dir = None
4545
try:
46-
with external_repo(cache_dir=tmp_dir, url=url, rev=rev) as repo:
47-
# Try any links possible to avoid data duplication.
48-
#
49-
# Not using symlink, because we need to remove cache after we are
50-
# done, and to make that work we would have to copy data over
51-
# anyway before removing the cache, so we might just copy it
52-
# right away.
53-
#
54-
# Also, we can't use theoretical "move" link type here, because
55-
# the same cache file might be used a few times in a directory.
56-
repo.cache.local.cache_types = ["reflink", "hardlink", "copy"]
57-
58-
try:
59-
output = repo.find_out_by_relpath(path)
60-
except OutputNotFoundError:
61-
output = None
62-
63-
if output and output.use_cache:
64-
_get_cached(repo, output, out)
65-
else:
46+
try:
47+
with external_repo(cache_dir=tmp_dir, url=url, rev=rev) as repo:
48+
# Try any links possible to avoid data duplication.
49+
#
50+
# Not using symlink, because we need to remove cache after we
51+
# are done, and to make that work we would have to copy data
52+
# over anyway before removing the cache, so we might just copy
53+
# it right away.
54+
#
55+
# Also, we can't use theoretical "move" link type here, because
56+
# the same cache file might be used a few times in a directory.
57+
repo.cache.local.cache_types = ["reflink", "hardlink", "copy"]
58+
59+
try:
60+
output = repo.find_out_by_relpath(path)
61+
except OutputNotFoundError:
62+
output = None
63+
64+
if output and output.use_cache:
65+
_get_cached(repo, output, out)
66+
return
67+
6668
# Either an uncached out with absolute path or a user error
67-
if os.path.isabs(path):
68-
raise FileNotFoundError
6969

70-
fs_copy(os.path.join(repo.root_dir, path), out)
70+
repo_dir = repo.root_dir
71+
72+
except NotDvcRepoError:
73+
# Not a DVC repository, continue below and copy from git
74+
pass
75+
76+
if os.path.isabs(path):
77+
raise FileNotFoundError
78+
79+
if not repo_dir:
80+
repo_dir = cached_clone(url, rev=rev)
7181

82+
fs_copy(os.path.join(repo_dir, path), out)
7283
except (OutputNotFoundError, FileNotFoundError):
7384
raise PathMissingError(path, url)
74-
except NotDvcRepoError:
75-
raise UrlNotDvcRepoError(url)
7685
finally:
7786
remove(tmp_dir)
7887

tests/func/test_get.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55

66
from dvc.cache import Cache
77
from dvc.config import Config
8-
from dvc.exceptions import UrlNotDvcRepoError
98
from dvc.repo.get import GetDVCFileError, PathMissingError
109
from dvc.repo import Repo
1110
from dvc.system import System
@@ -87,9 +86,10 @@ def test_get_repo_rev(tmp_dir, erepo_dir):
8786
def test_get_from_non_dvc_repo(tmp_dir, erepo_dir):
8887
erepo_dir.scm.repo.index.remove([erepo_dir.dvc.dvc_dir], r=True)
8988
erepo_dir.scm.commit("remove dvc")
89+
erepo_dir.scm_gen({"some_file": "contents"}, commit="create file")
9090

91-
with pytest.raises(UrlNotDvcRepoError):
92-
Repo.get(fspath(erepo_dir), "some_file.zip")
91+
Repo.get(fspath(erepo_dir), "some_file", "file_imported")
92+
assert (tmp_dir / "file_imported").read_text() == "contents"
9393

9494

9595
def test_get_a_dvc_file(tmp_dir, erepo_dir):
@@ -136,6 +136,14 @@ def test_absolute_file_outside_repo(tmp_dir, erepo_dir):
136136
Repo.get(fspath(erepo_dir), "/root/")
137137

138138

139+
def test_absolute_file_outside_git_repo(tmp_dir, erepo_dir):
140+
erepo_dir.scm.repo.index.remove([erepo_dir.dvc.dvc_dir], r=True)
141+
erepo_dir.scm.commit("remove dvc")
142+
143+
with pytest.raises(PathMissingError):
144+
Repo.get(fspath(erepo_dir), "/root/")
145+
146+
139147
def test_unknown_path(tmp_dir, erepo_dir):
140148
with pytest.raises(PathMissingError):
141149
Repo.get(fspath(erepo_dir), "a_non_existing_file")
@@ -164,10 +172,6 @@ def test_get_from_non_dvc_master(tmp_dir, erepo_dir, caplog):
164172
erepo_dir.dvc.scm.repo.index.remove([".dvc"], r=True)
165173
erepo_dir.dvc.scm.commit("remove .dvc")
166174

167-
# sanity check
168-
with pytest.raises(UrlNotDvcRepoError):
169-
Repo.get(fspath(erepo_dir), "some_file")
170-
171175
caplog.clear()
172176
dst = "file_imported"
173177
with caplog.at_level(logging.INFO, logger="dvc"):

0 commit comments

Comments
 (0)