Skip to content

Commit f5e40b2

Browse files
get: leverage external_repo() context manager for DVC repositories
1 parent c16f882 commit f5e40b2

File tree

4 files changed

+45
-38
lines changed

4 files changed

+45
-38
lines changed

dvc/command/get.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ def run(self):
3232

3333
def add_parser(subparsers, parent_parser):
3434
GET_HELP = (
35-
"Download a file or directory from any DVC project or Git repository"
35+
"Download a file or directory from any DVC project or Git repository."
3636
)
3737
get_parser = subparsers.add_parser(
3838
"get",
@@ -43,7 +43,7 @@ def add_parser(subparsers, parent_parser):
4343
)
4444
get_parser.add_argument(
4545
"url",
46-
help="Location of DVC project or Git repository to download from.",
46+
help="Location of DVC project or Git repository to download from",
4747
)
4848
get_parser.add_argument(
4949
"path",
@@ -53,7 +53,7 @@ def add_parser(subparsers, parent_parser):
5353
"-o",
5454
"--out",
5555
nargs="?",
56-
help="Destination path to copy/download files to.",
56+
help="Destination path to download files to",
5757
)
5858
get_parser.add_argument(
5959
"--rev", nargs="?", help="Git revision (e.g. branch, tag, SHA)"

dvc/command/imp.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def run(self):
3030

3131
def add_parser(subparsers, parent_parser):
3232
IMPORT_HELP = (
33-
"Download data from DVC project or Git repository and take it under "
33+
"Download a file or directory from any DVC project or Git repository and take it under "
3434
"DVC control."
3535
)
3636

@@ -43,13 +43,13 @@ def add_parser(subparsers, parent_parser):
4343
)
4444
import_parser.add_argument(
4545
"url",
46-
help="Location of Git repository with DVC project to download from.",
46+
help="Location of DVC project or Git repository to download from",
4747
)
48-
import_parser.add_argument("path", help="Path to data within DVC project.")
48+
import_parser.add_argument("path", help="Path to a file or directory within the project or repository")
4949
import_parser.add_argument(
50-
"-o", "--out", nargs="?", help="Destination path to put data in."
50+
"-o", "--out", nargs="?", help="Destination path to download files to"
5151
)
5252
import_parser.add_argument(
53-
"--rev", nargs="?", help="DVC repository git revision."
53+
"--rev", nargs="?", help="Git revision (e.g. branch, tag, SHA)"
5454
)
5555
import_parser.set_defaults(func=CmdImport)

dvc/external_repo.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def external_repo(url=None, rev=None, rev_lock=None, cache_dir=None):
3333
repo.close()
3434

3535

36-
def cached_clone(url, rev=None, clone_path=None, **_ignored_kwargs):
36+
def cached_clone(url, rev=None, **_ignored_kwargs):
3737
"""Clone an external git repo to a temporary directory.
3838
3939
Returns the path to a local temporary directory with the specified
@@ -44,7 +44,7 @@ def cached_clone(url, rev=None, clone_path=None, **_ignored_kwargs):
4444
4545
"""
4646

47-
new_path = clone_path or tempfile.mkdtemp("dvc-erepo")
47+
new_path = tempfile.mkdtemp("dvc-erepo")
4848

4949
# Copy and adjust existing clean clone
5050
if (url, None, None) in REPO_CACHE:

dvc/repo/get.py

Lines changed: 35 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
OutputNotFoundError,
1010
PathMissingError,
1111
)
12-
from dvc.external_repo import cached_clone
12+
from dvc.external_repo import external_repo, cached_clone
1313
from dvc.path_info import PathInfo
1414
from dvc.stage import Stage
1515
from dvc.utils import resolve_output
@@ -29,8 +29,6 @@ def __init__(self):
2929

3030
@staticmethod
3131
def get(url, path, out=None, rev=None):
32-
from dvc.repo import Repo
33-
3432
out = resolve_output(path, out)
3533

3634
if Stage.is_valid_filename(out):
@@ -43,41 +41,50 @@ def get(url, path, out=None, rev=None):
4341
# and won't work with reflink/hardlink.
4442
dpath = os.path.dirname(os.path.abspath(out))
4543
tmp_dir = os.path.join(dpath, "." + str(shortuuid.uuid()))
44+
raw_git_dir = None
4645
try:
47-
cached_clone(url, rev=rev, clone_path=tmp_dir)
4846
try:
49-
repo = Repo(tmp_dir)
50-
51-
# Try any links possible to avoid data duplication.
52-
#
53-
# Not using symlink, because we need to remove cache after we are
54-
# done, and to make that work we would have to copy data over
55-
# anyway before removing the cache, so we might just copy it
56-
# right away.
57-
#
58-
# Also, we can't use theoretical "move" link type here, because
59-
# the same cache file might be used a few times in a directory.
60-
repo.cache.local.cache_types = ["reflink", "hardlink", "copy"]
61-
62-
output = repo.find_out_by_relpath(path)
63-
if output.use_cache:
64-
_get_cached(repo, output, out)
47+
with external_repo(cache_dir=tmp_dir, url=url, rev=rev) as repo:
48+
# Try any links possible to avoid data duplication.
49+
#
50+
# Not using symlink, because we need to remove cache after we
51+
# are done, and to make that work we would have to copy data
52+
# over anyway before removing the cache, so we might just copy
53+
# it right away.
54+
#
55+
# Also, we can't use theoretical "move" link type here, because
56+
# the same cache file might be used a few times in a directory.
57+
repo.cache.local.cache_types = ["reflink", "hardlink", "copy"]
58+
59+
try:
60+
output = repo.find_out_by_relpath(path)
61+
except OutputNotFoundError:
62+
output = None
63+
64+
if output and output.use_cache:
65+
_get_cached(repo, output, out)
66+
return
67+
68+
# Either an uncached out with absolute path or a user error
69+
70+
if os.path.isabs(path):
71+
raise FileNotFoundError
72+
73+
fs_copy(os.path.join(repo.root_dir, path), out)
6574
return
6675

67-
except (NotDvcRepoError, OutputNotFoundError):
76+
except NotDvcRepoError:
77+
# Not a DVC repository, continue below and copy from git
6878
pass
6979

70-
# It's an uncached out with absolute path, a non-DVC repo, or a
71-
# user error
72-
if os.path.isabs(path):
73-
raise FileNotFoundError
74-
75-
fs_copy(os.path.join(tmp_dir, path), out)
76-
80+
raw_git_dir = cached_clone(url, rev=rev)
81+
fs_copy(os.path.join(raw_git_dir, path), out)
7782
except (OutputNotFoundError, FileNotFoundError):
7883
raise PathMissingError(path, url)
7984
finally:
8085
remove(tmp_dir)
86+
if raw_git_dir:
87+
remove(raw_git_dir)
8188

8289

8390
def _get_cached(repo, output, out):

0 commit comments

Comments
 (0)