Skip to content

Commit f935748

Browse files
authored
Merge pull request #3020 from chatcannon/dvc-import-regular-git
import: allow importing from non-DVC git repositories
2 parents 54faf1b + 3ceb1db commit f935748

File tree

3 files changed

+64
-31
lines changed

3 files changed

+64
-31
lines changed

dvc/dependency/repo.py

Lines changed: 27 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,9 @@
55
from funcy import merge
66

77
from .local import DependencyLOCAL
8+
from dvc.external_repo import cached_clone
89
from dvc.external_repo import external_repo
10+
from dvc.exceptions import NotDvcRepoError
911
from dvc.exceptions import OutputNotFoundError
1012
from dvc.exceptions import PathMissingError
1113
from dvc.utils.fs import fs_copy
@@ -74,27 +76,35 @@ def fetch(self):
7476
return out
7577

7678
@staticmethod
77-
def _is_git_file(repo, path):
78-
if not os.path.isabs(path):
79-
try:
80-
output = repo.find_out_by_relpath(path)
81-
if not output.use_cache:
82-
return True
83-
except OutputNotFoundError:
84-
return True
85-
return False
79+
def _is_git_file(repo_dir, path):
80+
from dvc.repo import Repo
81+
82+
if os.path.isabs(path):
83+
return False
84+
85+
try:
86+
repo = Repo(repo_dir)
87+
except NotDvcRepoError:
88+
return True
89+
90+
try:
91+
output = repo.find_out_by_relpath(path)
92+
return not output.use_cache
93+
except OutputNotFoundError:
94+
return True
95+
finally:
96+
repo.close()
8697

8798
def _copy_if_git_file(self, to_path):
8899
src_path = self.def_path
89-
with self._make_repo(
90-
cache_dir=self.repo.cache.local.cache_dir
91-
) as repo:
92-
if not self._is_git_file(repo, src_path):
93-
return False
100+
repo_dir = cached_clone(**self.def_repo)
101+
102+
if not self._is_git_file(repo_dir, src_path):
103+
return False
94104

95-
src_full_path = os.path.join(repo.root_dir, src_path)
96-
dst_full_path = os.path.abspath(to_path)
97-
fs_copy(src_full_path, dst_full_path)
105+
src_full_path = os.path.join(repo_dir, src_path)
106+
dst_full_path = os.path.abspath(to_path)
107+
fs_copy(src_full_path, dst_full_path)
98108
return True
99109

100110
def download(self, to):

dvc/external_repo.py

Lines changed: 25 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -33,18 +33,20 @@ def external_repo(url=None, rev=None, rev_lock=None, cache_dir=None):
3333
repo.close()
3434

3535

36-
def _external_repo(url=None, rev=None, cache_dir=None):
37-
from dvc.config import Config
38-
from dvc.cache import CacheConfig
39-
from dvc.repo import Repo
36+
def cached_clone(url, rev=None, **_ignored_kwargs):
37+
"""Clone an external git repo to a temporary directory.
4038
41-
key = (url, rev, cache_dir)
42-
if key in REPO_CACHE:
43-
return REPO_CACHE[key]
39+
Returns the path to a local temporary directory with the specified
40+
revision checked out.
41+
42+
Uses the REPO_CACHE to avoid accessing the remote server again if
43+
cloning from the same URL twice in the same session.
44+
45+
"""
4446

4547
new_path = tempfile.mkdtemp("dvc-erepo")
4648

47-
# Copy and adjust existing clone
49+
# Copy and adjust existing clean clone
4850
if (url, None, None) in REPO_CACHE:
4951
old_path = REPO_CACHE[url, None, None]
5052

@@ -59,13 +61,24 @@ def _external_repo(url=None, rev=None, cache_dir=None):
5961
copy_tree(new_path, clean_clone_path)
6062
REPO_CACHE[url, None, None] = clean_clone_path
6163

62-
# Adjust new clone/copy to fit rev and cache_dir
63-
64-
# Checkout needs to be done first because current branch might not be
65-
# DVC repository
64+
# Check out the specified revision
6665
if rev is not None:
6766
_git_checkout(new_path, rev)
6867

68+
return new_path
69+
70+
71+
def _external_repo(url=None, rev=None, cache_dir=None):
72+
from dvc.config import Config
73+
from dvc.cache import CacheConfig
74+
from dvc.repo import Repo
75+
76+
key = (url, rev, cache_dir)
77+
if key in REPO_CACHE:
78+
return REPO_CACHE[key]
79+
80+
new_path = cached_clone(url, rev=rev)
81+
6982
repo = Repo(new_path)
7083
try:
7184
# check if the URL is local and no default remote is present

tests/func/test_import.py

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,12 @@ def test_import(tmp_dir, scm, dvc, erepo_dir):
2828
assert scm.repo.git.check_ignore("foo_imported")
2929

3030

31-
def test_import_git_file(erepo_dir, tmp_dir, dvc, scm):
31+
@pytest.mark.parametrize("src_is_dvc", [True, False])
32+
def test_import_git_file(erepo_dir, tmp_dir, dvc, scm, src_is_dvc):
33+
if not src_is_dvc:
34+
erepo_dir.dvc.scm.repo.index.remove([".dvc"], r=True)
35+
erepo_dir.dvc.scm.commit("remove .dvc")
36+
3237
src = "some_file"
3338
dst = "some_file_imported"
3439

@@ -43,7 +48,12 @@ def test_import_git_file(erepo_dir, tmp_dir, dvc, scm):
4348
assert tmp_dir.scm.repo.git.check_ignore(fspath(tmp_dir / dst))
4449

4550

46-
def test_import_git_dir(erepo_dir, tmp_dir, dvc, scm):
51+
@pytest.mark.parametrize("src_is_dvc", [True, False])
52+
def test_import_git_dir(erepo_dir, tmp_dir, dvc, scm, src_is_dvc):
53+
if not src_is_dvc:
54+
erepo_dir.dvc.scm.repo.index.remove([".dvc"], r=True)
55+
erepo_dir.dvc.scm.commit("remove .dvc")
56+
4757
src = "some_directory"
4858
dst = "some_directory_imported"
4959

0 commit comments

Comments
 (0)