Skip to content

Commit 6718622

Browse files
authored
Merge pull request #3227 from Suor/git-pull
erepo: pull each time
2 parents 2f1224b + 7168a9c commit 6718622

File tree

4 files changed

+58
-59
lines changed

4 files changed

+58
-59
lines changed

dvc/external_repo.py

Lines changed: 39 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
1+
import logging
12
import os
23
import tempfile
34
from contextlib import contextmanager
45
from distutils.dir_util import copy_tree
6+
import threading
57

6-
from funcy import retry, suppress, memoize, cached_property
8+
from funcy import retry, suppress, wrap_with, cached_property
79

810
from dvc.compat import fspath
911
from dvc.repo import Repo
@@ -16,6 +18,9 @@
1618
from dvc.scm import SCM
1719

1820

21+
logger = logging.getLogger(__name__)
22+
23+
1924
@contextmanager
2025
def external_repo(url, rev=None):
2126
path = _cached_clone(url, rev)
@@ -40,8 +45,8 @@ def external_repo(url, rev=None):
4045

4146
def clean_repos():
4247
# Outside code should not see cache while we are removing
43-
repo_paths = list(_cached_clone.memory.values())
44-
_cached_clone.memory.clear()
48+
repo_paths = list(REPOS_CACHE.values())
49+
REPOS_CACHE.clear()
4550

4651
for path in repo_paths:
4752
_remove(path)
@@ -137,41 +142,66 @@ def open_by_relpath(self, path, mode="r", encoding=None, **kwargs):
137142
raise PathMissingError(path, self.url)
138143

139144

140-
@memoize
145+
REPOS_CACHE = {}
146+
147+
148+
@wrap_with(threading.Lock())
141149
def _cached_clone(url, rev):
142150
"""Clone an external git repo to a temporary directory.
143151
144152
Returns the path to a local temporary directory with the specified
145153
revision checked out.
146154
"""
155+
if (url, rev) in REPOS_CACHE:
156+
path = REPOS_CACHE[url, rev]
157+
_git_pull(path, rev)
158+
return path
159+
147160
new_path = tempfile.mkdtemp("dvc-erepo")
148161

149-
if url in _cached_clone.memory:
162+
if url in REPOS_CACHE:
150163
# Copy an existing clean clone
151164
# This one unlike shutil.copytree() works with an existing dir
152-
copy_tree(_cached_clone.memory[url], new_path)
165+
copy_tree(REPOS_CACHE[url], new_path)
166+
_git_pull(new_path, None)
153167
else:
154168
# Create a new clone
155169
_clone_repo(url, new_path)
156170

157171
# Save clean clone dir so that we will have access to a default branch
158172
clean_clone_path = tempfile.mkdtemp("dvc-erepo")
159173
copy_tree(new_path, clean_clone_path)
160-
_cached_clone.memory[url] = clean_clone_path
174+
REPOS_CACHE[url] = clean_clone_path
161175

162176
# Check out the specified revision
163177
if rev is not None:
164178
_git_checkout(new_path, rev)
165179

180+
REPOS_CACHE[url, rev] = new_path
166181
return new_path
167182

168183

169-
def _git_checkout(repo_path, revision):
184+
def _git_checkout(repo_path, rev):
170185
from dvc.scm import Git
171186

172187
git = Git(repo_path)
173188
try:
174-
git.checkout(revision)
189+
git.checkout(rev)
190+
finally:
191+
git.close()
192+
193+
194+
def _git_pull(repo_path, rev):
    """Run ``git pull`` inside the local clone at ``repo_path``.

    The pull is skipped when ``rev`` matches GitPython's
    ``Repo.re_hexsha_only`` pattern (i.e. it looks like a full commit hash):
    a clone checked out at a hash is in detached mode and is not pulled.

    Args:
        repo_path: path to the local git working copy to update.
        rev: revision the clone was checked out at, or ``None`` for the
            default branch.
    """
    import git

    # Do not try to pull in a detached mode
    if rev and git.Repo.re_hexsha_only.search(rev):
        return

    # Use a distinct name so the imported ``git`` module is not shadowed.
    repo = git.Repo(repo_path)
    try:
        msg = repo.git.pull()
        # logging interpolates lazily with %-style placeholders; a "{}"
        # placeholder is never filled and triggers a formatting error.
        logger.debug("external repo: git pull: %s", msg)
    finally:
        # Always release the handles GitPython holds on the repository.
        repo.close()
177207

tests/func/test_external_repo.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,16 @@ def test_external_repo(erepo_dir):
2323
assert fd.read() == "branch"
2424

2525
assert mock.call_count == 1
26+
27+
28+
def test_source_change(erepo_dir):
29+
url = fspath(erepo_dir)
30+
with external_repo(url) as repo:
31+
old_rev = repo.scm.get_rev()
32+
33+
erepo_dir.scm_gen("file", "text", commit="a change")
34+
35+
with external_repo(url) as repo:
36+
new_rev = repo.scm.get_rev()
37+
38+
assert old_rev != new_rev

tests/func/test_status.py

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
from dvc.main import main
66
from dvc.compat import fspath
7-
from dvc.external_repo import clean_repos
87
from tests.basic_env import TestDvc
98

109

@@ -33,19 +32,12 @@ def test_status_non_dvc_repo_import(tmp_dir, dvc, git_dir):
3332

3433
dvc.imp(fspath(git_dir), "file", "file", rev="branch")
3534

36-
status = dvc.status(["file.dvc"])
37-
38-
assert status == {}
39-
40-
# Caching in external repos doesn't see upstream updates within single
41-
# cli call, so we need to clean the caches to see the changes.
42-
clean_repos()
35+
assert dvc.status(["file.dvc"]) == {}
4336

4437
with git_dir.branch("branch", new=False):
4538
git_dir.scm_gen("file", "second version", commit="update file")
4639

4740
status, = dvc.status(["file.dvc"])["file.dvc"]
48-
4941
assert status == {
5042
"changed deps": {"file ({})".format(git_dir): "update available"}
5143
}
@@ -68,12 +60,7 @@ def test_status_before_and_after_dvc_init(tmp_dir, dvc, git_dir):
6860

6961
assert old_rev != new_rev
7062

71-
# Caching in external repos doesn't see upstream updates within single
72-
# cli call, so we need to clean the caches to see the changes.
73-
clean_repos()
74-
7563
status, = dvc.status(["file.dvc"])["file.dvc"]
76-
7764
assert status == {
7865
"changed deps": {
7966
"file ({})".format(fspath(git_dir)): "update available"

tests/func/test_update.py

Lines changed: 5 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33

44
from dvc.stage import Stage
55
from dvc.compat import fspath
6-
from dvc.external_repo import clean_repos
76

87

98
@pytest.mark.parametrize("cached", [True, False])
@@ -16,43 +15,21 @@ def test_update_import(tmp_dir, dvc, erepo_dir, cached):
1615

1716
stage = dvc.imp(fspath(erepo_dir), "version", "version", rev="branch")
1817

19-
imported = tmp_dir / "version"
20-
assert imported.is_file()
21-
assert imported.read_text() == "branch"
22-
assert stage.deps[0].def_repo == {
23-
"url": fspath(erepo_dir),
24-
"rev": "branch",
25-
"rev_lock": old_rev,
26-
}
18+
assert (tmp_dir / "version").read_text() == "branch"
19+
assert stage.deps[0].def_repo["rev_lock"] == old_rev
2720

21+
# Update version file
2822
with erepo_dir.branch("branch", new=False), erepo_dir.chdir():
2923
gen("version", "updated", "update version content")
3024
new_rev = erepo_dir.scm.get_rev()
3125

3226
assert old_rev != new_rev
3327

34-
# Caching in external repos doesn't see upstream updates within single
35-
# cli call, so we need to clean the caches to see the changes.
36-
clean_repos()
37-
38-
status, = dvc.status([stage.path])["version.dvc"]
39-
changed_dep, = list(status["changed deps"].items())
40-
assert changed_dep[0].startswith("version ")
41-
assert changed_dep[1] == "update available"
42-
4328
dvc.update(stage.path)
44-
45-
assert dvc.status([stage.path]) == {}
46-
47-
assert imported.is_file()
48-
assert imported.read_text() == "updated"
29+
assert (tmp_dir / "version").read_text() == "updated"
4930

5031
stage = Stage.load(dvc, stage.path)
51-
assert stage.deps[0].def_repo == {
52-
"url": fspath(erepo_dir),
53-
"rev": "branch",
54-
"rev_lock": new_rev,
55-
}
32+
assert stage.deps[0].def_repo["rev_lock"] == new_rev
5633

5734

5835
def test_update_import_after_remote_updates_to_dvc(tmp_dir, dvc, erepo_dir):
@@ -82,10 +59,6 @@ def test_update_import_after_remote_updates_to_dvc(tmp_dir, dvc, erepo_dir):
8259

8360
assert old_rev != new_rev
8461

85-
# Caching in external repos doesn't see upstream updates within single
86-
# cli call, so we need to clean the caches to see the changes.
87-
clean_repos()
88-
8962
status, = dvc.status([stage.path])["version.dvc"]
9063
changed_dep, = list(status["changed deps"].items())
9164
assert changed_dep[0].startswith("version ")
@@ -122,10 +95,6 @@ def test_update_before_and_after_dvc_init(tmp_dir, dvc, git_dir):
12295

12396
assert old_rev != new_rev
12497

125-
# Caching in external repos doesn't see upstream updates within single
126-
# cli call, so we need to clean the caches to see the changes.
127-
clean_repos()
128-
12998
assert dvc.status([stage.path]) == {
13099
"file.dvc": [
131100
{

0 commit comments

Comments
 (0)