Skip to content

Commit 4bde0e7

Browse files
rgoyaskshetry
andauthored
Forward args to _get_remote_config() and honour core/no_scm if present (#10719)
* Forward args to _get_remote_config and honour core/no_scm if present * Pass full config, except cache, to Repo() * Avoid modifying original kwargs contents * Simplify if config * Add test * Relax error matching.. .. Avoid windows vs unix paths * Update dvc/repo/open_repo.py --------- Co-authored-by: skshetry <18718008+skshetry@users.noreply.github.com>
1 parent 88f1ada commit 4bde0e7

File tree

3 files changed

+56
-3
lines changed

3 files changed

+56
-3
lines changed

dvc/dependency/repo.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,10 @@ def _make_fs(
146146
else:
147147
config = Config.load_file(conf)
148148

149+
# Setup config to the new DVCFileSystem to use the remote repo, but rely on the
150+
# local cache instead of the remote's cache. This avoids re-streaming of data,
151+
# but messes up the call to `_get_remote_config()` downstream, which will need
152+
# to ignore cache parameters.
149153
config["cache"] = self.repo.config["cache"]
150154
config["cache"]["dir"] = self.repo.cache.local_cache_dir
151155

dvc/repo/open_repo.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import copy
12
import os
23
import tempfile
34
import threading
@@ -50,7 +51,7 @@ def open_repo(url, *args, **kwargs):
5051
if os.path.exists(url):
5152
url = os.path.abspath(url)
5253
try:
53-
config = _get_remote_config(url)
54+
config = _get_remote_config(url, *args, **kwargs)
5455
config.update(kwargs.get("config") or {})
5556
kwargs["config"] = config
5657
return Repo(url, *args, **kwargs)
@@ -97,9 +98,24 @@ def clean_repos():
9798
_remove(path)
9899

99100

100-
def _get_remote_config(url):
101+
def _get_remote_config(url, *args, **kwargs):
101102
try:
102-
repo = Repo(url)
103+
# Deepcopy to prevent modifying the original `kwargs['config']`
104+
config = copy.deepcopy(kwargs.get("config"))
105+
106+
# Import operations will use this function to get the remote's cache. However,
107+
# while the `url` sent will point to the external repo, the cache information
108+
# in `kwargs["config"]["cache"]["dir"]`) will point to the local repo,
109+
# see `dvc/dependency/repo.py:RepoDependency._make_fs()`
110+
#
111+
# This breaks this function, since we'd be instructing `Repo()` to use the wrong
112+
# cache to being with. We need to remove the cache info from `kwargs["config"]`
113+
# to read the actual remote repo data.
114+
if config:
115+
config.pop("cache", None)
116+
117+
repo = Repo(url, config=config)
118+
103119
except NotDvcRepoError:
104120
return {}
105121

tests/func/api/test_data.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
from dvc import api
77
from dvc.exceptions import OutputNotFoundError, PathMissingError
8+
from dvc.scm import CloneError, SCMError
89
from dvc.testing.api_tests import TestAPI # noqa: F401
910
from dvc.testing.tmp_dir import make_subrepo
1011
from dvc.utils.fs import remove
@@ -65,6 +66,38 @@ def test_get_url_from_remote(tmp_dir, erepo_dir, cloud, local_cloud):
6566
)
6667

6768

69+
def test_get_url_ignore_scm(tmp_dir, dvc, cloud, scm):
70+
tmp_dir.add_remote(config=cloud.config)
71+
tmp_dir.dvc_gen("foo", "foo", commit="add foo")
72+
73+
repo_posix = tmp_dir.as_posix()
74+
expected_url = (cloud / "files" / "md5" / "ac/bd18db4cc2f85cedef654fccc4a4d8").url
75+
76+
# Test baseline with scm
77+
assert api.get_url("foo", repo=repo_posix) == expected_url
78+
79+
# Simulate gitless environment (e.g. deployed container)
80+
(tmp_dir / ".git").rename(tmp_dir / "gitless_environment")
81+
82+
# Test failure mode when trying to access with git
83+
with pytest.raises(SCMError, match="is not a git repository"):
84+
api.get_url("foo", repo=repo_posix)
85+
86+
# Test successful access by ignoring git
87+
assert (
88+
api.get_url("foo", repo=repo_posix, config={"core": {"no_scm": True}})
89+
== expected_url
90+
)
91+
92+
# Addressing repos with `file://` triggers git, so it fails in a gitless environment
93+
repo_url = f"file://{repo_posix}"
94+
with pytest.raises(
95+
CloneError,
96+
match="SCM error",
97+
):
98+
api.get_url("foo", repo=repo_url, config={"core": {"no_scm": True}})
99+
100+
68101
def test_open_external(tmp_dir, erepo_dir, cloud):
69102
erepo_dir.add_remote(config=cloud.config)
70103

0 commit comments

Comments
 (0)