Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update: prevent re-download on every update #8752

Merged
merged 1 commit into from
Jan 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions dvc/stage/imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,20 @@ def update_import(
stage, rev=None, to_remote=False, remote=None, no_download=None, jobs=None
):
stage.deps[0].update(rev=rev)
outs = stage.outs
deps = stage.deps

frozen = stage.frozen
stage.frozen = False
changed = stage.changed()

if stage.outs:
stage.outs[0].clear()
try:
if to_remote:
_update_import_on_remote(stage, remote, jobs)
else:
stage.reproduce(no_download=no_download, jobs=jobs)
finally:
if deps == stage.deps:
stage.outs = outs
if no_download and changed:
# Avoid retaining stale information
stage.outs[0].clear()
stage.frozen = frozen


Expand Down
68 changes: 63 additions & 5 deletions tests/func/test_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import pytest

from dvc.dependency import base
from dvc.dvcfile import Dvcfile
from dvc.exceptions import InvalidArgumentError
from dvc.testing.tmp_dir import make_subrepo
Expand Down Expand Up @@ -156,6 +157,47 @@ def test_update_before_and_after_dvc_init(tmp_dir, dvc, git_dir):
assert dvc.status([stage.path]) == {}


def test_update_unchanged(tmp_dir, dvc, erepo_dir, mocker):
with erepo_dir.chdir():
erepo_dir.dvc_gen("file", "file content", commit="add file")

assert (erepo_dir / "file").exists()
stage = dvc.imp(os.fspath(erepo_dir), "file")

spy = mocker.spy(base, "fs_download")
dvc.update([stage.path])

assert not spy.called


@pytest.mark.parametrize("outs_exist", (False, True))
def test_update_no_download(tmp_dir, dvc, erepo_dir, outs_exist, mocker):
with erepo_dir.chdir():
erepo_dir.dvc_gen("file", "file content", commit="add file")
initial_rev = erepo_dir.scm.get_rev()

stage = dvc.imp(os.fspath(erepo_dir), "file", no_download=not outs_exist)

assert stage.deps[0].def_repo["rev_lock"] == initial_rev

dst = tmp_dir / "file"
assert dst.exists() is outs_exist

with erepo_dir.chdir():
erepo_dir.dvc_gen("file", "updated file content", commit="update file")
new_rev = erepo_dir.scm.get_rev()

updated_stage = dvc.update([stage.path], rev=new_rev, no_download=True)[0]
assert not dst.exists()

assert updated_stage.deps[0].def_repo["rev_lock"] == new_rev

# output must have no information since no_download=True
out = updated_stage.outs[0]
assert out.hash_info.value is None
assert out.meta.size is None


def test_update_import_url(tmp_dir, dvc, workspace):
workspace.gen("file", "file content")

Expand All @@ -176,17 +218,20 @@ def test_update_import_url(tmp_dir, dvc, workspace):
assert dst.read_text() == "updated file content"


def test_update_import_url_no_download(tmp_dir, dvc, workspace):
@pytest.mark.parametrize("outs_exist", (False, True))
def test_update_import_url_no_download(
dtrifiro marked this conversation as resolved.
Show resolved Hide resolved
tmp_dir, dvc, workspace, outs_exist, mocker
):
workspace.gen("file", "file content")

dst = tmp_dir / "imported_file"
stage = dvc.imp_url(
"remote://workspace/file", os.fspath(dst), no_download=True
"remote://workspace/file", os.fspath(dst), no_download=not outs_exist
)
hash_info = stage.deps[0].hash_info

assert not dst.is_file()
assert stage.deps[0].hash_info.value == "d10b4c3ff123b26dc068d43a8bef2d23"
assert dst.exists() is outs_exist
hash_info = stage.deps[0].hash_info
assert hash_info.value == "d10b4c3ff123b26dc068d43a8bef2d23"

workspace.gen("file", "updated file content")

Expand All @@ -197,12 +242,25 @@ def test_update_import_url_no_download(tmp_dir, dvc, workspace):
assert updated_hash_info != hash_info
assert updated_hash_info.value == "6ffba511ce3aa40b8231d1b1f8c5fba5"

# output must have no information since no_download=True
out = updated_stage.outs[0]
assert out.hash_info.value is None
assert out.hash_info.name is None
assert out.meta.size is None


def test_update_import_url_unchanged(tmp_dir, dvc, workspace, mocker):
workspace.gen("file", "file content")

dst = tmp_dir / "imported_file"
stage = dvc.imp_url("remote://workspace/file", os.fspath(dst))

spy = mocker.spy(base, "fs_download")

dvc.update([stage.path])
assert not spy.called


def test_update_rev(tmp_dir, dvc, scm, git_dir):
with git_dir.chdir():
git_dir.scm_gen({"foo": "foo"}, commit="first")
Expand Down
7 changes: 4 additions & 3 deletions tests/unit/stage/test_stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,18 +55,19 @@ def test_path_conversion(dvc):
assert stage.dumpd()["wdir"] == "../.."


def test_stage_update(mocker):
def test_stage_update(dvc, mocker):
dep = RepoDependency({"url": "example.com"}, None, "dep_path")
mocker.patch.object(dep, "update", return_value=None)

stage = Stage(None, "path", deps=[dep])
stage = Stage(dvc, "path", deps=[dep])
reproduce = mocker.patch.object(stage, "reproduce")
is_repo_import = mocker.patch(
__name__ + ".Stage.is_repo_import", new_callable=mocker.PropertyMock
)

is_repo_import.return_value = True
stage.update()
with dvc.lock:
stage.update()
assert reproduce.called_once_with()

is_repo_import.return_value = False
Expand Down