Skip to content

Commit

Permalink
update: prevent re-download on every update
Browse files Browse the repository at this point in the history
  • Loading branch information
dtrifiro committed Jan 3, 2023
1 parent a24c389 commit 7c07050
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 14 deletions.
10 changes: 4 additions & 6 deletions dvc/stage/imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,20 @@ def update_import(
stage, rev=None, to_remote=False, remote=None, no_download=None, jobs=None
):
stage.deps[0].update(rev=rev)
outs = stage.outs
deps = stage.deps

frozen = stage.frozen
stage.frozen = False
changed = stage.changed()

if stage.outs:
stage.outs[0].clear()
try:
if to_remote:
_update_import_on_remote(stage, remote, jobs)
else:
stage.reproduce(no_download=no_download, jobs=jobs)
finally:
if deps == stage.deps:
stage.outs = outs
if no_download and changed:
# Avoid retaining stale information
stage.outs[0].clear()
stage.frozen = frozen


Expand Down
68 changes: 63 additions & 5 deletions tests/func/test_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import pytest

from dvc.dependency import base
from dvc.dvcfile import Dvcfile
from dvc.exceptions import InvalidArgumentError
from dvc.testing.tmp_dir import make_subrepo
Expand Down Expand Up @@ -156,6 +157,47 @@ def test_update_before_and_after_dvc_init(tmp_dir, dvc, git_dir):
assert dvc.status([stage.path]) == {}


def test_update_unchanged(tmp_dir, dvc, erepo_dir, mocker):
with erepo_dir.chdir():
erepo_dir.dvc_gen("file", "file content", commit="add file")

assert (erepo_dir / "file").exists()
stage = dvc.imp(os.fspath(erepo_dir), "file")

spy = mocker.spy(base, "fs_download")
dvc.update([stage.path])

assert not spy.called


@pytest.mark.parametrize("outs_exist", (False, True))
def test_update_no_download(tmp_dir, dvc, erepo_dir, outs_exist, mocker):
with erepo_dir.chdir():
erepo_dir.dvc_gen("file", "file content", commit="add file")
initial_rev = erepo_dir.scm.get_rev()

stage = dvc.imp(os.fspath(erepo_dir), "file", no_download=not outs_exist)

assert stage.deps[0].def_repo["rev_lock"] == initial_rev

dst = tmp_dir / "file"
assert dst.exists() is outs_exist

with erepo_dir.chdir():
erepo_dir.dvc_gen("file", "updated file content", commit="update file")
new_rev = erepo_dir.scm.get_rev()

updated_stage = dvc.update([stage.path], rev=new_rev, no_download=True)[0]
assert not dst.exists()

assert updated_stage.deps[0].def_repo["rev_lock"] == new_rev

# output must have no information since no_download=True
out = updated_stage.outs[0]
assert out.hash_info.value is None
assert out.meta.size is None


def test_update_import_url(tmp_dir, dvc, workspace):
workspace.gen("file", "file content")

Expand All @@ -176,17 +218,20 @@ def test_update_import_url(tmp_dir, dvc, workspace):
assert dst.read_text() == "updated file content"


def test_update_import_url_no_download(tmp_dir, dvc, workspace):
@pytest.mark.parametrize("outs_exist", (False, True))
def test_update_import_url_no_download(
tmp_dir, dvc, workspace, outs_exist, mocker
):
workspace.gen("file", "file content")

dst = tmp_dir / "imported_file"
stage = dvc.imp_url(
"remote://workspace/file", os.fspath(dst), no_download=True
"remote://workspace/file", os.fspath(dst), no_download=not outs_exist
)
hash_info = stage.deps[0].hash_info

assert not dst.is_file()
assert stage.deps[0].hash_info.value == "d10b4c3ff123b26dc068d43a8bef2d23"
assert dst.exists() is outs_exist
hash_info = stage.deps[0].hash_info
assert hash_info.value == "d10b4c3ff123b26dc068d43a8bef2d23"

workspace.gen("file", "updated file content")

Expand All @@ -197,12 +242,25 @@ def test_update_import_url_no_download(tmp_dir, dvc, workspace):
assert updated_hash_info != hash_info
assert updated_hash_info.value == "6ffba511ce3aa40b8231d1b1f8c5fba5"

# output must have no information since no_download=True
out = updated_stage.outs[0]
assert out.hash_info.value is None
assert out.hash_info.name is None
assert out.meta.size is None


def test_update_import_url_unchanged(tmp_dir, dvc, workspace, mocker):
workspace.gen("file", "file content")

dst = tmp_dir / "imported_file"
stage = dvc.imp_url("remote://workspace/file", os.fspath(dst))

spy = mocker.spy(base, "fs_download")

dvc.update([stage.path])
assert not spy.called


def test_update_rev(tmp_dir, dvc, scm, git_dir):
with git_dir.chdir():
git_dir.scm_gen({"foo": "foo"}, commit="first")
Expand Down
7 changes: 4 additions & 3 deletions tests/unit/stage/test_stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,18 +55,19 @@ def test_path_conversion(dvc):
assert stage.dumpd()["wdir"] == "../.."


def test_stage_update(mocker):
def test_stage_update(dvc, mocker):
dep = RepoDependency({"url": "example.com"}, None, "dep_path")
mocker.patch.object(dep, "update", return_value=None)

stage = Stage(None, "path", deps=[dep])
stage = Stage(dvc, "path", deps=[dep])
reproduce = mocker.patch.object(stage, "reproduce")
is_repo_import = mocker.patch(
__name__ + ".Stage.is_repo_import", new_callable=mocker.PropertyMock
)

is_repo_import.return_value = True
stage.update()
with dvc.lock:
stage.update()
assert reproduce.called_once_with()

is_repo_import.return_value = False
Expand Down

0 comments on commit 7c07050

Please sign in to comment.