From 7c0705010b25d7c7a423e3c168aadd94ecb967a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniele=20Trifir=C3=B2?= Date: Mon, 2 Jan 2023 19:02:15 +0100 Subject: [PATCH] update: prevent re-download on every update --- dvc/stage/imports.py | 10 ++--- tests/func/test_update.py | 68 +++++++++++++++++++++++++++++++--- tests/unit/stage/test_stage.py | 7 ++-- 3 files changed, 71 insertions(+), 14 deletions(-) diff --git a/dvc/stage/imports.py b/dvc/stage/imports.py index 313d0d3328..3048214188 100644 --- a/dvc/stage/imports.py +++ b/dvc/stage/imports.py @@ -21,22 +21,20 @@ def update_import( stage, rev=None, to_remote=False, remote=None, no_download=None, jobs=None ): stage.deps[0].update(rev=rev) - outs = stage.outs - deps = stage.deps frozen = stage.frozen stage.frozen = False + changed = stage.changed() - if stage.outs: - stage.outs[0].clear() try: if to_remote: _update_import_on_remote(stage, remote, jobs) else: stage.reproduce(no_download=no_download, jobs=jobs) finally: - if deps == stage.deps: - stage.outs = outs + if no_download and changed: + # Avoid retaining stale information + stage.outs[0].clear() stage.frozen = frozen diff --git a/tests/func/test_update.py b/tests/func/test_update.py index eb4e3cf82e..9d7a351548 100644 --- a/tests/func/test_update.py +++ b/tests/func/test_update.py @@ -2,6 +2,7 @@ import pytest +from dvc.dependency import base from dvc.dvcfile import Dvcfile from dvc.exceptions import InvalidArgumentError from dvc.testing.tmp_dir import make_subrepo @@ -156,6 +157,47 @@ def test_update_before_and_after_dvc_init(tmp_dir, dvc, git_dir): assert dvc.status([stage.path]) == {} +def test_update_unchanged(tmp_dir, dvc, erepo_dir, mocker): + with erepo_dir.chdir(): + erepo_dir.dvc_gen("file", "file content", commit="add file") + + assert (erepo_dir / "file").exists() + stage = dvc.imp(os.fspath(erepo_dir), "file") + + spy = mocker.spy(base, "fs_download") + dvc.update([stage.path]) + + assert not spy.called + + +@pytest.mark.parametrize("outs_exist", (False, True)) +def test_update_no_download(tmp_dir, dvc, erepo_dir, outs_exist, mocker): + with erepo_dir.chdir(): + erepo_dir.dvc_gen("file", "file content", commit="add file") + initial_rev = erepo_dir.scm.get_rev() + + stage = dvc.imp(os.fspath(erepo_dir), "file", no_download=not outs_exist) + + assert stage.deps[0].def_repo["rev_lock"] == initial_rev + + dst = tmp_dir / "file" + assert dst.exists() is outs_exist + + with erepo_dir.chdir(): + erepo_dir.dvc_gen("file", "updated file content", commit="update file") + new_rev = erepo_dir.scm.get_rev() + + updated_stage = dvc.update([stage.path], rev=new_rev, no_download=True)[0] + assert not dst.exists() + + assert updated_stage.deps[0].def_repo["rev_lock"] == new_rev + + # output must have no information since no_download=True + out = updated_stage.outs[0] + assert out.hash_info.value is None + assert out.meta.size is None + + def test_update_import_url(tmp_dir, dvc, workspace): workspace.gen("file", "file content") @@ -176,17 +218,20 @@ def test_update_import_url(tmp_dir, dvc, workspace): assert dst.read_text() == "updated file content" -def test_update_import_url_no_download(tmp_dir, dvc, workspace): +@pytest.mark.parametrize("outs_exist", (False, True)) +def test_update_import_url_no_download( + tmp_dir, dvc, workspace, outs_exist, mocker +): workspace.gen("file", "file content") dst = tmp_dir / "imported_file" stage = dvc.imp_url( - "remote://workspace/file", os.fspath(dst), no_download=True + "remote://workspace/file", os.fspath(dst), no_download=not outs_exist ) - hash_info = stage.deps[0].hash_info - assert not dst.is_file() - assert stage.deps[0].hash_info.value == "d10b4c3ff123b26dc068d43a8bef2d23" + assert dst.exists() is outs_exist + hash_info = stage.deps[0].hash_info + assert hash_info.value == "d10b4c3ff123b26dc068d43a8bef2d23" workspace.gen("file", "updated file content") @@ -197,12 +242,25 @@ def test_update_import_url_no_download(tmp_dir, dvc, workspace): assert updated_hash_info != hash_info assert updated_hash_info.value == "6ffba511ce3aa40b8231d1b1f8c5fba5" + # output must have no information since no_download=True out = updated_stage.outs[0] assert out.hash_info.value is None assert out.hash_info.name is None assert out.meta.size is None +def test_update_import_url_unchanged(tmp_dir, dvc, workspace, mocker): + workspace.gen("file", "file content") + + dst = tmp_dir / "imported_file" + stage = dvc.imp_url("remote://workspace/file", os.fspath(dst)) + + spy = mocker.spy(base, "fs_download") + + dvc.update([stage.path]) + assert not spy.called + + def test_update_rev(tmp_dir, dvc, scm, git_dir): with git_dir.chdir(): git_dir.scm_gen({"foo": "foo"}, commit="first") diff --git a/tests/unit/stage/test_stage.py b/tests/unit/stage/test_stage.py index f564448ae0..e27c4a3359 100644 --- a/tests/unit/stage/test_stage.py +++ b/tests/unit/stage/test_stage.py @@ -55,18 +55,19 @@ def test_path_conversion(dvc): assert stage.dumpd()["wdir"] == "../.." -def test_stage_update(mocker): +def test_stage_update(dvc, mocker): dep = RepoDependency({"url": "example.com"}, None, "dep_path") mocker.patch.object(dep, "update", return_value=None) - stage = Stage(None, "path", deps=[dep]) + stage = Stage(dvc, "path", deps=[dep]) reproduce = mocker.patch.object(stage, "reproduce") is_repo_import = mocker.patch( __name__ + ".Stage.is_repo_import", new_callable=mocker.PropertyMock ) is_repo_import.return_value = True - stage.update() + with dvc.lock: + stage.update() assert reproduce.called_once_with() is_repo_import.return_value = False