From b8cf72cd0db0b1077dfd122717645ea5a1935739 Mon Sep 17 00:00:00 2001
From: Saugat Pachhai
Date: Mon, 2 Mar 2020 13:09:18 +0545
Subject: [PATCH] add: do not verify hardlink if file is empty

Fixes #3390
---
 dvc/remote/local.py    |  8 ++++++++
 tests/func/test_add.py | 40 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/dvc/remote/local.py b/dvc/remote/local.py
index 24725c526d..3e62e0628e 100644
--- a/dvc/remote/local.py
+++ b/dvc/remote/local.py
@@ -92,6 +92,14 @@ def already_cached(self, path_info):
 
         return not self.changed_cache(current_md5)
 
+    def _verify_link(self, path_info, link_type):
+        """Verify the link, except for empty files linked as hardlinks."""
+        # Hardlinks are not created for empty files, so nothing to verify.
+        if link_type == "hardlink" and self.getsize(path_info) == 0:
+            return
+
+        super()._verify_link(path_info, link_type)
+
     def is_empty(self, path_info):
         path = path_info.fspath
 
diff --git a/tests/func/test_add.py b/tests/func/test_add.py
index 95407ee7d0..e788c176c3 100644
--- a/tests/func/test_add.py
+++ b/tests/func/test_add.py
@@ -7,7 +7,7 @@
 
 import colorama
 import pytest
-from mock import patch
+from mock import patch, call
 
 import dvc as dvc_module
 from dvc.cache import Cache
@@ -662,3 +662,41 @@ def test_not_raises_on_re_add(tmp_dir, dvc):
 
     tmp_dir.gen({"file2": "file2 content", "file": "modified file"})
     dvc.add(["file2", "file"])
+
+
+@pytest.mark.parametrize("link", ["hardlink", "symlink", "copy"])
+def test_add_empty_files(tmp_dir, dvc, link):
+    """An empty file can be added with each of the tested link types."""
+    file = "foo"
+    dvc.cache.local.cache_types = [link]
+    stages = tmp_dir.dvc_gen(file, "")
+
+    assert (tmp_dir / file).exists()
+    assert (tmp_dir / (file + Stage.STAGE_FILE_SUFFIX)).exists()
+    assert os.path.exists(stages[0].outs[0].cache_path)
+
+
+@patch(
+    "dvc.remote.local.RemoteLOCAL.is_hardlink",
+    side_effect=RemoteLOCAL.is_hardlink,
+)
+def test_add_optimization_for_hardlink_on_empty_files(m, tmp_dir, dvc):
+    """Empty files are not hardlinked and skip hardlink verification."""
+    dvc.cache.local.cache_types = ["hardlink"]
+    tmp_dir.gen({"foo": "", "bar": "", "lorem": "lorem", "ipsum": "ipsum"})
+    stages = dvc.add(["foo", "bar", "lorem", "ipsum"])
+
+    m.assert_called_once()
+    assert m.call_args != call(tmp_dir / "foo")
+    assert m.call_args != call(tmp_dir / "bar")
+
+    for stage in stages[:2]:
+        # hardlinks are not created for empty files
+        assert not System.is_hardlink(stage.outs[0].path_info)
+
+    for stage in stages[2:]:
+        assert System.is_hardlink(stage.outs[0].path_info)
+
+    for stage in stages:
+        assert os.path.exists(stage.path)
+        assert os.path.exists(stage.outs[0].cache_path)