Skip to content
/ dvc Public
forked from iterative/dvc

Commit

Permalink
add: do not verify hardlink if file is empty
Browse files: browse the repository at this point in the history
  • Loading branch information
skshetry committed Mar 10, 2020
1 parent f05e6e0 commit b8cf72c
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 1 deletion.
6 changes: 6 additions & 0 deletions dvc/remote/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,12 @@ def already_cached(self, path_info):

return not self.changed_cache(current_md5)

def _verify_link(self, path_info, link_type):
    """Verify the link at ``path_info``, except for empty hardlinked files.

    If ``link_type`` is ``"hardlink"`` and ``self.getsize(path_info)``
    reports a size of 0, no verification is performed and the method
    simply returns ``None``.  Every other combination is delegated to the
    parent class implementation of ``_verify_link``.
    """
    # The size is only queried for hardlinks (``and`` short-circuits).
    skip_verification = (
        link_type == "hardlink" and self.getsize(path_info) == 0
    )
    if not skip_verification:
        super()._verify_link(path_info, link_type)

def is_empty(self, path_info):
path = path_info.fspath

Expand Down
38 changes: 37 additions & 1 deletion tests/func/test_add.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import colorama
import pytest
from mock import patch
from mock import patch, call

import dvc as dvc_module
from dvc.cache import Cache
Expand Down Expand Up @@ -662,3 +662,39 @@ def test_not_raises_on_re_add(tmp_dir, dvc):

tmp_dir.gen({"file2": "file2 content", "file": "modified file"})
dvc.add(["file2", "file"])


@pytest.mark.parametrize("link", ["hardlink", "symlink", "copy"])
def test_add_empty_files(tmp_dir, dvc, link):
    """Check that an empty file can be added with each cache link type.

    The local cache is restricted to the single parametrized ``link`` type,
    an empty file is generated and added, and the test then checks that the
    workspace file, its stage file and its cache entry all exist.
    """
    name = "foo"
    dvc.cache.local.cache_types = [link]
    added_stages = tmp_dir.dvc_gen(name, "")

    workspace_file = tmp_dir / name
    assert workspace_file.exists()

    stage_file = tmp_dir / (name + Stage.STAGE_FILE_SUFFIX)
    assert stage_file.exists()

    cache_path = added_stages[0].outs[0].cache_path
    assert os.path.exists(cache_path)


# Use the ``patch`` name that this module imports (``from mock import patch,
# call``); the bare ``mock`` module name is not bound by that import.
@patch(
    "dvc.remote.local.RemoteLOCAL.is_hardlink",
    side_effect=RemoteLOCAL.is_hardlink,
)
def test_add_optimization_for_hardlink_on_empty_files(m, tmp_dir, dvc):
    """Check that empty files bypass hardlink verification on ``dvc.add``.

    ``RemoteLOCAL.is_hardlink`` is patched with a mock whose ``side_effect``
    is the real function, so calls are recorded while the original logic
    still runs.  Two empty and two non-empty files are added with the cache
    limited to hardlinks.

    Args:
        m: the mock injected by ``patch`` in place of ``is_hardlink``.
        tmp_dir: pytest fixture providing the temporary workspace.
        dvc: pytest fixture providing the repo object under test.
    """
    dvc.cache.local.cache_types = ["hardlink"]
    tmp_dir.gen({"foo": "", "bar": "", "lorem": "lorem", "ipsum": "ipsum"})
    stages = dvc.add(["foo", "bar", "lorem", "ipsum"])

    # The hardlink check must have run exactly once, and not for either of
    # the empty files.
    m.assert_called_once()
    assert m.call_args != call(tmp_dir / "foo")
    assert m.call_args != call(tmp_dir / "bar")

    # ``stages`` follows the order passed to ``dvc.add``: the first two
    # entries are the empty files, the last two the non-empty ones.
    for stage in stages[:2]:
        # hardlinks are not created for empty files
        assert not System.is_hardlink(stage.outs[0].path_info)

    for stage in stages[2:]:
        assert System.is_hardlink(stage.outs[0].path_info)

    # Every file, empty or not, still gets a stage file and a cache entry.
    for stage in stages:
        assert os.path.exists(stage.path)
        assert os.path.exists(stage.outs[0].cache_path)

0 comments on commit b8cf72c

Please sign in to comment.