Skip to content

Commit 30d58e1

Browse files
committed
Revert "remote: base: don't checkout existing file (#2358)"
This reverts commit 5ec1d27. Save tests to try make them work, and `_get_cache_type()` to be used later.
1 parent 4ab4e43 commit 30d58e1

File tree

2 files changed

+36
-76
lines changed

2 files changed

+36
-76
lines changed

dvc/remote/base.py

Lines changed: 19 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -758,45 +758,20 @@ def safe_remove(self, path_info, force=False):
758758
self.remove(path_info)
759759

760760
def _checkout_file(
761-
self,
762-
path_info,
763-
checksum,
764-
force,
765-
progress_callback=None,
766-
save_link=True,
761+
self, path_info, checksum, force, progress_callback=None
767762
):
768-
# NOTE: In case if path_info is already cached and path_info's
769-
# link type matches cache link type, we would like to avoid
770-
# relinking.
771-
if self.changed(
772-
path_info, {self.PARAM_CHECKSUM: checksum}
773-
) or not self._link_matches(path_info):
763+
cache_info = self.checksum_to_path_info(checksum)
764+
if self.exists(path_info):
765+
msg = "data '{}' exists. Removing before checkout."
766+
logger.warning(msg.format(str(path_info)))
774767
self.safe_remove(path_info, force=force)
775768

776-
cache_info = self.checksum_to_path_info(checksum)
777-
self.link(cache_info, path_info)
778-
779-
if save_link:
780-
self.state.save_link(path_info)
781-
782-
self.state.save(path_info, checksum)
783-
else:
784-
# NOTE: performing (un)protection costs us +/- the same as checking
785-
# if path_info is protected. Instead of implementing logic,
786-
# just (un)protect according to self.protected.
787-
if self.protected:
788-
self.protect(path_info)
789-
else:
790-
# NOTE dont allow copy, because we checked before that link
791-
# type matches cache, and we don't want data duplication
792-
self.unprotect(path_info, allow_copy=False)
793-
769+
self.link(cache_info, path_info)
770+
self.state.save_link(path_info)
771+
self.state.save(path_info, checksum)
794772
if progress_callback:
795773
progress_callback(str(path_info))
796774

797-
def _link_matches(self, path_info):
798-
return True
799-
800775
def makedirs(self, path_info):
801776
"""Optional: Implement only if the remote needs to create
802777
directories before copying/linking/moving data
@@ -818,14 +793,17 @@ def _checkout_dir(
818793
for entry in dir_info:
819794
relative_path = entry[self.PARAM_RELPATH]
820795
entry_checksum = entry[self.PARAM_CHECKSUM]
796+
entry_cache_info = self.checksum_to_path_info(entry_checksum)
821797
entry_info = path_info / relative_path
822-
self._checkout_file(
823-
entry_info,
824-
entry_checksum,
825-
force,
826-
progress_callback,
827-
save_link=False,
828-
)
798+
799+
entry_checksum_info = {self.PARAM_CHECKSUM: entry_checksum}
800+
if self.changed(entry_info, entry_checksum_info):
801+
if self.exists(entry_info):
802+
self.safe_remove(entry_info, force=force)
803+
self.link(entry_cache_info, entry_info)
804+
self.state.save(entry_info, entry_checksum)
805+
if progress_callback:
806+
progress_callback(str(entry_info))
829807

830808
self._remove_redundant_files(path_info, dir_info, force)
831809

@@ -904,7 +882,7 @@ def get_files_number(self, checksum):
904882
return 1
905883

906884
@staticmethod
907-
def unprotect(path_info, allow_copy=True):
885+
def unprotect(path_info):
908886
pass
909887

910888
def _get_unpacked_dir_names(self, checksums):

dvc/remote/local.py

Lines changed: 17 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -394,22 +394,19 @@ def _log_missing_caches(checksum_info_dict):
394394
logger.warning(msg)
395395

396396
@staticmethod
397-
def _unprotect_file(path, allow_copy=True):
397+
def _unprotect_file(path):
398398
if System.is_symlink(path) or System.is_hardlink(path):
399-
if allow_copy:
400-
logger.debug("Unprotecting '{}'".format(path))
401-
tmp = os.path.join(os.path.dirname(path), "." + str(uuid()))
402-
403-
# The operations order is important here - if some application
404-
# would access the file during the process of copyfile then it
405-
# would get only the part of file. So, at first, the file
406-
# should be copied with the temporary name, and then
407-
# original file should be replaced by new.
408-
copyfile(
409-
path, tmp, name="Unprotecting '{}'".format(relpath(path))
410-
)
411-
remove(path)
412-
os.rename(tmp, path)
399+
logger.debug("Unprotecting '{}'".format(path))
400+
tmp = os.path.join(os.path.dirname(path), "." + str(uuid()))
401+
402+
# The operations order is important here - if some application
403+
# would access the file during the process of copyfile then it
404+
# would get only the part of file. So, at first, the file should be
405+
# copied with the temporary name, and then original file should be
406+
# replaced by new.
407+
copyfile(path, tmp, name="Unprotecting '{}'".format(relpath(path)))
408+
remove(path)
409+
os.rename(tmp, path)
413410

414411
else:
415412
logger.debug(
@@ -419,21 +416,21 @@ def _unprotect_file(path, allow_copy=True):
419416

420417
os.chmod(path, os.stat(path).st_mode | stat.S_IWRITE)
421418

422-
def _unprotect_dir(self, path, allow_copy=True):
419+
def _unprotect_dir(self, path):
423420
for fname in walk_files(path, self.repo.dvcignore):
424-
self._unprotect_file(fname, allow_copy)
421+
RemoteLOCAL._unprotect_file(fname)
425422

426-
def unprotect(self, path_info, allow_copy=True):
423+
def unprotect(self, path_info):
427424
path = path_info.fspath
428425
if not os.path.exists(path):
429426
raise DvcException(
430427
"can't unprotect non-existing data '{}'".format(path)
431428
)
432429

433430
if os.path.isdir(path):
434-
self._unprotect_dir(path, allow_copy)
431+
self._unprotect_dir(path)
435432
else:
436-
self._unprotect_file(path, allow_copy)
433+
RemoteLOCAL._unprotect_file(path)
437434

438435
@staticmethod
439436
def protect(path_info):
@@ -525,18 +522,3 @@ def _get_cache_type(self, path_info):
525522

526523
self.cache_type_confirmed = True
527524
return self.cache_types[0]
528-
529-
def _link_matches(self, path_info):
530-
is_hardlink = System.is_hardlink(path_info)
531-
is_symlink = System.is_symlink(path_info)
532-
is_copy_or_reflink = not is_hardlink and not is_symlink
533-
534-
cache_type = self._get_cache_type(path_info)
535-
536-
if cache_type == "symlink":
537-
return is_symlink
538-
539-
if cache_type == "hardlink":
540-
return is_hardlink
541-
542-
return is_copy_or_reflink

0 commit comments

Comments
 (0)