diff --git a/dvc/command/checkout.py b/dvc/command/checkout.py index 0e2a533ec5..323e349977 100644 --- a/dvc/command/checkout.py +++ b/dvc/command/checkout.py @@ -1,18 +1,86 @@ import argparse +import logging + +import colorama from dvc.command.base import append_doc_link from dvc.command.base import CmdBase +from dvc.exceptions import CheckoutError + +logger = logging.getLogger(__name__) + + +def _human_join(words): + words = list(words) + if not words: + return "" + + return ( + "{before} and {after}".format( + before=", ".join(words[:-1]), after=words[-1], + ) + if len(words) > 1 + else words[0] + ) + + +def log_summary(stats): + states = ("added", "deleted", "modified") + summary = ( + "{} {}".format(len(stats[state]), state) + for state in states + if stats.get(state) + ) + logger.info(_human_join(summary)) + + +def log_changes(stats): + colors = [ + ("modified", colorama.Fore.YELLOW,), + ("added", colorama.Fore.GREEN), + ("deleted", colorama.Fore.RED,), + ] + + for state, color in colors: + entries = stats.get(state) + + if not entries: + continue + + for entry in entries: + logger.info( + "{color}{state}{nc}{spacing}{entry}".format( + color=color, + state=state[0].upper(), + nc=colorama.Fore.RESET, + spacing="\t", + entry=entry, + ) + ) class CmdCheckout(CmdBase): def run(self): - self.repo.checkout( - targets=self.args.targets, - with_deps=self.args.with_deps, - force=self.args.force, - relink=self.args.relink, - recursive=self.args.recursive, - ) + stats, exc = None, None + try: + stats = self.repo.checkout( + targets=self.args.targets, + with_deps=self.args.with_deps, + force=self.args.force, + relink=self.args.relink, + recursive=self.args.recursive, + ) + except CheckoutError as _exc: + exc = _exc + stats = exc.stats + + if self.args.summary: + log_summary(stats) + else: + log_changes(stats) + + if exc: + raise exc return 0 @@ -26,6 +94,12 @@ def add_parser(subparsers, parent_parser): help=CHECKOUT_HELP, formatter_class=argparse.RawDescriptionHelpFormatter, ) + checkout_parser.add_argument( + "--summary", + action="store_true", + default=False, + help="Show summary of the changes.", + ) checkout_parser.add_argument( "-d", "--with-deps", diff --git a/dvc/command/data_sync.py b/dvc/command/data_sync.py index c12b1afcac..7be18cc747 100644 --- a/dvc/command/data_sync.py +++ b/dvc/command/data_sync.py @@ -3,7 +3,8 @@ from dvc.command.base import append_doc_link from dvc.command.base import CmdBase -from dvc.exceptions import DvcException +from dvc.command.checkout import log_summary +from dvc.exceptions import DvcException, CheckoutError logger = logging.getLogger(__name__) @@ -19,7 +20,7 @@ def check_up_to_date(cls, processed_files_count): class CmdDataPull(CmdDataBase): def run(self): try: - processed_files_count = self.repo.pull( + stats = self.repo.pull( targets=self.args.targets, jobs=self.args.jobs, remote=self.args.remote, @@ -29,10 +30,13 @@ def run(self): force=self.args.force, recursive=self.args.recursive, ) - except DvcException: + log_summary(stats) + except (CheckoutError, DvcException) as exc: + log_summary(getattr(exc, "stats", {})) logger.exception("failed to pull data from the cloud") return 1 - self.check_up_to_date(processed_files_count) + + self.check_up_to_date(stats["downloaded"]) return 0 diff --git a/dvc/exceptions.py b/dvc/exceptions.py index 2f6c25a47b..2a1b1cc5d1 100644 --- a/dvc/exceptions.py +++ b/dvc/exceptions.py @@ -256,10 +256,12 @@ def __init__(self, amount): class CheckoutError(DvcException): - def __init__(self, target_infos): + def __init__(self, target_infos, stats=None): + self.target_infos = target_infos + self.stats = stats targets = [str(t) for t in target_infos] m = ( - "Checkout failed for following targets:\n {}\nDid you " + "Checkout failed for following targets:\n{}\nDid you " "forget to fetch?".format("\n".join(targets)) ) super().__init__(m) diff --git a/dvc/external_repo.py b/dvc/external_repo.py index c16d55b584..61257ae28d 100644 --- a/dvc/external_repo.py +++ b/dvc/external_repo.py @@ -11,7 +11,7 @@ from dvc.compat import fspath from dvc.repo import Repo from dvc.config import NoRemoteError, NotDvcRepoError -from dvc.exceptions import NoRemoteInExternalRepoError +from dvc.exceptions import NoRemoteInExternalRepoError, CheckoutError from dvc.exceptions import OutputNotFoundError, NoOutputInExternalRepoError from dvc.exceptions import FileMissingError, PathMissingError from dvc.utils.fs import remove, fs_copy, move @@ -106,14 +106,14 @@ def _pull_cached(self, out, path_info, dest): if out.changed_cache(filter_info=src): self.cloud.pull(out.get_used_cache(filter_info=src)) - failed = out.checkout(filter_info=src) + try: + out.checkout(filter_info=src) + except CheckoutError: + raise FileNotFoundError move(src, dest) remove(tmp) - if failed: - raise FileNotFoundError - @wrap_with(threading.Lock()) def _set_cache_dir(self): try: diff --git a/dvc/remote/base.py b/dvc/remote/base.py index 2d2cf4748e..d9cf39ae12 100644 --- a/dvc/remote/base.py +++ b/dvc/remote/base.py @@ -14,6 +14,7 @@ import dvc.prompt as prompt from dvc.exceptions import ( + CheckoutError, DvcException, ConfirmRemoveError, DvcIgnoreInCollectedDirError, @@ -851,13 +852,15 @@ def safe_remove(self, path_info, force=False): self.remove(path_info) def _checkout_file( - self, path_info, checksum, force, progress_callback=None + self, path_info, checksum, force, progress_callback=None, relink=False ): """The file is changed we need to checkout a new copy""" + added, modified = True, False cache_info = self.checksum_to_path_info(checksum) if self.exists(path_info): logger.debug("data '{}' will be replaced.", path_info) self.safe_remove(path_info, force=force) + added, modified = False, True self.link(cache_info, path_info) self.state.save_link(path_info) @@ -865,6 +868,8 @@ def _checkout_file( if progress_callback: progress_callback(str(path_info)) + return added, modified and not relink + def makedirs(self, path_info): """Optional: Implement only if the remote needs to create directories before copying/linking/moving data @@ -879,9 +884,11 @@ def _checkout_dir( relink=False, filter_info=None, ): + added, modified = False, False # Create dir separately so that dir is created # even if there are no files in it if not self.exists(path_info): + added = True self.makedirs(path_info) dir_info = self.get_dir_cache(checksum) @@ -899,27 +906,36 @@ def _checkout_dir( entry_checksum_info = {self.PARAM_CHECKSUM: entry_checksum} if relink or self.changed(entry_info, entry_checksum_info): + modified = True self.safe_remove(entry_info, force=force) self.link(entry_cache_info, entry_info) self.state.save(entry_info, entry_checksum) if progress_callback: progress_callback(str(entry_info)) - self._remove_redundant_files(path_info, dir_info, force) + modified = ( + self._remove_redundant_files(path_info, dir_info, force) + or modified + ) self.state.save_link(path_info) self.state.save(path_info, checksum) + # relink is not modified, assume it as nochange + return added, not added and modified and not relink + def _remove_redundant_files(self, path_info, dir_info, force): existing_files = set(self.walk_files(path_info)) needed_files = { path_info / entry[self.PARAM_RELPATH] for entry in dir_info } - - for path in existing_files - needed_files: + redundant_files = existing_files - needed_files + for path in redundant_files: self.safe_remove(path, force) + return bool(redundant_files) + def checkout( self, path_info, @@ -966,12 +982,14 @@ def checkout( self.path_info, checksum, filter_info ), ) - return failed + if failed: + raise CheckoutError([failed]) + return logger.debug("Checking out '{}' with cache '{}'.", path_info, checksum) - self._checkout( - path_info, checksum, force, progress_callback, relink, filter_info + return self._checkout( + path_info, checksum, force, progress_callback, relink, filter_info, ) def _checkout( @@ -985,8 +1003,9 @@ def _checkout( ): if not self.is_dir_checksum(checksum): return self._checkout_file( - path_info, checksum, force, progress_callback=progress_callback + path_info, checksum, force, progress_callback, relink ) + return self._checkout_dir( path_info, checksum, force, progress_callback, relink, filter_info ) diff --git a/dvc/repo/checkout.py b/dvc/repo/checkout.py index 26f9e18e81..3a239bf1cb 100644 --- a/dvc/repo/checkout.py +++ b/dvc/repo/checkout.py @@ -1,21 +1,31 @@ import logging +import os +from dvc.compat import fspath from dvc.exceptions import CheckoutError from dvc.exceptions import CheckoutErrorSuggestGit from dvc.progress import Tqdm - +from dvc.utils import relpath logger = logging.getLogger(__name__) -def _cleanup_unused_links(repo): +def _get_unused_links(repo): used = [ out.fspath for stage in repo.stages for out in stage.outs if out.scheme == "local" ] - repo.state.remove_unused_links(used) + return repo.state.get_unused_links(used) + + +def _fspath_dir(path, root): + if not os.path.exists(str(path)): + return str(path) + + path = relpath(fspath(path), root) + return os.path.join(path, "") if os.path.isdir(path) else path def get_all_files_numbers(pairs): @@ -34,9 +44,19 @@ def _checkout( ): from dvc.stage import StageFileDoesNotExistError, StageFileBadNameError + unused = [] + stats = { + "added": [], + "deleted": [], + "modified": [], + "failed": [], + } if not targets: targets = [None] - _cleanup_unused_links(self) + unused = _get_unused_links(self) + + stats["deleted"] = [_fspath_dir(u, self.root_dir) for u in unused] + self.state.remove_links(unused) pairs = set() for target in targets: @@ -52,20 +72,23 @@ def _checkout( raise CheckoutErrorSuggestGit(target) from exc total = get_all_files_numbers(pairs) - if total == 0: - logger.info("Nothing to do") - failed = [] with Tqdm( total=total, unit="file", desc="Checkout", disable=total == 0 ) as pbar: for stage, filter_info in pairs: - failed.extend( - stage.checkout( - force=force, - progress_callback=pbar.update_desc, - relink=relink, - filter_info=filter_info, - ) + result = stage.checkout( + force=force, + progress_callback=pbar.update_desc, + relink=relink, + filter_info=filter_info, ) - if failed: - raise CheckoutError(failed) + for data in ["failed", "added", "modified"]: + stats[data].extend( + _fspath_dir(path, self.root_dir) for path in result[data] + ) + + if stats.get("failed"): + raise CheckoutError(stats["failed"], stats) + + del stats["failed"] + return stats diff --git a/dvc/repo/pull.py b/dvc/repo/pull.py index fadd34d3da..993d50be81 100644 --- a/dvc/repo/pull.py +++ b/dvc/repo/pull.py @@ -27,7 +27,9 @@ def pull( with_deps=with_deps, recursive=recursive, ) - self._checkout( + stats = self._checkout( targets=targets, with_deps=with_deps, force=force, recursive=recursive ) - return processed_files_count + + stats["downloaded"] = processed_files_count + return stats diff --git a/dvc/stage.py b/dvc/stage.py index 79e52add9e..0d8a3792c7 100644 --- a/dvc/stage.py +++ b/dvc/stage.py @@ -15,7 +15,7 @@ import dvc.dependency as dependency import dvc.output as output import dvc.prompt as prompt -from dvc.exceptions import DvcException +from dvc.exceptions import CheckoutError, DvcException from dvc.utils import dict_md5 from dvc.utils import fix_env from dvc.utils import relpath @@ -982,18 +982,25 @@ def checkout( relink=False, filter_info=None, ): - failed_checkouts = [] + checkouts = {"failed": [], "added": [], "modified": []} for out in self._filter_outs(filter_info): - failed = out.checkout( - force=force, - tag=self.tag, - progress_callback=progress_callback, - relink=relink, - filter_info=filter_info, - ) - if failed: - failed_checkouts.append(failed) - return failed_checkouts + try: + result = out.checkout( + force=force, + tag=self.tag, + progress_callback=progress_callback, + relink=relink, + filter_info=filter_info, + ) + added, modified = result or (None, None) + if modified: + checkouts["modified"].append(out.path_info) + elif added: + checkouts["added"].append(out.path_info) + except CheckoutError as exc: + checkouts["failed"].extend(exc.target_infos) + + return checkouts @staticmethod def _status(entries): diff --git a/dvc/state.py b/dvc/state.py index 38f77eabaf..9b0ca60ca0 100644 --- a/dvc/state.py +++ b/dvc/state.py @@ -439,7 +439,7 @@ def save_link(self, path_info): ) self._execute(cmd, (relative_path, self._to_sqlite(inode), mtime)) - def remove_unused_links(self, used): + def get_unused_links(self, used): """Removes all saved links except the ones that are used. Args: @@ -453,20 +453,22 @@ def remove_unused_links(self, used): inode = self._from_sqlite(inode) path = os.path.join(self.root_dir, relpath) - if path in used: - continue - - if not os.path.exists(path): + if path in used or not os.path.exists(path): continue actual_inode = get_inode(path) actual_mtime, _ = get_mtime_and_size(path, self.repo.tree) - if inode == actual_inode and mtime == actual_mtime: + if (inode, mtime) == (actual_inode, actual_mtime): logger.debug("Removing '{}' as unused link.", path) - remove(path) unused.append(relpath) + return unused + + def remove_links(self, unused): + for path in unused: + remove(path) + for chunk_unused in to_chunks( unused, chunk_size=SQLITE_MAX_VARIABLES_NUMBER ): diff --git a/tests/func/test_checkout.py b/tests/func/test_checkout.py index 1f49ca4d84..ee06dc2416 100644 --- a/tests/func/test_checkout.py +++ b/tests/func/test_checkout.py @@ -22,6 +22,8 @@ from dvc.utils.fs import walk_files from dvc.utils.stage import dump_stage_file from dvc.utils.stage import load_stage_file +from dvc.remote import RemoteS3 +from tests.remotes import S3 from tests.basic_env import TestDvc from tests.basic_env import TestDvcGit from tests.func.test_repro import TestRepro @@ -321,7 +323,8 @@ def test(self): stage.outs[0].remove() self.assertFalse(os.path.exists(dname)) - self.dvc.checkout(force=True) + stats = self.dvc.checkout(force=True) + assert stats["added"] == [dname + os.sep] self.assertTrue(os.path.isdir(dname)) self.assertEqual(len(os.listdir(dname)), 0) @@ -337,7 +340,8 @@ def test(self): ) self.assertTrue(stage is not None) - self.dvc.checkout(force=True) + stats = self.dvc.checkout(force=True) + assert not any(stats.values()) class TestCheckoutWithDeps(TestRepro): @@ -421,7 +425,8 @@ def test(self): ret = main(["add", self.FOO]) self.assertEqual(0, ret) - self.dvc.checkout(targets=[self.FOO + ".dvc"], recursive=True) + stats = self.dvc.checkout(targets=[self.FOO + ".dvc"], recursive=True) + assert stats == {"added": [], "modified": [], "deleted": []} class TestCheckoutMovedCacheDirWithSymlinks(TestDvc): @@ -489,7 +494,8 @@ def test_checkout_relink(tmp_dir, dvc, link, link_test_func): dvc.unprotect("dir/data") assert not link_test_func("dir/data") - dvc.checkout(["dir.dvc"], relink=True) + stats = dvc.checkout(["dir.dvc"], relink=True) + assert stats == empty_checkout assert link_test_func("dir/data") @@ -502,7 +508,8 @@ def test_checkout_relink_protected(tmp_dir, dvc, link): dvc.unprotect("foo") assert os.access("foo", os.W_OK) - dvc.checkout(["foo.dvc"], relink=True) + stats = dvc.checkout(["foo.dvc"], relink=True) + assert stats == empty_checkout assert not os.access("foo", os.W_OK) @@ -513,5 +520,231 @@ def test_checkout_relink_protected(tmp_dir, dvc, link): def test_partial_checkout(tmp_dir, dvc, target): tmp_dir.dvc_gen({"dir": {"subdir": {"file": "file"}, "other": "other"}}) shutil.rmtree("dir") - dvc.checkout([target]) + stats = dvc.checkout([target]) + assert stats["added"] == ["dir" + os.sep] assert list(walk_files("dir")) == [os.path.join("dir", "subdir", "file")] + + +empty_checkout = {"added": [], "deleted": [], "modified": []} + + +def test_stats_on_empty_checkout(tmp_dir, dvc, scm): + assert dvc.checkout() == empty_checkout + tmp_dir.dvc_gen( + {"dir": {"subdir": {"file": "file"}, "other": "other"}}, + commit="initial", + ) + assert dvc.checkout() == empty_checkout + + +def test_stats_on_checkout(tmp_dir, dvc, scm): + tmp_dir.dvc_gen( + { + "dir": {"subdir": {"file": "file"}, "other": "other"}, + "foo": "foo", + "bar": "bar", + }, + commit="initial", + ) + scm.checkout("HEAD~") + stats = dvc.checkout() + assert set(stats["deleted"]) == {"dir" + os.sep, "foo", "bar"} + + scm.checkout("-") + stats = dvc.checkout() + assert set(stats["added"]) == {"bar", "dir" + os.sep, "foo"} + + tmp_dir.gen({"lorem": "lorem", "bar": "new bar", "dir2": {"file": "file"}}) + (tmp_dir / "foo").unlink() + scm.repo.git.rm("foo.dvc") + tmp_dir.dvc_add(["bar", "lorem", "dir2"], commit="second") + + scm.checkout("HEAD~") + stats = dvc.checkout() + assert set(stats["modified"]) == {"bar"} + assert set(stats["added"]) == {"foo"} + assert set(stats["deleted"]) == {"lorem", "dir2" + os.sep} + + scm.checkout("-") + stats = dvc.checkout() + assert set(stats["modified"]) == {"bar"} + assert set(stats["added"]) == {"dir2" + os.sep, "lorem"} + assert set(stats["deleted"]) == {"foo"} + + +def test_checkout_stats_on_failure(tmp_dir, dvc, scm): + tmp_dir.dvc_gen( + {"foo": "foo", "dir": {"subdir": {"file": "file"}}, "other": "other"}, + commit="initial", + ) + stage = Stage.load(dvc, "foo.dvc") + tmp_dir.dvc_gen({"foo": "foobar", "other": "other other"}, commit="second") + + # corrupt cache + cache = stage.outs[0].cache_path + with open(cache, "a") as fd: + fd.write("destroy cache") + + scm.checkout("HEAD~") + with pytest.raises(CheckoutError) as exc: + dvc.checkout(force=True) + + assert exc.value.stats == { + **empty_checkout, + "failed": ["foo"], + "modified": ["other"], + } + + +def test_stats_on_added_file_from_tracked_dir(tmp_dir, dvc, scm): + tmp_dir.dvc_gen( + {"dir": {"subdir": {"file": "file"}, "other": "other"}}, + commit="initial", + ) + + tmp_dir.gen("dir/subdir/newfile", "newfile") + tmp_dir.dvc_add("dir", commit="add newfile") + scm.checkout("HEAD~") + assert dvc.checkout() == {**empty_checkout, "modified": ["dir" + os.sep]} + assert dvc.checkout() == empty_checkout + + scm.checkout("-") + assert dvc.checkout() == {**empty_checkout, "modified": ["dir" + os.sep]} + assert dvc.checkout() == empty_checkout + + +def test_stats_on_updated_file_from_tracked_dir(tmp_dir, dvc, scm): + tmp_dir.dvc_gen( + {"dir": {"subdir": {"file": "file"}, "other": "other"}}, + commit="initial", + ) + + tmp_dir.gen("dir/subdir/file", "what file?") + tmp_dir.dvc_add("dir", commit="update file") + scm.checkout("HEAD~") + assert dvc.checkout() == {**empty_checkout, "modified": ["dir" + os.sep]} + assert dvc.checkout() == empty_checkout + + scm.checkout("-") + assert dvc.checkout() == {**empty_checkout, "modified": ["dir" + os.sep]} + assert dvc.checkout() == empty_checkout + + +def test_stats_on_removed_file_from_tracked_dir(tmp_dir, dvc, scm): + tmp_dir.dvc_gen( + {"dir": {"subdir": {"file": "file"}, "other": "other"}}, + commit="initial", + ) + + (tmp_dir / "dir" / "subdir" / "file").unlink() + tmp_dir.dvc_add("dir", commit="removed file from subdir") + scm.checkout("HEAD~") + assert dvc.checkout() == {**empty_checkout, "modified": ["dir" + os.sep]} + assert dvc.checkout() == empty_checkout + + scm.checkout("-") + assert dvc.checkout() == {**empty_checkout, "modified": ["dir" + os.sep]} + assert dvc.checkout() == empty_checkout + + +def test_stats_on_show_changes_does_not_show_summary( + tmp_dir, dvc, scm, caplog +): + tmp_dir.dvc_gen( + {"dir": {"subdir": {"file": "file"}}, "other": "other"}, + commit="initial", + ) + scm.checkout("HEAD~") + + with caplog.at_level(logging.INFO, logger="dvc"): + caplog.clear() + assert main(["checkout"]) == 0 + for out in ["D\tdir" + os.sep, "D\tother"]: + assert out in caplog.text + assert "modified" not in caplog.text + assert "deleted" not in caplog.text + assert "added" not in caplog.text + + +def test_stats_does_not_show_changes_by_default(tmp_dir, dvc, scm, caplog): + tmp_dir.dvc_gen( + {"dir": {"subdir": {"file": "file"}}, "other": "other"}, + commit="initial", + ) + scm.checkout("HEAD~") + + with caplog.at_level(logging.INFO, logger="dvc"): + caplog.clear() + assert main(["checkout", "--summary"]) == 0 + assert "2 deleted" in caplog.text + assert "dir" not in caplog.text + assert "other" not in caplog.text + + +@pytest.mark.parametrize("link", ["hardlink", "symlink", "copy"]) +def test_checkout_with_relink_existing(tmp_dir, dvc, link): + tmp_dir.dvc_gen("foo", "foo") + (tmp_dir / "foo").unlink() + + tmp_dir.dvc_gen("bar", "bar") + dvc.cache.local.cache_types = [link] + + stats = dvc.checkout(relink=True) + assert stats == {**empty_checkout, "added": ["foo"]} + + +def test_checkout_with_deps(tmp_dir, dvc): + tmp_dir.dvc_gen({"foo": "foo"}) + dvc.run( + fname="copy_file.dvc", cmd="echo foo > bar", outs=["bar"], deps=["foo"] + ) + + (tmp_dir / "bar").unlink() + (tmp_dir / "foo").unlink() + + stats = dvc.checkout(["copy_file.dvc"], with_deps=False) + assert stats == {**empty_checkout, "added": ["bar"]} + + (tmp_dir / "bar").unlink() + stats = dvc.checkout(["copy_file.dvc"], with_deps=True) + assert set(stats["added"]) == {"foo", "bar"} + + +def test_checkout_recursive(tmp_dir, dvc): + tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}}) + dvc.add("dir", recursive=True) + + (tmp_dir / "dir" / "foo").unlink() + (tmp_dir / "dir" / "bar").unlink() + + stats = dvc.checkout(["dir"], recursive=True) + assert set(stats["added"]) == { + os.path.join("dir", "foo"), + os.path.join("dir", "bar"), + } + + +@pytest.mark.skipif( + not S3.should_test(), reason="Only run with S3 credentials" +) +def test_checkout_for_external_outputs(tmp_dir, dvc): + dvc.cache.s3 = RemoteS3(dvc, {"url": S3.get_url()}) + + remote = RemoteS3(dvc, {"url": S3.get_url()}) + file_path = remote.path_info / "foo" + remote.s3.put_object( + Bucket=remote.path_info.bucket, Key=file_path.path, Body="foo" + ) + + dvc.add(str(remote.path_info / "foo")) + + remote.remove(file_path) + stats = dvc.checkout(force=True) + assert stats == {**empty_checkout, "added": [str(file_path)]} + assert remote.exists(file_path) + + remote.s3.put_object( + Bucket=remote.path_info.bucket, Key=file_path.path, Body="foo\nfoo" + ) + stats = dvc.checkout(force=True) + assert stats == {**empty_checkout, "modified": [str(file_path)]} diff --git a/tests/func/test_state.py b/tests/func/test_state.py index ed33ddbd54..36618f3025 100644 --- a/tests/func/test_state.py +++ b/tests/func/test_state.py @@ -1,4 +1,5 @@ import mock +import os from dvc.path_info import PathInfo from dvc.state import State @@ -69,16 +70,33 @@ def test_get_state_record_for_inode(get_inode_mock, tmp_dir, dvc): assert ret is not None -def test_remove_unused_links(tmp_dir, dvc): - assert len(tmp_dir.dvc_gen("foo", "foo_content")) == 1 - assert len(tmp_dir.dvc_gen("bar", "bar_content")) == 1 +def test_remove_links(tmp_dir, dvc): + tmp_dir.dvc_gen({"foo": "foo_content", "bar": "bar_content"}) - cmd_count_links = "SELECT count(*) FROM {}".format(State.LINK_STATE_TABLE) with dvc.state: + cmd_count_links = "SELECT count(*) FROM {}".format( + State.LINK_STATE_TABLE + ) result = dvc.state._execute(cmd_count_links).fetchone()[0] assert result == 2 - dvc.state.remove_unused_links([]) + dvc.state.remove_links(["foo", "bar"]) result = dvc.state._execute(cmd_count_links).fetchone()[0] assert result == 0 + + +def test_get_unused_links(tmp_dir, dvc): + tmp_dir.dvc_gen({"foo": "foo_content", "bar": "bar_content"}) + + with dvc.state: + links = [os.path.join(dvc.root_dir, link) for link in ["foo", "bar"]] + assert set(dvc.state.get_unused_links([])) == {"foo", "bar"} + assert set(dvc.state.get_unused_links(links[:1])) == {"bar"} + assert set(dvc.state.get_unused_links(links)) == set() + assert set( + dvc.state.get_unused_links( + used=links[:1] + + [os.path.join(dvc.root_dir, "not-existing-file")] + ) + ) == {"bar"} diff --git a/tests/unit/command/test_checkout.py b/tests/unit/command/test_checkout.py index 21fc7920e4..8e0a8f5f19 100644 --- a/tests/unit/command/test_checkout.py +++ b/tests/unit/command/test_checkout.py @@ -1,5 +1,7 @@ +import logging + from dvc.cli import parse_args -from dvc.command.checkout import CmdCheckout +from dvc.command.checkout import CmdCheckout, log_summary, log_changes def test_checkout(tmp_dir, dvc, mocker): @@ -19,3 +21,56 @@ def test_checkout(tmp_dir, dvc, mocker): relink=True, with_deps=True, ) + + +def test_log_summary(caplog): + stats = { + "added": ["file1", "file2", "file3"], + "deleted": ["file4", "file5"], + "modified": ["file6", "file7"], + } + + def _assert_output(stats, expected_text): + with caplog.at_level(logging.INFO, logger="dvc"): + caplog.clear() + log_summary(stats) + assert expected_text in caplog.text + + _assert_output(stats, "3 added, 2 deleted and 2 modified") + + del stats["deleted"][1] + _assert_output(stats, "3 added, 1 deleted and 2 modified") + + del stats["deleted"][0] + _assert_output(stats, "3 added and 2 modified") + + del stats["modified"] + _assert_output(stats, "3 added") + + _assert_output({}, "") + + +def test_log_changes(caplog): + stats = { + "added": ["file1", "dir1/"], + "deleted": ["dir2/"], + "modified": ["file2"], + } + + from itertools import zip_longest + + def _assert_output(stats, expected_outs): + with caplog.at_level(logging.INFO, logger="dvc"): + caplog.clear() + log_changes(stats) + actual_output = caplog.text.splitlines() + for out, line in zip_longest(expected_outs, actual_output): + assert out in line + + _assert_output(stats, ["M\tfile2", "A\tfile1", "A\tdir1/", "D\tdir2/"]) + + del stats["deleted"][0] + _assert_output(stats, ["M\tfile2", "A\tfile1", "A\tdir1/"]) + + del stats["modified"] + _assert_output(stats, ["A\tfile1", "A\tdir1/"])