From 5101bf12531a2ef1782e47cb8cbd73d6843652c4 Mon Sep 17 00:00:00 2001 From: Saugat Pachhai Date: Fri, 21 Feb 2020 16:24:59 +0545 Subject: [PATCH] gc: make it safer by only activating by use of certain flags (#3363) * gc: make it safer by only activating by use of certain flags Previous plain `gc` is now not supported. The same behavior can be trigger by `gc -w` or `gc --workspace`. When `--all-{tags, commits, branches}` are specified, it will work as if `-w` is also specified (same as before). * tests: adjust gc to use workspace on test_repro --- dvc/command/gc.py | 18 +++++++++++ dvc/exceptions.py | 4 +++ dvc/repo/gc.py | 23 +++++++++++++ tests/func/test_gc.py | 70 ++++++++++++++++++++++++++++++++++++---- tests/func/test_repro.py | 2 +- 5 files changed, 109 insertions(+), 8 deletions(-) diff --git a/dvc/command/gc.py b/dvc/command/gc.py index 2ee4979d5e..67c0927d2e 100644 --- a/dvc/command/gc.py +++ b/dvc/command/gc.py @@ -12,6 +12,16 @@ class CmdGC(CmdBase): def run(self): + from dvc.repo.gc import _raise_error_if_all_disabled + + _raise_error_if_all_disabled( + all_branches=self.args.all_branches, + all_tags=self.args.all_tags, + all_commits=self.args.all_commits, + workspace=self.args.workspace, + cloud=self.args.cloud, + ) + msg = "This will remove all cache except items used in " msg += "the working tree" @@ -47,6 +57,7 @@ def run(self): force=self.args.force, jobs=self.args.jobs, repos=self.args.repos, + workspace=self.args.workspace, ) return 0 @@ -64,6 +75,13 @@ def add_parser(subparsers, parent_parser): help=GC_HELP, formatter_class=argparse.RawDescriptionHelpFormatter, ) + gc_parser.add_argument( + "-w", + "--workspace", + action="store_true", + default=False, + help="Keep data files used in the current workspace.", + ) gc_parser.add_argument( "-a", "--all-branches", diff --git a/dvc/exceptions.py b/dvc/exceptions.py index b91fd1424a..2f6c25a47b 100644 --- a/dvc/exceptions.py +++ b/dvc/exceptions.py @@ -7,6 +7,10 @@ class DvcException(Exception): """Base class for all dvc exceptions.""" +class InvalidArgumentError(ValueError, DvcException): + """Thrown if arguments are invalid.""" + + class OutputDuplicationError(DvcException): """Thrown if a file/directory is specified as an output in more than one stage. diff --git a/dvc/repo/gc.py b/dvc/repo/gc.py index e55a34dc3f..a19c3932ba 100644 --- a/dvc/repo/gc.py +++ b/dvc/repo/gc.py @@ -2,6 +2,7 @@ from . import locked from dvc.cache import NamedCache +from dvc.exceptions import InvalidArgumentError logger = logging.getLogger(__name__) @@ -13,6 +14,15 @@ def _do_gc(typ, func, clist): logger.info("No unused '{}' cache to remove.".format(typ)) +def _raise_error_if_all_disabled(**kwargs): + if not any(kwargs.values()): + raise InvalidArgumentError( + "Invalid Arguments. Either of ({}) needs to be enabled.".format( + ", ".join(kwargs.keys()) + ) + ) + + @locked def gc( self, @@ -25,7 +35,20 @@ def gc( force=False, jobs=None, repos=None, + workspace=False, ): + + # require `workspace` to be true to come into effect. + # assume `workspace` to be enabled if any of `all_tags`, `all_commits`, + # or `all_branches` are enabled. + _raise_error_if_all_disabled( + workspace=workspace, + all_tags=all_tags, + all_commits=all_commits, + all_branches=all_branches, + cloud=cloud, + ) + from contextlib import ExitStack from dvc.repo import Repo diff --git a/tests/func/test_gc.py b/tests/func/test_gc.py index f1aa4012ac..bb395669ab 100644 --- a/tests/func/test_gc.py +++ b/tests/func/test_gc.py @@ -1,9 +1,11 @@ +import logging import os import configobj import pytest from git import Repo +from dvc.compat import fspath from dvc.exceptions import CollectCacheError from dvc.main import main from dvc.repo import Repo as DvcRepo @@ -28,11 +30,11 @@ def setUp(self): self.bad_cache.append(path) def test_api(self): - self.dvc.gc() + self.dvc.gc(workspace=True) self._test_gc() def test_cli(self): - ret = main(["gc", "-f"]) + ret = main(["gc", "-wf"]) self.assertEqual(ret, 0) self._test_gc() @@ -169,10 +171,10 @@ def test(self): self._check_cache(3) - self.dvc.gc(repos=[self.additional_path]) + self.dvc.gc(repos=[self.additional_path], workspace=True) self._check_cache(3) - self.dvc.gc() + self.dvc.gc(workspace=True) self._check_cache(2) @@ -196,10 +198,10 @@ def test_gc_no_dir_cache(tmp_dir, dvc): os.unlink(dir_stage.outs[0].cache_path) with pytest.raises(CollectCacheError): - dvc.gc() + dvc.gc(workspace=True) assert _count_files(dvc.cache.local.cache_dir) == 4 - dvc.gc(force=True) + dvc.gc(force=True, workspace=True) assert _count_files(dvc.cache.local.cache_dir) == 2 @@ -218,5 +220,59 @@ def test_gc_no_unpacked_dir(tmp_dir, dvc): assert os.path.exists(unpackeddir) - dvc.gc(force=True) + dvc.gc(force=True, workspace=True) assert not os.path.exists(unpackeddir) + + +def test_gc_without_workspace_raises_error(tmp_dir, dvc): + dvc.gc(force=True, workspace=True) # works without error + + from dvc.exceptions import InvalidArgumentError + + with pytest.raises(InvalidArgumentError): + dvc.gc(force=True) + + with pytest.raises(InvalidArgumentError): + dvc.gc(force=True, workspace=False) + + +def test_gc_without_workspace_on_tags_branches_commits(tmp_dir, dvc): + dvc.gc(force=True, all_tags=True) + dvc.gc(force=True, all_commits=True) + dvc.gc(force=False, all_branches=True) + + # even if workspace is disabled, and others are enabled, assume as if + # workspace is enabled. + dvc.gc(force=False, all_branches=True, all_commits=False, workspace=False) + + +def test_gc_without_workspace(tmp_dir, dvc, caplog): + with caplog.at_level(logging.WARNING, logger="dvc"): + assert main(["gc", "-vf"]) == 255 + + assert "Invalid Arguments" in caplog.text + + +def test_gc_with_possible_args_positive(tmp_dir, dvc): + for flag in [ + "-w", + "-a", + "-T", + "--all-commits", + "-aT", + "-wa", + "-waT", + ]: + assert main(["gc", "-vf", flag]) == 0 + + +def test_gc_cloud_positive(tmp_dir, dvc, tmp_path_factory): + with dvc.config.edit() as conf: + storage = fspath(tmp_path_factory.mktemp("test_remote_base")) + conf["remote"]["local_remote"] = {"url": storage} + conf["core"]["remote"] = "local_remote" + + dvc.push() + + for flag in ["-c", "-ca", "-cT", "-caT", "-cwT"]: + assert main(["gc", "-vf", flag]) == 0 diff --git a/tests/func/test_repro.py b/tests/func/test_repro.py index 99eb24d345..d084de26b7 100644 --- a/tests/func/test_repro.py +++ b/tests/func/test_repro.py @@ -950,7 +950,7 @@ def test(self, mock_prompt): self.assertEqual(self.dvc.status([cmd_stage.path]), {}) self.assertEqual(self.dvc.status(), {}) - self.dvc.gc() + self.dvc.gc(workspace=True) self.assertEqual(self.dvc.status(), {}) self.dvc.remove(cmd_stage.path, outs_only=True)