From 1e7724bdc2cc999b137eb293f472c49eb83c33d5 Mon Sep 17 00:00:00 2001 From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com> Date: Mon, 16 Nov 2020 18:30:55 +0000 Subject: [PATCH 01/14] Tidy --- cylc/flow/platforms.py | 7 +++---- cylc/flow/suite_files.py | 3 +-- tests/unit/test_suite_files.py | 4 ++-- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/cylc/flow/platforms.py b/cylc/flow/platforms.py index 907c9e222ef..34bf597296d 100644 --- a/cylc/flow/platforms.py +++ b/cylc/flow/platforms.py @@ -339,7 +339,7 @@ def platform_from_job_info(platforms, job, remote): raise PlatformLookupError('No platform found matching your task') -def get_host_from_platform(platform, method=None): +def get_host_from_platform(platform, method='random'): """Placeholder for a more sophisticated function which returns a host given a platform dictionary. @@ -349,8 +349,7 @@ def get_host_from_platform(platform, method=None): method (str): Name a function to use when selecting hosts from list provided by platform. - - - None or 'random': Pick the first host from list + - 'random' (default): Pick a random host from list - 'first': Return the first host in the list Returns: @@ -361,7 +360,7 @@ def get_host_from_platform(platform, method=None): - Random Selection with check for host availability """ - if method is None or method == 'random': + if method == 'random': return random.choice(platform['hosts']) elif method == 'first': return platform['hosts'][0] diff --git a/cylc/flow/suite_files.py b/cylc/flow/suite_files.py index 5925c90dd60..58aeacb74a9 100644 --- a/cylc/flow/suite_files.py +++ b/cylc/flow/suite_files.py @@ -704,5 +704,4 @@ def get_cylc_run_abs_path(path): """ if os.path.isabs(path): return path - cylc_run_dir = os.path.expandvars(get_platform()['run directory']) - return os.path.join(cylc_run_dir, path) + return get_suite_run_dir(path) diff --git a/tests/unit/test_suite_files.py b/tests/unit/test_suite_files.py index 19cc87e538b..dff39dc06b3 100644 --- a/tests/unit/test_suite_files.py +++ b/tests/unit/test_suite_files.py @@ -247,7 +247,7 @@ def mkdirs_standin(_, exist_ok=False): ('/a/b/c', '/a/b/c')] ) def test_get_cylc_run_abs_path(path, expected, monkeypatch): - monkeypatch.setattr('cylc.flow.suite_files.get_platform', + monkeypatch.setattr('cylc.flow.pathutil.get_platform', lambda: {'run directory': '/mock_cylc_dir'}) assert suite_files.get_cylc_run_abs_path(path) == expected @@ -268,7 +268,7 @@ def test_is_valid_run_dir(path, expected, is_abs_path, monkeypatch): serv_dir = os.path.join(prefix, 'service', 'dir', 'exists', '.service') monkeypatch.setattr('os.path.isfile', lambda x: x == flow_file) monkeypatch.setattr('os.path.isdir', lambda x: x == serv_dir) - monkeypatch.setattr('cylc.flow.suite_files.get_platform', + monkeypatch.setattr('cylc.flow.pathutil.get_platform', lambda: {'run directory': 'mock_cylc_dir'}) path = os.path.normpath(path) if is_abs_path: From cc54307ffacf6af7349fc87c8a4fec7318595cd8 Mon Sep 17 00:00:00 2001 From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com> Date: Wed, 18 Nov 2020 18:34:45 +0000 Subject: [PATCH 02/14] Create `cylc clean` command --- cylc/flow/etc/cylc-bash-completion | 2 +- cylc/flow/pathutil.py | 23 ++++++++++++ cylc/flow/scripts/clean.py | 57 ++++++++++++++++++++++++++++++ cylc/flow/suite_files.py | 33 +++++++++++++++-- setup.cfg | 1 + 5 files changed, 113 insertions(+), 3 deletions(-) create mode 100644 cylc/flow/scripts/clean.py diff --git a/cylc/flow/etc/cylc-bash-completion b/cylc/flow/etc/cylc-bash-completion index 129465a413d..d67bed3f83e 100644 --- a/cylc/flow/etc/cylc-bash-completion +++ b/cylc/flow/etc/cylc-bash-completion @@ -38,7 +38,7 @@ _cylc() { cur="${COMP_WORDS[COMP_CWORD]}" sec="${COMP_WORDS[1]}" opts="$(cylc scan -t name 2>/dev/null)" - suite_cmds="broadcast|bcast|cat-log|log|check-versions|diff|compare|dump|edit|ext-trigger|external-trigger|get-directory|get-suite-config|get-config|get-suite-version|get-cylc-version|graph|graph-diff|hold|insert|kill|list|ls|tui|ping|poll|print|register|release|unhold|reload|remove|report-timings|reset|restart|run|start|scan|search|grep|set-verbosity|show|set-outputs|stop|shutdown|single|suite-state|test-battery|trigger|validate|view|warranty" + suite_cmds="broadcast|bcast|cat-log|check-versions|clean|compare|diff|dump|edit|ext-trigger|external-trigger|get-directory|get-suite-config|get-config|get-suite-version|get-cylc-version|graph|graph-diff|hold|insert|kill|list|log|ls|tui|ping|poll|print|register|release|unhold|reload|remove|report-timings|reset|restart|run|start|scan|search|grep|set-verbosity|show|set-outputs|stop|shutdown|single|suite-state|test-battery|trigger|validate|view|warranty" if [[ ${COMP_CWORD} -eq 1 ]]; then diff --git a/cylc/flow/pathutil.py b/cylc/flow/pathutil.py index 7507c378377..7d3e4d794e7 100644 --- a/cylc/flow/pathutil.py +++ b/cylc/flow/pathutil.py @@ -203,3 +203,26 @@ def make_symlink(src, dst): os.symlink(src, dst, target_is_directory=True) except Exception as exc: raise WorkflowFilesError(f"Error when symlinking '{exc}'") + + +def remove_dir(path): + """Delete a directory including contents, including the target directory + if the specified path is a symlink. + + Args: + path (str): the path of the directory to delete. + """ + if not os.path.isdir(path): + raise NotADirectoryError(path) + if os.path.islink(path): + if os.path.exists(path): + target = os.path.realpath(path) + LOG.info(f'Removing symlink target directory: {path} -> {target}') + rmtree(target) + LOG.info(f'Removing symlink: {path}') + else: + LOG.info(f'Removing broken symlink: {path}') + os.remove(path) + else: + LOG.info(f'Removing directory: {path}') + rmtree(path) diff --git a/cylc/flow/scripts/clean.py b/cylc/flow/scripts/clean.py new file mode 100644 index 00000000000..48994fb4164 --- /dev/null +++ b/cylc/flow/scripts/clean.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 + +# THIS FILE IS PART OF THE CYLC SUITE ENGINE. +# Copyright (C) NIWA & British Crown (Met Office) & Contributors. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +"""cylc clean [OPTIONS] ARGS + +Remove a stopped workflow from the local scheduler filesystem. + +NOTE: this command is intended for workflows installed with `cylc install`. If +this is run for a workflow that was instead written directly in ~/cylc-run and +not backed up elsewhere, it will be lost. + +It will also remove an symlink directory targets. For now, it will fail if +run on a host which doesn't have access to that filesystem. + +Suite names can be hierarchical, corresponding to the path under ~/cylc-run. + +Examples: + # Remove the workflow at ~/cylc-run/foo + $ cylc clean foo + +""" + +from cylc.flow.option_parsers import CylcOptionParser as COP +from cylc.flow.terminal import cli_function +from cylc.flow.suite_files import clean + + +def get_option_parser(): + parser = COP( + __doc__, + argdoc=[("REG", "Suite name")] + ) + return parser + + +@cli_function(get_option_parser) +def main(parser, opts, reg): + clean(reg) + + +if __name__ == "__main__": + main() diff --git a/cylc/flow/suite_files.py b/cylc/flow/suite_files.py index 58aeacb74a9..9e893111263 100644 --- a/cylc/flow/suite_files.py +++ b/cylc/flow/suite_files.py @@ -28,8 +28,9 @@ import aiofiles from cylc.flow import LOG -from cylc.flow.exceptions import SuiteServiceFileError -from cylc.flow.pathutil import get_suite_run_dir, make_localhost_symlinks +from cylc.flow.exceptions import SuiteServiceFileError, WorkflowFilesError +from cylc.flow.pathutil import ( + get_suite_run_dir, make_localhost_symlinks, remove_dir) from cylc.flow.platforms import get_platform from cylc.flow.hostuserutil import ( get_user, @@ -559,6 +560,34 @@ def register(reg=None, source=None, redirect=False): return reg +def clean(reg): + """Remove stopped workflows on the local scheduler filesystem. + + Args: + reg (str): workflow name. + """ + run_dir = get_suite_run_dir(reg) + if not os.path.isdir(run_dir): + raise WorkflowFilesError(f'No workflow directory found at {run_dir}') + try: + detect_old_contact_file(reg) + except SuiteServiceFileError as exc: + raise SuiteServiceFileError( + f'Cannot remove running workflow.\n\n{exc}') + + # TODO: check task_jobs table in database to see what platforms are used + + base_path = os.path.realpath(run_dir) + possible_symlinks = [os.path.join(base_path, d) for d in [ + 'log', 'share/cycle', 'share', 'work']] + # Note: 'share/cycle' must come before 'share' + for path in [*possible_symlinks, run_dir]: + # Note: possible_symlinks must come before run_dir + remove_dir(path) + + # TODO: what if user creates a symlink ~/cylc-run/foo/work/ -> /net/home/ ! + + def remove_keys_on_server(keys): """Removes server-held authentication keys""" # WARNING, DESTRUCTIVE. Removes old keys if they already exist. diff --git a/setup.cfg b/setup.cfg index e3743500c23..ed594936aea 100644 --- a/setup.cfg +++ b/setup.cfg @@ -70,6 +70,7 @@ cylc.command = broadcast = cylc.flow.scripts.broadcast:main cat-log = cylc.flow.scripts.cat_log:main check-versions = cylc.flow.scripts.check_versions:main + clean = cylc.flow.scripts.clean:main client = cylc.flow.scripts.client:main cycle-point = cylc.flow.scripts.cycle_point:main diff = cylc.flow.scripts.diff:main From dc9683b148b4e8b77766028dffc4ca568f61018b Mon Sep 17 00:00:00 2001 From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com> Date: Thu, 19 Nov 2020 15:09:11 +0000 Subject: [PATCH 03/14] cylc clean: ensure the right dirs are removed And prevent symlink attack --- cylc/flow/pathutil.py | 8 ++++++-- cylc/flow/scripts/clean.py | 4 ++-- cylc/flow/suite_files.py | 18 ++++++++++++------ 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/cylc/flow/pathutil.py b/cylc/flow/pathutil.py index 7d3e4d794e7..717e99c30bd 100644 --- a/cylc/flow/pathutil.py +++ b/cylc/flow/pathutil.py @@ -210,9 +210,11 @@ def remove_dir(path): if the specified path is a symlink. Args: - path (str): the path of the directory to delete. + path (str): the absolute path of the directory to delete. """ - if not os.path.isdir(path): + if not os.path.isabs(path): + raise ValueError('Path must be absolute') + if os.path.exists(path) and not os.path.isdir(path): raise NotADirectoryError(path) if os.path.islink(path): if os.path.exists(path): @@ -223,6 +225,8 @@ def remove_dir(path): else: LOG.info(f'Removing broken symlink: {path}') os.remove(path) + elif not os.path.exists(path): + raise FileNotFoundError(path) else: LOG.info(f'Removing directory: {path}') rmtree(path) diff --git a/cylc/flow/scripts/clean.py b/cylc/flow/scripts/clean.py index 48994fb4164..6b36cfbb2c3 100644 --- a/cylc/flow/scripts/clean.py +++ b/cylc/flow/scripts/clean.py @@ -24,8 +24,8 @@ this is run for a workflow that was instead written directly in ~/cylc-run and not backed up elsewhere, it will be lost. -It will also remove an symlink directory targets. For now, it will fail if -run on a host which doesn't have access to that filesystem. +It will also remove an symlink directory targets. For now, it will fail to +remove workflow files/directories on a remote host. Suite names can be hierarchical, corresponding to the path under ~/cylc-run. diff --git a/cylc/flow/suite_files.py b/cylc/flow/suite_files.py index 9e893111263..4fca934dd7d 100644 --- a/cylc/flow/suite_files.py +++ b/cylc/flow/suite_files.py @@ -578,14 +578,20 @@ def clean(reg): # TODO: check task_jobs table in database to see what platforms are used base_path = os.path.realpath(run_dir) - possible_symlinks = [os.path.join(base_path, d) for d in [ + possible_symlinks = [(name, os.path.join(base_path, name)) for name in [ 'log', 'share/cycle', 'share', 'work']] # Note: 'share/cycle' must come before 'share' - for path in [*possible_symlinks, run_dir]: - # Note: possible_symlinks must come before run_dir - remove_dir(path) - - # TODO: what if user creates a symlink ~/cylc-run/foo/work/ -> /net/home/ ! + for name, path in possible_symlinks: + if os.path.islink(path): + target = os.path.realpath(path) + target_reg_dir = target.rsplit(name, 1)[0] + # Ensure symlink not pointing to wrong directory! + if not target_reg_dir.endswith(f'cylc-run/{reg}/'): + raise WorkflowFilesError( + f'Symlink target {target} does not match expected pattern') + if os.path.isdir(target_reg_dir): + remove_dir(target_reg_dir) + remove_dir(run_dir) def remove_keys_on_server(keys): From 62222f79ee7417b40594451642df9d41bf7e4be7 Mon Sep 17 00:00:00 2001 From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com> Date: Fri, 20 Nov 2020 15:39:02 +0000 Subject: [PATCH 04/14] cylc clean & register: unified "reg" validation --- cylc/flow/suite_files.py | 51 ++++++++++++++++++++++++---------- tests/unit/test_suite_files.py | 8 ++---- 2 files changed, 39 insertions(+), 20 deletions(-) diff --git a/cylc/flow/suite_files.py b/cylc/flow/suite_files.py index 4fca934dd7d..cdc5ec2867f 100644 --- a/cylc/flow/suite_files.py +++ b/cylc/flow/suite_files.py @@ -479,24 +479,18 @@ def register(reg=None, source=None, redirect=False): Raise: SuiteServiceFileError: - No flow.cylc file found in source location. - Illegal name (can look like a relative path, but not absolute). - Another suite already has this name (unless --redirect). - Trying to register a suite nested inside of another. + - No flow.cylc file found in source location. + - Illegal name (can look like a relative path, but not absolute). + - Another suite already has this name (unless --redirect). + - Trying to register a suite nested inside of another. """ if reg is None: reg = os.path.basename(os.getcwd()) - make_localhost_symlinks(reg) - is_valid, message = SuiteNameValidator.validate(reg) - if not is_valid: - raise SuiteServiceFileError(f'invalid suite name - {message}') - - if os.path.isabs(reg): - raise SuiteServiceFileError( - f'suite name cannot be an absolute path: {reg}') - + _validate_reg(reg) check_nested_run_dirs(reg) + make_localhost_symlinks(reg) + if source is not None: if os.path.basename(source) == SuiteFiles.FLOW_FILE: source = os.path.dirname(source) @@ -566,6 +560,9 @@ def clean(reg): Args: reg (str): workflow name. """ + reg = os.path.normpath(reg) + # TODO: prevent `cylc clean foo/../` (i.e. rm -r ~/cylc-clean !) + _validate_reg(reg) run_dir = get_suite_run_dir(reg) if not os.path.isdir(run_dir): raise WorkflowFilesError(f'No workflow directory found at {run_dir}') @@ -685,12 +682,36 @@ def _load_local_item(item, path): return None +def _validate_reg(reg): + """Check suite name is valid. + + Args: + reg (str): Suite name + + Raise: + SuiteServiceFileError: + - reg has form of absolute path or is otherwise not valid + """ + is_valid, message = SuiteNameValidator.validate(reg) + if not is_valid: + raise SuiteServiceFileError(f'invalid suite name "{reg}" - {message}') + if os.path.isabs(reg): + raise SuiteServiceFileError( + f'suite name cannot be an absolute path: {reg}') + + def check_nested_run_dirs(reg): """Disallow nested run dirs e.g. trying to register foo/bar where foo is already a valid suite directory. Args: reg (str): suite name + + Raise: + WorkflowFilesError: + - reg dir is nested inside a run dir + - reg dir contains a nested run dir (if not deeper than max scan + depth) """ exc_msg = ( 'Nested run directories not allowed - cannot register suite name ' @@ -700,7 +721,7 @@ def _check_child_dirs(path, depth_count=1): for result in os.scandir(path): if result.is_dir() and not result.is_symlink(): if is_valid_run_dir(result.path): - raise SuiteServiceFileError(exc_msg % (reg, result.path)) + raise WorkflowFilesError(exc_msg % (reg, result.path)) if depth_count < MAX_SCAN_DEPTH: _check_child_dirs(result.path, depth_count + 1) @@ -708,7 +729,7 @@ def _check_child_dirs(path, depth_count=1): parent_dir = os.path.dirname(reg_path) while parent_dir != '': if is_valid_run_dir(parent_dir): - raise SuiteServiceFileError( + raise WorkflowFilesError( exc_msg % (reg, get_cylc_run_abs_path(parent_dir))) parent_dir = os.path.dirname(parent_dir) diff --git a/tests/unit/test_suite_files.py b/tests/unit/test_suite_files.py index dff39dc06b3..ac436b46563 100644 --- a/tests/unit/test_suite_files.py +++ b/tests/unit/test_suite_files.py @@ -15,13 +15,12 @@ # along with this program. If not, see . from cylc.flow.suite_files import check_nested_run_dirs -from cylc.flow.pathutil import make_localhost_symlinks import pytest from unittest import mock import os.path from cylc.flow import suite_files -from cylc.flow.exceptions import SuiteServiceFileError +from cylc.flow.exceptions import SuiteServiceFileError, WorkflowFilesError def get_register_test_cases(): @@ -293,7 +292,7 @@ def test_nested_run_dirs_raise_error(direction, monkeypatch): suite_files.check_nested_run_dirs('bright/falls') # Nested in a run dir - bad: for path in ('bright/falls/light', 'bright/falls/light/and/power'): - with pytest.raises(SuiteServiceFileError) as exc: + with pytest.raises(WorkflowFilesError) as exc: suite_files.check_nested_run_dirs(path) assert 'Nested run directories not allowed' in str(exc.value) @@ -319,9 +318,8 @@ def mock_scandir(path): for path in ('a/a/a', 'a/b'): suite_files.check_nested_run_dirs(path) # Run dir nested below - bad: - for path in ('a', 'a/a', 'a/c'): - with pytest.raises(SuiteServiceFileError) as exc: + with pytest.raises(WorkflowFilesError) as exc: check_nested_run_dirs(path) assert 'Nested run directories not allowed' in str(exc.value) # Run dir nested below max scan depth - not ideal but passes: From 118d82593035a5fc425db5a6f91d57d811e7ee6a Mon Sep 17 00:00:00 2001 From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com> Date: Fri, 20 Nov 2020 20:33:16 +0000 Subject: [PATCH 05/14] cylc clean: better validation - Ensure passed "reg" is not a path that points to the cylc-run dir or higher - Ensure symlinks to be deleted have expected form i.e. `something/cylc-run/reg/log` etc --- cylc/flow/pathutil.py | 5 +++-- cylc/flow/suite_files.py | 25 +++++++++++++++++-------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/cylc/flow/pathutil.py b/cylc/flow/pathutil.py index 717e99c30bd..91bba9304f8 100644 --- a/cylc/flow/pathutil.py +++ b/cylc/flow/pathutil.py @@ -202,7 +202,7 @@ def make_symlink(src, dst): try: os.symlink(src, dst, target_is_directory=True) except Exception as exc: - raise WorkflowFilesError(f"Error when symlinking '{exc}'") + raise WorkflowFilesError(f"Error when symlinking\n{exc}") def remove_dir(path): @@ -219,7 +219,8 @@ def remove_dir(path): if os.path.islink(path): if os.path.exists(path): target = os.path.realpath(path) - LOG.info(f'Removing symlink target directory: {path} -> {target}') + LOG.info( + f'Removing symlink target directory: ({path} ->) {target}') rmtree(target) LOG.info(f'Removing symlink: {path}') else: diff --git a/cylc/flow/suite_files.py b/cylc/flow/suite_files.py index cdc5ec2867f..69749c3c16a 100644 --- a/cylc/flow/suite_files.py +++ b/cylc/flow/suite_files.py @@ -560,12 +560,15 @@ def clean(reg): Args: reg (str): workflow name. """ - reg = os.path.normpath(reg) - # TODO: prevent `cylc clean foo/../` (i.e. rm -r ~/cylc-clean !) _validate_reg(reg) + reg = os.path.normpath(reg) + if reg.startswith('.'): + raise WorkflowFilesError( + 'Workflow name cannot be a path that points to the cylc-run ' + 'directory or above') run_dir = get_suite_run_dir(reg) if not os.path.isdir(run_dir): - raise WorkflowFilesError(f'No workflow directory found at {run_dir}') + raise WorkflowFilesError(f'No directory found at {run_dir}') try: detect_old_contact_file(reg) except SuiteServiceFileError as exc: @@ -580,12 +583,18 @@ def clean(reg): # Note: 'share/cycle' must come before 'share' for name, path in possible_symlinks: if os.path.islink(path): + # Ensure symlink is pointing to expected directory target = os.path.realpath(path) - target_reg_dir = target.rsplit(name, 1)[0] - # Ensure symlink not pointing to wrong directory! - if not target_reg_dir.endswith(f'cylc-run/{reg}/'): + if os.path.exists(target) and not os.path.isdir(target): raise WorkflowFilesError( - f'Symlink target {target} does not match expected pattern') + f'Invalid Cylc symlink directory {path} -> {target}\n' + f'Target is not a directory') + expected_end = os.path.join('cylc-run', reg, name) + if not target.endswith(expected_end): + raise WorkflowFilesError( + f'Invalid Cylc symlink directory {path} -> {target}\n' + f'Expected target to end with "{expected_end}"') + target_reg_dir = target.rsplit(name, 1)[0] if os.path.isdir(target_reg_dir): remove_dir(target_reg_dir) remove_dir(run_dir) @@ -739,7 +748,7 @@ def _check_child_dirs(path, depth_count=1): def is_valid_run_dir(path): - """Return True if path is a valid run directory, else False. + """Return True if path is a valid, existing run directory, else False. Args: path (str): if this is a relative path, it is taken to be relative to From 5abf13d297734af8d332b7732660c454f430561a Mon Sep 17 00:00:00 2001 From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com> Date: Mon, 23 Nov 2020 17:03:13 +0000 Subject: [PATCH 06/14] Unit tests for cylc clean & helpers --- tests/unit/test_pathutil.py | 84 ++++++++++++++++++- tests/unit/test_suite_files.py | 149 +++++++++++++++++++++++++++++++++ 2 files changed, 230 insertions(+), 3 deletions(-) diff --git a/tests/unit/test_pathutil.py b/tests/unit/test_pathutil.py index dff14979220..df555f07c72 100644 --- a/tests/unit/test_pathutil.py +++ b/tests/unit/test_pathutil.py @@ -15,7 +15,6 @@ # along with this program. If not, see . """Tests for "cylc.flow.pathutil".""" -from tempfile import TemporaryDirectory from unittest import TestCase from unittest.mock import patch, MagicMock, call @@ -38,8 +37,10 @@ get_suite_run_config_log_dir, get_suite_run_share_dir, get_suite_run_work_dir, - get_suite_test_log_name, make_localhost_symlinks, - make_suite_run_tree, make_symlink, + get_suite_test_log_name, + make_localhost_symlinks, + make_suite_run_tree, + remove_dir ) @@ -290,6 +291,83 @@ def test_incorrect_environment_variables_raise_error( make_localhost_symlinks('test_workflow') +@pytest.mark.parametrize( + 'filetype, expected_err', + [('dir', None), + ('file', NotADirectoryError), + (None, FileNotFoundError)] +) +def test_remove_dir(filetype, expected_err, tmp_path): + """Test that remove_dir() can delete nested dirs and handle bad paths.""" + test_path = tmp_path.joinpath('foo/bar') + if filetype == 'dir': + # Test removal of sub directories too + sub_dir = test_path.joinpath('baz') + sub_dir.mkdir(parents=True) + sub_dir_file = sub_dir.joinpath('meow') + sub_dir_file.touch() + elif filetype == 'file': + test_path = tmp_path.joinpath('meow') + test_path.touch() + + if expected_err: + with pytest.raises(expected_err): + remove_dir(test_path) + else: + remove_dir(test_path) + assert test_path.exists() is False + assert test_path.is_symlink() is False + + +@pytest.mark.parametrize( + 'target, expected_err', + [('dir', None), + ('file', NotADirectoryError), + (None, None)] +) +def test_remove_dir_symlinks(target, expected_err, tmp_path): + """Test that remove_dir() can delete symlinks, including the target.""" + target_path = tmp_path.joinpath('x/y') + target_path.mkdir(parents=True) + + tmp_path.joinpath('a').mkdir() + symlink_path = tmp_path.joinpath('a/b') + + if target == 'dir': + # Add a file into the the target dir to check it removes that ok + target_path.joinpath('meow').touch() + symlink_path.symlink_to(target_path) + elif target == 'file': + target_path = target_path.joinpath('meow') + target_path.touch() + symlink_path.symlink_to(target_path) + elif target is None: + symlink_path.symlink_to(target_path) + # Break symlink + target_path.rmdir() + + if expected_err: + with pytest.raises(expected_err): + remove_dir(symlink_path) + else: + remove_dir(symlink_path) + for path in [symlink_path, target_path]: + assert path.exists() is False + assert path.is_symlink() is False + + +def test_remove_dir_relative(tmp_path): + """Test that you cannot use remove_dir() on a relative path. + + When removing a path, we want to be absolute-ly sure where it is! + """ + # cd to temp dir in case we accidentally succeed in deleting the path + os.chdir(tmp_path) + with pytest.raises(ValueError) as cm: + remove_dir('foo/bar') + assert 'Path must be absolute' in str(cm.value) + + if __name__ == '__main__': from unittest import main main() diff --git a/tests/unit/test_suite_files.py b/tests/unit/test_suite_files.py index ac436b46563..739372481bc 100644 --- a/tests/unit/test_suite_files.py +++ b/tests/unit/test_suite_files.py @@ -324,3 +324,152 @@ def mock_scandir(path): assert 'Nested run directories not allowed' in str(exc.value) # Run dir nested below max scan depth - not ideal but passes: suite_files.check_nested_run_dirs('a/d') + + +@pytest.mark.parametrize( + 'reg, expected_err', + [('foo/bar/', None), + ('/foo/bar', SuiteServiceFileError)] +) +def test_validate_reg(reg, expected_err): + if expected_err: + with pytest.raises(expected_err) as exc: + suite_files._validate_reg(reg) + assert 'cannot be an absolute path' in str(exc.value) + else: + suite_files._validate_reg(reg) + + +@pytest.mark.parametrize( + 'reg, props', + [ + ('foo/bar/', {}), + ('foo/..', { + 'no dir': True, + 'err': WorkflowFilesError, + 'err msg': ('cannot be a path that points to the cylc-run ' + 'directory or above') + }), + ('foo/../..', { + 'no dir': True, + 'err': WorkflowFilesError, + 'err msg': ('cannot be a path that points to the cylc-run ' + 'directory or above') + }), + ('foo', { + 'not stopped': True, + 'err': SuiteServiceFileError, + 'err msg': 'Cannot remove running workflow' + }), + ('foo', { + 'no dir': True, + 'err': WorkflowFilesError, + 'err msg': 'No directory found' + }), + ('foo', { + 'symlink dirs': { + 'log': 'sym-log', + 'share': 'sym-share', + 'share/cycle': 'sym-cycle', + 'work': 'sym-work' + } + }), + ('foo', { + 'symlink dirs': { + 'run': 'sym-run', + 'log': 'sym-log', + 'share': 'sym-share', + 'share/cycle': 'sym-cycle', + 'work': 'sym-work' + } + }), + ('foo', { + 'bad symlink': { + 'type': 'file', + 'path': 'sym-log/cylc-run/foo/meow.txt' + }, + 'err': WorkflowFilesError, + 'err msg': 'Target is not a directory' + }), + ('foo', { + 'bad symlink': { + 'type': 'dir', + 'path': 'sym-log/bad/path' + }, + 'err': WorkflowFilesError, + 'err msg': 'Expected target to end with "cylc-run/foo/log"' + }) + ] +) +def test_clean(reg, props, monkeypatch, tmp_path): + """Test the clean() function. + + Params: + reg (str): Workflow name. + props (dict): Possible values are (all optional): + 'err' (Exception): Expected error. + 'err msg' (str): Message that is expected to be in the exception. + 'no dir' (bool): If True, do not create run dir for this test case. + 'not stopped' (bool): If True, simulate that the workflow is + still running. + 'symlink dirs' (dict): As you would find in the global config + under [symlink dirs][platform]. + 'bad symlink' (dict): Simulate an invalid log symlink dir: + 'type' (str): 'file' or 'dir'. + 'path' (str): Path of the symlink target relative to tmp_path. + """ + # --- Setup --- + tmp_path.joinpath('cylc-run').mkdir() + run_dir = tmp_path.joinpath('cylc-run', reg) + symlink_dirs = props.get('symlink dirs') + bad_symlink = props.get('bad symlink') + if not props.get('no dir') and ( + not symlink_dirs or 'run' not in symlink_dirs): + run_dir.mkdir(parents=True) + + dirs_to_check = [run_dir] + if symlink_dirs: + if 'run' in symlink_dirs: + dst = tmp_path.joinpath(symlink_dirs['run'], 'cylc-run', reg) + dst.mkdir(parents=True) + run_dir.symlink_to(dst) + dirs_to_check.append(dst) + symlink_dirs.pop('run') + for s, d in symlink_dirs.items(): + dst = tmp_path.joinpath(d, 'cylc-run', reg, s) + dst.mkdir(parents=True) + src = run_dir.joinpath(s) + src.symlink_to(dst) + dirs_to_check.append(dst.parent) + if bad_symlink: + dst = tmp_path.joinpath(bad_symlink['path']) + if bad_symlink['type'] == 'file': + dst.parent.mkdir(parents=True) + dst.touch() + else: + dst.mkdir(parents=True) + src = run_dir.joinpath('log') + src.symlink_to(dst) + + def mocked_detect_old_contact_file(reg): + if props.get('not stopped'): + raise SuiteServiceFileError('Mocked error') + + monkeypatch.setattr('cylc.flow.suite_files.detect_old_contact_file', + mocked_detect_old_contact_file) + monkeypatch.setattr('cylc.flow.suite_files.get_suite_run_dir', + lambda x: run_dir) + + # --- The actual test --- + expected_err = props.get('err') + if expected_err: + with pytest.raises(expected_err) as exc: + suite_files.clean(reg) + expected_msg = props.get('err msg') + if expected_msg: + assert expected_msg in str(exc.value) + else: + suite_files.clean(reg) + for d in dirs_to_check: + assert d.exists() is False + assert d.is_symlink() is False From 411c335ce1b76127e4c635c8fcfc61ce2284f807 Mon Sep 17 00:00:00 2001 From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com> Date: Tue, 24 Nov 2020 11:51:22 +0000 Subject: [PATCH 07/14] Fix cylc clean for nested suite name e.g. foo/bar/baz --- cylc/flow/suite_files.py | 25 +++++++++++++++++-------- tests/unit/test_suite_files.py | 6 ++++-- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/cylc/flow/suite_files.py b/cylc/flow/suite_files.py index 69749c3c16a..de2677a8e9e 100644 --- a/cylc/flow/suite_files.py +++ b/cylc/flow/suite_files.py @@ -557,6 +557,10 @@ def register(reg=None, source=None, redirect=False): def clean(reg): """Remove stopped workflows on the local scheduler filesystem. + Deletes the run dir in ~/cylc-run and any symlink dirs. Note: if the + run dir has been manually deleted, it will not be possible to clean the + symlink dirs. + Args: reg (str): workflow name. """ @@ -577,11 +581,10 @@ def clean(reg): # TODO: check task_jobs table in database to see what platforms are used - base_path = os.path.realpath(run_dir) - possible_symlinks = [(name, os.path.join(base_path, name)) for name in [ + possible_symlinks = [(name, os.path.join(run_dir, name)) for name in [ 'log', 'share/cycle', 'share', 'work']] # Note: 'share/cycle' must come before 'share' - for name, path in possible_symlinks: + for name, path in [*possible_symlinks, ('', run_dir)]: if os.path.islink(path): # Ensure symlink is pointing to expected directory target = os.path.realpath(path) @@ -589,15 +592,21 @@ def clean(reg): raise WorkflowFilesError( f'Invalid Cylc symlink directory {path} -> {target}\n' f'Target is not a directory') - expected_end = os.path.join('cylc-run', reg, name) + expected_end = str(Path('cylc-run', reg, name)) if not target.endswith(expected_end): raise WorkflowFilesError( f'Invalid Cylc symlink directory {path} -> {target}\n' f'Expected target to end with "{expected_end}"') - target_reg_dir = target.rsplit(name, 1)[0] - if os.path.isdir(target_reg_dir): - remove_dir(target_reg_dir) - remove_dir(run_dir) + reg_depth = len(Path(reg).parts) - 1 + target_reg_dir = target.rsplit(name, 1)[0] if name else target + if reg_depth > 0: + target_top_parent = Path(target_reg_dir).parents[reg_depth - 1] + else: + target_top_parent = target_reg_dir + if os.path.isdir(target_top_parent): + remove_dir(target_top_parent) + run_dir_top_parent = get_suite_run_dir(Path(reg).parts[0]) + remove_dir(run_dir_top_parent) def remove_keys_on_server(keys): diff --git a/tests/unit/test_suite_files.py b/tests/unit/test_suite_files.py index 739372481bc..299fec29c55 100644 --- a/tests/unit/test_suite_files.py +++ b/tests/unit/test_suite_files.py @@ -19,6 +19,7 @@ from unittest import mock import os.path +from pathlib import Path from cylc.flow import suite_files from cylc.flow.exceptions import SuiteServiceFileError, WorkflowFilesError @@ -421,13 +422,14 @@ def test_clean(reg, props, monkeypatch, tmp_path): # --- Setup --- tmp_path.joinpath('cylc-run').mkdir() run_dir = tmp_path.joinpath('cylc-run', reg) + run_dir_top_parent = tmp_path.joinpath('cylc-run', Path(reg).parts[0]) symlink_dirs = props.get('symlink dirs') bad_symlink = props.get('bad symlink') if not props.get('no dir') and ( not symlink_dirs or 'run' not in symlink_dirs): run_dir.mkdir(parents=True) - dirs_to_check = [run_dir] + dirs_to_check = [run_dir_top_parent] if symlink_dirs: if 'run' in symlink_dirs: dst = tmp_path.joinpath(symlink_dirs['run'], 'cylc-run', reg) @@ -458,7 +460,7 @@ def mocked_detect_old_contact_file(reg): monkeypatch.setattr('cylc.flow.suite_files.detect_old_contact_file', mocked_detect_old_contact_file) monkeypatch.setattr('cylc.flow.suite_files.get_suite_run_dir', - lambda x: run_dir) + lambda x: tmp_path.joinpath('cylc-run', x)) # --- The actual test --- expected_err = props.get('err') From 64136322643bdcf0864eb12c77a3ff15d165c417 Mon Sep 17 00:00:00 2001 From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com> Date: Tue, 24 Nov 2020 11:51:35 +0000 Subject: [PATCH 08/14] Functional test for cylc clean --- tests/functional/cylc-clean/00-basic.t | 124 ++++++++++++++++++++++++ tests/functional/cylc-clean/test_header | 1 + 2 files changed, 125 insertions(+) create mode 100644 tests/functional/cylc-clean/00-basic.t create mode 120000 tests/functional/cylc-clean/test_header diff --git a/tests/functional/cylc-clean/00-basic.t b/tests/functional/cylc-clean/00-basic.t new file mode 100644 index 00000000000..088f0d809a1 --- /dev/null +++ b/tests/functional/cylc-clean/00-basic.t @@ -0,0 +1,124 @@ +#!/bin/bash +# THIS FILE IS PART OF THE CYLC SUITE ENGINE. +# Copyright (C) NIWA & British Crown (Met Office) & Contributors. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# ----------------------------------------------------------------------------- +# Test the cylc clean command + +. "$(dirname "$0")/test_header" +set_test_number 5 + +create_test_global_config "" " +[symlink dirs] + [[localhost]] + run = ${TEST_DIR}/sym-run + log = ${TEST_DIR}/sym-log + share = ${TEST_DIR}/sym-share + share/cycle = ${TEST_DIR}/sym-cycle + work = ${TEST_DIR}/sym-work +" +init_suite "${TEST_NAME_BASE}" << '__FLOW__' +[scheduling] + [[graph]] + R1 = darmok +__FLOW__ +# ----------------------------------------------------------------------------- +TEST_NAME="run-dir-tree-pre-clean" +tree "$SUITE_RUN_DIR" > "${TEST_NAME}.stdout" +# Remove last line of output: +sed -i '$d' "${TEST_NAME}.stdout" + +cmp_ok "${TEST_NAME}.stdout" << __TREE__ +${HOME}/cylc-run/${SUITE_NAME} +|-- log -> ${TEST_DIR}/sym-log/cylc-run/${SUITE_NAME}/log +|-- share -> ${TEST_DIR}/sym-share/cylc-run/${SUITE_NAME}/share +\`-- work -> ${TEST_DIR}/sym-work/cylc-run/${SUITE_NAME}/work + +__TREE__ + +TEST_NAME="test-dir-tree-pre-clean" +tree "${TEST_DIR}/sym-"* > "${TEST_NAME}.stdout" +# Remove last line of output: +sed -i '$d' "${TEST_NAME}.stdout" +# Note: backticks need to be escaped in the heredoc +cmp_ok "${TEST_NAME}.stdout" << __TREE__ +${TEST_DIR}/sym-cycle +\`-- cylc-run + \`-- cylctb-${CYLC_TEST_TIME_INIT} + \`-- f + \`-- cylc-clean + \`-- ${TEST_NAME_BASE} + \`-- share + \`-- cycle +${TEST_DIR}/sym-log +\`-- cylc-run + \`-- cylctb-${CYLC_TEST_TIME_INIT} + \`-- f + \`-- cylc-clean + \`-- ${TEST_NAME_BASE} + \`-- log +${TEST_DIR}/sym-run +\`-- cylc-run + \`-- cylctb-${CYLC_TEST_TIME_INIT} + \`-- f + \`-- cylc-clean + \`-- ${TEST_NAME_BASE} + |-- log -> ${TEST_DIR}/sym-log/cylc-run/${SUITE_NAME}/log + |-- share -> ${TEST_DIR}/sym-share/cylc-run/${SUITE_NAME}/share + \`-- work -> ${TEST_DIR}/sym-work/cylc-run/${SUITE_NAME}/work +${TEST_DIR}/sym-share +\`-- cylc-run + \`-- cylctb-${CYLC_TEST_TIME_INIT} + \`-- f + \`-- cylc-clean + \`-- ${TEST_NAME_BASE} + \`-- share + \`-- cycle -> ${TEST_DIR}/sym-cycle/cylc-run/${SUITE_NAME}/share/cycle +${TEST_DIR}/sym-work +\`-- cylc-run + \`-- cylctb-${CYLC_TEST_TIME_INIT} + \`-- f + \`-- cylc-clean + \`-- ${TEST_NAME_BASE} + \`-- work + +__TREE__ +# ----------------------------------------------------------------------------- +run_ok "cylc-clean" cylc clean "$SUITE_NAME" +# ----------------------------------------------------------------------------- +TEST_NAME="run-dir-not-exist-post-clean" +exists_fail "$SUITE_RUN_DIR" + +TEST_NAME="test-dir-tree-post-clean" +tree "${TEST_DIR}/sym-"* > "${TEST_NAME}.stdout" +# Remove last line of output: +sed -i '$d' "${TEST_NAME}.stdout" + +cmp_ok "${TEST_NAME}.stdout" << __TREE__ +${TEST_DIR}/sym-cycle +\`-- cylc-run +${TEST_DIR}/sym-log +\`-- cylc-run +${TEST_DIR}/sym-run +\`-- cylc-run +${TEST_DIR}/sym-share +\`-- cylc-run +${TEST_DIR}/sym-work +\`-- cylc-run + +__TREE__ +# ----------------------------------------------------------------------------- +purge +exit diff --git a/tests/functional/cylc-clean/test_header b/tests/functional/cylc-clean/test_header new file mode 120000 index 00000000000..90bd5a36f92 --- /dev/null +++ b/tests/functional/cylc-clean/test_header @@ -0,0 +1 @@ +../lib/bash/test_header \ No newline at end of file From 2a05203410669e8db6677de20783c0295d4cfbef Mon Sep 17 00:00:00 2001 From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com> Date: Tue, 24 Nov 2020 14:29:53 +0000 Subject: [PATCH 09/14] Make sure tree command is installed for functional test --- .github/workflows/test_functional.yml | 2 +- tests/functional/cylc-clean/00-basic.t | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test_functional.yml b/.github/workflows/test_functional.yml index 30b9b0f1cc2..eb0ae5bd3c0 100644 --- a/.github/workflows/test_functional.yml +++ b/.github/workflows/test_functional.yml @@ -39,7 +39,7 @@ jobs: - name: Apt-Get run: | sudo apt-get update - sudo apt-get install -y sqlite3 + sudo apt-get install -y sqlite3 tree - name: Add .github/bin/ to PATH # Sets up mocked mail command & any other custom executables diff --git a/tests/functional/cylc-clean/00-basic.t b/tests/functional/cylc-clean/00-basic.t index 088f0d809a1..3623b459c33 100644 --- a/tests/functional/cylc-clean/00-basic.t +++ b/tests/functional/cylc-clean/00-basic.t @@ -18,6 +18,9 @@ # Test the cylc clean command . "$(dirname "$0")/test_header" +if ! command -v 'tree' >'/dev/null'; then + skip_all '"tree" command not available' +fi set_test_number 5 create_test_global_config "" " @@ -36,7 +39,7 @@ init_suite "${TEST_NAME_BASE}" << '__FLOW__' __FLOW__ # ----------------------------------------------------------------------------- TEST_NAME="run-dir-tree-pre-clean" -tree "$SUITE_RUN_DIR" > "${TEST_NAME}.stdout" +tree --charset=ascii "$SUITE_RUN_DIR" > "${TEST_NAME}.stdout" # Remove last line of output: sed -i '$d' "${TEST_NAME}.stdout" @@ -49,7 +52,7 @@ ${HOME}/cylc-run/${SUITE_NAME} __TREE__ TEST_NAME="test-dir-tree-pre-clean" -tree "${TEST_DIR}/sym-"* > "${TEST_NAME}.stdout" +tree --charset=ascii "${TEST_DIR}/sym-"* > "${TEST_NAME}.stdout" # Remove last line of output: sed -i '$d' "${TEST_NAME}.stdout" # Note: backticks need to be escaped in the heredoc @@ -102,7 +105,7 @@ TEST_NAME="run-dir-not-exist-post-clean" exists_fail "$SUITE_RUN_DIR" TEST_NAME="test-dir-tree-post-clean" -tree "${TEST_DIR}/sym-"* > "${TEST_NAME}.stdout" +tree --charset=ascii "${TEST_DIR}/sym-"* > "${TEST_NAME}.stdout" # Remove last line of output: sed -i '$d' "${TEST_NAME}.stdout" From caab8b87d9d62d78aea21c42c67c6fe0d8ba2f60 Mon Sep 17 00:00:00 2001 From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com> Date: Tue, 24 Nov 2020 18:32:23 +0000 Subject: [PATCH 10/14] cylc clean: ensure workflow siblings are not removed Both in the run dir and symlink dirs. --- cylc/flow/suite_files.py | 72 ++++++++++++++++++-------- tests/functional/cylc-clean/00-basic.t | 13 +++-- tests/unit/test_suite_files.py | 26 +++++++++- 3 files changed, 85 insertions(+), 26 deletions(-) diff --git a/cylc/flow/suite_files.py b/cylc/flow/suite_files.py index de2677a8e9e..7a78ed0ba7a 100644 --- a/cylc/flow/suite_files.py +++ b/cylc/flow/suite_files.py @@ -570,8 +570,8 @@ def clean(reg): raise WorkflowFilesError( 'Workflow name cannot be a path that points to the cylc-run ' 'directory or above') - run_dir = get_suite_run_dir(reg) - if not os.path.isdir(run_dir): + run_dir = Path(get_suite_run_dir(reg)) + if not run_dir.is_dir(): raise WorkflowFilesError(f'No directory found at {run_dir}') try: detect_old_contact_file(reg) @@ -581,32 +581,62 @@ def clean(reg): # TODO: check task_jobs table in database to see what platforms are used - possible_symlinks = [(name, os.path.join(run_dir, name)) for name in [ - 'log', 'share/cycle', 'share', 'work']] - # Note: 'share/cycle' must come before 'share' - for name, path in [*possible_symlinks, ('', run_dir)]: - if os.path.islink(path): - # Ensure symlink is pointing to expected directory - target = os.path.realpath(path) - if os.path.exists(target) and not os.path.isdir(target): + possible_symlinks = [(Path(name), Path(run_dir, name)) for name in [ + 'log', 'share/cycle', 'share', 'work', '']] + # Note: 'share/cycle' must come before 'share', and '' must come last + for name, path in possible_symlinks: + if path.is_symlink(): + # Ensure symlink is pointing to expected directory. If not, + # something is wrong and we should abort + target = path.resolve() + if target.exists() and not target.is_dir(): raise WorkflowFilesError( f'Invalid Cylc symlink directory {path} -> {target}\n' f'Target is not a directory') expected_end = str(Path('cylc-run', reg, name)) - if not target.endswith(expected_end): + if not str(target).endswith(expected_end): raise WorkflowFilesError( f'Invalid Cylc symlink directory {path} -> {target}\n' f'Expected target to end with "{expected_end}"') - reg_depth = len(Path(reg).parts) - 1 - target_reg_dir = target.rsplit(name, 1)[0] if name else target - if reg_depth > 0: - target_top_parent = Path(target_reg_dir).parents[reg_depth - 1] - else: - target_top_parent = target_reg_dir - if os.path.isdir(target_top_parent): - remove_dir(target_top_parent) - run_dir_top_parent = get_suite_run_dir(Path(reg).parts[0]) - remove_dir(run_dir_top_parent) + # Remove /cylc-run/ + target_cylc_run_dir = str(target).rsplit(str(reg), 1)[0] + target_reg_dir = Path(target_cylc_run_dir, reg) + if target_reg_dir.is_dir(): + remove_dir(target_reg_dir) + # Remove empty parents + _remove_empty_reg_parents(reg, target_reg_dir) + + remove_dir(run_dir) + _remove_empty_reg_parents(reg, run_dir) + + +def _remove_empty_reg_parents(reg, path): + """If reg is nested e.g. a/b/c, work our way up the tree, removing empty + parents only. + + Args: + reg (str): workflow name, e.g. a/b/c + path (str): path to this directory, e.g. /foo/bar/a/b/c + + Example: + _remove_empty_reg_parents('a/b/c', '/foo/bar/a/b/c') would remove + /foo/bar/a/b (assuming it's empty), then /foo/bar/a (assuming it's + empty). + """ + reg = Path(reg) + reg_depth = len(reg.parts) - 1 + path = Path(path) + if not path.is_absolute(): + raise ValueError('Path must be absolute') + for i in range(reg_depth): + parent = path.parents[i] + if not parent.is_dir(): + continue + try: + parent.rmdir() + LOG.info(f'Removing directory: {parent}') + except OSError: + break def remove_keys_on_server(keys): diff --git a/tests/functional/cylc-clean/00-basic.t b/tests/functional/cylc-clean/00-basic.t index 3623b459c33..64e4361d331 100644 --- a/tests/functional/cylc-clean/00-basic.t +++ b/tests/functional/cylc-clean/00-basic.t @@ -37,6 +37,8 @@ init_suite "${TEST_NAME_BASE}" << '__FLOW__' [[graph]] R1 = darmok __FLOW__ +# Create a fake sibling workflow dir in the sym-log dir: +mkdir "${TEST_DIR}/sym-log/cylc-run/cylctb-${CYLC_TEST_TIME_INIT}/leave-me-alone" # ----------------------------------------------------------------------------- TEST_NAME="run-dir-tree-pre-clean" tree --charset=ascii "$SUITE_RUN_DIR" > "${TEST_NAME}.stdout" @@ -68,10 +70,11 @@ ${TEST_DIR}/sym-cycle ${TEST_DIR}/sym-log \`-- cylc-run \`-- cylctb-${CYLC_TEST_TIME_INIT} - \`-- f - \`-- cylc-clean - \`-- ${TEST_NAME_BASE} - \`-- log + |-- f + | \`-- cylc-clean + | \`-- ${TEST_NAME_BASE} + | \`-- log + \`-- leave-me-alone ${TEST_DIR}/sym-run \`-- cylc-run \`-- cylctb-${CYLC_TEST_TIME_INIT} @@ -114,6 +117,8 @@ ${TEST_DIR}/sym-cycle \`-- cylc-run ${TEST_DIR}/sym-log \`-- cylc-run + \`-- cylctb-${CYLC_TEST_TIME_INIT} + \`-- leave-me-alone ${TEST_DIR}/sym-run \`-- cylc-run ${TEST_DIR}/sym-share diff --git a/tests/unit/test_suite_files.py b/tests/unit/test_suite_files.py index 299fec29c55..4fd2750db95 100644 --- a/tests/unit/test_suite_files.py +++ b/tests/unit/test_suite_files.py @@ -367,7 +367,7 @@ def test_validate_reg(reg, expected_err): 'err': WorkflowFilesError, 'err msg': 'No directory found' }), - ('foo', { + ('foo/bar', { 'symlink dirs': { 'log': 'sym-log', 'share': 'sym-share', @@ -475,3 +475,27 @@ def mocked_detect_old_contact_file(reg): for d in dirs_to_check: assert d.exists() is False assert d.is_symlink() is False + + +def test_remove_empty_reg_parents(tmp_path): + """Test that _remove_empty_parents() doesn't remove parents containing a + sibling.""" + reg = 'foo/bar/baz/qux' + path = tmp_path.joinpath(reg) + tmp_path.joinpath('foo/bar/baz').mkdir(parents=True) + sibling_reg = 'foo/darmok' + sibling_path = tmp_path.joinpath(sibling_reg) + sibling_path.mkdir() + suite_files._remove_empty_reg_parents(reg, path) + assert tmp_path.joinpath('foo/bar').exists() is False + assert tmp_path.joinpath('foo').exists() is True + # Also path must be absolute + with pytest.raises(ValueError) as exc: + suite_files._remove_empty_reg_parents('foo/darmok', 'meow/foo/darmok') + assert 'Path must be absolute' in str(exc.value) + # Check it skips non-existent dirs, and stops at the right place too + tmp_path.joinpath('foo/bar').mkdir() + sibling_path.rmdir() + suite_files._remove_empty_reg_parents(reg, path) + assert tmp_path.joinpath('foo').exists() is False + assert tmp_path.exists() is True From 7c6b385884652d7e39d4d7112e68707a5e86d919 Mon Sep 17 00:00:00 2001 From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com> Date: Tue, 24 Nov 2020 18:37:11 +0000 Subject: [PATCH 11/14] cylc clean: do not raise error if specified workflow doesn't exist --- cylc/flow/suite_files.py | 3 ++- tests/unit/test_suite_files.py | 6 +----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/cylc/flow/suite_files.py b/cylc/flow/suite_files.py index 7a78ed0ba7a..3cd3333dfd4 100644 --- a/cylc/flow/suite_files.py +++ b/cylc/flow/suite_files.py @@ -572,7 +572,8 @@ def clean(reg): 'directory or above') run_dir = Path(get_suite_run_dir(reg)) if not run_dir.is_dir(): - raise WorkflowFilesError(f'No directory found at {run_dir}') + LOG.info(f'No workflow directory to clean at {run_dir}') + return try: detect_old_contact_file(reg) except SuiteServiceFileError as exc: diff --git a/tests/unit/test_suite_files.py b/tests/unit/test_suite_files.py index 4fd2750db95..a33dffbf445 100644 --- a/tests/unit/test_suite_files.py +++ b/tests/unit/test_suite_files.py @@ -345,6 +345,7 @@ def test_validate_reg(reg, expected_err): 'reg, props', [ ('foo/bar/', {}), + ('foo', {'no dir': True}), ('foo/..', { 'no dir': True, 'err': WorkflowFilesError, @@ -362,11 +363,6 @@ def test_validate_reg(reg, expected_err): 'err': SuiteServiceFileError, 'err msg': 'Cannot remove running workflow' }), - ('foo', { - 'no dir': True, - 'err': WorkflowFilesError, - 'err msg': 'No directory found' - }), ('foo/bar', { 'symlink dirs': { 'log': 'sym-log', From 8c7e368aa1dcc4a2dd1acf9ccb2df34c3df63c15 Mon Sep 17 00:00:00 2001 From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com> Date: Mon, 30 Nov 2020 16:14:16 +0000 Subject: [PATCH 12/14] Make cylc clean functional test less flaky --- tests/functional/cylc-clean/00-basic.t | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/tests/functional/cylc-clean/00-basic.t b/tests/functional/cylc-clean/00-basic.t index 64e4361d331..f0dce8673f4 100644 --- a/tests/functional/cylc-clean/00-basic.t +++ b/tests/functional/cylc-clean/00-basic.t @@ -21,7 +21,7 @@ if ! command -v 'tree' >'/dev/null'; then skip_all '"tree" command not available' fi -set_test_number 5 +set_test_number 6 create_test_global_config "" " [symlink dirs] @@ -37,8 +37,13 @@ init_suite "${TEST_NAME_BASE}" << '__FLOW__' [[graph]] R1 = darmok __FLOW__ + +run_ok "${TEST_NAME_BASE}-val" cylc validate "$SUITE_NAME" + # Create a fake sibling workflow dir in the sym-log dir: mkdir "${TEST_DIR}/sym-log/cylc-run/cylctb-${CYLC_TEST_TIME_INIT}/leave-me-alone" + +FUNCTIONAL_DIR="${TEST_SOURCE_DIR_BASE%/*}" # ----------------------------------------------------------------------------- TEST_NAME="run-dir-tree-pre-clean" tree --charset=ascii "$SUITE_RUN_DIR" > "${TEST_NAME}.stdout" @@ -62,7 +67,7 @@ cmp_ok "${TEST_NAME}.stdout" << __TREE__ ${TEST_DIR}/sym-cycle \`-- cylc-run \`-- cylctb-${CYLC_TEST_TIME_INIT} - \`-- f + \`-- ${FUNCTIONAL_DIR} \`-- cylc-clean \`-- ${TEST_NAME_BASE} \`-- share @@ -70,7 +75,7 @@ ${TEST_DIR}/sym-cycle ${TEST_DIR}/sym-log \`-- cylc-run \`-- cylctb-${CYLC_TEST_TIME_INIT} - |-- f + |-- ${FUNCTIONAL_DIR} | \`-- cylc-clean | \`-- ${TEST_NAME_BASE} | \`-- log @@ -78,7 +83,7 @@ ${TEST_DIR}/sym-log ${TEST_DIR}/sym-run \`-- cylc-run \`-- cylctb-${CYLC_TEST_TIME_INIT} - \`-- f + \`-- ${FUNCTIONAL_DIR} \`-- cylc-clean \`-- ${TEST_NAME_BASE} |-- log -> ${TEST_DIR}/sym-log/cylc-run/${SUITE_NAME}/log @@ -87,7 +92,7 @@ ${TEST_DIR}/sym-run ${TEST_DIR}/sym-share \`-- cylc-run \`-- cylctb-${CYLC_TEST_TIME_INIT} - \`-- f + \`-- ${FUNCTIONAL_DIR} \`-- cylc-clean \`-- ${TEST_NAME_BASE} \`-- share @@ -95,7 +100,7 @@ ${TEST_DIR}/sym-share ${TEST_DIR}/sym-work \`-- cylc-run \`-- cylctb-${CYLC_TEST_TIME_INIT} - \`-- f + \`-- ${FUNCTIONAL_DIR} \`-- cylc-clean \`-- ${TEST_NAME_BASE} \`-- work From b35b10ba000f18dac5303b9bcd5393ceb1a2c6ed Mon Sep 17 00:00:00 2001 From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com> Date: Tue, 1 Dec 2020 12:13:58 +0000 Subject: [PATCH 13/14] Prevent cylc-clean functional test clashing --- tests/functional/cylc-clean/00-basic.t | 54 ++++++++++++++------------ 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/tests/functional/cylc-clean/00-basic.t b/tests/functional/cylc-clean/00-basic.t index f0dce8673f4..5c09be7635d 100644 --- a/tests/functional/cylc-clean/00-basic.t +++ b/tests/functional/cylc-clean/00-basic.t @@ -23,14 +23,18 @@ if ! command -v 'tree' >'/dev/null'; then fi set_test_number 6 +# Generate random name for symlink dirs to avoid any clashes with other tests +SYM_NAME="$(mktemp -u)" +SYM_NAME="${SYM_NAME##*tmp.}" + create_test_global_config "" " [symlink dirs] [[localhost]] - run = ${TEST_DIR}/sym-run - log = ${TEST_DIR}/sym-log - share = ${TEST_DIR}/sym-share - share/cycle = ${TEST_DIR}/sym-cycle - work = ${TEST_DIR}/sym-work + run = ${TEST_DIR}/${SYM_NAME}-run + log = ${TEST_DIR}/${SYM_NAME}-log + share = ${TEST_DIR}/${SYM_NAME}-share + share/cycle = ${TEST_DIR}/${SYM_NAME}-cycle + work = ${TEST_DIR}/${SYM_NAME}-work " init_suite "${TEST_NAME_BASE}" << '__FLOW__' [scheduling] @@ -41,7 +45,7 @@ __FLOW__ run_ok "${TEST_NAME_BASE}-val" cylc validate "$SUITE_NAME" # Create a fake sibling workflow dir in the sym-log dir: -mkdir "${TEST_DIR}/sym-log/cylc-run/cylctb-${CYLC_TEST_TIME_INIT}/leave-me-alone" +mkdir "${TEST_DIR}/${SYM_NAME}-log/cylc-run/cylctb-${CYLC_TEST_TIME_INIT}/leave-me-alone" FUNCTIONAL_DIR="${TEST_SOURCE_DIR_BASE%/*}" # ----------------------------------------------------------------------------- @@ -52,19 +56,19 @@ sed -i '$d' "${TEST_NAME}.stdout" cmp_ok "${TEST_NAME}.stdout" << __TREE__ ${HOME}/cylc-run/${SUITE_NAME} -|-- log -> ${TEST_DIR}/sym-log/cylc-run/${SUITE_NAME}/log -|-- share -> ${TEST_DIR}/sym-share/cylc-run/${SUITE_NAME}/share -\`-- work -> ${TEST_DIR}/sym-work/cylc-run/${SUITE_NAME}/work +|-- log -> ${TEST_DIR}/${SYM_NAME}-log/cylc-run/${SUITE_NAME}/log +|-- share -> ${TEST_DIR}/${SYM_NAME}-share/cylc-run/${SUITE_NAME}/share +\`-- work -> ${TEST_DIR}/${SYM_NAME}-work/cylc-run/${SUITE_NAME}/work __TREE__ TEST_NAME="test-dir-tree-pre-clean" -tree --charset=ascii "${TEST_DIR}/sym-"* > "${TEST_NAME}.stdout" +tree --charset=ascii "${TEST_DIR}/${SYM_NAME}-"* > "${TEST_NAME}.stdout" # Remove last line of output: sed -i '$d' "${TEST_NAME}.stdout" # Note: backticks need to be escaped in the heredoc cmp_ok "${TEST_NAME}.stdout" << __TREE__ -${TEST_DIR}/sym-cycle +${TEST_DIR}/${SYM_NAME}-cycle \`-- cylc-run \`-- cylctb-${CYLC_TEST_TIME_INIT} \`-- ${FUNCTIONAL_DIR} @@ -72,7 +76,7 @@ ${TEST_DIR}/sym-cycle \`-- ${TEST_NAME_BASE} \`-- share \`-- cycle -${TEST_DIR}/sym-log +${TEST_DIR}/${SYM_NAME}-log \`-- cylc-run \`-- cylctb-${CYLC_TEST_TIME_INIT} |-- ${FUNCTIONAL_DIR} @@ -80,24 +84,24 @@ ${TEST_DIR}/sym-log | \`-- ${TEST_NAME_BASE} | \`-- log \`-- leave-me-alone -${TEST_DIR}/sym-run +${TEST_DIR}/${SYM_NAME}-run \`-- cylc-run \`-- cylctb-${CYLC_TEST_TIME_INIT} \`-- ${FUNCTIONAL_DIR} \`-- cylc-clean \`-- ${TEST_NAME_BASE} - |-- log -> ${TEST_DIR}/sym-log/cylc-run/${SUITE_NAME}/log - |-- share -> ${TEST_DIR}/sym-share/cylc-run/${SUITE_NAME}/share - \`-- work -> ${TEST_DIR}/sym-work/cylc-run/${SUITE_NAME}/work -${TEST_DIR}/sym-share + |-- log -> ${TEST_DIR}/${SYM_NAME}-log/cylc-run/${SUITE_NAME}/log + |-- share -> ${TEST_DIR}/${SYM_NAME}-share/cylc-run/${SUITE_NAME}/share + \`-- work -> ${TEST_DIR}/${SYM_NAME}-work/cylc-run/${SUITE_NAME}/work +${TEST_DIR}/${SYM_NAME}-share \`-- cylc-run \`-- cylctb-${CYLC_TEST_TIME_INIT} \`-- ${FUNCTIONAL_DIR} \`-- cylc-clean \`-- ${TEST_NAME_BASE} \`-- share - \`-- cycle -> ${TEST_DIR}/sym-cycle/cylc-run/${SUITE_NAME}/share/cycle -${TEST_DIR}/sym-work + \`-- cycle -> ${TEST_DIR}/${SYM_NAME}-cycle/cylc-run/${SUITE_NAME}/share/cycle +${TEST_DIR}/${SYM_NAME}-work \`-- cylc-run \`-- cylctb-${CYLC_TEST_TIME_INIT} \`-- ${FUNCTIONAL_DIR} @@ -113,22 +117,22 @@ TEST_NAME="run-dir-not-exist-post-clean" exists_fail "$SUITE_RUN_DIR" TEST_NAME="test-dir-tree-post-clean" -tree --charset=ascii "${TEST_DIR}/sym-"* > "${TEST_NAME}.stdout" +tree --charset=ascii "${TEST_DIR}/${SYM_NAME}-"* > "${TEST_NAME}.stdout" # Remove last line of output: sed -i '$d' "${TEST_NAME}.stdout" cmp_ok "${TEST_NAME}.stdout" << __TREE__ -${TEST_DIR}/sym-cycle +${TEST_DIR}/${SYM_NAME}-cycle \`-- cylc-run -${TEST_DIR}/sym-log +${TEST_DIR}/${SYM_NAME}-log \`-- cylc-run \`-- cylctb-${CYLC_TEST_TIME_INIT} \`-- leave-me-alone -${TEST_DIR}/sym-run +${TEST_DIR}/${SYM_NAME}-run \`-- cylc-run -${TEST_DIR}/sym-share +${TEST_DIR}/${SYM_NAME}-share \`-- cylc-run -${TEST_DIR}/sym-work +${TEST_DIR}/${SYM_NAME}-work \`-- cylc-run __TREE__ From 743d7199b4bd54056df7365a1ffbe47ab8e3768c Mon Sep 17 00:00:00 2001 From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com> Date: Tue, 1 Dec 2020 12:16:25 +0000 Subject: [PATCH 14/14] Update changelog --- CHANGES.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 0b5adfeebbc..7d76a08419c 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -122,9 +122,13 @@ hierarchy and ability to set site config directory. [#3883](https://github.com/cylc/cylc-flow/pull/3883) - Added a new workflow config option `[scheduling]stop after cycle point`. +[#3961](https://github.com/cylc/cylc-flow/pull/3961) - Added a new command: +`cylc clean`. + ### Fixes -[#3917](https://github.com/cylc/cylc-flow/pull/3917) - Fix a bug that caused one of the hostname resolution tests to fail in certain environments. +[#3917](https://github.com/cylc/cylc-flow/pull/3917) - Fix a bug that caused +one of the hostname resolution tests to fail in certain environments. [#3879](https://github.com/cylc/cylc-flow/pull/3879) - Removed Google Groups e-mail from pip packaging metadata. Users browsing PYPI will have