diff --git a/CHANGES.md b/CHANGES.md index 18fe1e70315..d4148bb9670 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -80,6 +80,12 @@ version control information on installation of a workflow. [#4222](https://github.com/cylc/cylc-flow/pull/4222) - Fix bug where a workflow's public database file was not closed properly. +[#4237](https://github.com/cylc/cylc-flow/pull/4237) - `cylc clean` can now +remove specific sub-directories instead of the whole run directory, using the +`--rm` option. There are also the options `--local-only` and `--remote-only` +for choosing to only clean on the local filesystem or remote install targets +respectively. + ### Fixes [#4248](https://github.com/cylc/cylc-flow/pull/4248) diff --git a/cylc/flow/exceptions.py b/cylc/flow/exceptions.py index 609b41e84d8..286bf6800e1 100644 --- a/cylc/flow/exceptions.py +++ b/cylc/flow/exceptions.py @@ -15,8 +15,8 @@ # along with this program. If not, see . """Exceptions for "expected" errors.""" - -from typing import Iterable +import errno +from typing import Callable, Iterable, NoReturn, Tuple, Type class CylcError(Exception): @@ -99,6 +99,31 @@ class WorkflowFilesError(CylcError): """Exception for errors related to workflow files/directories.""" +def handle_rmtree_err( + function: Callable, + path: str, + excinfo: Tuple[Type[Exception], Exception, object] +) -> NoReturn: + """Error handler for shutil.rmtree.""" + exc = excinfo[1] + if isinstance(exc, OSError) and exc.errno == errno.ENOTEMPTY: + # "Directory not empty", likely due to filesystem lag + raise FileRemovalError(exc) + raise exc + + +class FileRemovalError(CylcError): + """Exception for errors during deletion of files/directories, which are + probably the filesystem's fault, not Cylc's.""" + + def __init__(self, exc: Exception) -> None: + CylcError.__init__( + self, + f"{exc}. This is probably a temporary issue with the filesystem, " + "not a problem with Cylc." + ) + + class TaskRemoteMgmtError(CylcError): """Exceptions initialising workflow run directory of remote job host.""" diff --git a/cylc/flow/pathutil.py b/cylc/flow/pathutil.py index 163af6e215f..91b06d720ca 100644 --- a/cylc/flow/pathutil.py +++ b/cylc/flow/pathutil.py @@ -19,17 +19,19 @@ from pathlib import Path import re from shutil import rmtree -from typing import Union +from typing import Dict, Iterable, Set, Union from cylc.flow import LOG from cylc.flow.cfgspec.glbl_cfg import glbl_cfg -from cylc.flow.exceptions import WorkflowFilesError +from cylc.flow.exceptions import ( + UserInputError, WorkflowFilesError, handle_rmtree_err +) from cylc.flow.platforms import get_localhost_install_target # Note: do not import this elsewhere, as it might bypass unit test # monkeypatching: -_CYLC_RUN_DIR = '$HOME/cylc-run' +_CYLC_RUN_DIR = os.path.join('$HOME', 'cylc-run') def expand_path(*args: Union[Path, str]) -> str: @@ -128,15 +130,17 @@ def make_workflow_run_tree(workflow): LOG.debug(f'{dir_}: directory created') -def make_localhost_symlinks(rund, named_sub_dir): +def make_localhost_symlinks( + rund: Union[Path, str], named_sub_dir: str +) -> Dict[str, Union[Path, str]]: """Creates symlinks for any configured symlink dirs from glbl_cfg. Args: rund: the entire run directory path named_sub_dir: e.g flow_name/run1 Returns: - dict - A dictionary of Symlinks with sources as keys and - destinations as values: ``{source: destination}`` + Dictionary of symlinks with sources as keys and + destinations as values: ``{source: destination}`` """ dirs_to_symlink = get_dirs_to_symlink( @@ -144,80 +148,89 @@ def make_localhost_symlinks(rund, named_sub_dir): symlinks_created = {} for key, value in dirs_to_symlink.items(): if key == 'run': - dst = rund + symlink_path = rund else: - dst = os.path.join(rund, key) - src = expand_path(value) - if '$' in src: + symlink_path = os.path.join(rund, key) + target = expand_path(value) + if '$' in target: raise WorkflowFilesError( - f'Unable to create symlink to {src}.' + f'Unable to create symlink to {target}.' f' \'{value}\' contains an invalid environment variable.' ' Please check configuration.') - symlink_success = make_symlink(src, dst) - # symlink info returned for logging purposes, symlinks created - # before logs as this dir may be a symlink. + symlink_success = make_symlink(symlink_path, target) + # Symlink info returned for logging purposes. Symlinks should be + # created before logs as the log dir may be a symlink. if symlink_success: - symlinks_created[src] = dst + symlinks_created[target] = symlink_path return symlinks_created -def get_dirs_to_symlink(install_target, flow_name): +def get_dirs_to_symlink(install_target: str, flow_name: str) -> Dict[str, str]: """Returns dictionary of directories to symlink from glbcfg. - Note the paths should remain unexpanded, to be expanded on the remote. + + Note the paths should remain unexpanded, to be expanded on the remote. """ - dirs_to_symlink = {} + dirs_to_symlink: Dict[str, str] = {} symlink_conf = glbl_cfg().get(['symlink dirs']) if install_target not in symlink_conf.keys(): return dirs_to_symlink base_dir = symlink_conf[install_target]['run'] - if base_dir is not None: + if base_dir: dirs_to_symlink['run'] = os.path.join(base_dir, 'cylc-run', flow_name) for dir_ in ['log', 'share', 'share/cycle', 'work']: link = symlink_conf[install_target][dir_] - if link is None or link == base_dir: + if (not link) or link == base_dir: continue dirs_to_symlink[dir_] = os.path.join(link, 'cylc-run', flow_name, dir_) return dirs_to_symlink -def make_symlink(src, dst): +def make_symlink(path: Union[Path, str], target: Union[Path, str]) -> bool: """Makes symlinks for directories. + Args: - src (str): target path, where the files are to be stored. - dst (str): full path of link that will point to src. + path: Absolute path of the desired symlink. + target: Absolute path of the symlink's target directory. """ - if os.path.exists(dst): - if os.path.islink(dst) and os.path.samefile(dst, src): + path = Path(path) + target = Path(target) + if path.exists(): + if path.is_symlink() and path.samefile(target): # correct symlink already exists return False # symlink name is in use by a physical file or directory # log and return LOG.debug( - f"Unable to create {src} symlink. The path {dst} already exists.") + f"Unable to create symlink to {target}. " + f"The path {path} already exists.") return False - elif os.path.islink(dst): + elif path.is_symlink(): # remove a bad symlink. try: - os.unlink(dst) - except Exception: + path.unlink() + except OSError: raise WorkflowFilesError( - f"Error when symlinking. Failed to unlink bad symlink {dst}.") - os.makedirs(src, exist_ok=True) - os.makedirs(os.path.dirname(dst), exist_ok=True) + f"Error when symlinking. Failed to unlink bad symlink {path}.") + target.mkdir(parents=True, exist_ok=True) + if path.exists(): + # Trying to link to itself; no symlink needed + # (e.g. path's parent is symlink to target's parent) + return False + path.parent.mkdir(parents=True, exist_ok=True) try: - os.symlink(src, dst, target_is_directory=True) + path.symlink_to(target) return True - except Exception as exc: + except OSError as exc: raise WorkflowFilesError(f"Error when symlinking\n{exc}") -def remove_dir(path): - """Delete a directory including contents, including the target directory - if the specified path is a symlink. +def remove_dir_and_target(path: Union[Path, str]) -> None: + """Delete a directory tree (i.e. including contents), as well as the + target directory tree if the specified path is a symlink. Args: - path (str): the absolute path of the directory to delete. + path: the absolute path of the directory to delete. """ if not os.path.isabs(path): raise ValueError('Path must be absolute') @@ -228,7 +241,7 @@ def remove_dir(path): target = os.path.realpath(path) LOG.debug( f'Removing symlink target directory: ({path} ->) {target}') - rmtree(target) + rmtree(target, onerror=handle_rmtree_err) LOG.debug(f'Removing symlink: {path}') else: LOG.debug(f'Removing broken symlink: {path}') @@ -237,7 +250,27 @@ def remove_dir(path): raise FileNotFoundError(path) else: LOG.debug(f'Removing directory: {path}') - rmtree(path) + rmtree(path, onerror=handle_rmtree_err) + + +def remove_dir_or_file(path: Union[Path, str]) -> None: + """Delete a directory tree, or a file, or a symlink. + Does not follow symlinks. + + Args: + path: the absolute path of the directory/file/symlink to delete. + """ + if not os.path.isabs(path): + raise ValueError("Path must be absolute") + if os.path.islink(path): + LOG.debug(f"Removing symlink: {path}") + os.remove(path) + elif os.path.isfile(path): + LOG.debug(f"Removing file: {path}") + os.remove(path) + else: + LOG.debug(f"Removing directory: {path}") + rmtree(path, onerror=handle_rmtree_err) def get_next_rundir_number(run_path): @@ -249,3 +282,43 @@ def get_next_rundir_number(run_path): return int(last_run_num) + 1 except OSError: return 1 + + +def parse_rm_dirs(rm_dirs: Iterable[str]) -> Set[str]: + """Parse a list of possibly colon-separated dirs (or files or globs). + Return the set of all the dirs. + + Used by cylc clean with the --rm option. + """ + result: Set[str] = set() + for item in rm_dirs: + for part in item.split(':'): + part = part.strip() + if not part: + continue + is_dir = part.endswith(os.sep) + part = os.path.normpath(part) + if os.path.isabs(part): + raise UserInputError("--rm option cannot take absolute paths") + if part == '.' or part.startswith(f'..{os.sep}'): + raise UserInputError( + "--rm option cannot take paths that point to the " + "run directory or above" + ) + if is_dir: + # Preserve trailing slash to ensure it only matches dirs, + # not files, when globbing + part += os.sep + result.add(part) + return result + + +def is_relative_to(path1: Union[Path, str], path2: Union[Path, str]) -> bool: + """Return whether or not path1 is relative to path2.""" + # In future, we can just use pathlib.Path.is_relative_to() + # when Python 3.9 becomes the minimum supported version + try: + Path(os.path.normpath(path1)).relative_to(os.path.normpath(path2)) + except ValueError: + return False + return True diff --git a/cylc/flow/scripts/clean.py b/cylc/flow/scripts/clean.py index 56c62d49c31..79b58103845 100644 --- a/cylc/flow/scripts/clean.py +++ b/cylc/flow/scripts/clean.py @@ -32,16 +32,37 @@ # Remove the workflow at ~/cylc-run/foo/bar $ cylc clean foo/bar + # Remove the workflow's log directory + $ cylc clean foo/bar --rm log + + # Remove the log and work directories + $ cylc clean foo/bar --rm log:work + # or + $ cylc clean foo/bar --rm log --rm work + + # Remove all job log files from the 2020 cycle points + cylc clean foo/bar --rm 'log/job/2020*' + + # Remove all .csv files + $ cylc clean foo/bar --rm '**/*.csv' + + # Only remove the workflow on the local filesystem + $ cylc clean foo/bar --local-only + + # Only remove the workflow on remote install targets + $ cylc clean foo/bar --remote-only + """ +from typing import TYPE_CHECKING + import cylc.flow.flags from cylc.flow import LOG +from cylc.flow.exceptions import UserInputError from cylc.flow.loggingutil import CylcLogFormatter -from cylc.flow.option_parsers import CylcOptionParser as COP +from cylc.flow.option_parsers import CylcOptionParser as COP, Options from cylc.flow.terminal import cli_function -from cylc.flow.workflow_files import clean, init_clean - -from typing import TYPE_CHECKING +from cylc.flow.workflow_files import init_clean if TYPE_CHECKING: from optparse import Values @@ -53,12 +74,26 @@ def get_option_parser(): argdoc=[('REG', "Workflow name")] ) + parser.add_option( + '--rm', metavar='DIR[:DIR:...]', + help="Only clean the specified subdirectories (or files) in the " + "run directory, rather than the whole run directory. " + "Accepts quoted globs.", + action='append', dest='rm_dirs', default=[] + ) + parser.add_option( '--local-only', '--local', help="Only clean on the local filesystem (not remote hosts).", action='store_true', dest='local_only' ) + parser.add_option( + '--remote-only', '--remote', + help="Only clean on remote hosts (not the local filesystem).", + action='store_true', dest='remote_only' + ) + parser.add_option( '--timeout', help="The number of seconds to wait for cleaning to take place on " @@ -69,18 +104,23 @@ def get_option_parser(): return parser +CleanOptions = Options(get_option_parser()) + + @cli_function(get_option_parser) def main(parser: COP, opts: 'Values', reg: str): - if cylc.flow.flags.verbosity > 1: + if cylc.flow.flags.verbosity < 2: # for readability omit timestamps from logging unless in debug mode for handler in LOG.handlers: if isinstance(handler.formatter, CylcLogFormatter): handler.formatter.configure(timestamp=False) - if opts.local_only: - clean(reg) - else: - init_clean(reg, opts) + if opts.local_only and opts.remote_only: + raise UserInputError( + "--local and --remote options are mutually exclusive" + ) + + init_clean(reg, opts) if __name__ == "__main__": diff --git a/cylc/flow/task_remote_cmd.py b/cylc/flow/task_remote_cmd.py index 885f1f5d384..1baeb0da7e8 100644 --- a/cylc/flow/task_remote_cmd.py +++ b/cylc/flow/task_remote_cmd.py @@ -83,29 +83,29 @@ def create_client_keys(srvd, install_target): os.umask(old_umask) -def remote_init(install_target, rund, *dirs_to_symlink): +def remote_init(install_target: str, rund: str, *dirs_to_symlink: str) -> None: """cylc remote-init Arguments: - install_target (str): target to be initialised - rund (str): workflow run directory - dirs_to_symlink (list): directories to be symlinked in form + install_target: target to be initialised + rund: workflow run directory + dirs_to_symlink: directories to be symlinked in form [directory=symlink_location, ...] """ rund = os.path.expandvars(rund) for item in dirs_to_symlink: key, val = item.split("=", 1) if key == 'run': - dst = rund + path = rund else: - dst = os.path.join(rund, key) - src = os.path.expandvars(val) - if '$' in src: + path = os.path.join(rund, key) + target = os.path.expandvars(val) + if '$' in target: print(REMOTE_INIT_FAILED) print(f'Error occurred when symlinking.' - f' {src} contains an invalid environment variable.') + f' {target} contains an invalid environment variable.') return - make_symlink(src, dst) + make_symlink(path, target) srvd = os.path.join(rund, WorkflowFiles.Service.DIRNAME) os.makedirs(srvd, exist_ok=True) diff --git a/cylc/flow/workflow_files.py b/cylc/flow/workflow_files.py index 863cb12790f..776c8231754 100644 --- a/cylc/flow/workflow_files.py +++ b/cylc/flow/workflow_files.py @@ -17,8 +17,11 @@ """Workflow service files management.""" import aiofiles +from collections import deque from contextlib import suppress from enum import Enum +from functools import partial +import glob import logging import os from pathlib import Path @@ -27,7 +30,10 @@ import shutil from subprocess import Popen, PIPE, DEVNULL import time -from typing import List, Optional, Tuple, TYPE_CHECKING, Union +from typing import ( + Any, Container, Deque, Dict, Iterable, List, NamedTuple, Optional, Set, + Tuple, TYPE_CHECKING, Union +) import zmq.auth from cylc.flow import LOG @@ -37,13 +43,18 @@ PlatformLookupError, ServiceFileError, TaskRemoteMgmtError, - WorkflowFilesError) + WorkflowFilesError, + handle_rmtree_err +) from cylc.flow.pathutil import ( expand_path, get_workflow_run_dir, make_localhost_symlinks, - remove_dir, - get_next_rundir_number) + parse_rm_dirs, + remove_dir_and_target, + get_next_rundir_number, + remove_dir_or_file +) from cylc.flow.platforms import ( get_install_target_to_platforms_map, get_localhost_install_target, @@ -201,14 +212,21 @@ class Install: SOURCE = 'source' """Symlink to the workflow definition (For run dir).""" - RESERVED_DIRNAMES = [ - LOG_DIR, SHARE_DIR, WORK_DIR, RUN_N, Service.DIRNAME, Install.DIRNAME] + RESERVED_DIRNAMES = frozenset([ + LOG_DIR, SHARE_DIR, WORK_DIR, RUN_N, Service.DIRNAME, Install.DIRNAME + ]) """Reserved directory names that cannot be present in a source dir.""" - RESERVED_NAMES = [ - FLOW_FILE, SUITE_RC, *RESERVED_DIRNAMES] + RESERVED_NAMES = frozenset([FLOW_FILE, SUITE_RC, *RESERVED_DIRNAMES]) """Reserved filenames that cannot be used as run names.""" + SYMLINK_DIRS = frozenset([ + SHARE_CYCLE_DIR, SHARE_DIR, LOG_DIR, WORK_DIR, '' + ]) + """The paths of the symlink dirs that may be set in + global.cylc[symlink dirs], relative to the run dir + ('' represents the run dir).""" + class ContactFileFields: """Field names present in ``WorkflowFiles.Service.CONTACT``. @@ -264,6 +282,12 @@ class ContactFileFields: """Remote command setting for Scheduler.""" +class RemoteCleanQueueTuple(NamedTuple): + proc: 'Popen[str]' + install_target: str + platforms: List[Dict[str, Any]] + + REG_DELIM = "/" NO_TITLE = "No title provided" @@ -578,12 +602,12 @@ def is_installed(path: Union[Path, str]) -> bool: return cylc_install_folder.is_dir() and source.is_symlink() -def _clean_check(reg, run_dir): +def _clean_check(reg: str, run_dir: Path) -> None: """Check whether a workflow can be cleaned. Args: - reg (str): Workflow name. - run_dir (str): Path to the workflow run dir on the filesystem. + reg: Workflow name. + run_dir: Path to the workflow run dir on the filesystem. """ validate_flow_name(reg) reg = os.path.normpath(reg) @@ -591,6 +615,7 @@ def _clean_check(reg, run_dir): raise WorkflowFilesError( "Workflow name cannot be a path that points to the cylc-run " "directory or above") + # Must be dir or broken symlink: if not run_dir.is_dir() and not run_dir.is_symlink(): msg = f"No directory to clean at {run_dir}" raise FileNotFoundError(msg) @@ -606,8 +631,8 @@ def init_clean(reg: str, opts: 'Values') -> None: scheduler filesystem and remote hosts. Args: - reg (str): Workflow name. - opts (optparse.Values): CLI options object for cylc clean. + reg: Workflow name. + opts: CLI options object for cylc clean. """ local_run_dir = Path(get_workflow_run_dir(reg)) try: @@ -616,77 +641,191 @@ def init_clean(reg: str, opts: 'Values') -> None: LOG.info(str(exc)) return - platform_names = None - try: - platform_names = get_platforms_from_db(local_run_dir) - except FileNotFoundError: - LOG.info("No workflow database - will only clean locally") - except ServiceFileError as exc: - raise ServiceFileError(f"Cannot clean - {exc}") - - if platform_names and platform_names != {'localhost'}: - remote_clean(reg, platform_names, opts.remote_timeout) - LOG.info("Cleaning on local filesystem") - clean(reg) - - -def clean(reg): + if not opts.local_only: + platform_names = None + try: + platform_names = get_platforms_from_db(local_run_dir) + except FileNotFoundError: + if opts.remote_only: + raise ServiceFileError( + "No workflow database - cannot perform remote clean") + LOG.info("No workflow database - will only clean locally") + except ServiceFileError as exc: + raise ServiceFileError(f"Cannot clean - {exc}") + + if platform_names and platform_names != {'localhost'}: + remote_clean( + reg, platform_names, opts.rm_dirs, opts.remote_timeout) + + if not opts.remote_only: + rm_dirs = None + if opts.rm_dirs: + rm_dirs = parse_rm_dirs(opts.rm_dirs) + clean(reg, local_run_dir, rm_dirs) + + +def clean(reg: str, run_dir: Path, rm_dirs: Optional[Set[str]] = None) -> None: """Remove a stopped workflow from the local filesystem only. - Deletes the workflow run directory and any symlink dirs. Note: if the - run dir has already been manually deleted, it will not be possible to - clean the symlink dirs. + Deletes the workflow run directory and any symlink dirs, or just the + specified sub dirs if rm_dirs is specified. + + Note: if the run dir has already been manually deleted, it will not be + possible to clean any symlink dirs. Args: - reg (str): Workflow name. + reg: Workflow name. + run_dir: Absolute path of the workflow's run dir. + rm_dirs: Set of sub dirs to remove instead of the whole run dir. """ - run_dir = Path(get_workflow_run_dir(reg)) - try: - _clean_check(reg, run_dir) - except FileNotFoundError as exc: - LOG.info(str(exc)) - return - - # Note: 'share/cycle' must come first, and '' must come last - for possible_symlink in ( - WorkflowFiles.SHARE_CYCLE_DIR, WorkflowFiles.SHARE_DIR, - WorkflowFiles.LOG_DIR, WorkflowFiles.WORK_DIR, ''): - name = Path(possible_symlink) - path = Path(run_dir, possible_symlink) + LOG.info("Cleaning on local filesystem") + symlink_dirs = get_symlink_dirs(reg, run_dir) + if rm_dirs is not None: + # Targeted clean + for pattern in rm_dirs: + _clean_using_glob(run_dir, pattern, symlink_dirs) + else: + # Wholesale clean + for symlink in symlink_dirs: + # Remove /cylc-run// + remove_dir_and_target(run_dir / symlink) + if '' not in symlink_dirs: + # if run dir isn't a symlink dir and hasn't been deleted yet + remove_dir_and_target(run_dir) + # Tidy up if necessary + # Remove any empty parents of run dir up to ~/cylc-run/ + _remove_empty_parents(run_dir, reg) + for symlink, target in symlink_dirs.items(): + # Remove empty parents of symlink target up to /cylc-run/ + _remove_empty_parents(target, Path(reg, symlink)) + + +def get_symlink_dirs(reg: str, run_dir: Union[Path, str]) -> Dict[str, Path]: + """Return the standard symlink dirs and their targets if they exist in + the workflow run dir. + + Note: does not check the global config, only the existing run dir filetree. + + Raises WorkflowFilesError if a symlink points to an unexpected place. + """ + ret: Dict[str, Path] = {} + for _dir in sorted(WorkflowFiles.SYMLINK_DIRS, reverse=True): + # ordered by deepest to shallowest + path = Path(run_dir, _dir) if path.is_symlink(): - # Ensure symlink is pointing to expected directory. If not, - # something is wrong and we should abort target = path.resolve() if target.exists() and not target.is_dir(): raise WorkflowFilesError( f'Invalid Cylc symlink directory {path} -> {target}\n' f'Target is not a directory') - expected_end = str(Path('cylc-run', reg, name)) + expected_end = str(Path('cylc-run', reg, _dir)) if not str(target).endswith(expected_end): raise WorkflowFilesError( f'Invalid Cylc symlink directory {path} -> {target}\n' f'Expected target to end with "{expected_end}"') - # Remove /cylc-run/ - target_cylc_run_dir = str(target).rsplit(str(reg), 1)[0] - target_reg_dir = Path(target_cylc_run_dir, reg) - if target_reg_dir.is_dir(): - remove_dir(target_reg_dir) - # Remove empty parents - _remove_empty_reg_parents(reg, target_reg_dir) + ret[_dir] = target + return ret - remove_dir(run_dir) - _remove_empty_reg_parents(reg, run_dir) +def glob_in_run_dir( + run_dir: Union[Path, str], pattern: str, symlink_dirs: Container[Path] +) -> List[Path]: + """Execute a (recursive) glob search in the given run directory. -def remote_clean(reg, platform_names, timeout): + Returns list of any absolute paths that match the pattern. However: + * Does not follow symlinks (apart from the spcedified symlink dirs). + * Also does not return matching subpaths of matching directories (because + that would be redundant). + + Args: + run_dir: Absolute path of the workflow run dir. + pattern: The glob pattern. + symlink_dirs: Absolute paths to the workflow's symlink dirs. + """ + # Note: use os.path.join, not pathlib, to preserve trailing slash if + # present in pattern + pattern = os.path.join(glob.escape(str(run_dir)), pattern) + # Note: don't use pathlib.Path.glob() because when you give it an exact + # filename instead of pattern, it doesn't return broken symlinks + matches = sorted(Path(i) for i in glob.iglob(pattern, recursive=True)) + # sort guarantees parents come before their children + if len(matches) == 1 and not os.path.lexists(matches[0]): + # https://bugs.python.org/issue35201 + return [] + results: List[Path] = [] + subpath_excludes: Set[Path] = set() + for path in matches: + for rel_ancestor in reversed(path.relative_to(run_dir).parents): + ancestor = run_dir / rel_ancestor + if ancestor in subpath_excludes: + break + if ancestor.is_symlink() and ancestor not in symlink_dirs: + # Do not follow non-standard symlinks + subpath_excludes.add(ancestor) + break + if (not symlink_dirs) and ancestor in results: + # We can be sure all subpaths of this ancestor are redundant + subpath_excludes.add(ancestor) + break + if ancestor == path.parent: # noqa: SIM102 + # Final iteration over ancestors + if ancestor in matches and path not in symlink_dirs: + # Redundant (but don't exclude subpaths in case any of the + # subpaths are std symlink dirs) + break + else: # No break + results.append(path) + return results + + +def _clean_using_glob( + run_dir: Path, pattern: str, symlink_dirs: Iterable[str] +) -> None: + """Delete the files/dirs in the run dir that match the pattern. + + Does not follow symlinks (apart from the standard symlink dirs). + + Args: + run_dir: Absolute path of workflow run dir. + pattern: The glob pattern. + symlink_dirs: Paths of the workflow's symlink dirs relative to + the run dir. + """ + abs_symlink_dirs = tuple(sorted( + (run_dir / d for d in symlink_dirs), + reverse=True # ordered by deepest to shallowest + )) + matches = glob_in_run_dir(run_dir, pattern, abs_symlink_dirs) + if not matches: + return + # First clean any matching symlink dirs + for path in abs_symlink_dirs: + if path in matches: + remove_dir_and_target(path) + if path == run_dir: + # We have deleted the run dir + return + matches.remove(path) + # Now clean the rest + for path in matches: + remove_dir_or_file(path) + + +def remote_clean( + reg: str, + platform_names: Iterable[str], + rm_dirs: Optional[List[str]] = None, + timeout: str = '120' +) -> None: """Run subprocesses to clean workflows on remote install targets (skip localhost), given a set of platform names to look up. Args: - reg (str): Workflow name. - platform_names (list): List of platform names to look up in the global + reg: Workflow name. + platform_names: List of platform names to look up in the global config, in order to determine the install targets to clean on. - timeout (str): Number of seconds to wait before cancelling. + rm_dirs: Sub dirs to remove instead of the whole run dir. + timeout: Number of seconds to wait before cancelling. """ try: install_targets_map = ( @@ -695,95 +834,112 @@ def remote_clean(reg, platform_names, timeout): raise PlatformLookupError( "Cannot clean on remote platforms as the workflow database is " f"out of date/inconsistent with the global config - {exc}") - - pool = [] + queue: Deque[RemoteCleanQueueTuple] = deque() + remote_clean_cmd = partial( + _remote_clean_cmd, reg=reg, rm_dirs=rm_dirs, timeout=timeout + ) for target, platforms in install_targets_map.items(): if target == get_localhost_install_target(): continue shuffle(platforms) LOG.info( - f"Cleaning on install target: {platforms[0]['install target']}") + f"Cleaning on install target: {platforms[0]['install target']}" + ) # Issue ssh command: - pool.append( - (_remote_clean_cmd(reg, platforms[0], timeout), target, platforms) + queue.append( + RemoteCleanQueueTuple( + remote_clean_cmd(platform=platforms[0]), target, platforms + ) ) - failed_targets = [] + failed_targets: List[str] = [] # Handle subproc pool results almost concurrently: - while pool: - for proc, target, platforms in pool: - ret_code = proc.poll() - if ret_code is None: # proc still running - continue - pool.remove((proc, target, platforms)) - out, err = (f.decode() for f in proc.communicate()) - if out: - LOG.debug(out) - if ret_code: - # Try again using the next platform for this install target: - this_platform = platforms.pop(0) - excn = TaskRemoteMgmtError( - TaskRemoteMgmtError.MSG_TIDY, this_platform['name'], - " ".join(proc.args), ret_code, out, err) - LOG.debug(excn) - if platforms: - pool.append( - (_remote_clean_cmd(reg, platforms[0], timeout), - target, platforms) + while queue: + item = queue.popleft() + ret_code = item.proc.poll() + if ret_code is None: # proc still running + queue.append(item) + continue + out, err = item.proc.communicate() + if out: + LOG.debug(out) + if ret_code: + this_platform = item.platforms.pop(0) + LOG.debug(TaskRemoteMgmtError( + TaskRemoteMgmtError.MSG_TIDY, this_platform['name'], + item.proc.args, ret_code, out, err + )) + if ret_code == 255 and item.platforms: + # SSH error; try again using the next platform for this + # install target + queue.append( + item._replace( + proc=remote_clean_cmd(platform=item.platforms[0]) ) - else: # Exhausted list of platforms - failed_targets.append(target) - elif err: - LOG.debug(err) + ) + else: # Exhausted list of platforms + failed_targets.append(item.install_target) + elif err: + LOG.debug(err) time.sleep(0.2) if failed_targets: raise CylcError( - f"Could not clean on install targets: {', '.join(failed_targets)}") + f"Could not clean on install targets: {', '.join(failed_targets)}" + ) -def _remote_clean_cmd(reg, platform, timeout): +def _remote_clean_cmd( + reg: str, + platform: Dict[str, str], + rm_dirs: Optional[List[str]], + timeout: str +) -> 'Popen[str]': """Remove a stopped workflow on a remote host. Call "cylc clean --local-only" over ssh and return the subprocess. Args: - reg (str): Workflow name. - platform (dict): Config for the platform on which to remove the - workflow. - timeout (str): Number of seconds to wait before cancelling the command. + reg: Workflow name. + platform: Config for the platform on which to remove the workflow. + rm_dirs: Sub dirs to remove instead of the whole run dir. + timeout: Number of seconds to wait before cancelling the command. """ LOG.debug( f'Cleaning on install target: {platform["install target"]} ' f'(using platform: {platform["name"]})' ) - cmd = construct_ssh_cmd( - ['clean', '--local-only', reg], - platform, - timeout=timeout, - set_verbosity=True - ) + cmd = ['clean', '--local-only', reg] + if rm_dirs is not None: + for item in rm_dirs: + cmd.extend(['--rm', item]) + cmd = construct_ssh_cmd(cmd, platform, timeout=timeout, set_verbosity=True) LOG.debug(" ".join(cmd)) - return Popen(cmd, stdin=DEVNULL, stdout=PIPE, stderr=PIPE) + return Popen(cmd, stdin=DEVNULL, stdout=PIPE, stderr=PIPE, text=True) -def _remove_empty_reg_parents(reg, path): - """If reg is nested e.g. a/b/c, work our way up the tree, removing empty - parents only. +def _remove_empty_parents( + path: Union[Path, str], tail: Union[Path, str] +) -> None: + """Work our way up the tail of path, removing empty dirs only. Args: - reg (str): workflow name, e.g. a/b/c - path (str): path to this directory, e.g. /foo/bar/a/b/c + path: Absolute path to the directory, e.g. /foo/bar/a/b/c + tail: The tail of the path to work our way up, e.g. a/b/c Example: - _remove_empty_reg_parents('a/b/c', '/foo/bar/a/b/c') would remove + _remove_empty_parents('/foo/bar/a/b/c', 'a/b/c') would remove /foo/bar/a/b (assuming it's empty), then /foo/bar/a (assuming it's empty). """ - reg = Path(reg) - reg_depth = len(reg.parts) - 1 path = Path(path) if not path.is_absolute(): - raise ValueError('Path must be absolute') - for i in range(reg_depth): + raise ValueError('path must be absolute') + tail = Path(tail) + if tail.is_absolute(): + raise ValueError('tail must not be an absolute path') + if not str(path).endswith(str(tail)): + raise ValueError(f"path '{path}' does not end with '{tail}'") + depth = len(tail.parts) - 1 + for i in range(depth): parent = path.parents[i] if not parent.is_dir(): continue @@ -803,7 +959,7 @@ def remove_keys_on_server(keys): # Remove client public key folder client_public_key_dir = keys["client_public_key"].key_path if os.path.exists(client_public_key_dir): - shutil.rmtree(client_public_key_dir) + shutil.rmtree(client_public_key_dir, onerror=handle_rmtree_err) def create_server_keys(keys, workflow_srv_dir): @@ -926,7 +1082,7 @@ def _check_child_dirs(path: Union[Path, str], depth_count: int = 1): reg_path: Union[Path, str] = os.path.normpath(run_dir) parent_dir = os.path.dirname(reg_path) - while parent_dir not in ['', '/']: + while parent_dir not in ['', os.sep]: if is_valid_run_dir(parent_dir): raise WorkflowFilesError( exc_msg.format(parent_dir, get_cylc_run_abs_path(parent_dir)) diff --git a/tests/functional/cylc-clean/02-targeted.t b/tests/functional/cylc-clean/02-targeted.t new file mode 100644 index 00000000000..08a7c417b5d --- /dev/null +++ b/tests/functional/cylc-clean/02-targeted.t @@ -0,0 +1,165 @@ +#!/usr/bin/env bash +# THIS FILE IS PART OF THE CYLC WORKFLOW ENGINE. +# Copyright (C) NIWA & British Crown (Met Office) & Contributors. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# ----------------------------------------------------------------------------- +# Test the cylc clean command + +. "$(dirname "$0")/test_header" +if ! command -v 'tree' >'/dev/null'; then + skip_all '"tree" command not available' +fi +set_test_number 16 + +# Generate random name for symlink dirs to avoid any clashes with other tests +SYM_NAME="$(mktemp -u)" +SYM_NAME="sym-${SYM_NAME##*tmp.}" + +create_test_global_config "" " +[symlink dirs] + [[localhost]] + run = ${TEST_DIR}/${SYM_NAME}/run + log = ${TEST_DIR}/${SYM_NAME}/other + share = ${TEST_DIR}/${SYM_NAME}/other + work = ${TEST_DIR}/${SYM_NAME}/other + # Need to override any symlink dirs set in global-tests.cylc: + share/cycle = +" +init_workflow "${TEST_NAME_BASE}" << __FLOW__ +[scheduler] + allow implicit tasks = True +[scheduling] + [[graph]] + R1 = jalad +__FLOW__ +# Also create some other file +touch "${WORKFLOW_RUN_DIR}/darmok.cylc" + +run_ok "${TEST_NAME_BASE}-val" cylc validate "$WORKFLOW_NAME" + +FUNCTIONAL_DIR="${TEST_SOURCE_DIR_BASE%/*}" +# ----------------------------------------------------------------------------- +TEST_NAME="${TEST_NAME_BASE}-run-dir-readlink-pre-clean" +readlink "$WORKFLOW_RUN_DIR" > "${TEST_NAME}.stdout" +cmp_ok "${TEST_NAME}.stdout" <<< "${TEST_DIR}/${SYM_NAME}/run/cylc-run/${WORKFLOW_NAME}" + +TEST_NAME="${TEST_NAME_BASE}-testdir-tree-pre-clean" +run_ok "${TEST_NAME}" tree -L 5 --noreport --charset=ascii "${TEST_DIR}/${SYM_NAME}/"*"/cylc-run/${CYLC_TEST_REG_BASE}" +# Note: backticks need to be escaped in the heredoc +cmp_ok "${TEST_NAME}.stdout" << __TREE__ +${TEST_DIR}/${SYM_NAME}/other/cylc-run/${CYLC_TEST_REG_BASE} +\`-- ${FUNCTIONAL_DIR} + \`-- cylc-clean + \`-- ${TEST_NAME_BASE} + |-- log + | \`-- install + |-- share + \`-- work +${TEST_DIR}/${SYM_NAME}/run/cylc-run/${CYLC_TEST_REG_BASE} +\`-- ${FUNCTIONAL_DIR} + \`-- cylc-clean + \`-- ${TEST_NAME_BASE} + |-- _cylc-install + | \`-- source -> ${TEST_DIR}/${WORKFLOW_NAME} + |-- darmok.cylc + |-- flow.cylc + |-- log -> ${TEST_DIR}/${SYM_NAME}/other/cylc-run/${WORKFLOW_NAME}/log + |-- share -> ${TEST_DIR}/${SYM_NAME}/other/cylc-run/${WORKFLOW_NAME}/share + \`-- work -> ${TEST_DIR}/${SYM_NAME}/other/cylc-run/${WORKFLOW_NAME}/work +__TREE__ +# ----------------------------------------------------------------------------- +# Clean the log dir only +run_ok "${TEST_NAME_BASE}-targeted-clean-1" cylc clean "$WORKFLOW_NAME" \ + --rm log + +TEST_NAME="${TEST_NAME_BASE}-testdir-tree-1" +run_ok "${TEST_NAME}" tree -L 5 --noreport --charset=ascii "${TEST_DIR}/${SYM_NAME}/"*"/cylc-run/${CYLC_TEST_REG_BASE}" +cmp_ok "${TEST_NAME}.stdout" << __TREE__ +${TEST_DIR}/${SYM_NAME}/other/cylc-run/${CYLC_TEST_REG_BASE} +\`-- ${FUNCTIONAL_DIR} + \`-- cylc-clean + \`-- ${TEST_NAME_BASE} + |-- share + \`-- work +${TEST_DIR}/${SYM_NAME}/run/cylc-run/${CYLC_TEST_REG_BASE} +\`-- ${FUNCTIONAL_DIR} + \`-- cylc-clean + \`-- ${TEST_NAME_BASE} + |-- _cylc-install + | \`-- source -> ${TEST_DIR}/${WORKFLOW_NAME} + |-- darmok.cylc + |-- flow.cylc + |-- share -> ${TEST_DIR}/${SYM_NAME}/other/cylc-run/${WORKFLOW_NAME}/share + \`-- work -> ${TEST_DIR}/${SYM_NAME}/other/cylc-run/${WORKFLOW_NAME}/work +__TREE__ +# ----------------------------------------------------------------------------- +# Clean using a glob +run_ok "${TEST_NAME_BASE}-targeted-clean-2" cylc clean "$WORKFLOW_NAME" \ + --rm 'wo*' + +TEST_NAME="${TEST_NAME_BASE}-testdir-tree-2" +run_ok "${TEST_NAME}" tree -L 5 --noreport --charset=ascii "${TEST_DIR}/${SYM_NAME}/"*"/cylc-run/${CYLC_TEST_REG_BASE}" +# Note: when using glob, the symlink dir target is not deleted +cmp_ok "${TEST_NAME}.stdout" << __TREE__ +${TEST_DIR}/${SYM_NAME}/other/cylc-run/${CYLC_TEST_REG_BASE} +\`-- ${FUNCTIONAL_DIR} + \`-- cylc-clean + \`-- ${TEST_NAME_BASE} + \`-- share +${TEST_DIR}/${SYM_NAME}/run/cylc-run/${CYLC_TEST_REG_BASE} +\`-- ${FUNCTIONAL_DIR} + \`-- cylc-clean + \`-- ${TEST_NAME_BASE} + |-- _cylc-install + | \`-- source -> ${TEST_DIR}/${WORKFLOW_NAME} + |-- darmok.cylc + |-- flow.cylc + \`-- share -> ${TEST_DIR}/${SYM_NAME}/other/cylc-run/${WORKFLOW_NAME}/share +__TREE__ +# ----------------------------------------------------------------------------- +# Clean the last remaining symlink dir +run_ok "${TEST_NAME_BASE}-targeted-clean-3" cylc clean "$WORKFLOW_NAME" \ + --rm 'share' + +TEST_NAME="${TEST_NAME_BASE}-testdir-tree-3" +run_ok "${TEST_NAME}" tree -L 5 --noreport --charset=ascii "${TEST_DIR}/${SYM_NAME}/"*"/cylc-run/${CYLC_TEST_REG_BASE}" +cmp_ok "${TEST_NAME}.stdout" << __TREE__ +${TEST_DIR}/${SYM_NAME}/run/cylc-run/${CYLC_TEST_REG_BASE} +\`-- ${FUNCTIONAL_DIR} + \`-- cylc-clean + \`-- ${TEST_NAME_BASE} + |-- _cylc-install + | \`-- source -> ${TEST_DIR}/${WORKFLOW_NAME} + |-- darmok.cylc + \`-- flow.cylc +__TREE__ +# ----------------------------------------------------------------------------- +# Clean multiple things +run_ok "${TEST_NAME_BASE}-targeted-clean-3" cylc clean "$WORKFLOW_NAME" \ + --rm 'flow.cylc' --rm 'darmok.cylc' + +TEST_NAME="${TEST_NAME_BASE}-testdir-tree-3" +run_ok "${TEST_NAME}" tree -L 5 --noreport --charset=ascii "${TEST_DIR}/${SYM_NAME}/"*"/cylc-run/${CYLC_TEST_REG_BASE}" +cmp_ok "${TEST_NAME}.stdout" << __TREE__ +${TEST_DIR}/${SYM_NAME}/run/cylc-run/${CYLC_TEST_REG_BASE} +\`-- ${FUNCTIONAL_DIR} + \`-- cylc-clean + \`-- ${TEST_NAME_BASE} + \`-- _cylc-install + \`-- source -> ${TEST_DIR}/${WORKFLOW_NAME} +__TREE__ +# ----------------------------------------------------------------------------- +purge +exit diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 22a58120fea..a8151140e42 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -14,11 +14,11 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . """Standard pytest fixtures for unit tests.""" -from cylc.flow.data_store_mgr import DataStoreMgr + from pathlib import Path import pytest from shutil import rmtree -from typing import Optional +from typing import Any, Callable, Optional from unittest.mock import create_autospec, Mock from cylc.flow.cfgspec.globalcfg import SPEC @@ -27,12 +27,37 @@ ISO8601_CYCLING_TYPE, INTEGER_CYCLING_TYPE ) +from cylc.flow.data_store_mgr import DataStoreMgr from cylc.flow.parsec.config import ParsecConfig from cylc.flow.scheduler import Scheduler from cylc.flow.workflow_files import WorkflowFiles from cylc.flow.xtrigger_mgr import XtriggerManager +# Type alias for monkeymock() +MonkeyMock = Callable[..., Mock] + + +@pytest.fixture +def monkeymock(monkeypatch: pytest.MonkeyPatch): + """Fixture that patches a function/attr with a Mock and returns that Mock. + + Args: + pypath: The Python-style import path to be patched. + **kwargs: Any kwargs to set on the Mock. + + Example: + mock_clean = monkeymock('cylc.flow.workflow_files.clean') + something() # calls workflow_files.clean + assert mock_clean.called is True + """ + def inner(pypath: str, **kwargs: Any) -> Mock: + _mock = Mock(**kwargs) + monkeypatch.setattr(pypath, _mock) + return _mock + return inner + + @pytest.fixture def tmp_run_dir(tmp_path: Path, monkeypatch: pytest.MonkeyPatch): """Fixture that patches the cylc-run dir to the tests's {tmp_path}/cylc-run diff --git a/tests/unit/filetree.py b/tests/unit/filetree.py new file mode 100644 index 00000000000..bef09e4e556 --- /dev/null +++ b/tests/unit/filetree.py @@ -0,0 +1,167 @@ +# THIS FILE IS PART OF THE CYLC WORKFLOW ENGINE. +# Copyright (C) NIWA & British Crown (Met Office) & Contributors. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +"""Utilities for testing workflow directory structure. + +(There should be no tests in this module.) + +A filetree is represented by a dict like so: + { + # Dirs are represented by dicts (which are also sub-filetrees): + 'dir': { + 'another-dir': { + # Files are represented by None: + 'file.txt': None + } + }, + # Symlinks are represented by pathlib.Path, with the target represented + # by the relative path from the tmp_path directory: + 'symlink': Path('dir/another-dir') + } +""" + +from pathlib import Path +from typing import Any, Dict, List + + +def create_filetree( + filetree: Dict[str, Any], location: Path, root: Path +) -> None: + """Create the directory structure represented by the filetree dict. + + Args: + filetree: The filetree to create. + location: The absolute path in which to create the filetree. + root: The top-level dir from which relative symlink targets are + located (typically tmp_path). + """ + for name, entry in filetree.items(): + path = location / name + if isinstance(entry, dict): + path.mkdir(exist_ok=True) + create_filetree(entry, path, root) + elif isinstance(entry, Path): + path.symlink_to(root / entry) + else: + + path.touch() + + +def get_filetree_as_list( + filetree: Dict[str, Any], location: Path +) -> List[str]: + """Return a list of the paths in a filetree. + + Args: + filetree: The filetree to listify. + location: The absolute path to the filetree. + """ + ret: List[str] = [] + for name, entry in filetree.items(): + path = location / name + ret.append(str(path)) + if isinstance(entry, dict): + ret.extend(get_filetree_as_list(entry, path)) + return ret + + +FILETREE_1 = { + 'cylc-run': {'foo': {'bar': { + '.service': {'db': None}, + 'flow.cylc': None, + 'log': Path('sym/cylc-run/foo/bar/log'), + 'mirkwood': Path('you-shall-not-pass/mirkwood'), + 'rincewind.txt': Path('you-shall-not-pass/rincewind.txt') + }}}, + 'sym': {'cylc-run': {'foo': {'bar': { + 'log': { + 'darmok': Path('you-shall-not-pass/darmok'), + 'temba.txt': Path('you-shall-not-pass/temba.txt'), + 'bib': { + 'fortuna.txt': None + } + } + }}}}, + 'you-shall-not-pass': { # Nothing in here should get deleted + 'darmok': { + 'jalad.txt': None + }, + 'mirkwood': { + 'spiders.txt': None + }, + 'rincewind.txt': None, + 'temba.txt': None + } +} + +FILETREE_2 = { + 'cylc-run': {'foo': {'bar': Path('sym-run/cylc-run/foo/bar')}}, + 'sym-run': {'cylc-run': {'foo': {'bar': { + '.service': {'db': None}, + 'flow.cylc': None, + 'share': Path('sym-share/cylc-run/foo/bar/share') + }}}}, + 'sym-share': {'cylc-run': {'foo': {'bar': { + 'share': { + 'cycle': Path('sym-cycle/cylc-run/foo/bar/share/cycle') + } + }}}}, + 'sym-cycle': {'cylc-run': {'foo': {'bar': { + 'share': { + 'cycle': { + 'macklunkey.txt': None + } + } + }}}}, + 'you-shall-not-pass': {} +} + +FILETREE_3 = { + 'cylc-run': {'foo': {'bar': Path('sym-run/cylc-run/foo/bar')}}, + 'sym-run': {'cylc-run': {'foo': {'bar': { + '.service': {'db': None}, + 'flow.cylc': None, + 'share': { + 'cycle': Path('sym-cycle/cylc-run/foo/bar/share/cycle') + } + }}}}, + 'sym-cycle': {'cylc-run': {'foo': {'bar': { + 'share': { + 'cycle': { + 'sokath.txt': None + } + } + }}}}, + 'you-shall-not-pass': {} +} + +FILETREE_4 = { + 'cylc-run': {'foo': {'bar': { + '.service': {'db': None}, + 'flow.cylc': None, + 'share': { + 'cycle': Path('sym-cycle/cylc-run/foo/bar/share/cycle') + } + }}}, + 'sym-cycle': {'cylc-run': {'foo': {'bar': { + 'share': { + 'cycle': { + 'kiazi.txt': None + } + } + }}}}, + 'you-shall-not-pass': {} +} diff --git a/tests/unit/test_pathutil.py b/tests/unit/test_pathutil.py index e3f982ef651..3af92a833db 100644 --- a/tests/unit/test_pathutil.py +++ b/tests/unit/test_pathutil.py @@ -18,11 +18,11 @@ import logging import os from pathlib import Path -from typing import Callable, Iterable, List import pytest -from unittest.mock import patch, call +from typing import Callable, Dict, Iterable, List, Set +from unittest.mock import Mock, patch, call -from cylc.flow.exceptions import WorkflowFilesError +from cylc.flow.exceptions import UserInputError, WorkflowFilesError from cylc.flow.pathutil import ( expand_path, get_dirs_to_symlink, @@ -39,9 +39,13 @@ get_workflow_test_log_name, make_localhost_symlinks, make_workflow_run_tree, - remove_dir + parse_rm_dirs, + remove_dir_and_target, + remove_dir_or_file ) +from .conftest import MonkeyMock + HOME = Path.home() @@ -166,82 +170,109 @@ def test_make_workflow_run_tree( @pytest.mark.parametrize( - 'workflow, install_target, mocked_glbl_cfg, output', + 'mocked_glbl_cfg, output', [ - ( # basic - 'workflow1', 'install_target_1', - '''[symlink dirs] - [[install_target_1]] - run = $DEE - work = $DAH - log = $DUH - share = $DOH - share/cycle = $DAH''', { - 'run': '$DEE/cylc-run/workflow1', - 'work': '$DAH/cylc-run/workflow1/work', - 'log': '$DUH/cylc-run/workflow1/log', - 'share': '$DOH/cylc-run/workflow1/share', - 'share/cycle': '$DAH/cylc-run/workflow1/share/cycle' - }), - ( # remove nested run symlinks - 'workflow2', 'install_target_2', + pytest.param( # basic + ''' + [symlink dirs] + [[the_matrix]] + run = $DEE + work = $DAH + log = $DUH + share = $DOH + share/cycle = $DAH + ''', + { + 'run': '$DEE/cylc-run/morpheus', + 'work': '$DAH/cylc-run/morpheus/work', + 'log': '$DUH/cylc-run/morpheus/log', + 'share': '$DOH/cylc-run/morpheus/share', + 'share/cycle': '$DAH/cylc-run/morpheus/share/cycle' + }, + id="basic" + ), + pytest.param( # remove nested run symlinks + ''' + [symlink dirs] + [[the_matrix]] + run = $DEE + work = $DAH + log = $DEE + share = $DOH + share/cycle = $DAH + ''', + { + 'run': '$DEE/cylc-run/morpheus', + 'work': '$DAH/cylc-run/morpheus/work', + 'share': '$DOH/cylc-run/morpheus/share', + 'share/cycle': '$DAH/cylc-run/morpheus/share/cycle' + }, + id="remove nested run symlinks" + ), + pytest.param( # remove only nested run symlinks ''' - [symlink dirs] - [[install_target_2]] - run = $DEE - work = $DAH - log = $DEE - share = $DOH - share/cycle = $DAH - - ''', { - 'run': '$DEE/cylc-run/workflow2', - 'work': '$DAH/cylc-run/workflow2/work', - 'share': '$DOH/cylc-run/workflow2/share', - 'share/cycle': '$DAH/cylc-run/workflow2/share/cycle' - }), - ( # remove only nested run symlinks - 'workflow3', 'install_target_3', ''' - [symlink dirs] - [[install_target_3]] - run = $DOH - log = $DEE - share = $DEE - ''', { - 'run': '$DOH/cylc-run/workflow3', - 'log': '$DEE/cylc-run/workflow3/log', - 'share': '$DEE/cylc-run/workflow3/share'}) - ], ids=["1", "2", "3"]) -def test_get_dirs_to_symlink(workflow, install_target, mocked_glbl_cfg, - output, mock_glbl_cfg, tmp_path, monkeypatch): - # Using env variable 'DEE' to ensure dirs returned are unexpanded - monkeypatch.setenv('DEE', str(tmp_path)) + [symlink dirs] + [[the_matrix]] + run = $DOH + log = $DEE + share = $DEE + ''', + { + 'run': '$DOH/cylc-run/morpheus', + 'log': '$DEE/cylc-run/morpheus/log', + 'share': '$DEE/cylc-run/morpheus/share' + }, + id="remove only nested run symlinks" + ), + pytest.param( # blank entries + ''' + [symlink dirs] + [[the_matrix]] + run = + log = "" + share = + work = " " + ''', + {}, + id="blank entries" + ) + ] +) +def test_get_dirs_to_symlink( + mocked_glbl_cfg: str, + output: Dict[str, str], + mock_glbl_cfg: Callable, + monkeypatch: pytest.MonkeyPatch +) -> None: + # Set env var 'DEE', but we expect it to be unexpanded + monkeypatch.setenv('DEE', 'poiuytrewq') mock_glbl_cfg('cylc.flow.pathutil.glbl_cfg', mocked_glbl_cfg) - dirs = get_dirs_to_symlink(install_target, workflow) + dirs = get_dirs_to_symlink('the_matrix', 'morpheus') assert dirs == output -@patch('os.path.expandvars') @patch('cylc.flow.pathutil.get_workflow_run_dir') -@patch('cylc.flow.pathutil.make_symlink') @patch('cylc.flow.pathutil.get_dirs_to_symlink') def test_make_localhost_symlinks_calls_make_symlink_for_each_key_value_dir( - mocked_dirs_to_symlink, - mocked_make_symlink, - mocked_get_workflow_run_dir, mocked_expandvars): - + mocked_dirs_to_symlink: Mock, + mocked_get_workflow_run_dir: Mock, + monkeypatch: pytest.MonkeyPatch, monkeymock: MonkeyMock +) -> None: mocked_dirs_to_symlink.return_value = { - 'run': '$DOH/workflow3', - 'log': '$DEE/workflow3/log', - 'share': '$DEE/workflow3/share'} + 'run': '$DOH/trinity', + 'log': '$DEE/trinity/log', + 'share': '$DEE/trinity/share' + } mocked_get_workflow_run_dir.return_value = "rund" - mocked_expandvars.return_value = "expanded" - mocked_make_symlink.return_value = True + for v in ('DOH', 'DEE'): + monkeypatch.setenv(v, 'expanded') + mocked_make_symlink = monkeymock('cylc.flow.pathutil.make_symlink') + make_localhost_symlinks('rund', 'workflow') mocked_make_symlink.assert_has_calls([ - call('expanded', 'rund'), - call('expanded', 'rund/log'), - call('expanded', 'rund/share') + call('rund', 'expanded/trinity'), + call('rund/log', 'expanded/trinity/log'), + call('rund/share', 'expanded/trinity/share') ]) @@ -271,8 +302,9 @@ def test_incorrect_environment_variables_raise_error( ('file', NotADirectoryError), (None, FileNotFoundError)] ) -def test_remove_dir(filetype, expected_err, tmp_path): - """Test that remove_dir() can delete nested dirs and handle bad paths.""" +def test_remove_dir_and_target(filetype, expected_err, tmp_path): + """Test that remove_dir_and_target() can delete nested dirs and handle + bad paths.""" test_path = tmp_path.joinpath('foo/bar') if filetype == 'dir': # Test removal of sub directories too @@ -286,9 +318,9 @@ def test_remove_dir(filetype, expected_err, tmp_path): if expected_err: with pytest.raises(expected_err): - remove_dir(test_path) + remove_dir_and_target(test_path) else: - remove_dir(test_path) + remove_dir_and_target(test_path) assert test_path.exists() is False assert test_path.is_symlink() is False @@ -299,8 +331,9 @@ def test_remove_dir(filetype, expected_err, tmp_path): ('file', NotADirectoryError), (None, None)] ) -def test_remove_dir_symlinks(target, expected_err, tmp_path): - """Test that remove_dir() can delete symlinks, including the target.""" +def test_remove_dir_and_target_symlinks(target, expected_err, tmp_path): + """Test that remove_dir_and_target() can delete symlinks, including + the target.""" target_path = tmp_path.joinpath('x/y') target_path.mkdir(parents=True) @@ -322,21 +355,85 @@ def test_remove_dir_symlinks(target, expected_err, tmp_path): if expected_err: with pytest.raises(expected_err): - remove_dir(symlink_path) + remove_dir_and_target(symlink_path) else: - remove_dir(symlink_path) + remove_dir_and_target(symlink_path) for path in [symlink_path, target_path]: assert path.exists() is False assert path.is_symlink() is False -def test_remove_dir_relative(tmp_path): - """Test that you cannot use remove_dir() on a relative path. +@pytest.mark.parametrize( + 'func', [remove_dir_and_target, remove_dir_or_file] +) +def test_remove_relative(func: Callable, tmp_path: Path): + """Test that you cannot use remove_dir_and_target() or remove_dir_or_file() + on relative paths. When removing a path, we want to be absolute-ly sure where it is! """ # cd to temp dir in case we accidentally succeed in deleting the path os.chdir(tmp_path) with pytest.raises(ValueError) as cm: - remove_dir('foo/bar') + func('foo/bar') assert 'Path must be absolute' in str(cm.value) + + +def test_remove_dir_or_file(tmp_path: Path): + """Test remove_dir_or_file()""" + a_file = tmp_path.joinpath('fyle') + a_file.touch() + assert a_file.exists() + remove_dir_or_file(a_file) + assert a_file.exists() is False + + a_symlink = tmp_path.joinpath('simlynk') + a_file.touch() + a_symlink.symlink_to(a_file) + assert a_symlink.is_symlink() + remove_dir_or_file(a_symlink) + assert a_symlink.is_symlink() is False + assert a_file.exists() + + a_dir = tmp_path.joinpath('der') + # Add contents to check whole tree is removed + sub_dir = a_dir.joinpath('sub_der') + sub_dir.mkdir(parents=True) + sub_dir.joinpath('fyle').touch() + assert a_dir.exists() + remove_dir_or_file(a_dir) + assert a_dir.exists() is False + + +@pytest.mark.parametrize( + 'dirs, expected', + [ + ([" "], set()), + (["foo", "bar"], {"foo", "bar"}), + (["foo:bar", "baz/*"], {"foo", "bar", "baz/*"}), + ([" :foo :bar:"], {"foo", "bar"}), + (["foo/:bar//baz "], {"foo/", "bar/baz"}), + ([".foo", "..bar", " ./gah"], {".foo", "..bar", "gah"}) + ] +) +def test_parse_rm_dirs(dirs: List[str], expected: Set[str]): + """Test parse_dirs()""" + assert parse_rm_dirs(dirs) == expected + + +@pytest.mark.parametrize( + 'dirs, err_msg', + [ + (["foo:/bar"], + "--rm option cannot take absolute paths"), + (["foo:../bar"], + "cannot take paths that point to the run directory or above"), + (["foo:bar/../../gah"], + "cannot take paths that point to the run directory or above"), + ] +) +def test_parse_rm_dirs__bad(dirs: List[str], err_msg: str): + """Test parse_dirs() with bad inputs""" + with pytest.raises(UserInputError) as exc: + parse_rm_dirs(dirs) + assert err_msg in str(exc.value) diff --git a/tests/unit/test_workflow_files.py b/tests/unit/test_workflow_files.py index 9cf7ca02d7e..b2817be2373 100644 --- a/tests/unit/test_workflow_files.py +++ b/tests/unit/test_workflow_files.py @@ -14,12 +14,13 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . -from cylc.flow.option_parsers import Options +from glob import iglob import logging +import os from pathlib import Path import pytest import shutil -from typing import Callable, Optional, Tuple, Type +from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union from unittest import mock from cylc.flow import CYLC_LOG @@ -30,16 +31,30 @@ TaskRemoteMgmtError, WorkflowFilesError ) -from cylc.flow.scripts.clean import get_option_parser as _clean_GOP +from cylc.flow.pathutil import parse_rm_dirs +from cylc.flow.scripts.clean import CleanOptions from cylc.flow.workflow_files import ( WorkflowFiles, + _clean_using_glob, + _remote_clean_cmd, check_flow_file, check_nested_run_dirs, + get_symlink_dirs, get_workflow_source_dir, - reinstall_workflow, search_install_source_dirs) - + glob_in_run_dir, + reinstall_workflow, + search_install_source_dirs +) -CleanOpts = Options(_clean_GOP()) +from .conftest import MonkeyMock +from .filetree import ( + FILETREE_1, + FILETREE_2, + FILETREE_3, + FILETREE_4, + create_filetree, + get_filetree_as_list +) @pytest.mark.parametrize( @@ -132,25 +147,32 @@ def test_validate_flow_name(reg, expected_err, expected_msg): @pytest.mark.parametrize( - 'reg, not_stopped, err, err_msg', - [('foo/..', False, WorkflowFilesError, + 'reg, stopped, err, err_msg', + [('foo/..', True, WorkflowFilesError, "cannot be a path that points to the cylc-run directory or above"), - ('foo/../..', False, WorkflowFilesError, + ('foo/../..', True, WorkflowFilesError, "cannot be a path that points to the cylc-run directory or above"), - ('foo', True, ServiceFileError, "Cannot remove running workflow")] + ('foo', False, ServiceFileError, "Cannot remove running workflow")] ) -def test_clean_check(reg, not_stopped, err, err_msg, monkeypatch): +def test_clean_check_fail( + reg: str, + stopped: bool, + err: Type[Exception], + err_msg: str, + monkeypatch: pytest.MonkeyPatch +) -> None: """Test that _clean_check() fails appropriately. Params: - reg (str): Workflow name. - err (Exception): Expected error. - err_msg (str): Message that is expected to be in the exception. + reg: Workflow name. + stopped: Whether the workflow is stopped when _clean_check() is called. + err: Expected error class. + err_msg: Message that is expected to be in the exception. """ run_dir = mock.Mock() - def mocked_detect_old_contact_file(reg): - if not_stopped: + def mocked_detect_old_contact_file(*a, **k): + if not stopped: raise ServiceFileError('Mocked error') monkeypatch.setattr('cylc.flow.workflow_files.detect_old_contact_file', @@ -162,221 +184,774 @@ def mocked_detect_old_contact_file(reg): @pytest.mark.parametrize( - 'reg, props, clean_called, remote_clean_called', + 'db_platforms, opts, clean_called, remote_clean_called', [ - ('foo/bar', { - 'no dir': True, - 'log': (logging.INFO, "No directory to clean") - }, False, False), - ('foo/bar', { - 'no db': True, - 'log': (logging.INFO, - "No workflow database - will only clean locally") - }, True, False), - ('foo/bar', { - 'db platforms': ['localhost', 'localhost'] - }, True, False), - ('foo/bar', { - 'db platforms': ['horse'] - }, True, True) + pytest.param( + ['localhost', 'localhost'], {}, True, False, + id="Only platform in DB is localhost" + ), + pytest.param( + ['horse'], {}, True, True, + id="Remote platform in DB" + ), + pytest.param( + ['horse'], {'local_only': True}, True, False, + id="Local clean only" + ), + pytest.param( + ['horse'], {'remote_only': True}, False, True, + id="Remote clean only" + ) ] ) -def test_init_clean_ok( - reg, props, clean_called, remote_clean_called, - monkeypatch, tmp_path, caplog): +def test_init_clean( + db_platforms: List[str], + opts: Dict[str, Any], + clean_called: bool, + remote_clean_called: bool, + monkeypatch: pytest.MonkeyPatch, monkeymock: MonkeyMock, + tmp_run_dir: Callable +) -> None: """Test the init_clean() function logic. Params: - reg (str): Workflow name. - props (dict): Possible values are (all optional): - 'no dir' (bool): If True, do not create run dir for this test case. - 'log' (tuple): Of form (severity, msg): - severity (logging level): Expected level e.g. logging.INFO. - msg (str): Message that is expected to be logged. - 'db platforms' (list): Platform names that would be loaded from - the database. - 'no db' (bool): If True, workflow database doesn't exist. - clean_called (bool): If a local clean is expected to go ahead. - remote_clean_called (bool): If a remote clean is expected to go ahead. + db_platforms: Platform names that would be loaded from the database. + opts: Any options passed to the cylc clean CLI. + clean_called: If a local clean is expected to go ahead. + remote_clean_called: If a remote clean is expected to go ahead. """ - # --- Setup --- - expected_log = props.get('log') - if expected_log: - level, msg = expected_log - caplog.set_level(level, CYLC_LOG) - - tmp_path.joinpath('cylc-run').mkdir() - run_dir = tmp_path.joinpath('cylc-run', reg) - if not props.get('no dir'): - run_dir.mkdir(parents=True) - - mocked_clean = mock.Mock() - monkeypatch.setattr('cylc.flow.workflow_files.clean', mocked_clean) - mocked_remote_clean = mock.Mock() - monkeypatch.setattr('cylc.flow.workflow_files.remote_clean', - mocked_remote_clean) - monkeypatch.setattr('cylc.flow.workflow_files.get_workflow_run_dir', - lambda x: tmp_path.joinpath('cylc-run', x)) - - _get_platforms_from_db = workflow_files.get_platforms_from_db - - def mocked_get_platforms_from_db(run_dir): - if props.get('no dir') or props.get('no db'): - return _get_platforms_from_db(run_dir) # Handle as normal - return set(props.get('db platforms')) + reg = 'foo/bar/' + tmp_run_dir(reg) + mock_clean = monkeymock('cylc.flow.workflow_files.clean') + mock_remote_clean = monkeymock('cylc.flow.workflow_files.remote_clean') + monkeypatch.setattr('cylc.flow.workflow_files.get_platforms_from_db', + lambda x: set(db_platforms)) + + workflow_files.init_clean(reg, opts=CleanOptions(**opts)) + assert mock_clean.called is clean_called + assert mock_remote_clean.called is remote_clean_called + + +def test_init_clean_no_dir( + monkeymock: MonkeyMock, tmp_run_dir: Callable, + caplog: pytest.LogCaptureFixture +) -> None: + """Test init_clean() when the run dir doesn't exist""" + caplog.set_level(logging.INFO, CYLC_LOG) + tmp_run_dir() + mock_clean = monkeymock('cylc.flow.workflow_files.clean') + mock_remote_clean = monkeymock('cylc.flow.workflow_files.remote_clean') + + workflow_files.init_clean('foo/bar', opts=CleanOptions()) + assert "No directory to clean" in caplog.text + assert mock_clean.called is False + assert mock_remote_clean.called is False + + +def test_init_clean_no_db( + monkeymock: MonkeyMock, tmp_run_dir: Callable, + caplog: pytest.LogCaptureFixture +) -> None: + """Test init_clean() when the workflow database doesn't exist""" + caplog.set_level(logging.INFO, CYLC_LOG) + tmp_run_dir('bespin') + mock_clean = monkeymock('cylc.flow.workflow_files.clean') + mock_remote_clean = monkeymock('cylc.flow.workflow_files.remote_clean') + + workflow_files.init_clean('bespin', opts=CleanOptions()) + assert "No workflow database - will only clean locally" in caplog.text + assert mock_clean.called is True + assert mock_remote_clean.called is False + + +def test_init_clean_remote_only_no_db( + monkeymock: MonkeyMock, tmp_run_dir: Callable +) -> None: + """Test remote-only init_clean() when the workflow DB doesn't exist""" + tmp_run_dir('hoth') + mock_clean = monkeymock('cylc.flow.workflow_files.clean') + mock_remote_clean = monkeymock('cylc.flow.workflow_files.remote_clean') + + with pytest.raises(ServiceFileError) as exc: + workflow_files.init_clean('hoth', opts=CleanOptions(remote_only=True)) + assert ("No workflow database - cannot perform remote clean" + in str(exc.value)) + assert mock_clean.called is False + assert mock_remote_clean.called is False + + +def test_init_clean_running_workflow( + monkeypatch: pytest.MonkeyPatch, tmp_run_dir: Callable +) -> None: + """Test init_clean() fails when workflow is still running""" + def mock_err(*args, **kwargs): + raise ServiceFileError("Mocked error") + monkeypatch.setattr('cylc.flow.workflow_files.detect_old_contact_file', + mock_err) + tmp_run_dir('yavin') + with pytest.raises(ServiceFileError) as exc: + workflow_files.init_clean('yavin', opts=mock.Mock()) + assert "Cannot remove running workflow" in str(exc.value) + + +@pytest.mark.parametrize( + 'rm_dirs, expected_clean, expected_remote_clean', + [(None, None, []), + (["r2d2:c3po"], {"r2d2", "c3po"}, ["r2d2:c3po"])] +) +def test_init_clean_rm_dirs( + rm_dirs: Optional[List[str]], + expected_clean: Set[str], + expected_remote_clean: List[str], + monkeymock: MonkeyMock, monkeypatch: pytest.MonkeyPatch, + tmp_run_dir: Callable +) -> None: + """Test init_clean() with the --rm option. + + Params: + rm_dirs: Dirs given by --rm option. + expected_clean: The dirs that are expected to be passed to clean(). + expected_remote_clean: The dirs that are expected to be passed to + remote_clean(). + """ + reg = 'dagobah' + run_dir: Path = tmp_run_dir(reg) + mock_clean = monkeymock('cylc.flow.workflow_files.clean') + mock_remote_clean = monkeymock('cylc.flow.workflow_files.remote_clean') + platforms = {'platform_one'} monkeypatch.setattr('cylc.flow.workflow_files.get_platforms_from_db', - mocked_get_platforms_from_db) + lambda x: platforms) + opts = CleanOptions(rm_dirs=rm_dirs) if rm_dirs else CleanOptions() - # --- The actual test --- - workflow_files.init_clean(reg, opts=mock.Mock()) - if expected_log: - assert msg in caplog.text - if clean_called: - assert mocked_clean.called is True - else: - assert mocked_clean.called is False - if remote_clean_called: - assert mocked_remote_clean.called is True - else: - assert mocked_remote_clean.called is False + workflow_files.init_clean(reg, opts=opts) + mock_clean.assert_called_with(reg, run_dir, expected_clean) + mock_remote_clean.assert_called_with( + reg, platforms, expected_remote_clean, opts.remote_timeout) @pytest.mark.parametrize( - 'reg, props', + 'reg, symlink_dirs, rm_dirs, expected_deleted, expected_remaining', [ - ('foo/bar/', {}), # Works ok - ('foo', {'no dir': True}), # Nothing to clean - ('foo', { - 'not stopped': True, - 'err': ServiceFileError, - 'err msg': 'Cannot remove running workflow' - }), - ('foo/bar', { - 'symlink dirs': { + pytest.param( + 'foo/bar', + {}, + None, + ['cylc-run/foo'], + ['cylc-run'], + id="Basic clean" + ), + pytest.param( + 'foo/bar/baz', + { 'log': 'sym-log', 'share': 'sym-share', 'share/cycle': 'sym-cycle', 'work': 'sym-work' - } - }), - ('foo', { - 'symlink dirs': { + }, + None, + ['cylc-run/foo', 'sym-log/cylc-run/foo', 'sym-share/cylc-run/foo', + 'sym-cycle/cylc-run/foo', 'sym-work/cylc-run/foo'], + ['cylc-run', 'sym-log/cylc-run', 'sym-share/cylc-run', + 'sym-cycle/cylc-run', 'sym-work/cylc-run'], + id="Symlink dirs" + ), + pytest.param( + 'foo', + { 'run': 'sym-run', 'log': 'sym-log', 'share': 'sym-share', 'share/cycle': 'sym-cycle', 'work': 'sym-work' - } - }), - ('foo', { - 'bad symlink': { - 'type': 'file', - 'path': 'sym-log/cylc-run/foo/meow.txt' }, - 'err': WorkflowFilesError, - 'err msg': 'Target is not a directory' - }), - ('foo', { - 'bad symlink': { - 'type': 'dir', - 'path': 'sym-log/bad/path' - }, - 'err': WorkflowFilesError, - 'err msg': 'Expected target to end with "cylc-run/foo/log"' - }) + None, + ['cylc-run/foo', 'sym-run/cylc-run/foo', 'sym-log/cylc-run/foo', + 'sym-share/cylc-run/foo', 'sym-cycle/cylc-run/foo', + 'sym-work/cylc-run/foo'], + ['cylc-run', 'sym-run/cylc-run', 'sym-log/cylc-run', + 'sym-share/cylc-run', 'sym-cycle/cylc-run', + 'sym-work'], + id="Symlink dirs including run dir" + ), + pytest.param( + 'foo', + {}, + {'log', 'share'}, + ['cylc-run/foo/log', 'cylc-run/foo/share'], + ['cylc-run/foo/work'], + id="Targeted clean" + ), + pytest.param( + 'foo', + {'log': 'sym-log'}, + {'log'}, + ['cylc-run/foo/log', 'sym-log/cylc-run/foo'], + ['cylc-run/foo/work', 'cylc-run/foo/share/cycle', + 'sym-log/cylc-run'], + id="Targeted clean with symlink dirs" + ), + pytest.param( + 'foo', + {}, + {'share/cy*'}, + ['cylc-run/foo/share/cycle'], + ['cylc-run/foo/log', 'cylc-run/foo/work', 'cylc-run/foo/share'], + id="Targeted clean with glob" + ), + pytest.param( + 'foo', + {'log': 'sym-log'}, + {'w*', 'wo*', 'l*', 'lo*'}, + ['cylc-run/foo/work', 'cylc-run/foo/log', 'sym-log/cylc-run/foo'], + ['cylc-run/foo/share', 'cylc-run/foo/share/cycle'], + id="Targeted clean with degenerate glob" + ), ] ) -def test_clean(reg, props, monkeypatch, tmp_path): +def test_clean( + reg: str, + symlink_dirs: Dict[str, str], + rm_dirs: Optional[Set[str]], + expected_deleted: List[str], + expected_remaining: List[str], + tmp_path: Path, tmp_run_dir: Callable +) -> None: """Test the clean() function. Params: - reg (str): Workflow name. - props (dict): Possible values are (all optional): - 'err' (Exception): Expected error. - 'err msg' (str): Message that is expected to be in the exception. - 'no dir' (bool): If True, do not create run dir for this test case. - 'not stopped' (bool): If True, simulate that the workflow is - still running. - 'symlink dirs' (dict): As you would find in the global config - under [symlink dirs][platform]. - 'bad symlink' (dict): Simulate an invalid log symlink dir: - 'type' (str): 'file' or 'dir'. - 'path' (str): Path of the symlink target relative to tmp_path. + reg: Workflow name. + symlink_dirs: As you would find in the global config + under [symlink dirs][platform]. + rm_dirs: As passed to clean(). + expected_deleted: Dirs (relative paths under tmp_path) that are + expected to be cleaned. + expected_remaining: Any dirs (relative paths under tmp_path) that are + not expected to be cleaned. """ # --- Setup --- - tmp_path.joinpath('cylc-run').mkdir() - run_dir = tmp_path.joinpath('cylc-run', reg) - run_dir_top_parent = tmp_path.joinpath('cylc-run', Path(reg).parts[0]) - symlink_dirs = props.get('symlink dirs') - bad_symlink = props.get('bad symlink') - if not props.get('no dir') and ( - not symlink_dirs or 'run' not in symlink_dirs): - run_dir.mkdir(parents=True) - - dirs_to_check = [run_dir_top_parent] - if symlink_dirs: - if 'run' in symlink_dirs: - dst = tmp_path.joinpath(symlink_dirs['run'], 'cylc-run', reg) - dst.mkdir(parents=True) - run_dir.symlink_to(dst) - dirs_to_check.append(dst) - symlink_dirs.pop('run') - for s, d in symlink_dirs.items(): - dst = tmp_path.joinpath(d, 'cylc-run', reg, s) - dst.mkdir(parents=True) - src = run_dir.joinpath(s) - src.symlink_to(dst) - dirs_to_check.append(dst.parent) - if bad_symlink: - dst = tmp_path.joinpath(bad_symlink['path']) - if bad_symlink['type'] == 'file': - dst.parent.mkdir(parents=True) - dst.touch() - else: - dst.mkdir(parents=True) - src = run_dir.joinpath('log') - src.symlink_to(dst) - - def mocked_detect_old_contact_file(reg): - if props.get('not stopped'): - raise ServiceFileError('Mocked error') - - monkeypatch.setattr('cylc.flow.workflow_files.detect_old_contact_file', - mocked_detect_old_contact_file) - monkeypatch.setattr('cylc.flow.workflow_files.get_workflow_run_dir', - lambda x: tmp_path.joinpath('cylc-run', x)) + run_dir: Path = tmp_run_dir(reg) + + if 'run' in symlink_dirs: + target = tmp_path / symlink_dirs['run'] / 'cylc-run' / reg + target.mkdir(parents=True) + shutil.rmtree(run_dir) + run_dir.symlink_to(target) + symlink_dirs.pop('run') + for symlink_name, target_name in symlink_dirs.items(): + target = tmp_path / target_name / 'cylc-run' / reg / symlink_name + target.mkdir(parents=True) + symlink = run_dir / symlink_name + symlink.symlink_to(target) + for d_name in ('log', 'share', 'share/cycle', 'work'): + if d_name not in symlink_dirs: + (run_dir / d_name).mkdir() + + for rel_path in [*expected_deleted, *expected_remaining]: + assert (tmp_path / rel_path).exists() # --- The actual test --- - expected_err = props.get('err') - if expected_err: - with pytest.raises(expected_err) as exc: - workflow_files.clean(reg) - expected_msg = props.get('err msg') - if expected_msg: - assert expected_msg in str(exc.value) - else: - workflow_files.clean(reg) - for d in dirs_to_check: - assert d.exists() is False - assert d.is_symlink() is False + workflow_files.clean(reg, run_dir, rm_dirs) + for rel_path in expected_deleted: + assert (tmp_path / rel_path).exists() is False + assert (tmp_path / rel_path).is_symlink() is False + for rel_path in expected_remaining: + assert (tmp_path / rel_path).exists() -def test_clean_broken_symlink_run_dir(monkeypatch, tmp_path): - """Test clean() for removing a run dir that is a broken symlink.""" +def test_clean__broken_symlink_run_dir( + tmp_path: Path, tmp_run_dir: Callable +) -> None: + """Test clean() successfully remove a run dir that is a broken symlink.""" + # Setup reg = 'foo/bar' - run_dir = tmp_path.joinpath('cylc-run', reg) - run_dir.parent.mkdir(parents=True) + run_dir: Path = tmp_run_dir(reg) target = tmp_path.joinpath('rabbow/cylc-run', reg) target.mkdir(parents=True) + shutil.rmtree(run_dir) run_dir.symlink_to(target) target.rmdir() + assert run_dir.parent.exists() is True # cylc-run/foo should exist + # Test + workflow_files.clean(reg, run_dir) + assert run_dir.parent.exists() is False # cylc-run/foo should be gone + assert target.parent.exists() is False # rabbow/cylc-run/foo too + + +def test_clean__bad_symlink_dir_wrong_type( + tmp_path: Path, tmp_run_dir: Callable +) -> None: + """Test clean() raises error when a symlink dir actually points to a file + instead of a dir""" + reg = 'foo' + run_dir: Path = tmp_run_dir(reg) + symlink = run_dir.joinpath('log') + target = tmp_path.joinpath('sym-log', 'cylc-run', reg, 'meow.txt') + target.parent.mkdir(parents=True) + target.touch() + symlink.symlink_to(target) + + with pytest.raises(WorkflowFilesError) as exc: + workflow_files.clean(reg, run_dir) + assert "Target is not a directory" in str(exc.value) + assert symlink.exists() is True + + +def test_clean__bad_symlink_dir_wrong_form( + tmp_path: Path, tmp_run_dir: Callable +) -> None: + """Test clean() raises error when a symlink dir points to an + unexpected dir""" + run_dir: Path = tmp_run_dir('foo') + symlink = run_dir.joinpath('log') + target = tmp_path.joinpath('sym-log', 'oops', 'log') + target.mkdir(parents=True) + symlink.symlink_to(target) + + with pytest.raises(WorkflowFilesError) as exc: + workflow_files.clean('foo', run_dir) + assert 'Expected target to end with "cylc-run/foo/log"' in str(exc.value) + assert symlink.exists() is True + + +@pytest.mark.parametrize('pattern', ['thing/', 'thing/*']) +def test_clean__rm_dir_not_file(pattern: str, tmp_run_dir: Callable): + """Test clean() does not remove a file when the rm_dir glob pattern would + match a dir only.""" + reg = 'foo' + run_dir: Path = tmp_run_dir(reg) + a_file = run_dir.joinpath('thing') + a_file.touch() + rm_dirs = parse_rm_dirs([pattern]) + + workflow_files.clean(reg, run_dir, rm_dirs) + assert a_file.exists() + + +@pytest.mark.parametrize( + 'filetree, expected', + [ + pytest.param( + FILETREE_1, + {'log': 'sym/cylc-run/foo/bar/log'}, + id="filetree1" + ), + pytest.param( + FILETREE_2, + { + 'share/cycle': 'sym-cycle/cylc-run/foo/bar/share/cycle', + 'share': 'sym-share/cylc-run/foo/bar/share', + '': 'sym-run/cylc-run/foo/bar/' + }, + id="filetree2" + ), + pytest.param( + FILETREE_3, + { + 'share/cycle': 'sym-cycle/cylc-run/foo/bar/share/cycle', + '': 'sym-run/cylc-run/foo/bar/' + }, + id="filetree3" + ), + pytest.param( + FILETREE_4, + {'share/cycle': 'sym-cycle/cylc-run/foo/bar/share/cycle'}, + id="filetree4" + ), + ] +) +def test_get_symlink_dirs( + filetree: Dict[str, Any], + expected: Dict[str, Union[Path, str]], + tmp_run_dir: Callable, tmp_path: Path +): + """Test get_symlink_dirs(). + + Params: + filetree: The directory structure to test against. + expected: The expected return dictionary, except with the values being + relative to tmp_path instead of absolute paths. + """ + # Setup + cylc_run_dir = tmp_run_dir() + create_filetree(filetree, tmp_path, tmp_path) + reg = 'foo/bar' + for k, v in expected.items(): + expected[k] = Path(tmp_path / v) + # Test + assert get_symlink_dirs(reg, cylc_run_dir / reg) == expected + + +@pytest.mark.parametrize( + 'pattern, filetree, expected_matches', + [ + pytest.param( + '**', + FILETREE_1, + ['cylc-run/foo/bar', + 'cylc-run/foo/bar/log'], + id="filetree1 **" + ), + pytest.param( + '*', + FILETREE_1, + ['cylc-run/foo/bar/flow.cylc', + 'cylc-run/foo/bar/log', + 'cylc-run/foo/bar/mirkwood', + 'cylc-run/foo/bar/rincewind.txt'], + id="filetree1 *" + ), + pytest.param( + '**/*.txt', + FILETREE_1, + ['cylc-run/foo/bar/log/bib/fortuna.txt', + 'cylc-run/foo/bar/log/temba.txt', + 'cylc-run/foo/bar/rincewind.txt'], + id="filetree1 **/*.txt" + ), + pytest.param( + '**', + FILETREE_2, + ['cylc-run/foo/bar', + 'cylc-run/foo/bar/share', + 'cylc-run/foo/bar/share/cycle'], + id="filetree2 **" + ), + pytest.param( + '**', + FILETREE_3, + ['cylc-run/foo/bar', + 'cylc-run/foo/bar/share/cycle'], + id="filetree3 **" + ), + pytest.param( + '**/s*', + FILETREE_3, + ['cylc-run/foo/bar/share', + 'cylc-run/foo/bar/share/cycle/sokath.txt'], + id="filetree3 **/s*" + ), + pytest.param( + '**', + FILETREE_4, + ['cylc-run/foo/bar', + 'cylc-run/foo/bar/share/cycle'], + id="filetree4 **" + ), + ] +) +def test_glob_in_run_dir( + pattern: str, + filetree: Dict[str, Any], + expected_matches: List[str], + tmp_path: Path, tmp_run_dir: Callable +) -> None: + """Test that glob_in_run_dir() returns the minimal set of results with + no redundant paths. + """ + # Setup + cylc_run_dir: Path = tmp_run_dir() + reg = 'foo/bar' + run_dir = cylc_run_dir / reg + create_filetree(filetree, tmp_path, tmp_path) + symlink_dirs = [run_dir / i for i in get_symlink_dirs(reg, run_dir)] + expected = [tmp_path / i for i in expected_matches] + # Test + assert glob_in_run_dir(run_dir, pattern, symlink_dirs) == expected + + +@pytest.fixture +def filetree_for_testing_cylc_clean(tmp_path: Path): + """Fixture that creates a filetree from the given dict, and returns which + files are expected to be deleted and which aren't. + + See tests/unit/filetree.py + + Args: + reg: Workflow name. + initial_filetree: The filetree before cleaning. + filetree_left_behind: The filetree that is expected to be left behind + after cleaning, excluding the 'you-shall-not-pass/' directory, + which is always expected to be left behind. + + Returns: + run_dir: Workflow run dir. + files_to_delete: List of files that are expected to be deleted. + files_not_to_delete: List of files that are not expected to be deleted. + """ + def _filetree_for_testing_cylc_clean( + reg: str, + initial_filetree: Dict[str, Any], + filetree_left_behind: Dict[str, Any] + ) -> Tuple[Path, List[str], List[str]]: + create_filetree(initial_filetree, tmp_path, tmp_path) + files_not_to_delete = [ + os.path.normpath(i) for i in + iglob(str(tmp_path / 'you-shall-not-pass/**'), recursive=True) + ] + files_not_to_delete.extend( + get_filetree_as_list(filetree_left_behind, tmp_path) + ) + files_to_delete = list( + set(get_filetree_as_list(initial_filetree, tmp_path)).difference( + files_not_to_delete + ) + ) + run_dir = tmp_path / 'cylc-run' / reg + return run_dir, files_to_delete, files_not_to_delete + return _filetree_for_testing_cylc_clean - monkeypatch.setattr('cylc.flow.workflow_files.get_workflow_run_dir', - lambda x: tmp_path.joinpath('cylc-run', x)) - workflow_files.clean(reg) - assert run_dir.parent.is_dir() is False +@pytest.mark.parametrize( + 'pattern, initial_filetree, filetree_left_behind', + [ + pytest.param( + '**', + FILETREE_1, + { + 'cylc-run': {'foo': {}}, + 'sym': {'cylc-run': {'foo': {'bar': {}}}} + } + ), + pytest.param( + '*/**', + FILETREE_1, + { + 'cylc-run': {'foo': {'bar': { + '.service': {'db': None}, + 'flow.cylc': None, + 'rincewind.txt': Path('whatever') + }}}, + 'sym': {'cylc-run': {'foo': {'bar': {}}}} + } + ), + pytest.param( + '**/*.txt', + FILETREE_1, + { + 'cylc-run': {'foo': {'bar': { + '.service': {'db': None}, + 'flow.cylc': None, + 'log': Path('whatever'), + 'mirkwood': Path('whatever') + }}}, + 'sym': {'cylc-run': {'foo': {'bar': { + 'log': { + 'darmok': Path('whatever'), + 'bib': {} + } + }}}} + } + ) + ] +) +def test__clean_using_glob( + pattern: str, + initial_filetree: Dict[str, Any], + filetree_left_behind: Dict[str, Any], + filetree_for_testing_cylc_clean: Callable +) -> None: + """Test _clean_using_glob(), particularly that it does not follow and + delete symlinks (apart from the standard symlink dirs). + + Params: + pattern: The glob pattern to test. + initial_filetree: The filetree to test against. + files_left_behind: The filetree expected to remain after + _clean_using_glob() is called (excluding + /you-shall-not-pass, which is always expected to remain). + """ + # --- Setup --- + run_dir: Path + files_to_delete: List[str] + files_not_to_delete: List[str] + run_dir, files_to_delete, files_not_to_delete = ( + filetree_for_testing_cylc_clean( + 'foo/bar', initial_filetree, filetree_left_behind) + ) + # --- Test --- + _clean_using_glob(run_dir, pattern, symlink_dirs=['log']) + for file in files_not_to_delete: + assert os.path.exists(file) is True + for file in files_to_delete: + assert os.path.lexists(file) is False + + +@pytest.mark.parametrize( + 'rm_dirs, initial_filetree, filetree_left_behind', + [ + pytest.param( + {'**'}, + FILETREE_1, + { + 'cylc-run': {}, + 'sym': {'cylc-run': {}} + }, + id="filetree1 **" + ), + pytest.param( + {'*/**'}, + FILETREE_1, + { + 'cylc-run': {'foo': {'bar': { + '.service': {'db': None}, + 'flow.cylc': None, + 'rincewind.txt': Path('whatever') + }}}, + 'sym': {'cylc-run': {}} + }, + id="filetree1 */**" + ), + pytest.param( + {'**/*.txt'}, + FILETREE_1, + { + 'cylc-run': {'foo': {'bar': { + '.service': {'db': None}, + 'flow.cylc': None, + 'log': Path('whatever'), + 'mirkwood': Path('whatever') + }}}, + 'sym': {'cylc-run': {'foo': {'bar': { + 'log': { + 'darmok': Path('whatever'), + 'bib': {} + } + }}}} + }, + id="filetree1 **/*.txt" + ), + pytest.param( + {'**/cycle'}, + FILETREE_2, + { + 'cylc-run': {'foo': {'bar': Path('sym-run/cylc-run/foo/bar')}}, + 'sym-run': {'cylc-run': {'foo': {'bar': { + '.service': {'db': None}, + 'flow.cylc': None, + 'share': Path('sym-share/cylc-run/foo/bar/share') + }}}}, + 'sym-share': {'cylc-run': {'foo': {'bar': { + 'share': {} + }}}}, + 'sym-cycle': {'cylc-run': {}} + }, + id="filetree2 **/cycle" + ), + pytest.param( + {'share'}, + FILETREE_2, + { + 'cylc-run': {'foo': {'bar': Path('sym-run/cylc-run/foo/bar')}}, + 'sym-run': {'cylc-run': {'foo': {'bar': { + '.service': {'db': None}, + 'flow.cylc': None, + }}}}, + 'sym-share': {'cylc-run': {}}, + 'sym-cycle': {'cylc-run': {'foo': {'bar': { + 'share': { + 'cycle': { + 'macklunkey.txt': None + } + } + }}}} + }, + id="filetree2 share" + ), + pytest.param( + {'**'}, + FILETREE_2, + { + 'cylc-run': {}, + 'sym-run': {'cylc-run': {}}, + 'sym-share': {'cylc-run': {}}, + 'sym-cycle': {'cylc-run': {}} + }, + id="filetree2 **" + ), + pytest.param( + {'*'}, + FILETREE_2, + { + 'cylc-run': {'foo': {'bar': Path('sym-run/cylc-run/foo/bar')}}, + 'sym-run': {'cylc-run': {'foo': {'bar': { + '.service': {'db': None}, + }}}}, + 'sym-share': {'cylc-run': {}}, + 'sym-cycle': {'cylc-run': {'foo': {'bar': { + 'share': { + 'cycle': { + 'macklunkey.txt': None + } + } + }}}} + }, + id="filetree2 *" + ), + pytest.param( # Check https://bugs.python.org/issue35201 has no effect + {'non-exist/**'}, + FILETREE_2, + FILETREE_2, + id="filetree2 non-exist/**" + ), + pytest.param( + {'**'}, + FILETREE_3, + { + 'cylc-run': {}, + 'sym-run': {'cylc-run': {}}, + 'sym-cycle': {'cylc-run': {}}, + }, + id="filetree3 **" + ), + pytest.param( + {'**'}, + FILETREE_4, + { + 'cylc-run': {}, + 'sym-cycle': {'cylc-run': {}}, + }, + id="filetree4 **" + ) + ], +) +def test_clean__targeted( + rm_dirs: Set[str], + initial_filetree: Dict[str, Any], + filetree_left_behind: Dict[str, Any], + caplog: pytest.LogCaptureFixture, tmp_run_dir: Callable, + filetree_for_testing_cylc_clean: Callable +) -> None: + """Test clean(), particularly that it does not follow and delete symlinks + (apart from the standard symlink dirs). + + This is similar to test__clean_using_glob(), but the filetree expected to + remain after cleaning is different due to the tidy up of empty dirs. + + Params: + rm_dirs: The glob patterns to test. + initial_filetree: The filetree to test against. + files_left_behind: The filetree expected to remain after + clean() is called (excluding /you-shall-not-pass, + which is always expected to remain). + """ + # --- Setup --- + caplog.set_level(logging.DEBUG, CYLC_LOG) + tmp_run_dir() + reg = 'foo/bar' + run_dir: Path + files_to_delete: List[str] + files_not_to_delete: List[str] + run_dir, files_to_delete, files_not_to_delete = ( + filetree_for_testing_cylc_clean( + reg, initial_filetree, filetree_left_behind) + ) + # --- Test --- + workflow_files.clean(reg, run_dir, rm_dirs) + for file in files_not_to_delete: + assert os.path.exists(file) is True + for file in files_to_delete: + assert os.path.lexists(file) is False PLATFORMS = { @@ -406,68 +981,91 @@ def test_clean_broken_symlink_run_dir(monkeypatch, tmp_path): @pytest.mark.parametrize( 'install_targets_map, failed_platforms, expected_platforms, expected_err', [ - ( - {'localhost': [PLATFORMS['exeter']]}, None, None, None + pytest.param( + {'localhost': [PLATFORMS['exeter']]}, None, None, None, + id="Only localhost install target - no remote clean" ), - ( + pytest.param( { 'localhost': [PLATFORMS['exeter']], 'picard': [PLATFORMS['enterprise']] }, None, ['enterprise'], - None + None, + id="Localhost and remote install target" ), - ( + pytest.param( { 'picard': [PLATFORMS['enterprise'], PLATFORMS['stargazer']], 'janeway': [PLATFORMS['voyager']] }, None, ['enterprise', 'voyager'], - None + None, + id="Only remote install targets" ), - ( + pytest.param( { 'picard': [PLATFORMS['enterprise'], PLATFORMS['stargazer']], 'janeway': [PLATFORMS['voyager']] }, - ['enterprise'], + {'enterprise': 255}, ['enterprise', 'stargazer', 'voyager'], - None + None, + id="Install target with 1 failed, 1 successful platform" ), - ( + pytest.param( { 'picard': [PLATFORMS['enterprise'], PLATFORMS['stargazer']], 'janeway': [PLATFORMS['voyager']] }, - ['enterprise', 'stargazer'], + {'enterprise': 255, 'stargazer': 255}, ['enterprise', 'stargazer', 'voyager'], - (CylcError, "Could not clean on install targets: picard") + (CylcError, "Could not clean on install targets: picard"), + id="Install target with all failed platforms" ), - ( + pytest.param( { 'picard': [PLATFORMS['enterprise']], 'janeway': [PLATFORMS['voyager']] }, + {'enterprise': 255, 'voyager': 255}, ['enterprise', 'voyager'], - ['enterprise', 'voyager'], - (CylcError, "Could not clean on install targets: picard, janeway") - ) + (CylcError, "Could not clean on install targets: picard, janeway"), + id="All install targets have all failed platforms" + ), + pytest.param( + { + 'picard': [PLATFORMS['enterprise'], PLATFORMS['stargazer']] + }, + {'enterprise': 1}, + ['enterprise'], + (CylcError, "Could not clean on install targets: picard"), + id=("Remote clean cmd fails on a platform for non-SSH reason - " + "does not retry") + ), ] ) -def test_remote_clean(install_targets_map, failed_platforms, - expected_platforms, expected_err, monkeypatch, caplog): +def test_remote_clean( + install_targets_map: Dict[str, Any], + failed_platforms: Optional[Dict[str, int]], + expected_platforms: Optional[List[str]], + expected_err: Optional[Tuple[Type[Exception], str]], + monkeymock: MonkeyMock, monkeypatch: pytest.MonkeyPatch, + caplog: pytest.LogCaptureFixture +) -> None: """Test remote_clean() logic. Params: - install_targets_map (dict): The map that would be returned by + install_targets_map The map that would be returned by platforms.get_install_target_to_platforms_map() - failed_platforms (list): If specified, any platforms that clean will - artificially fail on in this test case. - expected_platforms (list): If specified, all the platforms that the + failed_platforms: If specified, any platforms that clean will + artificially fail on in this test case. The key is the platform + name, the value is the remote clean cmd return code. + expected_platforms: If specified, all the platforms that the remote clean cmd is expected to run on. - expected_err (tuple): If specified, a tuple of the form + expected_err: If specified, a tuple of the form (Exception, str) giving an exception that is expected to be raised. """ # ----- Setup ----- @@ -476,22 +1074,23 @@ def test_remote_clean(install_targets_map, failed_platforms, 'cylc.flow.workflow_files.get_install_target_to_platforms_map', lambda x: install_targets_map) # Remove randomness: - mocked_shuffle = mock.Mock() - monkeypatch.setattr('cylc.flow.workflow_files.shuffle', mocked_shuffle) + monkeymock('cylc.flow.workflow_files.shuffle') - def mocked_remote_clean_cmd_side_effect(reg, platform, timeout): + def mocked_remote_clean_cmd_side_effect(reg, platform, rm_dirs, timeout): proc_ret_code = 0 if failed_platforms and platform['name'] in failed_platforms: - proc_ret_code = 1 + proc_ret_code = failed_platforms[platform['name']] return mock.Mock( poll=lambda: proc_ret_code, - communicate=lambda: (b"", b""), - args=[]) + communicate=lambda: ("", ""), + args=[] + ) - mocked_remote_clean_cmd = mock.Mock( + mocked_remote_clean_cmd = monkeymock( + 'cylc.flow.workflow_files._remote_clean_cmd', + spec=_remote_clean_cmd, side_effect=mocked_remote_clean_cmd_side_effect) - monkeypatch.setattr( - 'cylc.flow.workflow_files._remote_clean_cmd', mocked_remote_clean_cmd) + rm_dirs = ["whatever"] # ----- Test ----- reg = 'foo' platform_names = ( @@ -500,14 +1099,15 @@ def mocked_remote_clean_cmd_side_effect(reg, platform, timeout): err, msg = expected_err with pytest.raises(err) as exc: workflow_files.remote_clean( - reg, platform_names, timeout='irrelevant') + reg, platform_names, rm_dirs, timeout='irrelevant') assert msg in str(exc.value) else: - workflow_files.remote_clean(reg, platform_names, timeout='irrelevant') + workflow_files.remote_clean( + reg, platform_names, rm_dirs, timeout='irrelevant') if expected_platforms: for p_name in expected_platforms: mocked_remote_clean_cmd.assert_any_call( - reg, PLATFORMS[p_name], 'irrelevant') + reg, PLATFORMS[p_name], rm_dirs, 'irrelevant') else: mocked_remote_clean_cmd.assert_not_called() if failed_platforms: @@ -515,31 +1115,87 @@ def mocked_remote_clean_cmd_side_effect(reg, platform, timeout): assert f"{p_name}: {TaskRemoteMgmtError.MSG_TIDY}" in caplog.text -def test_remove_empty_reg_parents(tmp_path): +@pytest.mark.parametrize( + 'rm_dirs, expected_args', + [ + (None, []), + (['holodeck', 'ten_forward'], + ['--rm', 'holodeck', '--rm', 'ten_forward']) + ] +) +def test_remote_clean_cmd( + rm_dirs: Optional[List[str]], + expected_args: List[str], + monkeymock: MonkeyMock +) -> None: + """Test _remote_clean_cmd() + + Params: + rm_dirs: Argument passed to _remote_clean_cmd(). + expected_args: Expected CLI arguments of the cylc clean command that + gets constructed. + """ + reg = 'jean/luc/picard' + platform = {'name': 'enterprise', 'install target': 'mars'} + mock_construct_ssh_cmd = monkeymock( + 'cylc.flow.workflow_files.construct_ssh_cmd', return_value=['blah']) + monkeymock('cylc.flow.workflow_files.Popen') + + workflow_files._remote_clean_cmd(reg, platform, rm_dirs, timeout='dunno') + args, kwargs = mock_construct_ssh_cmd.call_args + constructed_cmd = args[0] + assert constructed_cmd == ['clean', '--local-only', reg, *expected_args] + + +def test_remove_empty_parents(tmp_path: Path): """Test that _remove_empty_parents() doesn't remove parents containing a sibling.""" + # -- Setup -- reg = 'foo/bar/baz/qux' path = tmp_path.joinpath(reg) tmp_path.joinpath('foo/bar/baz').mkdir(parents=True) + # Note qux does not exist, but that shouldn't matter sibling_reg = 'foo/darmok' sibling_path = tmp_path.joinpath(sibling_reg) sibling_path.mkdir() - workflow_files._remove_empty_reg_parents(reg, path) + # -- Test -- + workflow_files._remove_empty_parents(path, reg) assert tmp_path.joinpath('foo/bar').exists() is False assert tmp_path.joinpath('foo').exists() is True - # Also path must be absolute - with pytest.raises(ValueError) as exc: - workflow_files._remove_empty_reg_parents( - 'foo/darmok', 'meow/foo/darmok') - assert 'Path must be absolute' in str(exc.value) # Check it skips non-existent dirs, and stops at the right place too tmp_path.joinpath('foo/bar').mkdir() sibling_path.rmdir() - workflow_files._remove_empty_reg_parents(reg, path) + workflow_files._remove_empty_parents(path, reg) assert tmp_path.joinpath('foo').exists() is False assert tmp_path.exists() is True +@pytest.mark.parametrize( + 'path, tail, exc_msg', + [ + pytest.param( + 'meow/foo/darmok', 'foo/darmok', "path must be absolute", + id="relative path" + ), + pytest.param( + '/meow/foo/darmok', '/foo/darmok', + "tail must not be an absolute path", + id="absolute tail" + ), + pytest.param( + '/meow/foo/darmok', 'foo/jalad', + "path '/meow/foo/darmok' does not end with 'foo/jalad'", + id="tail not in path" + ) + ] +) +def test_remove_empty_parents_bad(path: str, tail: str, exc_msg: str): + """Test that _remove_empty_parents() fails appropriately with bad args.""" + with pytest.raises(ValueError) as exc: + workflow_files._remove_empty_parents(path, tail) + assert exc_msg in str(exc.value) + + @pytest.mark.parametrize( 'run_dir, srv_dir', [