diff --git a/CHANGES.md b/CHANGES.md
index 18fe1e70315..d4148bb9670 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -80,6 +80,12 @@ version control information on installation of a workflow.
[#4222](https://github.com/cylc/cylc-flow/pull/4222) - Fix bug where a
workflow's public database file was not closed properly.
+[#4237](https://github.com/cylc/cylc-flow/pull/4237) - `cylc clean` can now
+remove specific sub-directories instead of the whole run directory, using the
+`--rm` option. There are also the options `--local-only` and `--remote-only`
+for choosing to only clean on the local filesystem or remote install targets
+respectively.
+
### Fixes
[#4248](https://github.com/cylc/cylc-flow/pull/4248)
diff --git a/cylc/flow/exceptions.py b/cylc/flow/exceptions.py
index 609b41e84d8..286bf6800e1 100644
--- a/cylc/flow/exceptions.py
+++ b/cylc/flow/exceptions.py
@@ -15,8 +15,8 @@
# along with this program. If not, see .
"""Exceptions for "expected" errors."""
-
-from typing import Iterable
+import errno
+from typing import Callable, Iterable, NoReturn, Tuple, Type
class CylcError(Exception):
@@ -99,6 +99,31 @@ class WorkflowFilesError(CylcError):
"""Exception for errors related to workflow files/directories."""
+def handle_rmtree_err(
+ function: Callable,
+ path: str,
+ excinfo: Tuple[Type[Exception], Exception, object]
+) -> NoReturn:
+ """Error handler for shutil.rmtree."""
+ exc = excinfo[1]
+ if isinstance(exc, OSError) and exc.errno == errno.ENOTEMPTY:
+ # "Directory not empty", likely due to filesystem lag
+ raise FileRemovalError(exc)
+ raise exc
+
+
+class FileRemovalError(CylcError):
+ """Exception for errors during deletion of files/directories, which are
+ probably the filesystem's fault, not Cylc's."""
+
+ def __init__(self, exc: Exception) -> None:
+ CylcError.__init__(
+ self,
+ f"{exc}. This is probably a temporary issue with the filesystem, "
+ "not a problem with Cylc."
+ )
+
+
class TaskRemoteMgmtError(CylcError):
"""Exceptions initialising workflow run directory of remote job host."""
diff --git a/cylc/flow/pathutil.py b/cylc/flow/pathutil.py
index 163af6e215f..91b06d720ca 100644
--- a/cylc/flow/pathutil.py
+++ b/cylc/flow/pathutil.py
@@ -19,17 +19,19 @@
from pathlib import Path
import re
from shutil import rmtree
-from typing import Union
+from typing import Dict, Iterable, Set, Union
from cylc.flow import LOG
from cylc.flow.cfgspec.glbl_cfg import glbl_cfg
-from cylc.flow.exceptions import WorkflowFilesError
+from cylc.flow.exceptions import (
+ UserInputError, WorkflowFilesError, handle_rmtree_err
+)
from cylc.flow.platforms import get_localhost_install_target
# Note: do not import this elsewhere, as it might bypass unit test
# monkeypatching:
-_CYLC_RUN_DIR = '$HOME/cylc-run'
+_CYLC_RUN_DIR = os.path.join('$HOME', 'cylc-run')
def expand_path(*args: Union[Path, str]) -> str:
@@ -128,15 +130,17 @@ def make_workflow_run_tree(workflow):
LOG.debug(f'{dir_}: directory created')
-def make_localhost_symlinks(rund, named_sub_dir):
+def make_localhost_symlinks(
+ rund: Union[Path, str], named_sub_dir: str
+) -> Dict[str, Union[Path, str]]:
"""Creates symlinks for any configured symlink dirs from glbl_cfg.
Args:
rund: the entire run directory path
named_sub_dir: e.g flow_name/run1
Returns:
- dict - A dictionary of Symlinks with sources as keys and
- destinations as values: ``{source: destination}``
+ Dictionary of symlinks with sources as keys and
+ destinations as values: ``{source: destination}``
"""
dirs_to_symlink = get_dirs_to_symlink(
@@ -144,80 +148,89 @@ def make_localhost_symlinks(rund, named_sub_dir):
symlinks_created = {}
for key, value in dirs_to_symlink.items():
if key == 'run':
- dst = rund
+ symlink_path = rund
else:
- dst = os.path.join(rund, key)
- src = expand_path(value)
- if '$' in src:
+ symlink_path = os.path.join(rund, key)
+ target = expand_path(value)
+ if '$' in target:
raise WorkflowFilesError(
- f'Unable to create symlink to {src}.'
+ f'Unable to create symlink to {target}.'
f' \'{value}\' contains an invalid environment variable.'
' Please check configuration.')
- symlink_success = make_symlink(src, dst)
- # symlink info returned for logging purposes, symlinks created
- # before logs as this dir may be a symlink.
+ symlink_success = make_symlink(symlink_path, target)
+ # Symlink info returned for logging purposes. Symlinks should be
+ # created before logs as the log dir may be a symlink.
if symlink_success:
- symlinks_created[src] = dst
+ symlinks_created[target] = symlink_path
return symlinks_created
-def get_dirs_to_symlink(install_target, flow_name):
+def get_dirs_to_symlink(install_target: str, flow_name: str) -> Dict[str, str]:
"""Returns dictionary of directories to symlink from glbcfg.
- Note the paths should remain unexpanded, to be expanded on the remote.
+
+ Note the paths should remain unexpanded, to be expanded on the remote.
"""
- dirs_to_symlink = {}
+ dirs_to_symlink: Dict[str, str] = {}
symlink_conf = glbl_cfg().get(['symlink dirs'])
if install_target not in symlink_conf.keys():
return dirs_to_symlink
base_dir = symlink_conf[install_target]['run']
- if base_dir is not None:
+ if base_dir:
dirs_to_symlink['run'] = os.path.join(base_dir, 'cylc-run', flow_name)
for dir_ in ['log', 'share', 'share/cycle', 'work']:
link = symlink_conf[install_target][dir_]
- if link is None or link == base_dir:
+ if (not link) or link == base_dir:
continue
dirs_to_symlink[dir_] = os.path.join(link, 'cylc-run', flow_name, dir_)
return dirs_to_symlink
-def make_symlink(src, dst):
+def make_symlink(path: Union[Path, str], target: Union[Path, str]) -> bool:
"""Makes symlinks for directories.
+
Args:
- src (str): target path, where the files are to be stored.
- dst (str): full path of link that will point to src.
+ path: Absolute path of the desired symlink.
+ target: Absolute path of the symlink's target directory.
"""
- if os.path.exists(dst):
- if os.path.islink(dst) and os.path.samefile(dst, src):
+ path = Path(path)
+ target = Path(target)
+ if path.exists():
+ if path.is_symlink() and path.samefile(target):
# correct symlink already exists
return False
# symlink name is in use by a physical file or directory
# log and return
LOG.debug(
- f"Unable to create {src} symlink. The path {dst} already exists.")
+ f"Unable to create symlink to {target}. "
+ f"The path {path} already exists.")
return False
- elif os.path.islink(dst):
+ elif path.is_symlink():
# remove a bad symlink.
try:
- os.unlink(dst)
- except Exception:
+ path.unlink()
+ except OSError:
raise WorkflowFilesError(
- f"Error when symlinking. Failed to unlink bad symlink {dst}.")
- os.makedirs(src, exist_ok=True)
- os.makedirs(os.path.dirname(dst), exist_ok=True)
+ f"Error when symlinking. Failed to unlink bad symlink {path}.")
+ target.mkdir(parents=True, exist_ok=True)
+ if path.exists():
+ # Trying to link to itself; no symlink needed
+ # (e.g. path's parent is symlink to target's parent)
+ return False
+ path.parent.mkdir(parents=True, exist_ok=True)
try:
- os.symlink(src, dst, target_is_directory=True)
+ path.symlink_to(target)
return True
- except Exception as exc:
+ except OSError as exc:
raise WorkflowFilesError(f"Error when symlinking\n{exc}")
-def remove_dir(path):
- """Delete a directory including contents, including the target directory
- if the specified path is a symlink.
+def remove_dir_and_target(path: Union[Path, str]) -> None:
+ """Delete a directory tree (i.e. including contents), as well as the
+ target directory tree if the specified path is a symlink.
Args:
- path (str): the absolute path of the directory to delete.
+ path: the absolute path of the directory to delete.
"""
if not os.path.isabs(path):
raise ValueError('Path must be absolute')
@@ -228,7 +241,7 @@ def remove_dir(path):
target = os.path.realpath(path)
LOG.debug(
f'Removing symlink target directory: ({path} ->) {target}')
- rmtree(target)
+ rmtree(target, onerror=handle_rmtree_err)
LOG.debug(f'Removing symlink: {path}')
else:
LOG.debug(f'Removing broken symlink: {path}')
@@ -237,7 +250,27 @@ def remove_dir(path):
raise FileNotFoundError(path)
else:
LOG.debug(f'Removing directory: {path}')
- rmtree(path)
+ rmtree(path, onerror=handle_rmtree_err)
+
+
+def remove_dir_or_file(path: Union[Path, str]) -> None:
+ """Delete a directory tree, or a file, or a symlink.
+ Does not follow symlinks.
+
+ Args:
+ path: the absolute path of the directory/file/symlink to delete.
+ """
+ if not os.path.isabs(path):
+ raise ValueError("Path must be absolute")
+ if os.path.islink(path):
+ LOG.debug(f"Removing symlink: {path}")
+ os.remove(path)
+ elif os.path.isfile(path):
+ LOG.debug(f"Removing file: {path}")
+ os.remove(path)
+ else:
+ LOG.debug(f"Removing directory: {path}")
+ rmtree(path, onerror=handle_rmtree_err)
def get_next_rundir_number(run_path):
@@ -249,3 +282,43 @@ def get_next_rundir_number(run_path):
return int(last_run_num) + 1
except OSError:
return 1
+
+
+def parse_rm_dirs(rm_dirs: Iterable[str]) -> Set[str]:
+ """Parse a list of possibly colon-separated dirs (or files or globs).
+ Return the set of all the dirs.
+
+ Used by cylc clean with the --rm option.
+ """
+ result: Set[str] = set()
+ for item in rm_dirs:
+ for part in item.split(':'):
+ part = part.strip()
+ if not part:
+ continue
+ is_dir = part.endswith(os.sep)
+ part = os.path.normpath(part)
+ if os.path.isabs(part):
+ raise UserInputError("--rm option cannot take absolute paths")
+ if part == '.' or part.startswith(f'..{os.sep}'):
+ raise UserInputError(
+ "--rm option cannot take paths that point to the "
+ "run directory or above"
+ )
+ if is_dir:
+ # Preserve trailing slash to ensure it only matches dirs,
+ # not files, when globbing
+ part += os.sep
+ result.add(part)
+ return result
+
+
+def is_relative_to(path1: Union[Path, str], path2: Union[Path, str]) -> bool:
+ """Return whether or not path1 is relative to path2."""
+ # In future, we can just use pathlib.Path.is_relative_to()
+ # when Python 3.9 becomes the minimum supported version
+ try:
+ Path(os.path.normpath(path1)).relative_to(os.path.normpath(path2))
+ except ValueError:
+ return False
+ return True
diff --git a/cylc/flow/scripts/clean.py b/cylc/flow/scripts/clean.py
index 56c62d49c31..79b58103845 100644
--- a/cylc/flow/scripts/clean.py
+++ b/cylc/flow/scripts/clean.py
@@ -32,16 +32,37 @@
# Remove the workflow at ~/cylc-run/foo/bar
$ cylc clean foo/bar
+ # Remove the workflow's log directory
+ $ cylc clean foo/bar --rm log
+
+ # Remove the log and work directories
+ $ cylc clean foo/bar --rm log:work
+ # or
+ $ cylc clean foo/bar --rm log --rm work
+
+ # Remove all job log files from the 2020 cycle points
+ cylc clean foo/bar --rm 'log/job/2020*'
+
+ # Remove all .csv files
+ $ cylc clean foo/bar --rm '**/*.csv'
+
+ # Only remove the workflow on the local filesystem
+ $ cylc clean foo/bar --local-only
+
+ # Only remove the workflow on remote install targets
+ $ cylc clean foo/bar --remote-only
+
"""
+from typing import TYPE_CHECKING
+
import cylc.flow.flags
from cylc.flow import LOG
+from cylc.flow.exceptions import UserInputError
from cylc.flow.loggingutil import CylcLogFormatter
-from cylc.flow.option_parsers import CylcOptionParser as COP
+from cylc.flow.option_parsers import CylcOptionParser as COP, Options
from cylc.flow.terminal import cli_function
-from cylc.flow.workflow_files import clean, init_clean
-
-from typing import TYPE_CHECKING
+from cylc.flow.workflow_files import init_clean
if TYPE_CHECKING:
from optparse import Values
@@ -53,12 +74,26 @@ def get_option_parser():
argdoc=[('REG', "Workflow name")]
)
+ parser.add_option(
+ '--rm', metavar='DIR[:DIR:...]',
+ help="Only clean the specified subdirectories (or files) in the "
+ "run directory, rather than the whole run directory. "
+ "Accepts quoted globs.",
+ action='append', dest='rm_dirs', default=[]
+ )
+
parser.add_option(
'--local-only', '--local',
help="Only clean on the local filesystem (not remote hosts).",
action='store_true', dest='local_only'
)
+ parser.add_option(
+ '--remote-only', '--remote',
+ help="Only clean on remote hosts (not the local filesystem).",
+ action='store_true', dest='remote_only'
+ )
+
parser.add_option(
'--timeout',
help="The number of seconds to wait for cleaning to take place on "
@@ -69,18 +104,23 @@ def get_option_parser():
return parser
+CleanOptions = Options(get_option_parser())
+
+
@cli_function(get_option_parser)
def main(parser: COP, opts: 'Values', reg: str):
- if cylc.flow.flags.verbosity > 1:
+ if cylc.flow.flags.verbosity < 2:
# for readability omit timestamps from logging unless in debug mode
for handler in LOG.handlers:
if isinstance(handler.formatter, CylcLogFormatter):
handler.formatter.configure(timestamp=False)
- if opts.local_only:
- clean(reg)
- else:
- init_clean(reg, opts)
+ if opts.local_only and opts.remote_only:
+ raise UserInputError(
+ "--local and --remote options are mutually exclusive"
+ )
+
+ init_clean(reg, opts)
if __name__ == "__main__":
diff --git a/cylc/flow/task_remote_cmd.py b/cylc/flow/task_remote_cmd.py
index 885f1f5d384..1baeb0da7e8 100644
--- a/cylc/flow/task_remote_cmd.py
+++ b/cylc/flow/task_remote_cmd.py
@@ -83,29 +83,29 @@ def create_client_keys(srvd, install_target):
os.umask(old_umask)
-def remote_init(install_target, rund, *dirs_to_symlink):
+def remote_init(install_target: str, rund: str, *dirs_to_symlink: str) -> None:
"""cylc remote-init
Arguments:
- install_target (str): target to be initialised
- rund (str): workflow run directory
- dirs_to_symlink (list): directories to be symlinked in form
+ install_target: target to be initialised
+ rund: workflow run directory
+ dirs_to_symlink: directories to be symlinked in form
[directory=symlink_location, ...]
"""
rund = os.path.expandvars(rund)
for item in dirs_to_symlink:
key, val = item.split("=", 1)
if key == 'run':
- dst = rund
+ path = rund
else:
- dst = os.path.join(rund, key)
- src = os.path.expandvars(val)
- if '$' in src:
+ path = os.path.join(rund, key)
+ target = os.path.expandvars(val)
+ if '$' in target:
print(REMOTE_INIT_FAILED)
print(f'Error occurred when symlinking.'
- f' {src} contains an invalid environment variable.')
+ f' {target} contains an invalid environment variable.')
return
- make_symlink(src, dst)
+ make_symlink(path, target)
srvd = os.path.join(rund, WorkflowFiles.Service.DIRNAME)
os.makedirs(srvd, exist_ok=True)
diff --git a/cylc/flow/workflow_files.py b/cylc/flow/workflow_files.py
index 863cb12790f..776c8231754 100644
--- a/cylc/flow/workflow_files.py
+++ b/cylc/flow/workflow_files.py
@@ -17,8 +17,11 @@
"""Workflow service files management."""
import aiofiles
+from collections import deque
from contextlib import suppress
from enum import Enum
+from functools import partial
+import glob
import logging
import os
from pathlib import Path
@@ -27,7 +30,10 @@
import shutil
from subprocess import Popen, PIPE, DEVNULL
import time
-from typing import List, Optional, Tuple, TYPE_CHECKING, Union
+from typing import (
+ Any, Container, Deque, Dict, Iterable, List, NamedTuple, Optional, Set,
+ Tuple, TYPE_CHECKING, Union
+)
import zmq.auth
from cylc.flow import LOG
@@ -37,13 +43,18 @@
PlatformLookupError,
ServiceFileError,
TaskRemoteMgmtError,
- WorkflowFilesError)
+ WorkflowFilesError,
+ handle_rmtree_err
+)
from cylc.flow.pathutil import (
expand_path,
get_workflow_run_dir,
make_localhost_symlinks,
- remove_dir,
- get_next_rundir_number)
+ parse_rm_dirs,
+ remove_dir_and_target,
+ get_next_rundir_number,
+ remove_dir_or_file
+)
from cylc.flow.platforms import (
get_install_target_to_platforms_map,
get_localhost_install_target,
@@ -201,14 +212,21 @@ class Install:
SOURCE = 'source'
"""Symlink to the workflow definition (For run dir)."""
- RESERVED_DIRNAMES = [
- LOG_DIR, SHARE_DIR, WORK_DIR, RUN_N, Service.DIRNAME, Install.DIRNAME]
+ RESERVED_DIRNAMES = frozenset([
+ LOG_DIR, SHARE_DIR, WORK_DIR, RUN_N, Service.DIRNAME, Install.DIRNAME
+ ])
"""Reserved directory names that cannot be present in a source dir."""
- RESERVED_NAMES = [
- FLOW_FILE, SUITE_RC, *RESERVED_DIRNAMES]
+ RESERVED_NAMES = frozenset([FLOW_FILE, SUITE_RC, *RESERVED_DIRNAMES])
"""Reserved filenames that cannot be used as run names."""
+ SYMLINK_DIRS = frozenset([
+ SHARE_CYCLE_DIR, SHARE_DIR, LOG_DIR, WORK_DIR, ''
+ ])
+ """The paths of the symlink dirs that may be set in
+ global.cylc[symlink dirs], relative to the run dir
+ ('' represents the run dir)."""
+
class ContactFileFields:
"""Field names present in ``WorkflowFiles.Service.CONTACT``.
@@ -264,6 +282,12 @@ class ContactFileFields:
"""Remote command setting for Scheduler."""
+class RemoteCleanQueueTuple(NamedTuple):
+ proc: 'Popen[str]'
+ install_target: str
+ platforms: List[Dict[str, Any]]
+
+
REG_DELIM = "/"
NO_TITLE = "No title provided"
@@ -578,12 +602,12 @@ def is_installed(path: Union[Path, str]) -> bool:
return cylc_install_folder.is_dir() and source.is_symlink()
-def _clean_check(reg, run_dir):
+def _clean_check(reg: str, run_dir: Path) -> None:
"""Check whether a workflow can be cleaned.
Args:
- reg (str): Workflow name.
- run_dir (str): Path to the workflow run dir on the filesystem.
+ reg: Workflow name.
+ run_dir: Path to the workflow run dir on the filesystem.
"""
validate_flow_name(reg)
reg = os.path.normpath(reg)
@@ -591,6 +615,7 @@ def _clean_check(reg, run_dir):
raise WorkflowFilesError(
"Workflow name cannot be a path that points to the cylc-run "
"directory or above")
+ # Must be dir or broken symlink:
if not run_dir.is_dir() and not run_dir.is_symlink():
msg = f"No directory to clean at {run_dir}"
raise FileNotFoundError(msg)
@@ -606,8 +631,8 @@ def init_clean(reg: str, opts: 'Values') -> None:
scheduler filesystem and remote hosts.
Args:
- reg (str): Workflow name.
- opts (optparse.Values): CLI options object for cylc clean.
+ reg: Workflow name.
+ opts: CLI options object for cylc clean.
"""
local_run_dir = Path(get_workflow_run_dir(reg))
try:
@@ -616,77 +641,191 @@ def init_clean(reg: str, opts: 'Values') -> None:
LOG.info(str(exc))
return
- platform_names = None
- try:
- platform_names = get_platforms_from_db(local_run_dir)
- except FileNotFoundError:
- LOG.info("No workflow database - will only clean locally")
- except ServiceFileError as exc:
- raise ServiceFileError(f"Cannot clean - {exc}")
-
- if platform_names and platform_names != {'localhost'}:
- remote_clean(reg, platform_names, opts.remote_timeout)
- LOG.info("Cleaning on local filesystem")
- clean(reg)
-
-
-def clean(reg):
+ if not opts.local_only:
+ platform_names = None
+ try:
+ platform_names = get_platforms_from_db(local_run_dir)
+ except FileNotFoundError:
+ if opts.remote_only:
+ raise ServiceFileError(
+ "No workflow database - cannot perform remote clean")
+ LOG.info("No workflow database - will only clean locally")
+ except ServiceFileError as exc:
+ raise ServiceFileError(f"Cannot clean - {exc}")
+
+ if platform_names and platform_names != {'localhost'}:
+ remote_clean(
+ reg, platform_names, opts.rm_dirs, opts.remote_timeout)
+
+ if not opts.remote_only:
+ rm_dirs = None
+ if opts.rm_dirs:
+ rm_dirs = parse_rm_dirs(opts.rm_dirs)
+ clean(reg, local_run_dir, rm_dirs)
+
+
+def clean(reg: str, run_dir: Path, rm_dirs: Optional[Set[str]] = None) -> None:
"""Remove a stopped workflow from the local filesystem only.
- Deletes the workflow run directory and any symlink dirs. Note: if the
- run dir has already been manually deleted, it will not be possible to
- clean the symlink dirs.
+ Deletes the workflow run directory and any symlink dirs, or just the
+ specified sub dirs if rm_dirs is specified.
+
+ Note: if the run dir has already been manually deleted, it will not be
+ possible to clean any symlink dirs.
Args:
- reg (str): Workflow name.
+ reg: Workflow name.
+ run_dir: Absolute path of the workflow's run dir.
+ rm_dirs: Set of sub dirs to remove instead of the whole run dir.
"""
- run_dir = Path(get_workflow_run_dir(reg))
- try:
- _clean_check(reg, run_dir)
- except FileNotFoundError as exc:
- LOG.info(str(exc))
- return
-
- # Note: 'share/cycle' must come first, and '' must come last
- for possible_symlink in (
- WorkflowFiles.SHARE_CYCLE_DIR, WorkflowFiles.SHARE_DIR,
- WorkflowFiles.LOG_DIR, WorkflowFiles.WORK_DIR, ''):
- name = Path(possible_symlink)
- path = Path(run_dir, possible_symlink)
+ LOG.info("Cleaning on local filesystem")
+ symlink_dirs = get_symlink_dirs(reg, run_dir)
+ if rm_dirs is not None:
+ # Targeted clean
+ for pattern in rm_dirs:
+ _clean_using_glob(run_dir, pattern, symlink_dirs)
+ else:
+ # Wholesale clean
+ for symlink in symlink_dirs:
+ # Remove /cylc-run//
+ remove_dir_and_target(run_dir / symlink)
+ if '' not in symlink_dirs:
+ # if run dir isn't a symlink dir and hasn't been deleted yet
+ remove_dir_and_target(run_dir)
+ # Tidy up if necessary
+ # Remove any empty parents of run dir up to ~/cylc-run/
+ _remove_empty_parents(run_dir, reg)
+ for symlink, target in symlink_dirs.items():
+ # Remove empty parents of symlink target up to /cylc-run/
+ _remove_empty_parents(target, Path(reg, symlink))
+
+
+def get_symlink_dirs(reg: str, run_dir: Union[Path, str]) -> Dict[str, Path]:
+ """Return the standard symlink dirs and their targets if they exist in
+ the workflow run dir.
+
+ Note: does not check the global config, only the existing run dir filetree.
+
+ Raises WorkflowFilesError if a symlink points to an unexpected place.
+ """
+ ret: Dict[str, Path] = {}
+ for _dir in sorted(WorkflowFiles.SYMLINK_DIRS, reverse=True):
+ # ordered by deepest to shallowest
+ path = Path(run_dir, _dir)
if path.is_symlink():
- # Ensure symlink is pointing to expected directory. If not,
- # something is wrong and we should abort
target = path.resolve()
if target.exists() and not target.is_dir():
raise WorkflowFilesError(
f'Invalid Cylc symlink directory {path} -> {target}\n'
f'Target is not a directory')
- expected_end = str(Path('cylc-run', reg, name))
+ expected_end = str(Path('cylc-run', reg, _dir))
if not str(target).endswith(expected_end):
raise WorkflowFilesError(
f'Invalid Cylc symlink directory {path} -> {target}\n'
f'Expected target to end with "{expected_end}"')
- # Remove /cylc-run/
- target_cylc_run_dir = str(target).rsplit(str(reg), 1)[0]
- target_reg_dir = Path(target_cylc_run_dir, reg)
- if target_reg_dir.is_dir():
- remove_dir(target_reg_dir)
- # Remove empty parents
- _remove_empty_reg_parents(reg, target_reg_dir)
+ ret[_dir] = target
+ return ret
- remove_dir(run_dir)
- _remove_empty_reg_parents(reg, run_dir)
+def glob_in_run_dir(
+ run_dir: Union[Path, str], pattern: str, symlink_dirs: Container[Path]
+) -> List[Path]:
+ """Execute a (recursive) glob search in the given run directory.
-def remote_clean(reg, platform_names, timeout):
+ Returns list of any absolute paths that match the pattern. However:
+ * Does not follow symlinks (apart from the spcedified symlink dirs).
+ * Also does not return matching subpaths of matching directories (because
+ that would be redundant).
+
+ Args:
+ run_dir: Absolute path of the workflow run dir.
+ pattern: The glob pattern.
+ symlink_dirs: Absolute paths to the workflow's symlink dirs.
+ """
+ # Note: use os.path.join, not pathlib, to preserve trailing slash if
+ # present in pattern
+ pattern = os.path.join(glob.escape(str(run_dir)), pattern)
+ # Note: don't use pathlib.Path.glob() because when you give it an exact
+ # filename instead of pattern, it doesn't return broken symlinks
+ matches = sorted(Path(i) for i in glob.iglob(pattern, recursive=True))
+ # sort guarantees parents come before their children
+ if len(matches) == 1 and not os.path.lexists(matches[0]):
+ # https://bugs.python.org/issue35201
+ return []
+ results: List[Path] = []
+ subpath_excludes: Set[Path] = set()
+ for path in matches:
+ for rel_ancestor in reversed(path.relative_to(run_dir).parents):
+ ancestor = run_dir / rel_ancestor
+ if ancestor in subpath_excludes:
+ break
+ if ancestor.is_symlink() and ancestor not in symlink_dirs:
+ # Do not follow non-standard symlinks
+ subpath_excludes.add(ancestor)
+ break
+ if (not symlink_dirs) and ancestor in results:
+ # We can be sure all subpaths of this ancestor are redundant
+ subpath_excludes.add(ancestor)
+ break
+ if ancestor == path.parent: # noqa: SIM102
+ # Final iteration over ancestors
+ if ancestor in matches and path not in symlink_dirs:
+ # Redundant (but don't exclude subpaths in case any of the
+ # subpaths are std symlink dirs)
+ break
+ else: # No break
+ results.append(path)
+ return results
+
+
+def _clean_using_glob(
+ run_dir: Path, pattern: str, symlink_dirs: Iterable[str]
+) -> None:
+ """Delete the files/dirs in the run dir that match the pattern.
+
+ Does not follow symlinks (apart from the standard symlink dirs).
+
+ Args:
+ run_dir: Absolute path of workflow run dir.
+ pattern: The glob pattern.
+ symlink_dirs: Paths of the workflow's symlink dirs relative to
+ the run dir.
+ """
+ abs_symlink_dirs = tuple(sorted(
+ (run_dir / d for d in symlink_dirs),
+ reverse=True # ordered by deepest to shallowest
+ ))
+ matches = glob_in_run_dir(run_dir, pattern, abs_symlink_dirs)
+ if not matches:
+ return
+ # First clean any matching symlink dirs
+ for path in abs_symlink_dirs:
+ if path in matches:
+ remove_dir_and_target(path)
+ if path == run_dir:
+ # We have deleted the run dir
+ return
+ matches.remove(path)
+ # Now clean the rest
+ for path in matches:
+ remove_dir_or_file(path)
+
+
+def remote_clean(
+ reg: str,
+ platform_names: Iterable[str],
+ rm_dirs: Optional[List[str]] = None,
+ timeout: str = '120'
+) -> None:
"""Run subprocesses to clean workflows on remote install targets
(skip localhost), given a set of platform names to look up.
Args:
- reg (str): Workflow name.
- platform_names (list): List of platform names to look up in the global
+ reg: Workflow name.
+ platform_names: List of platform names to look up in the global
config, in order to determine the install targets to clean on.
- timeout (str): Number of seconds to wait before cancelling.
+ rm_dirs: Sub dirs to remove instead of the whole run dir.
+ timeout: Number of seconds to wait before cancelling.
"""
try:
install_targets_map = (
@@ -695,95 +834,112 @@ def remote_clean(reg, platform_names, timeout):
raise PlatformLookupError(
"Cannot clean on remote platforms as the workflow database is "
f"out of date/inconsistent with the global config - {exc}")
-
- pool = []
+ queue: Deque[RemoteCleanQueueTuple] = deque()
+ remote_clean_cmd = partial(
+ _remote_clean_cmd, reg=reg, rm_dirs=rm_dirs, timeout=timeout
+ )
for target, platforms in install_targets_map.items():
if target == get_localhost_install_target():
continue
shuffle(platforms)
LOG.info(
- f"Cleaning on install target: {platforms[0]['install target']}")
+ f"Cleaning on install target: {platforms[0]['install target']}"
+ )
# Issue ssh command:
- pool.append(
- (_remote_clean_cmd(reg, platforms[0], timeout), target, platforms)
+ queue.append(
+ RemoteCleanQueueTuple(
+ remote_clean_cmd(platform=platforms[0]), target, platforms
+ )
)
- failed_targets = []
+ failed_targets: List[str] = []
# Handle subproc pool results almost concurrently:
- while pool:
- for proc, target, platforms in pool:
- ret_code = proc.poll()
- if ret_code is None: # proc still running
- continue
- pool.remove((proc, target, platforms))
- out, err = (f.decode() for f in proc.communicate())
- if out:
- LOG.debug(out)
- if ret_code:
- # Try again using the next platform for this install target:
- this_platform = platforms.pop(0)
- excn = TaskRemoteMgmtError(
- TaskRemoteMgmtError.MSG_TIDY, this_platform['name'],
- " ".join(proc.args), ret_code, out, err)
- LOG.debug(excn)
- if platforms:
- pool.append(
- (_remote_clean_cmd(reg, platforms[0], timeout),
- target, platforms)
+ while queue:
+ item = queue.popleft()
+ ret_code = item.proc.poll()
+ if ret_code is None: # proc still running
+ queue.append(item)
+ continue
+ out, err = item.proc.communicate()
+ if out:
+ LOG.debug(out)
+ if ret_code:
+ this_platform = item.platforms.pop(0)
+ LOG.debug(TaskRemoteMgmtError(
+ TaskRemoteMgmtError.MSG_TIDY, this_platform['name'],
+ item.proc.args, ret_code, out, err
+ ))
+ if ret_code == 255 and item.platforms:
+ # SSH error; try again using the next platform for this
+ # install target
+ queue.append(
+ item._replace(
+ proc=remote_clean_cmd(platform=item.platforms[0])
)
- else: # Exhausted list of platforms
- failed_targets.append(target)
- elif err:
- LOG.debug(err)
+ )
+ else: # Exhausted list of platforms
+ failed_targets.append(item.install_target)
+ elif err:
+ LOG.debug(err)
time.sleep(0.2)
if failed_targets:
raise CylcError(
- f"Could not clean on install targets: {', '.join(failed_targets)}")
+ f"Could not clean on install targets: {', '.join(failed_targets)}"
+ )
-def _remote_clean_cmd(reg, platform, timeout):
+def _remote_clean_cmd(
+ reg: str,
+ platform: Dict[str, str],
+ rm_dirs: Optional[List[str]],
+ timeout: str
+) -> 'Popen[str]':
"""Remove a stopped workflow on a remote host.
Call "cylc clean --local-only" over ssh and return the subprocess.
Args:
- reg (str): Workflow name.
- platform (dict): Config for the platform on which to remove the
- workflow.
- timeout (str): Number of seconds to wait before cancelling the command.
+ reg: Workflow name.
+ platform: Config for the platform on which to remove the workflow.
+ rm_dirs: Sub dirs to remove instead of the whole run dir.
+ timeout: Number of seconds to wait before cancelling the command.
"""
LOG.debug(
f'Cleaning on install target: {platform["install target"]} '
f'(using platform: {platform["name"]})'
)
- cmd = construct_ssh_cmd(
- ['clean', '--local-only', reg],
- platform,
- timeout=timeout,
- set_verbosity=True
- )
+ cmd = ['clean', '--local-only', reg]
+ if rm_dirs is not None:
+ for item in rm_dirs:
+ cmd.extend(['--rm', item])
+ cmd = construct_ssh_cmd(cmd, platform, timeout=timeout, set_verbosity=True)
LOG.debug(" ".join(cmd))
- return Popen(cmd, stdin=DEVNULL, stdout=PIPE, stderr=PIPE)
+ return Popen(cmd, stdin=DEVNULL, stdout=PIPE, stderr=PIPE, text=True)
-def _remove_empty_reg_parents(reg, path):
- """If reg is nested e.g. a/b/c, work our way up the tree, removing empty
- parents only.
+def _remove_empty_parents(
+ path: Union[Path, str], tail: Union[Path, str]
+) -> None:
+ """Work our way up the tail of path, removing empty dirs only.
Args:
- reg (str): workflow name, e.g. a/b/c
- path (str): path to this directory, e.g. /foo/bar/a/b/c
+ path: Absolute path to the directory, e.g. /foo/bar/a/b/c
+ tail: The tail of the path to work our way up, e.g. a/b/c
Example:
- _remove_empty_reg_parents('a/b/c', '/foo/bar/a/b/c') would remove
+ _remove_empty_parents('/foo/bar/a/b/c', 'a/b/c') would remove
/foo/bar/a/b (assuming it's empty), then /foo/bar/a (assuming it's
empty).
"""
- reg = Path(reg)
- reg_depth = len(reg.parts) - 1
path = Path(path)
if not path.is_absolute():
- raise ValueError('Path must be absolute')
- for i in range(reg_depth):
+ raise ValueError('path must be absolute')
+ tail = Path(tail)
+ if tail.is_absolute():
+ raise ValueError('tail must not be an absolute path')
+ if not str(path).endswith(str(tail)):
+ raise ValueError(f"path '{path}' does not end with '{tail}'")
+ depth = len(tail.parts) - 1
+ for i in range(depth):
parent = path.parents[i]
if not parent.is_dir():
continue
@@ -803,7 +959,7 @@ def remove_keys_on_server(keys):
# Remove client public key folder
client_public_key_dir = keys["client_public_key"].key_path
if os.path.exists(client_public_key_dir):
- shutil.rmtree(client_public_key_dir)
+ shutil.rmtree(client_public_key_dir, onerror=handle_rmtree_err)
def create_server_keys(keys, workflow_srv_dir):
@@ -926,7 +1082,7 @@ def _check_child_dirs(path: Union[Path, str], depth_count: int = 1):
reg_path: Union[Path, str] = os.path.normpath(run_dir)
parent_dir = os.path.dirname(reg_path)
- while parent_dir not in ['', '/']:
+ while parent_dir not in ['', os.sep]:
if is_valid_run_dir(parent_dir):
raise WorkflowFilesError(
exc_msg.format(parent_dir, get_cylc_run_abs_path(parent_dir))
diff --git a/tests/functional/cylc-clean/02-targeted.t b/tests/functional/cylc-clean/02-targeted.t
new file mode 100644
index 00000000000..08a7c417b5d
--- /dev/null
+++ b/tests/functional/cylc-clean/02-targeted.t
@@ -0,0 +1,165 @@
+#!/usr/bin/env bash
+# THIS FILE IS PART OF THE CYLC WORKFLOW ENGINE.
+# Copyright (C) NIWA & British Crown (Met Office) & Contributors.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+# -----------------------------------------------------------------------------
+# Test the cylc clean command
+
+. "$(dirname "$0")/test_header"
+if ! command -v 'tree' >'/dev/null'; then
+ skip_all '"tree" command not available'
+fi
+set_test_number 16
+
+# Generate random name for symlink dirs to avoid any clashes with other tests
+SYM_NAME="$(mktemp -u)"
+SYM_NAME="sym-${SYM_NAME##*tmp.}"
+
+create_test_global_config "" "
+[symlink dirs]
+ [[localhost]]
+ run = ${TEST_DIR}/${SYM_NAME}/run
+ log = ${TEST_DIR}/${SYM_NAME}/other
+ share = ${TEST_DIR}/${SYM_NAME}/other
+ work = ${TEST_DIR}/${SYM_NAME}/other
+ # Need to override any symlink dirs set in global-tests.cylc:
+ share/cycle =
+"
+init_workflow "${TEST_NAME_BASE}" << __FLOW__
+[scheduler]
+ allow implicit tasks = True
+[scheduling]
+ [[graph]]
+ R1 = jalad
+__FLOW__
+# Also create some other file
+touch "${WORKFLOW_RUN_DIR}/darmok.cylc"
+
+run_ok "${TEST_NAME_BASE}-val" cylc validate "$WORKFLOW_NAME"
+
+FUNCTIONAL_DIR="${TEST_SOURCE_DIR_BASE%/*}"
+# -----------------------------------------------------------------------------
+TEST_NAME="${TEST_NAME_BASE}-run-dir-readlink-pre-clean"
+readlink "$WORKFLOW_RUN_DIR" > "${TEST_NAME}.stdout"
+cmp_ok "${TEST_NAME}.stdout" <<< "${TEST_DIR}/${SYM_NAME}/run/cylc-run/${WORKFLOW_NAME}"
+
+TEST_NAME="${TEST_NAME_BASE}-testdir-tree-pre-clean"
+run_ok "${TEST_NAME}" tree -L 5 --noreport --charset=ascii "${TEST_DIR}/${SYM_NAME}/"*"/cylc-run/${CYLC_TEST_REG_BASE}"
+# Note: backticks need to be escaped in the heredoc
+cmp_ok "${TEST_NAME}.stdout" << __TREE__
+${TEST_DIR}/${SYM_NAME}/other/cylc-run/${CYLC_TEST_REG_BASE}
+\`-- ${FUNCTIONAL_DIR}
+ \`-- cylc-clean
+ \`-- ${TEST_NAME_BASE}
+ |-- log
+ | \`-- install
+ |-- share
+ \`-- work
+${TEST_DIR}/${SYM_NAME}/run/cylc-run/${CYLC_TEST_REG_BASE}
+\`-- ${FUNCTIONAL_DIR}
+ \`-- cylc-clean
+ \`-- ${TEST_NAME_BASE}
+ |-- _cylc-install
+ | \`-- source -> ${TEST_DIR}/${WORKFLOW_NAME}
+ |-- darmok.cylc
+ |-- flow.cylc
+ |-- log -> ${TEST_DIR}/${SYM_NAME}/other/cylc-run/${WORKFLOW_NAME}/log
+ |-- share -> ${TEST_DIR}/${SYM_NAME}/other/cylc-run/${WORKFLOW_NAME}/share
+ \`-- work -> ${TEST_DIR}/${SYM_NAME}/other/cylc-run/${WORKFLOW_NAME}/work
+__TREE__
+# -----------------------------------------------------------------------------
+# Clean the log dir only
+run_ok "${TEST_NAME_BASE}-targeted-clean-1" cylc clean "$WORKFLOW_NAME" \
+ --rm log
+
+TEST_NAME="${TEST_NAME_BASE}-testdir-tree-1"
+run_ok "${TEST_NAME}" tree -L 5 --noreport --charset=ascii "${TEST_DIR}/${SYM_NAME}/"*"/cylc-run/${CYLC_TEST_REG_BASE}"
+cmp_ok "${TEST_NAME}.stdout" << __TREE__
+${TEST_DIR}/${SYM_NAME}/other/cylc-run/${CYLC_TEST_REG_BASE}
+\`-- ${FUNCTIONAL_DIR}
+ \`-- cylc-clean
+ \`-- ${TEST_NAME_BASE}
+ |-- share
+ \`-- work
+${TEST_DIR}/${SYM_NAME}/run/cylc-run/${CYLC_TEST_REG_BASE}
+\`-- ${FUNCTIONAL_DIR}
+ \`-- cylc-clean
+ \`-- ${TEST_NAME_BASE}
+ |-- _cylc-install
+ | \`-- source -> ${TEST_DIR}/${WORKFLOW_NAME}
+ |-- darmok.cylc
+ |-- flow.cylc
+ |-- share -> ${TEST_DIR}/${SYM_NAME}/other/cylc-run/${WORKFLOW_NAME}/share
+ \`-- work -> ${TEST_DIR}/${SYM_NAME}/other/cylc-run/${WORKFLOW_NAME}/work
+__TREE__
+# -----------------------------------------------------------------------------
+# Clean using a glob
+run_ok "${TEST_NAME_BASE}-targeted-clean-2" cylc clean "$WORKFLOW_NAME" \
+ --rm 'wo*'
+
+TEST_NAME="${TEST_NAME_BASE}-testdir-tree-2"
+run_ok "${TEST_NAME}" tree -L 5 --noreport --charset=ascii "${TEST_DIR}/${SYM_NAME}/"*"/cylc-run/${CYLC_TEST_REG_BASE}"
+# Note: when using glob, the symlink dir target is not deleted
+cmp_ok "${TEST_NAME}.stdout" << __TREE__
+${TEST_DIR}/${SYM_NAME}/other/cylc-run/${CYLC_TEST_REG_BASE}
+\`-- ${FUNCTIONAL_DIR}
+ \`-- cylc-clean
+ \`-- ${TEST_NAME_BASE}
+ \`-- share
+${TEST_DIR}/${SYM_NAME}/run/cylc-run/${CYLC_TEST_REG_BASE}
+\`-- ${FUNCTIONAL_DIR}
+ \`-- cylc-clean
+ \`-- ${TEST_NAME_BASE}
+ |-- _cylc-install
+ | \`-- source -> ${TEST_DIR}/${WORKFLOW_NAME}
+ |-- darmok.cylc
+ |-- flow.cylc
+ \`-- share -> ${TEST_DIR}/${SYM_NAME}/other/cylc-run/${WORKFLOW_NAME}/share
+__TREE__
+# -----------------------------------------------------------------------------
+# Clean the last remaining symlink dir
+run_ok "${TEST_NAME_BASE}-targeted-clean-3" cylc clean "$WORKFLOW_NAME" \
+ --rm 'share'
+
+TEST_NAME="${TEST_NAME_BASE}-testdir-tree-3"
+run_ok "${TEST_NAME}" tree -L 5 --noreport --charset=ascii "${TEST_DIR}/${SYM_NAME}/"*"/cylc-run/${CYLC_TEST_REG_BASE}"
+cmp_ok "${TEST_NAME}.stdout" << __TREE__
+${TEST_DIR}/${SYM_NAME}/run/cylc-run/${CYLC_TEST_REG_BASE}
+\`-- ${FUNCTIONAL_DIR}
+ \`-- cylc-clean
+ \`-- ${TEST_NAME_BASE}
+ |-- _cylc-install
+ | \`-- source -> ${TEST_DIR}/${WORKFLOW_NAME}
+ |-- darmok.cylc
+ \`-- flow.cylc
+__TREE__
+# -----------------------------------------------------------------------------
+# Clean multiple things
+run_ok "${TEST_NAME_BASE}-targeted-clean-3" cylc clean "$WORKFLOW_NAME" \
+ --rm 'flow.cylc' --rm 'darmok.cylc'
+
+TEST_NAME="${TEST_NAME_BASE}-testdir-tree-3"
+run_ok "${TEST_NAME}" tree -L 5 --noreport --charset=ascii "${TEST_DIR}/${SYM_NAME}/"*"/cylc-run/${CYLC_TEST_REG_BASE}"
+cmp_ok "${TEST_NAME}.stdout" << __TREE__
+${TEST_DIR}/${SYM_NAME}/run/cylc-run/${CYLC_TEST_REG_BASE}
+\`-- ${FUNCTIONAL_DIR}
+ \`-- cylc-clean
+ \`-- ${TEST_NAME_BASE}
+ \`-- _cylc-install
+ \`-- source -> ${TEST_DIR}/${WORKFLOW_NAME}
+__TREE__
+# -----------------------------------------------------------------------------
+purge
+exit
diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py
index 22a58120fea..a8151140e42 100644
--- a/tests/unit/conftest.py
+++ b/tests/unit/conftest.py
@@ -14,11 +14,11 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
"""Standard pytest fixtures for unit tests."""
-from cylc.flow.data_store_mgr import DataStoreMgr
+
from pathlib import Path
import pytest
from shutil import rmtree
-from typing import Optional
+from typing import Any, Callable, Optional
from unittest.mock import create_autospec, Mock
from cylc.flow.cfgspec.globalcfg import SPEC
@@ -27,12 +27,37 @@
ISO8601_CYCLING_TYPE,
INTEGER_CYCLING_TYPE
)
+from cylc.flow.data_store_mgr import DataStoreMgr
from cylc.flow.parsec.config import ParsecConfig
from cylc.flow.scheduler import Scheduler
from cylc.flow.workflow_files import WorkflowFiles
from cylc.flow.xtrigger_mgr import XtriggerManager
+# Type alias for monkeymock()
+MonkeyMock = Callable[..., Mock]
+
+
+@pytest.fixture
+def monkeymock(monkeypatch: pytest.MonkeyPatch):
+ """Fixture that patches a function/attr with a Mock and returns that Mock.
+
+ Args:
+ pypath: The Python-style import path to be patched.
+ **kwargs: Any kwargs to set on the Mock.
+
+ Example:
+ mock_clean = monkeymock('cylc.flow.workflow_files.clean')
+ something() # calls workflow_files.clean
+ assert mock_clean.called is True
+ """
+ def inner(pypath: str, **kwargs: Any) -> Mock:
+ _mock = Mock(**kwargs)
+ monkeypatch.setattr(pypath, _mock)
+ return _mock
+ return inner
+
+
@pytest.fixture
def tmp_run_dir(tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
"""Fixture that patches the cylc-run dir to the tests's {tmp_path}/cylc-run
diff --git a/tests/unit/filetree.py b/tests/unit/filetree.py
new file mode 100644
index 00000000000..bef09e4e556
--- /dev/null
+++ b/tests/unit/filetree.py
@@ -0,0 +1,167 @@
+# THIS FILE IS PART OF THE CYLC WORKFLOW ENGINE.
+# Copyright (C) NIWA & British Crown (Met Office) & Contributors.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+"""Utilities for testing workflow directory structure.
+
+(There should be no tests in this module.)
+
+A filetree is represented by a dict like so:
+ {
+ # Dirs are represented by dicts (which are also sub-filetrees):
+ 'dir': {
+ 'another-dir': {
+ # Files are represented by None:
+ 'file.txt': None
+ }
+ },
+ # Symlinks are represented by pathlib.Path, with the target represented
+ # by the relative path from the tmp_path directory:
+ 'symlink': Path('dir/another-dir')
+ }
+"""
+
+from pathlib import Path
+from typing import Any, Dict, List
+
+
+def create_filetree(
+ filetree: Dict[str, Any], location: Path, root: Path
+) -> None:
+ """Create the directory structure represented by the filetree dict.
+
+ Args:
+ filetree: The filetree to create.
+ location: The absolute path in which to create the filetree.
+ root: The top-level dir from which relative symlink targets are
+ located (typically tmp_path).
+ """
+ for name, entry in filetree.items():
+ path = location / name
+ if isinstance(entry, dict):
+ path.mkdir(exist_ok=True)
+ create_filetree(entry, path, root)
+ elif isinstance(entry, Path):
+ path.symlink_to(root / entry)
+ else:
+
+ path.touch()
+
+
+def get_filetree_as_list(
+ filetree: Dict[str, Any], location: Path
+) -> List[str]:
+ """Return a list of the paths in a filetree.
+
+ Args:
+ filetree: The filetree to listify.
+ location: The absolute path to the filetree.
+ """
+ ret: List[str] = []
+ for name, entry in filetree.items():
+ path = location / name
+ ret.append(str(path))
+ if isinstance(entry, dict):
+ ret.extend(get_filetree_as_list(entry, path))
+ return ret
+
+
+FILETREE_1 = {
+ 'cylc-run': {'foo': {'bar': {
+ '.service': {'db': None},
+ 'flow.cylc': None,
+ 'log': Path('sym/cylc-run/foo/bar/log'),
+ 'mirkwood': Path('you-shall-not-pass/mirkwood'),
+ 'rincewind.txt': Path('you-shall-not-pass/rincewind.txt')
+ }}},
+ 'sym': {'cylc-run': {'foo': {'bar': {
+ 'log': {
+ 'darmok': Path('you-shall-not-pass/darmok'),
+ 'temba.txt': Path('you-shall-not-pass/temba.txt'),
+ 'bib': {
+ 'fortuna.txt': None
+ }
+ }
+ }}}},
+ 'you-shall-not-pass': { # Nothing in here should get deleted
+ 'darmok': {
+ 'jalad.txt': None
+ },
+ 'mirkwood': {
+ 'spiders.txt': None
+ },
+ 'rincewind.txt': None,
+ 'temba.txt': None
+ }
+}
+
+FILETREE_2 = {
+ 'cylc-run': {'foo': {'bar': Path('sym-run/cylc-run/foo/bar')}},
+ 'sym-run': {'cylc-run': {'foo': {'bar': {
+ '.service': {'db': None},
+ 'flow.cylc': None,
+ 'share': Path('sym-share/cylc-run/foo/bar/share')
+ }}}},
+ 'sym-share': {'cylc-run': {'foo': {'bar': {
+ 'share': {
+ 'cycle': Path('sym-cycle/cylc-run/foo/bar/share/cycle')
+ }
+ }}}},
+ 'sym-cycle': {'cylc-run': {'foo': {'bar': {
+ 'share': {
+ 'cycle': {
+ 'macklunkey.txt': None
+ }
+ }
+ }}}},
+ 'you-shall-not-pass': {}
+}
+
+FILETREE_3 = {
+ 'cylc-run': {'foo': {'bar': Path('sym-run/cylc-run/foo/bar')}},
+ 'sym-run': {'cylc-run': {'foo': {'bar': {
+ '.service': {'db': None},
+ 'flow.cylc': None,
+ 'share': {
+ 'cycle': Path('sym-cycle/cylc-run/foo/bar/share/cycle')
+ }
+ }}}},
+ 'sym-cycle': {'cylc-run': {'foo': {'bar': {
+ 'share': {
+ 'cycle': {
+ 'sokath.txt': None
+ }
+ }
+ }}}},
+ 'you-shall-not-pass': {}
+}
+
+FILETREE_4 = {
+ 'cylc-run': {'foo': {'bar': {
+ '.service': {'db': None},
+ 'flow.cylc': None,
+ 'share': {
+ 'cycle': Path('sym-cycle/cylc-run/foo/bar/share/cycle')
+ }
+ }}},
+ 'sym-cycle': {'cylc-run': {'foo': {'bar': {
+ 'share': {
+ 'cycle': {
+ 'kiazi.txt': None
+ }
+ }
+ }}}},
+ 'you-shall-not-pass': {}
+}
diff --git a/tests/unit/test_pathutil.py b/tests/unit/test_pathutil.py
index e3f982ef651..3af92a833db 100644
--- a/tests/unit/test_pathutil.py
+++ b/tests/unit/test_pathutil.py
@@ -18,11 +18,11 @@
import logging
import os
from pathlib import Path
-from typing import Callable, Iterable, List
import pytest
-from unittest.mock import patch, call
+from typing import Callable, Dict, Iterable, List, Set
+from unittest.mock import Mock, patch, call
-from cylc.flow.exceptions import WorkflowFilesError
+from cylc.flow.exceptions import UserInputError, WorkflowFilesError
from cylc.flow.pathutil import (
expand_path,
get_dirs_to_symlink,
@@ -39,9 +39,13 @@
get_workflow_test_log_name,
make_localhost_symlinks,
make_workflow_run_tree,
- remove_dir
+ parse_rm_dirs,
+ remove_dir_and_target,
+ remove_dir_or_file
)
+from .conftest import MonkeyMock
+
HOME = Path.home()
@@ -166,82 +170,109 @@ def test_make_workflow_run_tree(
@pytest.mark.parametrize(
- 'workflow, install_target, mocked_glbl_cfg, output',
+ 'mocked_glbl_cfg, output',
[
- ( # basic
- 'workflow1', 'install_target_1',
- '''[symlink dirs]
- [[install_target_1]]
- run = $DEE
- work = $DAH
- log = $DUH
- share = $DOH
- share/cycle = $DAH''', {
- 'run': '$DEE/cylc-run/workflow1',
- 'work': '$DAH/cylc-run/workflow1/work',
- 'log': '$DUH/cylc-run/workflow1/log',
- 'share': '$DOH/cylc-run/workflow1/share',
- 'share/cycle': '$DAH/cylc-run/workflow1/share/cycle'
- }),
- ( # remove nested run symlinks
- 'workflow2', 'install_target_2',
+ pytest.param( # basic
+ '''
+ [symlink dirs]
+ [[the_matrix]]
+ run = $DEE
+ work = $DAH
+ log = $DUH
+ share = $DOH
+ share/cycle = $DAH
+ ''',
+ {
+ 'run': '$DEE/cylc-run/morpheus',
+ 'work': '$DAH/cylc-run/morpheus/work',
+ 'log': '$DUH/cylc-run/morpheus/log',
+ 'share': '$DOH/cylc-run/morpheus/share',
+ 'share/cycle': '$DAH/cylc-run/morpheus/share/cycle'
+ },
+ id="basic"
+ ),
+ pytest.param( # remove nested run symlinks
+ '''
+ [symlink dirs]
+ [[the_matrix]]
+ run = $DEE
+ work = $DAH
+ log = $DEE
+ share = $DOH
+ share/cycle = $DAH
+ ''',
+ {
+ 'run': '$DEE/cylc-run/morpheus',
+ 'work': '$DAH/cylc-run/morpheus/work',
+ 'share': '$DOH/cylc-run/morpheus/share',
+ 'share/cycle': '$DAH/cylc-run/morpheus/share/cycle'
+ },
+ id="remove nested run symlinks"
+ ),
+ pytest.param( # remove only nested run symlinks
'''
- [symlink dirs]
- [[install_target_2]]
- run = $DEE
- work = $DAH
- log = $DEE
- share = $DOH
- share/cycle = $DAH
-
- ''', {
- 'run': '$DEE/cylc-run/workflow2',
- 'work': '$DAH/cylc-run/workflow2/work',
- 'share': '$DOH/cylc-run/workflow2/share',
- 'share/cycle': '$DAH/cylc-run/workflow2/share/cycle'
- }),
- ( # remove only nested run symlinks
- 'workflow3', 'install_target_3', '''
- [symlink dirs]
- [[install_target_3]]
- run = $DOH
- log = $DEE
- share = $DEE
- ''', {
- 'run': '$DOH/cylc-run/workflow3',
- 'log': '$DEE/cylc-run/workflow3/log',
- 'share': '$DEE/cylc-run/workflow3/share'})
- ], ids=["1", "2", "3"])
-def test_get_dirs_to_symlink(workflow, install_target, mocked_glbl_cfg,
- output, mock_glbl_cfg, tmp_path, monkeypatch):
- # Using env variable 'DEE' to ensure dirs returned are unexpanded
- monkeypatch.setenv('DEE', str(tmp_path))
+ [symlink dirs]
+ [[the_matrix]]
+ run = $DOH
+ log = $DEE
+ share = $DEE
+ ''',
+ {
+ 'run': '$DOH/cylc-run/morpheus',
+ 'log': '$DEE/cylc-run/morpheus/log',
+ 'share': '$DEE/cylc-run/morpheus/share'
+ },
+ id="remove only nested run symlinks"
+ ),
+ pytest.param( # blank entries
+ '''
+ [symlink dirs]
+ [[the_matrix]]
+ run =
+ log = ""
+ share =
+ work = " "
+ ''',
+ {},
+ id="blank entries"
+ )
+ ]
+)
+def test_get_dirs_to_symlink(
+ mocked_glbl_cfg: str,
+ output: Dict[str, str],
+ mock_glbl_cfg: Callable,
+ monkeypatch: pytest.MonkeyPatch
+) -> None:
+ # Set env var 'DEE', but we expect it to be unexpanded
+ monkeypatch.setenv('DEE', 'poiuytrewq')
mock_glbl_cfg('cylc.flow.pathutil.glbl_cfg', mocked_glbl_cfg)
- dirs = get_dirs_to_symlink(install_target, workflow)
+ dirs = get_dirs_to_symlink('the_matrix', 'morpheus')
assert dirs == output
-@patch('os.path.expandvars')
@patch('cylc.flow.pathutil.get_workflow_run_dir')
-@patch('cylc.flow.pathutil.make_symlink')
@patch('cylc.flow.pathutil.get_dirs_to_symlink')
def test_make_localhost_symlinks_calls_make_symlink_for_each_key_value_dir(
- mocked_dirs_to_symlink,
- mocked_make_symlink,
- mocked_get_workflow_run_dir, mocked_expandvars):
-
+ mocked_dirs_to_symlink: Mock,
+ mocked_get_workflow_run_dir: Mock,
+ monkeypatch: pytest.MonkeyPatch, monkeymock: MonkeyMock
+) -> None:
mocked_dirs_to_symlink.return_value = {
- 'run': '$DOH/workflow3',
- 'log': '$DEE/workflow3/log',
- 'share': '$DEE/workflow3/share'}
+ 'run': '$DOH/trinity',
+ 'log': '$DEE/trinity/log',
+ 'share': '$DEE/trinity/share'
+ }
mocked_get_workflow_run_dir.return_value = "rund"
- mocked_expandvars.return_value = "expanded"
- mocked_make_symlink.return_value = True
+ for v in ('DOH', 'DEE'):
+ monkeypatch.setenv(v, 'expanded')
+ mocked_make_symlink = monkeymock('cylc.flow.pathutil.make_symlink')
+
make_localhost_symlinks('rund', 'workflow')
mocked_make_symlink.assert_has_calls([
- call('expanded', 'rund'),
- call('expanded', 'rund/log'),
- call('expanded', 'rund/share')
+ call('rund', 'expanded/trinity'),
+ call('rund/log', 'expanded/trinity/log'),
+ call('rund/share', 'expanded/trinity/share')
])
@@ -271,8 +302,9 @@ def test_incorrect_environment_variables_raise_error(
('file', NotADirectoryError),
(None, FileNotFoundError)]
)
-def test_remove_dir(filetype, expected_err, tmp_path):
- """Test that remove_dir() can delete nested dirs and handle bad paths."""
+def test_remove_dir_and_target(filetype, expected_err, tmp_path):
+ """Test that remove_dir_and_target() can delete nested dirs and handle
+ bad paths."""
test_path = tmp_path.joinpath('foo/bar')
if filetype == 'dir':
# Test removal of sub directories too
@@ -286,9 +318,9 @@ def test_remove_dir(filetype, expected_err, tmp_path):
if expected_err:
with pytest.raises(expected_err):
- remove_dir(test_path)
+ remove_dir_and_target(test_path)
else:
- remove_dir(test_path)
+ remove_dir_and_target(test_path)
assert test_path.exists() is False
assert test_path.is_symlink() is False
@@ -299,8 +331,9 @@ def test_remove_dir(filetype, expected_err, tmp_path):
('file', NotADirectoryError),
(None, None)]
)
-def test_remove_dir_symlinks(target, expected_err, tmp_path):
- """Test that remove_dir() can delete symlinks, including the target."""
+def test_remove_dir_and_target_symlinks(target, expected_err, tmp_path):
+ """Test that remove_dir_and_target() can delete symlinks, including
+ the target."""
target_path = tmp_path.joinpath('x/y')
target_path.mkdir(parents=True)
@@ -322,21 +355,85 @@ def test_remove_dir_symlinks(target, expected_err, tmp_path):
if expected_err:
with pytest.raises(expected_err):
- remove_dir(symlink_path)
+ remove_dir_and_target(symlink_path)
else:
- remove_dir(symlink_path)
+ remove_dir_and_target(symlink_path)
for path in [symlink_path, target_path]:
assert path.exists() is False
assert path.is_symlink() is False
-def test_remove_dir_relative(tmp_path):
- """Test that you cannot use remove_dir() on a relative path.
+@pytest.mark.parametrize(
+ 'func', [remove_dir_and_target, remove_dir_or_file]
+)
+def test_remove_relative(func: Callable, tmp_path: Path):
+ """Test that you cannot use remove_dir_and_target() or remove_dir_or_file()
+ on relative paths.
When removing a path, we want to be absolute-ly sure where it is!
"""
# cd to temp dir in case we accidentally succeed in deleting the path
os.chdir(tmp_path)
with pytest.raises(ValueError) as cm:
- remove_dir('foo/bar')
+ func('foo/bar')
assert 'Path must be absolute' in str(cm.value)
+
+
+def test_remove_dir_or_file(tmp_path: Path):
+ """Test remove_dir_or_file()"""
+ a_file = tmp_path.joinpath('fyle')
+ a_file.touch()
+ assert a_file.exists()
+ remove_dir_or_file(a_file)
+ assert a_file.exists() is False
+
+ a_symlink = tmp_path.joinpath('simlynk')
+ a_file.touch()
+ a_symlink.symlink_to(a_file)
+ assert a_symlink.is_symlink()
+ remove_dir_or_file(a_symlink)
+ assert a_symlink.is_symlink() is False
+ assert a_file.exists()
+
+ a_dir = tmp_path.joinpath('der')
+ # Add contents to check whole tree is removed
+ sub_dir = a_dir.joinpath('sub_der')
+ sub_dir.mkdir(parents=True)
+ sub_dir.joinpath('fyle').touch()
+ assert a_dir.exists()
+ remove_dir_or_file(a_dir)
+ assert a_dir.exists() is False
+
+
+@pytest.mark.parametrize(
+ 'dirs, expected',
+ [
+ ([" "], set()),
+ (["foo", "bar"], {"foo", "bar"}),
+ (["foo:bar", "baz/*"], {"foo", "bar", "baz/*"}),
+ ([" :foo :bar:"], {"foo", "bar"}),
+ (["foo/:bar//baz "], {"foo/", "bar/baz"}),
+ ([".foo", "..bar", " ./gah"], {".foo", "..bar", "gah"})
+ ]
+)
+def test_parse_rm_dirs(dirs: List[str], expected: Set[str]):
+ """Test parse_dirs()"""
+ assert parse_rm_dirs(dirs) == expected
+
+
+@pytest.mark.parametrize(
+ 'dirs, err_msg',
+ [
+ (["foo:/bar"],
+ "--rm option cannot take absolute paths"),
+ (["foo:../bar"],
+ "cannot take paths that point to the run directory or above"),
+ (["foo:bar/../../gah"],
+ "cannot take paths that point to the run directory or above"),
+ ]
+)
+def test_parse_rm_dirs__bad(dirs: List[str], err_msg: str):
+ """Test parse_dirs() with bad inputs"""
+ with pytest.raises(UserInputError) as exc:
+ parse_rm_dirs(dirs)
+ assert err_msg in str(exc.value)
diff --git a/tests/unit/test_workflow_files.py b/tests/unit/test_workflow_files.py
index 9cf7ca02d7e..b2817be2373 100644
--- a/tests/unit/test_workflow_files.py
+++ b/tests/unit/test_workflow_files.py
@@ -14,12 +14,13 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
-from cylc.flow.option_parsers import Options
+from glob import iglob
import logging
+import os
from pathlib import Path
import pytest
import shutil
-from typing import Callable, Optional, Tuple, Type
+from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union
from unittest import mock
from cylc.flow import CYLC_LOG
@@ -30,16 +31,30 @@
TaskRemoteMgmtError,
WorkflowFilesError
)
-from cylc.flow.scripts.clean import get_option_parser as _clean_GOP
+from cylc.flow.pathutil import parse_rm_dirs
+from cylc.flow.scripts.clean import CleanOptions
from cylc.flow.workflow_files import (
WorkflowFiles,
+ _clean_using_glob,
+ _remote_clean_cmd,
check_flow_file,
check_nested_run_dirs,
+ get_symlink_dirs,
get_workflow_source_dir,
- reinstall_workflow, search_install_source_dirs)
-
+ glob_in_run_dir,
+ reinstall_workflow,
+ search_install_source_dirs
+)
-CleanOpts = Options(_clean_GOP())
+from .conftest import MonkeyMock
+from .filetree import (
+ FILETREE_1,
+ FILETREE_2,
+ FILETREE_3,
+ FILETREE_4,
+ create_filetree,
+ get_filetree_as_list
+)
@pytest.mark.parametrize(
@@ -132,25 +147,32 @@ def test_validate_flow_name(reg, expected_err, expected_msg):
@pytest.mark.parametrize(
- 'reg, not_stopped, err, err_msg',
- [('foo/..', False, WorkflowFilesError,
+ 'reg, stopped, err, err_msg',
+ [('foo/..', True, WorkflowFilesError,
"cannot be a path that points to the cylc-run directory or above"),
- ('foo/../..', False, WorkflowFilesError,
+ ('foo/../..', True, WorkflowFilesError,
"cannot be a path that points to the cylc-run directory or above"),
- ('foo', True, ServiceFileError, "Cannot remove running workflow")]
+ ('foo', False, ServiceFileError, "Cannot remove running workflow")]
)
-def test_clean_check(reg, not_stopped, err, err_msg, monkeypatch):
+def test_clean_check_fail(
+ reg: str,
+ stopped: bool,
+ err: Type[Exception],
+ err_msg: str,
+ monkeypatch: pytest.MonkeyPatch
+) -> None:
"""Test that _clean_check() fails appropriately.
Params:
- reg (str): Workflow name.
- err (Exception): Expected error.
- err_msg (str): Message that is expected to be in the exception.
+ reg: Workflow name.
+ stopped: Whether the workflow is stopped when _clean_check() is called.
+ err: Expected error class.
+ err_msg: Message that is expected to be in the exception.
"""
run_dir = mock.Mock()
- def mocked_detect_old_contact_file(reg):
- if not_stopped:
+ def mocked_detect_old_contact_file(*a, **k):
+ if not stopped:
raise ServiceFileError('Mocked error')
monkeypatch.setattr('cylc.flow.workflow_files.detect_old_contact_file',
@@ -162,221 +184,774 @@ def mocked_detect_old_contact_file(reg):
@pytest.mark.parametrize(
- 'reg, props, clean_called, remote_clean_called',
+ 'db_platforms, opts, clean_called, remote_clean_called',
[
- ('foo/bar', {
- 'no dir': True,
- 'log': (logging.INFO, "No directory to clean")
- }, False, False),
- ('foo/bar', {
- 'no db': True,
- 'log': (logging.INFO,
- "No workflow database - will only clean locally")
- }, True, False),
- ('foo/bar', {
- 'db platforms': ['localhost', 'localhost']
- }, True, False),
- ('foo/bar', {
- 'db platforms': ['horse']
- }, True, True)
+ pytest.param(
+ ['localhost', 'localhost'], {}, True, False,
+ id="Only platform in DB is localhost"
+ ),
+ pytest.param(
+ ['horse'], {}, True, True,
+ id="Remote platform in DB"
+ ),
+ pytest.param(
+ ['horse'], {'local_only': True}, True, False,
+ id="Local clean only"
+ ),
+ pytest.param(
+ ['horse'], {'remote_only': True}, False, True,
+ id="Remote clean only"
+ )
]
)
-def test_init_clean_ok(
- reg, props, clean_called, remote_clean_called,
- monkeypatch, tmp_path, caplog):
+def test_init_clean(
+ db_platforms: List[str],
+ opts: Dict[str, Any],
+ clean_called: bool,
+ remote_clean_called: bool,
+ monkeypatch: pytest.MonkeyPatch, monkeymock: MonkeyMock,
+ tmp_run_dir: Callable
+) -> None:
"""Test the init_clean() function logic.
Params:
- reg (str): Workflow name.
- props (dict): Possible values are (all optional):
- 'no dir' (bool): If True, do not create run dir for this test case.
- 'log' (tuple): Of form (severity, msg):
- severity (logging level): Expected level e.g. logging.INFO.
- msg (str): Message that is expected to be logged.
- 'db platforms' (list): Platform names that would be loaded from
- the database.
- 'no db' (bool): If True, workflow database doesn't exist.
- clean_called (bool): If a local clean is expected to go ahead.
- remote_clean_called (bool): If a remote clean is expected to go ahead.
+ db_platforms: Platform names that would be loaded from the database.
+ opts: Any options passed to the cylc clean CLI.
+ clean_called: If a local clean is expected to go ahead.
+ remote_clean_called: If a remote clean is expected to go ahead.
"""
- # --- Setup ---
- expected_log = props.get('log')
- if expected_log:
- level, msg = expected_log
- caplog.set_level(level, CYLC_LOG)
-
- tmp_path.joinpath('cylc-run').mkdir()
- run_dir = tmp_path.joinpath('cylc-run', reg)
- if not props.get('no dir'):
- run_dir.mkdir(parents=True)
-
- mocked_clean = mock.Mock()
- monkeypatch.setattr('cylc.flow.workflow_files.clean', mocked_clean)
- mocked_remote_clean = mock.Mock()
- monkeypatch.setattr('cylc.flow.workflow_files.remote_clean',
- mocked_remote_clean)
- monkeypatch.setattr('cylc.flow.workflow_files.get_workflow_run_dir',
- lambda x: tmp_path.joinpath('cylc-run', x))
-
- _get_platforms_from_db = workflow_files.get_platforms_from_db
-
- def mocked_get_platforms_from_db(run_dir):
- if props.get('no dir') or props.get('no db'):
- return _get_platforms_from_db(run_dir) # Handle as normal
- return set(props.get('db platforms'))
+ reg = 'foo/bar/'
+ tmp_run_dir(reg)
+ mock_clean = monkeymock('cylc.flow.workflow_files.clean')
+ mock_remote_clean = monkeymock('cylc.flow.workflow_files.remote_clean')
+ monkeypatch.setattr('cylc.flow.workflow_files.get_platforms_from_db',
+ lambda x: set(db_platforms))
+
+ workflow_files.init_clean(reg, opts=CleanOptions(**opts))
+ assert mock_clean.called is clean_called
+ assert mock_remote_clean.called is remote_clean_called
+
+
+def test_init_clean_no_dir(
+ monkeymock: MonkeyMock, tmp_run_dir: Callable,
+ caplog: pytest.LogCaptureFixture
+) -> None:
+ """Test init_clean() when the run dir doesn't exist"""
+ caplog.set_level(logging.INFO, CYLC_LOG)
+ tmp_run_dir()
+ mock_clean = monkeymock('cylc.flow.workflow_files.clean')
+ mock_remote_clean = monkeymock('cylc.flow.workflow_files.remote_clean')
+
+ workflow_files.init_clean('foo/bar', opts=CleanOptions())
+ assert "No directory to clean" in caplog.text
+ assert mock_clean.called is False
+ assert mock_remote_clean.called is False
+
+
+def test_init_clean_no_db(
+ monkeymock: MonkeyMock, tmp_run_dir: Callable,
+ caplog: pytest.LogCaptureFixture
+) -> None:
+ """Test init_clean() when the workflow database doesn't exist"""
+ caplog.set_level(logging.INFO, CYLC_LOG)
+ tmp_run_dir('bespin')
+ mock_clean = monkeymock('cylc.flow.workflow_files.clean')
+ mock_remote_clean = monkeymock('cylc.flow.workflow_files.remote_clean')
+
+ workflow_files.init_clean('bespin', opts=CleanOptions())
+ assert "No workflow database - will only clean locally" in caplog.text
+ assert mock_clean.called is True
+ assert mock_remote_clean.called is False
+
+
+def test_init_clean_remote_only_no_db(
+ monkeymock: MonkeyMock, tmp_run_dir: Callable
+) -> None:
+ """Test remote-only init_clean() when the workflow DB doesn't exist"""
+ tmp_run_dir('hoth')
+ mock_clean = monkeymock('cylc.flow.workflow_files.clean')
+ mock_remote_clean = monkeymock('cylc.flow.workflow_files.remote_clean')
+
+ with pytest.raises(ServiceFileError) as exc:
+ workflow_files.init_clean('hoth', opts=CleanOptions(remote_only=True))
+ assert ("No workflow database - cannot perform remote clean"
+ in str(exc.value))
+ assert mock_clean.called is False
+ assert mock_remote_clean.called is False
+
+
+def test_init_clean_running_workflow(
+ monkeypatch: pytest.MonkeyPatch, tmp_run_dir: Callable
+) -> None:
+ """Test init_clean() fails when workflow is still running"""
+ def mock_err(*args, **kwargs):
+ raise ServiceFileError("Mocked error")
+ monkeypatch.setattr('cylc.flow.workflow_files.detect_old_contact_file',
+ mock_err)
+ tmp_run_dir('yavin')
+ with pytest.raises(ServiceFileError) as exc:
+ workflow_files.init_clean('yavin', opts=mock.Mock())
+ assert "Cannot remove running workflow" in str(exc.value)
+
+
+@pytest.mark.parametrize(
+ 'rm_dirs, expected_clean, expected_remote_clean',
+ [(None, None, []),
+ (["r2d2:c3po"], {"r2d2", "c3po"}, ["r2d2:c3po"])]
+)
+def test_init_clean_rm_dirs(
+ rm_dirs: Optional[List[str]],
+ expected_clean: Set[str],
+ expected_remote_clean: List[str],
+ monkeymock: MonkeyMock, monkeypatch: pytest.MonkeyPatch,
+ tmp_run_dir: Callable
+) -> None:
+ """Test init_clean() with the --rm option.
+
+ Params:
+ rm_dirs: Dirs given by --rm option.
+ expected_clean: The dirs that are expected to be passed to clean().
+ expected_remote_clean: The dirs that are expected to be passed to
+ remote_clean().
+ """
+ reg = 'dagobah'
+ run_dir: Path = tmp_run_dir(reg)
+ mock_clean = monkeymock('cylc.flow.workflow_files.clean')
+ mock_remote_clean = monkeymock('cylc.flow.workflow_files.remote_clean')
+ platforms = {'platform_one'}
monkeypatch.setattr('cylc.flow.workflow_files.get_platforms_from_db',
- mocked_get_platforms_from_db)
+ lambda x: platforms)
+ opts = CleanOptions(rm_dirs=rm_dirs) if rm_dirs else CleanOptions()
- # --- The actual test ---
- workflow_files.init_clean(reg, opts=mock.Mock())
- if expected_log:
- assert msg in caplog.text
- if clean_called:
- assert mocked_clean.called is True
- else:
- assert mocked_clean.called is False
- if remote_clean_called:
- assert mocked_remote_clean.called is True
- else:
- assert mocked_remote_clean.called is False
+ workflow_files.init_clean(reg, opts=opts)
+ mock_clean.assert_called_with(reg, run_dir, expected_clean)
+ mock_remote_clean.assert_called_with(
+ reg, platforms, expected_remote_clean, opts.remote_timeout)
@pytest.mark.parametrize(
- 'reg, props',
+ 'reg, symlink_dirs, rm_dirs, expected_deleted, expected_remaining',
[
- ('foo/bar/', {}), # Works ok
- ('foo', {'no dir': True}), # Nothing to clean
- ('foo', {
- 'not stopped': True,
- 'err': ServiceFileError,
- 'err msg': 'Cannot remove running workflow'
- }),
- ('foo/bar', {
- 'symlink dirs': {
+ pytest.param(
+ 'foo/bar',
+ {},
+ None,
+ ['cylc-run/foo'],
+ ['cylc-run'],
+ id="Basic clean"
+ ),
+ pytest.param(
+ 'foo/bar/baz',
+ {
'log': 'sym-log',
'share': 'sym-share',
'share/cycle': 'sym-cycle',
'work': 'sym-work'
- }
- }),
- ('foo', {
- 'symlink dirs': {
+ },
+ None,
+ ['cylc-run/foo', 'sym-log/cylc-run/foo', 'sym-share/cylc-run/foo',
+ 'sym-cycle/cylc-run/foo', 'sym-work/cylc-run/foo'],
+ ['cylc-run', 'sym-log/cylc-run', 'sym-share/cylc-run',
+ 'sym-cycle/cylc-run', 'sym-work/cylc-run'],
+ id="Symlink dirs"
+ ),
+ pytest.param(
+ 'foo',
+ {
'run': 'sym-run',
'log': 'sym-log',
'share': 'sym-share',
'share/cycle': 'sym-cycle',
'work': 'sym-work'
- }
- }),
- ('foo', {
- 'bad symlink': {
- 'type': 'file',
- 'path': 'sym-log/cylc-run/foo/meow.txt'
},
- 'err': WorkflowFilesError,
- 'err msg': 'Target is not a directory'
- }),
- ('foo', {
- 'bad symlink': {
- 'type': 'dir',
- 'path': 'sym-log/bad/path'
- },
- 'err': WorkflowFilesError,
- 'err msg': 'Expected target to end with "cylc-run/foo/log"'
- })
+ None,
+ ['cylc-run/foo', 'sym-run/cylc-run/foo', 'sym-log/cylc-run/foo',
+ 'sym-share/cylc-run/foo', 'sym-cycle/cylc-run/foo',
+ 'sym-work/cylc-run/foo'],
+ ['cylc-run', 'sym-run/cylc-run', 'sym-log/cylc-run',
+ 'sym-share/cylc-run', 'sym-cycle/cylc-run',
+ 'sym-work'],
+ id="Symlink dirs including run dir"
+ ),
+ pytest.param(
+ 'foo',
+ {},
+ {'log', 'share'},
+ ['cylc-run/foo/log', 'cylc-run/foo/share'],
+ ['cylc-run/foo/work'],
+ id="Targeted clean"
+ ),
+ pytest.param(
+ 'foo',
+ {'log': 'sym-log'},
+ {'log'},
+ ['cylc-run/foo/log', 'sym-log/cylc-run/foo'],
+ ['cylc-run/foo/work', 'cylc-run/foo/share/cycle',
+ 'sym-log/cylc-run'],
+ id="Targeted clean with symlink dirs"
+ ),
+ pytest.param(
+ 'foo',
+ {},
+ {'share/cy*'},
+ ['cylc-run/foo/share/cycle'],
+ ['cylc-run/foo/log', 'cylc-run/foo/work', 'cylc-run/foo/share'],
+ id="Targeted clean with glob"
+ ),
+ pytest.param(
+ 'foo',
+ {'log': 'sym-log'},
+ {'w*', 'wo*', 'l*', 'lo*'},
+ ['cylc-run/foo/work', 'cylc-run/foo/log', 'sym-log/cylc-run/foo'],
+ ['cylc-run/foo/share', 'cylc-run/foo/share/cycle'],
+ id="Targeted clean with degenerate glob"
+ ),
]
)
-def test_clean(reg, props, monkeypatch, tmp_path):
+def test_clean(
+ reg: str,
+ symlink_dirs: Dict[str, str],
+ rm_dirs: Optional[Set[str]],
+ expected_deleted: List[str],
+ expected_remaining: List[str],
+ tmp_path: Path, tmp_run_dir: Callable
+) -> None:
"""Test the clean() function.
Params:
- reg (str): Workflow name.
- props (dict): Possible values are (all optional):
- 'err' (Exception): Expected error.
- 'err msg' (str): Message that is expected to be in the exception.
- 'no dir' (bool): If True, do not create run dir for this test case.
- 'not stopped' (bool): If True, simulate that the workflow is
- still running.
- 'symlink dirs' (dict): As you would find in the global config
- under [symlink dirs][platform].
- 'bad symlink' (dict): Simulate an invalid log symlink dir:
- 'type' (str): 'file' or 'dir'.
- 'path' (str): Path of the symlink target relative to tmp_path.
+ reg: Workflow name.
+ symlink_dirs: As you would find in the global config
+ under [symlink dirs][platform].
+ rm_dirs: As passed to clean().
+ expected_deleted: Dirs (relative paths under tmp_path) that are
+ expected to be cleaned.
+ expected_remaining: Any dirs (relative paths under tmp_path) that are
+ not expected to be cleaned.
"""
# --- Setup ---
- tmp_path.joinpath('cylc-run').mkdir()
- run_dir = tmp_path.joinpath('cylc-run', reg)
- run_dir_top_parent = tmp_path.joinpath('cylc-run', Path(reg).parts[0])
- symlink_dirs = props.get('symlink dirs')
- bad_symlink = props.get('bad symlink')
- if not props.get('no dir') and (
- not symlink_dirs or 'run' not in symlink_dirs):
- run_dir.mkdir(parents=True)
-
- dirs_to_check = [run_dir_top_parent]
- if symlink_dirs:
- if 'run' in symlink_dirs:
- dst = tmp_path.joinpath(symlink_dirs['run'], 'cylc-run', reg)
- dst.mkdir(parents=True)
- run_dir.symlink_to(dst)
- dirs_to_check.append(dst)
- symlink_dirs.pop('run')
- for s, d in symlink_dirs.items():
- dst = tmp_path.joinpath(d, 'cylc-run', reg, s)
- dst.mkdir(parents=True)
- src = run_dir.joinpath(s)
- src.symlink_to(dst)
- dirs_to_check.append(dst.parent)
- if bad_symlink:
- dst = tmp_path.joinpath(bad_symlink['path'])
- if bad_symlink['type'] == 'file':
- dst.parent.mkdir(parents=True)
- dst.touch()
- else:
- dst.mkdir(parents=True)
- src = run_dir.joinpath('log')
- src.symlink_to(dst)
-
- def mocked_detect_old_contact_file(reg):
- if props.get('not stopped'):
- raise ServiceFileError('Mocked error')
-
- monkeypatch.setattr('cylc.flow.workflow_files.detect_old_contact_file',
- mocked_detect_old_contact_file)
- monkeypatch.setattr('cylc.flow.workflow_files.get_workflow_run_dir',
- lambda x: tmp_path.joinpath('cylc-run', x))
+ run_dir: Path = tmp_run_dir(reg)
+
+ if 'run' in symlink_dirs:
+ target = tmp_path / symlink_dirs['run'] / 'cylc-run' / reg
+ target.mkdir(parents=True)
+ shutil.rmtree(run_dir)
+ run_dir.symlink_to(target)
+ symlink_dirs.pop('run')
+ for symlink_name, target_name in symlink_dirs.items():
+ target = tmp_path / target_name / 'cylc-run' / reg / symlink_name
+ target.mkdir(parents=True)
+ symlink = run_dir / symlink_name
+ symlink.symlink_to(target)
+ for d_name in ('log', 'share', 'share/cycle', 'work'):
+ if d_name not in symlink_dirs:
+ (run_dir / d_name).mkdir()
+
+ for rel_path in [*expected_deleted, *expected_remaining]:
+ assert (tmp_path / rel_path).exists()
# --- The actual test ---
- expected_err = props.get('err')
- if expected_err:
- with pytest.raises(expected_err) as exc:
- workflow_files.clean(reg)
- expected_msg = props.get('err msg')
- if expected_msg:
- assert expected_msg in str(exc.value)
- else:
- workflow_files.clean(reg)
- for d in dirs_to_check:
- assert d.exists() is False
- assert d.is_symlink() is False
+ workflow_files.clean(reg, run_dir, rm_dirs)
+ for rel_path in expected_deleted:
+ assert (tmp_path / rel_path).exists() is False
+ assert (tmp_path / rel_path).is_symlink() is False
+ for rel_path in expected_remaining:
+ assert (tmp_path / rel_path).exists()
-def test_clean_broken_symlink_run_dir(monkeypatch, tmp_path):
- """Test clean() for removing a run dir that is a broken symlink."""
+def test_clean__broken_symlink_run_dir(
+ tmp_path: Path, tmp_run_dir: Callable
+) -> None:
+ """Test clean() successfully remove a run dir that is a broken symlink."""
+ # Setup
reg = 'foo/bar'
- run_dir = tmp_path.joinpath('cylc-run', reg)
- run_dir.parent.mkdir(parents=True)
+ run_dir: Path = tmp_run_dir(reg)
target = tmp_path.joinpath('rabbow/cylc-run', reg)
target.mkdir(parents=True)
+ shutil.rmtree(run_dir)
run_dir.symlink_to(target)
target.rmdir()
+ assert run_dir.parent.exists() is True # cylc-run/foo should exist
+ # Test
+ workflow_files.clean(reg, run_dir)
+ assert run_dir.parent.exists() is False # cylc-run/foo should be gone
+ assert target.parent.exists() is False # rabbow/cylc-run/foo too
+
+
+def test_clean__bad_symlink_dir_wrong_type(
+ tmp_path: Path, tmp_run_dir: Callable
+) -> None:
+ """Test clean() raises error when a symlink dir actually points to a file
+ instead of a dir"""
+ reg = 'foo'
+ run_dir: Path = tmp_run_dir(reg)
+ symlink = run_dir.joinpath('log')
+ target = tmp_path.joinpath('sym-log', 'cylc-run', reg, 'meow.txt')
+ target.parent.mkdir(parents=True)
+ target.touch()
+ symlink.symlink_to(target)
+
+ with pytest.raises(WorkflowFilesError) as exc:
+ workflow_files.clean(reg, run_dir)
+ assert "Target is not a directory" in str(exc.value)
+ assert symlink.exists() is True
+
+
+def test_clean__bad_symlink_dir_wrong_form(
+ tmp_path: Path, tmp_run_dir: Callable
+) -> None:
+ """Test clean() raises error when a symlink dir points to an
+ unexpected dir"""
+ run_dir: Path = tmp_run_dir('foo')
+ symlink = run_dir.joinpath('log')
+ target = tmp_path.joinpath('sym-log', 'oops', 'log')
+ target.mkdir(parents=True)
+ symlink.symlink_to(target)
+
+ with pytest.raises(WorkflowFilesError) as exc:
+ workflow_files.clean('foo', run_dir)
+ assert 'Expected target to end with "cylc-run/foo/log"' in str(exc.value)
+ assert symlink.exists() is True
+
+
+@pytest.mark.parametrize('pattern', ['thing/', 'thing/*'])
+def test_clean__rm_dir_not_file(pattern: str, tmp_run_dir: Callable):
+ """Test clean() does not remove a file when the rm_dir glob pattern would
+ match a dir only."""
+ reg = 'foo'
+ run_dir: Path = tmp_run_dir(reg)
+ a_file = run_dir.joinpath('thing')
+ a_file.touch()
+ rm_dirs = parse_rm_dirs([pattern])
+
+ workflow_files.clean(reg, run_dir, rm_dirs)
+ assert a_file.exists()
+
+
+@pytest.mark.parametrize(
+ 'filetree, expected',
+ [
+ pytest.param(
+ FILETREE_1,
+ {'log': 'sym/cylc-run/foo/bar/log'},
+ id="filetree1"
+ ),
+ pytest.param(
+ FILETREE_2,
+ {
+ 'share/cycle': 'sym-cycle/cylc-run/foo/bar/share/cycle',
+ 'share': 'sym-share/cylc-run/foo/bar/share',
+ '': 'sym-run/cylc-run/foo/bar/'
+ },
+ id="filetree2"
+ ),
+ pytest.param(
+ FILETREE_3,
+ {
+ 'share/cycle': 'sym-cycle/cylc-run/foo/bar/share/cycle',
+ '': 'sym-run/cylc-run/foo/bar/'
+ },
+ id="filetree3"
+ ),
+ pytest.param(
+ FILETREE_4,
+ {'share/cycle': 'sym-cycle/cylc-run/foo/bar/share/cycle'},
+ id="filetree4"
+ ),
+ ]
+)
+def test_get_symlink_dirs(
+ filetree: Dict[str, Any],
+ expected: Dict[str, Union[Path, str]],
+ tmp_run_dir: Callable, tmp_path: Path
+):
+ """Test get_symlink_dirs().
+
+ Params:
+ filetree: The directory structure to test against.
+ expected: The expected return dictionary, except with the values being
+ relative to tmp_path instead of absolute paths.
+ """
+ # Setup
+ cylc_run_dir = tmp_run_dir()
+ create_filetree(filetree, tmp_path, tmp_path)
+ reg = 'foo/bar'
+ for k, v in expected.items():
+ expected[k] = Path(tmp_path / v)
+ # Test
+ assert get_symlink_dirs(reg, cylc_run_dir / reg) == expected
+
+
+@pytest.mark.parametrize(
+ 'pattern, filetree, expected_matches',
+ [
+ pytest.param(
+ '**',
+ FILETREE_1,
+ ['cylc-run/foo/bar',
+ 'cylc-run/foo/bar/log'],
+ id="filetree1 **"
+ ),
+ pytest.param(
+ '*',
+ FILETREE_1,
+ ['cylc-run/foo/bar/flow.cylc',
+ 'cylc-run/foo/bar/log',
+ 'cylc-run/foo/bar/mirkwood',
+ 'cylc-run/foo/bar/rincewind.txt'],
+ id="filetree1 *"
+ ),
+ pytest.param(
+ '**/*.txt',
+ FILETREE_1,
+ ['cylc-run/foo/bar/log/bib/fortuna.txt',
+ 'cylc-run/foo/bar/log/temba.txt',
+ 'cylc-run/foo/bar/rincewind.txt'],
+ id="filetree1 **/*.txt"
+ ),
+ pytest.param(
+ '**',
+ FILETREE_2,
+ ['cylc-run/foo/bar',
+ 'cylc-run/foo/bar/share',
+ 'cylc-run/foo/bar/share/cycle'],
+ id="filetree2 **"
+ ),
+ pytest.param(
+ '**',
+ FILETREE_3,
+ ['cylc-run/foo/bar',
+ 'cylc-run/foo/bar/share/cycle'],
+ id="filetree3 **"
+ ),
+ pytest.param(
+ '**/s*',
+ FILETREE_3,
+ ['cylc-run/foo/bar/share',
+ 'cylc-run/foo/bar/share/cycle/sokath.txt'],
+ id="filetree3 **/s*"
+ ),
+ pytest.param(
+ '**',
+ FILETREE_4,
+ ['cylc-run/foo/bar',
+ 'cylc-run/foo/bar/share/cycle'],
+ id="filetree4 **"
+ ),
+ ]
+)
+def test_glob_in_run_dir(
+ pattern: str,
+ filetree: Dict[str, Any],
+ expected_matches: List[str],
+ tmp_path: Path, tmp_run_dir: Callable
+) -> None:
+ """Test that glob_in_run_dir() returns the minimal set of results with
+ no redundant paths.
+ """
+ # Setup
+ cylc_run_dir: Path = tmp_run_dir()
+ reg = 'foo/bar'
+ run_dir = cylc_run_dir / reg
+ create_filetree(filetree, tmp_path, tmp_path)
+ symlink_dirs = [run_dir / i for i in get_symlink_dirs(reg, run_dir)]
+ expected = [tmp_path / i for i in expected_matches]
+ # Test
+ assert glob_in_run_dir(run_dir, pattern, symlink_dirs) == expected
+
+
+@pytest.fixture
+def filetree_for_testing_cylc_clean(tmp_path: Path):
+ """Fixture that creates a filetree from the given dict, and returns which
+ files are expected to be deleted and which aren't.
+
+ See tests/unit/filetree.py
+
+ Args:
+ reg: Workflow name.
+ initial_filetree: The filetree before cleaning.
+ filetree_left_behind: The filetree that is expected to be left behind
+ after cleaning, excluding the 'you-shall-not-pass/' directory,
+ which is always expected to be left behind.
+
+ Returns:
+ run_dir: Workflow run dir.
+ files_to_delete: List of files that are expected to be deleted.
+ files_not_to_delete: List of files that are not expected to be deleted.
+ """
+ def _filetree_for_testing_cylc_clean(
+ reg: str,
+ initial_filetree: Dict[str, Any],
+ filetree_left_behind: Dict[str, Any]
+ ) -> Tuple[Path, List[str], List[str]]:
+ create_filetree(initial_filetree, tmp_path, tmp_path)
+ files_not_to_delete = [
+ os.path.normpath(i) for i in
+ iglob(str(tmp_path / 'you-shall-not-pass/**'), recursive=True)
+ ]
+ files_not_to_delete.extend(
+ get_filetree_as_list(filetree_left_behind, tmp_path)
+ )
+ files_to_delete = list(
+ set(get_filetree_as_list(initial_filetree, tmp_path)).difference(
+ files_not_to_delete
+ )
+ )
+ run_dir = tmp_path / 'cylc-run' / reg
+ return run_dir, files_to_delete, files_not_to_delete
+ return _filetree_for_testing_cylc_clean
- monkeypatch.setattr('cylc.flow.workflow_files.get_workflow_run_dir',
- lambda x: tmp_path.joinpath('cylc-run', x))
- workflow_files.clean(reg)
- assert run_dir.parent.is_dir() is False
+@pytest.mark.parametrize(
+ 'pattern, initial_filetree, filetree_left_behind',
+ [
+ pytest.param(
+ '**',
+ FILETREE_1,
+ {
+ 'cylc-run': {'foo': {}},
+ 'sym': {'cylc-run': {'foo': {'bar': {}}}}
+ }
+ ),
+ pytest.param(
+ '*/**',
+ FILETREE_1,
+ {
+ 'cylc-run': {'foo': {'bar': {
+ '.service': {'db': None},
+ 'flow.cylc': None,
+ 'rincewind.txt': Path('whatever')
+ }}},
+ 'sym': {'cylc-run': {'foo': {'bar': {}}}}
+ }
+ ),
+ pytest.param(
+ '**/*.txt',
+ FILETREE_1,
+ {
+ 'cylc-run': {'foo': {'bar': {
+ '.service': {'db': None},
+ 'flow.cylc': None,
+ 'log': Path('whatever'),
+ 'mirkwood': Path('whatever')
+ }}},
+ 'sym': {'cylc-run': {'foo': {'bar': {
+ 'log': {
+ 'darmok': Path('whatever'),
+ 'bib': {}
+ }
+ }}}}
+ }
+ )
+ ]
+)
+def test__clean_using_glob(
+ pattern: str,
+ initial_filetree: Dict[str, Any],
+ filetree_left_behind: Dict[str, Any],
+ filetree_for_testing_cylc_clean: Callable
+) -> None:
+ """Test _clean_using_glob(), particularly that it does not follow and
+ delete symlinks (apart from the standard symlink dirs).
+
+ Params:
+ pattern: The glob pattern to test.
+ initial_filetree: The filetree to test against.
+ files_left_behind: The filetree expected to remain after
+ _clean_using_glob() is called (excluding
+ /you-shall-not-pass, which is always expected to remain).
+ """
+ # --- Setup ---
+ run_dir: Path
+ files_to_delete: List[str]
+ files_not_to_delete: List[str]
+ run_dir, files_to_delete, files_not_to_delete = (
+ filetree_for_testing_cylc_clean(
+ 'foo/bar', initial_filetree, filetree_left_behind)
+ )
+ # --- Test ---
+ _clean_using_glob(run_dir, pattern, symlink_dirs=['log'])
+ for file in files_not_to_delete:
+ assert os.path.exists(file) is True
+ for file in files_to_delete:
+ assert os.path.lexists(file) is False
+
+
+@pytest.mark.parametrize(
+ 'rm_dirs, initial_filetree, filetree_left_behind',
+ [
+ pytest.param(
+ {'**'},
+ FILETREE_1,
+ {
+ 'cylc-run': {},
+ 'sym': {'cylc-run': {}}
+ },
+ id="filetree1 **"
+ ),
+ pytest.param(
+ {'*/**'},
+ FILETREE_1,
+ {
+ 'cylc-run': {'foo': {'bar': {
+ '.service': {'db': None},
+ 'flow.cylc': None,
+ 'rincewind.txt': Path('whatever')
+ }}},
+ 'sym': {'cylc-run': {}}
+ },
+ id="filetree1 */**"
+ ),
+ pytest.param(
+ {'**/*.txt'},
+ FILETREE_1,
+ {
+ 'cylc-run': {'foo': {'bar': {
+ '.service': {'db': None},
+ 'flow.cylc': None,
+ 'log': Path('whatever'),
+ 'mirkwood': Path('whatever')
+ }}},
+ 'sym': {'cylc-run': {'foo': {'bar': {
+ 'log': {
+ 'darmok': Path('whatever'),
+ 'bib': {}
+ }
+ }}}}
+ },
+ id="filetree1 **/*.txt"
+ ),
+ pytest.param(
+ {'**/cycle'},
+ FILETREE_2,
+ {
+ 'cylc-run': {'foo': {'bar': Path('sym-run/cylc-run/foo/bar')}},
+ 'sym-run': {'cylc-run': {'foo': {'bar': {
+ '.service': {'db': None},
+ 'flow.cylc': None,
+ 'share': Path('sym-share/cylc-run/foo/bar/share')
+ }}}},
+ 'sym-share': {'cylc-run': {'foo': {'bar': {
+ 'share': {}
+ }}}},
+ 'sym-cycle': {'cylc-run': {}}
+ },
+ id="filetree2 **/cycle"
+ ),
+ pytest.param(
+ {'share'},
+ FILETREE_2,
+ {
+ 'cylc-run': {'foo': {'bar': Path('sym-run/cylc-run/foo/bar')}},
+ 'sym-run': {'cylc-run': {'foo': {'bar': {
+ '.service': {'db': None},
+ 'flow.cylc': None,
+ }}}},
+ 'sym-share': {'cylc-run': {}},
+ 'sym-cycle': {'cylc-run': {'foo': {'bar': {
+ 'share': {
+ 'cycle': {
+ 'macklunkey.txt': None
+ }
+ }
+ }}}}
+ },
+ id="filetree2 share"
+ ),
+ pytest.param(
+ {'**'},
+ FILETREE_2,
+ {
+ 'cylc-run': {},
+ 'sym-run': {'cylc-run': {}},
+ 'sym-share': {'cylc-run': {}},
+ 'sym-cycle': {'cylc-run': {}}
+ },
+ id="filetree2 **"
+ ),
+ pytest.param(
+ {'*'},
+ FILETREE_2,
+ {
+ 'cylc-run': {'foo': {'bar': Path('sym-run/cylc-run/foo/bar')}},
+ 'sym-run': {'cylc-run': {'foo': {'bar': {
+ '.service': {'db': None},
+ }}}},
+ 'sym-share': {'cylc-run': {}},
+ 'sym-cycle': {'cylc-run': {'foo': {'bar': {
+ 'share': {
+ 'cycle': {
+ 'macklunkey.txt': None
+ }
+ }
+ }}}}
+ },
+ id="filetree2 *"
+ ),
+ pytest.param( # Check https://bugs.python.org/issue35201 has no effect
+ {'non-exist/**'},
+ FILETREE_2,
+ FILETREE_2,
+ id="filetree2 non-exist/**"
+ ),
+ pytest.param(
+ {'**'},
+ FILETREE_3,
+ {
+ 'cylc-run': {},
+ 'sym-run': {'cylc-run': {}},
+ 'sym-cycle': {'cylc-run': {}},
+ },
+ id="filetree3 **"
+ ),
+ pytest.param(
+ {'**'},
+ FILETREE_4,
+ {
+ 'cylc-run': {},
+ 'sym-cycle': {'cylc-run': {}},
+ },
+ id="filetree4 **"
+ )
+ ],
+)
+def test_clean__targeted(
+ rm_dirs: Set[str],
+ initial_filetree: Dict[str, Any],
+ filetree_left_behind: Dict[str, Any],
+ caplog: pytest.LogCaptureFixture, tmp_run_dir: Callable,
+ filetree_for_testing_cylc_clean: Callable
+) -> None:
+ """Test clean(), particularly that it does not follow and delete symlinks
+ (apart from the standard symlink dirs).
+
+ This is similar to test__clean_using_glob(), but the filetree expected to
+ remain after cleaning is different due to the tidy up of empty dirs.
+
+ Params:
+ rm_dirs: The glob patterns to test.
+ initial_filetree: The filetree to test against.
+ files_left_behind: The filetree expected to remain after
+ clean() is called (excluding /you-shall-not-pass,
+ which is always expected to remain).
+ """
+ # --- Setup ---
+ caplog.set_level(logging.DEBUG, CYLC_LOG)
+ tmp_run_dir()
+ reg = 'foo/bar'
+ run_dir: Path
+ files_to_delete: List[str]
+ files_not_to_delete: List[str]
+ run_dir, files_to_delete, files_not_to_delete = (
+ filetree_for_testing_cylc_clean(
+ reg, initial_filetree, filetree_left_behind)
+ )
+ # --- Test ---
+ workflow_files.clean(reg, run_dir, rm_dirs)
+ for file in files_not_to_delete:
+ assert os.path.exists(file) is True
+ for file in files_to_delete:
+ assert os.path.lexists(file) is False
PLATFORMS = {
@@ -406,68 +981,91 @@ def test_clean_broken_symlink_run_dir(monkeypatch, tmp_path):
@pytest.mark.parametrize(
'install_targets_map, failed_platforms, expected_platforms, expected_err',
[
- (
- {'localhost': [PLATFORMS['exeter']]}, None, None, None
+ pytest.param(
+ {'localhost': [PLATFORMS['exeter']]}, None, None, None,
+ id="Only localhost install target - no remote clean"
),
- (
+ pytest.param(
{
'localhost': [PLATFORMS['exeter']],
'picard': [PLATFORMS['enterprise']]
},
None,
['enterprise'],
- None
+ None,
+ id="Localhost and remote install target"
),
- (
+ pytest.param(
{
'picard': [PLATFORMS['enterprise'], PLATFORMS['stargazer']],
'janeway': [PLATFORMS['voyager']]
},
None,
['enterprise', 'voyager'],
- None
+ None,
+ id="Only remote install targets"
),
- (
+ pytest.param(
{
'picard': [PLATFORMS['enterprise'], PLATFORMS['stargazer']],
'janeway': [PLATFORMS['voyager']]
},
- ['enterprise'],
+ {'enterprise': 255},
['enterprise', 'stargazer', 'voyager'],
- None
+ None,
+ id="Install target with 1 failed, 1 successful platform"
),
- (
+ pytest.param(
{
'picard': [PLATFORMS['enterprise'], PLATFORMS['stargazer']],
'janeway': [PLATFORMS['voyager']]
},
- ['enterprise', 'stargazer'],
+ {'enterprise': 255, 'stargazer': 255},
['enterprise', 'stargazer', 'voyager'],
- (CylcError, "Could not clean on install targets: picard")
+ (CylcError, "Could not clean on install targets: picard"),
+ id="Install target with all failed platforms"
),
- (
+ pytest.param(
{
'picard': [PLATFORMS['enterprise']],
'janeway': [PLATFORMS['voyager']]
},
+ {'enterprise': 255, 'voyager': 255},
['enterprise', 'voyager'],
- ['enterprise', 'voyager'],
- (CylcError, "Could not clean on install targets: picard, janeway")
- )
+ (CylcError, "Could not clean on install targets: picard, janeway"),
+ id="All install targets have all failed platforms"
+ ),
+ pytest.param(
+ {
+ 'picard': [PLATFORMS['enterprise'], PLATFORMS['stargazer']]
+ },
+ {'enterprise': 1},
+ ['enterprise'],
+ (CylcError, "Could not clean on install targets: picard"),
+ id=("Remote clean cmd fails on a platform for non-SSH reason - "
+ "does not retry")
+ ),
]
)
-def test_remote_clean(install_targets_map, failed_platforms,
- expected_platforms, expected_err, monkeypatch, caplog):
+def test_remote_clean(
+ install_targets_map: Dict[str, Any],
+ failed_platforms: Optional[Dict[str, int]],
+ expected_platforms: Optional[List[str]],
+ expected_err: Optional[Tuple[Type[Exception], str]],
+ monkeymock: MonkeyMock, monkeypatch: pytest.MonkeyPatch,
+ caplog: pytest.LogCaptureFixture
+) -> None:
"""Test remote_clean() logic.
Params:
- install_targets_map (dict): The map that would be returned by
+ install_targets_map The map that would be returned by
platforms.get_install_target_to_platforms_map()
- failed_platforms (list): If specified, any platforms that clean will
- artificially fail on in this test case.
- expected_platforms (list): If specified, all the platforms that the
+ failed_platforms: If specified, any platforms that clean will
+ artificially fail on in this test case. The key is the platform
+ name, the value is the remote clean cmd return code.
+ expected_platforms: If specified, all the platforms that the
remote clean cmd is expected to run on.
- expected_err (tuple): If specified, a tuple of the form
+ expected_err: If specified, a tuple of the form
(Exception, str) giving an exception that is expected to be raised.
"""
# ----- Setup -----
@@ -476,22 +1074,23 @@ def test_remote_clean(install_targets_map, failed_platforms,
'cylc.flow.workflow_files.get_install_target_to_platforms_map',
lambda x: install_targets_map)
# Remove randomness:
- mocked_shuffle = mock.Mock()
- monkeypatch.setattr('cylc.flow.workflow_files.shuffle', mocked_shuffle)
+ monkeymock('cylc.flow.workflow_files.shuffle')
- def mocked_remote_clean_cmd_side_effect(reg, platform, timeout):
+ def mocked_remote_clean_cmd_side_effect(reg, platform, rm_dirs, timeout):
proc_ret_code = 0
if failed_platforms and platform['name'] in failed_platforms:
- proc_ret_code = 1
+ proc_ret_code = failed_platforms[platform['name']]
return mock.Mock(
poll=lambda: proc_ret_code,
- communicate=lambda: (b"", b""),
- args=[])
+ communicate=lambda: ("", ""),
+ args=[]
+ )
- mocked_remote_clean_cmd = mock.Mock(
+ mocked_remote_clean_cmd = monkeymock(
+ 'cylc.flow.workflow_files._remote_clean_cmd',
+ spec=_remote_clean_cmd,
side_effect=mocked_remote_clean_cmd_side_effect)
- monkeypatch.setattr(
- 'cylc.flow.workflow_files._remote_clean_cmd', mocked_remote_clean_cmd)
+ rm_dirs = ["whatever"]
# ----- Test -----
reg = 'foo'
platform_names = (
@@ -500,14 +1099,15 @@ def mocked_remote_clean_cmd_side_effect(reg, platform, timeout):
err, msg = expected_err
with pytest.raises(err) as exc:
workflow_files.remote_clean(
- reg, platform_names, timeout='irrelevant')
+ reg, platform_names, rm_dirs, timeout='irrelevant')
assert msg in str(exc.value)
else:
- workflow_files.remote_clean(reg, platform_names, timeout='irrelevant')
+ workflow_files.remote_clean(
+ reg, platform_names, rm_dirs, timeout='irrelevant')
if expected_platforms:
for p_name in expected_platforms:
mocked_remote_clean_cmd.assert_any_call(
- reg, PLATFORMS[p_name], 'irrelevant')
+ reg, PLATFORMS[p_name], rm_dirs, 'irrelevant')
else:
mocked_remote_clean_cmd.assert_not_called()
if failed_platforms:
@@ -515,31 +1115,87 @@ def mocked_remote_clean_cmd_side_effect(reg, platform, timeout):
assert f"{p_name}: {TaskRemoteMgmtError.MSG_TIDY}" in caplog.text
-def test_remove_empty_reg_parents(tmp_path):
+@pytest.mark.parametrize(
+ 'rm_dirs, expected_args',
+ [
+ (None, []),
+ (['holodeck', 'ten_forward'],
+ ['--rm', 'holodeck', '--rm', 'ten_forward'])
+ ]
+)
+def test_remote_clean_cmd(
+ rm_dirs: Optional[List[str]],
+ expected_args: List[str],
+ monkeymock: MonkeyMock
+) -> None:
+ """Test _remote_clean_cmd()
+
+ Params:
+ rm_dirs: Argument passed to _remote_clean_cmd().
+ expected_args: Expected CLI arguments of the cylc clean command that
+ gets constructed.
+ """
+ reg = 'jean/luc/picard'
+ platform = {'name': 'enterprise', 'install target': 'mars'}
+ mock_construct_ssh_cmd = monkeymock(
+ 'cylc.flow.workflow_files.construct_ssh_cmd', return_value=['blah'])
+ monkeymock('cylc.flow.workflow_files.Popen')
+
+ workflow_files._remote_clean_cmd(reg, platform, rm_dirs, timeout='dunno')
+ args, kwargs = mock_construct_ssh_cmd.call_args
+ constructed_cmd = args[0]
+ assert constructed_cmd == ['clean', '--local-only', reg, *expected_args]
+
+
+def test_remove_empty_parents(tmp_path: Path):
"""Test that _remove_empty_parents() doesn't remove parents containing a
sibling."""
+ # -- Setup --
reg = 'foo/bar/baz/qux'
path = tmp_path.joinpath(reg)
tmp_path.joinpath('foo/bar/baz').mkdir(parents=True)
+ # Note qux does not exist, but that shouldn't matter
sibling_reg = 'foo/darmok'
sibling_path = tmp_path.joinpath(sibling_reg)
sibling_path.mkdir()
- workflow_files._remove_empty_reg_parents(reg, path)
+ # -- Test --
+ workflow_files._remove_empty_parents(path, reg)
assert tmp_path.joinpath('foo/bar').exists() is False
assert tmp_path.joinpath('foo').exists() is True
- # Also path must be absolute
- with pytest.raises(ValueError) as exc:
- workflow_files._remove_empty_reg_parents(
- 'foo/darmok', 'meow/foo/darmok')
- assert 'Path must be absolute' in str(exc.value)
# Check it skips non-existent dirs, and stops at the right place too
tmp_path.joinpath('foo/bar').mkdir()
sibling_path.rmdir()
- workflow_files._remove_empty_reg_parents(reg, path)
+ workflow_files._remove_empty_parents(path, reg)
assert tmp_path.joinpath('foo').exists() is False
assert tmp_path.exists() is True
+@pytest.mark.parametrize(
+ 'path, tail, exc_msg',
+ [
+ pytest.param(
+ 'meow/foo/darmok', 'foo/darmok', "path must be absolute",
+ id="relative path"
+ ),
+ pytest.param(
+ '/meow/foo/darmok', '/foo/darmok',
+ "tail must not be an absolute path",
+ id="absolute tail"
+ ),
+ pytest.param(
+ '/meow/foo/darmok', 'foo/jalad',
+ "path '/meow/foo/darmok' does not end with 'foo/jalad'",
+ id="tail not in path"
+ )
+ ]
+)
+def test_remove_empty_parents_bad(path: str, tail: str, exc_msg: str):
+ """Test that _remove_empty_parents() fails appropriately with bad args."""
+ with pytest.raises(ValueError) as exc:
+ workflow_files._remove_empty_parents(path, tail)
+ assert exc_msg in str(exc.value)
+
+
@pytest.mark.parametrize(
'run_dir, srv_dir',
[