From 1e7724bdc2cc999b137eb293f472c49eb83c33d5 Mon Sep 17 00:00:00 2001
From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com>
Date: Mon, 16 Nov 2020 18:30:55 +0000
Subject: [PATCH 01/14] Tidy
---
cylc/flow/platforms.py | 7 +++----
cylc/flow/suite_files.py | 3 +--
tests/unit/test_suite_files.py | 4 ++--
3 files changed, 6 insertions(+), 8 deletions(-)
diff --git a/cylc/flow/platforms.py b/cylc/flow/platforms.py
index 907c9e222ef..34bf597296d 100644
--- a/cylc/flow/platforms.py
+++ b/cylc/flow/platforms.py
@@ -339,7 +339,7 @@ def platform_from_job_info(platforms, job, remote):
raise PlatformLookupError('No platform found matching your task')
-def get_host_from_platform(platform, method=None):
+def get_host_from_platform(platform, method='random'):
"""Placeholder for a more sophisticated function which returns a host
given a platform dictionary.
@@ -349,8 +349,7 @@ def get_host_from_platform(platform, method=None):
method (str):
Name a function to use when selecting hosts from list provided
by platform.
-
- - None or 'random': Pick the first host from list
+ - 'random' (default): Pick a random host from list
- 'first': Return the first host in the list
Returns:
@@ -361,7 +360,7 @@ def get_host_from_platform(platform, method=None):
- Random Selection with check for host availability
"""
- if method is None or method == 'random':
+ if method == 'random':
return random.choice(platform['hosts'])
elif method == 'first':
return platform['hosts'][0]
diff --git a/cylc/flow/suite_files.py b/cylc/flow/suite_files.py
index 5925c90dd60..58aeacb74a9 100644
--- a/cylc/flow/suite_files.py
+++ b/cylc/flow/suite_files.py
@@ -704,5 +704,4 @@ def get_cylc_run_abs_path(path):
"""
if os.path.isabs(path):
return path
- cylc_run_dir = os.path.expandvars(get_platform()['run directory'])
- return os.path.join(cylc_run_dir, path)
+ return get_suite_run_dir(path)
diff --git a/tests/unit/test_suite_files.py b/tests/unit/test_suite_files.py
index 19cc87e538b..dff39dc06b3 100644
--- a/tests/unit/test_suite_files.py
+++ b/tests/unit/test_suite_files.py
@@ -247,7 +247,7 @@ def mkdirs_standin(_, exist_ok=False):
('/a/b/c', '/a/b/c')]
)
def test_get_cylc_run_abs_path(path, expected, monkeypatch):
- monkeypatch.setattr('cylc.flow.suite_files.get_platform',
+ monkeypatch.setattr('cylc.flow.pathutil.get_platform',
lambda: {'run directory': '/mock_cylc_dir'})
assert suite_files.get_cylc_run_abs_path(path) == expected
@@ -268,7 +268,7 @@ def test_is_valid_run_dir(path, expected, is_abs_path, monkeypatch):
serv_dir = os.path.join(prefix, 'service', 'dir', 'exists', '.service')
monkeypatch.setattr('os.path.isfile', lambda x: x == flow_file)
monkeypatch.setattr('os.path.isdir', lambda x: x == serv_dir)
- monkeypatch.setattr('cylc.flow.suite_files.get_platform',
+ monkeypatch.setattr('cylc.flow.pathutil.get_platform',
lambda: {'run directory': 'mock_cylc_dir'})
path = os.path.normpath(path)
if is_abs_path:
From cc54307ffacf6af7349fc87c8a4fec7318595cd8 Mon Sep 17 00:00:00 2001
From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com>
Date: Wed, 18 Nov 2020 18:34:45 +0000
Subject: [PATCH 02/14] Create `cylc clean` command
---
cylc/flow/etc/cylc-bash-completion | 2 +-
cylc/flow/pathutil.py | 23 ++++++++++++
cylc/flow/scripts/clean.py | 57 ++++++++++++++++++++++++++++++
cylc/flow/suite_files.py | 33 +++++++++++++++--
setup.cfg | 1 +
5 files changed, 113 insertions(+), 3 deletions(-)
create mode 100644 cylc/flow/scripts/clean.py
diff --git a/cylc/flow/etc/cylc-bash-completion b/cylc/flow/etc/cylc-bash-completion
index 129465a413d..d67bed3f83e 100644
--- a/cylc/flow/etc/cylc-bash-completion
+++ b/cylc/flow/etc/cylc-bash-completion
@@ -38,7 +38,7 @@ _cylc() {
cur="${COMP_WORDS[COMP_CWORD]}"
sec="${COMP_WORDS[1]}"
opts="$(cylc scan -t name 2>/dev/null)"
- suite_cmds="broadcast|bcast|cat-log|log|check-versions|diff|compare|dump|edit|ext-trigger|external-trigger|get-directory|get-suite-config|get-config|get-suite-version|get-cylc-version|graph|graph-diff|hold|insert|kill|list|ls|tui|ping|poll|print|register|release|unhold|reload|remove|report-timings|reset|restart|run|start|scan|search|grep|set-verbosity|show|set-outputs|stop|shutdown|single|suite-state|test-battery|trigger|validate|view|warranty"
+ suite_cmds="broadcast|bcast|cat-log|check-versions|clean|compare|diff|dump|edit|ext-trigger|external-trigger|get-directory|get-suite-config|get-config|get-suite-version|get-cylc-version|graph|graph-diff|hold|insert|kill|list|log|ls|tui|ping|poll|print|register|release|unhold|reload|remove|report-timings|reset|restart|run|start|scan|search|grep|set-verbosity|show|set-outputs|stop|shutdown|single|suite-state|test-battery|trigger|validate|view|warranty"
if [[ ${COMP_CWORD} -eq 1 ]]; then
diff --git a/cylc/flow/pathutil.py b/cylc/flow/pathutil.py
index 7507c378377..7d3e4d794e7 100644
--- a/cylc/flow/pathutil.py
+++ b/cylc/flow/pathutil.py
@@ -203,3 +203,26 @@ def make_symlink(src, dst):
os.symlink(src, dst, target_is_directory=True)
except Exception as exc:
raise WorkflowFilesError(f"Error when symlinking '{exc}'")
+
+
+def remove_dir(path):
+ """Delete a directory including contents, including the target directory
+ if the specified path is a symlink.
+
+ Args:
+ path (str): the path of the directory to delete.
+ """
+ if not os.path.isdir(path):
+ raise NotADirectoryError(path)
+ if os.path.islink(path):
+ if os.path.exists(path):
+ target = os.path.realpath(path)
+ LOG.info(f'Removing symlink target directory: {path} -> {target}')
+ rmtree(target)
+ LOG.info(f'Removing symlink: {path}')
+ else:
+ LOG.info(f'Removing broken symlink: {path}')
+ os.remove(path)
+ else:
+ LOG.info(f'Removing directory: {path}')
+ rmtree(path)
diff --git a/cylc/flow/scripts/clean.py b/cylc/flow/scripts/clean.py
new file mode 100644
index 00000000000..48994fb4164
--- /dev/null
+++ b/cylc/flow/scripts/clean.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python3
+
+# THIS FILE IS PART OF THE CYLC SUITE ENGINE.
+# Copyright (C) NIWA & British Crown (Met Office) & Contributors.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+"""cylc clean [OPTIONS] ARGS
+
+Remove a stopped workflow from the local scheduler filesystem.
+
+NOTE: this command is intended for workflows installed with `cylc install`. If
+this is run for a workflow that was instead written directly in ~/cylc-run and
+not backed up elsewhere, it will be lost.
+
+It will also remove an symlink directory targets. For now, it will fail if
+run on a host which doesn't have access to that filesystem.
+
+Suite names can be hierarchical, corresponding to the path under ~/cylc-run.
+
+Examples:
+ # Remove the workflow at ~/cylc-run/foo
+ $ cylc clean foo
+
+"""
+
+from cylc.flow.option_parsers import CylcOptionParser as COP
+from cylc.flow.terminal import cli_function
+from cylc.flow.suite_files import clean
+
+
+def get_option_parser():
+ parser = COP(
+ __doc__,
+ argdoc=[("REG", "Suite name")]
+ )
+ return parser
+
+
+@cli_function(get_option_parser)
+def main(parser, opts, reg):
+ clean(reg)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/cylc/flow/suite_files.py b/cylc/flow/suite_files.py
index 58aeacb74a9..9e893111263 100644
--- a/cylc/flow/suite_files.py
+++ b/cylc/flow/suite_files.py
@@ -28,8 +28,9 @@
import aiofiles
from cylc.flow import LOG
-from cylc.flow.exceptions import SuiteServiceFileError
-from cylc.flow.pathutil import get_suite_run_dir, make_localhost_symlinks
+from cylc.flow.exceptions import SuiteServiceFileError, WorkflowFilesError
+from cylc.flow.pathutil import (
+ get_suite_run_dir, make_localhost_symlinks, remove_dir)
from cylc.flow.platforms import get_platform
from cylc.flow.hostuserutil import (
get_user,
@@ -559,6 +560,34 @@ def register(reg=None, source=None, redirect=False):
return reg
+def clean(reg):
+ """Remove stopped workflows on the local scheduler filesystem.
+
+ Args:
+ reg (str): workflow name.
+ """
+ run_dir = get_suite_run_dir(reg)
+ if not os.path.isdir(run_dir):
+ raise WorkflowFilesError(f'No workflow directory found at {run_dir}')
+ try:
+ detect_old_contact_file(reg)
+ except SuiteServiceFileError as exc:
+ raise SuiteServiceFileError(
+ f'Cannot remove running workflow.\n\n{exc}')
+
+ # TODO: check task_jobs table in database to see what platforms are used
+
+ base_path = os.path.realpath(run_dir)
+ possible_symlinks = [os.path.join(base_path, d) for d in [
+ 'log', 'share/cycle', 'share', 'work']]
+ # Note: 'share/cycle' must come before 'share'
+ for path in [*possible_symlinks, run_dir]:
+ # Note: possible_symlinks must come before run_dir
+ remove_dir(path)
+
+ # TODO: what if user creates a symlink ~/cylc-run/foo/work/ -> /net/home/ !
+
+
def remove_keys_on_server(keys):
"""Removes server-held authentication keys"""
# WARNING, DESTRUCTIVE. Removes old keys if they already exist.
diff --git a/setup.cfg b/setup.cfg
index e3743500c23..ed594936aea 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -70,6 +70,7 @@ cylc.command =
broadcast = cylc.flow.scripts.broadcast:main
cat-log = cylc.flow.scripts.cat_log:main
check-versions = cylc.flow.scripts.check_versions:main
+ clean = cylc.flow.scripts.clean:main
client = cylc.flow.scripts.client:main
cycle-point = cylc.flow.scripts.cycle_point:main
diff = cylc.flow.scripts.diff:main
From dc9683b148b4e8b77766028dffc4ca568f61018b Mon Sep 17 00:00:00 2001
From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com>
Date: Thu, 19 Nov 2020 15:09:11 +0000
Subject: [PATCH 03/14] cylc clean: ensure the right dirs are removed
And prevent symlink attack
---
cylc/flow/pathutil.py | 8 ++++++--
cylc/flow/scripts/clean.py | 4 ++--
cylc/flow/suite_files.py | 18 ++++++++++++------
3 files changed, 20 insertions(+), 10 deletions(-)
diff --git a/cylc/flow/pathutil.py b/cylc/flow/pathutil.py
index 7d3e4d794e7..717e99c30bd 100644
--- a/cylc/flow/pathutil.py
+++ b/cylc/flow/pathutil.py
@@ -210,9 +210,11 @@ def remove_dir(path):
if the specified path is a symlink.
Args:
- path (str): the path of the directory to delete.
+ path (str): the absolute path of the directory to delete.
"""
- if not os.path.isdir(path):
+ if not os.path.isabs(path):
+ raise ValueError('Path must be absolute')
+ if os.path.exists(path) and not os.path.isdir(path):
raise NotADirectoryError(path)
if os.path.islink(path):
if os.path.exists(path):
@@ -223,6 +225,8 @@ def remove_dir(path):
else:
LOG.info(f'Removing broken symlink: {path}')
os.remove(path)
+ elif not os.path.exists(path):
+ raise FileNotFoundError(path)
else:
LOG.info(f'Removing directory: {path}')
rmtree(path)
diff --git a/cylc/flow/scripts/clean.py b/cylc/flow/scripts/clean.py
index 48994fb4164..6b36cfbb2c3 100644
--- a/cylc/flow/scripts/clean.py
+++ b/cylc/flow/scripts/clean.py
@@ -24,8 +24,8 @@
this is run for a workflow that was instead written directly in ~/cylc-run and
not backed up elsewhere, it will be lost.
-It will also remove an symlink directory targets. For now, it will fail if
-run on a host which doesn't have access to that filesystem.
+It will also remove an symlink directory targets. For now, it will fail to
+remove workflow files/directories on a remote host.
Suite names can be hierarchical, corresponding to the path under ~/cylc-run.
diff --git a/cylc/flow/suite_files.py b/cylc/flow/suite_files.py
index 9e893111263..4fca934dd7d 100644
--- a/cylc/flow/suite_files.py
+++ b/cylc/flow/suite_files.py
@@ -578,14 +578,20 @@ def clean(reg):
# TODO: check task_jobs table in database to see what platforms are used
base_path = os.path.realpath(run_dir)
- possible_symlinks = [os.path.join(base_path, d) for d in [
+ possible_symlinks = [(name, os.path.join(base_path, name)) for name in [
'log', 'share/cycle', 'share', 'work']]
# Note: 'share/cycle' must come before 'share'
- for path in [*possible_symlinks, run_dir]:
- # Note: possible_symlinks must come before run_dir
- remove_dir(path)
-
- # TODO: what if user creates a symlink ~/cylc-run/foo/work/ -> /net/home/ !
+ for name, path in possible_symlinks:
+ if os.path.islink(path):
+ target = os.path.realpath(path)
+ target_reg_dir = target.rsplit(name, 1)[0]
+ # Ensure symlink not pointing to wrong directory!
+ if not target_reg_dir.endswith(f'cylc-run/{reg}/'):
+ raise WorkflowFilesError(
+ f'Symlink target {target} does not match expected pattern')
+ if os.path.isdir(target_reg_dir):
+ remove_dir(target_reg_dir)
+ remove_dir(run_dir)
def remove_keys_on_server(keys):
From 62222f79ee7417b40594451642df9d41bf7e4be7 Mon Sep 17 00:00:00 2001
From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com>
Date: Fri, 20 Nov 2020 15:39:02 +0000
Subject: [PATCH 04/14] cylc clean & register: unified "reg" validation
---
cylc/flow/suite_files.py | 51 ++++++++++++++++++++++++----------
tests/unit/test_suite_files.py | 8 ++----
2 files changed, 39 insertions(+), 20 deletions(-)
diff --git a/cylc/flow/suite_files.py b/cylc/flow/suite_files.py
index 4fca934dd7d..cdc5ec2867f 100644
--- a/cylc/flow/suite_files.py
+++ b/cylc/flow/suite_files.py
@@ -479,24 +479,18 @@ def register(reg=None, source=None, redirect=False):
Raise:
SuiteServiceFileError:
- No flow.cylc file found in source location.
- Illegal name (can look like a relative path, but not absolute).
- Another suite already has this name (unless --redirect).
- Trying to register a suite nested inside of another.
+ - No flow.cylc file found in source location.
+ - Illegal name (can look like a relative path, but not absolute).
+ - Another suite already has this name (unless --redirect).
+ - Trying to register a suite nested inside of another.
"""
if reg is None:
reg = os.path.basename(os.getcwd())
- make_localhost_symlinks(reg)
- is_valid, message = SuiteNameValidator.validate(reg)
- if not is_valid:
- raise SuiteServiceFileError(f'invalid suite name - {message}')
-
- if os.path.isabs(reg):
- raise SuiteServiceFileError(
- f'suite name cannot be an absolute path: {reg}')
-
+ _validate_reg(reg)
check_nested_run_dirs(reg)
+ make_localhost_symlinks(reg)
+
if source is not None:
if os.path.basename(source) == SuiteFiles.FLOW_FILE:
source = os.path.dirname(source)
@@ -566,6 +560,9 @@ def clean(reg):
Args:
reg (str): workflow name.
"""
+ reg = os.path.normpath(reg)
+ # TODO: prevent `cylc clean foo/../` (i.e. rm -r ~/cylc-clean !)
+ _validate_reg(reg)
run_dir = get_suite_run_dir(reg)
if not os.path.isdir(run_dir):
raise WorkflowFilesError(f'No workflow directory found at {run_dir}')
@@ -685,12 +682,36 @@ def _load_local_item(item, path):
return None
+def _validate_reg(reg):
+ """Check suite name is valid.
+
+ Args:
+ reg (str): Suite name
+
+ Raise:
+ SuiteServiceFileError:
+ - reg has form of absolute path or is otherwise not valid
+ """
+ is_valid, message = SuiteNameValidator.validate(reg)
+ if not is_valid:
+ raise SuiteServiceFileError(f'invalid suite name "{reg}" - {message}')
+ if os.path.isabs(reg):
+ raise SuiteServiceFileError(
+ f'suite name cannot be an absolute path: {reg}')
+
+
def check_nested_run_dirs(reg):
"""Disallow nested run dirs e.g. trying to register foo/bar where foo is
already a valid suite directory.
Args:
reg (str): suite name
+
+ Raise:
+ WorkflowFilesError:
+ - reg dir is nested inside a run dir
+ - reg dir contains a nested run dir (if not deeper than max scan
+ depth)
"""
exc_msg = (
'Nested run directories not allowed - cannot register suite name '
@@ -700,7 +721,7 @@ def _check_child_dirs(path, depth_count=1):
for result in os.scandir(path):
if result.is_dir() and not result.is_symlink():
if is_valid_run_dir(result.path):
- raise SuiteServiceFileError(exc_msg % (reg, result.path))
+ raise WorkflowFilesError(exc_msg % (reg, result.path))
if depth_count < MAX_SCAN_DEPTH:
_check_child_dirs(result.path, depth_count + 1)
@@ -708,7 +729,7 @@ def _check_child_dirs(path, depth_count=1):
parent_dir = os.path.dirname(reg_path)
while parent_dir != '':
if is_valid_run_dir(parent_dir):
- raise SuiteServiceFileError(
+ raise WorkflowFilesError(
exc_msg % (reg, get_cylc_run_abs_path(parent_dir)))
parent_dir = os.path.dirname(parent_dir)
diff --git a/tests/unit/test_suite_files.py b/tests/unit/test_suite_files.py
index dff39dc06b3..ac436b46563 100644
--- a/tests/unit/test_suite_files.py
+++ b/tests/unit/test_suite_files.py
@@ -15,13 +15,12 @@
# along with this program. If not, see .
from cylc.flow.suite_files import check_nested_run_dirs
-from cylc.flow.pathutil import make_localhost_symlinks
import pytest
from unittest import mock
import os.path
from cylc.flow import suite_files
-from cylc.flow.exceptions import SuiteServiceFileError
+from cylc.flow.exceptions import SuiteServiceFileError, WorkflowFilesError
def get_register_test_cases():
@@ -293,7 +292,7 @@ def test_nested_run_dirs_raise_error(direction, monkeypatch):
suite_files.check_nested_run_dirs('bright/falls')
# Nested in a run dir - bad:
for path in ('bright/falls/light', 'bright/falls/light/and/power'):
- with pytest.raises(SuiteServiceFileError) as exc:
+ with pytest.raises(WorkflowFilesError) as exc:
suite_files.check_nested_run_dirs(path)
assert 'Nested run directories not allowed' in str(exc.value)
@@ -319,9 +318,8 @@ def mock_scandir(path):
for path in ('a/a/a', 'a/b'):
suite_files.check_nested_run_dirs(path)
# Run dir nested below - bad:
-
for path in ('a', 'a/a', 'a/c'):
- with pytest.raises(SuiteServiceFileError) as exc:
+ with pytest.raises(WorkflowFilesError) as exc:
check_nested_run_dirs(path)
assert 'Nested run directories not allowed' in str(exc.value)
# Run dir nested below max scan depth - not ideal but passes:
From 118d82593035a5fc425db5a6f91d57d811e7ee6a Mon Sep 17 00:00:00 2001
From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com>
Date: Fri, 20 Nov 2020 20:33:16 +0000
Subject: [PATCH 05/14] cylc clean: better validation
- Ensure passed "reg" is not a path that points to the cylc-run dir or
higher
- Ensure symlinks to be deleted have expected form i.e.
`something/cylc-run/reg/log` etc
---
cylc/flow/pathutil.py | 5 +++--
cylc/flow/suite_files.py | 25 +++++++++++++++++--------
2 files changed, 20 insertions(+), 10 deletions(-)
diff --git a/cylc/flow/pathutil.py b/cylc/flow/pathutil.py
index 717e99c30bd..91bba9304f8 100644
--- a/cylc/flow/pathutil.py
+++ b/cylc/flow/pathutil.py
@@ -202,7 +202,7 @@ def make_symlink(src, dst):
try:
os.symlink(src, dst, target_is_directory=True)
except Exception as exc:
- raise WorkflowFilesError(f"Error when symlinking '{exc}'")
+ raise WorkflowFilesError(f"Error when symlinking\n{exc}")
def remove_dir(path):
@@ -219,7 +219,8 @@ def remove_dir(path):
if os.path.islink(path):
if os.path.exists(path):
target = os.path.realpath(path)
- LOG.info(f'Removing symlink target directory: {path} -> {target}')
+ LOG.info(
+ f'Removing symlink target directory: ({path} ->) {target}')
rmtree(target)
LOG.info(f'Removing symlink: {path}')
else:
diff --git a/cylc/flow/suite_files.py b/cylc/flow/suite_files.py
index cdc5ec2867f..69749c3c16a 100644
--- a/cylc/flow/suite_files.py
+++ b/cylc/flow/suite_files.py
@@ -560,12 +560,15 @@ def clean(reg):
Args:
reg (str): workflow name.
"""
- reg = os.path.normpath(reg)
- # TODO: prevent `cylc clean foo/../` (i.e. rm -r ~/cylc-clean !)
_validate_reg(reg)
+ reg = os.path.normpath(reg)
+ if reg.startswith('.'):
+ raise WorkflowFilesError(
+ 'Workflow name cannot be a path that points to the cylc-run '
+ 'directory or above')
run_dir = get_suite_run_dir(reg)
if not os.path.isdir(run_dir):
- raise WorkflowFilesError(f'No workflow directory found at {run_dir}')
+ raise WorkflowFilesError(f'No directory found at {run_dir}')
try:
detect_old_contact_file(reg)
except SuiteServiceFileError as exc:
@@ -580,12 +583,18 @@ def clean(reg):
# Note: 'share/cycle' must come before 'share'
for name, path in possible_symlinks:
if os.path.islink(path):
+ # Ensure symlink is pointing to expected directory
target = os.path.realpath(path)
- target_reg_dir = target.rsplit(name, 1)[0]
- # Ensure symlink not pointing to wrong directory!
- if not target_reg_dir.endswith(f'cylc-run/{reg}/'):
+ if os.path.exists(target) and not os.path.isdir(target):
raise WorkflowFilesError(
- f'Symlink target {target} does not match expected pattern')
+ f'Invalid Cylc symlink directory {path} -> {target}\n'
+ f'Target is not a directory')
+ expected_end = os.path.join('cylc-run', reg, name)
+ if not target.endswith(expected_end):
+ raise WorkflowFilesError(
+ f'Invalid Cylc symlink directory {path} -> {target}\n'
+ f'Expected target to end with "{expected_end}"')
+ target_reg_dir = target.rsplit(name, 1)[0]
if os.path.isdir(target_reg_dir):
remove_dir(target_reg_dir)
remove_dir(run_dir)
@@ -739,7 +748,7 @@ def _check_child_dirs(path, depth_count=1):
def is_valid_run_dir(path):
- """Return True if path is a valid run directory, else False.
+ """Return True if path is a valid, existing run directory, else False.
Args:
path (str): if this is a relative path, it is taken to be relative to
From 5abf13d297734af8d332b7732660c454f430561a Mon Sep 17 00:00:00 2001
From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com>
Date: Mon, 23 Nov 2020 17:03:13 +0000
Subject: [PATCH 06/14] Unit tests for cylc clean & helpers
---
tests/unit/test_pathutil.py | 84 ++++++++++++++++++-
tests/unit/test_suite_files.py | 149 +++++++++++++++++++++++++++++++++
2 files changed, 230 insertions(+), 3 deletions(-)
diff --git a/tests/unit/test_pathutil.py b/tests/unit/test_pathutil.py
index dff14979220..df555f07c72 100644
--- a/tests/unit/test_pathutil.py
+++ b/tests/unit/test_pathutil.py
@@ -15,7 +15,6 @@
# along with this program. If not, see .
"""Tests for "cylc.flow.pathutil"."""
-from tempfile import TemporaryDirectory
from unittest import TestCase
from unittest.mock import patch, MagicMock, call
@@ -38,8 +37,10 @@
get_suite_run_config_log_dir,
get_suite_run_share_dir,
get_suite_run_work_dir,
- get_suite_test_log_name, make_localhost_symlinks,
- make_suite_run_tree, make_symlink,
+ get_suite_test_log_name,
+ make_localhost_symlinks,
+ make_suite_run_tree,
+ remove_dir
)
@@ -290,6 +291,83 @@ def test_incorrect_environment_variables_raise_error(
make_localhost_symlinks('test_workflow')
+@pytest.mark.parametrize(
+ 'filetype, expected_err',
+ [('dir', None),
+ ('file', NotADirectoryError),
+ (None, FileNotFoundError)]
+)
+def test_remove_dir(filetype, expected_err, tmp_path):
+ """Test that remove_dir() can delete nested dirs and handle bad paths."""
+ test_path = tmp_path.joinpath('foo/bar')
+ if filetype == 'dir':
+ # Test removal of sub directories too
+ sub_dir = test_path.joinpath('baz')
+ sub_dir.mkdir(parents=True)
+ sub_dir_file = sub_dir.joinpath('meow')
+ sub_dir_file.touch()
+ elif filetype == 'file':
+ test_path = tmp_path.joinpath('meow')
+ test_path.touch()
+
+ if expected_err:
+ with pytest.raises(expected_err):
+ remove_dir(test_path)
+ else:
+ remove_dir(test_path)
+ assert test_path.exists() is False
+ assert test_path.is_symlink() is False
+
+
+@pytest.mark.parametrize(
+ 'target, expected_err',
+ [('dir', None),
+ ('file', NotADirectoryError),
+ (None, None)]
+)
+def test_remove_dir_symlinks(target, expected_err, tmp_path):
+ """Test that remove_dir() can delete symlinks, including the target."""
+ target_path = tmp_path.joinpath('x/y')
+ target_path.mkdir(parents=True)
+
+ tmp_path.joinpath('a').mkdir()
+ symlink_path = tmp_path.joinpath('a/b')
+
+ if target == 'dir':
+ # Add a file into the the target dir to check it removes that ok
+ target_path.joinpath('meow').touch()
+ symlink_path.symlink_to(target_path)
+ elif target == 'file':
+ target_path = target_path.joinpath('meow')
+ target_path.touch()
+ symlink_path.symlink_to(target_path)
+ elif target is None:
+ symlink_path.symlink_to(target_path)
+ # Break symlink
+ target_path.rmdir()
+
+ if expected_err:
+ with pytest.raises(expected_err):
+ remove_dir(symlink_path)
+ else:
+ remove_dir(symlink_path)
+ for path in [symlink_path, target_path]:
+ assert path.exists() is False
+ assert path.is_symlink() is False
+
+
+def test_remove_dir_relative(tmp_path):
+ """Test that you cannot use remove_dir() on a relative path.
+
+ When removing a path, we want to be absolute-ly sure where it is!
+ """
+ # cd to temp dir in case we accidentally succeed in deleting the path
+ os.chdir(tmp_path)
+ with pytest.raises(ValueError) as cm:
+ remove_dir('foo/bar')
+ assert 'Path must be absolute' in str(cm.value)
+
+
if __name__ == '__main__':
from unittest import main
main()
diff --git a/tests/unit/test_suite_files.py b/tests/unit/test_suite_files.py
index ac436b46563..739372481bc 100644
--- a/tests/unit/test_suite_files.py
+++ b/tests/unit/test_suite_files.py
@@ -324,3 +324,152 @@ def mock_scandir(path):
assert 'Nested run directories not allowed' in str(exc.value)
# Run dir nested below max scan depth - not ideal but passes:
suite_files.check_nested_run_dirs('a/d')
+
+
+@pytest.mark.parametrize(
+ 'reg, expected_err',
+ [('foo/bar/', None),
+ ('/foo/bar', SuiteServiceFileError)]
+)
+def test_validate_reg(reg, expected_err):
+ if expected_err:
+ with pytest.raises(expected_err) as exc:
+ suite_files._validate_reg(reg)
+ assert 'cannot be an absolute path' in str(exc.value)
+ else:
+ suite_files._validate_reg(reg)
+
+
+@pytest.mark.parametrize(
+ 'reg, props',
+ [
+ ('foo/bar/', {}),
+ ('foo/..', {
+ 'no dir': True,
+ 'err': WorkflowFilesError,
+ 'err msg': ('cannot be a path that points to the cylc-run '
+ 'directory or above')
+ }),
+ ('foo/../..', {
+ 'no dir': True,
+ 'err': WorkflowFilesError,
+ 'err msg': ('cannot be a path that points to the cylc-run '
+ 'directory or above')
+ }),
+ ('foo', {
+ 'not stopped': True,
+ 'err': SuiteServiceFileError,
+ 'err msg': 'Cannot remove running workflow'
+ }),
+ ('foo', {
+ 'no dir': True,
+ 'err': WorkflowFilesError,
+ 'err msg': 'No directory found'
+ }),
+ ('foo', {
+ 'symlink dirs': {
+ 'log': 'sym-log',
+ 'share': 'sym-share',
+ 'share/cycle': 'sym-cycle',
+ 'work': 'sym-work'
+ }
+ }),
+ ('foo', {
+ 'symlink dirs': {
+ 'run': 'sym-run',
+ 'log': 'sym-log',
+ 'share': 'sym-share',
+ 'share/cycle': 'sym-cycle',
+ 'work': 'sym-work'
+ }
+ }),
+ ('foo', {
+ 'bad symlink': {
+ 'type': 'file',
+ 'path': 'sym-log/cylc-run/foo/meow.txt'
+ },
+ 'err': WorkflowFilesError,
+ 'err msg': 'Target is not a directory'
+ }),
+ ('foo', {
+ 'bad symlink': {
+ 'type': 'dir',
+ 'path': 'sym-log/bad/path'
+ },
+ 'err': WorkflowFilesError,
+ 'err msg': 'Expected target to end with "cylc-run/foo/log"'
+ })
+ ]
+)
+def test_clean(reg, props, monkeypatch, tmp_path):
+ """Test the clean() function.
+
+ Params:
+ reg (str): Workflow name.
+ props (dict): Possible values are (all optional):
+ 'err' (Exception): Expected error.
+ 'err msg' (str): Message that is expected to be in the exception.
+ 'no dir' (bool): If True, do not create run dir for this test case.
+ 'not stopped' (bool): If True, simulate that the workflow is
+ still running.
+ 'symlink dirs' (dict): As you would find in the global config
+ under [symlink dirs][platform].
+ 'bad symlink' (dict): Simulate an invalid log symlink dir:
+ 'type' (str): 'file' or 'dir'.
+ 'path' (str): Path of the symlink target relative to tmp_path.
+ """
+ # --- Setup ---
+ tmp_path.joinpath('cylc-run').mkdir()
+ run_dir = tmp_path.joinpath('cylc-run', reg)
+ symlink_dirs = props.get('symlink dirs')
+ bad_symlink = props.get('bad symlink')
+ if not props.get('no dir') and (
+ not symlink_dirs or 'run' not in symlink_dirs):
+ run_dir.mkdir(parents=True)
+
+ dirs_to_check = [run_dir]
+ if symlink_dirs:
+ if 'run' in symlink_dirs:
+ dst = tmp_path.joinpath(symlink_dirs['run'], 'cylc-run', reg)
+ dst.mkdir(parents=True)
+ run_dir.symlink_to(dst)
+ dirs_to_check.append(dst)
+ symlink_dirs.pop('run')
+ for s, d in symlink_dirs.items():
+ dst = tmp_path.joinpath(d, 'cylc-run', reg, s)
+ dst.mkdir(parents=True)
+ src = run_dir.joinpath(s)
+ src.symlink_to(dst)
+ dirs_to_check.append(dst.parent)
+ if bad_symlink:
+ dst = tmp_path.joinpath(bad_symlink['path'])
+ if bad_symlink['type'] == 'file':
+ dst.parent.mkdir(parents=True)
+ dst.touch()
+ else:
+ dst.mkdir(parents=True)
+ src = run_dir.joinpath('log')
+ src.symlink_to(dst)
+
+ def mocked_detect_old_contact_file(reg):
+ if props.get('not stopped'):
+ raise SuiteServiceFileError('Mocked error')
+
+ monkeypatch.setattr('cylc.flow.suite_files.detect_old_contact_file',
+ mocked_detect_old_contact_file)
+ monkeypatch.setattr('cylc.flow.suite_files.get_suite_run_dir',
+ lambda x: run_dir)
+
+ # --- The actual test ---
+ expected_err = props.get('err')
+ if expected_err:
+ with pytest.raises(expected_err) as exc:
+ suite_files.clean(reg)
+ expected_msg = props.get('err msg')
+ if expected_msg:
+ assert expected_msg in str(exc.value)
+ else:
+ suite_files.clean(reg)
+ for d in dirs_to_check:
+ assert d.exists() is False
+ assert d.is_symlink() is False
From 411c335ce1b76127e4c635c8fcfc61ce2284f807 Mon Sep 17 00:00:00 2001
From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com>
Date: Tue, 24 Nov 2020 11:51:22 +0000
Subject: [PATCH 07/14] Fix cylc clean for nested suite name e.g. foo/bar/baz
---
cylc/flow/suite_files.py | 25 +++++++++++++++++--------
tests/unit/test_suite_files.py | 6 ++++--
2 files changed, 21 insertions(+), 10 deletions(-)
diff --git a/cylc/flow/suite_files.py b/cylc/flow/suite_files.py
index 69749c3c16a..de2677a8e9e 100644
--- a/cylc/flow/suite_files.py
+++ b/cylc/flow/suite_files.py
@@ -557,6 +557,10 @@ def register(reg=None, source=None, redirect=False):
def clean(reg):
"""Remove stopped workflows on the local scheduler filesystem.
+ Deletes the run dir in ~/cylc-run and any symlink dirs. Note: if the
+ run dir has been manually deleted, it will not be possible to clean the
+ symlink dirs.
+
Args:
reg (str): workflow name.
"""
@@ -577,11 +581,10 @@ def clean(reg):
# TODO: check task_jobs table in database to see what platforms are used
- base_path = os.path.realpath(run_dir)
- possible_symlinks = [(name, os.path.join(base_path, name)) for name in [
+ possible_symlinks = [(name, os.path.join(run_dir, name)) for name in [
'log', 'share/cycle', 'share', 'work']]
# Note: 'share/cycle' must come before 'share'
- for name, path in possible_symlinks:
+ for name, path in [*possible_symlinks, ('', run_dir)]:
if os.path.islink(path):
# Ensure symlink is pointing to expected directory
target = os.path.realpath(path)
@@ -589,15 +592,21 @@ def clean(reg):
raise WorkflowFilesError(
f'Invalid Cylc symlink directory {path} -> {target}\n'
f'Target is not a directory')
- expected_end = os.path.join('cylc-run', reg, name)
+ expected_end = str(Path('cylc-run', reg, name))
if not target.endswith(expected_end):
raise WorkflowFilesError(
f'Invalid Cylc symlink directory {path} -> {target}\n'
f'Expected target to end with "{expected_end}"')
- target_reg_dir = target.rsplit(name, 1)[0]
- if os.path.isdir(target_reg_dir):
- remove_dir(target_reg_dir)
- remove_dir(run_dir)
+ reg_depth = len(Path(reg).parts) - 1
+ target_reg_dir = target.rsplit(name, 1)[0] if name else target
+ if reg_depth > 0:
+ target_top_parent = Path(target_reg_dir).parents[reg_depth - 1]
+ else:
+ target_top_parent = target_reg_dir
+ if os.path.isdir(target_top_parent):
+ remove_dir(target_top_parent)
+ run_dir_top_parent = get_suite_run_dir(Path(reg).parts[0])
+ remove_dir(run_dir_top_parent)
def remove_keys_on_server(keys):
diff --git a/tests/unit/test_suite_files.py b/tests/unit/test_suite_files.py
index 739372481bc..299fec29c55 100644
--- a/tests/unit/test_suite_files.py
+++ b/tests/unit/test_suite_files.py
@@ -19,6 +19,7 @@
from unittest import mock
import os.path
+from pathlib import Path
from cylc.flow import suite_files
from cylc.flow.exceptions import SuiteServiceFileError, WorkflowFilesError
@@ -421,13 +422,14 @@ def test_clean(reg, props, monkeypatch, tmp_path):
# --- Setup ---
tmp_path.joinpath('cylc-run').mkdir()
run_dir = tmp_path.joinpath('cylc-run', reg)
+ run_dir_top_parent = tmp_path.joinpath('cylc-run', Path(reg).parts[0])
symlink_dirs = props.get('symlink dirs')
bad_symlink = props.get('bad symlink')
if not props.get('no dir') and (
not symlink_dirs or 'run' not in symlink_dirs):
run_dir.mkdir(parents=True)
- dirs_to_check = [run_dir]
+ dirs_to_check = [run_dir_top_parent]
if symlink_dirs:
if 'run' in symlink_dirs:
dst = tmp_path.joinpath(symlink_dirs['run'], 'cylc-run', reg)
@@ -458,7 +460,7 @@ def mocked_detect_old_contact_file(reg):
monkeypatch.setattr('cylc.flow.suite_files.detect_old_contact_file',
mocked_detect_old_contact_file)
monkeypatch.setattr('cylc.flow.suite_files.get_suite_run_dir',
- lambda x: run_dir)
+ lambda x: tmp_path.joinpath('cylc-run', x))
# --- The actual test ---
expected_err = props.get('err')
From 64136322643bdcf0864eb12c77a3ff15d165c417 Mon Sep 17 00:00:00 2001
From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com>
Date: Tue, 24 Nov 2020 11:51:35 +0000
Subject: [PATCH 08/14] Functional test for cylc clean
---
tests/functional/cylc-clean/00-basic.t | 124 ++++++++++++++++++++++++
tests/functional/cylc-clean/test_header | 1 +
2 files changed, 125 insertions(+)
create mode 100644 tests/functional/cylc-clean/00-basic.t
create mode 120000 tests/functional/cylc-clean/test_header
diff --git a/tests/functional/cylc-clean/00-basic.t b/tests/functional/cylc-clean/00-basic.t
new file mode 100644
index 00000000000..088f0d809a1
--- /dev/null
+++ b/tests/functional/cylc-clean/00-basic.t
@@ -0,0 +1,124 @@
+#!/bin/bash
+# THIS FILE IS PART OF THE CYLC SUITE ENGINE.
+# Copyright (C) NIWA & British Crown (Met Office) & Contributors.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+# -----------------------------------------------------------------------------
+# Test the cylc clean command
+
+. "$(dirname "$0")/test_header"
+set_test_number 5
+
+create_test_global_config "" "
+[symlink dirs]
+ [[localhost]]
+ run = ${TEST_DIR}/sym-run
+ log = ${TEST_DIR}/sym-log
+ share = ${TEST_DIR}/sym-share
+ share/cycle = ${TEST_DIR}/sym-cycle
+ work = ${TEST_DIR}/sym-work
+"
+init_suite "${TEST_NAME_BASE}" << '__FLOW__'
+[scheduling]
+ [[graph]]
+ R1 = darmok
+__FLOW__
+# -----------------------------------------------------------------------------
+TEST_NAME="run-dir-tree-pre-clean"
+tree "$SUITE_RUN_DIR" > "${TEST_NAME}.stdout"
+# Remove last line of output:
+sed -i '$d' "${TEST_NAME}.stdout"
+
+cmp_ok "${TEST_NAME}.stdout" << __TREE__
+${HOME}/cylc-run/${SUITE_NAME}
+|-- log -> ${TEST_DIR}/sym-log/cylc-run/${SUITE_NAME}/log
+|-- share -> ${TEST_DIR}/sym-share/cylc-run/${SUITE_NAME}/share
+\`-- work -> ${TEST_DIR}/sym-work/cylc-run/${SUITE_NAME}/work
+
+__TREE__
+
+TEST_NAME="test-dir-tree-pre-clean"
+tree "${TEST_DIR}/sym-"* > "${TEST_NAME}.stdout"
+# Remove last line of output:
+sed -i '$d' "${TEST_NAME}.stdout"
+# Note: backticks need to be escaped in the heredoc
+cmp_ok "${TEST_NAME}.stdout" << __TREE__
+${TEST_DIR}/sym-cycle
+\`-- cylc-run
+ \`-- cylctb-${CYLC_TEST_TIME_INIT}
+ \`-- f
+ \`-- cylc-clean
+ \`-- ${TEST_NAME_BASE}
+ \`-- share
+ \`-- cycle
+${TEST_DIR}/sym-log
+\`-- cylc-run
+ \`-- cylctb-${CYLC_TEST_TIME_INIT}
+ \`-- f
+ \`-- cylc-clean
+ \`-- ${TEST_NAME_BASE}
+ \`-- log
+${TEST_DIR}/sym-run
+\`-- cylc-run
+ \`-- cylctb-${CYLC_TEST_TIME_INIT}
+ \`-- f
+ \`-- cylc-clean
+ \`-- ${TEST_NAME_BASE}
+ |-- log -> ${TEST_DIR}/sym-log/cylc-run/${SUITE_NAME}/log
+ |-- share -> ${TEST_DIR}/sym-share/cylc-run/${SUITE_NAME}/share
+ \`-- work -> ${TEST_DIR}/sym-work/cylc-run/${SUITE_NAME}/work
+${TEST_DIR}/sym-share
+\`-- cylc-run
+ \`-- cylctb-${CYLC_TEST_TIME_INIT}
+ \`-- f
+ \`-- cylc-clean
+ \`-- ${TEST_NAME_BASE}
+ \`-- share
+ \`-- cycle -> ${TEST_DIR}/sym-cycle/cylc-run/${SUITE_NAME}/share/cycle
+${TEST_DIR}/sym-work
+\`-- cylc-run
+ \`-- cylctb-${CYLC_TEST_TIME_INIT}
+ \`-- f
+ \`-- cylc-clean
+ \`-- ${TEST_NAME_BASE}
+ \`-- work
+
+__TREE__
+# -----------------------------------------------------------------------------
+run_ok "cylc-clean" cylc clean "$SUITE_NAME"
+# -----------------------------------------------------------------------------
+TEST_NAME="run-dir-not-exist-post-clean"
+exists_fail "$SUITE_RUN_DIR"
+
+TEST_NAME="test-dir-tree-post-clean"
+tree "${TEST_DIR}/sym-"* > "${TEST_NAME}.stdout"
+# Remove last line of output:
+sed -i '$d' "${TEST_NAME}.stdout"
+
+cmp_ok "${TEST_NAME}.stdout" << __TREE__
+${TEST_DIR}/sym-cycle
+\`-- cylc-run
+${TEST_DIR}/sym-log
+\`-- cylc-run
+${TEST_DIR}/sym-run
+\`-- cylc-run
+${TEST_DIR}/sym-share
+\`-- cylc-run
+${TEST_DIR}/sym-work
+\`-- cylc-run
+
+__TREE__
+# -----------------------------------------------------------------------------
+purge
+exit
diff --git a/tests/functional/cylc-clean/test_header b/tests/functional/cylc-clean/test_header
new file mode 120000
index 00000000000..90bd5a36f92
--- /dev/null
+++ b/tests/functional/cylc-clean/test_header
@@ -0,0 +1 @@
+../lib/bash/test_header
\ No newline at end of file
From 2a05203410669e8db6677de20783c0295d4cfbef Mon Sep 17 00:00:00 2001
From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com>
Date: Tue, 24 Nov 2020 14:29:53 +0000
Subject: [PATCH 09/14] Make sure tree command is installed for functional test
---
.github/workflows/test_functional.yml | 2 +-
tests/functional/cylc-clean/00-basic.t | 9 ++++++---
2 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/.github/workflows/test_functional.yml b/.github/workflows/test_functional.yml
index 30b9b0f1cc2..eb0ae5bd3c0 100644
--- a/.github/workflows/test_functional.yml
+++ b/.github/workflows/test_functional.yml
@@ -39,7 +39,7 @@ jobs:
- name: Apt-Get
run: |
sudo apt-get update
- sudo apt-get install -y sqlite3
+ sudo apt-get install -y sqlite3 tree
- name: Add .github/bin/ to PATH
# Sets up mocked mail command & any other custom executables
diff --git a/tests/functional/cylc-clean/00-basic.t b/tests/functional/cylc-clean/00-basic.t
index 088f0d809a1..3623b459c33 100644
--- a/tests/functional/cylc-clean/00-basic.t
+++ b/tests/functional/cylc-clean/00-basic.t
@@ -18,6 +18,9 @@
# Test the cylc clean command
. "$(dirname "$0")/test_header"
+if ! command -v 'tree' >'/dev/null'; then
+ skip_all '"tree" command not available'
+fi
set_test_number 5
create_test_global_config "" "
@@ -36,7 +39,7 @@ init_suite "${TEST_NAME_BASE}" << '__FLOW__'
__FLOW__
# -----------------------------------------------------------------------------
TEST_NAME="run-dir-tree-pre-clean"
-tree "$SUITE_RUN_DIR" > "${TEST_NAME}.stdout"
+tree --charset=ascii "$SUITE_RUN_DIR" > "${TEST_NAME}.stdout"
# Remove last line of output:
sed -i '$d' "${TEST_NAME}.stdout"
@@ -49,7 +52,7 @@ ${HOME}/cylc-run/${SUITE_NAME}
__TREE__
TEST_NAME="test-dir-tree-pre-clean"
-tree "${TEST_DIR}/sym-"* > "${TEST_NAME}.stdout"
+tree --charset=ascii "${TEST_DIR}/sym-"* > "${TEST_NAME}.stdout"
# Remove last line of output:
sed -i '$d' "${TEST_NAME}.stdout"
# Note: backticks need to be escaped in the heredoc
@@ -102,7 +105,7 @@ TEST_NAME="run-dir-not-exist-post-clean"
exists_fail "$SUITE_RUN_DIR"
TEST_NAME="test-dir-tree-post-clean"
-tree "${TEST_DIR}/sym-"* > "${TEST_NAME}.stdout"
+tree --charset=ascii "${TEST_DIR}/sym-"* > "${TEST_NAME}.stdout"
# Remove last line of output:
sed -i '$d' "${TEST_NAME}.stdout"
From caab8b87d9d62d78aea21c42c67c6fe0d8ba2f60 Mon Sep 17 00:00:00 2001
From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com>
Date: Tue, 24 Nov 2020 18:32:23 +0000
Subject: [PATCH 10/14] cylc clean: ensure workflow siblings are not removed
Both in the run dir and symlink dirs.
---
cylc/flow/suite_files.py | 72 ++++++++++++++++++--------
tests/functional/cylc-clean/00-basic.t | 13 +++--
tests/unit/test_suite_files.py | 26 +++++++++-
3 files changed, 85 insertions(+), 26 deletions(-)
diff --git a/cylc/flow/suite_files.py b/cylc/flow/suite_files.py
index de2677a8e9e..7a78ed0ba7a 100644
--- a/cylc/flow/suite_files.py
+++ b/cylc/flow/suite_files.py
@@ -570,8 +570,8 @@ def clean(reg):
raise WorkflowFilesError(
'Workflow name cannot be a path that points to the cylc-run '
'directory or above')
- run_dir = get_suite_run_dir(reg)
- if not os.path.isdir(run_dir):
+ run_dir = Path(get_suite_run_dir(reg))
+ if not run_dir.is_dir():
raise WorkflowFilesError(f'No directory found at {run_dir}')
try:
detect_old_contact_file(reg)
@@ -581,32 +581,62 @@ def clean(reg):
# TODO: check task_jobs table in database to see what platforms are used
- possible_symlinks = [(name, os.path.join(run_dir, name)) for name in [
- 'log', 'share/cycle', 'share', 'work']]
- # Note: 'share/cycle' must come before 'share'
- for name, path in [*possible_symlinks, ('', run_dir)]:
- if os.path.islink(path):
- # Ensure symlink is pointing to expected directory
- target = os.path.realpath(path)
- if os.path.exists(target) and not os.path.isdir(target):
+ possible_symlinks = [(Path(name), Path(run_dir, name)) for name in [
+ 'log', 'share/cycle', 'share', 'work', '']]
+ # Note: 'share/cycle' must come before 'share', and '' must come last
+ for name, path in possible_symlinks:
+ if path.is_symlink():
+ # Ensure symlink is pointing to expected directory. If not,
+ # something is wrong and we should abort
+ target = path.resolve()
+ if target.exists() and not target.is_dir():
raise WorkflowFilesError(
f'Invalid Cylc symlink directory {path} -> {target}\n'
f'Target is not a directory')
expected_end = str(Path('cylc-run', reg, name))
- if not target.endswith(expected_end):
+ if not str(target).endswith(expected_end):
raise WorkflowFilesError(
f'Invalid Cylc symlink directory {path} -> {target}\n'
f'Expected target to end with "{expected_end}"')
- reg_depth = len(Path(reg).parts) - 1
- target_reg_dir = target.rsplit(name, 1)[0] if name else target
- if reg_depth > 0:
- target_top_parent = Path(target_reg_dir).parents[reg_depth - 1]
- else:
- target_top_parent = target_reg_dir
- if os.path.isdir(target_top_parent):
- remove_dir(target_top_parent)
- run_dir_top_parent = get_suite_run_dir(Path(reg).parts[0])
- remove_dir(run_dir_top_parent)
+ # Remove /cylc-run/
+ target_cylc_run_dir = str(target).rsplit(str(reg), 1)[0]
+ target_reg_dir = Path(target_cylc_run_dir, reg)
+ if target_reg_dir.is_dir():
+ remove_dir(target_reg_dir)
+ # Remove empty parents
+ _remove_empty_reg_parents(reg, target_reg_dir)
+
+ remove_dir(run_dir)
+ _remove_empty_reg_parents(reg, run_dir)
+
+
+def _remove_empty_reg_parents(reg, path):
+ """If reg is nested e.g. a/b/c, work our way up the tree, removing empty
+ parents only.
+
+ Args:
+ reg (str): workflow name, e.g. a/b/c
+ path (str): path to this directory, e.g. /foo/bar/a/b/c
+
+ Example:
+ _remove_empty_reg_parents('a/b/c', '/foo/bar/a/b/c') would remove
+ /foo/bar/a/b (assuming it's empty), then /foo/bar/a (assuming it's
+ empty).
+ """
+ reg = Path(reg)
+ reg_depth = len(reg.parts) - 1
+ path = Path(path)
+ if not path.is_absolute():
+ raise ValueError('Path must be absolute')
+ for i in range(reg_depth):
+ parent = path.parents[i]
+ if not parent.is_dir():
+ continue
+ try:
+ parent.rmdir()
+ LOG.info(f'Removing directory: {parent}')
+ except OSError:
+ break
def remove_keys_on_server(keys):
diff --git a/tests/functional/cylc-clean/00-basic.t b/tests/functional/cylc-clean/00-basic.t
index 3623b459c33..64e4361d331 100644
--- a/tests/functional/cylc-clean/00-basic.t
+++ b/tests/functional/cylc-clean/00-basic.t
@@ -37,6 +37,8 @@ init_suite "${TEST_NAME_BASE}" << '__FLOW__'
[[graph]]
R1 = darmok
__FLOW__
+# Create a fake sibling workflow dir in the sym-log dir:
+mkdir "${TEST_DIR}/sym-log/cylc-run/cylctb-${CYLC_TEST_TIME_INIT}/leave-me-alone"
# -----------------------------------------------------------------------------
TEST_NAME="run-dir-tree-pre-clean"
tree --charset=ascii "$SUITE_RUN_DIR" > "${TEST_NAME}.stdout"
@@ -68,10 +70,11 @@ ${TEST_DIR}/sym-cycle
${TEST_DIR}/sym-log
\`-- cylc-run
\`-- cylctb-${CYLC_TEST_TIME_INIT}
- \`-- f
- \`-- cylc-clean
- \`-- ${TEST_NAME_BASE}
- \`-- log
+ |-- f
+ | \`-- cylc-clean
+ | \`-- ${TEST_NAME_BASE}
+ | \`-- log
+ \`-- leave-me-alone
${TEST_DIR}/sym-run
\`-- cylc-run
\`-- cylctb-${CYLC_TEST_TIME_INIT}
@@ -114,6 +117,8 @@ ${TEST_DIR}/sym-cycle
\`-- cylc-run
${TEST_DIR}/sym-log
\`-- cylc-run
+ \`-- cylctb-${CYLC_TEST_TIME_INIT}
+ \`-- leave-me-alone
${TEST_DIR}/sym-run
\`-- cylc-run
${TEST_DIR}/sym-share
diff --git a/tests/unit/test_suite_files.py b/tests/unit/test_suite_files.py
index 299fec29c55..4fd2750db95 100644
--- a/tests/unit/test_suite_files.py
+++ b/tests/unit/test_suite_files.py
@@ -367,7 +367,7 @@ def test_validate_reg(reg, expected_err):
'err': WorkflowFilesError,
'err msg': 'No directory found'
}),
- ('foo', {
+ ('foo/bar', {
'symlink dirs': {
'log': 'sym-log',
'share': 'sym-share',
@@ -475,3 +475,27 @@ def mocked_detect_old_contact_file(reg):
for d in dirs_to_check:
assert d.exists() is False
assert d.is_symlink() is False
+
+
+def test_remove_empty_reg_parents(tmp_path):
+ """Test that _remove_empty_parents() doesn't remove parents containing a
+ sibling."""
+ reg = 'foo/bar/baz/qux'
+ path = tmp_path.joinpath(reg)
+ tmp_path.joinpath('foo/bar/baz').mkdir(parents=True)
+ sibling_reg = 'foo/darmok'
+ sibling_path = tmp_path.joinpath(sibling_reg)
+ sibling_path.mkdir()
+ suite_files._remove_empty_reg_parents(reg, path)
+ assert tmp_path.joinpath('foo/bar').exists() is False
+ assert tmp_path.joinpath('foo').exists() is True
+ # Also path must be absolute
+ with pytest.raises(ValueError) as exc:
+ suite_files._remove_empty_reg_parents('foo/darmok', 'meow/foo/darmok')
+ assert 'Path must be absolute' in str(exc.value)
+ # Check it skips non-existent dirs, and stops at the right place too
+ tmp_path.joinpath('foo/bar').mkdir()
+ sibling_path.rmdir()
+ suite_files._remove_empty_reg_parents(reg, path)
+ assert tmp_path.joinpath('foo').exists() is False
+ assert tmp_path.exists() is True
From 7c6b385884652d7e39d4d7112e68707a5e86d919 Mon Sep 17 00:00:00 2001
From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com>
Date: Tue, 24 Nov 2020 18:37:11 +0000
Subject: [PATCH 11/14] cylc clean: do not raise error if specified workflow
doesn't exist
---
cylc/flow/suite_files.py | 3 ++-
tests/unit/test_suite_files.py | 6 +-----
2 files changed, 3 insertions(+), 6 deletions(-)
diff --git a/cylc/flow/suite_files.py b/cylc/flow/suite_files.py
index 7a78ed0ba7a..3cd3333dfd4 100644
--- a/cylc/flow/suite_files.py
+++ b/cylc/flow/suite_files.py
@@ -572,7 +572,8 @@ def clean(reg):
'directory or above')
run_dir = Path(get_suite_run_dir(reg))
if not run_dir.is_dir():
- raise WorkflowFilesError(f'No directory found at {run_dir}')
+ LOG.info(f'No workflow directory to clean at {run_dir}')
+ return
try:
detect_old_contact_file(reg)
except SuiteServiceFileError as exc:
diff --git a/tests/unit/test_suite_files.py b/tests/unit/test_suite_files.py
index 4fd2750db95..a33dffbf445 100644
--- a/tests/unit/test_suite_files.py
+++ b/tests/unit/test_suite_files.py
@@ -345,6 +345,7 @@ def test_validate_reg(reg, expected_err):
'reg, props',
[
('foo/bar/', {}),
+ ('foo', {'no dir': True}),
('foo/..', {
'no dir': True,
'err': WorkflowFilesError,
@@ -362,11 +363,6 @@ def test_validate_reg(reg, expected_err):
'err': SuiteServiceFileError,
'err msg': 'Cannot remove running workflow'
}),
- ('foo', {
- 'no dir': True,
- 'err': WorkflowFilesError,
- 'err msg': 'No directory found'
- }),
('foo/bar', {
'symlink dirs': {
'log': 'sym-log',
From 8c7e368aa1dcc4a2dd1acf9ccb2df34c3df63c15 Mon Sep 17 00:00:00 2001
From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com>
Date: Mon, 30 Nov 2020 16:14:16 +0000
Subject: [PATCH 12/14] Make cylc clean functional test less flaky
---
tests/functional/cylc-clean/00-basic.t | 17 +++++++++++------
1 file changed, 11 insertions(+), 6 deletions(-)
diff --git a/tests/functional/cylc-clean/00-basic.t b/tests/functional/cylc-clean/00-basic.t
index 64e4361d331..f0dce8673f4 100644
--- a/tests/functional/cylc-clean/00-basic.t
+++ b/tests/functional/cylc-clean/00-basic.t
@@ -21,7 +21,7 @@
if ! command -v 'tree' >'/dev/null'; then
skip_all '"tree" command not available'
fi
-set_test_number 5
+set_test_number 6
create_test_global_config "" "
[symlink dirs]
@@ -37,8 +37,13 @@ init_suite "${TEST_NAME_BASE}" << '__FLOW__'
[[graph]]
R1 = darmok
__FLOW__
+
+run_ok "${TEST_NAME_BASE}-val" cylc validate "$SUITE_NAME"
+
# Create a fake sibling workflow dir in the sym-log dir:
mkdir "${TEST_DIR}/sym-log/cylc-run/cylctb-${CYLC_TEST_TIME_INIT}/leave-me-alone"
+
+FUNCTIONAL_DIR="${TEST_SOURCE_DIR_BASE%/*}"
# -----------------------------------------------------------------------------
TEST_NAME="run-dir-tree-pre-clean"
tree --charset=ascii "$SUITE_RUN_DIR" > "${TEST_NAME}.stdout"
@@ -62,7 +67,7 @@ cmp_ok "${TEST_NAME}.stdout" << __TREE__
${TEST_DIR}/sym-cycle
\`-- cylc-run
\`-- cylctb-${CYLC_TEST_TIME_INIT}
- \`-- f
+ \`-- ${FUNCTIONAL_DIR}
\`-- cylc-clean
\`-- ${TEST_NAME_BASE}
\`-- share
@@ -70,7 +75,7 @@ ${TEST_DIR}/sym-cycle
${TEST_DIR}/sym-log
\`-- cylc-run
\`-- cylctb-${CYLC_TEST_TIME_INIT}
- |-- f
+ |-- ${FUNCTIONAL_DIR}
| \`-- cylc-clean
| \`-- ${TEST_NAME_BASE}
| \`-- log
@@ -78,7 +83,7 @@ ${TEST_DIR}/sym-log
${TEST_DIR}/sym-run
\`-- cylc-run
\`-- cylctb-${CYLC_TEST_TIME_INIT}
- \`-- f
+ \`-- ${FUNCTIONAL_DIR}
\`-- cylc-clean
\`-- ${TEST_NAME_BASE}
|-- log -> ${TEST_DIR}/sym-log/cylc-run/${SUITE_NAME}/log
@@ -87,7 +92,7 @@ ${TEST_DIR}/sym-run
${TEST_DIR}/sym-share
\`-- cylc-run
\`-- cylctb-${CYLC_TEST_TIME_INIT}
- \`-- f
+ \`-- ${FUNCTIONAL_DIR}
\`-- cylc-clean
\`-- ${TEST_NAME_BASE}
\`-- share
@@ -95,7 +100,7 @@ ${TEST_DIR}/sym-share
${TEST_DIR}/sym-work
\`-- cylc-run
\`-- cylctb-${CYLC_TEST_TIME_INIT}
- \`-- f
+ \`-- ${FUNCTIONAL_DIR}
\`-- cylc-clean
\`-- ${TEST_NAME_BASE}
\`-- work
From b35b10ba000f18dac5303b9bcd5393ceb1a2c6ed Mon Sep 17 00:00:00 2001
From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com>
Date: Tue, 1 Dec 2020 12:13:58 +0000
Subject: [PATCH 13/14] Prevent cylc-clean functional test clashing
---
tests/functional/cylc-clean/00-basic.t | 54 ++++++++++++++------------
1 file changed, 29 insertions(+), 25 deletions(-)
diff --git a/tests/functional/cylc-clean/00-basic.t b/tests/functional/cylc-clean/00-basic.t
index f0dce8673f4..5c09be7635d 100644
--- a/tests/functional/cylc-clean/00-basic.t
+++ b/tests/functional/cylc-clean/00-basic.t
@@ -23,14 +23,18 @@ if ! command -v 'tree' >'/dev/null'; then
fi
set_test_number 6
+# Generate random name for symlink dirs to avoid any clashes with other tests
+SYM_NAME="$(mktemp -u)"
+SYM_NAME="${SYM_NAME##*tmp.}"
+
create_test_global_config "" "
[symlink dirs]
[[localhost]]
- run = ${TEST_DIR}/sym-run
- log = ${TEST_DIR}/sym-log
- share = ${TEST_DIR}/sym-share
- share/cycle = ${TEST_DIR}/sym-cycle
- work = ${TEST_DIR}/sym-work
+ run = ${TEST_DIR}/${SYM_NAME}-run
+ log = ${TEST_DIR}/${SYM_NAME}-log
+ share = ${TEST_DIR}/${SYM_NAME}-share
+ share/cycle = ${TEST_DIR}/${SYM_NAME}-cycle
+ work = ${TEST_DIR}/${SYM_NAME}-work
"
init_suite "${TEST_NAME_BASE}" << '__FLOW__'
[scheduling]
@@ -41,7 +45,7 @@ __FLOW__
run_ok "${TEST_NAME_BASE}-val" cylc validate "$SUITE_NAME"
# Create a fake sibling workflow dir in the sym-log dir:
-mkdir "${TEST_DIR}/sym-log/cylc-run/cylctb-${CYLC_TEST_TIME_INIT}/leave-me-alone"
+mkdir "${TEST_DIR}/${SYM_NAME}-log/cylc-run/cylctb-${CYLC_TEST_TIME_INIT}/leave-me-alone"
FUNCTIONAL_DIR="${TEST_SOURCE_DIR_BASE%/*}"
# -----------------------------------------------------------------------------
@@ -52,19 +56,19 @@ sed -i '$d' "${TEST_NAME}.stdout"
cmp_ok "${TEST_NAME}.stdout" << __TREE__
${HOME}/cylc-run/${SUITE_NAME}
-|-- log -> ${TEST_DIR}/sym-log/cylc-run/${SUITE_NAME}/log
-|-- share -> ${TEST_DIR}/sym-share/cylc-run/${SUITE_NAME}/share
-\`-- work -> ${TEST_DIR}/sym-work/cylc-run/${SUITE_NAME}/work
+|-- log -> ${TEST_DIR}/${SYM_NAME}-log/cylc-run/${SUITE_NAME}/log
+|-- share -> ${TEST_DIR}/${SYM_NAME}-share/cylc-run/${SUITE_NAME}/share
+\`-- work -> ${TEST_DIR}/${SYM_NAME}-work/cylc-run/${SUITE_NAME}/work
__TREE__
TEST_NAME="test-dir-tree-pre-clean"
-tree --charset=ascii "${TEST_DIR}/sym-"* > "${TEST_NAME}.stdout"
+tree --charset=ascii "${TEST_DIR}/${SYM_NAME}-"* > "${TEST_NAME}.stdout"
# Remove last line of output:
sed -i '$d' "${TEST_NAME}.stdout"
# Note: backticks need to be escaped in the heredoc
cmp_ok "${TEST_NAME}.stdout" << __TREE__
-${TEST_DIR}/sym-cycle
+${TEST_DIR}/${SYM_NAME}-cycle
\`-- cylc-run
\`-- cylctb-${CYLC_TEST_TIME_INIT}
\`-- ${FUNCTIONAL_DIR}
@@ -72,7 +76,7 @@ ${TEST_DIR}/sym-cycle
\`-- ${TEST_NAME_BASE}
\`-- share
\`-- cycle
-${TEST_DIR}/sym-log
+${TEST_DIR}/${SYM_NAME}-log
\`-- cylc-run
\`-- cylctb-${CYLC_TEST_TIME_INIT}
|-- ${FUNCTIONAL_DIR}
@@ -80,24 +84,24 @@ ${TEST_DIR}/sym-log
| \`-- ${TEST_NAME_BASE}
| \`-- log
\`-- leave-me-alone
-${TEST_DIR}/sym-run
+${TEST_DIR}/${SYM_NAME}-run
\`-- cylc-run
\`-- cylctb-${CYLC_TEST_TIME_INIT}
\`-- ${FUNCTIONAL_DIR}
\`-- cylc-clean
\`-- ${TEST_NAME_BASE}
- |-- log -> ${TEST_DIR}/sym-log/cylc-run/${SUITE_NAME}/log
- |-- share -> ${TEST_DIR}/sym-share/cylc-run/${SUITE_NAME}/share
- \`-- work -> ${TEST_DIR}/sym-work/cylc-run/${SUITE_NAME}/work
-${TEST_DIR}/sym-share
+ |-- log -> ${TEST_DIR}/${SYM_NAME}-log/cylc-run/${SUITE_NAME}/log
+ |-- share -> ${TEST_DIR}/${SYM_NAME}-share/cylc-run/${SUITE_NAME}/share
+ \`-- work -> ${TEST_DIR}/${SYM_NAME}-work/cylc-run/${SUITE_NAME}/work
+${TEST_DIR}/${SYM_NAME}-share
\`-- cylc-run
\`-- cylctb-${CYLC_TEST_TIME_INIT}
\`-- ${FUNCTIONAL_DIR}
\`-- cylc-clean
\`-- ${TEST_NAME_BASE}
\`-- share
- \`-- cycle -> ${TEST_DIR}/sym-cycle/cylc-run/${SUITE_NAME}/share/cycle
-${TEST_DIR}/sym-work
+ \`-- cycle -> ${TEST_DIR}/${SYM_NAME}-cycle/cylc-run/${SUITE_NAME}/share/cycle
+${TEST_DIR}/${SYM_NAME}-work
\`-- cylc-run
\`-- cylctb-${CYLC_TEST_TIME_INIT}
\`-- ${FUNCTIONAL_DIR}
@@ -113,22 +117,22 @@ TEST_NAME="run-dir-not-exist-post-clean"
exists_fail "$SUITE_RUN_DIR"
TEST_NAME="test-dir-tree-post-clean"
-tree --charset=ascii "${TEST_DIR}/sym-"* > "${TEST_NAME}.stdout"
+tree --charset=ascii "${TEST_DIR}/${SYM_NAME}-"* > "${TEST_NAME}.stdout"
# Remove last line of output:
sed -i '$d' "${TEST_NAME}.stdout"
cmp_ok "${TEST_NAME}.stdout" << __TREE__
-${TEST_DIR}/sym-cycle
+${TEST_DIR}/${SYM_NAME}-cycle
\`-- cylc-run
-${TEST_DIR}/sym-log
+${TEST_DIR}/${SYM_NAME}-log
\`-- cylc-run
\`-- cylctb-${CYLC_TEST_TIME_INIT}
\`-- leave-me-alone
-${TEST_DIR}/sym-run
+${TEST_DIR}/${SYM_NAME}-run
\`-- cylc-run
-${TEST_DIR}/sym-share
+${TEST_DIR}/${SYM_NAME}-share
\`-- cylc-run
-${TEST_DIR}/sym-work
+${TEST_DIR}/${SYM_NAME}-work
\`-- cylc-run
__TREE__
From 743d7199b4bd54056df7365a1ffbe47ab8e3768c Mon Sep 17 00:00:00 2001
From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com>
Date: Tue, 1 Dec 2020 12:16:25 +0000
Subject: [PATCH 14/14] Update changelog
---
CHANGES.md | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/CHANGES.md b/CHANGES.md
index 0b5adfeebbc..7d76a08419c 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -122,9 +122,13 @@ hierarchy and ability to set site config directory.
[#3883](https://github.com/cylc/cylc-flow/pull/3883) - Added a new workflow
config option `[scheduling]stop after cycle point`.
+[#3961](https://github.com/cylc/cylc-flow/pull/3961) - Added a new command:
+`cylc clean`.
+
### Fixes
-[#3917](https://github.com/cylc/cylc-flow/pull/3917) - Fix a bug that caused one of the hostname resolution tests to fail in certain environments.
+[#3917](https://github.com/cylc/cylc-flow/pull/3917) - Fix a bug that caused
+one of the hostname resolution tests to fail in certain environments.
[#3879](https://github.com/cylc/cylc-flow/pull/3879) - Removed Google
Groups e-mail from pip packaging metadata. Users browsing PYPI will have