Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cylc clean - initial implementation #3961

Merged
merged 14 commits into from
Dec 2, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test_functional.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
- name: Apt-Get
run: |
sudo apt-get update
sudo apt-get install -y sqlite3
sudo apt-get install -y sqlite3 tree

- name: Add .github/bin/ to PATH
# Sets up mocked mail command & any other custom executables
Expand Down
6 changes: 5 additions & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,13 @@ hierarchy and ability to set site config directory.
[#3883](https://github.com/cylc/cylc-flow/pull/3883) - Added a new workflow
config option `[scheduling]stop after cycle point`.

[#3961](https://github.com/cylc/cylc-flow/pull/3961) - Added a new command:
`cylc clean`.

### Fixes

[#3917](https://github.com/cylc/cylc-flow/pull/3917) - Fix a bug that caused one of the hostname resolution tests to fail in certain environments.
[#3917](https://github.com/cylc/cylc-flow/pull/3917) - Fix a bug that caused
one of the hostname resolution tests to fail in certain environments.

[#3879](https://github.com/cylc/cylc-flow/pull/3879) - Removed Google
Groups e-mail from pip packaging metadata. Users browsing PYPI will have
Expand Down
2 changes: 1 addition & 1 deletion cylc/flow/etc/cylc-bash-completion
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ _cylc() {
cur="${COMP_WORDS[COMP_CWORD]}"
sec="${COMP_WORDS[1]}"
opts="$(cylc scan -t name 2>/dev/null)"
suite_cmds="broadcast|bcast|cat-log|log|check-versions|diff|compare|dump|edit|ext-trigger|external-trigger|get-directory|get-suite-config|get-config|get-suite-version|get-cylc-version|graph|graph-diff|hold|insert|kill|list|ls|tui|ping|poll|print|register|release|unhold|reload|remove|report-timings|reset|restart|run|start|scan|search|grep|set-verbosity|show|set-outputs|stop|shutdown|single|suite-state|test-battery|trigger|validate|view|warranty"
suite_cmds="broadcast|bcast|cat-log|check-versions|clean|compare|diff|dump|edit|ext-trigger|external-trigger|get-directory|get-suite-config|get-config|get-suite-version|get-cylc-version|graph|graph-diff|hold|insert|kill|list|log|ls|tui|ping|poll|print|register|release|unhold|reload|remove|report-timings|reset|restart|run|start|scan|search|grep|set-verbosity|show|set-outputs|stop|shutdown|single|suite-state|test-battery|trigger|validate|view|warranty"


if [[ ${COMP_CWORD} -eq 1 ]]; then
Expand Down
30 changes: 29 additions & 1 deletion cylc/flow/pathutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,4 +202,32 @@ def make_symlink(src, dst):
try:
os.symlink(src, dst, target_is_directory=True)
except Exception as exc:
raise WorkflowFilesError(f"Error when symlinking '{exc}'")
raise WorkflowFilesError(f"Error when symlinking\n{exc}")


def remove_dir(path):
"""Delete a directory including contents, including the target directory
if the specified path is a symlink.

Args:
path (str): the absolute path of the directory to delete.
"""
if not os.path.isabs(path):
raise ValueError('Path must be absolute')
if os.path.exists(path) and not os.path.isdir(path):
raise NotADirectoryError(path)
if os.path.islink(path):
if os.path.exists(path):
target = os.path.realpath(path)
LOG.info(
f'Removing symlink target directory: ({path} ->) {target}')
rmtree(target)
LOG.info(f'Removing symlink: {path}')
else:
LOG.info(f'Removing broken symlink: {path}')
os.remove(path)
elif not os.path.exists(path):
raise FileNotFoundError(path)
else:
LOG.info(f'Removing directory: {path}')
rmtree(path)
7 changes: 3 additions & 4 deletions cylc/flow/platforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ def platform_from_job_info(platforms, job, remote):
raise PlatformLookupError('No platform found matching your task')


def get_host_from_platform(platform, method=None):
def get_host_from_platform(platform, method='random'):
"""Placeholder for a more sophisticated function which returns a host
given a platform dictionary.

Expand All @@ -349,8 +349,7 @@ def get_host_from_platform(platform, method=None):
method (str):
Name a function to use when selecting hosts from list provided
by platform.

- None or 'random': Pick the first host from list
- 'random' (default): Pick a random host from list
- 'first': Return the first host in the list

Returns:
Expand All @@ -361,7 +360,7 @@ def get_host_from_platform(platform, method=None):
- Random Selection with check for host availability

"""
if method is None or method == 'random':
if method == 'random':
return random.choice(platform['hosts'])
elif method == 'first':
return platform['hosts'][0]
Expand Down
57 changes: 57 additions & 0 deletions cylc/flow/scripts/clean.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#!/usr/bin/env python3

# THIS FILE IS PART OF THE CYLC SUITE ENGINE.
# Copyright (C) NIWA & British Crown (Met Office) & Contributors.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""cylc clean [OPTIONS] ARGS

Remove a stopped workflow from the local scheduler filesystem.

NOTE: this command is intended for workflows installed with `cylc install`. If
this is run for a workflow that was instead written directly in ~/cylc-run and
not backed up elsewhere, it will be lost.
oliver-sanders marked this conversation as resolved.
Show resolved Hide resolved

It will also remove an symlink directory targets. For now, it will fail to
remove workflow files/directories on a remote host.

Suite names can be hierarchical, corresponding to the path under ~/cylc-run.

Examples:
# Remove the workflow at ~/cylc-run/foo
$ cylc clean foo

"""

from cylc.flow.option_parsers import CylcOptionParser as COP
from cylc.flow.terminal import cli_function
from cylc.flow.suite_files import clean


def get_option_parser():
parser = COP(
__doc__,
argdoc=[("REG", "Suite name")]
)
return parser


@cli_function(get_option_parser)
def main(parser, opts, reg):
clean(reg)


if __name__ == "__main__":
main()
144 changes: 124 additions & 20 deletions cylc/flow/suite_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@
import aiofiles

from cylc.flow import LOG
from cylc.flow.exceptions import SuiteServiceFileError
from cylc.flow.pathutil import get_suite_run_dir, make_localhost_symlinks
from cylc.flow.exceptions import SuiteServiceFileError, WorkflowFilesError
from cylc.flow.pathutil import (
get_suite_run_dir, make_localhost_symlinks, remove_dir)
from cylc.flow.platforms import get_platform
from cylc.flow.hostuserutil import (
get_user,
Expand Down Expand Up @@ -478,24 +479,18 @@ def register(reg=None, source=None, redirect=False):

Raise:
SuiteServiceFileError:
No flow.cylc file found in source location.
Illegal name (can look like a relative path, but not absolute).
Another suite already has this name (unless --redirect).
Trying to register a suite nested inside of another.
- No flow.cylc file found in source location.
- Illegal name (can look like a relative path, but not absolute).
- Another suite already has this name (unless --redirect).
- Trying to register a suite nested inside of another.
"""
if reg is None:
reg = os.path.basename(os.getcwd())
make_localhost_symlinks(reg)
is_valid, message = SuiteNameValidator.validate(reg)
if not is_valid:
raise SuiteServiceFileError(f'invalid suite name - {message}')

if os.path.isabs(reg):
raise SuiteServiceFileError(
f'suite name cannot be an absolute path: {reg}')

_validate_reg(reg)
check_nested_run_dirs(reg)

make_localhost_symlinks(reg)

if source is not None:
if os.path.basename(source) == SuiteFiles.FLOW_FILE:
source = os.path.dirname(source)
Expand Down Expand Up @@ -559,6 +554,92 @@ def register(reg=None, source=None, redirect=False):
return reg


def clean(reg):
"""Remove stopped workflows on the local scheduler filesystem.

Deletes the run dir in ~/cylc-run and any symlink dirs. Note: if the
run dir has been manually deleted, it will not be possible to clean the
symlink dirs.

Args:
reg (str): workflow name.
"""
_validate_reg(reg)
reg = os.path.normpath(reg)
if reg.startswith('.'):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BTW the suite name validator has you covered here:

not_starts_with(r'\.', r'\-'),

Copy link
Member Author

@MetRonnie MetRonnie Nov 30, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The difference is I'm checking after normalising the path, because you could have cylc clean foo/../.. which normalises to cylc clean .., i.e. rm $HOME. However, I guess I could change the order to

reg = os.path.normpath(reg)
_validate_reg(reg)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we should accept path manipulations, though I'm unaware of a simple method to block them (testing for .. doesn't work for all cases).

Probs good enough to ensure that the flow exists within the cylc-run dir:

reg = Path(reg)
reg.resolve()
try:
    reg.relative_to(run_dir_path)
except ValueError:
    raise Exception('Flow not found')

Anyway, not a problem for this PR.

raise WorkflowFilesError(
'Workflow name cannot be a path that points to the cylc-run '
'directory or above')
run_dir = Path(get_suite_run_dir(reg))
if not run_dir.is_dir():
LOG.info(f'No workflow directory to clean at {run_dir}')
return
try:
detect_old_contact_file(reg)
except SuiteServiceFileError as exc:
raise SuiteServiceFileError(
f'Cannot remove running workflow.\n\n{exc}')

# TODO: check task_jobs table in database to see what platforms are used

possible_symlinks = [(Path(name), Path(run_dir, name)) for name in [
'log', 'share/cycle', 'share', 'work', '']]
# Note: 'share/cycle' must come before 'share', and '' must come last
for name, path in possible_symlinks:
if path.is_symlink():
# Ensure symlink is pointing to expected directory. If not,
# something is wrong and we should abort
target = path.resolve()
if target.exists() and not target.is_dir():
raise WorkflowFilesError(
f'Invalid Cylc symlink directory {path} -> {target}\n'
f'Target is not a directory')
expected_end = str(Path('cylc-run', reg, name))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

str(Path( 😆

if not str(target).endswith(expected_end):
raise WorkflowFilesError(
f'Invalid Cylc symlink directory {path} -> {target}\n'
f'Expected target to end with "{expected_end}"')
# Remove <symlink_dir>/cylc-run/<reg>
target_cylc_run_dir = str(target).rsplit(str(reg), 1)[0]
target_reg_dir = Path(target_cylc_run_dir, reg)
if target_reg_dir.is_dir():
remove_dir(target_reg_dir)
# Remove empty parents
_remove_empty_reg_parents(reg, target_reg_dir)

remove_dir(run_dir)
_remove_empty_reg_parents(reg, run_dir)


def _remove_empty_reg_parents(reg, path):
"""If reg is nested e.g. a/b/c, work our way up the tree, removing empty
parents only.

Args:
reg (str): workflow name, e.g. a/b/c
path (str): path to this directory, e.g. /foo/bar/a/b/c

Example:
_remove_empty_reg_parents('a/b/c', '/foo/bar/a/b/c') would remove
/foo/bar/a/b (assuming it's empty), then /foo/bar/a (assuming it's
empty).
"""
reg = Path(reg)
reg_depth = len(reg.parts) - 1
path = Path(path)
if not path.is_absolute():
raise ValueError('Path must be absolute')
for i in range(reg_depth):
parent = path.parents[i]
if not parent.is_dir():
continue
try:
parent.rmdir()
LOG.info(f'Removing directory: {parent}')
except OSError:
break


def remove_keys_on_server(keys):
"""Removes server-held authentication keys"""
# WARNING, DESTRUCTIVE. Removes old keys if they already exist.
Expand Down Expand Up @@ -650,12 +731,36 @@ def _load_local_item(item, path):
return None


def _validate_reg(reg):
"""Check suite name is valid.

Args:
reg (str): Suite name

Raise:
SuiteServiceFileError:
- reg has form of absolute path or is otherwise not valid
"""
is_valid, message = SuiteNameValidator.validate(reg)
if not is_valid:
raise SuiteServiceFileError(f'invalid suite name "{reg}" - {message}')
if os.path.isabs(reg):
raise SuiteServiceFileError(
f'suite name cannot be an absolute path: {reg}')


def check_nested_run_dirs(reg):
"""Disallow nested run dirs e.g. trying to register foo/bar where foo is
already a valid suite directory.

Args:
reg (str): suite name

Raise:
WorkflowFilesError:
- reg dir is nested inside a run dir
- reg dir contains a nested run dir (if not deeper than max scan
depth)
"""
exc_msg = (
'Nested run directories not allowed - cannot register suite name '
Expand All @@ -665,15 +770,15 @@ def _check_child_dirs(path, depth_count=1):
for result in os.scandir(path):
if result.is_dir() and not result.is_symlink():
if is_valid_run_dir(result.path):
raise SuiteServiceFileError(exc_msg % (reg, result.path))
raise WorkflowFilesError(exc_msg % (reg, result.path))
if depth_count < MAX_SCAN_DEPTH:
_check_child_dirs(result.path, depth_count + 1)

reg_path = os.path.normpath(reg)
parent_dir = os.path.dirname(reg_path)
while parent_dir != '':
if is_valid_run_dir(parent_dir):
raise SuiteServiceFileError(
raise WorkflowFilesError(
exc_msg % (reg, get_cylc_run_abs_path(parent_dir)))
parent_dir = os.path.dirname(parent_dir)

Expand All @@ -683,7 +788,7 @@ def _check_child_dirs(path, depth_count=1):


def is_valid_run_dir(path):
"""Return True if path is a valid run directory, else False.
"""Return True if path is a valid, existing run directory, else False.

Args:
path (str): if this is a relative path, it is taken to be relative to
Expand All @@ -704,5 +809,4 @@ def get_cylc_run_abs_path(path):
"""
if os.path.isabs(path):
return path
cylc_run_dir = os.path.expandvars(get_platform()['run directory'])
return os.path.join(cylc_run_dir, path)
return get_suite_run_dir(path)
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ cylc.command =
broadcast = cylc.flow.scripts.broadcast:main
cat-log = cylc.flow.scripts.cat_log:main
check-versions = cylc.flow.scripts.check_versions:main
clean = cylc.flow.scripts.clean:main
client = cylc.flow.scripts.client:main
cycle-point = cylc.flow.scripts.cycle_point:main
diff = cylc.flow.scripts.diff:main
Expand Down
Loading