From 59ba89e73432b5601d4dfa82bd96b43927abe96c Mon Sep 17 00:00:00 2001
From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com>
Date: Fri, 11 Jun 2021 18:06:51 +0100
Subject: [PATCH] Move glob_run_dir() to workflow_files
Add unit tests
---
cylc/flow/pathutil.py | 54 +-----
cylc/flow/workflow_files.py | 57 ++++++-
tests/unit/filetree.py | 167 ++++++++++++++++++
tests/unit/test_pathutil.py | 2 +-
tests/unit/test_workflow_files.py | 275 ++++++++++++++++--------------
5 files changed, 368 insertions(+), 187 deletions(-)
create mode 100644 tests/unit/filetree.py
diff --git a/cylc/flow/pathutil.py b/cylc/flow/pathutil.py
index 719e3bc4356..91b06d720ca 100644
--- a/cylc/flow/pathutil.py
+++ b/cylc/flow/pathutil.py
@@ -15,12 +15,11 @@
# along with this program. If not, see .
"""Functions to return paths to common workflow files and directories."""
-import glob
import os
from pathlib import Path
import re
from shutil import rmtree
-from typing import Container, Dict, Iterable, List, Set, Union
+from typing import Dict, Iterable, Set, Union
from cylc.flow import LOG
from cylc.flow.cfgspec.glbl_cfg import glbl_cfg
@@ -314,57 +313,6 @@ def parse_rm_dirs(rm_dirs: Iterable[str]) -> Set[str]:
return result
-def glob_in_run_dir(
- run_dir: Union[Path, str], pattern: str, symlink_dirs: Container[Path]
-) -> List[Path]:
- """Execute a (recursive) glob search in the given run directory.
-
- Returns list of any absolute paths that match the pattern. However:
- * Does not follow symlinks (apart from the spcedified symlink dirs).
- * Also does not return matching subpaths of matching directories (because
- that would be redundant).
-
- Args:
- run_dir: Absolute path of the workflow run dir.
- pattern: The glob pattern.
- symlink_dirs: Absolute paths to the workflow's symlink dirs.
- """
- # Note: use os.path.join, not pathlib, to preserve trailing slash if
- # present in pattern
- pattern = os.path.join(glob.escape(str(run_dir)), pattern)
- # Note: don't use pathlib.Path.glob() because when you give it an exact
- # filename instead of pattern, it doesn't return broken symlinks
- matches = sorted(Path(i) for i in glob.iglob(pattern, recursive=True))
- # sort guarantees parents come before their children
- if len(matches) == 1 and not os.path.lexists(matches[0]):
- # https://bugs.python.org/issue35201
- return []
- results: List[Path] = []
- subpath_excludes: Set[Path] = set()
- for path in matches:
- for rel_ancestor in reversed(path.relative_to(run_dir).parents):
- ancestor = run_dir / rel_ancestor
- if ancestor in subpath_excludes:
- break
- if ancestor.is_symlink() and ancestor not in symlink_dirs:
- # Do not follow non-standard symlinks
- subpath_excludes.add(ancestor)
- break
- if (not symlink_dirs) and ancestor in results:
- # We can be sure all subpaths of this ancestor are redundant
- subpath_excludes.add(ancestor)
- break
- if ancestor == path.parent:
- # Final iteration over ancestors
- if ancestor in matches and path not in symlink_dirs:
- # Redundant (but don't exclude subpaths in case any of the
- # subpaths are std symlink dirs)
- break
- else: # no break
- results.append(path)
- return results
-
-
def is_relative_to(path1: Union[Path, str], path2: Union[Path, str]) -> bool:
"""Return whether or not path1 is relative to path2."""
# In future, we can just use pathlib.Path.is_relative_to()
diff --git a/cylc/flow/workflow_files.py b/cylc/flow/workflow_files.py
index b46e5530c2b..c3ed0babff2 100644
--- a/cylc/flow/workflow_files.py
+++ b/cylc/flow/workflow_files.py
@@ -20,6 +20,7 @@
from collections import deque
from enum import Enum
from functools import partial
+import glob
import logging
import os
from pathlib import Path
@@ -29,8 +30,8 @@
from subprocess import Popen, PIPE, DEVNULL
import time
from typing import (
- Any, Deque, Dict, Iterable, List, NamedTuple, Optional, Set, Tuple,
- TYPE_CHECKING, Union
+ Any, Container, Deque, Dict, Iterable, List, NamedTuple, Optional, Set,
+ Tuple, TYPE_CHECKING, Union
)
import zmq.auth
@@ -47,7 +48,6 @@
from cylc.flow.pathutil import (
expand_path,
get_workflow_run_dir,
- glob_in_run_dir,
make_localhost_symlinks,
parse_rm_dirs,
remove_dir_and_target,
@@ -723,6 +723,57 @@ def get_symlink_dirs(reg: str, run_dir: Union[Path, str]) -> Dict[str, Path]:
return ret
+def glob_in_run_dir(
+ run_dir: Union[Path, str], pattern: str, symlink_dirs: Container[Path]
+) -> List[Path]:
+ """Execute a (recursive) glob search in the given run directory.
+
+ Returns list of any absolute paths that match the pattern. However:
+ * Does not follow symlinks (apart from the spcedified symlink dirs).
+ * Also does not return matching subpaths of matching directories (because
+ that would be redundant).
+
+ Args:
+ run_dir: Absolute path of the workflow run dir.
+ pattern: The glob pattern.
+ symlink_dirs: Absolute paths to the workflow's symlink dirs.
+ """
+ # Note: use os.path.join, not pathlib, to preserve trailing slash if
+ # present in pattern
+ pattern = os.path.join(glob.escape(str(run_dir)), pattern)
+ # Note: don't use pathlib.Path.glob() because when you give it an exact
+ # filename instead of pattern, it doesn't return broken symlinks
+ matches = sorted(Path(i) for i in glob.iglob(pattern, recursive=True))
+ # sort guarantees parents come before their children
+ if len(matches) == 1 and not os.path.lexists(matches[0]):
+ # https://bugs.python.org/issue35201
+ return []
+ results: List[Path] = []
+ subpath_excludes: Set[Path] = set()
+ for path in matches:
+ for rel_ancestor in reversed(path.relative_to(run_dir).parents):
+ ancestor = run_dir / rel_ancestor
+ if ancestor in subpath_excludes:
+ break
+ if ancestor.is_symlink() and ancestor not in symlink_dirs:
+ # Do not follow non-standard symlinks
+ subpath_excludes.add(ancestor)
+ break
+ if (not symlink_dirs) and ancestor in results:
+ # We can be sure all subpaths of this ancestor are redundant
+ subpath_excludes.add(ancestor)
+ break
+ if ancestor == path.parent:
+ # Final iteration over ancestors
+ if ancestor in matches and path not in symlink_dirs:
+ # Redundant (but don't exclude subpaths in case any of the
+ # subpaths are std symlink dirs)
+ break
+ else: # no break
+ results.append(path)
+ return results
+
+
def _clean_using_glob(
run_dir: Path, pattern: str, symlink_dirs: Iterable[str]
) -> None:
diff --git a/tests/unit/filetree.py b/tests/unit/filetree.py
new file mode 100644
index 00000000000..bef09e4e556
--- /dev/null
+++ b/tests/unit/filetree.py
@@ -0,0 +1,167 @@
+# THIS FILE IS PART OF THE CYLC WORKFLOW ENGINE.
+# Copyright (C) NIWA & British Crown (Met Office) & Contributors.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+"""Utilities for testing workflow directory structure.
+
+(There should be no tests in this module.)
+
+A filetree is represented by a dict like so:
+ {
+ # Dirs are represented by dicts (which are also sub-filetrees):
+ 'dir': {
+ 'another-dir': {
+ # Files are represented by None:
+ 'file.txt': None
+ }
+ },
+ # Symlinks are represented by pathlib.Path, with the target represented
+ # by the relative path from the tmp_path directory:
+ 'symlink': Path('dir/another-dir')
+ }
+"""
+
+from pathlib import Path
+from typing import Any, Dict, List
+
+
+def create_filetree(
+ filetree: Dict[str, Any], location: Path, root: Path
+) -> None:
+ """Create the directory structure represented by the filetree dict.
+
+ Args:
+ filetree: The filetree to create.
+ location: The absolute path in which to create the filetree.
+ root: The top-level dir from which relative symlink targets are
+ located (typically tmp_path).
+ """
+ for name, entry in filetree.items():
+ path = location / name
+ if isinstance(entry, dict):
+ path.mkdir(exist_ok=True)
+ create_filetree(entry, path, root)
+ elif isinstance(entry, Path):
+ path.symlink_to(root / entry)
+ else:
+
+ path.touch()
+
+
+def get_filetree_as_list(
+ filetree: Dict[str, Any], location: Path
+) -> List[str]:
+ """Return a list of the paths in a filetree.
+
+ Args:
+ filetree: The filetree to listify.
+ location: The absolute path to the filetree.
+ """
+ ret: List[str] = []
+ for name, entry in filetree.items():
+ path = location / name
+ ret.append(str(path))
+ if isinstance(entry, dict):
+ ret.extend(get_filetree_as_list(entry, path))
+ return ret
+
+
+FILETREE_1 = {
+ 'cylc-run': {'foo': {'bar': {
+ '.service': {'db': None},
+ 'flow.cylc': None,
+ 'log': Path('sym/cylc-run/foo/bar/log'),
+ 'mirkwood': Path('you-shall-not-pass/mirkwood'),
+ 'rincewind.txt': Path('you-shall-not-pass/rincewind.txt')
+ }}},
+ 'sym': {'cylc-run': {'foo': {'bar': {
+ 'log': {
+ 'darmok': Path('you-shall-not-pass/darmok'),
+ 'temba.txt': Path('you-shall-not-pass/temba.txt'),
+ 'bib': {
+ 'fortuna.txt': None
+ }
+ }
+ }}}},
+ 'you-shall-not-pass': { # Nothing in here should get deleted
+ 'darmok': {
+ 'jalad.txt': None
+ },
+ 'mirkwood': {
+ 'spiders.txt': None
+ },
+ 'rincewind.txt': None,
+ 'temba.txt': None
+ }
+}
+
+FILETREE_2 = {
+ 'cylc-run': {'foo': {'bar': Path('sym-run/cylc-run/foo/bar')}},
+ 'sym-run': {'cylc-run': {'foo': {'bar': {
+ '.service': {'db': None},
+ 'flow.cylc': None,
+ 'share': Path('sym-share/cylc-run/foo/bar/share')
+ }}}},
+ 'sym-share': {'cylc-run': {'foo': {'bar': {
+ 'share': {
+ 'cycle': Path('sym-cycle/cylc-run/foo/bar/share/cycle')
+ }
+ }}}},
+ 'sym-cycle': {'cylc-run': {'foo': {'bar': {
+ 'share': {
+ 'cycle': {
+ 'macklunkey.txt': None
+ }
+ }
+ }}}},
+ 'you-shall-not-pass': {}
+}
+
+FILETREE_3 = {
+ 'cylc-run': {'foo': {'bar': Path('sym-run/cylc-run/foo/bar')}},
+ 'sym-run': {'cylc-run': {'foo': {'bar': {
+ '.service': {'db': None},
+ 'flow.cylc': None,
+ 'share': {
+ 'cycle': Path('sym-cycle/cylc-run/foo/bar/share/cycle')
+ }
+ }}}},
+ 'sym-cycle': {'cylc-run': {'foo': {'bar': {
+ 'share': {
+ 'cycle': {
+ 'sokath.txt': None
+ }
+ }
+ }}}},
+ 'you-shall-not-pass': {}
+}
+
+FILETREE_4 = {
+ 'cylc-run': {'foo': {'bar': {
+ '.service': {'db': None},
+ 'flow.cylc': None,
+ 'share': {
+ 'cycle': Path('sym-cycle/cylc-run/foo/bar/share/cycle')
+ }
+ }}},
+ 'sym-cycle': {'cylc-run': {'foo': {'bar': {
+ 'share': {
+ 'cycle': {
+ 'kiazi.txt': None
+ }
+ }
+ }}}},
+ 'you-shall-not-pass': {}
+}
diff --git a/tests/unit/test_pathutil.py b/tests/unit/test_pathutil.py
index dea560d4c7f..6b3cf174ec3 100644
--- a/tests/unit/test_pathutil.py
+++ b/tests/unit/test_pathutil.py
@@ -432,7 +432,7 @@ def test_parse_rm_dirs(dirs: List[str], expected: Set[str]):
"cannot take paths that point to the run directory or above"),
]
)
-def test_parse_rm_dirs_bad(dirs: List[str], err_msg: str):
+def test_parse_rm_dirs__bad(dirs: List[str], err_msg: str):
"""Test parse_dirs() with bad inputs"""
with pytest.raises(UserInputError) as exc:
parse_rm_dirs(dirs)
diff --git a/tests/unit/test_workflow_files.py b/tests/unit/test_workflow_files.py
index 0cb045e0753..300dafea35a 100644
--- a/tests/unit/test_workflow_files.py
+++ b/tests/unit/test_workflow_files.py
@@ -20,7 +20,7 @@
from pathlib import Path
import pytest
import shutil
-from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type
+from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union
from unittest import mock
from cylc.flow import CYLC_LOG
@@ -39,12 +39,22 @@
_remote_clean_cmd,
check_flow_file,
check_nested_run_dirs,
+ get_symlink_dirs,
get_workflow_source_dir,
+ glob_in_run_dir,
reinstall_workflow,
search_install_source_dirs
)
from tests.unit.conftest import MonkeyMock
+from tests.unit.filetree import (
+ FILETREE_1,
+ FILETREE_2,
+ FILETREE_3,
+ FILETREE_4,
+ create_filetree,
+ get_filetree_as_list
+)
@pytest.mark.parametrize(
@@ -518,24 +528,144 @@ def test_clean__rm_dir_not_file(pattern: str, tmp_run_dir: Callable):
assert a_file.exists()
+@pytest.mark.parametrize(
+ 'filetree, expected',
+ [
+ pytest.param(
+ FILETREE_1,
+ {'log': 'sym/cylc-run/foo/bar/log'},
+ id="filetree1"
+ ),
+ pytest.param(
+ FILETREE_2,
+ {
+ 'share/cycle': 'sym-cycle/cylc-run/foo/bar/share/cycle',
+ 'share': 'sym-share/cylc-run/foo/bar/share',
+ '': 'sym-run/cylc-run/foo/bar/'
+ },
+ id="filetree2"
+ ),
+ pytest.param(
+ FILETREE_3,
+ {
+ 'share/cycle': 'sym-cycle/cylc-run/foo/bar/share/cycle',
+ '': 'sym-run/cylc-run/foo/bar/'
+ },
+ id="filetree3"
+ ),
+ pytest.param(
+ FILETREE_4,
+ {'share/cycle': 'sym-cycle/cylc-run/foo/bar/share/cycle'},
+ id="filetree4"
+ ),
+ ]
+)
+def test_get_symlink_dirs(
+ filetree: Dict[str, Any],
+ expected: Dict[str, Union[Path, str]],
+ tmp_run_dir: Callable, tmp_path: Path
+):
+ """Test get_symlink_dirs().
+
+ Params:
+ filetree: The directory structure to test against.
+ expected: The expected return dictionary, except with the values being
+ relative to tmp_path instead of absolute paths.
+ """
+ # Setup
+ cylc_run_dir = tmp_run_dir()
+ create_filetree(filetree, tmp_path, tmp_path)
+ reg = 'foo/bar'
+ for k, v in expected.items():
+ expected[k] = Path(tmp_path / v)
+ # Test
+ assert get_symlink_dirs(reg, cylc_run_dir / reg) == expected
+
+
+@pytest.mark.parametrize(
+ 'pattern, filetree, expected_matches',
+ [
+ pytest.param(
+ '**',
+ FILETREE_1,
+ ['cylc-run/foo/bar',
+ 'cylc-run/foo/bar/log'],
+ id="filetree1 **"
+ ),
+ pytest.param(
+ '*/*',
+ FILETREE_1,
+ ['cylc-run/foo/bar/flow.cylc',
+ 'cylc-run/foo/bar/log',
+ 'cylc-run/foo/bar/mirkwood',
+ 'cylc-run/foo/bar/rincewind.txt'],
+ id="filetree1 */*"
+ ),
+ pytest.param(
+ '**/*.txt',
+ FILETREE_1,
+ ['cylc-run/foo/bar/log/bib/fortuna.txt',
+ 'cylc-run/foo/bar/log/temba.txt',
+ 'cylc-run/foo/bar/rincewind.txt'],
+ id="filetree1 **/*.txt"
+ ),
+ pytest.param(
+ '**',
+ FILETREE_2,
+ ['cylc-run/foo/bar',
+ 'cylc-run/foo/bar/share',
+ 'cylc-run/foo/bar/share/cycle'],
+ id="filetree2 **"
+ ),
+ pytest.param(
+ '**',
+ FILETREE_3,
+ ['cylc-run/foo/bar',
+ 'cylc-run/foo/bar/share/cycle'],
+ id="filetree3 **"
+ ),
+ pytest.param(
+ '**/s*',
+ FILETREE_3,
+ ['cylc-run/foo/bar/share',
+ 'cylc-run/foo/bar/share/cycle/sokath.txt'],
+ id="filetree3 **/s*"
+ ),
+ pytest.param(
+ '**',
+ FILETREE_4,
+ ['cylc-run/foo/bar',
+ 'cylc-run/foo/bar/share/cycle'],
+ id="filetree4 **"
+ ),
+ ]
+)
+def test_glob_in_run_dir(
+ pattern: str,
+ filetree: Dict[str, Any],
+ expected_matches: List[str],
+ tmp_path: Path, tmp_run_dir: Callable
+) -> None:
+ """Test that glob_in_run_dir() returns the minimal set of results with
+ no redundant paths.
+ """
+ # Setup
+ cylc_run_dir: Path = tmp_run_dir()
+ reg = 'foo/bar'
+ run_dir = cylc_run_dir / reg
+ create_filetree(filetree, tmp_path, tmp_path)
+ symlink_dirs = [run_dir / i for i in get_symlink_dirs(reg, run_dir)]
+ expected = [tmp_path / i for i in expected_matches]
+ # Test
+ assert glob_in_run_dir(run_dir, pattern, symlink_dirs) == expected
+
+
@pytest.fixture
def filetree_for_testing_cylc_clean(tmp_path: Path):
"""Fixture that creates a filetree from the given dict, and returns which
files are expected to be deleted and which aren't.
- A filetree is represented by a dict like so:
- {
- # Dirs are represented by dicts (which are also sub-filetrees):
- 'dir': {
- 'another-dir': {
- # Files are represented by None:
- 'file.txt': None
- }
- },
- # Symlinks are represented by pathlib.Path, with the target represented
- # by the relative path from the tmp_path directory:
- 'symlink': Path('dir/another-dir')
- }
+ See tests/unit/filetree.py
Args:
reg: Workflow name.
@@ -554,7 +684,7 @@ def _filetree_for_testing_cylc_clean(
initial_filetree: Dict[str, Any],
filetree_left_behind: Dict[str, Any]
) -> Tuple[Path, List[str], List[str]]:
- create_filetree(initial_filetree, tmp_path)
+ create_filetree(initial_filetree, tmp_path, tmp_path)
files_not_to_delete = [
os.path.normpath(i) for i in
iglob(str(tmp_path / 'you-shall-not-pass/**'), recursive=True)
@@ -569,64 +699,9 @@ def _filetree_for_testing_cylc_clean(
)
run_dir = tmp_path / 'cylc-run' / reg
return run_dir, files_to_delete, files_not_to_delete
-
- def create_filetree(filetree: Dict[str, Any], location: Path) -> None:
- """Helper function"""
- for name, entry in filetree.items():
- path = location / name
- if isinstance(entry, dict):
- path.mkdir(exist_ok=True)
- create_filetree(entry, path)
- elif isinstance(entry, Path):
- path.symlink_to(tmp_path / entry)
- else:
- path.touch()
-
- def get_filetree_as_list(
- filetree: Dict[str, Any], location: Path
- ) -> List[str]:
- """Helper function"""
- ret: List[str] = []
- for name, entry in filetree.items():
- path = location / name
- ret.append(str(path))
- if isinstance(entry, dict):
- ret.extend(get_filetree_as_list(entry, path))
- return ret
-
return _filetree_for_testing_cylc_clean
-FILETREE_1 = {
- 'cylc-run': {'foo': {'bar': {
- '.service': {'db': None},
- 'flow.cylc': None,
- 'log': Path('sym/cylc-run/foo/bar/log'),
- 'mirkwood': Path('you-shall-not-pass/mirkwood'),
- 'rincewind.txt': Path('you-shall-not-pass/rincewind.txt')
- }}},
- 'sym': {'cylc-run': {'foo': {'bar': {
- 'log': {
- 'darmok': Path('you-shall-not-pass/darmok'),
- 'temba.txt': Path('you-shall-not-pass/temba.txt'),
- 'bib': {
- 'fortuna.txt': None
- }
- }
- }}}},
- 'you-shall-not-pass': { # Nothing in here should get deleted
- 'darmok': {
- 'jalad.txt': None
- },
- 'mirkwood': {
- 'spiders.txt': None
- },
- 'rincewind.txt': None,
- 'temba.txt': None
- }
-}
-
-
@pytest.mark.parametrize(
'pattern, initial_filetree, filetree_left_behind',
[
@@ -702,66 +777,6 @@ def test__clean_using_glob(
assert os.path.lexists(file) is False
-FILETREE_2 = {
- 'cylc-run': {'foo': {'bar': Path('sym-run/cylc-run/foo/bar')}},
- 'sym-run': {'cylc-run': {'foo': {'bar': {
- '.service': {'db': None},
- 'flow.cylc': None,
- 'share': Path('sym-share/cylc-run/foo/bar/share')
- }}}},
- 'sym-share': {'cylc-run': {'foo': {'bar': {
- 'share': {
- 'cycle': Path('sym-cycle/cylc-run/foo/bar/share/cycle')
- }
- }}}},
- 'sym-cycle': {'cylc-run': {'foo': {'bar': {
- 'share': {
- 'cycle': {
- 'macklunkey.txt': None
- }
- }
- }}}},
- 'you-shall-not-pass': {}
-}
-
-FILETREE_3 = {
- 'cylc-run': {'foo': {'bar': Path('sym-run/cylc-run/foo/bar')}},
- 'sym-run': {'cylc-run': {'foo': {'bar': {
- '.service': {'db': None},
- 'flow.cylc': None,
- 'share': {
- 'cycle': Path('sym-cycle/cylc-run/foo/bar/share/cycle')
- }
- }}}},
- 'sym-cycle': {'cylc-run': {'foo': {'bar': {
- 'share': {
- 'cycle': {
- 'sokath.txt': None
- }
- }
- }}}},
- 'you-shall-not-pass': {}
-}
-
-FILETREE_4 = {
- 'cylc-run': {'foo': {'bar': {
- '.service': {'db': None},
- 'flow.cylc': None,
- 'share': {
- 'cycle': Path('sym-cycle/cylc-run/foo/bar/share/cycle')
- }
- }}},
- 'sym-cycle': {'cylc-run': {'foo': {'bar': {
- 'share': {
- 'cycle': {
- 'kiazi.txt': None
- }
- }
- }}}},
- 'you-shall-not-pass': {}
-}
-
-
@pytest.mark.parametrize(
'rm_dirs, initial_filetree, filetree_left_behind',
[