From a438c69001ebfc01a4194f6a36c9357943f4391e Mon Sep 17 00:00:00 2001 From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com> Date: Mon, 26 Sep 2022 10:34:38 +0100 Subject: [PATCH] `log_vc_info`: Redirect diff straight to file to avoid blocking pipe (#5139) * log_vc_info: redirect diff straight to file to avoid blocking pipe * Update changelog --- CHANGES.md | 4 + cylc/flow/install_plugins/log_vc_info.py | 98 +++++++++++++-------- tests/unit/post_install/test_log_vc_info.py | 46 ++++++---- 3 files changed, 94 insertions(+), 54 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index a70cfee8406..e5aea3b3559 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -38,6 +38,10 @@ Maintenance release. [#5104](https://github.com/cylc/cylc-flow/pull/5104) - Fix retriggering of failed tasks after a reload. +[#5139](https://github.com/cylc/cylc-flow/pull/5139) - Fix bug where +`cylc install` could hang if there was a large uncommitted diff in the +source dir (for git/svn repos). + [#5131](https://github.com/cylc/cylc-flow/pull/5131) - Infer workflow run number for `workflow_state` xtrigger. diff --git a/cylc/flow/install_plugins/log_vc_info.py b/cylc/flow/install_plugins/log_vc_info.py index 9be634c5b0f..1aec3eecd5d 100644 --- a/cylc/flow/install_plugins/log_vc_info.py +++ b/cylc/flow/install_plugins/log_vc_info.py @@ -62,7 +62,9 @@ import json from pathlib import Path from subprocess import Popen, DEVNULL, PIPE -from typing import Any, Dict, Iterable, List, Optional, TYPE_CHECKING, Union +from typing import ( + Any, Dict, Iterable, List, Optional, TYPE_CHECKING, TextIO, Union, overload +) from cylc.flow import LOG from cylc.flow.exceptions import CylcError @@ -80,8 +82,6 @@ GIT: ['describe', '--always', '--dirty'] } -# git ['show', '--quiet', '--format=short'], - STATUS_COMMANDS: Dict[str, List[str]] = { SVN: ['status', '--non-interactive'], GIT: ['status', '--short'] @@ -189,13 +189,40 @@ def get_vc_info(path: Union[Path, str]) -> Optional[Dict[str, Any]]: return None -def _run_cmd(vcs: str, args: Iterable[str], cwd: Union[Path, str]) -> str: - """Run a VCS command, return stdout. +@overload +def _run_cmd( + vcs: str, args: Iterable[str], cwd: Union[Path, str], stdout: int = PIPE +) -> str: + ... + + +@overload +def _run_cmd( + vcs: str, args: Iterable[str], cwd: Union[Path, str], stdout: TextIO +) -> None: + ... + + +def _run_cmd( + vcs: str, + args: Iterable[str], + cwd: Union[Path, str], + stdout: Union[TextIO, int] = PIPE +) -> Optional[str]: + """Run a VCS command. Args: vcs: The version control system. args: The args to pass to the version control command. cwd: Directory to run the command in. + stdout: Where to redirect output (either PIPE or a + text stream/file object). Note: only use PIPE for + commands that will not generate a large output, otherwise + the pipe might get blocked. + + Returns: + Stdout output if stdout=PIPE, else None as the output has been + written directly to the specified file. Raises: VCSNotInstalledError: The VCS is not found. @@ -208,7 +235,7 @@ def _run_cmd(vcs: str, args: Iterable[str], cwd: Union[Path, str]) -> str: cmd, cwd=cwd, stdin=DEVNULL, - stdout=PIPE, + stdout=stdout, stderr=PIPE, text=True, ) @@ -275,41 +302,40 @@ def _parse_svn_info(info_text: str) -> Dict[str, Any]: return ret -def get_diff(vcs: str, path: Union[Path, str]) -> Optional[str]: - """Return the diff of uncommitted changes for a repository. +def write_diff( + vcs: str, repo_path: Union[Path, str], run_dir: Union[Path, str] +) -> Path: + """Get and write the diff of uncommitted changes for a repository to the + workflow's vcs log dir. Args: vcs: The version control system. - path: The path to the repo. - """ - args_ = DIFF_COMMANDS[vcs] - if Path(path).is_absolute(): - args_.append(str(path)) - else: - args_.append(str(Path().cwd() / path)) - - try: - diff = _run_cmd(vcs, args_, cwd=path) - except (VCSNotInstalledError, VCSMissingBaseError): - return None - header = ( - "# Auto-generated diff of uncommitted changes in the Cylc " - "workflow repository:\n" - f"# {path}") - return f"{header}\n{diff}" - - -def write_diff(diff: str, run_dir: Union[Path, str]) -> None: - """Write a diff to the workflow's vcs log dir. - - Args: - diff: The diff. + repo_path: The path to the repo. run_dir: The workflow run directory. + + Returns the path to diff file. """ + args = DIFF_COMMANDS[vcs] + args.append( + str(repo_path) if Path(repo_path).is_absolute() else + str(Path().cwd() / repo_path) + ) + diff_file = Path(run_dir, LOG_VERSION_DIR, DIFF_FILENAME) diff_file.parent.mkdir(exist_ok=True) - with open(diff_file, 'w') as f: - f.write(diff) + + with open(diff_file, 'a') as f: + f.write( + "# Auto-generated diff of uncommitted changes in the Cylc " + "workflow repository:\n" + f"# {repo_path}\n" + ) + f.flush() + try: + _run_cmd(vcs, args, repo_path, stdout=f) + except VCSMissingBaseError as exc: + f.write(f"# No diff - {exc}") + return diff_file # Entry point: @@ -331,8 +357,6 @@ def main( if vc_info is None: return False vcs = vc_info['version control system'] - diff = get_diff(vcs, srcdir) write_vc_info(vc_info, rundir) - if diff is not None: - write_diff(diff, rundir) + write_diff(vcs, srcdir, rundir) return True diff --git a/tests/unit/post_install/test_log_vc_info.py b/tests/unit/post_install/test_log_vc_info.py index cddb8012997..58e16e241b5 100644 --- a/tests/unit/post_install/test_log_vc_info.py +++ b/tests/unit/post_install/test_log_vc_info.py @@ -26,13 +26,15 @@ from cylc.flow.install_plugins.log_vc_info import ( INFO_FILENAME, LOG_VERSION_DIR, - get_diff, _get_git_commit, get_status, get_vc_info, main, + write_diff, ) +from cylc.flow.workflow_files import WorkflowFiles + Fixture = Any @@ -161,12 +163,14 @@ def test_get_vc_info_git(git_source_repo: Tuple[str, str]): @require_git -def test_get_diff_git(git_source_repo: Tuple[str, str]): - """Test get_diff() for a git repo""" - source_dir, commit_sha = git_source_repo - diff = get_diff('git', source_dir) - assert diff is not None - diff_lines = diff.splitlines() +def test_write_diff_git(git_source_repo: Tuple[str, str], tmp_path: Path): + """Test write_diff() for a git repo""" + source_dir, _ = git_source_repo + run_dir = tmp_path / 'run_dir' + (run_dir / WorkflowFiles.LOG_DIR).mkdir(parents=True) + diff_file = write_diff('git', source_dir, run_dir) + diff_lines = diff_file.read_text().splitlines() + assert diff_lines[0].startswith("# Auto-generated diff") for line in ("diff --git a/flow.cylc b/flow.cylc", "- R1 = foo", "+ R1 = bar"): @@ -205,12 +209,14 @@ def test_get_vc_info_svn(svn_source_repo: Tuple[str, str, str]): @require_svn -def test_get_diff_svn(svn_source_repo: Tuple[str, str, str]): - """Test get_diff() for an svn working copy""" - source_dir, uuid, repo_path = svn_source_repo - diff = get_diff('svn', source_dir) - assert diff is not None - diff_lines = diff.splitlines() +def test_write_diff_svn(svn_source_repo: Tuple[str, str, str], tmp_path: Path): + """Test write_diff() for an svn working copy""" + source_dir, _, _ = svn_source_repo + run_dir = tmp_path / 'run_dir' + (run_dir / WorkflowFiles.LOG_DIR).mkdir(parents=True) + diff_file = write_diff('svn', source_dir, run_dir) + diff_lines = diff_file.read_text().splitlines() + assert diff_lines[0].startswith("# Auto-generated diff") for line in (f"--- {source_dir}/flow.cylc (revision 1)", f"+++ {source_dir}/flow.cylc (working copy)", "- R1 = foo", @@ -239,20 +245,26 @@ def test_not_repo(tmp_path: Path, monkeypatch: MonkeyPatch): @require_git def test_no_base_commit_git(tmp_path: Path): - """Test get_vc_info() and get_diff() for a recently init'd git source dir + """Test get_vc_info() and write_diff() for a recently init'd git source dir that does not have a base commit yet.""" source_dir = Path(tmp_path, 'new_git_repo') source_dir.mkdir() subprocess.run(['git', 'init'], cwd=source_dir, check=True) flow_file = source_dir.joinpath('flow.cylc') flow_file.write_text(BASIC_FLOW_1) + run_dir = tmp_path / 'run_dir' + (run_dir / WorkflowFiles.LOG_DIR).mkdir(parents=True) vc_info = get_vc_info(source_dir) assert vc_info is not None - expected = [ + assert list(vc_info.items()) == [ ('version control system', "git"), ('working copy root path', str(source_dir)), ('status', ["?? flow.cylc"]) ] - assert list(vc_info.items()) == expected - assert get_diff('git', source_dir) is None + + # Diff file expected to be empty (only containing comment lines), + # but should work without raising + diff_file = write_diff('git', source_dir, run_dir) + for line in diff_file.read_text().splitlines(): + assert line.startswith('#')