From e6d152a526c07dcfdd3db56e5235e50ddb129d6a Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Thu, 20 Jul 2023 15:57:54 +0200 Subject: [PATCH 01/16] restructure modules lint adding components lint --- CHANGELOG.md | 2 + nf_core/__main__.py | 4 +- nf_core/components/lint/__init__.py | 299 ++++++++++++++++++++ nf_core/components/nfcore_component.py | 71 +++++ nf_core/lint/__init__.py | 12 +- nf_core/modules/bump_versions.py | 38 +-- nf_core/modules/lint/__init__.py | 258 +---------------- nf_core/modules/lint/main_nf.py | 4 +- nf_core/modules/lint/meta_yml.py | 6 +- nf_core/modules/lint/module_changes.py | 18 +- nf_core/modules/lint/module_deprecations.py | 6 +- nf_core/modules/lint/module_patch.py | 12 +- nf_core/modules/lint/module_tests.py | 4 +- nf_core/modules/lint/module_todos.py | 4 +- nf_core/modules/lint/module_version.py | 12 +- nf_core/modules/modules_utils.py | 13 +- nf_core/modules/nfcore_module.py | 66 ----- nf_core/subworkflows/lint/__init__.py | 22 ++ 18 files changed, 481 insertions(+), 370 deletions(-) create mode 100644 nf_core/components/lint/__init__.py create mode 100644 nf_core/components/nfcore_component.py delete mode 100644 nf_core/modules/nfcore_module.py create mode 100644 nf_core/subworkflows/lint/__init__.py diff --git a/CHANGELOG.md b/CHANGELOG.md index a8680b1ee3..ffe25a1b37 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,8 @@ ### Linting +- Add new command `nf-core subworkflows lint` ([]()) + ### Modules ### Subworkflows diff --git a/nf_core/__main__.py b/nf_core/__main__.py index d745a896f7..fd8fa9b993 100644 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -912,8 +912,8 @@ def lint( Test modules within a pipeline or a clone of the nf-core/modules repository. """ + from nf_core.components.lint import LintException from nf_core.modules import ModuleLint - from nf_core.modules.lint import ModuleLintException try: module_lint = ModuleLint( @@ -938,7 +938,7 @@ def lint( ) if len(module_lint.failed) > 0: sys.exit(1) - except ModuleLintException as e: + except LintException as e: log.error(e) sys.exit(1) except (UserWarning, LookupError) as e: diff --git a/nf_core/components/lint/__init__.py b/nf_core/components/lint/__init__.py new file mode 100644 index 0000000000..a1b40ab892 --- /dev/null +++ b/nf_core/components/lint/__init__.py @@ -0,0 +1,299 @@ +""" +Code for linting modules and subworkflows in the nf-core/modules repository and +in nf-core pipelines +""" + +from __future__ import print_function + +import logging +import operator +import os +from pathlib import Path + +import questionary +import rich +from rich.markdown import Markdown +from rich.table import Table + +import nf_core.modules.modules_utils +import nf_core.utils +from nf_core.components.components_command import ComponentCommand +from nf_core.components.nfcore_component import NFCoreComponent +from nf_core.lint_utils import console +from nf_core.modules.modules_json import ModulesJson +from nf_core.utils import plural_s as _s + +log = logging.getLogger(__name__) + + +class LintException(Exception): + """Exception raised when there was an error with module or subworkflow linting""" + + pass + + +class LintResult: + """An object to hold the results of a lint test""" + + def __init__(self, component, lint_test, message, file_path): + self.component = component + self.lint_test = lint_test + self.message = message + self.file_path = file_path + self.component_name = component.component_name + + +class ComponentLint(ComponentCommand): + """ + An object for linting modules and 
subworkflows either in a clone of the 'nf-core/modules' + repository or in any nf-core pipeline directory + """ + + def __init__( + self, + component_type, + dir, + fail_warned=False, + remote_url=None, + branch=None, + no_pull=False, + registry=None, + hide_progress=False, + ): + super().__init__( + component_type, + dir=dir, + remote_url=remote_url, + branch=branch, + no_pull=no_pull, + hide_progress=hide_progress, + ) + + self.fail_warned = fail_warned + self.passed = [] + self.warned = [] + self.failed = [] + if self.component_type == "modules": + self.lint_tests = self.get_all_module_lint_tests(self.repo_type == "pipeline") + else: + self.lint_tests = self.get_all_subworkflow_lint_tests(self.repo_type == "pipeline") + + if self.repo_type == "pipeline": + modules_json = ModulesJson(self.dir) + modules_json.check_up_to_date() + self.all_remote_components = [] + for repo_url, components in modules_json.get_all_components(self.component_type).items(): + if remote_url is not None and remote_url != repo_url: + continue + for org, comp in components: + self.all_remote_components.append( + NFCoreComponent( + comp, + repo_url, + Path(self.dir, self.component_type, org, comp), + self.repo_type, + Path(self.dir), + self.component_type, + ) + ) + if not self.all_remote_components: + raise LookupError( + f"No {self.component_type} from {self.modules_repo.remote_url} installed in pipeline." + ) + local_component_dir = Path(self.dir, self.component_type, "local") + self.all_local_components = [] + if local_component_dir.exists(): + self.all_local_components = [ + NFCoreComponent( + m, + None, + Path(local_component_dir, m), + self.repo_type, + Path(self.dir), + self.component_type, + remote_component=False, + ) + for m in self.get_local_components() + ] + self.config = nf_core.utils.fetch_wf_config(self.dir, cache_config=True) + else: + component_dir = Path( + self.dir, + self.default_modules_path if self.component_type == "modules" else self.default_subworkflows_path, + ) + self.all_remote_components = [ + NFCoreComponent(m, None, component_dir / m, self.repo_type, Path(self.dir), self.component_type) + for m in self.get_components_clone_modules() + ] + self.all_local_components = [] + if not self.all_remote_components: + raise LookupError(f"No {self.component_type} in '{self.component_type}' directory") + + # This could be better, perhaps glob for all nextflow.config files in? 
+ self.config = nf_core.utils.fetch_wf_config(Path(self.dir).joinpath("tests", "config"), cache_config=True) + + if registry is None: + self.registry = self.config.get("docker.registry", "quay.io") + else: + self.registry = registry + log.debug(f"Registry set to {self.registry}") + + self.lint_config = None + self.modules_json = None + + @staticmethod + def get_all_module_lint_tests(is_pipeline): + if is_pipeline: + return [ + "module_patch", + "module_version", + "main_nf", + "meta_yml", + "module_todos", + "module_deprecations", + "module_changes", + ] + else: + return ["main_nf", "meta_yml", "module_todos", "module_deprecations", "module_tests"] + + def set_up_pipeline_files(self): + self.load_lint_config() + self.modules_json = ModulesJson(self.dir) + self.modules_json.load() + + # Only continue if a lint config has been loaded + if self.lint_config: + for test_name in self.lint_tests: + if self.lint_config.get(test_name, {}) is False: + log.info(f"Ignoring lint test: {test_name}") + self.lint_tests.remove(test_name) + + def filter_tests_by_key(self, key): + """Filters the tests by the supplied key""" + # Check that supplied test keys exist + bad_keys = [k for k in key if k not in self.lint_tests] + if len(bad_keys) > 0: + raise AssertionError( + "Test name{} not recognised: '{}'".format( + _s(bad_keys), + "', '".join(bad_keys), + ) + ) + + # If -k supplied, only run these tests + self.lint_tests = [k for k in self.lint_tests if k in key] + + def _print_results(self, show_passed=False, sort_by="test"): + """Print linting results to the command line. + + Uses the ``rich`` library to print a set of formatted tables to the command line + summarising the linting results. + """ + + log.debug("Printing final results") + + sort_order = ["lint_test", "component_name", "message"] + if sort_by == "module": + sort_order = ["component_name", "lint_test", "message"] + + # Sort the results + self.passed.sort(key=operator.attrgetter(*sort_order)) + self.warned.sort(key=operator.attrgetter(*sort_order)) + self.failed.sort(key=operator.attrgetter(*sort_order)) + + # Find maximum module name length + max_name_len = 40 + for tests in [self.passed, self.warned, self.failed]: + try: + for lint_result in tests: + max_name_len = max(len(lint_result.component_name), max_name_len) + except: + pass + + # Helper function to format test links nicely + def format_result(test_results, table): + """ + Given an list of error message IDs and the message texts, return a nicely formatted + string for the terminal with appropriate ASCII colours. + """ + # TODO: Row styles don't work current as table-level style overrides. 
+ # Leaving it here in case there is a future fix + last_modname = False + even_row = False + for lint_result in test_results: + if last_modname and lint_result.component_name != last_modname: + even_row = not even_row + last_modname = lint_result.component_name + table.add_row( + Markdown(f"{lint_result.component_name}"), + os.path.relpath(lint_result.file_path, self.dir), + Markdown(f"{lint_result.message}"), + style="dim" if even_row else None, + ) + return table + + # Print blank line for spacing + console.print("") + + # Table of passed tests + if len(self.passed) > 0 and show_passed: + table = Table(style="green", box=rich.box.MINIMAL, pad_edge=False, border_style="dim") + table.add_column(f"{self.component_type[:-1].title()} name", width=max_name_len) + table.add_column("File path") + table.add_column("Test message") + table = format_result(self.passed, table) + console.print( + rich.panel.Panel( + table, + title=rf"[bold][✔] {len(self.passed)} {self.component_type[:-1].title()} Test{_s(self.passed)} Passed", + title_align="left", + style="green", + padding=0, + ) + ) + + # Table of warning tests + if len(self.warned) > 0: + table = Table(style="yellow", box=rich.box.MINIMAL, pad_edge=False, border_style="dim") + table.add_column(f"{self.component_type[:-1].title()} name", width=max_name_len) + table.add_column("File path") + table.add_column("Test message") + table = format_result(self.warned, table) + console.print( + rich.panel.Panel( + table, + title=rf"[bold][!] {len(self.warned)} {self.component_type[:-1].title()} Test Warning{_s(self.warned)}", + title_align="left", + style="yellow", + padding=0, + ) + ) + + # Table of failing tests + if len(self.failed) > 0: + table = Table(style="red", box=rich.box.MINIMAL, pad_edge=False, border_style="dim") + table.add_column(f"{self.component_type[:-1].title()} name", width=max_name_len) + table.add_column("File path") + table.add_column("Test message") + table = format_result(self.failed, table) + console.print( + rich.panel.Panel( + table, + title=rf"[bold][✗] {len(self.failed)} {self.component_type[:-1].title()} Test{_s(self.failed)} Failed", + title_align="left", + style="red", + padding=0, + ) + ) + + def print_summary(self): + """Print a summary table to the console.""" + table = Table(box=rich.box.ROUNDED) + table.add_column("[bold green]LINT RESULTS SUMMARY", no_wrap=True) + table.add_row( + rf"[✔] {len(self.passed):>3} Test{_s(self.passed)} Passed", + style="green", + ) + table.add_row(rf"[!] 
{len(self.warned):>3} Test Warning{_s(self.warned)}", style="yellow") + table.add_row(rf"[✗] {len(self.failed):>3} Test{_s(self.failed)} Failed", style="red") + console.print(table) diff --git a/nf_core/components/nfcore_component.py b/nf_core/components/nfcore_component.py new file mode 100644 index 0000000000..9083f21ddc --- /dev/null +++ b/nf_core/components/nfcore_component.py @@ -0,0 +1,71 @@ +""" +The NFCoreComponent class holds information and utility functions for a single module or subworkflow +""" +from pathlib import Path + + +class NFCoreComponent: + """ + A class to hold the information about a nf-core module or subworkflow + Includes functionality for linting + """ + + def __init__( + self, component_name, repo_url, component_dir, repo_type, base_dir, component_type, remote_component=True + ): + """ + Initialize the object + + Args: + component_name (str): The name of the module or subworkflow + repo_url (str): The URL of the repository + component_dir (Path): The absolute path to the module or subworkflow + repo_type (str): Either 'pipeline' or 'modules' depending on + whether the directory is a pipeline or clone + of nf-core/modules. + base_dir (Path): The absolute path to the pipeline base dir + component_type (str): Either 'modules' or 'subworkflows' + remote_component (bool): Whether the module is to be treated as a + nf-core or local component + """ + self.component_name = component_name + self.repo_url = repo_url + self.component_dir = component_dir + self.repo_type = repo_type + self.base_dir = base_dir + self.passed = [] + self.warned = [] + self.failed = [] + self.inputs = [] + self.outputs = [] + self.has_meta = False + self.git_sha = None + self.is_patched = False + + if remote_component: + # Initialize the important files + self.main_nf = self.component_dir / "main.nf" + self.meta_yml = self.component_dir / "meta.yml" + + repo_dir = self.component_dir.parts[: self.component_dir.parts.index(self.component_name.split("/")[0])][-1] + self.org = repo_dir + self.test_dir = Path(self.base_dir, "tests", component_type, repo_dir, self.component_name) + self.test_yml = self.test_dir / "test.yml" + self.test_main_nf = self.test_dir / "main.nf" + + if self.repo_type == "pipeline": + patch_fn = f"{self.component_name.replace('/', '-')}.diff" + patch_path = Path(self.component_dir, patch_fn) + if patch_path.exists(): + self.is_patched = True + self.patch_path = patch_path + else: + # The main file is just the local module + self.main_nf = self.component_dir + self.component_name = self.component_dir.stem + # These attributes are only used by nf-core modules + # so just initialize them to None + self.meta_yml = None + self.test_dir = None + self.test_yml = None + self.test_main_nf = None diff --git a/nf_core/lint/__init__.py b/nf_core/lint/__init__.py index a998c964a0..70f7ea925f 100644 --- a/nf_core/lint/__init__.py +++ b/nf_core/lint/__init__.py @@ -58,7 +58,7 @@ def run_linting( # Verify that the requested tests exist if key: all_tests = set(PipelineLint._get_all_lint_tests(release_mode)).union( - set(nf_core.modules.lint.ModuleLint.get_all_lint_tests(is_pipeline=True)) + set(nf_core.modules.lint.ModuleLint.get_all_module_lint_tests(is_pipeline=True)) ) bad_keys = [k for k in key if k not in all_tests] if len(bad_keys) > 0: @@ -96,7 +96,7 @@ def run_linting( if key: # Select only the module lint tests module_lint_tests = list( - set(key).intersection(set(nf_core.modules.lint.ModuleLint.get_all_lint_tests(is_pipeline=True))) + 
set(key).intersection(set(nf_core.modules.lint.ModuleLint.get_all_module_lint_tests(is_pipeline=True))) ) else: # If no key is supplied, run the default modules tests @@ -115,10 +115,10 @@ def run_linting( return lint_obj, module_lint_obj # Run the module lint tests - if len(module_lint_obj.all_local_modules) > 0: - module_lint_obj.lint_modules(module_lint_obj.all_local_modules, local=True) - if len(module_lint_obj.all_remote_modules) > 0: - module_lint_obj.lint_modules(module_lint_obj.all_remote_modules, local=False) + if len(module_lint_obj.all_local_components) > 0: + module_lint_obj.lint_modules(module_lint_obj.all_local_components, local=True) + if len(module_lint_obj.all_remote_components) > 0: + module_lint_obj.lint_modules(module_lint_obj.all_remote_components, local=False) # Print the results lint_obj._print_results(show_passed) diff --git a/nf_core/modules/bump_versions.py b/nf_core/modules/bump_versions.py index c925e497fa..e8c5060c89 100644 --- a/nf_core/modules/bump_versions.py +++ b/nf_core/modules/bump_versions.py @@ -83,7 +83,7 @@ def bump_versions(self, module=None, all_modules=False, show_uptodate=False): else: module = questionary.autocomplete( "Tool name:", - choices=[m.module_name for m in nfcore_modules], + choices=[m.component_name for m in nfcore_modules], style=nf_core.utils.nfcore_question_style, ).unsafe_ask() @@ -93,7 +93,7 @@ def bump_versions(self, module=None, all_modules=False, show_uptodate=False): raise nf_core.modules.modules_utils.ModuleException( "You cannot specify a tool and request all tools to be bumped." ) - nfcore_modules = [m for m in nfcore_modules if m.module_name == module] + nfcore_modules = [m for m in nfcore_modules if m.component_name == module] if len(nfcore_modules) == 0: raise nf_core.modules.modules_utils.ModuleException(f"Could not find the specified module: '{module}'") @@ -106,20 +106,22 @@ def bump_versions(self, module=None, all_modules=False, show_uptodate=False): ) with progress_bar: bump_progress = progress_bar.add_task( - "Bumping nf-core modules versions", total=len(nfcore_modules), test_name=nfcore_modules[0].module_name + "Bumping nf-core modules versions", + total=len(nfcore_modules), + test_name=nfcore_modules[0].component_name, ) for mod in nfcore_modules: - progress_bar.update(bump_progress, advance=1, test_name=mod.module_name) + progress_bar.update(bump_progress, advance=1, test_name=mod.component_name) self.bump_module_version(mod) self._print_results() def bump_module_version(self, module): """ - Bump the bioconda and container version of a single NFCoreModule + Bump the bioconda and container version of a single NFCoreComponent Args: - module: NFCoreModule + module: NFCoreComponent """ config_version = None # Extract bioconda version from `main.nf` @@ -127,15 +129,15 @@ def bump_module_version(self, module): # If multiple versions - don't update! 
(can't update mulled containers) if not bioconda_packages or len(bioconda_packages) > 1: - self.failed.append(("Ignoring mulled container", module.module_name)) + self.failed.append(("Ignoring mulled container", module.component_name)) return False # Don't update if blocked in blacklist self.bump_versions_config = self.tools_config.get("bump-versions", {}) - if module.module_name in self.bump_versions_config: - config_version = self.bump_versions_config[module.module_name] + if module.component_name in self.bump_versions_config: + config_version = self.bump_versions_config[module.component_name] if not config_version: - self.ignored.append(("Omitting module due to config.", module.module_name)) + self.ignored.append(("Omitting module due to config.", module.component_name)) return False # check for correct version and newer versions @@ -148,12 +150,12 @@ def bump_module_version(self, module): try: response = nf_core.utils.anaconda_package(bp) except (LookupError, ValueError): - self.failed.append((f"Conda version not specified correctly: {module.main_nf}", module.module_name)) + self.failed.append((f"Conda version not specified correctly: {module.main_nf}", module.component_name)) return False # Check that required version is available at all if bioconda_version not in response.get("versions"): - self.failed.append((f"Conda package had unknown version: `{module.main_nf}`", module.module_name)) + self.failed.append((f"Conda package had unknown version: `{module.main_nf}`", module.component_name)) return False # Check version is latest available @@ -162,12 +164,12 @@ def bump_module_version(self, module): last_ver = config_version if last_ver is not None and last_ver != bioconda_version: - log.debug(f"Updating version for {module.module_name}") + log.debug(f"Updating version for {module.component_name}") # Get docker and singularity container links try: docker_img, singularity_img = nf_core.utils.get_biocontainer_tag(bioconda_tool_name, last_ver) except LookupError as e: - self.failed.append((f"Could not download container tags: {e}", module.module_name)) + self.failed.append((f"Could not download container tags: {e}", module.component_name)) return False patterns = [ @@ -203,7 +205,7 @@ def bump_module_version(self, module): content = "\n".join(newcontent) + "\n" else: self.failed.append( - (f"Did not find pattern {pattern[0]} in module {module.module_name}", module.module_name) + (f"Did not find pattern {pattern[0]} in module {module.component_name}", module.component_name) ) return False @@ -214,13 +216,13 @@ def bump_module_version(self, module): self.updated.append( ( f"Module updated: {bioconda_version} --> {last_ver}", - module.module_name, + module.component_name, ) ) return True else: - self.up_to_date.append((f"Module version up to date: {module.module_name}", module.module_name)) + self.up_to_date.append((f"Module version up to date: {module.component_name}", module.component_name)) return True def get_bioconda_version(self, module): @@ -235,7 +237,7 @@ def get_bioconda_version(self, module): if "bioconda::" in l: bioconda_packages = [b for b in l.split() if "bioconda::" in b] except FileNotFoundError: - log.error(f"Could not read `main.nf` of {module.module_name} module.") + log.error(f"Could not read `main.nf` of {module.component_name} module.") return bioconda_packages diff --git a/nf_core/modules/lint/__init__.py b/nf_core/modules/lint/__init__.py index 9d8fb177cd..adf8903189 100644 --- a/nf_core/modules/lint/__init__.py +++ b/nf_core/modules/lint/__init__.py @@ -20,33 +20,16 
@@ import nf_core.modules.modules_utils import nf_core.utils -from nf_core.components.components_command import ComponentCommand +from nf_core.components.lint import ComponentLint, LintException, LintResult +from nf_core.components.nfcore_component import NFCoreComponent from nf_core.lint_utils import console from nf_core.modules.modules_json import ModulesJson -from nf_core.modules.nfcore_module import NFCoreModule from nf_core.utils import plural_s as _s log = logging.getLogger(__name__) -class ModuleLintException(Exception): - """Exception raised when there was an error with module linting""" - - pass - - -class LintResult: - """An object to hold the results of a lint test""" - - def __init__(self, mod, lint_test, message, file_path): - self.mod = mod - self.lint_test = lint_test - self.message = message - self.file_path = file_path - self.module_name = mod.module_name - - -class ModuleLint(ComponentCommand): +class ModuleLint(ComponentLint): """ An object for linting modules either in a clone of the 'nf-core/modules' repository or in any nf-core pipeline directory @@ -73,86 +56,15 @@ def __init__( hide_progress=False, ): super().__init__( - "modules", + component_type="modules", dir=dir, + fail_warned=fail_warned, remote_url=remote_url, branch=branch, no_pull=no_pull, hide_progress=hide_progress, ) - self.fail_warned = fail_warned - self.passed = [] - self.warned = [] - self.failed = [] - self.lint_tests = self.get_all_lint_tests(self.repo_type == "pipeline") - - if self.repo_type == "pipeline": - modules_json = ModulesJson(self.dir) - modules_json.check_up_to_date() - self.all_remote_modules = [] - for repo_url, components in modules_json.get_all_components(self.component_type).items(): - if remote_url is not None and remote_url != repo_url: - continue - for org, comp in components: - self.all_remote_modules.append( - NFCoreModule( - comp, - repo_url, - Path(self.dir, self.component_type, org, comp), - self.repo_type, - Path(self.dir), - ) - ) - if not self.all_remote_modules: - raise LookupError(f"No modules from {self.modules_repo.remote_url} installed in pipeline.") - local_module_dir = Path(self.dir, "modules", "local") - self.all_local_modules = [] - if local_module_dir.exists(): - self.all_local_modules = [ - NFCoreModule( - m, None, Path(local_module_dir, m), self.repo_type, Path(self.dir), remote_module=False - ) - for m in self.get_local_components() - ] - self.config = nf_core.utils.fetch_wf_config(self.dir, cache_config=True) - else: - module_dir = Path(self.dir, self.default_modules_path) - self.all_remote_modules = [ - NFCoreModule(m, None, module_dir / m, self.repo_type, Path(self.dir)) - for m in self.get_components_clone_modules() - ] - self.all_local_modules = [] - if not self.all_remote_modules: - raise LookupError("No modules in 'modules' directory") - - # This could be better, perhaps glob for all nextflow.config files in? 
- self.config = nf_core.utils.fetch_wf_config(Path(self.dir).joinpath("tests", "config"), cache_config=True) - - if registry is None: - self.registry = self.config.get("docker.registry", "quay.io") - else: - self.registry = registry - log.debug(f"Registry set to {self.registry}") - - self.lint_config = None - self.modules_json = None - - @staticmethod - def get_all_lint_tests(is_pipeline): - if is_pipeline: - return [ - "module_patch", - "module_version", - "main_nf", - "meta_yml", - "module_todos", - "module_deprecations", - "module_changes", - ] - else: - return ["main_nf", "meta_yml", "module_todos", "module_deprecations", "module_tests"] - def lint( self, module=None, @@ -202,7 +114,7 @@ def lint( "name": "tool_name", "message": "Tool name:", "when": lambda x: x["all_modules"] == "Named module", - "choices": [m.module_name for m in self.all_remote_modules], + "choices": [m.component_name for m in self.all_remote_components], }, ] answers = questionary.unsafe_prompt(questions, style=nf_core.utils.nfcore_question_style) @@ -212,14 +124,14 @@ def lint( # Only lint the given module if module: if all_modules: - raise ModuleLintException("You cannot specify a tool and request all tools to be linted.") + raise LintException("You cannot specify a tool and request all tools to be linted.") local_modules = [] - remote_modules = [m for m in self.all_remote_modules if m.module_name == module] + remote_modules = [m for m in self.all_remote_components if m.component_name == module] if len(remote_modules) == 0: - raise ModuleLintException(f"Could not find the specified module: '{module}'") + raise LintException(f"Could not find the specified module: '{module}'") else: - local_modules = self.all_local_modules - remote_modules = self.all_remote_modules + local_modules = self.all_local_components + remote_modules = self.all_remote_components if self.repo_type == "modules": log.info(f"Linting modules repo: [magenta]'{self.dir}'") @@ -249,39 +161,12 @@ def lint( self._print_results(show_passed=show_passed, sort_by=sort_by) self.print_summary() - def set_up_pipeline_files(self): - self.load_lint_config() - self.modules_json = ModulesJson(self.dir) - self.modules_json.load() - - # Only continue if a lint config has been loaded - if self.lint_config: - for test_name in self.lint_tests: - if self.lint_config.get(test_name, {}) is False: - log.info(f"Ignoring lint test: {test_name}") - self.lint_tests.remove(test_name) - - def filter_tests_by_key(self, key): - """Filters the tests by the supplied key""" - # Check that supplied test keys exist - bad_keys = [k for k in key if k not in self.lint_tests] - if len(bad_keys) > 0: - raise AssertionError( - "Test name{} not recognised: '{}'".format( - _s(bad_keys), - "', '".join(bad_keys), - ) - ) - - # If -k supplied, only run these tests - self.lint_tests = [k for k in self.lint_tests if k in key] - def lint_modules(self, modules, registry="quay.io", local=False, fix_version=False): """ Lint a list of modules Args: - modules ([NFCoreModule]): A list of module objects + modules ([NFCoreComponent]): A list of module objects registry (str): The container registry to use. Should be quay.io in most situations. 
local (boolean): Whether the list consist of local or nf-core modules fix_version (boolean): Fix the module version if a newer version is available @@ -298,11 +183,11 @@ def lint_modules(self, modules, registry="quay.io", local=False, fix_version=Fal lint_progress = progress_bar.add_task( f"Linting {'local' if local else 'nf-core'} modules", total=len(modules), - test_name=modules[0].module_name, + test_name=modules[0].component_name, ) for mod in modules: - progress_bar.update(lint_progress, advance=1, test_name=mod.module_name) + progress_bar.update(lint_progress, advance=1, test_name=mod.component_name) self.lint_module(mod, progress_bar, registry=registry, local=local, fix_version=fix_version) def lint_module(self, mod, progress_bar, registry, local=False, fix_version=False): @@ -348,118 +233,3 @@ def lint_module(self, mod, progress_bar, registry, local=False, fix_version=Fals self.failed += warned self.failed += [LintResult(mod, *m) for m in mod.failed] - - def _print_results(self, show_passed=False, sort_by="test"): - """Print linting results to the command line. - - Uses the ``rich`` library to print a set of formatted tables to the command line - summarising the linting results. - """ - - log.debug("Printing final results") - - sort_order = ["lint_test", "module_name", "message"] - if sort_by == "module": - sort_order = ["module_name", "lint_test", "message"] - - # Sort the results - self.passed.sort(key=operator.attrgetter(*sort_order)) - self.warned.sort(key=operator.attrgetter(*sort_order)) - self.failed.sort(key=operator.attrgetter(*sort_order)) - - # Find maximum module name length - max_mod_name_len = 40 - for tests in [self.passed, self.warned, self.failed]: - try: - for lint_result in tests: - max_mod_name_len = max(len(lint_result.module_name), max_mod_name_len) - except: - pass - - # Helper function to format test links nicely - def format_result(test_results, table): - """ - Given an list of error message IDs and the message texts, return a nicely formatted - string for the terminal with appropriate ASCII colours. - """ - # TODO: Row styles don't work current as table-level style overrides. 
- # I'd like to make an issue about this on the rich repo so leaving here in case there is a future fix - last_modname = False - even_row = False - for lint_result in test_results: - if last_modname and lint_result.module_name != last_modname: - even_row = not even_row - last_modname = lint_result.module_name - table.add_row( - Markdown(f"{lint_result.module_name}"), - os.path.relpath(lint_result.file_path, self.dir), - Markdown(f"{lint_result.message}"), - style="dim" if even_row else None, - ) - return table - - # Print blank line for spacing - console.print("") - - # Table of passed tests - if len(self.passed) > 0 and show_passed: - table = Table(style="green", box=rich.box.MINIMAL, pad_edge=False, border_style="dim") - table.add_column("Module name", width=max_mod_name_len) - table.add_column("File path") - table.add_column("Test message") - table = format_result(self.passed, table) - console.print( - rich.panel.Panel( - table, - title=rf"[bold][✔] {len(self.passed)} Module Test{_s(self.passed)} Passed", - title_align="left", - style="green", - padding=0, - ) - ) - - # Table of warning tests - if len(self.warned) > 0: - table = Table(style="yellow", box=rich.box.MINIMAL, pad_edge=False, border_style="dim") - table.add_column("Module name", width=max_mod_name_len) - table.add_column("File path") - table.add_column("Test message") - table = format_result(self.warned, table) - console.print( - rich.panel.Panel( - table, - title=rf"[bold][!] {len(self.warned)} Module Test Warning{_s(self.warned)}", - title_align="left", - style="yellow", - padding=0, - ) - ) - - # Table of failing tests - if len(self.failed) > 0: - table = Table(style="red", box=rich.box.MINIMAL, pad_edge=False, border_style="dim") - table.add_column("Module name", width=max_mod_name_len) - table.add_column("File path") - table.add_column("Test message") - table = format_result(self.failed, table) - console.print( - rich.panel.Panel( - table, - title=rf"[bold][✗] {len(self.failed)} Module Test{_s(self.failed)} Failed", - title_align="left", - style="red", - padding=0, - ) - ) - - def print_summary(self): - """Print a summary table to the console.""" - table = Table(box=rich.box.ROUNDED) - table.add_column("[bold green]LINT RESULTS SUMMARY", no_wrap=True) - table.add_row( - rf"[✔] {len(self.passed):>3} Test{_s(self.passed)} Passed", - style="green", - ) - table.add_row(rf"[!] 
{len(self.warned):>3} Test Warning{_s(self.warned)}", style="yellow") - table.add_row(rf"[✗] {len(self.failed):>3} Test{_s(self.failed)} Failed", style="red") - console.print(table) diff --git a/nf_core/modules/lint/main_nf.py b/nf_core/modules/lint/main_nf.py index 18d95bd37e..617e53c069 100644 --- a/nf_core/modules/lint/main_nf.py +++ b/nf_core/modules/lint/main_nf.py @@ -45,10 +45,10 @@ def main_nf(module_lint_object, module, fix_version, registry, progress_bar): lines = None if module.is_patched: lines = ModulesDiffer.try_apply_patch( - module.module_name, + module.component_name, module_lint_object.modules_repo.repo_path, module.patch_path, - Path(module.module_dir).relative_to(module.base_dir), + Path(module.component_dir).relative_to(module.base_dir), reverse=True, ).get("main.nf") if lines is None: diff --git a/nf_core/modules/lint/meta_yml.py b/nf_core/modules/lint/meta_yml.py index dd5e954f25..446cf6dbb8 100644 --- a/nf_core/modules/lint/meta_yml.py +++ b/nf_core/modules/lint/meta_yml.py @@ -25,10 +25,10 @@ def meta_yml(module_lint_object, module): meta_yaml = None if module.is_patched: lines = ModulesDiffer.try_apply_patch( - module.module_name, + module.component_name, module_lint_object.modules_repo.repo_path, module.patch_path, - Path(module.module_dir).relative_to(module.base_dir), + Path(module.component_dir).relative_to(module.base_dir), reverse=True, ).get("meta.yml") if lines is not None: @@ -59,7 +59,7 @@ def meta_yml(module_lint_object, module): module.failed.append( ( "meta_yml_valid", - f"The `meta.yml` of the module {module.module_name} is not valid: {e.message}.{hint}", + f"The `meta.yml` of the module {module.component_name} is not valid: {e.message}.{hint}", module.meta_yml, ) ) diff --git a/nf_core/modules/lint/module_changes.py b/nf_core/modules/lint/module_changes.py index 61b416e5f7..00184f8dfc 100644 --- a/nf_core/modules/lint/module_changes.py +++ b/nf_core/modules/lint/module_changes.py @@ -27,10 +27,14 @@ def module_changes(module_lint_object, module): # the patch in reverse before comparing with the remote tempdir_parent = Path(tempfile.mkdtemp()) tempdir = tempdir_parent / "tmp_module_dir" - shutil.copytree(module.module_dir, tempdir) + shutil.copytree(module.component_dir, tempdir) try: new_lines = ModulesDiffer.try_apply_patch( - module.module_name, module_lint_object.modules_repo.repo_path, module.patch_path, tempdir, reverse=True + module.component_name, + module_lint_object.modules_repo.repo_path, + module.patch_path, + tempdir, + reverse=True, ) for file, lines in new_lines.items(): with open(tempdir / file, "w") as fh: @@ -39,19 +43,19 @@ def module_changes(module_lint_object, module): # This error is already reported by module_patch, so just return return else: - tempdir = module.module_dir + tempdir = module.component_dir module.branch = module_lint_object.modules_json.get_component_branch( - "modules", module.module_name, module.repo_url, module.org + "modules", module.component_name, module.repo_url, module.org ) modules_repo = nf_core.modules.modules_repo.ModulesRepo(remote_url=module.repo_url, branch=module.branch) - for f, same in modules_repo.module_files_identical(module.module_name, tempdir, module.git_sha).items(): + for f, same in modules_repo.module_files_identical(module.component_name, tempdir, module.git_sha).items(): if same: module.passed.append( ( "check_local_copy", "Local copy of module up to date", - f"{Path(module.module_dir, f)}", + f"{Path(module.component_dir, f)}", ) ) else: @@ -59,6 +63,6 @@ def 
module_changes(module_lint_object, module): ( "check_local_copy", "Local copy of module does not match remote", - f"{Path(module.module_dir, f)}", + f"{Path(module.component_dir, f)}", ) ) diff --git a/nf_core/modules/lint/module_deprecations.py b/nf_core/modules/lint/module_deprecations.py index 8ab5b68c2e..79a255bbf6 100644 --- a/nf_core/modules/lint/module_deprecations.py +++ b/nf_core/modules/lint/module_deprecations.py @@ -8,12 +8,12 @@ def module_deprecations(_, module): """ Check that the modules are up to the latest nf-core standard """ - module.wf_path = module.module_dir - if "functions.nf" in os.listdir(module.module_dir): + module.wf_path = module.component_dir + if "functions.nf" in os.listdir(module.component_dir): module.failed.append( ( "module_deprecations", "Deprecated file `functions.nf` found. No longer required for the latest nf-core/modules syntax!", - module.module_dir, + module.component_dir, ) ) diff --git a/nf_core/modules/lint/module_patch.py b/nf_core/modules/lint/module_patch.py index 6d91b44816..d52962eabb 100644 --- a/nf_core/modules/lint/module_patch.py +++ b/nf_core/modules/lint/module_patch.py @@ -1,10 +1,10 @@ from pathlib import Path +from ...components.nfcore_component import NFCoreComponent from ..modules_differ import ModulesDiffer -from ..nfcore_module import NFCoreModule -def module_patch(module_lint_obj, module: NFCoreModule): +def module_patch(module_lint_obj, module: NFCoreComponent): """ Lint a patch file found in a module @@ -34,7 +34,7 @@ def check_patch_valid(module, patch_path): file creation or deletion we issue a lint warning. Args: - module (NFCoreModule): The module currently being linted + module (NFCoreComponent): The module currently being linted patch_path (Path): The absolute path to the patch file. Returns: @@ -154,7 +154,7 @@ def patch_reversible(module_lint_object, module, patch_path): Try applying a patch in reverse to see if it is up to date Args: - module (NFCoreModule): The module currently being linted + module (NFCoreComponent): The module currently being linted patch_path (Path): The absolute path to the patch file. 
Returns: @@ -162,10 +162,10 @@ def patch_reversible(module_lint_object, module, patch_path): """ try: ModulesDiffer.try_apply_patch( - module.module_name, + module.component_name, module_lint_object.modules_repo.repo_path, patch_path, - Path(module.module_dir).relative_to(module.base_dir), + Path(module.component_dir).relative_to(module.base_dir), reverse=True, ) except LookupError: diff --git a/nf_core/modules/lint/module_tests.py b/nf_core/modules/lint/module_tests.py index 0b76acb944..6fdf6713f0 100644 --- a/nf_core/modules/lint/module_tests.py +++ b/nf_core/modules/lint/module_tests.py @@ -38,7 +38,7 @@ def module_tests(_, module): pytest_yml_path = os.path.join(module.base_dir, "tests", "config", "pytest_modules.yml") with open(pytest_yml_path, "r") as fh: pytest_yml = yaml.safe_load(fh) - if module.module_name in pytest_yml.keys(): + if module.component_name in pytest_yml.keys(): module.passed.append(("test_pytest_yml", "correct entry in pytest_modules.yml", pytest_yml_path)) else: module.failed.append(("test_pytest_yml", "missing entry in pytest_modules.yml", pytest_yml_path)) @@ -55,7 +55,7 @@ def module_tests(_, module): all_tags_correct = True for test in test_yml: for tag in test["tags"]: - if not tag in [module.module_name, module.module_name.split("/")[0]]: + if not tag in [module.component_name, module.component_name.split("/")[0]]: all_tags_correct = False # Look for md5sums of empty files diff --git a/nf_core/modules/lint/module_todos.py b/nf_core/modules/lint/module_todos.py index b48725cd9e..ee12307512 100644 --- a/nf_core/modules/lint/module_todos.py +++ b/nf_core/modules/lint/module_todos.py @@ -33,11 +33,11 @@ def module_todos(_, module): """ # Main module directory - mod_results = pipeline_todos(None, root_dir=module.module_dir) + mod_results = pipeline_todos(None, root_dir=module.component_dir) for i, warning in enumerate(mod_results["warned"]): module.warned.append(("module_todo", warning, mod_results["file_paths"][i])) for i, passed in enumerate(mod_results["passed"]): - module.passed.append(("module_todo", passed, module.module_dir)) + module.passed.append(("module_todo", passed, module.component_dir)) # Module tests directory test_results = pipeline_todos(None, root_dir=module.test_dir) diff --git a/nf_core/modules/lint/module_version.py b/nf_core/modules/lint/module_version.py index 1cf142e8eb..d08658f5da 100644 --- a/nf_core/modules/lint/module_version.py +++ b/nf_core/modules/lint/module_version.py @@ -23,7 +23,7 @@ def module_version(module_lint_object, module): modules_json_path = Path(module_lint_object.dir, "modules.json") # Verify that a git_sha exists in the `modules.json` file for this module - version = module_lint_object.modules_json.get_module_version(module.module_name, module.repo_url, module.org) + version = module_lint_object.modules_json.get_module_version(module.component_name, module.repo_url, module.org) if version is None: module.failed.append(("git_sha", "No git_sha entry in `modules.json`", modules_json_path)) return @@ -34,14 +34,14 @@ def module_version(module_lint_object, module): # Check whether a new version is available try: module.branch = module_lint_object.modules_json.get_component_branch( - "modules", module.module_name, module.repo_url, module.org + "modules", module.component_name, module.repo_url, module.org ) modules_repo = nf_core.modules.modules_repo.ModulesRepo(remote_url=module.repo_url, branch=module.branch) - module_git_log = modules_repo.get_component_git_log(module.module_name, "modules") + module_git_log = 
modules_repo.get_component_git_log(module.component_name, "modules") if version == next(module_git_log)["git_sha"]: - module.passed.append(("module_version", "Module is the latest version", module.module_dir)) + module.passed.append(("module_version", "Module is the latest version", module.component_dir)) else: - module.warned.append(("module_version", "New version available", module.module_dir)) + module.warned.append(("module_version", "New version available", module.component_dir)) except UserWarning: - module.warned.append(("module_version", "Failed to fetch git log", module.module_dir)) + module.warned.append(("module_version", "Failed to fetch git log", module.component_dir)) diff --git a/nf_core/modules/modules_utils.py b/nf_core/modules/modules_utils.py index 47826d3804..a5af06b996 100644 --- a/nf_core/modules/modules_utils.py +++ b/nf_core/modules/modules_utils.py @@ -3,7 +3,7 @@ import urllib from pathlib import Path -from .nfcore_module import NFCoreModule +from ..components.nfcore_component import NFCoreComponent log = logging.getLogger(__name__) @@ -81,10 +81,17 @@ def get_installed_modules(dir, repo_type="modules"): else: nfcore_modules.append(m) - # Make full (relative) file paths and create NFCoreModule objects + # Make full (relative) file paths and create NFCoreComponent objects local_modules = [os.path.join(local_modules_dir, m) for m in local_modules] nfcore_modules = [ - NFCoreModule(m, "nf-core/modules", Path(nfcore_modules_dir, m), repo_type=repo_type, base_dir=Path(dir)) + NFCoreComponent( + m, + "nf-core/modules", + Path(nfcore_modules_dir, m), + repo_type=repo_type, + base_dir=Path(dir), + component_type="modules", + ) for m in nfcore_modules ] diff --git a/nf_core/modules/nfcore_module.py b/nf_core/modules/nfcore_module.py deleted file mode 100644 index 431ef23381..0000000000 --- a/nf_core/modules/nfcore_module.py +++ /dev/null @@ -1,66 +0,0 @@ -""" -The NFCoreModule class holds information and utility functions for a single module -""" -from pathlib import Path - - -class NFCoreModule: - """ - A class to hold the information about a nf-core module - Includes functionality for linting - """ - - def __init__(self, module_name, repo_url, module_dir, repo_type, base_dir, remote_module=True): - """ - Initialize the object - - Args: - module_dir (Path): The absolute path to the module - repo_type (str): Either 'pipeline' or 'modules' depending on - whether the directory is a pipeline or clone - of nf-core/modules. 
- base_dir (Path): The absolute path to the pipeline base dir - remote_module (bool): Whether the module is to be treated as a - nf-core or local module - """ - self.module_name = module_name - self.repo_url = repo_url - self.module_dir = module_dir - self.repo_type = repo_type - self.base_dir = base_dir - self.passed = [] - self.warned = [] - self.failed = [] - self.inputs = [] - self.outputs = [] - self.has_meta = False - self.git_sha = None - self.is_patched = False - - if remote_module: - # Initialize the important files - self.main_nf = self.module_dir / "main.nf" - self.meta_yml = self.module_dir / "meta.yml" - - repo_dir = self.module_dir.parts[: self.module_dir.parts.index(self.module_name.split("/")[0])][-1] - self.org = repo_dir - self.test_dir = Path(self.base_dir, "tests", "modules", repo_dir, self.module_name) - self.test_yml = self.test_dir / "test.yml" - self.test_main_nf = self.test_dir / "main.nf" - - if self.repo_type == "pipeline": - patch_fn = f"{self.module_name.replace('/', '-')}.diff" - patch_path = Path(self.module_dir, patch_fn) - if patch_path.exists(): - self.is_patched = True - self.patch_path = patch_path - else: - # The main file is just the local module - self.main_nf = self.module_dir - self.module_name = self.module_dir.stem - # These attributes are only used by nf-core modules - # so just initialize them to None - self.meta_yml = None - self.test_dir = None - self.test_yml = None - self.test_main_nf = None diff --git a/nf_core/subworkflows/lint/__init__.py b/nf_core/subworkflows/lint/__init__.py new file mode 100644 index 0000000000..2bd726ee56 --- /dev/null +++ b/nf_core/subworkflows/lint/__init__.py @@ -0,0 +1,22 @@ +""" +Code for linting subworkflows in the nf-core/subworkflows repository and +in nf-core pipelines + +Command: +nf-core subworkflows lint +""" + +import logging + +from nf_core.components.lint import ComponentLint, LintException, LintResult + +log = logging.getLogger(__name__) + + +class SubworkflowLint(ComponentCommand): + """ + An object for linting subworkflows either in a clone of the 'nf-core/modules' + repository or in any nf-core pipeline directory + """ + + # Import lint functions From a7ec07d6c89a9cae6356b8fc3ec649654c7327e8 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Fri, 21 Jul 2023 10:33:52 +0200 Subject: [PATCH 02/16] add nf-core subworkflows lint and main_nf lint test --- nf_core/__main__.py | 72 ++++++++ nf_core/components/lint/__init__.py | 10 +- nf_core/modules/lint/__init__.py | 8 +- nf_core/subworkflows/__init__.py | 1 + nf_core/subworkflows/lint/__init__.py | 199 +++++++++++++++++++++- nf_core/subworkflows/lint/main_nf.py | 233 ++++++++++++++++++++++++++ 6 files changed, 513 insertions(+), 10 deletions(-) create mode 100644 nf_core/subworkflows/lint/main_nf.py diff --git a/nf_core/__main__.py b/nf_core/__main__.py index fd8fa9b993..469d0cf7be 100644 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -1170,6 +1170,78 @@ def local(ctx, keywords, json, dir): # pylint: disable=redefined-builtin sys.exit(1) +# nf-core subworkflows lint +@subworkflows.command() +@click.pass_context +@click.argument("tool", type=str, required=False, metavar="subworkflow name") +@click.option("-d", "--dir", type=click.Path(exists=True), default=".", metavar="") +@click.option( + "-r", + "--registry", + type=str, + metavar="", + default=None, + help="Registry to use for containers. 
If not specified it will use docker.registry value in the nextflow.config file", +) +@click.option("-k", "--key", type=str, metavar="", multiple=True, help="Run only these lint tests") +@click.option("-a", "--all", is_flag=True, help="Run on all subworkflows") +@click.option("-w", "--fail-warned", is_flag=True, help="Convert warn tests to failures") +@click.option("--local", is_flag=True, help="Run additional lint tests for local subworkflows") +@click.option("--passed", is_flag=True, help="Show passed tests") +@click.option( + "--sort-by", + type=click.Choice(["subworkflow", "test"]), + default="test", + help="Sort lint output by subworkflow or test name.", + show_default=True, +) +@click.option("--fix-version", is_flag=True, help="Fix the subworkflow version if a newer version is available") +def lint( + ctx, tool, dir, registry, key, all, fail_warned, local, passed, sort_by, fix_version +): # pylint: disable=redefined-outer-name + """ + Lint one or more subworkflows in a directory. + + Checks DSL2 subworkflow code against nf-core guidelines to ensure + that all subworkflows follow the same standards. + + Test subworkflows within a pipeline or a clone of the + nf-core/modules repository. + """ + from nf_core.components.lint import LintException + from nf_core.subworkflows import SubworkflowLint + + try: + subworkflow_lint = SubworkflowLint( + dir, + fail_warned=fail_warned, + registry=ctx.params["registry"], + remote_url=ctx.obj["modules_repo_url"], + branch=ctx.obj["modules_repo_branch"], + no_pull=ctx.obj["modules_repo_no_pull"], + hide_progress=ctx.obj["hide_progress"], + ) + subworkflow_lint.lint( + subworkflow=tool, + registry=registry, + key=key, + all_subworkflows=all, + print_results=True, + local=local, + show_passed=passed, + sort_by=sort_by, + fix_version=fix_version, + ) + if len(subworkflow_lint.failed) > 0: + sys.exit(1) + except LintException as e: + log.error(e) + sys.exit(1) + except (UserWarning, LookupError) as e: + log.critical(e) + sys.exit(1) + + # nf-core subworkflows info @subworkflows.command() @click.pass_context diff --git a/nf_core/components/lint/__init__.py b/nf_core/components/lint/__init__.py index a1b40ab892..14d114ecd2 100644 --- a/nf_core/components/lint/__init__.py +++ b/nf_core/components/lint/__init__.py @@ -10,7 +10,6 @@ import os from pathlib import Path -import questionary import rich from rich.markdown import Markdown from rich.table import Table @@ -156,6 +155,13 @@ def get_all_module_lint_tests(is_pipeline): else: return ["main_nf", "meta_yml", "module_todos", "module_deprecations", "module_tests"] + @staticmethod + def get_all_subworkflow_lint_tests(is_pipeline): + if is_pipeline: + return ["main_nf"] + else: + return ["main_nf"] + def set_up_pipeline_files(self): self.load_lint_config() self.modules_json = ModulesJson(self.dir) @@ -193,7 +199,7 @@ def _print_results(self, show_passed=False, sort_by="test"): log.debug("Printing final results") sort_order = ["lint_test", "component_name", "message"] - if sort_by == "module": + if sort_by == "module" or sort_by == "subworkflow": sort_order = ["component_name", "lint_test", "message"] # Sort the results diff --git a/nf_core/modules/lint/__init__.py b/nf_core/modules/lint/__init__.py index adf8903189..4402e8b5a4 100644 --- a/nf_core/modules/lint/__init__.py +++ b/nf_core/modules/lint/__init__.py @@ -9,22 +9,15 @@ from __future__ import print_function import logging -import operator import os -from pathlib import Path import questionary import rich -from rich.markdown import Markdown -from 
rich.table import Table import nf_core.modules.modules_utils import nf_core.utils from nf_core.components.lint import ComponentLint, LintException, LintResult -from nf_core.components.nfcore_component import NFCoreComponent from nf_core.lint_utils import console -from nf_core.modules.modules_json import ModulesJson -from nf_core.utils import plural_s as _s log = logging.getLogger(__name__) @@ -62,6 +55,7 @@ def __init__( remote_url=remote_url, branch=branch, no_pull=no_pull, + registry=registry, hide_progress=hide_progress, ) diff --git a/nf_core/subworkflows/__init__.py b/nf_core/subworkflows/__init__.py index 3c93138852..1ceccd021f 100644 --- a/nf_core/subworkflows/__init__.py +++ b/nf_core/subworkflows/__init__.py @@ -1,6 +1,7 @@ from .create import SubworkflowCreate from .info import SubworkflowInfo from .install import SubworkflowInstall +from .lint import SubworkflowLint from .list import SubworkflowList from .remove import SubworkflowRemove from .subworkflows_test import SubworkflowsTest diff --git a/nf_core/subworkflows/lint/__init__.py b/nf_core/subworkflows/lint/__init__.py index 2bd726ee56..92e7382965 100644 --- a/nf_core/subworkflows/lint/__init__.py +++ b/nf_core/subworkflows/lint/__init__.py @@ -6,17 +6,214 @@ nf-core subworkflows lint """ +from __future__ import print_function + import logging +import os + +import questionary +import rich +import nf_core.modules.modules_utils +import nf_core.utils from nf_core.components.lint import ComponentLint, LintException, LintResult +from nf_core.lint_utils import console log = logging.getLogger(__name__) -class SubworkflowLint(ComponentCommand): +class SubworkflowLint(ComponentLint): """ An object for linting subworkflows either in a clone of the 'nf-core/modules' repository or in any nf-core pipeline directory """ # Import lint functions + from .main_nf import main_nf + + def __init__( + self, + dir, + fail_warned=False, + remote_url=None, + branch=None, + no_pull=False, + registry=None, + hide_progress=False, + ): + super().__init__( + component_type="subworkflows", + dir=dir, + fail_warned=fail_warned, + remote_url=remote_url, + branch=branch, + no_pull=no_pull, + registry=registry, + hide_progress=hide_progress, + ) + + def lint( + self, + subworkflow=None, + registry="quay.io", + key=(), + all_subworkflows=False, + print_results=True, + show_passed=False, + sort_by="test", + local=False, + fix_version=False, + ): + """ + Lint all or one specific subworkflow + + First gets a list of all local subworkflows (in subworkflows/local/process) and all subworkflows + installed from nf-core (in subworkflows/nf-core) + + For all nf-core subworkflows, the correct file structure is assured and important + file content is verified. If directory subject to linting is a clone of 'nf-core/modules', + the files necessary for testing the subworkflows are also inspected. + + For all local subworkflows, the '.nf' file is checked for some important flags, and warnings + are issued if untypical content is found. 
+ + :param subworkflow: A specific subworkflow to lint + :param print_results: Whether to print the linting results + :param show_passed: Whether passed tests should be shown as well + :param fix_version: Update the subworkflow version if a newer version is available + :param hide_progress: Don't show progress bars + + :returns: A SubworkflowLint object containing information of + the passed, warned and failed tests + """ + + # Prompt for subworkflow or all + if subworkflow is None and not all_subworkflows: + questions = [ + { + "type": "list", + "name": "all_subworkflows", + "message": "Lint all subworkflows or a single named subworkflow?", + "choices": ["All subworkflows", "Named subworkflow"], + }, + { + "type": "autocomplete", + "name": "subworkflow_name", + "message": "Subworkflow name:", + "when": lambda x: x["all_subworkflows"] == "Named subworkflow", + "choices": [m.component_name for m in self.all_remote_components], + }, + ] + answers = questionary.unsafe_prompt(questions, style=nf_core.utils.nfcore_question_style) + all_subworkflows = answers["all_subworkflows"] == "All subworkflows" + subworkflow = answers.get("subworkflow_name") + + # Only lint the given module + if subworkflow: + if all_subworkflows: + raise LintException("You cannot specify a tool and request all tools to be linted.") + local_subworkflows = [] + remote_subworkflows = [s for s in self.all_remote_components if s.component_name == subworkflow] + if len(remote_subworkflows) == 0: + raise LintException(f"Could not find the specified subworkflow: '{subworkflow}'") + else: + local_subworkflows = self.all_local_components + remote_subworkflows = self.all_remote_components + + if self.repo_type == "modules": + log.info(f"Linting modules repo: [magenta]'{self.dir}'") + else: + log.info(f"Linting pipeline: [magenta]'{self.dir}'") + if subworkflow: + log.info(f"Linting subworkflow: [magenta]'{subworkflow}'") + + # Filter the tests by the key if one is supplied + if key: + self.filter_tests_by_key(key) + log.info("Only running tests: '{}'".format("', '".join(key))) + + # If it is a pipeline, load the lint config file and the modules.json file + if self.repo_type == "pipeline": + self.set_up_pipeline_files() + + # Lint local subworkflows + if local and len(local_subworkflows) > 0: + self.lint_subworkflows(local_subworkflows, registry=registry, local=True, fix_version=fix_version) + + # Lint nf-core subworkflows + if len(remote_subworkflows) > 0: + self.lint_subworkflows(remote_subworkflows, registry=registry, local=False, fix_version=fix_version) + + if print_results: + self._print_results(show_passed=show_passed, sort_by=sort_by) + self.print_summary() + + def lint_subworkflows(self, subworkflows, registry="quay.io", local=False, fix_version=False): + """ + Lint a list of subworkflows + + Args: + subworkflows ([NFCoreComponent]): A list of subworkflow objects + registry (str): The container registry to use. Should be quay.io in most situations. 
+ local (boolean): Whether the list consist of local or nf-core subworkflows + fix_version (boolean): Fix the subworkflow version if a newer version is available + """ + progress_bar = rich.progress.Progress( + "[bold blue]{task.description}", + rich.progress.BarColumn(bar_width=None), + "[magenta]{task.completed} of {task.total}[reset] » [bold yellow]{task.fields[test_name]}", + transient=True, + console=console, + disable=self.hide_progress or os.environ.get("HIDE_PROGRESS", None) is not None, + ) + with progress_bar: + lint_progress = progress_bar.add_task( + f"Linting {'local' if local else 'nf-core'} subworkflows", + total=len(subworkflows), + test_name=subworkflows[0].component_name, + ) + + for swf in subworkflows: + progress_bar.update(lint_progress, advance=1, test_name=swf.component_name) + self.lint_subworkflow(swf, progress_bar, registry=registry, local=local, fix_version=fix_version) + + def lint_subworkflow(self, swf, progress_bar, registry, local=False, fix_version=False): + """ + Perform linting on one subworkflow + + If the subworkflow is a local subworkflow we only check the `main.nf` file, + and issue warnings instead of failures. + + If the subworkflow is a nf-core subworkflow we check for existence of the files + - main.nf + - meta.yml + And verify that their content conform to the nf-core standards. + + If the linting is run for subworkflows in the central nf-core/modules repo + (repo_type==modules), files that are relevant for subworkflow testing are + also examined + """ + + # Only check the main script in case of a local subworkflow + if local: + self.main_nf(swf) + self.passed += [LintResult(swf, *s) for s in swf.passed] + warned = [LintResult(swf, *m) for m in (swf.warned + swf.failed)] + if not self.fail_warned: + self.warned += warned + else: + self.failed += warned + + # Otherwise run all the lint tests + else: + for test_name in self.lint_tests: + getattr(self, test_name)(swf) + + self.passed += [LintResult(swf, *s) for s in swf.passed] + warned = [LintResult(swf, *s) for s in swf.warned] + if not self.fail_warned: + self.warned += warned + else: + self.failed += warned + + self.failed += [LintResult(swf, *s) for s in swf.failed] diff --git a/nf_core/subworkflows/lint/main_nf.py b/nf_core/subworkflows/lint/main_nf.py new file mode 100644 index 0000000000..5eacb1e93d --- /dev/null +++ b/nf_core/subworkflows/lint/main_nf.py @@ -0,0 +1,233 @@ +""" +Lint the main.nf file of a subworkflow +""" + +import logging +import re + +log = logging.getLogger(__name__) + + +def main_nf(subworkflow_lint_object, subworkflow): + """ + Lint a ``main.nf`` subworkflow file + + Can also be used to lint local subworkflow files, + in which case failures will be reported as + warnings. 
+ + The test checks for the following: + + * A subworkflow SHOULD import at least two modules + * All included modules or subworkflows are used and their names are used for `versions.yml` + * The workflow name is all capital letters + * The subworkflow emits a software version + """ + + inputs = [] + outputs = [] + + # Read the lines directly from the subworkflow + lines = None + if lines is None: + try: + # Check whether file exists and load it + with open(subworkflow.main_nf, "r") as fh: + lines = fh.readlines() + subworkflow.passed.append(("main_nf_exists", "Subworkflow file exists", subworkflow.main_nf)) + except FileNotFoundError: + subworkflow.failed.append(("main_nf_exists", "Subworkflow file does not exist", subworkflow.main_nf)) + return + + # Go through subworkflow main.nf file and switch state according to current section + # Perform section-specific linting + state = "subworkflow" + subworkflow_lines = [] + workflow_lines = [] + main_lines = [] + for l in lines: + if re.search(r"^\s*workflow\s*\w*\s*{", l) and state == "subworkflow": + state = "workflow" + if re.search(r"take\s*:", l) and state in ["workflow"]: + state = "take" + continue + if re.search(r"main\s*:", l) and state in ["take", "workflow"]: + state = "main" + continue + if re.search(r"emit\s*:", l) and state in ["take", "main", "workflow"]: + state = "emit" + continue + + # Perform state-specific linting checks + if state == "subworkflow" and not _is_empty(l): + subworkflow_lines.append(l) + if state == "workflow" and not _is_empty(l): + workflow_lines.append(l) + if state == "take" and not _is_empty(l): + inputs.extend(_parse_input(subworkflow, l)) + if state == "emit" and not _is_empty(l): + outputs.extend(_parse_output(subworkflow, l)) + if state == "main" and not _is_empty(l): + main_lines.append(l) + + # Check that we have required sections + if not len(outputs): + subworkflow.failed.append(("main_nf_script_outputs", "No workflow 'emit' block found", subworkflow.main_nf)) + else: + subworkflow.passed.append(("main_nf_script_outputs", "Workflow 'emit' block found", subworkflow.main_nf)) + + # Check the subworkflow include statements + included_components = check_subworkflow_section(subworkflow, subworkflow_lines) + + # Check the workflow definition + check_workflow_section(subworkflow, workflow_lines) + + # Check the main definition + check_main_section(subworkflow, main_lines, included_components) + + # Check that a software version is emitted + if outputs: + if "versions" in outputs: + subworkflow.passed.append( + ("main_nf_version_emitted", "Subworkflow emits software version", subworkflow.main_nf) + ) + else: + subworkflow.warned.append( + ("main_nf_version_emitted", "Subworkflow does not emit software version", subworkflow.main_nf) + ) + + return inputs, outputs + + +def check_main_section(self, lines, included_components): + """ + Lint the main section of a subworkflow + Checks whether all included components are used and their names are used for `versions.yml`. 
+ """ + # Check that we have a main section + if len(lines) == 0: + self.failed.append( + ( + "main_section", + "Subworkflow does not contain a main section", + self.main_nf, + ) + ) + return + self.passed.append(("main_section", "Subworkflow does contain a main section", self.main_nf)) + + script = "".join(lines) + + # Check that all included components are used + # Check that all included component versions are used + main_nf_include_used = True + main_nf_include_versions = True + for component in included_components: + if component not in script: + self.warned.append( + ( + "main_nf_include_used", + f"Not all included components are used in main.nf", + self.main_nf, + ) + ) + main_nf_include_used = False + if component + ".out.versions" not in script: + self.warned.append( + ( + "main_nf_include_versions", + f"Not all included component versions are added in main.nf", + self.main_nf, + ) + ) + main_nf_include_versions = False + if main_nf_include_used: + self.passed.append(("main_nf_include_used", f"All included components are used in main.nf", self.main_nf)) + if main_nf_include_versions: + self.passed.append( + ("main_nf_include_versions", f"All included component versions are added in main.nf", self.main_nf) + ) + + +def check_subworkflow_section(self, lines): + """Lint the section of a subworkflow before the workflow definition + Specifically checks if the subworkflow includes at least two modules or subworkflows + + Args: + lines (List[str]): Content of subworkflow. + + Returns: + List: List of included component names. If subworkflow doesn't contain any lines, return None. + """ + # Check that we have subworkflow content + if len(lines) == 0: + self.failed.append( + ( + "subworkflow_include", + "Subworkflow does not include any modules before the workflow definition", + self.main_nf, + ) + ) + return + self.passed.append( + ("subworkflow_include", "Subworkflow does include modules before the workflow definition", self.main_nf) + ) + + includes = [] + for l in lines: + if l.strip().startswith("include"): + component_name = l.split("{")[1].split("}")[0].strip() + if " as " in component_name: + component_name = component_name.split(" as ")[1].strip() + includes.append(component_name) + if len(includes) >= 2: + self.passed.append(("main_nf_include", "Subworkflow includes two or more modules", self.main_nf)) + else: + self.warned.append(("main_nf_include", "Subworkflow includes less than two modules", self.main_nf)) + + return includes + + +def check_workflow_section(self, lines): + """Lint the workflow definition of a subworkflow before + Specifically checks that the name is all capital letters + + Args: + lines (List[str]): Content of workflow definition. + + Returns: + None + """ + # Workflow name should be all capital letters + self.workflow_name = lines[0].split()[1] + if self.workflow_name == self.workflow_name.upper(): + self.passed.append(("workflow_capitals", "Workflow name is in capital letters", self.main_nf)) + else: + self.failed.append(("workflow_capitals", "Workflow name is not in capital letters", self.main_nf)) + + +def _parse_input(self, line): + """ + Return list of input channel names from a take section. 
+ """ + inputs = [] + # Remove comments and trailing whitespace + inputs.append(line.split("//")[0].strip()) + return inputs + + +def _parse_output(self, line): + output = [] + if len(line) > 0: + output.append(line.split("=")[0].strip()) + return output + + +def _is_empty(line): + """Check whether a line is empty or a comment""" + empty = False + if line.strip().startswith("//"): + empty = True + if line.strip().replace(" ", "") == "": + empty = True + return empty From 59e0877a0c243ee38c1a9dcdd66b7652bda26216 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Fri, 21 Jul 2023 11:42:32 +0200 Subject: [PATCH 03/16] update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ffe25a1b37..95b11b4591 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ ### Linting -- Add new command `nf-core subworkflows lint` ([]()) +- Add new command `nf-core subworkflows lint` ([#2379](https://github.com/nf-core/tools/pull/2379)) ### Modules From 00d5875dd2a668f77d35e7b16d3cd1d38c9d1258 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Tue, 25 Jul 2023 10:28:32 +0200 Subject: [PATCH 04/16] add all subworkflow lint tests: yml, changes, tests, todos, version --- nf_core/__main__.py | 4 +- nf_core/components/lint/__init__.py | 4 +- nf_core/modules/lint/module_changes.py | 4 +- nf_core/modules/lint/module_tests.py | 1 - nf_core/modules/modules_json.py | 6 +- nf_core/modules/patch.py | 4 +- .../subworkflows/meta.yml | 42 ++++---- nf_core/subworkflows/lint/__init__.py | 13 +-- nf_core/subworkflows/lint/main_nf.py | 2 +- nf_core/subworkflows/lint/meta_yml.py | 84 +++++++++++++++ .../subworkflows/lint/subworkflow_changes.py | 46 ++++++++ .../subworkflows/lint/subworkflow_tests.py | 102 ++++++++++++++++++ .../subworkflows/lint/subworkflow_todos.py | 47 ++++++++ .../subworkflows/lint/subworkflow_version.py | 53 +++++++++ nf_core/synced_repo.py | 20 ++-- 15 files changed, 383 insertions(+), 49 deletions(-) create mode 100644 nf_core/subworkflows/lint/meta_yml.py create mode 100644 nf_core/subworkflows/lint/subworkflow_changes.py create mode 100644 nf_core/subworkflows/lint/subworkflow_tests.py create mode 100644 nf_core/subworkflows/lint/subworkflow_todos.py create mode 100644 nf_core/subworkflows/lint/subworkflow_version.py diff --git a/nf_core/__main__.py b/nf_core/__main__.py index 469d0cf7be..c5a2d4dc79 100644 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -1195,9 +1195,8 @@ def local(ctx, keywords, json, dir): # pylint: disable=redefined-builtin help="Sort lint output by subworkflow or test name.", show_default=True, ) -@click.option("--fix-version", is_flag=True, help="Fix the subworkflow version if a newer version is available") def lint( - ctx, tool, dir, registry, key, all, fail_warned, local, passed, sort_by, fix_version + ctx, tool, dir, registry, key, all, fail_warned, local, passed, sort_by ): # pylint: disable=redefined-outer-name """ Lint one or more subworkflows in a directory. 
@@ -1230,7 +1229,6 @@ def lint( local=local, show_passed=passed, sort_by=sort_by, - fix_version=fix_version, ) if len(subworkflow_lint.failed) > 0: sys.exit(1) diff --git a/nf_core/components/lint/__init__.py b/nf_core/components/lint/__init__.py index 14d114ecd2..7f1afe0bfd 100644 --- a/nf_core/components/lint/__init__.py +++ b/nf_core/components/lint/__init__.py @@ -158,9 +158,9 @@ def get_all_module_lint_tests(is_pipeline): @staticmethod def get_all_subworkflow_lint_tests(is_pipeline): if is_pipeline: - return ["main_nf"] + return ["main_nf", "meta_yml", "subworkflow_changes", "subworkflow_todos", "subworkflow_version"] else: - return ["main_nf"] + return ["main_nf", "meta_yml", "subworkflow_todos", "subworkflow_tests"] def set_up_pipeline_files(self): self.load_lint_config() diff --git a/nf_core/modules/lint/module_changes.py b/nf_core/modules/lint/module_changes.py index 00184f8dfc..0f4c2aad25 100644 --- a/nf_core/modules/lint/module_changes.py +++ b/nf_core/modules/lint/module_changes.py @@ -49,7 +49,9 @@ def module_changes(module_lint_object, module): ) modules_repo = nf_core.modules.modules_repo.ModulesRepo(remote_url=module.repo_url, branch=module.branch) - for f, same in modules_repo.module_files_identical(module.component_name, tempdir, module.git_sha).items(): + for f, same in modules_repo.component_files_identical( + module.component_name, tempdir, module.git_sha, "modules" + ).items(): if same: module.passed.append( ( diff --git a/nf_core/modules/lint/module_tests.py b/nf_core/modules/lint/module_tests.py index 6fdf6713f0..212b378748 100644 --- a/nf_core/modules/lint/module_tests.py +++ b/nf_core/modules/lint/module_tests.py @@ -48,7 +48,6 @@ def module_tests(_, module): # Lint the test.yml file try: with open(module.test_yml, "r") as fh: - # TODO: verify that the tags are correct test_yml = yaml.safe_load(fh) # Verify that tags are correct diff --git a/nf_core/modules/modules_json.py b/nf_core/modules/modules_json.py index e9e77f65c7..9c3d1ae9b1 100644 --- a/nf_core/modules/modules_json.py +++ b/nf_core/modules/modules_json.py @@ -354,7 +354,11 @@ def find_correct_commit_sha(self, component_type, component_name, component_path for commit in modules_repo.get_component_git_log(component_name, component_type, depth=1000) ) for commit_sha in commit_shas: - if all(modules_repo.module_files_identical(component_name, component_path, commit_sha).values()): + if all( + modules_repo.component_files_identical( + component_name, component_path, commit_sha, component_type + ).values() + ): return commit_sha return None diff --git a/nf_core/modules/patch.py b/nf_core/modules/patch.py index 4890345052..198bb70de5 100644 --- a/nf_core/modules/patch.py +++ b/nf_core/modules/patch.py @@ -202,7 +202,9 @@ def remove(self, module): # Write changes to module.json self.modules_json.remove_patch_entry(module, self.modules_repo.remote_url, module_dir) - if not all(self.modules_repo.module_files_identical(module, module_path, module_version).values()): + if not all( + self.modules_repo.component_files_identical(module, module_path, module_version, "modules").values() + ): log.error( f"Module files do not appear to match the remote for the commit sha in the 'module.json': {module_version}\n" f"Recommend reinstalling with 'nf-core modules install --force --sha {module_version} {module}' " diff --git a/nf_core/subworkflow-template/subworkflows/meta.yml b/nf_core/subworkflow-template/subworkflows/meta.yml index ae1689805d..9894e99c6f 100644 --- a/nf_core/subworkflow-template/subworkflows/meta.yml +++ 
b/nf_core/subworkflow-template/subworkflows/meta.yml @@ -11,39 +11,39 @@ keywords: modules: - samtools/sort - samtools/index -## TODO nf-core: List all of the variables used as input, including their types and descriptions +## TODO nf-core: List all of the channels used as input with a description and their structure input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test' ]` - - bam: + - ch_bam: type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" -## TODO nf-core: List all of the variables used as output, including their types and descriptions -output: - - meta: - type: map description: | - Groovy Map containing sample information - e.g. `[ id:'test' ]` + The input channel containing the BAM/CRAM/SAM files + Structure: [ val(meta), path(bam) ] + pattern: "*.{bam/cram/sam}" +## TODO nf-core: List all of the channels used as output with a descriptions and their structure +output: - bam: type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" + description: | + Channel containing BAM files + Structure: [ val(meta), path(bam) ] + pattern: "*.bam" - bai: type: file - description: BAM/CRAM/SAM samtools index - pattern: "*.{bai,crai,sai}" + description: | + Channel containing indexed BAM (BAI) files + Structure: [ val(meta), path(bai) ] + pattern: "*.bai" - csi: type: file - description: CSI samtools index + description: | + Channel containing CSI files + Structure: [ val(meta), path(csi) ] pattern: "*.csi" - versions: type: file - description: File containing software versions + description: | + File containing software versions + Structure: [ path(versions.yml) ] pattern: "versions.yml" authors: - "{{ author }}" diff --git a/nf_core/subworkflows/lint/__init__.py b/nf_core/subworkflows/lint/__init__.py index 92e7382965..4dd465ac0e 100644 --- a/nf_core/subworkflows/lint/__init__.py +++ b/nf_core/subworkflows/lint/__init__.py @@ -62,7 +62,6 @@ def lint( show_passed=False, sort_by="test", local=False, - fix_version=False, ): """ Lint all or one specific subworkflow @@ -80,7 +79,6 @@ def lint( :param subworkflow: A specific subworkflow to lint :param print_results: Whether to print the linting results :param show_passed: Whether passed tests should be shown as well - :param fix_version: Update the subworkflow version if a newer version is available :param hide_progress: Don't show progress bars :returns: A SubworkflowLint object containing information of @@ -138,17 +136,17 @@ def lint( # Lint local subworkflows if local and len(local_subworkflows) > 0: - self.lint_subworkflows(local_subworkflows, registry=registry, local=True, fix_version=fix_version) + self.lint_subworkflows(local_subworkflows, registry=registry, local=True) # Lint nf-core subworkflows if len(remote_subworkflows) > 0: - self.lint_subworkflows(remote_subworkflows, registry=registry, local=False, fix_version=fix_version) + self.lint_subworkflows(remote_subworkflows, registry=registry, local=False) if print_results: self._print_results(show_passed=show_passed, sort_by=sort_by) self.print_summary() - def lint_subworkflows(self, subworkflows, registry="quay.io", local=False, fix_version=False): + def lint_subworkflows(self, subworkflows, registry="quay.io", local=False): """ Lint a list of subworkflows @@ -156,7 +154,6 @@ def lint_subworkflows(self, subworkflows, registry="quay.io", local=False, fix_v subworkflows ([NFCoreComponent]): A list of subworkflow objects registry (str): The container registry to use. 
Should be quay.io in most situations. local (boolean): Whether the list consist of local or nf-core subworkflows - fix_version (boolean): Fix the subworkflow version if a newer version is available """ progress_bar = rich.progress.Progress( "[bold blue]{task.description}", @@ -175,9 +172,9 @@ def lint_subworkflows(self, subworkflows, registry="quay.io", local=False, fix_v for swf in subworkflows: progress_bar.update(lint_progress, advance=1, test_name=swf.component_name) - self.lint_subworkflow(swf, progress_bar, registry=registry, local=local, fix_version=fix_version) + self.lint_subworkflow(swf, progress_bar, registry=registry, local=local) - def lint_subworkflow(self, swf, progress_bar, registry, local=False, fix_version=False): + def lint_subworkflow(self, swf, progress_bar, registry, local=False): """ Perform linting on one subworkflow diff --git a/nf_core/subworkflows/lint/main_nf.py b/nf_core/subworkflows/lint/main_nf.py index 5eacb1e93d..058168cffa 100644 --- a/nf_core/subworkflows/lint/main_nf.py +++ b/nf_core/subworkflows/lint/main_nf.py @@ -8,7 +8,7 @@ log = logging.getLogger(__name__) -def main_nf(subworkflow_lint_object, subworkflow): +def main_nf(_, subworkflow): """ Lint a ``main.nf`` subworkflow file diff --git a/nf_core/subworkflows/lint/meta_yml.py b/nf_core/subworkflows/lint/meta_yml.py new file mode 100644 index 0000000000..f7e5f6b953 --- /dev/null +++ b/nf_core/subworkflows/lint/meta_yml.py @@ -0,0 +1,84 @@ +import json +from pathlib import Path + +import jsonschema.validators +import yaml + + +def meta_yml(_, subworkflow): + """ + Lint a ``meta.yml`` file + + The lint test checks that the subworkflow has + a ``meta.yml`` file and that it follows the + JSON schema defined in the ``subworkflows/yaml-schema.json`` + file in the nf-core/modules repository. + + In addition it checks that the subworkflow name + and subworkflow input is consistent between the + ``meta.yml`` and the ``main.nf``. + + """ + # Read the meta.yml file + try: + with open(subworkflow.meta_yml, "r") as fh: + meta_yaml = yaml.safe_load(fh) + subworkflow.passed.append(("meta_yml_exists", "Subworkflow `meta.yml` exists", subworkflow.meta_yml)) + except FileNotFoundError: + subworkflow.failed.append(("meta_yml_exists", "Subworkflow `meta.yml` does not exist", subworkflow.meta_yml)) + return + + # Confirm that the meta.yml file is valid according to the JSON schema + valid_meta_yml = True + try: + with open(Path(subworkflow_lint_object.modules_repo.local_repo_dir, "subworkflow/yaml-schema.json"), "r") as fh: + schema = json.load(fh) + jsonschema.validators.validate(instance=meta_yaml, schema=schema) + subworkflow.passed.append(("meta_yml_valid", "Subworkflow `meta.yml` is valid", subworkflow.meta_yml)) + except jsonschema.exceptions.ValidationError as e: + valid_meta_yml = False + hint = "" + if len(e.path) > 0: + hint = f"\nCheck the entry for `{e.path[0]}`." + if e.message.startswith("None is not of type 'object'") and len(e.path) > 2: + hint = f"\nCheck that the child entries of {e.path[0]+'.'+e.path[2]} are indented correctly." 
+ subworkflow.failed.append( + ( + "meta_yml_valid", + f"The `meta.yml` of the subworkflow {subworkflow.component_name} is not valid: {e.message}.{hint}", + subworkflow.meta_yml, + ) + ) + return + + # Confirm that all input and output channels are specified + if valid_meta_yml: + if "input" in meta_yaml: + meta_input = [list(x.keys())[0] for x in meta_yaml["input"]] + for input in subworkflow.inputs: + if input in meta_input: + subworkflow.passed.append(("meta_input", f"`{input}` specified", subworkflow.meta_yml)) + else: + subworkflow.failed.append(("meta_input", f"`{input}` missing in `meta.yml`", subworkflow.meta_yml)) + + if "output" in meta_yaml: + meta_output = [list(x.keys())[0] for x in meta_yaml["output"]] + for output in subworkflow.outputs: + if output in meta_output: + subworkflow.passed.append(("meta_output", f"`{output}` specified", subworkflow.meta_yml)) + else: + subworkflow.failed.append( + ("meta_output", f"`{output}` missing in `meta.yml`", subworkflow.meta_yml) + ) + + # confirm that the name matches the process name in main.nf + if meta_yaml["name"].upper() == subworkflow.workflow_name: + subworkflow.passed.append(("meta_name", "Correct name specified in `meta.yml`", subworkflow.meta_yml)) + else: + subworkflow.failed.append( + ( + "meta_name", + f"Conflicting workflow name between meta.yml (`{meta_yaml['name']}`) and main.nf (`{subworkflow.workflow_name}`)", + subworkflow.meta_yml, + ) + ) diff --git a/nf_core/subworkflows/lint/subworkflow_changes.py b/nf_core/subworkflows/lint/subworkflow_changes.py new file mode 100644 index 0000000000..b7fa13d931 --- /dev/null +++ b/nf_core/subworkflows/lint/subworkflow_changes.py @@ -0,0 +1,46 @@ +""" +Check whether the content of a subworkflow has changed compared to the original repository +""" +from pathlib import Path + +import nf_core.modules.modules_repo + + +def subworkflow_changes(subworkflow_lint_object, subworkflow): + """ + Checks whether installed nf-core subworkflow have changed compared to the + original repository + + Downloads the ``main.nf`` and ``meta.yml`` files for every subworkflow + and compares them to the local copies + + If the subworkflow has a commit SHA entry in the ``modules.json``, the file content is + compared against the files in the remote at this SHA. 
+ + Only runs when linting a pipeline, not the modules repository + """ + tempdir = subworkflow.component_dir + subworkflow.branch = subworkflow_lint_object.modules_json.get_component_branch( + "subworkflows", subworkflow.component_name, subworkflow.repo_url, subworkflow.org + ) + modules_repo = nf_core.modules.modules_repo.ModulesRepo(remote_url=subworkflow.repo_url, branch=subworkflow.branch) + + for f, same in modules_repo.component_files_identical( + subworkflow.component_name, tempdir, subworkflow.git_sha, "subworkflows" + ).items(): + if same: + subworkflow.passed.append( + ( + "check_local_copy", + "Local copy of subworkflow up to date", + f"{Path(subworkflow.component_dir, f)}", + ) + ) + else: + subworkflow.failed.append( + ( + "check_local_copy", + "Local copy of subworkflow does not match remote", + f"{Path(subworkflow.component_dir, f)}", + ) + ) diff --git a/nf_core/subworkflows/lint/subworkflow_tests.py b/nf_core/subworkflows/lint/subworkflow_tests.py new file mode 100644 index 0000000000..5499f72e75 --- /dev/null +++ b/nf_core/subworkflows/lint/subworkflow_tests.py @@ -0,0 +1,102 @@ +""" +Lint the tests of a subworkflow in nf-core/modules +""" +import logging +import os + +import yaml + +log = logging.getLogger(__name__) + + +def subworkflow_tests(_, subworkflow): + """ + Lint the tests of a subworkflow in ``nf-core/modules`` + + It verifies that the test directory exists + and contains a ``main.nf`` and a ``test.yml``, + and that the subworkflow is present in the ``pytest_modules.yml`` + file. + + """ + + if os.path.exists(subworkflow.test_dir): + subworkflow.passed.append(("test_dir_exists", "Test directory exists", subworkflow.test_dir)) + else: + subworkflow.failed.append(("test_dir_exists", "Test directory is missing", subworkflow.test_dir)) + return + + # Lint the test main.nf file + test_main_nf = os.path.join(subworkflow.test_dir, "main.nf") + if os.path.exists(test_main_nf): + subworkflow.passed.append(("test_main_exists", "test `main.nf` exists", subworkflow.test_main_nf)) + else: + subworkflow.failed.append(("test_main_exists", "test `main.nf` does not exist", subworkflow.test_main_nf)) + + # Check that entry in pytest_modules.yml exists + try: + pytest_yml_path = os.path.join(subworkflow.base_dir, "tests", "config", "pytest_modules.yml") + with open(pytest_yml_path, "r") as fh: + pytest_yml = yaml.safe_load(fh) + if subworkflow.component_name in pytest_yml.keys(): + subworkflow.passed.append(("test_pytest_yml", "correct entry in pytest_modules.yml", pytest_yml_path)) + else: + subworkflow.failed.append(("test_pytest_yml", "missing entry in pytest_modules.yml", pytest_yml_path)) + except FileNotFoundError: + subworkflow.failed.append(("test_pytest_yml", "Could not open pytest_modules.yml file", pytest_yml_path)) + + # Lint the test.yml file + try: + with open(subworkflow.test_yml, "r") as fh: + test_yml = yaml.safe_load(fh) + + # Verify that tags are correct + all_tags_correct = True + for test in test_yml: + if not sorted(test["tags"]) == sorted([subworkflow.component_name, "subworkflows"]): + all_tags_correct = False + + # Look for md5sums of empty files + for tfile in test.get("files", []): + if tfile.get("md5sum") == "d41d8cd98f00b204e9800998ecf8427e": + subworkflow.failed.append( + ( + "test_yml_md5sum", + "md5sum for empty file found: d41d8cd98f00b204e9800998ecf8427e", + subworkflow.test_yml, + ) + ) + else: + subworkflow.passed.append( + ( + "test_yml_md5sum", + "no md5sum for empty file found", + subworkflow.test_yml, + ) + ) + if tfile.get("md5sum") == 
"7029066c27ac6f5ef18d660d5741979a": + subworkflow.failed.append( + ( + "test_yml_md5sum", + "md5sum for compressed empty file found: 7029066c27ac6f5ef18d660d5741979a", + subworkflow.test_yml, + ) + ) + else: + subworkflow.passed.append( + ( + "test_yml_md5sum", + "no md5sum for compressed empty file found", + subworkflow.test_yml, + ) + ) + + if all_tags_correct: + subworkflow.passed.append(("test_yml_tags", "tags adhere to guidelines", subworkflow.test_yml)) + else: + subworkflow.failed.append(("test_yml_tags", "tags do not adhere to guidelines", subworkflow.test_yml)) + + # Test that the file exists + subworkflow.passed.append(("test_yml_exists", "Test `test.yml` exists", subworkflow.test_yml)) + except FileNotFoundError: + subworkflow.failed.append(("test_yml_exists", "Test `test.yml` does not exist", subworkflow.test_yml)) diff --git a/nf_core/subworkflows/lint/subworkflow_todos.py b/nf_core/subworkflows/lint/subworkflow_todos.py new file mode 100644 index 0000000000..1de02b18e2 --- /dev/null +++ b/nf_core/subworkflows/lint/subworkflow_todos.py @@ -0,0 +1,47 @@ +import logging + +from nf_core.lint.pipeline_todos import pipeline_todos + +log = logging.getLogger(__name__) + + +def subworkflow_todos(_, subworkflow): + """ + Look for TODO statements in the subworkflow files + + The nf-core subworkflow template contains a number of comment lines to help developers + of new subworkflow know where they need to edit files and add content. + They typically have the following format: + + .. code-block:: groovy + + // TODO nf-core: Make some kind of change to the workflow here + + ..or in markdown: + + .. code-block:: html + + + + This lint test runs through all files in the subworkflows and searches for these lines. + If any are found they will throw a warning. + + .. tip:: Note that many GUI code editors have plugins to list all instances of *TODO* + in a given project directory. This is a very quick and convenient way to get + started on your pipeline! + + """ + + # Main subworkflow directory + swf_results = pipeline_todos(None, root_dir=subworkflow.component_dir) + for i, warning in enumerate(swf_results["warned"]): + subworkflow.warned.append(("subworkflow_todo", warning, swf_results["file_paths"][i])) + for i, passed in enumerate(swf_results["passed"]): + subworkflow.passed.append(("subworkflow_todo", passed, subworkflow.component_dir)) + + # Module tests directory + test_results = pipeline_todos(None, root_dir=subworkflow.test_dir) + for i, warning in enumerate(test_results["warned"]): + subworkflow.warned.append(("subworkflow_todo", warning, test_results["file_paths"][i])) + for i, passed in enumerate(test_results["passed"]): + subworkflow.passed.append(("subworkflow_todo", passed, subworkflow.test_dir)) diff --git a/nf_core/subworkflows/lint/subworkflow_version.py b/nf_core/subworkflows/lint/subworkflow_version.py new file mode 100644 index 0000000000..5801abd885 --- /dev/null +++ b/nf_core/subworkflows/lint/subworkflow_version.py @@ -0,0 +1,53 @@ +""" +Verify that a subworkflow has a correct entry in the modules.json file +""" + +import logging +from pathlib import Path + +import nf_core +import nf_core.modules.modules_repo +import nf_core.modules.modules_utils + +log = logging.getLogger(__name__) + + +def subworkflow_version(subworkflow_lint_object, subworkflow): + """ + Verifies that the subworkflow has a version specified in the ``modules.json`` file + + It checks whether the subworkflow has an entry in the ``modules.json`` file + containing a commit SHA. 
If that is true, it verifies that there are no + newer version of the subworkflow available. + """ + + modules_json_path = Path(subworkflow_lint_object.dir, "modules.json") + # Verify that a git_sha exists in the `modules.json` file for this module + version = subworkflow_lint_object.modules_json.get_subworkflow_version( + subworkflow.component_name, subworkflow.repo_url, subworkflow.org + ) + if version is None: + subworkflow.failed.append(("git_sha", "No git_sha entry in `modules.json`", modules_json_path)) + return + + subworkflow.git_sha = version + subworkflow.passed.append(("git_sha", "Found git_sha entry in `modules.json`", modules_json_path)) + + # Check whether a new version is available + try: + subworkflow.branch = subworkflow_lint_object.modules_json.get_component_branch( + "subworkflows", subworkflow.component_name, subworkflow.repo_url, subworkflow.org + ) + modules_repo = nf_core.modules.modules_repo.ModulesRepo( + remote_url=subworkflow.repo_url, branch=subworkflow.branch + ) + + subworkflow_git_log = modules_repo.get_component_git_log(subworkflow.component_name, "subworkflows") + if version == next(subworkflow_git_log)["git_sha"]: + subworkflow.passed.append( + ("subworkflow_version", "Subworkflow is in the latest version", subworkflow.component_dir) + ) + else: + subworkflow.warned.append(("subworkflow_version", "New version available", subworkflow.component_dir)) + except UserWarning: + subworkflow.warned.append(("subworkflow_version", "Failed to fetch git log", subworkflow.component_dir)) diff --git a/nf_core/synced_repo.py b/nf_core/synced_repo.py index f78142c031..41e0853f2e 100644 --- a/nf_core/synced_repo.py +++ b/nf_core/synced_repo.py @@ -281,12 +281,12 @@ def install_component(self, component_name, install_dir, commit, component_type) self.checkout_branch() return True - def module_files_identical(self, module_name, base_path, commit): + def component_files_identical(self, component_name, base_path, commit, component_type): """ - Checks whether the module files in a pipeline are identical to the ones in the remote + Checks whether the module or subworkflow files in a pipeline are identical to the ones in the remote Args: - module_name (str): The name of the module - base_path (str): The path to the module in the pipeline + component_name (str): The name of the module or subworkflow + base_path (str): The path to the module/subworkflow in the pipeline Returns: (bool): Whether the pipeline files are identical to the repo files @@ -295,14 +295,14 @@ def module_files_identical(self, module_name, base_path, commit): self.checkout_branch() else: self.checkout(commit) - module_files = ["main.nf", "meta.yml"] - files_identical = {file: True for file in module_files} - module_dir = self.get_component_dir(module_name, "modules") - for file in module_files: + component_files = ["main.nf", "meta.yml"] + files_identical = {file: True for file in component_files} + component_dir = self.get_component_dir(component_name, component_type) + for file in component_files: try: - files_identical[file] = filecmp.cmp(os.path.join(module_dir, file), os.path.join(base_path, file)) + files_identical[file] = filecmp.cmp(os.path.join(component_dir, file), os.path.join(base_path, file)) except FileNotFoundError: - log.debug(f"Could not open file: {os.path.join(module_dir, file)}") + log.debug(f"Could not open file: {os.path.join(component_dir, file)}") continue self.checkout_branch() return files_identical From d15816aa81896e305dc2a500eba88f12e0a65666 Mon Sep 17 00:00:00 2001 From: mirpedrol 
Date: Tue, 25 Jul 2023 11:06:43 +0200
Subject: [PATCH 05/16] add subworkflows lint tests

---
 tests/subworkflows/lint.py | 57 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 tests/subworkflows/lint.py

diff --git a/tests/subworkflows/lint.py b/tests/subworkflows/lint.py
new file mode 100644
index 0000000000..d754985265
--- /dev/null
+++ b/tests/subworkflows/lint.py
@@ -0,0 +1,57 @@
+import pytest
+
+import nf_core.subworkflows
+
+from ..utils import GITLAB_URL
+
+
+def test_subworkflows_lint(self):
+    """Test linting the fastq_align_bowtie2 subworkflow"""
+    self.subworkflow_install.install("fastq_align_bowtie2")
+    subworkflow_lint = nf_core.subworkflows.SubworkflowLint(dir=self.pipeline_dir)
+    subworkflow_lint.lint(print_results=False, subworkflow="fastq_align_bowtie2")
+    assert len(subworkflow_lint.failed) == 0, f"Linting failed with {[x.__dict__ for x in subworkflow_lint.failed]}"
+    assert len(subworkflow_lint.passed) > 0
+    assert len(subworkflow_lint.warned) >= 0
+
+
+def test_subworkflows_lint_empty(self):
+    """Test linting a pipeline with no subworkflows installed"""
+    with pytest.raises(LookupError):
+        nf_core.subworkflows.SubworkflowLint(dir=self.pipeline_dir)
+
+
+def test_subworkflows_lint_new_subworkflow(self):
+    """lint a new subworkflow"""
+    subworkflow_lint = nf_core.subworkflows.SubworkflowLint(dir=self.nfcore_modules)
+    subworkflow_lint.lint(print_results=True, all_subworkflows=True)
+    assert len(subworkflow_lint.failed) == 0, f"Linting failed with {[x.__dict__ for x in subworkflow_lint.failed]}"
+    assert len(subworkflow_lint.passed) > 0
+    assert len(subworkflow_lint.warned) >= 0
+
+
+def test_subworkflows_lint_no_gitlab(self):
+    """Test linting a pipeline with no subworkflows installed from the given remote"""
+    with pytest.raises(LookupError):
+        nf_core.subworkflows.SubworkflowLint(dir=self.pipeline_dir, remote_url=GITLAB_URL)
+
+
+def test_subworkflows_lint_gitlab_subworkflows(self):
+    """Lint subworkflows from a different remote"""
+    self.subworkflow_install_gitlab.install("bam_stats_samtools")
+    subworkflow_lint = nf_core.subworkflows.SubworkflowLint(dir=self.pipeline_dir, remote_url=GITLAB_URL)
+    subworkflow_lint.lint(print_results=False, all_subworkflows=True)
+    assert len(subworkflow_lint.failed) == 2
+    assert len(subworkflow_lint.passed) > 0
+    assert len(subworkflow_lint.warned) >= 0
+
+
+def test_subworkflows_lint_multiple_remotes(self):
+    """Lint subworkflows from multiple remotes"""
+    self.subworkflow_install_gitlab.install("bam_stats_samtools")
+    self.subworkflow_install.install("fastq_align_bowtie2")
+    subworkflow_lint = nf_core.subworkflows.SubworkflowLint(dir=self.pipeline_dir, remote_url=GITLAB_URL)
+    subworkflow_lint.lint(print_results=False, all_subworkflows=True)
+    assert len(subworkflow_lint.failed) == 1
+    assert len(subworkflow_lint.passed) > 0
+    assert len(subworkflow_lint.warned) >= 0
From 4433767a454eef58c6abee7a737bfd317668e2f3 Mon Sep 17 00:00:00 2001
From: mirpedrol
Date: Tue, 25 Jul 2023 11:22:58 +0200
Subject: [PATCH 06/16] add subworkflows lint API docs

---
 docs/api/make_lint_md.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/docs/api/make_lint_md.py b/docs/api/make_lint_md.py
index 9b5a706473..6c804db0f2 100644
--- a/docs/api/make_lint_md.py
+++ b/docs/api/make_lint_md.py
@@ -5,6 +5,7 @@
 
 import nf_core.lint
 import nf_core.modules.lint
+import nf_core.subworkflows.lint
 
 
 def make_docs(docs_basedir, lint_tests, md_template):
@@ -55,3 +56,20 @@ def make_docs(docs_basedir, lint_tests, md_template):
 ```
 """,
 )
+ +# Create the subworkflows lint docs +subworkflows_docs_basedir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "_src", "subworkflow_lint_tests") +make_docs( + subworkflows_docs_basedir, + list( + set(nf_core.subworkflows.lint.SubworkflowLint.get_all_lint_tests(is_pipeline=True)).union( + nf_core.subworkflows.lint.SubworkflowLint.get_all_lint_tests(is_pipeline=False) + ) + ), + """# {0} + +```{{eval-rst}} +.. automethod:: nf_core.subworkflows.lint.SubworkflowLint.{0} +``` +""", +) From 29dbc71272b0bfe7b7a39e6ad5fce210ad354e9d Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Tue, 25 Jul 2023 11:29:25 +0200 Subject: [PATCH 07/16] add subworkflows lint docs in readme --- README.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/README.md b/README.md index e92e31516f..3a34f6c6cb 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,7 @@ A python package with helper tools for the nf-core community. - [`subworkflows remove` - Remove a subworkflow from a pipeline](#remove-a-subworkflow-from-a-pipeline) - [`subworkflows create` - Create a subworkflow from the template](#create-a-new-subworkflow) - [`subworkflows create-test-yml` - Create the `test.yml` file for a subworkflow](#create-a-subworkflow-test-config-file) + - [`subworkflows lint` - Check a subworkflows against nf-core guidelines](#check-a-subworkflow-against-nf-core-guidelines) - [`subworkflows test` - Run the tests for a subworkflow](#run-the-tests-for-a-subworkflow-using-pytest) - [Citation](#citation) @@ -1218,6 +1219,20 @@ extra_env: ![`nf-core subworkflows create-test-yml bam_stats_samtools --no-prompts --force`](docs/images/nf-core-subworkflows-create-test.svg) +### Check a subworkflow against nf-core guidelines + +Run the `nf-core subworkflows lint` command to check subworkflows in the current working directory (pipeline or nf-core/modules clone) against nf-core guidelines. + +Use the `--all` flag to run linting on all subworkflows found. Use `--dir ` to specify another directory than the current working directory. + + + +![`nf-core subworkflows lint bam_stats_samtools`](docs/images/nf-core-subworkflows-lint.svg) + ### Run the tests for a subworkflow using pytest To run unit tests of a subworkflow that you have installed or the test created by the command [`nf-core subworkflow create-test-yml`](#create-a-subworkflow-test-config-file), you can use `nf-core subworkflows test` command. This command runs the tests specified in `tests/subworkflows//test.yml` file using [pytest](https://pytest-workflow.readthedocs.io/en/stable/). From 92e59f107aaf044eebc4fc47176abef4dafa6456 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Tue, 25 Jul 2023 13:19:59 +0200 Subject: [PATCH 08/16] lint all included components tags in meta.yml and test.yml --- nf_core/components/components_utils.py | 2 +- nf_core/subworkflows/lint/meta_yml.py | 29 ++++++++++++++++- .../subworkflows/lint/subworkflow_tests.py | 32 ++++++++++++++----- nf_core/subworkflows/test_yml_builder.py | 8 ++--- 4 files changed, 57 insertions(+), 14 deletions(-) diff --git a/nf_core/components/components_utils.py b/nf_core/components/components_utils.py index 9a0565296e..73378d12ca 100644 --- a/nf_core/components/components_utils.py +++ b/nf_core/components/components_utils.py @@ -128,7 +128,7 @@ def prompt_component_version_sha(component_name, component_type, modules_repo, i def get_components_to_install(subworkflow_dir): """ - Parse the subworkflow test main.nf file to retrieve all imported modules and subworkflows. 
+ Parse the subworkflow main.nf file to retrieve all imported modules and subworkflows. """ modules = [] subworkflows = [] diff --git a/nf_core/subworkflows/lint/meta_yml.py b/nf_core/subworkflows/lint/meta_yml.py index f7e5f6b953..a453ad68df 100644 --- a/nf_core/subworkflows/lint/meta_yml.py +++ b/nf_core/subworkflows/lint/meta_yml.py @@ -4,8 +4,10 @@ import jsonschema.validators import yaml +import nf_core.components.components_utils -def meta_yml(_, subworkflow): + +def meta_yml(subworkflow_lint_object, subworkflow): """ Lint a ``meta.yml`` file @@ -18,6 +20,9 @@ def meta_yml(_, subworkflow): and subworkflow input is consistent between the ``meta.yml`` and the ``main.nf``. + Checks that all input and output channels are specified in ``meta.yml``. + Checks that all included components in ``main.nf`` are specified in ``meta.yml``. + """ # Read the meta.yml file try: @@ -82,3 +87,25 @@ def meta_yml(_, subworkflow): subworkflow.meta_yml, ) ) + + # confirm that all included components in ``main.nf`` are specified in ``meta.yml`` + included_components = nf_core.components.components_utils.get_components_to_install(subworkflow.component_dir) + if "modules" in meta_yaml: + meta_components = [x for x in meta_yaml["modules"]] + for component in meta_components: + if component in included_components: + subworkflow.passed.append( + ( + "meta_include", + f"Included module/subworkflow `{component}` specified in `meta.yml`", + subworkflow.meta_yml, + ) + ) + else: + subworkflow.failed.append( + ( + "meta_include", + f"Included module/subworkflow `{component}` missing in `meta.yml`", + subworkflow.meta_yml, + ) + ) diff --git a/nf_core/subworkflows/lint/subworkflow_tests.py b/nf_core/subworkflows/lint/subworkflow_tests.py index 5499f72e75..ef2d9d7cb2 100644 --- a/nf_core/subworkflows/lint/subworkflow_tests.py +++ b/nf_core/subworkflows/lint/subworkflow_tests.py @@ -6,6 +6,8 @@ import yaml +import nf_core.subworkflows.SubworkflowTestYmlBuilder + log = logging.getLogger(__name__) @@ -18,6 +20,7 @@ def subworkflow_tests(_, subworkflow): and that the subworkflow is present in the ``pytest_modules.yml`` file. 
+
+    Additionally, checks that all included components in ``main.nf`` are specified in ``test.yml``
     """
 
     if os.path.exists(subworkflow.test_dir):
@@ -51,10 +54,27 @@ def subworkflow_tests(_, subworkflow):
             test_yml = yaml.safe_load(fh)
 
             # Verify that tags are correct
-            all_tags_correct = True
+            included_components = nf_core.subworkflows.SubworkflowTestYmlBuilder.parse_module_tags(
+                subworkflow.component_dir
+            )
             for test in test_yml:
-                if not sorted(test["tags"]) == sorted([subworkflow.component_name, "subworkflows"]):
-                    all_tags_correct = False
+                for component in included_components:
+                    if component in test["tags"]:
+                        subworkflow.passed.append(
+                            (
+                                "test_yml_tags",
+                                f"Included module/subworkflow `{component}` specified in `test.yml`",
+                                subworkflow.test_yml,
+                            )
+                        )
+                    else:
+                        subworkflow.failed.append(
+                            (
+                                "test_yml_tags",
+                                f"Included module/subworkflow `{component}` missing in `test.yml`",
+                                subworkflow.test_yml,
+                            )
+                        )
 
             # Look for md5sums of empty files
             for tfile in test.get("files", []):
@@ -91,12 +111,8 @@ def subworkflow_tests(_, subworkflow):
                         )
                     )
 
-        if all_tags_correct:
-            subworkflow.passed.append(("test_yml_tags", "tags adhere to guidelines", subworkflow.test_yml))
-        else:
-            subworkflow.failed.append(("test_yml_tags", "tags do not adhere to guidelines", subworkflow.test_yml))
-
         # Test that the file exists
         subworkflow.passed.append(("test_yml_exists", "Test `test.yml` exists", subworkflow.test_yml))
     except FileNotFoundError:
         subworkflow.failed.append(("test_yml_exists", "Test `test.yml` does not exist", subworkflow.test_yml))
+        subworkflow.failed.append(("test_yml_exists", "Test `test.yml` does not exist", subworkflow.test_yml))
diff --git a/nf_core/subworkflows/test_yml_builder.py b/nf_core/subworkflows/test_yml_builder.py
index 2ad50d4e25..468465c9df 100644
--- a/nf_core/subworkflows/test_yml_builder.py
+++ b/nf_core/subworkflows/test_yml_builder.py
@@ -185,7 +185,7 @@ def build_single_test(self, entry_point):
         while len(ep_test["tags"]) == 0:
             tag_defaults = ["subworkflows"]
             tag_defaults.append("subworkflows/" + self.subworkflow)
-            tag_defaults += self.parse_module_tags()
+            tag_defaults += self.parse_module_tags(self.subworkflow_dir)
             if self.no_prompts:
                 ep_test["tags"] = sorted(tag_defaults)
             else:
@@ -199,12 +199,12 @@ def build_single_test(self, entry_point):
 
         return ep_test
 
-    def parse_module_tags(self):
+    def parse_module_tags(self, subworkflow_dir):
         """
-        Parse the subworkflow test main.nf file to retrieve all imported modules for adding tags.
+        Parse the subworkflow main.nf file to retrieve all imported modules for adding tags.
""" tags = [] - with open(Path(self.subworkflow_dir, "main.nf"), "r") as fh: + with open(Path(subworkflow_dir, "main.nf"), "r") as fh: for line in fh: regex = re.compile( r"include(?: *{ *)([a-zA-Z\_0-9]*)(?: *as *)?(?:[a-zA-Z\_0-9]*)?(?: *})(?: *from *)(?:'|\")(.*)(?:'|\")" From 42f533d7b44bb90cf859783f660a76abaeeb6f78 Mon Sep 17 00:00:00 2001 From: mirpedrol Date: Tue, 25 Jul 2023 13:40:28 +0200 Subject: [PATCH 09/16] fix some errors --- nf_core/subworkflow-template/subworkflows/meta.yml | 2 +- nf_core/subworkflows/lint/__init__.py | 5 +++++ nf_core/subworkflows/lint/meta_yml.py | 11 ++++++++--- nf_core/subworkflows/lint/subworkflow_tests.py | 7 ++++--- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/nf_core/subworkflow-template/subworkflows/meta.yml b/nf_core/subworkflow-template/subworkflows/meta.yml index 9894e99c6f..c1aa80c576 100644 --- a/nf_core/subworkflow-template/subworkflows/meta.yml +++ b/nf_core/subworkflow-template/subworkflows/meta.yml @@ -7,7 +7,7 @@ keywords: - bam - sam - cram -## TODO nf-core: Add a list of the modules used in the subworkflow +## TODO nf-core: Add a list of the modules and/or subworkflows used in the subworkflow modules: - samtools/sort - samtools/index diff --git a/nf_core/subworkflows/lint/__init__.py b/nf_core/subworkflows/lint/__init__.py index 4dd465ac0e..b49934ac1a 100644 --- a/nf_core/subworkflows/lint/__init__.py +++ b/nf_core/subworkflows/lint/__init__.py @@ -30,6 +30,11 @@ class SubworkflowLint(ComponentLint): # Import lint functions from .main_nf import main_nf + from .meta_yml import meta_yml + from .subworkflow_changes import subworkflow_changes + from .subworkflow_tests import subworkflow_tests + from .subworkflow_todos import subworkflow_todos + from .subworkflow_version import subworkflow_version def __init__( self, diff --git a/nf_core/subworkflows/lint/meta_yml.py b/nf_core/subworkflows/lint/meta_yml.py index a453ad68df..8563710012 100644 --- a/nf_core/subworkflows/lint/meta_yml.py +++ b/nf_core/subworkflows/lint/meta_yml.py @@ -36,7 +36,9 @@ def meta_yml(subworkflow_lint_object, subworkflow): # Confirm that the meta.yml file is valid according to the JSON schema valid_meta_yml = True try: - with open(Path(subworkflow_lint_object.modules_repo.local_repo_dir, "subworkflow/yaml-schema.json"), "r") as fh: + with open( + Path(subworkflow_lint_object.modules_repo.local_repo_dir, "subworkflows/yaml-schema.json"), "r" + ) as fh: schema = json.load(fh) jsonschema.validators.validate(instance=meta_yaml, schema=schema) subworkflow.passed.append(("meta_yml_valid", "Subworkflow `meta.yml` is valid", subworkflow.meta_yml)) @@ -90,10 +92,13 @@ def meta_yml(subworkflow_lint_object, subworkflow): # confirm that all included components in ``main.nf`` are specified in ``meta.yml`` included_components = nf_core.components.components_utils.get_components_to_install(subworkflow.component_dir) + included_components = ( + included_components[0] + included_components[1] + ) # join included modules and included subworkflows in a single list if "modules" in meta_yaml: meta_components = [x for x in meta_yaml["modules"]] - for component in meta_components: - if component in included_components: + for component in included_components: + if component in meta_components: subworkflow.passed.append( ( "meta_include", diff --git a/nf_core/subworkflows/lint/subworkflow_tests.py b/nf_core/subworkflows/lint/subworkflow_tests.py index ef2d9d7cb2..9c07b122e3 100644 --- a/nf_core/subworkflows/lint/subworkflow_tests.py +++ 
b/nf_core/subworkflows/lint/subworkflow_tests.py @@ -6,7 +6,7 @@ import yaml -import nf_core.subworkflows.SubworkflowTestYmlBuilder +import nf_core.subworkflows log = logging.getLogger(__name__) @@ -41,7 +41,7 @@ def subworkflow_tests(_, subworkflow): pytest_yml_path = os.path.join(subworkflow.base_dir, "tests", "config", "pytest_modules.yml") with open(pytest_yml_path, "r") as fh: pytest_yml = yaml.safe_load(fh) - if subworkflow.component_name in pytest_yml.keys(): + if "subworkflows/" + subworkflow.component_name in pytest_yml.keys(): subworkflow.passed.append(("test_pytest_yml", "correct entry in pytest_modules.yml", pytest_yml_path)) else: subworkflow.failed.append(("test_pytest_yml", "missing entry in pytest_modules.yml", pytest_yml_path)) @@ -55,7 +55,7 @@ def subworkflow_tests(_, subworkflow): # Verify that tags are correct included_components = nf_core.subworkflows.SubworkflowTestYmlBuilder.parse_module_tags( - subworkflow.component_dir + subworkflow, subworkflow.component_dir ) for test in test_yml: for component in included_components: @@ -116,3 +116,4 @@ def subworkflow_tests(_, subworkflow): except FileNotFoundError: subworkflow.failed.append(("test_yml_exists", "Test `test.yml` does not exist", subworkflow.test_yml)) subworkflow.failed.append(("test_yml_exists", "Test `test.yml` does not exist", subworkflow.test_yml)) + subworkflow.failed.append(("test_yml_exists", "Test `test.yml` does not exist", subworkflow.test_yml)) From cb3844e7ca57782f47c4582468a9bca6d276fc1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlia=20Mir=20Pedrol?= Date: Wed, 26 Jul 2023 11:02:10 +0200 Subject: [PATCH 10/16] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Matthias Hörtenhuber --- README.md | 6 +++--- nf_core/__main__.py | 6 +++--- nf_core/components/lint/__init__.py | 4 ++-- nf_core/components/nfcore_component.py | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 3a34f6c6cb..94e4528230 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ A python package with helper tools for the nf-core community. - [`subworkflows remove` - Remove a subworkflow from a pipeline](#remove-a-subworkflow-from-a-pipeline) - [`subworkflows create` - Create a subworkflow from the template](#create-a-new-subworkflow) - [`subworkflows create-test-yml` - Create the `test.yml` file for a subworkflow](#create-a-subworkflow-test-config-file) - - [`subworkflows lint` - Check a subworkflows against nf-core guidelines](#check-a-subworkflow-against-nf-core-guidelines) + - [`subworkflows lint` - Check a subworkflow against nf-core guidelines](#check-a-subworkflow-against-nf-core-guidelines) - [`subworkflows test` - Run the tests for a subworkflow](#run-the-tests-for-a-subworkflow-using-pytest) - [Citation](#citation) @@ -1221,9 +1221,9 @@ extra_env: ### Check a subworkflow against nf-core guidelines -Run the `nf-core subworkflows lint` command to check subworkflows in the current working directory (pipeline or nf-core/modules clone) against nf-core guidelines. +Run the `nf-core subworkflows lint` command to check subworkflows in the current working directory (a pipeline or a clone of nf-core/modules) against nf-core guidelines. -Use the `--all` flag to run linting on all subworkflows found. Use `--dir ` to specify another directory than the current working directory. +Use the `--all` flag to run linting on all subworkflows found. 
Use `--dir ` to specify a different directory than the current working directory.
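
For example, the following illustrative invocations cover the two common cases described above (the subworkflow name is only an example taken from these docs; the flags are the ones documented here):

```bash
# Lint a single named subworkflow in the current pipeline
nf-core subworkflows lint bam_stats_samtools

# Lint every subworkflow found in another directory
nf-core subworkflows lint --all --dir <pipeline_dir>
```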