diff --git a/CHANGELOG.md b/CHANGELOG.md index e0e91d4a7e..b004926ebb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ - Add `--fix-version` flag to `nf-core modules lint` command to update modules to the latest version ([#1588](https://github.com/nf-core/tools/pull/1588)) - Fix a bug in the regex extracting the version from biocontainers URLs ([#1598](https://github.com/nf-core/tools/pull/1598)) +- Update how we interface with git remotes. ([#1626](https://github.com/nf-core/tools/issues/1626)) ## [v2.4.1 - Cobolt Koala Patch](https://github.com/nf-core/tools/releases/tag/2.4) - [2022-05-16] diff --git a/README.md b/README.md index e5abbc339e..fb5a426fbd 100644 --- a/README.md +++ b/README.md @@ -911,38 +911,19 @@ The nf-core DSL2 modules repository is at The modules supercommand comes with two flags for specifying a custom remote: -- `--github-repository `: Specify the repository from which the modules should be fetched. Defaults to `nf-core/modules`. -- `--branch `: Specify the branch from which the modules shoudl be fetched. Defaults to `master`. +- `--git-remote <git remote url>`: Specify the repository from which the modules should be fetched as a git URL. Defaults to the GitHub repository of `nf-core/modules`. +- `--branch <branch name>`: Specify the branch from which the modules should be fetched. Defaults to the default branch of your repository. Note that a custom remote must follow a similar directory structure to that of `nf-core/moduleś` for the `nf-core modules` commands to work properly. -### Private remote modules +The modules commands will, during initialisation, try to pull changes from the remote repositories. If you want to disable this, for example +for performance reasons or if you want to run the commands offline, you can use the flag `--no-pull`. Note however that the commands will +still need to clone repositories that have previously not been used. 
-In order to get access to your private modules repo, you need to create -the `~/.config/gh/hosts.yml` file, which is the same file required by -[GitHub CLI](https://cli.github.com/) to deal with private repositories. -Such file is structured as follow: +### Private remote repositories -```conf -github.com: - oauth_token: - user: - git_protocol: -``` - -The easiest way to create this configuration file is through _GitHub CLI_: follow -its [installation instructions](https://cli.github.com/manual/installation) -and then call: - -```bash -gh auth login -``` - -After that, you will be able to list and install your private modules without -providing your github credentials through command line, by using `--github-repository` -and `--branch` options properly. -See the documentation on [gh auth login](https://cli.github.com/manual/gh_auth_login>) -to get more information. +You can use the modules command with private remote repositories. Make sure that your local `git` is correctly configured with your private remote +and then specify the remote the same way you would do with a public remote repository. ### List modules @@ -1079,7 +1060,7 @@ There are three additional flags that you can use when installing a module: - `--force`: Overwrite a previously installed version of the module. - `--prompt`: Select the module version using a cli prompt. -- `--sha `: Install the module at a specific commit from the `nf-core/modules` repository. +- `--sha `: Install the module at a specific commit. 
### Update modules in a pipeline diff --git a/nf_core/__main__.py b/nf_core/__main__.py index f3f0701fba..883ce1c034 100755 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -28,6 +28,9 @@ # Submodules should all traverse back to this log = logging.getLogger() +# Set up .nfcore directory for storing files between sessions +nf_core.utils.setup_nfcore_dir() + # Set up nicer formatting of click cli help messages click.rich_click.MAX_WIDTH = 100 click.rich_click.USE_RICH_MARKUP = True @@ -342,14 +345,20 @@ def lint(dir, release, fix, key, show_passed, fail_ignored, fail_warned, markdow @nf_core_cli.group() @click.option( "-g", - "--github-repository", + "--git-remote", type=str, - default="nf-core/modules", - help="GitHub repository hosting modules.", + default=nf_core.modules.modules_repo.NF_CORE_MODULES_REMOTE, + help="Remote git repo to fetch files from", +) +@click.option("-b", "--branch", type=str, default=None, help="Branch of git repository hosting modules.") +@click.option( + "--no-pull", + is_flag=True, + default=False, + help="Do not pull in latest changes to local clone of modules repository.", ) -@click.option("-b", "--branch", type=str, default="master", help="Branch of GitHub repository hosting modules.") @click.pass_context -def modules(ctx, github_repository, branch): +def modules(ctx, git_remote, branch, no_pull): """ Commands to manage Nextflow DSL2 modules (tool wrappers). 
""" @@ -357,12 +366,10 @@ def modules(ctx, github_repository, branch): # by means other than the `if` block below) ctx.ensure_object(dict) - # Make repository object to pass to subcommands - try: - ctx.obj["modules_repo_obj"] = nf_core.modules.ModulesRepo(github_repository, branch) - except LookupError as e: - log.critical(e) - sys.exit(1) + # Place the arguments in a context object + ctx.obj["modules_repo_url"] = git_remote + ctx.obj["modules_repo_branch"] = branch + ctx.obj["modules_repo_no_pull"] = no_pull # nf-core modules list subcommands @@ -385,10 +392,11 @@ def remote(ctx, keywords, json): List modules in a remote GitHub repo [dim i](e.g [link=https://github.com/nf-core/modules]nf-core/modules[/])[/]. """ try: - module_list = nf_core.modules.ModuleList(None, remote=True) - module_list.modules_repo = ctx.obj["modules_repo_obj"] + module_list = nf_core.modules.ModuleList( + None, True, ctx.obj["modules_repo_url"], ctx.obj["modules_repo_branch"], ctx.obj["modules_repo_no_pull"] + ) print(module_list.list_modules(keywords, json)) - except UserWarning as e: + except (UserWarning, LookupError) as e: log.critical(e) sys.exit(1) @@ -410,10 +418,11 @@ def local(ctx, keywords, json, dir): List modules installed locally in a pipeline """ try: - module_list = nf_core.modules.ModuleList(dir, remote=False) - module_list.modules_repo = ctx.obj["modules_repo_obj"] + module_list = nf_core.modules.ModuleList( + dir, False, ctx.obj["modules_repo_url"], ctx.obj["modules_repo_branch"], ctx.obj["modules_repo_no_pull"] + ) print(module_list.list_modules(keywords, json)) - except UserWarning as e: + except (UserWarning, LookupError) as e: log.critical(e) sys.exit(1) @@ -439,12 +448,19 @@ def install(ctx, tool, dir, prompt, force, sha): Fetches and installs module files from a remote repo e.g. nf-core/modules. 
""" try: - module_install = nf_core.modules.ModuleInstall(dir, force=force, prompt=prompt, sha=sha) - module_install.modules_repo = ctx.obj["modules_repo_obj"] + module_install = nf_core.modules.ModuleInstall( + dir, + force, + prompt, + sha, + ctx.obj["modules_repo_url"], + ctx.obj["modules_repo_branch"], + ctx.obj["modules_repo_no_pull"], + ) exit_status = module_install.install(tool) if not exit_status and all: sys.exit(1) - except UserWarning as e: + except (UserWarning, LookupError) as e: log.error(e) sys.exit(1) @@ -487,13 +503,21 @@ def update(ctx, tool, dir, force, prompt, sha, all, preview, save_diff): """ try: module_install = nf_core.modules.ModuleUpdate( - dir, force=force, prompt=prompt, sha=sha, update_all=all, show_diff=preview, save_diff_fn=save_diff + dir, + force, + prompt, + sha, + all, + preview, + save_diff, + ctx.obj["modules_repo_url"], + ctx.obj["modules_repo_branch"], + ctx.obj["modules_repo_no_pull"], ) - module_install.modules_repo = ctx.obj["modules_repo_obj"] exit_status = module_install.update(tool) if not exit_status and all: sys.exit(1) - except UserWarning as e: + except (UserWarning, LookupError) as e: log.error(e) sys.exit(1) @@ -514,10 +538,11 @@ def remove(ctx, dir, tool): Remove a module from a pipeline. 
""" try: - module_remove = nf_core.modules.ModuleRemove(dir) - module_remove.modules_repo = ctx.obj["modules_repo_obj"] + module_remove = nf_core.modules.ModuleRemove( + dir, ctx.obj["modules_repo_url"], ctx.obj["modules_repo_branch"], ctx.obj["modules_repo_no_pull"] + ) module_remove.remove(tool) - except UserWarning as e: + except (UserWarning, LookupError) as e: log.critical(e) sys.exit(1) @@ -562,6 +587,9 @@ def create_module(ctx, tool, dir, author, label, meta, no_meta, force, conda_nam except UserWarning as e: log.critical(e) sys.exit(1) + except LookupError as e: + log.error(e) + sys.exit(1) # nf-core modules create-test-yml @@ -582,7 +610,7 @@ def create_test_yml(ctx, tool, run_tests, output, force, no_prompts): try: meta_builder = nf_core.modules.ModulesTestYmlBuilder(tool, run_tests, output, force, no_prompts) meta_builder.run() - except UserWarning as e: + except (UserWarning, LookupError) as e: log.critical(e) sys.exit(1) @@ -608,8 +636,9 @@ def lint(ctx, tool, dir, key, all, local, passed, fix_version): nf-core/modules repository. """ try: - module_lint = nf_core.modules.ModuleLint(dir=dir) - module_lint.modules_repo = ctx.obj["modules_repo_obj"] + module_lint = nf_core.modules.ModuleLint( + dir, ctx.obj["modules_repo_url"], ctx.obj["modules_repo_branch"], ctx.obj["modules_repo_no_pull"] + ) module_lint.lint( module=tool, key=key, @@ -624,7 +653,7 @@ def lint(ctx, tool, dir, key, all, local, passed, fix_version): except nf_core.modules.lint.ModuleLintException as e: log.error(e) sys.exit(1) - except UserWarning as e: + except (UserWarning, LookupError) as e: log.critical(e) sys.exit(1) @@ -653,10 +682,11 @@ def info(ctx, tool, dir): If not, usage from the remote modules repo will be shown. 
""" try: - module_info = nf_core.modules.ModuleInfo(dir, tool) - module_info.modules_repo = ctx.obj["modules_repo_obj"] + module_info = nf_core.modules.ModuleInfo( + dir, tool, ctx.obj["modules_repo_url"], ctx.obj["modules_repo_branch"], ctx.obj["modules_repo_no_pull"] + ) print(module_info.get_module_info()) - except UserWarning as e: + except (UserWarning, LookupError) as e: log.error(e) sys.exit(1) @@ -674,12 +704,14 @@ def bump_versions(ctx, tool, dir, all, show_all): the nf-core/modules repo. """ try: - version_bumper = nf_core.modules.bump_versions.ModuleVersionBumper(pipeline_dir=dir) + version_bumper = nf_core.modules.bump_versions.ModuleVersionBumper( + dir, ctx.obj["modules_repo_url"], ctx.obj["modules_repo_branch"], ctx.obj["modules_repo_no_pull"] + ) version_bumper.bump_versions(module=tool, all_modules=all, show_uptodate=show_all) except nf_core.modules.module_utils.ModuleException as e: log.error(e) sys.exit(1) - except UserWarning as e: + except (UserWarning, LookupError) as e: log.critical(e) sys.exit(1) @@ -742,7 +774,7 @@ def test_module(ctx, tool, no_prompts, pytest_args): try: meta_builder = nf_core.modules.ModulesTest(tool, no_prompts, pytest_args) meta_builder.run() - except UserWarning as e: + except (UserWarning, LookupError) as e: log.critical(e) sys.exit(1) diff --git a/nf_core/lint/modules_json.py b/nf_core/lint/modules_json.py index 6e6ddd6d17..2a4aa1a5b3 100644 --- a/nf_core/lint/modules_json.py +++ b/nf_core/lint/modules_json.py @@ -27,7 +27,14 @@ def modules_json(self): all_modules_passed = True for repo in modules_json["repos"].keys(): - for key in modules_json["repos"][repo].keys(): + # Check if the modules.json has been updated to keep the + if "modules" not in modules_json["repos"][repo] or "git_url" not in modules_json["repos"][repo]: + failed.append( + f"Your `modules.json` file is outdated. 
Please remove it and reinstall it by running any module command" + ) + continue + + for key in modules_json["repos"][repo]["modules"]: if not key in modules_command.module_names[repo]: failed.append(f"Entry for `{key}` found in `modules.json` but module is not installed in pipeline.") all_modules_passed = False diff --git a/nf_core/modules/bump_versions.py b/nf_core/modules/bump_versions.py index 0da96c72d4..1c5fedaf8f 100644 --- a/nf_core/modules/bump_versions.py +++ b/nf_core/modules/bump_versions.py @@ -28,8 +28,8 @@ class ModuleVersionBumper(ModuleCommand): - def __init__(self, pipeline_dir): - super().__init__(pipeline_dir) + def __init__(self, pipeline_dir, remote_url=None, branch=None, no_pull=False): + super().__init__(pipeline_dir, remote_url, branch, no_pull) self.up_to_date = None self.updated = None diff --git a/nf_core/modules/info.py b/nf_core/modules/info.py index 6d33c091ff..0800b41157 100644 --- a/nf_core/modules/info.py +++ b/nf_core/modules/info.py @@ -11,12 +11,7 @@ from rich.table import Table from rich.text import Text -from .module_utils import ( - get_installed_modules, - get_module_git_log, - get_repo_type, - module_exist_in_repo, -) +from .module_utils import get_repo_type from .modules_command import ModuleCommand from .modules_repo import ModulesRepo @@ -24,7 +19,7 @@ class ModuleInfo(ModuleCommand): - def __init__(self, pipeline_dir, tool): + def __init__(self, pipeline_dir, tool, remote_url, branch, no_pull): self.module = tool self.meta = None @@ -40,7 +35,7 @@ def __init__(self, pipeline_dir, tool): log.debug(f"Only showing remote info: {e}") pipeline_dir = None - super().__init__(pipeline_dir) + super().__init__(pipeline_dir, remote_url, branch, no_pull) def get_module_info(self): """Given the name of a module, parse meta.yml and print usage help.""" @@ -93,28 +88,14 @@ def get_remote_yaml(self): Returns: dict or bool: Parsed meta.yml found, False otherwise """ - # Fetch the remote repo information - 
self.modules_repo.get_modules_file_tree() - # Check if our requested module is there - if self.module not in self.modules_repo.modules_avail_module_names: + if self.module not in self.modules_repo.get_avail_modules(): return False - # Get the remote path - meta_url = None - for file_dict in self.modules_repo.modules_file_tree: - if file_dict.get("path") == f"modules/{self.module}/meta.yml": - meta_url = file_dict.get("url") - - if not meta_url: + file_contents = self.modules_repo.get_meta_yml(self.module) + if file_contents is None: return False - - # Download and parse - log.debug(f"Attempting to fetch {meta_url}") - response = requests.get(meta_url) - result = response.json() - file_contents = base64.b64decode(result["content"]) - self.remote_location = self.modules_repo.name + self.remote_location = self.modules_repo.fullname return yaml.safe_load(file_contents) def generate_module_info_help(self): diff --git a/nf_core/modules/install.py b/nf_core/modules/install.py index 843943805a..ccce6200da 100644 --- a/nf_core/modules/install.py +++ b/nf_core/modules/install.py @@ -6,19 +6,18 @@ import nf_core.modules.module_utils import nf_core.utils -from .module_utils import get_module_git_log, module_exist_in_repo from .modules_command import ModuleCommand +from .modules_repo import NF_CORE_MODULES_NAME log = logging.getLogger(__name__) class ModuleInstall(ModuleCommand): - def __init__(self, pipeline_dir, force=False, prompt=False, sha=None, update_all=False): - super().__init__(pipeline_dir) + def __init__(self, pipeline_dir, force=False, prompt=False, sha=None, remote_url=None, branch=None, no_pull=False): + super().__init__(pipeline_dir, remote_url, branch, no_pull) self.force = force self.prompt = prompt self.sha = sha - self.update_all = update_all def install(self, module): if self.repo_type == "modules": @@ -31,37 +30,25 @@ def install(self, module): # Verify that 'modules.json' is consistent with the installed modules self.modules_json_up_to_date() - # Get the 
available modules - try: - self.modules_repo.get_modules_file_tree() - except LookupError as e: - log.error(e) - return False - if self.prompt and self.sha is not None: log.error("Cannot use '--sha' and '--prompt' at the same time!") return False # Verify that the provided SHA exists in the repo if self.sha: - try: - nf_core.modules.module_utils.sha_exists(self.sha, self.modules_repo) - except UserWarning: - log.error(f"Commit SHA '{self.sha}' doesn't exist in '{self.modules_repo.name}'") - return False - except LookupError as e: - log.error(e) + if not self.modules_repo.sha_exists_on_branch(self.sha): + log.error(f"Commit SHA '{self.sha}' doesn't exist in '{self.modules_repo.fullname}'") return False if module is None: module = questionary.autocomplete( "Tool name:", - choices=self.modules_repo.modules_avail_module_names, + choices=self.modules_repo.get_avail_modules(), style=nf_core.utils.nfcore_question_style, ).unsafe_ask() # Check that the supplied name is an available module - if module and module not in self.modules_repo.modules_avail_module_names: + if module and module not in self.modules_repo.get_avail_modules(): log.error(f"Module '{module}' not found in list of available modules.") log.info("Use the command 'nf-core modules list' to view available software") return False @@ -71,18 +58,21 @@ def install(self, module): if not modules_json: return False - if not module_exist_in_repo(module, self.modules_repo): - warn_msg = f"Module '{module}' not found in remote '{self.modules_repo.name}' ({self.modules_repo.branch})" + if not self.modules_repo.module_exists(module): + warn_msg = ( + f"Module '{module}' not found in remote '{self.modules_repo.fullname}' ({self.modules_repo.branch})" + ) log.warning(warn_msg) return False - if self.modules_repo.name in modules_json["repos"]: - current_entry = modules_json["repos"][self.modules_repo.name].get(module) + if self.modules_repo.fullname in modules_json["repos"]: + current_entry = 
modules_json["repos"][self.modules_repo.fullname]["modules"].get(module) else: current_entry = None # Set the install folder based on the repository name - install_folder = [self.dir, "modules", self.modules_repo.owner, self.modules_repo.repo] + install_folder = [self.dir, "modules"] + install_folder.extend(os.path.split(self.modules_repo.fullname)) # Compute the module directory module_dir = os.path.join(*install_folder, module) @@ -91,7 +81,9 @@ def install(self, module): if (current_entry is not None and os.path.exists(module_dir)) and not self.force: log.error("Module is already installed.") - repo_flag = "" if self.modules_repo.name == "nf-core/modules" else f"-g {self.modules_repo.name} " + repo_flag = ( + "" if self.modules_repo.fullname == NF_CORE_MODULES_NAME else f"-g {self.modules_repo.fullname} " + ) branch_flag = "" if self.modules_repo.branch == "master" else f"-b {self.modules_repo.branch} " log.info( @@ -113,22 +105,18 @@ def install(self, module): return False else: # Fetch the latest commit for the module - try: - git_log = get_module_git_log(module, modules_repo=self.modules_repo, per_page=1, page_nbr=1) - except UserWarning: - log.error(f"Was unable to fetch version of module '{module}'") - return False + git_log = list(self.modules_repo.get_module_git_log(module, depth=1)) version = git_log[0]["git_sha"] if self.force: - log.info(f"Removing installed version of '{self.modules_repo.name}/{module}'") + log.info(f"Removing installed version of '{self.modules_repo.fullname}/{module}'") self.clear_module_dir(module, module_dir) log.info(f"{'Rei' if self.force else 'I'}nstalling '{module}'") - log.debug(f"Installing module '{module}' at modules hash {version} from {self.modules_repo.name}") + log.debug(f"Installing module '{module}' at modules hash {version} from {self.modules_repo.fullname}") # Download module files - if not self.download_module_file(module, version, self.modules_repo, install_folder): + if not self.install_module_files(module, 
version, self.modules_repo, install_folder): return False # Print include statement @@ -136,5 +124,5 @@ def install(self, module): log.info(f"Include statement: include {{ {module_name} }} from '.{os.path.join(*install_folder, module)}/main’") # Update module.json with newly installed module - self.update_modules_json(modules_json, self.modules_repo.name, module, version) + self.update_modules_json(modules_json, self.modules_repo, module, version) return True diff --git a/nf_core/modules/lint/__init__.py b/nf_core/modules/lint/__init__.py index 9bbe173ef5..d7c40c8d09 100644 --- a/nf_core/modules/lint/__init__.py +++ b/nf_core/modules/lint/__init__.py @@ -69,7 +69,7 @@ class ModuleLint(ModuleCommand): from .module_todos import module_todos from .module_version import module_version - def __init__(self, dir): + def __init__(self, dir, remote_url=None, branch=None, no_pull=False): self.dir = dir try: self.dir, self.repo_type = nf_core.modules.module_utils.get_repo_type(self.dir) @@ -79,7 +79,7 @@ def __init__(self, dir): self.passed = [] self.warned = [] self.failed = [] - self.modules_repo = ModulesRepo() + self.modules_repo = ModulesRepo(remote_url, branch, no_pull) self.lint_tests = self._get_all_lint_tests() # Get lists of modules install in directory self.all_local_modules, self.all_nfcore_modules = self.get_installed_modules() diff --git a/nf_core/modules/lint/module_changes.py b/nf_core/modules/lint/module_changes.py index 9676481b43..2ee89102aa 100644 --- a/nf_core/modules/lint/module_changes.py +++ b/nf_core/modules/lint/module_changes.py @@ -6,8 +6,6 @@ import requests import rich -from nf_core.modules.lint import LintResult - def module_changes(module_lint_object, module): """ @@ -22,59 +20,22 @@ def module_changes(module_lint_object, module): Only runs when linting a pipeline, not the modules repository """ - files_to_check = ["main.nf", "meta.yml"] - - # Loop over nf-core modules - module_base_url = 
f"https://raw.githubusercontent.com/{module_lint_object.modules_repo.name}/{module_lint_object.modules_repo.branch}/modules/{module.module_name}/" - - # If module.git_sha specified, check specific commit version for changes - if module.git_sha: - module_base_url = f"https://raw.githubusercontent.com/{module_lint_object.modules_repo.name}/{module.git_sha}/modules/{module.module_name}/" - - for f in files_to_check: - # open local copy, continue if file not found (a failed message has already been issued in this case) - try: - local_copy = open(os.path.join(module.module_dir, f), "r").read() - except FileNotFoundError as e: - continue - - # Download remote copy and compare - url = module_base_url + f - r = requests.get(url=url) - - if r.status_code != 200: - module.warned.append( + for f, same in module_lint_object.modules_repo.module_files_identical( + module.module_name, module.module_dir, module.git_sha + ).items(): + if same: + module.passed.append( ( "check_local_copy", - "Could not fetch remote copy, skipping comparison.", + "Local copy of module up to date", f"{os.path.join(module.module_dir, f)}", ) ) else: - try: - remote_copy = r.content.decode("utf-8") - - if local_copy != remote_copy: - module.failed.append( - ( - "check_local_copy", - "Local copy of module does not match remote", - f"{os.path.join(module.module_dir, f)}", - ) - ) - else: - module.passed.append( - ( - "check_local_copy", - "Local copy of module up to date", - f"{os.path.join(module.module_dir, f)}", - ) - ) - except UnicodeDecodeError as e: - module.warned.append( - ( - "check_local_copy", - f"Could not decode file from {url}. 
Skipping comparison ({e})", - f"{os.path.join(module.module_dir, f)}", - ) + module.failed.append( + ( + "check_local_copy", + "Local copy of module does not match remote", + f"{os.path.join(module.module_dir, f)}", ) + ) diff --git a/nf_core/modules/lint/module_version.py b/nf_core/modules/lint/module_version.py index 64b4817719..979a4ae011 100644 --- a/nf_core/modules/lint/module_version.py +++ b/nf_core/modules/lint/module_version.py @@ -13,6 +13,7 @@ import nf_core import nf_core.modules.module_utils +import nf_core.modules.modules_repo log = logging.getLogger(__name__) @@ -30,7 +31,7 @@ def module_version(module_lint_object, module): # Verify that a git_sha exists in the `modules.json` file for this module try: - module_entry = module_lint_object.modules_json["repos"][module_lint_object.modules_repo.name][ + module_entry = module_lint_object.modules_json["repos"][module_lint_object.modules_repo.fullname]["modules"][ module.module_name ] git_sha = module_entry["git_sha"] @@ -39,8 +40,9 @@ def module_version(module_lint_object, module): # Check whether a new version is available try: - module_git_log = nf_core.modules.module_utils.get_module_git_log(module.module_name) - if git_sha == module_git_log[0]["git_sha"]: + modules_repo = nf_core.modules.modules_repo.ModulesRepo() + module_git_log = modules_repo.get_module_git_log(module.module_name) + if git_sha == next(module_git_log)["git_sha"]: module.passed.append(("module_version", "Module is the latest version", module.module_dir)) else: module.warned.append(("module_version", "New version available", module.module_dir)) diff --git a/nf_core/modules/list.py b/nf_core/modules/list.py index 537f3bd621..dc7f6cf91d 100644 --- a/nf_core/modules/list.py +++ b/nf_core/modules/list.py @@ -5,6 +5,7 @@ import rich import nf_core.modules.module_utils +from nf_core.modules.modules_repo import ModulesRepo from .modules_command import ModuleCommand @@ -12,8 +13,8 @@ class ModuleList(ModuleCommand): - def __init__(self, 
pipeline_dir, remote=True): - super().__init__(pipeline_dir) + def __init__(self, pipeline_dir, remote=True, remote_url=None, branch=None, no_pull=False): + super().__init__(pipeline_dir, remote_url, branch, no_pull) self.remote = remote def list_modules(self, keywords=None, print_json=False): @@ -42,20 +43,13 @@ def pattern_msg(keywords): # No pipeline given - show all remote if self.remote: - # Get the list of available modules - try: - self.modules_repo.get_modules_file_tree() - except LookupError as e: - log.error(e) - return False - # Filter the modules by keywords - modules = [mod for mod in self.modules_repo.modules_avail_module_names if all(k in mod for k in keywords)] + modules = [mod for mod in self.modules_repo.get_avail_modules() if all(k in mod for k in keywords)] # Nothing found if len(modules) == 0: log.info( - f"No available modules found in {self.modules_repo.name} ({self.modules_repo.branch})" + f"No available modules found in {self.modules_repo.fullname} ({self.modules_repo.branch})" f"{pattern_msg(keywords)}" ) return "" @@ -100,12 +94,20 @@ def pattern_msg(keywords): for repo_name, modules in sorted(repos_with_mods.items()): repo_entry = modules_json["repos"].get(repo_name, {}) for module in sorted(modules): - module_entry = repo_entry.get(module) + repo_modules = repo_entry.get("modules") + if repo_modules is None: + raise UserWarning( + "You 'modules.json' file is not up to date. 
Please remove it and rerun the command" + ) + module_entry = repo_modules.get(module) + if module_entry: version_sha = module_entry["git_sha"] try: # pass repo_name to get info on modules even outside nf-core/modules - message, date = nf_core.modules.module_utils.get_commit_info(version_sha, repo_name) + message, date = ModulesRepo(remote_url=repo_entry["git_url"]).get_commit_info( + version_sha + ) # NOTE add support for other remotes except LookupError as e: log.warning(e) date = "[red]Not Available" @@ -122,7 +124,7 @@ def pattern_msg(keywords): if self.remote: log.info( - f"Modules available from {self.modules_repo.name} ({self.modules_repo.branch})" + f"Modules available from {self.modules_repo.fullname} ({self.modules_repo.branch})" f"{pattern_msg(keywords)}:\n" ) else: diff --git a/nf_core/modules/module_test.py b/nf_core/modules/module_test.py index 8e137f07a6..a57235027f 100644 --- a/nf_core/modules/module_test.py +++ b/nf_core/modules/module_test.py @@ -81,9 +81,9 @@ def _check_inputs(self): modules_repo.get_modules_file_tree() self.module_name = questionary.autocomplete( "Tool name:", - choices=modules_repo.modules_avail_module_names, + choices=modules_repo.get_avail_modules(), style=nf_core.utils.nfcore_question_style, - ).ask() + ).unsafe_ask() module_dir = Path("modules") / self.module_name # First, sanity check that the module directory exists diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index c1522e2485..887f64bc12 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -3,19 +3,21 @@ import json import logging import os +import urllib +from sys import modules +import git import questionary import rich +from pyrsistent import m import nf_core.utils -from .modules_repo import ModulesRepo +from .modules_repo import NF_CORE_MODULES_NAME, NF_CORE_MODULES_REMOTE, ModulesRepo from .nfcore_module import NFCoreModule log = logging.getLogger(__name__) -gh_api = nf_core.utils.gh_api - class 
def dir_tree_uncovered(modules_dir, repos):
    """
    Does a breadth-first search of the modules directory of a pipeline and
    reports any directories that are not covered by the list of repos.

    A directory is "covered" when it is a repo directory itself or a parent
    directory on the way down to one (e.g. 'nf-core' for 'nf-core/modules').
    The 'local' directory and plain files are ignored.

    Args:
        modules_dir (str): Base directory for the module files
        repos ([ str ]): Repo names given as '/'-separated relative paths,
                         with any number of components

    Returns:
        dirs_not_covered ([ str ]): Paths (prefixed with modules_dir) of the
                                    directories not covered by any repo
    """
    # Split every repo name into its path components once. Note that
    # os.path.split() only ever yields (head, tail) and can therefore not be
    # used to count or slice the components of a path.
    repo_parts = [os.path.normpath(repo).split(os.sep) for repo in repos]

    # Initialise the FIFO queue with the top-level directories
    fifo = [
        os.path.join(modules_dir, subdir)
        for subdir in os.listdir(modules_dir)
        if subdir != "local" and os.path.isdir(os.path.join(modules_dir, subdir))
    ]
    depth = 1
    dirs_not_covered = []
    while fifo:
        next_level = []
        # Map the repo prefix at the current depth to the total depth of the repo
        repos_at_level = {os.path.join(*parts[:depth]): len(parts) for parts in repo_parts}
        for cur_dir in fifo:
            rel_dir = os.path.relpath(cur_dir, modules_dir)
            if rel_dir not in repos_at_level:
                # Not a repo and not on the way to one
                dirs_not_covered.append(cur_dir)
            elif depth < repos_at_level[rel_dir]:
                # Parent directory of a repo: inspect its sub-directories at the next depth
                for subdir in os.listdir(cur_dir):
                    sub_path = os.path.join(cur_dir, subdir)
                    if os.path.isdir(sub_path):
                        next_level.append(sub_path)
            # Otherwise the directory is the repo itself: nothing below it is inspected
        fifo = next_level
        depth += 1
    return dirs_not_covered


def path_from_remote(remote_url):
    """
    Extracts the repository path from the remote URL
    See https://mirrors.edge.kernel.org/pub/software/scm/git/docs/git-clone.html#URLS for the possible URL patterns

    Args:
        remote_url (str): Git URL, e.g. 'https://host/owner/repo.git',
                          'ssh://user@host:port/owner/repo.git' or the
                          scp-like 'user@host:owner/repo.git'

    Returns:
        path (str): Relative path of the repository without host, surrounding
                    slashes or trailing '.git', e.g. 'owner/repo'
    """
    if "://" in remote_url:
        # Proper URL (https, http, ssh, git, file, ...): user, host and port
        # are all part of the netloc, so only the path is kept
        path = urllib.parse.urlparse(remote_url).path
    else:
        # The scp-like syntax '[user@]host:path' is only recognised by git
        # if there is no slash before the first colon
        prefix, colon, rest = remote_url.partition(":")
        if colon and "/" not in prefix:
            path = rest
        else:
            # Plain local path
            path = remote_url

    # The result is joined onto other directories, so it must be relative
    path = path.strip("/")
    # Only strip the '.git' suffix; other dots belong to the repository name
    if path.endswith(".git"):
        path = path[: -len(".git")]
    return path


def get_pipeline_module_repositories(modules_dir):
    """
    Finds all module repositories in the modules directory. Ignores the local modules.
    For directories that do not belong to a known repository the user is
    prompted for the remote URL until every directory is covered.

    Args:
        modules_dir (str): base directory for the module files
    Returns
        repos [ (str, str) ]: List of tuples of repo name and repo remote URL
    """
    # Check if there are any nf-core modules installed
    if os.path.exists(os.path.join(modules_dir, NF_CORE_MODULES_NAME)):
        repos = [(NF_CORE_MODULES_NAME, NF_CORE_MODULES_REMOTE)]
    else:
        repos = []

    # Check if there are any untracked repositories
    dirs_not_covered = dir_tree_uncovered(modules_dir, [name for name, _ in repos])
    if dirs_not_covered:
        log.info("Found custom module repositories when creating 'modules.json'")

    # Loop until all directories in the base directory are covered by a remote
    while dirs_not_covered:
        log.info(
            "The following director{s} in the modules directory are untracked: '{l}'".format(
                s="ies" if len(dirs_not_covered) > 1 else "y", l="', '".join(dirs_not_covered)
            )
        )
        nrepo_remote = questionary.text(
            "Please provide a URL for one of the repos contained in the untracked directories"
        ).unsafe_ask()
        # Verify that the remote exists
        while True:
            try:
                git.Git().ls_remote(nrepo_remote)
                break
            except git.exc.GitCommandError:
                nrepo_remote = questionary.text(
                    "The provided remote does not seem to exist, please provide a new remote."
                ).unsafe_ask()

        # Verify that there is a directory corresponding the remote
        nrepo_name = path_from_remote(nrepo_remote)
        nrepo_dir = os.path.join(modules_dir, nrepo_name)
        if not os.path.exists(nrepo_dir):
            log.info(
                "The provided remote does not seem to correspond to a local directory. "
                "The directory structure should be the same as in the remote"
            )
            dir_name = questionary.text(
                "Please provide the correct directory, it will be renamed. If left empty, the remote will be ignored"
            ).unsafe_ask()
            if not dir_name:
                # The remote is ignored and the user is asked again
                continue
            source_dir = os.path.join(modules_dir, dir_name)
            if not os.path.isdir(source_dir):
                log.info(f"The directory '{dir_name}' does not exist in the modules directory")
                continue
            # os.rename() does not create missing parent directories of the target
            os.makedirs(os.path.dirname(nrepo_dir), exist_ok=True)
            os.rename(source_dir, nrepo_dir)
        repos.append((nrepo_name, nrepo_remote))
        dirs_not_covered = dir_tree_uncovered(modules_dir, [name for name, _ in repos])
    return repos
Is this a DSL2 pipeline?") - # Extract all modules repos in the pipeline directory - repo_names = [ - f"{user_name}/{repo_name}" - for user_name in os.listdir(modules_dir) - if os.path.isdir(os.path.join(modules_dir, user_name)) and user_name != "local" - for repo_name in os.listdir(os.path.join(modules_dir, user_name)) - ] + repos = get_pipeline_module_repositories(modules_dir) # Get all module names in the repos - repo_module_names = { - repo_name: list( - { - os.path.relpath(os.path.dirname(path), os.path.join(modules_dir, repo_name)) - for path in glob.glob(f"{modules_dir}/{repo_name}/**/*", recursive=True) - if os.path.isfile(path) - } + repo_module_names = [ + ( + repo_name, + [ + os.path.relpath(dir_name, os.path.join(modules_dir, repo_name)) + for dir_name, _, file_names in os.walk(os.path.join(modules_dir, repo_name)) + if "main.nf" in file_names + ], + repo_remote, ) - for repo_name in repo_names - } - + for repo_name, repo_remote in repos + ] progress_bar = rich.progress.Progress( "[bold blue]{task.description}", rich.progress.BarColumn(bar_width=None), @@ -162,29 +168,27 @@ def create_modules_json(pipeline_dir): transient=True, ) with progress_bar: + n_total_modules = sum(len(modules) for _, modules, _ in repo_module_names) file_progress = progress_bar.add_task( - "Creating 'modules.json' file", total=sum(map(len, repo_module_names.values())), test_name="module.json" + "Creating 'modules.json' file", total=n_total_modules, test_name="module.json" ) - for repo_name, module_names in sorted(repo_module_names.items()): + for repo_name, module_names, remote in sorted(repo_module_names): try: - modules_repo = ModulesRepo(repo=repo_name) + # Create a ModulesRepo object without progress bar to not conflict with the other one + modules_repo = ModulesRepo(remote_url=remote, no_progress=True) except LookupError as e: raise UserWarning(e) repo_path = os.path.join(modules_dir, repo_name) modules_json["repos"][repo_name] = dict() + 
def find_correct_commit_sha(module_name, module_path, modules_repo, depth=1000):
    """
    Returns the SHA for the first commit in the module's git log for which
    the local module files are identical to the files in the repository.

    Args:
        module_name (str): Name of module
        module_path (str): Path to module in local repo
        modules_repo (ModulesRepo): Object providing 'get_module_git_log' and
                                    'module_files_identical' for the remote
        depth (int): Passed on to 'get_module_git_log' as the 'depth' keyword;
                     presumably the maximum number of commits to inspect
                     (defaults to the previously hard-coded 1000)

    Returns:
        commit_sha (str): The first matching commit SHA in the order given by
                          the git log (presumably newest first, so the latest
                          matching commit). None if no commit matches.
    """
    # Walk the commit history of the module lazily and stop at the first
    # revision whose files all compare equal to the local copies
    for commit in modules_repo.get_module_git_log(module_name, depth=depth):
        commit_sha = commit["git_sha"]
        files_identical = modules_repo.module_files_identical(module_name, module_path, commit_sha)
        if all(files_identical.values()):
            return commit_sha
    return None
- # If the file is missing from both the local and remote repo - # we will get the comparision None == None - if local_files[i] == remote_copies[i]: - files_are_equal[i] = True - - return all(files_are_equal) - - def get_installed_modules(dir, repo_type="modules"): """ Make a list of all modules installed in this repository @@ -404,65 +332,36 @@ def get_repo_type(dir, repo_type=None, use_prompt=True): return [dir, repo_type] -def verify_pipeline_dir(dir): - modules_dir = os.path.join(dir, "modules") - if os.path.exists(modules_dir): - repo_names = ( - f"{user}/{repo}" - for user in os.listdir(modules_dir) - if user != "local" - for repo in os.listdir(os.path.join(modules_dir, user)) - ) - missing_remote = [] - modules_is_software = False - for repo_name in repo_names: - api_url = f"https://api.github.com/repos/{repo_name}/contents" - response = gh_api.get(api_url) - if response.status_code == 404: - missing_remote.append(repo_name) - if repo_name == "nf-core/software": - modules_is_software = True - - if len(missing_remote) > 0: - missing_remote = [f"'{repo_name}'" for repo_name in missing_remote] - error_msg = "Could not find GitHub repository for: " + ", ".join(missing_remote) - if modules_is_software: - error_msg += ( - "\nAs of version 2.0, remote modules are installed in 'modules//'" - ) - error_msg += "\nThe 'nf-core/software' directory should therefore be renamed to 'nf-core/modules'" - raise UserWarning(error_msg) - - def prompt_module_version_sha(module, modules_repo, installed_sha=None): + """ + Creates an interactive questionary prompt for selecting the module version + Args: + module (str): Module name + modules_repo (ModulesRepo): Modules repo the module originate in + installed_sha (str): Optional extra argument to highlight the current installed version + + Returns: + git_sha (str): The selected version of the module + """ older_commits_choice = questionary.Choice( title=[("fg:ansiyellow", "older commits"), ("class:choice-default", "")], value="" ) 
git_sha = "" page_nbr = 1 - try: - next_page_commits = get_module_git_log(module, modules_repo=modules_repo, per_page=10, page_nbr=page_nbr) - except UserWarning: - next_page_commits = None - except LookupError as e: - log.warning(e) - next_page_commits = None + + all_commits = modules_repo.get_module_git_log(module) + next_page_commits = [next(all_commits, None) for _ in range(10)] + next_page_commits = [commit for commit in next_page_commits if commit is not None] while git_sha == "": commits = next_page_commits - try: - next_page_commits = get_module_git_log( - module, modules_repo=modules_repo, per_page=10, page_nbr=page_nbr + 1 - ) - except UserWarning: - next_page_commits = None - except LookupError as e: - log.warning(e) + next_page_commits = [next(all_commits, None) for _ in range(10)] + next_page_commits = [commit for commit in next_page_commits if commit is not None] + if all(commit is None for commit in next_page_commits): next_page_commits = None choices = [] for title, sha in map(lambda commit: (commit["trunc_message"], commit["git_sha"]), commits): - display_color = "fg:ansiblue" if sha != installed_sha else "fg:ansired" message = f"{title} {sha}" if installed_sha == sha: @@ -476,14 +375,3 @@ def prompt_module_version_sha(module, modules_repo, installed_sha=None): ).unsafe_ask() page_nbr += 1 return git_sha - - -def sha_exists(sha, modules_repo): - i = 1 - while True: - try: - if sha in {commit["git_sha"] for commit in get_module_git_log(None, modules_repo, page_nbr=i)}: - return True - i += 1 - except (UserWarning, LookupError): - raise diff --git a/nf_core/modules/modules_command.py b/nf_core/modules/modules_command.py index 08eea0ae4b..22231179e4 100644 --- a/nf_core/modules/modules_command.py +++ b/nf_core/modules/modules_command.py @@ -4,14 +4,19 @@ import logging import os import shutil -from posixpath import dirname +from datetime import datetime +import git +import questionary import yaml import nf_core.modules.module_utils import 
nf_core.utils -from nf_core import modules -from nf_core.modules.modules_repo import ModulesRepo +from nf_core.modules.modules_repo import ( + NF_CORE_MODULES_NAME, + NF_CORE_MODULES_REMOTE, + ModulesRepo, +) from nf_core.utils import plural_s as _s log = logging.getLogger(__name__) @@ -22,11 +27,11 @@ class ModuleCommand: Base class for the 'nf-core modules' commands """ - def __init__(self, dir): + def __init__(self, dir, remote_url=None, branch=None, no_pull=False): """ Initialise the ModulesCommand object """ - self.modules_repo = ModulesRepo() + self.modules_repo = ModulesRepo(remote_url, branch, no_pull) self.dir = dir self.module_names = [] try: @@ -37,12 +42,6 @@ def __init__(self, dir): except LookupError as e: raise UserWarning(e) - if self.repo_type == "pipeline": - try: - nf_core.modules.module_utils.verify_pipeline_dir(self.dir) - except UserWarning: - raise - def get_pipeline_modules(self): """ Get the modules installed in the current directory. @@ -107,10 +106,7 @@ def has_modules_file(self): modules_json_path = os.path.join(self.dir, "modules.json") if not os.path.exists(modules_json_path): log.info("Creating missing 'module.json' file.") - try: - nf_core.modules.module_utils.create_modules_json(self.dir) - except UserWarning as e: - raise + nf_core.modules.module_utils.create_modules_json(self.dir) def modules_json_up_to_date(self): """ @@ -135,37 +131,40 @@ def modules_json_up_to_date(self): for repo, modules in self.module_names.items(): if repo in mod_json["repos"]: for module in modules: - if module in mod_json["repos"][repo]: - mod_json["repos"][repo].pop(module) + repo_modules = mod_json["repos"][repo].get("modules") + if repo_modules is None: + raise UserWarning( + "Your 'modules.json' is not up to date. " + "Please reinstall it by removing it and rerunning the command." 
+ ) + if module in mod_json["repos"][repo]["modules"]: + mod_json["repos"][repo]["modules"].pop(module) else: if repo not in missing_from_modules_json: - missing_from_modules_json[repo] = [] - missing_from_modules_json[repo].append(module) - if len(mod_json["repos"][repo]) == 0: + missing_from_modules_json[repo] = ([], mod_json["repos"][repo]["git_url"]) + missing_from_modules_json[repo][0].append(module) + if len(mod_json["repos"][repo]["modules"]) == 0: mod_json["repos"].pop(repo) else: - missing_from_modules_json[repo] = modules + missing_from_modules_json[repo] = (modules, None) - # If there are any modules left in 'modules.json' after all installed are removed, + # If there are any modules left in 'modules.json' after all installed are removed, # we try to reinstall them if len(mod_json["repos"]) > 0: missing_but_in_mod_json = [ - f"'{repo}/{module}'" for repo, modules in mod_json["repos"].items() for module in modules + f"'{repo}/{module}'" for repo, contents in mod_json["repos"].items() for module in contents["modules"] ] log.info( f"Reinstalling modules found in 'modules.json' but missing from directory: {', '.join(missing_but_in_mod_json)}" ) remove_from_mod_json = {} - for repo, modules in mod_json["repos"].items(): - try: - modules_repo = ModulesRepo(repo=repo) - modules_repo.get_modules_file_tree() - install_folder = [modules_repo.owner, modules_repo.repo] - except LookupError as e: - log.warning(f"Could not get module's file tree for '{repo}': {e}") - remove_from_mod_json[repo] = list(modules.keys()) - continue + for repo, contents in mod_json["repos"].items(): + modules = contents["modules"] + remote = contents["git_url"] + + modules_repo = ModulesRepo(remote_url=remote) + install_folder = os.path.split(modules_repo.fullname) for module, entry in modules.items(): sha = entry.get("git_sha") @@ -178,7 +177,7 @@ def modules_json_up_to_date(self): remove_from_mod_json[repo].append(module) continue module_dir = os.path.join(self.dir, "modules", 
*install_folder, module) - self.download_module_file(module, sha, modules_repo, install_folder, module_dir) + self.install_module_files(module, sha, modules_repo, install_folder, module_dir) # If the reinstall fails, we remove those entries in 'modules.json' if sum(map(len, remove_from_mod_json.values())) > 0: @@ -200,10 +199,12 @@ def modules_json_up_to_date(self): # If some modules didn't have an entry in the 'modules.json' file # we try to determine the SHA from the commit log of the remote + dead_repos = [] + sb_local_repos = [] if sum(map(len, missing_from_modules_json.values())) > 0: format_missing = [ - f"'{repo}/{module}'" for repo, modules in missing_from_modules_json.items() for module in modules + f"'{repo}/{module}'" for repo, contents in missing_from_modules_json.items() for module in contents[0] ] if len(format_missing) == 1: log.info(f"Recomputing commit SHA for module {format_missing[0]} which was missing from 'modules.json'") @@ -211,29 +212,90 @@ def modules_json_up_to_date(self): log.info( f"Recomputing commit SHAs for modules which were missing from 'modules.json': {', '.join(format_missing)}" ) + failed_to_find_commit_sha = [] - for repo, modules in missing_from_modules_json.items(): - modules_repo = ModulesRepo(repo=repo) + for repo, (modules, remote) in missing_from_modules_json.items(): + if remote is None: + if repo == NF_CORE_MODULES_NAME: + remote = NF_CORE_MODULES_REMOTE + else: + choice = questionary.select( + f"Found untracked files in {repo}. 
Please select a choice", + choices=[ + questionary.Choice("Provide the remote", value=0), + questionary.Choice("Move the directory to 'local'", value=1), + questionary.Choice("Remove the files", value=2), + ], + ) + if choice == 0: + remote = questionary.text("Please provide the URL of the remote") + # Verify that the remote exists + while True: + try: + git.Git().ls_remote(remote) + break + except git.exc.GitCommandError: + remote = questionary.text( + "The provided remote does not seem to exist, please provide a new remote." + ).unsafe_ask() + elif choice == 1: + sb_local_repos.append(repo) + continue + else: + dead_repos.append(repo) + continue + + remote = questionary.text(f"Please provide a remote for these files ") + + modules_repo = ModulesRepo(remote_url=remote) repo_path = os.path.join(self.dir, "modules", repo) for module in modules: module_path = os.path.join(repo_path, module) - try: - correct_commit_sha = nf_core.modules.module_utils.find_correct_commit_sha( - module, module_path, modules_repo - ) - if repo not in fresh_mod_json["repos"]: - fresh_mod_json["repos"][repo] = {} + correct_commit_sha = nf_core.modules.module_utils.find_correct_commit_sha( + module, module_path, modules_repo + ) + log.info(correct_commit_sha) + if repo not in fresh_mod_json["repos"]: + fresh_mod_json["repos"][repo] = {} - fresh_mod_json["repos"][repo][module] = {"git_sha": correct_commit_sha} - except (LookupError, UserWarning) as e: - failed_to_find_commit_sha.append(f"'{repo}/{module}'") + fresh_mod_json["repos"][repo][module] = {"git_sha": correct_commit_sha} if len(failed_to_find_commit_sha) > 0: log.info( - f"Could not determine 'git_sha' for module{_s(failed_to_find_commit_sha)}: {', '.join(failed_to_find_commit_sha)}." - f"\nPlease try to install a newer version of {'this' if len(failed_to_find_commit_sha) == 1 else 'these'} module{_s(failed_to_find_commit_sha)}." 
+ f"Could not determine 'git_sha' for module{_s(failed_to_find_commit_sha)}: " + f"{', '.join(failed_to_find_commit_sha)}." + f"\nPlease try to install a newer version of " + f"{'this' if len(failed_to_find_commit_sha) == 1 else 'these'} " + f"module{_s(failed_to_find_commit_sha)}." ) + # Remove the requested repos + for repo in dead_repos: + path = os.path.join(self.dir, "modules", repo) + shutil.rmtree(path) + + # Copy the untracked repos to local + for repo in sb_local_repos: + modules_path = os.path.join(self.dir, "modules") + path = os.path.join(modules_path, repo) + local_path = os.path.join(modules_path, "local") + + # Create the local module directory if it doesn't already exist + if not os.path.exists(local_path): + os.makedirs(local_path) + + # Check if there is already a subdirectory with the name + if os.path.exists(os.path.join(local_path, to_path)): + to_path = path + while os.path.exists(os.path.join(local_path, to_path)): + # Add a time suffix to the path to make it unique + # (do it again and again if it didn't work out...) 
+ to_path += f"-{datetime.datetime.now().strftime('%y%m%d%H%M%S')}" + shutil.move(path, to_path) + path = to_path + + shutil.move(path, local_path) + self.dump_modules_json(fresh_mod_json) def clear_module_dir(self, module_name, module_dir): @@ -255,24 +317,28 @@ def clear_module_dir(self, module_name, module_dir): log.error(f"Could not remove module: {e}") return False - def download_module_file(self, module_name, module_version, modules_repo, install_folder, dry_run=False): - """Downloads the files of a module from the remote repo""" - files = modules_repo.get_module_file_urls(module_name, module_version) - log.debug("Fetching module files:\n - {}".format("\n - ".join(files.keys()))) - for filename, api_url in files.items(): - split_filename = filename.split("/") - dl_filename = os.path.join(*install_folder, *split_filename[1:]) - try: - self.modules_repo.download_gh_file(dl_filename, api_url) - except (SystemError, LookupError) as e: - log.error(e) - return False - if not dry_run: - log.info(f"Downloaded {len(files)} files to {os.path.join(*install_folder, module_name)}") - return True + def install_module_files(self, module_name, module_version, modules_repo, install_dir): + """ + Installs a module into the given directory + + Args: + module_name (str): The name of the module + module_versioN (str): Git SHA for the version of the module to be installed + modules_repo (ModulesRepo): A correctly configured ModulesRepo object + install_dir (str): The path to where the module should be installed (should be the 'modules/' dir of the pipeline) + + Returns: + (bool): Whether the operation was successful of not + """ + return modules_repo.install_module(module_name, install_dir, module_version) def load_modules_json(self): - """Loads the modules.json file""" + """ + Loads the modules.json file + + Returns: + (nested dict...): The parsed 'modules.json' file + """ modules_json_path = os.path.join(self.dir, "modules.json") try: with open(modules_json_path, "r") as fh: @@ 
class RemoteProgressbar(git.RemoteProgress):
    """
    An object to create a progressbar for when doing an operation with the remote.
    Note that an initialized rich Progress (progress bar) object must be passed
    during initialization.
    """

    def __init__(self, progress_bar, repo_name, remote_url, operation):
        """
        Initializes the object and adds a task to the progressbar passed as 'progress_bar'

        Args:
            progress_bar (rich.progress.Progress): A rich progress bar object
            repo_name (str): Name of the repository the operation is performed on
            remote_url (str): Git URL of the repository the operation is performed on
            operation (str): The operation performed on the repository, i.e. 'Pulling', 'Cloning' etc.
        """
        super().__init__()
        self.progress_bar = progress_bar
        # The task is added un-started; it is started on the first update from the remote
        self.tid = self.progress_bar.add_task(
            f"{operation} from [bold green]'{repo_name}'[/bold green] ([link={remote_url}]{remote_url}[/link])",
            start=False,
            state="Waiting for response",
        )

    def update(self, op_code, cur_count, max_count=None, message=""):
        """
        Overrides git.RemoteProgress.update.
        Called every time there is a change in the remote operation

        Args:
            op_code (int): Operation code of the remote operation (unused)
            cur_count (float): Current number of processed items
            max_count (float): Expected total number of items. Defaults to None
                               and is then treated as unknown, as is 0
            message (str): Additional message from the remote (unused)
        """
        if not self.progress_bar.tasks[self.tid].started:
            self.progress_bar.start_task(self.tid)
        if max_count:
            state = f"{cur_count / max_count * 100:.1f}%"
        else:
            # No (or a zero) total: a percentage cannot be computed
            state = "In progress"
        self.progress_bar.update(self.tid, total=max_count or None, completed=cur_count, state=state)
""" - def __init__(self, repo="nf-core/modules", branch=None): - self.name = repo - self.branch = branch + local_repo_statuses = dict() + no_pull_global = False + + @staticmethod + def local_repo_synced(repo_name): + """ + Checks whether a local repo has been cloned/pull in the current session + """ + return ModulesRepo.local_repo_statuses.get(repo_name, False) + + @staticmethod + def update_local_repo_status(repo_name, up_to_date): + """ + Updates the clone/pull status of a local repo + """ + ModulesRepo.local_repo_statuses[repo_name] = up_to_date + + def __init__(self, remote_url=None, branch=None, no_pull=False, no_progress=False): + """ + Initializes the object and clones the git repository if it is not already present + """ - # Don't bother fetching default branch if we're using nf-core - if not self.branch and self.name == "nf-core/modules": - self.branch = "master" + # This allows us to set this one time and then keep track of the user's choice + ModulesRepo.no_pull_global |= no_pull + + # Check if the remote seems to be well formed + if remote_url is None: + remote_url = NF_CORE_MODULES_REMOTE + + self.remote_url = remote_url + + self.fullname = nf_core.modules.module_utils.path_from_remote(self.remote_url) + + self.setup_local_repo(remote_url, branch, no_progress) # Verify that the repo seems to be correctly configured - if self.name != "nf-core/modules" or self.branch: + if self.fullname != NF_CORE_MODULES_NAME or self.branch: + self.verify_branch() + + # Convenience variable + self.modules_dir = os.path.join(self.local_repo_dir, "modules") - # Get the default branch if not set - if not self.branch: - self.get_default_branch() + self.avail_module_names = None + def setup_local_repo(self, remote, branch, no_progress=True): + """ + Sets up the local git repository. If the repository has been cloned previously, it + returns a git.Repo object of that clone. 
Otherwise it tries to clone the repository from + the provided remote URL and returns a git.Repo of the new clone. + + Args: + remote (str): git url of remote + branch (str): name of branch to use + Sets self.repo + """ + self.local_repo_dir = os.path.join(NFCORE_DIR, self.fullname) + if not os.path.exists(self.local_repo_dir): try: - self.verify_modules_repo() - except LookupError: - raise + if no_progress: + self.repo = git.Repo.clone_from(remote, self.local_repo_dir) + else: + pbar = rich.progress.Progress( + "[bold blue]{task.description}", + rich.progress.BarColumn(bar_width=None), + "[bold yellow]{task.fields[state]}", + transient=True, + ) + with pbar: + self.repo = git.Repo.clone_from( + remote, + self.local_repo_dir, + progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Cloning"), + ) + ModulesRepo.update_local_repo_status(self.fullname, True) + except git.exc.GitCommandError: + raise LookupError(f"Failed to clone from the remote: `{remote}`") + # Verify that the requested branch exists by checking it out + self.setup_branch(branch) + else: + self.repo = git.Repo(self.local_repo_dir) + + # Verify that the requested branch exists by checking it out + self.setup_branch(branch) + + if ModulesRepo.no_pull_global: + ModulesRepo.update_local_repo_status(self.fullname, True) + # If the repo is already cloned, pull the latest changes from the remote + if not ModulesRepo.local_repo_synced(self.fullname): + if no_progress: + self.repo.remotes.origin.pull() + else: + pbar = rich.progress.Progress( + "[bold blue]{task.description}", + rich.progress.BarColumn(bar_width=None), + "[bold yellow]{task.fields[state]}", + transient=True, + ) + with pbar: + self.repo.remotes.origin.pull( + progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Pulling") + ) + ModulesRepo.update_local_repo_status(self.fullname, True) + + def setup_branch(self, branch): + """ + Verify that we have a branch and otherwise use the default one. 
def get_default_branch(self):
    """
    Gets the default branch for the repo (the branch origin/HEAD is pointing to)

    Returns:
        branch (str): Name of the default branch without the 'origin/' prefix

    Raises:
        LookupError: If the repository has no 'origin/HEAD' reference
    """
    origin_head = next((ref for ref in self.repo.refs if ref.name == "origin/HEAD"), None)
    if origin_head is None:
        raise LookupError(f"Could not determine the default branch of '{self.fullname}'")
    # The target is named '<remote>/<branch>'. Only the remote prefix is removed
    # since the branch name itself may contain slashes (e.g. 'release/2.0')
    _, _, branch = origin_head.ref.name.partition("/")
    return branch
directory should be renamed to 'modules/'" + raise LookupError(err_str) + + def checkout_branch(self): + """ + Checks out the specified branch of the repository + """ + self.repo.git.checkout(self.branch) - # Check if repository exist - api_url = f"https://api.github.com/repos/{self.name}/branches" - response = gh_api.get(api_url) - if response.status_code == 200: - branches = [branch["name"] for branch in response.json()] - if self.branch not in branches: - raise LookupError(f"Branch '{self.branch}' not found in '{self.name}'") - else: - raise LookupError(f"Repository '{self.name}' is not available on GitHub") - - api_url = f"https://api.github.com/repos/{self.name}/contents?ref={self.branch}" - response = gh_api.get(api_url) - if response.status_code == 200: - dir_names = [entry["name"] for entry in response.json() if entry["type"] == "dir"] - if "modules" not in dir_names: - err_str = f"Repository '{self.name}' ({self.branch}) does not contain a 'modules/' directory" - if "software" in dir_names: - err_str += ".\nAs of version 2.0, the 'software/' directory should be renamed to 'modules/'" - raise LookupError(err_str) - else: - raise LookupError(f"Unable to fetch repository information from '{self.name}' ({self.branch})") + def checkout(self, commit): + """ + Checks out the repository at the requested commit - def get_modules_file_tree(self): + Args: + commit (str): Git SHA of the commit """ - Fetch the file list from the repo, using the GitHub API + self.repo.git.checkout(commit) - Sets self.modules_file_tree - self.modules_avail_module_names + def module_exists(self, module_name): """ - api_url = f"https://api.github.com/repos/{self.name}/git/trees/{self.branch}?recursive=1" - r = gh_api.get(api_url) - if r.status_code == 404: - raise LookupError(f"Repository / branch not found: {self.name} ({self.branch})\n{api_url}") - elif r.status_code != 200: - raise LookupError(f"Could not fetch {self.name} ({self.branch}) tree: {r.status_code}\n{api_url}") + Check if a 
module exists in the branch of the repo - result = r.json() - assert result["truncated"] == False + Args: + module_name (str): The name of the module - self.modules_file_tree = result["tree"] - for f in result["tree"]: - if f["path"].startswith("modules/") and f["path"].endswith("/main.nf") and "/test/" not in f["path"]: - # remove modules/ and /main.nf - self.modules_avail_module_names.append(f["path"].replace("modules/", "").replace("/main.nf", "")) - if len(self.modules_avail_module_names) == 0: - raise LookupError(f"Found no modules in '{self.name}'") + Returns: + (bool): Whether the module exists in this branch of the repository + """ + return module_name in self.get_avail_modules() - def get_module_file_urls(self, module, commit=""): - """Fetch list of URLs for a specific module + def get_module_dir(self, module_name): + """ + Returns the file path of a module directory in the repo. + Does not verify that the path exists. + Args: + module_name (str): The name of the module - Takes the name of a module and iterates over the GitHub repo file tree. - Loops over items that are prefixed with the path 'modules/' and ignores - anything that's not a blob. Also ignores the test/ subfolder. + Returns: + module_path (str): The path of the module in the local copy of the repository + """ + return os.path.join(self.modules_dir, module_name) - Returns a dictionary with keys as filenames and values as GitHub API URLs. - These can be used to then download file contents. 
+ def install_module(self, module_name, install_dir, commit): + """ + Install the module files into a pipeline at the given commit Args: - module (string): Name of module for which to fetch a set of URLs + module_name (str): The name of the module + install_dir ([ str ]): The path components of the directory where the module should be installed + commit (str): The git SHA for the version of the module to be installed Returns: - dict: Set of files and associated URLs as follows: + (bool): Whether the operation was successful or not + """ + # Check out the repository at the requested ref + self.checkout(commit) + + # Check if the module exists in the branch + if not self.module_exists(module_name): + log.error(f"The requested module does not exist in the '{self.branch}' branch of '{self.fullname}'") + # Switch back to the tip of the branch so a failed install does not leave the local copy at 'commit' + self.checkout_branch() + return False + + # Copy the files from the repo to the install folder + shutil.copytree(self.get_module_dir(module_name), os.path.join(*install_dir, module_name)) - { - 'modules/fastqc/main.nf': 'https://api.github.com/repos/nf-core/modules/git/blobs/65ba598119206a2b851b86a9b5880b5476e263c3', - 'modules/fastqc/meta.yml': 'https://api.github.com/repos/nf-core/modules/git/blobs/0d5afc23ba44d44a805c35902febc0a382b17651' - } + # Switch back to the tip of the branch + self.checkout_branch() + return True + + def module_files_identical(self, module_name, base_path, commit): """ - results = {} - for f in self.modules_file_tree: - if not f["path"].startswith(f"modules/{module}/"): - continue - if f["type"] != "blob": - continue - if "/test/" in f["path"]: - continue - results[f["path"]] = f["url"] - if commit != "": - for path in results: - results[path] = f"https://api.github.com/repos/{self.name}/contents/{path}?ref={commit}" - return results + Checks whether the module files in a pipeline are identical to the ones in the remote + Args: + module_name (str): The name of the module + base_path (str): The path to the module in the pipeline + commit (str): The git SHA to compare against. If None, the tip of the branch is used - def download_gh_file(self, dl_filename, api_url): - """Download a file from 
GitHub using the GitHub API + Returns: + ({ str: bool }): For each of 'main.nf' and 'meta.yml', whether the pipeline file is identical to the + repo file. Files that cannot be opened keep the initial value True + """ + if commit is None: + self.checkout_branch() + else: + self.checkout(commit) + module_files = ["main.nf", "meta.yml"] + module_dir = self.get_module_dir(module_name) + files_identical = {file: True for file in module_files} + for file in module_files: + try: + files_identical[file] = filecmp.cmp(os.path.join(module_dir, file), os.path.join(base_path, file)) + except FileNotFoundError as e: + log.debug(f"Could not open file: {os.path.join(module_dir, file)}") + continue + self.checkout_branch() + return files_identical + def get_module_git_log(self, module_name, depth=None, since="2021-07-07T00:00:00Z"): + """ + Fetches the commit history of the requested module since a given date. The default value is + not arbitrary - it is the last time the structure of the nf-core/modules repository had an + update breaking backwards compatibility. Args: - dl_filename (string): Path to save file to - api_url (string): GitHub API URL for file + module_name (str): Name of module + depth (int): Maximum number of commits to fetch (passed as 'max_count' to the commit iterator) + since (str): Only show commits later than this timestamp. + Time should be given in ISO-8601 format: YYYY-MM-DDTHH:MM:SSZ. + NOTE(review): 'since' does not appear to be passed on to the commit iterator - confirm 
+ + Returns: + ( dict ): Iterator of commit SHAs and associated (truncated) message + """ + self.checkout_branch() + module_path = os.path.join("modules", module_name) + commits = self.repo.iter_commits(max_count=depth, paths=module_path) + commits = ({"git_sha": commit.hexsha, "trunc_message": commit.message.partition("\n")[0]} for commit in commits) + return commits + + def sha_exists_on_branch(self, sha): + """ + Verifies that a given commit sha exists on the branch + + Args: + sha (str): The SHA of the commit to look for + + Returns: + (bool): Whether the commit was found when iterating over the commits of the checked out branch + """ + # 'checkout' requires a commit argument; here the tip of the branch has to be checked out + self.checkout_branch() + return sha in (commit.hexsha for commit in self.repo.iter_commits()) + def get_commit_info(self, sha): + """ + Fetches metadata about the commit (dates, message, etc.) + Args: + sha (str): The SHA of the requested commit + Returns: + message (str): The first line of the commit message for the requested commit + date (str): The commit date for the requested commit Raises: - If a problem, raises an error + LookupError: If the search for the commit fails + """ + self.checkout_branch() + for commit in self.repo.iter_commits(): + if commit.hexsha == sha: + message = commit.message.partition("\n")[0] + date_obj = commit.committed_datetime + date = str(date_obj.date()) + return message, date + raise LookupError(f"Commit '{sha}' not found in the '{self.fullname}'") + + def get_avail_modules(self): """ + Gets the names of the modules in the repository. 
They are detected by + checking which directories have a 'main.nf' file - # Make target directory if it doesn't already exist - dl_directory = os.path.dirname(dl_filename) - if not os.path.exists(dl_directory): - os.makedirs(dl_directory) + Returns: + ([ str ]): The module names + """ + if self.avail_module_names is None: + # Module directories are characterized by having a 'main.nf' file + self.avail_module_names = [ + os.path.relpath(dirpath, start=self.modules_dir) + for dirpath, _, file_names in os.walk(self.modules_dir) + if "main.nf" in file_names + ] + return self.avail_module_names + + def get_meta_yml(self, module_name): + """ + Returns the contents of the 'meta.yml' file of a module - # Call the GitHub API - r = gh_api.get(api_url) - if r.status_code != 200: - raise LookupError(f"Could not fetch {self.name} file: {r.status_code}\n {api_url}") - result = r.json() - file_contents = base64.b64decode(result["content"]) + Args: + module_name (str): The name of the module - # Write the file contents - with open(dl_filename, "wb") as fh: - fh.write(file_contents) + Returns: + (str): The contents of the file in text format + """ + self.checkout_branch() + path = os.path.join(self.modules_dir, module_name, "meta.yml") + if not os.path.exists(path): + return None + with open(path) as fh: + contents = fh.read() + return contents diff --git a/nf_core/modules/remove.py b/nf_core/modules/remove.py index 7533b142a5..5a3748370c 100644 --- a/nf_core/modules/remove.py +++ b/nf_core/modules/remove.py @@ -13,11 +13,11 @@ class ModuleRemove(ModuleCommand): - def __init__(self, pipeline_dir): + def __init__(self, pipeline_dir, remote_url=None, branch=None, no_pull=False): """ Initialise the ModulesRemove object and run remove command """ - super().__init__(pipeline_dir) + super().__init__(pipeline_dir, remote_url, branch, no_pull) def remove(self, module): """ @@ -40,7 +40,7 @@ def remove(self, module): # Decide from which repo the module was installed # TODO Configure the 
prompt for repository name in a nice way if True: - repo_name = self.modules_repo.name + repo_name = self.modules_repo.fullname elif len(self.module_names) == 1: repo_name = list(self.module_names.keys())[0] else: @@ -64,7 +64,7 @@ def remove(self, module): log.error(f"Module directory does not exist: '{module_dir}'") modules_json = self.load_modules_json() - if self.modules_repo.name in modules_json["repos"] and module in modules_json["repos"][repo_name]: + if self.modules_repo.fullname in modules_json["repos"] and module in modules_json["repos"][repo_name]: log.error(f"Found entry for '{module}' in 'modules.json'. Removing...") self.remove_modules_json_entry(module, repo_name, modules_json) return False diff --git a/nf_core/modules/test_yml_builder.py b/nf_core/modules/test_yml_builder.py index 91767f3f34..8dcf1f4505 100644 --- a/nf_core/modules/test_yml_builder.py +++ b/nf_core/modules/test_yml_builder.py @@ -69,12 +69,11 @@ def check_inputs(self): # Get the tool name if not specified if self.module_name is None: modules_repo = ModulesRepo() - modules_repo.get_modules_file_tree() self.module_name = questionary.autocomplete( "Tool name:", - choices=modules_repo.modules_avail_module_names, + choices=modules_repo.get_avail_modules(), style=nf_core.utils.nfcore_question_style, - ).ask() + ).unsafe_ask() self.module_dir = os.path.join("modules", *self.module_name.split("/")) self.module_test_main = os.path.join("tests", "modules", *self.module_name.split("/"), "main.nf") diff --git a/nf_core/modules/update.py b/nf_core/modules/update.py index 150e787a74..c96f0b9b92 100644 --- a/nf_core/modules/update.py +++ b/nf_core/modules/update.py @@ -15,11 +15,6 @@ import nf_core.modules.module_utils import nf_core.utils -from .module_utils import ( - get_installed_modules, - get_module_git_log, - module_exist_in_repo, -) from .modules_command import ModuleCommand from .modules_repo import ModulesRepo @@ -28,9 +23,19 @@ class ModuleUpdate(ModuleCommand): def __init__( - self, 
pipeline_dir, force=False, prompt=False, sha=None, update_all=False, show_diff=None, save_diff_fn=None + self, + pipeline_dir, + force=False, + prompt=False, + sha=None, + update_all=False, + show_diff=None, + save_diff_fn=None, + remote_url=None, + branch=None, + no_pull=False, ): - super().__init__(pipeline_dir) + super().__init__(pipeline_dir, remote_url, branch, no_pull) self.force = force self.prompt = prompt self.sha = sha @@ -68,25 +73,13 @@ def update(self, module): # Verify that the provided SHA exists in the repo if self.sha: - try: - nf_core.modules.module_utils.sha_exists(self.sha, self.modules_repo) - except UserWarning: - log.error(f"Commit SHA '{self.sha}' doesn't exist in '{self.modules_repo.name}'") - return False - except LookupError as e: - log.error(e) + if not self.modules_repo.sha_exists_on_branch(self.sha): + log.error(f"Commit SHA '{self.sha}' doesn't exist in '{self.modules_repo.fullname}'") return False if not self.update_all: - # Get the available modules - try: - self.modules_repo.get_modules_file_tree() - except LookupError as e: - log.error(e) - return False - - # Check if there are any modules installed from - repo_name = self.modules_repo.name + # Check if there are any modules installed from the repo + repo_name = self.modules_repo.fullname if repo_name not in self.module_names: log.error(f"No modules installed from '{repo_name}'") return False @@ -105,8 +98,8 @@ def update(self, module): return False sha = self.sha - if module in update_config.get(self.modules_repo.name, {}): - config_entry = update_config[self.modules_repo.name].get(module) + if module in update_config.get(self.modules_repo.fullname, {}): + config_entry = update_config[self.modules_repo.fullname].get(module) if config_entry is not None and config_entry is not True: if config_entry is False: log.info("Module's update entry in '.nf-core.yml' is set to False") @@ -126,13 +119,16 @@ def update(self, module): return False # Check that the supplied name is an available 
module - if module and module not in self.modules_repo.modules_avail_module_names: + if module and module not in self.modules_repo.get_avail_modules(): log.error(f"Module '{module}' not found in list of available modules.") log.info("Use the command 'nf-core modules list remote' to view available software") return False repos_mods_shas = [(self.modules_repo, module, sha)] + # Load 'modules.json' (loaded here for consistency with the '--all' case) + modules_json = self.load_modules_json() + else: if module: raise UserWarning("You cannot specify a module and use the '--all' flag at the same time") @@ -178,18 +174,18 @@ def update(self, module): skipped_str = "', '".join(skipped_modules) log.info(f"Skipping module{'' if len(skipped_modules) == 1 else 's'}: '{skipped_str}'") + # Get the git urls from the modules.json + modules_json = self.load_modules_json() repos_mods_shas = [ - (ModulesRepo(repo=repo_name), mods_shas) for repo_name, mods_shas in repos_mods_shas.items() + (modules_json["repos"][repo_name]["git_url"], mods_shas) + for repo_name, mods_shas in repos_mods_shas.items() ] - for repo, _ in repos_mods_shas: - repo.get_modules_file_tree() + repos_mods_shas = [(ModulesRepo(remote_url=repo_url), mods_shas) for repo_url, mods_shas in repos_mods_shas] # Flatten the list repos_mods_shas = [(repo, mod, sha) for repo, mods_shas in repos_mods_shas for mod, sha in mods_shas] - # Load 'modules.json' - modules_json = self.load_modules_json() old_modules_json = copy.deepcopy(modules_json) # Deep copy to avoid mutability if not modules_json: return False @@ -237,21 +233,22 @@ def update(self, module): dry_run = self.show_diff or self.save_diff_fn # Check if the module we've been asked to update actually exists - if not module_exist_in_repo(module, modules_repo): - warn_msg = f"Module '{module}' not found in remote '{modules_repo.name}' ({modules_repo.branch})" + if not modules_repo.module_exists(module): + warn_msg = f"Module '{module}' not found in remote 
'{modules_repo.fullname}' ({modules_repo.branch})" if self.update_all: warn_msg += ". Skipping..." log.warning(warn_msg) exit_value = False continue - if modules_repo.name in modules_json["repos"]: - current_entry = modules_json["repos"][modules_repo.name].get(module) + if modules_repo.fullname in modules_json["repos"]: + current_entry = modules_json["repos"][modules_repo.fullname]["modules"].get(module) else: current_entry = None # Set the install folder based on the repository name - install_folder = [self.dir, "modules", modules_repo.owner, modules_repo.repo] + install_folder = [self.dir, "modules"] + install_folder.extend(os.path.split(modules_repo.fullname)) # Compute the module directory module_dir = os.path.join(*install_folder, module) @@ -271,12 +268,7 @@ def update(self, module): continue else: # Fetch the latest commit for the module - try: - git_log = get_module_git_log(module, modules_repo=modules_repo, per_page=1, page_nbr=1) - except UserWarning: - log.error(f"Was unable to fetch version of module '{module}'") - exit_value = False - continue + git_log = list(modules_repo.get_module_git_log(module, depth=1)) version = git_log[0]["git_sha"] if current_entry is not None and not self.force: @@ -284,14 +276,14 @@ def update(self, module): current_version = current_entry["git_sha"] if current_version == version: if self.sha or self.prompt: - log.info(f"'{modules_repo.name}/{module}' is already installed at {version}") + log.info(f"'{modules_repo.fullname}/{module}' is already installed at {version}") else: - log.info(f"'{modules_repo.name}/{module}' is already up to date") + log.info(f"'{modules_repo.fullname}/{module}' is already up to date") continue if not dry_run: - log.info(f"Updating '{modules_repo.name}/{module}'") - log.debug(f"Updating module '{module}' to {version} from {modules_repo.name}") + log.info(f"Updating '{modules_repo.fullname}/{module}'") + log.debug(f"Updating module '{module}' to {version} from {modules_repo.fullname}") 
log.debug(f"Removing old version of module '{module}'") self.clear_module_dir(module, module_dir) @@ -301,7 +293,7 @@ def update(self, module): install_folder = ["/tmp", next(tempfile._get_candidate_names())] # Download module files - if not self.download_module_file(module, version, modules_repo, install_folder, dry_run=dry_run): + if not self.install_module_files(module, version, modules_repo, install_folder): exit_value = False continue @@ -426,18 +418,16 @@ class DiffEnum(enum.Enum): path = os.path.join(temp_folder, file) if os.path.exists(path): shutil.move(path, os.path.join(module_dir, file)) - log.info(f"Updating '{modules_repo.name}/{module}'") - log.debug(f"Updating module '{module}' to {version} from {modules_repo.name}") + log.info(f"Updating '{modules_repo.fullname}/{module}'") + log.debug(f"Updating module '{module}' to {version} from {modules_repo.fullname}") # Update modules.json with newly installed module if not dry_run: - self.update_modules_json(modules_json, modules_repo.name, module, version) + self.update_modules_json(modules_json, modules_repo, module, version) # Don't save to a file, just iteratively update the variable else: - modules_json = self.update_modules_json( - modules_json, modules_repo.name, module, version, write_file=False - ) + modules_json = self.update_modules_json(modules_json, modules_repo, module, version, write_file=False) if self.save_diff_fn: # Compare the new modules.json and build a diff diff --git a/nf_core/pipeline-template/modules.json b/nf_core/pipeline-template/modules.json index 8a6a36ec3b..af2cb416d8 100644 --- a/nf_core/pipeline-template/modules.json +++ b/nf_core/pipeline-template/modules.json @@ -3,14 +3,17 @@ "homePage": "https://github.com/{{ name }}", "repos": { "nf-core/modules": { - "custom/dumpsoftwareversions": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "fastqc": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "multiqc": { - "git_sha": 
"e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_url": "git@github.com:nf-core/modules.git", + "modules": { + "custom/dumpsoftwareversions": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "fastqc": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "multiqc": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + } } } } diff --git a/nf_core/utils.py b/nf_core/utils.py index fbfcae1b04..db45cb8b2f 100644 --- a/nf_core/utils.py +++ b/nf_core/utils.py @@ -54,6 +54,7 @@ os.environ.get("XDG_CACHE_HOME", os.path.join(os.getenv("HOME"), ".cache")), "nf-core", ) +NFCORE_DIR = os.path.join(os.environ.get("XDG_CONFIG_HOME", os.path.join(os.getenv("HOME"), ".config")), "nfcore") def check_if_outdated(current_version=None, remote_version=None, source_url="https://nf-co.re/tools_version"): @@ -298,6 +299,15 @@ def nextflow_cmd(cmd): ) +def setup_nfcore_dir(): + """Creates a directory for files that need to be kept between sessions + + Currently only used for keeping local copies of modules repos + """ + if not os.path.exists(NFCORE_DIR): + os.makedirs(NFCORE_DIR) + + def setup_requests_cachedir(): """Sets up local caching for faster remote HTTP requests. diff --git a/tests/test_modules.py b/tests/test_modules.py index db04c55302..798686d4ed 100644 --- a/tests/test_modules.py +++ b/tests/test_modules.py @@ -64,7 +64,7 @@ def tearDown(self): def test_modulesrepo_class(self): """Initialise a modules repo object""" modrepo = nf_core.modules.ModulesRepo() - assert modrepo.name == "nf-core/modules" + assert modrepo.fullname == "nf-core/modules" assert modrepo.branch == "master" ############################################