From fb6d3c2df7802f0f871b47cc0379bb62b6802fc6 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Thu, 16 Jun 2022 11:37:42 +0200 Subject: [PATCH 01/55] Rewriting intialisation of ModulesRepo --- nf_core/__main__.py | 3 + nf_core/modules/info.py | 2 +- nf_core/modules/install.py | 20 ++--- nf_core/modules/list.py | 4 +- nf_core/modules/module_utils.py | 4 +- nf_core/modules/modules_command.py | 2 +- nf_core/modules/modules_repo.py | 122 +++++++++++++++-------------- nf_core/modules/remove.py | 4 +- nf_core/modules/update.py | 32 ++++---- nf_core/utils.py | 10 +++ tests/test_modules.py | 2 +- 11 files changed, 114 insertions(+), 91 deletions(-) diff --git a/nf_core/__main__.py b/nf_core/__main__.py index f3f0701fba..a778189ed9 100755 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -28,6 +28,9 @@ # Submodules should all traverse back to this log = logging.getLogger() +# Set up .nfcore directory +nf_core.utils.setup_nfcore_dir() + # Set up nicer formatting of click cli help messages click.rich_click.MAX_WIDTH = 100 click.rich_click.USE_RICH_MARKUP = True diff --git a/nf_core/modules/info.py b/nf_core/modules/info.py index 6d33c091ff..65462586b6 100644 --- a/nf_core/modules/info.py +++ b/nf_core/modules/info.py @@ -114,7 +114,7 @@ def get_remote_yaml(self): response = requests.get(meta_url) result = response.json() file_contents = base64.b64decode(result["content"]) - self.remote_location = self.modules_repo.name + self.remote_location = self.modules_repo.fullname return yaml.safe_load(file_contents) def generate_module_info_help(self): diff --git a/nf_core/modules/install.py b/nf_core/modules/install.py index 843943805a..94efbb50c8 100644 --- a/nf_core/modules/install.py +++ b/nf_core/modules/install.py @@ -47,7 +47,7 @@ def install(self, module): try: nf_core.modules.module_utils.sha_exists(self.sha, self.modules_repo) except UserWarning: - log.error(f"Commit SHA '{self.sha}' doesn't exist in '{self.modules_repo.name}'") + log.error(f"Commit SHA '{self.sha}' doesn't exist in '{self.modules_repo.fullname}'") return False except LookupError as e: log.error(e) @@ -72,17 +72,19 @@ def install(self, module): return False if not module_exist_in_repo(module, self.modules_repo): - warn_msg = f"Module '{module}' not found in remote '{self.modules_repo.name}' ({self.modules_repo.branch})" + warn_msg = ( + f"Module '{module}' not found in remote '{self.modules_repo.fullname}' ({self.modules_repo.branch})" + ) log.warning(warn_msg) return False - if self.modules_repo.name in modules_json["repos"]: - current_entry = modules_json["repos"][self.modules_repo.name].get(module) + if self.modules_repo.fullname in modules_json["repos"]: + current_entry = modules_json["repos"][self.modules_repo.fullname].get(module) else: current_entry = None # Set the install folder based on the repository name - install_folder = [self.dir, "modules", self.modules_repo.owner, self.modules_repo.repo] + install_folder = [self.dir, "modules", self.modules_repo.owner, self.modules_repo.name] # Compute the module directory module_dir = os.path.join(*install_folder, module) @@ -91,7 +93,7 @@ def install(self, module): if (current_entry is not None and os.path.exists(module_dir)) and not self.force: log.error("Module is already installed.") - repo_flag = "" if self.modules_repo.name == "nf-core/modules" else f"-g {self.modules_repo.name} " + repo_flag = "" if self.modules_repo.fullname == "nf-core/modules" else f"-g {self.modules_repo.fullname} " branch_flag = "" if self.modules_repo.branch == "master" else f"-b {self.modules_repo.branch} " log.info( @@ -121,11 +123,11 @@ def install(self, module): version = git_log[0]["git_sha"] if self.force: - log.info(f"Removing installed version of '{self.modules_repo.name}/{module}'") + log.info(f"Removing installed version of '{self.modules_repo.fullname}/{module}'") self.clear_module_dir(module, module_dir) log.info(f"{'Rei' if self.force else 'I'}nstalling '{module}'") - log.debug(f"Installing module '{module}' at modules hash {version} from {self.modules_repo.name}") + log.debug(f"Installing module '{module}' at modules hash {version} from {self.modules_repo.fullname}") # Download module files if not self.download_module_file(module, version, self.modules_repo, install_folder): @@ -136,5 +138,5 @@ def install(self, module): log.info(f"Include statement: include {{ {module_name} }} from '.{os.path.join(*install_folder, module)}/main’") # Update module.json with newly installed module - self.update_modules_json(modules_json, self.modules_repo.name, module, version) + self.update_modules_json(modules_json, self.modules_repo.fullname, module, version) return True diff --git a/nf_core/modules/list.py b/nf_core/modules/list.py index 537f3bd621..04ad7c3bad 100644 --- a/nf_core/modules/list.py +++ b/nf_core/modules/list.py @@ -55,7 +55,7 @@ def pattern_msg(keywords): # Nothing found if len(modules) == 0: log.info( - f"No available modules found in {self.modules_repo.name} ({self.modules_repo.branch})" + f"No available modules found in {self.modules_repo.fullname} ({self.modules_repo.branch})" f"{pattern_msg(keywords)}" ) return "" @@ -122,7 +122,7 @@ def pattern_msg(keywords): if self.remote: log.info( - f"Modules available from {self.modules_repo.name} ({self.modules_repo.branch})" + f"Modules available from {self.modules_repo.fullname} ({self.modules_repo.branch})" f"{pattern_msg(keywords)}:\n" ) else: diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index c1522e2485..50b11c3c96 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -58,7 +58,7 @@ def get_module_git_log(module_name, modules_repo=None, per_page=30, page_nbr=1, """ if modules_repo is None: modules_repo = ModulesRepo() - api_url = f"https://api.github.com/repos/{modules_repo.name}/commits" + api_url = f"https://api.github.com/repos/{modules_repo.fullname}/commits" api_url += f"?sha={modules_repo.branch}" if module_name is not None: api_url += f"&path=modules/{module_name}" @@ -79,7 +79,7 @@ def get_module_git_log(module_name, modules_repo=None, per_page=30, page_nbr=1, for commit in commits ] elif response.status_code == 404: - raise LookupError(f"Module '{module_name}' not found in '{modules_repo.name}'\n{api_url}") + raise LookupError(f"Module '{module_name}' not found in '{modules_repo.fullname}'\n{api_url}") else: gh_api.log_content_headers(response) raise LookupError( diff --git a/nf_core/modules/modules_command.py b/nf_core/modules/modules_command.py index 83dec17b0c..914c0e37d5 100644 --- a/nf_core/modules/modules_command.py +++ b/nf_core/modules/modules_command.py @@ -161,7 +161,7 @@ def modules_json_up_to_date(self): try: modules_repo = ModulesRepo(repo=repo) modules_repo.get_modules_file_tree() - install_folder = [modules_repo.owner, modules_repo.repo] + install_folder = [modules_repo.owner, modules_repo.name] except LookupError as e: log.warn(f"Could not get module's file tree for '{repo}': {e}") remove_from_mod_json[repo] = list(modules.keys()) diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index bf6d7b48c8..ab3c1642f2 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -1,8 +1,9 @@ import base64 import logging import os +import git -from nf_core.utils import gh_api +from nf_core.utils import NFCORE_DIR, gh_api log = logging.getLogger(__name__) @@ -15,67 +16,74 @@ class ModulesRepo(object): so that this can be used in the same way by all sub-commands. """ - def __init__(self, repo="nf-core/modules", branch=None): - self.name = repo + def __init__(self, repo="nf-core/modules", branch=None, remote=None): + """ + Initializes the object and clones the git repository if it is not already present + """ + + # Check if name seems to be well formed + if self.fullname.count("/") != 1: + raise LookupError(f"Repository name '{self.fullname}' should be of the format '/'") + + self.fullname = repo self.branch = branch - # Don't bother fetching default branch if we're using nf-core - if not self.branch and self.name == "nf-core/modules": - self.branch = "master" + if self.branch is None: + # Don't bother fetching default branch if we're using nf-core + if self.fullname == "nf-core/modules": + self.branch = "master" + else: + self.branch = self.get_default_branch() - # Verify that the repo seems to be correctly configured - if self.name != "nf-core/modules" or self.branch: + if remote is None and self.fullname == "nf-core/modules": + self.remote = "git@github.com:nf-core/modules.git" - # Get the default branch if not set - if not self.branch: - self.get_default_branch() + self.owner, self.name = self.fullname.split("/") + self.repo = self.setup_local_repo(self.owner, self.name, remote) - try: - self.verify_modules_repo() - except LookupError: - raise + # Verify that the repo seems to be correctly configured + if self.fullname != "nf-core/modules" or self.branch: + self.verify_branch() - self.owner, self.repo = self.name.split("/") self.modules_file_tree = {} self.modules_avail_module_names = [] - def get_default_branch(self): - """Get the default branch for a GitHub repo""" - api_url = f"https://api.github.com/repos/{self.name}" - response = gh_api.get(api_url) - if response.status_code == 200: - self.branch = response.json()["default_branch"] - log.debug(f"Found default branch to be '{self.branch}'") - else: - raise LookupError(f"Could not find repository '{self.name}' on GitHub") + def setup_local_repo(self, owner, name, remote=None): + owner_local_dir = os.path.join(NFCORE_DIR, owner) + if not os.path.exists(owner_local_dir): + os.makedirs(owner_local_dir) + self.local_dir = os.path.join(owner_local_dir, name) + if not os.path.exists(self.local_dir): + if remote == None: + raise Exception( + f"The git repo {os.path.join(owner, name)} has not been previously used and you did not provide a link to the remote" + ) + try: + return git.Repo.clone_from(remote, self.local_dir) + except git.exc.GitCommandError: + raise LookupError(f"Failed to clone from the remote: `{remote}`") - def verify_modules_repo(self): + return git.Repo(self.local_dir) - # Check if name seems to be well formed - if self.name.count("/") != 1: - raise LookupError(f"Repository name '{self.name}' should be of the format '/'") - - # Check if repository exist - api_url = f"https://api.github.com/repos/{self.name}/branches" - response = gh_api.get(api_url) - if response.status_code == 200: - branches = [branch["name"] for branch in response.json()] - if self.branch not in branches: - raise LookupError(f"Branch '{self.branch}' not found in '{self.name}'") - else: - raise LookupError(f"Repository '{self.name}' is not available on GitHub") - - api_url = f"https://api.github.com/repos/{self.name}/contents?ref={self.branch}" - response = gh_api.get(api_url) - if response.status_code == 200: - dir_names = [entry["name"] for entry in response.json() if entry["type"] == "dir"] - if "modules" not in dir_names: - err_str = f"Repository '{self.name}' ({self.branch}) does not contain a 'modules/' directory" - if "software" in dir_names: - err_str += ".\nAs of version 2.0, the 'software/' directory should be renamed to 'modules/'" - raise LookupError(err_str) - else: - raise LookupError(f"Unable to fetch repository information from '{self.name}' ({self.branch})") + def get_default_branch(self): + """Get the default branch for the repo (the branch origin/HEAD is pointing to)""" + origin_head = next(ref for ref in self.repo.refs if ref == "origin/HEAD") + _, self.branch = origin_head.ref.name.split("/") + + def verify_branch(self): + # Check if the branch name exists by trying to check out the branch + try: + self.repo.git.checkout(self.branch) + except git.exc.GitCommandError: + raise LookupError(f"Branch '{self.branch}' not found in '{self.fullname}'") + + # Make sure the directory is well formed + dir_names = os.listdir(self.local_dir) + if "modules" not in dir_names: + err_str = f"Repository '{self.fullname}' ({self.branch}) does not contain a 'modules/' directory" + if "software" in dir_names: + err_str += ".\nAs of version 2.0, the 'software/' directory should be renamed to 'modules/'" + raise LookupError(err_str) def get_modules_file_tree(self): """ @@ -84,12 +92,12 @@ def get_modules_file_tree(self): Sets self.modules_file_tree self.modules_avail_module_names """ - api_url = f"https://api.github.com/repos/{self.name}/git/trees/{self.branch}?recursive=1" + api_url = f"https://api.github.com/repos/{self.fullname}/git/trees/{self.branch}?recursive=1" r = gh_api.get(api_url) if r.status_code == 404: - raise LookupError(f"Repository / branch not found: {self.name} ({self.branch})\n{api_url}") + raise LookupError(f"Repository / branch not found: {self.fullname} ({self.branch})\n{api_url}") elif r.status_code != 200: - raise LookupError(f"Could not fetch {self.name} ({self.branch}) tree: {r.status_code}\n{api_url}") + raise LookupError(f"Could not fetch {self.fullname} ({self.branch}) tree: {r.status_code}\n{api_url}") result = r.json() assert result["truncated"] == False @@ -100,7 +108,7 @@ def get_modules_file_tree(self): # remove modules/ and /main.nf self.modules_avail_module_names.append(f["path"].replace("modules/", "").replace("/main.nf", "")) if len(self.modules_avail_module_names) == 0: - raise LookupError(f"Found no modules in '{self.name}'") + raise LookupError(f"Found no modules in '{self.fullname}'") def get_module_file_urls(self, module, commit=""): """Fetch list of URLs for a specific module @@ -134,7 +142,7 @@ def get_module_file_urls(self, module, commit=""): results[f["path"]] = f["url"] if commit != "": for path in results: - results[path] = f"https://api.github.com/repos/{self.name}/contents/{path}?ref={commit}" + results[path] = f"https://api.github.com/repos/{self.fullname}/contents/{path}?ref={commit}" return results def download_gh_file(self, dl_filename, api_url): @@ -156,7 +164,7 @@ def download_gh_file(self, dl_filename, api_url): # Call the GitHub API r = gh_api.get(api_url) if r.status_code != 200: - raise LookupError(f"Could not fetch {self.name} file: {r.status_code}\n {api_url}") + raise LookupError(f"Could not fetch {self.fullname} file: {r.status_code}\n {api_url}") result = r.json() file_contents = base64.b64decode(result["content"]) diff --git a/nf_core/modules/remove.py b/nf_core/modules/remove.py index 7533b142a5..1bcf8c8dd5 100644 --- a/nf_core/modules/remove.py +++ b/nf_core/modules/remove.py @@ -40,7 +40,7 @@ def remove(self, module): # Decide from which repo the module was installed # TODO Configure the prompt for repository name in a nice way if True: - repo_name = self.modules_repo.name + repo_name = self.modules_repo.fullname elif len(self.module_names) == 1: repo_name = list(self.module_names.keys())[0] else: @@ -64,7 +64,7 @@ def remove(self, module): log.error(f"Module directory does not exist: '{module_dir}'") modules_json = self.load_modules_json() - if self.modules_repo.name in modules_json["repos"] and module in modules_json["repos"][repo_name]: + if self.modules_repo.fullname in modules_json["repos"] and module in modules_json["repos"][repo_name]: log.error(f"Found entry for '{module}' in 'modules.json'. Removing...") self.remove_modules_json_entry(module, repo_name, modules_json) return False diff --git a/nf_core/modules/update.py b/nf_core/modules/update.py index 150e787a74..0d308c3b75 100644 --- a/nf_core/modules/update.py +++ b/nf_core/modules/update.py @@ -71,7 +71,7 @@ def update(self, module): try: nf_core.modules.module_utils.sha_exists(self.sha, self.modules_repo) except UserWarning: - log.error(f"Commit SHA '{self.sha}' doesn't exist in '{self.modules_repo.name}'") + log.error(f"Commit SHA '{self.sha}' doesn't exist in '{self.modules_repo.fullname}'") return False except LookupError as e: log.error(e) @@ -86,7 +86,7 @@ def update(self, module): return False # Check if there are any modules installed from - repo_name = self.modules_repo.name + repo_name = self.modules_repo.fullname if repo_name not in self.module_names: log.error(f"No modules installed from '{repo_name}'") return False @@ -105,8 +105,8 @@ def update(self, module): return False sha = self.sha - if module in update_config.get(self.modules_repo.name, {}): - config_entry = update_config[self.modules_repo.name].get(module) + if module in update_config.get(self.modules_repo.fullname, {}): + config_entry = update_config[self.modules_repo.fullname].get(module) if config_entry is not None and config_entry is not True: if config_entry is False: log.info("Module's update entry in '.nf-core.yml' is set to False") @@ -238,20 +238,20 @@ def update(self, module): # Check if the module we've been asked to update actually exists if not module_exist_in_repo(module, modules_repo): - warn_msg = f"Module '{module}' not found in remote '{modules_repo.name}' ({modules_repo.branch})" + warn_msg = f"Module '{module}' not found in remote '{modules_repo.fullname}' ({modules_repo.branch})" if self.update_all: warn_msg += ". Skipping..." log.warning(warn_msg) exit_value = False continue - if modules_repo.name in modules_json["repos"]: - current_entry = modules_json["repos"][modules_repo.name].get(module) + if modules_repo.fullname in modules_json["repos"]: + current_entry = modules_json["repos"][modules_repo.fullname].get(module) else: current_entry = None # Set the install folder based on the repository name - install_folder = [self.dir, "modules", modules_repo.owner, modules_repo.repo] + install_folder = [self.dir, "modules", modules_repo.owner, modules_repo.name] # Compute the module directory module_dir = os.path.join(*install_folder, module) @@ -284,14 +284,14 @@ def update(self, module): current_version = current_entry["git_sha"] if current_version == version: if self.sha or self.prompt: - log.info(f"'{modules_repo.name}/{module}' is already installed at {version}") + log.info(f"'{modules_repo.fullname}/{module}' is already installed at {version}") else: - log.info(f"'{modules_repo.name}/{module}' is already up to date") + log.info(f"'{modules_repo.fullname}/{module}' is already up to date") continue if not dry_run: - log.info(f"Updating '{modules_repo.name}/{module}'") - log.debug(f"Updating module '{module}' to {version} from {modules_repo.name}") + log.info(f"Updating '{modules_repo.fullname}/{module}'") + log.debug(f"Updating module '{module}' to {version} from {modules_repo.fullname}") log.debug(f"Removing old version of module '{module}'") self.clear_module_dir(module, module_dir) @@ -426,17 +426,17 @@ class DiffEnum(enum.Enum): path = os.path.join(temp_folder, file) if os.path.exists(path): shutil.move(path, os.path.join(module_dir, file)) - log.info(f"Updating '{modules_repo.name}/{module}'") - log.debug(f"Updating module '{module}' to {version} from {modules_repo.name}") + log.info(f"Updating '{modules_repo.fullname}/{module}'") + log.debug(f"Updating module '{module}' to {version} from {modules_repo.fullname}") # Update modules.json with newly installed module if not dry_run: - self.update_modules_json(modules_json, modules_repo.name, module, version) + self.update_modules_json(modules_json, modules_repo.fullname, module, version) # Don't save to a file, just iteratively update the variable else: modules_json = self.update_modules_json( - modules_json, modules_repo.name, module, version, write_file=False + modules_json, modules_repo.fullname, module, version, write_file=False ) if self.save_diff_fn: diff --git a/nf_core/utils.py b/nf_core/utils.py index 4f4f38304a..e368c38d2c 100644 --- a/nf_core/utils.py +++ b/nf_core/utils.py @@ -54,6 +54,7 @@ os.environ.get("XDG_CONFIG_HOME", os.path.join(os.getenv("HOME"), ".config")), "nf-core", ) +NFCORE_DIR = os.path.join(os.getenv("HOME"), ".nfcore") def check_if_outdated(current_version=None, remote_version=None, source_url="https://nf-co.re/tools_version"): @@ -298,6 +299,15 @@ def nextflow_cmd(cmd): ) +def setup_nfcore_dir(): + """Creates a directory for files that need to be kept between sessions + + Currently only used for keeping local copies of modules repos + """ + if not os.path.exists(NFCORE_DIR): + os.makedirs(NFCORE_DIR) + + def setup_requests_cachedir(): """Sets up local caching for faster remote HTTP requests. diff --git a/tests/test_modules.py b/tests/test_modules.py index db04c55302..798686d4ed 100644 --- a/tests/test_modules.py +++ b/tests/test_modules.py @@ -64,7 +64,7 @@ def tearDown(self): def test_modulesrepo_class(self): """Initialise a modules repo object""" modrepo = nf_core.modules.ModulesRepo() - assert modrepo.name == "nf-core/modules" + assert modrepo.fullname == "nf-core/modules" assert modrepo.branch == "master" ############################################ From 63beb2c5ad857a8fe4d335e7451aea4bd7756a81 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Thu, 16 Jun 2022 16:05:53 +0200 Subject: [PATCH 02/55] Setup should be done --- nf_core/modules/module_utils.py | 2 +- nf_core/modules/modules_command.py | 4 +- nf_core/modules/modules_repo.py | 62 ++++++++++++++++++++++-------- nf_core/modules/update.py | 2 +- 4 files changed, 50 insertions(+), 20 deletions(-) diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index 50b11c3c96..45be7461c9 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -167,7 +167,7 @@ def create_modules_json(pipeline_dir): ) for repo_name, module_names in sorted(repo_module_names.items()): try: - modules_repo = ModulesRepo(repo=repo_name) + modules_repo = ModulesRepo(remote_path=repo_name) except LookupError as e: raise UserWarning(e) diff --git a/nf_core/modules/modules_command.py b/nf_core/modules/modules_command.py index 914c0e37d5..d2dcc7355d 100644 --- a/nf_core/modules/modules_command.py +++ b/nf_core/modules/modules_command.py @@ -159,7 +159,7 @@ def modules_json_up_to_date(self): remove_from_mod_json = {} for repo, modules in mod_json["repos"].items(): try: - modules_repo = ModulesRepo(repo=repo) + modules_repo = ModulesRepo(remote_path=repo) modules_repo.get_modules_file_tree() install_folder = [modules_repo.owner, modules_repo.name] except LookupError as e: @@ -213,7 +213,7 @@ def modules_json_up_to_date(self): ) failed_to_find_commit_sha = [] for repo, modules in missing_from_modules_json.items(): - modules_repo = ModulesRepo(repo=repo) + modules_repo = ModulesRepo(remote_path=repo) repo_path = os.path.join(self.dir, "modules", repo) for module in modules: module_path = os.path.join(repo_path, module) diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index ab3c1642f2..a777af9136 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -2,6 +2,7 @@ import logging import os import git +import urllib.parse from nf_core.utils import NFCORE_DIR, gh_api @@ -16,16 +17,23 @@ class ModulesRepo(object): so that this can be used in the same way by all sub-commands. """ - def __init__(self, repo="nf-core/modules", branch=None, remote=None): + def __init__(self, remote_url="git@github.com:nf-core/modules.git", branch=None): """ Initializes the object and clones the git repository if it is not already present """ - # Check if name seems to be well formed - if self.fullname.count("/") != 1: - raise LookupError(f"Repository name '{self.fullname}' should be of the format '/'") + # Check if the remote seems to be well formed + if remote_url is None: + raise LookupError("You have to provide a remote URL when working with a private repository") - self.fullname = repo + # Extract the repo path from the remote url + # See https://mirrors.edge.kernel.org/pub/software/scm/git/docs/git-clone.html#URLS for the possible URL patterns + # Remove the initial `git@`` if it is present + path = remote_url.split("@")[1] + path = urllib.parse.urlparse(path) + path = path.path + + self.fullname = os.path.splitext(path)[0] self.branch = branch if self.branch is None: @@ -35,11 +43,11 @@ def __init__(self, repo="nf-core/modules", branch=None, remote=None): else: self.branch = self.get_default_branch() - if remote is None and self.fullname == "nf-core/modules": - self.remote = "git@github.com:nf-core/modules.git" - self.owner, self.name = self.fullname.split("/") - self.repo = self.setup_local_repo(self.owner, self.name, remote) + self.repo = self.setup_local_repo(self.owner, self.name, remote_url) + + # Verify that the requested branch exists by checking it out + self.branch_exists() # Verify that the repo seems to be correctly configured if self.fullname != "nf-core/modules" or self.branch: @@ -49,6 +57,13 @@ def __init__(self, repo="nf-core/modules", branch=None, remote=None): self.modules_avail_module_names = [] def setup_local_repo(self, owner, name, remote=None): + """ + Sets up the local git repository. If the repository has been cloned previously, it + returns a git.Repo object of that clone. Otherwise it tries to clone the repository from + the provided remote URL and returns a git.Repo of the new clone. + + Returns repo: git.Repo + """ owner_local_dir = os.path.join(NFCORE_DIR, owner) if not os.path.exists(owner_local_dir): os.makedirs(owner_local_dir) @@ -59,25 +74,33 @@ def setup_local_repo(self, owner, name, remote=None): f"The git repo {os.path.join(owner, name)} has not been previously used and you did not provide a link to the remote" ) try: - return git.Repo.clone_from(remote, self.local_dir) + repo = git.Repo.clone_from(remote, self.local_dir) except git.exc.GitCommandError: raise LookupError(f"Failed to clone from the remote: `{remote}`") - - return git.Repo(self.local_dir) + else: + # If the repo is already cloned, pull the latest changes from the remote + repo = git.Repo(self.local_dir) + repo.remotes.origin.pull() + return repo def get_default_branch(self): - """Get the default branch for the repo (the branch origin/HEAD is pointing to)""" + """ + Gets the default branch for the repo (the branch origin/HEAD is pointing to) + """ origin_head = next(ref for ref in self.repo.refs if ref == "origin/HEAD") _, self.branch = origin_head.ref.name.split("/") - def verify_branch(self): - # Check if the branch name exists by trying to check out the branch + def branch_exists(self): + """Verifies that the branch exists in the repository by trying to check it out""" try: self.repo.git.checkout(self.branch) except git.exc.GitCommandError: raise LookupError(f"Branch '{self.branch}' not found in '{self.fullname}'") - # Make sure the directory is well formed + def verify_branch(self): + """ + Verifies the active branch conforms do the correct directory structure + """ dir_names = os.listdir(self.local_dir) if "modules" not in dir_names: err_str = f"Repository '{self.fullname}' ({self.branch}) does not contain a 'modules/' directory" @@ -85,6 +108,13 @@ def verify_branch(self): err_str += ".\nAs of version 2.0, the 'software/' directory should be renamed to 'modules/'" raise LookupError(err_str) + def checkout(self): + """ + Checks out the correct branch in the local repository + """ + if self.repo.active_branch.name != self.branch: + self.repo.git.checkout(self.branch) + def get_modules_file_tree(self): """ Fetch the file list from the repo, using the GitHub API diff --git a/nf_core/modules/update.py b/nf_core/modules/update.py index 0d308c3b75..121f8e2db0 100644 --- a/nf_core/modules/update.py +++ b/nf_core/modules/update.py @@ -179,7 +179,7 @@ def update(self, module): log.info(f"Skipping module{'' if len(skipped_modules) == 1 else 's'}: '{skipped_str}'") repos_mods_shas = [ - (ModulesRepo(repo=repo_name), mods_shas) for repo_name, mods_shas in repos_mods_shas.items() + (ModulesRepo(remote_path=repo_name), mods_shas) for repo_name, mods_shas in repos_mods_shas.items() ] for repo, _ in repos_mods_shas: From b0b640fde833272d60b08aae9b594a7a3d0e6164 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Thu, 16 Jun 2022 16:19:46 +0200 Subject: [PATCH 03/55] Rationalize variables --- nf_core/modules/install.py | 2 +- nf_core/modules/modules_repo.py | 11 +++-------- nf_core/modules/update.py | 2 +- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/nf_core/modules/install.py b/nf_core/modules/install.py index 94efbb50c8..b4c3914e16 100644 --- a/nf_core/modules/install.py +++ b/nf_core/modules/install.py @@ -84,7 +84,7 @@ def install(self, module): current_entry = None # Set the install folder based on the repository name - install_folder = [self.dir, "modules", self.modules_repo.owner, self.modules_repo.name] + install_folder = [self.dir, "modules"].extend(os.path.split(self.modules_repo.fullname)) # Compute the module directory module_dir = os.path.join(*install_folder, module) diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index a777af9136..107d5f7e3e 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -43,7 +43,6 @@ def __init__(self, remote_url="git@github.com:nf-core/modules.git", branch=None) else: self.branch = self.get_default_branch() - self.owner, self.name = self.fullname.split("/") self.repo = self.setup_local_repo(self.owner, self.name, remote_url) # Verify that the requested branch exists by checking it out @@ -56,7 +55,7 @@ def __init__(self, remote_url="git@github.com:nf-core/modules.git", branch=None) self.modules_file_tree = {} self.modules_avail_module_names = [] - def setup_local_repo(self, owner, name, remote=None): + def setup_local_repo(self, remote): """ Sets up the local git repository. If the repository has been cloned previously, it returns a git.Repo object of that clone. Otherwise it tries to clone the repository from @@ -64,15 +63,11 @@ def setup_local_repo(self, owner, name, remote=None): Returns repo: git.Repo """ - owner_local_dir = os.path.join(NFCORE_DIR, owner) + owner_local_dir = os.path.join(NFCORE_DIR, self.fullname) if not os.path.exists(owner_local_dir): os.makedirs(owner_local_dir) - self.local_dir = os.path.join(owner_local_dir, name) + self.local_dir = os.path.join(owner_local_dir, self.fullname) if not os.path.exists(self.local_dir): - if remote == None: - raise Exception( - f"The git repo {os.path.join(owner, name)} has not been previously used and you did not provide a link to the remote" - ) try: repo = git.Repo.clone_from(remote, self.local_dir) except git.exc.GitCommandError: diff --git a/nf_core/modules/update.py b/nf_core/modules/update.py index 121f8e2db0..129bd1a279 100644 --- a/nf_core/modules/update.py +++ b/nf_core/modules/update.py @@ -251,7 +251,7 @@ def update(self, module): current_entry = None # Set the install folder based on the repository name - install_folder = [self.dir, "modules", modules_repo.owner, modules_repo.name] + install_folder = [self.dir, "modules"].extend(os.path.split(modules_repo.fullname)) # Compute the module directory module_dir = os.path.join(*install_folder, module) From babf143b02396909feb0891ddbe53068c6e6b8a0 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Thu, 16 Jun 2022 16:44:26 +0200 Subject: [PATCH 04/55] Update module file download --- nf_core/modules/modules_command.py | 30 +++++++++++++++------------- nf_core/modules/modules_repo.py | 32 ++++++++++++++++++++++-------- 2 files changed, 40 insertions(+), 22 deletions(-) diff --git a/nf_core/modules/modules_command.py b/nf_core/modules/modules_command.py index d2dcc7355d..495ee645b7 100644 --- a/nf_core/modules/modules_command.py +++ b/nf_core/modules/modules_command.py @@ -161,7 +161,7 @@ def modules_json_up_to_date(self): try: modules_repo = ModulesRepo(remote_path=repo) modules_repo.get_modules_file_tree() - install_folder = [modules_repo.owner, modules_repo.name] + install_folder = os.path.split(modules_repo.fullname) except LookupError as e: log.warn(f"Could not get module's file tree for '{repo}': {e}") remove_from_mod_json[repo] = list(modules.keys()) @@ -256,19 +256,21 @@ def clear_module_dir(self, module_name, module_dir): return False def download_module_file(self, module_name, module_version, modules_repo, install_folder, dry_run=False): - """Downloads the files of a module from the remote repo""" - files = modules_repo.get_module_file_urls(module_name, module_version) - log.debug("Fetching module files:\n - {}".format("\n - ".join(files.keys()))) - for filename, api_url in files.items(): - split_filename = filename.split("/") - dl_filename = os.path.join(*install_folder, *split_filename[1:]) - try: - self.modules_repo.download_gh_file(dl_filename, api_url) - except (SystemError, LookupError) as e: - log.error(e) - return False - if not dry_run: - log.info(f"Downloaded {len(files)} files to {os.path.join(*install_folder, module_name)}") + """ + Copies the files of a module from the local copy of the repo + """ + # Make sure that the correct branch of the repo is checked out + modules_repo.checkout() + + # Check if the module exists in the branch + if not modules_repo.module_exists(module_name): + log.error( + f"The requested module does not exists in the '{modules_repo.branch}' of {modules_repo.fullname}'" + ) + return False + + # Copy the files from the repo to the install folder + shutil.copytree(modules_repo.get_module_dir()) return True def load_modules_json(self): diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index 107d5f7e3e..f5ba689df8 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -63,18 +63,17 @@ def setup_local_repo(self, remote): Returns repo: git.Repo """ - owner_local_dir = os.path.join(NFCORE_DIR, self.fullname) - if not os.path.exists(owner_local_dir): - os.makedirs(owner_local_dir) - self.local_dir = os.path.join(owner_local_dir, self.fullname) - if not os.path.exists(self.local_dir): + self.local_repo_dir = os.path.join(NFCORE_DIR, self.fullname) + if not os.path.exists(self.local_repo_dir): + os.makedirs(self.local_repo_dir) + if not os.path.exists(self.local_repo_dir): try: - repo = git.Repo.clone_from(remote, self.local_dir) + repo = git.Repo.clone_from(remote, self.local_repo_dir) except git.exc.GitCommandError: raise LookupError(f"Failed to clone from the remote: `{remote}`") else: # If the repo is already cloned, pull the latest changes from the remote - repo = git.Repo(self.local_dir) + repo = git.Repo(self.local_repo_dir) repo.remotes.origin.pull() return repo @@ -96,7 +95,7 @@ def verify_branch(self): """ Verifies the active branch conforms do the correct directory structure """ - dir_names = os.listdir(self.local_dir) + dir_names = os.listdir(self.local_repo_dir) if "modules" not in dir_names: err_str = f"Repository '{self.fullname}' ({self.branch}) does not contain a 'modules/' directory" if "software" in dir_names: @@ -110,6 +109,23 @@ def checkout(self): if self.repo.active_branch.name != self.branch: self.repo.git.checkout(self.branch) + def module_exists(self, module_name): + """ + Check if a module exists in the branch of the repo + + Returns bool + """ + return module_name in os.listdir(self.local_repo_dir) + + def get_module_dir(self, module_name): + """ + Returns the file path of a module directory in the repo. + Does not verify that the path exists. + + Returns module_path: str + """ + return os.path.join(self.local_repo_dir, module_name) + def get_modules_file_tree(self): """ Fetch the file list from the repo, using the GitHub API From e55ab0af8e8f2cd00c890221fdffc2742a7f9966 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Thu, 16 Jun 2022 17:01:34 +0200 Subject: [PATCH 05/55] Downloading at different refs --- nf_core/modules/modules_command.py | 9 ++++++--- nf_core/modules/modules_repo.py | 6 ++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/nf_core/modules/modules_command.py b/nf_core/modules/modules_command.py index 495ee645b7..662d1ba1fe 100644 --- a/nf_core/modules/modules_command.py +++ b/nf_core/modules/modules_command.py @@ -259,8 +259,8 @@ def download_module_file(self, module_name, module_version, modules_repo, instal """ Copies the files of a module from the local copy of the repo """ - # Make sure that the correct branch of the repo is checked out - modules_repo.checkout() + # Check out the repository at the requested ref + modules_repo.checkout_ref(module_version) # Check if the module exists in the branch if not modules_repo.module_exists(module_name): @@ -270,7 +270,10 @@ def download_module_file(self, module_name, module_version, modules_repo, instal return False # Copy the files from the repo to the install folder - shutil.copytree(modules_repo.get_module_dir()) + shutil.copytree(modules_repo.get_module_dir(), os.path.join(install_folder)) + + # Switch back to the tip of the branch (needed?) + modules_repo.checkout() return True def load_modules_json(self): diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index f5ba689df8..0497f819b2 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -109,6 +109,12 @@ def checkout(self): if self.repo.active_branch.name != self.branch: self.repo.git.checkout(self.branch) + def checkout_ref(self, ref): + """ + Checks out the repository at the requested ref + """ + self.repo.git.checkout(ref) + def module_exists(self, module_name): """ Check if a module exists in the branch of the repo From 112b1031944287383800dcc4654e8ad49801c2e6 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Thu, 16 Jun 2022 18:42:17 +0200 Subject: [PATCH 06/55] Fix get_module_git_log --- nf_core/modules/info.py | 8 ++------ nf_core/modules/install.py | 8 ++------ nf_core/modules/lint/module_version.py | 4 +++- nf_core/modules/module_utils.py | 16 +++++----------- nf_core/modules/modules_repo.py | 23 +++++++++++++++++++++++ nf_core/modules/update.py | 14 +++----------- 6 files changed, 38 insertions(+), 35 deletions(-) diff --git a/nf_core/modules/info.py b/nf_core/modules/info.py index 65462586b6..f4366c91d1 100644 --- a/nf_core/modules/info.py +++ b/nf_core/modules/info.py @@ -11,12 +11,8 @@ from rich.table import Table from rich.text import Text -from .module_utils import ( - get_installed_modules, - get_module_git_log, - get_repo_type, - module_exist_in_repo, -) +from .module_utils import get_repo_type + from .modules_command import ModuleCommand from .modules_repo import ModulesRepo diff --git a/nf_core/modules/install.py b/nf_core/modules/install.py index b4c3914e16..249556b278 100644 --- a/nf_core/modules/install.py +++ b/nf_core/modules/install.py @@ -6,7 +6,7 @@ import nf_core.modules.module_utils import nf_core.utils -from .module_utils import get_module_git_log, module_exist_in_repo +from .module_utils import module_exist_in_repo from .modules_command import ModuleCommand log = logging.getLogger(__name__) @@ -115,11 +115,7 @@ def install(self, module): return False else: # Fetch the latest commit for the module - try: - git_log = get_module_git_log(module, modules_repo=self.modules_repo, per_page=1, page_nbr=1) - except UserWarning: - log.error(f"Was unable to fetch version of module '{module}'") - return False + git_log = list(self.modules_repo.get_module_git_log(module, depth=1)) version = git_log[0]["git_sha"] if self.force: diff --git a/nf_core/modules/lint/module_version.py b/nf_core/modules/lint/module_version.py index 64b4817719..36febcc5b4 100644 --- a/nf_core/modules/lint/module_version.py +++ b/nf_core/modules/lint/module_version.py @@ -13,6 +13,7 @@ import nf_core import nf_core.modules.module_utils +import nf_core.modules.modules_repo log = logging.getLogger(__name__) @@ -39,7 +40,8 @@ def module_version(module_lint_object, module): # Check whether a new version is available try: - module_git_log = nf_core.modules.module_utils.get_module_git_log(module.module_name) + modules_repo = nf_core.modules.modules_repo.ModulesRepo() + module_git_log = modules_repo.get_module_git_log(module.module_name) if git_sha == module_git_log[0]["git_sha"]: module.passed.append(("module_version", "Module is the latest version", module.module_dir)) else: diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index 45be7461c9..80c084eedf 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -203,18 +203,12 @@ def find_correct_commit_sha(module_name, module_path, modules_repo): commit_sha (str): The latest commit SHA where local files are identical to remote files """ try: - # Find the correct commit SHA for the local files. - # We iterate over the commit log pages until we either - # find a matching commit or we reach the end of the commits + # Find the correct commit SHA for the local module files. + # We iterate over the commit history for the module until we find + # a revision that matches the file contents correct_commit_sha = None - commit_page_nbr = 1 - while correct_commit_sha is None: - commit_shas = [ - commit["git_sha"] - for commit in get_module_git_log(module_name, modules_repo=modules_repo, page_nbr=commit_page_nbr) - ] - correct_commit_sha = iterate_commit_log_page(module_name, module_path, modules_repo, commit_shas) - commit_page_nbr += 1 + commit_shas = (commit["git_sha"] for commit in modules_repo.get_module_git_log(module_name)) + correct_commit_sha = iterate_commit_log_page(module_name, module_path, modules_repo, commit_shas) return correct_commit_sha except (UserWarning, LookupError) as e: raise diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index 0497f819b2..90f07dbb1d 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -132,6 +132,29 @@ def get_module_dir(self, module_name): """ return os.path.join(self.local_repo_dir, module_name) + def get_module_git_log(self, module_name, depth=None, since="2021-07-07T00:00:00Z"): + """ + Fetches the commit history the of requested module since a given date. The default value is + not arbitrary - it is the last time the structure of the nf-core/modules repository was had an + update breaking backwards compatibility. + Args: + module_name (str): Name of module + modules_repo (ModulesRepo): A ModulesRepo object configured for the repository in question + per_page (int): Number of commits per page returned by API + page_nbr (int): Page number of the retrieved commits + since (str): Only show commits later than this timestamp. + Time should be given in ISO-8601 format: YYYY-MM-DDTHH:MM:SSZ. + + Returns: + ( dict ): Iterator of commit SHAs and associated (truncated) message + """ + module_path = os.path.join("modules", module_name) + commits = self.repo.iter_commits( + rev=f"{self.branch}@{{now}}...{self.branch}@{{{since}}}", max_count=depth, paths=module_path + ) + commits = ({"git_sha": commit.hexsha, "trunc_message": commit.message.partition("\n")[0]} for commit in commits) + return commits + def get_modules_file_tree(self): """ Fetch the file list from the repo, using the GitHub API diff --git a/nf_core/modules/update.py b/nf_core/modules/update.py index 129bd1a279..d56995d731 100644 --- a/nf_core/modules/update.py +++ b/nf_core/modules/update.py @@ -15,11 +15,8 @@ import nf_core.modules.module_utils import nf_core.utils -from .module_utils import ( - get_installed_modules, - get_module_git_log, - module_exist_in_repo, -) +from .module_utils import module_exist_in_repo + from .modules_command import ModuleCommand from .modules_repo import ModulesRepo @@ -271,12 +268,7 @@ def update(self, module): continue else: # Fetch the latest commit for the module - try: - git_log = get_module_git_log(module, modules_repo=modules_repo, per_page=1, page_nbr=1) - except UserWarning: - log.error(f"Was unable to fetch version of module '{module}'") - exit_value = False - continue + git_log = list(modules_repo.get_module_git_log(module, depth=1)) version = git_log[0]["git_sha"] if current_entry is not None and not self.force: From 6953cf97d8091180cd4d1caebe227cfeb1d0a3a1 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Fri, 17 Jun 2022 09:52:46 +0200 Subject: [PATCH 07/55] Update file comparision functionality --- nf_core/modules/module_utils.py | 29 +++-------------------------- nf_core/modules/modules_repo.py | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+), 26 deletions(-) diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index 80c084eedf..3f55f0e2e5 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -226,7 +226,6 @@ def iterate_commit_log_page(module_name, module_path, modules_repo, commit_shas) commit_sha (str): The latest commit SHA from 'commit_shas' where local files are identical to remote files """ - files_to_check = ["main.nf", "meta.yml"] local_file_contents = [None, None, None] for i, file in enumerate(files_to_check): @@ -254,32 +253,10 @@ def local_module_equal_to_commit(local_files, module_name, modules_repo, commit_ """ files_to_check = ["main.nf", "meta.yml"] - files_are_equal = [False, False, False] - remote_copies = [None, None, None] - - module_base_url = f"https://raw.githubusercontent.com/{modules_repo.name}/{commit_sha}/modules/{module_name}" - for i, file in enumerate(files_to_check): - # Download remote copy and compare - api_url = f"{module_base_url}/{file}" - r = gh_api.get(api_url) - # TODO: Remove debugging - gh_api.log_content_headers(r) - if r.status_code != 200: - gh_api.log_content_headers(r) - log.debug(f"Could not download remote copy of file module {module_name}/{file}") - else: - try: - remote_copies[i] = r.content.decode("utf-8") - except UnicodeDecodeError as e: - log.debug(f"Could not decode remote copy of {file} for the {module_name} module") - - # Compare the contents of the files. - # If the file is missing from both the local and remote repo - # we will get the comparision None == None - if local_files[i] == remote_copies[i]: - files_are_equal[i] = True - return all(files_are_equal) + modules_repo.checkout_ref(commit_sha) + remote_files = modules_repo.get_module_files(module_name, files_to_check) + return all(lfile == rfile for lfile, rfile in zip(local_files, remote_files)) def get_installed_modules(dir, repo_type="modules"): diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index 90f07dbb1d..64aefe2d56 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -132,6 +132,25 @@ def get_module_dir(self, module_name): """ return os.path.join(self.local_repo_dir, module_name) + def get_module_files(self, module_name, files, commit_sha): + """ + Returns the contents requested files for a module at the current + checked out ref + + Returns contents: [ str ] + """ + self.checkout_ref(commit_sha) + + contents = [None] * len(files) + module_path = self.get_module_dir(module_name) + for i, file in enumerate(files): + try: + contents[i] = open(os.path.join(module_path, file), "r").read() + except FileNotFoundError as e: + log.debug(f"Could not open file: {os.path.join(module_path, file)}") + continue + return contents + def get_module_git_log(self, module_name, depth=None, since="2021-07-07T00:00:00Z"): """ Fetches the commit history the of requested module since a given date. The default value is From bc1c067e86c40d718ac5f17d6f04408559766a00 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Fri, 17 Jun 2022 10:09:01 +0200 Subject: [PATCH 08/55] add comment to prompt func --- nf_core/modules/module_utils.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index 3f55f0e2e5..2cc74757e4 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -406,6 +406,16 @@ def verify_pipeline_dir(dir): def prompt_module_version_sha(module, modules_repo, installed_sha=None): + """ + Creates an interactive questionary prompt for selecting the module version + Args: + module (str): Module name + modules_repo (ModulesRepo): Modules repo the module originate in + installed_sha (str): Optional extra argument to highlight the current installed version + + Returns: + git_sha (str): The selected version of the module + """ older_commits_choice = questionary.Choice( title=[("fg:ansiyellow", "older commits"), ("class:choice-default", "")], value="" ) From 318629287e0ede0b8f5ea18e3418cc8faa96f137 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Fri, 17 Jun 2022 10:29:10 +0200 Subject: [PATCH 09/55] Update prompt function --- nf_core/modules/module_utils.py | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index 2cc74757e4..dc245eb1df 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -421,24 +421,14 @@ def prompt_module_version_sha(module, modules_repo, installed_sha=None): ) git_sha = "" page_nbr = 1 - try: - next_page_commits = get_module_git_log(module, modules_repo=modules_repo, per_page=10, page_nbr=page_nbr) - except UserWarning: - next_page_commits = None - except LookupError as e: - log.warning(e) - next_page_commits = None + + all_commits = modules_repo.get_module_git_log(module) + next_page_commits = [next(all_commits, None) for _ in range(10)] while git_sha == "": commits = next_page_commits - try: - next_page_commits = get_module_git_log( - module, modules_repo=modules_repo, per_page=10, page_nbr=page_nbr + 1 - ) - except UserWarning: - next_page_commits = None - except LookupError as e: - log.warning(e) + next_page_commits = [next(all_commits, None) for _ in range(10)] + if all(commit is None for commit in next_page_commits): next_page_commits = None choices = [] From 9c61c72962af12b4fcd8ba2a588768f176a5d4db Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Fri, 17 Jun 2022 10:43:38 +0200 Subject: [PATCH 10/55] Update sha_exists --- nf_core/modules/install.py | 7 +------ nf_core/modules/modules_repo.py | 7 +++++++ nf_core/modules/update.py | 7 +------ 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/nf_core/modules/install.py b/nf_core/modules/install.py index 249556b278..2e3d652162 100644 --- a/nf_core/modules/install.py +++ b/nf_core/modules/install.py @@ -44,14 +44,9 @@ def install(self, module): # Verify that the provided SHA exists in the repo if self.sha: - try: - nf_core.modules.module_utils.sha_exists(self.sha, self.modules_repo) - except UserWarning: + if not self.modules_repo.sha_exists_on_branch(self.sha): log.error(f"Commit SHA '{self.sha}' doesn't exist in '{self.modules_repo.fullname}'") return False - except LookupError as e: - log.error(e) - return False if module is None: module = questionary.autocomplete( diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index 64aefe2d56..525a6413ec 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -174,6 +174,13 @@ def get_module_git_log(self, module_name, depth=None, since="2021-07-07T00:00:00 commits = ({"git_sha": commit.hexsha, "trunc_message": commit.message.partition("\n")[0]} for commit in commits) return commits + def sha_exists_on_branch(self, sha): + """ + Verifies that a given commit sha exists on the branch + """ + self.checkout() + return sha in (commit.hexsha for commit in self.repo.iter_commits()) + def get_modules_file_tree(self): """ Fetch the file list from the repo, using the GitHub API diff --git a/nf_core/modules/update.py b/nf_core/modules/update.py index d56995d731..53b42e4582 100644 --- a/nf_core/modules/update.py +++ b/nf_core/modules/update.py @@ -65,14 +65,9 @@ def update(self, module): # Verify that the provided SHA exists in the repo if self.sha: - try: - nf_core.modules.module_utils.sha_exists(self.sha, self.modules_repo) - except UserWarning: + if not self.modules_repo.sha_exists_on_branch(self.sha): log.error(f"Commit SHA '{self.sha}' doesn't exist in '{self.modules_repo.fullname}'") return False - except LookupError as e: - log.error(e) - return False if not self.update_all: # Get the available modules From a31870e881c4b2d9a9af312d95200c4b0f0a2b79 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Fri, 17 Jun 2022 12:51:24 +0200 Subject: [PATCH 11/55] Bugfixing - starting to work now --- nf_core/__main__.py | 8 ++--- nf_core/modules/install.py | 6 ++-- nf_core/modules/module_utils.py | 19 +----------- nf_core/modules/modules_command.py | 8 +++-- nf_core/modules/modules_repo.py | 50 +++++++++++++----------------- nf_core/modules/update.py | 4 +-- 6 files changed, 33 insertions(+), 62 deletions(-) diff --git a/nf_core/__main__.py b/nf_core/__main__.py index a778189ed9..a1d7d33b30 100755 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -347,7 +347,7 @@ def lint(dir, release, fix, key, show_passed, fail_ignored, fail_warned, markdow "-g", "--github-repository", type=str, - default="nf-core/modules", + default="git@github.com:nf-core/modules.git", help="GitHub repository hosting modules.", ) @click.option("-b", "--branch", type=str, default="master", help="Branch of GitHub repository hosting modules.") @@ -361,11 +361,7 @@ def modules(ctx, github_repository, branch): ctx.ensure_object(dict) # Make repository object to pass to subcommands - try: - ctx.obj["modules_repo_obj"] = nf_core.modules.ModulesRepo(github_repository, branch) - except LookupError as e: - log.critical(e) - sys.exit(1) + ctx.obj["modules_repo_obj"] = nf_core.modules.ModulesRepo(github_repository, branch) # nf-core modules list subcommands diff --git a/nf_core/modules/install.py b/nf_core/modules/install.py index 2e3d652162..fd233e8289 100644 --- a/nf_core/modules/install.py +++ b/nf_core/modules/install.py @@ -6,7 +6,6 @@ import nf_core.modules.module_utils import nf_core.utils -from .module_utils import module_exist_in_repo from .modules_command import ModuleCommand log = logging.getLogger(__name__) @@ -66,7 +65,7 @@ def install(self, module): if not modules_json: return False - if not module_exist_in_repo(module, self.modules_repo): + if not self.modules_repo.module_exists(module): warn_msg = ( f"Module '{module}' not found in remote '{self.modules_repo.fullname}' ({self.modules_repo.branch})" ) @@ -79,7 +78,8 @@ def install(self, module): current_entry = None # Set the install folder based on the repository name - install_folder = [self.dir, "modules"].extend(os.path.split(self.modules_repo.fullname)) + install_folder = [self.dir, "modules"] + install_folder.extend(os.path.split(self.modules_repo.fullname)) # Compute the module directory module_dir = os.path.join(*install_folder, module) diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index dc245eb1df..2a6644f189 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -23,23 +23,6 @@ class ModuleException(Exception): pass -def module_exist_in_repo(module_name, modules_repo): - """ - Checks whether a module exists in a branch of a GitHub repository - - Args: - module_name (str): Name of module - modules_repo (ModulesRepo): A ModulesRepo object configured for the repository in question - Returns: - boolean: Whether the module exist in the repo or not. - """ - api_url = ( - f"https://api.github.com/repos/{modules_repo.name}/contents/modules/{module_name}?ref={modules_repo.branch}" - ) - response = gh_api.get(api_url) - return not (response.status_code == 404) - - def get_module_git_log(module_name, modules_repo=None, per_page=30, page_nbr=1, since="2021-07-07T00:00:00Z"): """ Fetches the commit history the of requested module since a given date. The default value is @@ -254,7 +237,7 @@ def local_module_equal_to_commit(local_files, module_name, modules_repo, commit_ files_to_check = ["main.nf", "meta.yml"] - modules_repo.checkout_ref(commit_sha) + modules_repo.checkout(commit_sha) remote_files = modules_repo.get_module_files(module_name, files_to_check) return all(lfile == rfile for lfile, rfile in zip(local_files, remote_files)) diff --git a/nf_core/modules/modules_command.py b/nf_core/modules/modules_command.py index 662d1ba1fe..c42ba53db6 100644 --- a/nf_core/modules/modules_command.py +++ b/nf_core/modules/modules_command.py @@ -5,6 +5,7 @@ import os import shutil from posixpath import dirname +import sys import yaml @@ -29,6 +30,7 @@ def __init__(self, dir): self.modules_repo = ModulesRepo() self.dir = dir self.module_names = [] + log.info("Hello") try: if self.dir: self.dir, self.repo_type = nf_core.modules.module_utils.get_repo_type(self.dir) @@ -213,7 +215,7 @@ def modules_json_up_to_date(self): ) failed_to_find_commit_sha = [] for repo, modules in missing_from_modules_json.items(): - modules_repo = ModulesRepo(remote_path=repo) + modules_repo = ModulesRepo() # NOTE TO SELF: Must allow other remotes repo_path = os.path.join(self.dir, "modules", repo) for module in modules: module_path = os.path.join(repo_path, module) @@ -260,7 +262,7 @@ def download_module_file(self, module_name, module_version, modules_repo, instal Copies the files of a module from the local copy of the repo """ # Check out the repository at the requested ref - modules_repo.checkout_ref(module_version) + modules_repo.checkout(module_version) # Check if the module exists in the branch if not modules_repo.module_exists(module_name): @@ -270,7 +272,7 @@ def download_module_file(self, module_name, module_version, modules_repo, instal return False # Copy the files from the repo to the install folder - shutil.copytree(modules_repo.get_module_dir(), os.path.join(install_folder)) + shutil.copytree(modules_repo.get_module_dir(module_name), os.path.join(*install_folder, module_name)) # Switch back to the tip of the branch (needed?) modules_repo.checkout() diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index 525a6413ec..a51e6fba48 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -17,19 +17,21 @@ class ModulesRepo(object): so that this can be used in the same way by all sub-commands. """ - def __init__(self, remote_url="git@github.com:nf-core/modules.git", branch=None): + def __init__(self, remote_url=None, branch=None): """ Initializes the object and clones the git repository if it is not already present """ # Check if the remote seems to be well formed if remote_url is None: - raise LookupError("You have to provide a remote URL when working with a private repository") + remote_url = "git@github.com:nf-core/modules.git" # Extract the repo path from the remote url # See https://mirrors.edge.kernel.org/pub/software/scm/git/docs/git-clone.html#URLS for the possible URL patterns # Remove the initial `git@`` if it is present - path = remote_url.split("@")[1] + log.info(remote_url) + path = remote_url.split("@") + path = path[-1] if len(path) > 1 else path[0] path = urllib.parse.urlparse(path) path = path.path @@ -43,10 +45,7 @@ def __init__(self, remote_url="git@github.com:nf-core/modules.git", branch=None) else: self.branch = self.get_default_branch() - self.repo = self.setup_local_repo(self.owner, self.name, remote_url) - - # Verify that the requested branch exists by checking it out - self.branch_exists() + self.setup_local_repo(remote_url) # Verify that the repo seems to be correctly configured if self.fullname != "nf-core/modules" or self.branch: @@ -61,21 +60,24 @@ def setup_local_repo(self, remote): returns a git.Repo object of that clone. Otherwise it tries to clone the repository from the provided remote URL and returns a git.Repo of the new clone. - Returns repo: git.Repo + Sets self.repo """ self.local_repo_dir = os.path.join(NFCORE_DIR, self.fullname) - if not os.path.exists(self.local_repo_dir): - os.makedirs(self.local_repo_dir) if not os.path.exists(self.local_repo_dir): try: - repo = git.Repo.clone_from(remote, self.local_repo_dir) + self.repo = git.Repo.clone_from(remote, self.local_repo_dir) except git.exc.GitCommandError: raise LookupError(f"Failed to clone from the remote: `{remote}`") + # Verify that the requested branch exists by checking it out + self.branch_exists() else: + self.repo = git.Repo(self.local_repo_dir) + + # Verify that the requested branch exists by checking it out + self.branch_exists() + # If the repo is already cloned, pull the latest changes from the remote - repo = git.Repo(self.local_repo_dir) - repo.remotes.origin.pull() - return repo + self.repo.remotes.origin.pull() def get_default_branch(self): """ @@ -102,14 +104,7 @@ def verify_branch(self): err_str += ".\nAs of version 2.0, the 'software/' directory should be renamed to 'modules/'" raise LookupError(err_str) - def checkout(self): - """ - Checks out the correct branch in the local repository - """ - if self.repo.active_branch.name != self.branch: - self.repo.git.checkout(self.branch) - - def checkout_ref(self, ref): + def checkout(self, ref): """ Checks out the repository at the requested ref """ @@ -121,7 +116,7 @@ def module_exists(self, module_name): Returns bool """ - return module_name in os.listdir(self.local_repo_dir) + return module_name in os.listdir(os.path.join(self.local_repo_dir, "modules")) def get_module_dir(self, module_name): """ @@ -130,16 +125,15 @@ def get_module_dir(self, module_name): Returns module_path: str """ - return os.path.join(self.local_repo_dir, module_name) + return os.path.join(self.local_repo_dir, "modules", module_name) - def get_module_files(self, module_name, files, commit_sha): + def get_module_files(self, module_name, files): """ Returns the contents requested files for a module at the current checked out ref Returns contents: [ str ] """ - self.checkout_ref(commit_sha) contents = [None] * len(files) module_path = self.get_module_dir(module_name) @@ -168,9 +162,7 @@ def get_module_git_log(self, module_name, depth=None, since="2021-07-07T00:00:00 ( dict ): Iterator of commit SHAs and associated (truncated) message """ module_path = os.path.join("modules", module_name) - commits = self.repo.iter_commits( - rev=f"{self.branch}@{{now}}...{self.branch}@{{{since}}}", max_count=depth, paths=module_path - ) + commits = self.repo.iter_commits(max_count=depth, paths=module_path) commits = ({"git_sha": commit.hexsha, "trunc_message": commit.message.partition("\n")[0]} for commit in commits) return commits diff --git a/nf_core/modules/update.py b/nf_core/modules/update.py index 53b42e4582..ae789d37de 100644 --- a/nf_core/modules/update.py +++ b/nf_core/modules/update.py @@ -15,8 +15,6 @@ import nf_core.modules.module_utils import nf_core.utils -from .module_utils import module_exist_in_repo - from .modules_command import ModuleCommand from .modules_repo import ModulesRepo @@ -229,7 +227,7 @@ def update(self, module): dry_run = self.show_diff or self.save_diff_fn # Check if the module we've been asked to update actually exists - if not module_exist_in_repo(module, modules_repo): + if not modules_repo.module_exists(module): warn_msg = f"Module '{module}' not found in remote '{modules_repo.fullname}' ({modules_repo.branch})" if self.update_all: warn_msg += ". Skipping..." From ee2f8d69057c4bd254cb4b1de46bdf56b615e556 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Fri, 17 Jun 2022 14:37:06 +0200 Subject: [PATCH 12/55] Update git commit info --- nf_core/modules/list.py | 5 ++++- nf_core/modules/module_utils.py | 3 ++- nf_core/modules/modules_repo.py | 26 ++++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/nf_core/modules/list.py b/nf_core/modules/list.py index 04ad7c3bad..12f63aee6d 100644 --- a/nf_core/modules/list.py +++ b/nf_core/modules/list.py @@ -5,6 +5,7 @@ import rich import nf_core.modules.module_utils +from nf_core.modules.modules_repo import ModulesRepo from .modules_command import ModuleCommand @@ -105,7 +106,9 @@ def pattern_msg(keywords): version_sha = module_entry["git_sha"] try: # pass repo_name to get info on modules even outside nf-core/modules - message, date = nf_core.modules.module_utils.get_commit_info(version_sha, repo_name) + message, date = ModulesRepo().get_commit_info( + version_sha + ) # NOTE add support for other remotes except LookupError as e: log.warning(e) date = "[red]Not Available" diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index 2a6644f189..317d6e97a6 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -70,7 +70,7 @@ def get_module_git_log(module_name, modules_repo=None, per_page=30, page_nbr=1, ) -def get_commit_info(commit_sha, repo_name="nf-core/modules"): +def get_commit_info(commit_sha, repo_name="git@github.com:nf-core/modules.git"): """ Fetches metadata about the commit (dates, message, etc.) Args: @@ -82,6 +82,7 @@ def get_commit_info(commit_sha, repo_name="nf-core/modules"): Raises: LookupError: If the call to the API fails. """ + api_url = f"https://api.github.com/repos/{repo_name}/commits/{commit_sha}?stats=false" log.debug(f"Fetching commit metadata for commit at {commit_sha}") response = gh_api.get(api_url) diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index a51e6fba48..7a936b82ee 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -104,6 +104,12 @@ def verify_branch(self): err_str += ".\nAs of version 2.0, the 'software/' directory should be renamed to 'modules/'" raise LookupError(err_str) + def branch_checkout(self): + """ + Checks out the specified branch of the repository + """ + self.repo.git.checkout(self.branch) + def checkout(self, ref): """ Checks out the repository at the requested ref @@ -173,6 +179,26 @@ def sha_exists_on_branch(self, sha): self.checkout() return sha in (commit.hexsha for commit in self.repo.iter_commits()) + def get_commit_info(self, sha): + """ + Fetches metadata about the commit (dates, message, etc.) + Args: + commit_sha (str): The SHA of the requested commit + Returns: + message (str): The commit message for the requested commit + date (str): The commit date for the requested commit + Raises: + LookupError: If the search for the commit fails + """ + self.branch_checkout() + for commit in self.repo.iter_commits(): + if commit.hexsha == sha: + message = commit.message.partition("\n")[0] + date_obj = commit.committed_datetime + date = date_obj.date() + return message, date + raise LookupError(f"Commit '{sha}' not found in the '{self.fullname}'") + def get_modules_file_tree(self): """ Fetch the file list from the repo, using the GitHub API From b7a7ce429a18f29dd875fde12bd3e2095ff2fcb7 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Fri, 17 Jun 2022 14:48:51 +0200 Subject: [PATCH 13/55] Add no-pull option --- nf_core/modules/list.py | 2 +- nf_core/modules/modules_repo.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/nf_core/modules/list.py b/nf_core/modules/list.py index 12f63aee6d..81034bafb4 100644 --- a/nf_core/modules/list.py +++ b/nf_core/modules/list.py @@ -106,7 +106,7 @@ def pattern_msg(keywords): version_sha = module_entry["git_sha"] try: # pass repo_name to get info on modules even outside nf-core/modules - message, date = ModulesRepo().get_commit_info( + message, date = ModulesRepo(no_pull=True).get_commit_info( version_sha ) # NOTE add support for other remotes except LookupError as e: diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index 7a936b82ee..99250fa807 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -17,7 +17,7 @@ class ModulesRepo(object): so that this can be used in the same way by all sub-commands. """ - def __init__(self, remote_url=None, branch=None): + def __init__(self, remote_url=None, branch=None, no_pull=False): """ Initializes the object and clones the git repository if it is not already present """ @@ -29,7 +29,6 @@ def __init__(self, remote_url=None, branch=None): # Extract the repo path from the remote url # See https://mirrors.edge.kernel.org/pub/software/scm/git/docs/git-clone.html#URLS for the possible URL patterns # Remove the initial `git@`` if it is present - log.info(remote_url) path = remote_url.split("@") path = path[-1] if len(path) > 1 else path[0] path = urllib.parse.urlparse(path) @@ -45,7 +44,7 @@ def __init__(self, remote_url=None, branch=None): else: self.branch = self.get_default_branch() - self.setup_local_repo(remote_url) + self.setup_local_repo(remote_url, no_pull) # Verify that the repo seems to be correctly configured if self.fullname != "nf-core/modules" or self.branch: @@ -54,7 +53,7 @@ def __init__(self, remote_url=None, branch=None): self.modules_file_tree = {} self.modules_avail_module_names = [] - def setup_local_repo(self, remote): + def setup_local_repo(self, remote, no_pull): """ Sets up the local git repository. If the repository has been cloned previously, it returns a git.Repo object of that clone. Otherwise it tries to clone the repository from @@ -77,7 +76,8 @@ def setup_local_repo(self, remote): self.branch_exists() # If the repo is already cloned, pull the latest changes from the remote - self.repo.remotes.origin.pull() + if not no_pull: + self.repo.remotes.origin.pull() def get_default_branch(self): """ @@ -195,7 +195,7 @@ def get_commit_info(self, sha): if commit.hexsha == sha: message = commit.message.partition("\n")[0] date_obj = commit.committed_datetime - date = date_obj.date() + date = str(date_obj.date()) return message, date raise LookupError(f"Commit '{sha}' not found in the '{self.fullname}'") From 23aefc9e67d2eac3e0398b4f7af0800d58bbb79a Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Fri, 17 Jun 2022 17:18:48 +0200 Subject: [PATCH 14/55] Start updating modules.json functionality --- nf_core/modules/module_utils.py | 139 +++++++++++++++++++++++++++----- 1 file changed, 120 insertions(+), 19 deletions(-) diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index 317d6e97a6..820cd7bd8e 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -3,6 +3,10 @@ import json import logging import os +from sys import modules +import git +import urllib +from pyrsistent import m import questionary import rich @@ -17,6 +21,11 @@ gh_api = nf_core.utils.gh_api +# Constants for used throughout the module files +NF_CORE_MODULES_NAME = "nf-core/modules" +NF_CORE_MODULES_REMOTE = "git@github.com:nf-core/modules.git" + + class ModuleException(Exception): """Exception raised when there was an error with module commands""" @@ -103,6 +112,98 @@ def get_commit_info(commit_sha, repo_name="git@github.com:nf-core/modules.git"): raise LookupError(f"Unable to fetch metadata for commit SHA {commit_sha}") +def dir_tree_uncovered(modules_dir, repos): + """ + Does a BFS of the modules directory of a pipeline and rapports any directories + that are not found in the current list of repos + """ + # Initialise the FIFO queue. Note that we assume the directory to be correctly + # configured, i.e. no files etc. + fifo = [os.path.join(modules_dir, subdir) for subdir in os.listdir(modules_dir) if subdir != "local"] + depth = 1 + dirs_not_covered = [] + while len(fifo) > 0: + temp_queue = [] + repos_at_level = [os.path.join(*[os.path.split(repo) for repo in repos][:depth])] + for dir in fifo: + rel_dir = os.path.relpath(dir, modules_dir) + if rel_dir in repos_at_level: + # Go the next depth if this directory was found + temp_queue.extend([os.path.join(dir, subdir) for subdir in os.listdir(dir)]) + else: + # Otherwise add the directory to the ones not covered + dirs_not_covered.append(dir) + fifo = temp_queue + depth += 1 + return dirs_not_covered + + +def path_from_remote(remote_url): + """ + Extracts the path from the remote URL + See https://mirrors.edge.kernel.org/pub/software/scm/git/docs/git-clone.html#URLS for the possible URL patterns + """ + # Remove the initial `git@`` if it is present + path = remote_url.split("@") + path = path[-1] if len(path) > 1 else path[0] + path = urllib.parse.urlparse(path) + path = path.path + + +def get_pipeline_module_repositories(modules_dir): + """ + Finds all module repositories in the modules directory. Ignores the local modules. + Args: + modules_dir (str): base directory for the module files + Returns + repos [ (str, str) ]: List of tuples of repo name and repo remote URL + """ + # Check if there are any nf-core modules installed + if os.path.exists(os.path.join(modules_dir, NF_CORE_MODULES_NAME)): + repos = [(NF_CORE_MODULES_NAME, NF_CORE_MODULES_REMOTE)] + else: + repos = [] + # Check if there are any untrack repositories + dirs_not_covered = dir_tree_uncovered(modules_dir, [name for name, _ in repos]) + if len(dirs_not_covered) > 0: + log.info("Found custom module repositories when creating 'modules.json'") + # Loop until all directories in the base directory are covered by a remote + while len(dirs_not_covered) > 0: + log.info( + "The following director{s} in the modules directory are untracked: '{l}'".format( + s="ies" if len(dirs_not_covered) > 0 else "y", l="', '".join(dir_tree_uncovered) + ) + ) + nrepo_remote = questionary.text("Please provide a URL for for one of the remaining repos").ask() + # Verify that the remote exists + while True: + try: + git.Git().ls_remote(nrepo_remote) + break + except git.exc.GitCommandError: + nrepo_remote = questionary.text( + "The provided remote does not seem to exist, please provide a new remote." + ).ask() + + # Verify that there is a directory corresponding the remote + nrepo_name = path_from_remote(nrepo_remote) + if not os.path.exists(os.path.join(modules_dir, nrepo_name)): + log.info( + "The provided remote does not seem to correspond to a local directory. " + "The directory structure should correspond to the one in the remote" + ) + dir_name = questionary.text( + "Please provide the correct directory, it will be renamed. If left empty, the remote will be ignored" + ) + if dir_name: + os.rename(os.path.join(modules_dir, dir_name), os.path.join(modules_dir, nrepo_name)) + else: + continue + repos.append((nrepo_name, nrepo_remote)) + dirs_not_covered = dir_tree_uncovered(modules_dir, [name for name, _ in repos]) + return dirs_not_covered + + def create_modules_json(pipeline_dir): """ Create the modules.json files @@ -119,25 +220,23 @@ def create_modules_json(pipeline_dir): if not os.path.exists(modules_dir): raise UserWarning("Can't find a ./modules directory. Is this a DSL2 pipeline?") - # Extract all modules repos in the pipeline directory - repo_names = [ - f"{user_name}/{repo_name}" - for user_name in os.listdir(modules_dir) - if os.path.isdir(os.path.join(modules_dir, user_name)) and user_name != "local" - for repo_name in os.listdir(os.path.join(modules_dir, user_name)) - ] + repos = get_pipeline_module_repositories(modules_dir) # Get all module names in the repos - repo_module_names = { - repo_name: list( - { - os.path.relpath(os.path.dirname(path), os.path.join(modules_dir, repo_name)) - for path in glob.glob(f"{modules_dir}/{repo_name}/**/*", recursive=True) - if os.path.isfile(path) - } + repo_module_names = [ + ( + repo_name, + list( + { + os.path.relpath(os.path.dirname(path), os.path.join(modules_dir, repo_name)) + for path in glob.glob(f"{modules_dir}/{repo_name}/**/*", recursive=True) + if os.path.isfile(path) + } + ), + repo_remote, ) - for repo_name in repo_names - } + for repo_name, repo_remote in repos + ] progress_bar = rich.progress.Progress( "[bold blue]{task.description}", @@ -149,14 +248,16 @@ def create_modules_json(pipeline_dir): file_progress = progress_bar.add_task( "Creating 'modules.json' file", total=sum(map(len, repo_module_names.values())), test_name="module.json" ) - for repo_name, module_names in sorted(repo_module_names.items()): + for repo_name, module_names, remote in sorted(repo_module_names.items()): try: - modules_repo = ModulesRepo(remote_path=repo_name) + modules_repo = ModulesRepo(remote_url=remote) except LookupError as e: raise UserWarning(e) repo_path = os.path.join(modules_dir, repo_name) modules_json["repos"][repo_name] = dict() + modules_json["repos"][repo_name]["git_url"] = remote + modules_json["repos"][repo_name]["modules"] = dict() for module_name in sorted(module_names): module_path = os.path.join(repo_path, module_name) progress_bar.update(file_progress, advance=1, test_name=f"{repo_name}/{module_name}") @@ -168,7 +269,7 @@ def create_modules_json(pipeline_dir): f"Could not fetch 'git_sha' for module: '{module_name}'. Please try to install a newer version of this module. ({e})" ) continue - modules_json["repos"][repo_name][module_name] = {"git_sha": correct_commit_sha} + modules_json["repos"][repo_name]["modules"][module_name] = {"git_sha": correct_commit_sha} modules_json_path = os.path.join(pipeline_dir, "modules.json") with open(modules_json_path, "w") as fh: From 948a4db193055b84803188e6ab9c047d04528727 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Sat, 18 Jun 2022 00:16:59 +0200 Subject: [PATCH 15/55] Update remaining modules.json functionality. Add new entry for git URL --- nf_core/modules/install.py | 4 +- nf_core/modules/module_utils.py | 2 +- nf_core/modules/modules_command.py | 122 ++++++++++++++++++++++------- nf_core/modules/modules_repo.py | 2 + nf_core/modules/update.py | 8 +- 5 files changed, 103 insertions(+), 35 deletions(-) diff --git a/nf_core/modules/install.py b/nf_core/modules/install.py index fd233e8289..47a4162dcd 100644 --- a/nf_core/modules/install.py +++ b/nf_core/modules/install.py @@ -121,7 +121,7 @@ def install(self, module): log.debug(f"Installing module '{module}' at modules hash {version} from {self.modules_repo.fullname}") # Download module files - if not self.download_module_file(module, version, self.modules_repo, install_folder): + if not self.install_module_files(module, version, self.modules_repo, install_folder): return False # Print include statement @@ -129,5 +129,5 @@ def install(self, module): log.info(f"Include statement: include {{ {module_name} }} from '.{os.path.join(*install_folder, module)}/main’") # Update module.json with newly installed module - self.update_modules_json(modules_json, self.modules_repo.fullname, module, version) + self.update_modules_json(modules_json, self.modules_repo, module, version) return True diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index 820cd7bd8e..cd5b4dcdd8 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -190,7 +190,7 @@ def get_pipeline_module_repositories(modules_dir): if not os.path.exists(os.path.join(modules_dir, nrepo_name)): log.info( "The provided remote does not seem to correspond to a local directory. " - "The directory structure should correspond to the one in the remote" + "The directory structure should be the same as in the remote" ) dir_name = questionary.text( "Please provide the correct directory, it will be renamed. If left empty, the remote will be ignored" diff --git a/nf_core/modules/modules_command.py b/nf_core/modules/modules_command.py index c42ba53db6..8a51b4b28a 100644 --- a/nf_core/modules/modules_command.py +++ b/nf_core/modules/modules_command.py @@ -1,19 +1,23 @@ import copy +from datetime import datetime import glob import json import logging import os import shutil from posixpath import dirname -import sys +import questionary +import git import yaml import nf_core.modules.module_utils import nf_core.utils + from nf_core import modules from nf_core.modules.modules_repo import ModulesRepo from nf_core.utils import plural_s as _s +from nf_core.modules.module_utils import NF_CORE_MODULES_NAME, NF_CORE_MODULES_REMOTE log = logging.getLogger(__name__) @@ -137,37 +141,34 @@ def modules_json_up_to_date(self): for repo, modules in self.module_names.items(): if repo in mod_json["repos"]: for module in modules: - if module in mod_json["repos"][repo]: - mod_json["repos"][repo].pop(module) + if module in mod_json["repos"][repo]["modules"]: + mod_json["repos"][repo]["modules"].pop(module) else: if repo not in missing_from_modules_json: - missing_from_modules_json[repo] = [] - missing_from_modules_json[repo].append(module) - if len(mod_json["repos"][repo]) == 0: + missing_from_modules_json[repo] = ([], mod_json["repos"]["git_url"]) + missing_from_modules_json[repo][0].append(module) + if len(mod_json["repos"][repo]["modules"]) == 0: mod_json["repos"].pop(repo) else: - missing_from_modules_json[repo] = modules + missing_from_modules_json[repo] = (modules, None) - # If there are any modules left in 'modules.json' after all installed are removed, + # If there are any modules left in 'modules.json' after all installed are removed, # we try to reinstall them if len(mod_json["repos"]) > 0: missing_but_in_mod_json = [ - f"'{repo}/{module}'" for repo, modules in mod_json["repos"].items() for module in modules + f"'{repo}/{module}'" for repo, contents in mod_json["repos"].items() for module in contents["modules"] ] log.info( f"Reinstalling modules found in 'modules.json' but missing from directory: {', '.join(missing_but_in_mod_json)}" ) remove_from_mod_json = {} - for repo, modules in mod_json["repos"].items(): - try: - modules_repo = ModulesRepo(remote_path=repo) - modules_repo.get_modules_file_tree() - install_folder = os.path.split(modules_repo.fullname) - except LookupError as e: - log.warn(f"Could not get module's file tree for '{repo}': {e}") - remove_from_mod_json[repo] = list(modules.keys()) - continue + for repo, contents in mod_json["repos"].items(): + modules = contents["modules"] + remote = contents["git_url"] + + modules_repo = ModulesRepo(remote_path=remote) + install_folder = os.path.split(modules_repo.fullname) for module, entry in modules.items(): sha = entry.get("git_sha") @@ -180,7 +181,7 @@ def modules_json_up_to_date(self): remove_from_mod_json[repo].append(module) continue module_dir = os.path.join(self.dir, "modules", *install_folder, module) - self.download_module_file(module, sha, modules_repo, install_folder, module_dir) + self.install_module_files(module, sha, modules_repo, install_folder, module_dir) # If the reinstall fails, we remove those entries in 'modules.json' if sum(map(len, remove_from_mod_json.values())) > 0: @@ -202,6 +203,8 @@ def modules_json_up_to_date(self): # If some modules didn't have an entry in the 'modules.json' file # we try to determine the SHA from the commit log of the remote + dead_repos = [] + sb_local_repos = [] if sum(map(len, missing_from_modules_json.values())) > 0: format_missing = [ @@ -213,9 +216,42 @@ def modules_json_up_to_date(self): log.info( f"Recomputing commit SHAs for modules which were missing from 'modules.json': {', '.join(format_missing)}" ) + failed_to_find_commit_sha = [] - for repo, modules in missing_from_modules_json.items(): - modules_repo = ModulesRepo() # NOTE TO SELF: Must allow other remotes + for repo, (modules, remote) in missing_from_modules_json.items(): + if remote is None: + if repo == NF_CORE_MODULES_NAME: + remote = NF_CORE_MODULES_REMOTE + else: + choice = questionary.select( + f"Found untracked files in {repo}. Please select a choice", + choices=[ + questionary.Choice("Provide the remote", value=0), + questionary.Choice("Move the directory to 'local'", value=1), + questionary.Choice("Remove the files", value=2), + ], + ) + if choice == 0: + remote = questionary.text("Please provide the URL of the remote") + # Verify that the remote exists + while True: + try: + git.Git().ls_remote(remote) + break + except git.exc.GitCommandError: + remote = questionary.text( + "The provided remote does not seem to exist, please provide a new remote." + ).ask() + elif choice == 1: + sb_local_repos.append(repo) + continue + else: + dead_repos.append(repo) + continue + + remote = questionary.text(f"Please provide a remote for these files ") + + modules_repo = ModulesRepo(remote_url=remote) repo_path = os.path.join(self.dir, "modules", repo) for module in modules: module_path = os.path.join(repo_path, module) @@ -232,10 +268,40 @@ def modules_json_up_to_date(self): if len(failed_to_find_commit_sha) > 0: log.info( - f"Could not determine 'git_sha' for module{_s(failed_to_find_commit_sha)}: {', '.join(failed_to_find_commit_sha)}." - f"\nPlease try to install a newer version of {'this' if len(failed_to_find_commit_sha) == 1 else 'these'} module{_s(failed_to_find_commit_sha)}." + f"Could not determine 'git_sha' for module{_s(failed_to_find_commit_sha)}: " + f"{', '.join(failed_to_find_commit_sha)}." + f"\nPlease try to install a newer version of " + f"{'this' if len(failed_to_find_commit_sha) == 1 else 'these'} " + f"module{_s(failed_to_find_commit_sha)}." ) + # Remove the requested repos + for repo in dead_repos: + path = os.path.join(self.dir, "modules", repo) + shutil.rmtree(path) + + # Copy the untracked repos to local + for repo in sb_local_repos: + modules_path = os.path.join(self.dir, "modules") + path = os.path.join(modules_path, repo) + local_path = os.path.join(modules_path, "local") + + # Create the local module directory if it doesn't already exist + if not os.path.exists(local_path): + os.makedirs(local_path) + + # Check if there is already a subdirectory with the name + if os.path.exists(os.path.join(local_path, to_path)): + to_path = path + while os.path.exists(os.path.join(local_path, to_path)): + # Add a time suffix to the path to make it unique + # (do it again and again if it didn't work out...) + to_path += f"-{datetime.datetime.now().strftime('%y%m%d%H%M%S')}" + shutil.move(path, to_path) + path = to_path + + shutil.move(path, local_path) + self.dump_modules_json(fresh_mod_json) def clear_module_dir(self, module_name, module_dir): @@ -257,7 +323,7 @@ def clear_module_dir(self, module_name, module_dir): log.error(f"Could not remove module: {e}") return False - def download_module_file(self, module_name, module_version, modules_repo, install_folder, dry_run=False): + def install_module_files(self, module_name, module_version, modules_repo, install_folder, dry_run=False): """ Copies the files of a module from the local copy of the repo """ @@ -289,11 +355,13 @@ def load_modules_json(self): modules_json = None return modules_json - def update_modules_json(self, modules_json, repo_name, module_name, module_version, write_file=True): + def update_modules_json(self, modules_json, modules_repo, module_name, module_version, write_file=True): """Updates the 'module.json' file with new module info""" + repo_name = modules_repo.fullname + remote_url = modules_repo.remove_url if repo_name not in modules_json["repos"]: - modules_json["repos"][repo_name] = dict() - modules_json["repos"][repo_name][module_name] = {"git_sha": module_version} + modules_json["repos"][repo_name] = {"modules": {}, "git_url": remote_url} + modules_json["repos"][repo_name]["modules"][module_name] = {"git_sha": module_version} # Sort the 'modules.json' repo entries modules_json["repos"] = nf_core.utils.sort_dictionary(modules_json["repos"]) if write_file: diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index 99250fa807..2d5860a591 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -26,6 +26,8 @@ def __init__(self, remote_url=None, branch=None, no_pull=False): if remote_url is None: remote_url = "git@github.com:nf-core/modules.git" + self.remote_url = remote_url + # Extract the repo path from the remote url # See https://mirrors.edge.kernel.org/pub/software/scm/git/docs/git-clone.html#URLS for the possible URL patterns # Remove the initial `git@`` if it is present diff --git a/nf_core/modules/update.py b/nf_core/modules/update.py index ae789d37de..1aa894ed5d 100644 --- a/nf_core/modules/update.py +++ b/nf_core/modules/update.py @@ -286,7 +286,7 @@ def update(self, module): install_folder = ["/tmp", next(tempfile._get_candidate_names())] # Download module files - if not self.download_module_file(module, version, modules_repo, install_folder, dry_run=dry_run): + if not self.install_module_files(module, version, modules_repo, install_folder, dry_run=dry_run): exit_value = False continue @@ -416,13 +416,11 @@ class DiffEnum(enum.Enum): # Update modules.json with newly installed module if not dry_run: - self.update_modules_json(modules_json, modules_repo.fullname, module, version) + self.update_modules_json(modules_json, modules_repo, module, version) # Don't save to a file, just iteratively update the variable else: - modules_json = self.update_modules_json( - modules_json, modules_repo.fullname, module, version, write_file=False - ) + modules_json = self.update_modules_json(modules_json, modules_repo, module, version, write_file=False) if self.save_diff_fn: # Compare the new modules.json and build a diff From ff4a6ee59100f045c2f38ae601819779366d9de2 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Sat, 18 Jun 2022 00:25:56 +0200 Subject: [PATCH 16/55] Remove gh api functions --- nf_core/modules/modules_repo.py | 87 --------------------------------- 1 file changed, 87 deletions(-) diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index 2d5860a591..c57fabb34c 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -200,90 +200,3 @@ def get_commit_info(self, sha): date = str(date_obj.date()) return message, date raise LookupError(f"Commit '{sha}' not found in the '{self.fullname}'") - - def get_modules_file_tree(self): - """ - Fetch the file list from the repo, using the GitHub API - - Sets self.modules_file_tree - self.modules_avail_module_names - """ - api_url = f"https://api.github.com/repos/{self.fullname}/git/trees/{self.branch}?recursive=1" - r = gh_api.get(api_url) - if r.status_code == 404: - raise LookupError(f"Repository / branch not found: {self.fullname} ({self.branch})\n{api_url}") - elif r.status_code != 200: - raise LookupError(f"Could not fetch {self.fullname} ({self.branch}) tree: {r.status_code}\n{api_url}") - - result = r.json() - assert result["truncated"] == False - - self.modules_file_tree = result["tree"] - for f in result["tree"]: - if f["path"].startswith("modules/") and f["path"].endswith("/main.nf") and "/test/" not in f["path"]: - # remove modules/ and /main.nf - self.modules_avail_module_names.append(f["path"].replace("modules/", "").replace("/main.nf", "")) - if len(self.modules_avail_module_names) == 0: - raise LookupError(f"Found no modules in '{self.fullname}'") - - def get_module_file_urls(self, module, commit=""): - """Fetch list of URLs for a specific module - - Takes the name of a module and iterates over the GitHub repo file tree. - Loops over items that are prefixed with the path 'modules/' and ignores - anything that's not a blob. Also ignores the test/ subfolder. - - Returns a dictionary with keys as filenames and values as GitHub API URLs. - These can be used to then download file contents. - - Args: - module (string): Name of module for which to fetch a set of URLs - - Returns: - dict: Set of files and associated URLs as follows: - - { - 'modules/fastqc/main.nf': 'https://api.github.com/repos/nf-core/modules/git/blobs/65ba598119206a2b851b86a9b5880b5476e263c3', - 'modules/fastqc/meta.yml': 'https://api.github.com/repos/nf-core/modules/git/blobs/0d5afc23ba44d44a805c35902febc0a382b17651' - } - """ - results = {} - for f in self.modules_file_tree: - if not f["path"].startswith(f"modules/{module}/"): - continue - if f["type"] != "blob": - continue - if "/test/" in f["path"]: - continue - results[f["path"]] = f["url"] - if commit != "": - for path in results: - results[path] = f"https://api.github.com/repos/{self.fullname}/contents/{path}?ref={commit}" - return results - - def download_gh_file(self, dl_filename, api_url): - """Download a file from GitHub using the GitHub API - - Args: - dl_filename (string): Path to save file to - api_url (string): GitHub API URL for file - - Raises: - If a problem, raises an error - """ - - # Make target directory if it doesn't already exist - dl_directory = os.path.dirname(dl_filename) - if not os.path.exists(dl_directory): - os.makedirs(dl_directory) - - # Call the GitHub API - r = gh_api.get(api_url) - if r.status_code != 200: - raise LookupError(f"Could not fetch {self.fullname} file: {r.status_code}\n {api_url}") - result = r.json() - file_contents = base64.b64decode(result["content"]) - - # Write the file contents - with open(dl_filename, "wb") as fh: - fh.write(file_contents) From b85c212287d02a24855321197089cf6af0c6bac6 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Sat, 18 Jun 2022 00:29:05 +0200 Subject: [PATCH 17/55] Remove more api stuff --- nf_core/modules/module_utils.py | 91 --------------------------------- 1 file changed, 91 deletions(-) diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index cd5b4dcdd8..acffbf8216 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -32,86 +32,6 @@ class ModuleException(Exception): pass -def get_module_git_log(module_name, modules_repo=None, per_page=30, page_nbr=1, since="2021-07-07T00:00:00Z"): - """ - Fetches the commit history the of requested module since a given date. The default value is - not arbitrary - it is the last time the structure of the nf-core/modules repository was had an - update breaking backwards compatibility. - Args: - module_name (str): Name of module - modules_repo (ModulesRepo): A ModulesRepo object configured for the repository in question - per_page (int): Number of commits per page returned by API - page_nbr (int): Page number of the retrieved commits - since (str): Only show commits later than this timestamp. - Time should be given in ISO-8601 format: YYYY-MM-DDTHH:MM:SSZ. - - Returns: - [ dict ]: List of commit SHAs and associated (truncated) message - """ - if modules_repo is None: - modules_repo = ModulesRepo() - api_url = f"https://api.github.com/repos/{modules_repo.fullname}/commits" - api_url += f"?sha={modules_repo.branch}" - if module_name is not None: - api_url += f"&path=modules/{module_name}" - api_url += f"&page={page_nbr}" - api_url += f"&since={since}" - - log.debug(f"Fetching commit history of module '{module_name}' from github API") - response = gh_api.get(api_url) - if response.status_code == 200: - commits = response.json() - - if len(commits) == 0: - raise UserWarning(f"Reached end of commit history for '{module_name}'") - else: - # Return the commit SHAs and the first line of the commit message - return [ - {"git_sha": commit["sha"], "trunc_message": commit["commit"]["message"].partition("\n")[0]} - for commit in commits - ] - elif response.status_code == 404: - raise LookupError(f"Module '{module_name}' not found in '{modules_repo.fullname}'\n{api_url}") - else: - gh_api.log_content_headers(response) - raise LookupError( - f"Unable to fetch commit SHA for module {module_name}. API responded with '{response.status_code}'" - ) - - -def get_commit_info(commit_sha, repo_name="git@github.com:nf-core/modules.git"): - """ - Fetches metadata about the commit (dates, message, etc.) - Args: - commit_sha (str): The SHA of the requested commit - repo_name (str): module repos name (def. nf-core/modules) - Returns: - message (str): The commit message for the requested commit - date (str): The commit date for the requested commit - Raises: - LookupError: If the call to the API fails. - """ - - api_url = f"https://api.github.com/repos/{repo_name}/commits/{commit_sha}?stats=false" - log.debug(f"Fetching commit metadata for commit at {commit_sha}") - response = gh_api.get(api_url) - if response.status_code == 200: - commit = response.json() - message = commit["commit"]["message"].partition("\n")[0] - raw_date = commit["commit"]["author"]["date"] - - # Parse the date returned from the API - date_obj = datetime.datetime.strptime(raw_date, "%Y-%m-%dT%H:%M:%SZ") - date = str(date_obj.date()) - - return message, date - elif response.status_code == 404: - raise LookupError(f"Commit '{commit_sha}' not found in 'nf-core/modules/'\n{api_url}") - else: - gh_api.log_content_headers(response) - raise LookupError(f"Unable to fetch metadata for commit SHA {commit_sha}") - - def dir_tree_uncovered(modules_dir, repos): """ Does a BFS of the modules directory of a pipeline and rapports any directories @@ -532,14 +452,3 @@ def prompt_module_version_sha(module, modules_repo, installed_sha=None): ).unsafe_ask() page_nbr += 1 return git_sha - - -def sha_exists(sha, modules_repo): - i = 1 - while True: - try: - if sha in {commit["git_sha"] for commit in get_module_git_log(None, modules_repo, page_nbr=i)}: - return True - i += 1 - except (UserWarning, LookupError): - raise From c251e74e9b2574144c395b7a47ca63cf1d67b15b Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Sat, 18 Jun 2022 00:54:46 +0200 Subject: [PATCH 18/55] Remove verify_pipeline. We cannot provide this functionality with the more flexible git support --- nf_core/modules/module_utils.py | 30 ------------------------------ nf_core/modules/modules_command.py | 6 ------ 2 files changed, 36 deletions(-) diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index acffbf8216..4acd5ebbfb 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -380,36 +380,6 @@ def get_repo_type(dir, repo_type=None, use_prompt=True): return [dir, repo_type] -def verify_pipeline_dir(dir): - modules_dir = os.path.join(dir, "modules") - if os.path.exists(modules_dir): - repo_names = ( - f"{user}/{repo}" - for user in os.listdir(modules_dir) - if user != "local" - for repo in os.listdir(os.path.join(modules_dir, user)) - ) - missing_remote = [] - modules_is_software = False - for repo_name in repo_names: - api_url = f"https://api.github.com/repos/{repo_name}/contents" - response = gh_api.get(api_url) - if response.status_code == 404: - missing_remote.append(repo_name) - if repo_name == "nf-core/software": - modules_is_software = True - - if len(missing_remote) > 0: - missing_remote = [f"'{repo_name}'" for repo_name in missing_remote] - error_msg = "Could not find GitHub repository for: " + ", ".join(missing_remote) - if modules_is_software: - error_msg += ( - "\nAs of version 2.0, remote modules are installed in 'modules//'" - ) - error_msg += "\nThe 'nf-core/software' directory should therefore be renamed to 'nf-core/modules'" - raise UserWarning(error_msg) - - def prompt_module_version_sha(module, modules_repo, installed_sha=None): """ Creates an interactive questionary prompt for selecting the module version diff --git a/nf_core/modules/modules_command.py b/nf_core/modules/modules_command.py index 8a51b4b28a..fead4ddbb0 100644 --- a/nf_core/modules/modules_command.py +++ b/nf_core/modules/modules_command.py @@ -43,12 +43,6 @@ def __init__(self, dir): except LookupError as e: raise UserWarning(e) - if self.repo_type == "pipeline": - try: - nf_core.modules.module_utils.verify_pipeline_dir(self.dir) - except UserWarning: - raise - def get_pipeline_modules(self): """ Get the modules installed in the current directory. From 458e6bc7a2568a44957ebf84a3b1a822a50065d6 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Sat, 18 Jun 2022 00:55:29 +0200 Subject: [PATCH 19/55] Remove gh api --- nf_core/modules/module_utils.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index 4acd5ebbfb..478e9694b7 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -18,9 +18,6 @@ log = logging.getLogger(__name__) -gh_api = nf_core.utils.gh_api - - # Constants for used throughout the module files NF_CORE_MODULES_NAME = "nf-core/modules" NF_CORE_MODULES_REMOTE = "git@github.com:nf-core/modules.git" From 0c99465b95d9470c5b483dc554148d97ca3efa08 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Sat, 18 Jun 2022 01:50:15 +0200 Subject: [PATCH 20/55] Misc bugfix --- nf_core/modules/install.py | 7 ----- nf_core/modules/module_utils.py | 46 ++++++++++++------------------ nf_core/modules/modules_command.py | 16 +++++------ nf_core/modules/modules_repo.py | 13 +++++++++ 4 files changed, 39 insertions(+), 43 deletions(-) diff --git a/nf_core/modules/install.py b/nf_core/modules/install.py index 47a4162dcd..fc40b5a221 100644 --- a/nf_core/modules/install.py +++ b/nf_core/modules/install.py @@ -30,13 +30,6 @@ def install(self, module): # Verify that 'modules.json' is consistent with the installed modules self.modules_json_up_to_date() - # Get the available modules - try: - self.modules_repo.get_modules_file_tree() - except LookupError as e: - log.error(e) - return False - if self.prompt and self.sha is not None: log.error("Cannot use '--sha' and '--prompt' at the same time!") return False diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index 478e9694b7..4703a385ad 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -32,7 +32,7 @@ class ModuleException(Exception): def dir_tree_uncovered(modules_dir, repos): """ Does a BFS of the modules directory of a pipeline and rapports any directories - that are not found in the current list of repos + that are not found in the list of repos """ # Initialise the FIFO queue. Note that we assume the directory to be correctly # configured, i.e. no files etc. @@ -41,12 +41,14 @@ def dir_tree_uncovered(modules_dir, repos): dirs_not_covered = [] while len(fifo) > 0: temp_queue = [] - repos_at_level = [os.path.join(*[os.path.split(repo) for repo in repos][:depth])] + log.info([os.path.join(*os.path.split(repo)[:depth]) for repo in repos]) + repos_at_level = {os.path.join(*os.path.split(repo)[:depth]): len(os.path.split(repo)) for repo in repos} for dir in fifo: rel_dir = os.path.relpath(dir, modules_dir) - if rel_dir in repos_at_level: - # Go the next depth if this directory was found - temp_queue.extend([os.path.join(dir, subdir) for subdir in os.listdir(dir)]) + if rel_dir in repos_at_level.keys(): + # Go the next depth if this directory is not one of the repos + if depth < repos_at_level[rel_dir]: + temp_queue.extend([os.path.join(dir, subdir) for subdir in os.listdir(dir)]) else: # Otherwise add the directory to the ones not covered dirs_not_covered.append(dir) @@ -80,7 +82,7 @@ def get_pipeline_module_repositories(modules_dir): repos = [(NF_CORE_MODULES_NAME, NF_CORE_MODULES_REMOTE)] else: repos = [] - # Check if there are any untrack repositories + # Check if there are any untracked repositories dirs_not_covered = dir_tree_uncovered(modules_dir, [name for name, _ in repos]) if len(dirs_not_covered) > 0: log.info("Found custom module repositories when creating 'modules.json'") @@ -88,10 +90,10 @@ def get_pipeline_module_repositories(modules_dir): while len(dirs_not_covered) > 0: log.info( "The following director{s} in the modules directory are untracked: '{l}'".format( - s="ies" if len(dirs_not_covered) > 0 else "y", l="', '".join(dir_tree_uncovered) + s="ies" if len(dirs_not_covered) > 0 else "y", l="', '".join(dirs_not_covered) ) ) - nrepo_remote = questionary.text("Please provide a URL for for one of the remaining repos").ask() + nrepo_remote = questionary.text("Please provide a URL for for one of the remaining repos").unsafe_ask() # Verify that the remote exists while True: try: @@ -163,9 +165,9 @@ def create_modules_json(pipeline_dir): ) with progress_bar: file_progress = progress_bar.add_task( - "Creating 'modules.json' file", total=sum(map(len, repo_module_names.values())), test_name="module.json" + "Creating 'modules.json' file", total=sum(map(len, repo_module_names)), test_name="module.json" ) - for repo_name, module_names, remote in sorted(repo_module_names.items()): + for repo_name, module_names, remote in sorted(repo_module_names): try: modules_repo = ModulesRepo(remote_url=remote) except LookupError as e: @@ -178,14 +180,9 @@ def create_modules_json(pipeline_dir): for module_name in sorted(module_names): module_path = os.path.join(repo_path, module_name) progress_bar.update(file_progress, advance=1, test_name=f"{repo_name}/{module_name}") - try: - correct_commit_sha = find_correct_commit_sha(module_name, module_path, modules_repo) + correct_commit_sha = find_correct_commit_sha(module_name, module_path, modules_repo) + log.info(correct_commit_sha) - except (LookupError, UserWarning) as e: - log.warn( - f"Could not fetch 'git_sha' for module: '{module_name}'. Please try to install a newer version of this module. ({e})" - ) - continue modules_json["repos"][repo_name]["modules"][module_name] = {"git_sha": correct_commit_sha} modules_json_path = os.path.join(pipeline_dir, "modules.json") @@ -209,7 +206,7 @@ def find_correct_commit_sha(module_name, module_path, modules_repo): # We iterate over the commit history for the module until we find # a revision that matches the file contents correct_commit_sha = None - commit_shas = (commit["git_sha"] for commit in modules_repo.get_module_git_log(module_name)) + commit_shas = (commit["git_sha"] for commit in modules_repo.get_module_git_log(module_name, depth=1000)) correct_commit_sha = iterate_commit_log_page(module_name, module_path, modules_repo, commit_shas) return correct_commit_sha except (UserWarning, LookupError) as e: @@ -228,16 +225,11 @@ def iterate_commit_log_page(module_name, module_path, modules_repo, commit_shas) commit_sha (str): The latest commit SHA from 'commit_shas' where local files are identical to remote files """ - files_to_check = ["main.nf", "meta.yml"] - local_file_contents = [None, None, None] - for i, file in enumerate(files_to_check): - try: - local_file_contents[i] = open(os.path.join(module_path, file), "r").read() - except FileNotFoundError as e: - log.debug(f"Could not open file: {os.path.join(module_path, file)}") - continue + commit_shas = list(commit_shas) + print(len(commit_shas)) for commit_sha in commit_shas: - if local_module_equal_to_commit(local_file_contents, module_name, modules_repo, commit_sha): + modules_repo.checkout(commit_sha) + if modules_repo.module_files_identical(module_name, module_path): return commit_sha return None diff --git a/nf_core/modules/modules_command.py b/nf_core/modules/modules_command.py index fead4ddbb0..b18666be03 100644 --- a/nf_core/modules/modules_command.py +++ b/nf_core/modules/modules_command.py @@ -249,16 +249,14 @@ def modules_json_up_to_date(self): repo_path = os.path.join(self.dir, "modules", repo) for module in modules: module_path = os.path.join(repo_path, module) - try: - correct_commit_sha = nf_core.modules.module_utils.find_correct_commit_sha( - module, module_path, modules_repo - ) - if repo not in fresh_mod_json["repos"]: - fresh_mod_json["repos"][repo] = {} + correct_commit_sha = nf_core.modules.module_utils.find_correct_commit_sha( + module, module_path, modules_repo + ) + log.info(correct_commit_sha) + if repo not in fresh_mod_json["repos"]: + fresh_mod_json["repos"][repo] = {} - fresh_mod_json["repos"][repo][module] = {"git_sha": correct_commit_sha} - except (LookupError, UserWarning) as e: - failed_to_find_commit_sha.append(f"'{repo}/{module}'") + fresh_mod_json["repos"][repo][module] = {"git_sha": correct_commit_sha} if len(failed_to_find_commit_sha) > 0: log.info( diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index c57fabb34c..e3c913a78c 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -1,4 +1,5 @@ import base64 +import filecmp import logging import os import git @@ -135,6 +136,17 @@ def get_module_dir(self, module_name): """ return os.path.join(self.local_repo_dir, "modules", module_name) + def module_files_identical(self, module_name, base_path): + module_dir = self.get_module_dir(module_name) + for file in os.listdir(base_path): + try: + if not filecmp.cmp(os.path.join(module_dir, file), os.path.join(base_path, file)): + return False + except FileNotFoundError as e: + log.debug(f"Could not open file: {os.path.join(module_dir, file)}") + continue + return True + def get_module_files(self, module_name, files): """ Returns the contents requested files for a module at the current @@ -170,6 +182,7 @@ def get_module_git_log(self, module_name, depth=None, since="2021-07-07T00:00:00 ( dict ): Iterator of commit SHAs and associated (truncated) message """ module_path = os.path.join("modules", module_name) + log.info(module_path) commits = self.repo.iter_commits(max_count=depth, paths=module_path) commits = ({"git_sha": commit.hexsha, "trunc_message": commit.message.partition("\n")[0]} for commit in commits) return commits From 637999261d41d5b319c9671c04147a8f31f65a34 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Sat, 18 Jun 2022 14:04:08 +0200 Subject: [PATCH 21/55] Fix bugs in create mod json and init of ModulesRepo --- nf_core/modules/module_utils.py | 24 ++++++++--------- nf_core/modules/modules_repo.py | 46 +++++++++++++++++++-------------- 2 files changed, 39 insertions(+), 31 deletions(-) diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index 4703a385ad..a0162ef708 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -201,16 +201,18 @@ def find_correct_commit_sha(module_name, module_path, modules_repo): Returns: commit_sha (str): The latest commit SHA where local files are identical to remote files """ - try: - # Find the correct commit SHA for the local module files. - # We iterate over the commit history for the module until we find - # a revision that matches the file contents - correct_commit_sha = None - commit_shas = (commit["git_sha"] for commit in modules_repo.get_module_git_log(module_name, depth=1000)) - correct_commit_sha = iterate_commit_log_page(module_name, module_path, modules_repo, commit_shas) - return correct_commit_sha - except (UserWarning, LookupError) as e: - raise + # Find the correct commit SHA for the local module files. + # We iterate over the commit history for the module until we find + # a revision that matches the file contents + commit_shas = (commit["git_sha"] for commit in modules_repo.get_module_git_log(module_name, depth=1000)) + commit_shas = list(commit_shas) + log.debug(len(commit_shas)) + for commit_sha in commit_shas: + modules_repo.checkout(commit_sha) + if modules_repo.module_files_identical(module_name, module_path): + return commit_sha + modules_repo.checkout_branch() + return None def iterate_commit_log_page(module_name, module_path, modules_repo, commit_shas): @@ -225,8 +227,6 @@ def iterate_commit_log_page(module_name, module_path, modules_repo, commit_shas) commit_sha (str): The latest commit SHA from 'commit_shas' where local files are identical to remote files """ - commit_shas = list(commit_shas) - print(len(commit_shas)) for commit_sha in commit_shas: modules_repo.checkout(commit_sha) if modules_repo.module_files_identical(module_name, module_path): diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index e3c913a78c..cd11c5982b 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -38,16 +38,8 @@ def __init__(self, remote_url=None, branch=None, no_pull=False): path = path.path self.fullname = os.path.splitext(path)[0] - self.branch = branch - if self.branch is None: - # Don't bother fetching default branch if we're using nf-core - if self.fullname == "nf-core/modules": - self.branch = "master" - else: - self.branch = self.get_default_branch() - - self.setup_local_repo(remote_url, no_pull) + self.setup_local_repo(remote_url, branch, no_pull) # Verify that the repo seems to be correctly configured if self.fullname != "nf-core/modules" or self.branch: @@ -56,12 +48,16 @@ def __init__(self, remote_url=None, branch=None, no_pull=False): self.modules_file_tree = {} self.modules_avail_module_names = [] - def setup_local_repo(self, remote, no_pull): + def setup_local_repo(self, remote, branch, no_pull=False): """ Sets up the local git repository. If the repository has been cloned previously, it returns a git.Repo object of that clone. Otherwise it tries to clone the repository from the provided remote URL and returns a git.Repo of the new clone. + Args: + remote (str): git url of remote + branch (str): name of branch to use + no_pull (bool): Don't pull the repo. (Used for performance reasons) Sets self.repo """ self.local_repo_dir = os.path.join(NFCORE_DIR, self.fullname) @@ -71,28 +67,40 @@ def setup_local_repo(self, remote, no_pull): except git.exc.GitCommandError: raise LookupError(f"Failed to clone from the remote: `{remote}`") # Verify that the requested branch exists by checking it out - self.branch_exists() + self.setup_branch(branch) else: self.repo = git.Repo(self.local_repo_dir) # Verify that the requested branch exists by checking it out - self.branch_exists() + self.setup_branch(branch) # If the repo is already cloned, pull the latest changes from the remote if not no_pull: self.repo.remotes.origin.pull() + def setup_branch(self, branch): + if branch is None: + # Don't bother fetching default branch if we're using nf-core + if self.fullname == "nf-core/modules": + self.branch = "master" + else: + self.branch = self.get_default_branch() + else: + self.branch = branch + # Verify that the branch exists by checking it out + self.branch_exists() + def get_default_branch(self): """ Gets the default branch for the repo (the branch origin/HEAD is pointing to) """ - origin_head = next(ref for ref in self.repo.refs if ref == "origin/HEAD") + origin_head = next(ref for ref in self.repo.refs if ref.name == "origin/HEAD") _, self.branch = origin_head.ref.name.split("/") def branch_exists(self): """Verifies that the branch exists in the repository by trying to check it out""" try: - self.repo.git.checkout(self.branch) + self.checkout_branch() except git.exc.GitCommandError: raise LookupError(f"Branch '{self.branch}' not found in '{self.fullname}'") @@ -107,17 +115,17 @@ def verify_branch(self): err_str += ".\nAs of version 2.0, the 'software/' directory should be renamed to 'modules/'" raise LookupError(err_str) - def branch_checkout(self): + def checkout_branch(self): """ Checks out the specified branch of the repository """ self.repo.git.checkout(self.branch) - def checkout(self, ref): + def checkout(self, commit): """ - Checks out the repository at the requested ref + Checks out the repository at the requested commit """ - self.repo.git.checkout(ref) + self.repo.git.checkout(commit) def module_exists(self, module_name): """ @@ -205,7 +213,7 @@ def get_commit_info(self, sha): Raises: LookupError: If the search for the commit fails """ - self.branch_checkout() + self.checkout_branch() for commit in self.repo.iter_commits(): if commit.hexsha == sha: message = commit.message.partition("\n")[0] From b093fc97d6ab3b374c628610c8bf696967c6739b Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Sat, 18 Jun 2022 14:05:57 +0200 Subject: [PATCH 22/55] Remove old functions --- nf_core/modules/module_utils.py | 38 --------------------------------- 1 file changed, 38 deletions(-) diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index a0162ef708..43996b6cd0 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -215,44 +215,6 @@ def find_correct_commit_sha(module_name, module_path, modules_repo): return None -def iterate_commit_log_page(module_name, module_path, modules_repo, commit_shas): - """ - Iterates through a list of commits for a module and checks if the local file contents match the remote - Args: - module_name (str): Name of module - module_path (str): Path to module in local repo - module_repo (str): Remote repo for module - commit_shas ([ str ]): List of commit SHAs for module, sorted in descending order - Returns: - commit_sha (str): The latest commit SHA from 'commit_shas' where local files - are identical to remote files - """ - for commit_sha in commit_shas: - modules_repo.checkout(commit_sha) - if modules_repo.module_files_identical(module_name, module_path): - return commit_sha - return None - - -def local_module_equal_to_commit(local_files, module_name, modules_repo, commit_sha): - """ - Compares the local module files to the module files for the given commit sha - Args: - local_files ([ str ]): Contents of local files. `None` if files doesn't exist - module_name (str): Name of module - module_repo (str): Remote repo for module - commit_sha (str): Commit SHA for remote version to compare against local version - Returns: - bool: Whether all local files are identical to remote version - """ - - files_to_check = ["main.nf", "meta.yml"] - - modules_repo.checkout(commit_sha) - remote_files = modules_repo.get_module_files(module_name, files_to_check) - return all(lfile == rfile for lfile, rfile in zip(local_files, remote_files)) - - def get_installed_modules(dir, repo_type="modules"): """ Make a list of all modules installed in this repository From 4b405fd736b6c99221707e29006db6d596eddb00 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Sat, 18 Jun 2022 19:07:01 +0200 Subject: [PATCH 23/55] Update module names functionality --- nf_core/modules/info.py | 2 +- nf_core/modules/install.py | 4 ++-- nf_core/modules/list.py | 2 +- nf_core/modules/module_test.py | 4 ++-- nf_core/modules/module_utils.py | 14 ++++++++------ nf_core/modules/modules_command.py | 8 +++----- nf_core/modules/modules_repo.py | 13 +++++++++---- nf_core/modules/test_yml_builder.py | 2 +- nf_core/modules/update.py | 2 +- 9 files changed, 28 insertions(+), 23 deletions(-) diff --git a/nf_core/modules/info.py b/nf_core/modules/info.py index f4366c91d1..4ed1ed7e6a 100644 --- a/nf_core/modules/info.py +++ b/nf_core/modules/info.py @@ -93,7 +93,7 @@ def get_remote_yaml(self): self.modules_repo.get_modules_file_tree() # Check if our requested module is there - if self.module not in self.modules_repo.modules_avail_module_names: + if self.module not in self.modules_repo.avail_module_names: return False # Get the remote path diff --git a/nf_core/modules/install.py b/nf_core/modules/install.py index fc40b5a221..eeaf4d5c09 100644 --- a/nf_core/modules/install.py +++ b/nf_core/modules/install.py @@ -43,12 +43,12 @@ def install(self, module): if module is None: module = questionary.autocomplete( "Tool name:", - choices=self.modules_repo.modules_avail_module_names, + choices=self.modules_repo.get_avail_modules(), style=nf_core.utils.nfcore_question_style, ).unsafe_ask() # Check that the supplied name is an available module - if module and module not in self.modules_repo.modules_avail_module_names: + if module and module not in self.modules_repo.get_avail_modules(): log.error(f"Module '{module}' not found in list of available modules.") log.info("Use the command 'nf-core modules list' to view available software") return False diff --git a/nf_core/modules/list.py b/nf_core/modules/list.py index 81034bafb4..48e7d804aa 100644 --- a/nf_core/modules/list.py +++ b/nf_core/modules/list.py @@ -51,7 +51,7 @@ def pattern_msg(keywords): return False # Filter the modules by keywords - modules = [mod for mod in self.modules_repo.modules_avail_module_names if all(k in mod for k in keywords)] + modules = [mod for mod in self.modules_repo.get_avail_module() if all(k in mod for k in keywords)] # Nothing found if len(modules) == 0: diff --git a/nf_core/modules/module_test.py b/nf_core/modules/module_test.py index 8e137f07a6..a57235027f 100644 --- a/nf_core/modules/module_test.py +++ b/nf_core/modules/module_test.py @@ -81,9 +81,9 @@ def _check_inputs(self): modules_repo.get_modules_file_tree() self.module_name = questionary.autocomplete( "Tool name:", - choices=modules_repo.modules_avail_module_names, + choices=modules_repo.get_avail_modules(), style=nf_core.utils.nfcore_question_style, - ).ask() + ).unsafe_ask() module_dir = Path("modules") / self.module_name # First, sanity check that the module directory exists diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index 43996b6cd0..66ad48d570 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -78,6 +78,9 @@ def get_pipeline_module_repositories(modules_dir): repos [ (str, str) ]: List of tuples of repo name and repo remote URL """ # Check if there are any nf-core modules installed + log.info( + f"Nf-core path {os.path.join(modules_dir, NF_CORE_MODULES_NAME)} exists {os.path.exists(os.path.join(modules_dir, NF_CORE_MODULES_NAME))}" + ) if os.path.exists(os.path.join(modules_dir, NF_CORE_MODULES_NAME)): repos = [(NF_CORE_MODULES_NAME, NF_CORE_MODULES_REMOTE)] else: @@ -120,7 +123,7 @@ def get_pipeline_module_repositories(modules_dir): continue repos.append((nrepo_name, nrepo_remote)) dirs_not_covered = dir_tree_uncovered(modules_dir, [name for name, _ in repos]) - return dirs_not_covered + return repos def create_modules_json(pipeline_dir): @@ -156,7 +159,7 @@ def create_modules_json(pipeline_dir): ) for repo_name, repo_remote in repos ] - + log.info(f"Module names: {[x[0] for x in repo_module_names]}") progress_bar = rich.progress.Progress( "[bold blue]{task.description}", rich.progress.BarColumn(bar_width=None), @@ -164,8 +167,9 @@ def create_modules_json(pipeline_dir): transient=True, ) with progress_bar: + n_total_modules = sum(len(modules) for _, modules, _ in repo_module_names) file_progress = progress_bar.add_task( - "Creating 'modules.json' file", total=sum(map(len, repo_module_names)), test_name="module.json" + "Creating 'modules.json' file", total=n_total_modules, test_name="module.json" ) for repo_name, module_names, remote in sorted(repo_module_names): try: @@ -175,13 +179,13 @@ def create_modules_json(pipeline_dir): repo_path = os.path.join(modules_dir, repo_name) modules_json["repos"][repo_name] = dict() + log.info(f"HELLO: {remote}") modules_json["repos"][repo_name]["git_url"] = remote modules_json["repos"][repo_name]["modules"] = dict() for module_name in sorted(module_names): module_path = os.path.join(repo_path, module_name) progress_bar.update(file_progress, advance=1, test_name=f"{repo_name}/{module_name}") correct_commit_sha = find_correct_commit_sha(module_name, module_path, modules_repo) - log.info(correct_commit_sha) modules_json["repos"][repo_name]["modules"][module_name] = {"git_sha": correct_commit_sha} @@ -205,8 +209,6 @@ def find_correct_commit_sha(module_name, module_path, modules_repo): # We iterate over the commit history for the module until we find # a revision that matches the file contents commit_shas = (commit["git_sha"] for commit in modules_repo.get_module_git_log(module_name, depth=1000)) - commit_shas = list(commit_shas) - log.debug(len(commit_shas)) for commit_sha in commit_shas: modules_repo.checkout(commit_sha) if modules_repo.module_files_identical(module_name, module_path): diff --git a/nf_core/modules/modules_command.py b/nf_core/modules/modules_command.py index b18666be03..ccbcbcb4a8 100644 --- a/nf_core/modules/modules_command.py +++ b/nf_core/modules/modules_command.py @@ -107,10 +107,7 @@ def has_modules_file(self): modules_json_path = os.path.join(self.dir, "modules.json") if not os.path.exists(modules_json_path): log.info("Creating missing 'module.json' file.") - try: - nf_core.modules.module_utils.create_modules_json(self.dir) - except UserWarning as e: - raise + nf_core.modules.module_utils.create_modules_json(self.dir) def modules_json_up_to_date(self): """ @@ -161,7 +158,7 @@ def modules_json_up_to_date(self): modules = contents["modules"] remote = contents["git_url"] - modules_repo = ModulesRepo(remote_path=remote) + modules_repo = ModulesRepo(remote_url=remote) install_folder = os.path.split(modules_repo.fullname) for module, entry in modules.items(): @@ -339,6 +336,7 @@ def install_module_files(self, module_name, module_version, modules_repo, instal def load_modules_json(self): """Loads the modules.json file""" modules_json_path = os.path.join(self.dir, "modules.json") + log.info(f"Module.json exists {os.path.exists(modules_json_path)}") try: with open(modules_json_path, "r") as fh: modules_json = json.load(fh) diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index cd11c5982b..63805bef0b 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -45,8 +45,7 @@ def __init__(self, remote_url=None, branch=None, no_pull=False): if self.fullname != "nf-core/modules" or self.branch: self.verify_branch() - self.modules_file_tree = {} - self.modules_avail_module_names = [] + self.avail_module_names = None def setup_local_repo(self, remote, branch, no_pull=False): """ @@ -145,8 +144,9 @@ def get_module_dir(self, module_name): return os.path.join(self.local_repo_dir, "modules", module_name) def module_files_identical(self, module_name, base_path): + module_files = ["main.nf", "meta.yml"] module_dir = self.get_module_dir(module_name) - for file in os.listdir(base_path): + for file in module_files: try: if not filecmp.cmp(os.path.join(module_dir, file), os.path.join(base_path, file)): return False @@ -189,8 +189,8 @@ def get_module_git_log(self, module_name, depth=None, since="2021-07-07T00:00:00 Returns: ( dict ): Iterator of commit SHAs and associated (truncated) message """ + self.checkout_branch() module_path = os.path.join("modules", module_name) - log.info(module_path) commits = self.repo.iter_commits(max_count=depth, paths=module_path) commits = ({"git_sha": commit.hexsha, "trunc_message": commit.message.partition("\n")[0]} for commit in commits) return commits @@ -221,3 +221,8 @@ def get_commit_info(self, sha): date = str(date_obj.date()) return message, date raise LookupError(f"Commit '{sha}' not found in the '{self.fullname}'") + + def get_avail_modules(self): + if self.avail_module_names is None: + self.avail_module_names = os.listdir(self.get_module_dir()) + return self.avail_module_names diff --git a/nf_core/modules/test_yml_builder.py b/nf_core/modules/test_yml_builder.py index 91767f3f34..e0df5864af 100644 --- a/nf_core/modules/test_yml_builder.py +++ b/nf_core/modules/test_yml_builder.py @@ -72,7 +72,7 @@ def check_inputs(self): modules_repo.get_modules_file_tree() self.module_name = questionary.autocomplete( "Tool name:", - choices=modules_repo.modules_avail_module_names, + choices=modules_repo.get_avail_modules(), style=nf_core.utils.nfcore_question_style, ).ask() self.module_dir = os.path.join("modules", *self.module_name.split("/")) diff --git a/nf_core/modules/update.py b/nf_core/modules/update.py index 1aa894ed5d..7a0a1259f6 100644 --- a/nf_core/modules/update.py +++ b/nf_core/modules/update.py @@ -116,7 +116,7 @@ def update(self, module): return False # Check that the supplied name is an available module - if module and module not in self.modules_repo.modules_avail_module_names: + if module and module not in self.modules_repo.get_avail_modules(): log.error(f"Module '{module}' not found in list of available modules.") log.info("Use the command 'nf-core modules list remote' to view available software") return False From d7e8e8b134a252aa5b01d50bbcd34a478b607c4f Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Sat, 18 Jun 2022 19:10:38 +0200 Subject: [PATCH 24/55] ask -> unsafe_ask --- nf_core/modules/info.py | 2 +- nf_core/modules/module_utils.py | 2 +- nf_core/modules/modules_command.py | 2 +- nf_core/modules/test_yml_builder.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/nf_core/modules/info.py b/nf_core/modules/info.py index 4ed1ed7e6a..5e5f09b7a0 100644 --- a/nf_core/modules/info.py +++ b/nf_core/modules/info.py @@ -93,7 +93,7 @@ def get_remote_yaml(self): self.modules_repo.get_modules_file_tree() # Check if our requested module is there - if self.module not in self.modules_repo.avail_module_names: + if self.module not in self.modules_repo.get_avail_modules(): return False # Get the remote path diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index 66ad48d570..f36ea0d706 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -105,7 +105,7 @@ def get_pipeline_module_repositories(modules_dir): except git.exc.GitCommandError: nrepo_remote = questionary.text( "The provided remote does not seem to exist, please provide a new remote." - ).ask() + ).unsafe_ask() # Verify that there is a directory corresponding the remote nrepo_name = path_from_remote(nrepo_remote) diff --git a/nf_core/modules/modules_command.py b/nf_core/modules/modules_command.py index ccbcbcb4a8..dc8a454697 100644 --- a/nf_core/modules/modules_command.py +++ b/nf_core/modules/modules_command.py @@ -232,7 +232,7 @@ def modules_json_up_to_date(self): except git.exc.GitCommandError: remote = questionary.text( "The provided remote does not seem to exist, please provide a new remote." - ).ask() + ).unsafe_ask() elif choice == 1: sb_local_repos.append(repo) continue diff --git a/nf_core/modules/test_yml_builder.py b/nf_core/modules/test_yml_builder.py index e0df5864af..abb7c7c35f 100644 --- a/nf_core/modules/test_yml_builder.py +++ b/nf_core/modules/test_yml_builder.py @@ -74,7 +74,7 @@ def check_inputs(self): "Tool name:", choices=modules_repo.get_avail_modules(), style=nf_core.utils.nfcore_question_style, - ).ask() + ).unsafe_ask() self.module_dir = os.path.join("modules", *self.module_name.split("/")) self.module_test_main = os.path.join("tests", "modules", *self.module_name.split("/"), "main.nf") From 928c905d862a55682b58bdbe1bb4afba930df860 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Sat, 18 Jun 2022 19:17:07 +0200 Subject: [PATCH 25/55] Remove print statements --- nf_core/modules/module_utils.py | 6 ------ nf_core/modules/modules_command.py | 1 - 2 files changed, 7 deletions(-) diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index f36ea0d706..43b722ffda 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -41,7 +41,6 @@ def dir_tree_uncovered(modules_dir, repos): dirs_not_covered = [] while len(fifo) > 0: temp_queue = [] - log.info([os.path.join(*os.path.split(repo)[:depth]) for repo in repos]) repos_at_level = {os.path.join(*os.path.split(repo)[:depth]): len(os.path.split(repo)) for repo in repos} for dir in fifo: rel_dir = os.path.relpath(dir, modules_dir) @@ -78,9 +77,6 @@ def get_pipeline_module_repositories(modules_dir): repos [ (str, str) ]: List of tuples of repo name and repo remote URL """ # Check if there are any nf-core modules installed - log.info( - f"Nf-core path {os.path.join(modules_dir, NF_CORE_MODULES_NAME)} exists {os.path.exists(os.path.join(modules_dir, NF_CORE_MODULES_NAME))}" - ) if os.path.exists(os.path.join(modules_dir, NF_CORE_MODULES_NAME)): repos = [(NF_CORE_MODULES_NAME, NF_CORE_MODULES_REMOTE)] else: @@ -159,7 +155,6 @@ def create_modules_json(pipeline_dir): ) for repo_name, repo_remote in repos ] - log.info(f"Module names: {[x[0] for x in repo_module_names]}") progress_bar = rich.progress.Progress( "[bold blue]{task.description}", rich.progress.BarColumn(bar_width=None), @@ -179,7 +174,6 @@ def create_modules_json(pipeline_dir): repo_path = os.path.join(modules_dir, repo_name) modules_json["repos"][repo_name] = dict() - log.info(f"HELLO: {remote}") modules_json["repos"][repo_name]["git_url"] = remote modules_json["repos"][repo_name]["modules"] = dict() for module_name in sorted(module_names): diff --git a/nf_core/modules/modules_command.py b/nf_core/modules/modules_command.py index dc8a454697..590f5299ce 100644 --- a/nf_core/modules/modules_command.py +++ b/nf_core/modules/modules_command.py @@ -336,7 +336,6 @@ def install_module_files(self, module_name, module_version, modules_repo, instal def load_modules_json(self): """Loads the modules.json file""" modules_json_path = os.path.join(self.dir, "modules.json") - log.info(f"Module.json exists {os.path.exists(modules_json_path)}") try: with open(modules_json_path, "r") as fh: modules_json = json.load(fh) From cc6ccef54596a52d3522b66d5e961d5af4f24827 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Sat, 18 Jun 2022 20:25:12 +0200 Subject: [PATCH 26/55] Fix get_avail_modules --- nf_core/modules/module_utils.py | 6 +----- nf_core/modules/modules_command.py | 4 +--- nf_core/modules/modules_repo.py | 25 ++++++++++++++++++------- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index 43b722ffda..d43aa82011 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -13,15 +13,11 @@ import nf_core.utils -from .modules_repo import ModulesRepo +from .modules_repo import ModulesRepo, NF_CORE_MODULES_NAME, NF_CORE_MODULES_REMOTE from .nfcore_module import NFCoreModule log = logging.getLogger(__name__) -# Constants for used throughout the module files -NF_CORE_MODULES_NAME = "nf-core/modules" -NF_CORE_MODULES_REMOTE = "git@github.com:nf-core/modules.git" - class ModuleException(Exception): """Exception raised when there was an error with module commands""" diff --git a/nf_core/modules/modules_command.py b/nf_core/modules/modules_command.py index 590f5299ce..b149902dc4 100644 --- a/nf_core/modules/modules_command.py +++ b/nf_core/modules/modules_command.py @@ -5,7 +5,6 @@ import logging import os import shutil -from posixpath import dirname import questionary import git @@ -15,9 +14,8 @@ import nf_core.utils from nf_core import modules -from nf_core.modules.modules_repo import ModulesRepo +from nf_core.modules.modules_repo import ModulesRepo, NF_CORE_MODULES_NAME, NF_CORE_MODULES_REMOTE from nf_core.utils import plural_s as _s -from nf_core.modules.module_utils import NF_CORE_MODULES_NAME, NF_CORE_MODULES_REMOTE log = logging.getLogger(__name__) diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index 63805bef0b..a2372800da 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -1,4 +1,3 @@ -import base64 import filecmp import logging import os @@ -9,6 +8,10 @@ log = logging.getLogger(__name__) +# Constants for the nf-core/modules repo used throughout the module files +NF_CORE_MODULES_NAME = "nf-core/modules" +NF_CORE_MODULES_REMOTE = "git@github.com:nf-core/modules.git" + class ModulesRepo(object): """ @@ -25,7 +28,7 @@ def __init__(self, remote_url=None, branch=None, no_pull=False): # Check if the remote seems to be well formed if remote_url is None: - remote_url = "git@github.com:nf-core/modules.git" + remote_url = NF_CORE_MODULES_REMOTE self.remote_url = remote_url @@ -42,9 +45,12 @@ def __init__(self, remote_url=None, branch=None, no_pull=False): self.setup_local_repo(remote_url, branch, no_pull) # Verify that the repo seems to be correctly configured - if self.fullname != "nf-core/modules" or self.branch: + if self.fullname != NF_CORE_MODULES_NAME or self.branch: self.verify_branch() + # Convenience variable + self.modules_dir = os.path.join(self.local_repo_dir, "modules") + self.avail_module_names = None def setup_local_repo(self, remote, branch, no_pull=False): @@ -80,7 +86,7 @@ def setup_local_repo(self, remote, branch, no_pull=False): def setup_branch(self, branch): if branch is None: # Don't bother fetching default branch if we're using nf-core - if self.fullname == "nf-core/modules": + if self.fullname == NF_CORE_MODULES_NAME: self.branch = "master" else: self.branch = self.get_default_branch() @@ -132,7 +138,7 @@ def module_exists(self, module_name): Returns bool """ - return module_name in os.listdir(os.path.join(self.local_repo_dir, "modules")) + return module_name in os.listdir(self.modules_dir) def get_module_dir(self, module_name): """ @@ -141,7 +147,7 @@ def get_module_dir(self, module_name): Returns module_path: str """ - return os.path.join(self.local_repo_dir, "modules", module_name) + return os.path.join(self.modules_dir, module_name) def module_files_identical(self, module_name, base_path): module_files = ["main.nf", "meta.yml"] @@ -224,5 +230,10 @@ def get_commit_info(self, sha): def get_avail_modules(self): if self.avail_module_names is None: - self.avail_module_names = os.listdir(self.get_module_dir()) + # Module directories are characterized by having a 'main.nf' file + self.avail_module_names = [ + os.path.relpath(dirpath, start=self.modules_dir) + for dirpath, _, file_names in os.walk(self.modules_dir) + if "main.nf" in file_names + ] return self.avail_module_names From 829f09f406b7c2a17cdf02dcda3cac787a572bf2 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Sat, 18 Jun 2022 21:41:46 +0200 Subject: [PATCH 27/55] Fix module name selection --- nf_core/modules/module_utils.py | 12 +++++------- nf_core/modules/modules_command.py | 1 - 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index d43aa82011..2f41a296bc 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -140,13 +140,11 @@ def create_modules_json(pipeline_dir): repo_module_names = [ ( repo_name, - list( - { - os.path.relpath(os.path.dirname(path), os.path.join(modules_dir, repo_name)) - for path in glob.glob(f"{modules_dir}/{repo_name}/**/*", recursive=True) - if os.path.isfile(path) - } - ), + [ + os.path.relpath(dir_name, os.path.join(modules_dir, repo_name)) + for dir_name, _, file_names in os.walk(os.path.join(modules_dir, repo_name)) + if "main.nf" in file_names + ], repo_remote, ) for repo_name, repo_remote in repos diff --git a/nf_core/modules/modules_command.py b/nf_core/modules/modules_command.py index b149902dc4..d180118779 100644 --- a/nf_core/modules/modules_command.py +++ b/nf_core/modules/modules_command.py @@ -32,7 +32,6 @@ def __init__(self, dir): self.modules_repo = ModulesRepo() self.dir = dir self.module_names = [] - log.info("Hello") try: if self.dir: self.dir, self.repo_type = nf_core.modules.module_utils.get_repo_type(self.dir) From 1a69c4e578644c81ef6b58009366fdc92178db0d Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Sat, 18 Jun 2022 23:04:24 +0200 Subject: [PATCH 28/55] Make update work --- nf_core/modules/module_utils.py | 3 ++- nf_core/modules/modules_command.py | 8 ++++---- nf_core/modules/modules_repo.py | 4 ++-- nf_core/modules/update.py | 27 ++++++++++++--------------- 4 files changed, 20 insertions(+), 22 deletions(-) diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index 2f41a296bc..c5019310f8 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -340,16 +340,17 @@ def prompt_module_version_sha(module, modules_repo, installed_sha=None): all_commits = modules_repo.get_module_git_log(module) next_page_commits = [next(all_commits, None) for _ in range(10)] + next_page_commits = [commit for commit in next_page_commits if commit is not None] while git_sha == "": commits = next_page_commits next_page_commits = [next(all_commits, None) for _ in range(10)] + next_page_commits = [commit for commit in next_page_commits if commit is not None] if all(commit is None for commit in next_page_commits): next_page_commits = None choices = [] for title, sha in map(lambda commit: (commit["trunc_message"], commit["git_sha"]), commits): - display_color = "fg:ansiblue" if sha != installed_sha else "fg:ansired" message = f"{title} {sha}" if installed_sha == sha: diff --git a/nf_core/modules/modules_command.py b/nf_core/modules/modules_command.py index d180118779..8a384ed3ac 100644 --- a/nf_core/modules/modules_command.py +++ b/nf_core/modules/modules_command.py @@ -133,7 +133,7 @@ def modules_json_up_to_date(self): mod_json["repos"][repo]["modules"].pop(module) else: if repo not in missing_from_modules_json: - missing_from_modules_json[repo] = ([], mod_json["repos"]["git_url"]) + missing_from_modules_json[repo] = ([], mod_json["repos"][repo]["git_url"]) missing_from_modules_json[repo][0].append(module) if len(mod_json["repos"][repo]["modules"]) == 0: mod_json["repos"].pop(repo) @@ -196,7 +196,7 @@ def modules_json_up_to_date(self): if sum(map(len, missing_from_modules_json.values())) > 0: format_missing = [ - f"'{repo}/{module}'" for repo, modules in missing_from_modules_json.items() for module in modules + f"'{repo}/{module}'" for repo, contents in missing_from_modules_json.items() for module in contents[0] ] if len(format_missing) == 1: log.info(f"Recomputing commit SHA for module {format_missing[0]} which was missing from 'modules.json'") @@ -327,7 +327,7 @@ def install_module_files(self, module_name, module_version, modules_repo, instal shutil.copytree(modules_repo.get_module_dir(module_name), os.path.join(*install_folder, module_name)) # Switch back to the tip of the branch (needed?) - modules_repo.checkout() + modules_repo.checkout_branch() return True def load_modules_json(self): @@ -344,7 +344,7 @@ def load_modules_json(self): def update_modules_json(self, modules_json, modules_repo, module_name, module_version, write_file=True): """Updates the 'module.json' file with new module info""" repo_name = modules_repo.fullname - remote_url = modules_repo.remove_url + remote_url = modules_repo.remote_url if repo_name not in modules_json["repos"]: modules_json["repos"][repo_name] = {"modules": {}, "git_url": remote_url} modules_json["repos"][repo_name]["modules"][module_name] = {"git_sha": module_version} diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index a2372800da..a9d4cd0ce7 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -80,7 +80,7 @@ def setup_local_repo(self, remote, branch, no_pull=False): self.setup_branch(branch) # If the repo is already cloned, pull the latest changes from the remote - if not no_pull: + if False: self.repo.remotes.origin.pull() def setup_branch(self, branch): @@ -138,7 +138,7 @@ def module_exists(self, module_name): Returns bool """ - return module_name in os.listdir(self.modules_dir) + return module_name in self.get_avail_modules() def get_module_dir(self, module_name): """ diff --git a/nf_core/modules/update.py b/nf_core/modules/update.py index 7a0a1259f6..ca203a49db 100644 --- a/nf_core/modules/update.py +++ b/nf_core/modules/update.py @@ -68,14 +68,7 @@ def update(self, module): return False if not self.update_all: - # Get the available modules - try: - self.modules_repo.get_modules_file_tree() - except LookupError as e: - log.error(e) - return False - - # Check if there are any modules installed from + # Check if there are any modules installed from the repo repo_name = self.modules_repo.fullname if repo_name not in self.module_names: log.error(f"No modules installed from '{repo_name}'") @@ -123,6 +116,9 @@ def update(self, module): repos_mods_shas = [(self.modules_repo, module, sha)] + # Load 'modules.json' (loaded here for consistency with the '--all' case) + modules_json = self.load_modules_json() + else: if module: raise UserWarning("You cannot specify a module and use the '--all' flag at the same time") @@ -168,18 +164,18 @@ def update(self, module): skipped_str = "', '".join(skipped_modules) log.info(f"Skipping module{'' if len(skipped_modules) == 1 else 's'}: '{skipped_str}'") + # Get the git urls from the modules.json + modules_json = self.load_modules_json() repos_mods_shas = [ - (ModulesRepo(remote_path=repo_name), mods_shas) for repo_name, mods_shas in repos_mods_shas.items() + (modules_json["repos"][repo_name]["git_url"], mods_shas) + for repo_name, mods_shas in repos_mods_shas.items() ] - for repo, _ in repos_mods_shas: - repo.get_modules_file_tree() + repos_mods_shas = [(ModulesRepo(remote_url=repo_url), mods_shas) for repo_url, mods_shas in repos_mods_shas] # Flatten the list repos_mods_shas = [(repo, mod, sha) for repo, mods_shas in repos_mods_shas for mod, sha in mods_shas] - # Load 'modules.json' - modules_json = self.load_modules_json() old_modules_json = copy.deepcopy(modules_json) # Deep copy to avoid mutability if not modules_json: return False @@ -236,12 +232,13 @@ def update(self, module): continue if modules_repo.fullname in modules_json["repos"]: - current_entry = modules_json["repos"][modules_repo.fullname].get(module) + current_entry = modules_json["repos"][modules_repo.fullname]["modules"].get(module) else: current_entry = None # Set the install folder based on the repository name - install_folder = [self.dir, "modules"].extend(os.path.split(modules_repo.fullname)) + install_folder = [self.dir, "modules"] + install_folder.extend(os.path.split(modules_repo.fullname)) # Compute the module directory module_dir = os.path.join(*install_folder, module) From a9eaf1438ea717e6b595f5991302ed7de21ee63b Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Sun, 19 Jun 2022 00:08:55 +0200 Subject: [PATCH 29/55] Add progress bars when cloning and pulling remote --- nf_core/modules/modules_repo.py | 39 ++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index a9d4cd0ce7..5d572d2cea 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -3,6 +3,7 @@ import os import git import urllib.parse +import rich.progress from nf_core.utils import NFCORE_DIR, gh_api @@ -13,6 +14,18 @@ NF_CORE_MODULES_REMOTE = "git@github.com:nf-core/modules.git" +class RemoteProgressbar(git.RemoteProgress): + def __init__(self, progress_bar, repo_name, remote_url, operation): + super().__init__() + self.progress_bar = progress_bar + self.tid = self.progress_bar.add_task(f"{operation} '{repo_name}' ({remote_url})", start=False, state="Started") + + def update(self, op_code, cur_count, max_count=None, message=""): + if not self.progress_bar.tasks[self.tid].started: + self.progress_bar.start_task(self.tid) + self.progress_bar.update(self.tid, total=max_count, completed=cur_count, state=message) + + class ModulesRepo(object): """ An object to store details about the repository being used for modules. @@ -68,7 +81,18 @@ def setup_local_repo(self, remote, branch, no_pull=False): self.local_repo_dir = os.path.join(NFCORE_DIR, self.fullname) if not os.path.exists(self.local_repo_dir): try: - self.repo = git.Repo.clone_from(remote, self.local_repo_dir) + pbar = rich.progress.Progress( + "[bold blue]{task.description}", + rich.progress.BarColumn(bar_width=None), + "[bold yellow]{task.fields[state]}", + transient=True, + ) + with pbar: + self.repo = git.Repo.clone_from( + remote, + self.local_repo_dir, + progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Cloning"), + ) except git.exc.GitCommandError: raise LookupError(f"Failed to clone from the remote: `{remote}`") # Verify that the requested branch exists by checking it out @@ -80,8 +104,17 @@ def setup_local_repo(self, remote, branch, no_pull=False): self.setup_branch(branch) # If the repo is already cloned, pull the latest changes from the remote - if False: - self.repo.remotes.origin.pull() + if not no_pull: + pbar = rich.progress.Progress( + "[bold blue]{task.description}", + rich.progress.BarColumn(bar_width=None), + "[bold yellow]{task.fields[state]}", + transient=True, + ) + with pbar: + self.repo.remotes.origin.pull( + progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Pulling") + ) def setup_branch(self, branch): if branch is None: From 99f581d155857e09d7557efc88d8e8c8e29383b8 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Sun, 19 Jun 2022 00:18:50 +0200 Subject: [PATCH 30/55] Add static variables for keeping track of pull status --- nf_core/modules/list.py | 2 +- nf_core/modules/modules_repo.py | 31 +++++++++++++++++++++++++------ 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/nf_core/modules/list.py b/nf_core/modules/list.py index 48e7d804aa..a8ea1737c4 100644 --- a/nf_core/modules/list.py +++ b/nf_core/modules/list.py @@ -106,7 +106,7 @@ def pattern_msg(keywords): version_sha = module_entry["git_sha"] try: # pass repo_name to get info on modules even outside nf-core/modules - message, date = ModulesRepo(no_pull=True).get_commit_info( + message, date = ModulesRepo().get_commit_info( version_sha ) # NOTE add support for other remotes except LookupError as e: diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index 5d572d2cea..e6cbb63f68 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -18,7 +18,9 @@ class RemoteProgressbar(git.RemoteProgress): def __init__(self, progress_bar, repo_name, remote_url, operation): super().__init__() self.progress_bar = progress_bar - self.tid = self.progress_bar.add_task(f"{operation} '{repo_name}' ({remote_url})", start=False, state="Started") + self.tid = self.progress_bar.add_task( + f"{operation} '{repo_name}' ({remote_url})", start=False, state="Waiting for response" + ) def update(self, op_code, cur_count, max_count=None, message=""): if not self.progress_bar.tasks[self.tid].started: @@ -34,7 +36,23 @@ class ModulesRepo(object): so that this can be used in the same way by all sub-commands. """ - def __init__(self, remote_url=None, branch=None, no_pull=False): + local_repo_up_to_date = dict() + + @classmethod + def local_repo_is_up_to_date(repo_name): + """ + Checks whether a local repo has been cloned/pull in the current session + """ + return ModulesRepo.local_repo_is_up_to_date.get(repo_name, False) + + @classmethod + def update_local_repo_status(repo_name, up_to_date): + """ + Updates the clone/pull status of a local repo + """ + ModulesRepo.local_repo_up_do_date = up_to_date + + def __init__(self, remote_url=None, branch=None): """ Initializes the object and clones the git repository if it is not already present """ @@ -55,7 +73,7 @@ def __init__(self, remote_url=None, branch=None, no_pull=False): self.fullname = os.path.splitext(path)[0] - self.setup_local_repo(remote_url, branch, no_pull) + self.setup_local_repo(remote_url, branch) # Verify that the repo seems to be correctly configured if self.fullname != NF_CORE_MODULES_NAME or self.branch: @@ -66,7 +84,7 @@ def __init__(self, remote_url=None, branch=None, no_pull=False): self.avail_module_names = None - def setup_local_repo(self, remote, branch, no_pull=False): + def setup_local_repo(self, remote, branch): """ Sets up the local git repository. If the repository has been cloned previously, it returns a git.Repo object of that clone. Otherwise it tries to clone the repository from @@ -75,7 +93,6 @@ def setup_local_repo(self, remote, branch, no_pull=False): Args: remote (str): git url of remote branch (str): name of branch to use - no_pull (bool): Don't pull the repo. (Used for performance reasons) Sets self.repo """ self.local_repo_dir = os.path.join(NFCORE_DIR, self.fullname) @@ -93,6 +110,7 @@ def setup_local_repo(self, remote, branch, no_pull=False): self.local_repo_dir, progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Cloning"), ) + ModulesRepo.update_local_repo_status(self.fullname, True) except git.exc.GitCommandError: raise LookupError(f"Failed to clone from the remote: `{remote}`") # Verify that the requested branch exists by checking it out @@ -104,7 +122,7 @@ def setup_local_repo(self, remote, branch, no_pull=False): self.setup_branch(branch) # If the repo is already cloned, pull the latest changes from the remote - if not no_pull: + if ModulesRepo.local_repo_is_up_to_date(self.fullname) pbar = rich.progress.Progress( "[bold blue]{task.description}", rich.progress.BarColumn(bar_width=None), @@ -115,6 +133,7 @@ def setup_local_repo(self, remote, branch, no_pull=False): self.repo.remotes.origin.pull( progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Pulling") ) + ModulesRepo.update_local_repo_status(self.fullname, True) def setup_branch(self, branch): if branch is None: From 6824918d209b0e06ea6721d43d11f38af388671f Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Sun, 19 Jun 2022 00:27:47 +0200 Subject: [PATCH 31/55] Use @staticmethod method instead of @classmethod... --- nf_core/modules/modules_repo.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index e6cbb63f68..b4d448b692 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -36,21 +36,21 @@ class ModulesRepo(object): so that this can be used in the same way by all sub-commands. """ - local_repo_up_to_date = dict() + local_repo_statuses = dict() - @classmethod - def local_repo_is_up_to_date(repo_name): + @staticmethod + def local_repo_synced(repo_name): """ Checks whether a local repo has been cloned/pull in the current session """ - return ModulesRepo.local_repo_is_up_to_date.get(repo_name, False) + return ModulesRepo.local_repo_statuses.get(repo_name, False) - @classmethod + @staticmethod def update_local_repo_status(repo_name, up_to_date): """ Updates the clone/pull status of a local repo """ - ModulesRepo.local_repo_up_do_date = up_to_date + ModulesRepo.local_repo_statuses[repo_name] = up_to_date def __init__(self, remote_url=None, branch=None): """ @@ -122,7 +122,7 @@ def setup_local_repo(self, remote, branch): self.setup_branch(branch) # If the repo is already cloned, pull the latest changes from the remote - if ModulesRepo.local_repo_is_up_to_date(self.fullname) + if not ModulesRepo.local_repo_synced(self.fullname): pbar = rich.progress.Progress( "[bold blue]{task.description}", rich.progress.BarColumn(bar_width=None), From 770e4e1776496f3da0cf22a80c387f096ffe915e Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Sun, 19 Jun 2022 00:38:16 +0200 Subject: [PATCH 32/55] Fix modules list --- nf_core/modules/list.py | 17 ++++++++--------- nf_core/modules/test_yml_builder.py | 1 - 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/nf_core/modules/list.py b/nf_core/modules/list.py index a8ea1737c4..9d9c4aad64 100644 --- a/nf_core/modules/list.py +++ b/nf_core/modules/list.py @@ -43,15 +43,8 @@ def pattern_msg(keywords): # No pipeline given - show all remote if self.remote: - # Get the list of available modules - try: - self.modules_repo.get_modules_file_tree() - except LookupError as e: - log.error(e) - return False - # Filter the modules by keywords - modules = [mod for mod in self.modules_repo.get_avail_module() if all(k in mod for k in keywords)] + modules = [mod for mod in self.modules_repo.get_avail_modules() if all(k in mod for k in keywords)] # Nothing found if len(modules) == 0: @@ -101,7 +94,13 @@ def pattern_msg(keywords): for repo_name, modules in sorted(repos_with_mods.items()): repo_entry = modules_json["repos"].get(repo_name, {}) for module in sorted(modules): - module_entry = repo_entry.get(module) + repo_modules = repo_entry.get("modules") + if repo_modules is None: + raise UserWarning( + "You 'modules.json' file is not up to date. Please remove it and rerun the command" + ) + module_entry = repo_modules.get(module) + if module_entry: version_sha = module_entry["git_sha"] try: diff --git a/nf_core/modules/test_yml_builder.py b/nf_core/modules/test_yml_builder.py index abb7c7c35f..8dcf1f4505 100644 --- a/nf_core/modules/test_yml_builder.py +++ b/nf_core/modules/test_yml_builder.py @@ -69,7 +69,6 @@ def check_inputs(self): # Get the tool name if not specified if self.module_name is None: modules_repo = ModulesRepo() - modules_repo.get_modules_file_tree() self.module_name = questionary.autocomplete( "Tool name:", choices=modules_repo.get_avail_modules(), From ab09a58c130f19c2c82fa71fb762139f2f375801 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Sun, 19 Jun 2022 10:26:57 +0200 Subject: [PATCH 33/55] Fix info --- nf_core/modules/info.py | 18 ++---------------- nf_core/modules/install.py | 5 ++++- nf_core/modules/modules_repo.py | 9 +++++++++ 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/nf_core/modules/info.py b/nf_core/modules/info.py index 5e5f09b7a0..d0f79ebe79 100644 --- a/nf_core/modules/info.py +++ b/nf_core/modules/info.py @@ -89,27 +89,13 @@ def get_remote_yaml(self): Returns: dict or bool: Parsed meta.yml found, False otherwise """ - # Fetch the remote repo information - self.modules_repo.get_modules_file_tree() - # Check if our requested module is there if self.module not in self.modules_repo.get_avail_modules(): return False - # Get the remote path - meta_url = None - for file_dict in self.modules_repo.modules_file_tree: - if file_dict.get("path") == f"modules/{self.module}/meta.yml": - meta_url = file_dict.get("url") - - if not meta_url: + file_contents = self.modules_repo.get_meta_yml(self.module) + if file_contents is None: return False - - # Download and parse - log.debug(f"Attempting to fetch {meta_url}") - response = requests.get(meta_url) - result = response.json() - file_contents = base64.b64decode(result["content"]) self.remote_location = self.modules_repo.fullname return yaml.safe_load(file_contents) diff --git a/nf_core/modules/install.py b/nf_core/modules/install.py index eeaf4d5c09..f0b668d751 100644 --- a/nf_core/modules/install.py +++ b/nf_core/modules/install.py @@ -7,6 +7,7 @@ import nf_core.utils from .modules_command import ModuleCommand +from .modules_repo import NF_CORE_MODULES_NAME log = logging.getLogger(__name__) @@ -81,7 +82,9 @@ def install(self, module): if (current_entry is not None and os.path.exists(module_dir)) and not self.force: log.error("Module is already installed.") - repo_flag = "" if self.modules_repo.fullname == "nf-core/modules" else f"-g {self.modules_repo.fullname} " + repo_flag = ( + "" if self.modules_repo.fullname == NF_CORE_MODULES_NAME else f"-g {self.modules_repo.fullname} " + ) branch_flag = "" if self.modules_repo.branch == "master" else f"-b {self.modules_repo.branch} " log.info( diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index b4d448b692..106d882afc 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -289,3 +289,12 @@ def get_avail_modules(self): if "main.nf" in file_names ] return self.avail_module_names + + def get_meta_yml(self, module): + self.checkout_branch() + path = os.path.join(self.modules_dir, module, "meta.yml") + if not os.path.exists(path): + return None + with open(path) as fh: + contents = fh.read() + return contents From b698585b045b6a12cf86606b0b51b63cd80ba971 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Sun, 19 Jun 2022 10:34:56 +0200 Subject: [PATCH 34/55] Update main function --- nf_core/__main__.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/nf_core/__main__.py b/nf_core/__main__.py index a1d7d33b30..61664a7be6 100755 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -345,14 +345,14 @@ def lint(dir, release, fix, key, show_passed, fail_ignored, fail_warned, markdow @nf_core_cli.group() @click.option( "-g", - "--github-repository", + "--git-remote", type=str, default="git@github.com:nf-core/modules.git", - help="GitHub repository hosting modules.", + help="Remote git repo to fetch files from", ) -@click.option("-b", "--branch", type=str, default="master", help="Branch of GitHub repository hosting modules.") +@click.option("-b", "--branch", type=str, default="master", help="Branch of git repository hosting modules.") @click.pass_context -def modules(ctx, github_repository, branch): +def modules(ctx, git_remote, branch): """ Commands to manage Nextflow DSL2 modules (tool wrappers). """ @@ -361,7 +361,7 @@ def modules(ctx, github_repository, branch): ctx.ensure_object(dict) # Make repository object to pass to subcommands - ctx.obj["modules_repo_obj"] = nf_core.modules.ModulesRepo(github_repository, branch) + ctx.obj["modules_repo_obj"] = nf_core.modules.ModulesRepo(remote_url=git_remote, branch=branch) # nf-core modules list subcommands From 632fc67859ff0275c691109bf38297bf2b91d85c Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Sun, 19 Jun 2022 14:58:36 +0200 Subject: [PATCH 35/55] Bug fix, user info and new option --no-pull --- nf_core/__main__.py | 12 +++++++++--- nf_core/modules/install.py | 2 +- nf_core/modules/modules_command.py | 6 ++++++ nf_core/modules/modules_repo.py | 8 +++++--- 4 files changed, 21 insertions(+), 7 deletions(-) diff --git a/nf_core/__main__.py b/nf_core/__main__.py index 61664a7be6..d91a42e364 100755 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -28,7 +28,7 @@ # Submodules should all traverse back to this log = logging.getLogger() -# Set up .nfcore directory +# Set up .nfcore directory for storing files between sessions nf_core.utils.setup_nfcore_dir() # Set up nicer formatting of click cli help messages @@ -351,8 +351,14 @@ def lint(dir, release, fix, key, show_passed, fail_ignored, fail_warned, markdow help="Remote git repo to fetch files from", ) @click.option("-b", "--branch", type=str, default="master", help="Branch of git repository hosting modules.") +@click.option( + "--no-pull", + is_flag=True, + default=False, + help="Use this option for faster commands if you know there have been no recent changes to the repository", +) @click.pass_context -def modules(ctx, git_remote, branch): +def modules(ctx, git_remote, branch, no_pull): """ Commands to manage Nextflow DSL2 modules (tool wrappers). """ @@ -361,7 +367,7 @@ def modules(ctx, git_remote, branch): ctx.ensure_object(dict) # Make repository object to pass to subcommands - ctx.obj["modules_repo_obj"] = nf_core.modules.ModulesRepo(remote_url=git_remote, branch=branch) + ctx.obj["modules_repo_obj"] = nf_core.modules.ModulesRepo(remote_url=git_remote, branch=branch, no_pull=no_pull) # nf-core modules list subcommands diff --git a/nf_core/modules/install.py b/nf_core/modules/install.py index f0b668d751..c84f11d776 100644 --- a/nf_core/modules/install.py +++ b/nf_core/modules/install.py @@ -67,7 +67,7 @@ def install(self, module): return False if self.modules_repo.fullname in modules_json["repos"]: - current_entry = modules_json["repos"][self.modules_repo.fullname].get(module) + current_entry = modules_json["repos"][self.modules_repo.fullname]["modules"].get(module) else: current_entry = None diff --git a/nf_core/modules/modules_command.py b/nf_core/modules/modules_command.py index 8a384ed3ac..a009bbf48a 100644 --- a/nf_core/modules/modules_command.py +++ b/nf_core/modules/modules_command.py @@ -129,6 +129,12 @@ def modules_json_up_to_date(self): for repo, modules in self.module_names.items(): if repo in mod_json["repos"]: for module in modules: + repo_modules = mod_json["repos"][repo].get("modules") + if repo_modules is None: + raise UserWarning( + "Your 'modules.json' is not up to date. " + "Please reinstall it by removing it and rerunning the command." + ) if module in mod_json["repos"][repo]["modules"]: mod_json["repos"][repo]["modules"].pop(module) else: diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index 106d882afc..8cc069adae 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -52,7 +52,7 @@ def update_local_repo_status(repo_name, up_to_date): """ ModulesRepo.local_repo_statuses[repo_name] = up_to_date - def __init__(self, remote_url=None, branch=None): + def __init__(self, remote_url=None, branch=None, no_pull=False): """ Initializes the object and clones the git repository if it is not already present """ @@ -73,7 +73,7 @@ def __init__(self, remote_url=None, branch=None): self.fullname = os.path.splitext(path)[0] - self.setup_local_repo(remote_url, branch) + self.setup_local_repo(remote_url, branch, no_pull) # Verify that the repo seems to be correctly configured if self.fullname != NF_CORE_MODULES_NAME or self.branch: @@ -84,7 +84,7 @@ def __init__(self, remote_url=None, branch=None): self.avail_module_names = None - def setup_local_repo(self, remote, branch): + def setup_local_repo(self, remote, branch, no_pull): """ Sets up the local git repository. If the repository has been cloned previously, it returns a git.Repo object of that clone. Otherwise it tries to clone the repository from @@ -121,6 +121,8 @@ def setup_local_repo(self, remote, branch): # Verify that the requested branch exists by checking it out self.setup_branch(branch) + if no_pull: + ModulesRepo.update_local_repo_status(self.fullname, True) # If the repo is already cloned, pull the latest changes from the remote if not ModulesRepo.local_repo_synced(self.fullname): pbar = rich.progress.Progress( From fdfde82dc82b42f6e9af6de7eb50b9c6ef0a6a83 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Sun, 19 Jun 2022 17:50:27 +0200 Subject: [PATCH 36/55] Remove unused function and update comments --- nf_core/modules/module_utils.py | 4 +- nf_core/modules/modules_command.py | 42 +++++----- nf_core/modules/modules_repo.py | 127 +++++++++++++++++++++++------ 3 files changed, 127 insertions(+), 46 deletions(-) diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index c5019310f8..3fdaccbf10 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -198,10 +198,8 @@ def find_correct_commit_sha(module_name, module_path, modules_repo): # a revision that matches the file contents commit_shas = (commit["git_sha"] for commit in modules_repo.get_module_git_log(module_name, depth=1000)) for commit_sha in commit_shas: - modules_repo.checkout(commit_sha) - if modules_repo.module_files_identical(module_name, module_path): + if modules_repo.module_files_identical(module_name, module_path, commit_sha): return commit_sha - modules_repo.checkout_branch() return None diff --git a/nf_core/modules/modules_command.py b/nf_core/modules/modules_command.py index a009bbf48a..137704114a 100644 --- a/nf_core/modules/modules_command.py +++ b/nf_core/modules/modules_command.py @@ -315,29 +315,31 @@ def clear_module_dir(self, module_name, module_dir): log.error(f"Could not remove module: {e}") return False - def install_module_files(self, module_name, module_version, modules_repo, install_folder, dry_run=False): + def install_module_files(self, module_name, module_version, modules_repo, install_dir, dry_run=False): """ - Copies the files of a module from the local copy of the repo - """ - # Check out the repository at the requested ref - modules_repo.checkout(module_version) - - # Check if the module exists in the branch - if not modules_repo.module_exists(module_name): - log.error( - f"The requested module does not exists in the '{modules_repo.branch}' of {modules_repo.fullname}'" - ) - return False + Installs a module - # Copy the files from the repo to the install folder - shutil.copytree(modules_repo.get_module_dir(module_name), os.path.join(*install_folder, module_name)) + Args: + module_name (str): The name of the module + module_versioN (str): Git SHA for the version of the module to be installed + modules_repo (ModulesRepo): A correctly configured ModulesRepo object + install_dir (str): The path to where the module should be installed (should be the 'modules/' dir of the pipeline) + dry_run (bool): The command does nothing if True - # Switch back to the tip of the branch (needed?) - modules_repo.checkout_branch() - return True + Returns: + (bool): Whether the operation was successful of not + """ + if dry_run: + return True + return modules_repo.install_module(module_name, install_dir, module_version) def load_modules_json(self): - """Loads the modules.json file""" + """ + Loads the modules.json file + + Returns: + (nested dict...): The parsed 'modules.json' file + """ modules_json_path = os.path.join(self.dir, "modules.json") try: with open(modules_json_path, "r") as fh: @@ -348,7 +350,9 @@ def load_modules_json(self): return modules_json def update_modules_json(self, modules_json, modules_repo, module_name, module_version, write_file=True): - """Updates the 'module.json' file with new module info""" + """ + Updates the 'module.json' file with new module info + """ repo_name = modules_repo.fullname remote_url = modules_repo.remote_url if repo_name not in modules_json["repos"]: diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index 8cc069adae..5e6cb48a1e 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -1,6 +1,7 @@ import filecmp import logging import os +import shutil import git import urllib.parse import rich.progress @@ -15,7 +16,22 @@ class RemoteProgressbar(git.RemoteProgress): + """ + An object to create a progressbar for when doing an operation with the remote. + Note that an initialized rich Progress (progress bar) object must be past + during initialization. + """ + def __init__(self, progress_bar, repo_name, remote_url, operation): + """ + Initializes the object and adds a task to the progressbar passed as 'progress_bar' + + Args: + progress_bar (rich.progress.Progress): A rich progress bar object + repo_name (str): Name of the repository the operation is performed on + remote_url (str): Git URL of the repository the operation is performed on + operation (str): The operation performed on the repository, i.e. 'Pulling', 'Cloning' etc. + """ super().__init__() self.progress_bar = progress_bar self.tid = self.progress_bar.add_task( @@ -23,6 +39,10 @@ def __init__(self, progress_bar, repo_name, remote_url, operation): ) def update(self, op_code, cur_count, max_count=None, message=""): + """ + Overrides git.RemoteProgress.update. + Called every time there is a change in the remote operation + """ if not self.progress_bar.tasks[self.tid].started: self.progress_bar.start_task(self.tid) self.progress_bar.update(self.tid, total=max_count, completed=cur_count, state=message) @@ -34,6 +54,10 @@ class ModulesRepo(object): Used by the `nf-core modules` top-level command with -r and -b flags, so that this can be used in the same way by all sub-commands. + + We keep track of the pull-status of the different installed repos in + the static variable local_repo_status. This is so we don't need to + pull a remote several times in one command. """ local_repo_statuses = dict() @@ -138,6 +162,13 @@ def setup_local_repo(self, remote, branch, no_pull): ModulesRepo.update_local_repo_status(self.fullname, True) def setup_branch(self, branch): + """ + Verify that we have a branch and otherwise use the default one. + The branch is then checked out to verify that it exists in the repo. + + Args: + branch (str): Name of branch + """ if branch is None: # Don't bother fetching default branch if we're using nf-core if self.fullname == NF_CORE_MODULES_NAME: @@ -157,7 +188,9 @@ def get_default_branch(self): _, self.branch = origin_head.ref.name.split("/") def branch_exists(self): - """Verifies that the branch exists in the repository by trying to check it out""" + """ + Verifies that the branch exists in the repository by trying to check it out + """ try: self.checkout_branch() except git.exc.GitCommandError: @@ -183,6 +216,9 @@ def checkout_branch(self): def checkout(self, commit): """ Checks out the repository at the requested commit + + Args: + commit (str): Git SHA of the commit """ self.repo.git.checkout(commit) @@ -190,7 +226,11 @@ def module_exists(self, module_name): """ Check if a module exists in the branch of the repo - Returns bool + Args: + module_name (str): The name of the module + + Returns: + (bool): Whether the module exists in this branch of the repository """ return module_name in self.get_avail_modules() @@ -198,12 +238,52 @@ def get_module_dir(self, module_name): """ Returns the file path of a module directory in the repo. Does not verify that the path exists. + Args: + module_name (str): The name of the module - Returns module_path: str + Returns: + module_path (str): The path of the module in the local copy of the repository """ return os.path.join(self.modules_dir, module_name) - def module_files_identical(self, module_name, base_path): + def install_module(self, module_name, install_dir, commit): + """ + Install the module files into a pipeline at the given commit + + Args: + module_name (str): The name of the module + install_dir (str): The path where the module should be installed + commit (str): The git SHA for the version of the module to be installed + + Returns: + (bool): Whether the operation was successful or not + """ + # Check out the repository at the requested ref + self.checkout(commit) + + # Check if the module exists in the branch + if not self.module_exists(module_name): + log.error(f"The requested module does not exists in the '{self.branch}' of {self.fullname}'") + return False + + # Copy the files from the repo to the install folder + shutil.copytree(self.get_module_dir(module_name), os.path.join(*install_dir, module_name)) + + # Switch back to the tip of the branch + self.checkout_branch() + return True + + def module_files_identical(self, module_name, base_path, commit): + """ + Checks whether the module files in a pipeline are identical to the ones in the remote + Args: + module_name (str): The name of the module + base_path (str): The path to the module in the pipeline + + Returns: + (bool): Whether the pipeline files are identical to the repo files + """ + self.checkout(commit) module_files = ["main.nf", "meta.yml"] module_dir = self.get_module_dir(module_name) for file in module_files: @@ -213,26 +293,9 @@ def module_files_identical(self, module_name, base_path): except FileNotFoundError as e: log.debug(f"Could not open file: {os.path.join(module_dir, file)}") continue + self.checkout_branch() return True - def get_module_files(self, module_name, files): - """ - Returns the contents requested files for a module at the current - checked out ref - - Returns contents: [ str ] - """ - - contents = [None] * len(files) - module_path = self.get_module_dir(module_name) - for i, file in enumerate(files): - try: - contents[i] = open(os.path.join(module_path, file), "r").read() - except FileNotFoundError as e: - log.debug(f"Could not open file: {os.path.join(module_path, file)}") - continue - return contents - def get_module_git_log(self, module_name, depth=None, since="2021-07-07T00:00:00Z"): """ Fetches the commit history the of requested module since a given date. The default value is @@ -283,6 +346,13 @@ def get_commit_info(self, sha): raise LookupError(f"Commit '{sha}' not found in the '{self.fullname}'") def get_avail_modules(self): + """ + Gets the names of the modules in the repository. They are detected by + checking which directories have a 'main.nf' file + + Returns: + ([ str ]): The module names + """ if self.avail_module_names is None: # Module directories are characterized by having a 'main.nf' file self.avail_module_names = [ @@ -292,9 +362,18 @@ def get_avail_modules(self): ] return self.avail_module_names - def get_meta_yml(self, module): + def get_meta_yml(self, module_name): + """ + Returns the contents of the 'meta.yml' file of a module + + Args: + module_name (str): The name of the module + + Returns: + (str): The contents of the file in text format + """ self.checkout_branch() - path = os.path.join(self.modules_dir, module, "meta.yml") + path = os.path.join(self.modules_dir, module_name, "meta.yml") if not os.path.exists(path): return None with open(path) as fh: From d7c46bbb24f9d61dd96d490cfefe66fe5afe25c2 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Sun, 19 Jun 2022 19:54:00 +0200 Subject: [PATCH 37/55] Update CHANGELOG.md --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6de9b35e60..554ed0ef84 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ - Add `--fix-version` flag to `nf-core modules lint` command to update modules to the latest version ([#1588](https://github.com/nf-core/tools/pull/1588)) - Fix a bug in the regex extracting the version from biocontainers URLs ([#1598](https://github.com/nf-core/tools/pull/1598)) +- Update how we interface with git remotes. ([#1626](https://github.com/nf-core/tools/issues/1626)) ## [v2.4.1 - Cobolt Koala Patch](https://github.com/nf-core/tools/releases/tag/2.4) - [2022-05-16] @@ -59,7 +60,7 @@ - Add a new command `nf-core modules test` which runs pytests locally. - Print include statement to terminal when `modules install` ([#1520](https://github.com/nf-core/tools/pull/1520)) - Allow follow links when generating `test.yml` file with `nf-core modules create-test-yml` ([1570](https://github.com/nf-core/tools/pull/1570)) -- Escaped test run output before logging it, to avoid a rich ` MarkupError` +- Escaped test run output before logging it, to avoid a rich `MarkupError` ### Linting From a6b6521b96e3a427ac1956b37a933dccae9e8dcb Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Sun, 19 Jun 2022 20:29:53 +0200 Subject: [PATCH 38/55] Update docs --- README.md | 41 ++++++++++++----------------------------- nf_core/__main__.py | 2 +- 2 files changed, 13 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index cfdef5df16..60e67606ca 100644 --- a/README.md +++ b/README.md @@ -907,42 +907,25 @@ This allows multiple pipelines to use the same code for share tools and gives a The nf-core DSL2 modules repository is at +### Running the module commands + +The `nf-core/tools` package contains a suite of commands for working DSL2 modules in your pipeline. The modules commands use `git` to interface +with the `nf-core/modules` repository. Therefore you need to have have `git` installed to use the modules commands. You can then use the +the modules commands for a variety of tasks such as creating new DSL2 modules, viewing and getting metadata about the modules in your pipeline, +and installing and updating modules from a remote git repository. + ### Custom remote modules The modules supercommand comes with two flags for specifying a custom remote: -- `--github-repository `: Specify the repository from which the modules should be fetched. Defaults to `nf-core/modules`. -- `--branch `: Specify the branch from which the modules shoudl be fetched. Defaults to `master`. +- `--git-remote `: Specify the repository from which the modules should be fetched as a git URL. Defaults to the github repository of `nf-core/modules`. +- `--branch `: Specify the branch from which the modules should be fetched. Defaults to the default branch of your repository Note that a custom remote must follow a similar directory structure to that of `nf-core/moduleś` for the `nf-core modules` commands to work properly. -### Private remote modules - -In order to get access to your private modules repo, you need to create -the `~/.config/gh/hosts.yml` file, which is the same file required by -[GitHub CLI](https://cli.github.com/) to deal with private repositories. -Such file is structured as follow: - -```conf -github.com: - oauth_token: - user: - git_protocol: -``` - -The easiest way to create this configuration file is through _GitHub CLI_: follow -its [installation instructions](https://cli.github.com/manual/installation) -and then call: - -```bash -gh auth login -``` - -After that, you will be able to list and install your private modules without -providing your github credentials through command line, by using `--github-repository` -and `--branch` options properly. -See the documentation on [gh auth login](https://cli.github.com/manual/gh_auth_login>) -to get more information. +The modules commands will during initalisation pull try to pull changes from the remote repositories. If you want to disable this, for example +due to performance reason or if you want to run the commands offline, you can diable `git pulls` by using the flag `--no-pull`. Note however +that the commands will still need to clone repositories that have previously not been used. ### List modules diff --git a/nf_core/__main__.py b/nf_core/__main__.py index d91a42e364..b02d8a7abf 100755 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -350,7 +350,7 @@ def lint(dir, release, fix, key, show_passed, fail_ignored, fail_warned, markdow default="git@github.com:nf-core/modules.git", help="Remote git repo to fetch files from", ) -@click.option("-b", "--branch", type=str, default="master", help="Branch of git repository hosting modules.") +@click.option("-b", "--branch", type=str, default=None, help="Branch of git repository hosting modules.") @click.option( "--no-pull", is_flag=True, From 91d7bf83111495724cb5d0e8c1d6b7697c98fb59 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Sun, 19 Jun 2022 20:34:32 +0200 Subject: [PATCH 39/55] Fix docs --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 60e67606ca..ea352ecc86 100644 --- a/README.md +++ b/README.md @@ -923,9 +923,9 @@ The modules supercommand comes with two flags for specifying a custom remote: Note that a custom remote must follow a similar directory structure to that of `nf-core/moduleś` for the `nf-core modules` commands to work properly. -The modules commands will during initalisation pull try to pull changes from the remote repositories. If you want to disable this, for example -due to performance reason or if you want to run the commands offline, you can diable `git pulls` by using the flag `--no-pull`. Note however -that the commands will still need to clone repositories that have previously not been used. +The modules commands will during initalisation try to pull changes from the remote repositories. If you want to disable this, for example +due to performance reason or if you want to run the commands offline, you can use the flag `--no-pull`. Note however that the commands will +still need to clone repositories that have previously not been used. ### List modules @@ -1062,7 +1062,7 @@ There are three additional flags that you can use when installing a module: - `--force`: Overwrite a previously installed version of the module. - `--prompt`: Select the module version using a cli prompt. -- `--sha `: Install the module at a specific commit from the `nf-core/modules` repository. +- `--sha `: Install the module at a specific commit. ### Update modules in a pipeline From ae05e736490f740a0265ff675051b15c221dea07 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Mon, 20 Jun 2022 08:38:30 +0200 Subject: [PATCH 40/55] Fix dry run bug --- nf_core/modules/modules_command.py | 7 ++----- nf_core/modules/update.py | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/nf_core/modules/modules_command.py b/nf_core/modules/modules_command.py index 137704114a..dd59474d86 100644 --- a/nf_core/modules/modules_command.py +++ b/nf_core/modules/modules_command.py @@ -315,22 +315,19 @@ def clear_module_dir(self, module_name, module_dir): log.error(f"Could not remove module: {e}") return False - def install_module_files(self, module_name, module_version, modules_repo, install_dir, dry_run=False): + def install_module_files(self, module_name, module_version, modules_repo, install_dir): """ - Installs a module + Installs a module into the given directory Args: module_name (str): The name of the module module_versioN (str): Git SHA for the version of the module to be installed modules_repo (ModulesRepo): A correctly configured ModulesRepo object install_dir (str): The path to where the module should be installed (should be the 'modules/' dir of the pipeline) - dry_run (bool): The command does nothing if True Returns: (bool): Whether the operation was successful of not """ - if dry_run: - return True return modules_repo.install_module(module_name, install_dir, module_version) def load_modules_json(self): diff --git a/nf_core/modules/update.py b/nf_core/modules/update.py index ca203a49db..a84dcb87b0 100644 --- a/nf_core/modules/update.py +++ b/nf_core/modules/update.py @@ -283,7 +283,7 @@ def update(self, module): install_folder = ["/tmp", next(tempfile._get_candidate_names())] # Download module files - if not self.install_module_files(module, version, modules_repo, install_folder, dry_run=dry_run): + if not self.install_module_files(module, version, modules_repo, install_folder): exit_value = False continue From c97378314c2404d9436badfcda947a3a6a4a800a Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Mon, 20 Jun 2022 09:12:42 +0200 Subject: [PATCH 41/55] Remove double creation of ModulesRepo --- nf_core/__main__.py | 53 +++++++++++++++++++++--------- nf_core/modules/info.py | 4 +-- nf_core/modules/install.py | 5 ++- nf_core/modules/lint/__init__.py | 4 +-- nf_core/modules/list.py | 4 +-- nf_core/modules/modules_command.py | 4 +-- nf_core/modules/modules_repo.py | 4 ++- nf_core/modules/remove.py | 4 +-- nf_core/modules/update.py | 4 +-- 9 files changed, 55 insertions(+), 31 deletions(-) diff --git a/nf_core/__main__.py b/nf_core/__main__.py index b02d8a7abf..04f08b363e 100755 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -366,8 +366,10 @@ def modules(ctx, git_remote, branch, no_pull): # by means other than the `if` block below) ctx.ensure_object(dict) - # Make repository object to pass to subcommands - ctx.obj["modules_repo_obj"] = nf_core.modules.ModulesRepo(remote_url=git_remote, branch=branch, no_pull=no_pull) + # Place the arguments in a context object + ctx.obj["modules_repo_url"] = git_remote + ctx.obj["modules_repo_branch"] = branch + ctx.obj["modules_repo_no_pull"] = no_pull # nf-core modules list subcommands @@ -390,7 +392,9 @@ def remote(ctx, keywords, json): List modules in a remote GitHub repo [dim i](e.g [link=https://github.com/nf-core/modules]nf-core/modules[/])[/]. """ try: - module_list = nf_core.modules.ModuleList(None, remote=True) + module_list = nf_core.modules.ModuleList( + None, True, ctx.obj["modules_repo_url"], ctx.obj["modules_repo_branch"], ctx.obj["modules_repo_no_pull"] + ) module_list.modules_repo = ctx.obj["modules_repo_obj"] print(module_list.list_modules(keywords, json)) except UserWarning as e: @@ -415,8 +419,9 @@ def local(ctx, keywords, json, dir): List modules installed locally in a pipeline """ try: - module_list = nf_core.modules.ModuleList(dir, remote=False) - module_list.modules_repo = ctx.obj["modules_repo_obj"] + module_list = nf_core.modules.ModuleList( + dir, False, ctx.obj["modules_repo_url"], ctx.obj["modules_repo_branch"], ctx.obj["modules_repo_no_pull"] + ) print(module_list.list_modules(keywords, json)) except UserWarning as e: log.critical(e) @@ -444,8 +449,15 @@ def install(ctx, tool, dir, prompt, force, sha): Fetches and installs module files from a remote repo e.g. nf-core/modules. """ try: - module_install = nf_core.modules.ModuleInstall(dir, force=force, prompt=prompt, sha=sha) - module_install.modules_repo = ctx.obj["modules_repo_obj"] + module_install = nf_core.modules.ModuleInstall( + dir, + force, + prompt, + sha, + ctx.obj["modules_repo_url"], + ctx.obj["modules_repo_branch"], + ctx.obj["modules_repo_no_pull"], + ) exit_status = module_install.install(tool) if not exit_status and all: sys.exit(1) @@ -492,9 +504,17 @@ def update(ctx, tool, dir, force, prompt, sha, all, preview, save_diff): """ try: module_install = nf_core.modules.ModuleUpdate( - dir, force=force, prompt=prompt, sha=sha, update_all=all, show_diff=preview, save_diff_fn=save_diff + dir, + force, + prompt, + sha, + all, + preview, + save_diff, + ctx.obj["modules_repo_url"], + ctx.obj["modules_repo_branch"], + ctx.obj["modules_repo_no_pull"], ) - module_install.modules_repo = ctx.obj["modules_repo_obj"] exit_status = module_install.update(tool) if not exit_status and all: sys.exit(1) @@ -519,8 +539,9 @@ def remove(ctx, dir, tool): Remove a module from a pipeline. """ try: - module_remove = nf_core.modules.ModuleRemove(dir) - module_remove.modules_repo = ctx.obj["modules_repo_obj"] + module_remove = nf_core.modules.ModuleRemove( + dir, ctx.obj["modules_repo_url"], ctx.obj["modules_repo_branch"], ctx.obj["modules_repo_no_pull"] + ) module_remove.remove(tool) except UserWarning as e: log.critical(e) @@ -613,8 +634,9 @@ def lint(ctx, tool, dir, key, all, local, passed, fix_version): nf-core/modules repository. """ try: - module_lint = nf_core.modules.ModuleLint(dir=dir) - module_lint.modules_repo = ctx.obj["modules_repo_obj"] + module_lint = nf_core.modules.ModuleLint( + dir, ctx.obj["modules_repo_url"], ctx.obj["modules_repo_branch"], ctx.obj["modules_repo_no_pull"] + ) module_lint.lint( module=tool, key=key, @@ -658,8 +680,9 @@ def info(ctx, tool, dir): If not, usage from the remote modules repo will be shown. """ try: - module_info = nf_core.modules.ModuleInfo(dir, tool) - module_info.modules_repo = ctx.obj["modules_repo_obj"] + module_info = nf_core.modules.ModuleInfo( + dir, tool, ctx.obj["modules_repo_url"], ctx.obj["modules_repo_branch"], ctx.obj["modules_repo_no_pull"] + ) print(module_info.get_module_info()) except UserWarning as e: log.error(e) diff --git a/nf_core/modules/info.py b/nf_core/modules/info.py index d0f79ebe79..4c4f6bb938 100644 --- a/nf_core/modules/info.py +++ b/nf_core/modules/info.py @@ -20,7 +20,7 @@ class ModuleInfo(ModuleCommand): - def __init__(self, pipeline_dir, tool): + def __init__(self, pipeline_dir, tool, remote_url, branch, no_pull): self.module = tool self.meta = None @@ -36,7 +36,7 @@ def __init__(self, pipeline_dir, tool): log.debug(f"Only showing remote info: {e}") pipeline_dir = None - super().__init__(pipeline_dir) + super().__init__(pipeline_dir, remote_url, branch, no_pull) def get_module_info(self): """Given the name of a module, parse meta.yml and print usage help.""" diff --git a/nf_core/modules/install.py b/nf_core/modules/install.py index c84f11d776..3176a96d49 100644 --- a/nf_core/modules/install.py +++ b/nf_core/modules/install.py @@ -13,12 +13,11 @@ class ModuleInstall(ModuleCommand): - def __init__(self, pipeline_dir, force=False, prompt=False, sha=None, update_all=False): - super().__init__(pipeline_dir) + def __init__(self, pipeline_dir, force, prompt, sha, remote_url, branch, no_pull): + super().__init__(pipeline_dir, remote_url, branch, no_pull) self.force = force self.prompt = prompt self.sha = sha - self.update_all = update_all def install(self, module): if self.repo_type == "modules": diff --git a/nf_core/modules/lint/__init__.py b/nf_core/modules/lint/__init__.py index 9bbe173ef5..788ec7d6bb 100644 --- a/nf_core/modules/lint/__init__.py +++ b/nf_core/modules/lint/__init__.py @@ -69,7 +69,7 @@ class ModuleLint(ModuleCommand): from .module_todos import module_todos from .module_version import module_version - def __init__(self, dir): + def __init__(self, dir, remote_url, branch, no_pull): self.dir = dir try: self.dir, self.repo_type = nf_core.modules.module_utils.get_repo_type(self.dir) @@ -79,7 +79,7 @@ def __init__(self, dir): self.passed = [] self.warned = [] self.failed = [] - self.modules_repo = ModulesRepo() + self.modules_repo = ModulesRepo(remote_url, branch, no_pull) self.lint_tests = self._get_all_lint_tests() # Get lists of modules install in directory self.all_local_modules, self.all_nfcore_modules = self.get_installed_modules() diff --git a/nf_core/modules/list.py b/nf_core/modules/list.py index 9d9c4aad64..f8d1205971 100644 --- a/nf_core/modules/list.py +++ b/nf_core/modules/list.py @@ -13,8 +13,8 @@ class ModuleList(ModuleCommand): - def __init__(self, pipeline_dir, remote=True): - super().__init__(pipeline_dir) + def __init__(self, pipeline_dir, remote, remote_url, branch, no_pull): + super().__init__(pipeline_dir, remote_url, branch, no_pull) self.remote = remote def list_modules(self, keywords=None, print_json=False): diff --git a/nf_core/modules/modules_command.py b/nf_core/modules/modules_command.py index dd59474d86..e347ea1907 100644 --- a/nf_core/modules/modules_command.py +++ b/nf_core/modules/modules_command.py @@ -25,11 +25,11 @@ class ModuleCommand: Base class for the 'nf-core modules' commands """ - def __init__(self, dir): + def __init__(self, dir, remote_url, branch, no_pull): """ Initialise the ModulesCommand object """ - self.modules_repo = ModulesRepo() + self.modules_repo = ModulesRepo(remote_url, branch, no_pull) self.dir = dir self.module_names = [] try: diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index 5e6cb48a1e..c38df3eec2 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -45,7 +45,9 @@ def update(self, op_code, cur_count, max_count=None, message=""): """ if not self.progress_bar.tasks[self.tid].started: self.progress_bar.start_task(self.tid) - self.progress_bar.update(self.tid, total=max_count, completed=cur_count, state=message) + self.progress_bar.update( + self.tid, total=max_count, completed=cur_count, state=f"{int(cur_count)}/{int(max_count)}" + ) class ModulesRepo(object): diff --git a/nf_core/modules/remove.py b/nf_core/modules/remove.py index 1bcf8c8dd5..e3429488e7 100644 --- a/nf_core/modules/remove.py +++ b/nf_core/modules/remove.py @@ -13,11 +13,11 @@ class ModuleRemove(ModuleCommand): - def __init__(self, pipeline_dir): + def __init__(self, pipeline_dir, remote_url, branch, no_pull): """ Initialise the ModulesRemove object and run remove command """ - super().__init__(pipeline_dir) + super().__init__(pipeline_dir, remote_url, branch, no_pull) def remove(self, module): """ diff --git a/nf_core/modules/update.py b/nf_core/modules/update.py index a84dcb87b0..4a2d3d5a81 100644 --- a/nf_core/modules/update.py +++ b/nf_core/modules/update.py @@ -23,9 +23,9 @@ class ModuleUpdate(ModuleCommand): def __init__( - self, pipeline_dir, force=False, prompt=False, sha=None, update_all=False, show_diff=None, save_diff_fn=None + self, pipeline_dir, force, prompt, sha, update_all, show_diff, save_diff_fn, remote_url, branch, no_pull ): - super().__init__(pipeline_dir) + super().__init__(pipeline_dir, remote_url, branch, no_pull) self.force = force self.prompt = prompt self.sha = sha From 1bf1db72ee837843d0fd73ef5fb1622d1acf8db5 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Mon, 20 Jun 2022 09:23:43 +0200 Subject: [PATCH 42/55] Fix list local --- nf_core/modules/list.py | 2 +- nf_core/modules/modules_repo.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/nf_core/modules/list.py b/nf_core/modules/list.py index f8d1205971..b735224571 100644 --- a/nf_core/modules/list.py +++ b/nf_core/modules/list.py @@ -105,7 +105,7 @@ def pattern_msg(keywords): version_sha = module_entry["git_sha"] try: # pass repo_name to get info on modules even outside nf-core/modules - message, date = ModulesRepo().get_commit_info( + message, date = ModulesRepo(remote_url=repo_entry["git_url"]).get_commit_info( version_sha ) # NOTE add support for other remotes except LookupError as e: diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index c38df3eec2..62d58b52da 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -179,6 +179,7 @@ def setup_branch(self, branch): self.branch = self.get_default_branch() else: self.branch = branch + # Verify that the branch exists by checking it out self.branch_exists() @@ -187,7 +188,8 @@ def get_default_branch(self): Gets the default branch for the repo (the branch origin/HEAD is pointing to) """ origin_head = next(ref for ref in self.repo.refs if ref.name == "origin/HEAD") - _, self.branch = origin_head.ref.name.split("/") + _, branch = origin_head.ref.name.split("/") + return branch def branch_exists(self): """ From 76374e390bd0fc60871f592e9f0aff4b07ad3b64 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Mon, 20 Jun 2022 09:29:00 +0200 Subject: [PATCH 43/55] Fix no-pull --- nf_core/modules/modules_repo.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index 62d58b52da..f869a89f9f 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -63,6 +63,7 @@ class ModulesRepo(object): """ local_repo_statuses = dict() + no_pull_global = False @staticmethod def local_repo_synced(repo_name): @@ -83,6 +84,9 @@ def __init__(self, remote_url=None, branch=None, no_pull=False): Initializes the object and clones the git repository if it is not already present """ + # This allows us to set this one time and then keep track of the user's choice + ModulesRepo.no_pull_global |= no_pull + # Check if the remote seems to be well formed if remote_url is None: remote_url = NF_CORE_MODULES_REMOTE @@ -147,7 +151,7 @@ def setup_local_repo(self, remote, branch, no_pull): # Verify that the requested branch exists by checking it out self.setup_branch(branch) - if no_pull: + if ModulesRepo.no_pull_global: ModulesRepo.update_local_repo_status(self.fullname, True) # If the repo is already cloned, pull the latest changes from the remote if not ModulesRepo.local_repo_synced(self.fullname): From c166aa19f54f31a4ad6f27014bfbdd837b7220ad Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Mon, 20 Jun 2022 10:35:39 +0200 Subject: [PATCH 44/55] Update tests --- nf_core/__main__.py | 4 +++- nf_core/lint/modules_json.py | 9 ++++++++- nf_core/modules/bump_versions.py | 4 ++-- nf_core/modules/install.py | 2 +- nf_core/modules/lint/__init__.py | 2 +- nf_core/modules/lint/module_changes.py | 4 ++-- nf_core/modules/lint/module_version.py | 4 ++-- nf_core/modules/list.py | 2 +- nf_core/modules/modules_command.py | 2 +- nf_core/modules/modules_repo.py | 1 + nf_core/modules/remove.py | 2 +- nf_core/modules/update.py | 12 +++++++++++- nf_core/pipeline-template/modules.json | 19 +++++++++++-------- 13 files changed, 45 insertions(+), 22 deletions(-) diff --git a/nf_core/__main__.py b/nf_core/__main__.py index 04f08b363e..ce93258957 100755 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -702,7 +702,9 @@ def bump_versions(ctx, tool, dir, all, show_all): the nf-core/modules repo. """ try: - version_bumper = nf_core.modules.bump_versions.ModuleVersionBumper(pipeline_dir=dir) + version_bumper = nf_core.modules.bump_versions.ModuleVersionBumper( + dir, ctx.obj["modules_repo_url"], ctx.obj["modules_repo_branch"], ctx.obj["modules_repo_no_pull"] + ) version_bumper.bump_versions(module=tool, all_modules=all, show_uptodate=show_all) except nf_core.modules.module_utils.ModuleException as e: log.error(e) diff --git a/nf_core/lint/modules_json.py b/nf_core/lint/modules_json.py index 6e6ddd6d17..2a4aa1a5b3 100644 --- a/nf_core/lint/modules_json.py +++ b/nf_core/lint/modules_json.py @@ -27,7 +27,14 @@ def modules_json(self): all_modules_passed = True for repo in modules_json["repos"].keys(): - for key in modules_json["repos"][repo].keys(): + # Check if the modules.json has been updated to keep the + if "modules" not in modules_json["repos"][repo] or "git_url" not in modules_json["repos"][repo]: + failed.append( + f"Your `modules.json` file is outdated. Please remove it and reinstall it by running any module command" + ) + continue + + for key in modules_json["repos"][repo]["modules"]: if not key in modules_command.module_names[repo]: failed.append(f"Entry for `{key}` found in `modules.json` but module is not installed in pipeline.") all_modules_passed = False diff --git a/nf_core/modules/bump_versions.py b/nf_core/modules/bump_versions.py index 0da96c72d4..1c5fedaf8f 100644 --- a/nf_core/modules/bump_versions.py +++ b/nf_core/modules/bump_versions.py @@ -28,8 +28,8 @@ class ModuleVersionBumper(ModuleCommand): - def __init__(self, pipeline_dir): - super().__init__(pipeline_dir) + def __init__(self, pipeline_dir, remote_url=None, branch=None, no_pull=False): + super().__init__(pipeline_dir, remote_url, branch, no_pull) self.up_to_date = None self.updated = None diff --git a/nf_core/modules/install.py b/nf_core/modules/install.py index 3176a96d49..ccce6200da 100644 --- a/nf_core/modules/install.py +++ b/nf_core/modules/install.py @@ -13,7 +13,7 @@ class ModuleInstall(ModuleCommand): - def __init__(self, pipeline_dir, force, prompt, sha, remote_url, branch, no_pull): + def __init__(self, pipeline_dir, force=False, prompt=False, sha=None, remote_url=None, branch=None, no_pull=False): super().__init__(pipeline_dir, remote_url, branch, no_pull) self.force = force self.prompt = prompt diff --git a/nf_core/modules/lint/__init__.py b/nf_core/modules/lint/__init__.py index 788ec7d6bb..d7c40c8d09 100644 --- a/nf_core/modules/lint/__init__.py +++ b/nf_core/modules/lint/__init__.py @@ -69,7 +69,7 @@ class ModuleLint(ModuleCommand): from .module_todos import module_todos from .module_version import module_version - def __init__(self, dir, remote_url, branch, no_pull): + def __init__(self, dir, remote_url=None, branch=None, no_pull=False): self.dir = dir try: self.dir, self.repo_type = nf_core.modules.module_utils.get_repo_type(self.dir) diff --git a/nf_core/modules/lint/module_changes.py b/nf_core/modules/lint/module_changes.py index 9676481b43..c97e704389 100644 --- a/nf_core/modules/lint/module_changes.py +++ b/nf_core/modules/lint/module_changes.py @@ -25,11 +25,11 @@ def module_changes(module_lint_object, module): files_to_check = ["main.nf", "meta.yml"] # Loop over nf-core modules - module_base_url = f"https://raw.githubusercontent.com/{module_lint_object.modules_repo.name}/{module_lint_object.modules_repo.branch}/modules/{module.module_name}/" + module_base_url = f"https://raw.githubusercontent.com/{module_lint_object.modules_repo.fullname}/{module_lint_object.modules_repo.branch}/modules/{module.module_name}/" # If module.git_sha specified, check specific commit version for changes if module.git_sha: - module_base_url = f"https://raw.githubusercontent.com/{module_lint_object.modules_repo.name}/{module.git_sha}/modules/{module.module_name}/" + module_base_url = f"https://raw.githubusercontent.com/{module_lint_object.modules_repo.fullname}/{module.git_sha}/modules/{module.module_name}/" for f in files_to_check: # open local copy, continue if file not found (a failed message has already been issued in this case) diff --git a/nf_core/modules/lint/module_version.py b/nf_core/modules/lint/module_version.py index 36febcc5b4..979a4ae011 100644 --- a/nf_core/modules/lint/module_version.py +++ b/nf_core/modules/lint/module_version.py @@ -31,7 +31,7 @@ def module_version(module_lint_object, module): # Verify that a git_sha exists in the `modules.json` file for this module try: - module_entry = module_lint_object.modules_json["repos"][module_lint_object.modules_repo.name][ + module_entry = module_lint_object.modules_json["repos"][module_lint_object.modules_repo.fullname]["modules"][ module.module_name ] git_sha = module_entry["git_sha"] @@ -42,7 +42,7 @@ def module_version(module_lint_object, module): try: modules_repo = nf_core.modules.modules_repo.ModulesRepo() module_git_log = modules_repo.get_module_git_log(module.module_name) - if git_sha == module_git_log[0]["git_sha"]: + if git_sha == next(module_git_log)["git_sha"]: module.passed.append(("module_version", "Module is the latest version", module.module_dir)) else: module.warned.append(("module_version", "New version available", module.module_dir)) diff --git a/nf_core/modules/list.py b/nf_core/modules/list.py index b735224571..dc7f6cf91d 100644 --- a/nf_core/modules/list.py +++ b/nf_core/modules/list.py @@ -13,7 +13,7 @@ class ModuleList(ModuleCommand): - def __init__(self, pipeline_dir, remote, remote_url, branch, no_pull): + def __init__(self, pipeline_dir, remote=True, remote_url=None, branch=None, no_pull=False): super().__init__(pipeline_dir, remote_url, branch, no_pull) self.remote = remote diff --git a/nf_core/modules/modules_command.py b/nf_core/modules/modules_command.py index e347ea1907..9b4f435ef1 100644 --- a/nf_core/modules/modules_command.py +++ b/nf_core/modules/modules_command.py @@ -25,7 +25,7 @@ class ModuleCommand: Base class for the 'nf-core modules' commands """ - def __init__(self, dir, remote_url, branch, no_pull): + def __init__(self, dir, remote_url=None, branch=None, no_pull=False): """ Initialise the ModulesCommand object """ diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index f869a89f9f..478c07df9e 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -2,6 +2,7 @@ import logging import os import shutil +import sys import git import urllib.parse import rich.progress diff --git a/nf_core/modules/remove.py b/nf_core/modules/remove.py index e3429488e7..5a3748370c 100644 --- a/nf_core/modules/remove.py +++ b/nf_core/modules/remove.py @@ -13,7 +13,7 @@ class ModuleRemove(ModuleCommand): - def __init__(self, pipeline_dir, remote_url, branch, no_pull): + def __init__(self, pipeline_dir, remote_url=None, branch=None, no_pull=False): """ Initialise the ModulesRemove object and run remove command """ diff --git a/nf_core/modules/update.py b/nf_core/modules/update.py index 4a2d3d5a81..c96f0b9b92 100644 --- a/nf_core/modules/update.py +++ b/nf_core/modules/update.py @@ -23,7 +23,17 @@ class ModuleUpdate(ModuleCommand): def __init__( - self, pipeline_dir, force, prompt, sha, update_all, show_diff, save_diff_fn, remote_url, branch, no_pull + self, + pipeline_dir, + force=False, + prompt=False, + sha=None, + update_all=False, + show_diff=None, + save_diff_fn=None, + remote_url=None, + branch=None, + no_pull=False, ): super().__init__(pipeline_dir, remote_url, branch, no_pull) self.force = force diff --git a/nf_core/pipeline-template/modules.json b/nf_core/pipeline-template/modules.json index 8a6a36ec3b..af2cb416d8 100644 --- a/nf_core/pipeline-template/modules.json +++ b/nf_core/pipeline-template/modules.json @@ -3,14 +3,17 @@ "homePage": "https://github.com/{{ name }}", "repos": { "nf-core/modules": { - "custom/dumpsoftwareversions": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "fastqc": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "multiqc": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_url": "git@github.com@nf-core/modules.git", + "modules": { + "custom/dumpsoftwareversions": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "fastqc": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "multiqc": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + } } } } From e97d30c14f3ebb9c7dd1a5878bc0a42c7e47edc3 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Mon, 20 Jun 2022 11:01:25 +0200 Subject: [PATCH 45/55] Fix the last bugs --- nf_core/modules/module_utils.py | 11 +++++-- nf_core/modules/modules_repo.py | 52 ++++++++++++++++++--------------- 2 files changed, 37 insertions(+), 26 deletions(-) diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index 3fdaccbf10..1b0ff6fc04 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -62,6 +62,8 @@ def path_from_remote(remote_url): path = path[-1] if len(path) > 1 else path[0] path = urllib.parse.urlparse(path) path = path.path + path = os.path.splitext(path)[0] + return path def get_pipeline_module_repositories(modules_dir): @@ -88,7 +90,9 @@ def get_pipeline_module_repositories(modules_dir): s="ies" if len(dirs_not_covered) > 0 else "y", l="', '".join(dirs_not_covered) ) ) - nrepo_remote = questionary.text("Please provide a URL for for one of the remaining repos").unsafe_ask() + nrepo_remote = questionary.text( + "Please provide a URL for for one of the repos contained in the untracked directories" + ).unsafe_ask() # Verify that the remote exists while True: try: @@ -108,7 +112,7 @@ def get_pipeline_module_repositories(modules_dir): ) dir_name = questionary.text( "Please provide the correct directory, it will be renamed. If left empty, the remote will be ignored" - ) + ).unsafe_ask() if dir_name: os.rename(os.path.join(modules_dir, dir_name), os.path.join(modules_dir, nrepo_name)) else: @@ -162,7 +166,8 @@ def create_modules_json(pipeline_dir): ) for repo_name, module_names, remote in sorted(repo_module_names): try: - modules_repo = ModulesRepo(remote_url=remote) + # Create a ModulesRepo object without progress bar to not conflict with the other one + modules_repo = ModulesRepo(remote_url=remote, no_progress=True) except LookupError as e: raise UserWarning(e) diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index 478c07df9e..dfb3a99804 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -80,7 +80,7 @@ def update_local_repo_status(repo_name, up_to_date): """ ModulesRepo.local_repo_statuses[repo_name] = up_to_date - def __init__(self, remote_url=None, branch=None, no_pull=False): + def __init__(self, remote_url=None, branch=None, no_pull=False, no_progress=False): """ Initializes the object and clones the git repository if it is not already present """ @@ -104,7 +104,7 @@ def __init__(self, remote_url=None, branch=None, no_pull=False): self.fullname = os.path.splitext(path)[0] - self.setup_local_repo(remote_url, branch, no_pull) + self.setup_local_repo(remote_url, branch, no_progress) # Verify that the repo seems to be correctly configured if self.fullname != NF_CORE_MODULES_NAME or self.branch: @@ -115,7 +115,7 @@ def __init__(self, remote_url=None, branch=None, no_pull=False): self.avail_module_names = None - def setup_local_repo(self, remote, branch, no_pull): + def setup_local_repo(self, remote, branch, no_progress=True): """ Sets up the local git repository. If the repository has been cloned previously, it returns a git.Repo object of that clone. Otherwise it tries to clone the repository from @@ -129,18 +129,21 @@ def setup_local_repo(self, remote, branch, no_pull): self.local_repo_dir = os.path.join(NFCORE_DIR, self.fullname) if not os.path.exists(self.local_repo_dir): try: - pbar = rich.progress.Progress( - "[bold blue]{task.description}", - rich.progress.BarColumn(bar_width=None), - "[bold yellow]{task.fields[state]}", - transient=True, - ) - with pbar: - self.repo = git.Repo.clone_from( - remote, - self.local_repo_dir, - progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Cloning"), + if no_progress: + self.repo = git.Repo.clone_from(remote, self.local_repo_dir) + else: + pbar = rich.progress.Progress( + "[bold blue]{task.description}", + rich.progress.BarColumn(bar_width=None), + "[bold yellow]{task.fields[state]}", + transient=True, ) + with pbar: + self.repo = git.Repo.clone_from( + remote, + self.local_repo_dir, + progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Cloning"), + ) ModulesRepo.update_local_repo_status(self.fullname, True) except git.exc.GitCommandError: raise LookupError(f"Failed to clone from the remote: `{remote}`") @@ -156,16 +159,19 @@ def setup_local_repo(self, remote, branch, no_pull): ModulesRepo.update_local_repo_status(self.fullname, True) # If the repo is already cloned, pull the latest changes from the remote if not ModulesRepo.local_repo_synced(self.fullname): - pbar = rich.progress.Progress( - "[bold blue]{task.description}", - rich.progress.BarColumn(bar_width=None), - "[bold yellow]{task.fields[state]}", - transient=True, - ) - with pbar: - self.repo.remotes.origin.pull( - progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Pulling") + if no_progress: + self.repo.remotes.origin.pull() + else: + pbar = rich.progress.Progress( + "[bold blue]{task.description}", + rich.progress.BarColumn(bar_width=None), + "[bold yellow]{task.fields[state]}", + transient=True, ) + with pbar: + self.repo.remotes.origin.pull( + progress=RemoteProgressbar(pbar, self.fullname, self.remote_url, "Pulling") + ) ModulesRepo.update_local_repo_status(self.fullname, True) def setup_branch(self, branch): From 7c70169acfa0e34b11520e4f9c9d77a2e2018509 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Mon, 20 Jun 2022 12:39:12 +0200 Subject: [PATCH 46/55] Remove sneaky api calls --- nf_core/modules/lint/module_changes.py | 63 +++++--------------------- nf_core/modules/module_utils.py | 2 +- nf_core/modules/modules_repo.py | 11 +++-- 3 files changed, 20 insertions(+), 56 deletions(-) diff --git a/nf_core/modules/lint/module_changes.py b/nf_core/modules/lint/module_changes.py index c97e704389..2ee89102aa 100644 --- a/nf_core/modules/lint/module_changes.py +++ b/nf_core/modules/lint/module_changes.py @@ -6,8 +6,6 @@ import requests import rich -from nf_core.modules.lint import LintResult - def module_changes(module_lint_object, module): """ @@ -22,59 +20,22 @@ def module_changes(module_lint_object, module): Only runs when linting a pipeline, not the modules repository """ - files_to_check = ["main.nf", "meta.yml"] - - # Loop over nf-core modules - module_base_url = f"https://raw.githubusercontent.com/{module_lint_object.modules_repo.fullname}/{module_lint_object.modules_repo.branch}/modules/{module.module_name}/" - - # If module.git_sha specified, check specific commit version for changes - if module.git_sha: - module_base_url = f"https://raw.githubusercontent.com/{module_lint_object.modules_repo.fullname}/{module.git_sha}/modules/{module.module_name}/" - - for f in files_to_check: - # open local copy, continue if file not found (a failed message has already been issued in this case) - try: - local_copy = open(os.path.join(module.module_dir, f), "r").read() - except FileNotFoundError as e: - continue - - # Download remote copy and compare - url = module_base_url + f - r = requests.get(url=url) - - if r.status_code != 200: - module.warned.append( + for f, same in module_lint_object.modules_repo.module_files_identical( + module.module_name, module.module_dir, module.git_sha + ).items(): + if same: + module.passed.append( ( "check_local_copy", - "Could not fetch remote copy, skipping comparison.", + "Local copy of module up to date", f"{os.path.join(module.module_dir, f)}", ) ) else: - try: - remote_copy = r.content.decode("utf-8") - - if local_copy != remote_copy: - module.failed.append( - ( - "check_local_copy", - "Local copy of module does not match remote", - f"{os.path.join(module.module_dir, f)}", - ) - ) - else: - module.passed.append( - ( - "check_local_copy", - "Local copy of module up to date", - f"{os.path.join(module.module_dir, f)}", - ) - ) - except UnicodeDecodeError as e: - module.warned.append( - ( - "check_local_copy", - f"Could not decode file from {url}. Skipping comparison ({e})", - f"{os.path.join(module.module_dir, f)}", - ) + module.failed.append( + ( + "check_local_copy", + "Local copy of module does not match remote", + f"{os.path.join(module.module_dir, f)}", ) + ) diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index 1b0ff6fc04..4ff7a5e963 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -203,7 +203,7 @@ def find_correct_commit_sha(module_name, module_path, modules_repo): # a revision that matches the file contents commit_shas = (commit["git_sha"] for commit in modules_repo.get_module_git_log(module_name, depth=1000)) for commit_sha in commit_shas: - if modules_repo.module_files_identical(module_name, module_path, commit_sha): + if all(modules_repo.module_files_identical(module_name, module_path, commit_sha).values()): return commit_sha return None diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index dfb3a99804..da2e893c72 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -298,18 +298,21 @@ def module_files_identical(self, module_name, base_path, commit): Returns: (bool): Whether the pipeline files are identical to the repo files """ - self.checkout(commit) + if commit is None: + self.checkout_branch() + else: + self.checkout(commit) module_files = ["main.nf", "meta.yml"] module_dir = self.get_module_dir(module_name) + files_identical = {file: True for file in module_files} for file in module_files: try: - if not filecmp.cmp(os.path.join(module_dir, file), os.path.join(base_path, file)): - return False + files_identical[file] = filecmp.cmp(os.path.join(module_dir, file), os.path.join(base_path, file)) except FileNotFoundError as e: log.debug(f"Could not open file: {os.path.join(module_dir, file)}") continue self.checkout_branch() - return True + return files_identical def get_module_git_log(self, module_name, depth=None, since="2021-07-07T00:00:00Z"): """ From 08b92d8b5cdf7c6d0e96ba0e02ba56a4d769ad92 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Mon, 20 Jun 2022 13:15:42 +0200 Subject: [PATCH 47/55] make work with https --- nf_core/__main__.py | 2 +- nf_core/modules/module_utils.py | 20 ++++++++++++++------ nf_core/modules/modules_repo.py | 15 +++++---------- 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/nf_core/__main__.py b/nf_core/__main__.py index ce93258957..a2b56620bf 100755 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -347,7 +347,7 @@ def lint(dir, release, fix, key, show_passed, fail_ignored, fail_warned, markdow "-g", "--git-remote", type=str, - default="git@github.com:nf-core/modules.git", + default=nf_core.modules.modules_repo.NF_CORE_MODULES_REMOTE, help="Remote git repo to fetch files from", ) @click.option("-b", "--branch", type=str, default=None, help="Branch of git repository hosting modules.") diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index 4ff7a5e963..f382793f62 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -57,12 +57,20 @@ def path_from_remote(remote_url): Extracts the path from the remote URL See https://mirrors.edge.kernel.org/pub/software/scm/git/docs/git-clone.html#URLS for the possible URL patterns """ - # Remove the initial `git@`` if it is present - path = remote_url.split("@") - path = path[-1] if len(path) > 1 else path[0] - path = urllib.parse.urlparse(path) - path = path.path - path = os.path.splitext(path)[0] + # Check whether we have a https or ssh url + if remote_url.startswith("https"): + path = urllib.parse.urlparse(remote_url) + path = path.path + # Remove the intial '/' + path = path[1:] + path = os.path.splitext(path)[0] + else: + # Remove the initial `git@`` + path = remote_url.split("@") + path = path[-1] if len(path) > 1 else path[0] + path = urllib.parse.urlparse(path) + path = path.path + path = os.path.splitext(path)[0] return path diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index da2e893c72..7e98cdedf2 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -7,13 +7,15 @@ import urllib.parse import rich.progress +import nf_core.modules.module_utils from nf_core.utils import NFCORE_DIR, gh_api + log = logging.getLogger(__name__) # Constants for the nf-core/modules repo used throughout the module files NF_CORE_MODULES_NAME = "nf-core/modules" -NF_CORE_MODULES_REMOTE = "git@github.com:nf-core/modules.git" +NF_CORE_MODULES_REMOTE = "https://github.com/nf-core/modules.git" class RemoteProgressbar(git.RemoteProgress): @@ -94,15 +96,7 @@ def __init__(self, remote_url=None, branch=None, no_pull=False, no_progress=Fals self.remote_url = remote_url - # Extract the repo path from the remote url - # See https://mirrors.edge.kernel.org/pub/software/scm/git/docs/git-clone.html#URLS for the possible URL patterns - # Remove the initial `git@`` if it is present - path = remote_url.split("@") - path = path[-1] if len(path) > 1 else path[0] - path = urllib.parse.urlparse(path) - path = path.path - - self.fullname = os.path.splitext(path)[0] + self.fullname = nf_core.modules.module_utils.path_from_remote(self.remote_url) self.setup_local_repo(remote_url, branch, no_progress) @@ -127,6 +121,7 @@ def setup_local_repo(self, remote, branch, no_progress=True): Sets self.repo """ self.local_repo_dir = os.path.join(NFCORE_DIR, self.fullname) + log.info(f"'{self.fullname}'") if not os.path.exists(self.local_repo_dir): try: if no_progress: From 305ead83364829c635e8e4d0dbce3ca364939bbe Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Mon, 20 Jun 2022 13:23:45 +0200 Subject: [PATCH 48/55] isort --- nf_core/modules/info.py | 1 - nf_core/modules/module_utils.py | 8 ++++---- nf_core/modules/modules_command.py | 13 ++++++++----- nf_core/modules/modules_repo.py | 4 ++-- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/nf_core/modules/info.py b/nf_core/modules/info.py index 4c4f6bb938..0800b41157 100644 --- a/nf_core/modules/info.py +++ b/nf_core/modules/info.py @@ -12,7 +12,6 @@ from rich.text import Text from .module_utils import get_repo_type - from .modules_command import ModuleCommand from .modules_repo import ModulesRepo diff --git a/nf_core/modules/module_utils.py b/nf_core/modules/module_utils.py index f382793f62..887f64bc12 100644 --- a/nf_core/modules/module_utils.py +++ b/nf_core/modules/module_utils.py @@ -3,17 +3,17 @@ import json import logging import os -from sys import modules -import git import urllib -from pyrsistent import m +from sys import modules +import git import questionary import rich +from pyrsistent import m import nf_core.utils -from .modules_repo import ModulesRepo, NF_CORE_MODULES_NAME, NF_CORE_MODULES_REMOTE +from .modules_repo import NF_CORE_MODULES_NAME, NF_CORE_MODULES_REMOTE, ModulesRepo from .nfcore_module import NFCoreModule log = logging.getLogger(__name__) diff --git a/nf_core/modules/modules_command.py b/nf_core/modules/modules_command.py index 9b4f435ef1..86b0ff699e 100644 --- a/nf_core/modules/modules_command.py +++ b/nf_core/modules/modules_command.py @@ -1,20 +1,23 @@ import copy -from datetime import datetime import glob import json import logging import os import shutil -import questionary -import git +from datetime import datetime +import git +import questionary import yaml import nf_core.modules.module_utils import nf_core.utils - from nf_core import modules -from nf_core.modules.modules_repo import ModulesRepo, NF_CORE_MODULES_NAME, NF_CORE_MODULES_REMOTE +from nf_core.modules.modules_repo import ( + NF_CORE_MODULES_NAME, + NF_CORE_MODULES_REMOTE, + ModulesRepo, +) from nf_core.utils import plural_s as _s log = logging.getLogger(__name__) diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index 7e98cdedf2..bec31cf06d 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -3,14 +3,14 @@ import os import shutil import sys -import git import urllib.parse + +import git import rich.progress import nf_core.modules.module_utils from nf_core.utils import NFCORE_DIR, gh_api - log = logging.getLogger(__name__) # Constants for the nf-core/modules repo used throughout the module files From 5e7c1a33caa51958318fee49f4cce5754a0954c9 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Mon, 20 Jun 2022 13:36:28 +0200 Subject: [PATCH 49/55] Don't fail horribly due to LookupError --- nf_core/__main__.py | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/nf_core/__main__.py b/nf_core/__main__.py index a2b56620bf..392b836afc 100755 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -400,6 +400,9 @@ def remote(ctx, keywords, json): except UserWarning as e: log.critical(e) sys.exit(1) + except LookupError as e: + log.error(e) + sys.exit(1) # nf-core modules list local @@ -426,6 +429,9 @@ def local(ctx, keywords, json, dir): except UserWarning as e: log.critical(e) sys.exit(1) + except LookupError as e: + log.error(e) + sys.exit(1) # nf-core modules install @@ -464,6 +470,9 @@ def install(ctx, tool, dir, prompt, force, sha): except UserWarning as e: log.error(e) sys.exit(1) + except LookupError as e: + log.error(e) + sys.exit(1) # nf-core modules update @@ -521,6 +530,9 @@ def update(ctx, tool, dir, force, prompt, sha, all, preview, save_diff): except UserWarning as e: log.error(e) sys.exit(1) + except LookupError as e: + log.error(e) + sys.exit(1) # nf-core modules remove @@ -546,6 +558,9 @@ def remove(ctx, dir, tool): except UserWarning as e: log.critical(e) sys.exit(1) + except LookupError as e: + log.error(e) + sys.exit(1) # nf-core modules create @@ -588,6 +603,9 @@ def create_module(ctx, tool, dir, author, label, meta, no_meta, force, conda_nam except UserWarning as e: log.critical(e) sys.exit(1) + except LookupError as e: + log.error(e) + sys.exit(1) # nf-core modules create-test-yml @@ -611,7 +629,9 @@ def create_test_yml(ctx, tool, run_tests, output, force, no_prompts): except UserWarning as e: log.critical(e) sys.exit(1) - + except LookupError as e: + log.error(e) + sys.exit(1) # nf-core modules lint @modules.command() @@ -654,6 +674,9 @@ def lint(ctx, tool, dir, key, all, local, passed, fix_version): except UserWarning as e: log.critical(e) sys.exit(1) + except LookupError as e: + log.error(e) + sys.exit(1) # nf-core modules info @@ -687,7 +710,9 @@ def info(ctx, tool, dir): except UserWarning as e: log.error(e) sys.exit(1) - + except LookupError as e: + log.error(e) + sys.exit(1) # nf-core modules bump-versions @modules.command() @@ -712,7 +737,9 @@ def bump_versions(ctx, tool, dir, all, show_all): except UserWarning as e: log.critical(e) sys.exit(1) - + except LookupError as e: + log.error(e) + sys.exit(1) # nf-core modules mulled @modules.command() From 228973ab5b7495cee58e6c5461002f2fce4e34df Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Mon, 20 Jun 2022 13:47:43 +0200 Subject: [PATCH 50/55] black --- nf_core/__main__.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/nf_core/__main__.py b/nf_core/__main__.py index 392b836afc..b6921210cc 100755 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -633,6 +633,7 @@ def create_test_yml(ctx, tool, run_tests, output, force, no_prompts): log.error(e) sys.exit(1) + # nf-core modules lint @modules.command() @click.pass_context @@ -714,6 +715,7 @@ def info(ctx, tool, dir): log.error(e) sys.exit(1) + # nf-core modules bump-versions @modules.command() @click.pass_context @@ -741,6 +743,7 @@ def bump_versions(ctx, tool, dir, all, show_all): log.error(e) sys.exit(1) + # nf-core modules mulled @modules.command() @click.argument("specifications", required=True, nargs=-1, metavar=" <...>") @@ -802,6 +805,9 @@ def test_module(ctx, tool, no_prompts, pytest_args): except UserWarning as e: log.critical(e) sys.exit(1) + except LookupError as e: + log.error(e) + sys.exit(1) # nf-core schema subcommands From b30007ec5c0177bf09b68a63b490d6776bfdd7d7 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Tue, 21 Jun 2022 11:28:19 +0200 Subject: [PATCH 51/55] Fix lookup bug --- nf_core/__main__.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/nf_core/__main__.py b/nf_core/__main__.py index b6921210cc..c15c4a1d44 100755 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -395,14 +395,10 @@ def remote(ctx, keywords, json): module_list = nf_core.modules.ModuleList( None, True, ctx.obj["modules_repo_url"], ctx.obj["modules_repo_branch"], ctx.obj["modules_repo_no_pull"] ) - module_list.modules_repo = ctx.obj["modules_repo_obj"] print(module_list.list_modules(keywords, json)) - except UserWarning as e: + except (UserWarning, LookupError) as e: log.critical(e) sys.exit(1) - except LookupError as e: - log.error(e) - sys.exit(1) # nf-core modules list local From 80c6c20cddeace324a68c9da5c6073b2514830ee Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Tue, 21 Jun 2022 11:38:06 +0200 Subject: [PATCH 52/55] General cleanup. Change where files are cloned --- nf_core/__main__.py | 45 ++++++------------------------ nf_core/modules/modules_command.py | 1 - 2 files changed, 9 insertions(+), 37 deletions(-) diff --git a/nf_core/__main__.py b/nf_core/__main__.py index c15c4a1d44..da438467c0 100755 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -422,12 +422,9 @@ def local(ctx, keywords, json, dir): dir, False, ctx.obj["modules_repo_url"], ctx.obj["modules_repo_branch"], ctx.obj["modules_repo_no_pull"] ) print(module_list.list_modules(keywords, json)) - except UserWarning as e: + except (UserWarning, LookupError) as e: log.critical(e) sys.exit(1) - except LookupError as e: - log.error(e) - sys.exit(1) # nf-core modules install @@ -463,10 +460,7 @@ def install(ctx, tool, dir, prompt, force, sha): exit_status = module_install.install(tool) if not exit_status and all: sys.exit(1) - except UserWarning as e: - log.error(e) - sys.exit(1) - except LookupError as e: + except (UserWarning, LookupError) as e: log.error(e) sys.exit(1) @@ -523,10 +517,7 @@ def update(ctx, tool, dir, force, prompt, sha, all, preview, save_diff): exit_status = module_install.update(tool) if not exit_status and all: sys.exit(1) - except UserWarning as e: - log.error(e) - sys.exit(1) - except LookupError as e: + except (UserWarning, LookupError) as e: log.error(e) sys.exit(1) @@ -551,12 +542,9 @@ def remove(ctx, dir, tool): dir, ctx.obj["modules_repo_url"], ctx.obj["modules_repo_branch"], ctx.obj["modules_repo_no_pull"] ) module_remove.remove(tool) - except UserWarning as e: + except (UserWarning, LookupError) as e: log.critical(e) sys.exit(1) - except LookupError as e: - log.error(e) - sys.exit(1) # nf-core modules create @@ -622,12 +610,9 @@ def create_test_yml(ctx, tool, run_tests, output, force, no_prompts): try: meta_builder = nf_core.modules.ModulesTestYmlBuilder(tool, run_tests, output, force, no_prompts) meta_builder.run() - except UserWarning as e: + except (UserWarning, LookupError) as e: log.critical(e) sys.exit(1) - except LookupError as e: - log.error(e) - sys.exit(1) # nf-core modules lint @@ -668,12 +653,9 @@ def lint(ctx, tool, dir, key, all, local, passed, fix_version): except nf_core.modules.lint.ModuleLintException as e: log.error(e) sys.exit(1) - except UserWarning as e: + except (UserWarning, LookupError) as e: log.critical(e) sys.exit(1) - except LookupError as e: - log.error(e) - sys.exit(1) # nf-core modules info @@ -704,10 +686,7 @@ def info(ctx, tool, dir): dir, tool, ctx.obj["modules_repo_url"], ctx.obj["modules_repo_branch"], ctx.obj["modules_repo_no_pull"] ) print(module_info.get_module_info()) - except UserWarning as e: - log.error(e) - sys.exit(1) - except LookupError as e: + except (UserWarning, LookupError) as e: log.error(e) sys.exit(1) @@ -732,12 +711,9 @@ def bump_versions(ctx, tool, dir, all, show_all): except nf_core.modules.module_utils.ModuleException as e: log.error(e) sys.exit(1) - except UserWarning as e: + except (UserWarning, LookupError) as e: log.critical(e) sys.exit(1) - except LookupError as e: - log.error(e) - sys.exit(1) # nf-core modules mulled @@ -798,12 +774,9 @@ def test_module(ctx, tool, no_prompts, pytest_args): try: meta_builder = nf_core.modules.ModulesTest(tool, no_prompts, pytest_args) meta_builder.run() - except UserWarning as e: + except (UserWarning, LookupError) as e: log.critical(e) sys.exit(1) - except LookupError as e: - log.error(e) - sys.exit(1) # nf-core schema subcommands diff --git a/nf_core/modules/modules_command.py b/nf_core/modules/modules_command.py index 86b0ff699e..67b7195adf 100644 --- a/nf_core/modules/modules_command.py +++ b/nf_core/modules/modules_command.py @@ -12,7 +12,6 @@ import nf_core.modules.module_utils import nf_core.utils -from nf_core import modules from nf_core.modules.modules_repo import ( NF_CORE_MODULES_NAME, NF_CORE_MODULES_REMOTE, From 0bee100afb821e8596d55b0b7d5731ec1efec423 Mon Sep 17 00:00:00 2001 From: Erik Danielsson <53212377+ErikDanielsson@users.noreply.github.com> Date: Tue, 21 Jun 2022 11:39:49 +0200 Subject: [PATCH 53/55] Apply suggestions from code review Co-authored-by: Phil Ewels --- README.md | 2 +- nf_core/__main__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ea352ecc86..3d63c57c5b 100644 --- a/README.md +++ b/README.md @@ -919,7 +919,7 @@ and installing and updating modules from a remote git repository. The modules supercommand comes with two flags for specifying a custom remote: - `--git-remote `: Specify the repository from which the modules should be fetched as a git URL. Defaults to the github repository of `nf-core/modules`. -- `--branch `: Specify the branch from which the modules should be fetched. Defaults to the default branch of your repository +- `--branch `: Specify the branch from which the modules should be fetched. Defaults to the default branch of your repository. Note that a custom remote must follow a similar directory structure to that of `nf-core/moduleś` for the `nf-core modules` commands to work properly. diff --git a/nf_core/__main__.py b/nf_core/__main__.py index da438467c0..883ce1c034 100755 --- a/nf_core/__main__.py +++ b/nf_core/__main__.py @@ -355,7 +355,7 @@ def lint(dir, release, fix, key, show_passed, fail_ignored, fail_warned, markdow "--no-pull", is_flag=True, default=False, - help="Use this option for faster commands if you know there have been no recent changes to the repository", + help="Do not pull in latest changes to local clone of modules repository.", ) @click.pass_context def modules(ctx, git_remote, branch, no_pull): From f994bd3fb2e1f944935f792e58b4b33ed9ffeaa5 Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Tue, 21 Jun 2022 12:01:31 +0200 Subject: [PATCH 54/55] Use .config --- nf_core/modules/modules_repo.py | 8 +++++--- nf_core/utils.py | 3 ++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index bec31cf06d..62074326d3 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -38,7 +38,9 @@ def __init__(self, progress_bar, repo_name, remote_url, operation): super().__init__() self.progress_bar = progress_bar self.tid = self.progress_bar.add_task( - f"{operation} '{repo_name}' ({remote_url})", start=False, state="Waiting for response" + f"{operation} from [bold green]'{repo_name}'[/bold green] ([link={remote_url}]{remote_url}[/link])", + start=False, + state="Waiting for response", ) def update(self, op_code, cur_count, max_count=None, message=""): @@ -49,7 +51,7 @@ def update(self, op_code, cur_count, max_count=None, message=""): if not self.progress_bar.tasks[self.tid].started: self.progress_bar.start_task(self.tid) self.progress_bar.update( - self.tid, total=max_count, completed=cur_count, state=f"{int(cur_count)}/{int(max_count)}" + self.tid, total=max_count, completed=cur_count, state=f"{cur_count / max_count * 100:.1f}%" ) @@ -121,7 +123,7 @@ def setup_local_repo(self, remote, branch, no_progress=True): Sets self.repo """ self.local_repo_dir = os.path.join(NFCORE_DIR, self.fullname) - log.info(f"'{self.fullname}'") + log.info(NFCORE_DIR) if not os.path.exists(self.local_repo_dir): try: if no_progress: diff --git a/nf_core/utils.py b/nf_core/utils.py index e368c38d2c..2da1b28f80 100644 --- a/nf_core/utils.py +++ b/nf_core/utils.py @@ -54,7 +54,8 @@ os.environ.get("XDG_CONFIG_HOME", os.path.join(os.getenv("HOME"), ".config")), "nf-core", ) -NFCORE_DIR = os.path.join(os.getenv("HOME"), ".nfcore") +NFCORE_DIR = os.path.join(os.environ.get("XDG_CONFIG_HOME", os.path.join(os.getenv("HOME"), ".config")), "nfcore") +print(f"NF_CORE_DIR: {NFCORE_DIR}") def check_if_outdated(current_version=None, remote_version=None, source_url="https://nf-co.re/tools_version"): From b4d4b892cc6ac01a4425a280c8ba6e82c48e7dcb Mon Sep 17 00:00:00 2001 From: Erik Danielsson Date: Tue, 21 Jun 2022 13:57:37 +0200 Subject: [PATCH 55/55] Update docs --- README.md | 12 +++++------- nf_core/modules/modules_repo.py | 1 - nf_core/utils.py | 1 - 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index ea352ecc86..d84f9d6f86 100644 --- a/README.md +++ b/README.md @@ -907,13 +907,6 @@ This allows multiple pipelines to use the same code for share tools and gives a The nf-core DSL2 modules repository is at -### Running the module commands - -The `nf-core/tools` package contains a suite of commands for working DSL2 modules in your pipeline. The modules commands use `git` to interface -with the `nf-core/modules` repository. Therefore you need to have have `git` installed to use the modules commands. You can then use the -the modules commands for a variety of tasks such as creating new DSL2 modules, viewing and getting metadata about the modules in your pipeline, -and installing and updating modules from a remote git repository. - ### Custom remote modules The modules supercommand comes with two flags for specifying a custom remote: @@ -927,6 +920,11 @@ The modules commands will during initalisation try to pull changes from the remo due to performance reason or if you want to run the commands offline, you can use the flag `--no-pull`. Note however that the commands will still need to clone repositories that have previously not been used. +### Private remote repositories + +You can use the modules command with private remote repositories. Make sure that your local `git` is correctly configured with your private remote +and then specify the remote the same way you would do with a public remote repository. + ### List modules The `nf-core modules list` command provides the subcommands `remote` and `local` for listing modules installed in a remote repository and in the local pipeline respectively. Both subcommands come with the `--key ` option for filtering the modules by keywords. diff --git a/nf_core/modules/modules_repo.py b/nf_core/modules/modules_repo.py index 62074326d3..1b8b744fd9 100644 --- a/nf_core/modules/modules_repo.py +++ b/nf_core/modules/modules_repo.py @@ -123,7 +123,6 @@ def setup_local_repo(self, remote, branch, no_progress=True): Sets self.repo """ self.local_repo_dir = os.path.join(NFCORE_DIR, self.fullname) - log.info(NFCORE_DIR) if not os.path.exists(self.local_repo_dir): try: if no_progress: diff --git a/nf_core/utils.py b/nf_core/utils.py index 2da1b28f80..6e47c54da8 100644 --- a/nf_core/utils.py +++ b/nf_core/utils.py @@ -55,7 +55,6 @@ "nf-core", ) NFCORE_DIR = os.path.join(os.environ.get("XDG_CONFIG_HOME", os.path.join(os.getenv("HOME"), ".config")), "nfcore") -print(f"NF_CORE_DIR: {NFCORE_DIR}") def check_if_outdated(current_version=None, remote_version=None, source_url="https://nf-co.re/tools_version"):