From 6bbbd4a298d6a7e665a50f3c4f93c4542c193de5 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Thu, 1 Jun 2023 20:34:26 +0200 Subject: [PATCH 1/8] Small bugfixes. --- nf_core/download.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 70f61f35a4..0b32ba115e 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -157,7 +157,7 @@ def download_workflow(self): sys.exit(1) summary_log = [ - f"Pipeline revision: '{', '.join(self.revision) if len(self.revision) < 5 else self.revision[0]+',...,['+str(len(self.revision)-2)+' more revisions],...,'+self.revision[-1]}'", + f"Pipeline revision: '{', '.join(self.revision) if len(self.revision) < 5 else self.revision[0]+',['+str(len(self.revision)-2)+' more revisions],'+self.revision[-1]}'", f"Pull containers: '{self.container}'", ] if self.container == "singularity" and os.environ.get("NXF_SINGULARITY_CACHEDIR") is not None: @@ -531,7 +531,7 @@ def read_remote_containers(self): except (FileNotFoundError, LookupError) as e: log.error(f"[red]Issue with reading the specified remote $NXF_SINGULARITY_CACHE index:[/]\n{e}\n") if stderr.is_interactive and rich.prompt.Confirm.ask(f"[blue]Specify a new index file and try again?"): - self.prompt_singularity_cachedir_remote(retry=True) + self.prompt_singularity_cachedir_remote() else: log.info("Proceeding without consideration of the remote $NXF_SINGULARITY_CACHE index.") self.singularity_cache_index = None @@ -731,7 +731,7 @@ def find_container_images(self, workflow_directory): r"(?<=container)[^\${}]+\${([^{}]+)}(?![^{]*})", contents ) - if bool(container_definition) & bool(container_definition.group(1)): + if bool(container_definition) and bool(container_definition.group(1)): pattern = re.escape(container_definition.group(1)) # extract the quoted string(s) following the variable assignment container_names = re.findall(r"%s\s*=\s*[\"\']([^\"\']+)[\"\']" % pattern, contents) From d454093e514d9595bd3a3f0def71fe9eefdc8e28 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Thu, 8 Jun 2023 17:30:17 +0200 Subject: [PATCH 2/8] The repo must not be in detached state for Tower's UI. --- nf_core/download.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 70f61f35a4..6fed091948 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -157,7 +157,7 @@ def download_workflow(self): sys.exit(1) summary_log = [ - f"Pipeline revision: '{', '.join(self.revision) if len(self.revision) < 5 else self.revision[0]+',...,['+str(len(self.revision)-2)+' more revisions],...,'+self.revision[-1]}'", + f"Pipeline revision: '{', '.join(self.revision) if len(self.revision) < 5 else self.revision[0]+',['+str(len(self.revision)-2)+' more revisions],'+self.revision[-1]}'", f"Pull containers: '{self.container}'", ] if self.container == "singularity" and os.environ.get("NXF_SINGULARITY_CACHEDIR") is not None: @@ -1268,9 +1268,14 @@ def tidy_tags_and_branches(self): for head in heads_to_remove: self.repo.delete_head(head) - # ensure all desired branches are available + # ensure all desired revisions/branches are available for revision in desired_revisions: - self.checkout(revision) + if self.repo.is_valid_object(revision): + self.repo.create_head(revision, revision) + self.checkout(revision) + if self.repo.head.is_detached: + self.repo.head.reset(index=True, working_tree=True) + self.heads = self.repo.heads # get all tags and available remote_branches From 130aac7f6e03185734a1e4d5cabe1d71493d1f38 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Thu, 8 Jun 2023 18:53:18 +0200 Subject: [PATCH 3/8] Introduce latest branch for all Tower downloads, bugix in prompt_singularity_cachedir_remote() --- nf_core/download.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index f644d1c188..e5386126ad 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -13,6 +13,7 @@ import tarfile import textwrap from datetime import datetime +from distutils.version import StrictVersion from zipfile import ZipFile import git @@ -531,6 +532,7 @@ def read_remote_containers(self): except (FileNotFoundError, LookupError) as e: log.error(f"[red]Issue with reading the specified remote $NXF_SINGULARITY_CACHE index:[/]\n{e}\n") if stderr.is_interactive and rich.prompt.Confirm.ask(f"[blue]Specify a new index file and try again?"): + self.singularity_cache_index = None # reset chosen path to index file. self.prompt_singularity_cachedir_remote() else: log.info("Proceeding without consideration of the remote $NXF_SINGULARITY_CACHE index.") @@ -1271,10 +1273,14 @@ def tidy_tags_and_branches(self): # ensure all desired revisions/branches are available for revision in desired_revisions: if self.repo.is_valid_object(revision): - self.repo.create_head(revision, revision) self.checkout(revision) - if self.repo.head.is_detached: - self.repo.head.reset(index=True, working_tree=True) + + # create "latest" branch to ensure at least one branch exists (required for Tower's UI to display revisions correctly) + latest = sorted(desired_revisions, key=StrictVersion)[-1] + self.repo.create_head("latest", latest) + self.checkout(latest) + if self.repo.head.is_detached: + self.repo.head.reset(index=True, working_tree=True) self.heads = self.repo.heads From 27a18b909a032847b6ce687104e24efb9ad95fdd Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Thu, 8 Jun 2023 19:24:46 +0200 Subject: [PATCH 4/8] Bugfix in with file validation in prompt_singularity_cachedir_remote(). Type error, because it needs to be callable and not a plain string. --- nf_core/download.py | 10 +++++++--- nf_core/utils.py | 17 +++++++++++++++++ 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index e5386126ad..ee1e6e523f 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -28,7 +28,11 @@ import nf_core.list import nf_core.utils from nf_core.synced_repo import RemoteProgressbar, SyncedRepo -from nf_core.utils import NFCORE_CACHE_DIR, NFCORE_DIR +from nf_core.utils import ( + NFCORE_CACHE_DIR, + NFCORE_DIR, + SingularityCacheFilePathValidator, +) log = logging.getLogger(__name__) stderr = rich.console.Console( @@ -494,8 +498,8 @@ def prompt_singularity_cachedir_remote(self): cachedir_index = None while cachedir_index is None: prompt_cachedir_index = questionary.path( - "Specify a list of the remote images already present in the remote system :", - file_filter="*.txt", + "Specify a list of the container images that are already present on the remote system:", + validate=SingularityCacheFilePathValidator, style=nf_core.utils.nfcore_question_style, ).unsafe_ask() cachedir_index = os.path.abspath(os.path.expanduser(prompt_cachedir_index)) diff --git a/nf_core/utils.py b/nf_core/utils.py index 3ddce9b870..ba0a4fabf3 100644 --- a/nf_core/utils.py +++ b/nf_core/utils.py @@ -880,6 +880,23 @@ def prompt_pipeline_release_branch(wf_releases, wf_branches, multiple=False): ) +class SingularityCacheFilePathValidator(questionary.Validator): + """ + Validator for file path specified as --singularity-cache-index argument in nf-core download + """ + + def validate(self, value): + if len(value.text): + if os.path.isfile(value.text): + return True + else: + raise questionary.ValidationError( + message="Invalid remote cache index file", cursor_position=len(value.text) + ) + else: + return True + + def get_repo_releases_branches(pipeline, wfs): """Fetches details of a nf-core workflow to download. From 9ce5ce4a7d1b760ac51b9e24c5ef847b94a48adc Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Thu, 8 Jun 2023 19:49:14 +0200 Subject: [PATCH 5/8] Adapting the test to the new behavior. --- tests/test_download.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/test_download.py b/tests/test_download.py index aa2e959f3d..4ee8d3ef6e 100644 --- a/tests/test_download.py +++ b/tests/test_download.py @@ -261,8 +261,14 @@ def test_download_workflow_for_tower(self, tmp_dir): assert download_obj.workflow_repo assert isinstance(download_obj.workflow_repo, WorkflowRepo) assert issubclass(type(download_obj.workflow_repo), SyncedRepo) + # corroborate that the other revisions are inaccessible to the user. - assert len(download_obj.workflow_repo.tags) == len(download_obj.revision) + all_tags = {tag.name for tag in download_obj.workflow_repo.tags} + all_heads = {head.name for head in download_obj.workflow_repo.heads} + + assert set(download_obj.revision) == all_tags + # assert that the download has a "latest" branch. + assert "latest" in all_heads # download_obj.download_workflow_tower(location=tmp_dir) will run container image detection for all requested revisions assert isinstance(download_obj.containers, list) and len(download_obj.containers) == 33 From 225864a6cb71d7bbd21c7d95aa1f419b5898a2b1 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Fri, 9 Jun 2023 13:10:52 +0200 Subject: [PATCH 6/8] @alneberg suggested that lastest should not be enforced if other branches exist. --- nf_core/download.py | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index ee1e6e523f..3886fed0bf 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -1258,8 +1258,8 @@ def tidy_tags_and_branches(self): desired_revisions = set(self.revision) # determine what needs pruning - tags_to_remove = {tag for tag in self.repo.tags if tag.name not in desired_revisions} - heads_to_remove = {head for head in self.repo.heads if head.name not in desired_revisions} + tags_to_remove = {tag for tag in self.repo.tags if tag.name not in desired_revisions.union({"latest"})} + heads_to_remove = {head for head in self.repo.heads if head.name not in desired_revisions.union({"latest"})} try: # delete unwanted tags from repository @@ -1276,15 +1276,30 @@ def tidy_tags_and_branches(self): # ensure all desired revisions/branches are available for revision in desired_revisions: - if self.repo.is_valid_object(revision): + if not self.repo.is_valid_object(revision): self.checkout(revision) - # create "latest" branch to ensure at least one branch exists (required for Tower's UI to display revisions correctly) - latest = sorted(desired_revisions, key=StrictVersion)[-1] - self.repo.create_head("latest", latest) - self.checkout(latest) - if self.repo.head.is_detached: - self.repo.head.reset(index=True, working_tree=True) + # no branch exists, but one is required for Tower's UI to display revisions correctly). Thus, "latest" will be created. + if not bool(self.repo.heads): + if self.repo.is_valid_object("latest"): + # "latest" exists as tag but not as branch + self.repo.create_head("latest", "latest") # create a new head for latest + self.checkout("latest") + else: + # "latest" is neither a branch (no heads exist) nor a tag, since is_valid_object() returned False. + # try to determine highest contained version number from desired revisions, to which latest will be pinned. + try: + latest = sorted(desired_revisions, key=StrictVersion, reverse=True)[0] + except ( + ValueError + ): # sorting might fail, because desired revisions may have names that do not comply with semantic version requirements. + # retry sorting using first letters only, this should also sort 3.10 before 3.5 + latest = sorted(desired_revisions, key=lambda x: str(x)[0:4])[0] + finally: + self.repo.create_head("latest", latest) + self.checkout(latest) + if self.repo.head.is_detached: + self.repo.head.reset(index=True, working_tree=True) self.heads = self.repo.heads From 974565299827e929943c0cdaafe340347840d756 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Fri, 9 Jun 2023 15:38:22 +0200 Subject: [PATCH 7/8] Better error handling for failed singularity pullcommands. --- nf_core/download.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 3886fed0bf..2b3ae5894b 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -1054,9 +1054,18 @@ def singularity_pull_image(self, container, out_path, cache_path, progress): if lines: # something went wrong with the container retrieval if any("FATAL: " in line for line in lines): - log.info("Singularity container retrieval failed with the following error:") - log.info("".join(lines)) - raise FileNotFoundError(f'The container "{container}" is unavailable.\n{"".join(lines)}') + log.error("Singularity container retrieval failed with the following error:") + log.error("".join(lines)) + if stderr.is_interactive and rich.prompt.Confirm.ask( + f"[blue]Continue downloading the workflow nonetheless?" + ): + log.error( + f'Skipped download of "{container}".\n Please troubleshoot command "{" ".join(singularity_command)}" on an individual basis.' + ) + progress.remove_task(task) + return + else: + raise FileNotFoundError(f'Downloading image "{container}" unfortunately failed.') # Copy cached download if we are using the cache if cache_path: From 8abe1a687f009738d5d197e40e17e2fc859c2fcf Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Fri, 9 Jun 2023 17:09:05 +0200 Subject: [PATCH 8/8] Adapt the mechanism to identify the latest version, since distutils module is deprecated. See PEP 632 and PEP 386 for details. --- nf_core/download.py | 48 +++++++++++++++++++++------------------------ 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/nf_core/download.py b/nf_core/download.py index 2b3ae5894b..347268ea04 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -13,7 +13,6 @@ import tarfile import textwrap from datetime import datetime -from distutils.version import StrictVersion from zipfile import ZipFile import git @@ -23,6 +22,7 @@ import rich import rich.progress from git.exc import GitCommandError, InvalidGitRepositoryError +from pkg_resources import parse_version as VersionParser import nf_core import nf_core.list @@ -886,7 +886,8 @@ def get_singularity_images(self, current_revision=""): # Raise exception if this is not possible log.error("Not able to pull image. Service might be down or internet connection is dead.") raise r - progress.update(task, advance=1) + finally: + progress.update(task, advance=1) def singularity_image_filenames(self, container): """Check Singularity cache for image, copy to destination folder if found. @@ -1054,18 +1055,12 @@ def singularity_pull_image(self, container, out_path, cache_path, progress): if lines: # something went wrong with the container retrieval if any("FATAL: " in line for line in lines): - log.error("Singularity container retrieval failed with the following error:") - log.error("".join(lines)) - if stderr.is_interactive and rich.prompt.Confirm.ask( - f"[blue]Continue downloading the workflow nonetheless?" - ): - log.error( - f'Skipped download of "{container}".\n Please troubleshoot command "{" ".join(singularity_command)}" on an individual basis.' - ) - progress.remove_task(task) - return - else: - raise FileNotFoundError(f'Downloading image "{container}" unfortunately failed.') + log.error(f'[bold red]The singularity image "{container}" could not be pulled:[/]\n\n{"".join(lines)}') + log.error( + f'Skipping failed pull of "{container}". Please troubleshoot the command \n"{" ".join(singularity_command)}"\n\n\n' + ) + progress.remove_task(task) + return # Copy cached download if we are using the cache if cache_path: @@ -1287,6 +1282,9 @@ def tidy_tags_and_branches(self): for revision in desired_revisions: if not self.repo.is_valid_object(revision): self.checkout(revision) + self.repo.create_head(revision, revision) + if self.repo.head.is_detached: + self.repo.head.reset(index=True, working_tree=True) # no branch exists, but one is required for Tower's UI to display revisions correctly). Thus, "latest" will be created. if not bool(self.repo.heads): @@ -1295,18 +1293,16 @@ def tidy_tags_and_branches(self): self.repo.create_head("latest", "latest") # create a new head for latest self.checkout("latest") else: - # "latest" is neither a branch (no heads exist) nor a tag, since is_valid_object() returned False. - # try to determine highest contained version number from desired revisions, to which latest will be pinned. - try: - latest = sorted(desired_revisions, key=StrictVersion, reverse=True)[0] - except ( - ValueError - ): # sorting might fail, because desired revisions may have names that do not comply with semantic version requirements. - # retry sorting using first letters only, this should also sort 3.10 before 3.5 - latest = sorted(desired_revisions, key=lambda x: str(x)[0:4])[0] - finally: - self.repo.create_head("latest", latest) - self.checkout(latest) + # desired revisions may contain arbitrary branch names that do not correspond to valid sematic versioning patterns. + valid_versions = [ + VersionParser(v) + for v in desired_revisions + if re.match(r"\d+\.\d+(?:\.\d+)*(?:[\w\-_])*", v) + ] + # valid versions sorted in ascending order, last will be aliased as "latest". + latest = sorted(valid_versions)[-1] + self.repo.create_head("latest", latest) + self.checkout(latest) if self.repo.head.is_detached: self.repo.head.reset(index=True, working_tree=True)