diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index ea27a58..4ecfbfe 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -2,6 +2,7 @@ "name": "nfcore", "image": "nfcore/gitpod:latest", "remoteUser": "gitpod", + "runArgs": ["--privileged"], // Configure tool-specific properties. "customizations": { diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index ea3ff5f..41734bd 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -9,7 +9,9 @@ Please use the pre-filled template to save time. However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;) -> If you need help using or modifying nf-core/epitopeprediction then the best place to ask is on the nf-core Slack [#epitopeprediction](https://nfcore.slack.com/channels/epitopeprediction) channel ([join our Slack here](https://nf-co.re/join/slack)). +:::info +If you need help using or modifying nf-core/epitopeprediction then the best place to ask is on the nf-core Slack [#epitopeprediction](https://nfcore.slack.com/channels/epitopeprediction) channel ([join our Slack here](https://nf-co.re/join/slack)). 
+::: ## Contribution workflow diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 888cb4b..b8bdd21 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -78,7 +78,7 @@ jobs: - uses: actions/setup-python@v4 with: - python-version: "3.8" + python-version: "3.11" architecture: "x64" - name: Install dependencies diff --git a/.github/workflows/release-announcments.yml b/.github/workflows/release-announcments.yml new file mode 100644 index 0000000..6ad3392 --- /dev/null +++ b/.github/workflows/release-announcments.yml @@ -0,0 +1,68 @@ +name: release-announcements +# Automatic release toot and tweet announcements +on: + release: + types: [published] + workflow_dispatch: + +jobs: + toot: + runs-on: ubuntu-latest + steps: + - uses: rzr/fediverse-action@master + with: + access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} + host: "mstdn.science" # custom host if not "mastodon.social" (default) + # GitHub event payload + # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release + message: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + + send-tweet: + runs-on: ubuntu-latest + + steps: + - uses: actions/setup-python@v4 + with: + python-version: "3.10" + - name: Install dependencies + run: pip install tweepy==4.14.0 + - name: Send tweet + shell: python + run: | + import os + import tweepy + + client = tweepy.Client( + access_token=os.getenv("TWITTER_ACCESS_TOKEN"), + access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"), + consumer_key=os.getenv("TWITTER_CONSUMER_KEY"), + consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"), + ) + tweet = os.getenv("TWEET") + client.create_tweet(text=tweet) + env: + TWEET: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! 
+ + Please see the changelog: ${{ github.event.release.html_url }} + TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }} + TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }} + TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }} + TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }} + + bsky-post: + runs-on: ubuntu-latest + steps: + - uses: zentered/bluesky-post-action@v0.0.2 + with: + post: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + env: + BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }} + BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }} + # diff --git a/CHANGELOG.md b/CHANGELOG.md index 5306ece..5fee662 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,10 +3,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v2.2.2dev - 2023-08-29 +## v2.3dev - 2023-08-29 ### `Changed` +- [213](https://github.com/nf-core/epitopeprediction/pull/213) - Rename param `genome_version` to `genome_reference`, add functionality to handle BioMart archive urls +- [213](https://github.com/nf-core/epitopeprediction/pull/213) - Update to nf-core template `2.10` - [203](https://github.com/nf-core/epitopeprediction/pull/203) - Update to nf-core template `2.9` - [203](https://github.com/nf-core/epitopeprediction/pull/203) - Update to nf-core template `2.8` - [#206](https://github.com/nf-core/epitopeprediction/issues/206) - Update the row checker class. 
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index f4fd052..c089ec7 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,18 +1,20 @@ -# Code of Conduct at nf-core (v1.0) +# Code of Conduct at nf-core (v1.4) ## Our Pledge -In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core, pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: +In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: - Age +- Ability - Body size +- Caste - Familial status - Gender identity and expression - Geographical location - Level of experience - Nationality and national origins - Native language -- Physical and neurological ability +- Neurodiversity - Race or ethnicity - Religion - Sexual identity and orientation @@ -22,80 +24,133 @@ Please note that the list above is alphabetised and is therefore not ranked in a ## Preamble -> Note: This Code of Conduct (CoC) has been drafted by the nf-core Safety Officer and been edited after input from members of the nf-core team and others. "We", in this document, refers to the Safety Officer and members of the nf-core core team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will amended periodically to keep it up-to-date, and in case of any dispute, the most current version will apply. +:::note +This Code of Conduct (CoC) has been drafted by Renuka Kudva, Cris Tuñí, and Michael Heuer, with input from the nf-core Core Team and Susanna Marquez from the nf-core community. 
"We", in this document, refers to the Safety Officers and members of the nf-core Core Team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date. In case of any dispute, the most current version will apply. +::: -An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). Our current safety officer is Renuka Kudva. +An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). + +Our Safety Officers are Saba Nafees, Cris Tuñí, and Michael Heuer. nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals. -We have therefore adopted this code of conduct (CoC), which we require all members of our community and attendees in nf-core events to adhere to in all our workspaces at all times. Workspaces include but are not limited to Slack, meetings on Zoom, Jitsi, YouTube live etc. +We have therefore adopted this CoC, which we require all members of our community and attendees of nf-core events to adhere to in all our workspaces at all times. Workspaces include, but are not limited to, Slack, meetings on Zoom, gather.town, YouTube live etc. -Our CoC will be strictly enforced and the nf-core team reserve the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. +Our CoC will be strictly enforced and the nf-core team reserves the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. 
-We ask all members of our community to help maintain a supportive and productive workspace and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. +We ask all members of our community to help maintain supportive and productive workspaces and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. -Questions, concerns or ideas on what we can include? Contact safety [at] nf-co [dot] re +Questions, concerns, or ideas on what we can include? Contact members of the Safety Team on Slack or email safety [at] nf-co [dot] re. ## Our Responsibilities -The safety officer is responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. +Members of the Safety Team (the Safety Officers) are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. -The safety officer in consultation with the nf-core core team have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. +The Safety Team, in consultation with the nf-core core team, have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this CoC, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. -Members of the core team or the safety officer who violate the CoC will be required to recuse themselves pending investigation. 
They will not have access to any reports of the violations and be subject to the same actions as others in violation of the CoC. +Members of the core team or the Safety Team who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and will be subject to the same actions as others in violation of the CoC. -## When are where does this Code of Conduct apply? +## When and where does this Code of Conduct apply? -Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events. This includes but is not limited to the following listed alphabetically and therefore in no order of preference: +Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events, such as hackathons, workshops, bytesize, and collaborative workspaces on gather.town. These guidelines include, but are not limited to, the following (listed alphabetically and therefore in no order of preference): - Communicating with an official project email address. - Communicating with community members within the nf-core Slack channel. - Participating in hackathons organised by nf-core (both online and in-person events). -- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence. -- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, Jitsi, YouTube live etc. +- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence, and on the nf-core gather.town workspace. +- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, gather.town, Jitsi, YouTube live etc. 
- Representing nf-core on social media. This includes both official and personal accounts. ## nf-core cares 😊 -nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include but are not limited to the following (listed in alphabetical order): +nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include, but are not limited to, the following (listed in alphabetical order): - Ask for consent before sharing another community member’s personal information (including photographs) on social media. - Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. -- Celebrate your accomplishments at events! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) +- Celebrate your accomplishments! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) - Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) - Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can) - Focus on what is best for the team and the community. (When in doubt, ask) -- Graciously accept constructive criticism, yet be unafraid to question, deliberate, and learn. +- Accept feedback, yet be unafraid to question, deliberate, and learn. - Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!) -- Show appreciation and **provide clear feedback**. 
(This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communications to be kind.**) +- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communication to be kind.**) - Take breaks when you feel like you need them. -- Using welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack.) +- Use welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack) ## nf-core frowns on 😕 -The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this code of conduct. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces. +The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this CoC. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces: - Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. - “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online. - Spamming or trolling of individuals on social media. -- Use of sexual or discriminatory imagery, comments, or jokes and unwelcome sexual attention. 
-- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion or work experience. +- Use of sexual or discriminatory imagery, comments, jokes, or unwelcome sexual attention. +- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion, or work experience. ### Online Trolling -The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the added issue of online trolling. This is unacceptable, reports of such behaviour will be taken very seriously, and perpetrators will be excluded from activities immediately. +The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the risk of online trolling. This is unacceptable — reports of such behaviour will be taken very seriously and perpetrators will be excluded from activities immediately. -All community members are required to ask members of the group they are working within for explicit consent prior to taking screenshots of individuals during video calls. +All community members are **required** to ask members of the group they are working with for explicit consent prior to taking screenshots of individuals during video calls. -## Procedures for Reporting CoC violations +## Procedures for reporting CoC violations If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible. -You can reach out to members of the [nf-core core team](https://nf-co.re/about) and they will forward your concerns to the safety officer(s). +You can reach out to members of the Safety Team (Saba Nafees, Cris Tuñí, and Michael Heuer) on Slack. 
Alternatively, contact a member of the [nf-core core team](https://nf-co.re/about), and they will forward your concerns to the Safety Team. + +Issues directly concerning members of the Core Team or the Safety Team will be dealt with by other members of the core team and the safety manager — possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson and details will be shared in due course. + +All reports will be handled with the utmost discretion and confidentiality. + +You can also report any CoC violations to safety [at] nf-co [dot] re. In your email report, please do your best to include: + +- Your contact information. +- Identifying information (e.g. names, nicknames, pseudonyms) of the participant who has violated the Code of Conduct. +- The behaviour that was in violation and the circumstances surrounding the incident. +- The approximate time of the behaviour (if different than the time the report was made). +- Other people involved in the incident, if applicable. +- If you believe the incident is ongoing. +- If there is a publicly available record (e.g. mailing list record, a screenshot). +- Any additional information. + +After you file a report, one or more members of our Safety Team will contact you to follow up on your report. + +## Who will read and handle reports + +All reports will be read and handled by the members of the Safety Team at nf-core. + +If members of the Safety Team are deemed to have a conflict of interest with a report, they will be required to recuse themselves as per our Code of Conduct and will not have access to any follow-ups. + +To keep this first report confidential from any of the Safety Team members, please submit your first report by direct messaging on Slack/direct email to any of the nf-core members you are comfortable disclosing the information to, and be explicit about which member(s) you do not consent to sharing the information with. 
+ +## Reviewing reports + +After receiving the report, members of the Safety Team will review the incident report to determine whether immediate action is required, for example, whether there is immediate threat to participants’ safety. + +The Safety Team, in consultation with members of the nf-core core team, will assess the information to determine whether the report constitutes a Code of Conduct violation, for them to decide on a course of action. + +In the case of insufficient information, one or more members of the Safety Team may contact the reporter, the reportee, or any other attendees to obtain more information. -Issues directly concerning members of the core team will be dealt with by other members of the core team and the safety manager, and possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson, and details will be shared in due course. +Once additional information is gathered, the Safety Team will collectively review and decide on the best course of action to take, if any. The Safety Team reserves the right to not act on a report. -All reports will be handled with utmost discretion and confidentially. +## Confidentiality + +All reports, and any additional information included, are only shared with the team of safety officers (and possibly members of the core team, in case the safety officer is in violation of the CoC). We will respect confidentiality requests for the purpose of protecting victims of abuse. + +We will not name harassment victims, beyond discussions between the safety officer and members of the nf-core team, without the explicit consent of the individuals involved. + +## Enforcement + +Actions taken by the nf-core’s Safety Team may include, but are not limited to: + +- Asking anyone to stop a behaviour. +- Asking anyone to leave the event and online spaces either temporarily, for the remainder of the event, or permanently. 
+- Removing access to the gather.town and Slack, either temporarily or permanently. +- Communicating to all participants to reinforce our expectations for conduct and remind what is unacceptable behaviour; this may be public for practical reasons. +- Communicating to all participants that an incident has taken place and how we will act or have acted — this may be for the purpose of letting event participants know we are aware of and dealing with the incident. +- Banning anyone from participating in nf-core-managed spaces, future events, and activities, either temporarily or permanently. +- No action. ## Attribution and Acknowledgements @@ -106,6 +161,22 @@ All reports will be handled with utmost discretion and confidentially. ## Changelog -### v1.0 - March 12th, 2021 +### v1.4 - February 8th, 2022 + +- Included a new member of the Safety Team. Corrected a typographical error in the text. + +### v1.3 - December 10th, 2021 + +- Added a statement that the CoC applies to nf-core gather.town workspaces. Corrected typographical errors in the text. + +### v1.2 - November 12th, 2021 + +- Removed information specific to reporting CoC violations at the Hackathon in October 2021. + +### v1.1 - October 14th, 2021 + +- Updated with names of new Safety Officers and specific information for the hackathon in October 2021. + +### v1.0 - March 15th, 2021 - Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC. 
diff --git a/README.md b/README.md index e931fa9..e3b0729 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,7 @@ # ![nf-core/epitopeprediction](docs/images/nf-core-epitopeprediction_logo_light.png#gh-light-mode-only) ![nf-core/epitopeprediction](docs/images/nf-core-epitopeprediction_logo_dark.png#gh-dark-mode-only) [![GitHub Actions CI Status](https://github.com/nf-core/epitopeprediction/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/epitopeprediction/actions?query=workflow%3A%22nf-core+CI%22) -[![GitHub Actions Linting Status](https://github.com/nf-core/epitopeprediction/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/epitopeprediction/actions?query=workflow%3A%22nf-core+linting%22) -[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/epitopeprediction/results) -[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3564666-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.3564666) +[![GitHub Actions Linting Status](https://github.com/nf-core/epitopeprediction/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/epitopeprediction/actions?query=workflow%3A%22nf-core+linting%22)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/epitopeprediction/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3564666-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.3564666) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) @@ -30,7 +28,7 @@ On release, automated continuous integration tests run the pipeline on a full-si ## Usage -> **Note** +> [!NOTE] > If you are new to Nextflow and nf-core, please refer to [this 
page](https://nf-co.re/docs/usage/installation) on how > to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) > with `-profile test` before running the workflow on actual data. @@ -55,8 +53,8 @@ nextflow run nf-core/epitopeprediction \ --outdir ```` -> **Warning:** -> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those +> [!WARNING] +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those > provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; > see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 8a6b9e8..ebfcc68 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,12 +1,7 @@ report_comment: > - This report has been generated by the nf-core/epitopeprediction + This report has been generated by the nf-core/epitopeprediction analysis pipeline. For information about how to interpret these results, please see the - documentation. - -custom_logo: "nf-core-epitopeprediction_logo_light.png" -custom_logo_url: https://github.com/nf-core/epitopeprediction/ -custom_logo_title: "nf-core/epitopeprediction" - + documentation. 
report_section_order: "nf-core-epitopeprediction-methods-description": order: -1000 diff --git a/bin/epaa.py b/bin/epaa.py index 2603b9f..a538a61 100755 --- a/bin/epaa.py +++ b/bin/epaa.py @@ -41,7 +41,7 @@ logger.addHandler(handler) ID_SYSTEM_USED = EIdentifierTypes.ENSEMBL -transcriptProteinMap = {} +transcriptProteinTable = {} transcriptSwissProtMap = {} @@ -230,9 +230,7 @@ def read_GSvar(filename, pass_only=True): if not mut_type: mut_type = a_mut_type - # TODO with the next epytope release we can deal with transcript id version - transcript_id = transcript_id.split(".")[0] - + # with the latest epytope release (3.3.1), we can now handle full transcript IDs coding[transcript_id] = MutationSyntax( transcript_id, int(trans_pos.split("_")[0]) - 1, int(prot_start) - 1, trans_coding, prot_coding ) @@ -420,8 +418,7 @@ def read_vcf(filename, pass_only=True): positions = re.findall(r"\d+", prot_coding) tpos = int(positions[0]) - 1 - # TODO with the new epytope release we will support transcript IDs with version - transcript_id = transcript_id.split(".")[0] + # with the latest epytope release (3.3.1), we can now handle full transcript IDs if "NM" in transcript_id: ID_SYSTEM_USED = EIdentifierTypes.REFSEQ @@ -599,13 +596,17 @@ def read_lig_ID_values(filename): def create_protein_column_value(pep): + # retrieve Ensembl protein ID for given transcript IDs, if we want to provide additional protein ID types, adapt here all_proteins = [ - transcriptProteinMap[transcript.transcript_id.split(":")[0]] for transcript in set(pep.get_all_transcripts()) + # split by : otherwise epytope generator suffix included + transcriptProteinTable.query(f'transcript_id == "{transcript.transcript_id.split(":")[0]}"')["ensembl_id"] + for transcript in set(pep.get_all_transcripts()) ] return ",".join(set([item for sublist in all_proteins for item in sublist])) def create_transcript_column_value(pep): + # split by : otherwise epytope generator suffix included return 
",".join(set([transcript.transcript_id.split(":")[0] for transcript in set(pep.get_all_transcripts())])) @@ -773,91 +774,6 @@ def create_ligandomics_column_value_for_result(row, lig_id, val, wild_type): return "" -def get_protein_ids_for_transcripts(idtype, transcripts, ensembl_url, reference): - result = {} - result_swissProt = {} - - biomart_url = "{}/biomart/martservice?query=".format(ensembl_url) - biomart_head = """ - - - - - """.strip() - biomart_tail = """ - - - """.strip() - biomart_filter = """""" - biomart_attribute = """""" - - ENSEMBL = False - if idtype == EIdentifierTypes.ENSEMBL: - idname = "ensembl_transcript_id" - ENSEMBL = True - elif idtype == EIdentifierTypes.REFSEQ: - idname = "refseq_mrna" - - input_lists = [] - - # too long requests will fail - if len(transcripts) > 200: - input_lists = [transcripts[i : i + 3] for i in range(0, len(transcripts), 3)] - - else: - input_lists += [transcripts] - - attribut_swissprot = "uniprot_swissprot_accession" if reference == "GRCh37" else "uniprotswissprot" - - tsvselect = [] - for l in input_lists: - rq_n = ( - biomart_head % ("hsapiens_gene_ensembl", "default") - + biomart_filter % (idname, ",".join(l)) - + biomart_attribute % ("ensembl_peptide_id") - + biomart_attribute % (attribut_swissprot) - + biomart_attribute % ("refseq_peptide") - + biomart_attribute % (idname) - + biomart_tail - ) - - # DictReader returns byte object that is transformed into a string by '.decode('utf-8')' - tsvreader = csv.DictReader( - urllib.request.urlopen(biomart_url + urllib.parse.quote(rq_n)).read().decode("utf-8").splitlines(), - dialect="excel-tab", - ) - - tsvselect += [x for x in tsvreader] - - swissProtKey = "UniProt/SwissProt Accession" if reference == "GRCh37" else "UniProtKB/Swiss-Prot ID" - - if ENSEMBL: - key = "Ensembl Transcript ID" if reference == "GRCh37" else "Transcript stable ID" - protein_key = "Ensembl Protein ID" if reference == "GRCh37" else "Protein stable ID" - for dic in tsvselect: - if dic[key] in 
result: - merged = result[dic[key]] + [dic[protein_key]] - merged_swissProt = result_swissProt[dic[key]] + [dic[swissProtKey]] - result[dic[key]] = merged - result_swissProt[dic[key]] = merged_swissProt - else: - result[dic[key]] = [dic[protein_key]] - result_swissProt[dic[key]] = [dic[swissProtKey]] - else: - key = "RefSeq mRNA [e.g. NM_001195597]" - for dic in tsvselect: - if dic[key] in result: - merged = result[dic[key]] + [dic["RefSeq Protein ID [e.g. NP_001005353]"]] - merged_swissProt = result_swissProt[dic[key]] + [dic[swissProtKey]] - result[dic[key]] = merged - result_swissProt[dic[key]] = merged_swissProt - else: - result[dic[key]] = [dic["RefSeq Protein ID [e.g. NP_001005353]"]] - result_swissProt[dic[key]] = [dic[swissProtKey]] - - return result, result_swissProt - - def get_matrix_max_score(allele, length): allele_model = "%s_%i" % (allele, length) try: @@ -945,25 +861,6 @@ def create_peptide_variant_dictionary(peptides): return pep_to_variants -# TODO replace by epytope function once released -def is_created_by_variant(peptide): - transcript_ids = [x.transcript_id for x in set(peptide.get_all_transcripts())] - for t in transcript_ids: - p = peptide.proteins[t] - varmap = p.vars - for pos, vars in varmap.items(): - for var in vars: - if var.type in [VariationType.FSDEL, VariationType.FSINS]: - if peptide.proteinPos[t][0] + len(peptide) > pos: - return True - else: - for start_pos in peptide.proteinPos[t]: - positions = list(range(start_pos, start_pos + len(peptide))) - if pos in positions: - return True - return False - - def make_predictions_from_variants( variants_all, methods, @@ -976,7 +873,7 @@ def make_predictions_from_variants( protein_db, identifier, metadata, - transcriptProteinMap, + transcriptProteinTable, ): # list for all peptides and filtered peptides all_peptides = [] @@ -1000,7 +897,7 @@ def make_predictions_from_variants( peptide_gen = generator.generate_peptides_from_proteins(prots, peplen) peptides_var = [x for x in peptide_gen] - 
peptides = [p for p in peptides_var if is_created_by_variant(p)] + peptides = [p for p in peptides_var if p.is_created_by_variant()] # filter out self peptides selfies = [str(p) for p in peptides if protein_db.exists(str(p))] @@ -1266,11 +1163,10 @@ def __main__(): parser.add_argument("-a", "--alleles", help=" MHC Alleles", required=True, type=str) parser.add_argument( "-r", - "--reference", + "--genome_reference", help="Reference, retrieved information will be based on this ensembl version", required=False, - default="GRCh37", - choices=["GRCh37", "GRCh38"], + default="https://grch37.ensembl.org/", ) parser.add_argument( "-f", "--filter_self", help="Filter peptides against human proteom", required=False, action="store_true" @@ -1314,11 +1210,15 @@ def __main__(): metadata = [] proteins = [] - references = {"GRCh37": "http://feb2014.archive.ensembl.org", "GRCh38": "http://apr2018.archive.ensembl.org"} - global transcriptProteinMap + global transcriptProteinTable global transcriptSwissProtMap + # initialize MartsAdapter + # in previous version, these were the defaults "GRCh37": "http://feb2014.archive.ensembl.org" (broken) + # "GRCh38": "http://apr2018.archive.ensembl.org" (different dataset table scheme, could potentially be fixed on BiomartAdapter level if needed ) + ma = MartsAdapter(biomart=args.genome_reference) + # read in variants or peptides if args.peptides: logger.info("Running epaa for peptides...") @@ -1331,9 +1231,9 @@ def __main__(): variant_list, transcripts, metadata = read_vcf(args.somatic_mutations) transcripts = list(set(transcripts)) - transcriptProteinMap, transcriptSwissProtMap = get_protein_ids_for_transcripts( - ID_SYSTEM_USED, transcripts, references[args.reference], args.reference - ) + + # use function provided by epytope to retrieve protein IDs (different systems) for transcript IDs + transcriptProteinTable = ma.get_protein_ids_from_transcripts(transcripts, type=ID_SYSTEM_USED) # get the alleles if args.alleles.startswith("http"): @@ 
-1343,9 +1243,6 @@ def __main__(): else: alleles = [Allele(a) for a in args.alleles.split(";")] - # initialize MartsAdapter, GRCh37 or GRCh38 based - ma = MartsAdapter(biomart=references[args.reference]) - # create protein db instance for filtering self-peptides up_db = UniProtDB("sp") if args.filter_self: @@ -1417,7 +1314,7 @@ def __main__(): up_db, args.identifier, metadata, - transcriptProteinMap, + transcriptProteinTable, ) # concat dataframes for all peptide lengths diff --git a/conf/modules.config b/conf/modules.config index 8886348..9fc3089 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -10,6 +10,8 @@ ---------------------------------------------------------------------------------------- */ +def genome_reference = params.genome_reference.toLowerCase() + process { publishDir = [ @@ -18,10 +20,6 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - withName: MULTIQC { - ext.args = '' - } - withName: SAMPLESHEET_CHECK { publishDir = [ path: { "${params.outdir}/pipeline_info" }, @@ -38,6 +36,15 @@ process { ] } + withName: 'MULTIQC' { + ext.args = params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' + publishDir = [ + path: { "${params.outdir}/multiqc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: EPYTOPE_CHECK_REQUESTED_MODELS { publishDir = [ path: { "${params.outdir}/reports" }, @@ -71,30 +78,45 @@ process { } withName: EPYTOPE_PEPTIDE_PREDICTION_PROTEIN { + // Argument list needs to end with --peptides + ext.args = [ + genome_reference != 'grch37' & genome_reference != 'grch38' ? "--genome_reference '${genome_reference}'" : '', + genome_reference == 'grch37' ? "--genome_reference 'https://grch37.ensembl.org/'" : '', + genome_reference == 'grch38' ? 
"--genome_reference 'https://ensembl.org/'" : '', + '--peptides' + ].join(' ').trim() publishDir = [ path: { "${params.outdir}/split_predictions/${meta.sample}" }, mode: params.publish_dir_mode ] - // Argument list needs to end with --peptides - ext.args = "--reference ${params.genome_version} --peptides" } withName: EPYTOPE_PEPTIDE_PREDICTION_PEP { + // Argument list needs to end with --peptides + ext.args = [ + genome_reference != 'grch37' & genome_reference != 'grch38' ? "--genome_reference '${genome_reference}'" : '', + genome_reference == 'grch37' ? "--genome_reference 'https://grch37.ensembl.org/'" : '', + genome_reference == 'grch38' ? "--genome_reference 'https://ensembl.org/'" : '', + '--peptides' + ].join(' ').trim() publishDir = [ path: { "${params.outdir}/split_predictions/${meta.sample}" }, mode: params.publish_dir_mode ] - // Argument list needs to end with --peptides - ext.args = "--reference ${params.genome_version} --peptides" } withName: EPYTOPE_PEPTIDE_PREDICTION_VAR { + // Argument list needs to end with --somatic_mutation + ext.args = [ + genome_reference != 'grch37' & genome_reference != 'grch38' ? "--genome_reference '${genome_reference}'" : '', + genome_reference == 'grch37' ? "--genome_reference 'https://grch37.ensembl.org/'" : '', + genome_reference == 'grch38' ? 
"--genome_reference 'https://ensembl.org/'" : '', + '--somatic_mutation' + ].join(' ').trim() publishDir = [ path: { "${params.outdir}/split_predictions/${meta.sample}" }, mode: params.publish_dir_mode ] - // Argument list needs to end with --somatic_mutation - ext.args = "--reference ${params.genome_version} --somatic_mutation" } withName: MERGE_JSON_SINGLE { @@ -180,4 +202,5 @@ process { mode: params.publish_dir_mode ] } + } diff --git a/conf/test.config b/conf/test.config index 681012d..749c8e3 100644 --- a/conf/test.config +++ b/conf/test.config @@ -17,7 +17,6 @@ params { // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/epitopeprediction/testdata/sample_sheets/sample_sheet_variants.csv' - schema_ignore_params = 'genomes,input_paths' // Limit resources so that this can run on GitHub Actions max_cpus = 2 diff --git a/conf/test_full.config b/conf/test_full.config index f1fa9ef..b21c2b3 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -16,6 +16,5 @@ params { // Input data for full size test input = 'https://raw.githubusercontent.com/nf-core/test-datasets/epitopeprediction/testdata/sample_sheets/sample_sheet_full_test.csv' - schema_ignore_params = 'genomes,input_paths' tools = 'syfpeithi,mhcflurry,mhcnuggets-class-1,mhcnuggets-class-2' } diff --git a/docs/output.md b/docs/output.md index ebb2f96..bda154a 100644 --- a/docs/output.md +++ b/docs/output.md @@ -97,6 +97,7 @@ When running the pipeline using the `--show_supported_models` parameter, informa - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameter's are used when running the pipeline. 
- Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. + - Parameters used by the pipeline run: `params.json`. diff --git a/docs/usage.md b/docs/usage.md index a6a0025..f4e4fe7 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -16,12 +16,16 @@ You will need to create a samplesheet with information about the samples you wou ### Input Formats -The pipeline currently accepts three different types of input that are genomic variants, peptides and proteins. The supported file formats for genomic variants are `.vcf`, `.vcf.gz` and `tsv`. - -:warning: Please note that genomic variants have to be annotated. Currently, we support variants that have been annotated using [SnpEff](http://pcingola.github.io/SnpEff/). Support for [VEP](https://www.ensembl.org/info/docs/tools/vep/index.html) will be available with one of the upcoming versions. +The pipeline currently accepts three different types of input that are genomic variants, peptides and proteins. #### Genomic variants +The supported file formats for genomic variants are `.vcf`, `.vcf.gz` and `tsv`. + +> [!IMPORTANT] +> Please note that genomic variants have to be annotated. Currently, we support variants that have been annotated using [SnpEff](http://pcingola.github.io/SnpEff/). +> Support for [VEP](https://www.ensembl.org/info/docs/tools/vep/index.html) will be available with one of the upcoming versions. + `tsv` files with genomic variants have to provide the following columns: ```console @@ -32,6 +36,13 @@ chr1 12954870 12954870 C T . 0 NORMAL:414,TUMOR:8 . missense_variant 0.5 transcr ``` +For genomic variants, reference information from `Ensembl BioMart` is used. The default database version is the most recent `GRCh37` version. If you want to do the predictions based on `GRCh38` as the reference genome, please specify `--genome_reference grch38` in your pipeline call. + +You can also specify valid `Ensembl BioMart` archive version urls as `--genome_reference` value, e.g.
[the archive version of December 2021](http://dec2021.archive.ensembl.org/). + +> [!IMPORTANT] +> Please note that old archive versions are regularly retired, therefore it might be possible that a used version is not available anymore at a later point. + #### Peptide sequences Peptide sequences have to be provided in `tsv` format with two mandatory columns `id` and `sequence`. Additional columns will be added as metadata to results. @@ -81,7 +92,7 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p The typical command for running the pipeline is as follows: ```bash -nextflow run nf-core/epitopeprediction --input ./samplesheet.csv --outdir ./results --genome GRCh37 -profile docker +nextflow run nf-core/epitopeprediction --input ./samplesheet.csv --outdir ./results -profile docker ``` This will launch the pipeline with the `docker` configuration profile and default options (`syfpeithi` by default). See below for more information about profiles. @@ -99,7 +110,8 @@ If you wish to repeatedly use the same parameters for multiple runs, rather than Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. -> ⚠️ Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). +> [!WARNING] +> Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). 
The above pipeline run specified with a params file in yaml format: @@ -112,7 +124,6 @@ with `params.yaml` containing: ```yaml input: './samplesheet.csv' outdir: './results/' -genome: 'GRCh37' <...> ``` @@ -149,11 +160,14 @@ This version number will be logged in reports when you run the pipeline, so that To further assist in reproducbility, you can use share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. -> 💡 If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. +:::tip +If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. +::: ## Core Nextflow arguments -> **NB:** These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). +> [!NOTE] +> These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). ### `-profile` @@ -161,7 +175,9 @@ Use this parameter to choose a configuration profile. Profiles can give configur Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. -> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however, Conda is also supported as an alternative. +:::info +We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. 
+::: The pipeline dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information, and to see if your system is available in these configs, please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 408951a..01b8653 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -3,6 +3,7 @@ // import org.yaml.snakeyaml.Yaml +import groovy.json.JsonOutput class NfcoreTemplate { @@ -222,6 +223,21 @@ class NfcoreTemplate { } } + // + // Dump pipeline parameters in a json file + // + public static void dump_parameters(workflow, params) { + def output_d = new File("${params.outdir}/pipeline_info/") + if (!output_d.exists()) { + output_d.mkdirs() + } + + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def output_pf = new File(output_d, "params_${timestamp}.json") + def jsonStr = JsonOutput.toJson(params) + output_pf.text = JsonOutput.prettyPrint(jsonStr) + } + // // Print pipeline summary on completion // diff --git a/lib/WorkflowEpitopeprediction.groovy b/lib/WorkflowEpitopeprediction.groovy index caa0c46..94179f7 100755 --- a/lib/WorkflowEpitopeprediction.groovy +++ b/lib/WorkflowEpitopeprediction.groovy @@ -14,10 +14,6 @@ class WorkflowEpitopeprediction { genomeExistsError(params, log) - - //if (!params.fasta) { - // Nextflow.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." - //} } // @@ -53,7 +49,7 @@ class WorkflowEpitopeprediction { public static String toolCitationText(params) { - // TODO Optionally add in-text citation tools to this list. + // TODO nf-core: Optionally add in-text citation tools to this list. // Can use ternary operators to dynamically construct based conditions, e.g. 
params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", // Uncomment function in methodsDescriptionText to render in MultiQC report def citation_text = [ diff --git a/main.nf b/main.nf index dfb0d7f..3cb571d 100644 --- a/main.nf +++ b/main.nf @@ -11,6 +11,13 @@ nextflow.enable.dsl = 2 +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + GENOME PARAMETER VALUES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ VALIDATE & PRINT PARAMETER SUMMARY diff --git a/modules.json b/modules.json index d3987e8..6aa6487 100644 --- a/modules.json +++ b/modules.json @@ -7,7 +7,12 @@ "nf-core": { "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "05c280924b6c768d484c7c443dad5e605c4ff4b4", + "installed_by": ["modules"] + }, + "fastqc": { + "branch": "master", + "git_sha": "a33464f205fa15305bfe268546f6607b6f4d4753", "installed_by": ["modules"] }, "gunzip": { @@ -17,7 +22,7 @@ }, "multiqc": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "a6e11ac655e744f7ebc724be669dd568ffdc0e80", "installed_by": ["modules"] } } diff --git a/modules/local/epytope_check_requested_models.nf b/modules/local/epytope_check_requested_models.nf index fa75353..f78a77a 100644 --- a/modules/local/epytope_check_requested_models.nf +++ b/modules/local/epytope_check_requested_models.nf @@ -1,10 +1,10 @@ process EPYTOPE_CHECK_REQUESTED_MODELS { label 'process_low' - conda "bioconda::epytope=3.1.0" + conda "bioconda::epytope=3.3.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/epytope:3.1.0--pyh5e36f6f_0' : - 'biocontainers/epytope:3.1.0--pyh5e36f6f_0' }" + 'https://depot.galaxyproject.org/singularity/epytope:3.3.1--pyh7cba7a3_0' : + 'biocontainers/epytope:3.3.1--pyh7cba7a3_0' }" input: tuple val(meta), path(input_file) diff --git a/modules/local/epytope_generate_peptides.nf b/modules/local/epytope_generate_peptides.nf index c4f9ac7..401afa7 100644 --- a/modules/local/epytope_generate_peptides.nf +++ b/modules/local/epytope_generate_peptides.nf @@ -2,10 +2,10 @@ process EPYTOPE_GENERATE_PEPTIDES { label 'process_low' tag "${meta.sample}" - conda "bioconda::epytope=3.1.0" + conda "bioconda::epytope=3.3.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/epytope:3.1.0--pyh5e36f6f_0' : - 'biocontainers/epytope:3.1.0--pyh5e36f6f_0' }" + 'https://depot.galaxyproject.org/singularity/epytope:3.3.1--pyh7cba7a3_0' : + 'biocontainers/epytope:3.3.1--pyh7cba7a3_0' }" input: tuple val(meta), path(raw) diff --git a/modules/local/epytope_peptide_prediction.nf b/modules/local/epytope_peptide_prediction.nf index 929a557..8b8ca73 100644 --- a/modules/local/epytope_peptide_prediction.nf +++ b/modules/local/epytope_peptide_prediction.nf @@ -2,9 +2,7 @@ process EPYTOPE_PEPTIDE_PREDICTION { label 'process_low' conda "conda-forge::coreutils=9.1 conda-forge::tcsh=6.20.00 bioconda::epytope=3.1.0 conda-forge::gawk=5.1.0 conda-forge::perl=5.32.1" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-11bbf0d242ea96f7b9c08d5b5bc26f2cd5ac5943:3419f320edefe6077631798f50d7bd4f8dc4763f-0' : - 'biocontainers/mulled-v2-11bbf0d242ea96f7b9c08d5b5bc26f2cd5ac5943:3419f320edefe6077631798f50d7bd4f8dc4763f-0' }" + container 'ghcr.io/jonasscheid/epitopeprediction-2:0.3.0' input: tuple val(meta), path(splitted), path(software_versions) @@ -85,7 +83,7 @@ process EPYTOPE_PEPTIDE_PREDICTION { python: \$(python --version 2>&1 | sed 's/Python //g') epytope: \$(python -c "import pkg_resources; print(pkg_resources.get_distribution('epytope').version)") pandas: \$(python -c "import pkg_resources; print(pkg_resources.get_distribution('pandas').version)") - pyvcf: \$(python -c "import pkg_resources; print(pkg_resources.get_distribution('pyvcf').version)") + pyvcf: \$(python -c "import pkg_resources; print(pkg_resources.get_distribution('PyVCF3').version)") mhcflurry: \$(mhcflurry-predict --version 2>&1 | sed 's/^mhcflurry //; s/ .*\$//') mhcnuggets: \$(python -c "import pkg_resources; print(pkg_resources.get_distribution('mhcnuggets').version)") END_VERSIONS diff --git a/modules/local/epytope_show_supported_models.nf b/modules/local/epytope_show_supported_models.nf index ee9f117..68a8e1b 100644 --- a/modules/local/epytope_show_supported_models.nf +++ b/modules/local/epytope_show_supported_models.nf @@ -1,10 +1,10 @@ process EPYTOPE_SHOW_SUPPORTED_MODELS { label 'process_low' - conda "bioconda::epytope=3.1.0" + conda "bioconda::epytope=3.3.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/epytope:3.1.0--pyh5e36f6f_0' : - 'biocontainers/epytope:3.1.0--pyh5e36f6f_0' }" + 'https://depot.galaxyproject.org/singularity/epytope:3.3.1--pyh7cba7a3_0' : + 'biocontainers/epytope:3.3.1--pyh7cba7a3_0' }" input: tuple val(meta), path(raw), path(software_versions) diff --git a/modules/local/external_tools_import.nf b/modules/local/external_tools_import.nf index 2895b48..f1b4768 100644 --- a/modules/local/external_tools_import.nf +++ b/modules/local/external_tools_import.nf @@ -78,7 +78,6 @@ process EXTERNAL_TOOLS_IMPORT { # # CREATE VERSION FILE # - cat <<-END_VERSIONS > versions.yml "${task.process}": ${toolname}: ${toolversion} diff --git a/modules/local/get_prediction_versions.nf b/modules/local/get_prediction_versions.nf index 5364ddb..939221b 100644 --- a/modules/local/get_prediction_versions.nf +++ b/modules/local/get_prediction_versions.nf @@ -1,10 +1,10 @@ process GET_PREDICTION_VERSIONS { label 'process_low' - conda "bioconda::epytope=3.1.0" + conda "bioconda::epytope=3.3.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/epytope:3.1.0--pyh5e36f6f_0' : - 'biocontainers/epytope:3.1.0--pyh5e36f6f_0' }" + 'https://depot.galaxyproject.org/singularity/epytope:3.3.1--pyh7cba7a3_0' : + 'biocontainers/epytope:3.3.1--pyh7cba7a3_0' }" input: val external_tool_versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index ebc8727..c9d014b 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.14" + conda "bioconda::multiqc=1.15" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' : + 'biocontainers/multiqc:1.15--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 1fc387b..65d7dd0 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_single' - conda "bioconda::multiqc=1.14" + conda "bioconda::multiqc=1.15" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' : + 'biocontainers/multiqc:1.15--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" diff --git a/nextflow.config b/nextflow.config index 0b6dcc9..fd47846 100644 --- a/nextflow.config +++ b/nextflow.config @@ -9,15 +9,17 @@ // Global default params, used in configs params { // Input options - input = null - peptides_split_maxchunks = 100 - peptides_split_minchunksize = 5000 - split_by_variants = false - split_by_variants_size = 0 - split_by_variants_distance = 110000 + input = null + peptides_split_maxchunks = 100 + peptides_split_minchunksize = 5000 + split_by_variants = false + split_by_variants_size = 0 + split_by_variants_distance = 110000 // References - genome_version = 'GRCh37' + genome_reference = 'grch37' + genome = null + fasta = null // Options: Predictions tools = 'syfpeithi' @@ -68,7 +70,6 @@ params { version = false validate_params = true show_hidden_params = false - schema_ignore_params = 'genomes' igenomes_ignore = true // Config options @@ -89,11 +90,11 @@ params { // Schema validation default options validationFailUnrecognisedParams = false validationLenientMode = false - validationSchemaIgnoreParams = 'genomes' + validationSchemaIgnoreParams = 'fasta,genome,genomes,igenomes_base' validationShowHiddenParams = false validate_params = true - conda.enabled = false + conda.enabled = false } @@ -191,6 +192,7 @@ profiles { } apptainer { apptainer.enabled = true + apptainer.autoMounts = true conda.enabled = false docker.enabled = false singularity.enabled = false @@ -200,8 +202,8 @@ profiles { } gitpod { executor.name = 'local' - executor.cpus = 16 - executor.memory = 60.GB + executor.cpus = 4 + executor.memory = 8.GB } test { includeConfig 'conf/test.config' } test_variant_tsv { includeConfig 'conf/test_variant_tsv.config' } @@ -278,7 +280,7 @@ 
manifest { description = """A fully reproducible and state of the art epitope prediction pipeline.""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '2.2.2dev' + version = '2.3dev' doi = '' } diff --git a/nextflow_schema.json b/nextflow_schema.json index aefc10b..a983071 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -48,12 +48,11 @@ "description": "Options for the genome and proteome reference.", "fa_icon": "fas fa-stream", "properties": { - "genome_version": { + "genome_reference": { "type": "string", - "default": "GRCh37", - "help_text": "This defines against which human reference genome the pipeline performs the analysis including the incorporation of genetic variants e.g..", - "enum": ["GRCh37", "GRCh38"], - "description": "Specifies the human reference genome version." + "default": "grch37", + "help_text": "This defines against which human Ensembl genome reference the pipeline performs the analysis, including the incorporation of genetic variants. If `grch37` or `grch38` is specified, the most recent Ensembl Biomart version for that genome version will be used. Alternatively, an Ensembl Biomart (archive) version can be specified, e.g. http://jan2020.archive.ensembl.org/.", + "description": "Specifies the Ensembl genome reference version that will be used." }, "proteome": { "type": "string", @@ -68,6 +67,22 @@ "fa_icon": "fas fa-dna", "description": "Reference genome related files and options required for the workflow.", "properties": { + "genome": { + "type": "string", + "description": "Name of iGenomes reference.", + "fa_icon": "fas fa-book", + "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details."
+ }, + "fasta": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", + "description": "Path to FASTA genome file.", + "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", + "fa_icon": "far fa-file-code" + }, "igenomes_ignore": { "type": "boolean", "description": "Do not load the iGenomes reference config.", @@ -311,14 +326,12 @@ "type": "boolean", "description": "Display help text.", "fa_icon": "fas fa-question-circle", - "default": false, "hidden": true }, "version": { "type": "boolean", "description": "Display version and exit.", "fa_icon": "fas fa-question-circle", - "default": false, "hidden": true }, "publish_dir_mode": { @@ -342,7 +355,6 @@ "type": "boolean", "description": "Send plain-text email instead of HTML.", "fa_icon": "fas fa-remove-format", - "default": false, "hidden": true }, "max_multiqc_email_size": { @@ -357,7 +369,6 @@ "type": "boolean", "description": "Do not use coloured log outputs.", "fa_icon": "fas fa-palette", - "default": false, "hidden": true }, "hook_url": { @@ -396,7 +407,6 @@ "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", - "default": false, "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." }, @@ -404,7 +414,6 @@ "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters fails when an unrecognised parameter is found.", - "default": false, "hidden": true, "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." 
}, @@ -412,7 +421,6 @@ "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters in lenient more.", - "default": false, "hidden": true, "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." }, @@ -422,13 +430,6 @@ "description": "Show all params when using `--help`", "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." - }, - "schema_ignore_params": { - "type": "string", - "default": "genomes", - "description": "Ignore JSON schema validation of the following params", - "fa_icon": "fas fa-ban", - "hidden": true } } } diff --git a/workflows/epitopeprediction.nf b/workflows/epitopeprediction.nf index 0ec4303..849ad28 100644 --- a/workflows/epitopeprediction.nf +++ b/workflows/epitopeprediction.nf @@ -482,6 +482,7 @@ workflow.onComplete { if (params.email || params.email_on_fail) { NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) } + NfcoreTemplate.dump_parameters(workflow, params) NfcoreTemplate.summary(workflow, params, log) if (params.hook_url) { NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log)