diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2d316d9d02..b2eb969078 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -2,8 +2,6 @@ name: nf-core CI
# This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors
on:
push:
- branches:
- - dev
pull_request:
release:
types: [published]
@@ -21,6 +19,7 @@ jobs:
matrix:
# Nextflow versions: check pipeline minimum
nxf_ver: ['20.11.0-edge']
+ engine: ['docker']
steps:
- name: Check out pipeline code
uses: actions/checkout@v2
@@ -31,9 +30,12 @@ jobs:
run: |
wget -qO- get.nextflow.io | bash
sudo mv nextflow /usr/local/bin/
+ - name: Run --help
+ run: |
+ nextflow run ${GITHUB_WORKSPACE} -profile test,${{ matrix.engine }} --help
- name: Run pipeline with test data
run: |
- nextflow run ${GITHUB_WORKSPACE} -profile test,docker
+ nextflow run ${GITHUB_WORKSPACE} -profile test,${{ matrix.engine }}
- name: Show results
run: ls -lR results
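
The new `engine` matrix axis is interpolated into `-profile test,${{ matrix.engine }}`, so supporting more engines later only means extending the list. A minimal sketch of the profiles such a matrix would select in `nextflow.config` (only `docker` is exercised by the matrix above; the `singularity` entry is a hypothetical extension):

```groovy
// Sketch of nextflow.config profiles the CI matrix can select.
// Only 'docker' appears in the matrix above; 'singularity' is hypothetical.
profiles {
    test   { includeConfig 'conf/test.config' }  // minimal test dataset
    docker {
        docker.enabled = true
    }
    singularity {
        singularity.enabled    = true
        singularity.autoMounts = true
    }
}
```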
diff --git a/.github/workflows/push_dockerhub_dev.yml b/.github/workflows/push_dockerhub_dev.yml
deleted file mode 100644
index bcf8018d56..0000000000
--- a/.github/workflows/push_dockerhub_dev.yml
+++ /dev/null
@@ -1,28 +0,0 @@
-name: nf-core Docker push (dev)
-# This builds the docker image and pushes it to DockerHub
-# Runs on nf-core repo releases and push event to 'dev' branch (PR merges)
-on:
- push:
- branches:
- - dev
-
-jobs:
- push_dockerhub:
- name: Push new Docker image to Docker Hub (dev)
- runs-on: ubuntu-latest
- # Only run for the nf-core repo, for releases and merged PRs
- if: ${{ github.repository == 'nf-core/sarek' }}
- env:
- DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
- DOCKERHUB_PASS: ${{ secrets.DOCKERHUB_PASS }}
- steps:
- - name: Check out pipeline code
- uses: actions/checkout@v2
-
- - name: Build new docker image
- run: docker build --no-cache . -t nfcore/sarek:dev
-
- - name: Push Docker image to DockerHub (dev)
- run: |
- echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin
- docker push nfcore/sarek:dev
diff --git a/.github/workflows/push_dockerhub_release.yml b/.github/workflows/push_dockerhub_release.yml
deleted file mode 100644
index dd4cda6d51..0000000000
--- a/.github/workflows/push_dockerhub_release.yml
+++ /dev/null
@@ -1,29 +0,0 @@
-name: nf-core Docker push (release)
-# This builds the docker image and pushes it to DockerHub
-# Runs on nf-core repo releases and push event to 'dev' branch (PR merges)
-on:
- release:
- types: [published]
-
-jobs:
- push_dockerhub:
- name: Push new Docker image to Docker Hub (release)
- runs-on: ubuntu-latest
- # Only run for the nf-core repo, for releases and merged PRs
- if: ${{ github.repository == 'nf-core/sarek' }}
- env:
- DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
- DOCKERHUB_PASS: ${{ secrets.DOCKERHUB_PASS }}
- steps:
- - name: Check out pipeline code
- uses: actions/checkout@v2
-
- - name: Build new docker image
- run: docker build --no-cache . -t nfcore/sarek:latest
-
- - name: Push Docker image to DockerHub (release)
- run: |
- echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin
- docker push nfcore/sarek:latest
- docker tag nfcore/sarek:latest nfcore/sarek:${{ github.event.release.tag_name }}
- docker push nfcore/sarek:${{ github.event.release.tag_name }}
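
Both DockerHub push workflows are removed because there is no longer a single `nfcore/sarek` image to build: with DSL2 (see the `docs/usage.md` change further down), every process pulls its own container. A minimal sketch of that per-process pattern (the tool and image tag are illustrative, not taken from this PR):

```groovy
// Per-process container declaration that replaces the monolithic image.
// The exact image tag here is illustrative.
process FASTQC {
    container 'quay.io/biocontainers/fastqc:0.11.9--0'

    input:
    tuple val(meta), path(reads)

    script:
    """
    fastqc --quiet --threads $task.cpus $reads
    """
}
```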
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 78eddb36a1..53150492a6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,12 +13,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#234](https://github.com/nf-core/sarek/pull/234) - Switching to DSL2
- [#234](https://github.com/nf-core/sarek/pull/234), [#238](https://github.com/nf-core/sarek/pull/238) - Add modules and sub workflow for building indices
-- [#234](https://github.com/nf-core/sarek/pull/234), [#252](https://github.com/nf-core/sarek/pull/252), [#256](https://github.com/nf-core/sarek/pull/256), [#283](https://github.com/nf-core/sarek/pull/283), [#334](https://github.com/nf-core/sarek/pull/334) - Update Nextflow `19.10.0` -> `20.11.0-edg`
+- [#234](https://github.com/nf-core/sarek/pull/234), [#252](https://github.com/nf-core/sarek/pull/252), [#256](https://github.com/nf-core/sarek/pull/256), [#283](https://github.com/nf-core/sarek/pull/283), [#334](https://github.com/nf-core/sarek/pull/334) - Update Nextflow `19.10.0` -> `20.11.0-edge`
- [#239](https://github.com/nf-core/sarek/pull/239) - Restore Sarek ascii art to header
- [#241](https://github.com/nf-core/sarek/pull/241), [#248](https://github.com/nf-core/sarek/pull/248), [#250](https://github.com/nf-core/sarek/pull/250), [#257](https://github.com/nf-core/sarek/pull/257), [#259](https://github.com/nf-core/sarek/pull/259) - Add modules and sub workflow for preprocessing
-- [#242](https://github.com/nf-core/sarek/pull/242), [#244](https://github.com/nf-core/sarek/pull/244), [#245](https://github.com/nf-core/sarek/pull/245), [#246](https://github.com/nf-core/sarek/pull/246), [#247](https://github.com/nf-core/sarek/pull/247), [#249](https://github.com/nf-core/sarek/pull/249), [#252](https://github.com/nf-core/sarek/pull/252), [#256](https://github.com/nf-core/sarek/pull/256), [#263](https://github.com/nf-core/sarek/pull/263), [#264](https://github.com/nf-core/sarek/pull/264), [#283](https://github.com/nf-core/sarek/pull/283), [#285](https://github.com/nf-core/sarek/pull/285) - Refactor `dsl2` branch
+- [#242](https://github.com/nf-core/sarek/pull/242), [#244](https://github.com/nf-core/sarek/pull/244), [#245](https://github.com/nf-core/sarek/pull/245), [#246](https://github.com/nf-core/sarek/pull/246), [#247](https://github.com/nf-core/sarek/pull/247), [#249](https://github.com/nf-core/sarek/pull/249), [#252](https://github.com/nf-core/sarek/pull/252), [#256](https://github.com/nf-core/sarek/pull/256), [#263](https://github.com/nf-core/sarek/pull/263), [#264](https://github.com/nf-core/sarek/pull/264), [#283](https://github.com/nf-core/sarek/pull/283), [#285](https://github.com/nf-core/sarek/pull/285), [#338](https://github.com/nf-core/sarek/pull/338) - Refactor `dsl2` branch
- [#257](https://github.com/nf-core/sarek/pull/257) - Use a params modules config file
-- [#266](https://github.com/nf-core/sarek/pull/266), [#285](https://github.com/nf-core/sarek/pull/285) - Add modules and sub workflow for variant calling
+- [#266](https://github.com/nf-core/sarek/pull/266), [#285](https://github.com/nf-core/sarek/pull/285), [#297](https://github.com/nf-core/sarek/pull/297) - Add modules and sub workflow for variant calling
- [#333](https://github.com/nf-core/sarek/pull/333) - Bump `Sarek` version to `3.0dev`
- [#334](https://github.com/nf-core/sarek/pull/334) - Sync `dsl2` and `dev` branches
diff --git a/Dockerfile b/Dockerfile
deleted file mode 100644
index dbdec87e1c..0000000000
--- a/Dockerfile
+++ /dev/null
@@ -1,17 +0,0 @@
-FROM nfcore/base:1.12.1
-LABEL authors="Maxime Garcia, Szilveszter Juhos" \
- description="Docker image containing all software requirements for the nf-core/sarek pipeline"
-
-# Install the conda environment
-COPY environment.yml /
-RUN conda env create --quiet -f /environment.yml && conda clean -a
-
-# Add conda installation dir to PATH (instead of doing 'conda activate')
-ENV PATH /opt/conda/envs/nf-core-sarek-3.0dev/bin:$PATH
-
-# Dump the details of the installed packages to a file for posterity
-RUN conda env export --name nf-core-sarek-3.0dev > nf-core-sarek-3.0dev.yml
-
-# Instruct R processes to use these empty files instead of clashing with a local version
-RUN touch .Rprofile
-RUN touch .Renviron
diff --git a/README.md b/README.md
index 07a8557837..243b3ee239 100644
--- a/README.md
+++ b/README.md
@@ -2,17 +2,20 @@
> **An open-source analysis pipeline to detect germline or somatic variants from whole genome or targeted sequencing**
-[](https://www.nextflow.io/)
-[](https://nf-co.re/)
-[](https://zenodo.org/badge/latestdoi/184289291)
-
-[](https://github.com/nf-core/sarek/actions?query=workflow%3A%22nf-core+CI%22)
-[](https://github.com/nf-core/sarek/actions?query=workflow%3A%22nf-core+linting%22)
+[](https://github.com/nf-core/sarek/actions?query=workflow%3A%22nf-core+CI%22)
+[](https://github.com/nf-core/sarek/actions?query=workflow%3A%22nf-core+linting%22)
[](https://circleci.com/gh/nf-core/sarek/)
+
+
+[](https://www.nextflow.io/)
+[](https://docs.conda.io/en/latest/)
+[](https://www.docker.com/)
+[](https://sylabs.io/docs/)
-[](https://bioconda.github.io/)
-[](https://hub.docker.com/r/nfcore/sarek)
+[](https://doi.org/10.5281/zenodo.1400710)
[](https://nfcore.slack.com/channels/sarek)
+[](https://twitter.com/nf_core)
+[](https://www.youtube.com/c/nf-core)
## Introduction
@@ -22,12 +25,28 @@ Sarek can also handle tumour / normal pairs and could include additional relapse
The pipeline is built using [`Nextflow`](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with `Docker` containers making installation trivial and results highly reproducible.
+
+
+It's listed on [Elixir - Tools and Data Services Registry](https://bio.tools/nf-core-sarek) and [Dockstore](https://dockstore.org/workflows/github.com/nf-core/sarek).
+
+## Pipeline Summary
+
+By default, the pipeline currently performs the following:
+
+* Sequencing quality control (`FastQC`)
+* Map Reads to Reference (`BWA mem`)
+* Mark Duplicates (`GATK MarkDuplicatesSpark`)
+* Base (Quality Score) Recalibration (`GATK BaseRecalibrator`, `GATK ApplyBQSR`)
+* Preprocessing quality control (`samtools stats`)
+* Preprocessing quality control (`Qualimap bamqc`)
+* Overall pipeline run summaries (`MultiQC`)
+
-It's listed on [Elixir - Tools and Data Services Registry](https://bio.tools/nf-core-sarek) and [Dockstore](https://dockstore.org/workflows/github.com/nf-core/sarek).
-
## Quick Start
1. Install [`Nextflow`](https://nf-co.re/usage/installation)
@@ -52,18 +71,6 @@ It's listed on [Elixir - Tools and Data Services Registry](https://bio.tools/nf-
See [usage docs](https://nf-co.re/sarek/usage) for all of the available options when running the pipeline.
-## Pipeline Summary
-
-By default, the pipeline currently performs the following:
-
-* Sequencing quality control (`FastQC`)
-* Map Reads to Reference (`BWA mem`)
-* Mark Duplicates (`GATK MarkDuplicatesSpark`)
-* Base (Quality Score) Recalibration (`GATK BaseRecalibrator`, `GATK ApplyBQSR`)
-* Preprocessing quality control (`samtools stats`)
-* Preprocessing quality control (`Qualimap bamqc`)
-* Overall pipeline run summaries (`MultiQC`)
-
## Documentation
The nf-core/sarek pipeline comes with documentation about the pipeline: [usage](https://nf-co.re/sarek/usage) and [output](https://nf-co.re/sarek/output).
@@ -110,16 +117,6 @@ Helpful contributors:
* [pallolason](https://github.com/pallolason)
* [silviamorins](https://github.com/silviamorins)
-## Contributions & Support
-
-If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).
-
-For further information or help, don't hesitate to get in touch on the [Slack `#sarek` channel](https://nfcore.slack.com/channels/sarek) (you can join with [this invite](https://nf-co.re/join/slack)), or contact us: [Maxime Garcia](mailto:maxime.garcia@scilifelab.se?subject=[GitHub]%20nf-core/sarek), [Szilvester Juhos](mailto:szilveszter.juhos@scilifelab.se?subject=[GitHub]%20nf-core/sarek)
-
-## CHANGELOG
-
-* [CHANGELOG](CHANGELOG.md)
-
## Acknowledgements
[](https://ki.se/forskning/barntumorbanken) | [](https://scilifelab.se)
@@ -127,6 +124,12 @@ For further information or help, don't hesitate to get in touch on the [Slack `#
[](https://ngisweden.scilifelab.se/) | [](https://nbis.se)
[](https://www.qbic.uni-tuebingen.de) |
+## Contributions & Support
+
+If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).
+
+For further information or help, don't hesitate to get in touch on the [Slack `#sarek` channel](https://nfcore.slack.com/channels/sarek) (you can join with [this invite](https://nf-co.re/join/slack)), or contact us: [Maxime Garcia](mailto:maxime.garcia@scilifelab.se?subject=[GitHub]%20nf-core/sarek), [Szilveszter Juhos](mailto:szilveszter.juhos@scilifelab.se?subject=[GitHub]%20nf-core/sarek)
+
## Citations
If you use `nf-core/sarek` for your analysis, please cite the `Sarek` article as follows:
@@ -143,4 +146,7 @@ You can cite the `nf-core` publication as follows:
> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.
>
> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).
-> ReadCube: [Full Access Link](https://rdcu.be/b1GjZ)
+
+## CHANGELOG
+
+* [CHANGELOG](CHANGELOG.md)
diff --git a/conf/base.config b/conf/base.config
index 3381bed5ff..64bcb42714 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -70,31 +70,19 @@ process {
memory = {params.max_memory}
}
- withName:GET_SOFTWARE_VERSIONS {
- cache = false
- }
-
- withName:CONCAT_VCF {
- // For unknown reasons, CONCAT_VCF sometimes fails with SIGPIPE
- // (exit code 141). Rerunning the process will usually work.
- errorStrategy = {task.exitStatus == 141 ? 'retry' : 'terminate'}
- }
+// withName:CONCAT_VCF {
+// // For unknown reasons, CONCAT_VCF sometimes fails with SIGPIPE
+// // (exit code 141). Rerunning the process will usually work.
+// errorStrategy = {task.exitStatus == 141 ? 'retry' : 'terminate'}
+// }
withLabel:FASTQC {
errorStrategy = {task.exitStatus == 143 ? 'retry' : 'ignore'}
}
- withName:BWAMEM2_MEM {
+ withLabel:BWAMEM2_MEM {
memory = {check_resource(60.GB * task.attempt)}
time = {check_resource(48.h * task.attempt)}
}
- withName:MULTIQC {
- errorStrategy = {task.exitStatus == 143 ? 'retry' : 'ignore'}
- }
- withName:SNPEFF {
- container = {(params.annotation_cache && params.snpeff_cache) ? 'nfcore/sarek:dev' : "nfcore/sareksnpeff:dev.${params.genome}"}
- errorStrategy = {task.exitStatus == 143 ? 'retry' : 'ignore'}
- }
- withLabel:VEP {
- container = {(params.annotation_cache && params.vep_cache) ? 'nfcore/sarek:dev' : "nfcore/sarekvep:dev.${params.genome}"}
- errorStrategy = {task.exitStatus == 143 ? 'retry' : 'ignore'}
- }
+// withName:MULTIQC {
+// errorStrategy = {task.exitStatus == 143 ? 'retry' : 'ignore'}
+// }
}
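
Switching `withName:BWAMEM2_MEM` to `withLabel:BWAMEM2_MEM` means the resource block now matches on a label rather than the process name, so any process declaring that label inherits it. A hedged sketch of the matching process side (not the actual module code):

```groovy
// The withLabel:BWAMEM2_MEM block above applies to any process declaring:
process BWAMEM2_MEM {
    label 'BWAMEM2_MEM'  // inherits memory/time from conf/base.config

    input:
    tuple val(meta), path(reads)
    path index

    script:
    """
    bwa-mem2 mem -t $task.cpus $index $reads > ${meta.id}.sam
    """
}
```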
diff --git a/conf/modules.config b/conf/modules.config
index 7476bda3c5..dc005bfed1 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -9,63 +9,76 @@ params {
// BUILD_INDICES
'build_intervals' {
publish_dir = "reference"
- publish_files = "false"
+ publish_files = false
}
'bwa_index' {
publish_dir = "reference"
- publish_files = "false"
+ publish_files = false
}
'bwamem2_index' {
publish_dir = "reference"
- publish_files = "false"
+ publish_files = false
}
'create_intervals_bed' {
publish_dir = "reference"
- publish_files = "false"
+ publish_files = false
}
'dict' {
publish_dir = "reference"
- publish_files = "false"
+ publish_files = false
}
'index_target_bed' {
publish_dir = "reference"
- publish_files = "false"
+ publish_files = false
}
'msisensor_scan' {
publish_dir = "reference"
- publish_files = "false"
+ publish_files = false
}
'samtools_faidx' {
publish_dir = "reference"
- publish_files = "false"
+ publish_files = false
}
'tabix_dbsnp' {
publish_dir = "reference"
- publish_files = "false"
+ publish_files = false
}
'tabix_germline_resource' {
publish_dir = "reference"
- publish_files = "false"
+ publish_files = false
}
'tabix_known_indels' {
publish_dir = "reference"
- publish_files = "false"
+ publish_files = false
}
'tabix_pon' {
publish_dir = "reference"
- publish_files = "false"
+ publish_files = false
}
// MAPPING
'bwa_mem1_mem' {
args = "-K 100000000 -M"
- publish_files = "false"
+ args2 = "sort"
+ publish_files = false
+ }
+ 'bwa_mem1_mem_tumor' {
+ args = "-K 100000000 -M -B 3"
+ args2 = "sort"
+ publish_files = false
}
'bwa_mem2_mem' {
args = "-K 100000000 -M"
- publish_files = "false"
+ args2 = "sort"
+ publish_files = false
+ }
+ 'bwa_mem2_mem_tumor' {
+ args = "-K 100000000 -M -B 3"
+ args2 = "sort"
+ publish_files = false
}
'merge_bam_mapping' {
publish_by_id = "true"
+ publish_files = ['bam':'mapped']
publish_dir = "preprocessing"
}
'qualimap_bamqc_mapping' {
@@ -74,6 +87,7 @@ params {
}
'samtools_index_mapping' {
publish_by_id = "true"
+ publish_files = ['bai':'mapped']
publish_dir = "preprocessing"
}
'samtools_stats_mapping' {
@@ -108,6 +122,7 @@ params {
'merge_bam_recalibrate' {
suffix = "recal"
publish_by_id = "true"
+ publish_files = ['bam':'recalibrated']
publish_dir = "preprocessing"
}
'qualimap_bamqc_recalibrate' {
@@ -128,12 +143,12 @@ params {
'haplotypecaller' {
publish_by_id = "true"
publish_dir = "variant_calling"
- publish_files = "false"
+ publish_files = false
}
'genotypegvcf' {
publish_by_id = "true"
publish_dir = "variant_calling"
- publish_files = "false"
+ publish_files = false
}
'concat_haplotypecaller' {
suffix = "haplotypecaller"
diff --git a/docs/usage.md b/docs/usage.md
index 1805bd5f32..000d0fe717 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -495,13 +495,9 @@ This tool is enabled within `Sarek` if both `--sentieon` and `--tools DNAscope`
### Containers
-`sarek`, our main container is designed using [Conda](https://conda.io/).
+With `Nextflow DSL2`, each process uses its own `Conda` environment or container from `biocontainers`.
-[](https://hub.docker.com/r/nfcore/sarek)
-
-Based on [nfcore/base:1.12.1](https://hub.docker.com/r/nfcore/base/tags):
-
-For annotation, the main container can be used, but then cache has to be downloaded, or additional containers are available with cache.
+For annotation, the cache has to be downloaded, or dedicated containers that bundle the cache are available.
`sareksnpeff`, our `snpeff` container is designed using [Conda](https://conda.io/).
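
For reference, a hedged sketch of the per-process pattern the updated paragraph describes: each DSL2 module declares both a `Conda` environment and a `biocontainers` image, and the active profile decides which one is used (versions shown are illustrative):

```groovy
// Each module carries its own software definition; the chosen -profile
// decides between conda and a container engine. Versions are illustrative.
process SAMTOOLS_STATS {
    conda     'bioconda::samtools=1.10'
    container 'quay.io/biocontainers/samtools:1.10--h9402c20_2'

    input:
    tuple val(meta), path(bam)

    script:
    """
    samtools stats $bam > ${meta.id}.stats
    """
}
```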
diff --git a/environment.yml b/environment.yml
deleted file mode 100644
index b05d4d3789..0000000000
--- a/environment.yml
+++ /dev/null
@@ -1,11 +0,0 @@
-# You can use this file to create a conda environment for this pipeline:
-# conda env create -f environment.yml
-name: nf-core-sarek-3.0dev
-channels:
- - conda-forge
- - bioconda
- - defaults
-dependencies:
- - conda-forge::markdown=3.1.1
- - conda-forge::pymdown-extensions=6.0
- - conda-forge::pygments=2.5.2
diff --git a/lib/Checks.groovy b/lib/Checks.groovy
index 0c912c401b..63c9cf5cfc 100644
--- a/lib/Checks.groovy
+++ b/lib/Checks.groovy
@@ -1,17 +1,48 @@
+import org.yaml.snakeyaml.Yaml
+
/*
* This file holds several functions used to perform standard checks for the nf-core pipeline template.
*/
class Checks {
+ static void check_conda_channels(log) {
+ Yaml parser = new Yaml()
+ def channels = []
+ try {
+ def config = parser.load("conda config --show channels".execute().text)
+ channels = config.channels
+ } catch(NullPointerException | IOException e) {
+ log.warn "Could not verify conda channel configuration."
+ return
+ }
+
+ // Check that all channels are present
+ def required_channels = ['conda-forge', 'bioconda', 'defaults']
+ def conda_check_failed = !required_channels.every { ch -> ch in channels }
+
+ // Check that they are in the right order
+ conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda'))
+ conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults'))
+
+ if (conda_check_failed) {
+ log.warn "=============================================================================\n" +
+ " There is a problem with your Conda configuration!\n\n" +
+ " You will need to set-up the conda-forge and bioconda channels correctly.\n" +
+ " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" +
+ " NB: The order of the channels matters!\n" +
+ "==================================================================================="
+ }
+ }
+
static void aws_batch(workflow, params) {
if (workflow.profile.contains('awsbatch')) {
- assert !params.awsqueue || !params.awsregion : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!"
+ assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!"
// Check outdir paths to be S3 buckets if running on AWSBatch
// related: https://github.com/nextflow-io/nextflow/issues/813
- assert !params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!"
+ assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!"
// Prevent trace files to be stored on S3 since S3 does not support rolling files.
- assert params.tracedir.startsWith('s3:') : "Specify a local tracedir or run without trace! S3 cannot be used for tracefiles."
+ assert !params.tracedir.startsWith('s3:') : "Specify a local tracedir or run without trace! S3 cannot be used for tracefiles."
}
}
@@ -32,5 +63,40 @@ class Checks {
}
}
}
-}
+ // Citation string
+ private static String citation(workflow) {
+ return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" +
+ "* The pipeline\n" +
+ " https://doi.org/10.12688/f1000research.16665.2\n" +
+ " https://doi.org/10.5281/zenodo.4468605\n\n" +
+ "* The nf-core framework\n" +
+ " https://doi.org/10.1038/s41587-020-0439-x\n\n" +
+ "* Software dependencies\n" +
+ " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md"
+ }
+
+ // Exit pipeline if incorrect --genome key provided
+ static void genome_exists(params, log) {
+ if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) {
+ log.error "=============================================================================\n" +
+ " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" +
+ " Currently, the available genome keys are:\n" +
+ " ${params.genomes.keySet().join(", ")}\n" +
+ "============================================================================="
+            System.exit(1)
+ }
+ }
+
+ // Get attribute from genome config file e.g. fasta
+ static String get_genome_attribute(params, attribute) {
+ def val = ''
+ if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) {
+ if (params.genomes[ params.genome ].containsKey(attribute)) {
+ val = params.genomes[ params.genome ][ attribute ]
+ }
+ }
+ return val
+ }
+
+}
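
Hypothetical call sites for the new helpers, as they would appear in `main.nf` (names and placement are assumptions, not part of this diff):

```groovy
// Hypothetical usage in main.nf; not part of this diff.
Checks.genome_exists(params, log)                         // fail fast on a bad --genome key
def fasta = Checks.get_genome_attribute(params, 'fasta')  // e.g. resolve the FASTA path
if (workflow.profile.contains('conda')) {
    Checks.check_conda_channels(log)                      // warn on mis-ordered channels
}
```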
diff --git a/lib/Completion.groovy b/lib/Completion.groovy
index 956a87574e..5a933eb9a5 100644
--- a/lib/Completion.groovy
+++ b/lib/Completion.groovy
@@ -3,7 +3,7 @@
*/
class Completion {
- static void email(workflow, params, summary, run_name, projectDir, multiqc_report, log) {
+ static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) {
// Set up the e-mail variables
def subject = "[$workflow.manifest.name] Successful: $workflow.runName"
@@ -11,36 +11,45 @@ class Completion {
subject = "[$workflow.manifest.name] FAILED: $workflow.runName"
}
- def email_fields = [:]
- email_fields['version'] = workflow.manifest.version
- email_fields['runName'] = run_name ?: workflow.runName
- email_fields['success'] = workflow.success
- email_fields['dateComplete'] = workflow.complete
- email_fields['duration'] = workflow.duration
- email_fields['exitStatus'] = workflow.exitStatus
- email_fields['errorMessage'] = (workflow.errorMessage ?: 'None')
- email_fields['errorReport'] = (workflow.errorReport ?: 'None')
- email_fields['commandLine'] = workflow.commandLine
- email_fields['projectDir'] = workflow.projectDir
- email_fields['summary'] = summary
- email_fields['summary']['Date Started'] = workflow.start
- email_fields['summary']['Date Completed'] = workflow.complete
- email_fields['summary']['Pipeline script file path'] = workflow.scriptFile
- email_fields['summary']['Pipeline script hash ID'] = workflow.scriptId
- if (workflow.repository) email_fields['summary']['Pipeline repository Git URL'] = workflow.repository
- if (workflow.commitId) email_fields['summary']['Pipeline repository Git Commit'] = workflow.commitId
- if (workflow.revision) email_fields['summary']['Pipeline Git branch/tag'] = workflow.revision
- email_fields['summary']['Nextflow Version'] = workflow.nextflow.version
- email_fields['summary']['Nextflow Build'] = workflow.nextflow.build
- email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp
+ def summary = [:]
+ for (group in summary_params.keySet()) {
+ summary << summary_params[group]
+ }
+
+ def misc_fields = [:]
+ misc_fields['Date Started'] = workflow.start
+ misc_fields['Date Completed'] = workflow.complete
+ misc_fields['Pipeline script file path'] = workflow.scriptFile
+ misc_fields['Pipeline script hash ID'] = workflow.scriptId
+ if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository
+ if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId
+ if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision
+ misc_fields['Nextflow Version'] = workflow.nextflow.version
+ misc_fields['Nextflow Build'] = workflow.nextflow.build
+ misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp
+ def email_fields = [:]
+ email_fields['version'] = workflow.manifest.version
+ email_fields['runName'] = workflow.runName
+ email_fields['success'] = workflow.success
+ email_fields['dateComplete'] = workflow.complete
+ email_fields['duration'] = workflow.duration
+ email_fields['exitStatus'] = workflow.exitStatus
+ email_fields['errorMessage'] = (workflow.errorMessage ?: 'None')
+ email_fields['errorReport'] = (workflow.errorReport ?: 'None')
+ email_fields['commandLine'] = workflow.commandLine
+ email_fields['projectDir'] = workflow.projectDir
+ email_fields['summary'] = summary << misc_fields
+
// On success try attach the multiqc report
def mqc_report = null
try {
- if (workflow.success) {
+ if (workflow.success && !params.skip_multiqc) {
mqc_report = multiqc_report.getVal()
- if (mqc_report.getClass() == ArrayList) {
- log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one"
+ if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) {
+ if (mqc_report.size() > 1) {
+ log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one"
+ }
mqc_report = mqc_report[0]
}
}
@@ -55,37 +64,39 @@ class Completion {
}
// Render the TXT template
- def engine = new groovy.text.GStringTemplateEngine()
- def tf = new File("$projectDir/assets/email_template.txt")
+ def engine = new groovy.text.GStringTemplateEngine()
+ def tf = new File("$projectDir/assets/email_template.txt")
def txt_template = engine.createTemplate(tf).make(email_fields)
- def email_txt = txt_template.toString()
+ def email_txt = txt_template.toString()
// Render the HTML template
- def hf = new File("$projectDir/assets/email_template.html")
+ def hf = new File("$projectDir/assets/email_template.html")
def html_template = engine.createTemplate(hf).make(email_fields)
- def email_html = html_template.toString()
+ def email_html = html_template.toString()
// Render the sendmail template
- def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: params.max_multiqc_email_size.toBytes() ]
- def sf = new File("$projectDir/assets/sendmail_template.txt")
- def sendmail_template = engine.createTemplate(sf).make(smail_fields)
- def sendmail_html = sendmail_template.toString()
+ def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit
+ def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes()]
+ def sf = new File("$projectDir/assets/sendmail_template.txt")
+ def sendmail_template = engine.createTemplate(sf).make(smail_fields)
+ def sendmail_html = sendmail_template.toString()
// Send the HTML e-mail
+ Map colors = Headers.log_colours(params.monochrome_logs)
if (email_address) {
try {
if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') }
// Try to send HTML e-mail using sendmail
[ 'sendmail', '-t' ].execute() << sendmail_html
- log.info "[$workflow.manifest.name] Sent summary e-mail to $email_address (sendmail)"
+ log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-"
} catch (all) {
// Catch failures and try with plaintext
def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ]
- if ( mqc_report.size() <= params.max_multiqc_email_size.toBytes() ) {
+ if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) {
mail_cmd += [ '-A', mqc_report ]
}
mail_cmd.execute() << email_html
- log.info "[$workflow.manifest.name] Sent summary e-mail to $email_address (mail)"
+ log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-"
}
}
@@ -102,17 +113,16 @@ class Completion {
static void summary(workflow, params, log) {
Map colors = Headers.log_colours(params.monochrome_logs)
- if (workflow.stats.ignoredCount > 0 && workflow.success) {
- log.info "-${colors.purple}Warning, pipeline completed, but with errored process(es) ${colors.reset}-"
- log.info "-${colors.red}Number of ignored errored process(es) : ${workflow.stats.ignoredCount} ${colors.reset}-"
- log.info "-${colors.green}Number of successfully ran process(es) : ${workflow.stats.succeedCount} ${colors.reset}-"
- }
+
if (workflow.success) {
- log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-"
+ if (workflow.stats.ignoredCount == 0) {
+ log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-"
+ } else {
+ log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed successfully, but with errored process(es) ${colors.reset}-"
+ }
} else {
- Checks.hostname()
+ Checks.hostname(workflow, params, log)
log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-"
}
}
}
-
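
With the changed signature (`summary_params` replaces `summary`/`run_name`, and `multiqc_report` defaults to `[]`), the completion handlers would be wired up roughly as below; `summary_params` and `multiqc_report` are assumed to be defined elsewhere in the workflow script:

```groovy
// Hypothetical onComplete wiring matching the new signatures.
workflow.onComplete {
    Completion.email(workflow, params, summary_params, projectDir, log, multiqc_report)
    Completion.summary(workflow, params, log)
}
```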
diff --git a/lib/Headers.groovy b/lib/Headers.groovy
index ee3817cfde..19e3220561 100644
--- a/lib/Headers.groovy
+++ b/lib/Headers.groovy
@@ -6,25 +6,30 @@ class Headers {
private static Map log_colours(Boolean monochrome_logs) {
Map colorcodes = [:]
- colorcodes['reset'] = monochrome_logs ? '' : "\033[0m"
- colorcodes['dim'] = monochrome_logs ? '' : "\033[2m"
- colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m"
- colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m"
- colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m"
+ colorcodes['reset'] = monochrome_logs ? '' : "\033[0m"
+ colorcodes['dim'] = monochrome_logs ? '' : "\033[2m"
+ colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m"
+ colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m"
+ colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m"
colorcodes['yellow_bold'] = monochrome_logs ? '' : "\033[1;93m"
- colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m"
- colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m"
- colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m"
- colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m"
- colorcodes['red'] = monochrome_logs ? '' : "\033[1;91m"
+ colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m"
+ colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m"
+ colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m"
+ colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m"
+ colorcodes['red'] = monochrome_logs ? '' : "\033[1;91m"
return colorcodes
}
+ static String dashed_line(monochrome_logs) {
+ Map colors = log_colours(monochrome_logs)
+ return "-${colors.dim}----------------------------------------------------${colors.reset}-"
+ }
+
static String nf_core(workflow, monochrome_logs) {
Map colors = log_colours(monochrome_logs)
String.format(
-"""
--${colors.dim}----------------------------------------------------${colors.reset}-
+ """\n
+${dashed_line(monochrome_logs)}
${colors.green},--.${colors.black}/${colors.green},-.${colors.reset}
${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset}
${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset}
@@ -38,7 +43,7 @@ class Headers {
${colors.white}`${colors.green}|${colors.white}____${colors.green}\\${colors.white}´${colors.reset}
${colors.purple} ${workflow.manifest.name} v${workflow.manifest.version}${colors.reset}
--${colors.dim}--------------------------------------------------${colors.reset}-
+${dashed_line(monochrome_logs)}
""".stripIndent()
)
}
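
Factoring the separator into `dashed_line()` lets callers reuse it outside the header, e.g. (illustrative):

```groovy
// Illustrative use of the extracted helpers.
log.info Headers.nf_core(workflow, params.monochrome_logs)
log.info Headers.dashed_line(params.monochrome_logs)
```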
diff --git a/lib/Schema.groovy b/lib/Schema.groovy
index a4ac82173a..c2cad355a5 100644
--- a/lib/Schema.groovy
+++ b/lib/Schema.groovy
@@ -4,19 +4,19 @@
import groovy.json.JsonSlurper
-class JSON {
+class Schema {
/*
* This method tries to read a JSON params file
*/
- private static LinkedHashMap params_get(String path) {
- def usage = new LinkedHashMap()
+ private static LinkedHashMap params_load(String json_schema) {
+ def params_map = new LinkedHashMap()
try {
- usage = params_try(path)
+ params_map = params_read(json_schema)
} catch (Exception e) {
println "Could not read parameters settings from JSON. $e"
- usage = new LinkedHashMap()
+ params_map = new LinkedHashMap()
}
- return usage
+ return params_map
}
/*
@@ -28,199 +28,201 @@ class JSON {
Group
-
*/
- private static LinkedHashMap params_try(String path) throws Exception {
-
- def json = new File(path).text
- def Map usage = (Map) new JsonSlurper().parseText(json).get('properties')
-
+ private static LinkedHashMap params_read(String json_schema) throws Exception {
+ def json = new File(json_schema).text
+ def Map json_params = (Map) new JsonSlurper().parseText(json).get('definitions')
/* Tree looks like this in nf-core schema
- * properties <- this is what the first get('properties') gets us
+ * definitions <- this is what the first get('definitions') gets us
group 1
- properties
+ title
description
+ properties
+ parameter 1
+ type
+ description
+ parameter 2
+ type
+ description
group 2
- properties
- description
- group 3
- properties
+ title
description
+ properties
+ parameter 1
+ type
+ description
*/
- def output_map = new LinkedHashMap()
-
- // Lets go deeper
- usage.each { key, val ->
- def Map submap = usage."$key".properties // Gets the property object of the group
+ def params_map = new LinkedHashMap()
+ json_params.each { key, val ->
+ def Map group = json_params."$key".properties // Gets the property object of the group
+ def title = json_params."$key".title
def sub_params = new LinkedHashMap()
- submap.each { innerkey, value ->
- sub_params.put("$innerkey", "$value.description")
+ group.each { innerkey, value ->
+ sub_params.put(innerkey, value)
}
- output_map.put("$key", sub_params)
+ params_map.put(title, sub_params)
}
- return output_map
+ return params_map
}
- static String params_help(path, command) {
- String output = "Typical pipeline command:\n\n"
- output += " ${command}\n\n"
- output += params_beautify(params_get(path))
+ /*
+ * Get maximum number of characters across all parameter names
+ */
+ private static Integer params_max_chars(params_map) {
+ Integer max_chars = 0
+ for (group in params_map.keySet()) {
+ def group_params = params_map.get(group) // This gets the parameters of that particular group
+ for (param in group_params.keySet()) {
+ if (param.size() > max_chars) {
+ max_chars = param.size()
+ }
+ }
+ }
+ return max_chars
}
- static String params_beautify(usage) {
- String output = ""
- for (group in usage.keySet()) {
+ /*
+ * Beautify parameters for --help
+ */
+ private static String params_help(workflow, params, json_schema, command) {
+ String output = Headers.nf_core(workflow, params.monochrome_logs) + "\n"
+ output += "Typical pipeline command:\n\n"
+ output += " ${command}\n\n"
+ def params_map = params_load(json_schema)
+ def max_chars = params_max_chars(params_map) + 1
+ for (group in params_map.keySet()) {
output += group + "\n"
- def params = usage.get(group) // This gets the parameters of that particular group
- for (par in params.keySet()) {
- output+= " \u001B[1m" + par.padRight(27) + "\u001B[1m" + params.get(par) + "\n"
+ def group_params = params_map.get(group) // This gets the parameters of that particular group
+ for (param in group_params.keySet()) {
+ def type = "[" + group_params.get(param).type + "]"
+ def description = group_params.get(param).description
+ output += " \u001B[1m--" + param.padRight(max_chars) + "\u001B[1m" + type.padRight(10) + description + "\n"
}
output += "\n"
}
+ output += Headers.dashed_line(params.monochrome_logs)
+ output += "\n\n" + Checks.citation(workflow)
+ output += "\n\n" + Headers.dashed_line(params.monochrome_logs)
return output
}
- private static LinkedHashMap params_summary(workflow, params, run_name, step, tools, skip_qc, annotate_tools) {
- def Map summary = [:]
- if (workflow.revision) summary['Pipeline Release'] = workflow.revision
- summary['Run Name'] = run_name ?: workflow.runName
- summary['Max Resources'] = "${params.max_memory} memory, ${params.max_cpus} cpus, ${params.max_time} time per job"
- if (workflow.containerEngine) summary['Container'] = "${workflow.containerEngine} - ${workflow.container}"
- summary['Input'] = params.input
- summary['Step'] = step
- summary['Genome'] = params.genome
- if (params.no_intervals && step != 'annotate') summary['Intervals'] = 'Do not use'
- summary['Nucleotides/s'] = params.nucleotides_per_second
- if (params.sentieon) summary['Sention'] = "Using Sentieon for Preprocessing and/or Variant Calling"
- if (params.skip_qc) summary['QC tools skipped'] = skip_qc.join(', ')
- if (params.target_bed) summary['Target BED'] = params.target_bed
- if (params.tools) summary['Tools'] = tools.join(', ')
- if (params.trim_fastq || params.split_fastq) summary['Modify fastqs'] = "trim and/or split"
-
- if (params.trim_fastq) {
- summary['Fastq trim'] = "Fastq trim selected"
- summary['Trim R1'] = "${params.clip_r1} bp"
- summary['Trim R2'] = "${params.clip_r2} bp"
- summary["Trim 3 R1"] = "${params.three_prime_clip_r1} bp"
- summary["Trim 3 R2"] = "${params.three_prime_clip_r2} bp"
- summary['NextSeq Trim'] = "${params.trim_nextseq} bp"
- summary['Saved Trimmed Fastq'] = params.save_trimmed ? 'Yes' : 'No'
- }
- if (params.split_fastq) summary['Reads in fastq'] = params.split_fastq
-
- summary['MarkDuplicates'] = "Options"
- summary['Java options'] = params.markdup_java_options
- summary['GATK Spark'] = params.use_gatk_spark ? 'Yes' : 'No'
-
- summary['Save BAMs mapped'] = params.save_bam_mapped ? 'Yes' : 'No'
- summary['Skip MarkDuplicates'] = params.skip_markduplicates ? 'Yes' : 'No'
-
- if ('ascat' in tools) {
- summary['ASCAT'] = "Options"
- if (params.ascat_purity) summary['purity'] = params.ascat_purity
- if (params.ascat_ploidy) summary['ploidy'] = params.ascat_ploidy
- }
-
- if ('controlfreec' in tools) {
- summary['Control-FREEC'] = "Options"
- if (params.cf_window) summary['window'] = params.cf_window
- if (params.cf_coeff) summary['coeff of variation'] = params.cf_coeff
- if (params.cf_ploidy) summary['ploidy'] = params.cf_ploidy
+ /*
+ * Groovy Map summarising parameters/workflow options used by the pipeline
+ */
+ private static LinkedHashMap params_summary_map(workflow, params, json_schema) {
+ // Get a selection of core Nextflow workflow options
+ def Map workflow_summary = [:]
+ if (workflow.revision) {
+ workflow_summary['revision'] = workflow.revision
}
-
- if ('haplotypecaller' in tools) summary['GVCF'] = params.generate_gvcf ? 'Yes' : 'No'
- if ('strelka' in tools && 'manta' in tools) summary['Strelka BP'] = params.no_strelka_bp ? 'No' : 'Yes'
- if (params.pon && ('mutect2' in tools || (params.sentieon && 'tnscope' in tools))) summary['Panel of normals'] = params.pon
-
- if (params.annotate_tools) summary['Tools to annotate'] = annotate_tools.join(', ')
-
- if (params.annotation_cache) {
- summary['Annotation cache'] = "Enabled"
- if (params.snpeff_cache) summary['snpEff cache'] = params.snpeff_cache
- if (params.vep_cache) summary['VEP cache'] = params.vep_cache
+ workflow_summary['runName'] = workflow.runName
+ if (workflow.containerEngine) {
+ workflow_summary['containerEngine'] = "$workflow.containerEngine"
}
-
- if (params.cadd_cache) {
- summary['CADD cache'] = "Enabled"
- if (params.cadd_indels) summary['CADD indels'] = params.cadd_indels
- if (params.cadd_wg_snvs) summary['CADD wg snvs'] = params.cadd_wg_snvs
+ if (workflow.container) {
+ workflow_summary['container'] = "$workflow.container"
}
-
- if (params.genesplicer) summary['genesplicer'] = "Enabled"
-
- if (params.igenomes_base && !params.igenomes_ignore) summary['AWS iGenomes base'] = params.igenomes_base
- if (params.igenomes_ignore) summary['AWS iGenomes'] = "Do not use"
- if (params.genomes_base && !params.igenomes_ignore) summary['Genomes base'] = params.genomes_base
-
- summary['Save Reference'] = params.save_reference ? 'Yes' : 'No'
-
- if (params.ac_loci) summary['Loci'] = params.ac_loci
- if (params.ac_loci_gc) summary['Loci GC'] = params.ac_loci_gc
- if (params.bwa) summary['BWA indexes'] = params.bwa
- if (params.chr_dir) summary['Chromosomes'] = params.chr_dir
- if (params.chr_length) summary['Chromosomes length'] = params.chr_length
- if (params.dbsnp) summary['dbsnp'] = params.dbsnp
- if (params.dbsnp_index) summary['dbsnp index'] = params.dbsnp_index
- if (params.dict) summary['dict'] = params.dict
- if (params.fasta) summary['fasta reference'] = params.fasta
- if (params.fasta_fai) summary['fasta index'] = params.fasta_fai
- if (params.germline_resource) summary['germline resource'] = params.germline_resource
- if (params.germline_resource_index) summary['germline resource index'] = params.germline_resource_index
- if (params.intervals) summary['intervals'] = params.intervals
- if (params.known_indels) summary['known indels'] = params.known_indels
- if (params.known_indels_index) summary['known indels index'] = params.known_indels_index
- if (params.mappability) summary['Mappability'] = params.mappability
- if (params.snpeff_cache) summary['snpEff cache'] = params.snpeff_cache
- if (params.snpeff_db) summary['snpEff DB'] = params.snpeff_db
- if (params.species) summary['snpEff species'] = params.species
- if (params.vep_cache) summary['VEP cache'] = params.vep_cache
- if (params.vep_cache_version) summary['VEP cache version'] = params.vep_cache_version
-
- summary['Output dir'] = params.outdir
- summary['Publish dir mode'] = params.publish_dir_mode
- if (params.sequencing_center) summary['Sequenced by'] = params.sequencing_center
-
- summary['Launch dir'] = workflow.launchDir
- summary['Working dir'] = workflow.workDir
- summary['Script dir'] = workflow.projectDir
- summary['User'] = workflow.userName
-
- if (params.multiqc_config) summary['MultiQC config'] = params.multiqc_config
-
- summary['Config Profile'] = workflow.profile
-
- if (params.config_profile_description) summary['Description'] = params.config_profile_description
- if (params.config_profile_contact) summary['Contact'] = params.config_profile_contact
- if (params.config_profile_url) summary['URL'] = params.config_profile_url
-
- summary['Config Files'] = workflow.configFiles.join(', ')
-
- if (params.email || params.email_on_fail) {
- summary['E-mail Address'] = params.email
- summary['E-mail on failure'] = params.email_on_fail
- summary['MultiQC maxsize'] = params.max_multiqc_email_size
+ workflow_summary['launchDir'] = workflow.launchDir
+ workflow_summary['workDir'] = workflow.workDir
+ workflow_summary['projectDir'] = workflow.projectDir
+ workflow_summary['userName'] = workflow.userName
+ workflow_summary['profile'] = workflow.profile
+ workflow_summary['configFiles'] = workflow.configFiles.join(', ')
+
+ // Get pipeline parameters defined in JSON Schema
+ def Map params_summary = [:]
+ def blacklist = ['hostnames']
+ def params_map = params_load(json_schema)
+ for (group in params_map.keySet()) {
+ def sub_params = new LinkedHashMap()
+ def group_params = params_map.get(group) // This gets the parameters of that particular group
+ for (param in group_params.keySet()) {
+ if (params.containsKey(param) && !blacklist.contains(param)) {
+ def params_value = params.get(param)
+ def schema_value = group_params.get(param).default
+ def param_type = group_params.get(param).type
+ if (schema_value == null) {
+ if (param_type == 'boolean') {
+ schema_value = false
+ }
+ if (param_type == 'string') {
+ schema_value = ''
+ }
+ if (param_type == 'integer') {
+ schema_value = 0
+ }
+ } else {
+ if (param_type == 'string') {
+ if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) {
+ def sub_string = schema_value.replace('\$projectDir','')
+ sub_string = sub_string.replace('\${projectDir}','')
+ if (params_value.contains(sub_string)) {
+ schema_value = params_value
+ }
+ }
+ if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) {
+ def sub_string = schema_value.replace('\$params.outdir','')
+ sub_string = sub_string.replace('\${params.outdir}','')
+ if ("${params.outdir}${sub_string}" == params_value) {
+ schema_value = params_value
+ }
+ }
+ }
+ }
+
+ if (params_value != schema_value) {
+ sub_params.put("$param", params_value)
+ }
+ }
+ }
+ params_summary.put(group, sub_params)
}
+ return [ 'Core Nextflow options' : workflow_summary ] << params_summary
+ }
- if (workflow.profile.contains('awsbatch')) {
- summary['AWS Region'] = params.awsregion
- summary['AWS Queue'] = params.awsqueue
- summary['AWS CLI'] = params.awscli
+ /*
+ * Beautify parameters for summary and return as string
+ */
+ private static String params_summary_log(workflow, params, json_schema) {
+ String output = Headers.nf_core(workflow, params.monochrome_logs) + "\n"
+ def params_map = params_summary_map(workflow, params, json_schema)
+ def max_chars = params_max_chars(params_map)
+ for (group in params_map.keySet()) {
+ def group_params = params_map.get(group) // This gets the parameters of that particular group
+ if (group_params) {
+ output += group + "\n"
+ for (param in group_params.keySet()) {
+ output += " \u001B[1m" + param.padRight(max_chars) + ": \u001B[1m" + group_params.get(param) + "\n"
+ }
+ output += "\n"
+ }
}
-
- return summary
+ output += Headers.dashed_line(params.monochrome_logs)
+ output += "\n\n" + Checks.citation(workflow)
+ output += "\n\n" + Headers.dashed_line(params.monochrome_logs)
+ return output
}
- static String params_mqc_summary(summary) {
- String yaml_file_text = """
- id: 'nf-core-sarek-summary'
- description: " - this information is collected when the pipeline is started."
- section_name: 'nf-core/sarek Workflow Summary'
- section_href: 'https://github.com/nf-core/sarek'
- plot_type: 'html'
- data: |
-            <dl class=\"dl-horizontal\">
-            ${summary.collect { k,v -> "            <dt>$k</dt><dd><samp>${v ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>" }.join("\n")}
-            </dl>
-        """.stripIndent()
+ static String params_summary_multiqc(workflow, summary) {
+ String summary_section = ''
+ for (group in summary.keySet()) {
+ def group_params = summary.get(group) // This gets the parameters of that particular group
+ if (group_params) {
+ summary_section += " $group
\n"
+ summary_section += " \n"
+ for (param in group_params.keySet()) {
+ summary_section += " - $param
- ${group_params.get(param) ?: 'N/A'}
\n"
+ }
+ summary_section += "
\n"
+ }
+ }
+ String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n"
+ yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n"
+ yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n"
+ yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n"
+ yaml_file_text += "plot_type: 'html'\n"
+ yaml_file_text += "data: |\n"
+ yaml_file_text += "${summary_section}"
return yaml_file_text
}
}
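
A hedged sketch of how the renamed class is driven from `main.nf` (the `--help` call is visible in the `main.nf` hunk below; the summary calls are assumptions following the same pattern):

```groovy
// Hypothetical call sites for the renamed Schema class.
def json_schema = "$projectDir/nextflow_schema.json"
log.info Schema.params_summary_log(workflow, params, json_schema)             // console summary
def summary_params = Schema.params_summary_map(workflow, params, json_schema) // for email/MultiQC
```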
diff --git a/main.nf b/main.nf
index d571c4f9a0..0c77129ce8 100644
--- a/main.nf
+++ b/main.nf
@@ -22,1922 +22,76 @@ nf-core/sarek:
nextflow.enable.dsl=2
-// Print help message if required
+////////////////////////////////////////////////////
+/* -- PRINT HELP -- */
+////////////////////////////////////////////////////
+def json_schema = "$projectDir/nextflow_schema.json"
if (params.help) {
- def command = "nextflow run nf-core/sarek -profile docker --input sample.tsv"
- log.info Schema.params_help("$projectDir/nextflow_schema.json", command)
+ def command = "nextflow run nf-core/sarek -profile docker --input sample.tsv --genome GRCh38"
+ log.info Schema.params_help(workflow, params, json_schema, command)
exit 0
}
-/*
---------------------------------------------------------------------------------
- INCLUDE SAREK FUNCTIONS
---------------------------------------------------------------------------------
-*/
-
-include {
- check_parameter_existence;
- check_parameter_list;
- define_anno_list;
- define_skip_qc_list;
- define_step_list;
- define_tool_list;
- extract_bam;
- extract_fastq;
- extract_fastq_from_dir;
- extract_recal;
- has_extension
-} from './modules/local/functions'
-
-/*
---------------------------------------------------------------------------------
- SET UP CONFIGURATION VARIABLES
---------------------------------------------------------------------------------
-*/
-
-// Check parameters
-
-Checks.aws_batch(workflow, params) // Check AWS batch settings
-Checks.hostname(workflow, params, log) // Check the hostnames against configured profiles
-
-// MultiQC - Stage config files
-
-multiqc_config = file("$projectDir/assets/multiqc_config.yaml", checkIfExists: true)
-multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty()
-output_docs = file("$projectDir/docs/output.md", checkIfExists: true)
-output_docs_images = file("$projectDir/docs/images/", checkIfExists: true)
-
-// Check if genome exists in the config file
-if (params.genomes && !params.genomes.containsKey(params.genome) && !params.igenomes_ignore) {
- exit 1, "The provided genome '${params.genome}' is not available in the iGenomes file. Currently the available genomes are ${params.genomes.keySet().join(", ")}"
-} else if (params.genomes && !params.genomes.containsKey(params.genome) && params.igenomes_ignore) {
- exit 1, "The provided genome '${params.genome}' is not available in the genomes file. Currently the available genomes are ${params.genomes.keySet().join(", ")}"
-}
-
-step_list = define_step_list()
-step = params.step ? params.step.toLowerCase().replaceAll('-', '').replaceAll('_', '') : ''
-
-if (step.contains(',')) exit 1, 'You can choose only one step, see --help for more information'
-if (!check_parameter_existence(step, step_list)) exit 1, "Unknown step ${step}, see --help for more information"
-
-tool_list = define_tool_list()
-tools = params.tools ? params.tools.split(',').collect{it.trim().toLowerCase().replaceAll('-', '').replaceAll('_', '')} : []
-if (step == 'controlfreec') tools = ['controlfreec']
-if (!check_parameter_list(tools, tool_list)) exit 1, 'Unknown tool(s), see --help for more information'
-
-skip_qc_list = define_skip_qc_list()
-skip_qc = params.skip_qc ? params.skip_qc == 'all' ? skip_qc_list : params.skip_qc.split(',').collect{it.trim().toLowerCase().replaceAll('-', '').replaceAll('_', '')} : []
-if (!check_parameter_list(skip_qc, skip_qc_list)) exit 1, 'Unknown QC tool(s), see --help for more information'
-
-anno_list = define_anno_list()
-annotate_tools = params.annotate_tools ? params.annotate_tools.split(',').collect{it.trim().toLowerCase().replaceAll('-', '')} : []
-if (!check_parameter_list(annotate_tools,anno_list)) exit 1, 'Unknown tool(s) to annotate, see --help for more information'
-
-if (!(params.aligner in ['bwa-mem', 'bwa-mem2'])) exit 1, 'Unknown aligner, see --help for more information'
-
-// // Check parameters
-if ((params.ascat_ploidy && !params.ascat_purity) || (!params.ascat_ploidy && params.ascat_purity)) exit 1, 'Please specify both --ascat_purity and --ascat_ploidy, or none of them'
-if (params.cf_window && params.cf_coeff) exit 1, 'Please specify either --cf_window OR --cf_coeff, but not both of them'
-if (params.umi && !(params.read_structure1 && params.read_structure2)) exit 1, 'Please specify both --read_structure1 and --read_structure2, when using --umi'
-
-// Handle input
-tsv_path = null
-if (params.input && (has_extension(params.input, "tsv") || has_extension(params.input, "vcf") || has_extension(params.input, "vcf.gz"))) tsv_path = params.input
-if (params.input && (has_extension(params.input, "vcf") || has_extension(params.input, "vcf.gz"))) step = "annotate"
-
-save_bam_mapped = params.skip_markduplicates ? true : params.save_bam_mapped ? true : false
-
-// If no input file specified, trying to get TSV files corresponding to step in the TSV directory
-// only for steps preparerecalibration, recalibrate, variantcalling and controlfreec
-if (!params.input && params.sentieon) {
- switch (step) {
- case 'mapping': break
- case 'recalibrate': tsv_path = "${params.outdir}/preprocessing/tsv/sentieon_deduped.tsv"; break
- case 'variantcalling': tsv_path = "${params.outdir}/preprocessing/tsv/sentieon_recalibrated.tsv"; break
- case 'annotate': break
- default: exit 1, "Unknown step ${step}"
- }
-} else if (!params.input && !params.sentieon && !params.skip_markduplicates) {
- switch (step) {
- case 'mapping': break
- case 'preparerecalibration': tsv_path = "${params.outdir}/preprocessing/tsv/markduplicates_no_table.tsv"; break
- case 'recalibrate': tsv_path = "${params.outdir}/preprocessing/tsv/markduplicates.tsv"; break
- case 'variantcalling': tsv_path = "${params.outdir}/preprocessing/tsv/recalibrated.tsv"; break
- case 'controlfreec': tsv_path = "${params.outdir}/variant_calling/tsv/control-freec_mpileup.tsv"; break
- case 'annotate': break
- default: exit 1, "Unknown step ${step}"
- }
-} else if (!params.input && !params.sentieon && params.skip_markduplicates) {
- switch (step) {
- case 'mapping': break
- case 'preparerecalibration': tsv_path = "${params.outdir}/preprocessing/tsv/mapped.tsv"; break
- case 'recalibrate': tsv_path = "${params.outdir}/preprocessing/tsv/mapped_no_markduplicates.tsv"; break
- case 'variantcalling': tsv_path = "${params.outdir}/preprocessing/tsv/recalibrated.tsv"; break
- case 'controlfreec': tsv_path = "${params.outdir}/variant_calling/tsv/control-freec_mpileup.tsv"; break
- case 'annotate': break
- default: exit 1, "Unknown step ${step}"
- }
-}
-
-input_sample = Channel.empty()
-if (tsv_path) {
- tsv_file = file(tsv_path)
- switch (step) {
- case 'mapping': input_sample = extract_fastq(tsv_file); break
- case 'preparerecalibration': input_sample = extract_bam(tsv_file); break
- case 'recalibrate': input_sample = extract_recal(tsv_file); break
- case 'variantcalling': input_sample = extract_bam(tsv_file); break
- case 'controlfreec': input_sample = extract_pileup(tsv_file); break
- case 'annotate': break
- default: exit 1, "Unknown step ${step}"
- }
-} else if (params.input && !has_extension(params.input, "tsv")) {
- log.info "No TSV file"
- if (step != 'mapping') exit 1, 'No step other than "mapping" supports a directory as an input'
- log.info "Reading ${params.input} directory"
- log.warn "[nf-core/sarek] in ${params.input} directory, all fastqs are assuming to be from the same sample, which is assumed to be a germline one"
- input_sample = extract_fastq_from_dir(params.input)
- tsv_file = params.input // used in the reports
-} else if (tsv_path && step == 'annotate') {
- log.info "Annotating ${tsv_path}"
-} else if (step == 'annotate') {
- log.info "Trying automatic annotation on files in the VariantCalling/ directory"
-} else exit 1, 'No sample were defined, see --help'
-
-/*
---------------------------------------------------------------------------------
- UPDATE MODULES OPTIONS BASED ON PARAMS
---------------------------------------------------------------------------------
-*/
-
-modules = params.modules
-
-if (params.save_reference) modules['build_intervals'].publish_files = ['bed':'intervals']
-if (params.save_reference) modules['bwa_index'].publish_files = ['amb':'bwa', 'ann':'bwa', 'bwt':'bwa', 'pac':'bwa', 'sa':'bwa']
-if (params.save_reference) modules['bwamem2_index'].publish_files = ['0123':'bwamem2', 'amb':'bwamem2', 'ann':'bwamem2', 'bwt.2bit.64':'bwamem2', 'bwt.8bit.32':'bwamem2', 'pac':'bwamem2']
-if (params.save_reference) modules['create_intervals_bed'].publish_files = ['bed':'intervals']
-if (params.save_reference) modules['dict'].publish_files = ['dict':'dict']
-if (params.save_reference) modules['index_target_bed'].publish_files = ['bed.gz':'target', 'bed.gz.tbi':'target']
-if (params.save_reference) modules['msisensor_scan'].publish_files = ['list':'msi']
-if (params.save_reference) modules['samtools_faidx'].publish_files = ['fai':'fai']
-if (params.save_reference) modules['tabix_dbsnp'].publish_files = ['vcf.gz.tbi':'dbsnp']
-if (params.save_reference) modules['tabix_germline_resource'].publish_files = ['vcf.gz.tbi':'germline_resource']
-if (params.save_reference) modules['tabix_known_indels'].publish_files = ['vcf.gz.tbi':'known_indels']
-if (params.save_reference) modules['tabix_pon'].publish_files = ['vcf.gz.tbi':'pon']
-if (save_bam_mapped) modules['samtools_index_mapping'].publish_files = ['bam':'mapped', 'bai':'mapped']
-if (params.skip_markduplicates) modules['baserecalibrator'].publish_files = ['recal.table':'mapped']
-if (params.skip_markduplicates) modules['gatherbqsrreports'].publish_files = ['recal.table':'mapped']
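-
-// A note on the pattern above: `publish_files` is assumed to map an output-file
-// suffix to the subfolder it is published under, so e.g. ['bed':'intervals']
-// publishes the *.bed outputs of build_intervals into an intervals/ subfolder
-// whenever --save_reference is set.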
-
-/*
---------------------------------------------------------------------------------
- CHECKING REFERENCES
---------------------------------------------------------------------------------
-*/
-
-// Initialize each param in params.genomes; command-line values take precedence if defined
-params.ac_loci = params.genome ? params.genomes[params.genome].ac_loci ?: false : false
-params.ac_loci_gc = params.genome ? params.genomes[params.genome].ac_loci_gc ?: false : false
-params.bwa = params.genome ? params.genomes[params.genome].bwa ?: false : false
-params.chr_dir = params.genome ? params.genomes[params.genome].chr_dir ?: false : false
-params.chr_length = params.genome ? params.genomes[params.genome].chr_length ?: false : false
-params.dbsnp = params.genome ? params.genomes[params.genome].dbsnp ?: false : false
-params.dbsnp_index = params.genome ? params.genomes[params.genome].dbsnp_index ?: false : false
-params.dict = params.genome ? params.genomes[params.genome].dict ?: false : false
-params.fasta = params.genome ? params.genomes[params.genome].fasta ?: false : false
-params.fasta_fai = params.genome ? params.genomes[params.genome].fasta_fai ?: false : false
-params.germline_resource = params.genome ? params.genomes[params.genome].germline_resource ?: false : false
-params.germline_resource_index = params.genome ? params.genomes[params.genome].germline_resource_index ?: false : false
-params.intervals = params.genome ? params.genomes[params.genome].intervals ?: false : false
-params.known_indels = params.genome ? params.genomes[params.genome].known_indels ?: false : false
-params.known_indels_index = params.genome ? params.genomes[params.genome].known_indels_index ?: false : false
-params.mappability = params.genome ? params.genomes[params.genome].mappability ?: false : false
-params.snpeff_db = params.genome ? params.genomes[params.genome].snpeff_db ?: false : false
-params.species = params.genome ? params.genomes[params.genome].species ?: false : false
-params.vep_cache_version = params.genome ? params.genomes[params.genome].vep_cache_version ?: false : false
-
-file("${params.outdir}/no_file").text = "no_file\n"
-
-// Initialize file channels based on params, defined in the params.genomes[params.genome] scope
-chr_dir = params.chr_dir ? file(params.chr_dir) : file("${params.outdir}/no_file")
-chr_length = params.chr_length ? file(params.chr_length) : file("${params.outdir}/no_file")
-dbsnp = params.dbsnp ? file(params.dbsnp) : file("${params.outdir}/no_file")
-fasta = params.fasta ? file(params.fasta) : file("${params.outdir}/no_file")
-germline_resource = params.germline_resource ? file(params.germline_resource) : file("${params.outdir}/no_file")
-known_indels = params.known_indels ? file(params.known_indels) : file("${params.outdir}/no_file")
-loci = params.ac_loci ? file(params.ac_loci) : file("${params.outdir}/no_file")
-loci_gc = params.ac_loci_gc ? file(params.ac_loci_gc) : file("${params.outdir}/no_file")
-mappability = params.mappability ? file(params.mappability) : file("${params.outdir}/no_file")
-
-// Initialize value channels based on params, defined in the params.genomes[params.genome] scope
-snpeff_db = params.snpeff_db ?: Channel.empty()
-snpeff_species = params.species ?: Channel.empty()
-vep_cache_version = params.vep_cache_version ?: Channel.empty()
+////////////////////////////////////////////////////
+/* -- PARAMETER CHECKS -- */
+////////////////////////////////////////////////////
-// Initialize file channels based on params, not defined within the params.genomes[params.genome] scope
-cadd_indels = params.cadd_indels ? file(params.cadd_indels) : file("${params.outdir}/no_file")
-cadd_indels_tbi = params.cadd_indels_tbi ? file(params.cadd_indels_tbi) : file("${params.outdir}/no_file")
-cadd_wg_snvs = params.cadd_wg_snvs ? file(params.cadd_wg_snvs) : file("${params.outdir}/no_file")
-cadd_wg_snvs_tbi = params.cadd_wg_snvs_tbi ? file(params.cadd_wg_snvs_tbi) : file("${params.outdir}/no_file")
-pon = params.pon ? file(params.pon) : file("${params.outdir}/no_file")
-snpeff_cache = params.snpeff_cache ? file(params.snpeff_cache) : file("${params.outdir}/no_file")
-target_bed = params.target_bed ? file(params.target_bed) : file("${params.outdir}/no_file")
-vep_cache = params.vep_cache ? file(params.vep_cache) : file("${params.outdir}/no_file")
-
-// Initialize value channels based on params, not defined within the params.genomes[params.genome] scope
-read_structure1 = params.read_structure1 ?: Channel.empty()
-read_structure2 = params.read_structure2 ?: Channel.empty()
-
-/*
---------------------------------------------------------------------------------
- PRINTING SUMMARY
---------------------------------------------------------------------------------
-*/
-
-// Has the run name been specified by the user?
-// This has the bonus effect of catching both -name and --name
-run_name = params.name
-if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)) {
- run_name = workflow.runName
+// Check that conda channels are set-up correctly
+if (params.enable_conda) {
+ Checks.check_conda_channels(log)
}
-summary = Schema.params_summary(workflow, params, run_name, step, tools, skip_qc, annotate_tools)
-log.info Headers.nf_core(workflow, params.monochrome_logs)
-log.info summary.collect { k,v -> "${k.padRight(20)}: $v" }.join("\n")
-log.info "-\033[2m----------------------------------------------------\033[0m-"
-// params summary for MultiQC
-workflow_summary = Schema.params_mqc_summary(summary)
-workflow_summary = Channel.value(workflow_summary)
-
-if ('mutect2' in tools && !(params.pon)) log.warn "[nf-core/sarek] Mutect2 was requested, but since no panel of normals was given, results will not be optimal"
-if (params.sentieon) log.warn "[nf-core/sarek] Sentieon will be used; this only works if Sentieon is available where nf-core/sarek is run"
-
-/*
---------------------------------------------------------------------------------
- INCLUDE LOCAL MODULES
---------------------------------------------------------------------------------
-*/
-
-/*
---------------------------------------------------------------------------------
- INCLUDE LOCAL SUBWORKFLOWS
---------------------------------------------------------------------------------
-*/
-
-include { BUILD_INDICES } from './modules/local/subworkflow/build_indices' addParams(
- build_intervals_options: modules['build_intervals'],
- bwa_index_options: modules['bwa_index'],
- bwamem2_index_options: modules['bwamem2_index'],
- create_intervals_bed_options: modules['create_intervals_bed'],
- gatk_dict_options: modules['dict'],
- index_target_bed_options: modules['index_target_bed'],
- msisensor_scan_options: modules['msisensor_scan'],
- samtools_faidx_options: modules['samtools_faidx'],
- tabix_dbsnp_options: modules['tabix_dbsnp'],
- tabix_germline_resource_options: modules['tabix_germline_resource'],
- tabix_known_indels_options: modules['tabix_known_indels'],
- tabix_pon_options: modules['tabix_pon']
-)
-include { MAPPING } from './modules/local/subworkflow/mapping' addParams(
- bwamem1_mem_options: modules['bwa_mem1_mem'],
- bwamem2_mem_options: modules['bwa_mem2_mem'],
- merge_bam_options: modules['merge_bam_mapping'],
- qualimap_bamqc_options: modules['qualimap_bamqc_mapping'],
- samtools_index_options: modules['samtools_index_mapping'],
- samtools_stats_options: modules['samtools_stats_mapping']
-)
-include { MARKDUPLICATES } from './modules/local/subworkflow/markduplicates' addParams(
- markduplicates_options: modules['markduplicates']
-)
-include { PREPARE_RECALIBRATION } from './modules/local/subworkflow/prepare_recalibration' addParams(
- baserecalibrator_options: modules['baserecalibrator'],
- gatherbqsrreports_options: modules['gatherbqsrreports']
-)
-include { RECALIBRATE } from './modules/local/subworkflow/recalibrate' addParams(
- applybqsr_options: modules['applybqsr'],
- merge_bam_options: modules['merge_bam_recalibrate'],
- qualimap_bamqc_options: modules['qualimap_bamqc_recalibrate'],
- samtools_index_options: modules['samtools_index_recalibrate'],
- samtools_stats_options: modules['samtools_stats_recalibrate']
-)
-include { GERMLINE_VARIANT_CALLING } from './modules/local/subworkflow/germline_variant_calling' addParams(
- concat_gvcf_options: modules['concat_gvcf'],
- concat_haplotypecaller_options: modules['concat_haplotypecaller'],
- genotypegvcf_options: modules['genotypegvcf'],
- haplotypecaller_options: modules['haplotypecaller'],
- strelka_options: modules['strelka_germline']
-)
-// include { TUMOR_VARIANT_CALLING } from './modules/local/subworkflow/tumor_variant_calling' addParams(
-// )
-include { PAIR_VARIANT_CALLING } from './modules/local/subworkflow/pair_variant_calling' addParams(
- manta_options: modules['manta_somatic'],
- msisensor_msi_options: modules['msisensor_msi'],
- strelka_bp_options: modules['strelka_somatic_bp'],
- strelka_options: modules['strelka_somatic']
-)
-
-/*
---------------------------------------------------------------------------------
- INCLUDE nf-core MODULES
---------------------------------------------------------------------------------
-*/
-
-include { MULTIQC } from './modules/nf-core/software/multiqc'
-
-/*
---------------------------------------------------------------------------------
- INCLUDE nf-core SUBWORKFLOWS
---------------------------------------------------------------------------------
-*/
-
-include { QC_TRIM } from './modules/nf-core/subworkflow/qc_trim' addParams(
- fastqc_options: modules['fastqc'],
- trimgalore_options: modules['trimgalore']
-)
-// PREPARING CHANNELS FOR PREPROCESSING AND QC
-
-// input_bam = Channel.empty()
-// input_pair_reads = Channel.empty()
-
-// if (step in ['preparerecalibration', 'recalibrate', 'variantcalling', 'controlfreec', 'annotate']) {
-// input_bam.close()
-// input_pair_reads.close()
-// } else input_sample.branch(input_pair_reads, input_bam) {has_extension(it[3], "bam") ? 1 : 0}
-
-// (input_bam, input_bam_fastqc) = input_bam.into(2)
-
-// // Removing inputFile2 which is null in case of uBAM
-// input_bam_fastqc = input_bam_fastqc.map {
-// idPatient, idSample, idRun, inputFile1, inputFile2 ->
-// [idPatient, idSample, idRun, inputFile1]
-// }
-
-// if (params.split_fastq){
-// input_pair_reads = input_pair_reads
-//         // newly split FASTQ files are named based on the split, so the name is easier to catch
-// .splitFastq(by: params.split_fastq, compress:true, file:"split", pe:true)
-// .map {idPatient, idSample, idRun, reads1, reads2 ->
-//             // The split fastq read1 is the 4th element (indexed 3), so its name is split_3
-//             // The split fastq read2's name is split_4
-//             // It's followed by the index of the split it actually comes from in the mother fastq file
-//             // Indexing starts at 1
-//             // Extracting the index to get a new idRun
-// splitIndex = reads1.fileName.toString().minus("split_3.").minus(".gz")
-// newIdRun = idRun + "_" + splitIndex
-// // Giving the files a new nice name
-// newReads1 = file("${idSample}_${newIdRun}_R1.fastq.gz")
-// newReads2 = file("${idSample}_${newIdRun}_R2.fastq.gz")
-// [idPatient, idSample, newIdRun, reads1, reads2]}
-// }
-
-// input_pair_reads.dump(tag:'INPUT')
-
-// (input_pair_reads, input_pair_readstrimgalore, input_pair_readsfastqc) = input_pair_reads.into(3)
-
-
-/*
---------------------------------------------------------------------------------
- RUN THE WORKFLOW
---------------------------------------------------------------------------------
-*/
+// Check AWS batch settings
+Checks.aws_batch(workflow, params)
+
+// Check the hostnames against configured profiles
+Checks.hostname(workflow, params, log)
+
+// Check genome key exists if provided
+Checks.genome_exists(params, log)
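+
+// The Checks helpers above are assumed to live under lib/; as a hedged
+// illustration, a minimal sketch of the genome check could look like:
+//
+//     static void genome_exists(params, log) {
+//         // fail early when --genome does not match any key in the genomes map
+//         if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) {
+//             log.error "Genome '${params.genome}' not found in any config file"
+//             System.exit(1)
+//         }
+//     }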
+
+////////////////////////////////////////////////////
+/* -- REFERENCES PARAMETER VALUES -- */
+////////////////////////////////////////////////////
+/* -- Initialize each param in params.genomes  -- */
+/* -- catch the command line first if defined -- */
+////////////////////////////////////////////////////
+
+params.ac_loci = Checks.get_genome_attribute(params, 'ac_loci')
+params.ac_loci_gc = Checks.get_genome_attribute(params, 'ac_loci_gc')
+params.bwa = Checks.get_genome_attribute(params, 'bwa')
+params.chr_dir = Checks.get_genome_attribute(params, 'chr_dir')
+params.chr_length = Checks.get_genome_attribute(params, 'chr_length')
+params.dbsnp = Checks.get_genome_attribute(params, 'dbsnp')
+params.dbsnp_index = Checks.get_genome_attribute(params, 'dbsnp_index')
+params.dict = Checks.get_genome_attribute(params, 'dict')
+params.fasta = Checks.get_genome_attribute(params, 'fasta')
+params.fasta_fai = Checks.get_genome_attribute(params, 'fasta_fai')
+params.germline_resource = Checks.get_genome_attribute(params, 'germline_resource')
+params.germline_resource_index = Checks.get_genome_attribute(params, 'germline_resource_index')
+params.intervals = Checks.get_genome_attribute(params, 'intervals')
+params.known_indels = Checks.get_genome_attribute(params, 'known_indels')
+params.known_indels_index = Checks.get_genome_attribute(params, 'known_indels_index')
+params.mappability = Checks.get_genome_attribute(params, 'mappability')
+params.snpeff_db = Checks.get_genome_attribute(params, 'snpeff_db')
+params.species = Checks.get_genome_attribute(params, 'species')
+params.vep_cache_version = Checks.get_genome_attribute(params, 'vep_cache_version')
+
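+// `Checks.get_genome_attribute` is assumed to encapsulate the lookup the old code
+// spelled out inline for every attribute; a minimal sketch of that helper:
+//
+//     static def get_genome_attribute(params, attribute) {
+//         // return the attribute for the selected genome, or false when unset
+//         if (params.genome && params.genomes && params.genomes.containsKey(params.genome)) {
+//             return params.genomes[ params.genome ][ attribute ] ?: false
+//         }
+//         return false
+//     }
+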
+////////////////////////////////////////////////////
+/* -- PRINT PARAMETER SUMMARY -- */
+////////////////////////////////////////////////////
+
+def summary_params = Schema.params_summary_map(workflow, params, json_schema)
+log.info Schema.params_summary_log(workflow, params, json_schema)
+
+////////////////////////////////////////////////////
+/* -- RUN THE WORKFLOW -- */
+////////////////////////////////////////////////////
workflow {
-/*
---------------------------------------------------------------------------------
- BUILD INDICES
---------------------------------------------------------------------------------
-*/
-
- BUILD_INDICES(
- dbsnp,
- fasta,
- germline_resource,
- known_indels,
- pon,
- step,
- target_bed,
- tools)
-
- intervals = BUILD_INDICES.out.intervals
-
- bwa = params.bwa ? file(params.bwa) : BUILD_INDICES.out.bwa
- dict = params.dict ? file(params.dict) : BUILD_INDICES.out.dict
- fai = params.fasta_fai ? file(params.fasta_fai) : BUILD_INDICES.out.fai
-
- dbsnp_tbi = params.dbsnp ? params.dbsnp_index ? file(params.dbsnp_index) : BUILD_INDICES.out.dbsnp_tbi : file("${params.outdir}/no_file")
- germline_resource_tbi = params.germline_resource ? params.germline_resource_index ? file(params.germline_resource_index) : BUILD_INDICES.out.germline_resource_tbi : file("${params.outdir}/no_file")
- known_indels_tbi = params.known_indels ? params.known_indels_index ? file(params.known_indels_index) : BUILD_INDICES.out.known_indels_tbi.collect() : file("${params.outdir}/no_file")
- pon_tbi = params.pon ? params.pon_index ? file(params.pon_index) : BUILD_INDICES.out.pon_tbi : file("${params.outdir}/no_file")
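-
-    // Resolution order for each index above: an explicit params.*_index wins,
-    // otherwise the index built by BUILD_INDICES is used; when the base resource
-    // itself is unset, the no_file placeholder stands in. For dbsnp:
-    //   params.dbsnp + params.dbsnp_index set -> file(params.dbsnp_index)
-    //   params.dbsnp set only                 -> BUILD_INDICES.out.dbsnp_tbi
-    //   params.dbsnp unset                    -> file("${params.outdir}/no_file")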
-
- msisensor_scan = BUILD_INDICES.out.msisensor_scan
- target_bed_gz_tbi = BUILD_INDICES.out.target_bed_gz_tbi
-/*
---------------------------------------------------------------------------------
- PREPROCESSING
---------------------------------------------------------------------------------
-*/
-
- bam_mapped = Channel.empty()
- bam_mapped_qc = Channel.empty()
- bam_recalibrated_qc = Channel.empty()
- input_reads = Channel.empty()
- qc_reports = Channel.empty()
-
- // STEP 0: QC & TRIM
- // `--skip_qc fastqc` to skip fastqc
- // trim only with `--trim_fastq`
- // additional options to be set up
-
- QC_TRIM(
- input_sample,
- ('fastqc' in skip_qc || step != "mapping"),
- !(params.trim_fastq))
-
- reads_input = QC_TRIM.out.reads
-
- qc_reports = qc_reports.mix(
- QC_TRIM.out.fastqc_html,
- QC_TRIM.out.fastqc_zip,
- QC_TRIM.out.trimgalore_html,
- QC_TRIM.out.trimgalore_log,
- QC_TRIM.out.trimgalore_zip)
-
- // STEP 1: MAPPING READS TO REFERENCE GENOME WITH BWA-MEM
-
- MAPPING(
- ('bamqc' in skip_qc),
- ('samtools' in skip_qc),
- bwa,
- fai,
- fasta,
- reads_input,
- save_bam_mapped,
- step,
- target_bed)
-
- bam_mapped = MAPPING.out.bam
- bam_mapped_qc = MAPPING.out.qc
-
- qc_reports = qc_reports.mix(bam_mapped_qc)
-
- // STEP 2: MARKING DUPLICATES
-
- MARKDUPLICATES(
- bam_mapped,
- step)
-
- bam_markduplicates = MARKDUPLICATES.out.bam
-
- if (step == 'preparerecalibration') bam_markduplicates = input_sample
-
- // STEP 3: CREATING RECALIBRATION TABLES
+ include { SAREK } from './workflows/sarek' addParams( summary_params: summary_params )
+ SAREK ()
- PREPARE_RECALIBRATION(
- bam_markduplicates,
- dbsnp,
- dbsnp_tbi,
- dict,
- fai,
- fasta,
- intervals,
- known_indels,
- known_indels_tbi,
- step)
-
- table_bqsr = PREPARE_RECALIBRATION.out.table_bqsr
-
- // STEP 4: RECALIBRATING
- bam_applybqsr = bam_markduplicates.join(table_bqsr)
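-
-    // `join` pairs each duplicate-marked BAM with its recalibration table on the
-    // shared leading key; schematically (illustrative tuples only):
-    //   [sample1, bam] join [sample1, recal.table] -> [sample1, bam, recal.table]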
-
- if (step == 'recalibrate') bam_applybqsr = input_sample
-
- RECALIBRATE(
- ('bamqc' in skip_qc),
- ('samtools' in skip_qc),
- bam_applybqsr,
- dict,
- fai,
- fasta,
- intervals,
- step,
- target_bed)
-
- bam_recalibrated = RECALIBRATE.out.bam
- bam_recalibrated_qc = RECALIBRATE.out.qc
-
- qc_reports = qc_reports.mix(bam_recalibrated_qc)
-
- bam_variant_calling = bam_recalibrated
-
- if (step == 'variantcalling') bam_variant_calling = input_sample
-
- /*
- --------------------------------------------------------------------------------
- GERMLINE VARIANT CALLING
- --------------------------------------------------------------------------------
- */
-
- GERMLINE_VARIANT_CALLING(
- bam_variant_calling,
- dbsnp,
- dbsnp_tbi,
- dict,
- fai,
- fasta,
- intervals,
- target_bed,
- target_bed_gz_tbi,
- tools)
-
-/*
---------------------------------------------------------------------------------
- SOMATIC VARIANT CALLING
---------------------------------------------------------------------------------
-*/
-
- // TUMOR_VARIANT_CALLING(
- // bam_variant_calling,
- // dbsnp,
- // dbsnp_tbi,
- // dict,
- // fai,
- // fasta,
- // intervals,
- // target_bed,
- // target_bed_gz_tbi,
- // tools)
-
- PAIR_VARIANT_CALLING(
- bam_variant_calling,
- dbsnp,
- dbsnp_tbi,
- dict,
- fai,
- fasta,
- intervals,
- msisensor_scan,
- target_bed,
- target_bed_gz_tbi,
- tools)
-
-/*
---------------------------------------------------------------------------------
- ANNOTATION
---------------------------------------------------------------------------------
-*/
-
-
-/*
---------------------------------------------------------------------------------
- MULTIQC
---------------------------------------------------------------------------------
-*/
-
- // GET_SOFTWARE_VERSIONS()
-
- MULTIQC(
- // GET_SOFTWARE_VERSIONS.out.yml,
- multiqc_config,
- multiqc_custom_config.ifEmpty([]),
- workflow_summary,
- qc_reports.collect())
-}
-
-/*
---------------------------------------------------------------------------------
- SEND COMPLETION EMAIL
---------------------------------------------------------------------------------
-*/
-
-workflow.onComplete {
- def multiqc_report = []
- Completion.email(workflow, params, summary, run_name, projectDir, multiqc_report, log)
- Completion.summary(workflow, params, log)
}
-
-// /*
-// --------------------------------------------------------------------------------
-// GERMLINE VARIANT CALLING
-// --------------------------------------------------------------------------------
-// */
-
-// // STEP MANTA.1 - SINGLE MODE
-
-// process MantaSingle {
-// label 'cpus_max'
-// label 'memory_max'
-
-// tag "${idSample}"
-
-// publishDir "${params.outdir}/VariantCalling/${idSample}/Manta", mode: params.publish_dir_mode
-
-// input:
-// set idPatient, idSample, file(bam), file(bai) from bamMantaSingle
-// file(fasta) from fasta
-// file(fastaFai) from fai
-// file(targetBED) from ch_target_bed
-
-// output:
-// set val("Manta"), idPatient, idSample, file("*.vcf.gz"), file("*.vcf.gz.tbi") into vcfMantaSingle
-
-// when: 'manta' in tools
-
-// script:
-// beforeScript = params.target_bed ? "bgzip --threads ${task.cpus} -c ${targetBED} > call_targets.bed.gz ; tabix call_targets.bed.gz" : ""
-// options = params.target_bed ? "--exome --callRegions call_targets.bed.gz" : ""
-// status = status_map[idPatient, idSample]
-// input_bam = status == 0 ? "--bam" : "--tumorBam"
-// vcftype = status == 0 ? "diploid" : "tumor"
-// """
-// ${beforeScript}
-// configManta.py \
-// ${input_bam} ${bam} \
-// --reference ${fasta} \
-// ${options} \
-// --runDir Manta
-
-// python Manta/runWorkflow.py -m local -j ${task.cpus}
-
-// mv Manta/results/variants/candidateSmallIndels.vcf.gz \
-// Manta_${idSample}.candidateSmallIndels.vcf.gz
-// mv Manta/results/variants/candidateSmallIndels.vcf.gz.tbi \
-// Manta_${idSample}.candidateSmallIndels.vcf.gz.tbi
-// mv Manta/results/variants/candidateSV.vcf.gz \
-// Manta_${idSample}.candidateSV.vcf.gz
-// mv Manta/results/variants/candidateSV.vcf.gz.tbi \
-// Manta_${idSample}.candidateSV.vcf.gz.tbi
-// mv Manta/results/variants/${vcftype}SV.vcf.gz \
-// Manta_${idSample}.${vcftype}SV.vcf.gz
-// mv Manta/results/variants/${vcftype}SV.vcf.gz.tbi \
-// Manta_${idSample}.${vcftype}SV.vcf.gz.tbi
-// """
-// }
-
-// vcfMantaSingle = vcfMantaSingle.dump(tag:'Single Manta')
-
-// // STEP TIDDIT
-
-// process TIDDIT {
-// tag "${idSample}"
-
-// publishDir params.outdir, mode: params.publish_dir_mode,
-// saveAs: {
-// if (it == "TIDDIT_${idSample}.vcf") "VariantCalling/${idSample}/TIDDIT/${it}"
-// else "Reports/${idSample}/TIDDIT/${it}"
-// }
-
-// input:
-// set idPatient, idSample, file(bam), file(bai) from bamTIDDIT
-// file(fasta) from fasta
-// file(fastaFai) from fai
-
-// output:
-// set val("TIDDIT"), idPatient, idSample, file("*.vcf.gz"), file("*.tbi") into vcfTIDDIT
-// set file("TIDDIT_${idSample}.old.vcf"), file("TIDDIT_${idSample}.ploidy.tab"), file("TIDDIT_${idSample}.signals.tab"), file("TIDDIT_${idSample}.wig"), file("TIDDIT_${idSample}.gc.wig") into tidditOut
-
-// when: 'tiddit' in tools
-
-// script:
-// """
-// tiddit --sv -o TIDDIT_${idSample} --bam ${bam} --ref ${fasta}
-
-// mv TIDDIT_${idSample}.vcf TIDDIT_${idSample}.old.vcf
-
-// grep -E "#|PASS" TIDDIT_${idSample}.old.vcf > TIDDIT_${idSample}.vcf
-
-// bgzip --threads ${task.cpus} -c TIDDIT_${idSample}.vcf > TIDDIT_${idSample}.vcf.gz
-
-// tabix TIDDIT_${idSample}.vcf.gz
-// """
-// }
-
-// vcfTIDDIT = vcfTIDDIT.dump(tag:'TIDDIT')
-
-// // STEP FREEBAYES SINGLE MODE
-
-// process FreebayesSingle {
-// tag "${idSample}-${intervalBed.baseName}"
-
-// label 'cpus_1'
-
-// input:
-// set idPatient, idSample, file(bam), file(bai), file(intervalBed) from bamFreebayesSingle
-// file(fasta) from fasta
-//         file(fastaFai) from fai
-
-// output:
-// set val("FreeBayes"), idPatient, idSample, file("${intervalBed.baseName}_${idSample}.vcf") into vcfFreebayesSingle
-
-// when: 'freebayes' in tools
-
-// script:
-// intervalsOptions = params.no_intervals ? "" : "-t ${intervalBed}"
-// """
-// freebayes \
-// -f ${fasta} \
-// --min-alternate-fraction 0.1 \
-// --min-mapping-quality 1 \
-// ${intervalsOptions} \
-// ${bam} > ${intervalBed.baseName}_${idSample}.vcf
-// """
-// }
-
-// vcfFreebayesSingle = vcfFreebayesSingle.groupTuple(by: [0,1,2])
-
-// /*
-// --------------------------------------------------------------------------------
-// SOMATIC VARIANT CALLING
-// --------------------------------------------------------------------------------
-// */
-// // Ascat, pileup, pileups with no intervals, recalibrated BAMs
-// (bamAscat, bamMpileup, bamMpileupNoInt, bamRecalAll) = bamRecalAll.into(4)
-
-// // separate BAM by status
-// bamNormal = Channel.create()
-// bamTumor = Channel.create()
-
-// bamRecalAll
-// .choice(bamTumor, bamNormal) {status_map[it[0], it[1]] == 0 ? 1 : 0}
-
-// // Crossing Normal and Tumor to get a T/N pair for Somatic Variant Calling
-// // Remapping channel to remove common key idPatient
-// pairBam = bamNormal.cross(bamTumor).map {
-// normal, tumor ->
-// [normal[0], normal[1], normal[2], normal[3], tumor[1], tumor[2], tumor[3]]
-// }
-
-// pairBam = pairBam.dump(tag:'BAM Somatic Pair')
-
-// // Manta, Strelka, Mutect2, MSIsensor
-// (pairBamManta, pairBamStrelka, pairBamStrelkaBP, pairBamCalculateContamination, pairBamFilterMutect2, pairBamMsisensor, pairBamCNVkit, pairBam) = pairBam.into(8)
-
-// // Making Pair Bam for Sentieon
-
-// // separate BAM by status
-// bam_sentieon_normal = Channel.create()
-// bam_sentieon_tumor = Channel.create()
-
-// bam_sentieon_all
-//     .choice(bam_sentieon_tumor, bam_sentieon_normal) {status_map[it[0], it[1]] == 0 ? 1 : 0}
-
-// // Crossing Normal and Tumor to get a T/N pair for Somatic Variant Calling
-// // Remapping channel to remove common key idPatient
-
-// bam_pair_sentieon_TNscope = bam_sentieon_normal.cross(bam_sentieon_tumor).map {
-// normal, tumor ->
-// [normal[0], normal[1], normal[2], normal[3], normal[4], tumor[1], tumor[2], tumor[3], tumor[4]]
-// }
-
-// intervalPairBam = pairBam.spread(bedIntervals)
-
-// bamMpileup = bamMpileup.spread(intMpileup)
-
-// // intervals for Mutect2 calls, FreeBayes and pileups for Mutect2 filtering
-// (pairBamMutect2, pairBamFreeBayes, pairBamPileupSummaries) = intervalPairBam.into(3)
-
-// // STEP FREEBAYES
-
-// process FreeBayes {
-// tag "${idSampleTumor}_vs_${idSampleNormal}-${intervalBed.baseName}"
-
-// label 'cpus_1'
-
-// input:
-// set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor), file(intervalBed) from pairBamFreeBayes
-// file(fasta) from fasta
-// file(fastaFai) from fai
-
-// output:
-// set val("FreeBayes"), idPatient, val("${idSampleTumor}_vs_${idSampleNormal}"), file("${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf") into vcfFreeBayes
-
-// when: 'freebayes' in tools
-
-// script:
-// intervalsOptions = params.no_intervals ? "" : "-t ${intervalBed}"
-// """
-// freebayes \
-// -f ${fasta} \
-// --pooled-continuous \
-// --pooled-discrete \
-// --genotype-qualities \
-// --report-genotype-likelihood-max \
-// --allele-balance-priors-off \
-// --min-alternate-fraction 0.03 \
-// --min-repeat-entropy 1 \
-// --min-alternate-count 2 \
-// ${intervalsOptions} \
-// ${bamTumor} \
-// ${bamNormal} > ${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf
-// """
-// }
-
-// vcfFreeBayes = vcfFreeBayes.groupTuple(by:[0,1,2])
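-
-// // `groupTuple(by:[0,1,2])` gathers the per-interval VCF chunks sharing the same
-// // (caller, patient, sample) key into a single tuple for ConcatVCF below, e.g.
-// //   [FreeBayes, p1, t_vs_n, 1.vcf], [FreeBayes, p1, t_vs_n, 2.vcf]
-// //   -> [FreeBayes, p1, t_vs_n, [1.vcf, 2.vcf]]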
-
-// // STEP GATK MUTECT2.1 - RAW CALLS
-
-// process Mutect2 {
-// tag "${idSampleTumor}_vs_${idSampleNormal}-${intervalBed.baseName}"
-
-// label 'cpus_1'
-
-// input:
-// set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor), file(intervalBed) from pairBamMutect2
-// file(dict) from dict
-// file(fasta) from fasta
-// file(fastaFai) from fai
-// file(germlineResource) from germline_resource
-// file(germlineResourceIndex) from germline_resource_tbi
-// file(intervals) from intervals
-// file(pon) from pon
-// file(ponIndex) from pon_tbi
-
-// output:
-// set val("Mutect2"), idPatient, val("${idSampleTumor}_vs_${idSampleNormal}"), file("${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf") into mutect2Output
-// set idPatient, idSampleNormal, idSampleTumor, file("${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf.stats") optional true into intervalStatsFiles
-// set idPatient, val("${idSampleTumor}_vs_${idSampleNormal}"), file("${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf.stats"), file("${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf") optional true into mutect2Stats
-
-// when: 'mutect2' in tools
-
-// script:
-// // please make a panel-of-normals, using at least 40 samples
-// // https://gatkforums.broadinstitute.org/gatk/discussion/11136/how-to-call-somatic-mutations-using-gatk4-mutect2
-// PON = params.pon ? "--panel-of-normals ${pon}" : ""
-// intervalsOptions = params.no_intervals ? "" : "-L ${intervalBed}"
-// softClippedOption = params.ignore_soft_clipped_bases ? "--dont-use-soft-clipped-bases true" : ""
-// """
-// # Get raw calls
-// gatk --java-options "-Xmx${task.memory.toGiga()}g" \
-// Mutect2 \
-//             -R ${fasta} \
-// -I ${bamTumor} -tumor ${idSampleTumor} \
-// -I ${bamNormal} -normal ${idSampleNormal} \
-// ${intervalsOptions} \
-// ${softClippedOption} \
-// --germline-resource ${germlineResource} \
-// ${PON} \
-// -O ${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf
-// """
-// }
-
-// mutect2Output = mutect2Output.groupTuple(by:[0,1,2])
-// mutect2Stats = mutect2Stats.groupTuple(by:[0,1])
-
-// // STEP GATK MUTECT2.2 - MERGING STATS
-
-// process MergeMutect2Stats {
-// tag "${idSamplePair}"
-
-// publishDir "${params.outdir}/VariantCalling/${idSamplePair}/Mutect2", mode: params.publish_dir_mode
-
-// input:
-// set idPatient, idSamplePair, file(statsFiles), file(vcf) from mutect2Stats // Actual stats files and corresponding VCF chunks
-// file(dict) from dict
-// file(fasta) from fasta
-// file(fastaFai) from fai
-// file(germlineResource) from germline_resource
-// file(germlineResourceIndex) from germline_resource_tbi
-// file(intervals) from intervals
-
-// output:
-// set idPatient, idSamplePair, file("${idSamplePair}.vcf.gz.stats") into mergedStatsFile
-
-// when: 'mutect2' in tools
-
-// script:
-// stats = statsFiles.collect{ "-stats ${it} " }.join(' ')
-// """
-// gatk --java-options "-Xmx${task.memory.toGiga()}g" \
-// MergeMutectStats \
-// ${stats} \
-// -O ${idSamplePair}.vcf.gz.stats
-// """
-// }
-
-// // we are merging the VCFs that are called separately for different intervals
-// // so we can have a single sorted VCF containing all the calls for a given caller
-
-// // STEP MERGING VCF - FREEBAYES & GATK HAPLOTYPECALLER
-
-// vcfConcatenateVCFs = vcfFreeBayes.mix(vcfFreebayesSingle, vcfGenotypeGVCFs, gvcfHaplotypeCaller)
-// vcfConcatenateVCFs = vcfConcatenateVCFs.dump(tag:'VCF to merge')
-
-// process ConcatVCF {
-// label 'cpus_8'
-
-// tag "${variantCaller}-${idSample}"
-
-// publishDir "${params.outdir}/VariantCalling/${idSample}/${"$variantCaller"}", mode: params.publish_dir_mode
-
-// input:
-// set variantCaller, idPatient, idSample, file(vcf) from vcfConcatenateVCFs
-// file(fastaFai) from fai
-// file(targetBED) from ch_target_bed
-
-// output:
-//         // we have this funny *_* pattern to avoid copying the raw calls to publishDir
-// set variantCaller, idPatient, idSample, file("*_*.vcf.gz"), file("*_*.vcf.gz.tbi") into vcfConcatenated
-
-// when: ('haplotypecaller' in tools || 'mutect2' in tools || 'freebayes' in tools)
-
-// script:
-// if (variantCaller == 'HaplotypeCallerGVCF')
-// outputFile = "HaplotypeCaller_${idSample}.g.vcf"
-// else
-// outputFile = "${variantCaller}_${idSample}.vcf"
-// options = params.target_bed ? "-t ${targetBED}" : ""
-// intervalsOptions = params.no_intervals ? "-n" : ""
-// """
-// concatenateVCFs.sh -i ${fastaFai} -c ${task.cpus} -o ${outputFile} ${options} ${intervalsOptions}
-// """
-// }
-
-// vcfConcatenated = vcfConcatenated.dump(tag:'VCF')
-
-// // STEP MERGING VCF - GATK MUTECT2 (UNFILTERED)
-
-// mutect2Output = mutect2Output.dump(tag:'Mutect2 output VCF to merge')
-
-// process ConcatVCF_Mutect2 {
-// label 'cpus_8'
-
-// tag "${idSample}"
-
-// publishDir "${params.outdir}/VariantCalling/${idSample}/Mutect2", mode: params.publish_dir_mode
-
-// input:
-// set variantCaller, idPatient, idSample, file(vcf) from mutect2Output
-// file(fastaFai) from fai
-// file(targetBED) from ch_target_bed
-
-// output:
-//         // we have this funny *_* pattern to avoid copying the raw calls to publishDir
-// set variantCaller, idPatient, idSample, file("*_*.vcf.gz"), file("*_*.vcf.gz.tbi") into vcfConcatenatedForFilter
-
-// when: ('haplotypecaller' in tools || 'mutect2' in tools || 'freebayes' in tools)
-
-// script:
-// outputFile = "Mutect2_unfiltered_${idSample}.vcf"
-// options = params.target_bed ? "-t ${targetBED}" : ""
-// intervalsOptions = params.no_intervals ? "-n" : ""
-// """
-// concatenateVCFs.sh -i ${fastaFai} -c ${task.cpus} -o ${outputFile} ${options} ${intervalsOptions}
-// """
-// }
-
-// vcfConcatenatedForFilter = vcfConcatenatedForFilter.dump(tag:'Mutect2 unfiltered VCF')
-
-// // STEP GATK MUTECT2.3 - GENERATING PILEUP SUMMARIES
-
-// pairBamPileupSummaries = pairBamPileupSummaries.map{
-// idPatient, idSampleNormal, bamNormal, baiNormal, idSampleTumor, bamTumor, baiTumor, intervalBed ->
-// [idPatient, idSampleNormal, idSampleTumor, bamNormal, baiNormal, bamTumor, baiTumor, intervalBed]
-// }.join(intervalStatsFiles, by:[0,1,2])
-
-// process PileupSummariesForMutect2 {
-// tag "${idSampleTumor}_vs_${idSampleNormal}-${intervalBed.baseName}"
-
-// label 'cpus_1'
-
-// input:
-// set idPatient, idSampleNormal, idSampleTumor, file(bamNormal), file(baiNormal), file(bamTumor), file(baiTumor), file(intervalBed), file(statsFile) from pairBamPileupSummaries
-// file(germlineResource) from germline_resource
-// file(germlineResourceIndex) from germline_resource_tbi
-
-// output:
-// set idPatient, idSampleNormal, idSampleTumor, file("${intervalBed.baseName}_${idSampleTumor}_pileupsummaries.table") into pileupSummaries
-
-// when: 'mutect2' in tools
-
-// script:
-// intervalsOptions = params.no_intervals ? "" : "-L ${intervalBed}"
-// """
-// gatk --java-options "-Xmx${task.memory.toGiga()}g" \
-// GetPileupSummaries \
-// -I ${bamTumor} \
-// -V ${germlineResource} \
-// ${intervalsOptions} \
-// -O ${intervalBed.baseName}_${idSampleTumor}_pileupsummaries.table
-// """
-// }
-
-// pileupSummaries = pileupSummaries.groupTuple(by:[0,1,2])
-
-// // STEP GATK MUTECT2.4 - MERGING PILEUP SUMMARIES
-
-// process MergePileupSummaries {
-// label 'cpus_1'
-
-// tag "${idPatient}_${idSampleTumor}"
-
-// publishDir "${params.outdir}/VariantCalling/${idSampleTumor}/Mutect2", mode: params.publish_dir_mode
-
-// input:
-// set idPatient, idSampleNormal, idSampleTumor, file(pileupSums) from pileupSummaries
-// file(dict) from dict
-
-// output:
-// set idPatient, idSampleNormal, idSampleTumor, file("${idSampleTumor}_pileupsummaries.table") into mergedPileupFile
-
-// when: 'mutect2' in tools
-
-// script:
-// allPileups = pileupSums.collect{ "-I ${it} " }.join(' ')
-// """
-// gatk --java-options "-Xmx${task.memory.toGiga()}g" \
-// GatherPileupSummaries \
-// --sequence-dictionary ${dict} \
-// ${allPileups} \
-// -O ${idSampleTumor}_pileupsummaries.table
-// """
-// }
-
-// // STEP GATK MUTECT2.5 - CALCULATING CONTAMINATION
-
-// pairBamCalculateContamination = pairBamCalculateContamination.map{
-// idPatient, idSampleNormal, bamNormal, baiNormal, idSampleTumor, bamTumor, baiTumor ->
-// [idPatient, idSampleNormal, idSampleTumor, bamNormal, baiNormal, bamTumor, baiTumor]
-// }.join(mergedPileupFile, by:[0,1,2])
-
-// process CalculateContamination {
-// label 'cpus_1'
-
-// tag "${idSampleTumor}_vs_${idSampleNormal}"
-
-// publishDir "${params.outdir}/VariantCalling/${idSampleTumor}/Mutect2", mode: params.publish_dir_mode
-
-// input:
-// set idPatient, idSampleNormal, idSampleTumor, file(bamNormal), file(baiNormal), file(bamTumor), file(baiTumor), file(mergedPileup) from pairBamCalculateContamination
-
-// output:
-// set idPatient, val("${idSampleTumor}_vs_${idSampleNormal}"), file("${idSampleTumor}_contamination.table") into contaminationTable
-
-// when: 'mutect2' in tools
-
-// script:
-// """
-// # calculate contamination
-// gatk --java-options "-Xmx${task.memory.toGiga()}g" \
-// CalculateContamination \
-// -I ${idSampleTumor}_pileupsummaries.table \
-// -O ${idSampleTumor}_contamination.table
-// """
-// }
-
-// // STEP GATK MUTECT2.6 - FILTERING CALLS
-
-// mutect2CallsToFilter = vcfConcatenatedForFilter.map{
-// variantCaller, idPatient, idSamplePair, vcf, tbi ->
-// [idPatient, idSamplePair, vcf, tbi]
-// }.join(mergedStatsFile, by:[0,1]).join(contaminationTable, by:[0,1])
-
-// process FilterMutect2Calls {
-// label 'cpus_1'
-
-// tag "${idSamplePair}"
-
-// publishDir "${params.outdir}/VariantCalling/${idSamplePair}/Mutect2", mode: params.publish_dir_mode
-
-// input:
-// set idPatient, idSamplePair, file(unfiltered), file(unfilteredIndex), file(stats), file(contaminationTable) from mutect2CallsToFilter
-// file(dict) from dict
-// file(fasta) from fasta
-// file(fastaFai) from fai
-// file(germlineResource) from germline_resource
-// file(germlineResourceIndex) from germline_resource_tbi
-// file(intervals) from intervals
-
-// output:
-// set val("Mutect2"), idPatient, idSamplePair, file("Mutect2_filtered_${idSamplePair}.vcf.gz"), file("Mutect2_filtered_${idSamplePair}.vcf.gz.tbi"), file("Mutect2_filtered_${idSamplePair}.vcf.gz.filteringStats.tsv") into filteredMutect2Output
-
-// when: 'mutect2' in tools
-
-// script:
-// """
-// # do the actual filtering
-// gatk --java-options "-Xmx${task.memory.toGiga()}g" \
-// FilterMutectCalls \
-// -V ${unfiltered} \
-// --contamination-table ${contaminationTable} \
-// --stats ${stats} \
-// -R ${fasta} \
-// -O Mutect2_filtered_${idSamplePair}.vcf.gz
-// """
-// }
-
-// // STEP SENTIEON TNSCOPE
-
-// process Sentieon_TNscope {
-// label 'cpus_max'
-// label 'memory_max'
-// label 'sentieon'
-
-// tag "${idSampleTumor}_vs_${idSampleNormal}"
-
-// input:
-// set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), file(recalNormal), idSampleTumor, file(bamTumor), file(baiTumor), file(recalTumor) from bam_pair_sentieon_TNscope
-// file(dict) from dict
-// file(fasta) from fasta
-// file(fastaFai) from fai
-// file(dbsnp) from dbsnp
-// file(dbsnpIndex) from dbsnp_tbi
-// file(pon) from pon
-// file(ponIndex) from pon_tbi
-
-// output:
-// set val("SentieonTNscope"), idPatient, val("${idSampleTumor}_vs_${idSampleNormal}"), file("*.vcf") into vcf_sentieon_TNscope
-
-// when: 'tnscope' in tools && params.sentieon
-
-// script:
-// PON = params.pon ? "--pon ${pon}" : ""
-// """
-// sentieon driver \
-// -t ${task.cpus} \
-// -r ${fasta} \
-// -i ${bamTumor} \
-// -q ${recalTumor} \
-// -i ${bamNormal} \
-// -q ${recalNormal} \
-// --algo TNscope \
-// --tumor_sample ${idSampleTumor} \
-// --normal_sample ${idSampleNormal} \
-// --dbsnp ${dbsnp} \
-// ${PON} \
-// TNscope_${idSampleTumor}_vs_${idSampleNormal}.vcf
-// """
-// }
-
-// vcf_sentieon_TNscope = vcf_sentieon_TNscope.dump(tag:'Sentieon TNscope')
-
-// vcf_sentieon = vcf_sentieon_DNAseq.mix(vcf_sentieon_DNAscope, vcf_sentieon_DNAscope_SV, vcf_sentieon_TNscope)
-
-// process CompressSentieonVCF {
-// tag "${idSample} - ${vcf}"
-
-// publishDir "${params.outdir}/VariantCalling/${idSample}/${variantCaller}", mode: params.publish_dir_mode
-
-// input:
-// set variantCaller, idPatient, idSample, file(vcf) from vcf_sentieon
-
-// output:
-// set variantCaller, idPatient, idSample, file("*.vcf.gz"), file("*.vcf.gz.tbi") into vcf_sentieon_compressed
-
-// script:
-// """
-// bgzip < ${vcf} > ${vcf}.gz
-// tabix ${vcf}.gz
-// """
-// }
-
-// vcf_sentieon_compressed = vcf_sentieon_compressed.dump(tag:'Sentieon VCF indexed')
-
-// // STEP CNVkit
-
-// process CNVkit {
-// tag "${idSampleTumor}_vs_${idSampleNormal}"
-
-// publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/CNVkit", mode: params.publish_dir_mode
-
-// input:
-// set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor) from pairBamCNVkit
-// file(targetBED) from ch_target_bed
-// file(fasta) from fasta
-
-// output:
-// set idPatient, idSampleNormal, idSampleTumor, file("${idSampleTumor}*"), file("${idSampleNormal}*") into cnvkitOut
-
-// when: 'cnvkit' in tools && params.target_bed
-
-// script:
-// """
-// cnvkit.py \
-// batch \
-// ${bamTumor} \
-// --normal ${bamNormal} \
-// --targets ${targetBED} \
-// --fasta ${fasta} \
-// --output-reference output_reference.cnn \
-// --output-dir ./ \
-// --diagram \
-// --scatter
-// """
-// }
-
-// // STEP ASCAT.1 - ALLELECOUNTER
-
-// // Run commands and code from Malin Larsson
-// // Based on Jesper Eisfeldt's code
-// process AlleleCounter {
-// label 'memory_singleCPU_2_task'
-
-// tag "${idSample}"
-
-// input:
-// set idPatient, idSample, file(bam), file(bai) from bamAscat
-// file(acLoci) from loci
-// file(dict) from dict
-// file(fasta) from fasta
-// file(fastaFai) from fai
-
-// output:
-// set idPatient, idSample, file("${idSample}.alleleCount") into alleleCounterOut
-
-// when: 'ascat' in tools
-
-// script:
-// """
-// alleleCounter \
-// -l ${acLoci} \
-// -r ${fasta} \
-// -b ${bam} \
-// -o ${idSample}.alleleCount;
-// """
-// }
-
-// alleleCountOutNormal = Channel.create()
-// alleleCountOutTumor = Channel.create()
-
-// alleleCounterOut
-// .choice(alleleCountOutTumor, alleleCountOutNormal) {status_map[it[0], it[1]] == 0 ? 1 : 0}
-
-// alleleCounterOut = alleleCountOutNormal.combine(alleleCountOutTumor, by:0)
-
-// alleleCounterOut = alleleCounterOut.map {
-// idPatientNormal, idSampleNormal, alleleCountOutNormal,
-// idSampleTumor, alleleCountOutTumor ->
-// [idPatientNormal, idSampleNormal, idSampleTumor, alleleCountOutNormal, alleleCountOutTumor]
-// }
-
-// // STEP ASCAT.2 - CONVERTALLELECOUNTS
-
-// // R script from Malin Larssons bitbucket repo:
-// // https://bitbucket.org/malinlarsson/somatic_wgs_pipeline
-// process ConvertAlleleCounts {
-// label 'memory_singleCPU_2_task'
-
-// tag "${idSampleTumor}_vs_${idSampleNormal}"
-
-// publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/ASCAT", mode: params.publish_dir_mode
-
-// input:
-// set idPatient, idSampleNormal, idSampleTumor, file(alleleCountNormal), file(alleleCountTumor) from alleleCounterOut
-
-// output:
-// set idPatient, idSampleNormal, idSampleTumor, file("${idSampleNormal}.BAF"), file("${idSampleNormal}.LogR"), file("${idSampleTumor}.BAF"), file("${idSampleTumor}.LogR") into convertAlleleCountsOut
-
-// when: 'ascat' in tools
-
-// script:
-// gender = gender_map[idPatient]
-// """
-// convertAlleleCounts.r ${idSampleTumor} ${alleleCountTumor} ${idSampleNormal} ${alleleCountNormal} ${gender}
-// """
-// }
-
-// // STEP ASCAT.3 - ASCAT
-
-// // R scripts from Malin Larssons bitbucket repo:
-// // https://bitbucket.org/malinlarsson/somatic_wgs_pipeline
-// process Ascat {
-// label 'memory_singleCPU_2_task'
-
-// tag "${idSampleTumor}_vs_${idSampleNormal}"
-
-// publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/ASCAT", mode: params.publish_dir_mode
-
-// input:
-// set idPatient, idSampleNormal, idSampleTumor, file(bafNormal), file(logrNormal), file(bafTumor), file(logrTumor) from convertAlleleCountsOut
-// file(acLociGC) from loci_gc
-
-// output:
-// set val("ASCAT"), idPatient, idSampleNormal, idSampleTumor, file("${idSampleTumor}.*.{png,txt}") into ascatOut
-
-// when: 'ascat' in tools
-
-// script:
-// gender = gender_map[idPatient]
-// purity_ploidy = (params.ascat_purity && params.ascat_ploidy) ? "--purity ${params.ascat_purity} --ploidy ${params.ascat_ploidy}" : ""
-// """
-// for f in *BAF *LogR; do sed 's/chr//g' \$f > tmpFile; mv tmpFile \$f;done
-// run_ascat.r \
-// --tumorbaf ${bafTumor} \
-// --tumorlogr ${logrTumor} \
-// --normalbaf ${bafNormal} \
-// --normallogr ${logrNormal} \
-// --tumorname ${idSampleTumor} \
-//         --basedir ${projectDir} \
-// --gcfile ${acLociGC} \
-// --gender ${gender} \
-// ${purity_ploidy}
-// """
-// }
-
-// ascatOut.dump(tag:'ASCAT')
-
-// // STEP MPILEUP.1
-
-// process Mpileup {
-// label 'cpus_1'
-// label 'memory_singleCPU_2_task'
-
-// tag "${idSample}-${intervalBed.baseName}"
-
-// publishDir params.outdir, mode: params.publish_dir_mode, saveAs: { it == "${idSample}.pileup" ? "VariantCalling/${idSample}/Control-FREEC/${it}" : null }
-
-// input:
-// set idPatient, idSample, file(bam), file(bai), file(intervalBed) from bamMpileup
-// file(fasta) from fasta
-// file(fastaFai) from fai
-
-// output:
-// set idPatient, idSample, file("${prefix}${idSample}.pileup") into mpileupMerge
-// set idPatient, idSample into tsv_mpileup
-
-// when: 'controlfreec' in tools || 'mpileup' in tools
-
-// script:
-// prefix = params.no_intervals ? "" : "${intervalBed.baseName}_"
-// intervalsOptions = params.no_intervals ? "" : "-l ${intervalBed}"
-
-// """
-//     # Control-FREEC uncompresses the zipped pileup TWICE in single-threaded mode,
-//     # so we do not use compressed pileups here
-// samtools mpileup \
-// -f ${fasta} ${bam} \
-// ${intervalsOptions} > ${prefix}${idSample}.pileup
-// """
-// }
-
-// (tsv_mpileup, tsv_mpileup_sample) = tsv_mpileup.groupTuple(by:[0, 1]).into(2)
-
-// // Creating a TSV file to restart from this step
-// tsv_mpileup.map { idPatient, idSample ->
-// gender = gender_map[idPatient]
-// status = status_map[idPatient, idSample]
-// mpileup = "${params.outdir}/VariantCalling/${idSample}/Control-FREEC/${idSample}.pileup"
-// "${idPatient}\t${gender}\t${status}\t${idSample}\t${mpileup}\n"
-// }.collectFile(
-// name: 'control-freec_mpileup.tsv', sort: true, storeDir: "${params.outdir}/VariantCalling/TSV"
-// )
-
-// tsv_mpileup_sample
-// .collectFile(storeDir: "${params.outdir}/VariantCalling/TSV") {
-// idPatient, idSample ->
-// status = status_map[idPatient, idSample]
-// gender = gender_map[idPatient]
-// mpileup = "${params.outdir}/VariantCalling/${idSample}/Control-FREEC/${idSample}.pileup"
-// ["control-freec_mpileup_${idSample}.tsv", "${idPatient}\t${gender}\t${status}\t${idSample}\t${mpileup}\n"]
-// }
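-
-// // Illustrative line of the per-sample restart TSV written above (tab-separated):
-// //   patient1  XX  0  sample1  results/VariantCalling/sample1/Control-FREEC/sample1.pileup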
-
-// if (!params.no_intervals) {
-// mpileupMerge = mpileupMerge.groupTuple(by:[0, 1])
-// mpileupNoInt = Channel.empty()
-// } else {
-// (mpileupMerge, mpileupNoInt) = mpileupMerge.into(2)
-// mpileupMerge.close()
-// }
-
-// // STEP MPILEUP.2 - MERGE
-// process MergeMpileup {
-// label 'cpus_1'
-
-// tag "${idSample}"
-
-// publishDir params.outdir, mode: params.publish_dir_mode, saveAs: { it == "${idSample}.pileup" ? "VariantCalling/${idSample}/Control-FREEC/${it}" : null }
-
-// input:
-// set idPatient, idSample, file(mpileup) from mpileupMerge
-
-// output:
-// set idPatient, idSample, file("${idSample}.pileup") into mpileupOut
-
-//     when: !(params.no_intervals) && ('controlfreec' in tools || 'mpileup' in tools)
-
-// script:
-// """
-// for i in `ls -1v *.pileup`;
-// do cat \$i >> ${idSample}.pileup
-// done
-// """
-// }
-
-// mpileupOut = mpileupOut.mix(mpileupNoInt)
-// mpileupOut = mpileupOut.dump(tag:'mpileup')
-
-// mpileupOutNormal = Channel.create()
-// mpileupOutTumor = Channel.create()
-
-// if (step == 'controlfreec') mpileupOut = input_sample
-
-// mpileupOut
-// .choice(mpileupOutTumor, mpileupOutNormal) {status_map[it[0], it[1]] == 0 ? 1 : 0}
-
-// mpileupOut = mpileupOutNormal.combine(mpileupOutTumor, by:0)
-
-// mpileupOut = mpileupOut.map {
-// idPatientNormal, idSampleNormal, mpileupOutNormal,
-// idSampleTumor, mpileupOutTumor ->
-// [idPatientNormal, idSampleNormal, idSampleTumor, mpileupOutNormal, mpileupOutTumor]
-// }
-
-// // STEP CONTROLFREEC.1 - CONTROLFREEC
-
-// process ControlFREEC {
-// label 'cpus_max'
-// //label 'memory_singleCPU_2_task'
-
-// tag "${idSampleTumor}_vs_${idSampleNormal}"
-
-// publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/Control-FREEC", mode: params.publish_dir_mode
-
-// input:
-// set idPatient, idSampleNormal, idSampleTumor, file(mpileupNormal), file(mpileupTumor) from mpileupOut
-// file(chrDir) from chr_dir
-// file(mappability) from mappability
-// file(chrLength) from chr_length
-// file(dbsnp) from dbsnp
-// file(dbsnpIndex) from dbsnp_tbi
-// file(fasta) from fasta
-// file(fastaFai) from fai
-
-// output:
-// set idPatient, idSampleNormal, idSampleTumor, file("${idSampleTumor}.pileup_CNVs"), file("${idSampleTumor}.pileup_ratio.txt"), file("${idSampleTumor}.pileup_normal_CNVs"), file("${idSampleTumor}.pileup_normal_ratio.txt"), file("${idSampleTumor}.pileup_BAF.txt"), file("${idSampleNormal}.pileup_BAF.txt") into controlFreecViz
-// set file("*.pileup*"), file("${idSampleTumor}_vs_${idSampleNormal}.config.txt") into controlFreecOut
-
-// when: 'controlfreec' in tools
-
-// script:
-// config = "${idSampleTumor}_vs_${idSampleNormal}.config.txt"
-// gender = gender_map[idPatient]
-//     // if we are using coefficientOfVariation, we must remove the window parameter
-//     // the default settings use "window = 20000" without coefficientOfVariation set,
-//     // but we prefer coefficientOfVariation; note, this is not written in stone
-// coeff_or_window = params.cf_window ? "window = ${params.cf_window}" : "coefficientOfVariation = ${params.cf_coeff}"
-
-// """
-// touch ${config}
-// echo "[general]" >> ${config}
-// echo "BedGraphOutput = TRUE" >> ${config}
-// echo "chrFiles = \${PWD}/${chrDir.fileName}" >> ${config}
-// echo "chrLenFile = \${PWD}/${chrLength.fileName}" >> ${config}
-// echo "gemMappabilityFile = \${PWD}/${mappability}" >> ${config}
-// echo "${coeff_or_window}" >> ${config}
-// echo "contaminationAdjustment = TRUE" >> ${config}
-// echo "forceGCcontentNormalization = 1" >> ${config}
-// echo "maxThreads = ${task.cpus}" >> ${config}
-// echo "minimalSubclonePresence = 20" >> ${config}
-// echo "ploidy = ${params.cf_ploidy}" >> ${config}
-// echo "sex = ${gender}" >> ${config}
-// echo "" >> ${config}
-
-// echo "[control]" >> ${config}
-// echo "inputFormat = pileup" >> ${config}
-// echo "mateFile = \${PWD}/${mpileupNormal}" >> ${config}
-// echo "mateOrientation = FR" >> ${config}
-// echo "" >> ${config}
-
-// echo "[sample]" >> ${config}
-// echo "inputFormat = pileup" >> ${config}
-// echo "mateFile = \${PWD}/${mpileupTumor}" >> ${config}
-// echo "mateOrientation = FR" >> ${config}
-// echo "" >> ${config}
-
-// echo "[BAF]" >> ${config}
-// echo "SNPfile = ${dbsnp.fileName}" >> ${config}
-
-// freec -conf ${config}
-// """
-// }
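-
-// // For reference, the heredoc above is expected to emit a config.txt of roughly this
-// // shape (paths and values are illustrative only):
-// //
-// //   [general]
-// //   BedGraphOutput = TRUE
-// //   chrFiles = /work/dir/chrs
-// //   chrLenFile = /work/dir/chrs.len
-// //   gemMappabilityFile = /work/dir/mappability.gem
-// //   coefficientOfVariation = 0.05
-// //   contaminationAdjustment = TRUE
-// //   forceGCcontentNormalization = 1
-// //   maxThreads = 8
-// //   minimalSubclonePresence = 20
-// //   ploidy = 2
-// //   sex = XX
-// //
-// //   [control]
-// //   inputFormat = pileup
-// //   mateFile = /work/dir/normal.pileup
-// //   mateOrientation = FR
-// //
-// //   [sample]
-// //   inputFormat = pileup
-// //   mateFile = /work/dir/tumor.pileup
-// //   mateOrientation = FR
-// //
-// //   [BAF]
-// //   SNPfile = dbsnp.vcf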
-
-// controlFreecOut.dump(tag:'ControlFREEC')
-
-// // STEP CONTROLFREEC.3 - VISUALIZATION
-
-// process ControlFreecViz {
-// label 'memory_singleCPU_2_task'
-
-// tag "${idSampleTumor}_vs_${idSampleNormal}"
-
-// publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/Control-FREEC", mode: params.publish_dir_mode
-
-// input:
-// set idPatient, idSampleNormal, idSampleTumor, file(cnvTumor), file(ratioTumor), file(cnvNormal), file(ratioNormal), file(bafTumor), file(bafNormal) from controlFreecViz
-
-// output:
-// set file("*.txt"), file("*.png"), file("*.bed") into controlFreecVizOut
-
-// when: 'controlfreec' in tools
-
-// """
-// echo "Shaping CNV files to make sure we can assess significance"
-// awk 'NF==9{print}' ${cnvTumor} > TUMOR.CNVs
-// awk 'NF==7{print}' ${cnvNormal} > NORMAL.CNVs
-
-// echo "############### Calculating significance values for TUMOR CNVs #############"
-// cat /opt/conda/envs/nf-core-sarek-${workflow.manifest.version}/bin/assess_significance.R | R --slave --args TUMOR.CNVs ${ratioTumor}
-
-// echo "############### Calculating significance values for NORMAL CNVs ############"
-// cat /opt/conda/envs/nf-core-sarek-${workflow.manifest.version}/bin/assess_significance.R | R --slave --args NORMAL.CNVs ${ratioNormal}
-
-// echo "############### Creating graph for TUMOR ratios ###############"
-// cat /opt/conda/envs/nf-core-sarek-${workflow.manifest.version}/bin/makeGraph.R | R --slave --args 2 ${ratioTumor} ${bafTumor}
-
-// echo "############### Creating graph for NORMAL ratios ##############"
-// cat /opt/conda/envs/nf-core-sarek-${workflow.manifest.version}/bin/makeGraph.R | R --slave --args 2 ${ratioNormal} ${bafNormal}
-
-// echo "############### Creating BED files for TUMOR ##############"
-// perl /opt/conda/envs/nf-core-sarek-${workflow.manifest.version}/bin/freec2bed.pl -f ${ratioTumor} > ${idSampleTumor}.bed
-
-// echo "############### Creating BED files for NORMAL #############"
-// perl /opt/conda/envs/nf-core-sarek-${workflow.manifest.version}/bin/freec2bed.pl -f ${ratioNormal} > ${idSampleNormal}.bed
-// """
-// }
-
-// controlFreecVizOut.dump(tag:'ControlFreecViz')
-
-// // Remapping channels for QC and annotation
-
-// (vcfStrelkaIndels, vcfStrelkaSNVS) = vcfStrelka.into(2)
-// (vcfStrelkaBPIndels, vcfStrelkaBPSNVS) = vcfStrelkaBP.into(2)
-// (vcfMantaSomaticSV, vcfMantaDiploidSV) = vcfManta.into(2)
-
-// vcfKeep = Channel.empty().mix(
-// filteredMutect2Output.map{
-//         variantcaller, idPatient, idSample, vcf, tbi, tsv ->
-// [variantcaller, idSample, vcf]
-// },
-// vcfConcatenated.map{
-// variantcaller, idPatient, idSample, vcf, tbi ->
-// [variantcaller, idSample, vcf]
-// },
-// vcf_sentieon_compressed.map {
-// variantcaller, idPatient, idSample, vcf, tbi ->
-// [variantcaller, idSample, vcf]
-// },
-// vcfStrelkaSingle.map {
-// variantcaller, idPatient, idSample, vcf, tbi ->
-// [variantcaller, idSample, vcf[1]]
-// },
-// vcfMantaSingle.map {
-// variantcaller, idPatient, idSample, vcf, tbi ->
-// [variantcaller, idSample, vcf[2]]
-// },
-// vcfMantaDiploidSV.map {
-// variantcaller, idPatient, idSample, vcf, tbi ->
-// [variantcaller, idSample, vcf[2]]
-// },
-// vcfMantaSomaticSV.map {
-// variantcaller, idPatient, idSample, vcf, tbi ->
-// [variantcaller, idSample, vcf[3]]
-// },
-// vcfStrelkaIndels.map {
-// variantcaller, idPatient, idSample, vcf, tbi ->
-// [variantcaller, idSample, vcf[0]]
-// },
-// vcfStrelkaSNVS.map {
-// variantcaller, idPatient, idSample, vcf, tbi ->
-// [variantcaller, idSample, vcf[1]]
-// },
-// vcfStrelkaBPIndels.map {
-// variantcaller, idPatient, idSample, vcf, tbi ->
-// [variantcaller, idSample, vcf[0]]
-// },
-// vcfStrelkaBPSNVS.map {
-// variantcaller, idPatient, idSample, vcf, tbi ->
-// [variantcaller, idSample, vcf[1]]
-// },
-// vcfTIDDIT.map {
-// variantcaller, idPatient, idSample, vcf, tbi ->
-// [variantcaller, idSample, vcf]
-// })
-
-// (vcfBCFtools, vcfVCFtools, vcfAnnotation) = vcfKeep.into(3)
-
-// // STEP VCF.QC
-
-// process BcftoolsStats {
-// label 'cpus_1'
-
-// tag "${variantCaller} - ${vcf}"
-
-// publishDir "${params.outdir}/Reports/${idSample}/BCFToolsStats", mode: params.publish_dir_mode
-
-// input:
-// set variantCaller, idSample, file(vcf) from vcfBCFtools
-
-// output:
-// file ("*.bcf.tools.stats.out") into bcftoolsReport
-
-// when: !('bcftools' in skip_qc)
-
-// script:
-// """
-// bcftools stats ${vcf} > ${reduceVCF(vcf.fileName)}.bcf.tools.stats.out
-// """
-// }
-
-// bcftoolsReport = bcftoolsReport.dump(tag:'BCFTools')
-
-// process Vcftools {
-// label 'cpus_1'
-
-// tag "${variantCaller} - ${vcf}"
-
-// publishDir "${params.outdir}/Reports/${idSample}/VCFTools", mode: params.publish_dir_mode
-
-// input:
-// set variantCaller, idSample, file(vcf) from vcfVCFtools
-
-// output:
-// file ("${reduceVCF(vcf.fileName)}.*") into vcftoolsReport
-
-// when: !('vcftools' in skip_qc)
-
-// script:
-// """
-// vcftools \
-// --gzvcf ${vcf} \
-// --TsTv-by-count \
-// --out ${reduceVCF(vcf.fileName)}
-
-// vcftools \
-// --gzvcf ${vcf} \
-// --TsTv-by-qual \
-// --out ${reduceVCF(vcf.fileName)}
-
-// vcftools \
-// --gzvcf ${vcf} \
-// --FILTER-summary \
-// --out ${reduceVCF(vcf.fileName)}
-// """
-// }
-
-// vcftoolsReport = vcftoolsReport.dump(tag:'VCFTools')
-
-// /*
-// --------------------------------------------------------------------------------
-// ANNOTATION
-// --------------------------------------------------------------------------------
-// */
-
-// if (step == 'annotate') {
-// vcfToAnnotate = Channel.create()
-// vcfNoAnnotate = Channel.create()
-
-// if (tsv_path == []) {
-// // Sarek, by default, annotates all available vcfs that it can find in the VariantCalling directory
-// // Excluding vcfs from FreeBayes, and g.vcf from HaplotypeCaller
-// // Basically it's: results/VariantCalling/*/{HaplotypeCaller,Manta,Mutect2,SentieonDNAseq,SentieonDNAscope,SentieonTNscope,Strelka,TIDDIT}/*.vcf.gz
-// // Without *SmallIndels.vcf.gz from Manta, and *.genome.vcf.gz from Strelka
-// // The small snippet `vcf.minus(vcf.fileName)[-2]` catches idSample
-// // This field is used to output final annotated VCFs in the correct directory
-// Channel.empty().mix(
-// Channel.fromPath("${params.outdir}/VariantCalling/*/HaplotypeCaller/*.vcf.gz")
-// .flatten().map{vcf -> ['HaplotypeCaller', vcf.minus(vcf.fileName)[-2].toString(), vcf]},
-// Channel.fromPath("${params.outdir}/VariantCalling/*/Manta/*[!candidate]SV.vcf.gz")
-// .flatten().map{vcf -> ['Manta', vcf.minus(vcf.fileName)[-2].toString(), vcf]},
-// Channel.fromPath("${params.outdir}/VariantCalling/*/Mutect2/*.vcf.gz")
-// .flatten().map{vcf -> ['Mutect2', vcf.minus(vcf.fileName)[-2].toString(), vcf]},
-// Channel.fromPath("${params.outdir}/VariantCalling/*/SentieonDNAseq/*.vcf.gz")
-// .flatten().map{vcf -> ['SentieonDNAseq', vcf.minus(vcf.fileName)[-2].toString(), vcf]},
-// Channel.fromPath("${params.outdir}/VariantCalling/*/SentieonDNAscope/*.vcf.gz")
-// .flatten().map{vcf -> ['SentieonDNAscope', vcf.minus(vcf.fileName)[-2].toString(), vcf]},
-// Channel.fromPath("${params.outdir}/VariantCalling/*/SentieonTNscope/*.vcf.gz")
-// .flatten().map{vcf -> ['SentieonTNscope', vcf.minus(vcf.fileName)[-2].toString(), vcf]},
-// Channel.fromPath("${params.outdir}/VariantCalling/*/Strelka/*{somatic,variant}*.vcf.gz")
-// .flatten().map{vcf -> ['Strelka', vcf.minus(vcf.fileName)[-2].toString(), vcf]},
-// Channel.fromPath("${params.outdir}/VariantCalling/*/TIDDIT/*.vcf.gz")
-// .flatten().map{vcf -> ['TIDDIT', vcf.minus(vcf.fileName)[-2].toString(), vcf]}
-// ).choice(vcfToAnnotate, vcfNoAnnotate) {
-// annotate_tools == [] || (annotate_tools != [] && it[0] in annotate_tools) ? 0 : 1
-// }
-// } else if (annotate_tools == []) {
-// // Annotate user-submitted VCFs
-// // If user-submitted, Sarek assume that the idSample should be assumed automatically
-// vcfToAnnotate = Channel.fromPath(tsv_path)
-// .map{vcf -> ['userspecified', vcf.minus(vcf.fileName)[-2].toString(), vcf]}
-// } else exit 1, "specify only tools or files to annotate, not both"
-
-// vcfNoAnnotate.close()
-// vcfAnnotation = vcfAnnotation.mix(vcfToAnnotate)
-// }
-
-// // as now have the list of VCFs to annotate, the first step is to annotate with allele frequencies, if there are any
-
-// (vcfSnpeff, vcfVep) = vcfAnnotation.into(2)
-
-// vcfVep = vcfVep.map {
-// variantCaller, idSample, vcf ->
-// [variantCaller, idSample, vcf, null]
-// }
-
-// // STEP SNPEFF
-
-// process Snpeff {
-// tag "${idSample} - ${variantCaller} - ${vcf}"
-
-// publishDir params.outdir, mode: params.publish_dir_mode, saveAs: {
-// if (it == "${reducedVCF}_snpEff.ann.vcf") null
-// else "Reports/${idSample}/snpEff/${it}"
-// }
-
-// input:
-// set variantCaller, idSample, file(vcf) from vcfSnpeff
-// file(dataDir) from snpeff_cache
-// val snpeffDb from snpeff_db
-
-// output:
-// set file("${reducedVCF}_snpEff.genes.txt"), file("${reducedVCF}_snpEff.html"), file("${reducedVCF}_snpEff.csv") into snpeffReport
-// set variantCaller, idSample, file("${reducedVCF}_snpEff.ann.vcf") into snpeffVCF
-
-// when: 'snpeff' in tools || 'merge' in tools
-
-// script:
-// reducedVCF = reduceVCF(vcf.fileName)
-// cache = (params.snpeff_cache && params.annotation_cache) ? "-dataDir \${PWD}/${dataDir}" : ""
-// """
-// snpEff -Xmx${task.memory.toGiga()}g \
-// ${snpeffDb} \
-// -csvStats ${reducedVCF}_snpEff.csv \
-// -nodownload \
-// ${cache} \
-// -canon \
-// -v \
-// ${vcf} \
-// > ${reducedVCF}_snpEff.ann.vcf
-
-// mv snpEff_summary.html ${reducedVCF}_snpEff.html
-// """
-// }
-
-// snpeffReport = snpeffReport.dump(tag:'snpEff report')
-
-// // STEP COMPRESS AND INDEX VCF.1 - SNPEFF
-
-// process CompressVCFsnpEff {
-// tag "${idSample} - ${vcf}"
-
-// publishDir "${params.outdir}/Annotation/${idSample}/snpEff", mode: params.publish_dir_mode
-
-// input:
-// set variantCaller, idSample, file(vcf) from snpeffVCF
-
-// output:
-// set variantCaller, idSample, file("*.vcf.gz"), file("*.vcf.gz.tbi") into (compressVCFsnpEffOut)
-
-// script:
-// """
-// bgzip < ${vcf} > ${vcf}.gz
-// tabix ${vcf}.gz
-// """
-// }
-
-// compressVCFsnpEffOut = compressVCFsnpEffOut.dump(tag:'VCF')
-
-// // STEP VEP.1
-
-// process VEP {
-// label 'VEP'
-// label 'cpus_4'
-
-// tag "${idSample} - ${variantCaller} - ${vcf}"
-
-// publishDir params.outdir, mode: params.publish_dir_mode, saveAs: {
-// if (it == "${reducedVCF}_VEP.summary.html") "Reports/${idSample}/VEP/${it}"
-// else null
-// }
-
-// input:
-// set variantCaller, idSample, file(vcf), file(idx) from vcfVep
-// file(dataDir) from vep_cache
-// val cache_version from vep_cache_version
-// file(cadd_InDels) from cadd_indels
-// file(cadd_InDels_tbi) from cadd_indels_tbi
-// file(cadd_WG_SNVs) from cadd_wg_snvs
-// file(cadd_WG_SNVs_tbi) from cadd_wg_snvs_tbi
-// output:
-// set variantCaller, idSample, file("${reducedVCF}_VEP.ann.vcf") into vepVCF
-// file("${reducedVCF}_VEP.summary.html") into vepReport
-
-// when: 'vep' in tools
-
-// script:
-// reducedVCF = reduceVCF(vcf.fileName)
-// genome = params.genome == 'smallGRCh37' ? 'GRCh37' : params.genome
-
-// dir_cache = (params.vep_cache && params.annotation_cache) ? " \${PWD}/${dataDir}" : "/.vep"
-// cadd = (params.cadd_cache && params.cadd_wg_snvs && params.cadd_indels) ? "--plugin CADD,whole_genome_SNVs.tsv.gz,InDels.tsv.gz" : ""
-// genesplicer = params.genesplicer ? "--plugin GeneSplicer,/opt/conda/envs/nf-core-sarek-${workflow.manifest.version}/bin/genesplicer,/opt/conda/envs/nf-core-sarek-${workflow.manifest.version}/share/genesplicer-1.0-1/human,context=200,tmpdir=\$PWD/${reducedVCF}" : "--offline"
-// """
-// mkdir ${reducedVCF}
-
-// vep \
-// -i ${vcf} \
-// -o ${reducedVCF}_VEP.ann.vcf \
-// --assembly ${genome} \
-// --species ${params.species} \
-// ${cadd} \
-// ${genesplicer} \
-// --cache \
-// --cache_version ${cache_version} \
-// --dir_cache ${dir_cache} \
-// --everything \
-// --filter_common \
-// --fork ${task.cpus} \
-// --format vcf \
-// --per_gene \
-// --stats_file ${reducedVCF}_VEP.summary.html \
-// --total_length \
-// --vcf
-
-// rm -rf ${reducedVCF}
-// """
-// }
-
-// vepReport = vepReport.dump(tag:'VEP')
-
-// // STEP VEP.2 - VEP AFTER SNPEFF
-
-// process VEPmerge {
-// label 'VEP'
-// label 'cpus_4'
-
-// tag "${idSample} - ${variantCaller} - ${vcf}"
-
-// publishDir params.outdir, mode: params.publish_dir_mode, saveAs: {
-// if (it == "${reducedVCF}_VEP.summary.html") "Reports/${idSample}/VEP/${it}"
-// else null
-// }
-
-// input:
-// set variantCaller, idSample, file(vcf), file(idx) from compressVCFsnpEffOut
-// file(dataDir) from vep_cache
-// val cache_version from vep_cache_version
-// file(cadd_InDels) from cadd_indels
-// file(cadd_InDels_tbi) from cadd_indels_tbi
-// file(cadd_WG_SNVs) from cadd_wg_snvs
-// file(cadd_WG_SNVs_tbi) from cadd_wg_snvs_tbi
-// output:
-// set variantCaller, idSample, file("${reducedVCF}_VEP.ann.vcf") into vepVCFmerge
-// file("${reducedVCF}_VEP.summary.html") into vepReportMerge
-
-// when: 'merge' in tools
-
-// script:
-// reducedVCF = reduceVCF(vcf.fileName)
-// genome = params.genome == 'smallGRCh37' ? 'GRCh37' : params.genome
-// dir_cache = (params.vep_cache && params.annotation_cache) ? " \${PWD}/${dataDir}" : "/.vep"
-// cadd = (params.cadd_cache && params.cadd_wg_snvs && params.cadd_indels) ? "--plugin CADD,whole_genome_SNVs.tsv.gz,InDels.tsv.gz" : ""
-// genesplicer = params.genesplicer ? "--plugin GeneSplicer,/opt/conda/envs/nf-core-sarek-${workflow.manifest.version}/bin/genesplicer,/opt/conda/envs/nf-core-sarek-${workflow.manifest.version}/share/genesplicer-1.0-1/human,context=200,tmpdir=\$PWD/${reducedVCF}" : "--offline"
-// """
-// mkdir ${reducedVCF}
-
-// vep \
-// -i ${vcf} \
-// -o ${reducedVCF}_VEP.ann.vcf \
-// --assembly ${genome} \
-// --species ${params.species} \
-// ${cadd} \
-// ${genesplicer} \
-// --cache \
-// --cache_version ${cache_version} \
-// --dir_cache ${dir_cache} \
-// --everything \
-// --filter_common \
-// --fork ${task.cpus} \
-// --format vcf \
-// --per_gene \
-// --stats_file ${reducedVCF}_VEP.summary.html \
-// --total_length \
-// --vcf
-
-// rm -rf ${reducedVCF}
-// """
-// }
-
-// vepReportMerge = vepReportMerge.dump(tag:'VEP')
-
-// vcfCompressVCFvep = vepVCF.mix(vepVCFmerge)
-
-// // STEP COMPRESS AND INDEX VCF.2 - VEP
-
-// process CompressVCFvep {
-// tag "${idSample} - ${vcf}"
-
-// publishDir "${params.outdir}/Annotation/${idSample}/VEP", mode: params.publish_dir_mode
-
-// input:
-// set variantCaller, idSample, file(vcf) from vcfCompressVCFvep
-
-// output:
-// set variantCaller, idSample, file("*.vcf.gz"), file("*.vcf.gz.tbi") into compressVCFOutVEP
-
-// script:
-// """
-// bgzip < ${vcf} > ${vcf}.gz
-// tabix ${vcf}.gz
-// """
-// }
-
-// compressVCFOutVEP = compressVCFOutVEP.dump(tag:'VCF')
diff --git a/modules/local/process/build_intervals.nf b/modules/local/build_intervals.nf
similarity index 57%
rename from modules/local/process/build_intervals.nf
rename to modules/local/build_intervals.nf
index 861c3c07b8..c476dd8258 100644
--- a/modules/local/process/build_intervals.nf
+++ b/modules/local/build_intervals.nf
@@ -1,21 +1,21 @@
// Import generic module functions
-include { initOptions; saveFiles; getSoftwareName } from './../../nf-core/software/functions'
+include { initOptions; saveFiles; getSoftwareName } from './functions'
params.options = [:]
def options = initOptions(params.options)
-environment = params.enable_conda ? "anaconda::gawk=5.1.0" : null
-container = "quay.io/biocontainers/gawk:5.1.0"
-if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/gawk:5.1.0"
-
process BUILD_INTERVALS {
tag "${fai}"
publishDir params.outdir, mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:"false") }
- conda environment
- container container
+ conda (params.enable_conda ? "anaconda::gawk=5.1.0" : null)
+ if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+ container "https://depot.galaxyproject.org/singularity/gawk:5.1.0"
+ } else {
+ container "quay.io/biocontainers/gawk:5.1.0"
+ }
input:
path fai
diff --git a/modules/local/process/concat_vcf.nf b/modules/local/concat_vcf.nf
similarity index 65%
rename from modules/local/process/concat_vcf.nf
rename to modules/local/concat_vcf.nf
index aa1bc5387b..f76d382b24 100644
--- a/modules/local/process/concat_vcf.nf
+++ b/modules/local/concat_vcf.nf
@@ -1,12 +1,9 @@
-include { initOptions; saveFiles; getSoftwareName } from './../../nf-core/software/functions'
+// Import generic module functions
+include { initOptions; saveFiles; getSoftwareName } from './functions'
params.options = [:]
def options = initOptions(params.options)
-environment = params.enable_conda ? "bioconda::htslib=1.11" : null
-container = "quay.io/biocontainers/htslib:1.11--hd3b49d5_0"
-if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/htslib:1.11--hd3b49d5_0"
-
process CONCAT_VCF {
label 'cpus_8'
@@ -15,8 +12,12 @@ process CONCAT_VCF {
publishDir params.outdir, mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
- conda environment
- container container
+ conda (params.enable_conda ? "bioconda::htslib=1.11" : null)
+ if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+ container "https://depot.galaxyproject.org/singularity/htslib:1.11--hd3b49d5_0"
+ } else {
+ container "quay.io/biocontainers/htslib:1.11--hd3b49d5_0"
+ }
input:
tuple val(meta), path(vcf)
diff --git a/modules/local/process/create_intervals_bed.nf b/modules/local/create_intervals_bed.nf
similarity index 79%
rename from modules/local/process/create_intervals_bed.nf
rename to modules/local/create_intervals_bed.nf
index 6c6eecac95..52f20dbfb2 100644
--- a/modules/local/process/create_intervals_bed.nf
+++ b/modules/local/create_intervals_bed.nf
@@ -1,21 +1,21 @@
-include { initOptions; saveFiles; getSoftwareName } from './../../nf-core/software/functions'
-include { has_extension } from '../functions'
+// Import generic module functions
+include { initOptions; saveFiles; getSoftwareName; has_extension } from './functions'
params.options = [:]
def options = initOptions(params.options)
-environment = params.enable_conda ? "anaconda::gawk=5.1.0" : null
-container = "quay.io/biocontainers/gawk:5.1.0"
-if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/gawk:5.1.0"
-
process CREATE_INTERVALS_BED {
tag "${intervals}"
publishDir params.outdir, mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:"false") }
- conda environment
- container container
+ conda (params.enable_conda ? "anaconda::gawk=5.1.0" : null)
+ if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+ container "https://depot.galaxyproject.org/singularity/gawk:5.1.0"
+ } else {
+ container "quay.io/biocontainers/gawk:5.1.0"
+ }
input:
path intervals
diff --git a/modules/local/functions.nf b/modules/local/functions.nf
index 7293db1f18..c4a9d6baaa 100644
--- a/modules/local/functions.nf
+++ b/modules/local/functions.nf
@@ -123,14 +123,17 @@ def extract_fastq_from_dir(folder) {
fastq = fastq.map{ run, pair ->
def meta = [:]
- meta.patient = sample
- meta.sample = meta.patient
- meta.gender = 'ZZ' // unused
- meta.status = 0 // normal (not tumor)
- meta.run = run
- meta.id = "${meta.sample}-${meta.run}"
- def read1 = pair[0]
- def read2 = pair[1]
+ meta.patient = sample
+ meta.sample = meta.patient
+ meta.gender = 'ZZ' // unused
+ meta.status = 0 // normal (not tumor)
+ meta.run = run
+ meta.id = "${meta.sample}-${meta.run}"
+ def read1 = pair[0]
+ def read2 = pair[1]
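+ // Build the @RG read group string here so aligner modules can pass it straight
+ // through with -R; the CN field is only included when --sequencing_center is set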
+ def CN = params.sequencing_center ? "CN:${params.sequencing_center}\\t" : ""
+ def read_group = "\"@RG\\tID:${meta.run}\\t${CN}PU:${meta.run}\\tSM:${meta.sample}\\tLB:${meta.sample}\\tPL:ILLUMINA\""
+ meta.read_group = read_group
return [meta, [read1, read2]]
}
@@ -144,14 +147,18 @@ def extract_fastq(tsvFile) {
.splitCsv(sep: '\t')
.map { row ->
def meta = [:]
- meta.patient = row[0]
- meta.gender = row[1]
- meta.status = return_status(row[2].toInteger())
- meta.sample = row[3]
- meta.run = row[4]
- meta.id = "${meta.sample}-${meta.run}"
- def read1 = return_file(row[5])
- def read2 = "null"
+ meta.patient = row[0]
+ meta.gender = row[1]
+ meta.status = return_status(row[2].toInteger())
+ meta.sample = row[3]
+ meta.run = row[4]
+ meta.id = "${meta.sample}-${meta.run}"
+ def read1 = return_file(row[5])
+ def read2 = "null"
+ def CN = params.sequencing_center ? "CN:${params.sequencing_center}\\t" : ""
+ def read_group = "\"@RG\\tID:${meta.run}\\t${CN}PU:${meta.run}\\tSM:${meta.sample}\\tLB:${meta.sample}\\tPL:ILLUMINA\""
+ meta.read_group = read_group
+
if (has_extension(read1, "fastq.gz") || has_extension(read1, "fq.gz") || has_extension(read1, "fastq") || has_extension(read1, "fq")) {
check_number_of_item(row, 7)
read2 = return_file(row[6])
@@ -260,4 +267,62 @@ def reduce_vcf(file) {
def return_status(it) {
if (!(it in [0, 1])) exit 1, "Status is not recognized in TSV file: ${it}, see --help for more information"
return it
-}
\ No newline at end of file
+}
+
+/*
+ * nf-core core functions
+ */
+
+/*
+ * Extract name of software tool from process name using $task.process
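+ * e.g. a task.process of "NFCORE_SAREK:BWA_MEM" (hypothetical) yields "bwa"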
+ */
+def getSoftwareName(task_process) {
+ return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+/*
+ * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
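+ * args/args2 hold extra command-line arguments for the module's tool(s), e.g. args2 for a piped samtools call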
+ */
+def initOptions(Map args) {
+ def Map options = [:]
+ options.args = args.args ?: ''
+ options.args2 = args.args2 ?: ''
+ options.publish_by_id = args.publish_by_id ?: false
+ options.publish_dir = args.publish_dir ?: ''
+ options.publish_files = args.publish_files
+ options.suffix = args.suffix ?: ''
+ return options
+}
+
+/*
+ * Tidy up and join elements of a list to return a path string
+ */
+def getPathFromList(path_list) {
+ def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries
+ paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and leading/trailing slashes
+ return paths.join('/')
+}
+
+/*
+ * Function to save/publish module results
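+ * If options.publish_files is a Map of [extension: subdirectory], only files with a
+ * matching extension are published, into that subdirectory; if it is null, everything
+ * except *.version.txt is published; any other value publishes nothing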
+ */
+def saveFiles(Map args) {
+ if (!args.filename.endsWith('.version.txt')) {
+ def ioptions = initOptions(args.options)
+ def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
+ if (ioptions.publish_by_id) {
+ path_list.add(args.publish_id)
+ }
+ if (ioptions.publish_files instanceof Map) {
+ for (ext in ioptions.publish_files) {
+ if (args.filename.endsWith(ext.key)) {
+ def ext_list = path_list.collect()
+ ext_list.add(ext.value)
+ return "${getPathFromList(ext_list)}/$args.filename"
+ }
+ }
+ } else if (ioptions.publish_files == null) {
+ return "${getPathFromList(path_list)}/$args.filename"
+ }
+ }
+}
diff --git a/modules/local/process/index_target_bed.nf b/modules/local/index_target_bed.nf
similarity index 57%
rename from modules/local/process/index_target_bed.nf
rename to modules/local/index_target_bed.nf
index d03eaa581a..185c136a22 100644
--- a/modules/local/process/index_target_bed.nf
+++ b/modules/local/index_target_bed.nf
@@ -1,12 +1,9 @@
-include { initOptions; saveFiles; getSoftwareName } from './../../nf-core/software/functions'
+// Import generic module functions
+include { initOptions; saveFiles; getSoftwareName } from './functions'
params.options = [:]
def options = initOptions(params.options)
-environment = params.enable_conda ? "bioconda::htslib=1.11" : null
-container = "quay.io/biocontainers/htslib:1.11--hd3b49d5_0"
-if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/htslib:1.11--hd3b49d5_0"
-
process INDEX_TARGET_BED {
label 'cpus_8'
@@ -15,8 +12,12 @@ process INDEX_TARGET_BED {
publishDir params.outdir, mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:"false") }
- conda environment
- container container
+ conda (params.enable_conda ? "bioconda::htslib=1.11" : null)
+ if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+ container "https://depot.galaxyproject.org/singularity/htslib:1.11--hd3b49d5_0"
+ } else {
+ container "quay.io/biocontainers/htslib:1.11--hd3b49d5_0"
+ }
input:
path target_bed
diff --git a/modules/local/process/merge_bam.nf b/modules/local/merge_bam.nf
similarity index 54%
rename from modules/local/process/merge_bam.nf
rename to modules/local/merge_bam.nf
index e190d8dbd1..20725a2787 100644
--- a/modules/local/process/merge_bam.nf
+++ b/modules/local/merge_bam.nf
@@ -1,22 +1,24 @@
-include { initOptions; saveFiles; getSoftwareName } from './../../nf-core/software/functions'
+// Import generic module functions
+include { initOptions; saveFiles; getSoftwareName } from './functions'
params.options = [:]
def options = initOptions(params.options)
-environment = params.enable_conda ? "bioconda::samtools=1.10" : null
-container = "quay.io/biocontainers/samtools:1.10--h2e538c0_3"
-if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/samtools:1.10--h2e538c0_3"
-
process MERGE_BAM {
label 'cpus_8'
tag "${meta.id}"
- publishDir params.outdir, mode: params.publish_dir_mode,
+ publishDir "${params.outdir}",
+ mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
- conda environment
- container container
+ conda (params.enable_conda ? "bioconda::samtools=1.11" : null)
+ if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+ container "https://depot.galaxyproject.org/singularity/samtools:1.11--h6270b1f_0"
+ } else {
+ container "quay.io/biocontainers/samtools:1.11--h6270b1f_0"
+ }
input:
tuple val(meta), path(bam)
diff --git a/modules/local/process/bwa_mem.nf b/modules/local/process/bwa_mem.nf
deleted file mode 100644
index bcbd43d9ba..0000000000
--- a/modules/local/process/bwa_mem.nf
+++ /dev/null
@@ -1,50 +0,0 @@
-// Import generic module functions
-include { initOptions; saveFiles; getSoftwareName } from './../../nf-core/software/functions'
-
-params.options = [:]
-def options = initOptions(params.options)
-
-environment = params.enable_conda ? "bioconda::bwa=0.7.17 bioconda::samtools=1.10" : null
-container = "quay.io/biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:eabfac3657eda5818bae4090db989e3d41b01542-0"
-if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:eabfac3657eda5818bae4090db989e3d41b01542-0"
-
-process BWA_MEM {
- label 'process_high'
-
- tag "${meta.id}"
-
- publishDir params.outdir, mode: params.publish_dir_mode,
- saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
-
-
- conda environment
- container container
-
- input:
- tuple val(meta), path(reads)
- path bwa
- path fasta
- path fai
-
- output:
- tuple val(meta), path("*.bam"), emit: bam
- path "*.version.txt" , emit: version
-
- script:
- CN = params.sequencing_center ? "CN:${params.sequencing_center}\\t" : ""
- readGroup = "@RG\\tID:${meta.run}\\t${CN}PU:${meta.run}\\tSM:${meta.sample}\\tLB:${meta.sample}\\tPL:ILLUMINA"
- extra = meta.status == 1 ? "-B 3" : ""
- """
- bwa mem \
- ${options.args} \
- -R \"${readGroup}\" \
- ${extra} \
- -t ${task.cpus} \
- ${fasta} ${reads} | \
- samtools sort --threads ${task.cpus} -m 2G - > ${meta.id}.bam
-
- # samtools index ${meta.id}.bam
-
- echo \$(bwa version 2>&1) > bwa.version.txt
- """
-}
diff --git a/modules/local/process/bwamem2_mem.nf b/modules/local/process/bwamem2_mem.nf
deleted file mode 100644
index f0117b5807..0000000000
--- a/modules/local/process/bwamem2_mem.nf
+++ /dev/null
@@ -1,47 +0,0 @@
-include { initOptions; saveFiles; getSoftwareName } from './../../nf-core/software/functions'
-
-params.options = [:]
-def options = initOptions(params.options)
-
-environment = params.enable_conda ? "bioconda::bwa-mem2=2.0 bioconda::samtools=1.10" : null
-container = "quay.io/biocontainers/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:876eb6f1d38fbf578296ea94e5aede4e317939e7-0"
-if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:876eb6f1d38fbf578296ea94e5aede4e317939e7-0"
-
-process BWAMEM2_MEM {
- label 'process_high'
-
- tag "${meta.id}"
-
- publishDir params.outdir, mode: params.publish_dir_mode,
- saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
-
- conda environment
- container container
-
- input:
- tuple val(meta), path(reads)
- path bwa
- path fasta
- path fai
-
- output:
- tuple val(meta), path("*.bam")
-
- script:
- CN = params.sequencing_center ? "CN:${params.sequencing_center}\\t" : ""
- readGroup = "@RG\\tID:${meta.run}\\t${CN}PU:${meta.run}\\tSM:${meta.sample}\\tLB:${meta.sample}\\tPL:ILLUMINA"
- extra = meta.status == 1 ? "-B 3" : ""
- """
- bwa-mem2 mem \
- ${options.args} \
- -R \"${readGroup}\" \
- ${extra} \
- -t ${task.cpus} \
- ${fasta} ${reads} | \
- samtools sort --threads ${task.cpus} -m 2G - > ${meta.id}.bam
-
- # samtools index ${meta.id}.bam
-
- echo \$(bwa-mem2 version 2>&1) > bwa-mem2.version.txt
- """
-}
\ No newline at end of file
diff --git a/modules/nf-core/software/bwa/index/functions.nf b/modules/nf-core/software/bwa/index/functions.nf
new file mode 100644
index 0000000000..d25eea86b3
--- /dev/null
+++ b/modules/nf-core/software/bwa/index/functions.nf
@@ -0,0 +1,59 @@
+/*
+ * -----------------------------------------------------
+ * Utility functions used in nf-core DSL2 module files
+ * -----------------------------------------------------
+ */
+
+/*
+ * Extract name of software tool from process name using $task.process
+ */
+def getSoftwareName(task_process) {
+ return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+/*
+ * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
+ */
+def initOptions(Map args) {
+ def Map options = [:]
+ options.args = args.args ?: ''
+ options.args2 = args.args2 ?: ''
+ options.publish_by_id = args.publish_by_id ?: false
+ options.publish_dir = args.publish_dir ?: ''
+ options.publish_files = args.publish_files
+ options.suffix = args.suffix ?: ''
+ return options
+}
+
+/*
+ * Tidy up and join elements of a list to return a path string
+ */
+def getPathFromList(path_list) {
+ def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries
+ paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and leading/trailing slashes
+ return paths.join('/')
+}
+
+/*
+ * Function to save/publish module results
+ */
+def saveFiles(Map args) {
+ if (!args.filename.endsWith('.version.txt')) {
+ def ioptions = initOptions(args.options)
+ def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
+ if (ioptions.publish_by_id) {
+ path_list.add(args.publish_id)
+ }
+ if (ioptions.publish_files instanceof Map) {
+ for (ext in ioptions.publish_files) {
+ if (args.filename.endsWith(ext.key)) {
+ def ext_list = path_list.collect()
+ ext_list.add(ext.value)
+ return "${getPathFromList(ext_list)}/$args.filename"
+ }
+ }
+ } else if (ioptions.publish_files == null) {
+ return "${getPathFromList(path_list)}/$args.filename"
+ }
+ }
+}
diff --git a/modules/nf-core/software/bwa/index/main.nf b/modules/nf-core/software/bwa/index/main.nf
index 078cfb51d1..47d2ec54a6 100644
--- a/modules/nf-core/software/bwa/index/main.nf
+++ b/modules/nf-core/software/bwa/index/main.nf
@@ -1,35 +1,34 @@
-include { initOptions; saveFiles; getSoftwareName } from './../../functions'
+// Import generic module functions
+include { initOptions; saveFiles; getSoftwareName } from './functions'
params.options = [:]
def options = initOptions(params.options)
-environment = params.enable_conda ? "bioconda::bwa=0.7.17" : null
-container = "quay.io/biocontainers/bwa:0.7.17--hed695b0_7"
-if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/bwa:0.7.17--hed695b0_7"
-
process BWA_INDEX {
+ tag "$fasta"
label 'process_high'
+ publishDir "${params.outdir}",
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:'') }
- tag "${fasta}"
-
- publishDir params.outdir, mode: params.publish_dir_mode,
- saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:"false") }
-
- conda environment
- container container
+ conda (params.enable_conda ? "bioconda::bwa=0.7.17" : null)
+ if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+ container "https://depot.galaxyproject.org/singularity/bwa:0.7.17--hed695b0_7"
+ } else {
+ container "quay.io/biocontainers/bwa:0.7.17--hed695b0_7"
+ }
input:
- path fasta
+ path fasta
output:
- path "${fasta}.*" , emit: index
- path "*.version.txt", emit: version
+ path "${fasta}.*" , emit: index
+ path "*.version.txt", emit: version
script:
def software = getSoftwareName(task.process)
- def ioptions = initOptions(options)
"""
- bwa index ${ioptions.args} ${fasta}
+ bwa index $options.args $fasta
echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//' > ${software}.version.txt
"""
}
diff --git a/modules/nf-core/software/bwa/index/meta.yml b/modules/nf-core/software/bwa/index/meta.yml
index a2f5b1ed66..0c3b8f5ff4 100644
--- a/modules/nf-core/software/bwa/index/meta.yml
+++ b/modules/nf-core/software/bwa/index/meta.yml
@@ -24,20 +24,20 @@ params:
description: |
Value for the Nextflow `publishDir` mode parameter.
Available: symlink, rellink, link, copy, copyNoFollow, move.
- - conda:
+ - enable_conda:
type: boolean
description: |
Run the module with Conda using the software specified
via the `conda` directive
+ - singularity_pull_docker_container:
+ type: boolean
+ description: |
+ Instead of directly downloading Singularity images for use with Singularity,
+ force the workflow to pull and convert Docker containers.
input:
- fasta:
type: file
description: Input genome fasta file
- - options:
- type: map
- description: |
- Groovy Map containing module options for passing command-line arguments and
- output file paths.
output:
- index:
type: file
diff --git a/modules/nf-core/software/bwa/mem/functions.nf b/modules/nf-core/software/bwa/mem/functions.nf
new file mode 100644
index 0000000000..d25eea86b3
--- /dev/null
+++ b/modules/nf-core/software/bwa/mem/functions.nf
@@ -0,0 +1,59 @@
+/*
+ * -----------------------------------------------------
+ * Utility functions used in nf-core DSL2 module files
+ * -----------------------------------------------------
+ */
+
+/*
+ * Extract name of software tool from process name using $task.process
+ */
+def getSoftwareName(task_process) {
+ return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+/*
+ * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
+ */
+def initOptions(Map args) {
+ def Map options = [:]
+ options.args = args.args ?: ''
+ options.args2 = args.args2 ?: ''
+ options.publish_by_id = args.publish_by_id ?: false
+ options.publish_dir = args.publish_dir ?: ''
+ options.publish_files = args.publish_files
+ options.suffix = args.suffix ?: ''
+ return options
+}
+
+/*
+ * Tidy up and join elements of a list to return a path string
+ */
+def getPathFromList(path_list) {
+ def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries
+ paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and leading/trailing slashes
+ return paths.join('/')
+}
+
+/*
+ * Function to save/publish module results
+ */
+def saveFiles(Map args) {
+ if (!args.filename.endsWith('.version.txt')) {
+ def ioptions = initOptions(args.options)
+ def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
+ if (ioptions.publish_by_id) {
+ path_list.add(args.publish_id)
+ }
+ if (ioptions.publish_files instanceof Map) {
+ for (ext in ioptions.publish_files) {
+ if (args.filename.endsWith(ext.key)) {
+ def ext_list = path_list.collect()
+ ext_list.add(ext.value)
+ return "${getPathFromList(ext_list)}/$args.filename"
+ }
+ }
+ } else if (ioptions.publish_files == null) {
+ return "${getPathFromList(path_list)}/$args.filename"
+ }
+ }
+}
diff --git a/modules/nf-core/software/bwa/mem/main.nf b/modules/nf-core/software/bwa/mem/main.nf
new file mode 100644
index 0000000000..6da50c3d0f
--- /dev/null
+++ b/modules/nf-core/software/bwa/mem/main.nf
@@ -0,0 +1,46 @@
+// Import generic module functions
+include { initOptions; saveFiles; getSoftwareName } from './functions'
+
+params.options = [:]
+def options = initOptions(params.options)
+
+process BWA_MEM {
+ tag "$meta.id"
+ label 'process_high'
+ publishDir "${params.outdir}",
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
+
+ conda (params.enable_conda ? "bioconda::bwa=0.7.17 bioconda::samtools=1.10" : null)
+ if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+ container "https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:eabfac3657eda5818bae4090db989e3d41b01542-0"
+ } else {
+ container "quay.io/biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:eabfac3657eda5818bae4090db989e3d41b01542-0"
+ }
+
+ input:
+ tuple val(meta), path(reads)
+ path index
+ path fasta
+ path fai
+
+ output:
+ tuple val(meta), path("*.bam"), emit: bam
+ path "*.version.txt" , emit: version
+
+ script:
+ def software = getSoftwareName(task.process)
+ def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
+ def read_group = meta.read_group ? "-R ${meta.read_group}" : ""
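+ // NOTE: options.args2 must carry the samtools subcommand and its flags
+ // (e.g. "sort"), since the pipe below invokes `samtools $options.args2`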
+ """
+ bwa mem \
+ $options.args \
+ $read_group \
+ -t $task.cpus \
+ $fasta \
+ $reads \
+ | samtools $options.args2 --threads $task.cpus -o ${prefix}.bam -
+
+ echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//' > ${software}.version.txt
+ """
+}
diff --git a/modules/nf-core/software/bwa/mem/meta.yml b/modules/nf-core/software/bwa/mem/meta.yml
new file mode 100644
index 0000000000..de61798bb1
--- /dev/null
+++ b/modules/nf-core/software/bwa/mem/meta.yml
@@ -0,0 +1,69 @@
+name: bwa_mem
+description: Performs fastq alignment to a fasta reference using BWA
+keywords:
+ - mem
+ - bwa
+ - alignment
+ - map
+ - fastq
+ - bam
+ - sam
+tools:
+ - bwa:
+ description: |
+ BWA is a software package for mapping DNA sequences against
+ a large reference genome, such as the human genome.
+ homepage: http://bio-bwa.sourceforge.net/
+ documentation: http://bio-bwa.sourceforge.net/bwa.shtml
+ arxiv: arXiv:1303.3997
+params:
+ - outdir:
+ type: string
+ description: |
+ The pipeline's output directory. By default, the module will
+ output files into `$params.outdir/`
+ - publish_dir_mode:
+ type: string
+ description: |
+ Value for the Nextflow `publishDir` mode parameter.
+ Available: symlink, rellink, link, copy, copyNoFollow, move.
+ - enable_conda:
+ type: boolean
+ description: |
+ Run the module with Conda using the software specified
+ via the `conda` directive
+ - singularity_pull_docker_container:
+ type: boolean
+ description: |
+ Instead of directly downloading Singularity images for use with Singularity,
+ force the workflow to pull and convert Docker containers.
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+ description: |
+ List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+ respectively.
+ - index:
+ type: file
+ description: BWA genome index files
+ pattern: "*.{amb,ann,bwt,pac,sa}"
+ - fasta:
+ type: file
+ description: Input genome fasta file
+output:
+ - bam:
+ type: file
+ description: Output BAM file containing read alignments
+ pattern: "*.{bam}"
+ - version:
+ type: file
+ description: File containing software version
+ pattern: "*.{version.txt}"
+authors:
+ - "@drpatelh"
+ - "@jeremy1805"
diff --git a/modules/nf-core/software/bwamem2_index.nf b/modules/nf-core/software/bwamem2_index.nf
index c8e26dfd7e..4621a35312 100644
--- a/modules/nf-core/software/bwamem2_index.nf
+++ b/modules/nf-core/software/bwamem2_index.nf
@@ -3,24 +3,25 @@ include { initOptions; saveFiles; getSoftwareName } from './functions'
params.options = [:]
def options = initOptions(params.options)
-environment = params.enable_conda ? "bioconda::bwa-mem2=2.0" : null
-container = "quay.io/biocontainers/bwa-mem2:2.0--he513fc3_1"
-if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/bwa-mem2:2.0--he513fc3_1"
-
process BWAMEM2_INDEX {
tag "${fasta}"
publishDir params.outdir, mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:"false") }
- conda environment
- container container
+ conda (params.enable_conda ? "bioconda::bwa-mem2=2.1" : null)
+ if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+ container "https://depot.galaxyproject.org/singularity/bwa-mem2:2.1--he513fc3_0"
+ } else {
+ container "quay.io/biocontainers/bwa-mem2:2.1--he513fc3_0"
+ }
input:
path fasta
output:
- path "${fasta}.*"
+ path "${fasta}.*" , emit: index
+ path "*.version.txt" , emit: version
script:
def software = getSoftwareName(task.process)
diff --git a/modules/nf-core/software/bwamem2_mem.nf b/modules/nf-core/software/bwamem2_mem.nf
new file mode 100644
index 0000000000..9a2c95fde8
--- /dev/null
+++ b/modules/nf-core/software/bwamem2_mem.nf
@@ -0,0 +1,46 @@
+include { initOptions; saveFiles; getSoftwareName } from './functions'
+
+params.options = [:]
+def options = initOptions(params.options)
+
+process BWAMEM2_MEM {
+ tag "$meta.id"
+ label 'process_high'
+ label 'BWAMEM2_MEM'
+ publishDir "${params.outdir}",
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
+
+ conda (params.enable_conda ? "bioconda::bwa-mem2=2.1 bioconda::samtools=1.11" : null)
+ if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+ container "https://depot.galaxyproject.org/singularity/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:e6f0d20c9d78572ddbbf00d8767ee6ff865edd4e-0"
+ } else {
+ container "quay.io/biocontainers/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:e6f0d20c9d78572ddbbf00d8767ee6ff865edd4e-0"
+ }
+
+ input:
+ tuple val(meta), path(reads)
+ path index
+ path fasta
+ path fai
+
+ output:
+ tuple val(meta), path("*.bam"), emit: bam
+ path "*.version.txt" , emit: version
+
+ script:
+ def software = getSoftwareName(task.process)
+ def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
+ def read_group = meta.read_group ? "-R ${meta.read_group}" : ""
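+ // As in BWA_MEM, options.args2 is expected to carry the samtools subcommand (e.g. "sort")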
+ """
+ bwa-mem2 mem \
+ $options.args \
+ $read_group \
+ -t $task.cpus \
+ $fasta \
+ $reads \
+ | samtools $options.args2 --threads $task.cpus -o ${prefix}.bam -
+
+ echo \$(bwa-mem2 version 2>&1) > ${software}.version.txt
+ """
+}
\ No newline at end of file
diff --git a/modules/nf-core/software/fastqc.nf b/modules/nf-core/software/fastqc.nf
deleted file mode 100644
index 80a5582a1f..0000000000
--- a/modules/nf-core/software/fastqc.nf
+++ /dev/null
@@ -1,42 +0,0 @@
-// Import generic module functions
-include { initOptions; saveFiles; getSoftwareName } from './functions'
-
-params.options = [:]
-def options = initOptions(params.options)
-
-environment = params.enable_conda ? "bioconda::fastqc=0.11.9" : null
-container = "quay.io/biocontainers/fastqc:0.11.9--0"
-if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0"
-
-process FASTQC {
- label 'process_medium'
- label 'cpus_2'
-
- tag "${meta.id}"
-
- publishDir params.outdir, mode: params.publish_dir_mode,
- saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
-
- conda environment
- container container
-
- input:
- tuple val(meta), path(reads)
-
- output:
- path "*.html", emit: html
- path "*.version.txt", emit: version
- path "*.zip", emit: zip
-
- script:
- // Add soft-links to original FastQs for consistent naming in pipeline
- prefix = options.suffix ? "${meta.id}.${options.suffix}" : "${meta.id}"
- """
- [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz
- [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz
-
- fastqc ${options.args} --threads ${task.cpus} ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz
-
- fastqc --version | sed -n "s/.*\\(v.*\$\\)/\\1/p" > fastqc.version.txt
- """
-}
\ No newline at end of file
diff --git a/modules/nf-core/software/fastqc/functions.nf b/modules/nf-core/software/fastqc/functions.nf
new file mode 100644
index 0000000000..d25eea86b3
--- /dev/null
+++ b/modules/nf-core/software/fastqc/functions.nf
@@ -0,0 +1,59 @@
+/*
+ * -----------------------------------------------------
+ * Utility functions used in nf-core DSL2 module files
+ * -----------------------------------------------------
+ */
+
+/*
+ * Extract name of software tool from process name using $task.process
+ */
+def getSoftwareName(task_process) {
+ return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+/*
+ * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
+ */
+def initOptions(Map args) {
+ def Map options = [:]
+ options.args = args.args ?: ''
+ options.args2 = args.args2 ?: ''
+ options.publish_by_id = args.publish_by_id ?: false
+ options.publish_dir = args.publish_dir ?: ''
+ options.publish_files = args.publish_files
+ options.suffix = args.suffix ?: ''
+ return options
+}
+
+/*
+ * Tidy up and join elements of a list to return a path string
+ */
+def getPathFromList(path_list) {
+ def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries
+ paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and leading/trailing slashes
+ return paths.join('/')
+}
+
+/*
+ * Function to save/publish module results
+ */
+def saveFiles(Map args) {
+ if (!args.filename.endsWith('.version.txt')) {
+ def ioptions = initOptions(args.options)
+ def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
+ if (ioptions.publish_by_id) {
+ path_list.add(args.publish_id)
+ }
+ if (ioptions.publish_files instanceof Map) {
+ for (ext in ioptions.publish_files) {
+ if (args.filename.endsWith(ext.key)) {
+ def ext_list = path_list.collect()
+ ext_list.add(ext.value)
+ return "${getPathFromList(ext_list)}/$args.filename"
+ }
+ }
+ } else if (ioptions.publish_files == null) {
+ return "${getPathFromList(path_list)}/$args.filename"
+ }
+ }
+}
diff --git a/modules/nf-core/software/fastqc/main.nf b/modules/nf-core/software/fastqc/main.nf
new file mode 100644
index 0000000000..cce410a080
--- /dev/null
+++ b/modules/nf-core/software/fastqc/main.nf
@@ -0,0 +1,47 @@
+// Import generic module functions
+include { initOptions; saveFiles; getSoftwareName } from './functions'
+
+params.options = [:]
+def options = initOptions(params.options)
+
+process FASTQC {
+ tag "$meta.id"
+ label 'process_medium'
+ publishDir "${params.outdir}",
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
+
+ conda (params.enable_conda ? "bioconda::fastqc=0.11.9" : null)
+ if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+ container "https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0"
+ } else {
+ container "quay.io/biocontainers/fastqc:0.11.9--0"
+ }
+
+ input:
+ tuple val(meta), path(reads)
+
+ output:
+ tuple val(meta), path("*.html"), emit: html
+ tuple val(meta), path("*.zip") , emit: zip
+ path "*.version.txt" , emit: version
+
+ script:
+ // Add soft-links to original FastQs for consistent naming in pipeline
+ def software = getSoftwareName(task.process)
+ def prefix = options.suffix ? "${meta.id}.${options.suffix}" : "${meta.id}"
+ if (meta.single_end) {
+ """
+ [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz
+ fastqc $options.args --threads $task.cpus ${prefix}.fastq.gz
+ fastqc --version | sed -e "s/FastQC v//g" > ${software}.version.txt
+ """
+ } else {
+ """
+ [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz
+ [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz
+ fastqc $options.args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz
+ fastqc --version | sed -e "s/FastQC v//g" > ${software}.version.txt
+ """
+ }
+}
diff --git a/modules/nf-core/software/fastqc/meta.yml b/modules/nf-core/software/fastqc/meta.yml
new file mode 100644
index 0000000000..413aad8db6
--- /dev/null
+++ b/modules/nf-core/software/fastqc/meta.yml
@@ -0,0 +1,72 @@
+name: fastqc
+description: Run FastQC on sequenced reads
+keywords:
+ - quality control
+ - qc
+ - adapters
+ - fastq
+tools:
+ - fastqc:
+ description: |
+ FastQC gives general quality metrics about your reads.
+ It provides information about the quality score distribution
+ across your reads, the per base sequence content (%A/C/G/T).
+ You get information about adapter contamination and other
+ overrepresented sequences.
+ homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
+ documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/
+params:
+ - outdir:
+ type: string
+ description: |
+ The pipeline's output directory. By default, the module will
+ output files into `$params.outdir/`
+ - publish_dir_mode:
+ type: string
+ description: |
+ Value for the Nextflow `publishDir` mode parameter.
+ Available: symlink, rellink, link, copy, copyNoFollow, move.
+ - enable_conda:
+ type: boolean
+ description: |
+ Run the module with Conda using the software specified
+ via the `conda` directive
+ - singularity_pull_docker_container:
+ type: boolean
+ description: |
+ Instead of directly downloading Singularity images for use with Singularity,
+ force the workflow to pull and convert Docker containers.
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+ description: |
+ List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+ respectively.
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - html:
+ type: file
+ description: FastQC report
+ pattern: "*_{fastqc.html}"
+ - zip:
+ type: file
+ description: FastQC report archive
+ pattern: "*_{fastqc.zip}"
+ - version:
+ type: file
+ description: File containing software version
+ pattern: "*.{version.txt}"
+authors:
+ - "@drpatelh"
+ - "@grst"
+ - "@ewels"
+ - "@FelixKrueger"
diff --git a/modules/nf-core/software/functions.nf b/modules/nf-core/software/functions.nf
index ca46a99f5d..d25eea86b3 100644
--- a/modules/nf-core/software/functions.nf
+++ b/modules/nf-core/software/functions.nf
@@ -41,17 +41,19 @@ def saveFiles(Map args) {
if (!args.filename.endsWith('.version.txt')) {
def ioptions = initOptions(args.options)
def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
- if (ioptions.publish_by_id) path_list.add(args.publish_id)
+ if (ioptions.publish_by_id) {
+ path_list.add(args.publish_id)
+ }
if (ioptions.publish_files instanceof Map) {
for (ext in ioptions.publish_files) {
if (args.filename.endsWith(ext.key)) {
def ext_list = path_list.collect()
ext_list.add(ext.value)
- return "${getPathFromList(ext_list)}/${args.filename}"
+ return "${getPathFromList(ext_list)}/$args.filename"
}
}
} else if (ioptions.publish_files == null) {
- return "${getPathFromList(path_list)}/${args.filename}"
+ return "${getPathFromList(path_list)}/$args.filename"
}
}
}
diff --git a/modules/nf-core/software/gatk/applybqsr.nf b/modules/nf-core/software/gatk/applybqsr.nf
index d17bb10dda..f21f9b04cd 100644
--- a/modules/nf-core/software/gatk/applybqsr.nf
+++ b/modules/nf-core/software/gatk/applybqsr.nf
@@ -3,10 +3,6 @@ include { initOptions; saveFiles; getSoftwareName } from './../functions'
params.options = [:]
def options = initOptions(params.options)
-environment = params.enable_conda ? "bioconda::gatk4-spark=4.1.8.1" : null
-container = "quay.io/biocontainers/gatk4-spark:4.1.8.1--0"
-if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/gatk4-spark:4.1.8.1--0"
-
process GATK_APPLYBQSR {
label 'memory_singleCPU_2_task'
label 'cpus_2'
@@ -16,8 +12,12 @@ process GATK_APPLYBQSR {
publishDir params.outdir, mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
- conda environment
- container container
+ conda (params.enable_conda ? "bioconda::gatk4=4.1.9.0" : null)
+ if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+ container "https://depot.galaxyproject.org/singularity/gatk4:4.1.9.0--py39_0"
+ } else {
+ container "quay.io/biocontainers/gatk4:4.1.9.0--py39_0"
+ }
input:
tuple val(meta), path(bam), path(bai), path(recalibrationReport), path(interval)
diff --git a/modules/nf-core/software/gatk/baserecalibrator.nf b/modules/nf-core/software/gatk/baserecalibrator.nf
index c6b8a35392..50210f8f1c 100644
--- a/modules/nf-core/software/gatk/baserecalibrator.nf
+++ b/modules/nf-core/software/gatk/baserecalibrator.nf
@@ -3,10 +3,6 @@ include { initOptions; saveFiles; getSoftwareName } from './../functions'
params.options = [:]
def options = initOptions(params.options)
-environment = params.enable_conda ? "bioconda::gatk4-spark=4.1.8.1" : null
-container = "quay.io/biocontainers/gatk4-spark:4.1.8.1--0"
-if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/gatk4-spark:4.1.8.1--0"
-
process GATK_BASERECALIBRATOR {
label 'cpus_1'
@@ -15,8 +11,12 @@ process GATK_BASERECALIBRATOR {
publishDir params.outdir, mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
- conda environment
- container container
+ conda (params.enable_conda ? "bioconda::gatk4=4.1.9.0" : null)
+ if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+ container "https://depot.galaxyproject.org/singularity/gatk4:4.1.9.0--py39_0"
+ } else {
+ container "quay.io/biocontainers/gatk4:4.1.9.0--py39_0"
+ }
input:
tuple val(meta), path(bam), path(bai), path(interval)
diff --git a/modules/nf-core/software/gatk/createsequencedictionary.nf b/modules/nf-core/software/gatk/createsequencedictionary.nf
index a740b91a64..62373b0a04 100644
--- a/modules/nf-core/software/gatk/createsequencedictionary.nf
+++ b/modules/nf-core/software/gatk/createsequencedictionary.nf
@@ -3,18 +3,18 @@ include { initOptions; saveFiles; getSoftwareName } from './../functions'
params.options = [:]
def options = initOptions(params.options)
-environment = params.enable_conda ? "bioconda::gatk4-spark=4.1.8.1" : null
-container = "quay.io/biocontainers/gatk4-spark:4.1.8.1--0"
-if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/gatk4-spark:4.1.8.1--0"
-
process GATK_CREATESEQUENCEDICTIONARY {
tag "${fasta}"
publishDir params.outdir, mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:"false") }
- conda environment
- container container
+ conda (params.enable_conda ? "bioconda::gatk4=4.1.9.0" : null)
+ if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+ container "https://depot.galaxyproject.org/singularity/gatk4:4.1.9.0--py39_0"
+ } else {
+ container "quay.io/biocontainers/gatk4:4.1.9.0--py39_0"
+ }
input:
path fasta
diff --git a/modules/nf-core/software/gatk/gatherbqsrreports.nf b/modules/nf-core/software/gatk/gatherbqsrreports.nf
index 7bcebc6db4..bfb6dd6f2f 100644
--- a/modules/nf-core/software/gatk/gatherbqsrreports.nf
+++ b/modules/nf-core/software/gatk/gatherbqsrreports.nf
@@ -3,10 +3,6 @@ include { initOptions; saveFiles; getSoftwareName } from './../functions'
params.options = [:]
def options = initOptions(params.options)
-environment = params.enable_conda ? "bioconda::gatk4-spark=4.1.8.1" : null
-container = "quay.io/biocontainers/gatk4-spark:4.1.8.1--0"
-if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/gatk4-spark:4.1.8.1--0"
-
process GATK_GATHERBQSRREPORTS {
label 'memory_singleCPU_2_task'
label 'cpus_2'
@@ -16,8 +12,12 @@ process GATK_GATHERBQSRREPORTS {
publishDir params.outdir, mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
- conda environment
- container container
+ conda (params.enable_conda ? "bioconda::gatk4=4.1.9.0" : null)
+ if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+ container "https://depot.galaxyproject.org/singularity/gatk4:4.1.9.0--py39_0"
+ } else {
+ container "quay.io/biocontainers/gatk4:4.1.9.0--py39_0"
+ }
input:
tuple val(meta), path(recal)
diff --git a/modules/nf-core/software/gatk/genotypegvcf.nf b/modules/nf-core/software/gatk/genotypegvcf.nf
index 07a009caa9..612039cc71 100644
--- a/modules/nf-core/software/gatk/genotypegvcf.nf
+++ b/modules/nf-core/software/gatk/genotypegvcf.nf
@@ -3,18 +3,18 @@ include { initOptions; saveFiles; getSoftwareName } from './../functions'
params.options = [:]
def options = initOptions(params.options)
-environment = params.enable_conda ? "bioconda::gatk4-spark=4.1.8.1" : null
-container = "quay.io/biocontainers/gatk4-spark:4.1.8.1--0"
-if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/gatk4-spark:4.1.8.1--0"
-
process GATK_GENOTYPEGVCF {
tag "${meta.id}"
publishDir params.outdir, mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
- conda environment
- container container
+ conda (params.enable_conda ? "bioconda::gatk4=4.1.9.0" : null)
+ if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+ container "https://depot.galaxyproject.org/singularity/gatk4:4.1.9.0--py39_0"
+ } else {
+ container "quay.io/biocontainers/gatk4:4.1.9.0--py39_0"
+ }
input:
tuple val(meta), path(interval), path(gvcf)
diff --git a/modules/nf-core/software/gatk/haplotypecaller.nf b/modules/nf-core/software/gatk/haplotypecaller.nf
index fdcc259abc..7d4621040f 100644
--- a/modules/nf-core/software/gatk/haplotypecaller.nf
+++ b/modules/nf-core/software/gatk/haplotypecaller.nf
@@ -3,10 +3,6 @@ include { initOptions; saveFiles; getSoftwareName } from './../functions'
params.options = [:]
def options = initOptions(params.options)
-environment = params.enable_conda ? "bioconda::gatk4-spark=4.1.8.1" : null
-container = "quay.io/biocontainers/gatk4-spark:4.1.8.1--0"
-if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/gatk4-spark:4.1.8.1--0"
-
process GATK_HAPLOTYPECALLER {
label 'MEMORY_SINGLECPU_TASK_SQ'
label 'CPUS_2'
@@ -16,8 +12,12 @@ process GATK_HAPLOTYPECALLER {
publishDir params.outdir, mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
- conda environment
- container container
+ conda (params.enable_conda ? "bioconda::gatk4=4.1.9.0" : null)
+ if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+ container "https://depot.galaxyproject.org/singularity/gatk4:4.1.9.0--py39_0"
+ } else {
+ container "quay.io/biocontainers/gatk4:4.1.9.0--py39_0"
+ }
input:
tuple val(meta), path(bam), path(bai), file(interval)
diff --git a/modules/nf-core/software/gatk/markduplicates.nf b/modules/nf-core/software/gatk/markduplicates.nf
index b5be16d0e1..02a2965feb 100644
--- a/modules/nf-core/software/gatk/markduplicates.nf
+++ b/modules/nf-core/software/gatk/markduplicates.nf
@@ -3,10 +3,6 @@ include { initOptions; saveFiles; getSoftwareName } from './../functions'
params.options = [:]
def options = initOptions(params.options)
-environment = params.enable_conda ? "bioconda::gatk4-spark=4.1.8.1" : null
-container = "quay.io/biocontainers/gatk4-spark:4.1.8.1--0"
-if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/gatk4-spark:4.1.8.1--0"
-
process GATK_MARKDUPLICATES {
label 'cpus_16'
@@ -15,8 +11,12 @@ process GATK_MARKDUPLICATES {
publishDir params.outdir, mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
- conda environment
- container container
+ conda (params.enable_conda ? "bioconda::gatk4=4.1.9.0" : null)
+ if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+ container "https://depot.galaxyproject.org/singularity/gatk4:4.1.9.0--py39_0"
+ } else {
+ container "quay.io/biocontainers/gatk4:4.1.9.0--py39_0"
+ }
input:
tuple val(meta), path("${meta.sample}.bam"), path("${meta.sample}.bam.bai")
@@ -25,23 +25,11 @@ process GATK_MARKDUPLICATES {
tuple val(meta), path("${meta.sample}.md.bam"), path("${meta.sample}.md.bam.bai"), emit: bam
val meta, emit: tsv
path "${meta.sample}.bam.metrics", optional : true, emit: report
-
+
script:
markdup_java_options = task.memory.toGiga() > 8 ? params.markdup_java_options : "\"-Xms" + (task.memory.toGiga() / 2).trunc() + "g -Xmx" + (task.memory.toGiga() - 1) + "g\""
metrics = 'markduplicates' in params.skip_qc ? '' : "-M ${meta.sample}.bam.metrics"
- if (params.use_gatk_spark)
- """
- gatk --java-options ${markdup_java_options} \
- MarkDuplicatesSpark \
- -I ${meta.sample}.bam \
- -O ${meta.sample}.md.bam \
- ${metrics} \
- --tmp-dir . \
- --create-output-bam-index true \
- --spark-master local[${task.cpus}]
- """
- else
"""
gatk --java-options ${markdup_java_options} \
MarkDuplicates \
@@ -53,4 +41,43 @@ process GATK_MARKDUPLICATES {
--OUTPUT ${meta.sample}.md.bam
mv ${meta.sample}.md.bai ${meta.sample}.md.bam.bai
"""
+}
+
+process GATK_MARKDUPLICATES_SPARK {
+ label 'cpus_16'
+
+ tag "${meta.id}"
+
+ publishDir params.outdir, mode: params.publish_dir_mode,
+ saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
+
+ conda (params.enable_conda ? "bioconda::gatk4-spark=4.1.8.1" : null)
+ if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+ container "https://depot.galaxyproject.org/singularity/gatk4-spark:4.1.8.1--0"
+ } else {
+ container "quay.io/biocontainers/gatk4-spark:4.1.8.1--0"
+ }
+
+ input:
+ tuple val(meta), path("${meta.sample}.bam"), path("${meta.sample}.bam.bai")
+
+ output:
+ tuple val(meta), path("${meta.sample}.md.bam"), path("${meta.sample}.md.bam.bai"), emit: bam
+ val meta, emit: tsv
+ path "${meta.sample}.bam.metrics", optional : true, emit: report
+
+ script:
+ markdup_java_options = task.memory.toGiga() > 8 ? params.markdup_java_options : "\"-Xms" + (task.memory.toGiga() / 2).trunc() + "g -Xmx" + (task.memory.toGiga() - 1) + "g\""
+ metrics = 'markduplicates' in params.skip_qc ? '' : "-M ${meta.sample}.bam.metrics"
+
+ """
+ gatk --java-options ${markdup_java_options} \
+ MarkDuplicatesSpark \
+ -I ${meta.sample}.bam \
+ -O ${meta.sample}.md.bam \
+ ${metrics} \
+ --tmp-dir . \
+ --create-output-bam-index true \
+ --spark-master local[${task.cpus}]
+ """
}
\ No newline at end of file
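With MarkDuplicatesSpark split out into its own process, the caller selects the implementation at run time; a minimal sketch of the intended selection (mirrored by the markduplicates.nf subworkflow change further down, channel name hypothetical):

    // Sketch: pick the Spark variant when requested
    if (params.use_gatk_spark) GATK_MARKDUPLICATES_SPARK(bam_mapped)
    else                       GATK_MARKDUPLICATES(bam_mapped)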
diff --git a/modules/nf-core/software/htslib_tabix.nf b/modules/nf-core/software/htslib_tabix.nf
index ce133a4b13..2457321c49 100644
--- a/modules/nf-core/software/htslib_tabix.nf
+++ b/modules/nf-core/software/htslib_tabix.nf
@@ -4,18 +4,18 @@ include { initOptions; saveFiles; getSoftwareName } from './functions'
params.options = [:]
def options = initOptions(params.options)
-environment = params.enable_conda ? "bioconda::tabix=0.2.6" : null
-container = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0"
-if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/tabix:0.2.6--ha92aebf_0"
-
process HTSLIB_TABIX {
tag "${vcf}"
publishDir params.outdir, mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:"false") }
- conda environment
- container container
+ conda (params.enable_conda ? "bioconda::tabix=0.2.6" : null)
+ if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+ container "https://depot.galaxyproject.org/singularity/tabix:0.2.6--ha92aebf_0"
+ } else {
+ container "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0"
+ }
input:
path vcf
diff --git a/modules/nf-core/software/manta/somatic.nf b/modules/nf-core/software/manta/somatic.nf
index 98b5a9b6ed..a31c86d904 100644
--- a/modules/nf-core/software/manta/somatic.nf
+++ b/modules/nf-core/software/manta/somatic.nf
@@ -3,10 +3,6 @@ include { initOptions; saveFiles; getSoftwareName } from './../functions'
params.options = [:]
def options = initOptions(params.options)
-environment = params.enable_conda ? "bioconda::manta=1.6.0" : null
-container = "quay.io/biocontainers/manta:1.6.0--py27_0"
-if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/manta:1.6.0--py27_0"
-
process MANTA_SOMATIC {
tag "${meta.id}"
@@ -16,8 +12,12 @@ process MANTA_SOMATIC {
publishDir params.outdir, mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
- conda environment
- container container
+ conda (params.enable_conda ? "bioconda::manta=1.6.0" : null)
+ if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+ container "https://depot.galaxyproject.org/singularity/manta:1.6.0--py27_0"
+ } else {
+ container "quay.io/biocontainers/manta:1.6.0--py27_0"
+ }
input:
tuple val(meta), path(bam_normal), path(bai_normal), path(bam_tumor), path(bai_tumor)
diff --git a/modules/nf-core/software/msisensor/msi.nf b/modules/nf-core/software/msisensor/msi.nf
index 79ce4925fa..be257af009 100644
--- a/modules/nf-core/software/msisensor/msi.nf
+++ b/modules/nf-core/software/msisensor/msi.nf
@@ -3,10 +3,6 @@ include { initOptions; saveFiles; getSoftwareName } from './../functions'
params.options = [:]
def options = initOptions(params.options)
-environment = params.enable_conda ? "bioconda::msisensor=0.5" : null
-container = "quay.io/biocontainers/msisensor:0.5--hb3646a4_2"
-if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/msisensor:0.5--hb3646a4_2"
-
process MSISENSOR_MSI {
tag "${meta.id}"
@@ -16,8 +12,12 @@ process MSISENSOR_MSI {
publishDir params.outdir, mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
- conda environment
- container container
+ conda (params.enable_conda ? "bioconda::msisensor=0.5" : null)
+ if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+ container "https://depot.galaxyproject.org/singularity/msisensor:0.5--hb3646a4_2"
+ } else {
+ container "quay.io/biocontainers/msisensor:0.5--hb3646a4_2"
+ }
input:
tuple val(meta), path(bam_normal), path(bai_normal), path(bam_tumor), path(bai_tumor)
diff --git a/modules/nf-core/software/msisensor/scan.nf b/modules/nf-core/software/msisensor/scan.nf
index 180c73f4c7..850cff618d 100644
--- a/modules/nf-core/software/msisensor/scan.nf
+++ b/modules/nf-core/software/msisensor/scan.nf
@@ -3,10 +3,6 @@ include { initOptions; saveFiles; getSoftwareName } from './../functions'
params.options = [:]
def options = initOptions(params.options)
-environment = params.enable_conda ? "bioconda::msisensor=0.5" : null
-container = "quay.io/biocontainers/msisensor:0.5--hb3646a4_2"
-if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/msisensor:0.5--hb3646a4_2"
-
process MSISENSOR_SCAN {
tag "${fasta}"
@@ -16,8 +12,12 @@ process MSISENSOR_SCAN {
publishDir params.outdir, mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:"false") }
- conda environment
- container container
+ conda (params.enable_conda ? "bioconda::msisensor=0.5" : null)
+ if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+ container "https://depot.galaxyproject.org/singularity/msisensor:0.5--hb3646a4_2"
+ } else {
+ container "quay.io/biocontainers/msisensor:0.5--hb3646a4_2"
+ }
input:
path fasta
diff --git a/modules/nf-core/software/multiqc.nf b/modules/nf-core/software/multiqc.nf
deleted file mode 100644
index ed201b0f49..0000000000
--- a/modules/nf-core/software/multiqc.nf
+++ /dev/null
@@ -1,43 +0,0 @@
-include { initOptions; saveFiles; getSoftwareName } from './functions'
-
-params.options = [:]
-def options = initOptions(params.options)
-
-environment = params.enable_conda ? "bioconda::multiqc=1.9" : null
-container = "quay.io/biocontainers/multiqc:1.9--py_1"
-if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/multiqc:1.9--py_1"
-
-// Has the run name been specified by the user?
-// this has the bonus effect of catching both -name and --name
-def custom_runName = params.name
-if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)) {
- custom_runName = workflow.runName
-}
-
-process MULTIQC {
- publishDir "${params.outdir}/multiqc", mode: params.publish_dir_mode
-
- conda environment
- container container
-
- input:
- // path software_versions
- path multiqc_config
- path multiqc_custom_config
- val workflow_summary
- path qc_reports
-
- output:
- path "*multiqc_report.html"
- path "*_data"
- path "multiqc_plots"
-
- script:
- title = custom_runName ? "--title \"${custom_runName}\"" : ''
- filename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : ''
- custom_config_file = params.multiqc_config ? "--config ${multiqc_custom_config}" : ''
- """
- echo '${workflow_summary}' > workflow_summary_mqc.yaml
- multiqc -f ${title} ${filename} ${custom_config_file} .
- """
-}
diff --git a/modules/nf-core/software/multiqc/functions.nf b/modules/nf-core/software/multiqc/functions.nf
new file mode 100644
index 0000000000..d25eea86b3
--- /dev/null
+++ b/modules/nf-core/software/multiqc/functions.nf
@@ -0,0 +1,59 @@
+/*
+ * -----------------------------------------------------
+ * Utility functions used in nf-core DSL2 module files
+ * -----------------------------------------------------
+ */
+
+/*
+ * Extract name of software tool from process name using $task.process
+ */
+def getSoftwareName(task_process) {
+ return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+/*
+ * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
+ */
+def initOptions(Map args) {
+ def Map options = [:]
+ options.args = args.args ?: ''
+ options.args2 = args.args2 ?: ''
+ options.publish_by_id = args.publish_by_id ?: false
+ options.publish_dir = args.publish_dir ?: ''
+ options.publish_files = args.publish_files
+ options.suffix = args.suffix ?: ''
+ return options
+}
+
+/*
+ * Tidy up and join elements of a list to return a path string
+ */
+def getPathFromList(path_list) {
+ def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries
+ paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes
+ return paths.join('/')
+}
+
+/*
+ * Function to save/publish module results
+ */
+def saveFiles(Map args) {
+ if (!args.filename.endsWith('.version.txt')) {
+ def ioptions = initOptions(args.options)
+ def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
+ if (ioptions.publish_by_id) {
+ path_list.add(args.publish_id)
+ }
+ if (ioptions.publish_files instanceof Map) {
+ for (ext in ioptions.publish_files) {
+ if (args.filename.endsWith(ext.key)) {
+ def ext_list = path_list.collect()
+ ext_list.add(ext.value)
+ return "${getPathFromList(ext_list)}/$args.filename"
+ }
+ }
+ } else if (ioptions.publish_files == null) {
+ return "${getPathFromList(path_list)}/$args.filename"
+ }
+ }
+}
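For orientation, a worked example of how saveFiles above resolves a publish path; all values below are hypothetical:

    // With options = [publish_by_id: true, publish_files: ['bam': 'mapped']],
    // publish_dir = 'gatk4' and publish_id = 'sample1':
    //   saveFiles(filename: 'sample1.md.bam', options: options,
    //             publish_dir: 'gatk4', publish_id: 'sample1')
    // returns 'gatk4/sample1/mapped/sample1.md.bam'.
    // Filenames ending in '.version.txt' return null and are not published.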
diff --git a/modules/nf-core/software/multiqc/main.nf b/modules/nf-core/software/multiqc/main.nf
new file mode 100644
index 0000000000..ff1175fcd0
--- /dev/null
+++ b/modules/nf-core/software/multiqc/main.nf
@@ -0,0 +1,35 @@
+// Import generic module functions
+include { initOptions; saveFiles; getSoftwareName } from './functions'
+
+params.options = [:]
+def options = initOptions(params.options)
+
+process MULTIQC {
+ label 'process_medium'
+ publishDir "${params.outdir}",
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> saveFiles(filename: filename, options: params.options, publish_dir: getSoftwareName(task.process), publish_id: '') }
+
+ conda (params.enable_conda ? "bioconda::multiqc=1.9" : null)
+ if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+ container "https://depot.galaxyproject.org/singularity/multiqc:1.9--pyh9f0ad1d_0"
+ } else {
+ container "quay.io/biocontainers/multiqc:1.9--pyh9f0ad1d_0"
+ }
+
+ input:
+ path multiqc_files
+
+ output:
+ path "*multiqc_report.html", emit: report
+ path "*_data" , emit: data
+ path "*_plots" , optional:true, emit: plots
+ path "*.version.txt" , emit: version
+
+ script:
+ def software = getSoftwareName(task.process)
+ """
+ multiqc -f $options.args .
+ multiqc --version | sed -e "s/multiqc, version //g" > ${software}.version.txt
+ """
+}
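A hedged sketch of how the refactored module would typically be consumed; the include path matches the new file, but the options map and input channel names are assumptions:

    include { MULTIQC } from './modules/nf-core/software/multiqc/main' addParams(options: multiqc_options)

    workflow {
        // multiqc_files: an assumed channel collecting FastQC zips, logs, etc.
        MULTIQC(multiqc_files.collect())
    }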
diff --git a/modules/nf-core/software/multiqc/meta.yml b/modules/nf-core/software/multiqc/meta.yml
new file mode 100644
index 0000000000..02f4b41586
--- /dev/null
+++ b/modules/nf-core/software/multiqc/meta.yml
@@ -0,0 +1,60 @@
+name: MultiQC
+description: Aggregate results from bioinformatics analyses across many samples into a single report
+keywords:
+ - QC
+ - bioinformatics tools
+ - Beautiful stand-alone HTML report
+tools:
+ - multiqc:
+ description: |
+ MultiQC searches a given directory for analysis logs and compiles an HTML report.
+ It's a general-use tool, perfect for summarising the output from numerous bioinformatics tools.
+ homepage: https://multiqc.info/
+ documentation: https://multiqc.info/docs/
+params:
+ - outdir:
+ type: string
+ description: |
+ The pipeline's output directory. By default, the module will
+ output files into `$params.outdir/`
+ - publish_dir_mode:
+ type: string
+ description: |
+ Value for the Nextflow `publishDir` mode parameter.
+ Available: symlink, rellink, link, copy, copyNoFollow, move.
+ - enable_conda:
+ type: boolean
+ description: |
+ Run the module with Conda using the software specified
+ via the `conda` directive
+ - singularity_pull_docker_container:
+ type: boolean
+ description: |
+ Instead of directly downloading Singularity images for use with Singularity,
+ force the workflow to pull and convert Docker containers instead.
+input:
+ - multiqc_files:
+ type: file
+ description: |
+ List of reports / files recognised by MultiQC, for example the html and zip output of FastQC
+output:
+ - report:
+ type: file
+ description: MultiQC report file
+ pattern: "multiqc_report.html"
+ - data:
+ type: dir
+ description: MultiQC data dir
+ pattern: "multiqc_data"
+ - plots:
+ type: file
+ description: Plots created by MultiQC
+ pattern: "*_data"
+ - version:
+ type: file
+ description: File containing software version
+ pattern: "*.{version.txt}"
+authors:
+ - "@abhi18av"
+ - "@bunop"
+ - "@drpatelh"
diff --git a/modules/nf-core/software/qualimap_bamqc.nf b/modules/nf-core/software/qualimap_bamqc.nf
index 25d3715d11..54cdf90a17 100644
--- a/modules/nf-core/software/qualimap_bamqc.nf
+++ b/modules/nf-core/software/qualimap_bamqc.nf
@@ -3,10 +3,6 @@ include { initOptions; saveFiles; getSoftwareName } from './functions'
params.options = [:]
def options = initOptions(params.options)
-environment = params.enable_conda ? "bioconda::qualimap=2.2.2d" : null
-container = "quay.io/biocontainers/qualimap:2.2.2d--1"
-if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/qualimap:2.2.2d--1"
-
process QUALIMAP_BAMQC {
label 'memory_max'
label 'cpus_16'
@@ -16,8 +12,12 @@ process QUALIMAP_BAMQC {
publishDir params.outdir, mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
- conda environment
- container container
+ conda (params.enable_conda ? "bioconda::qualimap=2.2.2d" : null)
+ if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+ container "https://depot.galaxyproject.org/singularity/qualimap:2.2.2d--1"
+ } else {
+ container "quay.io/biocontainers/qualimap:2.2.2d--1"
+ }
input:
tuple val(meta), path(bam)
diff --git a/modules/nf-core/software/samtools/faidx.nf b/modules/nf-core/software/samtools/faidx.nf
index c60a62518c..40ac3ce209 100644
--- a/modules/nf-core/software/samtools/faidx.nf
+++ b/modules/nf-core/software/samtools/faidx.nf
@@ -3,18 +3,18 @@ include { initOptions; saveFiles; getSoftwareName } from './../functions'
params.options = [:]
def options = initOptions(params.options)
-environment = params.enable_conda ? "bioconda::samtools=1.10" : null
-container = "quay.io/biocontainers/samtools:1.10--h2e538c0_3"
-if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/samtools:1.10--h2e538c0_3"
-
process SAMTOOLS_FAIDX {
tag "${fasta}"
publishDir params.outdir, mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:"false") }
- conda environment
- container container
+ conda (params.enable_conda ? "bioconda::samtools=1.11" : null)
+ if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+ container "https://depot.galaxyproject.org/singularity/samtools:1.11--h6270b1f_0"
+ } else {
+ container "quay.io/biocontainers/samtools:1.11--h6270b1f_0"
+ }
input:
path fasta
diff --git a/modules/nf-core/software/samtools/index.nf b/modules/nf-core/software/samtools/index.nf
deleted file mode 100644
index 0e5c3f11f7..0000000000
--- a/modules/nf-core/software/samtools/index.nf
+++ /dev/null
@@ -1,34 +0,0 @@
-include { initOptions; saveFiles; getSoftwareName } from './../functions'
-
-params.options = [:]
-def options = initOptions(params.options)
-
-environment = params.enable_conda ? "bioconda::samtools=1.10" : null
-container = "quay.io/biocontainers/samtools:1.10--h2e538c0_3"
-if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/samtools:1.10--h2e538c0_3"
-
-process SAMTOOLS_INDEX {
- label 'cpus_8'
-
- tag "${meta.id}"
-
- publishDir params.outdir, mode: params.publish_dir_mode,
- saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
-
- conda environment
- container container
-
- input:
- tuple val(meta), path(bam)
-
- output:
- tuple val(meta), path("${name}.bam"), path("*.bai")
-
- script:
- name = options.suffix ? "${meta.id}.${options.suffix}" : "${meta.id}"
- """
- [ ! -f ${name}.bam ] && ln -s ${bam} ${name}.bam
-
- samtools index ${name}.bam
- """
-}
\ No newline at end of file
diff --git a/modules/nf-core/software/samtools/index/functions.nf b/modules/nf-core/software/samtools/index/functions.nf
new file mode 100644
index 0000000000..d25eea86b3
--- /dev/null
+++ b/modules/nf-core/software/samtools/index/functions.nf
@@ -0,0 +1,59 @@
+/*
+ * -----------------------------------------------------
+ * Utility functions used in nf-core DSL2 module files
+ * -----------------------------------------------------
+ */
+
+/*
+ * Extract name of software tool from process name using $task.process
+ */
+def getSoftwareName(task_process) {
+ return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+/*
+ * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
+ */
+def initOptions(Map args) {
+ def Map options = [:]
+ options.args = args.args ?: ''
+ options.args2 = args.args2 ?: ''
+ options.publish_by_id = args.publish_by_id ?: false
+ options.publish_dir = args.publish_dir ?: ''
+ options.publish_files = args.publish_files
+ options.suffix = args.suffix ?: ''
+ return options
+}
+
+/*
+ * Tidy up and join elements of a list to return a path string
+ */
+def getPathFromList(path_list) {
+ def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries
+ paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes
+ return paths.join('/')
+}
+
+/*
+ * Function to save/publish module results
+ */
+def saveFiles(Map args) {
+ if (!args.filename.endsWith('.version.txt')) {
+ def ioptions = initOptions(args.options)
+ def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
+ if (ioptions.publish_by_id) {
+ path_list.add(args.publish_id)
+ }
+ if (ioptions.publish_files instanceof Map) {
+ for (ext in ioptions.publish_files) {
+ if (args.filename.endsWith(ext.key)) {
+ def ext_list = path_list.collect()
+ ext_list.add(ext.value)
+ return "${getPathFromList(ext_list)}/$args.filename"
+ }
+ }
+ } else if (ioptions.publish_files == null) {
+ return "${getPathFromList(path_list)}/$args.filename"
+ }
+ }
+}
diff --git a/modules/nf-core/software/samtools/index/main.nf b/modules/nf-core/software/samtools/index/main.nf
new file mode 100644
index 0000000000..5dd631daa5
--- /dev/null
+++ b/modules/nf-core/software/samtools/index/main.nf
@@ -0,0 +1,32 @@
+// Import generic module functions
+include { saveFiles; getSoftwareName } from './functions'
+
+params.options = [:]
+
+process SAMTOOLS_INDEX {
+ tag "$meta.id"
+ publishDir "${params.outdir}",
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
+
+ conda (params.enable_conda ? "bioconda::samtools=1.10" : null)
+ if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+ container "https://depot.galaxyproject.org/singularity/samtools:1.10--h9402c20_2"
+ } else {
+ container "quay.io/biocontainers/samtools:1.10--h9402c20_2"
+ }
+
+ input:
+ tuple val(meta), path(bam)
+
+ output:
+ tuple val(meta), path("*.bai"), emit: bai
+ path "*.version.txt" , emit: version
+
+ script:
+ def software = getSoftwareName(task.process)
+ """
+ samtools index $bam
+ echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' > ${software}.version.txt
+ """
+}
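Because the rewritten module emits only the .bai (not the BAM/bai tuple the deleted version returned), callers must re-associate index and BAM themselves; this is exactly what the mapping.nf subworkflow below does:

    SAMTOOLS_INDEX(bam_mapped)
    bam_mapped_index = bam_mapped.join(SAMTOOLS_INDEX.out.bai)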
diff --git a/modules/nf-core/software/samtools/index/meta.yml b/modules/nf-core/software/samtools/index/meta.yml
new file mode 100644
index 0000000000..089a83be02
--- /dev/null
+++ b/modules/nf-core/software/samtools/index/meta.yml
@@ -0,0 +1,64 @@
+name: samtools_index
+description: Index SAM/BAM/CRAM file
+keywords:
+ - index
+ - bam
+ - sam
+ - cram
+tools:
+ - samtools:
+ description: |
+ SAMtools is a set of utilities for interacting with and post-processing
+ short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
+ These files are generated as output by short read aligners like BWA.
+ homepage: http://www.htslib.org/
+ documentation: http://www.htslib.org/doc/samtools.html
+ doi: 10.1093/bioinformatics/btp352
+params:
+ - outdir:
+ type: string
+ description: |
+ The pipeline's output directory. By default, the module will
+ output files into `$params.outdir/`
+ - publish_dir_mode:
+ type: string
+ description: |
+ Value for the Nextflow `publishDir` mode parameter.
+ Available: symlink, rellink, link, copy, copyNoFollow, move.
+ - enable_conda:
+ type: boolean
+ description: |
+ Run the module with Conda using the software specified
+ via the `conda` directive
+ - singularity_pull_docker_container:
+ type: boolean
+ description: |
+ Instead of directly downloading Singularity images for use with Singularity,
+ force the workflow to pull and convert Docker containers instead.
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - bam:
+ type: file
+ description: BAM/CRAM/SAM file
+ pattern: "*.{bam,cram,sam}"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - bai:
+ type: file
+ description: BAM/CRAM/SAM index file
+ pattern: "*.{bai,crai,sai}"
+ - version:
+ type: file
+ description: File containing software version
+ pattern: "*.{version.txt}"
+authors:
+ - "@drpatelh"
+ - "@ewels"
diff --git a/modules/nf-core/software/samtools/stats.nf b/modules/nf-core/software/samtools/stats.nf
deleted file mode 100644
index 6302366db5..0000000000
--- a/modules/nf-core/software/samtools/stats.nf
+++ /dev/null
@@ -1,31 +0,0 @@
-include { initOptions; saveFiles; getSoftwareName } from './../functions'
-
-params.options = [:]
-def options = initOptions(params.options)
-
-environment = params.enable_conda ? "bioconda::samtools=1.10" : null
-container = "quay.io/biocontainers/samtools:1.10--h2e538c0_3"
-if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/samtools:1.10--h2e538c0_3"
-
-process SAMTOOLS_STATS {
- label 'cpus_2'
-
- tag "${meta.id}"
-
- publishDir params.outdir, mode: params.publish_dir_mode,
- saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
-
- conda environment
- container container
-
- input:
- tuple val(meta), path(bam)
-
- output:
- path ("${bam}.samtools.stats.out")
-
- script:
- """
- samtools stats ${bam} > ${bam}.samtools.stats.out
- """
-}
\ No newline at end of file
diff --git a/modules/nf-core/software/samtools/stats/functions.nf b/modules/nf-core/software/samtools/stats/functions.nf
new file mode 100644
index 0000000000..d25eea86b3
--- /dev/null
+++ b/modules/nf-core/software/samtools/stats/functions.nf
@@ -0,0 +1,59 @@
+/*
+ * -----------------------------------------------------
+ * Utility functions used in nf-core DSL2 module files
+ * -----------------------------------------------------
+ */
+
+/*
+ * Extract name of software tool from process name using $task.process
+ */
+def getSoftwareName(task_process) {
+ return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+/*
+ * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
+ */
+def initOptions(Map args) {
+ def Map options = [:]
+ options.args = args.args ?: ''
+ options.args2 = args.args2 ?: ''
+ options.publish_by_id = args.publish_by_id ?: false
+ options.publish_dir = args.publish_dir ?: ''
+ options.publish_files = args.publish_files
+ options.suffix = args.suffix ?: ''
+ return options
+}
+
+/*
+ * Tidy up and join elements of a list to return a path string
+ */
+def getPathFromList(path_list) {
+ def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries
+ paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes
+ return paths.join('/')
+}
+
+/*
+ * Function to save/publish module results
+ */
+def saveFiles(Map args) {
+ if (!args.filename.endsWith('.version.txt')) {
+ def ioptions = initOptions(args.options)
+ def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
+ if (ioptions.publish_by_id) {
+ path_list.add(args.publish_id)
+ }
+ if (ioptions.publish_files instanceof Map) {
+ for (ext in ioptions.publish_files) {
+ if (args.filename.endsWith(ext.key)) {
+ def ext_list = path_list.collect()
+ ext_list.add(ext.value)
+ return "${getPathFromList(ext_list)}/$args.filename"
+ }
+ }
+ } else if (ioptions.publish_files == null) {
+ return "${getPathFromList(path_list)}/$args.filename"
+ }
+ }
+}
diff --git a/modules/nf-core/software/samtools/stats/main.nf b/modules/nf-core/software/samtools/stats/main.nf
new file mode 100644
index 0000000000..d8d1d02072
--- /dev/null
+++ b/modules/nf-core/software/samtools/stats/main.nf
@@ -0,0 +1,32 @@
+// Import generic module functions
+include { saveFiles; getSoftwareName } from './functions'
+
+params.options = [:]
+
+process SAMTOOLS_STATS {
+ tag "$meta.id"
+ publishDir "${params.outdir}",
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
+
+ conda (params.enable_conda ? "bioconda::samtools=1.10" : null)
+ if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+ container "https://depot.galaxyproject.org/singularity/samtools:1.10--h9402c20_2"
+ } else {
+ container "quay.io/biocontainers/samtools:1.10--h9402c20_2"
+ }
+
+ input:
+ tuple val(meta), path(bam), path(bai)
+
+ output:
+ tuple val(meta), path("*.stats"), emit: stats
+ path "*.version.txt" , emit: version
+
+ script:
+ def software = getSoftwareName(task.process)
+ """
+ samtools stats $bam > ${bam}.stats
+ echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' > ${software}.version.txt
+ """
+}
diff --git a/modules/nf-core/software/samtools/stats/meta.yml b/modules/nf-core/software/samtools/stats/meta.yml
new file mode 100644
index 0000000000..b907df92c5
--- /dev/null
+++ b/modules/nf-core/software/samtools/stats/meta.yml
@@ -0,0 +1,68 @@
+name: samtools_stats
+description: Produces comprehensive statistics from SAM/BAM/CRAM file
+keywords:
+ - statistics
+ - counts
+ - bam
+ - sam
+ - cram
+tools:
+ - samtools:
+ description: |
+ SAMtools is a set of utilities for interacting with and post-processing
+ short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
+ These files are generated as output by short read aligners like BWA.
+ homepage: http://www.htslib.org/
+ documentation: http://www.htslib.org/doc/samtools.html
+ doi: 10.1093/bioinformatics/btp352
+params:
+ - outdir:
+ type: string
+ description: |
+ The pipeline's output directory. By default, the module will
+ output files into `$params.outdir/`
+ - publish_dir_mode:
+ type: string
+ description: |
+ Value for the Nextflow `publishDir` mode parameter.
+ Available: symlink, rellink, link, copy, copyNoFollow, move.
+ - enable_conda:
+ type: boolean
+ description: |
+ Run the module with Conda using the software specified
+ via the `conda` directive
+ - singularity_pull_docker_container:
+ type: boolean
+ description: |
+ Instead of directly downloading Singularity images for use with Singularity,
+ force the workflow to pull and convert Docker containers instead.
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - bam:
+ type: file
+ description: BAM/CRAM/SAM file
+ pattern: "*.{bam,cram,sam}"
+ - bai:
+ type: file
+ description: Index for BAM/CRAM/SAM file
+ pattern: "*.{bai,crai,sai}"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - stats:
+ type: file
+ description: File containing samtools stats output
+ pattern: "*.{stats}"
+ - version:
+ type: file
+ description: File containing software version
+ pattern: "*.{version.txt}"
+authors:
+ - "@drpatelh"
diff --git a/modules/nf-core/software/strelka/germline.nf b/modules/nf-core/software/strelka/germline.nf
index 6295ac1d66..39abd20da8 100644
--- a/modules/nf-core/software/strelka/germline.nf
+++ b/modules/nf-core/software/strelka/germline.nf
@@ -3,10 +3,6 @@ include { initOptions; saveFiles; getSoftwareName } from './../functions'
params.options = [:]
def options = initOptions(params.options)
-environment = params.enable_conda ? "bioconda::strelka=2.9.10" : null
-container = "quay.io/biocontainers/strelka:2.9.10--0"
-if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/strelka:2.9.10--0"
-
process STRELKA_GERMLINE {
tag "${meta.id}"
@@ -16,8 +12,12 @@ process STRELKA_GERMLINE {
publishDir params.outdir, mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
- conda environment
- container container
+ conda (params.enable_conda ? "bioconda::strelka=2.9.10" : null)
+ if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+ container "https://depot.galaxyproject.org/singularity/strelka:2.9.10--0"
+ } else {
+ container "quay.io/biocontainers/strelka:2.9.10--0"
+ }
input:
tuple val(meta), path(bam), path(bai)
diff --git a/modules/nf-core/software/strelka/somatic.nf b/modules/nf-core/software/strelka/somatic.nf
index 0cfb2b8458..2efc864edd 100644
--- a/modules/nf-core/software/strelka/somatic.nf
+++ b/modules/nf-core/software/strelka/somatic.nf
@@ -3,10 +3,6 @@ include { initOptions; saveFiles; getSoftwareName } from './../functions'
params.options = [:]
def options = initOptions(params.options)
-environment = params.enable_conda ? "bioconda::strelka=2.9.10" : null
-container = "quay.io/biocontainers/strelka:2.9.10--0"
-if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/strelka:2.9.10--0"
-
process STRELKA_SOMATIC {
tag "${meta.id}"
@@ -16,8 +12,12 @@ process STRELKA_SOMATIC {
publishDir params.outdir, mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
- conda environment
- container container
+ conda (params.enable_conda ? "bioconda::strelka=2.9.10" : null)
+ if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+ container "https://depot.galaxyproject.org/singularity/strelka:2.9.10--0"
+ } else {
+ container "quay.io/biocontainers/strelka:2.9.10--0"
+ }
input:
tuple val(meta), path(bam_normal), path(bai_normal), path(bam_tumor), path(bai_tumor)
@@ -66,8 +66,12 @@ process STRELKA_SOMATIC_BEST_PRACTICES {
publishDir params.outdir, mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
- conda environment
- container container
+ conda (params.enable_conda ? "bioconda::strelka=2.9.10" : null)
+ if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+ container "https://depot.galaxyproject.org/singularity/strelka:2.9.10--0"
+ } else {
+ container "quay.io/biocontainers/strelka:2.9.10--0"
+ }
input:
tuple val(meta), path(bam_normal), path(bai_normal), path(bam_tumor), path(bai_tumor), path(manta_csi), path(manta_csi_tbi)
diff --git a/modules/nf-core/software/trimgalore.nf b/modules/nf-core/software/trimgalore.nf
deleted file mode 100644
index 60369d80af..0000000000
--- a/modules/nf-core/software/trimgalore.nf
+++ /dev/null
@@ -1,69 +0,0 @@
-include { initOptions; saveFiles; getSoftwareName } from './functions'
-
-params.options = [:]
-def options = initOptions(params.options)
-
-environment = params.enable_conda ? "bioconda::trim-galore=0.6.5" : null
-container = "quay.io/biocontainers/trim-galore:0.6.5--0"
-if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/trim-galore:0.6.5--0"
-
-process TRIMGALORE {
- label 'process_high'
-
- tag "${meta.id}"
-
- publishDir params.outdir, mode: params.publish_dir_mode,
- saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
-
- conda environment
- container container
-
- input:
- tuple val(meta), path(reads)
-
- output:
- tuple val(meta), path("*_1.fq.gz"), path("*_2.fq.gz"), emit: reads
- path "*.html" , emit: html optional true
- path "*.txt" , emit: log
- path "*.version.txt", emit: version
- path "*.zip" , emit: zip optional true
-
- script:
- // Calculate number of --cores for TrimGalore based on value of task.cpus
- // See: https://github.com/FelixKrueger/TrimGalore/blob/master/Changelog.md#version-060-release-on-1-mar-2019
- // See: https://github.com/nf-core/atacseq/pull/65
- def cores = 1
- if (task.cpus) {
- cores = (task.cpus as int) - 4
- if (meta.single_end) cores = (task.cpus as int) - 3
- if (cores < 1) cores = 1
- if (cores > 4) cores = 4
- }
-
- // Clipping presets have to be evaluated in the context of SE/PE
- c_r1 = params.clip_r1 > 0 ? "--clip_r1 ${params.clip_r1}" : ''
- c_r2 = params.clip_r2 > 0 ? "--clip_r2 ${params.clip_r2}" : ''
- tpc_r1 = params.three_prime_clip_r1 > 0 ? "--three_prime_clip_r1 ${params.three_prime_clip_r1}" : ''
- tpc_r2 = params.three_prime_clip_r2 > 0 ? "--three_prime_clip_r2 ${params.three_prime_clip_r2}" : ''
-
- // Added soft-links to original fastqs for consistent naming in MultiQC
- prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
- """
- [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz
- [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz
-
- trim_galore \\
- ${options.args} \\
- --cores ${cores} \\
- --paired \\
- --gzip \\
- ${c_r1} \\
- ${c_r2} \\
- ${tpc_r1} \\
- ${tpc_r2} \\
- ${prefix}_1.fastq.gz \\
- ${prefix}_2.fastq.gz
-
- trim_galore --version > trim_galore.version.txt
- """
-}
diff --git a/modules/nf-core/software/trimgalore/functions.nf b/modules/nf-core/software/trimgalore/functions.nf
new file mode 100644
index 0000000000..d25eea86b3
--- /dev/null
+++ b/modules/nf-core/software/trimgalore/functions.nf
@@ -0,0 +1,59 @@
+/*
+ * -----------------------------------------------------
+ * Utility functions used in nf-core DSL2 module files
+ * -----------------------------------------------------
+ */
+
+/*
+ * Extract name of software tool from process name using $task.process
+ */
+def getSoftwareName(task_process) {
+ return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+/*
+ * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
+ */
+def initOptions(Map args) {
+ def Map options = [:]
+ options.args = args.args ?: ''
+ options.args2 = args.args2 ?: ''
+ options.publish_by_id = args.publish_by_id ?: false
+ options.publish_dir = args.publish_dir ?: ''
+ options.publish_files = args.publish_files
+ options.suffix = args.suffix ?: ''
+ return options
+}
+
+/*
+ * Tidy up and join elements of a list to return a path string
+ */
+def getPathFromList(path_list) {
+ def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries
+ paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes
+ return paths.join('/')
+}
+
+/*
+ * Function to save/publish module results
+ */
+def saveFiles(Map args) {
+ if (!args.filename.endsWith('.version.txt')) {
+ def ioptions = initOptions(args.options)
+ def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
+ if (ioptions.publish_by_id) {
+ path_list.add(args.publish_id)
+ }
+ if (ioptions.publish_files instanceof Map) {
+ for (ext in ioptions.publish_files) {
+ if (args.filename.endsWith(ext.key)) {
+ def ext_list = path_list.collect()
+ ext_list.add(ext.value)
+ return "${getPathFromList(ext_list)}/$args.filename"
+ }
+ }
+ } else if (ioptions.publish_files == null) {
+ return "${getPathFromList(path_list)}/$args.filename"
+ }
+ }
+}
diff --git a/modules/nf-core/software/trimgalore/main.nf b/modules/nf-core/software/trimgalore/main.nf
new file mode 100644
index 0000000000..79cc745625
--- /dev/null
+++ b/modules/nf-core/software/trimgalore/main.nf
@@ -0,0 +1,83 @@
+// Import generic module functions
+include { initOptions; saveFiles; getSoftwareName } from './functions'
+
+params.options = [:]
+def options = initOptions(params.options)
+
+process TRIMGALORE {
+ tag "$meta.id"
+ label 'process_high'
+ publishDir "${params.outdir}",
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
+
+ conda (params.enable_conda ? "bioconda::trim-galore=0.6.6" : null)
+ if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+ container "https://depot.galaxyproject.org/singularity/trim-galore:0.6.6--0"
+ } else {
+ container "quay.io/biocontainers/trim-galore:0.6.6--0"
+ }
+
+ input:
+ tuple val(meta), path(reads)
+
+ output:
+ tuple val(meta), path("*.fq.gz") , emit: reads
+ tuple val(meta), path("*report.txt"), emit: log
+ path "*.version.txt" , emit: version
+
+ tuple val(meta), path("*.html"), emit: html optional true
+ tuple val(meta), path("*.zip") , emit: zip optional true
+
+ script:
+ // Calculate number of --cores for TrimGalore based on value of task.cpus
+ // See: https://github.com/FelixKrueger/TrimGalore/blob/master/Changelog.md#version-060-release-on-1-mar-2019
+ // See: https://github.com/nf-core/atacseq/pull/65
+ def cores = 1
+ if (task.cpus) {
+ cores = (task.cpus as int) - 4
+ if (meta.single_end) cores = (task.cpus as int) - 3
+ if (cores < 1) cores = 1
+ if (cores > 4) cores = 4
+ }
+
+ // Clipping presets have to be evaluated in the context of SE/PE
+ def c_r1 = params.clip_r1 > 0 ? "--clip_r1 ${params.clip_r1}" : ''
+ def c_r2 = params.clip_r2 > 0 ? "--clip_r2 ${params.clip_r2}" : ''
+ def tpc_r1 = params.three_prime_clip_r1 > 0 ? "--three_prime_clip_r1 ${params.three_prime_clip_r1}" : ''
+ def tpc_r2 = params.three_prime_clip_r2 > 0 ? "--three_prime_clip_r2 ${params.three_prime_clip_r2}" : ''
+
+ // Added soft-links to original fastqs for consistent naming in MultiQC
+ def software = getSoftwareName(task.process)
+ def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
+ if (meta.single_end) {
+ """
+ [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz
+ trim_galore \\
+ $options.args \\
+ --cores $cores \\
+ --gzip \\
+ $c_r1 \\
+ $tpc_r1 \\
+ ${prefix}.fastq.gz
+ echo \$(trim_galore --version 2>&1) | sed 's/^.*version //; s/Last.*\$//' > ${software}.version.txt
+ """
+ } else {
+ """
+ [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz
+ [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz
+ trim_galore \\
+ $options.args \\
+ --cores $cores \\
+ --paired \\
+ --gzip \\
+ $c_r1 \\
+ $c_r2 \\
+ $tpc_r1 \\
+ $tpc_r2 \\
+ ${prefix}_1.fastq.gz \\
+ ${prefix}_2.fastq.gz
+ echo \$(trim_galore --version 2>&1) | sed 's/^.*version //; s/Last.*\$//' > ${software}.version.txt
+ """
+ }
+}
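A quick worked example of the --cores heuristic above:

    // task.cpus = 16, paired-end: 16 - 4 = 12, capped at 4  -> trim_galore --cores 4
    // task.cpus = 2,  single-end:  2 - 3 = -1, floored at 1 -> trim_galore --cores 1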
diff --git a/modules/nf-core/software/trimgalore/meta.yml b/modules/nf-core/software/trimgalore/meta.yml
new file mode 100644
index 0000000000..86ba5cd44a
--- /dev/null
+++ b/modules/nf-core/software/trimgalore/meta.yml
@@ -0,0 +1,99 @@
+name: trimgalore
+description: Trim FastQ files using Trim Galore!
+keywords:
+ - trimming
+ - adapters
+ - sequencing adapters
+ - fastq
+tools:
+ - trimgalore:
+ description: |
+ A wrapper tool around Cutadapt and FastQC to consistently apply quality
+ and adapter trimming to FastQ files, with some extra functionality for
+ MspI-digested RRBS-type (Reduced Representation Bisulfite-Seq) libraries.
+ homepage: https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/
+ documentation: https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md
+params:
+ - outdir:
+ type: string
+ description: |
+ The pipeline's output directory. By default, the module will
+ output files into `$params.outdir/`
+ - publish_dir_mode:
+ type: string
+ description: |
+ Value for the Nextflow `publishDir` mode parameter.
+ Available: symlink, rellink, link, copy, copyNoFollow, move.
+ - enable_conda:
+ type: boolean
+ description: |
+ Run the module with Conda using the software specified
+ via the `conda` directive
+ - singularity_pull_docker_container:
+ type: boolean
+ description: |
+ Instead of directly downloading Singularity images for use with Singularity,
+ force the workflow to pull and convert Docker containers instead.
+ - clip_r1:
+ type: integer
+ description: |
+ Instructs Trim Galore to remove bp from the 5' end of read 1
+ (or single-end reads)
+ - clip_r2:
+ type: integer
+ description: |
+ Instructs Trim Galore to remove bp from the 5' end of read 2
+ (paired-end reads only)
+ - three_prime_clip_r1:
+ type: integer
+ description: |
+ Instructs Trim Galore to remove bp from the 3' end of read 1
+ AFTER adapter/quality trimming has been performed
+ - three_prime_clip_r2:
+ type: integer
+ description: |
+ Instructs Trim Galore to remove bp from the 3' end of read 2
+ AFTER adapter/quality trimming has been performed
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+ description: |
+ List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+ respectively.
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+ description: |
+ List of input adapter trimmed FastQ files of size 1 and 2 for
+ single-end and paired-end data, respectively.
+ pattern: "*.{fq.gz}"
+ - html:
+ type: file
+ description: FastQC report (optional)
+ pattern: "*_{fastqc.html}"
+ - zip:
+ type: file
+ description: FastQC report archive (optional)
+ pattern: "*_{fastqc.zip}"
+ - log:
+ type: file
+ description: Trim Galore! trimming report
+ pattern: "*_{report.txt}"
+ - version:
+ type: file
+ description: File containing software version
+ pattern: "*.{version.txt}"
+authors:
+ - "@drpatelh"
+ - "@ewels"
+ - "@FelixKrueger"
diff --git a/modules/nf-core/subworkflow/qc_trim.nf b/modules/nf-core/subworkflow/qc_trim.nf
deleted file mode 100644
index b39476c5ef..0000000000
--- a/modules/nf-core/subworkflow/qc_trim.nf
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Read QC and trimming
- */
-params.fastqc_options = [:]
-params.trimgalore_options = [:]
-
-include { FASTQC } from '../software/fastqc' addParams(options: params.fastqc_options)
-include { TRIMGALORE } from '../software/trimgalore' addParams(options: params.trimgalore_options)
-
-workflow QC_TRIM {
- take:
-
- reads // channel: [ val(meta), [ reads ] ]
- skip_fastqc // boolean: true/false
- skip_trimming // boolean: true/false
-
- main:
-
- fastqc_html = Channel.empty()
- fastqc_version = Channel.empty()
- fastqc_zip = Channel.empty()
- if (!skip_fastqc) {
- FASTQC(reads)
- fastqc_html = FASTQC.out.html
- fastqc_version = FASTQC.out.version
- fastqc_zip = FASTQC.out.zip
- }
-
- trim_reads = reads
- trimgalore_html = Channel.empty()
- trimgalore_zip = Channel.empty()
- trimgalore_log = Channel.empty()
- trimgalore_version = Channel.empty()
- if (!skip_trimming) {
- TRIMGALORE(reads)
- trim_reads = TRIMGALORE.out.reads
- trimgalore_html = TRIMGALORE.out.html
- trimgalore_zip = TRIMGALORE.out.zip
- trimgalore_log = TRIMGALORE.out.log
- trimgalore_version = TRIMGALORE.out.version
- }
-
- emit:
-
- fastqc_html // path: *.html
- fastqc_zip // path: *.zip
- fastqc_version // path: *.version.txt
- reads = trim_reads // channel: [ val(meta), [ reads ] ]
- trimgalore_html // path: *.html
- trimgalore_log // path: *.txt
- trimgalore_zip // path: *.zip
- trimgalore_version // path: *.version.txt
-}
diff --git a/scripts/make_snapshot.sh b/scripts/make_snapshot.sh
deleted file mode 100755
index 16ae81b5e3..0000000000
--- a/scripts/make_snapshot.sh
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/bin/bash
-set -euo pipefail
-
-# This script makes an archive of sarek, with or without configs and test datasets
-# https://github.com/nf-core/sarek
-
-usage() { echo "Usage: $0 <-t> <-c>" 1>&2; exit 1; }
-
-CONFIGS=false
-NAME=sarek-$(git describe --tags --always)
-TEST=false
-
-while [[ $# -gt 0 ]]
-do
- key="$1"
- case $key in
- -i|-t|--include-test-data)
- TEST=true
- shift # past argument
- ;;
- -c|--include-configs)
- CONFIGS=true
- shift # past argument
- ;;
- *) # unknown option
- shift # past argument
- usage
- ;;
- esac
-done
-
-if [[ $CONFIGS == true ]]
-then
- echo "Archiving nf-core/configs"
- git submodule add -f https://github.com/nf-core/configs.git configs
-fi
-
-if [[ $TEST == true ]]
-then
- echo "Archiving nf-core/test-datasets:sarek"
- git submodule add -f --branch sarek https://github.com/nf-core/test-datasets.git data
-fi
-
-echo "Archiving nf-core/sarek"
-
-if [[ $CONFIGS == true ]] || [[ $TEST == true ]]
-then
- git-archive-all --prefix=${NAME} --force-submodules ${NAME}.tar.gz
-else
- git archive --format=tar.gz HEAD --prefix=${NAME}/ > ${NAME}.tar.gz
-fi
-
-echo "Wrote ${NAME}.tar.gz"
diff --git a/modules/local/subworkflow/build_indices.nf b/subworkflow/local/build_indices.nf
similarity index 79%
rename from modules/local/subworkflow/build_indices.nf
rename to subworkflow/local/build_indices.nf
index 696e22a733..273e71f141 100644
--- a/modules/local/subworkflow/build_indices.nf
+++ b/subworkflow/local/build_indices.nf
@@ -19,18 +19,18 @@ params.tabix_pon_options = [:]
// Initialize channels based on params or indices that were just built
-include { BUILD_INTERVALS } from '../process/build_intervals.nf' addParams(options: params.build_intervals_options)
-include { BWA_INDEX as BWAMEM1_INDEX } from '../../nf-core/software/bwa/index/main.nf' addParams(options: params.bwa_index_options)
-include { BWAMEM2_INDEX } from '../../nf-core/software/bwamem2_index.nf' addParams(options: params.bwamem2_index_options)
-include { CREATE_INTERVALS_BED } from '../process/create_intervals_bed.nf' addParams(options: params.create_intervals_bed_options)
-include { GATK_CREATESEQUENCEDICTIONARY as GATK_DICT } from '../../nf-core/software/gatk/createsequencedictionary.nf' addParams(options: params.gatk_dict_options)
-include { HTSLIB_TABIX as TABIX_DBSNP } from '../../nf-core/software/htslib_tabix' addParams(options: params.tabix_dbsnp_options)
-include { HTSLIB_TABIX as TABIX_GERMLINE_RESOURCE } from '../../nf-core/software/htslib_tabix' addParams(options: params.tabix_germline_resource_options)
-include { HTSLIB_TABIX as TABIX_KNOWN_INDELS } from '../../nf-core/software/htslib_tabix' addParams(options: params.tabix_known_indels_options)
-include { HTSLIB_TABIX as TABIX_PON } from '../../nf-core/software/htslib_tabix' addParams(options: params.tabix_pon_options)
-include { INDEX_TARGET_BED } from '../process/index_target_bed' addParams(options: params.index_target_bed_options)
-include { MSISENSOR_SCAN } from '../../nf-core/software/msisensor/scan.nf' addParams(options: params.msisensor_scan_options)
-include { SAMTOOLS_FAIDX } from '../../nf-core/software/samtools/faidx.nf' addParams(options: params.samtools_faidx_options)
+include { BUILD_INTERVALS } from '../../modules/local/build_intervals.nf' addParams(options: params.build_intervals_options)
+include { BWA_INDEX as BWAMEM1_INDEX } from '../../modules/nf-core/software/bwa/index/main.nf' addParams(options: params.bwa_index_options)
+include { BWAMEM2_INDEX } from '../../modules/nf-core/software/bwamem2_index.nf' addParams(options: params.bwamem2_index_options)
+include { CREATE_INTERVALS_BED } from '../../modules/local/create_intervals_bed.nf' addParams(options: params.create_intervals_bed_options)
+include { GATK_CREATESEQUENCEDICTIONARY as GATK_DICT } from '../../modules/nf-core/software/gatk/createsequencedictionary.nf' addParams(options: params.gatk_dict_options)
+include { HTSLIB_TABIX as TABIX_DBSNP } from '../../modules/nf-core/software/htslib_tabix' addParams(options: params.tabix_dbsnp_options)
+include { HTSLIB_TABIX as TABIX_GERMLINE_RESOURCE } from '../../modules/nf-core/software/htslib_tabix' addParams(options: params.tabix_germline_resource_options)
+include { HTSLIB_TABIX as TABIX_KNOWN_INDELS } from '../../modules/nf-core/software/htslib_tabix' addParams(options: params.tabix_known_indels_options)
+include { HTSLIB_TABIX as TABIX_PON } from '../../modules/nf-core/software/htslib_tabix' addParams(options: params.tabix_pon_options)
+include { INDEX_TARGET_BED } from '../../modules/local/index_target_bed.nf' addParams(options: params.index_target_bed_options)
+include { MSISENSOR_SCAN } from '../../modules/nf-core/software/msisensor/scan.nf' addParams(options: params.msisensor_scan_options)
+include { SAMTOOLS_FAIDX } from '../../modules/nf-core/software/samtools/faidx.nf' addParams(options: params.samtools_faidx_options)
workflow BUILD_INDICES{
take:
@@ -45,11 +45,11 @@ workflow BUILD_INDICES{
main:
- result_bwa = Channel.empty()
+ result_bwa = Channel.empty()
version_bwa = Channel.empty()
if (!(params.bwa) && 'mapping' in step)
if (params.aligner == "bwa-mem") (result_bwa, version_bwa) = BWAMEM1_INDEX(fasta)
- else result_bwa = BWAMEM2_INDEX(fasta)
+ else (result_bwa, version_bwa) = BWAMEM2_INDEX(fasta)
result_dict = Channel.empty()
if (!(params.dict) && !('annotate' in step) && !('controlfreec' in step))
diff --git a/modules/local/subworkflow/germline_variant_calling.nf b/subworkflow/local/germline_variant_calling.nf
similarity index 86%
rename from modules/local/subworkflow/germline_variant_calling.nf
rename to subworkflow/local/germline_variant_calling.nf
index c2288519a2..b742b1de26 100644
--- a/modules/local/subworkflow/germline_variant_calling.nf
+++ b/subworkflow/local/germline_variant_calling.nf
@@ -10,11 +10,11 @@ params.concat_gvcf_options = [:]
params.concat_haplotypecaller_options = [:]
params.strelka_options = [:]
-include { GATK_HAPLOTYPECALLER as HAPLOTYPECALLER } from '../../nf-core/software/gatk/haplotypecaller' addParams(options: params.haplotypecaller_options)
-include { GATK_GENOTYPEGVCF as GENOTYPEGVCF } from '../../nf-core/software/gatk/genotypegvcf' addParams(options: params.genotypegvcf_options)
-include { CONCAT_VCF as CONCAT_GVCF } from '../process/concat_vcf' addParams(options: params.concat_gvcf_options)
-include { CONCAT_VCF as CONCAT_HAPLOTYPECALLER } from '../process/concat_vcf' addParams(options: params.concat_haplotypecaller_options)
-include { STRELKA_GERMLINE as STRELKA } from '../../nf-core/software/strelka/germline' addParams(options: params.strelka_options)
+include { GATK_HAPLOTYPECALLER as HAPLOTYPECALLER } from '../../modules/nf-core/software/gatk/haplotypecaller' addParams(options: params.haplotypecaller_options)
+include { GATK_GENOTYPEGVCF as GENOTYPEGVCF } from '../../modules/nf-core/software/gatk/genotypegvcf' addParams(options: params.genotypegvcf_options)
+include { CONCAT_VCF as CONCAT_GVCF } from '../../modules/local/concat_vcf' addParams(options: params.concat_gvcf_options)
+include { CONCAT_VCF as CONCAT_HAPLOTYPECALLER } from '../../modules/local/concat_vcf' addParams(options: params.concat_haplotypecaller_options)
+include { STRELKA_GERMLINE as STRELKA } from '../../modules/nf-core/software/strelka/germline' addParams(options: params.strelka_options)
workflow GERMLINE_VARIANT_CALLING {
take:
diff --git a/modules/local/subworkflow/mapping.nf b/subworkflow/local/mapping.nf
similarity index 61%
rename from modules/local/subworkflow/mapping.nf
rename to subworkflow/local/mapping.nf
index 4ca5effd2f..85dcb2f8e8 100644
--- a/modules/local/subworkflow/mapping.nf
+++ b/subworkflow/local/mapping.nf
@@ -4,19 +4,23 @@
================================================================================
*/
-params.bwamem1_mem_options = [:]
-params.bwamem2_mem_options = [:]
-params.merge_bam_options = [:]
-params.qualimap_bamqc_options = [:]
-params.samtools_index_options = [:]
-params.samtools_stats_options = [:]
-
-include { BWA_MEM as BWAMEM1_MEM } from '../process/bwa_mem' addParams(options: params.bwamem1_mem_options)
-include { BWAMEM2_MEM } from '../process/bwamem2_mem' addParams(options: params.bwamem2_mem_options)
-include { MERGE_BAM } from '../process/merge_bam' addParams(options: params.merge_bam_options)
-include { QUALIMAP_BAMQC } from '../../nf-core/software/qualimap_bamqc' addParams(options: params.qualimap_bamqc_options)
-include { SAMTOOLS_INDEX } from '../../nf-core/software/samtools/index' addParams(options: params.samtools_index_options)
-include { SAMTOOLS_STATS } from '../../nf-core/software/samtools/stats' addParams(options: params.samtools_stats_options)
+params.bwamem1_mem_options = [:]
+params.bwamem1_mem_tumor_options = [:]
+params.bwamem2_mem_options = [:]
+params.bwamem2_mem_tumor_options = [:]
+params.merge_bam_options = [:]
+params.qualimap_bamqc_options = [:]
+params.samtools_index_options = [:]
+params.samtools_stats_options = [:]
+
+include { BWA_MEM as BWAMEM1_MEM } from '../../modules/nf-core/software/bwa/mem/main' addParams(options: params.bwamem1_mem_options)
+include { BWA_MEM as BWAMEM1_MEM_T } from '../../modules/nf-core/software/bwa/mem/main' addParams(options: params.bwamem1_mem_tumor_options)
+include { BWAMEM2_MEM } from '../../modules/nf-core/software/bwamem2_mem.nf' addParams(options: params.bwamem2_mem_options)
+include { BWAMEM2_MEM as BWAMEM2_MEM_T } from '../../modules/nf-core/software/bwamem2_mem.nf' addParams(options: params.bwamem2_mem_tumor_options)
+include { MERGE_BAM } from '../../modules/local/merge_bam' addParams(options: params.merge_bam_options)
+include { QUALIMAP_BAMQC } from '../../modules/nf-core/software/qualimap_bamqc' addParams(options: params.qualimap_bamqc_options)
+include { SAMTOOLS_INDEX } from '../../modules/nf-core/software/samtools/index/main' addParams(options: params.samtools_index_options)
+include { SAMTOOLS_STATS } from '../../modules/nf-core/software/samtools/stats/main' addParams(options: params.samtools_stats_options)
workflow MAPPING {
take:
@@ -35,16 +39,32 @@ workflow MAPPING {
bam_mapped_index = Channel.empty()
bam_reports = Channel.empty()
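+ // Split the mapping input by sample status (0 = normal, 1 = tumor), so tumor samples can be mapped with dedicated BWA options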
+ reads_input.groupTuple(by: [0,1])
+ .branch{
+ normal: it[0].status == 0
+ tumor: it[0].status == 1
+ }.set{ reads_input_status }
+
if (step == "mapping") {
bam_bwamem1 = Channel.empty()
bam_bwamem2 = Channel.empty()
if (params.aligner == "bwa-mem") {
- BWAMEM1_MEM(reads_input, bwa, fasta, fai)
- bam_bwamem1 = BWAMEM1_MEM.out.bam
+ BWAMEM1_MEM(reads_input_status.normal, bwa, fasta, fai)
+ bam_bwamem1_n = BWAMEM1_MEM.out.bam
+
+ BWAMEM1_MEM_T(reads_input_status.tumor, bwa, fasta, fai)
+ bam_bwamem1_t = BWAMEM1_MEM_T.out.bam
+
+ bam_bwamem1 = bam_bwamem1_n.mix(bam_bwamem1_t)
} else {
- BWAMEM2_MEM(reads_input, bwa, fasta, fai)
- bam_bwamem2 = BWAMEM2_MEM.out
+ BWAMEM2_MEM(reads_input_status.normal, bwa, fasta, fai)
+ bam_bwamem2_n = BWAMEM2_MEM.out.bam
+
+ BWAMEM2_MEM_T(reads_input_status.tumor, bwa, fasta, fai)
+ bam_bwamem2_t = BWAMEM2_MEM_T.out.bam
+
+ bam_bwamem2 = bam_bwamem2_n.mix(bam_bwamem2_t)
}
bam_bwa = bam_bwamem1.mix(bam_bwamem2)
@@ -91,7 +111,9 @@ workflow MAPPING {
MERGE_BAM(bam_bwa_multiple)
bam_mapped = bam_bwa_single.mix(MERGE_BAM.out.bam)
- bam_mapped_index = SAMTOOLS_INDEX(bam_mapped)
+
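+ // Index the BAM separately, then join BAM and .bai on the meta key so each BAM travels with its index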
+ SAMTOOLS_INDEX(bam_mapped)
+ bam_mapped_index = bam_mapped.join(SAMTOOLS_INDEX.out.bai)
qualimap_bamqc = Channel.empty()
samtools_stats = Channel.empty()
@@ -102,8 +124,8 @@ workflow MAPPING {
}
if (!skip_samtools) {
- SAMTOOLS_STATS(bam_mapped)
- samtools_stats = SAMTOOLS_STATS.out
+ SAMTOOLS_STATS(bam_mapped_index)
+ samtools_stats = SAMTOOLS_STATS.out.stats
}
bam_reports = samtools_stats.mix(qualimap_bamqc)
diff --git a/modules/local/subworkflow/markduplicates.nf b/subworkflow/local/markduplicates.nf
similarity index 80%
rename from modules/local/subworkflow/markduplicates.nf
rename to subworkflow/local/markduplicates.nf
index 531e11b464..ca30123101 100644
--- a/modules/local/subworkflow/markduplicates.nf
+++ b/subworkflow/local/markduplicates.nf
@@ -6,7 +6,8 @@
params.markduplicates_options = [:]
-include { GATK_MARKDUPLICATES } from '../../nf-core/software/gatk/markduplicates' addParams(options: params.markduplicates_options)
+include { GATK_MARKDUPLICATES } from '../../modules/nf-core/software/gatk/markduplicates' addParams(options: params.markduplicates_options)
+include { GATK_MARKDUPLICATES_SPARK } from '../../modules/nf-core/software/gatk/markduplicates' addParams(options: params.markduplicates_options)
workflow MARKDUPLICATES {
take:
@@ -20,10 +21,17 @@ workflow MARKDUPLICATES {
if (step == "mapping") {
if (!params.skip_markduplicates) {
- GATK_MARKDUPLICATES(bam_mapped)
- report_markduplicates = GATK_MARKDUPLICATES.out.report
- bam_markduplicates = GATK_MARKDUPLICATES.out.bam
- tsv_markduplicates = GATK_MARKDUPLICATES.out.tsv
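+ // --use_gatk_spark switches to the Spark implementation of MarkDuplicates; both variants expose the same report/bam/tsv outputs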
+ if (params.use_gatk_spark) {
+ GATK_MARKDUPLICATES_SPARK(bam_mapped)
+ report_markduplicates = GATK_MARKDUPLICATES_SPARK.out.report
+ bam_markduplicates = GATK_MARKDUPLICATES_SPARK.out.bam
+ tsv_markduplicates = GATK_MARKDUPLICATES_SPARK.out.tsv
+ } else {
+ GATK_MARKDUPLICATES(bam_mapped)
+ report_markduplicates = GATK_MARKDUPLICATES.out.report
+ bam_markduplicates = GATK_MARKDUPLICATES.out.bam
+ tsv_markduplicates = GATK_MARKDUPLICATES.out.tsv
+ }
// Creating TSV files to restart from this step
tsv_markduplicates.collectFile(storeDir: "${params.outdir}/preprocessing/tsv") { meta ->
diff --git a/modules/local/subworkflow/pair_variant_calling.nf b/subworkflow/local/pair_variant_calling.nf
similarity index 90%
rename from modules/local/subworkflow/pair_variant_calling.nf
rename to subworkflow/local/pair_variant_calling.nf
index 37b21807d6..5654217909 100644
--- a/modules/local/subworkflow/pair_variant_calling.nf
+++ b/subworkflow/local/pair_variant_calling.nf
@@ -9,10 +9,10 @@ params.msisensor_msi_options = [:]
params.strelka_options = [:]
params.strelka_bp_options = [:]
-include { MANTA_SOMATIC as MANTA } from '../../nf-core/software/manta/somatic' addParams(options: params.manta_options)
-include { MSISENSOR_MSI } from '../../nf-core/software/msisensor/msi' addParams(options: params.msisensor_msi_options)
-include { STRELKA_SOMATIC as STRELKA } from '../../nf-core/software/strelka/somatic' addParams(options: params.strelka_options)
-include { STRELKA_SOMATIC_BEST_PRACTICES as STRELKA_BP } from '../../nf-core/software/strelka/somatic' addParams(options: params.strelka_bp_options)
+include { MANTA_SOMATIC as MANTA } from '../../modules/nf-core/software/manta/somatic' addParams(options: params.manta_options)
+include { MSISENSOR_MSI } from '../../modules/nf-core/software/msisensor/msi' addParams(options: params.msisensor_msi_options)
+include { STRELKA_SOMATIC as STRELKA } from '../../modules/nf-core/software/strelka/somatic' addParams(options: params.strelka_options)
+include { STRELKA_SOMATIC_BEST_PRACTICES as STRELKA_BP } from '../../modules/nf-core/software/strelka/somatic' addParams(options: params.strelka_bp_options)
workflow PAIR_VARIANT_CALLING {
take:
diff --git a/modules/local/subworkflow/prepare_recalibration.nf b/subworkflow/local/prepare_recalibration.nf
similarity index 94%
rename from modules/local/subworkflow/prepare_recalibration.nf
rename to subworkflow/local/prepare_recalibration.nf
index 510e4bbb55..9f0baf94e9 100644
--- a/modules/local/subworkflow/prepare_recalibration.nf
+++ b/subworkflow/local/prepare_recalibration.nf
@@ -7,8 +7,8 @@
params.baserecalibrator_options = [:]
params.gatherbqsrreports_options = [:]
-include { GATK_BASERECALIBRATOR as BASERECALIBRATOR } from '../../nf-core/software/gatk/baserecalibrator' addParams(options: params.baserecalibrator_options)
-include { GATK_GATHERBQSRREPORTS as GATHERBQSRREPORTS } from '../../nf-core/software/gatk/gatherbqsrreports' addParams(options: params.gatherbqsrreports_options)
+include { GATK_BASERECALIBRATOR as BASERECALIBRATOR } from '../../modules/nf-core/software/gatk/baserecalibrator' addParams(options: params.baserecalibrator_options)
+include { GATK_GATHERBQSRREPORTS as GATHERBQSRREPORTS } from '../../modules/nf-core/software/gatk/gatherbqsrreports' addParams(options: params.gatherbqsrreports_options)
workflow PREPARE_RECALIBRATION {
take:
diff --git a/modules/local/subworkflow/recalibrate.nf b/subworkflow/local/recalibrate.nf
similarity index 82%
rename from modules/local/subworkflow/recalibrate.nf
rename to subworkflow/local/recalibrate.nf
index 5551199799..e2eb64b453 100644
--- a/modules/local/subworkflow/recalibrate.nf
+++ b/subworkflow/local/recalibrate.nf
@@ -10,11 +10,11 @@ params.qualimap_bamqc_options = [:]
params.samtools_index_options = [:]
params.samtools_stats_options = [:]
-include { GATK_APPLYBQSR as APPLYBQSR } from '../../nf-core/software/gatk/applybqsr' addParams(options: params.applybqsr_options)
-include { MERGE_BAM } from '../process/merge_bam' addParams(options: params.merge_bam_options)
-include { QUALIMAP_BAMQC } from '../../nf-core/software/qualimap_bamqc' addParams(options: params.qualimap_bamqc_options)
-include { SAMTOOLS_INDEX } from '../../nf-core/software/samtools/index' addParams(options: params.samtools_index_options)
-include { SAMTOOLS_STATS } from '../../nf-core/software/samtools/stats' addParams(options: params.samtools_stats_options)
+include { GATK_APPLYBQSR as APPLYBQSR } from '../../modules/nf-core/software/gatk/applybqsr' addParams(options: params.applybqsr_options)
+include { MERGE_BAM } from '../../modules/local/merge_bam' addParams(options: params.merge_bam_options)
+include { QUALIMAP_BAMQC } from '../../modules/nf-core/software/qualimap_bamqc' addParams(options: params.qualimap_bamqc_options)
+include { SAMTOOLS_INDEX } from '../../modules/nf-core/software/samtools/index/main' addParams(options: params.samtools_index_options)
+include { SAMTOOLS_STATS } from '../../modules/nf-core/software/samtools/stats/main' addParams(options: params.samtools_stats_options)
workflow RECALIBRATE {
take:
@@ -71,7 +71,8 @@ workflow RECALIBRATE {
tsv_recalibrated = MERGE_BAM.out.tsv
}
- bam_recalibrated_index = SAMTOOLS_INDEX(bam_recalibrated)
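+ // Same pattern as in MAPPING: index first, then join the BAM with its index on the meta key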
+ SAMTOOLS_INDEX(bam_recalibrated)
+ bam_recalibrated_index = bam_recalibrated.join(SAMTOOLS_INDEX.out.bai)
qualimap_bamqc = Channel.empty()
samtools_stats = Channel.empty()
@@ -82,8 +83,8 @@ workflow RECALIBRATE {
}
if (!skip_samtools) {
- SAMTOOLS_STATS(bam_recalibrated)
- samtools_stats = SAMTOOLS_STATS.out
+ SAMTOOLS_STATS(bam_recalibrated_index)
+ samtools_stats = SAMTOOLS_STATS.out.stats
}
bam_reports = samtools_stats.mix(qualimap_bamqc)
diff --git a/subworkflow/nf-core/fastqc_trimgalore.nf b/subworkflow/nf-core/fastqc_trimgalore.nf
new file mode 100644
index 0000000000..87cac4e452
--- /dev/null
+++ b/subworkflow/nf-core/fastqc_trimgalore.nf
@@ -0,0 +1,51 @@
+/*
+ * Read QC and trimming
+ */
+
+params.fastqc_options = [:]
+params.trimgalore_options = [:]
+
+include { FASTQC } from '../../modules/nf-core/software/fastqc/main' addParams( options: params.fastqc_options )
+include { TRIMGALORE } from '../../modules/nf-core/software/trimgalore/main' addParams( options: params.trimgalore_options )
+
+workflow FASTQC_TRIMGALORE {
+ take:
+ reads // channel: [ val(meta), [ reads ] ]
+ skip_fastqc // boolean: true/false
+ skip_trimming // boolean: true/false
+
+ main:
+ fastqc_html = Channel.empty()
+ fastqc_zip = Channel.empty()
+ fastqc_version = Channel.empty()
+ if (!skip_fastqc) {
+ FASTQC ( reads ).html.set { fastqc_html }
+ fastqc_zip = FASTQC.out.zip
+ fastqc_version = FASTQC.out.version
+ }
+
+ trim_reads = reads
+ trim_html = Channel.empty()
+ trim_zip = Channel.empty()
+ trim_log = Channel.empty()
+ trimgalore_version = Channel.empty()
+ if (!skip_trimming) {
+ TRIMGALORE ( reads ).reads.set { trim_reads }
+ trim_html = TRIMGALORE.out.html
+ trim_zip = TRIMGALORE.out.zip
+ trim_log = TRIMGALORE.out.log
+ trimgalore_version = TRIMGALORE.out.version
+ }
+
+ emit:
+ reads = trim_reads // channel: [ val(meta), [ reads ] ]
+
+ fastqc_html // channel: [ val(meta), [ html ] ]
+ fastqc_zip // channel: [ val(meta), [ zip ] ]
+ fastqc_version // path: *.version.txt
+
+ trim_html // channel: [ val(meta), [ html ] ]
+ trim_zip // channel: [ val(meta), [ zip ] ]
+ trim_log // channel: [ val(meta), [ txt ] ]
+ trimgalore_version // path: *.version.txt
+}
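+
+// Hypothetical usage sketch (not part of this file): from a calling workflow,
+// assuming `ch_reads` is a channel of [ meta, [ reads ] ] tuples:
+//
+//   FASTQC_TRIMGALORE(ch_reads, false, false) // run both FastQC and Trim Galore
+//   FASTQC_TRIMGALORE.out.reads.view()        // trimmed reads, same shape as the input channel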
diff --git a/workflows/sarek.nf b/workflows/sarek.nf
new file mode 100644
index 0000000000..a0ef24439f
--- /dev/null
+++ b/workflows/sarek.nf
@@ -0,0 +1,420 @@
+////////////////////////////////////////////////////
+/* -- INCLUDE SAREK FUNCTIONS -- */
+////////////////////////////////////////////////////
+
+include {
+ check_parameter_existence;
+ check_parameter_list;
+ define_anno_list;
+ define_skip_qc_list;
+ define_step_list;
+ define_tool_list;
+ extract_bam;
+ extract_fastq;
+ extract_fastq_from_dir;
+ extract_pileup;
+ extract_recal;
+ has_extension
+} from '../modules/local/functions'
+
+////////////////////////////////////////////////////
+/* -- SET UP CONFIGURATION VARIABLES -- */
+////////////////////////////////////////////////////
+
+step_list = define_step_list()
+step = params.step ? params.step.toLowerCase().replaceAll('-', '').replaceAll('_', '') : ''
+
+if (step.contains(',')) exit 1, 'You can choose only one step, see --help for more information'
+if (!check_parameter_existence(step, step_list)) exit 1, "Unknown step ${step}, see --help for more information"
+
+tool_list = define_tool_list()
+tools = params.tools ? params.tools.split(',').collect{it.trim().toLowerCase().replaceAll('-', '').replaceAll('_', '')} : []
+if (step == 'controlfreec') tools = ['controlfreec']
+if (!check_parameter_list(tools, tool_list)) exit 1, 'Unknown tool(s), see --help for more information'
+
+skip_qc_list = define_skip_qc_list()
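+// 'all' expands to the complete list of QC tools; otherwise parse the comma-separated selection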
+skip_qc = params.skip_qc ? (params.skip_qc == 'all' ? skip_qc_list : params.skip_qc.split(',').collect{it.trim().toLowerCase().replaceAll('-', '').replaceAll('_', '')}) : []
+if (!check_parameter_list(skip_qc, skip_qc_list)) exit 1, 'Unknown QC tool(s), see --help for more information'
+
+anno_list = define_anno_list()
+annotate_tools = params.annotate_tools ? params.annotate_tools.split(',').collect{it.trim().toLowerCase().replaceAll('-', '')} : []
+if (!check_parameter_list(annotate_tools, anno_list)) exit 1, 'Unknown tool(s) to annotate, see --help for more information'
+
+if (!(params.aligner in ['bwa-mem', 'bwa-mem2'])) exit 1, 'Unknown aligner, see --help for more information'
+
+// Check parameters
+if ((params.ascat_ploidy && !params.ascat_purity) || (!params.ascat_ploidy && params.ascat_purity)) exit 1, 'Please specify both --ascat_purity and --ascat_ploidy, or none of them'
+if (params.cf_window && params.cf_coeff) exit 1, 'Please specify either --cf_window OR --cf_coeff, but not both of them'
+if (params.umi && !(params.read_structure1 && params.read_structure2)) exit 1, 'Please specify both --read_structure1 and --read_structure2, when using --umi'
+
+// Handle input
+tsv_path = null
+if (params.input && (has_extension(params.input, "tsv") || has_extension(params.input, "vcf") || has_extension(params.input, "vcf.gz"))) tsv_path = params.input
+if (params.input && (has_extension(params.input, "vcf") || has_extension(params.input, "vcf.gz"))) step = "annotate"
+
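+// Mapped BAMs must always be saved when MarkDuplicates is skipped, otherwise only when explicitly requested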
+save_bam_mapped = params.skip_markduplicates || params.save_bam_mapped
+
+// If no input file is specified, try to get the TSV file corresponding to the step from the TSV directory
+// (only for the steps preparerecalibration, recalibrate, variantcalling and controlfreec)
+if (!params.input && params.sentieon) {
+ switch (step) {
+ case 'mapping': break
+ case 'recalibrate': tsv_path = "${params.outdir}/preprocessing/tsv/sentieon_deduped.tsv"; break
+ case 'variantcalling': tsv_path = "${params.outdir}/preprocessing/tsv/sentieon_recalibrated.tsv"; break
+ case 'annotate': break
+ default: exit 1, "Unknown step ${step}"
+ }
+} else if (!params.input && !params.sentieon && !params.skip_markduplicates) {
+ switch (step) {
+ case 'mapping': break
+ case 'preparerecalibration': tsv_path = "${params.outdir}/preprocessing/tsv/markduplicates_no_table.tsv"; break
+ case 'recalibrate': tsv_path = "${params.outdir}/preprocessing/tsv/markduplicates.tsv"; break
+ case 'variantcalling': tsv_path = "${params.outdir}/preprocessing/tsv/recalibrated.tsv"; break
+ case 'controlfreec': tsv_path = "${params.outdir}/variant_calling/tsv/control-freec_mpileup.tsv"; break
+ case 'annotate': break
+ default: exit 1, "Unknown step ${step}"
+ }
+} else if (!params.input && !params.sentieon && params.skip_markduplicates) {
+ switch (step) {
+ case 'mapping': break
+ case 'preparerecalibration': tsv_path = "${params.outdir}/preprocessing/tsv/mapped.tsv"; break
+ case 'recalibrate': tsv_path = "${params.outdir}/preprocessing/tsv/mapped_no_markduplicates.tsv"; break
+ case 'variantcalling': tsv_path = "${params.outdir}/preprocessing/tsv/recalibrated.tsv"; break
+ case 'controlfreec': tsv_path = "${params.outdir}/variant_calling/tsv/control-freec_mpileup.tsv"; break
+ case 'annotate': break
+ default: exit 1, "Unknown step ${step}"
+ }
+}
+
+input_sample = Channel.empty()
+if (tsv_path) {
+ tsv_file = file(tsv_path)
+ switch (step) {
+ case 'mapping': input_sample = extract_fastq(tsv_file); break
+ case 'preparerecalibration': input_sample = extract_bam(tsv_file); break
+ case 'recalibrate': input_sample = extract_recal(tsv_file); break
+ case 'variantcalling': input_sample = extract_bam(tsv_file); break
+ case 'controlfreec': input_sample = extract_pileup(tsv_file); break
+ case 'annotate': log.info "Annotating ${tsv_path}"; break
+ default: exit 1, "Unknown step ${step}"
+ }
+} else if (params.input && !has_extension(params.input, "tsv")) {
+ log.info "No TSV file"
+ if (step != 'mapping') exit 1, 'No step other than "mapping" supports a directory as an input'
+ log.info "Reading ${params.input} directory"
+ log.warn "[nf-core/sarek] in ${params.input} directory, all FASTQs are assumed to be from the same sample, which is assumed to be germline"
+ input_sample = extract_fastq_from_dir(params.input)
+ tsv_file = params.input // used in the reports
+} else if (step == 'annotate') {
+ log.info "Trying automatic annotation on files in the VariantCalling/ directory"
+} else exit 1, 'No samples were defined, see --help'
+
+////////////////////////////////////////////////////
+/* -- UPDATE MODULES OPTIONS BASED ON PARAMS -- */
+////////////////////////////////////////////////////
+
+modules = params.modules
+
+if (params.save_reference) {
+ modules['build_intervals'].publish_files = ['bed':'intervals']
+ modules['bwa_index'].publish_files = ['amb':'bwa', 'ann':'bwa', 'bwt':'bwa', 'pac':'bwa', 'sa':'bwa']
+ modules['bwamem2_index'].publish_files = ['0123':'bwamem2', 'amb':'bwamem2', 'ann':'bwamem2', 'bwt.2bit.64':'bwamem2', 'bwt.8bit.32':'bwamem2', 'pac':'bwamem2']
+ modules['create_intervals_bed'].publish_files = ['bed':'intervals']
+ modules['dict'].publish_files = ['dict':'dict']
+ modules['index_target_bed'].publish_files = ['bed.gz':'target', 'bed.gz.tbi':'target']
+ modules['msisensor_scan'].publish_files = ['list':'msi']
+ modules['samtools_faidx'].publish_files = ['fai':'fai']
+ modules['tabix_dbsnp'].publish_files = ['vcf.gz.tbi':'dbsnp']
+ modules['tabix_germline_resource'].publish_files = ['vcf.gz.tbi':'germline_resource']
+ modules['tabix_known_indels'].publish_files = ['vcf.gz.tbi':'known_indels']
+ modules['tabix_pon'].publish_files = ['vcf.gz.tbi':'pon']
+}
+if (save_bam_mapped) modules['samtools_index_mapping'].publish_files = ['bam':'mapped', 'bai':'mapped']
+if (params.skip_markduplicates) modules['baserecalibrator'].publish_files = ['recal.table':'mapped']
+if (params.skip_markduplicates) modules['gatherbqsrreports'].publish_files = ['recal.table':'mapped']
+
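+// Dummy placeholder: optional reference inputs default to this file so every input channel receives a value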
+file("${params.outdir}/no_file").text = "no_file\n"
+
+// Initialize file channels based on params, defined in the params.genomes[params.genome] scope
+chr_dir = params.chr_dir ? file(params.chr_dir) : file("${params.outdir}/no_file")
+chr_length = params.chr_length ? file(params.chr_length) : file("${params.outdir}/no_file")
+dbsnp = params.dbsnp ? file(params.dbsnp) : file("${params.outdir}/no_file")
+fasta = params.fasta ? file(params.fasta) : file("${params.outdir}/no_file")
+germline_resource = params.germline_resource ? file(params.germline_resource) : file("${params.outdir}/no_file")
+known_indels = params.known_indels ? file(params.known_indels) : file("${params.outdir}/no_file")
+loci = params.ac_loci ? file(params.ac_loci) : file("${params.outdir}/no_file")
+loci_gc = params.ac_loci_gc ? file(params.ac_loci_gc) : file("${params.outdir}/no_file")
+mappability = params.mappability ? file(params.mappability) : file("${params.outdir}/no_file")
+
+// Initialize value channels based on params, defined in the params.genomes[params.genome] scope
+snpeff_db = params.snpeff_db ?: Channel.empty()
+snpeff_species = params.species ?: Channel.empty()
+vep_cache_version = params.vep_cache_version ?: Channel.empty()
+
+// Initialize file channels based on params, not defined within the params.genomes[params.genome] scope
+cadd_indels = params.cadd_indels ? file(params.cadd_indels) : file("${params.outdir}/no_file")
+cadd_indels_tbi = params.cadd_indels_tbi ? file(params.cadd_indels_tbi) : file("${params.outdir}/no_file")
+cadd_wg_snvs = params.cadd_wg_snvs ? file(params.cadd_wg_snvs) : file("${params.outdir}/no_file")
+cadd_wg_snvs_tbi = params.cadd_wg_snvs_tbi ? file(params.cadd_wg_snvs_tbi) : file("${params.outdir}/no_file")
+pon = params.pon ? file(params.pon) : file("${params.outdir}/no_file")
+snpeff_cache = params.snpeff_cache ? file(params.snpeff_cache) : file("${params.outdir}/no_file")
+target_bed = params.target_bed ? file(params.target_bed) : file("${params.outdir}/no_file")
+vep_cache = params.vep_cache ? file(params.vep_cache) : file("${params.outdir}/no_file")
+
+// Initialize value channels based on params, not defined within the params.genomes[params.genome] scope
+read_structure1 = params.read_structure1 ?: Channel.empty()
+read_structure2 = params.read_structure2 ?: Channel.empty()
+
+if ('mutect2' in tools && !(params.pon)) log.warn "[nf-core/sarek] Mutect2 was requested, but no panel of normals was given, so results will not be optimal"
+if (params.sentieon) log.warn "[nf-core/sarek] Sentieon will be used; this only works if Sentieon is available where nf-core/sarek is run"
+
+////////////////////////////////////////////////////
+/* -- INCLUDE LOCAL SUBWORKFLOWS -- */
+////////////////////////////////////////////////////
+
+include { BUILD_INDICES } from '../subworkflow/local/build_indices' addParams(
+ build_intervals_options: modules['build_intervals'],
+ bwa_index_options: modules['bwa_index'],
+ bwamem2_index_options: modules['bwamem2_index'],
+ create_intervals_bed_options: modules['create_intervals_bed'],
+ gatk_dict_options: modules['dict'],
+ index_target_bed_options: modules['index_target_bed'],
+ msisensor_scan_options: modules['msisensor_scan'],
+ samtools_faidx_options: modules['samtools_faidx'],
+ tabix_dbsnp_options: modules['tabix_dbsnp'],
+ tabix_germline_resource_options: modules['tabix_germline_resource'],
+ tabix_known_indels_options: modules['tabix_known_indels'],
+ tabix_pon_options: modules['tabix_pon']
+)
+include { MAPPING } from '../subworkflow/local/mapping' addParams(
+ bwamem1_mem_options: modules['bwa_mem1_mem'],
+ bwamem1_mem_tumor_options: modules['bwa_mem1_mem_tumor'],
+ bwamem2_mem_options: modules['bwa_mem2_mem'],
+ bwamem2_mem_tumor_options: modules['bwa_mem2_mem_tumor'],
+ merge_bam_options: modules['merge_bam_mapping'],
+ qualimap_bamqc_options: modules['qualimap_bamqc_mapping'],
+ samtools_index_options: modules['samtools_index_mapping'],
+ samtools_stats_options: modules['samtools_stats_mapping']
+)
+include { MARKDUPLICATES } from '../subworkflow/local/markduplicates' addParams(
+ markduplicates_options: modules['markduplicates']
+)
+include { PREPARE_RECALIBRATION } from '../subworkflow/local/prepare_recalibration' addParams(
+ baserecalibrator_options: modules['baserecalibrator'],
+ gatherbqsrreports_options: modules['gatherbqsrreports']
+)
+include { RECALIBRATE } from '../subworkflow/local/recalibrate' addParams(
+ applybqsr_options: modules['applybqsr'],
+ merge_bam_options: modules['merge_bam_recalibrate'],
+ qualimap_bamqc_options: modules['qualimap_bamqc_recalibrate'],
+ samtools_index_options: modules['samtools_index_recalibrate'],
+ samtools_stats_options: modules['samtools_stats_recalibrate']
+)
+include { GERMLINE_VARIANT_CALLING } from '../subworkflow/local/germline_variant_calling' addParams(
+ concat_gvcf_options: modules['concat_gvcf'],
+ concat_haplotypecaller_options: modules['concat_haplotypecaller'],
+ genotypegvcf_options: modules['genotypegvcf'],
+ haplotypecaller_options: modules['haplotypecaller'],
+ strelka_options: modules['strelka_germline']
+)
+// include { TUMOR_VARIANT_CALLING } from '../subworkflow/local/tumor_variant_calling' addParams(
+// )
+include { PAIR_VARIANT_CALLING } from '../subworkflow/local/pair_variant_calling' addParams(
+ manta_options: modules['manta_somatic'],
+ msisensor_msi_options: modules['msisensor_msi'],
+ strelka_bp_options: modules['strelka_somatic_bp'],
+ strelka_options: modules['strelka_somatic']
+)
+
+////////////////////////////////////////////////////
+/* -- INCLUDE NF-CORE MODULES -- */
+////////////////////////////////////////////////////
+
+include { MULTIQC } from '../modules/nf-core/software/multiqc/main'
+
+////////////////////////////////////////////////////
+/* -- INCLUDE NF-CORE SUBWORKFLOWS -- */
+////////////////////////////////////////////////////
+
+include { FASTQC_TRIMGALORE } from '../subworkflow/nf-core/fastqc_trimgalore' addParams(
+ fastqc_options: modules['fastqc'],
+ trimgalore_options: modules['trimgalore']
+)
+
+workflow SAREK {
+
+ ////////////////////////////////////////////////////
+ /* -- BUILD INDICES -- */
+ ////////////////////////////////////////////////////
+
+ BUILD_INDICES(
+ dbsnp,
+ fasta,
+ germline_resource,
+ known_indels,
+ pon,
+ step,
+ target_bed,
+ tools)
+
+ intervals = BUILD_INDICES.out.intervals
+
+ bwa = params.bwa ? file(params.bwa) : BUILD_INDICES.out.bwa
+ dict = params.dict ? file(params.dict) : BUILD_INDICES.out.dict
+ fai = params.fasta_fai ? file(params.fasta_fai) : BUILD_INDICES.out.fai
+
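+ // Prefer user-supplied indices when provided, otherwise fall back to the indices built by BUILD_INDICES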
+ dbsnp_tbi = params.dbsnp ? params.dbsnp_index ? file(params.dbsnp_index) : BUILD_INDICES.out.dbsnp_tbi : file("${params.outdir}/no_file")
+ germline_resource_tbi = params.germline_resource ? params.germline_resource_index ? file(params.germline_resource_index) : BUILD_INDICES.out.germline_resource_tbi : file("${params.outdir}/no_file")
+ known_indels_tbi = params.known_indels ? params.known_indels_index ? file(params.known_indels_index) : BUILD_INDICES.out.known_indels_tbi.collect() : file("${params.outdir}/no_file")
+ pon_tbi = params.pon ? params.pon_index ? file(params.pon_index) : BUILD_INDICES.out.pon_tbi : file("${params.outdir}/no_file")
+
+ msisensor_scan = BUILD_INDICES.out.msisensor_scan
+ target_bed_gz_tbi = BUILD_INDICES.out.target_bed_gz_tbi
+
+ ////////////////////////////////////////////////////
+ /* -- PREPROCESSING -- */
+ ////////////////////////////////////////////////////
+
+ bam_mapped = Channel.empty()
+ bam_mapped_qc = Channel.empty()
+ bam_recalibrated_qc = Channel.empty()
+ input_reads = Channel.empty()
+ qc_reports = Channel.empty()
+
+ // STEP 0: QC & TRIM
+ // `--skip_qc fastqc` to skip fastqc
+ // trim only with `--trim_fastq`
+ // additional options to be set up
+
+ FASTQC_TRIMGALORE(
+ input_sample,
+ ('fastqc' in skip_qc || step != "mapping"),
+ !(params.trim_fastq))
+
+ reads_input = FASTQC_TRIMGALORE.out.reads
+
+ qc_reports = qc_reports.mix(
+ FASTQC_TRIMGALORE.out.fastqc_html,
+ FASTQC_TRIMGALORE.out.fastqc_zip,
+ FASTQC_TRIMGALORE.out.trim_html,
+ FASTQC_TRIMGALORE.out.trim_log,
+ FASTQC_TRIMGALORE.out.trim_zip)
+
+ // STEP 1: MAPPING READS TO REFERENCE GENOME WITH BWA-MEM
+
+ MAPPING(
+ ('bamqc' in skip_qc),
+ ('samtools' in skip_qc),
+ bwa,
+ fai,
+ fasta,
+ reads_input,
+ save_bam_mapped,
+ step,
+ target_bed)
+
+ bam_mapped = MAPPING.out.bam
+ bam_mapped_qc = MAPPING.out.qc
+
+ qc_reports = qc_reports.mix(bam_mapped_qc)
+
+ // STEP 2: MARKING DUPLICATES
+
+ MARKDUPLICATES(
+ bam_mapped,
+ step)
+
+ bam_markduplicates = MARKDUPLICATES.out.bam
+
+ if (step == 'preparerecalibration') bam_markduplicates = input_sample
+
+ // STEP 3: CREATING RECALIBRATION TABLES
+
+ PREPARE_RECALIBRATION(
+ bam_markduplicates,
+ dbsnp,
+ dbsnp_tbi,
+ dict,
+ fai,
+ fasta,
+ intervals,
+ known_indels,
+ known_indels_tbi,
+ step)
+
+ table_bqsr = PREPARE_RECALIBRATION.out.table_bqsr
+
+ // STEP 4: RECALIBRATING
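+ // Pair each BAM with its recalibration table on the shared meta key before ApplyBQSR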
+ bam_applybqsr = bam_markduplicates.join(table_bqsr)
+
+ if (step == 'recalibrate') bam_applybqsr = input_sample
+
+ RECALIBRATE(
+ ('bamqc' in skip_qc),
+ ('samtools' in skip_qc),
+ bam_applybqsr,
+ dict,
+ fai,
+ fasta,
+ intervals,
+ step,
+ target_bed)
+
+ bam_recalibrated = RECALIBRATE.out.bam
+ bam_recalibrated_qc = RECALIBRATE.out.qc
+
+ qc_reports = qc_reports.mix(bam_recalibrated_qc)
+
+ bam_variant_calling = bam_recalibrated
+
+ if (step == 'variantcalling') bam_variant_calling = input_sample
+
+ ////////////////////////////////////////////////////
+ /* -- GERMLINE VARIANT CALLING -- */
+ ////////////////////////////////////////////////////
+
+ GERMLINE_VARIANT_CALLING(
+ bam_variant_calling,
+ dbsnp,
+ dbsnp_tbi,
+ dict,
+ fai,
+ fasta,
+ intervals,
+ target_bed,
+ target_bed_gz_tbi,
+ tools)
+
+ ////////////////////////////////////////////////////
+ /* -- SOMATIC VARIANT CALLING -- */
+ ////////////////////////////////////////////////////
+
+ // TUMOR_VARIANT_CALLING(
+ // bam_variant_calling,
+ // dbsnp,
+ // dbsnp_tbi,
+ // dict,
+ // fai,
+ // fasta,
+ // intervals,
+ // target_bed,
+ // target_bed_gz_tbi,
+ // tools)
+
+ PAIR_VARIANT_CALLING(
+ bam_variant_calling,
+ dbsnp,
+ dbsnp_tbi,
+ dict,
+ fai,
+ fasta,
+ intervals,
+ msisensor_scan,
+ target_bed,
+ target_bed_gz_tbi,
+ tools)
+
+ ////////////////////////////////////////////////////
+ /* -- ANNOTATION -- */
+ ////////////////////////////////////////////////////
+
+}
\ No newline at end of file