diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2d316d9d02..b2eb969078 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,8 +2,6 @@ name: nf-core CI # This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors on: push: - branches: - - dev pull_request: release: types: [published] @@ -21,6 +19,7 @@ jobs: matrix: # Nextflow versions: check pipeline minimum nxf_ver: ['20.11.0-edge'] + engine: ['docker'] steps: - name: Check out pipeline code uses: actions/checkout@v2 @@ -31,9 +30,12 @@ jobs: run: | wget -qO- get.nextflow.io | bash sudo mv nextflow /usr/local/bin/ + - name: Run --help + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test,${{ matrix.engine }} --help - name: Run pipeline with test data run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker + nextflow run ${GITHUB_WORKSPACE} -profile test,${{ matrix.engine }} - name: Show results run: ls -lR results diff --git a/.github/workflows/push_dockerhub_dev.yml b/.github/workflows/push_dockerhub_dev.yml deleted file mode 100644 index bcf8018d56..0000000000 --- a/.github/workflows/push_dockerhub_dev.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: nf-core Docker push (dev) -# This builds the docker image and pushes it to DockerHub -# Runs on nf-core repo releases and push event to 'dev' branch (PR merges) -on: - push: - branches: - - dev - -jobs: - push_dockerhub: - name: Push new Docker image to Docker Hub (dev) - runs-on: ubuntu-latest - # Only run for the nf-core repo, for releases and merged PRs - if: ${{ github.repository == 'nf-core/sarek' }} - env: - DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} - DOCKERHUB_PASS: ${{ secrets.DOCKERHUB_PASS }} - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Build new docker image - run: docker build --no-cache . -t nfcore/sarek:dev - - - name: Push Docker image to DockerHub (dev) - run: | - echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin - docker push nfcore/sarek:dev diff --git a/.github/workflows/push_dockerhub_release.yml b/.github/workflows/push_dockerhub_release.yml deleted file mode 100644 index dd4cda6d51..0000000000 --- a/.github/workflows/push_dockerhub_release.yml +++ /dev/null @@ -1,29 +0,0 @@ -name: nf-core Docker push (release) -# This builds the docker image and pushes it to DockerHub -# Runs on nf-core repo releases and push event to 'dev' branch (PR merges) -on: - release: - types: [published] - -jobs: - push_dockerhub: - name: Push new Docker image to Docker Hub (release) - runs-on: ubuntu-latest - # Only run for the nf-core repo, for releases and merged PRs - if: ${{ github.repository == 'nf-core/sarek' }} - env: - DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} - DOCKERHUB_PASS: ${{ secrets.DOCKERHUB_PASS }} - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Build new docker image - run: docker build --no-cache . 
-t nfcore/sarek:latest - - - name: Push Docker image to DockerHub (release) - run: | - echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin - docker push nfcore/sarek:latest - docker tag nfcore/sarek:latest nfcore/sarek:${{ github.event.release.tag_name }} - docker push nfcore/sarek:${{ github.event.release.tag_name }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 78eddb36a1..53150492a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,12 +13,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#234](https://github.com/nf-core/sarek/pull/234) - Switching to DSL2 - [#234](https://github.com/nf-core/sarek/pull/234), [#238](https://github.com/nf-core/sarek/pull/238) - Add modules and sub workflow for building indices -- [#234](https://github.com/nf-core/sarek/pull/234), [#252](https://github.com/nf-core/sarek/pull/252), [#256](https://github.com/nf-core/sarek/pull/256), [#283](https://github.com/nf-core/sarek/pull/283), [#334](https://github.com/nf-core/sarek/pull/334) - Update Nextflow `19.10.0` -> `20.11.0-edg` +- [#234](https://github.com/nf-core/sarek/pull/234), [#252](https://github.com/nf-core/sarek/pull/252), [#256](https://github.com/nf-core/sarek/pull/256), [#283](https://github.com/nf-core/sarek/pull/283), [#334](https://github.com/nf-core/sarek/pull/334) - Update Nextflow `19.10.0` -> `20.11.0-edge` - [#239](https://github.com/nf-core/sarek/pull/239) - Restore Sarek ascii art to header - [#241](https://github.com/nf-core/sarek/pull/241), [#248](https://github.com/nf-core/sarek/pull/248), [#250](https://github.com/nf-core/sarek/pull/250), [#257](https://github.com/nf-core/sarek/pull/257), [#259](https://github.com/nf-core/sarek/pull/259) - Add modules and sub workflow for preprocessing -- [#242](https://github.com/nf-core/sarek/pull/242), [#244](https://github.com/nf-core/sarek/pull/244), [#245](https://github.com/nf-core/sarek/pull/245), [#246](https://github.com/nf-core/sarek/pull/246), [#247](https://github.com/nf-core/sarek/pull/247), [#249](https://github.com/nf-core/sarek/pull/249), [#252](https://github.com/nf-core/sarek/pull/252), [#256](https://github.com/nf-core/sarek/pull/256), [#263](https://github.com/nf-core/sarek/pull/263), [#264](https://github.com/nf-core/sarek/pull/264), [#283](https://github.com/nf-core/sarek/pull/283), [#285](https://github.com/nf-core/sarek/pull/285) - Refactor `dsl2` branch +- [#242](https://github.com/nf-core/sarek/pull/242), [#244](https://github.com/nf-core/sarek/pull/244), [#245](https://github.com/nf-core/sarek/pull/245), [#246](https://github.com/nf-core/sarek/pull/246), [#247](https://github.com/nf-core/sarek/pull/247), [#249](https://github.com/nf-core/sarek/pull/249), [#252](https://github.com/nf-core/sarek/pull/252), [#256](https://github.com/nf-core/sarek/pull/256), [#263](https://github.com/nf-core/sarek/pull/263), [#264](https://github.com/nf-core/sarek/pull/264), [#283](https://github.com/nf-core/sarek/pull/283), [#285](https://github.com/nf-core/sarek/pull/285), [#338](https://github.com/nf-core/sarek/pull/338) - Refactor `dsl2` branch - [#257](https://github.com/nf-core/sarek/pull/257) - Use a params modules config file -- [#266](https://github.com/nf-core/sarek/pull/266), [#285](https://github.com/nf-core/sarek/pull/285) - Add modules and sub workflow for variant calling +- [#266](https://github.com/nf-core/sarek/pull/266), [#285](https://github.com/nf-core/sarek/pull/285), [#297](https://github.com/nf-core/sarek/pull/297) - Add modules and sub workflow for 
variant calling - [#333](https://github.com/nf-core/sarek/pull/333) - Bump `Sarek` version to `3.0dev` - [#334](https://github.com/nf-core/sarek/pull/334) - Sync `dsl2` and `dev` branches diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index dbdec87e1c..0000000000 --- a/Dockerfile +++ /dev/null @@ -1,17 +0,0 @@ -FROM nfcore/base:1.12.1 -LABEL authors="Maxime Garcia, Szilveszter Juhos" \ - description="Docker image containing all software requirements for the nf-core/sarek pipeline" - -# Install the conda environment -COPY environment.yml / -RUN conda env create --quiet -f /environment.yml && conda clean -a - -# Add conda installation dir to PATH (instead of doing 'conda activate') -ENV PATH /opt/conda/envs/nf-core-sarek-3.0dev/bin:$PATH - -# Dump the details of the installed packages to a file for posterity -RUN conda env export --name nf-core-sarek-3.0dev > nf-core-sarek-3.0dev.yml - -# Instruct R processes to use these empty files instead of clashing with a local version -RUN touch .Rprofile -RUN touch .Renviron diff --git a/README.md b/README.md index 07a8557837..243b3ee239 100644 --- a/README.md +++ b/README.md @@ -2,17 +2,20 @@ > **An open-source analysis pipeline to detect germline or somatic variants from whole genome or targeted sequencing** -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A520.11.0--edge-brightgreen.svg)](https://www.nextflow.io/) -[![nf-core](https://img.shields.io/badge/nf--core-pipeline-brightgreen.svg)](https://nf-co.re/) -[![DOI](https://zenodo.org/badge/184289291.svg)](https://zenodo.org/badge/latestdoi/184289291) - -[![GitHub Actions CI status](https://github.com/nf-core/sarek/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/sarek/actions?query=workflow%3A%22nf-core+CI%22) -[![GitHub Actions Linting status](https://github.com/nf-core/sarek/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/sarek/actions?query=workflow%3A%22nf-core+linting%22) +[![GitHub Actions CI Status](https://github.com/nf-core/sarek/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/sarek/actions?query=workflow%3A%22nf-core+CI%22) +[![GitHub Actions Linting Status](https://github.com/nf-core/sarek/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/sarek/actions?query=workflow%3A%22nf-core+linting%22) [![CircleCi build status](https://img.shields.io/circleci/project/github/nf-core/sarek?logo=circleci)](https://circleci.com/gh/nf-core/sarek/) + + +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A520.11.0--edge-23aa62.svg)](https://www.nextflow.io/) +[![Run with Conda](http://img.shields.io/badge/run%20with-conda-3EB049?logo=anaconda)](https://docs.conda.io/en/latest/) +[![Run with Docker](https://img.shields.io/badge/run%20with-docker-0db7ed?logo=docker)](https://www.docker.com/) +[![Run with Singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg)](https://sylabs.io/docs/) -[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](https://bioconda.github.io/) -[![Docker](https://img.shields.io/docker/automated/nfcore/sarek.svg)](https://hub.docker.com/r/nfcore/sarek) +[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.1400710-1073c8)](https://doi.org/10.5281/zenodo.1400710) [![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23sarek-4A154B?logo=slack)](https://nfcore.slack.com/channels/sarek) +[![Follow on 
Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?logo=twitter)](https://twitter.com/nf_core) +[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction @@ -22,12 +25,28 @@ Sarek can also handle tumour / normal pairs and could include additional relapse The pipeline is built using [`Nextflow`](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with `Docker` containers making installation trivial and results highly reproducible. + + +It's listed on [Elixir - Tools and Data Services Registry](https://bio.tools/nf-core-sarek) and [Dockstore](https://dockstore.org/workflows/github.com/nf-core/sarek). + +## Pipeline Summary + +By default, the pipeline currently performs the following: + +* Sequencing quality control (`FastQC`) +* Map Reads to Reference (`BWA mem`) +* Mark Duplicates (`GATK MarkDuplicatesSpark`) +* Base (Quality Score) Recalibration (`GATK BaseRecalibrator`, `GATK ApplyBQSR`) +* Preprocessing quality control (`samtools stats`) +* Preprocessing quality control (`Qualimap bamqc`) +* Overall pipeline run summaries (`MultiQC`) +

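For reference, a typical command exercising these default steps is the `--help` example from this PR's `main.nf` (adjust `--input` and `--genome` to your data): `nextflow run nf-core/sarek -profile docker --input sample.tsv --genome GRCh38`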
-It's listed on [Elixir - Tools and Data Services Registry](https://bio.tools/nf-core-sarek) and [Dockstore](https://dockstore.org/workflows/github.com/nf-core/sarek). - ## Quick Start 1. Install [`Nextflow`](https://nf-co.re/usage/installation) @@ -52,18 +71,6 @@ It's listed on [Elixir - Tools and Data Services Registry](https://bio.tools/nf- See [usage docs](https://nf-co.re/sarek/usage) for all of the available options when running the pipeline. -## Pipeline Summary - -By default, the pipeline currently performs the following: - -* Sequencing quality control (`FastQC`) -* Map Reads to Reference (`BWA mem`) -* Mark Duplicates (`GATK MarkDuplicatesSpark`) -* Base (Quality Score) Recalibration (`GATK BaseRecalibrator`, `GATK ApplyBQSR`) -* Preprocessing quality control (`samtools stats`) -* Preprocessing quality control (`Qualimap bamqc`) -* Overall pipeline run summaries (`MultiQC`) - ## Documentation The nf-core/sarek pipeline comes with documentation about the pipeline: [usage](https://nf-co.re/sarek/usage) and [output](https://nf-co.re/sarek/output). @@ -110,16 +117,6 @@ Helpful contributors: * [pallolason](https://github.com/pallolason) * [silviamorins](https://github.com/silviamorins) -## Contributions & Support - -If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md). - -For further information or help, don't hesitate to get in touch on the [Slack `#sarek` channel](https://nfcore.slack.com/channels/sarek) (you can join with [this invite](https://nf-co.re/join/slack)), or contact us: [Maxime Garcia](mailto:maxime.garcia@scilifelab.se?subject=[GitHub]%20nf-core/sarek), [Szilvester Juhos](mailto:szilveszter.juhos@scilifelab.se?subject=[GitHub]%20nf-core/sarek) - -## CHANGELOG - -* [CHANGELOG](CHANGELOG.md) - ## Acknowledgements [![Barntumörbanken](docs/images/BTB_logo.png)](https://ki.se/forskning/barntumorbanken) | [![SciLifeLab](docs/images/SciLifeLab_logo.png)](https://scilifelab.se) @@ -127,6 +124,12 @@ For further information or help, don't hesitate to get in touch on the [Slack `# [![National Genomics Infrastructure](docs/images/NGI_logo.png)](https://ngisweden.scilifelab.se/) | [![National Bioinformatics Infrastructure Sweden](docs/images/NBIS_logo.png)](https://nbis.se) [![QBiC](docs/images/QBiC_logo.png)](https://www.qbic.uni-tuebingen.de) | +## Contributions & Support + +If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md). + +For further information or help, don't hesitate to get in touch on the [Slack `#sarek` channel](https://nfcore.slack.com/channels/sarek) (you can join with [this invite](https://nf-co.re/join/slack)), or contact us: [Maxime Garcia](mailto:maxime.garcia@scilifelab.se?subject=[GitHub]%20nf-core/sarek), [Szilveszter Juhos](mailto:szilveszter.juhos@scilifelab.se?subject=[GitHub]%20nf-core/sarek) + ## Citations If you use `nf-core/sarek` for your analysis, please cite the `Sarek` article as follows: @@ -143,4 +146,7 @@ You can cite the `nf-core` publication as follows: > Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen. > > _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x). 
-> ReadCube: [Full Access Link](https://rdcu.be/b1GjZ) + +## CHANGELOG + +* [CHANGELOG](CHANGELOG.md) diff --git a/conf/base.config b/conf/base.config index 3381bed5ff..64bcb42714 100644 --- a/conf/base.config +++ b/conf/base.config @@ -70,31 +70,19 @@ process { memory = {params.max_memory} } - withName:GET_SOFTWARE_VERSIONS { - cache = false - } - - withName:CONCAT_VCF { - // For unknown reasons, CONCAT_VCF sometimes fails with SIGPIPE - // (exit code 141). Rerunning the process will usually work. - errorStrategy = {task.exitStatus == 141 ? 'retry' : 'terminate'} - } +// withName:CONCAT_VCF { +// // For unknown reasons, CONCAT_VCF sometimes fails with SIGPIPE +// // (exit code 141). Rerunning the process will usually work. +// errorStrategy = {task.exitStatus == 141 ? 'retry' : 'terminate'} +// } withLabel:FASTQC { errorStrategy = {task.exitStatus == 143 ? 'retry' : 'ignore'} } - withName:BWAMEM2_MEM { + withLabel:BWAMEM2_MEM { memory = {check_resource(60.GB * task.attempt)} time = {check_resource(48.h * task.attempt)} } - withName:MULTIQC { - errorStrategy = {task.exitStatus == 143 ? 'retry' : 'ignore'} - } - withName:SNPEFF { - container = {(params.annotation_cache && params.snpeff_cache) ? 'nfcore/sarek:dev' : "nfcore/sareksnpeff:dev.${params.genome}"} - errorStrategy = {task.exitStatus == 143 ? 'retry' : 'ignore'} - } - withLabel:VEP { - container = {(params.annotation_cache && params.vep_cache) ? 'nfcore/sarek:dev' : "nfcore/sarekvep:dev.${params.genome}"} - errorStrategy = {task.exitStatus == 143 ? 'retry' : 'ignore'} - } +// withName:MULTIQC { +// errorStrategy = {task.exitStatus == 143 ? 'retry' : 'ignore'} +// } } diff --git a/conf/modules.config b/conf/modules.config index 7476bda3c5..dc005bfed1 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -9,63 +9,76 @@ params { // BUILD_INDICES 'build_intervals' { publish_dir = "reference" - publish_files = "false" + publish_files = false } 'bwa_index' { publish_dir = "reference" - publish_files = "false" + publish_files = false } 'bwamem2_index' { publish_dir = "reference" - publish_files = "false" + publish_files = false } 'create_intervals_bed' { publish_dir = "reference" - publish_files = "false" + publish_files = false } 'dict' { publish_dir = "reference" - publish_files = "false" + publish_files = false } 'index_target_bed' { publish_dir = "reference" - publish_files = "false" + publish_files = false } 'msisensor_scan' { publish_dir = "reference" - publish_files = "false" + publish_files = false } 'samtools_faidx' { publish_dir = "reference" - publish_files = "false" + publish_files = false } 'tabix_dbsnp' { publish_dir = "reference" - publish_files = "false" + publish_files = false } 'tabix_germline_resource' { publish_dir = "reference" - publish_files = "false" + publish_files = false } 'tabix_known_indels' { publish_dir = "reference" - publish_files = "false" + publish_files = false } 'tabix_pon' { publish_dir = "reference" - publish_files = "false" + publish_files = false } // MAPPING 'bwa_mem1_mem' { args = "-K 100000000 -M" - publish_files = "false" + args2 = "sort" + publish_files = false + } + 'bwa_mem1_mem_tumor' { + args = "-K 100000000 -M -B 3" + args2 = "sort" + publish_files = false } 'bwa_mem2_mem' { args = "-K 100000000 -M" - publish_files = "false" + args2 = "sort" + publish_files = false + } + 'bwa_mem2_mem_tumor' { + args = "-K 100000000 -M -B 3" + args2 = "sort" + publish_files = false } 'merge_bam_mapping' { publish_by_id = "true" + publish_files = ['bam':'mapped'] publish_dir = "preprocessing" } 
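// A sketch of how a DSL2 mapping module typically consumes the option maps
// above (illustrative wiring, not the exact module script): 'args' feeds the
// aligner and 'args2' the samtools subcommand, e.g.
//   bwa mem ${options.args} -t $task.cpus $fasta $reads | samtools ${options.args2} -o ${meta.id}.bam -
// so the '*_tumor' variants only change the aligner flags ('-B 3' lowers the
// bwa mem mismatch penalty for tumour reads) while the sorting step stays identical.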
'qualimap_bamqc_mapping' { @@ -74,6 +87,7 @@ params { } 'samtools_index_mapping' { publish_by_id = "true" + publish_files = ['bai':'mapped'] publish_dir = "preprocessing" } 'samtools_stats_mapping' { @@ -108,6 +122,7 @@ 'merge_bam_recalibrate' { suffix = "recal" publish_by_id = "true" + publish_files = ['bam':'recalibrated'] publish_dir = "preprocessing" } 'qualimap_bamqc_recalibrate' { @@ -128,12 +143,12 @@ 'haplotypecaller' { publish_by_id = "true" publish_dir = "variant_calling" - publish_files = "false" + publish_files = false } 'genotypegvcf' { publish_by_id = "true" publish_dir = "variant_calling" - publish_files = "false" + publish_files = false } 'concat_haplotypecaller' { suffix = "haplotypecaller" diff --git a/docs/usage.md b/docs/usage.md index 1805bd5f32..000d0fe717 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -495,13 +495,9 @@ This tool is enabled within `Sarek` if both `--sentieon` and `--tools DNAscope` ### Containers -`sarek`, our main container is designed using [Conda](https://conda.io/). +With `Nextflow DSL2`, each process uses its own `Conda` environment or container from `biocontainers`. -[![sarek-docker status](https://img.shields.io/docker/automated/nfcore/sarek.svg)](https://hub.docker.com/r/nfcore/sarek) - -Based on [nfcore/base:1.12.1](https://hub.docker.com/r/nfcore/base/tags): - -For annotation, the main container can be used, but then cache has to be downloaded, or additional containers are available with cache. +For annotation, the cache has to be downloaded, or specifically designed containers with the cache included are available. `sareksnpeff`, our `snpeff` container is designed using [Conda](https://conda.io/). diff --git a/environment.yml b/environment.yml deleted file mode 100644 index b05d4d3789..0000000000 --- a/environment.yml +++ /dev/null @@ -1,11 +0,0 @@ -# You can use this file to create a conda environment for this pipeline: -# conda env create -f environment.yml -name: nf-core-sarek-3.0dev -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - conda-forge::markdown=3.1.1 - - conda-forge::pymdown-extensions=6.0 - - conda-forge::pygments=2.5.2 diff --git a/lib/Checks.groovy b/lib/Checks.groovy index 0c912c401b..63c9cf5cfc 100644 --- a/lib/Checks.groovy +++ b/lib/Checks.groovy @@ -1,17 +1,48 @@ +import org.yaml.snakeyaml.Yaml + /* * This file holds several functions used to perform standard checks for the nf-core pipeline template. */ class Checks { + static void check_conda_channels(log) { + Yaml parser = new Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration." 
+ return + } + + // Check that all channels are present + def required_channels = ['conda-forge', 'bioconda', 'defaults'] + def conda_check_failed = !required_channels.every { ch -> ch in channels } + + // Check that they are in the right order + conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda')) + conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults')) + + if (conda_check_failed) { + log.warn "=============================================================================\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" + + " NB: The order of the channels matters!\n" + + "===================================================================================" + } + } + static void aws_batch(workflow, params) { if (workflow.profile.contains('awsbatch')) { - assert !params.awsqueue || !params.awsregion : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" + assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" // Check outdir paths to be S3 buckets if running on AWSBatch // related: https://github.com/nextflow-io/nextflow/issues/813 - assert !params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" + assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" // Prevent trace files to be stored on S3 since S3 does not support rolling files. - assert params.tracedir.startsWith('s3:') : "Specify a local tracedir or run without trace! S3 cannot be used for tracefiles." + assert !params.tracedir.startsWith('s3:') : "Specify a local tracedir or run without trace! S3 cannot be used for tracefiles." } } @@ -32,5 +63,40 @@ class Checks { } } } -} + // Citation string + private static String citation(workflow) { + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + + "* The pipeline\n" + + " https://doi.org/10.12688/f1000research.16665.2\n" + + " https://doi.org/10.5281/zenodo.4468605\n\n" + + "* The nf-core framework\n" + + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + + "* Software dependencies\n" + + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" + } + + // Exit pipeline if incorrect --genome key provided + static void genome_exists(params, log) { + if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + log.error "=============================================================================\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + + " Currently, the available genome keys are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "=============================================================================" + System.exit(0) + } + } + + // Get attribute from genome config file e.g. 
fasta + static String get_genome_attribute(params, attribute) { + def val = '' + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + val = params.genomes[ params.genome ][ attribute ] + } + } + return val + } + +} diff --git a/lib/Completion.groovy b/lib/Completion.groovy index 956a87574e..5a933eb9a5 100644 --- a/lib/Completion.groovy +++ b/lib/Completion.groovy @@ -3,7 +3,7 @@ */ class Completion { - static void email(workflow, params, summary, run_name, projectDir, multiqc_report, log) { + static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) { // Set up the e-mail variables def subject = "[$workflow.manifest.name] Successful: $workflow.runName" @@ -11,36 +11,45 @@ class Completion { subject = "[$workflow.manifest.name] FAILED: $workflow.runName" } - def email_fields = [:] - email_fields['version'] = workflow.manifest.version - email_fields['runName'] = run_name ?: workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary - email_fields['summary']['Date Started'] = workflow.start - email_fields['summary']['Date Completed'] = workflow.complete - email_fields['summary']['Pipeline script file path'] = workflow.scriptFile - email_fields['summary']['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) email_fields['summary']['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) email_fields['summary']['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) email_fields['summary']['Pipeline Git branch/tag'] = workflow.revision - email_fields['summary']['Nextflow Version'] = workflow.nextflow.version - email_fields['summary']['Nextflow Build'] = workflow.nextflow.build - email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + def email_fields = [:] + email_fields['version'] = workflow.manifest.version + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport 
?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + // On success try attach the multiqc report def mqc_report = null try { - if (workflow.success) { + if (workflow.success && !params.skip_multiqc) { mqc_report = multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList) { - log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" + } mqc_report = mqc_report[0] } } @@ -55,37 +64,39 @@ class Completion { } // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$projectDir/assets/email_template.txt") + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("$projectDir/assets/email_template.txt") def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() + def email_txt = txt_template.toString() // Render the HTML template - def hf = new File("$projectDir/assets/email_template.html") + def hf = new File("$projectDir/assets/email_template.html") def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() + def email_html = html_template.toString() // Render the sendmail template - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: params.max_multiqc_email_size.toBytes() ] - def sf = new File("$projectDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() + def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes()] + def sf = new File("$projectDir/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() // Send the HTML e-mail + Map colors = Headers.log_colours(params.monochrome_logs) if (email_address) { try { if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } // Try to send HTML e-mail using sendmail [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "[$workflow.manifest.name] Sent summary e-mail to $email_address (sendmail)" + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" } catch (all) { // Catch failures and try with plaintext def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= params.max_multiqc_email_size.toBytes() ) { + if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { mail_cmd += [ '-A', mqc_report ] } mail_cmd.execute() << email_html - log.info "[$workflow.manifest.name] Sent summary e-mail to $email_address (mail)" + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" } } @@ -102,17 +113,16 @@ class Completion { static void summary(workflow, params, log) { Map colors = 
Headers.log_colours(params.monochrome_logs) - if (workflow.stats.ignoredCount > 0 && workflow.success) { - log.info "-${colors.purple}Warning, pipeline completed, but with errored process(es) ${colors.reset}-" - log.info "-${colors.red}Number of ignored errored process(es) : ${workflow.stats.ignoredCount} ${colors.reset}-" - log.info "-${colors.green}Number of successfully ran process(es) : ${workflow.stats.succeedCount} ${colors.reset}-" - } + if (workflow.success) { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" + if (workflow.stats.ignoredCount == 0) { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + } } else { - Checks.hostname() + Checks.hostname(workflow, params, log) log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" } } } - diff --git a/lib/Headers.groovy b/lib/Headers.groovy index ee3817cfde..19e3220561 100644 --- a/lib/Headers.groovy +++ b/lib/Headers.groovy @@ -6,25 +6,30 @@ class Headers { private static Map log_colours(Boolean monochrome_logs) { Map colorcodes = [:] - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" colorcodes['yellow_bold'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" - colorcodes['red'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + colorcodes['red'] = monochrome_logs ? 
'' : "\033[1;91m" return colorcodes } + static String dashed_line(monochrome_logs) { + Map colors = log_colours(monochrome_logs) + return "-${colors.dim}----------------------------------------------------${colors.reset}-" + } + static String nf_core(workflow, monochrome_logs) { Map colors = log_colours(monochrome_logs) String.format( -""" --${colors.dim}----------------------------------------------------${colors.reset}- + """\n +${dashed_line(monochrome_logs)} ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} @@ -38,7 +43,7 @@ class Headers { ${colors.white}`${colors.green}|${colors.white}____${colors.green}\\${colors.white}´${colors.reset} ${colors.purple} ${workflow.manifest.name} v${workflow.manifest.version}${colors.reset} --${colors.dim}--------------------------------------------------${colors.reset}- +${dashed_line(monochrome_logs)} """.stripIndent() ) } diff --git a/lib/Schema.groovy b/lib/Schema.groovy index a4ac82173a..c2cad355a5 100644 --- a/lib/Schema.groovy +++ b/lib/Schema.groovy @@ -4,19 +4,19 @@ import groovy.json.JsonSlurper -class JSON { +class Schema { /* * This method tries to read a JSON params file */ - private static LinkedHashMap params_get(String path) { - def usage = new LinkedHashMap() + private static LinkedHashMap params_load(String json_schema) { + def params_map = new LinkedHashMap() try { - usage = params_try(path) + params_map = params_read(json_schema) } catch (Exception e) { println "Could not read parameters settings from JSON. $e" - usage = new LinkedHashMap() + params_map = new LinkedHashMap() } - return usage + return params_map } /* @@ -28,199 +28,201 @@ class JSON { Group - */ - private static LinkedHashMap params_try(String path) throws Exception { - - def json = new File(path).text - def Map usage = (Map) new JsonSlurper().parseText(json).get('properties') - + private static LinkedHashMap params_read(String json_schema) throws Exception { + def json = new File(json_schema).text + def Map json_params = (Map) new JsonSlurper().parseText(json).get('definitions') /* Tree looks like this in nf-core schema - * properties <- this is what the first get('properties') gets us + * definitions <- this is what the first get('definitions') gets us group 1 - properties + title description + properties + parameter 1 + type + description + parameter 2 + type + description group 2 - properties - description - group 3 - properties + title description + properties + parameter 1 + type + description */ - def output_map = new LinkedHashMap() - - // Lets go deeper - usage.each { key, val -> - def Map submap = usage."$key".properties // Gets the property object of the group + def params_map = new LinkedHashMap() + json_params.each { key, val -> + def Map group = json_params."$key".properties // Gets the property object of the group + def title = json_params."$key".title def sub_params = new LinkedHashMap() - submap.each { innerkey, value -> - sub_params.put("$innerkey", "$value.description") + group.each { innerkey, value -> + sub_params.put(innerkey, value) } - output_map.put("$key", sub_params) + params_map.put(title, sub_params) } - return output_map + return params_map } - static String params_help(path, command) { - String output = "Typical pipeline command:\n\n" - output += " ${command}\n\n" - output += params_beautify(params_get(path)) + /* + * Get maximum number of characters across all 
parameter names + */ + private static Integer params_max_chars(params_map) { + Integer max_chars = 0 + for (group in params_map.keySet()) { + def group_params = params_map.get(group) // This gets the parameters of that particular group + for (param in group_params.keySet()) { + if (param.size() > max_chars) { + max_chars = param.size() + } + } + } + return max_chars } - static String params_beautify(usage) { - String output = "" - for (group in usage.keySet()) { + /* + * Beautify parameters for --help + */ + private static String params_help(workflow, params, json_schema, command) { + String output = Headers.nf_core(workflow, params.monochrome_logs) + "\n" + output += "Typical pipeline command:\n\n" + output += " ${command}\n\n" + def params_map = params_load(json_schema) + def max_chars = params_max_chars(params_map) + 1 + for (group in params_map.keySet()) { output += group + "\n" - def params = usage.get(group) // This gets the parameters of that particular group - for (par in params.keySet()) { - output+= " \u001B[1m" + par.padRight(27) + "\u001B[1m" + params.get(par) + "\n" + def group_params = params_map.get(group) // This gets the parameters of that particular group + for (param in group_params.keySet()) { + def type = "[" + group_params.get(param).type + "]" + def description = group_params.get(param).description + output += " \u001B[1m--" + param.padRight(max_chars) + "\u001B[1m" + type.padRight(10) + description + "\n" } output += "\n" } + output += Headers.dashed_line(params.monochrome_logs) + output += "\n\n" + Checks.citation(workflow) + output += "\n\n" + Headers.dashed_line(params.monochrome_logs) return output } - private static LinkedHashMap params_summary(workflow, params, run_name, step, tools, skip_qc, annotate_tools) { - def Map summary = [:] - if (workflow.revision) summary['Pipeline Release'] = workflow.revision - summary['Run Name'] = run_name ?: workflow.runName - summary['Max Resources'] = "${params.max_memory} memory, ${params.max_cpus} cpus, ${params.max_time} time per job" - if (workflow.containerEngine) summary['Container'] = "${workflow.containerEngine} - ${workflow.container}" - summary['Input'] = params.input - summary['Step'] = step - summary['Genome'] = params.genome - if (params.no_intervals && step != 'annotate') summary['Intervals'] = 'Do not use' - summary['Nucleotides/s'] = params.nucleotides_per_second - if (params.sentieon) summary['Sention'] = "Using Sentieon for Preprocessing and/or Variant Calling" - if (params.skip_qc) summary['QC tools skipped'] = skip_qc.join(', ') - if (params.target_bed) summary['Target BED'] = params.target_bed - if (params.tools) summary['Tools'] = tools.join(', ') - if (params.trim_fastq || params.split_fastq) summary['Modify fastqs'] = "trim and/or split" - - if (params.trim_fastq) { - summary['Fastq trim'] = "Fastq trim selected" - summary['Trim R1'] = "${params.clip_r1} bp" - summary['Trim R2'] = "${params.clip_r2} bp" - summary["Trim 3 R1"] = "${params.three_prime_clip_r1} bp" - summary["Trim 3 R2"] = "${params.three_prime_clip_r2} bp" - summary['NextSeq Trim'] = "${params.trim_nextseq} bp" - summary['Saved Trimmed Fastq'] = params.save_trimmed ? 'Yes' : 'No' - } - if (params.split_fastq) summary['Reads in fastq'] = params.split_fastq - - summary['MarkDuplicates'] = "Options" - summary['Java options'] = params.markdup_java_options - summary['GATK Spark'] = params.use_gatk_spark ? 'Yes' : 'No' - - summary['Save BAMs mapped'] = params.save_bam_mapped ? 
'Yes' : 'No' - summary['Skip MarkDuplicates'] = params.skip_markduplicates ? 'Yes' : 'No' - - if ('ascat' in tools) { - summary['ASCAT'] = "Options" - if (params.ascat_purity) summary['purity'] = params.ascat_purity - if (params.ascat_ploidy) summary['ploidy'] = params.ascat_ploidy - } - - if ('controlfreec' in tools) { - summary['Control-FREEC'] = "Options" - if (params.cf_window) summary['window'] = params.cf_window - if (params.cf_coeff) summary['coeff of variation'] = params.cf_coeff - if (params.cf_ploidy) summary['ploidy'] = params.cf_ploidy + /* + * Groovy Map summarising parameters/workflow options used by the pipeline + */ + private static LinkedHashMap params_summary_map(workflow, params, json_schema) { + // Get a selection of core Nextflow workflow options + def Map workflow_summary = [:] + if (workflow.revision) { + workflow_summary['revision'] = workflow.revision } - - if ('haplotypecaller' in tools) summary['GVCF'] = params.generate_gvcf ? 'Yes' : 'No' - if ('strelka' in tools && 'manta' in tools) summary['Strelka BP'] = params.no_strelka_bp ? 'No' : 'Yes' - if (params.pon && ('mutect2' in tools || (params.sentieon && 'tnscope' in tools))) summary['Panel of normals'] = params.pon - - if (params.annotate_tools) summary['Tools to annotate'] = annotate_tools.join(', ') - - if (params.annotation_cache) { - summary['Annotation cache'] = "Enabled" - if (params.snpeff_cache) summary['snpEff cache'] = params.snpeff_cache - if (params.vep_cache) summary['VEP cache'] = params.vep_cache + workflow_summary['runName'] = workflow.runName + if (workflow.containerEngine) { + workflow_summary['containerEngine'] = "$workflow.containerEngine" } - - if (params.cadd_cache) { - summary['CADD cache'] = "Enabled" - if (params.cadd_indels) summary['CADD indels'] = params.cadd_indels - if (params.cadd_wg_snvs) summary['CADD wg snvs'] = params.cadd_wg_snvs + if (workflow.container) { + workflow_summary['container'] = "$workflow.container" } - - if (params.genesplicer) summary['genesplicer'] = "Enabled" - - if (params.igenomes_base && !params.igenomes_ignore) summary['AWS iGenomes base'] = params.igenomes_base - if (params.igenomes_ignore) summary['AWS iGenomes'] = "Do not use" - if (params.genomes_base && !params.igenomes_ignore) summary['Genomes base'] = params.genomes_base - - summary['Save Reference'] = params.save_reference ? 
'Yes' : 'No' - - if (params.ac_loci) summary['Loci'] = params.ac_loci - if (params.ac_loci_gc) summary['Loci GC'] = params.ac_loci_gc - if (params.bwa) summary['BWA indexes'] = params.bwa - if (params.chr_dir) summary['Chromosomes'] = params.chr_dir - if (params.chr_length) summary['Chromosomes length'] = params.chr_length - if (params.dbsnp) summary['dbsnp'] = params.dbsnp - if (params.dbsnp_index) summary['dbsnp index'] = params.dbsnp_index - if (params.dict) summary['dict'] = params.dict - if (params.fasta) summary['fasta reference'] = params.fasta - if (params.fasta_fai) summary['fasta index'] = params.fasta_fai - if (params.germline_resource) summary['germline resource'] = params.germline_resource - if (params.germline_resource_index) summary['germline resource index'] = params.germline_resource_index - if (params.intervals) summary['intervals'] = params.intervals - if (params.known_indels) summary['known indels'] = params.known_indels - if (params.known_indels_index) summary['known indels index'] = params.known_indels_index - if (params.mappability) summary['Mappability'] = params.mappability - if (params.snpeff_cache) summary['snpEff cache'] = params.snpeff_cache - if (params.snpeff_db) summary['snpEff DB'] = params.snpeff_db - if (params.species) summary['snpEff species'] = params.species - if (params.vep_cache) summary['VEP cache'] = params.vep_cache - if (params.vep_cache_version) summary['VEP cache version'] = params.vep_cache_version - - summary['Output dir'] = params.outdir - summary['Publish dir mode'] = params.publish_dir_mode - if (params.sequencing_center) summary['Sequenced by'] = params.sequencing_center - - summary['Launch dir'] = workflow.launchDir - summary['Working dir'] = workflow.workDir - summary['Script dir'] = workflow.projectDir - summary['User'] = workflow.userName - - if (params.multiqc_config) summary['MultiQC config'] = params.multiqc_config - - summary['Config Profile'] = workflow.profile - - if (params.config_profile_description) summary['Description'] = params.config_profile_description - if (params.config_profile_contact) summary['Contact'] = params.config_profile_contact - if (params.config_profile_url) summary['URL'] = params.config_profile_url - - summary['Config Files'] = workflow.configFiles.join(', ') - - if (params.email || params.email_on_fail) { - summary['E-mail Address'] = params.email - summary['E-mail on failure'] = params.email_on_fail - summary['MultiQC maxsize'] = params.max_multiqc_email_size + workflow_summary['launchDir'] = workflow.launchDir + workflow_summary['workDir'] = workflow.workDir + workflow_summary['projectDir'] = workflow.projectDir + workflow_summary['userName'] = workflow.userName + workflow_summary['profile'] = workflow.profile + workflow_summary['configFiles'] = workflow.configFiles.join(', ') + + // Get pipeline parameters defined in JSON Schema + def Map params_summary = [:] + def blacklist = ['hostnames'] + def params_map = params_load(json_schema) + for (group in params_map.keySet()) { + def sub_params = new LinkedHashMap() + def group_params = params_map.get(group) // This gets the parameters of that particular group + for (param in group_params.keySet()) { + if (params.containsKey(param) && !blacklist.contains(param)) { + def params_value = params.get(param) + def schema_value = group_params.get(param).default + def param_type = group_params.get(param).type + if (schema_value == null) { + if (param_type == 'boolean') { + schema_value = false + } + if (param_type == 'string') { + schema_value = '' + } + if 
(param_type == 'integer') { + schema_value = 0 + } + } else { + if (param_type == 'string') { + if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { + def sub_string = schema_value.replace('\$projectDir','') + sub_string = sub_string.replace('\${projectDir}','') + if (params_value.contains(sub_string)) { + schema_value = params_value + } + } + if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { + def sub_string = schema_value.replace('\$params.outdir','') + sub_string = sub_string.replace('\${params.outdir}','') + if ("${params.outdir}${sub_string}" == params_value) { + schema_value = params_value + } + } + } + } + + if (params_value != schema_value) { + sub_params.put("$param", params_value) + } + } + } + params_summary.put(group, sub_params) } + return [ 'Core Nextflow options' : workflow_summary ] << params_summary + } - if (workflow.profile.contains('awsbatch')) { - summary['AWS Region'] = params.awsregion - summary['AWS Queue'] = params.awsqueue - summary['AWS CLI'] = params.awscli + /* + * Beautify parameters for summary and return as string + */ + private static String params_summary_log(workflow, params, json_schema) { + String output = Headers.nf_core(workflow, params.monochrome_logs) + "\n" + def params_map = params_summary_map(workflow, params, json_schema) + def max_chars = params_max_chars(params_map) + for (group in params_map.keySet()) { + def group_params = params_map.get(group) // This gets the parameters of that particular group + if (group_params) { + output += group + "\n" + for (param in group_params.keySet()) { + output += " \u001B[1m" + param.padRight(max_chars) + ": \u001B[1m" + group_params.get(param) + "\n" + } + output += "\n" + } } - - return summary + output += Headers.dashed_line(params.monochrome_logs) + output += "\n\n" + Checks.citation(workflow) + output += "\n\n" + Headers.dashed_line(params.monochrome_logs) + return output } - static String params_mqc_summary(summary) { - String yaml_file_text = """ - id: 'nf-core-sarek-summary' - description: " - this information is collected when the pipeline is started." - section_name: 'nf-core/sarek Workflow Summary' - section_href: 'https://github.com/nf-core/sarek' - plot_type: 'html' - data: | -
<dl class=\"dl-horizontal\"> - ${summary.collect { k,v -> "<dt>$k</dt><dd><samp>${v ?: 'N/A'}</samp></dd>" }.join("\n")} - </dl> - """.stripIndent() + static String params_summary_multiqc(workflow, summary) { + String summary_section = '' + for (group in summary.keySet()) { + def group_params = summary.get(group) // This gets the parameters of that particular group + if (group_params) { + summary_section += "<p style=\"font-size:110%\"><b>$group</b></p>\n" + summary_section += "<dl class=\"dl-horizontal\">\n" + for (param in group_params.keySet()) { + summary_section += "<dt>$param</dt><dd><samp>${group_params.get(param) ?: 'N/A'}</samp></dd>\n" + } + summary_section += "</dl>
\n" + } + } + String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" return yaml_file_text } } diff --git a/main.nf b/main.nf index d571c4f9a0..0c77129ce8 100644 --- a/main.nf +++ b/main.nf @@ -22,1922 +22,76 @@ nf-core/sarek: nextflow.enable.dsl=2 -// Print help message if required +//////////////////////////////////////////////////// +/* -- PRINT HELP -- */ +//////////////////////////////////////////////////// +def json_schema = "$projectDir/nextflow_schema.json" if (params.help) { - def command = "nextflow run nf-core/sarek -profile docker --input sample.tsv" - log.info Schema.params_help("$projectDir/nextflow_schema.json", command) + def command = "nextflow run nf-core/sarek -profile docker --input sample.tsv --genome GRCh38" + log.info Schema.params_help(workflow, params, json_schema, command) exit 0 } -/* --------------------------------------------------------------------------------- - INCLUDE SAREK FUNCTIONS --------------------------------------------------------------------------------- -*/ - -include { - check_parameter_existence; - check_parameter_list; - define_anno_list; - define_skip_qc_list; - define_step_list; - define_tool_list; - extract_bam; - extract_fastq; - extract_fastq_from_dir; - extract_recal; - has_extension -} from './modules/local/functions' - -/* --------------------------------------------------------------------------------- - SET UP CONFIGURATION VARIABLES --------------------------------------------------------------------------------- -*/ - -// Check parameters - -Checks.aws_batch(workflow, params) // Check AWS batch settings -Checks.hostname(workflow, params, log) // Check the hostnames against configured profiles - -// MultiQC - Stage config files - -multiqc_config = file("$projectDir/assets/multiqc_config.yaml", checkIfExists: true) -multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() -output_docs = file("$projectDir/docs/output.md", checkIfExists: true) -output_docs_images = file("$projectDir/docs/images/", checkIfExists: true) - -// Check if genome exists in the config file -if (params.genomes && !params.genomes.containsKey(params.genome) && !params.igenomes_ignore) { - exit 1, "The provided genome '${params.genome}' is not available in the iGenomes file. Currently the available genomes are ${params.genomes.keySet().join(", ")}" -} else if (params.genomes && !params.genomes.containsKey(params.genome) && params.igenomes_ignore) { - exit 1, "The provided genome '${params.genome}' is not available in the genomes file. Currently the available genomes are ${params.genomes.keySet().join(", ")}" -} - -step_list = define_step_list() -step = params.step ? params.step.toLowerCase().replaceAll('-', '').replaceAll('_', '') : '' - -if (step.contains(',')) exit 1, 'You can choose only one step, see --help for more information' -if (!check_parameter_existence(step, step_list)) exit 1, "Unknown step ${step}, see --help for more information" - -tool_list = define_tool_list() -tools = params.tools ? 
params.tools.split(',').collect{it.trim().toLowerCase().replaceAll('-', '').replaceAll('_', '')} : [] -if (step == 'controlfreec') tools = ['controlfreec'] -if (!check_parameter_list(tools, tool_list)) exit 1, 'Unknown tool(s), see --help for more information' - -skip_qc_list = define_skip_qc_list() -skip_qc = params.skip_qc ? params.skip_qc == 'all' ? skip_qc_list : params.skip_qc.split(',').collect{it.trim().toLowerCase().replaceAll('-', '').replaceAll('_', '')} : [] -if (!check_parameter_list(skip_qc, skip_qc_list)) exit 1, 'Unknown QC tool(s), see --help for more information' - -anno_list = define_anno_list() -annotate_tools = params.annotate_tools ? params.annotate_tools.split(',').collect{it.trim().toLowerCase().replaceAll('-', '')} : [] -if (!check_parameter_list(annotate_tools,anno_list)) exit 1, 'Unknown tool(s) to annotate, see --help for more information' - -if (!(params.aligner in ['bwa-mem', 'bwa-mem2'])) exit 1, 'Unknown aligner, see --help for more information' - -// // Check parameters -if ((params.ascat_ploidy && !params.ascat_purity) || (!params.ascat_ploidy && params.ascat_purity)) exit 1, 'Please specify both --ascat_purity and --ascat_ploidy, or none of them' -if (params.cf_window && params.cf_coeff) exit 1, 'Please specify either --cf_window OR --cf_coeff, but not both of them' -if (params.umi && !(params.read_structure1 && params.read_structure2)) exit 1, 'Please specify both --read_structure1 and --read_structure2, when using --umi' - -// Handle input -tsv_path = null -if (params.input && (has_extension(params.input, "tsv") || has_extension(params.input, "vcf") || has_extension(params.input, "vcf.gz"))) tsv_path = params.input -if (params.input && (has_extension(params.input, "vcf") || has_extension(params.input, "vcf.gz"))) step = "annotate" - -save_bam_mapped = params.skip_markduplicates ? true : params.save_bam_mapped ? 
true : false - -// If no input file specified, trying to get TSV files corresponding to step in the TSV directory -// only for steps preparerecalibration, recalibrate, variantcalling and controlfreec -if (!params.input && params.sentieon) { - switch (step) { - case 'mapping': break - case 'recalibrate': tsv_path = "${params.outdir}/preprocessing/tsv/sentieon_deduped.tsv"; break - case 'variantcalling': tsv_path = "${params.outdir}/preprocessing/tsv/sentieon_recalibrated.tsv"; break - case 'annotate': break - default: exit 1, "Unknown step ${step}" - } -} else if (!params.input && !params.sentieon && !params.skip_markduplicates) { - switch (step) { - case 'mapping': break - case 'preparerecalibration': tsv_path = "${params.outdir}/preprocessing/tsv/markduplicates_no_table.tsv"; break - case 'recalibrate': tsv_path = "${params.outdir}/preprocessing/tsv/markduplicates.tsv"; break - case 'variantcalling': tsv_path = "${params.outdir}/preprocessing/tsv/recalibrated.tsv"; break - case 'controlfreec': tsv_path = "${params.outdir}/variant_calling/tsv/control-freec_mpileup.tsv"; break - case 'annotate': break - default: exit 1, "Unknown step ${step}" - } -} else if (!params.input && !params.sentieon && params.skip_markduplicates) { - switch (step) { - case 'mapping': break - case 'preparerecalibration': tsv_path = "${params.outdir}/preprocessing/tsv/mapped.tsv"; break - case 'recalibrate': tsv_path = "${params.outdir}/preprocessing/tsv/mapped_no_markduplicates.tsv"; break - case 'variantcalling': tsv_path = "${params.outdir}/preprocessing/tsv/recalibrated.tsv"; break - case 'controlfreec': tsv_path = "${params.outdir}/variant_calling/tsv/control-freec_mpileup.tsv"; break - case 'annotate': break - default: exit 1, "Unknown step ${step}" - } -} - -input_sample = Channel.empty() -if (tsv_path) { - tsv_file = file(tsv_path) - switch (step) { - case 'mapping': input_sample = extract_fastq(tsv_file); break - case 'preparerecalibration': input_sample = extract_bam(tsv_file); break - case 'recalibrate': input_sample = extract_recal(tsv_file); break - case 'variantcalling': input_sample = extract_bam(tsv_file); break - case 'controlfreec': input_sample = extract_pileup(tsv_file); break - case 'annotate': break - default: exit 1, "Unknown step ${step}" - } -} else if (params.input && !has_extension(params.input, "tsv")) { - log.info "No TSV file" - if (step != 'mapping') exit 1, 'No step other than "mapping" supports a directory as an input' - log.info "Reading ${params.input} directory" - log.warn "[nf-core/sarek] in ${params.input} directory, all fastqs are assuming to be from the same sample, which is assumed to be a germline one" - input_sample = extract_fastq_from_dir(params.input) - tsv_file = params.input // used in the reports -} else if (tsv_path && step == 'annotate') { - log.info "Annotating ${tsv_path}" -} else if (step == 'annotate') { - log.info "Trying automatic annotation on files in the VariantCalling/ directory" -} else exit 1, 'No sample were defined, see --help' - -/* --------------------------------------------------------------------------------- - UPDATE MODULES OPTIONS BASED ON PARAMS --------------------------------------------------------------------------------- -*/ - -modules = params.modules - -if (params.save_reference) modules['build_intervals'].publish_files = ['bed':'intervals'] -if (params.save_reference) modules['bwa_index'].publish_files = ['amb':'bwa', 'ann':'bwa', 'bwt':'bwa', 'pac':'bwa', 'sa':'bwa'] -if (params.save_reference) modules['bwamem2_index'].publish_files = 
-
-/*
---------------------------------------------------------------------------------
-   UPDATE MODULES OPTIONS BASED ON PARAMS
---------------------------------------------------------------------------------
-*/
-
-modules = params.modules
-
-if (params.save_reference) modules['build_intervals'].publish_files          = ['bed':'intervals']
-if (params.save_reference) modules['bwa_index'].publish_files                = ['amb':'bwa', 'ann':'bwa', 'bwt':'bwa', 'pac':'bwa', 'sa':'bwa']
-if (params.save_reference) modules['bwamem2_index'].publish_files            = ['0123':'bwamem2', 'amb':'bwamem2', 'ann':'bwamem2', 'bwt.2bit.64':'bwamem2', 'bwt.8bit.32':'bwamem2', 'pac':'bwamem2']
-if (params.save_reference) modules['create_intervals_bed'].publish_files     = ['bed':'intervals']
-if (params.save_reference) modules['dict'].publish_files                     = ['dict':'dict']
-if (params.save_reference) modules['index_target_bed'].publish_files         = ['bed.gz':'target', 'bed.gz.tbi':'target']
-if (params.save_reference) modules['msisensor_scan'].publish_files           = ['list':'msi']
-if (params.save_reference) modules['samtools_faidx'].publish_files           = ['fai':'fai']
-if (params.save_reference) modules['tabix_dbsnp'].publish_files              = ['vcf.gz.tbi':'dbsnp']
-if (params.save_reference) modules['tabix_germline_resource'].publish_files  = ['vcf.gz.tbi':'germline_resource']
-if (params.save_reference) modules['tabix_known_indels'].publish_files       = ['vcf.gz.tbi':'known_indels']
-if (params.save_reference) modules['tabix_pon'].publish_files                = ['vcf.gz.tbi':'pon']
-if (save_bam_mapped) modules['samtools_index_mapping'].publish_files         = ['bam':'mapped', 'bai':'mapped']
-if (params.skip_markduplicates) modules['baserecalibrator'].publish_files    = ['recal.table':'mapped']
-if (params.skip_markduplicates) modules['gatherbqsrreports'].publish_files   = ['recal.table':'mapped']
-
-/*
---------------------------------------------------------------------------------
-   CHECKING REFERENCES
---------------------------------------------------------------------------------
-*/
-
-// Initialize each params in params.genomes, catch the command line first if it was defined
-params.ac_loci                 = params.genome ? params.genomes[params.genome].ac_loci                 ?: false : false
-params.ac_loci_gc              = params.genome ? params.genomes[params.genome].ac_loci_gc              ?: false : false
-params.bwa                     = params.genome ? params.genomes[params.genome].bwa                     ?: false : false
-params.chr_dir                 = params.genome ? params.genomes[params.genome].chr_dir                 ?: false : false
-params.chr_length              = params.genome ? params.genomes[params.genome].chr_length              ?: false : false
-params.dbsnp                   = params.genome ? params.genomes[params.genome].dbsnp                   ?: false : false
-params.dbsnp_index             = params.genome ? params.genomes[params.genome].dbsnp_index             ?: false : false
-params.dict                    = params.genome ? params.genomes[params.genome].dict                    ?: false : false
-params.fasta                   = params.genome ? params.genomes[params.genome].fasta                   ?: false : false
-params.fasta_fai               = params.genome ? params.genomes[params.genome].fasta_fai               ?: false : false
-params.germline_resource       = params.genome ? params.genomes[params.genome].germline_resource       ?: false : false
-params.germline_resource_index = params.genome ? params.genomes[params.genome].germline_resource_index ?: false : false
-params.intervals               = params.genome ? params.genomes[params.genome].intervals               ?: false : false
-params.known_indels            = params.genome ? params.genomes[params.genome].known_indels            ?: false : false
-params.known_indels_index      = params.genome ? params.genomes[params.genome].known_indels_index      ?: false : false
-params.mappability             = params.genome ? params.genomes[params.genome].mappability             ?: false : false
-params.snpeff_db               = params.genome ? params.genomes[params.genome].snpeff_db               ?: false : false
-params.species                 = params.genome ? params.genomes[params.genome].species                 ?: false : false
-params.vep_cache_version       = params.genome ? params.genomes[params.genome].vep_cache_version       ?: false : false
-
-file("${params.outdir}/no_file").text = "no_file\n"
-
-// Initialize file channels based on params, defined in the params.genomes[params.genome] scope
-chr_dir           = params.chr_dir           ? file(params.chr_dir)           : file("${params.outdir}/no_file")
-chr_length        = params.chr_length        ? file(params.chr_length)        : file("${params.outdir}/no_file")
-dbsnp             = params.dbsnp             ? file(params.dbsnp)             : file("${params.outdir}/no_file")
-fasta             = params.fasta             ? file(params.fasta)             : file("${params.outdir}/no_file")
-germline_resource = params.germline_resource ? file(params.germline_resource) : file("${params.outdir}/no_file")
-known_indels      = params.known_indels      ? file(params.known_indels)      : file("${params.outdir}/no_file")
-loci              = params.ac_loci           ? file(params.ac_loci)           : file("${params.outdir}/no_file")
-loci_gc           = params.ac_loci_gc        ? file(params.ac_loci_gc)        : file("${params.outdir}/no_file")
-mappability       = params.mappability       ? file(params.mappability)       : file("${params.outdir}/no_file")
-
-// Initialize value channels based on params, defined in the params.genomes[params.genome] scope
-snpeff_db         = params.snpeff_db         ?: Channel.empty()
-snpeff_species    = params.species           ?: Channel.empty()
-vep_cache_version = params.vep_cache_version ?: Channel.empty()
+////////////////////////////////////////////////////
+/* --            PARAMETER CHECKS             -- */
+////////////////////////////////////////////////////
-// Initialize file channels based on params, not defined within the params.genomes[params.genome] scope
-cadd_indels      = params.cadd_indels      ? file(params.cadd_indels)      : file("${params.outdir}/no_file")
-cadd_indels_tbi  = params.cadd_indels_tbi  ? file(params.cadd_indels_tbi)  : file("${params.outdir}/no_file")
-cadd_wg_snvs     = params.cadd_wg_snvs     ? file(params.cadd_wg_snvs)     : file("${params.outdir}/no_file")
-cadd_wg_snvs_tbi = params.cadd_wg_snvs_tbi ? file(params.cadd_wg_snvs_tbi) : file("${params.outdir}/no_file")
-pon              = params.pon              ? file(params.pon)              : file("${params.outdir}/no_file")
-snpeff_cache     = params.snpeff_cache     ? file(params.snpeff_cache)     : file("${params.outdir}/no_file")
-target_bed       = params.target_bed       ? file(params.target_bed)       : file("${params.outdir}/no_file")
-vep_cache        = params.vep_cache        ? file(params.vep_cache)        : file("${params.outdir}/no_file")
-
-// Initialize value channels based on params, not defined within the params.genomes[params.genome] scope
-read_structure1 = params.read_structure1 ?: Channel.empty()
-read_structure2 = params.read_structure2 ?: Channel.empty()
-
-/*
---------------------------------------------------------------------------------
-   PRINTING SUMMARY
---------------------------------------------------------------------------------
-*/
-
-// Has the run name been specified by the user?
-// This has the bonus effect of catching both -name and --name
-run_name = params.name
-if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)) {
-    run_name = workflow.runName
+// Check that conda channels are set up correctly
+if (params.enable_conda) {
+    Checks.check_conda_channels(log)
 }
-summary = Schema.params_summary(workflow, params, run_name, step, tools, skip_qc, annotate_tools)
-log.info Headers.nf_core(workflow, params.monochrome_logs)
-log.info summary.collect { k,v -> "${k.padRight(20)}: $v" }.join("\n")
-log.info "-\033[2m----------------------------------------------------\033[0m-"
-// params summary for MultiQC
-workflow_summary = Schema.params_mqc_summary(summary)
-workflow_summary = Channel.value(workflow_summary)
-
-if ('mutect2' in tools && !(params.pon)) log.warn "[nf-core/sarek] Mutect2 was requested, but since no panel of normals was given, results will not be optimal"
-if (params.sentieon) log.warn "[nf-core/sarek] Sentieon will be used; this only works if Sentieon is available where nf-core/sarek is run"
-
-/*
---------------------------------------------------------------------------------
-   INCLUDE LOCAL MODULES
---------------------------------------------------------------------------------
-*/
-
-/*
---------------------------------------------------------------------------------
-   INCLUDE LOCAL SUBWORKFLOWS
---------------------------------------------------------------------------------
-*/
-
-include { BUILD_INDICES } from './modules/local/subworkflow/build_indices' addParams(
-    build_intervals_options:         modules['build_intervals'],
-    bwa_index_options:               modules['bwa_index'],
-    bwamem2_index_options:           modules['bwamem2_index'],
-    create_intervals_bed_options:    modules['create_intervals_bed'],
-    gatk_dict_options:               modules['dict'],
-    index_target_bed_options:        modules['index_target_bed'],
-    msisensor_scan_options:          modules['msisensor_scan'],
-    samtools_faidx_options:          modules['samtools_faidx'],
-    tabix_dbsnp_options:             modules['tabix_dbsnp'],
-    tabix_germline_resource_options: modules['tabix_germline_resource'],
-    tabix_known_indels_options:      modules['tabix_known_indels'],
-    tabix_pon_options:               modules['tabix_pon']
-)
-include { MAPPING } from './modules/local/subworkflow/mapping' addParams(
-    bwamem1_mem_options:    modules['bwa_mem1_mem'],
-    bwamem2_mem_options:    modules['bwa_mem2_mem'],
-    merge_bam_options:      modules['merge_bam_mapping'],
-    qualimap_bamqc_options: modules['qualimap_bamqc_mapping'],
-    samtools_index_options: modules['samtools_index_mapping'],
-    samtools_stats_options: modules['samtools_stats_mapping']
-)
-include { MARKDUPLICATES } from './modules/local/subworkflow/markduplicates' addParams(
-    markduplicates_options: modules['markduplicates']
-)
-include { PREPARE_RECALIBRATION } from './modules/local/subworkflow/prepare_recalibration' addParams(
-    baserecalibrator_options:  modules['baserecalibrator'],
-    gatherbqsrreports_options: modules['gatherbqsrreports']
-)
-include { RECALIBRATE } from './modules/local/subworkflow/recalibrate' addParams(
-    applybqsr_options:      modules['applybqsr'],
-    merge_bam_options:      modules['merge_bam_recalibrate'],
-    qualimap_bamqc_options: modules['qualimap_bamqc_recalibrate'],
-    samtools_index_options: modules['samtools_index_recalibrate'],
-    samtools_stats_options: modules['samtools_stats_recalibrate']
-)
-include { GERMLINE_VARIANT_CALLING } from './modules/local/subworkflow/germline_variant_calling' addParams(
-    concat_gvcf_options:            modules['concat_gvcf'],
-    concat_haplotypecaller_options: modules['concat_haplotypecaller'],
-    genotypegvcf_options:           modules['genotypegvcf'],
-    haplotypecaller_options:        modules['haplotypecaller'],
-    strelka_options:                modules['strelka_germline']
-)
-// include { TUMOR_VARIANT_CALLING } from './modules/local/subworkflow/tumor_variant_calling' addParams(
-// )
-include { PAIR_VARIANT_CALLING } from './modules/local/subworkflow/pair_variant_calling' addParams(
-    manta_options:         modules['manta_somatic'],
-    msisensor_msi_options: modules['msisensor_msi'],
-    strelka_bp_options:    modules['strelka_somatic_bp'],
-    strelka_options:       modules['strelka_somatic']
-)
-
-/*
---------------------------------------------------------------------------------
-   INCLUDE nf-core MODULES
---------------------------------------------------------------------------------
-*/
-
-include { MULTIQC } from './modules/nf-core/software/multiqc'
-
-/*
---------------------------------------------------------------------------------
-   INCLUDE nf-core SUBWORKFLOWS
---------------------------------------------------------------------------------
-*/
-
-include { QC_TRIM } from './modules/nf-core/subworkflow/qc_trim' addParams(
-    fastqc_options:     modules['fastqc'],
-    trimgalore_options: modules['trimgalore']
-)
-// PREPARING CHANNELS FOR PREPROCESSING AND QC
-
-// input_bam = Channel.empty()
-// input_pair_reads = Channel.empty()
-
-// if (step in ['preparerecalibration', 'recalibrate', 'variantcalling', 'controlfreec', 'annotate']) {
-//     input_bam.close()
-//     input_pair_reads.close()
-// } else input_sample.branch(input_pair_reads, input_bam) {has_extension(it[3], "bam") ? 1 : 0}
-
-// (input_bam, input_bam_fastqc) = input_bam.into(2)
-
-// // Removing inputFile2 which is null in case of uBAM
-// input_bam_fastqc = input_bam_fastqc.map {
-//     idPatient, idSample, idRun, inputFile1, inputFile2 ->
-//     [idPatient, idSample, idRun, inputFile1]
-// }
-
-// if (params.split_fastq){
-//     input_pair_reads = input_pair_reads
-//         // newly split fastqs are named based on the split, so the name is easier to catch
-//         .splitFastq(by: params.split_fastq, compress:true, file:"split", pe:true)
-//         .map {idPatient, idSample, idRun, reads1, reads2 ->
-//             // The split fastq read1 is the 4th element (indexed 3), and its name is split_3
-//             // The split fastq read2's name is split_4
-//             // It's followed by which split of the mother fastq file it's actually based on
-//             // Index starts at 1
-//             // Extracting the index to get a new idRun
-//             splitIndex = reads1.fileName.toString().minus("split_3.").minus(".gz")
-//             newIdRun = idRun + "_" + splitIndex
-//             // Giving the files a new nice name
-//             newReads1 = file("${idSample}_${newIdRun}_R1.fastq.gz")
-//             newReads2 = file("${idSample}_${newIdRun}_R2.fastq.gz")
-//             [idPatient, idSample, newIdRun, reads1, reads2]}
-// }
-
-// input_pair_reads.dump(tag:'INPUT')
-
-// (input_pair_reads, input_pair_readstrimgalore, input_pair_readsfastqc) = input_pair_reads.into(3)
-
-
-/*
---------------------------------------------------------------------------------
-   RUN THE WORKFLOW
---------------------------------------------------------------------------------
-*/
+// Check AWS batch settings
+Checks.aws_batch(workflow, params)
+
+// Check the hostnames against configured profiles
+Checks.hostname(workflow, params, log)
+
+// Check genome key exists if provided
+Checks.genome_exists(params, log)
+
+////////////////////////////////////////////////////
+/* --      REFERENCES PARAMETER VALUES        -- */
+////////////////////////////////////////////////////
+/* -- Initialize each params in params.genomes -- */
+/* -- catch the command line first if defined  -- */
+////////////////////////////////////////////////////
+
+params.ac_loci                 = Checks.get_genome_attribute(params, 'ac_loci')
+params.ac_loci_gc              = Checks.get_genome_attribute(params, 'ac_loci_gc')
+params.bwa                     = Checks.get_genome_attribute(params, 'bwa')
+params.chr_dir                 = Checks.get_genome_attribute(params, 'chr_dir')
+params.chr_length              = Checks.get_genome_attribute(params, 'chr_length')
+params.dbsnp                   = Checks.get_genome_attribute(params, 'dbsnp')
+params.dbsnp_index             = Checks.get_genome_attribute(params, 'dbsnp_index')
+params.dict                    = Checks.get_genome_attribute(params, 'dict')
+params.fasta                   = Checks.get_genome_attribute(params, 'fasta')
+params.fasta_fai               = Checks.get_genome_attribute(params, 'fasta_fai')
+params.germline_resource       = Checks.get_genome_attribute(params, 'germline_resource')
+params.germline_resource_index = Checks.get_genome_attribute(params, 'germline_resource_index')
+params.intervals               = Checks.get_genome_attribute(params, 'intervals')
+params.known_indels            = Checks.get_genome_attribute(params, 'known_indels')
+params.known_indels_index      = Checks.get_genome_attribute(params, 'known_indels_index')
+params.mappability             = Checks.get_genome_attribute(params, 'mappability')
+params.snpeff_db               = Checks.get_genome_attribute(params, 'snpeff_db')
+params.species                 = Checks.get_genome_attribute(params, 'species')
+params.vep_cache_version       = Checks.get_genome_attribute(params, 'vep_cache_version')
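Presumably (the helper lives in the pipeline's lib/ and is not shown in this diff), Checks.get_genome_attribute is a null-safe lookup replacing the old `params.genome ? params.genomes[params.genome].X ?: false : false` ternaries; a sketch under that assumption:

// hypothetical reconstruction, for orientation only
static def get_genome_attribute(params, attribute) {
    def val = false
    if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) {
        // fall back to false when the attribute is missing, mirroring the old ternaries
        val = params.genomes[ params.genome ][ attribute ] ?: false
    }
    return val
}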
+
+////////////////////////////////////////////////////
+/* --        PRINT PARAMETER SUMMARY          -- */
+////////////////////////////////////////////////////
+
+def summary_params = Schema.params_summary_map(workflow, params, json_schema)
+log.info Schema.params_summary_log(workflow, params, json_schema)
+
+////////////////////////////////////////////////////
+/* --           RUN THE WORKFLOW              -- */
+////////////////////////////////////////////////////
 workflow {
-/*
---------------------------------------------------------------------------------
-   BUILD INDICES
---------------------------------------------------------------------------------
-*/
-
-    BUILD_INDICES(
-        dbsnp,
-        fasta,
-        germline_resource,
-        known_indels,
-        pon,
-        step,
-        target_bed,
-        tools)
-
-    intervals = BUILD_INDICES.out.intervals
-
-    bwa  = params.bwa       ? file(params.bwa)       : BUILD_INDICES.out.bwa
-    dict = params.dict      ? file(params.dict)      : BUILD_INDICES.out.dict
-    fai  = params.fasta_fai ? file(params.fasta_fai) : BUILD_INDICES.out.fai
-
-    dbsnp_tbi             = params.dbsnp             ? params.dbsnp_index             ? file(params.dbsnp_index)             : BUILD_INDICES.out.dbsnp_tbi                  : file("${params.outdir}/no_file")
-    germline_resource_tbi = params.germline_resource ? params.germline_resource_index ? file(params.germline_resource_index) : BUILD_INDICES.out.germline_resource_tbi      : file("${params.outdir}/no_file")
-    known_indels_tbi      = params.known_indels      ? params.known_indels_index      ? file(params.known_indels_index)      : BUILD_INDICES.out.known_indels_tbi.collect() : file("${params.outdir}/no_file")
-    pon_tbi               = params.pon               ? params.pon_index               ? file(params.pon_index)               : BUILD_INDICES.out.pon_tbi                    : file("${params.outdir}/no_file")
-
-    msisensor_scan    = BUILD_INDICES.out.msisensor_scan
-    target_bed_gz_tbi = BUILD_INDICES.out.target_bed_gz_tbi
-/*
---------------------------------------------------------------------------------
-   PREPROCESSING
---------------------------------------------------------------------------------
-*/
-
-    bam_mapped          = Channel.empty()
-    bam_mapped_qc       = Channel.empty()
-    bam_recalibrated_qc = Channel.empty()
-    input_reads         = Channel.empty()
-    qc_reports          = Channel.empty()
-
-    // STEP 0: QC & TRIM
-    // `--skip_qc fastqc` to skip fastqc
-    // trim only with `--trim_fastq`
-    // additional options to be set up
-
-    QC_TRIM(
-        input_sample,
-        ('fastqc' in skip_qc || step != "mapping"),
-        !(params.trim_fastq))
-
-    reads_input = QC_TRIM.out.reads
-
-    qc_reports = qc_reports.mix(
-        QC_TRIM.out.fastqc_html,
-        QC_TRIM.out.fastqc_zip,
-        QC_TRIM.out.trimgalore_html,
-        QC_TRIM.out.trimgalore_log,
-        QC_TRIM.out.trimgalore_zip)
-
-    // STEP 1: MAPPING READS TO REFERENCE GENOME WITH BWA-MEM
-
-    MAPPING(
-        ('bamqc' in skip_qc),
-        ('samtools' in skip_qc),
-        bwa,
-        fai,
-        fasta,
-        reads_input,
-        save_bam_mapped,
-        step,
-        target_bed)
-
-    bam_mapped    = MAPPING.out.bam
-    bam_mapped_qc = MAPPING.out.qc
-
-    qc_reports = qc_reports.mix(bam_mapped_qc)
-
-    // STEP 2: MARKING DUPLICATES
-
-    MARKDUPLICATES(
-        bam_mapped,
-        step)
-
-    bam_markduplicates = MARKDUPLICATES.out.bam
-
-    if (step == 'preparerecalibration') bam_markduplicates = input_sample
-
-    // STEP 3: CREATING RECALIBRATION TABLES
+    include { SAREK } from './workflows/sarek' addParams( summary_params: summary_params )
+    SAREK ()
-    PREPARE_RECALIBRATION(
-        bam_markduplicates,
-        dbsnp,
-        dbsnp_tbi,
-        dict,
-        fai,
-        fasta,
-        intervals,
-        known_indels,
-        known_indels_tbi,
-        step)
-
-    table_bqsr = PREPARE_RECALIBRATION.out.table_bqsr
-
-    // STEP 4: RECALIBRATING
-    bam_applybqsr = bam_markduplicates.join(table_bqsr)
-
-    if (step == 'recalibrate') bam_applybqsr = input_sample
-
-    RECALIBRATE(
-        ('bamqc' in skip_qc),
-        ('samtools' in skip_qc),
-        bam_applybqsr,
-        dict,
-        fai,
-        fasta,
-        intervals,
-        step,
-        target_bed)
-
-    bam_recalibrated    = RECALIBRATE.out.bam
-    bam_recalibrated_qc = RECALIBRATE.out.qc
-
-    qc_reports = qc_reports.mix(bam_recalibrated_qc)
-
-    bam_variant_calling = bam_recalibrated
-
-    if (step == 'variantcalling') bam_variant_calling = input_sample
-
-    /*
-    --------------------------------------------------------------------------------
-        GERMLINE VARIANT CALLING
-    --------------------------------------------------------------------------------
-    */
-
-    GERMLINE_VARIANT_CALLING(
-        bam_variant_calling,
-        dbsnp,
-        dbsnp_tbi,
-        dict,
-        fai,
-        fasta,
-        intervals,
-        target_bed,
-        target_bed_gz_tbi,
-        tools)
-
-/*
---------------------------------------------------------------------------------
-   SOMATIC VARIANT CALLING
---------------------------------------------------------------------------------
-*/
-
-    // TUMOR_VARIANT_CALLING(
-    //     bam_variant_calling,
-    //     dbsnp,
-    //     dbsnp_tbi,
-    //     dict,
-    //     fai,
-    //     fasta,
-    //     intervals,
-    //     target_bed,
-    //     target_bed_gz_tbi,
-    //     tools)
-
-    PAIR_VARIANT_CALLING(
-        bam_variant_calling,
-        dbsnp,
-        dbsnp_tbi,
-        dict,
-        fai,
-        fasta,
-        intervals,
-        msisensor_scan,
-        target_bed,
-        target_bed_gz_tbi,
-        tools)
-
-/*
---------------------------------------------------------------------------------
-   ANNOTATION
---------------------------------------------------------------------------------
-*/
-
-
-/*
--------------------------------------------------------------------------------- - MULTIQC --------------------------------------------------------------------------------- -*/ - - // GET_SOFTWARE_VERSIONS() - - MULTIQC( - // GET_SOFTWARE_VERSIONS.out.yml, - multiqc_config, - multiqc_custom_config.ifEmpty([]), - workflow_summary, - qc_reports.collect()) -} - -/* --------------------------------------------------------------------------------- - SEND COMPLETION EMAIL --------------------------------------------------------------------------------- -*/ - -workflow.onComplete { - def multiqc_report = [] - Completion.email(workflow, params, summary, run_name, projectDir, multiqc_report, log) - Completion.summary(workflow, params, log) } - -// /* -// -------------------------------------------------------------------------------- -// GERMLINE VARIANT CALLING -// -------------------------------------------------------------------------------- -// */ - -// // STEP MANTA.1 - SINGLE MODE - -// process MantaSingle { -// label 'cpus_max' -// label 'memory_max' - -// tag "${idSample}" - -// publishDir "${params.outdir}/VariantCalling/${idSample}/Manta", mode: params.publish_dir_mode - -// input: -// set idPatient, idSample, file(bam), file(bai) from bamMantaSingle -// file(fasta) from fasta -// file(fastaFai) from fai -// file(targetBED) from ch_target_bed - -// output: -// set val("Manta"), idPatient, idSample, file("*.vcf.gz"), file("*.vcf.gz.tbi") into vcfMantaSingle - -// when: 'manta' in tools - -// script: -// beforeScript = params.target_bed ? "bgzip --threads ${task.cpus} -c ${targetBED} > call_targets.bed.gz ; tabix call_targets.bed.gz" : "" -// options = params.target_bed ? "--exome --callRegions call_targets.bed.gz" : "" -// status = status_map[idPatient, idSample] -// input_bam = status == 0 ? "--bam" : "--tumorBam" -// vcftype = status == 0 ? 
"diploid" : "tumor" -// """ -// ${beforeScript} -// configManta.py \ -// ${input_bam} ${bam} \ -// --reference ${fasta} \ -// ${options} \ -// --runDir Manta - -// python Manta/runWorkflow.py -m local -j ${task.cpus} - -// mv Manta/results/variants/candidateSmallIndels.vcf.gz \ -// Manta_${idSample}.candidateSmallIndels.vcf.gz -// mv Manta/results/variants/candidateSmallIndels.vcf.gz.tbi \ -// Manta_${idSample}.candidateSmallIndels.vcf.gz.tbi -// mv Manta/results/variants/candidateSV.vcf.gz \ -// Manta_${idSample}.candidateSV.vcf.gz -// mv Manta/results/variants/candidateSV.vcf.gz.tbi \ -// Manta_${idSample}.candidateSV.vcf.gz.tbi -// mv Manta/results/variants/${vcftype}SV.vcf.gz \ -// Manta_${idSample}.${vcftype}SV.vcf.gz -// mv Manta/results/variants/${vcftype}SV.vcf.gz.tbi \ -// Manta_${idSample}.${vcftype}SV.vcf.gz.tbi -// """ -// } - -// vcfMantaSingle = vcfMantaSingle.dump(tag:'Single Manta') - -// // STEP TIDDIT - -// process TIDDIT { -// tag "${idSample}" - -// publishDir params.outdir, mode: params.publish_dir_mode, -// saveAs: { -// if (it == "TIDDIT_${idSample}.vcf") "VariantCalling/${idSample}/TIDDIT/${it}" -// else "Reports/${idSample}/TIDDIT/${it}" -// } - -// input: -// set idPatient, idSample, file(bam), file(bai) from bamTIDDIT -// file(fasta) from fasta -// file(fastaFai) from fai - -// output: -// set val("TIDDIT"), idPatient, idSample, file("*.vcf.gz"), file("*.tbi") into vcfTIDDIT -// set file("TIDDIT_${idSample}.old.vcf"), file("TIDDIT_${idSample}.ploidy.tab"), file("TIDDIT_${idSample}.signals.tab"), file("TIDDIT_${idSample}.wig"), file("TIDDIT_${idSample}.gc.wig") into tidditOut - -// when: 'tiddit' in tools - -// script: -// """ -// tiddit --sv -o TIDDIT_${idSample} --bam ${bam} --ref ${fasta} - -// mv TIDDIT_${idSample}.vcf TIDDIT_${idSample}.old.vcf - -// grep -E "#|PASS" TIDDIT_${idSample}.old.vcf > TIDDIT_${idSample}.vcf - -// bgzip --threads ${task.cpus} -c TIDDIT_${idSample}.vcf > TIDDIT_${idSample}.vcf.gz - -// tabix TIDDIT_${idSample}.vcf.gz -// """ -// } - -// vcfTIDDIT = vcfTIDDIT.dump(tag:'TIDDIT') - -// // STEP FREEBAYES SINGLE MODE - -// process FreebayesSingle { -// tag "${idSample}-${intervalBed.baseName}" - -// label 'cpus_1' - -// input: -// set idPatient, idSample, file(bam), file(bai), file(intervalBed) from bamFreebayesSingle -// file(fasta) from fasta -// file(fastaFai) from ch_software_versions_yaml - -// output: -// set val("FreeBayes"), idPatient, idSample, file("${intervalBed.baseName}_${idSample}.vcf") into vcfFreebayesSingle - -// when: 'freebayes' in tools - -// script: -// intervalsOptions = params.no_intervals ? "" : "-t ${intervalBed}" -// """ -// freebayes \ -// -f ${fasta} \ -// --min-alternate-fraction 0.1 \ -// --min-mapping-quality 1 \ -// ${intervalsOptions} \ -// ${bam} > ${intervalBed.baseName}_${idSample}.vcf -// """ -// } - -// vcfFreebayesSingle = vcfFreebayesSingle.groupTuple(by: [0,1,2]) - -// /* -// -------------------------------------------------------------------------------- -// SOMATIC VARIANT CALLING -// -------------------------------------------------------------------------------- -// */ -// // Ascat, pileup, pileups with no intervals, recalibrated BAMs -// (bamAscat, bamMpileup, bamMpileupNoInt, bamRecalAll) = bamRecalAll.into(4) - -// // separate BAM by status -// bamNormal = Channel.create() -// bamTumor = Channel.create() - -// bamRecalAll -// .choice(bamTumor, bamNormal) {status_map[it[0], it[1]] == 0 ? 
-
-// // Crossing Normal and Tumor to get a T/N pair for Somatic Variant Calling
-// // Remapping channel to remove common key idPatient
-// pairBam = bamNormal.cross(bamTumor).map {
-//     normal, tumor ->
-//     [normal[0], normal[1], normal[2], normal[3], tumor[1], tumor[2], tumor[3]]
-// }
-
-// pairBam = pairBam.dump(tag:'BAM Somatic Pair')
-
-// // Manta, Strelka, Mutect2, MSIsensor
-// (pairBamManta, pairBamStrelka, pairBamStrelkaBP, pairBamCalculateContamination, pairBamFilterMutect2, pairBamMsisensor, pairBamCNVkit, pairBam) = pairBam.into(8)
-
-// // Making Pair Bam for Sentieon
-
-// // separate BAM by status
-// bam_sentieon_normal = Channel.create()
-// bam_sentieon_tumor = Channel.create()
-
-// bam_sentieon_all
-//     .choice(bam_sentieon_tumor, bam_sentieon_normal) {status_map[it[0], it[1]] == 0 ? 1 : 0}
-
-// // Crossing Normal and Tumor to get a T/N pair for Somatic Variant Calling
-// // Remapping channel to remove common key idPatient
-
-// bam_pair_sentieon_TNscope = bam_sentieon_normal.cross(bam_sentieon_tumor).map {
-//     normal, tumor ->
-//     [normal[0], normal[1], normal[2], normal[3], normal[4], tumor[1], tumor[2], tumor[3], tumor[4]]
-// }
-
-// intervalPairBam = pairBam.spread(bedIntervals)
-
-// bamMpileup = bamMpileup.spread(intMpileup)
-
-// // intervals for Mutect2 calls, FreeBayes and pileups for Mutect2 filtering
-// (pairBamMutect2, pairBamFreeBayes, pairBamPileupSummaries) = intervalPairBam.into(3)
-
-// // STEP FREEBAYES
-
-// process FreeBayes {
-//     tag "${idSampleTumor}_vs_${idSampleNormal}-${intervalBed.baseName}"
-
-//     label 'cpus_1'
-
-//     input:
-//         set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor), file(intervalBed) from pairBamFreeBayes
-//         file(fasta) from fasta
-//         file(fastaFai) from fai
-
-//     output:
-//         set val("FreeBayes"), idPatient, val("${idSampleTumor}_vs_${idSampleNormal}"), file("${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf") into vcfFreeBayes
-
-//     when: 'freebayes' in tools
-
-//     script:
-//     intervalsOptions = params.no_intervals ?
"" : "-t ${intervalBed}" -// """ -// freebayes \ -// -f ${fasta} \ -// --pooled-continuous \ -// --pooled-discrete \ -// --genotype-qualities \ -// --report-genotype-likelihood-max \ -// --allele-balance-priors-off \ -// --min-alternate-fraction 0.03 \ -// --min-repeat-entropy 1 \ -// --min-alternate-count 2 \ -// ${intervalsOptions} \ -// ${bamTumor} \ -// ${bamNormal} > ${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf -// """ -// } - -// vcfFreeBayes = vcfFreeBayes.groupTuple(by:[0,1,2]) - -// // STEP GATK MUTECT2.1 - RAW CALLS - -// process Mutect2 { -// tag "${idSampleTumor}_vs_${idSampleNormal}-${intervalBed.baseName}" - -// label 'cpus_1' - -// input: -// set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor), file(intervalBed) from pairBamMutect2 -// file(dict) from dict -// file(fasta) from fasta -// file(fastaFai) from fai -// file(germlineResource) from germline_resource -// file(germlineResourceIndex) from germline_resource_tbi -// file(intervals) from intervals -// file(pon) from pon -// file(ponIndex) from pon_tbi - -// output: -// set val("Mutect2"), idPatient, val("${idSampleTumor}_vs_${idSampleNormal}"), file("${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf") into mutect2Output -// set idPatient, idSampleNormal, idSampleTumor, file("${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf.stats") optional true into intervalStatsFiles -// set idPatient, val("${idSampleTumor}_vs_${idSampleNormal}"), file("${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf.stats"), file("${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf") optional true into mutect2Stats - -// when: 'mutect2' in tools - -// script: -// // please make a panel-of-normals, using at least 40 samples -// // https://gatkforums.broadinstitute.org/gatk/discussion/11136/how-to-call-somatic-mutations-using-gatk4-mutect2 -// PON = params.pon ? "--panel-of-normals ${pon}" : "" -// intervalsOptions = params.no_intervals ? "" : "-L ${intervalBed}" -// softClippedOption = params.ignore_soft_clipped_bases ? 
"--dont-use-soft-clipped-bases true" : "" -// """ -// # Get raw calls -// gatk --java-options "-Xmx${task.memory.toGiga()}g" \ -// Mutect2 \ -// -R ${fasta}\ -// -I ${bamTumor} -tumor ${idSampleTumor} \ -// -I ${bamNormal} -normal ${idSampleNormal} \ -// ${intervalsOptions} \ -// ${softClippedOption} \ -// --germline-resource ${germlineResource} \ -// ${PON} \ -// -O ${intervalBed.baseName}_${idSampleTumor}_vs_${idSampleNormal}.vcf -// """ -// } - -// mutect2Output = mutect2Output.groupTuple(by:[0,1,2]) -// mutect2Stats = mutect2Stats.groupTuple(by:[0,1]) - -// // STEP GATK MUTECT2.2 - MERGING STATS - -// process MergeMutect2Stats { -// tag "${idSamplePair}" - -// publishDir "${params.outdir}/VariantCalling/${idSamplePair}/Mutect2", mode: params.publish_dir_mode - -// input: -// set idPatient, idSamplePair, file(statsFiles), file(vcf) from mutect2Stats // Actual stats files and corresponding VCF chunks -// file(dict) from dict -// file(fasta) from fasta -// file(fastaFai) from fai -// file(germlineResource) from germline_resource -// file(germlineResourceIndex) from germline_resource_tbi -// file(intervals) from intervals - -// output: -// set idPatient, idSamplePair, file("${idSamplePair}.vcf.gz.stats") into mergedStatsFile - -// when: 'mutect2' in tools - -// script: -// stats = statsFiles.collect{ "-stats ${it} " }.join(' ') -// """ -// gatk --java-options "-Xmx${task.memory.toGiga()}g" \ -// MergeMutectStats \ -// ${stats} \ -// -O ${idSamplePair}.vcf.gz.stats -// """ -// } - -// // we are merging the VCFs that are called separatelly for different intervals -// // so we can have a single sorted VCF containing all the calls for a given caller - -// // STEP MERGING VCF - FREEBAYES & GATK HAPLOTYPECALLER - -// vcfConcatenateVCFs = vcfFreeBayes.mix(vcfFreebayesSingle, vcfGenotypeGVCFs, gvcfHaplotypeCaller) -// vcfConcatenateVCFs = vcfConcatenateVCFs.dump(tag:'VCF to merge') - -// process ConcatVCF { -// label 'cpus_8' - -// tag "${variantCaller}-${idSample}" - -// publishDir "${params.outdir}/VariantCalling/${idSample}/${"$variantCaller"}", mode: params.publish_dir_mode - -// input: -// set variantCaller, idPatient, idSample, file(vcf) from vcfConcatenateVCFs -// file(fastaFai) from fai -// file(targetBED) from ch_target_bed - -// output: -// // we have this funny *_* pattern to avoid copying the raw calls to publishdir -// set variantCaller, idPatient, idSample, file("*_*.vcf.gz"), file("*_*.vcf.gz.tbi") into vcfConcatenated - -// when: ('haplotypecaller' in tools || 'mutect2' in tools || 'freebayes' in tools) - -// script: -// if (variantCaller == 'HaplotypeCallerGVCF') -// outputFile = "HaplotypeCaller_${idSample}.g.vcf" -// else -// outputFile = "${variantCaller}_${idSample}.vcf" -// options = params.target_bed ? "-t ${targetBED}" : "" -// intervalsOptions = params.no_intervals ? 
"-n" : "" -// """ -// concatenateVCFs.sh -i ${fastaFai} -c ${task.cpus} -o ${outputFile} ${options} ${intervalsOptions} -// """ -// } - -// vcfConcatenated = vcfConcatenated.dump(tag:'VCF') - -// // STEP MERGING VCF - GATK MUTECT2 (UNFILTERED) - -// mutect2Output = mutect2Output.dump(tag:'Mutect2 output VCF to merge') - -// process ConcatVCF_Mutect2 { -// label 'cpus_8' - -// tag "${idSample}" - -// publishDir "${params.outdir}/VariantCalling/${idSample}/Mutect2", mode: params.publish_dir_mode - -// input: -// set variantCaller, idPatient, idSample, file(vcf) from mutect2Output -// file(fastaFai) from fai -// file(targetBED) from ch_target_bed - -// output: -// // we have this funny *_* pattern to avoid copying the raw calls to publishdir -// set variantCaller, idPatient, idSample, file("*_*.vcf.gz"), file("*_*.vcf.gz.tbi") into vcfConcatenatedForFilter - -// when: ('haplotypecaller' in tools || 'mutect2' in tools || 'freebayes' in tools) - -// script: -// outputFile = "Mutect2_unfiltered_${idSample}.vcf" -// options = params.target_bed ? "-t ${targetBED}" : "" -// intervalsOptions = params.no_intervals ? "-n" : "" -// """ -// concatenateVCFs.sh -i ${fastaFai} -c ${task.cpus} -o ${outputFile} ${options} ${intervalsOptions} -// """ -// } - -// vcfConcatenatedForFilter = vcfConcatenatedForFilter.dump(tag:'Mutect2 unfiltered VCF') - -// // STEP GATK MUTECT2.3 - GENERATING PILEUP SUMMARIES - -// pairBamPileupSummaries = pairBamPileupSummaries.map{ -// idPatient, idSampleNormal, bamNormal, baiNormal, idSampleTumor, bamTumor, baiTumor, intervalBed -> -// [idPatient, idSampleNormal, idSampleTumor, bamNormal, baiNormal, bamTumor, baiTumor, intervalBed] -// }.join(intervalStatsFiles, by:[0,1,2]) - -// process PileupSummariesForMutect2 { -// tag "${idSampleTumor}_vs_${idSampleNormal}-${intervalBed.baseName}" - -// label 'cpus_1' - -// input: -// set idPatient, idSampleNormal, idSampleTumor, file(bamNormal), file(baiNormal), file(bamTumor), file(baiTumor), file(intervalBed), file(statsFile) from pairBamPileupSummaries -// file(germlineResource) from germline_resource -// file(germlineResourceIndex) from germline_resource_tbi - -// output: -// set idPatient, idSampleNormal, idSampleTumor, file("${intervalBed.baseName}_${idSampleTumor}_pileupsummaries.table") into pileupSummaries - -// when: 'mutect2' in tools - -// script: -// intervalsOptions = params.no_intervals ? 
"" : "-L ${intervalBed}" -// """ -// gatk --java-options "-Xmx${task.memory.toGiga()}g" \ -// GetPileupSummaries \ -// -I ${bamTumor} \ -// -V ${germlineResource} \ -// ${intervalsOptions} \ -// -O ${intervalBed.baseName}_${idSampleTumor}_pileupsummaries.table -// """ -// } - -// pileupSummaries = pileupSummaries.groupTuple(by:[0,1,2]) - -// // STEP GATK MUTECT2.4 - MERGING PILEUP SUMMARIES - -// process MergePileupSummaries { -// label 'cpus_1' - -// tag "${idPatient}_${idSampleTumor}" - -// publishDir "${params.outdir}/VariantCalling/${idSampleTumor}/Mutect2", mode: params.publish_dir_mode - -// input: -// set idPatient, idSampleNormal, idSampleTumor, file(pileupSums) from pileupSummaries -// file(dict) from dict - -// output: -// set idPatient, idSampleNormal, idSampleTumor, file("${idSampleTumor}_pileupsummaries.table") into mergedPileupFile - -// when: 'mutect2' in tools - -// script: -// allPileups = pileupSums.collect{ "-I ${it} " }.join(' ') -// """ -// gatk --java-options "-Xmx${task.memory.toGiga()}g" \ -// GatherPileupSummaries \ -// --sequence-dictionary ${dict} \ -// ${allPileups} \ -// -O ${idSampleTumor}_pileupsummaries.table -// """ -// } - -// // STEP GATK MUTECT2.5 - CALCULATING CONTAMINATION - -// pairBamCalculateContamination = pairBamCalculateContamination.map{ -// idPatient, idSampleNormal, bamNormal, baiNormal, idSampleTumor, bamTumor, baiTumor -> -// [idPatient, idSampleNormal, idSampleTumor, bamNormal, baiNormal, bamTumor, baiTumor] -// }.join(mergedPileupFile, by:[0,1,2]) - -// process CalculateContamination { -// label 'cpus_1' - -// tag "${idSampleTumor}_vs_${idSampleNormal}" - -// publishDir "${params.outdir}/VariantCalling/${idSampleTumor}/Mutect2", mode: params.publish_dir_mode - -// input: -// set idPatient, idSampleNormal, idSampleTumor, file(bamNormal), file(baiNormal), file(bamTumor), file(baiTumor), file(mergedPileup) from pairBamCalculateContamination - -// output: -// set idPatient, val("${idSampleTumor}_vs_${idSampleNormal}"), file("${idSampleTumor}_contamination.table") into contaminationTable - -// when: 'mutect2' in tools - -// script: -// """ -// # calculate contamination -// gatk --java-options "-Xmx${task.memory.toGiga()}g" \ -// CalculateContamination \ -// -I ${idSampleTumor}_pileupsummaries.table \ -// -O ${idSampleTumor}_contamination.table -// """ -// } - -// // STEP GATK MUTECT2.6 - FILTERING CALLS - -// mutect2CallsToFilter = vcfConcatenatedForFilter.map{ -// variantCaller, idPatient, idSamplePair, vcf, tbi -> -// [idPatient, idSamplePair, vcf, tbi] -// }.join(mergedStatsFile, by:[0,1]).join(contaminationTable, by:[0,1]) - -// process FilterMutect2Calls { -// label 'cpus_1' - -// tag "${idSamplePair}" - -// publishDir "${params.outdir}/VariantCalling/${idSamplePair}/Mutect2", mode: params.publish_dir_mode - -// input: -// set idPatient, idSamplePair, file(unfiltered), file(unfilteredIndex), file(stats), file(contaminationTable) from mutect2CallsToFilter -// file(dict) from dict -// file(fasta) from fasta -// file(fastaFai) from fai -// file(germlineResource) from germline_resource -// file(germlineResourceIndex) from germline_resource_tbi -// file(intervals) from intervals - -// output: -// set val("Mutect2"), idPatient, idSamplePair, file("Mutect2_filtered_${idSamplePair}.vcf.gz"), file("Mutect2_filtered_${idSamplePair}.vcf.gz.tbi"), file("Mutect2_filtered_${idSamplePair}.vcf.gz.filteringStats.tsv") into filteredMutect2Output - -// when: 'mutect2' in tools - -// script: -// """ -// # do the actual filtering -// gatk --java-options 
"-Xmx${task.memory.toGiga()}g" \ -// FilterMutectCalls \ -// -V ${unfiltered} \ -// --contamination-table ${contaminationTable} \ -// --stats ${stats} \ -// -R ${fasta} \ -// -O Mutect2_filtered_${idSamplePair}.vcf.gz -// """ -// } - -// // STEP SENTIEON TNSCOPE - -// process Sentieon_TNscope { -// label 'cpus_max' -// label 'memory_max' -// label 'sentieon' - -// tag "${idSampleTumor}_vs_${idSampleNormal}" - -// input: -// set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), file(recalNormal), idSampleTumor, file(bamTumor), file(baiTumor), file(recalTumor) from bam_pair_sentieon_TNscope -// file(dict) from dict -// file(fasta) from fasta -// file(fastaFai) from fai -// file(dbsnp) from dbsnp -// file(dbsnpIndex) from dbsnp_tbi -// file(pon) from pon -// file(ponIndex) from pon_tbi - -// output: -// set val("SentieonTNscope"), idPatient, val("${idSampleTumor}_vs_${idSampleNormal}"), file("*.vcf") into vcf_sentieon_TNscope - -// when: 'tnscope' in tools && params.sentieon - -// script: -// PON = params.pon ? "--pon ${pon}" : "" -// """ -// sentieon driver \ -// -t ${task.cpus} \ -// -r ${fasta} \ -// -i ${bamTumor} \ -// -q ${recalTumor} \ -// -i ${bamNormal} \ -// -q ${recalNormal} \ -// --algo TNscope \ -// --tumor_sample ${idSampleTumor} \ -// --normal_sample ${idSampleNormal} \ -// --dbsnp ${dbsnp} \ -// ${PON} \ -// TNscope_${idSampleTumor}_vs_${idSampleNormal}.vcf -// """ -// } - -// vcf_sentieon_TNscope = vcf_sentieon_TNscope.dump(tag:'Sentieon TNscope') - -// vcf_sentieon = vcf_sentieon_DNAseq.mix(vcf_sentieon_DNAscope, vcf_sentieon_DNAscope_SV, vcf_sentieon_TNscope) - -// process CompressSentieonVCF { -// tag "${idSample} - ${vcf}" - -// publishDir "${params.outdir}/VariantCalling/${idSample}/${variantCaller}", mode: params.publish_dir_mode - -// input: -// set variantCaller, idPatient, idSample, file(vcf) from vcf_sentieon - -// output: -// set variantCaller, idPatient, idSample, file("*.vcf.gz"), file("*.vcf.gz.tbi") into vcf_sentieon_compressed - -// script: -// """ -// bgzip < ${vcf} > ${vcf}.gz -// tabix ${vcf}.gz -// """ -// } - -// vcf_sentieon_compressed = vcf_sentieon_compressed.dump(tag:'Sentieon VCF indexed') - -// // STEP CNVkit - -// process CNVkit { -// tag "${idSampleTumor}_vs_${idSampleNormal}" - -// publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/CNVkit", mode: params.publish_dir_mode - -// input: -// set idPatient, idSampleNormal, file(bamNormal), file(baiNormal), idSampleTumor, file(bamTumor), file(baiTumor) from pairBamCNVkit -// file(targetBED) from ch_target_bed -// file(fasta) from fasta - -// output: -// set idPatient, idSampleNormal, idSampleTumor, file("${idSampleTumor}*"), file("${idSampleNormal}*") into cnvkitOut - -// when: 'cnvkit' in tools && params.target_bed - -// script: -// """ -// cnvkit.py \ -// batch \ -// ${bamTumor} \ -// --normal ${bamNormal} \ -// --targets ${targetBED} \ -// --fasta ${fasta} \ -// --output-reference output_reference.cnn \ -// --output-dir ./ \ -// --diagram \ -// --scatter -// """ -// } - -// // STEP ASCAT.1 - ALLELECOUNTER - -// // Run commands and code from Malin Larsson -// // Based on Jesper Eisfeldt's code -// process AlleleCounter { -// label 'memory_singleCPU_2_task' - -// tag "${idSample}" - -// input: -// set idPatient, idSample, file(bam), file(bai) from bamAscat -// file(acLoci) from loci -// file(dict) from dict -// file(fasta) from fasta -// file(fastaFai) from fai - -// output: -// set idPatient, idSample, file("${idSample}.alleleCount") into alleleCounterOut - 
-//     when: 'ascat' in tools
-
-//     script:
-//     """
-//     alleleCounter \
-//         -l ${acLoci} \
-//         -r ${fasta} \
-//         -b ${bam} \
-//         -o ${idSample}.alleleCount;
-//     """
-// }
-
-// alleleCountOutNormal = Channel.create()
-// alleleCountOutTumor = Channel.create()
-
-// alleleCounterOut
-//     .choice(alleleCountOutTumor, alleleCountOutNormal) {status_map[it[0], it[1]] == 0 ? 1 : 0}
-
-// alleleCounterOut = alleleCountOutNormal.combine(alleleCountOutTumor, by:0)
-
-// alleleCounterOut = alleleCounterOut.map {
-//     idPatientNormal, idSampleNormal, alleleCountOutNormal,
-//     idSampleTumor, alleleCountOutTumor ->
-//     [idPatientNormal, idSampleNormal, idSampleTumor, alleleCountOutNormal, alleleCountOutTumor]
-// }
-
-// // STEP ASCAT.2 - CONVERTALLELECOUNTS
-
-// // R script from Malin Larsson's bitbucket repo:
-// // https://bitbucket.org/malinlarsson/somatic_wgs_pipeline
-// process ConvertAlleleCounts {
-//     label 'memory_singleCPU_2_task'
-
-//     tag "${idSampleTumor}_vs_${idSampleNormal}"
-
-//     publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/ASCAT", mode: params.publish_dir_mode
-
-//     input:
-//         set idPatient, idSampleNormal, idSampleTumor, file(alleleCountNormal), file(alleleCountTumor) from alleleCounterOut
-
-//     output:
-//         set idPatient, idSampleNormal, idSampleTumor, file("${idSampleNormal}.BAF"), file("${idSampleNormal}.LogR"), file("${idSampleTumor}.BAF"), file("${idSampleTumor}.LogR") into convertAlleleCountsOut
-
-//     when: 'ascat' in tools
-
-//     script:
-//     gender = gender_map[idPatient]
-//     """
-//     convertAlleleCounts.r ${idSampleTumor} ${alleleCountTumor} ${idSampleNormal} ${alleleCountNormal} ${gender}
-//     """
-// }
-
-// // STEP ASCAT.3 - ASCAT
-
-// // R scripts from Malin Larsson's bitbucket repo:
-// // https://bitbucket.org/malinlarsson/somatic_wgs_pipeline
-// process Ascat {
-//     label 'memory_singleCPU_2_task'
-
-//     tag "${idSampleTumor}_vs_${idSampleNormal}"
-
-//     publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/ASCAT", mode: params.publish_dir_mode
-
-//     input:
-//         set idPatient, idSampleNormal, idSampleTumor, file(bafNormal), file(logrNormal), file(bafTumor), file(logrTumor) from convertAlleleCountsOut
-//         file(acLociGC) from loci_gc
-
-//     output:
-//         set val("ASCAT"), idPatient, idSampleNormal, idSampleTumor, file("${idSampleTumor}.*.{png,txt}") into ascatOut
-
-//     when: 'ascat' in tools
-
-//     script:
-//     gender = gender_map[idPatient]
-//     purity_ploidy = (params.ascat_purity && params.ascat_ploidy) ? "--purity ${params.ascat_purity} --ploidy ${params.ascat_ploidy}" : ""
-//     """
-//     for f in *BAF *LogR; do sed 's/chr//g' \$f > tmpFile; mv tmpFile \$f;done
-//     run_ascat.r \
-//         --tumorbaf ${bafTumor} \
-//         --tumorlogr ${logrTumor} \
-//         --normalbaf ${bafNormal} \
-//         --normallogr ${logrNormal} \
-//         --tumorname ${idSampleTumor} \
-//         --basedir ${projectDir} \
-//         --gcfile ${acLociGC} \
-//         --gender ${gender} \
-//         ${purity_ploidy}
-//     """
-// }
-
-// ascatOut.dump(tag:'ASCAT')
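For orientation: gender_map and status_map, used throughout these removed processes, are presumably plain Groovy maps built from the input TSV; a hypothetical sketch, not pipeline code:

gender_map = [:]                      // idPatient -> 'XX' / 'XY'
status_map = [:]                      // [idPatient, idSample] -> 0 (normal) / 1 (tumor)
gender_map['patient1'] = 'XX'
status_map['patient1', 'tumor1'] = 1  // Groovy putAt with a two-element list key
assert status_map['patient1', 'tumor1'] == 1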
"VariantCalling/${idSample}/Control-FREEC/${it}" : null } - -// input: -// set idPatient, idSample, file(bam), file(bai), file(intervalBed) from bamMpileup -// file(fasta) from fasta -// file(fastaFai) from fai - -// output: -// set idPatient, idSample, file("${prefix}${idSample}.pileup") into mpileupMerge -// set idPatient, idSample into tsv_mpileup - -// when: 'controlfreec' in tools || 'mpileup' in tools - -// script: -// prefix = params.no_intervals ? "" : "${intervalBed.baseName}_" -// intervalsOptions = params.no_intervals ? "" : "-l ${intervalBed}" - -// """ -// # Control-FREEC reads uncompresses the zipped file TWICE in single-threaded mode. -// # we are therefore not using compressed pileups here -// samtools mpileup \ -// -f ${fasta} ${bam} \ -// ${intervalsOptions} > ${prefix}${idSample}.pileup -// """ -// } - -// (tsv_mpileup, tsv_mpileup_sample) = tsv_mpileup.groupTuple(by:[0, 1]).into(2) - -// // Creating a TSV file to restart from this step -// tsv_mpileup.map { idPatient, idSample -> -// gender = gender_map[idPatient] -// status = status_map[idPatient, idSample] -// mpileup = "${params.outdir}/VariantCalling/${idSample}/Control-FREEC/${idSample}.pileup" -// "${idPatient}\t${gender}\t${status}\t${idSample}\t${mpileup}\n" -// }.collectFile( -// name: 'control-freec_mpileup.tsv', sort: true, storeDir: "${params.outdir}/VariantCalling/TSV" -// ) - -// tsv_mpileup_sample -// .collectFile(storeDir: "${params.outdir}/VariantCalling/TSV") { -// idPatient, idSample -> -// status = status_map[idPatient, idSample] -// gender = gender_map[idPatient] -// mpileup = "${params.outdir}/VariantCalling/${idSample}/Control-FREEC/${idSample}.pileup" -// ["control-freec_mpileup_${idSample}.tsv", "${idPatient}\t${gender}\t${status}\t${idSample}\t${mpileup}\n"] -// } - -// if (!params.no_intervals) { -// mpileupMerge = mpileupMerge.groupTuple(by:[0, 1]) -// mpileupNoInt = Channel.empty() -// } else { -// (mpileupMerge, mpileupNoInt) = mpileupMerge.into(2) -// mpileupMerge.close() -// } - -// // STEP MPILEUP.2 - MERGE -// process MergeMpileup { -// label 'cpus_1' - -// tag "${idSample}" - -// publishDir params.outdir, mode: params.publish_dir_mode, saveAs: { it == "${idSample}.pileup" ? "VariantCalling/${idSample}/Control-FREEC/${it}" : null } - -// input: -// set idPatient, idSample, file(mpileup) from mpileupMerge - -// output: -// set idPatient, idSample, file("${idSample}.pileup") into mpileupOut - -// when: !(params.no_intervals) && 'controlfreec' in tools || 'mpileup' in tools - -// script: -// """ -// for i in `ls -1v *.pileup`; -// do cat \$i >> ${idSample}.pileup -// done -// """ -// } - -// mpileupOut = mpileupOut.mix(mpileupNoInt) -// mpileupOut = mpileupOut.dump(tag:'mpileup') - -// mpileupOutNormal = Channel.create() -// mpileupOutTumor = Channel.create() - -// if (step == 'controlfreec') mpileupOut = input_sample - -// mpileupOut -// .choice(mpileupOutTumor, mpileupOutNormal) {status_map[it[0], it[1]] == 0 ? 
-
-// if (!params.no_intervals) {
-//     mpileupMerge = mpileupMerge.groupTuple(by:[0, 1])
-//     mpileupNoInt = Channel.empty()
-// } else {
-//     (mpileupMerge, mpileupNoInt) = mpileupMerge.into(2)
-//     mpileupMerge.close()
-// }
-
-// // STEP MPILEUP.2 - MERGE
-// process MergeMpileup {
-//     label 'cpus_1'
-
-//     tag "${idSample}"
-
-//     publishDir params.outdir, mode: params.publish_dir_mode, saveAs: { it == "${idSample}.pileup" ? "VariantCalling/${idSample}/Control-FREEC/${it}" : null }
-
-//     input:
-//         set idPatient, idSample, file(mpileup) from mpileupMerge
-
-//     output:
-//         set idPatient, idSample, file("${idSample}.pileup") into mpileupOut
-
-//     when: !(params.no_intervals) && 'controlfreec' in tools || 'mpileup' in tools
-
-//     script:
-//     """
-//     for i in `ls -1v *.pileup`;
-//         do cat \$i >> ${idSample}.pileup
-//     done
-//     """
-// }
-
-// mpileupOut = mpileupOut.mix(mpileupNoInt)
-// mpileupOut = mpileupOut.dump(tag:'mpileup')
-
-// mpileupOutNormal = Channel.create()
-// mpileupOutTumor = Channel.create()
-
-// if (step == 'controlfreec') mpileupOut = input_sample
-
-// mpileupOut
-//     .choice(mpileupOutTumor, mpileupOutNormal) {status_map[it[0], it[1]] == 0 ? 1 : 0}
-
-// mpileupOut = mpileupOutNormal.combine(mpileupOutTumor, by:0)
-
-// mpileupOut = mpileupOut.map {
-//     idPatientNormal, idSampleNormal, mpileupOutNormal,
-//     idSampleTumor, mpileupOutTumor ->
-//     [idPatientNormal, idSampleNormal, idSampleTumor, mpileupOutNormal, mpileupOutTumor]
-// }
-
-// // STEP CONTROLFREEC.1 - CONTROLFREEC
-
-// process ControlFREEC {
-//     label 'cpus_max'
-//     //label 'memory_singleCPU_2_task'
-
-//     tag "${idSampleTumor}_vs_${idSampleNormal}"
-
-//     publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/Control-FREEC", mode: params.publish_dir_mode
-
-//     input:
-//         set idPatient, idSampleNormal, idSampleTumor, file(mpileupNormal), file(mpileupTumor) from mpileupOut
-//         file(chrDir) from chr_dir
-//         file(mappability) from mappability
-//         file(chrLength) from chr_length
-//         file(dbsnp) from dbsnp
-//         file(dbsnpIndex) from dbsnp_tbi
-//         file(fasta) from fasta
-//         file(fastaFai) from fai
-
-//     output:
-//         set idPatient, idSampleNormal, idSampleTumor, file("${idSampleTumor}.pileup_CNVs"), file("${idSampleTumor}.pileup_ratio.txt"), file("${idSampleTumor}.pileup_normal_CNVs"), file("${idSampleTumor}.pileup_normal_ratio.txt"), file("${idSampleTumor}.pileup_BAF.txt"), file("${idSampleNormal}.pileup_BAF.txt") into controlFreecViz
-//         set file("*.pileup*"), file("${idSampleTumor}_vs_${idSampleNormal}.config.txt") into controlFreecOut
-
-//     when: 'controlfreec' in tools
-
-//     script:
-//     config = "${idSampleTumor}_vs_${idSampleNormal}.config.txt"
-//     gender = gender_map[idPatient]
-//     // if we are using coefficientOfVariation, we must remove the window parameter:
-//     // the default is "window = 20000" when coefficientOfVariation is not set,
-//     // which we do not want. Note: this is not set in stone
-//     coeff_or_window = params.cf_window ?
"window = ${params.cf_window}" : "coefficientOfVariation = ${params.cf_coeff}" - -// """ -// touch ${config} -// echo "[general]" >> ${config} -// echo "BedGraphOutput = TRUE" >> ${config} -// echo "chrFiles = \${PWD}/${chrDir.fileName}" >> ${config} -// echo "chrLenFile = \${PWD}/${chrLength.fileName}" >> ${config} -// echo "gemMappabilityFile = \${PWD}/${mappability}" >> ${config} -// echo "${coeff_or_window}" >> ${config} -// echo "contaminationAdjustment = TRUE" >> ${config} -// echo "forceGCcontentNormalization = 1" >> ${config} -// echo "maxThreads = ${task.cpus}" >> ${config} -// echo "minimalSubclonePresence = 20" >> ${config} -// echo "ploidy = ${params.cf_ploidy}" >> ${config} -// echo "sex = ${gender}" >> ${config} -// echo "" >> ${config} - -// echo "[control]" >> ${config} -// echo "inputFormat = pileup" >> ${config} -// echo "mateFile = \${PWD}/${mpileupNormal}" >> ${config} -// echo "mateOrientation = FR" >> ${config} -// echo "" >> ${config} - -// echo "[sample]" >> ${config} -// echo "inputFormat = pileup" >> ${config} -// echo "mateFile = \${PWD}/${mpileupTumor}" >> ${config} -// echo "mateOrientation = FR" >> ${config} -// echo "" >> ${config} - -// echo "[BAF]" >> ${config} -// echo "SNPfile = ${dbsnp.fileName}" >> ${config} - -// freec -conf ${config} -// """ -// } - -// controlFreecOut.dump(tag:'ControlFREEC') - -// // STEP CONTROLFREEC.3 - VISUALIZATION - -// process ControlFreecViz { -// label 'memory_singleCPU_2_task' - -// tag "${idSampleTumor}_vs_${idSampleNormal}" - -// publishDir "${params.outdir}/VariantCalling/${idSampleTumor}_vs_${idSampleNormal}/Control-FREEC", mode: params.publish_dir_mode - -// input: -// set idPatient, idSampleNormal, idSampleTumor, file(cnvTumor), file(ratioTumor), file(cnvNormal), file(ratioNormal), file(bafTumor), file(bafNormal) from controlFreecViz - -// output: -// set file("*.txt"), file("*.png"), file("*.bed") into controlFreecVizOut - -// when: 'controlfreec' in tools - -// """ -// echo "Shaping CNV files to make sure we can assess significance" -// awk 'NF==9{print}' ${cnvTumor} > TUMOR.CNVs -// awk 'NF==7{print}' ${cnvNormal} > NORMAL.CNVs - -// echo "############### Calculating significance values for TUMOR CNVs #############" -// cat /opt/conda/envs/nf-core-sarek-${workflow.manifest.version}/bin/assess_significance.R | R --slave --args TUMOR.CNVs ${ratioTumor} - -// echo "############### Calculating significance values for NORMAL CNVs ############" -// cat /opt/conda/envs/nf-core-sarek-${workflow.manifest.version}/bin/assess_significance.R | R --slave --args NORMAL.CNVs ${ratioNormal} - -// echo "############### Creating graph for TUMOR ratios ###############" -// cat /opt/conda/envs/nf-core-sarek-${workflow.manifest.version}/bin/makeGraph.R | R --slave --args 2 ${ratioTumor} ${bafTumor} - -// echo "############### Creating graph for NORMAL ratios ##############" -// cat /opt/conda/envs/nf-core-sarek-${workflow.manifest.version}/bin/makeGraph.R | R --slave --args 2 ${ratioNormal} ${bafNormal} - -// echo "############### Creating BED files for TUMOR ##############" -// perl /opt/conda/envs/nf-core-sarek-${workflow.manifest.version}/bin/freec2bed.pl -f ${ratioTumor} > ${idSampleTumor}.bed - -// echo "############### Creating BED files for NORMAL #############" -// perl /opt/conda/envs/nf-core-sarek-${workflow.manifest.version}/bin/freec2bed.pl -f ${ratioNormal} > ${idSampleNormal}.bed -// """ -// } - -// controlFreecVizOut.dump(tag:'ControlFreecViz') - -// // Remapping channels for QC and annotation - -// (vcfStrelkaIndels, 
vcfStrelkaSNVS) = vcfStrelka.into(2)
-// (vcfStrelkaBPIndels, vcfStrelkaBPSNVS) = vcfStrelkaBP.into(2)
-// (vcfMantaSomaticSV, vcfMantaDiploidSV) = vcfManta.into(2)
-
-// vcfKeep = Channel.empty().mix(
-//     filteredMutect2Output.map{
-//         variantcaller, idPatient, idSample, vcf, tbi, tsv ->
-//         [variantcaller, idSample, vcf]
-//     },
-//     vcfConcatenated.map{
-//         variantcaller, idPatient, idSample, vcf, tbi ->
-//         [variantcaller, idSample, vcf]
-//     },
-//     vcf_sentieon_compressed.map {
-//         variantcaller, idPatient, idSample, vcf, tbi ->
-//         [variantcaller, idSample, vcf]
-//     },
-//     vcfStrelkaSingle.map {
-//         variantcaller, idPatient, idSample, vcf, tbi ->
-//         [variantcaller, idSample, vcf[1]]
-//     },
-//     vcfMantaSingle.map {
-//         variantcaller, idPatient, idSample, vcf, tbi ->
-//         [variantcaller, idSample, vcf[2]]
-//     },
-//     vcfMantaDiploidSV.map {
-//         variantcaller, idPatient, idSample, vcf, tbi ->
-//         [variantcaller, idSample, vcf[2]]
-//     },
-//     vcfMantaSomaticSV.map {
-//         variantcaller, idPatient, idSample, vcf, tbi ->
-//         [variantcaller, idSample, vcf[3]]
-//     },
-//     vcfStrelkaIndels.map {
-//         variantcaller, idPatient, idSample, vcf, tbi ->
-//         [variantcaller, idSample, vcf[0]]
-//     },
-//     vcfStrelkaSNVS.map {
-//         variantcaller, idPatient, idSample, vcf, tbi ->
-//         [variantcaller, idSample, vcf[1]]
-//     },
-//     vcfStrelkaBPIndels.map {
-//         variantcaller, idPatient, idSample, vcf, tbi ->
-//         [variantcaller, idSample, vcf[0]]
-//     },
-//     vcfStrelkaBPSNVS.map {
-//         variantcaller, idPatient, idSample, vcf, tbi ->
-//         [variantcaller, idSample, vcf[1]]
-//     },
-//     vcfTIDDIT.map {
-//         variantcaller, idPatient, idSample, vcf, tbi ->
-//         [variantcaller, idSample, vcf]
-//     })
-
-// (vcfBCFtools, vcfVCFtools, vcfAnnotation) = vcfKeep.into(3)
-
-// // STEP VCF.QC
-
-// process BcftoolsStats {
-//     label 'cpus_1'
-
-//     tag "${variantCaller} - ${vcf}"
-
-//     publishDir "${params.outdir}/Reports/${idSample}/BCFToolsStats", mode: params.publish_dir_mode
-
-//     input:
-//         set variantCaller, idSample, file(vcf) from vcfBCFtools
-
-//     output:
-//         file ("*.bcf.tools.stats.out") into bcftoolsReport
-
-//     when: !('bcftools' in skip_qc)
-
-//     script:
-//     """
-//     bcftools stats ${vcf} > ${reduceVCF(vcf.fileName)}.bcf.tools.stats.out
-//     """
-// }
-
-// bcftoolsReport = bcftoolsReport.dump(tag:'BCFTools')
-
-// process Vcftools {
-//     label 'cpus_1'
-
-//     tag "${variantCaller} - ${vcf}"
-
-//     publishDir "${params.outdir}/Reports/${idSample}/VCFTools", mode: params.publish_dir_mode
-
-//     input:
-//         set variantCaller, idSample, file(vcf) from vcfVCFtools
-
-//     output:
-//         file ("${reduceVCF(vcf.fileName)}.*") into vcftoolsReport
-
-//     when: !('vcftools' in skip_qc)
-
-//     script:
-//     """
-//     vcftools \
-//         --gzvcf ${vcf} \
-//         --TsTv-by-count \
-//         --out ${reduceVCF(vcf.fileName)}
-
-//     vcftools \
-//         --gzvcf ${vcf} \
-//         --TsTv-by-qual \
-//         --out ${reduceVCF(vcf.fileName)}
-
-//     vcftools \
-//         --gzvcf ${vcf} \
-//         --FILTER-summary \
-//         --out ${reduceVCF(vcf.fileName)}
-//     """
-// }
-
-// vcftoolsReport = vcftoolsReport.dump(tag:'VCFTools')
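reduceVCF, called repeatedly above and below, is not shown in this diff; presumably it derives a short report name by stripping known extensions from the VCF file name — a hypothetical sketch:

def reduceVCF(fileName) {
    // strip the extensions commonly produced by the callers/annotators in this file
    return fileName.toString().minus(".ann").minus(".vcf").minus(".mergeVEP")
}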
-
-// /*
-// --------------------------------------------------------------------------------
-//     ANNOTATION
-// --------------------------------------------------------------------------------
-// */
-
-// if (step == 'annotate') {
-//     vcfToAnnotate = Channel.create()
-//     vcfNoAnnotate = Channel.create()
-
-//     if (tsv_path == []) {
-//         // Sarek, by default, annotates all available vcfs that it can find in the VariantCalling directory
-//         // Excluding vcfs from FreeBayes, and g.vcf from HaplotypeCaller
-//         // Basically it's: results/VariantCalling/*/{HaplotypeCaller,Manta,Mutect2,SentieonDNAseq,SentieonDNAscope,SentieonTNscope,Strelka,TIDDIT}/*.vcf.gz
-//         // Without *SmallIndels.vcf.gz from Manta, and *.genome.vcf.gz from Strelka
-//         // The small snippet `vcf.minus(vcf.fileName)[-2]` catches idSample
-//         // This field is used to output final annotated VCFs in the correct directory
-//         Channel.empty().mix(
-//             Channel.fromPath("${params.outdir}/VariantCalling/*/HaplotypeCaller/*.vcf.gz")
-//                 .flatten().map{vcf -> ['HaplotypeCaller', vcf.minus(vcf.fileName)[-2].toString(), vcf]},
-//             Channel.fromPath("${params.outdir}/VariantCalling/*/Manta/*[!candidate]SV.vcf.gz")
-//                 .flatten().map{vcf -> ['Manta', vcf.minus(vcf.fileName)[-2].toString(), vcf]},
-//             Channel.fromPath("${params.outdir}/VariantCalling/*/Mutect2/*.vcf.gz")
-//                 .flatten().map{vcf -> ['Mutect2', vcf.minus(vcf.fileName)[-2].toString(), vcf]},
-//             Channel.fromPath("${params.outdir}/VariantCalling/*/SentieonDNAseq/*.vcf.gz")
-//                 .flatten().map{vcf -> ['SentieonDNAseq', vcf.minus(vcf.fileName)[-2].toString(), vcf]},
-//             Channel.fromPath("${params.outdir}/VariantCalling/*/SentieonDNAscope/*.vcf.gz")
-//                 .flatten().map{vcf -> ['SentieonDNAscope', vcf.minus(vcf.fileName)[-2].toString(), vcf]},
-//             Channel.fromPath("${params.outdir}/VariantCalling/*/SentieonTNscope/*.vcf.gz")
-//                 .flatten().map{vcf -> ['SentieonTNscope', vcf.minus(vcf.fileName)[-2].toString(), vcf]},
-//             Channel.fromPath("${params.outdir}/VariantCalling/*/Strelka/*{somatic,variant}*.vcf.gz")
-//                 .flatten().map{vcf -> ['Strelka', vcf.minus(vcf.fileName)[-2].toString(), vcf]},
-//             Channel.fromPath("${params.outdir}/VariantCalling/*/TIDDIT/*.vcf.gz")
-//                 .flatten().map{vcf -> ['TIDDIT', vcf.minus(vcf.fileName)[-2].toString(), vcf]}
-//         ).choice(vcfToAnnotate, vcfNoAnnotate) {
-//             annotate_tools == [] || (annotate_tools != [] && it[0] in annotate_tools) ? 0 : 1
-//         }
-//     } else if (annotate_tools == []) {
-//         // Annotate user-submitted VCFs
-//         // If user-submitted, Sarek assumes that the idSample can be derived from the file path automatically
-//         vcfToAnnotate = Channel.fromPath(tsv_path)
-//             .map{vcf -> ['userspecified', vcf.minus(vcf.fileName)[-2].toString(), vcf]}
-//     } else exit 1, "specify only tools or files to annotate, not both"
-
-//     vcfNoAnnotate.close()
-//     vcfAnnotation = vcfAnnotation.mix(vcfToAnnotate)
-// }
-
-// // as we now have the list of VCFs to annotate, the first step is to annotate them with allele frequencies, if there are any
-
-// (vcfSnpeff, vcfVep) = vcfAnnotation.into(2)
-
-// vcfVep = vcfVep.map {
-//     variantCaller, idSample, vcf ->
-//     [variantCaller, idSample, vcf, null]
-// }
-
-// // STEP SNPEFF
-
-// process Snpeff {
-//     tag "${idSample} - ${variantCaller} - ${vcf}"
-
-//     publishDir params.outdir, mode: params.publish_dir_mode, saveAs: {
-//         if (it == "${reducedVCF}_snpEff.ann.vcf") null
-//         else "Reports/${idSample}/snpEff/${it}"
-//     }
-
-//     input:
-//         set variantCaller, idSample, file(vcf) from vcfSnpeff
-//         file(dataDir) from snpeff_cache
-//         val snpeffDb from snpeff_db
-
-//     output:
-//         set file("${reducedVCF}_snpEff.genes.txt"), file("${reducedVCF}_snpEff.html"), file("${reducedVCF}_snpEff.csv") into snpeffReport
-//         set variantCaller, idSample, file("${reducedVCF}_snpEff.ann.vcf") into snpeffVCF
-
-//     when: 'snpeff' in tools || 'merge' in tools
-
-//     script:
-//     reducedVCF = reduceVCF(vcf.fileName)
-//     cache = (params.snpeff_cache && params.annotation_cache) ?
"-dataDir \${PWD}/${dataDir}" : "" -// """ -// snpEff -Xmx${task.memory.toGiga()}g \ -// ${snpeffDb} \ -// -csvStats ${reducedVCF}_snpEff.csv \ -// -nodownload \ -// ${cache} \ -// -canon \ -// -v \ -// ${vcf} \ -// > ${reducedVCF}_snpEff.ann.vcf - -// mv snpEff_summary.html ${reducedVCF}_snpEff.html -// """ -// } - -// snpeffReport = snpeffReport.dump(tag:'snpEff report') - -// // STEP COMPRESS AND INDEX VCF.1 - SNPEFF - -// process CompressVCFsnpEff { -// tag "${idSample} - ${vcf}" - -// publishDir "${params.outdir}/Annotation/${idSample}/snpEff", mode: params.publish_dir_mode - -// input: -// set variantCaller, idSample, file(vcf) from snpeffVCF - -// output: -// set variantCaller, idSample, file("*.vcf.gz"), file("*.vcf.gz.tbi") into (compressVCFsnpEffOut) - -// script: -// """ -// bgzip < ${vcf} > ${vcf}.gz -// tabix ${vcf}.gz -// """ -// } - -// compressVCFsnpEffOut = compressVCFsnpEffOut.dump(tag:'VCF') - -// // STEP VEP.1 - -// process VEP { -// label 'VEP' -// label 'cpus_4' - -// tag "${idSample} - ${variantCaller} - ${vcf}" - -// publishDir params.outdir, mode: params.publish_dir_mode, saveAs: { -// if (it == "${reducedVCF}_VEP.summary.html") "Reports/${idSample}/VEP/${it}" -// else null -// } - -// input: -// set variantCaller, idSample, file(vcf), file(idx) from vcfVep -// file(dataDir) from vep_cache -// val cache_version from vep_cache_version -// file(cadd_InDels) from cadd_indels -// file(cadd_InDels_tbi) from cadd_indels_tbi -// file(cadd_WG_SNVs) from cadd_wg_snvs -// file(cadd_WG_SNVs_tbi) from cadd_wg_snvs_tbi -// output: -// set variantCaller, idSample, file("${reducedVCF}_VEP.ann.vcf") into vepVCF -// file("${reducedVCF}_VEP.summary.html") into vepReport - -// when: 'vep' in tools - -// script: -// reducedVCF = reduceVCF(vcf.fileName) -// genome = params.genome == 'smallGRCh37' ? 'GRCh37' : params.genome - -// dir_cache = (params.vep_cache && params.annotation_cache) ? " \${PWD}/${dataDir}" : "/.vep" -// cadd = (params.cadd_cache && params.cadd_wg_snvs && params.cadd_indels) ? "--plugin CADD,whole_genome_SNVs.tsv.gz,InDels.tsv.gz" : "" -// genesplicer = params.genesplicer ? 
"--plugin GeneSplicer,/opt/conda/envs/nf-core-sarek-${workflow.manifest.version}/bin/genesplicer,/opt/conda/envs/nf-core-sarek-${workflow.manifest.version}/share/genesplicer-1.0-1/human,context=200,tmpdir=\$PWD/${reducedVCF}" : "--offline" -// """ -// mkdir ${reducedVCF} - -// vep \ -// -i ${vcf} \ -// -o ${reducedVCF}_VEP.ann.vcf \ -// --assembly ${genome} \ -// --species ${params.species} \ -// ${cadd} \ -// ${genesplicer} \ -// --cache \ -// --cache_version ${cache_version} \ -// --dir_cache ${dir_cache} \ -// --everything \ -// --filter_common \ -// --fork ${task.cpus} \ -// --format vcf \ -// --per_gene \ -// --stats_file ${reducedVCF}_VEP.summary.html \ -// --total_length \ -// --vcf - -// rm -rf ${reducedVCF} -// """ -// } - -// vepReport = vepReport.dump(tag:'VEP') - -// // STEP VEP.2 - VEP AFTER SNPEFF - -// process VEPmerge { -// label 'VEP' -// label 'cpus_4' - -// tag "${idSample} - ${variantCaller} - ${vcf}" - -// publishDir params.outdir, mode: params.publish_dir_mode, saveAs: { -// if (it == "${reducedVCF}_VEP.summary.html") "Reports/${idSample}/VEP/${it}" -// else null -// } - -// input: -// set variantCaller, idSample, file(vcf), file(idx) from compressVCFsnpEffOut -// file(dataDir) from vep_cache -// val cache_version from vep_cache_version -// file(cadd_InDels) from cadd_indels -// file(cadd_InDels_tbi) from cadd_indels_tbi -// file(cadd_WG_SNVs) from cadd_wg_snvs -// file(cadd_WG_SNVs_tbi) from cadd_wg_snvs_tbi -// output: -// set variantCaller, idSample, file("${reducedVCF}_VEP.ann.vcf") into vepVCFmerge -// file("${reducedVCF}_VEP.summary.html") into vepReportMerge - -// when: 'merge' in tools - -// script: -// reducedVCF = reduceVCF(vcf.fileName) -// genome = params.genome == 'smallGRCh37' ? 'GRCh37' : params.genome -// dir_cache = (params.vep_cache && params.annotation_cache) ? " \${PWD}/${dataDir}" : "/.vep" -// cadd = (params.cadd_cache && params.cadd_wg_snvs && params.cadd_indels) ? "--plugin CADD,whole_genome_SNVs.tsv.gz,InDels.tsv.gz" : "" -// genesplicer = params.genesplicer ? 
"--plugin GeneSplicer,/opt/conda/envs/nf-core-sarek-${workflow.manifest.version}/bin/genesplicer,/opt/conda/envs/nf-core-sarek-${workflow.manifest.version}/share/genesplicer-1.0-1/human,context=200,tmpdir=\$PWD/${reducedVCF}" : "--offline" -// """ -// mkdir ${reducedVCF} - -// vep \ -// -i ${vcf} \ -// -o ${reducedVCF}_VEP.ann.vcf \ -// --assembly ${genome} \ -// --species ${params.species} \ -// ${cadd} \ -// ${genesplicer} \ -// --cache \ -// --cache_version ${cache_version} \ -// --dir_cache ${dir_cache} \ -// --everything \ -// --filter_common \ -// --fork ${task.cpus} \ -// --format vcf \ -// --per_gene \ -// --stats_file ${reducedVCF}_VEP.summary.html \ -// --total_length \ -// --vcf - -// rm -rf ${reducedVCF} -// """ -// } - -// vepReportMerge = vepReportMerge.dump(tag:'VEP') - -// vcfCompressVCFvep = vepVCF.mix(vepVCFmerge) - -// // STEP COMPRESS AND INDEX VCF.2 - VEP - -// process CompressVCFvep { -// tag "${idSample} - ${vcf}" - -// publishDir "${params.outdir}/Annotation/${idSample}/VEP", mode: params.publish_dir_mode - -// input: -// set variantCaller, idSample, file(vcf) from vcfCompressVCFvep - -// output: -// set variantCaller, idSample, file("*.vcf.gz"), file("*.vcf.gz.tbi") into compressVCFOutVEP - -// script: -// """ -// bgzip < ${vcf} > ${vcf}.gz -// tabix ${vcf}.gz -// """ -// } - -// compressVCFOutVEP = compressVCFOutVEP.dump(tag:'VCF') diff --git a/modules/local/process/build_intervals.nf b/modules/local/build_intervals.nf similarity index 57% rename from modules/local/process/build_intervals.nf rename to modules/local/build_intervals.nf index 861c3c07b8..c476dd8258 100644 --- a/modules/local/process/build_intervals.nf +++ b/modules/local/build_intervals.nf @@ -1,21 +1,21 @@ // Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './../../nf-core/software/functions' +include { initOptions; saveFiles; getSoftwareName } from './functions' params.options = [:] def options = initOptions(params.options) -environment = params.enable_conda ? "anaconda::gawk=5.1.0" : null -container = "quay.io/biocontainers/gawk:5.1.0" -if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/gawk:5.1.0" - process BUILD_INTERVALS { tag "${fai}" publishDir params.outdir, mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:"false") } - conda environment - container container + conda (params.enable_conda ? "anaconda::gawk=5.1.0" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/gawk:5.1.0" + } else { + container "quay.io/biocontainers/gawk:5.1.0" + } input: path fai diff --git a/modules/local/process/concat_vcf.nf b/modules/local/concat_vcf.nf similarity index 65% rename from modules/local/process/concat_vcf.nf rename to modules/local/concat_vcf.nf index aa1bc5387b..f76d382b24 100644 --- a/modules/local/process/concat_vcf.nf +++ b/modules/local/concat_vcf.nf @@ -1,12 +1,9 @@ -include { initOptions; saveFiles; getSoftwareName } from './../../nf-core/software/functions' +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' params.options = [:] def options = initOptions(params.options) -environment = params.enable_conda ? 
"bioconda::htslib=1.11" : null -container = "quay.io/biocontainers/htslib:1.11--hd3b49d5_0" -if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/htslib:1.11--hd3b49d5_0" - process CONCAT_VCF { label 'cpus_8' @@ -15,8 +12,12 @@ process CONCAT_VCF { publishDir params.outdir, mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } - conda environment - container container + conda (params.enable_conda ? "bioconda::htslib=1.11" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/htslib:1.11--hd3b49d5_0" + } else { + container "quay.io/biocontainers/htslib:1.11--hd3b49d5_0" + } input: tuple val(meta), path(vcf) diff --git a/modules/local/process/create_intervals_bed.nf b/modules/local/create_intervals_bed.nf similarity index 79% rename from modules/local/process/create_intervals_bed.nf rename to modules/local/create_intervals_bed.nf index 6c6eecac95..52f20dbfb2 100644 --- a/modules/local/process/create_intervals_bed.nf +++ b/modules/local/create_intervals_bed.nf @@ -1,21 +1,21 @@ -include { initOptions; saveFiles; getSoftwareName } from './../../nf-core/software/functions' -include { has_extension } from '../functions' +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName; has_extension } from './functions' params.options = [:] def options = initOptions(params.options) -environment = params.enable_conda ? "anaconda::gawk=5.1.0" : null -container = "quay.io/biocontainers/gawk:5.1.0" -if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/gawk:5.1.0" - process CREATE_INTERVALS_BED { tag "${intervals}" publishDir params.outdir, mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:"false") } - conda environment - container container + conda (params.enable_conda ? "anaconda::gawk=5.1.0" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/gawk:5.1.0" + } else { + container "quay.io/biocontainers/gawk:5.1.0" + } input: path intervals diff --git a/modules/local/functions.nf b/modules/local/functions.nf index 7293db1f18..c4a9d6baaa 100644 --- a/modules/local/functions.nf +++ b/modules/local/functions.nf @@ -123,14 +123,17 @@ def extract_fastq_from_dir(folder) { fastq = fastq.map{ run, pair -> def meta = [:] - meta.patient = sample - meta.sample = meta.patient - meta.gender = 'ZZ' // unused - meta.status = 0 // normal (not tumor) - meta.run = run - meta.id = "${meta.sample}-${meta.run}" - def read1 = pair[0] - def read2 = pair[1] + meta.patient = sample + meta.sample = meta.patient + meta.gender = 'ZZ' // unused + meta.status = 0 // normal (not tumor) + meta.run = run + meta.id = "${meta.sample}-${meta.run}" + def read1 = pair[0] + def read2 = pair[1] + def CN = params.sequencing_center ? 
"CN:${params.sequencing_center}\\t" : "" + def read_group = "\"@RG\\tID:${meta.run}\\t${CN}PU:${meta.run}\\tSM:${meta.sample}\\tLB:${meta.sample}\\tPL:ILLUMINA\"" + meta.read_group = read_group return [meta, [read1, read2]] } @@ -144,14 +147,18 @@ def extract_fastq(tsvFile) { .splitCsv(sep: '\t') .map { row -> def meta = [:] - meta.patient = row[0] - meta.gender = row[1] - meta.status = return_status(row[2].toInteger()) - meta.sample = row[3] - meta.run = row[4] - meta.id = "${meta.sample}-${meta.run}" - def read1 = return_file(row[5]) - def read2 = "null" + meta.patient = row[0] + meta.gender = row[1] + meta.status = return_status(row[2].toInteger()) + meta.sample = row[3] + meta.run = row[4] + meta.id = "${meta.sample}-${meta.run}" + def read1 = return_file(row[5]) + def read2 = "null" + def CN = params.sequencing_center ? "CN:${params.sequencing_center}\\t" : "" + def read_group = "\"@RG\\tID:${meta.run}\\t${CN}PU:${meta.run}\\tSM:${meta.sample}\\tLB:${meta.sample}\\tPL:ILLUMINA\"" + meta.read_group = read_group + if (has_extension(read1, "fastq.gz") || has_extension(read1, "fq.gz") || has_extension(read1, "fastq") || has_extension(read1, "fq")) { check_number_of_item(row, 7) read2 = return_file(row[6]) @@ -260,4 +267,62 @@ def reduce_vcf(file) { def return_status(it) { if (!(it in [0, 1])) exit 1, "Status is not recognized in TSV file: ${it}, see --help for more information" return it -} \ No newline at end of file +} + +/* + * nf-core core functions + */ + +/* + * Extract name of software tool from process name using $task.process + */ +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +/* + * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules + */ +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.publish_by_id = args.publish_by_id ?: false + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +/* + * Tidy up and join elements of a list to return a path string + */ +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +/* + * Function to save/publish module results + */ +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_id) { + path_list.add(args.publish_id) + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/local/process/index_target_bed.nf b/modules/local/index_target_bed.nf similarity index 57% rename from modules/local/process/index_target_bed.nf rename to modules/local/index_target_bed.nf index d03eaa581a..185c136a22 100644 --- a/modules/local/process/index_target_bed.nf +++ b/modules/local/index_target_bed.nf @@ -1,12 +1,9 @@ -include { initOptions; saveFiles; getSoftwareName } from 
'./../../nf-core/software/functions' +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' params.options = [:] def options = initOptions(params.options) -environment = params.enable_conda ? "bioconda::htslib=1.11" : null -container = "quay.io/biocontainers/htslib:1.11--hd3b49d5_0" -if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/htslib:1.11--hd3b49d5_0" - process INDEX_TARGET_BED { label 'cpus_8' @@ -15,8 +12,12 @@ process INDEX_TARGET_BED { publishDir params.outdir, mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:"false") } - conda environment - container container + conda (params.enable_conda ? "bioconda::htslib=1.11" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/htslib:1.11--hd3b49d5_0" + } else { + container "quay.io/biocontainers/htslib:1.11--hd3b49d5_0" + } input: path target_bed diff --git a/modules/local/process/merge_bam.nf b/modules/local/merge_bam.nf similarity index 54% rename from modules/local/process/merge_bam.nf rename to modules/local/merge_bam.nf index e190d8dbd1..20725a2787 100644 --- a/modules/local/process/merge_bam.nf +++ b/modules/local/merge_bam.nf @@ -1,22 +1,24 @@ -include { initOptions; saveFiles; getSoftwareName } from './../../nf-core/software/functions' +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' params.options = [:] def options = initOptions(params.options) -environment = params.enable_conda ? "bioconda::samtools=1.10" : null -container = "quay.io/biocontainers/samtools:1.10--h2e538c0_3" -if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/samtools:1.10--h2e538c0_3" - process MERGE_BAM { label 'cpus_8' tag "${meta.id}" - publishDir params.outdir, mode: params.publish_dir_mode, + publishDir "${params.outdir}", + mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } - conda environment - container container + conda (params.enable_conda ? "bioconda::samtools=1.11" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/samtools:1.11--h6270b1f_0" + } else { + container "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + } input: tuple val(meta), path(bam) diff --git a/modules/local/process/bwa_mem.nf b/modules/local/process/bwa_mem.nf deleted file mode 100644 index bcbd43d9ba..0000000000 --- a/modules/local/process/bwa_mem.nf +++ /dev/null @@ -1,50 +0,0 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './../../nf-core/software/functions' - -params.options = [:] -def options = initOptions(params.options) - -environment = params.enable_conda ? 
"bioconda::bwa=0.7.17 bioconda::samtools=1.10" : null -container = "quay.io/biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:eabfac3657eda5818bae4090db989e3d41b01542-0" -if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:eabfac3657eda5818bae4090db989e3d41b01542-0" - -process BWA_MEM { - label 'process_high' - - tag "${meta.id}" - - publishDir params.outdir, mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } - - - conda environment - container container - - input: - tuple val(meta), path(reads) - path bwa - path fasta - path fai - - output: - tuple val(meta), path("*.bam"), emit: bam - path "*.version.txt" , emit: version - - script: - CN = params.sequencing_center ? "CN:${params.sequencing_center}\\t" : "" - readGroup = "@RG\\tID:${meta.run}\\t${CN}PU:${meta.run}\\tSM:${meta.sample}\\tLB:${meta.sample}\\tPL:ILLUMINA" - extra = meta.status == 1 ? "-B 3" : "" - """ - bwa mem \ - ${options.args} \ - -R \"${readGroup}\" \ - ${extra} \ - -t ${task.cpus} \ - ${fasta} ${reads} | \ - samtools sort --threads ${task.cpus} -m 2G - > ${meta.id}.bam - - # samtools index ${meta.id}.bam - - echo \$(bwa version 2>&1) > bwa.version.txt - """ -} diff --git a/modules/local/process/bwamem2_mem.nf b/modules/local/process/bwamem2_mem.nf deleted file mode 100644 index f0117b5807..0000000000 --- a/modules/local/process/bwamem2_mem.nf +++ /dev/null @@ -1,47 +0,0 @@ -include { initOptions; saveFiles; getSoftwareName } from './../../nf-core/software/functions' - -params.options = [:] -def options = initOptions(params.options) - -environment = params.enable_conda ? "bioconda::bwa-mem2=2.0 bioconda::samtools=1.10" : null -container = "quay.io/biocontainers/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:876eb6f1d38fbf578296ea94e5aede4e317939e7-0" -if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:876eb6f1d38fbf578296ea94e5aede4e317939e7-0" - -process BWAMEM2_MEM { - label 'process_high' - - tag "${meta.id}" - - publishDir params.outdir, mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } - - conda environment - container container - - input: - tuple val(meta), path(reads) - path bwa - path fasta - path fai - - output: - tuple val(meta), path("*.bam") - - script: - CN = params.sequencing_center ? "CN:${params.sequencing_center}\\t" : "" - readGroup = "@RG\\tID:${meta.run}\\t${CN}PU:${meta.run}\\tSM:${meta.sample}\\tLB:${meta.sample}\\tPL:ILLUMINA" - extra = meta.status == 1 ? 
"-B 3" : "" - """ - bwa-mem2 mem \ - ${options.args} \ - -R \"${readGroup}\" \ - ${extra} \ - -t ${task.cpus} \ - ${fasta} ${reads} | \ - samtools sort --threads ${task.cpus} -m 2G - > ${meta.id}.bam - - # samtools index ${meta.id}.bam - - echo \$(bwa-mem2 version 2>&1) > bwa-mem2.version.txt - """ -} \ No newline at end of file diff --git a/modules/nf-core/software/bwa/index/functions.nf b/modules/nf-core/software/bwa/index/functions.nf new file mode 100644 index 0000000000..d25eea86b3 --- /dev/null +++ b/modules/nf-core/software/bwa/index/functions.nf @@ -0,0 +1,59 @@ +/* + * ----------------------------------------------------- + * Utility functions used in nf-core DSL2 module files + * ----------------------------------------------------- + */ + +/* + * Extract name of software tool from process name using $task.process + */ +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +/* + * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules + */ +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.publish_by_id = args.publish_by_id ?: false + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +/* + * Tidy up and join elements of a list to return a path string + */ +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +/* + * Function to save/publish module results + */ +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_id) { + path_list.add(args.publish_id) + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/nf-core/software/bwa/index/main.nf b/modules/nf-core/software/bwa/index/main.nf index 078cfb51d1..47d2ec54a6 100644 --- a/modules/nf-core/software/bwa/index/main.nf +++ b/modules/nf-core/software/bwa/index/main.nf @@ -1,35 +1,34 @@ -include { initOptions; saveFiles; getSoftwareName } from './../../functions' +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' params.options = [:] def options = initOptions(params.options) -environment = params.enable_conda ? 
"bioconda::bwa=0.7.17" : null -container = "quay.io/biocontainers/bwa:0.7.17--hed695b0_7" -if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/bwa:0.7.17--hed695b0_7" - process BWA_INDEX { + tag "$fasta" label 'process_high' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:'') } - tag "${fasta}" - - publishDir params.outdir, mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:"false") } - - conda environment - container container + conda (params.enable_conda ? "bioconda::bwa=0.7.17" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/bwa:0.7.17--hed695b0_7" + } else { + container "quay.io/biocontainers/bwa:0.7.17--hed695b0_7" + } input: - path fasta + path fasta output: - path "${fasta}.*" , emit: index - path "*.version.txt", emit: version + path "${fasta}.*" , emit: index + path "*.version.txt", emit: version script: def software = getSoftwareName(task.process) - def ioptions = initOptions(options) """ - bwa index ${ioptions.args} ${fasta} + bwa index $options.args $fasta echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//' > ${software}.version.txt """ } diff --git a/modules/nf-core/software/bwa/index/meta.yml b/modules/nf-core/software/bwa/index/meta.yml index a2f5b1ed66..0c3b8f5ff4 100644 --- a/modules/nf-core/software/bwa/index/meta.yml +++ b/modules/nf-core/software/bwa/index/meta.yml @@ -24,20 +24,20 @@ params: description: | Value for the Nextflow `publishDir` mode parameter. Available: symlink, rellink, link, copy, copyNoFollow, move. - - conda: + - enable_conda: type: boolean description: | Run the module with Conda using the software specified via the `conda` directive + - singularity_pull_docker_container: + type: boolean + description: | + Instead of directly downloading Singularity images for use with Singularity, + force the workflow to pull and convert Docker containers instead. input: - fasta: type: file description: Input genome fasta file - - options: - type: map - description: | - Groovy Map containing module options for passing command-line arguments and - output file paths. 
output: - index: type: file diff --git a/modules/nf-core/software/bwa/mem/functions.nf b/modules/nf-core/software/bwa/mem/functions.nf new file mode 100644 index 0000000000..d25eea86b3 --- /dev/null +++ b/modules/nf-core/software/bwa/mem/functions.nf @@ -0,0 +1,59 @@ +/* + * ----------------------------------------------------- + * Utility functions used in nf-core DSL2 module files + * ----------------------------------------------------- + */ + +/* + * Extract name of software tool from process name using $task.process + */ +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +/* + * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules + */ +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.publish_by_id = args.publish_by_id ?: false + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +/* + * Tidy up and join elements of a list to return a path string + */ +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +/* + * Function to save/publish module results + */ +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_id) { + path_list.add(args.publish_id) + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/nf-core/software/bwa/mem/main.nf b/modules/nf-core/software/bwa/mem/main.nf new file mode 100644 index 0000000000..6da50c3d0f --- /dev/null +++ b/modules/nf-core/software/bwa/mem/main.nf @@ -0,0 +1,46 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +def options = initOptions(params.options) + +process BWA_MEM { + tag "$meta.id" + label 'process_high' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } + + conda (params.enable_conda ? "bioconda::bwa=0.7.17 bioconda::samtools=1.10" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:eabfac3657eda5818bae4090db989e3d41b01542-0" + } else { + container "quay.io/biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:eabfac3657eda5818bae4090db989e3d41b01542-0" + } + + input: + tuple val(meta), path(reads) + path index + path fasta + path fai + + output: + tuple val(meta), path("*.bam"), emit: bam + path "*.version.txt" , emit: version + + script: + def software = getSoftwareName(task.process) + def prefix = options.suffix ? 
"${meta.id}${options.suffix}" : "${meta.id}" + def read_group = meta.read_group ? "-R ${meta.read_group}" : "" + """ + bwa mem \ + $options.args \ + $read_group \ + -t $task.cpus \ + $fasta \ + $reads \ + | samtools $options.args2 --threads $task.cpus -o ${prefix}.bam - + + echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//' > ${software}.version.txt + """ +} diff --git a/modules/nf-core/software/bwa/mem/meta.yml b/modules/nf-core/software/bwa/mem/meta.yml new file mode 100644 index 0000000000..de61798bb1 --- /dev/null +++ b/modules/nf-core/software/bwa/mem/meta.yml @@ -0,0 +1,69 @@ +name: bwa_mem +description: Performs fastq alignment to a fasta reference using BWA +keywords: + - mem + - bwa + - alignment + - map + - fastq + - bam + - sam +tools: + - bwa: + description: | + BWA is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: http://bio-bwa.sourceforge.net/ + documentation: http://www.htslib.org/doc/samtools.html + arxiv: arXiv:1303.3997 +params: + - outdir: + type: string + description: | + The pipeline's output directory. By default, the module will + output files into `$params.outdir/` + - publish_dir_mode: + type: string + description: | + Value for the Nextflow `publishDir` mode parameter. + Available: symlink, rellink, link, copy, copyNoFollow, move. + - enable_conda: + type: boolean + description: | + Run the module with Conda using the software specified + via the `conda` directive + - singularity_pull_docker_container: + type: boolean + description: | + Instead of directly downloading Singularity images for use with Singularity, + force the workflow to pull and convert Docker containers instead. +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - index: + type: file + description: BWA genome index files + pattern: "*.{amb,ann,bwt,pac,sa}" + - fasta: + type: file + description: Input genome fasta file +output: + - bam: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" +authors: + - "@drpatelh" + - "@jeremy1805" diff --git a/modules/nf-core/software/bwamem2_index.nf b/modules/nf-core/software/bwamem2_index.nf index c8e26dfd7e..4621a35312 100644 --- a/modules/nf-core/software/bwamem2_index.nf +++ b/modules/nf-core/software/bwamem2_index.nf @@ -3,24 +3,25 @@ include { initOptions; saveFiles; getSoftwareName } from './functions' params.options = [:] def options = initOptions(params.options) -environment = params.enable_conda ? "bioconda::bwa-mem2=2.0" : null -container = "quay.io/biocontainers/bwa-mem2:2.0--he513fc3_1" -if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/bwa-mem2:2.0--he513fc3_1" - process BWAMEM2_INDEX { tag "${fasta}" publishDir params.outdir, mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:"false") } - conda environment - container container + conda (params.enable_conda ? 
"bioconda::bwa-mem2=2.1" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/bwa-mem2:2.1--he513fc3_0" + } else { + container "quay.io/biocontainers/bwa-mem2:2.1--he513fc3_0" + } input: path fasta output: - path "${fasta}.*" + path "${fasta}.*" , emit: index + path "*.version.txt" , emit: version script: def software = getSoftwareName(task.process) diff --git a/modules/nf-core/software/bwamem2_mem.nf b/modules/nf-core/software/bwamem2_mem.nf new file mode 100644 index 0000000000..9a2c95fde8 --- /dev/null +++ b/modules/nf-core/software/bwamem2_mem.nf @@ -0,0 +1,46 @@ +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +def options = initOptions(params.options) + +process BWAMEM2_MEM { + tag "$meta.id" + label 'process_high' + label 'BWAMEM2_MEM' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } + + conda (params.enable_conda ? "bioconda::bwa-mem2=2.1 bioconda::samtools=1.11" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:e6f0d20c9d78572ddbbf00d8767ee6ff865edd4e-0" + } else { + container "quay.io/biocontainers/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:e6f0d20c9d78572ddbbf00d8767ee6ff865edd4e-0" + } + + input: + tuple val(meta), path(reads) + path index + path fasta + path fai + + output: + tuple val(meta), path("*.bam"), emit: bam + path "*.version.txt" , emit: version + + script: + def software = getSoftwareName(task.process) + def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + def read_group = meta.read_group ? "-R ${meta.read_group}" : "" + """ + bwa-mem2 mem \ + $options.args \ + $read_group \ + -t $task.cpus \ + $fasta \ + $reads \ + | samtools $options.args2 --threads $task.cpus -o ${prefix}.bam - + + echo \$(bwa-mem2 version 2>&1) > ${software}.version.txt + """ +} \ No newline at end of file diff --git a/modules/nf-core/software/fastqc.nf b/modules/nf-core/software/fastqc.nf deleted file mode 100644 index 80a5582a1f..0000000000 --- a/modules/nf-core/software/fastqc.nf +++ /dev/null @@ -1,42 +0,0 @@ -// Import generic module functions -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -def options = initOptions(params.options) - -environment = params.enable_conda ? "bioconda::fastqc=0.11.9" : null -container = "quay.io/biocontainers/fastqc:0.11.9--0" -if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0" - -process FASTQC { - label 'process_medium' - label 'cpus_2' - - tag "${meta.id}" - - publishDir params.outdir, mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } - - conda environment - container container - - input: - tuple val(meta), path(reads) - - output: - path "*.html", emit: html - path "*.version.txt", emit: version - path "*.zip", emit: zip - - script: - // Add soft-links to original FastQs for consistent naming in pipeline - prefix = options.suffix ? "${meta.id}.${options.suffix}" : "${meta.id}" - """ - [ ! 
-f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz - [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz - - fastqc ${options.args} --threads ${task.cpus} ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz - - fastqc --version | sed -n "s/.*\\(v.*\$\\)/\\1/p" > fastqc.version.txt - """ -} \ No newline at end of file diff --git a/modules/nf-core/software/fastqc/functions.nf b/modules/nf-core/software/fastqc/functions.nf new file mode 100644 index 0000000000..d25eea86b3 --- /dev/null +++ b/modules/nf-core/software/fastqc/functions.nf @@ -0,0 +1,59 @@ +/* + * ----------------------------------------------------- + * Utility functions used in nf-core DSL2 module files + * ----------------------------------------------------- + */ + +/* + * Extract name of software tool from process name using $task.process + */ +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +/* + * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules + */ +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.publish_by_id = args.publish_by_id ?: false + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +/* + * Tidy up and join elements of a list to return a path string + */ +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +/* + * Function to save/publish module results + */ +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_id) { + path_list.add(args.publish_id) + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/nf-core/software/fastqc/main.nf b/modules/nf-core/software/fastqc/main.nf new file mode 100644 index 0000000000..cce410a080 --- /dev/null +++ b/modules/nf-core/software/fastqc/main.nf @@ -0,0 +1,47 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +def options = initOptions(params.options) + +process FASTQC { + tag "$meta.id" + label 'process_medium' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } + + conda (params.enable_conda ? 
"bioconda::fastqc=0.11.9" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0" + } else { + container "quay.io/biocontainers/fastqc:0.11.9--0" + } + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.html"), emit: html + tuple val(meta), path("*.zip") , emit: zip + path "*.version.txt" , emit: version + + script: + // Add soft-links to original FastQs for consistent naming in pipeline + def software = getSoftwareName(task.process) + def prefix = options.suffix ? "${meta.id}.${options.suffix}" : "${meta.id}" + if (meta.single_end) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz + fastqc $options.args --threads $task.cpus ${prefix}.fastq.gz + fastqc --version | sed -e "s/FastQC v//g" > ${software}.version.txt + """ + } else { + """ + [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz + [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz + fastqc $options.args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz + fastqc --version | sed -e "s/FastQC v//g" > ${software}.version.txt + """ + } +} diff --git a/modules/nf-core/software/fastqc/meta.yml b/modules/nf-core/software/fastqc/meta.yml new file mode 100644 index 0000000000..413aad8db6 --- /dev/null +++ b/modules/nf-core/software/fastqc/meta.yml @@ -0,0 +1,72 @@ +name: fastqc +description: Run FastQC on sequenced reads +keywords: + - quality control + - qc + - adapters + - fastq +tools: + - fastqc: + description: | + FastQC gives general quality metrics about your reads. + It provides information about the quality score distribution + across your reads, the per base sequence content (%A/C/G/T). + You get information about adapter contamination and other + overrepresented sequences. + homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ + documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ +params: + - outdir: + type: string + description: | + The pipeline's output directory. By default, the module will + output files into `$params.outdir/` + - publish_dir_mode: + type: string + description: | + Value for the Nextflow `publishDir` mode parameter. + Available: symlink, rellink, link, copy, copyNoFollow, move. + - enable_conda: + type: boolean + description: | + Run the module with Conda using the software specified + via the `conda` directive + - singularity_pull_docker_container: + type: boolean + description: | + Instead of directly downloading Singularity images for use with Singularity, + force the workflow to pull and convert Docker containers instead. +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - html: + type: file + description: FastQC report + pattern: "*_{fastqc.html}" + - zip: + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" +authors: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/software/functions.nf b/modules/nf-core/software/functions.nf index ca46a99f5d..d25eea86b3 100644 --- a/modules/nf-core/software/functions.nf +++ b/modules/nf-core/software/functions.nf @@ -41,17 +41,19 @@ def saveFiles(Map args) { if (!args.filename.endsWith('.version.txt')) { def ioptions = initOptions(args.options) def path_list = [ ioptions.publish_dir ?: args.publish_dir ] - if (ioptions.publish_by_id) path_list.add(args.publish_id) + if (ioptions.publish_by_id) { + path_list.add(args.publish_id) + } if (ioptions.publish_files instanceof Map) { for (ext in ioptions.publish_files) { if (args.filename.endsWith(ext.key)) { def ext_list = path_list.collect() ext_list.add(ext.value) - return "${getPathFromList(ext_list)}/${args.filename}" + return "${getPathFromList(ext_list)}/$args.filename" } } } else if (ioptions.publish_files == null) { - return "${getPathFromList(path_list)}/${args.filename}" + return "${getPathFromList(path_list)}/$args.filename" } } } diff --git a/modules/nf-core/software/gatk/applybqsr.nf b/modules/nf-core/software/gatk/applybqsr.nf index d17bb10dda..f21f9b04cd 100644 --- a/modules/nf-core/software/gatk/applybqsr.nf +++ b/modules/nf-core/software/gatk/applybqsr.nf @@ -3,10 +3,6 @@ include { initOptions; saveFiles; getSoftwareName } from './../functions' params.options = [:] def options = initOptions(params.options) -environment = params.enable_conda ? "bioconda::gatk4-spark=4.1.8.1" : null -container = "quay.io/biocontainers/gatk4-spark:4.1.8.1--0" -if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/gatk4-spark:4.1.8.1--0" - process GATK_APPLYBQSR { label 'memory_singleCPU_2_task' label 'cpus_2' @@ -16,8 +12,12 @@ process GATK_APPLYBQSR { publishDir params.outdir, mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } - conda environment - container container + conda (params.enable_conda ? "bioconda::gatk4=4.1.9.0" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/gatk4:4.1.9.0--py39_0" + } else { + container "quay.io/biocontainers/gatk4:4.1.9.0--py39_0" + } input: tuple val(meta), path(bam), path(bai), path(recalibrationReport), path(interval) diff --git a/modules/nf-core/software/gatk/baserecalibrator.nf b/modules/nf-core/software/gatk/baserecalibrator.nf index c6b8a35392..50210f8f1c 100644 --- a/modules/nf-core/software/gatk/baserecalibrator.nf +++ b/modules/nf-core/software/gatk/baserecalibrator.nf @@ -3,10 +3,6 @@ include { initOptions; saveFiles; getSoftwareName } from './../functions' params.options = [:] def options = initOptions(params.options) -environment = params.enable_conda ? 
"bioconda::gatk4-spark=4.1.8.1" : null -container = "quay.io/biocontainers/gatk4-spark:4.1.8.1--0" -if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/gatk4-spark:4.1.8.1--0" - process GATK_BASERECALIBRATOR { label 'cpus_1' @@ -15,8 +11,12 @@ process GATK_BASERECALIBRATOR { publishDir params.outdir, mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } - conda environment - container container + conda (params.enable_conda ? "bioconda::gatk4=4.1.9.0" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/gatk4:4.1.9.0--py39_0" + } else { + container "quay.io/biocontainers/gatk4:4.1.9.0--py39_0" + } input: tuple val(meta), path(bam), path(bai), path(interval) diff --git a/modules/nf-core/software/gatk/createsequencedictionary.nf b/modules/nf-core/software/gatk/createsequencedictionary.nf index a740b91a64..62373b0a04 100644 --- a/modules/nf-core/software/gatk/createsequencedictionary.nf +++ b/modules/nf-core/software/gatk/createsequencedictionary.nf @@ -3,18 +3,18 @@ include { initOptions; saveFiles; getSoftwareName } from './../functions' params.options = [:] def options = initOptions(params.options) -environment = params.enable_conda ? "bioconda::gatk4-spark=4.1.8.1" : null -container = "quay.io/biocontainers/gatk4-spark:4.1.8.1--0" -if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/gatk4-spark:4.1.8.1--0" - process GATK_CREATESEQUENCEDICTIONARY { tag "${fasta}" publishDir params.outdir, mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:"false") } - conda environment - container container + conda (params.enable_conda ? "bioconda::gatk4=4.1.9.0" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/gatk4:4.1.9.0--py39_0" + } else { + container "quay.io/biocontainers/gatk4:4.1.9.0--py39_0" + } input: path fasta diff --git a/modules/nf-core/software/gatk/gatherbqsrreports.nf b/modules/nf-core/software/gatk/gatherbqsrreports.nf index 7bcebc6db4..bfb6dd6f2f 100644 --- a/modules/nf-core/software/gatk/gatherbqsrreports.nf +++ b/modules/nf-core/software/gatk/gatherbqsrreports.nf @@ -3,10 +3,6 @@ include { initOptions; saveFiles; getSoftwareName } from './../functions' params.options = [:] def options = initOptions(params.options) -environment = params.enable_conda ? "bioconda::gatk4-spark=4.1.8.1" : null -container = "quay.io/biocontainers/gatk4-spark:4.1.8.1--0" -if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/gatk4-spark:4.1.8.1--0" - process GATK_GATHERBQSRREPORTS { label 'memory_singleCPU_2_task' label 'cpus_2' @@ -16,8 +12,12 @@ process GATK_GATHERBQSRREPORTS { publishDir params.outdir, mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } - conda environment - container container + conda (params.enable_conda ? 
"bioconda::gatk4=4.1.9.0" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/gatk4:4.1.9.0--py39_0" + } else { + container "quay.io/biocontainers/gatk4:4.1.9.0--py39_0" + } input: tuple val(meta), path(recal) diff --git a/modules/nf-core/software/gatk/genotypegvcf.nf b/modules/nf-core/software/gatk/genotypegvcf.nf index 07a009caa9..612039cc71 100644 --- a/modules/nf-core/software/gatk/genotypegvcf.nf +++ b/modules/nf-core/software/gatk/genotypegvcf.nf @@ -3,18 +3,18 @@ include { initOptions; saveFiles; getSoftwareName } from './../functions' params.options = [:] def options = initOptions(params.options) -environment = params.enable_conda ? "bioconda::gatk4-spark=4.1.8.1" : null -container = "quay.io/biocontainers/gatk4-spark:4.1.8.1--0" -if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/gatk4-spark:4.1.8.1--0" - process GATK_GENOTYPEGVCF { tag "${meta.id}" publishDir params.outdir, mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } - conda environment - container container + conda (params.enable_conda ? "bioconda::gatk4=4.1.9.0" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/gatk4:4.1.9.0--py39_0" + } else { + container "quay.io/biocontainers/gatk4:4.1.9.0--py39_0" + } input: tuple val(meta), path(interval), path(gvcf) diff --git a/modules/nf-core/software/gatk/haplotypecaller.nf b/modules/nf-core/software/gatk/haplotypecaller.nf index fdcc259abc..7d4621040f 100644 --- a/modules/nf-core/software/gatk/haplotypecaller.nf +++ b/modules/nf-core/software/gatk/haplotypecaller.nf @@ -3,10 +3,6 @@ include { initOptions; saveFiles; getSoftwareName } from './../functions' params.options = [:] def options = initOptions(params.options) -environment = params.enable_conda ? "bioconda::gatk4-spark=4.1.8.1" : null -container = "quay.io/biocontainers/gatk4-spark:4.1.8.1--0" -if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/gatk4-spark:4.1.8.1--0" - process GATK_HAPLOTYPECALLER { label 'MEMORY_SINGLECPU_TASK_SQ' label 'CPUS_2' @@ -16,8 +12,12 @@ process GATK_HAPLOTYPECALLER { publishDir params.outdir, mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } - conda environment - container container + conda (params.enable_conda ? 
"bioconda::gatk4=4.1.9.0" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/gatk4:4.1.9.0--py39_0" + } else { + container "quay.io/biocontainers/gatk4:4.1.9.0--py39_0" + } input: tuple val(meta), path(bam), path(bai), file(interval) diff --git a/modules/nf-core/software/gatk/markduplicates.nf b/modules/nf-core/software/gatk/markduplicates.nf index b5be16d0e1..02a2965feb 100644 --- a/modules/nf-core/software/gatk/markduplicates.nf +++ b/modules/nf-core/software/gatk/markduplicates.nf @@ -3,10 +3,6 @@ include { initOptions; saveFiles; getSoftwareName } from './../functions' params.options = [:] def options = initOptions(params.options) -environment = params.enable_conda ? "bioconda::gatk4-spark=4.1.8.1" : null -container = "quay.io/biocontainers/gatk4-spark:4.1.8.1--0" -if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/gatk4-spark:4.1.8.1--0" - process GATK_MARKDUPLICATES { label 'cpus_16' @@ -15,8 +11,12 @@ process GATK_MARKDUPLICATES { publishDir params.outdir, mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } - conda environment - container container + conda (params.enable_conda ? "bioconda::gatk4=4.1.9.0" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/gatk4:4.1.9.0--py39_0" + } else { + container "quay.io/biocontainers/gatk4:4.1.9.0--py39_0" + } input: tuple val(meta), path("${meta.sample}.bam"), path("${meta.sample}.bam.bai") @@ -25,23 +25,11 @@ process GATK_MARKDUPLICATES { tuple val(meta), path("${meta.sample}.md.bam"), path("${meta.sample}.md.bam.bai"), emit: bam val meta, emit: tsv path "${meta.sample}.bam.metrics", optional : true, emit: report - + script: markdup_java_options = task.memory.toGiga() > 8 ? params.markdup_java_options : "\"-Xms" + (task.memory.toGiga() / 2).trunc() + "g -Xmx" + (task.memory.toGiga() - 1) + "g\"" metrics = 'markduplicates' in params.skip_qc ? '' : "-M ${meta.sample}.bam.metrics" - if (params.use_gatk_spark) - """ - gatk --java-options ${markdup_java_options} \ - MarkDuplicatesSpark \ - -I ${meta.sample}.bam \ - -O ${meta.sample}.md.bam \ - ${metrics} \ - --tmp-dir . \ - --create-output-bam-index true \ - --spark-master local[${task.cpus}] - """ - else """ gatk --java-options ${markdup_java_options} \ MarkDuplicates \ @@ -53,4 +41,43 @@ process GATK_MARKDUPLICATES { --OUTPUT ${meta.sample}.md.bam mv ${meta.sample}.md.bai ${meta.sample}.md.bam.bai """ +} + +process GATK_MARKDUPLICATES_SPARK { + label 'cpus_16' + + tag "${meta.id}" + + publishDir params.outdir, mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } + + conda (params.enable_conda ? 
"bioconda::gatk4-spark=4.1.8.1" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/gatk4-spark:4.1.8.1--0" + } else { + container "quay.io/biocontainers/gatk4-spark:4.1.8.1--0" + } + + input: + tuple val(meta), path("${meta.sample}.bam"), path("${meta.sample}.bam.bai") + + output: + tuple val(meta), path("${meta.sample}.md.bam"), path("${meta.sample}.md.bam.bai"), emit: bam + val meta, emit: tsv + path "${meta.sample}.bam.metrics", optional : true, emit: report + + script: + markdup_java_options = task.memory.toGiga() > 8 ? params.markdup_java_options : "\"-Xms" + (task.memory.toGiga() / 2).trunc() + "g -Xmx" + (task.memory.toGiga() - 1) + "g\"" + metrics = 'markduplicates' in params.skip_qc ? '' : "-M ${meta.sample}.bam.metrics" + + """ + gatk --java-options ${markdup_java_options} \ + MarkDuplicatesSpark \ + -I ${meta.sample}.bam \ + -O ${meta.sample}.md.bam \ + ${metrics} \ + --tmp-dir . \ + --create-output-bam-index true \ + --spark-master local[${task.cpus}] + """ } \ No newline at end of file diff --git a/modules/nf-core/software/htslib_tabix.nf b/modules/nf-core/software/htslib_tabix.nf index ce133a4b13..2457321c49 100644 --- a/modules/nf-core/software/htslib_tabix.nf +++ b/modules/nf-core/software/htslib_tabix.nf @@ -4,18 +4,18 @@ include { initOptions; saveFiles; getSoftwareName } from './functions' params.options = [:] def options = initOptions(params.options) -environment = params.enable_conda ? "bioconda::tabix=0.2.6" : null -container = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" -if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/tabix:0.2.6--ha92aebf_0" - process HTSLIB_TABIX { tag "${vcf}" publishDir params.outdir, mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:"false") } - conda environment - container container + conda (params.enable_conda ? "bioconda::tabix=0.2.6" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/tabix:0.2.6--ha92aebf_0" + } else { + container "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" + } input: path vcf diff --git a/modules/nf-core/software/manta/somatic.nf b/modules/nf-core/software/manta/somatic.nf index 98b5a9b6ed..a31c86d904 100644 --- a/modules/nf-core/software/manta/somatic.nf +++ b/modules/nf-core/software/manta/somatic.nf @@ -3,10 +3,6 @@ include { initOptions; saveFiles; getSoftwareName } from './../functions' params.options = [:] def options = initOptions(params.options) -environment = params.enable_conda ? "bioconda::manta=1.6.0" : null -container = "quay.io/biocontainers/manta:1.6.0--py27_0" -if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/manta:1.6.0--py27_0" - process MANTA_SOMATIC { tag "${meta.id}" @@ -16,8 +12,12 @@ process MANTA_SOMATIC { publishDir params.outdir, mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } - conda environment - container container + conda (params.enable_conda ? 
"bioconda::manta=1.6.0" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/manta:1.6.0--py27_0" + } else { + container "quay.io/biocontainers/manta:1.6.0--py27_0" + } input: tuple val(meta), path(bam_normal), path(bai_normal), path(bam_tumor), path(bai_tumor) diff --git a/modules/nf-core/software/msisensor/msi.nf b/modules/nf-core/software/msisensor/msi.nf index 79ce4925fa..be257af009 100644 --- a/modules/nf-core/software/msisensor/msi.nf +++ b/modules/nf-core/software/msisensor/msi.nf @@ -3,10 +3,6 @@ include { initOptions; saveFiles; getSoftwareName } from './../functions' params.options = [:] def options = initOptions(params.options) -environment = params.enable_conda ? "bioconda::msisensor=0.5" : null -container = "quay.io/biocontainers/msisensor:0.5--hb3646a4_2" -if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/msisensor:0.5--hb3646a4_2" - process MSISENSOR_MSI { tag "${meta.id}" @@ -16,8 +12,12 @@ process MSISENSOR_MSI { publishDir params.outdir, mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } - conda environment - container container + conda (params.enable_conda ? "bioconda::msisensor=0.5" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/msisensor:0.5--hb3646a4_2" + } else { + container "quay.io/biocontainers/msisensor:0.5--hb3646a4_2" + } input: tuple val(meta), path(bam_normal), path(bai_normal), path(bam_tumor), path(bai_tumor) diff --git a/modules/nf-core/software/msisensor/scan.nf b/modules/nf-core/software/msisensor/scan.nf index 180c73f4c7..850cff618d 100644 --- a/modules/nf-core/software/msisensor/scan.nf +++ b/modules/nf-core/software/msisensor/scan.nf @@ -3,10 +3,6 @@ include { initOptions; saveFiles; getSoftwareName } from './../functions' params.options = [:] def options = initOptions(params.options) -environment = params.enable_conda ? "bioconda::msisensor=0.5" : null -container = "quay.io/biocontainers/msisensor:0.5--hb3646a4_2" -if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/msisensor:0.5--hb3646a4_2" - process MSISENSOR_SCAN { tag "${fasta}" @@ -16,8 +12,12 @@ process MSISENSOR_SCAN { publishDir params.outdir, mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:"false") } - conda environment - container container + conda (params.enable_conda ? "bioconda::msisensor=0.5" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/msisensor:0.5--hb3646a4_2" + } else { + container "quay.io/biocontainers/msisensor:0.5--hb3646a4_2" + } input: path fasta diff --git a/modules/nf-core/software/multiqc.nf b/modules/nf-core/software/multiqc.nf deleted file mode 100644 index ed201b0f49..0000000000 --- a/modules/nf-core/software/multiqc.nf +++ /dev/null @@ -1,43 +0,0 @@ -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -def options = initOptions(params.options) - -environment = params.enable_conda ? 
"bioconda::multiqc=1.9" : null -container = "quay.io/biocontainers/multiqc:1.9--py_1" -if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/multiqc:1.9--py_1" - -// Has the run name been specified by the user? -// this has the bonus effect of catching both -name and --name -def custom_runName = params.name -if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)) { - custom_runName = workflow.runName -} - -process MULTIQC { - publishDir "${params.outdir}/multiqc", mode: params.publish_dir_mode - - conda environment - container container - - input: - // path software_versions - path multiqc_config - path multiqc_custom_config - val workflow_summary - path qc_reports - - output: - path "*multiqc_report.html" - path "*_data" - path "multiqc_plots" - - script: - title = custom_runName ? "--title \"${custom_runName}\"" : '' - filename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : '' - custom_config_file = params.multiqc_config ? "--config ${multiqc_custom_config}" : '' - """ - echo '${workflow_summary}' > workflow_summary_mqc.yaml - multiqc -f ${title} ${filename} ${custom_config_file} . - """ -} diff --git a/modules/nf-core/software/multiqc/functions.nf b/modules/nf-core/software/multiqc/functions.nf new file mode 100644 index 0000000000..d25eea86b3 --- /dev/null +++ b/modules/nf-core/software/multiqc/functions.nf @@ -0,0 +1,59 @@ +/* + * ----------------------------------------------------- + * Utility functions used in nf-core DSL2 module files + * ----------------------------------------------------- + */ + +/* + * Extract name of software tool from process name using $task.process + */ +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +/* + * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules + */ +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.publish_by_id = args.publish_by_id ?: false + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +/* + * Tidy up and join elements of a list to return a path string + */ +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +/* + * Function to save/publish module results + */ +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_id) { + path_list.add(args.publish_id) + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/nf-core/software/multiqc/main.nf b/modules/nf-core/software/multiqc/main.nf new file mode 100644 index 0000000000..ff1175fcd0 --- /dev/null +++ b/modules/nf-core/software/multiqc/main.nf @@ -0,0 +1,35 @@ +// 
Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +def options = initOptions(params.options) + +process MULTIQC { + label 'process_medium' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename: filename, options: params.options, publish_dir: getSoftwareName(task.process), publish_id: '') } + + conda (params.enable_conda ? "bioconda::multiqc=1.9" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/multiqc:1.9--pyh9f0ad1d_0" + } else { + container "quay.io/biocontainers/multiqc:1.9--pyh9f0ad1d_0" + } + + input: + path multiqc_files + + output: + path "*multiqc_report.html", emit: report + path "*_data" , emit: data + path "*_plots" , optional:true, emit: plots + path "*.version.txt" , emit: version + + script: + def software = getSoftwareName(task.process) + """ + multiqc -f $options.args . + multiqc --version | sed -e "s/multiqc, version //g" > ${software}.version.txt + """ +} diff --git a/modules/nf-core/software/multiqc/meta.yml b/modules/nf-core/software/multiqc/meta.yml new file mode 100644 index 0000000000..02f4b41586 --- /dev/null +++ b/modules/nf-core/software/multiqc/meta.yml @@ -0,0 +1,60 @@ +name: MultiQC +description: Aggregate results from bioinformatics analyses across many samples into a single report +keywords: + - QC + - bioinformatics tools + - Beautiful stand-alone HTML report +tools: + - multiqc: + description: | + MultiQC searches a given directory for analysis logs and compiles a HTML report. + It's a general use tool, perfect for summarising the output from numerous bioinformatics tools. + homepage: https://multiqc.info/ + documentation: https://multiqc.info/docs/ +params: + - outdir: + type: string + description: | + The pipeline's output directory. By default, the module will + output files into `$params.outdir/` + - publish_dir_mode: + type: string + description: | + Value for the Nextflow `publishDir` mode parameter. + Available: symlink, rellink, link, copy, copyNoFollow, move. + - enable_conda: + type: boolean + description: | + Run the module with Conda using the software specified + via the `conda` directive + - singularity_pull_docker_container: + type: boolean + description: | + Instead of directly downloading Singularity images for use with Singularity, + force the workflow to pull and convert Docker containers instead. +input: + - multiqc_files: + type: file + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC +output: + - report: + type: file + description: MultiQC report file + pattern: "multiqc_report.html" + - data: + type: dir + description: MultiQC data dir + pattern: "multiqc_data" + - plots: + type: file + description: Plots created by MultiQC + pattern: "*_data" + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" +authors: + - "@abhi18av" + - "@bunop" + - "@drpatelh" diff --git a/modules/nf-core/software/qualimap_bamqc.nf b/modules/nf-core/software/qualimap_bamqc.nf index 25d3715d11..54cdf90a17 100644 --- a/modules/nf-core/software/qualimap_bamqc.nf +++ b/modules/nf-core/software/qualimap_bamqc.nf @@ -3,10 +3,6 @@ include { initOptions; saveFiles; getSoftwareName } from './functions' params.options = [:] def options = initOptions(params.options) -environment = params.enable_conda ? 
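// Aside: the `saveFiles()` helper in the new `functions.nf` above decides where (and
// whether) each output lands under `params.outdir`. A small worked example with
// hypothetical option values -- when `publish_files` is a Map, only filenames matching a
// listed suffix are published, into the mapped subdirectory, and `*.version.txt` files
// are always held back:
def opts = initOptions([publish_files: ['html': 'report'], publish_dir: 'multiqc'])
// returns 'multiqc/report/sample_multiqc_report.html'
saveFiles(filename: 'sample_multiqc_report.html', options: opts, publish_dir: 'multiqc', publish_id: '')
// returns null, so the version file is not published
saveFiles(filename: 'multiqc.version.txt', options: opts, publish_dir: 'multiqc', publish_id: '')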
"bioconda::qualimap=2.2.2d" : null -container = "quay.io/biocontainers/qualimap:2.2.2d--1" -if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/qualimap:2.2.2d--1" - process QUALIMAP_BAMQC { label 'memory_max' label 'cpus_16' @@ -16,8 +12,12 @@ process QUALIMAP_BAMQC { publishDir params.outdir, mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } - conda environment - container container + conda (params.enable_conda ? "bioconda::qualimap=2.2.2d" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/qualimap:2.2.2d--1" + } else { + container "quay.io/biocontainers/qualimap:2.2.2d--1" + } input: tuple val(meta), path(bam) diff --git a/modules/nf-core/software/samtools/faidx.nf b/modules/nf-core/software/samtools/faidx.nf index c60a62518c..40ac3ce209 100644 --- a/modules/nf-core/software/samtools/faidx.nf +++ b/modules/nf-core/software/samtools/faidx.nf @@ -3,18 +3,18 @@ include { initOptions; saveFiles; getSoftwareName } from './../functions' params.options = [:] def options = initOptions(params.options) -environment = params.enable_conda ? "bioconda::samtools=1.10" : null -container = "quay.io/biocontainers/samtools:1.10--h2e538c0_3" -if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/samtools:1.10--h2e538c0_3" - process SAMTOOLS_FAIDX { tag "${fasta}" publishDir params.outdir, mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:"false") } - conda environment - container container + conda (params.enable_conda ? "bioconda::samtools=1.11" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/samtools:1.11--h6270b1f_0" + } else { + container "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + } input: path fasta diff --git a/modules/nf-core/software/samtools/index.nf b/modules/nf-core/software/samtools/index.nf deleted file mode 100644 index 0e5c3f11f7..0000000000 --- a/modules/nf-core/software/samtools/index.nf +++ /dev/null @@ -1,34 +0,0 @@ -include { initOptions; saveFiles; getSoftwareName } from './../functions' - -params.options = [:] -def options = initOptions(params.options) - -environment = params.enable_conda ? "bioconda::samtools=1.10" : null -container = "quay.io/biocontainers/samtools:1.10--h2e538c0_3" -if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/samtools:1.10--h2e538c0_3" - -process SAMTOOLS_INDEX { - label 'cpus_8' - - tag "${meta.id}" - - publishDir params.outdir, mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } - - conda environment - container container - - input: - tuple val(meta), path(bam) - - output: - tuple val(meta), path("${name}.bam"), path("*.bai") - - script: - name = options.suffix ? "${meta.id}.${options.suffix}" : "${meta.id}" - """ - [ ! 
-f ${name}.bam ] && ln -s ${bam} ${name}.bam - - samtools index ${name}.bam - """ -} \ No newline at end of file diff --git a/modules/nf-core/software/samtools/index/functions.nf b/modules/nf-core/software/samtools/index/functions.nf new file mode 100644 index 0000000000..d25eea86b3 --- /dev/null +++ b/modules/nf-core/software/samtools/index/functions.nf @@ -0,0 +1,59 @@ +/* + * ----------------------------------------------------- + * Utility functions used in nf-core DSL2 module files + * ----------------------------------------------------- + */ + +/* + * Extract name of software tool from process name using $task.process + */ +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +/* + * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules + */ +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.publish_by_id = args.publish_by_id ?: false + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +/* + * Tidy up and join elements of a list to return a path string + */ +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +/* + * Function to save/publish module results + */ +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_id) { + path_list.add(args.publish_id) + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/nf-core/software/samtools/index/main.nf b/modules/nf-core/software/samtools/index/main.nf new file mode 100644 index 0000000000..5dd631daa5 --- /dev/null +++ b/modules/nf-core/software/samtools/index/main.nf @@ -0,0 +1,32 @@ +// Import generic module functions +include { saveFiles; getSoftwareName } from './functions' + +params.options = [:] + +process SAMTOOLS_INDEX { + tag "$meta.id" + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } + + conda (params.enable_conda ? 
"bioconda::samtools=1.10" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/samtools:1.10--h9402c20_2" + } else { + container "quay.io/biocontainers/samtools:1.10--h9402c20_2" + } + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.bai"), emit: bai + path "*.version.txt" , emit: version + + script: + def software = getSoftwareName(task.process) + """ + samtools index $bam + echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' > ${software}.version.txt + """ +} diff --git a/modules/nf-core/software/samtools/index/meta.yml b/modules/nf-core/software/samtools/index/meta.yml new file mode 100644 index 0000000000..089a83be02 --- /dev/null +++ b/modules/nf-core/software/samtools/index/meta.yml @@ -0,0 +1,64 @@ +name: samtools_index +description: Index SAM/BAM/CRAM file +keywords: + - index + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: hhttp://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 +params: + - outdir: + type: string + description: | + The pipeline's output directory. By default, the module will + output files into `$params.outdir/` + - publish_dir_mode: + type: string + description: | + Value for the Nextflow `publishDir` mode parameter. + Available: symlink, rellink, link, copy, copyNoFollow, move. + - enable_conda: + type: boolean + description: | + Run the module with Conda using the software specified + via the `conda` directive + - singularity_pull_docker_container: + type: boolean + description: | + Instead of directly downloading Singularity images for use with Singularity, + force the workflow to pull and convert Docker containers instead. +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" +authors: + - "@drpatelh" + - "@ewels" diff --git a/modules/nf-core/software/samtools/stats.nf b/modules/nf-core/software/samtools/stats.nf deleted file mode 100644 index 6302366db5..0000000000 --- a/modules/nf-core/software/samtools/stats.nf +++ /dev/null @@ -1,31 +0,0 @@ -include { initOptions; saveFiles; getSoftwareName } from './../functions' - -params.options = [:] -def options = initOptions(params.options) - -environment = params.enable_conda ? 
"bioconda::samtools=1.10" : null -container = "quay.io/biocontainers/samtools:1.10--h2e538c0_3" -if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/samtools:1.10--h2e538c0_3" - -process SAMTOOLS_STATS { - label 'cpus_2' - - tag "${meta.id}" - - publishDir params.outdir, mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } - - conda environment - container container - - input: - tuple val(meta), path(bam) - - output: - path ("${bam}.samtools.stats.out") - - script: - """ - samtools stats ${bam} > ${bam}.samtools.stats.out - """ -} \ No newline at end of file diff --git a/modules/nf-core/software/samtools/stats/functions.nf b/modules/nf-core/software/samtools/stats/functions.nf new file mode 100644 index 0000000000..d25eea86b3 --- /dev/null +++ b/modules/nf-core/software/samtools/stats/functions.nf @@ -0,0 +1,59 @@ +/* + * ----------------------------------------------------- + * Utility functions used in nf-core DSL2 module files + * ----------------------------------------------------- + */ + +/* + * Extract name of software tool from process name using $task.process + */ +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +/* + * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules + */ +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.publish_by_id = args.publish_by_id ?: false + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +/* + * Tidy up and join elements of a list to return a path string + */ +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +/* + * Function to save/publish module results + */ +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_id) { + path_list.add(args.publish_id) + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/nf-core/software/samtools/stats/main.nf b/modules/nf-core/software/samtools/stats/main.nf new file mode 100644 index 0000000000..d8d1d02072 --- /dev/null +++ b/modules/nf-core/software/samtools/stats/main.nf @@ -0,0 +1,32 @@ +// Import generic module functions +include { saveFiles; getSoftwareName } from './functions' + +params.options = [:] + +process SAMTOOLS_STATS { + tag "$meta.id" + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } + + conda (params.enable_conda ? 
"bioconda::samtools=1.10" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/samtools:1.10--h9402c20_2" + } else { + container "quay.io/biocontainers/samtools:1.10--h9402c20_2" + } + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.stats"), emit: stats + path "*.version.txt" , emit: version + + script: + def software = getSoftwareName(task.process) + """ + samtools stats $bam > ${bam}.stats + echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' > ${software}.version.txt + """ +} diff --git a/modules/nf-core/software/samtools/stats/meta.yml b/modules/nf-core/software/samtools/stats/meta.yml new file mode 100644 index 0000000000..b907df92c5 --- /dev/null +++ b/modules/nf-core/software/samtools/stats/meta.yml @@ -0,0 +1,68 @@ +name: samtools_stats +description: Produces comprehensive statistics from SAM/BAM/CRAM file +keywords: + - statistics + - counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: hhttp://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 +params: + - outdir: + type: string + description: | + The pipeline's output directory. By default, the module will + output files into `$params.outdir/` + - publish_dir_mode: + type: string + description: | + Value for the Nextflow `publishDir` mode parameter. + Available: symlink, rellink, link, copy, copyNoFollow, move. + - enable_conda: + type: boolean + description: | + Run the module with Conda using the software specified + via the `conda` directive + - singularity_pull_docker_container: + type: boolean + description: | + Instead of directly downloading Singularity images for use with Singularity, + force the workflow to pull and convert Docker containers instead. +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: Index for BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - stats: + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" +authors: + - "@drpatelh" diff --git a/modules/nf-core/software/strelka/germline.nf b/modules/nf-core/software/strelka/germline.nf index 6295ac1d66..39abd20da8 100644 --- a/modules/nf-core/software/strelka/germline.nf +++ b/modules/nf-core/software/strelka/germline.nf @@ -3,10 +3,6 @@ include { initOptions; saveFiles; getSoftwareName } from './../functions' params.options = [:] def options = initOptions(params.options) -environment = params.enable_conda ? 
"bioconda::strelka=2.9.10" : null -container = "quay.io/biocontainers/strelka:2.9.10--0" -if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/strelka:2.9.10--0" - process STRELKA_GERMLINE { tag "${meta.id}" @@ -16,8 +12,12 @@ process STRELKA_GERMLINE { publishDir params.outdir, mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } - conda environment - container container + conda (params.enable_conda ? "bioconda::strelka=2.9.10" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/strelka:2.9.10--0" + } else { + container "quay.io/biocontainers/strelka:2.9.10--0" + } input: tuple val(meta), path(bam), path(bai) diff --git a/modules/nf-core/software/strelka/somatic.nf b/modules/nf-core/software/strelka/somatic.nf index 0cfb2b8458..2efc864edd 100644 --- a/modules/nf-core/software/strelka/somatic.nf +++ b/modules/nf-core/software/strelka/somatic.nf @@ -3,10 +3,6 @@ include { initOptions; saveFiles; getSoftwareName } from './../functions' params.options = [:] def options = initOptions(params.options) -environment = params.enable_conda ? "bioconda::strelka=2.9.10" : null -container = "quay.io/biocontainers/strelka:2.9.10--0" -if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/strelka:2.9.10--0" - process STRELKA_SOMATIC { tag "${meta.id}" @@ -16,8 +12,12 @@ process STRELKA_SOMATIC { publishDir params.outdir, mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } - conda environment - container container + conda (params.enable_conda ? "bioconda::strelka=2.9.10" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/strelka:2.9.10--0" + } else { + container "quay.io/biocontainers/strelka:2.9.10--0" + } input: tuple val(meta), path(bam_normal), path(bai_normal), path(bam_tumor), path(bai_tumor) @@ -66,8 +66,12 @@ process STRELKA_SOMATIC_BEST_PRACTICES { publishDir params.outdir, mode: params.publish_dir_mode, saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } - conda environment - container container + conda (params.enable_conda ? "bioconda::strelka=2.9.10" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/strelka:2.9.10--0" + } else { + container "quay.io/biocontainers/strelka:2.9.10--0" + } input: tuple val(meta), path(bam_normal), path(bai_normal), path(bam_tumor), path(bai_tumor), path(manta_csi), path(manta_csi_tbi) diff --git a/modules/nf-core/software/trimgalore.nf b/modules/nf-core/software/trimgalore.nf deleted file mode 100644 index 60369d80af..0000000000 --- a/modules/nf-core/software/trimgalore.nf +++ /dev/null @@ -1,69 +0,0 @@ -include { initOptions; saveFiles; getSoftwareName } from './functions' - -params.options = [:] -def options = initOptions(params.options) - -environment = params.enable_conda ? 
"bioconda::trim-galore=0.6.5" : null -container = "quay.io/biocontainers/trim-galore:0.6.5--0" -if (workflow.containerEngine == 'singularity' && !params.pull_docker_container) container = "https://depot.galaxyproject.org/singularity/trim-galore:0.6.5--0" - -process TRIMGALORE { - label 'process_high' - - tag "${meta.id}" - - publishDir params.outdir, mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } - - conda environment - container container - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*_1.fq.gz"), path("*_2.fq.gz"), emit: reads - path "*.html" , emit: html optional true - path "*.txt" , emit: log - path "*.version.txt", emit: version - path "*.zip" , emit: zip optional true - - script: - // Calculate number of --cores for TrimGalore based on value of task.cpus - // See: https://github.com/FelixKrueger/TrimGalore/blob/master/Changelog.md#version-060-release-on-1-mar-2019 - // See: https://github.com/nf-core/atacseq/pull/65 - def cores = 1 - if (task.cpus) { - cores = (task.cpus as int) - 4 - if (meta.single_end) cores = (task.cpus as int) - 3 - if (cores < 1) cores = 1 - if (cores > 4) cores = 4 - } - - // Clipping presets have to be evaluated in the context of SE/PE - c_r1 = params.clip_r1 > 0 ? "--clip_r1 ${params.clip_r1}" : '' - c_r2 = params.clip_r2 > 0 ? "--clip_r2 ${params.clip_r2}" : '' - tpc_r1 = params.three_prime_clip_r1 > 0 ? "--three_prime_clip_r1 ${params.three_prime_clip_r1}" : '' - tpc_r2 = params.three_prime_clip_r2 > 0 ? "--three_prime_clip_r2 ${params.three_prime_clip_r2}" : '' - - // Added soft-links to original fastqs for consistent naming in MultiQC - prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" - """ - [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz - [ ! 
-f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz - - trim_galore \\ - ${options.args} \\ - --cores ${cores} \\ - --paired \\ - --gzip \\ - ${c_r1} \\ - ${c_r2} \\ - ${tpc_r1} \\ - ${tpc_r2} \\ - ${prefix}_1.fastq.gz \\ - ${prefix}_2.fastq.gz - - trim_galore --version > trim_galore.version.txt - """ -} diff --git a/modules/nf-core/software/trimgalore/functions.nf b/modules/nf-core/software/trimgalore/functions.nf new file mode 100644 index 0000000000..d25eea86b3 --- /dev/null +++ b/modules/nf-core/software/trimgalore/functions.nf @@ -0,0 +1,59 @@ +/* + * ----------------------------------------------------- + * Utility functions used in nf-core DSL2 module files + * ----------------------------------------------------- + */ + +/* + * Extract name of software tool from process name using $task.process + */ +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +/* + * Function to initialise default values and to generate a Groovy Map of available options for nf-core modules + */ +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.publish_by_id = args.publish_by_id ?: false + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +/* + * Tidy up and join elements of a list to return a path string + */ +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +/* + * Function to save/publish module results + */ +def saveFiles(Map args) { + if (!args.filename.endsWith('.version.txt')) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + if (ioptions.publish_by_id) { + path_list.add(args.publish_id) + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } + } +} diff --git a/modules/nf-core/software/trimgalore/main.nf b/modules/nf-core/software/trimgalore/main.nf new file mode 100644 index 0000000000..79cc745625 --- /dev/null +++ b/modules/nf-core/software/trimgalore/main.nf @@ -0,0 +1,83 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName } from './functions' + +params.options = [:] +def options = initOptions(params.options) + +process TRIMGALORE { + tag "$meta.id" + label 'process_high' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } + + conda (params.enable_conda ? 
"bioconda::trim-galore=0.6.6" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/trim-galore:0.6.6--0" + } else { + container "quay.io/biocontainers/trim-galore:0.6.6--0" + } + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.fq.gz") , emit: reads + tuple val(meta), path("*report.txt"), emit: log + path "*.version.txt" , emit: version + + tuple val(meta), path("*.html"), emit: html optional true + tuple val(meta), path("*.zip") , emit: zip optional true + + script: + // Calculate number of --cores for TrimGalore based on value of task.cpus + // See: https://github.com/FelixKrueger/TrimGalore/blob/master/Changelog.md#version-060-release-on-1-mar-2019 + // See: https://github.com/nf-core/atacseq/pull/65 + def cores = 1 + if (task.cpus) { + cores = (task.cpus as int) - 4 + if (meta.single_end) cores = (task.cpus as int) - 3 + if (cores < 1) cores = 1 + if (cores > 4) cores = 4 + } + + // Clipping presets have to be evaluated in the context of SE/PE + def c_r1 = params.clip_r1 > 0 ? "--clip_r1 ${params.clip_r1}" : '' + def c_r2 = params.clip_r2 > 0 ? "--clip_r2 ${params.clip_r2}" : '' + def tpc_r1 = params.three_prime_clip_r1 > 0 ? "--three_prime_clip_r1 ${params.three_prime_clip_r1}" : '' + def tpc_r2 = params.three_prime_clip_r2 > 0 ? "--three_prime_clip_r2 ${params.three_prime_clip_r2}" : '' + + // Added soft-links to original fastqs for consistent naming in MultiQC + def software = getSoftwareName(task.process) + def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + if (meta.single_end) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz + trim_galore \\ + $options.args \\ + --cores $cores \\ + --gzip \\ + $c_r1 \\ + $tpc_r1 \\ + ${prefix}.fastq.gz + echo \$(trim_galore --version 2>&1) | sed 's/^.*version //; s/Last.*\$//' > ${software}.version.txt + """ + } else { + """ + [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz + [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz + trim_galore \\ + $options.args \\ + --cores $cores \\ + --paired \\ + --gzip \\ + $c_r1 \\ + $c_r2 \\ + $tpc_r1 \\ + $tpc_r2 \\ + ${prefix}_1.fastq.gz \\ + ${prefix}_2.fastq.gz + echo \$(trim_galore --version 2>&1) | sed 's/^.*version //; s/Last.*\$//' > ${software}.version.txt + """ + } +} diff --git a/modules/nf-core/software/trimgalore/meta.yml b/modules/nf-core/software/trimgalore/meta.yml new file mode 100644 index 0000000000..86ba5cd44a --- /dev/null +++ b/modules/nf-core/software/trimgalore/meta.yml @@ -0,0 +1,99 @@ +name: trimgalore +description: Trim FastQ files using Trim Galore! +keywords: + - trimming + - adapters + - sequencing adapters + - fastq +tools: + - trimgalore: + description: | + A wrapper tool around Cutadapt and FastQC to consistently apply quality + and adapter trimming to FastQ files, with some extra functionality for + MspI-digested RRBS-type (Reduced Representation Bisufite-Seq) libraries. + homepage: https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/ + documentation: https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md +params: + - outdir: + type: string + description: | + The pipeline's output directory. By default, the module will + output files into `$params.outdir/` + - publish_dir_mode: + type: string + description: | + Value for the Nextflow `publishDir` mode parameter. 
+        Available: symlink, rellink, link, copy, copyNoFollow, move.
+  - enable_conda:
+      type: boolean
+      description: |
+        Run the module with Conda using the software specified
+        via the `conda` directive
+  - singularity_pull_docker_container:
+      type: boolean
+      description: |
+        Instead of directly downloading Singularity images for use with Singularity,
+        force the workflow to pull and convert Docker containers instead.
+  - clip_r1:
+      type: integer
+      description: |
+        Instructs Trim Galore to remove bp from the 5' end of read 1
+        (or single-end reads)
+  - clip_r2:
+      type: integer
+      description: |
+        Instructs Trim Galore to remove bp from the 5' end of read 2
+        (paired-end reads only)
+  - three_prime_clip_r1:
+      type: integer
+      description: |
+        Instructs Trim Galore to remove bp from the 3' end of read 1
+        AFTER adapter/quality trimming has been performed
+  - three_prime_clip_r2:
+      type: integer
+      description: |
+        Instructs Trim Galore to remove bp from the 3' end of read 2
+        AFTER adapter/quality trimming has been performed
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: |
+        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+        respectively.
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: |
+        List of input adapter trimmed FastQ files of size 1 and 2 for
+        single-end and paired-end data, respectively.
+      pattern: "*.{fq.gz}"
+  - html:
+      type: file
+      description: FastQC report (optional)
+      pattern: "*_{fastqc.html}"
+  - zip:
+      type: file
+      description: FastQC report archive (optional)
+      pattern: "*_{fastqc.zip}"
+  - log:
+      type: file
+      description: Trim Galore! 
trimming report + pattern: "*_{report.txt}" + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" +authors: + - "@drpatelh" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/subworkflow/qc_trim.nf b/modules/nf-core/subworkflow/qc_trim.nf deleted file mode 100644 index b39476c5ef..0000000000 --- a/modules/nf-core/subworkflow/qc_trim.nf +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Read QC and trimming - */ -params.fastqc_options = [:] -params.trimgalore_options = [:] - -include { FASTQC } from '../software/fastqc' addParams(options: params.fastqc_options) -include { TRIMGALORE } from '../software/trimgalore' addParams(options: params.trimgalore_options) - -workflow QC_TRIM { - take: - - reads // channel: [ val(meta), [ reads ] ] - skip_fastqc // boolean: true/false - skip_trimming // boolean: true/false - - main: - - fastqc_html = Channel.empty() - fastqc_version = Channel.empty() - fastqc_zip = Channel.empty() - if (!skip_fastqc) { - FASTQC(reads) - fastqc_html = FASTQC.out.html - fastqc_version = FASTQC.out.version - fastqc_zip = FASTQC.out.zip - } - - trim_reads = reads - trimgalore_html = Channel.empty() - trimgalore_zip = Channel.empty() - trimgalore_log = Channel.empty() - trimgalore_version = Channel.empty() - if (!skip_trimming) { - TRIMGALORE(reads) - trim_reads = TRIMGALORE.out.reads - trimgalore_html = TRIMGALORE.out.html - trimgalore_zip = TRIMGALORE.out.zip - trimgalore_log = TRIMGALORE.out.log - trimgalore_version = TRIMGALORE.out.version - } - - emit: - - fastqc_html // path: *.html - fastqc_zip // path: *.zip - fastqc_version // path: *.version.txt - reads = trim_reads // channel: [ val(meta), [ reads ] ] - trimgalore_html // path: *.html - trimgalore_log // path: *.txt - trimgalore_zip // path: *.zip - trimgalore_version // path: *.version.txt -} diff --git a/scripts/make_snapshot.sh b/scripts/make_snapshot.sh deleted file mode 100755 index 16ae81b5e3..0000000000 --- a/scripts/make_snapshot.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash -set -euo pipefail - -# This script makes an archive of sarek, with or without configs and test datasets -# https://github.com/nf-core/sarek - -usage() { echo "Usage: $0 <-t> <-c>" 1>&2; exit 1; } - -CONFIGS=false -NAME=sarek-$(git describe --tags --always) -TEST=false - -while [[ $# -gt 0 ]] -do - key="$1" - case $key in - -i|-t|--include-test-data) - TEST=true - shift # past argument - ;; - -c|--include-configs) - CONFIGS=true - shift # past argument - ;; - *) # unknown option - shift # past argument - usage - ;; - esac -done - -if [[ $CONFIGS == true ]] -then - echo "Archiving nf-core/configs" - git submodule add -f https://github.com/nf-core/configs.git configs -fi - -if [[ $TEST == true ]] -then - echo "Archiving nf-core/test-datasets:sarek" - git submodule add -f --branch sarek https://github.com/nf-core/test-datasets.git data -fi - -echo "Archiving nf-core/sarek" - -if [[ $CONFIGS == true ]] || [[ $TEST == true ]] -then - git-archive-all --prefix=${NAME} --force-submodules ${NAME}.tar.gz -else - git archive --format=tar.gz HEAD --prefix=${NAME}/ > ${NAME}.tar.gz -fi - -echo "Wrote ${NAME}.tar.gz" diff --git a/modules/local/subworkflow/build_indices.nf b/subworkflow/local/build_indices.nf similarity index 79% rename from modules/local/subworkflow/build_indices.nf rename to subworkflow/local/build_indices.nf index 696e22a733..273e71f141 100644 --- a/modules/local/subworkflow/build_indices.nf +++ b/subworkflow/local/build_indices.nf @@ -19,18 +19,18 @@ params.tabix_pon_options = [:] 
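// Aside: the deleted QC_TRIM subworkflow above and the relocated subworkflows below share
// one wiring idiom -- every optional output is bound to `Channel.empty()` first and only
// re-assigned when its step actually runs, so later `mix()` and `emit:` statements never
// touch an undefined channel. A minimal self-contained sketch with a hypothetical FOO process:
process FOO {
    input:
    path reads

    output:
    path "foo.txt", emit: report

    script:
    """
    echo foo > foo.txt
    """
}

workflow OPTIONAL_FOO {
    take:
    reads
    skip_foo

    main:
    foo_report = Channel.empty() // safe default when FOO is skipped
    if (!skip_foo) {
        FOO(reads)
        foo_report = FOO.out.report
    }

    emit:
    foo_report
}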
// Initialize channels based on params or indices that were just built -include { BUILD_INTERVALS } from '../process/build_intervals.nf' addParams(options: params.build_intervals_options) -include { BWA_INDEX as BWAMEM1_INDEX } from '../../nf-core/software/bwa/index/main.nf' addParams(options: params.bwa_index_options) -include { BWAMEM2_INDEX } from '../../nf-core/software/bwamem2_index.nf' addParams(options: params.bwamem2_index_options) -include { CREATE_INTERVALS_BED } from '../process/create_intervals_bed.nf' addParams(options: params.create_intervals_bed_options) -include { GATK_CREATESEQUENCEDICTIONARY as GATK_DICT } from '../../nf-core/software/gatk/createsequencedictionary.nf' addParams(options: params.gatk_dict_options) -include { HTSLIB_TABIX as TABIX_DBSNP } from '../../nf-core/software/htslib_tabix' addParams(options: params.tabix_dbsnp_options) -include { HTSLIB_TABIX as TABIX_GERMLINE_RESOURCE } from '../../nf-core/software/htslib_tabix' addParams(options: params.tabix_germline_resource_options) -include { HTSLIB_TABIX as TABIX_KNOWN_INDELS } from '../../nf-core/software/htslib_tabix' addParams(options: params.tabix_known_indels_options) -include { HTSLIB_TABIX as TABIX_PON } from '../../nf-core/software/htslib_tabix' addParams(options: params.tabix_pon_options) -include { INDEX_TARGET_BED } from '../process/index_target_bed' addParams(options: params.index_target_bed_options) -include { MSISENSOR_SCAN } from '../../nf-core/software/msisensor/scan.nf' addParams(options: params.msisensor_scan_options) -include { SAMTOOLS_FAIDX } from '../../nf-core/software/samtools/faidx.nf' addParams(options: params.samtools_faidx_options) +include { BUILD_INTERVALS } from '../../modules/local/build_intervals.nf' addParams(options: params.build_intervals_options) +include { BWA_INDEX as BWAMEM1_INDEX } from '../../modules/nf-core/software/bwa/index/main.nf' addParams(options: params.bwa_index_options) +include { BWAMEM2_INDEX } from '../../modules/nf-core/software/bwamem2_index.nf' addParams(options: params.bwamem2_index_options) +include { CREATE_INTERVALS_BED } from '../../modules/local/create_intervals_bed.nf' addParams(options: params.create_intervals_bed_options) +include { GATK_CREATESEQUENCEDICTIONARY as GATK_DICT } from '../../modules/nf-core/software/gatk/createsequencedictionary.nf' addParams(options: params.gatk_dict_options) +include { HTSLIB_TABIX as TABIX_DBSNP } from '../../modules/nf-core/software/htslib_tabix' addParams(options: params.tabix_dbsnp_options) +include { HTSLIB_TABIX as TABIX_GERMLINE_RESOURCE } from '../../modules/nf-core/software/htslib_tabix' addParams(options: params.tabix_germline_resource_options) +include { HTSLIB_TABIX as TABIX_KNOWN_INDELS } from '../../modules/nf-core/software/htslib_tabix' addParams(options: params.tabix_known_indels_options) +include { HTSLIB_TABIX as TABIX_PON } from '../../modules/nf-core/software/htslib_tabix' addParams(options: params.tabix_pon_options) +include { INDEX_TARGET_BED } from '../../modules/local/index_target_bed.nf' addParams(options: params.index_target_bed_options) +include { MSISENSOR_SCAN } from '../../modules/nf-core/software/msisensor/scan.nf' addParams(options: params.msisensor_scan_options) +include { SAMTOOLS_FAIDX } from '../../modules/nf-core/software/samtools/faidx.nf' addParams(options: params.samtools_faidx_options) workflow BUILD_INDICES{ take: @@ -45,11 +45,11 @@ workflow BUILD_INDICES{ main: - result_bwa = Channel.empty() + result_bwa = Channel.empty() version_bwa = Channel.empty() if (!(params.bwa) 
&& 'mapping' in step) if (params.aligner == "bwa-mem") (result_bwa, version_bwa) = BWAMEM1_INDEX(fasta) - else result_bwa = BWAMEM2_INDEX(fasta) + else (result_bwa, version_bwa) = BWAMEM2_INDEX(fasta) result_dict = Channel.empty() if (!(params.dict) && !('annotate' in step) && !('controlfreec' in step)) diff --git a/modules/local/subworkflow/germline_variant_calling.nf b/subworkflow/local/germline_variant_calling.nf similarity index 86% rename from modules/local/subworkflow/germline_variant_calling.nf rename to subworkflow/local/germline_variant_calling.nf index c2288519a2..b742b1de26 100644 --- a/modules/local/subworkflow/germline_variant_calling.nf +++ b/subworkflow/local/germline_variant_calling.nf @@ -10,11 +10,11 @@ params.concat_gvcf_options = [:] params.concat_haplotypecaller_options = [:] params.strelka_options = [:] -include { GATK_HAPLOTYPECALLER as HAPLOTYPECALLER } from '../../nf-core/software/gatk/haplotypecaller' addParams(options: params.haplotypecaller_options) -include { GATK_GENOTYPEGVCF as GENOTYPEGVCF } from '../../nf-core/software/gatk/genotypegvcf' addParams(options: params.genotypegvcf_options) -include { CONCAT_VCF as CONCAT_GVCF } from '../process/concat_vcf' addParams(options: params.concat_gvcf_options) -include { CONCAT_VCF as CONCAT_HAPLOTYPECALLER } from '../process/concat_vcf' addParams(options: params.concat_haplotypecaller_options) -include { STRELKA_GERMLINE as STRELKA } from '../../nf-core/software/strelka/germline' addParams(options: params.strelka_options) +include { GATK_HAPLOTYPECALLER as HAPLOTYPECALLER } from '../../modules/nf-core/software/gatk/haplotypecaller' addParams(options: params.haplotypecaller_options) +include { GATK_GENOTYPEGVCF as GENOTYPEGVCF } from '../../modules/nf-core/software/gatk/genotypegvcf' addParams(options: params.genotypegvcf_options) +include { CONCAT_VCF as CONCAT_GVCF } from '../../modules/local/concat_vcf' addParams(options: params.concat_gvcf_options) +include { CONCAT_VCF as CONCAT_HAPLOTYPECALLER } from '../../modules/local/concat_vcf' addParams(options: params.concat_haplotypecaller_options) +include { STRELKA_GERMLINE as STRELKA } from '../../modules/nf-core/software/strelka/germline' addParams(options: params.strelka_options) workflow GERMLINE_VARIANT_CALLING { take: diff --git a/modules/local/subworkflow/mapping.nf b/subworkflow/local/mapping.nf similarity index 61% rename from modules/local/subworkflow/mapping.nf rename to subworkflow/local/mapping.nf index 4ca5effd2f..85dcb2f8e8 100644 --- a/modules/local/subworkflow/mapping.nf +++ b/subworkflow/local/mapping.nf @@ -4,19 +4,23 @@ ================================================================================ */ -params.bwamem1_mem_options = [:] -params.bwamem2_mem_options = [:] -params.merge_bam_options = [:] -params.qualimap_bamqc_options = [:] -params.samtools_index_options = [:] -params.samtools_stats_options = [:] - -include { BWA_MEM as BWAMEM1_MEM } from '../process/bwa_mem' addParams(options: params.bwamem1_mem_options) -include { BWAMEM2_MEM } from '../process/bwamem2_mem' addParams(options: params.bwamem2_mem_options) -include { MERGE_BAM } from '../process/merge_bam' addParams(options: params.merge_bam_options) -include { QUALIMAP_BAMQC } from '../../nf-core/software/qualimap_bamqc' addParams(options: params.qualimap_bamqc_options) -include { SAMTOOLS_INDEX } from '../../nf-core/software/samtools/index' addParams(options: params.samtools_index_options) -include { SAMTOOLS_STATS } from '../../nf-core/software/samtools/stats' addParams(options: 
params.samtools_stats_options) +params.bwamem1_mem_options = [:] +params.bwamem1_mem_tumor_options = [:] +params.bwamem2_mem_options = [:] +params.bwamem2_mem_tumor_options = [:] +params.merge_bam_options = [:] +params.qualimap_bamqc_options = [:] +params.samtools_index_options = [:] +params.samtools_stats_options = [:] + +include { BWA_MEM as BWAMEM1_MEM } from '../../modules/nf-core/software/bwa/mem/main' addParams(options: params.bwamem1_mem_options) +include { BWA_MEM as BWAMEM1_MEM_T } from '../../modules/nf-core/software/bwa/mem/main' addParams(options: params.bwamem1_mem_tumor_options) +include { BWAMEM2_MEM } from '../../modules/nf-core/software/bwamem2_mem.nf' addParams(options: params.bwamem2_mem_options) +include { BWAMEM2_MEM as BWAMEM2_MEM_T } from '../../modules/nf-core/software/bwamem2_mem.nf' addParams(options: params.bwamem2_mem_tumor_options) +include { MERGE_BAM } from '../../modules/local/merge_bam' addParams(options: params.merge_bam_options) +include { QUALIMAP_BAMQC } from '../../modules/nf-core/software/qualimap_bamqc' addParams(options: params.qualimap_bamqc_options) +include { SAMTOOLS_INDEX } from '../../modules/nf-core/software/samtools/index/main' addParams(options: params.samtools_index_options) +include { SAMTOOLS_STATS } from '../../modules/nf-core/software/samtools/stats/main' addParams(options: params.samtools_stats_options) workflow MAPPING { take: @@ -35,16 +39,32 @@ workflow MAPPING { bam_mapped_index = Channel.empty() bam_reports = Channel.empty() + reads_input.groupTuple(by: [0,1]) + .branch{ + normal: it[0].status == 0 + tumor: it[0].status == 1 + }.set{ reads_input_status } + if (step == "mapping") { bam_bwamem1 = Channel.empty() bam_bwamem2 = Channel.empty() if (params.aligner == "bwa-mem") { - BWAMEM1_MEM(reads_input, bwa, fasta, fai) - bam_bwamem1 = BWAMEM1_MEM.out.bam + BWAMEM1_MEM(reads_input_status.normal, bwa, fasta, fai) + bam_bwamem1_n = BWAMEM1_MEM.out.bam + + BWAMEM1_MEM_T(reads_input_status.tumor, bwa, fasta, fai) + bam_bwamem1_t = BWAMEM1_MEM_T.out.bam + + bam_bwamem1 = bam_bwamem1_n.mix(bam_bwamem1_t) } else { - BWAMEM2_MEM(reads_input, bwa, fasta, fai) - bam_bwamem2 = BWAMEM2_MEM.out + BWAMEM2_MEM(reads_input_status.normal, bwa, fasta, fai) + bam_bwamem2_n = BWAMEM2_MEM.out.bam + + BWAMEM2_MEM_T(reads_input_status.tumor, bwa, fasta, fai) + bam_bwamem2_t = BWAMEM2_MEM_T.out.bam + + bam_bwamem2 = bam_bwamem2_n.mix(bam_bwamem2_t) } bam_bwa = bam_bwamem1.mix(bam_bwamem2) @@ -91,7 +111,9 @@ workflow MAPPING { MERGE_BAM(bam_bwa_multiple) bam_mapped = bam_bwa_single.mix(MERGE_BAM.out.bam) - bam_mapped_index = SAMTOOLS_INDEX(bam_mapped) + + SAMTOOLS_INDEX(bam_mapped) + bam_mapped_index = bam_mapped.join(SAMTOOLS_INDEX.out.bai) qualimap_bamqc = Channel.empty() samtools_stats = Channel.empty() @@ -102,8 +124,8 @@ workflow MAPPING { } if (!skip_samtools) { - SAMTOOLS_STATS(bam_mapped) - samtools_stats = SAMTOOLS_STATS.out + SAMTOOLS_STATS(bam_mapped_index) + samtools_stats = SAMTOOLS_STATS.out.stats } bam_reports = samtools_stats.mix(qualimap_bamqc) diff --git a/modules/local/subworkflow/markduplicates.nf b/subworkflow/local/markduplicates.nf similarity index 80% rename from modules/local/subworkflow/markduplicates.nf rename to subworkflow/local/markduplicates.nf index 531e11b464..ca30123101 100644 --- a/modules/local/subworkflow/markduplicates.nf +++ b/subworkflow/local/markduplicates.nf @@ -6,7 +6,8 @@ params.markduplicates_options = [:] -include { GATK_MARKDUPLICATES } from '../../nf-core/software/gatk/markduplicates' addParams(options: 
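// Aside: two channel operators carry the new MAPPING logic above. `branch` splits one
// channel into named sub-channels on the tumor/normal status flag, and `join` matches
// tuples from two channels on their first element (the meta map), which is how each BAM
// is re-united with the `.bai` emitted by SAMTOOLS_INDEX. A standalone sketch with
// hypothetical sample data:
workflow {
    Channel.of([[id: 'sampleN', status: 0], 'normal.fastq.gz'],
               [[id: 'sampleT', status: 1], 'tumor.fastq.gz'])
        .branch {
            normal: it[0].status == 0
            tumor:  it[0].status == 1
        }
        .set { reads_status }
    reads_status.tumor.view() // [[id:sampleT, status:1], tumor.fastq.gz]

    bam = Channel.of([[id: 'sampleN'], 'normal.md.bam'])
    bai = Channel.of([[id: 'sampleN'], 'normal.md.bam.bai'])
    bam.join(bai).view()      // [[id:sampleN], normal.md.bam, normal.md.bam.bai]
}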
params.markduplicates_options) +include { GATK_MARKDUPLICATES } from '../../modules/nf-core/software/gatk/markduplicates' addParams(options: params.markduplicates_options) +include { GATK_MARKDUPLICATES_SPARK } from '../../modules/nf-core/software/gatk/markduplicates' addParams(options: params.markduplicates_options) workflow MARKDUPLICATES { take: @@ -20,10 +21,17 @@ workflow MARKDUPLICATES { if (step == "mapping") { if (!params.skip_markduplicates) { - GATK_MARKDUPLICATES(bam_mapped) - report_markduplicates = GATK_MARKDUPLICATES.out.report - bam_markduplicates = GATK_MARKDUPLICATES.out.bam - tsv_markduplicates = GATK_MARKDUPLICATES.out.tsv + if (params.use_gatk_spark) { + GATK_MARKDUPLICATES_SPARK(bam_mapped) + report_markduplicates = GATK_MARKDUPLICATES_SPARK.out.report + bam_markduplicates = GATK_MARKDUPLICATES_SPARK.out.bam + tsv_markduplicates = GATK_MARKDUPLICATES_SPARK.out.tsv + } else { + GATK_MARKDUPLICATES(bam_mapped) + report_markduplicates = GATK_MARKDUPLICATES.out.report + bam_markduplicates = GATK_MARKDUPLICATES.out.bam + tsv_markduplicates = GATK_MARKDUPLICATES.out.tsv + } // Creating TSV files to restart from this step tsv_markduplicates.collectFile(storeDir: "${params.outdir}/preprocessing/tsv") { meta -> diff --git a/modules/local/subworkflow/pair_variant_calling.nf b/subworkflow/local/pair_variant_calling.nf similarity index 90% rename from modules/local/subworkflow/pair_variant_calling.nf rename to subworkflow/local/pair_variant_calling.nf index 37b21807d6..5654217909 100644 --- a/modules/local/subworkflow/pair_variant_calling.nf +++ b/subworkflow/local/pair_variant_calling.nf @@ -9,10 +9,10 @@ params.msisensor_msi_options = [:] params.strelka_options = [:] params.strelka_bp_options = [:] -include { MANTA_SOMATIC as MANTA } from '../../nf-core/software/manta/somatic' addParams(options: params.manta_options) -include { MSISENSOR_MSI } from '../../nf-core/software/msisensor/msi' addParams(options: params.msisensor_msi_options) -include { STRELKA_SOMATIC as STRELKA } from '../../nf-core/software/strelka/somatic' addParams(options: params.strelka_options) -include { STRELKA_SOMATIC_BEST_PRACTICES as STRELKA_BP } from '../../nf-core/software/strelka/somatic' addParams(options: params.strelka_bp_options) +include { MANTA_SOMATIC as MANTA } from '../../modules/nf-core/software/manta/somatic' addParams(options: params.manta_options) +include { MSISENSOR_MSI } from '../../modules/nf-core/software/msisensor/msi' addParams(options: params.msisensor_msi_options) +include { STRELKA_SOMATIC as STRELKA } from '../../modules/nf-core/software/strelka/somatic' addParams(options: params.strelka_options) +include { STRELKA_SOMATIC_BEST_PRACTICES as STRELKA_BP } from '../../modules/nf-core/software/strelka/somatic' addParams(options: params.strelka_bp_options) workflow PAIR_VARIANT_CALLING { take: diff --git a/modules/local/subworkflow/prepare_recalibration.nf b/subworkflow/local/prepare_recalibration.nf similarity index 94% rename from modules/local/subworkflow/prepare_recalibration.nf rename to subworkflow/local/prepare_recalibration.nf index 510e4bbb55..9f0baf94e9 100644 --- a/modules/local/subworkflow/prepare_recalibration.nf +++ b/subworkflow/local/prepare_recalibration.nf @@ -7,8 +7,8 @@ params.baserecalibrator_options = [:] params.gatherbqsrreports_options = [:] -include { GATK_BASERECALIBRATOR as BASERECALIBRATOR } from '../../nf-core/software/gatk/baserecalibrator' addParams(options: params.baserecalibrator_options) -include { GATK_GATHERBQSRREPORTS as GATHERBQSRREPORTS } from 
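// Aside: the TSV "restart" files written by the MARKDUPLICATES subworkflow above rely on
// `collectFile`, which folds a channel's items into a single file on disk. A minimal
// sketch with hypothetical meta maps and columns (the real TSV layout is defined in the
// subworkflow itself):
workflow {
    Channel.of([patient: 'p1', sample: 's1'], [patient: 'p1', sample: 's2'])
        .collectFile(name: 'markduplicates.tsv', storeDir: "${params.outdir}/preprocessing/tsv") { meta ->
            "${meta.patient}\t${meta.sample}\t${meta.sample}.md.bam\t${meta.sample}.md.bam.bai\n"
        }
}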
'../../nf-core/software/gatk/gatherbqsrreports' addParams(options: params.gatherbqsrreports_options)
+include { GATK_BASERECALIBRATOR as BASERECALIBRATOR } from '../../modules/nf-core/software/gatk/baserecalibrator' addParams(options: params.baserecalibrator_options)
+include { GATK_GATHERBQSRREPORTS as GATHERBQSRREPORTS } from '../../modules/nf-core/software/gatk/gatherbqsrreports' addParams(options: params.gatherbqsrreports_options)
 workflow PREPARE_RECALIBRATION {
     take:
diff --git a/modules/local/subworkflow/recalibrate.nf b/subworkflow/local/recalibrate.nf
similarity index 82%
rename from modules/local/subworkflow/recalibrate.nf
rename to subworkflow/local/recalibrate.nf
index 5551199799..e2eb64b453 100644
--- a/modules/local/subworkflow/recalibrate.nf
+++ b/subworkflow/local/recalibrate.nf
@@ -10,11 +10,11 @@ params.qualimap_bamqc_options = [:]
 params.samtools_index_options = [:]
 params.samtools_stats_options = [:]
-include { GATK_APPLYBQSR as APPLYBQSR } from '../../nf-core/software/gatk/applybqsr' addParams(options: params.applybqsr_options)
-include { MERGE_BAM } from '../process/merge_bam' addParams(options: params.merge_bam_options)
-include { QUALIMAP_BAMQC } from '../../nf-core/software/qualimap_bamqc' addParams(options: params.qualimap_bamqc_options)
-include { SAMTOOLS_INDEX } from '../../nf-core/software/samtools/index' addParams(options: params.samtools_index_options)
-include { SAMTOOLS_STATS } from '../../nf-core/software/samtools/stats' addParams(options: params.samtools_stats_options)
+include { GATK_APPLYBQSR as APPLYBQSR } from '../../modules/nf-core/software/gatk/applybqsr' addParams(options: params.applybqsr_options)
+include { MERGE_BAM } from '../../modules/local/merge_bam' addParams(options: params.merge_bam_options)
+include { QUALIMAP_BAMQC } from '../../modules/nf-core/software/qualimap_bamqc' addParams(options: params.qualimap_bamqc_options)
+include { SAMTOOLS_INDEX } from '../../modules/nf-core/software/samtools/index/main' addParams(options: params.samtools_index_options)
+include { SAMTOOLS_STATS } from '../../modules/nf-core/software/samtools/stats/main' addParams(options: params.samtools_stats_options)
 workflow RECALIBRATE {
     take:
@@ -71,7 +71,8 @@ workflow RECALIBRATE {
         tsv_recalibrated = MERGE_BAM.out.tsv
     }
-    bam_recalibrated_index = SAMTOOLS_INDEX(bam_recalibrated)
+    SAMTOOLS_INDEX(bam_recalibrated)
+    bam_recalibrated_index = bam_recalibrated.join(SAMTOOLS_INDEX.out.bai)
     qualimap_bamqc = Channel.empty()
     samtools_stats = Channel.empty()
@@ -82,8 +83,8 @@
     }
     if (!skip_samtools) {
-        SAMTOOLS_STATS(bam_recalibrated)
-        samtools_stats = SAMTOOLS_STATS.out
+        SAMTOOLS_STATS(bam_recalibrated_index)
+        samtools_stats = SAMTOOLS_STATS.out.stats
     }
     bam_reports = samtools_stats.mix(qualimap_bamqc)
diff --git a/subworkflow/nf-core/fastqc_trimgalore.nf b/subworkflow/nf-core/fastqc_trimgalore.nf
new file mode 100644
index 0000000000..87cac4e452
--- /dev/null
+++ b/subworkflow/nf-core/fastqc_trimgalore.nf
@@ -0,0 +1,51 @@
+/*
+ * Read QC and trimming
+ */
+
+params.fastqc_options = [:]
+params.trimgalore_options = [:]
+
+include { FASTQC } from '../../modules/nf-core/software/fastqc/main' addParams( options: params.fastqc_options )
+include { TRIMGALORE } from '../../modules/nf-core/software/trimgalore/main' addParams( options: params.trimgalore_options )
+
+workflow FASTQC_TRIMGALORE {
+    take:
+    reads // channel: [ val(meta), [ reads ] ]
+    skip_fastqc // boolean: true/false
+    skip_trimming // boolean: true/false
+
+    main:
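+    // QC report channels default to Channel.empty() so the emit block and any downstream
+    // mix() calls still work when FastQC or trimming is skipped; they are only reassigned
+    // when the corresponding step runs. The pattern, in sketch form:
+    //     out = Channel.empty()
+    //     if (!skip_step) { STEP(input); out = STEP.out }
+    fastqc_html = Channel.empty()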
+    fastqc_zip = Channel.empty()
+    fastqc_version = Channel.empty()
+    if (!skip_fastqc) {
+        FASTQC ( reads ).html.set { fastqc_html }
+        fastqc_zip = FASTQC.out.zip
+        fastqc_version = FASTQC.out.version
+    }
+
+    trim_reads = reads
+    trim_html = Channel.empty()
+    trim_zip = Channel.empty()
+    trim_log = Channel.empty()
+    trimgalore_version = Channel.empty()
+    if (!skip_trimming) {
+        TRIMGALORE ( reads ).reads.set { trim_reads }
+        trim_html = TRIMGALORE.out.html
+        trim_zip = TRIMGALORE.out.zip
+        trim_log = TRIMGALORE.out.log
+        trimgalore_version = TRIMGALORE.out.version
+    }
+
+    emit:
+    reads = trim_reads // channel: [ val(meta), [ reads ] ]
+
+    fastqc_html // channel: [ val(meta), [ html ] ]
+    fastqc_zip // channel: [ val(meta), [ zip ] ]
+    fastqc_version // path: *.version.txt
+
+    trim_html // channel: [ val(meta), [ html ] ]
+    trim_zip // channel: [ val(meta), [ zip ] ]
+    trim_log // channel: [ val(meta), [ txt ] ]
+    trimgalore_version // path: *.version.txt
+}
diff --git a/workflows/sarek.nf b/workflows/sarek.nf
new file mode 100644
index 0000000000..a0ef24439f
--- /dev/null
+++ b/workflows/sarek.nf
@@ -0,0 +1,420 @@
+////////////////////////////////////////////////////
+/*        -- INCLUDE SAREK FUNCTIONS --          */
+////////////////////////////////////////////////////
+
+include {
+    check_parameter_existence;
+    check_parameter_list;
+    define_anno_list;
+    define_skip_qc_list;
+    define_step_list;
+    define_tool_list;
+    extract_bam;
+    extract_fastq;
+    extract_fastq_from_dir;
+    extract_pileup;
+    extract_recal;
+    has_extension
+} from '../modules/local/functions'
+
+////////////////////////////////////////////////////
+/*    -- SET UP CONFIGURATION VARIABLES --        */
+////////////////////////////////////////////////////
+
+step_list = define_step_list()
+step = params.step ? params.step.toLowerCase().replaceAll('-', '').replaceAll('_', '') : ''
+
+if (step.contains(',')) exit 1, 'You can choose only one step, see --help for more information'
+if (!check_parameter_existence(step, step_list)) exit 1, "Unknown step ${step}, see --help for more information"
+
+tool_list = define_tool_list()
+tools = params.tools ? params.tools.split(',').collect{it.trim().toLowerCase().replaceAll('-', '').replaceAll('_', '')} : []
+if (step == 'controlfreec') tools = ['controlfreec']
+if (!check_parameter_list(tools, tool_list)) exit 1, 'Unknown tool(s), see --help for more information'
+
+skip_qc_list = define_skip_qc_list()
+skip_qc = params.skip_qc ? params.skip_qc == 'all' ? skip_qc_list : params.skip_qc.split(',').collect{it.trim().toLowerCase().replaceAll('-', '').replaceAll('_', '')} : []
+if (!check_parameter_list(skip_qc, skip_qc_list)) exit 1, 'Unknown QC tool(s), see --help for more information'
+
+anno_list = define_anno_list()
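+// NB: sketch only. check_parameter_list is defined in modules/local/functions and is not
+// part of this diff; given how it is called above, a minimal implementation would be:
+//     def check_parameter_list(list, realList) {
+//         return list.every{ check_parameter_existence(it, realList) }
+//     }
+// i.e. every normalised user-supplied value must appear in the matching allow-list.
+annotate_tools = params.annotate_tools ? params.annotate_tools.split(',').collect{it.trim().toLowerCase().replaceAll('-', '')} : []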
+if (!check_parameter_list(annotate_tools, anno_list)) exit 1, 'Unknown tool(s) to annotate, see --help for more information'
+
+if (!(params.aligner in ['bwa-mem', 'bwa-mem2'])) exit 1, 'Unknown aligner, see --help for more information'
+
+// Check parameters
+if ((params.ascat_ploidy && !params.ascat_purity) || (!params.ascat_ploidy && params.ascat_purity)) exit 1, 'Please specify both --ascat_purity and --ascat_ploidy, or none of them'
+if (params.cf_window && params.cf_coeff) exit 1, 'Please specify either --cf_window OR --cf_coeff, but not both of them'
+if (params.umi && !(params.read_structure1 && params.read_structure2)) exit 1, 'Please specify both --read_structure1 and --read_structure2 when using --umi'
+
+// Handle input
+tsv_path = null
+if (params.input && (has_extension(params.input, "tsv") || has_extension(params.input, "vcf") || has_extension(params.input, "vcf.gz"))) tsv_path = params.input
+if (params.input && (has_extension(params.input, "vcf") || has_extension(params.input, "vcf.gz"))) step = "annotate"
+
+save_bam_mapped = params.skip_markduplicates ? true : params.save_bam_mapped ? true : false
+
+// If no input file is specified, try to get the TSV file corresponding to the step from the TSV directory
+// (only for the steps preparerecalibration, recalibrate, variantcalling and controlfreec)
+if (!params.input && params.sentieon) {
+    switch (step) {
+        case 'mapping': break
+        case 'recalibrate': tsv_path = "${params.outdir}/preprocessing/tsv/sentieon_deduped.tsv"; break
+        case 'variantcalling': tsv_path = "${params.outdir}/preprocessing/tsv/sentieon_recalibrated.tsv"; break
+        case 'annotate': break
+        default: exit 1, "Unknown step ${step}"
+    }
+} else if (!params.input && !params.sentieon && !params.skip_markduplicates) {
+    switch (step) {
+        case 'mapping': break
+        case 'preparerecalibration': tsv_path = "${params.outdir}/preprocessing/tsv/markduplicates_no_table.tsv"; break
+        case 'recalibrate': tsv_path = "${params.outdir}/preprocessing/tsv/markduplicates.tsv"; break
+        case 'variantcalling': tsv_path = "${params.outdir}/preprocessing/tsv/recalibrated.tsv"; break
+        case 'controlfreec': tsv_path = "${params.outdir}/variant_calling/tsv/control-freec_mpileup.tsv"; break
+        case 'annotate': break
+        default: exit 1, "Unknown step ${step}"
+    }
+} else if (!params.input && !params.sentieon && params.skip_markduplicates) {
+    switch (step) {
+        case 'mapping': break
+        case 'preparerecalibration': tsv_path = "${params.outdir}/preprocessing/tsv/mapped.tsv"; break
+        case 'recalibrate': tsv_path = "${params.outdir}/preprocessing/tsv/mapped_no_markduplicates.tsv"; break
+        case 'variantcalling': tsv_path = "${params.outdir}/preprocessing/tsv/recalibrated.tsv"; break
+        case 'controlfreec': tsv_path = "${params.outdir}/variant_calling/tsv/control-freec_mpileup.tsv"; break
+        case 'annotate': break
+        default: exit 1, "Unknown step ${step}"
+    }
+}
+
+input_sample = Channel.empty()
+if (tsv_path) {
+    tsv_file = file(tsv_path)
+    switch (step) {
+        case 'mapping': input_sample = extract_fastq(tsv_file); break
+        case 'preparerecalibration': input_sample = extract_bam(tsv_file); break
+        case 'recalibrate': input_sample = extract_recal(tsv_file); break
+        case 'variantcalling': input_sample = extract_bam(tsv_file); break
+        case 'controlfreec': input_sample = extract_pileup(tsv_file); break
+        case 'annotate': break
+        default: exit 1, "Unknown step ${step}"
+    }
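+    // NB: extract_pileup comes from modules/local/functions (it must be listed in the
+    // include block above to be callable here). Its body is not part of this diff; by
+    // analogy with the other extractors it is assumed to map each TSV row to a
+    // [meta, file] tuple, roughly:
+    //     def extract_pileup(tsv_file) {
+    //         Channel.from(tsv_file).splitCsv(sep: '\t')
+    //             .map { row -> [[patient: row[0], gender: row[1], status: row[2].toInteger(), id: row[3]], file(row[4])] }
+    //     }
+} else if (params.input && !has_extension(params.input, "tsv")) {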
"tsv")) { + log.info "No TSV file" + if (step != 'mapping') exit 1, 'No step other than "mapping" supports a directory as an input' + log.info "Reading ${params.input} directory" + log.warn "[nf-core/sarek] in ${params.input} directory, all fastqs are assuming to be from the same sample, which is assumed to be a germline one" + input_sample = extract_fastq_from_dir(params.input) + tsv_file = params.input // used in the reports +} else if (tsv_path && step == 'annotate') { + log.info "Annotating ${tsv_path}" +} else if (step == 'annotate') { + log.info "Trying automatic annotation on files in the VariantCalling/ directory" +} else exit 1, 'No sample were defined, see --help' + +//////////////////////////////////////////////////// +/* -- UPDATE MODULES OPTIONS BASED ON PARAMS -- */ +//////////////////////////////////////////////////// + +modules = params.modules + +if (params.save_reference) modules['build_intervals'].publish_files = ['bed':'intervals'] +if (params.save_reference) modules['bwa_index'].publish_files = ['amb':'bwa', 'ann':'bwa', 'bwt':'bwa', 'pac':'bwa', 'sa':'bwa'] +if (params.save_reference) modules['bwamem2_index'].publish_files = ['0123':'bwamem2', 'amb':'bwamem2', 'ann':'bwamem2', 'bwt.2bit.64':'bwamem2', 'bwt.8bit.32':'bwamem2', 'pac':'bwamem2'] +if (params.save_reference) modules['create_intervals_bed'].publish_files = ['bed':'intervals'] +if (params.save_reference) modules['dict'].publish_files = ['dict':'dict'] +if (params.save_reference) modules['index_target_bed'].publish_files = ['bed.gz':'target', 'bed.gz.tbi':'target'] +if (params.save_reference) modules['msisensor_scan'].publish_files = ['list':'msi'] +if (params.save_reference) modules['samtools_faidx'].publish_files = ['fai':'fai'] +if (params.save_reference) modules['tabix_dbsnp'].publish_files = ['vcf.gz.tbi':'dbsnp'] +if (params.save_reference) modules['tabix_germline_resource'].publish_files = ['vcf.gz.tbi':'germline_resource'] +if (params.save_reference) modules['tabix_known_indels'].publish_files = ['vcf.gz.tbi':'known_indels'] +if (params.save_reference) modules['tabix_pon'].publish_files = ['vcf.gz.tbi':'pon'] +if (save_bam_mapped) modules['samtools_index_mapping'].publish_files = ['bam':'mapped', 'bai':'mapped'] +if (params.skip_markduplicates) modules['baserecalibrator'].publish_files = ['recal.table':'mapped'] +if (params.skip_markduplicates) modules['gatherbqsrreports'].publish_files = ['recal.table':'mapped'] + +file("${params.outdir}/no_file").text = "no_file\n" + +// Initialize file channels based on params, defined in the params.genomes[params.genome] scope +chr_dir = params.chr_dir ? file(params.chr_dir) : file("${params.outdir}/no_file") +chr_length = params.chr_length ? file(params.chr_length) : file("${params.outdir}/no_file") +dbsnp = params.dbsnp ? file(params.dbsnp) : file("${params.outdir}/no_file") +fasta = params.fasta ? file(params.fasta) : file("${params.outdir}/no_file") +germline_resource = params.germline_resource ? file(params.germline_resource) : file("${params.outdir}/no_file") +known_indels = params.known_indels ? file(params.known_indels) : file("${params.outdir}/no_file") +loci = params.ac_loci ? file(params.ac_loci) : file("${params.outdir}/no_file") +loci_gc = params.ac_loci_gc ? file(params.ac_loci_gc) : file("${params.outdir}/no_file") +mappability = params.mappability ? 
+
+// Initialize value channels based on params, defined in the params.genomes[params.genome] scope
+snpeff_db = params.snpeff_db ?: Channel.empty()
+snpeff_species = params.species ?: Channel.empty()
+vep_cache_version = params.vep_cache_version ?: Channel.empty()
+
+// Initialize file channels based on params, not defined within the params.genomes[params.genome] scope
+cadd_indels = params.cadd_indels ? file(params.cadd_indels) : file("${params.outdir}/no_file")
+cadd_indels_tbi = params.cadd_indels_tbi ? file(params.cadd_indels_tbi) : file("${params.outdir}/no_file")
+cadd_wg_snvs = params.cadd_wg_snvs ? file(params.cadd_wg_snvs) : file("${params.outdir}/no_file")
+cadd_wg_snvs_tbi = params.cadd_wg_snvs_tbi ? file(params.cadd_wg_snvs_tbi) : file("${params.outdir}/no_file")
+pon = params.pon ? file(params.pon) : file("${params.outdir}/no_file")
+snpeff_cache = params.snpeff_cache ? file(params.snpeff_cache) : file("${params.outdir}/no_file")
+target_bed = params.target_bed ? file(params.target_bed) : file("${params.outdir}/no_file")
+vep_cache = params.vep_cache ? file(params.vep_cache) : file("${params.outdir}/no_file")
+
+// Initialize value channels based on params, not defined within the params.genomes[params.genome] scope
+read_structure1 = params.read_structure1 ?: Channel.empty()
+read_structure2 = params.read_structure2 ?: Channel.empty()
+
+if ('mutect2' in tools && !(params.pon)) log.warn "[nf-core/sarek] Mutect2 was requested, but no panel of normals was given, so results will not be optimal"
+if (params.sentieon) log.warn "[nf-core/sarek] Sentieon will be used; this only works if Sentieon is available where nf-core/sarek is run"
+
+////////////////////////////////////////////////////
+/*    -- INCLUDE LOCAL SUBWORKFLOWS --           */
+////////////////////////////////////////////////////
+
+include { BUILD_INDICES } from '../subworkflow/local/build_indices' addParams(
+    build_intervals_options: modules['build_intervals'],
+    bwa_index_options: modules['bwa_index'],
+    bwamem2_index_options: modules['bwamem2_index'],
+    create_intervals_bed_options: modules['create_intervals_bed'],
+    gatk_dict_options: modules['dict'],
+    index_target_bed_options: modules['index_target_bed'],
+    msisensor_scan_options: modules['msisensor_scan'],
+    samtools_faidx_options: modules['samtools_faidx'],
+    tabix_dbsnp_options: modules['tabix_dbsnp'],
+    tabix_germline_resource_options: modules['tabix_germline_resource'],
+    tabix_known_indels_options: modules['tabix_known_indels'],
+    tabix_pon_options: modules['tabix_pon']
+)
+include { MAPPING } from '../subworkflow/local/mapping' addParams(
+    bwamem1_mem_options: modules['bwa_mem1_mem'],
+    bwamem1_mem_tumor_options: modules['bwa_mem1_mem_tumor'],
+    bwamem2_mem_options: modules['bwa_mem2_mem'],
+    bwamem2_mem_tumor_options: modules['bwa_mem2_mem_tumor'],
+    merge_bam_options: modules['merge_bam_mapping'],
+    qualimap_bamqc_options: modules['qualimap_bamqc_mapping'],
+    samtools_index_options: modules['samtools_index_mapping'],
+    samtools_stats_options: modules['samtools_stats_mapping']
+)
+include { MARKDUPLICATES } from '../subworkflow/local/markduplicates' addParams(
+    markduplicates_options: modules['markduplicates']
+)
+include { PREPARE_RECALIBRATION } from '../subworkflow/local/prepare_recalibration' addParams(
+    baserecalibrator_options: modules['baserecalibrator'],
+    gatherbqsrreports_options: modules['gatherbqsrreports']
+)
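+// NB: these includes pass per-context module options in through addParams, so one module
+// or subworkflow can use different args and publishing settings per usage. The shape of
+// an entry in params.modules sketched below is illustrative only (the real entries live
+// in the modules config, not in this diff):
+//     'markduplicates': [args: '', publish_dir: 'preprocessing', publish_files: [:]]
+include { RECALIBRATE } from '../subworkflow/local/recalibrate' addParams(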
applybqsr_options: modules['applybqsr'], + merge_bam_options: modules['merge_bam_recalibrate'], + qualimap_bamqc_options: modules['qualimap_bamqc_recalibrate'], + samtools_index_options: modules['samtools_index_recalibrate'], + samtools_stats_options: modules['samtools_stats_recalibrate'] +) +include { GERMLINE_VARIANT_CALLING } from '../subworkflow/local/germline_variant_calling' addParams( + concat_gvcf_options: modules['concat_gvcf'], + concat_haplotypecaller_options: modules['concat_haplotypecaller'], + genotypegvcf_options: modules['genotypegvcf'], + haplotypecaller_options: modules['haplotypecaller'], + strelka_options: modules['strelka_germline'] +) +// include { TUMOR_VARIANT_CALLING } from '../subworkflow/local/tumor_variant_calling' addParams( +// ) +include { PAIR_VARIANT_CALLING } from '../subworkflow/local/pair_variant_calling' addParams( + manta_options: modules['manta_somatic'], + msisensor_msi_options: modules['msisensor_msi'], + strelka_bp_options: modules['strelka_somatic_bp'], + strelka_options: modules['strelka_somatic'] +) + +//////////////////////////////////////////////////// +/* -- INCLUDE NF-CORE MODULES -- */ +//////////////////////////////////////////////////// + +include { MULTIQC } from '../modules/nf-core/software/multiqc/main' + +//////////////////////////////////////////////////// +/* -- INCLUDE NF-CORE SUBWORKFLOWS -- */ +//////////////////////////////////////////////////// + +include { FASTQC_TRIMGALORE } from '../subworkflow/nf-core/fastqc_trimgalore' addParams( + fastqc_options: modules['fastqc'], + trimgalore_options: modules['trimgalore'] +) + +workflow SAREK { + + //////////////////////////////////////////////////// + /* -- BUILD INDICES -- */ + //////////////////////////////////////////////////// + + BUILD_INDICES( + dbsnp, + fasta, + germline_resource, + known_indels, + pon, + step, + target_bed, + tools) + + intervals = BUILD_INDICES.out.intervals + + bwa = params.bwa ? file(params.bwa) : BUILD_INDICES.out.bwa + dict = params.dict ? file(params.dict) : BUILD_INDICES.out.dict + fai = params.fasta_fai ? file(params.fasta_fai) : BUILD_INDICES.out.fai + + dbsnp_tbi = params.dbsnp ? params.dbsnp_index ? file(params.dbsnp_index) : BUILD_INDICES.out.dbsnp_tbi : file("${params.outdir}/no_file") + germline_resource_tbi = params.germline_resource ? params.germline_resource_index ? file(params.germline_resource_index) : BUILD_INDICES.out.germline_resource_tbi : file("${params.outdir}/no_file") + known_indels_tbi = params.known_indels ? params.known_indels_index ? file(params.known_indels_index) : BUILD_INDICES.out.known_indels_tbi.collect() : file("${params.outdir}/no_file") + pon_tbi = params.pon ? params.pon_index ? 
file(params.pon_index) : BUILD_INDICES.out.pon_tbi : file("${params.outdir}/no_file") + + msisensor_scan = BUILD_INDICES.out.msisensor_scan + target_bed_gz_tbi = BUILD_INDICES.out.target_bed_gz_tbi + + //////////////////////////////////////////////////// + /* -- PREPROCESSING -- */ + //////////////////////////////////////////////////// + + bam_mapped = Channel.empty() + bam_mapped_qc = Channel.empty() + bam_recalibrated_qc = Channel.empty() + input_reads = Channel.empty() + qc_reports = Channel.empty() + + // STEP 0: QC & TRIM + // `--skip_qc fastqc` to skip fastqc + // trim only with `--trim_fastq` + // additional options to be set up + + FASTQC_TRIMGALORE( + input_sample, + ('fastqc' in skip_qc || step != "mapping"), + !(params.trim_fastq)) + + reads_input = FASTQC_TRIMGALORE.out.reads + + qc_reports = qc_reports.mix( + FASTQC_TRIMGALORE.out.fastqc_html, + FASTQC_TRIMGALORE.out.fastqc_zip, + FASTQC_TRIMGALORE.out.trim_html, + FASTQC_TRIMGALORE.out.trim_log, + FASTQC_TRIMGALORE.out.trim_zip) + + // STEP 1: MAPPING READS TO REFERENCE GENOME WITH BWA-MEM + + MAPPING( + ('bamqc' in skip_qc), + ('samtools' in skip_qc), + bwa, + fai, + fasta, + reads_input, + save_bam_mapped, + step, + target_bed) + + bam_mapped = MAPPING.out.bam + bam_mapped_qc = MAPPING.out.qc + + qc_reports = qc_reports.mix(bam_mapped_qc) + + // STEP 2: MARKING DUPLICATES + + MARKDUPLICATES( + bam_mapped, + step) + + bam_markduplicates = MARKDUPLICATES.out.bam + + if (step == 'preparerecalibration') bam_markduplicates = input_sample + + // STEP 3: CREATING RECALIBRATION TABLES + + PREPARE_RECALIBRATION( + bam_markduplicates, + dbsnp, + dbsnp_tbi, + dict, + fai, + fasta, + intervals, + known_indels, + known_indels_tbi, + step) + + table_bqsr = PREPARE_RECALIBRATION.out.table_bqsr + + // STEP 4: RECALIBRATING + bam_applybqsr = bam_markduplicates.join(table_bqsr) + + if (step == 'recalibrate') bam_applybqsr = input_sample + + RECALIBRATE( + ('bamqc' in skip_qc), + ('samtools' in skip_qc), + bam_applybqsr, + dict, + fai, + fasta, + intervals, + step, + target_bed) + + bam_recalibrated = RECALIBRATE.out.bam + bam_recalibrated_qc = RECALIBRATE.out.qc + + qc_reports = qc_reports.mix(bam_recalibrated_qc) + + bam_variant_calling = bam_recalibrated + + if (step == 'variantcalling') bam_variant_calling = input_sample + + //////////////////////////////////////////////////// + /* -- GERMLINE VARIANT CALLING -- */ + //////////////////////////////////////////////////// + + GERMLINE_VARIANT_CALLING( + bam_variant_calling, + dbsnp, + dbsnp_tbi, + dict, + fai, + fasta, + intervals, + target_bed, + target_bed_gz_tbi, + tools) + + //////////////////////////////////////////////////// + /* -- SOMATIC VARIANT CALLING -- */ + //////////////////////////////////////////////////// + + // TUMOR_VARIANT_CALLING( + // bam_variant_calling, + // dbsnp, + // dbsnp_tbi, + // dict, + // fai, + // fasta, + // intervals, + // target_bed, + // target_bed_gz_tbi, + // tools) + + PAIR_VARIANT_CALLING( + bam_variant_calling, + dbsnp, + dbsnp_tbi, + dict, + fai, + fasta, + intervals, + msisensor_scan, + target_bed, + target_bed_gz_tbi, + tools) + + //////////////////////////////////////////////////// + /* -- ANNOTATION -- */ + //////////////////////////////////////////////////// + +} \ No newline at end of file