From b592c86ceaba95c6173633265548c461707ea167 Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 22 Dec 2021 13:58:27 +0100 Subject: [PATCH 01/30] Install gatk4 modules --- modules.json | 24 ++++ .../nf-core/modules/gatk4/applybqsr/main.nf | 46 ++++++++ .../nf-core/modules/gatk4/applybqsr/meta.yml | 67 +++++++++++ .../modules/gatk4/baserecalibrator/main.nf | 49 ++++++++ .../modules/gatk4/baserecalibrator/meta.yml | 67 +++++++++++ .../gatk4/estimatelibrarycomplexity/main.nf | 44 ++++++++ .../gatk4/estimatelibrarycomplexity/meta.yml | 56 ++++++++++ .../modules/gatk4/gatherbqsrreports/main.nf | 41 +++++++ .../modules/gatk4/gatherbqsrreports/meta.yml | 43 +++++++ .../modules/gatk4/genotypegvcfs/main.nf | 51 +++++++++ .../modules/gatk4/genotypegvcfs/meta.yml | 73 ++++++++++++ .../modules/gatk4/haplotypecaller/main.nf | 52 +++++++++ .../modules/gatk4/haplotypecaller/meta.yml | 75 +++++++++++++ .../modules/gatk4/markduplicates/main.nf | 43 +++++++ .../modules/gatk4/markduplicates/meta.yml | 50 +++++++++ modules/nf-core/modules/gatk4/mutect2/main.nf | 76 +++++++++++++ .../nf-core/modules/gatk4/mutect2/meta.yml | 105 ++++++++++++++++++ 17 files changed, 962 insertions(+) create mode 100644 modules/nf-core/modules/gatk4/applybqsr/main.nf create mode 100644 modules/nf-core/modules/gatk4/applybqsr/meta.yml create mode 100644 modules/nf-core/modules/gatk4/baserecalibrator/main.nf create mode 100644 modules/nf-core/modules/gatk4/baserecalibrator/meta.yml create mode 100644 modules/nf-core/modules/gatk4/estimatelibrarycomplexity/main.nf create mode 100644 modules/nf-core/modules/gatk4/estimatelibrarycomplexity/meta.yml create mode 100644 modules/nf-core/modules/gatk4/gatherbqsrreports/main.nf create mode 100644 modules/nf-core/modules/gatk4/gatherbqsrreports/meta.yml create mode 100644 modules/nf-core/modules/gatk4/genotypegvcfs/main.nf create mode 100644 modules/nf-core/modules/gatk4/genotypegvcfs/meta.yml create mode 100644 modules/nf-core/modules/gatk4/haplotypecaller/main.nf 
create mode 100644 modules/nf-core/modules/gatk4/haplotypecaller/meta.yml create mode 100644 modules/nf-core/modules/gatk4/markduplicates/main.nf create mode 100644 modules/nf-core/modules/gatk4/markduplicates/meta.yml create mode 100644 modules/nf-core/modules/gatk4/mutect2/main.nf create mode 100644 modules/nf-core/modules/gatk4/mutect2/meta.yml diff --git a/modules.json b/modules.json index 0ef14d64aa..cb5ffe864e 100644 --- a/modules.json +++ b/modules.json @@ -27,9 +27,33 @@ "freebayes": { "git_sha": "9d0cad583b9a71a6509b754fdf589cbfbed08961" }, + "gatk4/applybqsr": { + "git_sha": "50eafb19c689bca7747a6d44b8e7a555ba172f4b" + }, + "gatk4/baserecalibrator": { + "git_sha": "50eafb19c689bca7747a6d44b8e7a555ba172f4b" + }, "gatk4/createsequencedictionary": { "git_sha": "7389963d5cb18f81c10dff128c510e518ee4f0f6" }, + "gatk4/estimatelibrarycomplexity": { + "git_sha": "50eafb19c689bca7747a6d44b8e7a555ba172f4b" + }, + "gatk4/gatherbqsrreports": { + "git_sha": "50eafb19c689bca7747a6d44b8e7a555ba172f4b" + }, + "gatk4/genotypegvcfs": { + "git_sha": "50eafb19c689bca7747a6d44b8e7a555ba172f4b" + }, + "gatk4/haplotypecaller": { + "git_sha": "50eafb19c689bca7747a6d44b8e7a555ba172f4b" + }, + "gatk4/markduplicates": { + "git_sha": "50eafb19c689bca7747a6d44b8e7a555ba172f4b" + }, + "gatk4/mutect2": { + "git_sha": "50eafb19c689bca7747a6d44b8e7a555ba172f4b" + }, "manta/germline": { "git_sha": "9d0cad583b9a71a6509b754fdf589cbfbed08961" }, diff --git a/modules/nf-core/modules/gatk4/applybqsr/main.nf b/modules/nf-core/modules/gatk4/applybqsr/main.nf new file mode 100644 index 0000000000..20294c2781 --- /dev/null +++ b/modules/nf-core/modules/gatk4/applybqsr/main.nf @@ -0,0 +1,46 @@ +process GATK4_APPLYBQSR { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.4.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gatk4:4.2.4.0--hdfd78af_0' : + 'quay.io/biocontainers/gatk4:4.2.4.0--hdfd78af_0' }" + + input: + tuple val(meta), path(input), path(input_index), path(bqsr_table) + path fasta + path fai + path dict + path intervals + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def interval = intervals ? "-L ${intervals}" : "" + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK ApplyBQSR] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ + gatk --java-options "-Xmx${avail_mem}g" ApplyBQSR \\ + -R $fasta \\ + -I $input \\ + --bqsr-recal-file $bqsr_table \\ + $interval \\ + --tmp-dir . \\ + -O ${prefix}.bam \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/gatk4/applybqsr/meta.yml b/modules/nf-core/modules/gatk4/applybqsr/meta.yml new file mode 100644 index 0000000000..4e3b2f9a53 --- /dev/null +++ b/modules/nf-core/modules/gatk4/applybqsr/meta.yml @@ -0,0 +1,67 @@ +name: gatk4_applybqsr +description: Apply base quality score recalibration (BQSR) to a bam file +keywords: + - bqsr + - bam +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. 
+ homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ['Apache-2.0'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - bqsr_table: + type: file + description: Recalibration table from gatk4_baserecalibrator + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + - intervalsBed: + type: file + description: Bed file with the genomic regions included in the library (optional) + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bam: + type: file + description: Recalibrated BAM file + pattern: "*.{bam}" + +authors: + - "@yocra3" + - "@FriederikeHanssen" diff --git a/modules/nf-core/modules/gatk4/baserecalibrator/main.nf b/modules/nf-core/modules/gatk4/baserecalibrator/main.nf new file mode 100644 index 0000000000..8a45d6e241 --- /dev/null +++ b/modules/nf-core/modules/gatk4/baserecalibrator/main.nf @@ -0,0 +1,49 @@ +process GATK4_BASERECALIBRATOR { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.4.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gatk4:4.2.4.0--hdfd78af_0' : + 'quay.io/biocontainers/gatk4:4.2.4.0--hdfd78af_0' }" + + input: + tuple val(meta), path(input), path(input_index) + path fasta + path fai + path dict + path intervalsBed + path knownSites + path knownSites_tbi + + output: + tuple val(meta), path("*.table"), emit: table + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def intervalsCommand = intervalsBed ? "-L ${intervalsBed}" : "" + def sitesCommand = knownSites.collect{"--known-sites ${it}"}.join(' ') + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK BaseRecalibrator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ + gatk --java-options "-Xmx${avail_mem}g" BaseRecalibrator \ + -R $fasta \ + -I $input \ + $sitesCommand \ + $intervalsCommand \ + --tmp-dir . \ + $args \ + -O ${prefix}.table + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/gatk4/baserecalibrator/meta.yml b/modules/nf-core/modules/gatk4/baserecalibrator/meta.yml new file mode 100644 index 0000000000..188340b471 --- /dev/null +++ b/modules/nf-core/modules/gatk4/baserecalibrator/meta.yml @@ -0,0 +1,67 @@ +name: gatk4_baserecalibrator +description: Generate recalibration table for Base Quality Score Recalibration (BQSR) +keywords: + - sort +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. 
+ homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ['Apache-2.0'] + + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + - intervalsBed: + type: file + description: Bed file with the genomic regions included in the library (optional) + - knownSites: + type: file + description: Bed file with the genomic regions included in the library (optional) + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - table: + type: file + description: Recalibration table from BaseRecalibrator + pattern: "*.{table}" + +authors: + - "@yocra3" + - "@FriederikeHanssen" diff --git a/modules/nf-core/modules/gatk4/estimatelibrarycomplexity/main.nf b/modules/nf-core/modules/gatk4/estimatelibrarycomplexity/main.nf new file mode 100644 index 0000000000..78079c20ed --- /dev/null +++ b/modules/nf-core/modules/gatk4/estimatelibrarycomplexity/main.nf @@ -0,0 +1,44 @@ +process GATK4_ESTIMATELIBRARYCOMPLEXITY { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.4.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gatk4:4.2.4.0--hdfd78af_0' : + 'quay.io/biocontainers/gatk4:4.2.4.0--hdfd78af_0' }" + + input: + tuple val(meta), path(cram) + path(fasta) + path(fai) + path(dict) + + output: + tuple val(meta), path('*.metrics'), emit: metrics + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def crams = cram.collect(){ x -> "-I ".concat(x.toString()) }.join(" ") + + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK EstimateLibraryComplexity] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ + gatk --java-options "-Xmx${avail_mem}g" EstimateLibraryComplexity \ + ${crams} \ + -O ${prefix}.metrics \ + --REFERENCE_SEQUENCE ${fasta} \ + --VALIDATION_STRINGENCY SILENT \ + --TMP_DIR . $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/gatk4/estimatelibrarycomplexity/meta.yml b/modules/nf-core/modules/gatk4/estimatelibrarycomplexity/meta.yml new file mode 100644 index 0000000000..94c1817d9d --- /dev/null +++ b/modules/nf-core/modules/gatk4/estimatelibrarycomplexity/meta.yml @@ -0,0 +1,56 @@ +name: gatk4_estimatelibrarycomplexity +description: Estimates the numbers of unique molecules in a sequencing library. +keywords: + - gatk4 + - gatk4_estimatelibrarycomplexity + - duplication_metrics + - reporting +tools: + - gatk4: + description: Genome Analysis Toolkit (GATK4) + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us + tool_dev_url: https://github.com/broadinstitute/gatk + doi: "10.1158/1538-7445.AM2017-3590" + licence: ['Apache-2.0'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - cram: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: Index of reference fasta file + pattern: "fasta.fai" + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - metrics: + type: file + description: File containing metrics on the input files + pattern: "*.{metrics}" + +authors: + - "@FriederikeHanssen" diff --git a/modules/nf-core/modules/gatk4/gatherbqsrreports/main.nf b/modules/nf-core/modules/gatk4/gatherbqsrreports/main.nf new file mode 100644 index 0000000000..721f5634d9 --- /dev/null +++ b/modules/nf-core/modules/gatk4/gatherbqsrreports/main.nf @@ -0,0 +1,41 @@ +process GATK4_GATHERBQSRREPORTS { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.4.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.2.4.0--hdfd78af_0' : + 'quay.io/biocontainers/gatk4:4.2.4.0--hdfd78af_0' }" + + input: + tuple val(meta), path(recal_table) + + output: + tuple val(meta), path("*.table"), emit: table + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input = recal_table.collect{"-I ${it}"}.join(' ') + + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK GatherBQSRReports] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = task.memory.giga + } + """ + gatk --java-options "-Xmx${avail_mem}g" \\ + GatherBQSRReports \ + ${input} \ + --tmp-dir . \ + $args \ + --output ${prefix}.table + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/gatk4/gatherbqsrreports/meta.yml b/modules/nf-core/modules/gatk4/gatherbqsrreports/meta.yml new file mode 100644 index 0000000000..f71afd69d1 --- /dev/null +++ b/modules/nf-core/modules/gatk4/gatherbqsrreports/meta.yml @@ -0,0 +1,43 @@ +name: gatk4_gatherbqsrreports +description: Gathers scattered BQSR recalibration reports into a single file +keywords: + - gatk4 + - gatk4_gatherbqsrreports + - base_recalibration +tools: + - gatk4: + description: Genome Analysis Toolkit (GATK4) + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us + tool_dev_url: https://github.com/broadinstitute/gatk + doi: "10.1158/1538-7445.AM2017-3590" + licence: ['BSD-3-clause'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - recal_table: + type: file + description: File(s) containing BQSR table(s) + pattern: "*.table" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - table: + type: file + description: File containing joined BQSR table + pattern: "*.table" + +authors: + - "@FriederikeHanssen" diff --git a/modules/nf-core/modules/gatk4/genotypegvcfs/main.nf b/modules/nf-core/modules/gatk4/genotypegvcfs/main.nf new file mode 100644 index 0000000000..b1c15a5e53 --- /dev/null +++ b/modules/nf-core/modules/gatk4/genotypegvcfs/main.nf @@ -0,0 +1,51 @@ +process GATK4_GENOTYPEGVCFS { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.4.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.2.4.0--hdfd78af_0' : + 'quay.io/biocontainers/gatk4:4.2.4.0--hdfd78af_0' }" + + input: + tuple val(meta), path(gvcf), path(gvcf_index) + path fasta + path fasta_index + path fasta_dict + path dbsnp + path dbsnp_index + path intervals_bed + + output: + tuple val(meta), path("*.vcf.gz"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def dbsnp_options = dbsnp ? "-D ${dbsnp}" : "" + def interval_options = intervals_bed ? "-L ${intervals_bed}" : "" + def gvcf_options = gvcf.name.endsWith(".vcf") || gvcf.name.endsWith(".vcf.gz") ? "$gvcf" : "gendb://$gvcf" + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK GenotypeGVCFs] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = task.memory.giga + } + """ + gatk --java-options "-Xmx${avail_mem}g" \\ + GenotypeGVCFs \\ + $args \\ + $interval_options \\ + $dbsnp_options \\ + -R $fasta \\ + -V $gvcf_options \\ + -O ${prefix}.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/gatk4/genotypegvcfs/meta.yml b/modules/nf-core/modules/gatk4/genotypegvcfs/meta.yml new file mode 100644 index 0000000000..e6b38863be --- /dev/null +++ b/modules/nf-core/modules/gatk4/genotypegvcfs/meta.yml @@ -0,0 +1,73 @@ +name: gatk4_genotypegvcfs +description: | + Perform joint genotyping on one or more samples pre-called with HaplotypeCaller. +keywords: + - joint genotyping + - genotype + - gvcf +tools: + - gatk4: + description: Genome Analysis Toolkit (GATK4) + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + tool_dev_url: https://github.com/broadinstitute/gatk + doi: "10.1158/1538-7445.AM2017-3590" + licence: ['BSD-3-clause'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - gvcf: + type: tuple of files + description: | + Tuple of gVCF(.gz) file (first) and its index (second) or the path to a GenomicsDB (and empty) + pattern: ["*.{vcf,vcf.gz}", "*.{idx,tbi}"] + - fasta: + type: file + description: Reference fasta file + pattern: "*.fasta" + - fasta_index: + type: file + description: Reference fasta index file + pattern: "*.fai" + - fasta_dict: + type: file + description: Reference fasta sequence dict file + pattern: "*.dict" + - dbsnp: + type: file + description: dbSNP VCF file + pattern: "*.vcf.gz" + - dbsnp_index: + type: tuple of files + description: dbSNP VCF index file + pattern: "*.tbi" + - intervals_bed: + type: file + description: An intevals BED file + pattern: "*.bed" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: Genotyped VCF file + pattern: "*.vcf.gz" + - tbi: + type: file + description: Tbi index for VCF file + pattern: "*.vcf.gz" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@santiagorevale" diff --git a/modules/nf-core/modules/gatk4/haplotypecaller/main.nf b/modules/nf-core/modules/gatk4/haplotypecaller/main.nf new file mode 100644 index 0000000000..d6cf514db1 --- /dev/null +++ b/modules/nf-core/modules/gatk4/haplotypecaller/main.nf @@ -0,0 +1,52 @@ +process GATK4_HAPLOTYPECALLER { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.4.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gatk4:4.2.4.0--hdfd78af_0' : + 'quay.io/biocontainers/gatk4:4.2.4.0--hdfd78af_0' }" + + input: + tuple val(meta), path(input), path(input_index) + path fasta + path fai + path dict + path dbsnp + path dbsnp_tbi + path interval + + output: + tuple val(meta), path("*.vcf.gz"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def interval_option = interval ? "-L ${interval}" : "" + def dbsnp_option = dbsnp ? "-D ${dbsnp}" : "" + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ + gatk \\ + --java-options "-Xmx${avail_mem}g" \\ + HaplotypeCaller \\ + -R $fasta \\ + -I $input \\ + ${dbsnp_option} \\ + ${interval_option} \\ + -O ${prefix}.vcf.gz \\ + $args \\ + --tmp-dir . + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/gatk4/haplotypecaller/meta.yml b/modules/nf-core/modules/gatk4/haplotypecaller/meta.yml new file mode 100644 index 0000000000..6c9d08911a --- /dev/null +++ b/modules/nf-core/modules/gatk4/haplotypecaller/meta.yml @@ -0,0 +1,75 @@ +name: gatk4_haplotypecaller +description: Call germline SNPs and indels via local re-assembly of haplotypes +keywords: + - gatk4 + - haplotypecaller + - haplotype +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. 
+ homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ['Apache-2.0'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: Index of reference fasta file + pattern: "fasta.fai" + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + - dbsnp: + type: file + description: VCF file containing known sites (optional) + - dbsnp_tbi: + type: file + description: VCF index of dbsnp (optional) + - interval: + type: file + description: Bed file with the genomic regions included in the library (optional) + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: Compressed VCF file + pattern: "*.vcf.gz" + - tbi: + type: file + description: Index of VCF file + pattern: "*.vcf.gz.tbi" + +authors: + - "@suzannejin" + - "@FriederikeHanssen" diff --git a/modules/nf-core/modules/gatk4/markduplicates/main.nf b/modules/nf-core/modules/gatk4/markduplicates/main.nf new file mode 100644 index 0000000000..5f6f127e3c --- /dev/null +++ b/modules/nf-core/modules/gatk4/markduplicates/main.nf @@ -0,0 +1,43 @@ +process GATK4_MARKDUPLICATES { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? 
"bioconda::gatk4=4.2.4.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.2.4.0--hdfd78af_0' : + 'quay.io/biocontainers/gatk4:4.2.4.0--hdfd78af_0' }" + + input: + tuple val(meta), path(bams) + + output: + tuple val(meta), path("*.bam") , emit: bam + tuple val(meta), path("*.bai") , emit: bai + tuple val(meta), path("*.metrics"), emit: metrics + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def bam_list = bams.collect(){ bam -> "--INPUT ".concat(bam.toString()) }.join(" ") + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ + gatk --java-options "-Xmx${avail_mem}g" MarkDuplicates \\ + $bam_list \\ + --METRICS_FILE ${prefix}.metrics \\ + --TMP_DIR . \\ + --CREATE_INDEX true \\ + --OUTPUT ${prefix}.bam \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/gatk4/markduplicates/meta.yml b/modules/nf-core/modules/gatk4/markduplicates/meta.yml new file mode 100644 index 0000000000..5777067a4f --- /dev/null +++ b/modules/nf-core/modules/gatk4/markduplicates/meta.yml @@ -0,0 +1,50 @@ +name: gatk4_markduplicates +description: This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are defined as originating from a single fragment of DNA. +keywords: + - markduplicates + - bam + - sort +tools: + - gatk4: + description: Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. 
Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard- + tool_dev_url: https://github.com/broadinstitute/gatk + doi: 10.1158/1538-7445.AM2017-3590 + licence: ['MIT'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM file + pattern: "*.{bam}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bam: + type: file + description: Marked duplicates BAM file + pattern: "*.{bam}" + - metrics: + type: file + description: Duplicate metrics file generated by GATK + pattern: "*.{metrics}" + +authors: + - "@ajodeh-juma" + - "@FriederikeHanssen" diff --git a/modules/nf-core/modules/gatk4/mutect2/main.nf b/modules/nf-core/modules/gatk4/mutect2/main.nf new file mode 100644 index 0000000000..1abf44eab2 --- /dev/null +++ b/modules/nf-core/modules/gatk4/mutect2/main.nf @@ -0,0 +1,76 @@ +process GATK4_MUTECT2 { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.4.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gatk4:4.2.4.0--hdfd78af_0' : + 'quay.io/biocontainers/gatk4:4.2.4.0--hdfd78af_0' }" + + input: + tuple val(meta) , path(input) , path(input_index) , val(which_norm) + val run_single + val run_pon + val run_mito + val interval_label + path fasta + path fai + path dict + path germline_resource + path germline_resource_tbi + path panel_of_normals + path panel_of_normals_tbi + + output: + tuple val(meta), path("*.vcf.gz") , emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi + tuple val(meta), path("*.stats") , emit: stats + tuple val(meta), path("*.f1r2.tar.gz"), optional:true, emit: f1r2 + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def panels_command = '' + def normals_command = '' + + def inputs_command = '-I ' + input.join( ' -I ') + + if(run_pon) { + panels_command = '' + normals_command = '' + + } else if(run_single) { + panels_command = " --germline-resource $germline_resource --panel-of-normals $panel_of_normals" + normals_command = '' + + } else if(run_mito){ + panels_command = "-L ${interval_label} --mitochondria-mode" + normals_command = '' + + } else { + panels_command = " --germline-resource $germline_resource --panel-of-normals $panel_of_normals --f1r2-tar-gz ${prefix}.f1r2.tar.gz" + normals_command = '-normal ' + which_norm.join( ' -normal ') + } + + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK Mutect2] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = task.memory.giga + } + """ + gatk --java-options "-Xmx${avail_mem}g" Mutect2 \\ + -R ${fasta} \\ + ${inputs_command} \\ + ${normals_command} \\ + ${panels_command} \\ + -O ${prefix}.vcf.gz \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/gatk4/mutect2/meta.yml b/modules/nf-core/modules/gatk4/mutect2/meta.yml new file mode 100644 index 0000000000..83f6cb7c6e --- /dev/null +++ b/modules/nf-core/modules/gatk4/mutect2/meta.yml @@ -0,0 +1,105 @@ +name: gatk4_mutect2 +description: Call somatic SNVs and indels via local assembly of haplotypes. +keywords: + - gatk4 + - mutect2 + - haplotype + - somatic +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ['Apache-2.0'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test'] + - input: + type: list + description: list of BAM files, also able to take CRAM as an input + pattern: "*.{bam/cram}" + - input_index: + type: list + description: list of BAM file indexes, also able to take CRAM indexes as an input + pattern: "*.{bam.bai/cram.crai}" + - which_norm: + type: list + description: optional list of sample headers contained in the normal sample bam files (these are required for tumor_normal_pair mode) + pattern: "testN" + - run_single: + type: boolean + description: Specify whether or not to run in tumor_single mode instead of tumor_normal_pair mode (will be ignored if run_pon is also true) + pattern: "true/false" + - run_pon: + type: boolean + description: Specify whether or not to run in panel_of_normal mode instead of tumor_normal_pair mode + pattern: "true/false" + - run_mito: + type: boolean + description: Specify whether or not to run in mitochondria-mode instead of tumor_normal_pair mode + pattern: "true/false" + - interval_label: + type: string + description: Specify the label used for mitochondrial chromosome when mutect2 is run in mitochondria mode. + pattern: "chrM" + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + - germline_resource: + type: file + description: Population vcf of germline sequencing, containing allele fractions. + pattern: "*.vcf.gz" + - germline_resource_tbi: + type: file + description: Index file for the germline resource. + pattern: "*.vcf.gz.tbi" + - panel_of_normals: + type: file + description: vcf file to be used as a panel of normals. + pattern: "*.vcf.gz" + - panel_of_normals_tbi: + type: file + description: Index for the panel of normals. 
+ pattern: "*.vcf.gz.tbi" + +output: + - vcf: + type: file + description: compressed vcf file + pattern: "*.vcf.gz" + - tbi: + type: file + description: Index of vcf file + pattern: "*vcf.gz.tbi" + - stats: + type: file + description: Stats file that pairs with output vcf file + pattern: "*vcf.gz.stats" + - f1r2: + type: file + description: file containing information to be passed to LearnReadOrientationModel (only outputted when tumor_normal_pair mode is run) + pattern: "*.f1r2.tar.gz" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@GCJMackenzie" From aa332f39b586d14bd3a85f6f84a2ea8f275c818a Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 23 Dec 2021 13:04:01 +0100 Subject: [PATCH 02/30] update gatk4 modules with interval changes --- modules/nf-core/modules/gatk4/applybqsr/main.nf | 12 +++++++----- modules/nf-core/modules/gatk4/applybqsr/meta.yml | 7 ++++--- .../nf-core/modules/gatk4/baserecalibrator/main.nf | 7 ++++--- .../nf-core/modules/gatk4/baserecalibrator/meta.yml | 6 +++--- modules/nf-core/modules/gatk4/genotypegvcfs/main.nf | 5 ++--- modules/nf-core/modules/gatk4/genotypegvcfs/meta.yml | 7 +++---- .../nf-core/modules/gatk4/haplotypecaller/main.nf | 5 ++--- .../nf-core/modules/gatk4/haplotypecaller/meta.yml | 6 +++--- 8 files changed, 28 insertions(+), 27 deletions(-) diff --git a/modules/nf-core/modules/gatk4/applybqsr/main.nf b/modules/nf-core/modules/gatk4/applybqsr/main.nf index 20294c2781..22702211e3 100644 --- a/modules/nf-core/modules/gatk4/applybqsr/main.nf +++ b/modules/nf-core/modules/gatk4/applybqsr/main.nf @@ -8,20 +8,22 @@ process GATK4_APPLYBQSR { 'quay.io/biocontainers/gatk4:4.2.4.0--hdfd78af_0' }" input: - tuple val(meta), path(input), path(input_index), path(bqsr_table) + tuple val(meta), path(input), path(input_index), path(bqsr_table), path(intervals) path fasta path fai path dict - path intervals output: - tuple val(meta), path("*.bam"), emit: bam - path 
"versions.yml" , emit: versions + tuple val(meta), path("*.bam"), emit: bam, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + path "versions.yml" , emit: versions script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def interval = intervals ? "-L ${intervals}" : "" + def file_type = input.getExtension() + def avail_mem = 3 if (!task.memory) { log.info '[GATK ApplyBQSR] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -35,7 +37,7 @@ process GATK4_APPLYBQSR { --bqsr-recal-file $bqsr_table \\ $interval \\ --tmp-dir . \\ - -O ${prefix}.bam \\ + -O ${prefix}.${file_type} \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/modules/gatk4/applybqsr/meta.yml b/modules/nf-core/modules/gatk4/applybqsr/meta.yml index 4e3b2f9a53..ad1f82a1e4 100644 --- a/modules/nf-core/modules/gatk4/applybqsr/meta.yml +++ b/modules/nf-core/modules/gatk4/applybqsr/meta.yml @@ -31,6 +31,9 @@ input: - bqsr_table: type: file description: Recalibration table from gatk4_baserecalibrator + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) - fasta: type: file description: The reference fasta file @@ -43,9 +46,7 @@ input: type: file description: GATK sequence dictionary pattern: "*.dict" - - intervalsBed: - type: file - description: Bed file with the genomic regions included in the library (optional) + output: - meta: diff --git a/modules/nf-core/modules/gatk4/baserecalibrator/main.nf b/modules/nf-core/modules/gatk4/baserecalibrator/main.nf index 8a45d6e241..3a26d6ccda 100644 --- a/modules/nf-core/modules/gatk4/baserecalibrator/main.nf +++ b/modules/nf-core/modules/gatk4/baserecalibrator/main.nf @@ -8,11 +8,10 @@ process GATK4_BASERECALIBRATOR { 'quay.io/biocontainers/gatk4:4.2.4.0--hdfd78af_0' }" input: - tuple val(meta), path(input), path(input_index) + tuple val(meta), path(input), path(input_index), 
path(intervals) path fasta path fai path dict - path intervalsBed path knownSites path knownSites_tbi @@ -23,14 +22,16 @@ process GATK4_BASERECALIBRATOR { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def intervalsCommand = intervalsBed ? "-L ${intervalsBed}" : "" + def intervalsCommand = intervals ? "-L ${intervals}" : "" def sitesCommand = knownSites.collect{"--known-sites ${it}"}.join(' ') + def avail_mem = 3 if (!task.memory) { log.info '[GATK BaseRecalibrator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { avail_mem = task.memory.giga } + """ gatk --java-options "-Xmx${avail_mem}g" BaseRecalibrator \ -R $fasta \ diff --git a/modules/nf-core/modules/gatk4/baserecalibrator/meta.yml b/modules/nf-core/modules/gatk4/baserecalibrator/meta.yml index 188340b471..641a50df02 100644 --- a/modules/nf-core/modules/gatk4/baserecalibrator/meta.yml +++ b/modules/nf-core/modules/gatk4/baserecalibrator/meta.yml @@ -28,6 +28,9 @@ input: type: file description: BAI/CRAI file from alignment pattern: "*.{bai,crai}" + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) - fasta: type: file description: The reference fasta file @@ -40,9 +43,6 @@ input: type: file description: GATK sequence dictionary pattern: "*.dict" - - intervalsBed: - type: file - description: Bed file with the genomic regions included in the library (optional) - knownSites: type: file description: Bed file with the genomic regions included in the library (optional) diff --git a/modules/nf-core/modules/gatk4/genotypegvcfs/main.nf b/modules/nf-core/modules/gatk4/genotypegvcfs/main.nf index b1c15a5e53..f9107b1e94 100644 --- a/modules/nf-core/modules/gatk4/genotypegvcfs/main.nf +++ b/modules/nf-core/modules/gatk4/genotypegvcfs/main.nf @@ -8,13 +8,12 @@ process GATK4_GENOTYPEGVCFS { 'quay.io/biocontainers/gatk4:4.2.4.0--hdfd78af_0' }" input: - tuple val(meta), 
path(gvcf), path(gvcf_index) + tuple val(meta), path(gvcf), path(gvcf_index), path(intervals) path fasta path fasta_index path fasta_dict path dbsnp path dbsnp_index - path intervals_bed output: tuple val(meta), path("*.vcf.gz"), emit: vcf @@ -25,7 +24,7 @@ process GATK4_GENOTYPEGVCFS { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def dbsnp_options = dbsnp ? "-D ${dbsnp}" : "" - def interval_options = intervals_bed ? "-L ${intervals_bed}" : "" + def interval_options = intervals ? "-L ${intervals}" : "" def gvcf_options = gvcf.name.endsWith(".vcf") || gvcf.name.endsWith(".vcf.gz") ? "$gvcf" : "gendb://$gvcf" def avail_mem = 3 if (!task.memory) { diff --git a/modules/nf-core/modules/gatk4/genotypegvcfs/meta.yml b/modules/nf-core/modules/gatk4/genotypegvcfs/meta.yml index e6b38863be..2c9767b2e5 100644 --- a/modules/nf-core/modules/gatk4/genotypegvcfs/meta.yml +++ b/modules/nf-core/modules/gatk4/genotypegvcfs/meta.yml @@ -25,6 +25,9 @@ input: description: | Tuple of gVCF(.gz) file (first) and its index (second) or the path to a GenomicsDB (and empty) pattern: ["*.{vcf,vcf.gz}", "*.{idx,tbi}"] + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) - fasta: type: file description: Reference fasta file @@ -45,10 +48,6 @@ input: type: tuple of files description: dbSNP VCF index file pattern: "*.tbi" - - intervals_bed: - type: file - description: An intevals BED file - pattern: "*.bed" output: - meta: diff --git a/modules/nf-core/modules/gatk4/haplotypecaller/main.nf b/modules/nf-core/modules/gatk4/haplotypecaller/main.nf index d6cf514db1..8b4c0e6463 100644 --- a/modules/nf-core/modules/gatk4/haplotypecaller/main.nf +++ b/modules/nf-core/modules/gatk4/haplotypecaller/main.nf @@ -8,13 +8,12 @@ process GATK4_HAPLOTYPECALLER { 'quay.io/biocontainers/gatk4:4.2.4.0--hdfd78af_0' }" input: - tuple val(meta), path(input), path(input_index) + tuple val(meta), path(input), path(input_index), 
path(intervals) path fasta path fai path dict path dbsnp path dbsnp_tbi - path interval output: tuple val(meta), path("*.vcf.gz"), emit: vcf @@ -24,7 +23,7 @@ process GATK4_HAPLOTYPECALLER { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def interval_option = interval ? "-L ${interval}" : "" + def interval_option = intervals ? "-L ${intervals}" : "" def dbsnp_option = dbsnp ? "-D ${dbsnp}" : "" def avail_mem = 3 if (!task.memory) { diff --git a/modules/nf-core/modules/gatk4/haplotypecaller/meta.yml b/modules/nf-core/modules/gatk4/haplotypecaller/meta.yml index 6c9d08911a..869bd1d2dd 100644 --- a/modules/nf-core/modules/gatk4/haplotypecaller/meta.yml +++ b/modules/nf-core/modules/gatk4/haplotypecaller/meta.yml @@ -29,6 +29,9 @@ input: type: file description: BAI/CRAI file from alignment pattern: "*.{bai,crai}" + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) - fasta: type: file description: The reference fasta file @@ -47,9 +50,6 @@ input: - dbsnp_tbi: type: file description: VCF index of dbsnp (optional) - - interval: - type: file - description: Bed file with the genomic regions included in the library (optional) output: - meta: From f17c203b7d507426ac1dfae830a97d926f94a26e Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 23 Dec 2021 13:04:13 +0100 Subject: [PATCH 03/30] update gatk4 modules with interval changes --- modules.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules.json b/modules.json index cb5ffe864e..2486f4b29a 100644 --- a/modules.json +++ b/modules.json @@ -28,10 +28,10 @@ "git_sha": "9d0cad583b9a71a6509b754fdf589cbfbed08961" }, "gatk4/applybqsr": { - "git_sha": "50eafb19c689bca7747a6d44b8e7a555ba172f4b" + "git_sha": "a6e0629e2430a9d4061ddc8afdf4cff8c93080b5" }, "gatk4/baserecalibrator": { - "git_sha": "50eafb19c689bca7747a6d44b8e7a555ba172f4b" + "git_sha": "a6e0629e2430a9d4061ddc8afdf4cff8c93080b5" }, 
"gatk4/createsequencedictionary": { "git_sha": "7389963d5cb18f81c10dff128c510e518ee4f0f6" @@ -43,10 +43,10 @@ "git_sha": "50eafb19c689bca7747a6d44b8e7a555ba172f4b" }, "gatk4/genotypegvcfs": { - "git_sha": "50eafb19c689bca7747a6d44b8e7a555ba172f4b" + "git_sha": "a6e0629e2430a9d4061ddc8afdf4cff8c93080b5" }, "gatk4/haplotypecaller": { - "git_sha": "50eafb19c689bca7747a6d44b8e7a555ba172f4b" + "git_sha": "a6e0629e2430a9d4061ddc8afdf4cff8c93080b5" }, "gatk4/markduplicates": { "git_sha": "50eafb19c689bca7747a6d44b8e7a555ba172f4b" From 13519f41c7870612d9e6502105f641283c7aaa2c Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 23 Dec 2021 13:22:52 +0100 Subject: [PATCH 04/30] Replace gatk4 local modules with nf-core modules --- modules.json | 8 ++++---- modules/nf-core/modules/gatk4/applybqsr/main.nf | 12 +++++++----- modules/nf-core/modules/gatk4/applybqsr/meta.yml | 7 ++++--- .../nf-core/modules/gatk4/baserecalibrator/main.nf | 7 ++++--- .../nf-core/modules/gatk4/baserecalibrator/meta.yml | 6 +++--- modules/nf-core/modules/gatk4/genotypegvcfs/main.nf | 5 ++--- modules/nf-core/modules/gatk4/genotypegvcfs/meta.yml | 7 +++---- .../nf-core/modules/gatk4/haplotypecaller/main.nf | 5 ++--- .../nf-core/modules/gatk4/haplotypecaller/meta.yml | 6 +++--- subworkflows/nf-core/markduplicates.nf | 4 ++-- subworkflows/nf-core/prepare_recalibration.nf | 6 +++--- subworkflows/nf-core/recalibrate.nf | 2 +- 12 files changed, 38 insertions(+), 37 deletions(-) diff --git a/modules.json b/modules.json index cb5ffe864e..2486f4b29a 100644 --- a/modules.json +++ b/modules.json @@ -28,10 +28,10 @@ "git_sha": "9d0cad583b9a71a6509b754fdf589cbfbed08961" }, "gatk4/applybqsr": { - "git_sha": "50eafb19c689bca7747a6d44b8e7a555ba172f4b" + "git_sha": "a6e0629e2430a9d4061ddc8afdf4cff8c93080b5" }, "gatk4/baserecalibrator": { - "git_sha": "50eafb19c689bca7747a6d44b8e7a555ba172f4b" + "git_sha": "a6e0629e2430a9d4061ddc8afdf4cff8c93080b5" }, "gatk4/createsequencedictionary": { "git_sha": 
"7389963d5cb18f81c10dff128c510e518ee4f0f6" @@ -43,10 +43,10 @@ "git_sha": "50eafb19c689bca7747a6d44b8e7a555ba172f4b" }, "gatk4/genotypegvcfs": { - "git_sha": "50eafb19c689bca7747a6d44b8e7a555ba172f4b" + "git_sha": "a6e0629e2430a9d4061ddc8afdf4cff8c93080b5" }, "gatk4/haplotypecaller": { - "git_sha": "50eafb19c689bca7747a6d44b8e7a555ba172f4b" + "git_sha": "a6e0629e2430a9d4061ddc8afdf4cff8c93080b5" }, "gatk4/markduplicates": { "git_sha": "50eafb19c689bca7747a6d44b8e7a555ba172f4b" diff --git a/modules/nf-core/modules/gatk4/applybqsr/main.nf b/modules/nf-core/modules/gatk4/applybqsr/main.nf index 20294c2781..22702211e3 100644 --- a/modules/nf-core/modules/gatk4/applybqsr/main.nf +++ b/modules/nf-core/modules/gatk4/applybqsr/main.nf @@ -8,20 +8,22 @@ process GATK4_APPLYBQSR { 'quay.io/biocontainers/gatk4:4.2.4.0--hdfd78af_0' }" input: - tuple val(meta), path(input), path(input_index), path(bqsr_table) + tuple val(meta), path(input), path(input_index), path(bqsr_table), path(intervals) path fasta path fai path dict - path intervals output: - tuple val(meta), path("*.bam"), emit: bam - path "versions.yml" , emit: versions + tuple val(meta), path("*.bam"), emit: bam, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + path "versions.yml" , emit: versions script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def interval = intervals ? "-L ${intervals}" : "" + def file_type = input.getExtension() + def avail_mem = 3 if (!task.memory) { log.info '[GATK ApplyBQSR] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -35,7 +37,7 @@ process GATK4_APPLYBQSR { --bqsr-recal-file $bqsr_table \\ $interval \\ --tmp-dir . 
\\ - -O ${prefix}.bam \\ + -O ${prefix}.${file_type} \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/modules/gatk4/applybqsr/meta.yml b/modules/nf-core/modules/gatk4/applybqsr/meta.yml index 4e3b2f9a53..ad1f82a1e4 100644 --- a/modules/nf-core/modules/gatk4/applybqsr/meta.yml +++ b/modules/nf-core/modules/gatk4/applybqsr/meta.yml @@ -31,6 +31,9 @@ input: - bqsr_table: type: file description: Recalibration table from gatk4_baserecalibrator + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) - fasta: type: file description: The reference fasta file @@ -43,9 +46,7 @@ input: type: file description: GATK sequence dictionary pattern: "*.dict" - - intervalsBed: - type: file - description: Bed file with the genomic regions included in the library (optional) + output: - meta: diff --git a/modules/nf-core/modules/gatk4/baserecalibrator/main.nf b/modules/nf-core/modules/gatk4/baserecalibrator/main.nf index 8a45d6e241..3a26d6ccda 100644 --- a/modules/nf-core/modules/gatk4/baserecalibrator/main.nf +++ b/modules/nf-core/modules/gatk4/baserecalibrator/main.nf @@ -8,11 +8,10 @@ process GATK4_BASERECALIBRATOR { 'quay.io/biocontainers/gatk4:4.2.4.0--hdfd78af_0' }" input: - tuple val(meta), path(input), path(input_index) + tuple val(meta), path(input), path(input_index), path(intervals) path fasta path fai path dict - path intervalsBed path knownSites path knownSites_tbi @@ -23,14 +22,16 @@ process GATK4_BASERECALIBRATOR { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def intervalsCommand = intervalsBed ? "-L ${intervalsBed}" : "" + def intervalsCommand = intervals ? "-L ${intervals}" : "" def sitesCommand = knownSites.collect{"--known-sites ${it}"}.join(' ') + def avail_mem = 3 if (!task.memory) { log.info '[GATK BaseRecalibrator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
} else { avail_mem = task.memory.giga } + """ gatk --java-options "-Xmx${avail_mem}g" BaseRecalibrator \ -R $fasta \ diff --git a/modules/nf-core/modules/gatk4/baserecalibrator/meta.yml b/modules/nf-core/modules/gatk4/baserecalibrator/meta.yml index 188340b471..641a50df02 100644 --- a/modules/nf-core/modules/gatk4/baserecalibrator/meta.yml +++ b/modules/nf-core/modules/gatk4/baserecalibrator/meta.yml @@ -28,6 +28,9 @@ input: type: file description: BAI/CRAI file from alignment pattern: "*.{bai,crai}" + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) - fasta: type: file description: The reference fasta file @@ -40,9 +43,6 @@ input: type: file description: GATK sequence dictionary pattern: "*.dict" - - intervalsBed: - type: file - description: Bed file with the genomic regions included in the library (optional) - knownSites: type: file description: Bed file with the genomic regions included in the library (optional) diff --git a/modules/nf-core/modules/gatk4/genotypegvcfs/main.nf b/modules/nf-core/modules/gatk4/genotypegvcfs/main.nf index b1c15a5e53..f9107b1e94 100644 --- a/modules/nf-core/modules/gatk4/genotypegvcfs/main.nf +++ b/modules/nf-core/modules/gatk4/genotypegvcfs/main.nf @@ -8,13 +8,12 @@ process GATK4_GENOTYPEGVCFS { 'quay.io/biocontainers/gatk4:4.2.4.0--hdfd78af_0' }" input: - tuple val(meta), path(gvcf), path(gvcf_index) + tuple val(meta), path(gvcf), path(gvcf_index), path(intervals) path fasta path fasta_index path fasta_dict path dbsnp path dbsnp_index - path intervals_bed output: tuple val(meta), path("*.vcf.gz"), emit: vcf @@ -25,7 +24,7 @@ process GATK4_GENOTYPEGVCFS { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def dbsnp_options = dbsnp ? "-D ${dbsnp}" : "" - def interval_options = intervals_bed ? "-L ${intervals_bed}" : "" + def interval_options = intervals ? 
"-L ${intervals}" : "" def gvcf_options = gvcf.name.endsWith(".vcf") || gvcf.name.endsWith(".vcf.gz") ? "$gvcf" : "gendb://$gvcf" def avail_mem = 3 if (!task.memory) { diff --git a/modules/nf-core/modules/gatk4/genotypegvcfs/meta.yml b/modules/nf-core/modules/gatk4/genotypegvcfs/meta.yml index e6b38863be..2c9767b2e5 100644 --- a/modules/nf-core/modules/gatk4/genotypegvcfs/meta.yml +++ b/modules/nf-core/modules/gatk4/genotypegvcfs/meta.yml @@ -25,6 +25,9 @@ input: description: | Tuple of gVCF(.gz) file (first) and its index (second) or the path to a GenomicsDB (and empty) pattern: ["*.{vcf,vcf.gz}", "*.{idx,tbi}"] + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) - fasta: type: file description: Reference fasta file @@ -45,10 +48,6 @@ input: type: tuple of files description: dbSNP VCF index file pattern: "*.tbi" - - intervals_bed: - type: file - description: An intevals BED file - pattern: "*.bed" output: - meta: diff --git a/modules/nf-core/modules/gatk4/haplotypecaller/main.nf b/modules/nf-core/modules/gatk4/haplotypecaller/main.nf index d6cf514db1..8b4c0e6463 100644 --- a/modules/nf-core/modules/gatk4/haplotypecaller/main.nf +++ b/modules/nf-core/modules/gatk4/haplotypecaller/main.nf @@ -8,13 +8,12 @@ process GATK4_HAPLOTYPECALLER { 'quay.io/biocontainers/gatk4:4.2.4.0--hdfd78af_0' }" input: - tuple val(meta), path(input), path(input_index) + tuple val(meta), path(input), path(input_index), path(intervals) path fasta path fai path dict path dbsnp path dbsnp_tbi - path interval output: tuple val(meta), path("*.vcf.gz"), emit: vcf @@ -24,7 +23,7 @@ process GATK4_HAPLOTYPECALLER { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def interval_option = interval ? "-L ${interval}" : "" + def interval_option = intervals ? "-L ${intervals}" : "" def dbsnp_option = dbsnp ? 
"-D ${dbsnp}" : "" def avail_mem = 3 if (!task.memory) { diff --git a/modules/nf-core/modules/gatk4/haplotypecaller/meta.yml b/modules/nf-core/modules/gatk4/haplotypecaller/meta.yml index 6c9d08911a..869bd1d2dd 100644 --- a/modules/nf-core/modules/gatk4/haplotypecaller/meta.yml +++ b/modules/nf-core/modules/gatk4/haplotypecaller/meta.yml @@ -29,6 +29,9 @@ input: type: file description: BAI/CRAI file from alignment pattern: "*.{bai,crai}" + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) - fasta: type: file description: The reference fasta file @@ -47,9 +50,6 @@ input: - dbsnp_tbi: type: file description: VCF index of dbsnp (optional) - - interval: - type: file - description: Bed file with the genomic regions included in the library (optional) output: - meta: diff --git a/subworkflows/nf-core/markduplicates.nf b/subworkflows/nf-core/markduplicates.nf index 8cf2905b1b..020e853d16 100644 --- a/subworkflows/nf-core/markduplicates.nf +++ b/subworkflows/nf-core/markduplicates.nf @@ -2,8 +2,8 @@ // MARKDUPLICATES AND/OR QC after mapping // -include { GATK4_ESTIMATELIBRARYCOMPLEXITY } from '../../modules/local/gatk4/estimatelibrarycomplexity/main' -include { GATK4_MARKDUPLICATES } from '../../modules/local/gatk4/markduplicates/main' +include { GATK4_ESTIMATELIBRARYCOMPLEXITY } from '../../modules/nf-core/modules/gatk4/estimatelibrarycomplexity/main' +include { GATK4_MARKDUPLICATES } from '../../modules/nf-core/modules/gatk4/markduplicates/main' include { GATK4_MARKDUPLICATES_SPARK } from '../../modules/local/gatk4/markduplicatesspark/main' include { QUALIMAP_BAMQC } from '../../modules/local/qualimap/bamqc/main' include { SAMTOOLS_INDEX as INDEX_MARKDUPLICATES } from '../../modules/local/samtools/index/main' diff --git a/subworkflows/nf-core/prepare_recalibration.nf b/subworkflows/nf-core/prepare_recalibration.nf index a7bf89b920..8796d7a828 100644 --- a/subworkflows/nf-core/prepare_recalibration.nf +++ 
b/subworkflows/nf-core/prepare_recalibration.nf @@ -2,9 +2,9 @@ // PREPARE RECALIBRATION // -include { GATK4_BASERECALIBRATOR as BASERECALIBRATOR } from '../../modules/local/gatk4/baserecalibrator/main' -include { GATK4_BASERECALIBRATOR_SPARK as BASERECALIBRATOR_SPARK } from '../../modules/local/gatk4/baserecalibratorspark/main' -include { GATK4_GATHERBQSRREPORTS as GATHERBQSRREPORTS } from '../../modules/local/gatk4/gatherbqsrreports/main' +include { GATK4_BASERECALIBRATOR as BASERECALIBRATOR } from '../../modules/nf-core/modules/gatk4/baserecalibrator/main' +include { GATK4_BASERECALIBRATOR_SPARK as BASERECALIBRATOR_SPARK } from '../../modules/nf-core/modules/gatk4/baserecalibratorspark/main' +include { GATK4_GATHERBQSRREPORTS as GATHERBQSRREPORTS } from '../../modules/nf-core/modules/gatk4/gatherbqsrreports/main' workflow PREPARE_RECALIBRATION { take: diff --git a/subworkflows/nf-core/recalibrate.nf b/subworkflows/nf-core/recalibrate.nf index 3ed6a626a9..0649ab61dc 100644 --- a/subworkflows/nf-core/recalibrate.nf +++ b/subworkflows/nf-core/recalibrate.nf @@ -2,7 +2,7 @@ // RECALIBRATE // -include { GATK4_APPLYBQSR as APPLYBQSR } from '../../modules/local/gatk4/applybqsr/main' +include { GATK4_APPLYBQSR as APPLYBQSR } from '../../modules/nf-core/modules/gatk4/applybqsr/main' include { GATK4_APPLYBQSR_SPARK as APPLYBQSR_SPARK } from '../../modules/local/gatk4/applybqsrspark/main' include { QUALIMAP_BAMQC_CRAM } from '../../modules/local/qualimap/bamqccram/main' include { SAMTOOLS_INDEX as INDEX_RECALIBRATE } from '../../modules/local/samtools/index/main' From bac21312ac09226090c41963543ffaf21c1977dd Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 23 Dec 2021 13:52:21 +0100 Subject: [PATCH 05/30] Fix CI tests, passing locally --- subworkflows/nf-core/markduplicates.nf | 10 ++++++---- subworkflows/nf-core/prepare_recalibration.nf | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/subworkflows/nf-core/markduplicates.nf 
b/subworkflows/nf-core/markduplicates.nf index 020e853d16..cb23d8917a 100644 --- a/subworkflows/nf-core/markduplicates.nf +++ b/subworkflows/nf-core/markduplicates.nf @@ -69,10 +69,12 @@ workflow MARKDUPLICATES { } else { GATK4_MARKDUPLICATES(bam_mapped) - report_markduplicates = GATK4_MARKDUPLICATES.out.metrics - bam_markduplicates = GATK4_MARKDUPLICATES.out.bam_bai + report_markduplicates = GATK4_MARKDUPLICATES.out.metrics + bam_markduplicates = GATK4_MARKDUPLICATES.out.bam + bai_markduplicates = GATK4_MARKDUPLICATES.out.bai + bam_bai_markduplicates = bam_markduplicates.join(bai_markduplicates) - SAMTOOLS_BAM_TO_CRAM(bam_markduplicates, fasta, fasta_fai) + SAMTOOLS_BAM_TO_CRAM(bam_bai_markduplicates, fasta, fasta_fai) cram_markduplicates = SAMTOOLS_BAM_TO_CRAM.out.cram_crai ch_versions = ch_versions.mix(GATK4_MARKDUPLICATES.out.versions.first()) @@ -93,7 +95,7 @@ workflow MARKDUPLICATES { qualimap_bamqc = Channel.empty() if (!skip_bamqc) { - QUALIMAP_BAMQC(bam_markduplicates, target_bed) + QUALIMAP_BAMQC(bam_bai_markduplicates, target_bed) qualimap_bamqc = QUALIMAP_BAMQC.out.results ch_versions = ch_versions.mix(QUALIMAP_BAMQC.out.versions.first()) diff --git a/subworkflows/nf-core/prepare_recalibration.nf b/subworkflows/nf-core/prepare_recalibration.nf index 8796d7a828..f86b442de8 100644 --- a/subworkflows/nf-core/prepare_recalibration.nf +++ b/subworkflows/nf-core/prepare_recalibration.nf @@ -3,7 +3,7 @@ // include { GATK4_BASERECALIBRATOR as BASERECALIBRATOR } from '../../modules/nf-core/modules/gatk4/baserecalibrator/main' -include { GATK4_BASERECALIBRATOR_SPARK as BASERECALIBRATOR_SPARK } from '../../modules/nf-core/modules/gatk4/baserecalibratorspark/main' +include { GATK4_BASERECALIBRATOR_SPARK as BASERECALIBRATOR_SPARK } from '../../modules/local/gatk4/baserecalibratorspark/main' include { GATK4_GATHERBQSRREPORTS as GATHERBQSRREPORTS } from '../../modules/nf-core/modules/gatk4/gatherbqsrreports/main' workflow PREPARE_RECALIBRATION { From 
c1c0fabf03dc9f4f45640425400a6bc406a8df0c Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 23 Dec 2021 14:06:05 +0100 Subject: [PATCH 06/30] Fix skipmd CI tests, passing locally --- subworkflows/nf-core/markduplicates.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/nf-core/markduplicates.nf b/subworkflows/nf-core/markduplicates.nf index cb23d8917a..4da6430a68 100644 --- a/subworkflows/nf-core/markduplicates.nf +++ b/subworkflows/nf-core/markduplicates.nf @@ -32,8 +32,8 @@ workflow MARKDUPLICATES { report_markduplicates = Channel.empty() if (skip_markduplicates) { - bam_markduplicates = bam_indexed - SAMTOOLS_BAM_TO_CRAM(bam_markduplicates, fasta, fasta_fai) + bam_bai_markduplicates = bam_indexed + SAMTOOLS_BAM_TO_CRAM(bam_bai_markduplicates, fasta, fasta_fai) cram_markduplicates = SAMTOOLS_BAM_TO_CRAM.out.cram_crai ch_versions = ch_versions.mix(SAMTOOLS_BAM_TO_CRAM.out.versions.first()) From c6a0ffb750a9fd3bab80563970d29f13c2fc5bbb Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 23 Dec 2021 14:38:33 +0100 Subject: [PATCH 07/30] weird test pass locally, checking remaining CI test --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3f692bf9c2..c8a9d31703 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,7 +35,7 @@ jobs: - 'split_fastq' - 'gatk4_spark' - 'save_bam_mapped' - - 'skip_markduplicates' + # - 'skip_markduplicates' # - 'targeted' - 'tumor_normal_pair' # - 'variant_calling' From 94df2ff53067612fe77a81ef9f1a12280135b552 Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 23 Dec 2021 15:59:31 +0100 Subject: [PATCH 08/30] why is the recal table not on GHA but locally --- .github/workflows/ci.yml | 2 +- conf/modules.config | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c8a9d31703..3f692bf9c2 100644 --- a/.github/workflows/ci.yml +++
b/.github/workflows/ci.yml @@ -35,7 +35,7 @@ jobs: - 'split_fastq' - 'gatk4_spark' - 'save_bam_mapped' - # - 'skip_markduplicates' + - 'skip_markduplicates' # - 'targeted' - 'tumor_normal_pair' # - 'variant_calling' diff --git a/conf/modules.config b/conf/modules.config index 38051a5b4e..ff33d6d6a5 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -228,6 +228,7 @@ process { publishDir = [ path: { "${params.outdir}/preprocessing/${meta.id}/recal_table" }, enabled: true, + mode: 'copy', pattern: "*recal.table" ] } From cb12450c1136d929ce5edccf611021accde33128 Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 23 Dec 2021 16:14:51 +0100 Subject: [PATCH 09/30] Revert basecalibrator changes to see if that fixes it --- conf/modules.config | 1 - subworkflows/nf-core/prepare_recalibration.nf | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index ff33d6d6a5..38051a5b4e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -228,7 +228,6 @@ process { publishDir = [ path: { "${params.outdir}/preprocessing/${meta.id}/recal_table" }, enabled: true, - mode: 'copy', pattern: "*recal.table" ] } diff --git a/subworkflows/nf-core/prepare_recalibration.nf b/subworkflows/nf-core/prepare_recalibration.nf index f86b442de8..046dd23ca3 100644 --- a/subworkflows/nf-core/prepare_recalibration.nf +++ b/subworkflows/nf-core/prepare_recalibration.nf @@ -2,7 +2,7 @@ // PREPARE RECALIBRATION // -include { GATK4_BASERECALIBRATOR as BASERECALIBRATOR } from '../../modules/nf-core/modules/gatk4/baserecalibrator/main' +include { GATK4_BASERECALIBRATOR as BASERECALIBRATOR } from '../../modules/local/gatk4/baserecalibrator/main' include { GATK4_BASERECALIBRATOR_SPARK as BASERECALIBRATOR_SPARK } from '../../modules/local/gatk4/baserecalibratorspark/main' include { GATK4_GATHERBQSRREPORTS as GATHERBQSRREPORTS } from '../../modules/nf-core/modules/gatk4/gatherbqsrreports/main' From 7728ae53240c8b4a11fe8eea55b3a51f536b39c6 Mon Sep 
17 00:00:00 2001 From: Rike Date: Thu, 23 Dec 2021 16:44:10 +0100 Subject: [PATCH 10/30] Fix linting --- .../nf-core/modules/gatk4/createsequencedictionary/main.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/nf-core/modules/gatk4/createsequencedictionary/main.nf b/modules/nf-core/modules/gatk4/createsequencedictionary/main.nf index e8f32106bd..4d82a643a9 100644 --- a/modules/nf-core/modules/gatk4/createsequencedictionary/main.nf +++ b/modules/nf-core/modules/gatk4/createsequencedictionary/main.nf @@ -2,10 +2,10 @@ process GATK4_CREATESEQUENCEDICTIONARY { tag "$fasta" label 'process_medium' - conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) + conda (params.enable_conda ? "bioconda::gatk4=4.2.4.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.2.4.0--hdfd78af_0' : + 'quay.io/biocontainers/gatk4:4.2.4.0--hdfd78af_0' }" input: path fasta From f8b687360925cc1f2718b40d17f47111184a9c32 Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 23 Dec 2021 17:19:25 +0100 Subject: [PATCH 11/30] Change prefix, hopefully fixes publishing --- conf/modules.config | 4 +++- modules.json | 2 +- subworkflows/nf-core/prepare_recalibration.nf | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 38051a5b4e..e47572b5fb 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -228,8 +228,10 @@ process { publishDir = [ path: { "${params.outdir}/preprocessing/${meta.id}/recal_table" }, enabled: true, - pattern: "*recal.table" + mode: 'copy', + pattern: "*.table" ] + ext.prefix = {"${meta.id}.recal"} } } diff --git a/modules.json b/modules.json index 2486f4b29a..24172d209a 100644 --- a/modules.json +++ b/modules.json @@ -34,7 +34,7 @@ 
"git_sha": "a6e0629e2430a9d4061ddc8afdf4cff8c93080b5" }, "gatk4/createsequencedictionary": { - "git_sha": "7389963d5cb18f81c10dff128c510e518ee4f0f6" + "git_sha": "50eafb19c689bca7747a6d44b8e7a555ba172f4b" }, "gatk4/estimatelibrarycomplexity": { "git_sha": "50eafb19c689bca7747a6d44b8e7a555ba172f4b" diff --git a/subworkflows/nf-core/prepare_recalibration.nf b/subworkflows/nf-core/prepare_recalibration.nf index 046dd23ca3..f86b442de8 100644 --- a/subworkflows/nf-core/prepare_recalibration.nf +++ b/subworkflows/nf-core/prepare_recalibration.nf @@ -2,7 +2,7 @@ // PREPARE RECALIBRATION // -include { GATK4_BASERECALIBRATOR as BASERECALIBRATOR } from '../../modules/local/gatk4/baserecalibrator/main' +include { GATK4_BASERECALIBRATOR as BASERECALIBRATOR } from '../../modules/nf-core/modules/gatk4/baserecalibrator/main' include { GATK4_BASERECALIBRATOR_SPARK as BASERECALIBRATOR_SPARK } from '../../modules/local/gatk4/baserecalibratorspark/main' include { GATK4_GATHERBQSRREPORTS as GATHERBQSRREPORTS } from '../../modules/nf-core/modules/gatk4/gatherbqsrreports/main' From da7e4c6998df1f1d3f9a0f0d857ef7fe68353aef Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 23 Dec 2021 23:36:50 +0100 Subject: [PATCH 12/30] Replace suffix annotation --- conf/modules.config | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index e47572b5fb..c1a8347b0a 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -167,7 +167,7 @@ if (('markduplicates' in params.use_gatk_spark) && (!params.skip_markduplicates) // MARKDUPLICATES process { withName: 'GATK4_ESTIMATELIBRARYCOMPLEXITY' { - ext.suffix = '.md' + ext.prefix = {"${meta.id}.md"} publishDir = [ path: { "${params.outdir}/preprocessing/${meta.id}/markduplicates" }, enabled: true, @@ -176,7 +176,7 @@ process { } withName: 'GATK4_MARKDUPLICATES' { ext.args = '-REMOVE_DUPLICATES false -VALIDATION_STRINGENCY LENIENT' - ext.suffix = '.md' + ext.prefix = {"${meta.id}.md"} publishDir = 
[ path: { "${params.outdir}/preprocessing/${meta.id}/markduplicates" }, enabled: false @@ -238,7 +238,7 @@ process { // RECALIBRATE process { withName: 'APPLYBQSR|APPLYBQSR_SPARK' { - ext.suffix = '.recal' + ext.prefix = {"${meta.id}.recal"} publishDir = [ enabled: false ] } withName: 'SAMTOOLS_MERGE_CRAM' { From a0aa01b0eca4df11a80475b9aecb589519e50682 Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 23 Dec 2021 23:54:02 +0100 Subject: [PATCH 13/30] use released nf-core version instead of dev for linting --- .github/workflows/linting.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index c5c9fe386c..3b448773c4 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -120,7 +120,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install nf-core git+https://github.com/nf-core/tools.git@dev + pip install nf-core - name: Run nf-core lint env: From 62b6980955761bb34199d1ceb10ed801cb060d51 Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 29 Dec 2021 14:06:42 +0100 Subject: [PATCH 14/30] Delete local gatk4 modules, if avail in nf-core --- modules/local/gatk4/applybqsr/main.nf | 45 ------------- modules/local/gatk4/applybqsr/meta.yml | 58 ----------------- modules/local/gatk4/baserecalibrator/main.nf | 48 -------------- modules/local/gatk4/baserecalibrator/meta.yml | 58 ----------------- .../gatk4/estimatelibrarycomplexity/main.nf | 43 ------------- modules/local/gatk4/gatherbqsrreports/main.nf | 39 ------------ modules/local/gatk4/genotypegvcf/main.nf | 50 --------------- modules/local/gatk4/haplotypecaller/main.nf | 52 --------------- modules/local/gatk4/haplotypecaller/meta.yml | 63 ------------------- modules/local/gatk4/markduplicates/main.nf | 42 ------------- modules/local/gatk4/markduplicates/meta.yml | 50 --------------- .../local/gatk4/mutect2/merge_stats/main.nf | 60 ------------------ 
.../local/gatk4/mutect2/merge_stats/meta.yml | 41 ------------ modules/local/gatk4/mutect2/somatic/main.nf | 61 ------------------ modules/local/gatk4/mutect2/somatic/meta.yml | 41 ------------ modules/local/gatk4/mutect2/tumor/main.nf | 60 ------------------ modules/local/gatk4/mutect2/tumor/meta.yml | 41 ------------ 17 files changed, 852 deletions(-) delete mode 100644 modules/local/gatk4/applybqsr/main.nf delete mode 100644 modules/local/gatk4/applybqsr/meta.yml delete mode 100644 modules/local/gatk4/baserecalibrator/main.nf delete mode 100644 modules/local/gatk4/baserecalibrator/meta.yml delete mode 100644 modules/local/gatk4/estimatelibrarycomplexity/main.nf delete mode 100644 modules/local/gatk4/gatherbqsrreports/main.nf delete mode 100644 modules/local/gatk4/genotypegvcf/main.nf delete mode 100644 modules/local/gatk4/haplotypecaller/main.nf delete mode 100644 modules/local/gatk4/haplotypecaller/meta.yml delete mode 100644 modules/local/gatk4/markduplicates/main.nf delete mode 100644 modules/local/gatk4/markduplicates/meta.yml delete mode 100644 modules/local/gatk4/mutect2/merge_stats/main.nf delete mode 100644 modules/local/gatk4/mutect2/merge_stats/meta.yml delete mode 100644 modules/local/gatk4/mutect2/somatic/main.nf delete mode 100644 modules/local/gatk4/mutect2/somatic/meta.yml delete mode 100644 modules/local/gatk4/mutect2/tumor/main.nf delete mode 100644 modules/local/gatk4/mutect2/tumor/meta.yml diff --git a/modules/local/gatk4/applybqsr/main.nf b/modules/local/gatk4/applybqsr/main.nf deleted file mode 100644 index 9da6f922e9..0000000000 --- a/modules/local/gatk4/applybqsr/main.nf +++ /dev/null @@ -1,45 +0,0 @@ -process GATK4_APPLYBQSR { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" - - input: - tuple val(meta), path(cram), path(crai), path(bqsr_table), path(intervals_bed) - path fasta - path fasta_fai - path dict - - output: - tuple val(meta), path("*.cram"), emit: cram - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK ApplyBQSR] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" - def intervals_command = intervals_bed ? "-L ${intervals_bed}" : "" - """ - gatk ApplyBQSR \\ - -R $fasta \\ - -I $cram \\ - --bqsr-recal-file $bqsr_table \\ - $intervals_command \\ - --tmp-dir . \\ - -O ${prefix}.cram \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/gatk4/applybqsr/meta.yml b/modules/local/gatk4/applybqsr/meta.yml deleted file mode 100644 index 9bf12f09bb..0000000000 --- a/modules/local/gatk4/applybqsr/meta.yml +++ /dev/null @@ -1,58 +0,0 @@ -name: gatk4_applybqsr -description: Apply base quality score recalibration (BQSR) to a bam file -keywords: - - bqsr - - bam -tools: - - gatk4: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. 
- homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM file from alignment - pattern: "*.{bam}" - - bqsr_table: - type: file - description: Recalibration table from gatk4_baserecalibrator - - fasta: - type: file - description: The reference fasta file - - fastaidx: - type: file - description: Index of reference fasta file - - dict: - type: file - description: GATK sequence dictionary - - intervalsBed: - type: file - description: Bed file with the genomic regions included in the library (optional) - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - version: - type: file - description: File containing software version - pattern: "*.{version.txt}" - - bam: - type: file - description: Recalibrated BAM file - pattern: "*.{bam}" - -authors: - - "@yocra3" diff --git a/modules/local/gatk4/baserecalibrator/main.nf b/modules/local/gatk4/baserecalibrator/main.nf deleted file mode 100644 index 86a42f8df6..0000000000 --- a/modules/local/gatk4/baserecalibrator/main.nf +++ /dev/null @@ -1,48 +0,0 @@ -process GATK4_BASERECALIBRATOR { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" - - input: - tuple val(meta), path(cram), path(crai), path(intervals_bed) - path fasta - path fasta_fai - path dict - path known_sites - path known_sites_tbi - - output: - tuple val(meta), path("*.table"), emit: table - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK BaseRecalibrator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" - def intervals_command = intervals_bed ? "-L ${intervals_bed}" : "" - def sites_command = known_sites.collect{"--known-sites ${it}"}.join(' ') - """ - gatk BaseRecalibrator \ - -R $fasta \ - -I $cram \ - $sites_command \ - $intervals_command \ - --tmp-dir . \ - $args \ - -O ${prefix}.table - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/gatk4/baserecalibrator/meta.yml b/modules/local/gatk4/baserecalibrator/meta.yml deleted file mode 100644 index 0996dcbe51..0000000000 --- a/modules/local/gatk4/baserecalibrator/meta.yml +++ /dev/null @@ -1,58 +0,0 @@ -name: gatk4_baserecalibrator -description: Generate recalibration table for Base Quality Score Recalibration (BQSR) -keywords: - - sort -tools: - - gatk4: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. 
- homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM file from alignment - pattern: "*.{bam}" - - fasta: - type: file - description: The reference fasta file - - fastaidx: - type: file - description: Index of reference fasta file - - dict: - type: file - description: GATK sequence dictionary - - intervalsBed: - type: file - description: Bed file with the genomic regions included in the library (optional) - - knownSites: - type: file - description: Bed file with the genomic regions included in the library (optional) - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - version: - type: file - description: File containing software version - pattern: "*.{version.txt}" - - table: - type: file - description: Recalibration table from BaseRecalibrator - pattern: "*.{table}" - -authors: - - "@yocra3" diff --git a/modules/local/gatk4/estimatelibrarycomplexity/main.nf b/modules/local/gatk4/estimatelibrarycomplexity/main.nf deleted file mode 100644 index 1d6075c79b..0000000000 --- a/modules/local/gatk4/estimatelibrarycomplexity/main.nf +++ /dev/null @@ -1,43 +0,0 @@ -process GATK4_ESTIMATELIBRARYCOMPLEXITY { - tag "$meta.id" - label 'process_high' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" - - input: - tuple val(meta), path(bam) - path fasta - path fasta_fai - path dict - - output: - path "*.md.metrics", emit: metrics - path "versions.yml", emit: versions - - script: - def args = task.ext.args ?: '' - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK EstimateLibraryComplexity] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" - def bams = bam.collect(){ x -> "-I ".concat(x.toString()) }.join(" ") - """ - gatk EstimateLibraryComplexity \ - ${bams} \ - -O ${prefix}.metrics \ - --REFERENCE_SEQUENCE ${fasta} \ - --VALIDATION_STRINGENCY SILENT \ - --TMP_DIR . $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/gatk4/gatherbqsrreports/main.nf b/modules/local/gatk4/gatherbqsrreports/main.nf deleted file mode 100644 index 3284e698ba..0000000000 --- a/modules/local/gatk4/gatherbqsrreports/main.nf +++ /dev/null @@ -1,39 +0,0 @@ -process GATK4_GATHERBQSRREPORTS { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" - - input: - tuple val(meta), path(recal) - - output: - tuple val(meta), path("*.recal.table"), emit: table - path "*.recal.table" , emit: report - path "versions.yml" , emit: versions - - script: - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK GatherBQSRReports] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" - def input = recal.collect{"-I ${it}"}.join(' ') - """ - gatk --java-options -Xmx${task.memory.toGiga()}g \ - GatherBQSRReports \ - ${input} \ - --tmp-dir . \ - -O ${meta.sample}.recal.table - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/gatk4/genotypegvcf/main.nf b/modules/local/gatk4/genotypegvcf/main.nf deleted file mode 100644 index 0474acf4df..0000000000 --- a/modules/local/gatk4/genotypegvcf/main.nf +++ /dev/null @@ -1,50 +0,0 @@ -process GATK4_GENOTYPEGVCF { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" - - input: - tuple val(meta), path(gvcf), path(intervals_bed) - path fasta - path fasta_fai - path dict - path dbsnp - path dbsnp_tbi - - output: - tuple val(meta), path("*.vcf"), emit: vcf - path "versions.yml" , emit: versions - - script: - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK GenotypeGVCFs] Available memory not known - defaulting to 3GB. 
Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" - def intervals_command = intervals_bed ? "-L ${intervals_bed}" : "" - def sites_command = dbsnp ? "--D ${dbsnp}" : "" - """ - gatk --java-options -Xmx${task.memory.toGiga()}g \ - IndexFeatureFile \ - -I ${gvcf} - - gatk --java-options -Xmx${task.memory.toGiga()}g \ - GenotypeGVCFs \ - -R ${fasta} \ - ${intervals_command} \ - ${sites_command} \ - -V ${gvcf} \ - -O ${prefix}.vcf - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/gatk4/haplotypecaller/main.nf b/modules/local/gatk4/haplotypecaller/main.nf deleted file mode 100644 index 6a6e337e20..0000000000 --- a/modules/local/gatk4/haplotypecaller/main.nf +++ /dev/null @@ -1,52 +0,0 @@ -process GATK4_HAPLOTYPECALLER { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" - - input: - tuple val(meta), path(cram), path(crai), path(interval) - path fasta - path fasta_fai - path dict - path dbsnp - path dbsnp_tbi - - output: - tuple val(meta), path("*.vcf") , emit: vcf - tuple val(meta), path(interval), path("*.vcf"), emit: interval_vcf - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - def prefix = task.ext.suffix ? 
"${meta.id}${task.ext.suffix}" : "${meta.id}" - def intervals_command = intervals_bed ? "-L ${intervals_bed}" : "" - def sites_command = dbsnp ? "--D ${dbsnp}" : "" - //TODO allow ploidy argument here since we allow it for the cnv callers? or is this covered with options? Might unintuitive to use - """ - gatk \\ - --java-options "-Xmx${avail_mem}g" \\ - HaplotypeCaller \\ - -R $fasta \\ - -I $cram \\ - ${sites_command} \\ - ${intervals_command} \\ - -O ${prefix}.vcf \\ - --tmp-dir . \ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/gatk4/haplotypecaller/meta.yml b/modules/local/gatk4/haplotypecaller/meta.yml deleted file mode 100644 index f0fc3910e3..0000000000 --- a/modules/local/gatk4/haplotypecaller/meta.yml +++ /dev/null @@ -1,63 +0,0 @@ -name: gatk4_haplotypecaller -description: Call germline SNPs and indels via local re-assembly of haplotypes -keywords: - - gatk4 - - haplotypecaller - - haplotype -tools: - - gatk4: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - bam: - type: file - description: BAM file - pattern: "*.bam" - - bai: - type: file - description: Index of BAM file - pattern: "*.bam.bai" - - fasta: - type: file - description: The reference fasta file - pattern: "*.fasta" - - fai: - type: file - description: Index of reference fasta file - pattern: "fasta.fai" - - dict: - type: file - description: GATK sequence dictionary - pattern: "*.dict" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - version: - type: file - description: File containing software version - pattern: "*.{version.txt}" - - vcf: - type: file - description: Compressed VCF file - pattern: "*.vcf.gz" - - tbi: - type: file - description: Index of VCF file - pattern: "*.vcf.gz.tbi" - -authors: - - "@suzannejin" diff --git a/modules/local/gatk4/markduplicates/main.nf b/modules/local/gatk4/markduplicates/main.nf deleted file mode 100644 index 1ac2e789a7..0000000000 --- a/modules/local/gatk4/markduplicates/main.nf +++ /dev/null @@ -1,42 +0,0 @@ -process GATK4_MARKDUPLICATES { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" - - input: - tuple val(meta), path(bams) - - output: - tuple val(meta), path("*.bam"), path("*.bai"), emit: bam_bai - tuple val(meta), path("*.metrics") , emit: metrics - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK GenotypeGVCFs] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - def prefix = task.ext.suffix ? 
"${meta.id}${task.ext.suffix}" : "${meta.id}" - def bam_list = bams.collect(){ bam -> "--INPUT ".concat(bam.toString()) }.join(" ") - """ - gatk MarkDuplicates \\ - $bam_list \\ - --METRICS_FILE ${prefix}.metrics \\ - --TMP_DIR . \\ - --CREATE_INDEX true \\ - --OUTPUT ${prefix}.bam \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/gatk4/markduplicates/meta.yml b/modules/local/gatk4/markduplicates/meta.yml deleted file mode 100644 index 5777067a4f..0000000000 --- a/modules/local/gatk4/markduplicates/meta.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: gatk4_markduplicates -description: This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are defined as originating from a single fragment of DNA. -keywords: - - markduplicates - - bam - - sort -tools: - - gatk4: - description: Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard- - tool_dev_url: https://github.com/broadinstitute/gatk - doi: 10.1158/1538-7445.AM2017-3590 - licence: ['MIT'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: Sorted BAM file - pattern: "*.{bam}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - bam: - type: file - description: Marked duplicates BAM file - pattern: "*.{bam}" - - metrics: - type: file - description: Duplicate metrics file generated by GATK - pattern: "*.{metrics.txt}" - -authors: - - "@ajodeh-juma" - - "@FriederikeHanssen" diff --git a/modules/local/gatk4/mutect2/merge_stats/main.nf b/modules/local/gatk4/mutect2/merge_stats/main.nf deleted file mode 100644 index 44f110b40c..0000000000 --- a/modules/local/gatk4/mutect2/merge_stats/main.nf +++ /dev/null @@ -1,60 +0,0 @@ -process GATK4_MUTECT2_MERGE_STATS { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" - - input: - tuple val(meta), path(stats), path(vcf) - path fasta - path fasta_fai - path dict - path germline_resource - path germline_resource_tbi - path pon - path pon_tbi - - output: - tuple val(meta), path("*.vcf") , emit: vcf - tuple val(meta), path("*.vcf.stats"), emit: vcf_stats - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK Mutect2] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" - def intervals_command = intervals_bed ? "-L ${intervals_bed}" : "" - def pon_command = pon ? "--panel-of-normals ${pon}" : "" - // def softClippedOption = params.ignore_soft_clipped_bases ? 
"--dont-use-soft-clipped-bases true" : "" - if (!task.memory) { - log.info '[GATK Mutect2] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - """ - # Get raw calls - gatk --java-options "-Xmx${task.memory.toGiga()}g" \ - Mutect2 \ - -R $fasta \ - -I $cram_tumor -tumor ${meta.tumor} \ - -I $cram_normal -normal ${meta.normal} \ - $intervals_command \ - $pon_command \ - $args \ - --germline-resource $germline_resource \ - -O ${prefix}.vcf - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/gatk4/mutect2/merge_stats/meta.yml b/modules/local/gatk4/mutect2/merge_stats/meta.yml deleted file mode 100644 index 8a0932771f..0000000000 --- a/modules/local/gatk4/mutect2/merge_stats/meta.yml +++ /dev/null @@ -1,41 +0,0 @@ -name: gatk4_markduplicates -description: This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are defined as originating from a single fragment of DNA. -keywords: - - mutect2 - - cram -tools: - - gatk4: - description: Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard- - tool_dev_url: https://github.com/broadinstitute/gatk - doi: 10.1158/1538-7445.AM2017-3590 - licence: ['BSD-3-clause'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - cram: - type: file - description: Sorted cram file - pattern: "*.{cram}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - version: - type: file - description: File containing software version - pattern: "*.{version.txt}" - - -authors: - - "@FriederikeHanssen" diff --git a/modules/local/gatk4/mutect2/somatic/main.nf b/modules/local/gatk4/mutect2/somatic/main.nf deleted file mode 100644 index cbbba9e773..0000000000 --- a/modules/local/gatk4/mutect2/somatic/main.nf +++ /dev/null @@ -1,61 +0,0 @@ -process GATK4_MUTECT2_MERGE { - //tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" - - input: - tuple val(meta), path(cram_normal), path(crai_normal), path(cram_tumor), path(crai_tumor), path(interval) - path fasta_fai - path fasta - path dict - path pon - path pon_tbi - path germline_resource - path germline_resource_tbi - - output: - tuple val(meta), path("*.vcf") , emit: vcf - tuple val(meta), path("*.vcf.stats"), emit: vcf_stats - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK Mutect2] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" - def intervals_command = intervals_bed ? "-L ${intervals_bed}" : "" - def pon_command = pon ? "--panel-of-normals ${pon}" : "" - // def softClippedOption = params.ignore_soft_clipped_bases ? 
"--dont-use-soft-clipped-bases true" : "" - if (!task.memory) { - log.info '[GATK Mutect2] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - // def softClippedOption = params.ignore_soft_clipped_bases ? "--dont-use-soft-clipped-bases true" : "" - """ - # Get raw calls - gatk --java-options "-Xmx${task.memory.toGiga()}g" \ - Mutect2 \ - -R $fasta \ - -I $cram_tumor -tumor ${meta.tumor} \ - -I $cram_normal -normal ${meta.normal} \ - $intervals_command \ - $pon_command \ - $args \ - --germline-resource $germline_resource \ - -O ${prefix}.vcf - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/gatk4/mutect2/somatic/meta.yml b/modules/local/gatk4/mutect2/somatic/meta.yml deleted file mode 100644 index 8a0932771f..0000000000 --- a/modules/local/gatk4/mutect2/somatic/meta.yml +++ /dev/null @@ -1,41 +0,0 @@ -name: gatk4_markduplicates -description: This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are defined as originating from a single fragment of DNA. -keywords: - - mutect2 - - cram -tools: - - gatk4: - description: Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard- - tool_dev_url: https://github.com/broadinstitute/gatk - doi: 10.1158/1538-7445.AM2017-3590 - licence: ['BSD-3-clause'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - cram: - type: file - description: Sorted cram file - pattern: "*.{cram}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - version: - type: file - description: File containing software version - pattern: "*.{version.txt}" - - -authors: - - "@FriederikeHanssen" diff --git a/modules/local/gatk4/mutect2/tumor/main.nf b/modules/local/gatk4/mutect2/tumor/main.nf deleted file mode 100644 index 4c3eabd072..0000000000 --- a/modules/local/gatk4/mutect2/tumor/main.nf +++ /dev/null @@ -1,60 +0,0 @@ -process GATK4_MUTECT2_TUMOR { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" - - input: - tuple val(meta), path(cram_normal), path(crai_normal), path(cram_tumor), path(crai_tumor), path(interval) - path fasta_fai - path fasta - path dict - path pon - path pon_tbi - path germline_resource - path germline_resource_tbi - - output: - tuple val(meta), path("*.vcf") , emit: vcf - tuple val(meta), path("*.vcf.stats"), emit: vcf_stats - path "versions.yml" , emit: versions - - script: - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK Mutect2] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" - def intervals_command = intervals_bed ? "-L ${intervals_bed}" : "" - def pon_command = pon ? "--panel-of-normals ${pon}" : "" - // def softClippedOption = params.ignore_soft_clipped_bases ? 
"--dont-use-soft-clipped-bases true" : "" - if (!task.memory) { - log.info '[GATK Mutect2] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - // def softClippedOption = params.ignore_soft_clipped_bases ? "--dont-use-soft-clipped-bases true" : "" - """ - # Get raw calls - gatk --java-options "-Xmx${task.memory.toGiga()}g" \ - Mutect2 \ - -R $fasta \ - -I $cram_tumor -tumor ${meta.tumor} \ - -I $cram_normal -normal ${meta.normal} \ - $intervals_command \ - $pon_command \ - $args \ - --germline-resource $germline_resource \ - -O ${prefix}.vcf - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/gatk4/mutect2/tumor/meta.yml b/modules/local/gatk4/mutect2/tumor/meta.yml deleted file mode 100644 index 8a0932771f..0000000000 --- a/modules/local/gatk4/mutect2/tumor/meta.yml +++ /dev/null @@ -1,41 +0,0 @@ -name: gatk4_markduplicates -description: This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are defined as originating from a single fragment of DNA. -keywords: - - mutect2 - - cram -tools: - - gatk4: - description: Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard- - tool_dev_url: https://github.com/broadinstitute/gatk - doi: 10.1158/1538-7445.AM2017-3590 - licence: ['BSD-3-clause'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - cram: - type: file - description: Sorted cram file - pattern: "*.{cram}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - version: - type: file - description: File containing software version - pattern: "*.{version.txt}" - - -authors: - - "@FriederikeHanssen" From c2ef34d55cb8416064e5215021445c24bf2b52f5 Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 29 Dec 2021 14:08:28 +0100 Subject: [PATCH 15/30] Delete local gatk4 modules, if avail in nf-core --- subworkflows/local/germline_variant_calling.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index 26e46e769b..19c053b97c 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -9,9 +9,9 @@ params.concat_gvcf_options = [:] params.concat_haplotypecaller_options = [:] params.strelka_options = [:] -include { GATK4_HAPLOTYPECALLER as HAPLOTYPECALLER } from '../../modules/local/gatk4/haplotypecaller/main' addParams(options: params.haplotypecaller_options) +include { GATK4_HAPLOTYPECALLER as HAPLOTYPECALLER } from '../../modules/nf-core/modules/gatk4/haplotypecaller/main' addParams(options: params.haplotypecaller_options) include { DEEPVARIANT } from '../../modules/local/deepvariant/main' addParams(options: params.deepvariant_options) -include { GATK4_GENOTYPEGVCF as GENOTYPEGVCF } from '../../modules/local/gatk4/genotypegvcf/main' addParams(options: params.genotypegvcf_options) +include { GATK4_GENOTYPEGVCF as GENOTYPEGVCF } from '../../modules/nf-core/modules/gatk4/genotypegvcf/main' addParams(options: params.genotypegvcf_options) include { CONCAT_VCF as CONCAT_GVCF } from '../../modules/local/concat_vcf/main' addParams(options: params.concat_gvcf_options) include { CONCAT_VCF as CONCAT_HAPLOTYPECALLER } from 
'../../modules/local/concat_vcf/main' addParams(options: params.concat_haplotypecaller_options) include { STRELKA_GERMLINE as STRELKA } from '../../modules/nf-core/modules/strelka/germline/main' addParams(options: params.strelka_options) From b6ec73c57164aa22e492bb423690b6dfb1e004ae Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 29 Dec 2021 14:16:54 +0100 Subject: [PATCH 16/30] Revert "Delete local gatk4 modules, if avail in nf-core" This reverts commit c2ef34d55cb8416064e5215021445c24bf2b52f5. --- subworkflows/local/germline_variant_calling.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index 19c053b97c..26e46e769b 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -9,9 +9,9 @@ params.concat_gvcf_options = [:] params.concat_haplotypecaller_options = [:] params.strelka_options = [:] -include { GATK4_HAPLOTYPECALLER as HAPLOTYPECALLER } from '../../modules/nf-core/modules/gatk4/haplotypecaller/main' addParams(options: params.haplotypecaller_options) +include { GATK4_HAPLOTYPECALLER as HAPLOTYPECALLER } from '../../modules/local/gatk4/haplotypecaller/main' addParams(options: params.haplotypecaller_options) include { DEEPVARIANT } from '../../modules/local/deepvariant/main' addParams(options: params.deepvariant_options) -include { GATK4_GENOTYPEGVCF as GENOTYPEGVCF } from '../../modules/nf-core/modules/gatk4/genotypegvcf/main' addParams(options: params.genotypegvcf_options) +include { GATK4_GENOTYPEGVCF as GENOTYPEGVCF } from '../../modules/local/gatk4/genotypegvcf/main' addParams(options: params.genotypegvcf_options) include { CONCAT_VCF as CONCAT_GVCF } from '../../modules/local/concat_vcf/main' addParams(options: params.concat_gvcf_options) include { CONCAT_VCF as CONCAT_HAPLOTYPECALLER } from '../../modules/local/concat_vcf/main' addParams(options: 
params.concat_haplotypecaller_options) include { STRELKA_GERMLINE as STRELKA } from '../../modules/nf-core/modules/strelka/germline/main' addParams(options: params.strelka_options) From e16802250561c5c5f3755afebaea727df0909984 Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 29 Dec 2021 14:17:02 +0100 Subject: [PATCH 17/30] Revert "Delete local gatk4 modules, if avail in nf-core" This reverts commit 62b6980955761bb34199d1ceb10ed801cb060d51. --- modules/local/gatk4/applybqsr/main.nf | 45 +++++++++++++ modules/local/gatk4/applybqsr/meta.yml | 58 +++++++++++++++++ modules/local/gatk4/baserecalibrator/main.nf | 48 ++++++++++++++ modules/local/gatk4/baserecalibrator/meta.yml | 58 +++++++++++++++++ .../gatk4/estimatelibrarycomplexity/main.nf | 43 +++++++++++++ modules/local/gatk4/gatherbqsrreports/main.nf | 39 ++++++++++++ modules/local/gatk4/genotypegvcf/main.nf | 50 +++++++++++++++ modules/local/gatk4/haplotypecaller/main.nf | 52 +++++++++++++++ modules/local/gatk4/haplotypecaller/meta.yml | 63 +++++++++++++++++++ modules/local/gatk4/markduplicates/main.nf | 42 +++++++++++++ modules/local/gatk4/markduplicates/meta.yml | 50 +++++++++++++++ .../local/gatk4/mutect2/merge_stats/main.nf | 60 ++++++++++++++++++ .../local/gatk4/mutect2/merge_stats/meta.yml | 41 ++++++++++++ modules/local/gatk4/mutect2/somatic/main.nf | 61 ++++++++++++++++++ modules/local/gatk4/mutect2/somatic/meta.yml | 41 ++++++++++++ modules/local/gatk4/mutect2/tumor/main.nf | 60 ++++++++++++++++++ modules/local/gatk4/mutect2/tumor/meta.yml | 41 ++++++++++++ 17 files changed, 852 insertions(+) create mode 100644 modules/local/gatk4/applybqsr/main.nf create mode 100644 modules/local/gatk4/applybqsr/meta.yml create mode 100644 modules/local/gatk4/baserecalibrator/main.nf create mode 100644 modules/local/gatk4/baserecalibrator/meta.yml create mode 100644 modules/local/gatk4/estimatelibrarycomplexity/main.nf create mode 100644 modules/local/gatk4/gatherbqsrreports/main.nf create mode 100644 
modules/local/gatk4/genotypegvcf/main.nf create mode 100644 modules/local/gatk4/haplotypecaller/main.nf create mode 100644 modules/local/gatk4/haplotypecaller/meta.yml create mode 100644 modules/local/gatk4/markduplicates/main.nf create mode 100644 modules/local/gatk4/markduplicates/meta.yml create mode 100644 modules/local/gatk4/mutect2/merge_stats/main.nf create mode 100644 modules/local/gatk4/mutect2/merge_stats/meta.yml create mode 100644 modules/local/gatk4/mutect2/somatic/main.nf create mode 100644 modules/local/gatk4/mutect2/somatic/meta.yml create mode 100644 modules/local/gatk4/mutect2/tumor/main.nf create mode 100644 modules/local/gatk4/mutect2/tumor/meta.yml diff --git a/modules/local/gatk4/applybqsr/main.nf b/modules/local/gatk4/applybqsr/main.nf new file mode 100644 index 0000000000..9da6f922e9 --- /dev/null +++ b/modules/local/gatk4/applybqsr/main.nf @@ -0,0 +1,45 @@ +process GATK4_APPLYBQSR { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : + 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" + + input: + tuple val(meta), path(cram), path(crai), path(bqsr_table), path(intervals_bed) + path fasta + path fasta_fai + path dict + + output: + tuple val(meta), path("*.cram"), emit: cram + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK ApplyBQSR] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" + def intervals_command = intervals_bed ? 
"-L ${intervals_bed}" : "" + """ + gatk ApplyBQSR \\ + -R $fasta \\ + -I $cram \\ + --bqsr-recal-file $bqsr_table \\ + $intervals_command \\ + --tmp-dir . \\ + -O ${prefix}.cram \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/gatk4/applybqsr/meta.yml b/modules/local/gatk4/applybqsr/meta.yml new file mode 100644 index 0000000000..9bf12f09bb --- /dev/null +++ b/modules/local/gatk4/applybqsr/meta.yml @@ -0,0 +1,58 @@ +name: gatk4_applybqsr +description: Apply base quality score recalibration (BQSR) to a bam file +keywords: + - bqsr + - bam +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file from alignment + pattern: "*.{bam}" + - bqsr_table: + type: file + description: Recalibration table from gatk4_baserecalibrator + - fasta: + type: file + description: The reference fasta file + - fastaidx: + type: file + description: Index of reference fasta file + - dict: + type: file + description: GATK sequence dictionary + - intervalsBed: + type: file + description: Bed file with the genomic regions included in the library (optional) + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" + - bam: + type: file + description: Recalibrated BAM file + pattern: "*.{bam}" + +authors: + - "@yocra3" diff --git a/modules/local/gatk4/baserecalibrator/main.nf b/modules/local/gatk4/baserecalibrator/main.nf new file mode 100644 index 0000000000..86a42f8df6 --- /dev/null +++ b/modules/local/gatk4/baserecalibrator/main.nf @@ -0,0 +1,48 @@ +process GATK4_BASERECALIBRATOR { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : + 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" + + input: + tuple val(meta), path(cram), path(crai), path(intervals_bed) + path fasta + path fasta_fai + path dict + path known_sites + path known_sites_tbi + + output: + tuple val(meta), path("*.table"), emit: table + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK BaseRecalibrator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" + def intervals_command = intervals_bed ? "-L ${intervals_bed}" : "" + def sites_command = known_sites.collect{"--known-sites ${it}"}.join(' ') + """ + gatk BaseRecalibrator \ + -R $fasta \ + -I $cram \ + $sites_command \ + $intervals_command \ + --tmp-dir . 
\ + $args \ + -O ${prefix}.table + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/gatk4/baserecalibrator/meta.yml b/modules/local/gatk4/baserecalibrator/meta.yml new file mode 100644 index 0000000000..0996dcbe51 --- /dev/null +++ b/modules/local/gatk4/baserecalibrator/meta.yml @@ -0,0 +1,58 @@ +name: gatk4_baserecalibrator +description: Generate recalibration table for Base Quality Score Recalibration (BQSR) +keywords: + - sort +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file from alignment + pattern: "*.{bam}" + - fasta: + type: file + description: The reference fasta file + - fastaidx: + type: file + description: Index of reference fasta file + - dict: + type: file + description: GATK sequence dictionary + - intervalsBed: + type: file + description: Bed file with the genomic regions included in the library (optional) + - knownSites: + type: file + description: Bed file with the genomic regions included in the library (optional) + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" + - table: + type: file + description: Recalibration table from BaseRecalibrator + pattern: "*.{table}" + +authors: + - "@yocra3" diff --git a/modules/local/gatk4/estimatelibrarycomplexity/main.nf b/modules/local/gatk4/estimatelibrarycomplexity/main.nf new file mode 100644 index 0000000000..1d6075c79b --- /dev/null +++ b/modules/local/gatk4/estimatelibrarycomplexity/main.nf @@ -0,0 +1,43 @@ +process GATK4_ESTIMATELIBRARYCOMPLEXITY { + tag "$meta.id" + label 'process_high' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : + 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" + + input: + tuple val(meta), path(bam) + path fasta + path fasta_fai + path dict + + output: + path "*.md.metrics", emit: metrics + path "versions.yml", emit: versions + + script: + def args = task.ext.args ?: '' + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK EstimateLibraryComplexity] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" + def bams = bam.collect(){ x -> "-I ".concat(x.toString()) }.join(" ") + """ + gatk EstimateLibraryComplexity \ + ${bams} \ + -O ${prefix}.metrics \ + --REFERENCE_SEQUENCE ${fasta} \ + --VALIDATION_STRINGENCY SILENT \ + --TMP_DIR . 
$args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/gatk4/gatherbqsrreports/main.nf b/modules/local/gatk4/gatherbqsrreports/main.nf new file mode 100644 index 0000000000..3284e698ba --- /dev/null +++ b/modules/local/gatk4/gatherbqsrreports/main.nf @@ -0,0 +1,39 @@ +process GATK4_GATHERBQSRREPORTS { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : + 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" + + input: + tuple val(meta), path(recal) + + output: + tuple val(meta), path("*.recal.table"), emit: table + path "*.recal.table" , emit: report + path "versions.yml" , emit: versions + + script: + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK GatherBQSRReports] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" + def input = recal.collect{"-I ${it}"}.join(' ') + """ + gatk --java-options -Xmx${task.memory.toGiga()}g \ + GatherBQSRReports \ + ${input} \ + --tmp-dir . \ + -O ${meta.sample}.recal.table + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/gatk4/genotypegvcf/main.nf b/modules/local/gatk4/genotypegvcf/main.nf new file mode 100644 index 0000000000..0474acf4df --- /dev/null +++ b/modules/local/gatk4/genotypegvcf/main.nf @@ -0,0 +1,50 @@ +process GATK4_GENOTYPEGVCF { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? 
"bioconda::gatk4=4.2.3.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : + 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" + + input: + tuple val(meta), path(gvcf), path(intervals_bed) + path fasta + path fasta_fai + path dict + path dbsnp + path dbsnp_tbi + + output: + tuple val(meta), path("*.vcf"), emit: vcf + path "versions.yml" , emit: versions + + script: + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK GenotypeGVCFs] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" + def intervals_command = intervals_bed ? "-L ${intervals_bed}" : "" + def sites_command = dbsnp ? "--D ${dbsnp}" : "" + """ + gatk --java-options -Xmx${task.memory.toGiga()}g \ + IndexFeatureFile \ + -I ${gvcf} + + gatk --java-options -Xmx${task.memory.toGiga()}g \ + GenotypeGVCFs \ + -R ${fasta} \ + ${intervals_command} \ + ${sites_command} \ + -V ${gvcf} \ + -O ${prefix}.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/gatk4/haplotypecaller/main.nf b/modules/local/gatk4/haplotypecaller/main.nf new file mode 100644 index 0000000000..6a6e337e20 --- /dev/null +++ b/modules/local/gatk4/haplotypecaller/main.nf @@ -0,0 +1,52 @@ +process GATK4_HAPLOTYPECALLER { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : + 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" + + input: + tuple val(meta), path(cram), path(crai), path(interval) + path fasta + path fasta_fai + path dict + path dbsnp + path dbsnp_tbi + + output: + tuple val(meta), path("*.vcf") , emit: vcf + tuple val(meta), path(interval), path("*.vcf"), emit: interval_vcf + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" + def intervals_command = intervals_bed ? "-L ${intervals_bed}" : "" + def sites_command = dbsnp ? "--D ${dbsnp}" : "" + //TODO allow ploidy argument here since we allow it for the cnv callers? or is this covered with options? Might unintuitive to use + """ + gatk \\ + --java-options "-Xmx${avail_mem}g" \\ + HaplotypeCaller \\ + -R $fasta \\ + -I $cram \\ + ${sites_command} \\ + ${intervals_command} \\ + -O ${prefix}.vcf \\ + --tmp-dir . 
\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/gatk4/haplotypecaller/meta.yml b/modules/local/gatk4/haplotypecaller/meta.yml new file mode 100644 index 0000000000..f0fc3910e3 --- /dev/null +++ b/modules/local/gatk4/haplotypecaller/meta.yml @@ -0,0 +1,63 @@ +name: gatk4_haplotypecaller +description: Call germline SNPs and indels via local re-assembly of haplotypes +keywords: + - gatk4 + - haplotypecaller + - haplotype +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file + pattern: "*.bam" + - bai: + type: file + description: Index of BAM file + pattern: "*.bam.bai" + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: Index of reference fasta file + pattern: "fasta.fai" + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" + - vcf: + type: file + description: Compressed VCF file + pattern: "*.vcf.gz" + - tbi: + type: file + description: Index of VCF file + pattern: "*.vcf.gz.tbi" + +authors: + - "@suzannejin" diff --git a/modules/local/gatk4/markduplicates/main.nf b/modules/local/gatk4/markduplicates/main.nf new file mode 100644 index 0000000000..1ac2e789a7 --- /dev/null +++ b/modules/local/gatk4/markduplicates/main.nf @@ -0,0 +1,42 @@ +process GATK4_MARKDUPLICATES { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : + 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" + + input: + tuple val(meta), path(bams) + + output: + tuple val(meta), path("*.bam"), path("*.bai"), emit: bam_bai + tuple val(meta), path("*.metrics") , emit: metrics + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK GenotypeGVCFs] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" + def bam_list = bams.collect(){ bam -> "--INPUT ".concat(bam.toString()) }.join(" ") + """ + gatk MarkDuplicates \\ + $bam_list \\ + --METRICS_FILE ${prefix}.metrics \\ + --TMP_DIR . 
\\ + --CREATE_INDEX true \\ + --OUTPUT ${prefix}.bam \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/gatk4/markduplicates/meta.yml b/modules/local/gatk4/markduplicates/meta.yml new file mode 100644 index 0000000000..5777067a4f --- /dev/null +++ b/modules/local/gatk4/markduplicates/meta.yml @@ -0,0 +1,50 @@ +name: gatk4_markduplicates +description: This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are defined as originating from a single fragment of DNA. +keywords: + - markduplicates + - bam + - sort +tools: + - gatk4: + description: Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard- + tool_dev_url: https://github.com/broadinstitute/gatk + doi: 10.1158/1538-7445.AM2017-3590 + licence: ['MIT'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM file + pattern: "*.{bam}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bam: + type: file + description: Marked duplicates BAM file + pattern: "*.{bam}" + - metrics: + type: file + description: Duplicate metrics file generated by GATK + pattern: "*.{metrics.txt}" + +authors: + - "@ajodeh-juma" + - "@FriederikeHanssen" diff --git a/modules/local/gatk4/mutect2/merge_stats/main.nf b/modules/local/gatk4/mutect2/merge_stats/main.nf new file mode 100644 index 0000000000..44f110b40c --- /dev/null +++ b/modules/local/gatk4/mutect2/merge_stats/main.nf @@ -0,0 +1,60 @@ +process GATK4_MUTECT2_MERGE_STATS { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : + 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" + + input: + tuple val(meta), path(stats), path(vcf) + path fasta + path fasta_fai + path dict + path germline_resource + path germline_resource_tbi + path pon + path pon_tbi + + output: + tuple val(meta), path("*.vcf") , emit: vcf + tuple val(meta), path("*.vcf.stats"), emit: vcf_stats + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK Mutect2] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" + def intervals_command = intervals_bed ? "-L ${intervals_bed}" : "" + def pon_command = pon ? "--panel-of-normals ${pon}" : "" + // def softClippedOption = params.ignore_soft_clipped_bases ? 
"--dont-use-soft-clipped-bases true" : "" + if (!task.memory) { + log.info '[GATK Mutect2] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ + # Get raw calls + gatk --java-options "-Xmx${task.memory.toGiga()}g" \ + Mutect2 \ + -R $fasta \ + -I $cram_tumor -tumor ${meta.tumor} \ + -I $cram_normal -normal ${meta.normal} \ + $intervals_command \ + $pon_command \ + $args \ + --germline-resource $germline_resource \ + -O ${prefix}.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/gatk4/mutect2/merge_stats/meta.yml b/modules/local/gatk4/mutect2/merge_stats/meta.yml new file mode 100644 index 0000000000..8a0932771f --- /dev/null +++ b/modules/local/gatk4/mutect2/merge_stats/meta.yml @@ -0,0 +1,41 @@ +name: gatk4_markduplicates +description: This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are defined as originating from a single fragment of DNA. +keywords: + - mutect2 + - cram +tools: + - gatk4: + description: Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard- + tool_dev_url: https://github.com/broadinstitute/gatk + doi: 10.1158/1538-7445.AM2017-3590 + licence: ['BSD-3-clause'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - cram: + type: file + description: Sorted cram file + pattern: "*.{cram}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" + + +authors: + - "@FriederikeHanssen" diff --git a/modules/local/gatk4/mutect2/somatic/main.nf b/modules/local/gatk4/mutect2/somatic/main.nf new file mode 100644 index 0000000000..cbbba9e773 --- /dev/null +++ b/modules/local/gatk4/mutect2/somatic/main.nf @@ -0,0 +1,61 @@ +process GATK4_MUTECT2_MERGE { + //tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : + 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" + + input: + tuple val(meta), path(cram_normal), path(crai_normal), path(cram_tumor), path(crai_tumor), path(interval) + path fasta_fai + path fasta + path dict + path pon + path pon_tbi + path germline_resource + path germline_resource_tbi + + output: + tuple val(meta), path("*.vcf") , emit: vcf + tuple val(meta), path("*.vcf.stats"), emit: vcf_stats + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK Mutect2] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" + def intervals_command = intervals_bed ? "-L ${intervals_bed}" : "" + def pon_command = pon ? "--panel-of-normals ${pon}" : "" + // def softClippedOption = params.ignore_soft_clipped_bases ? 
"--dont-use-soft-clipped-bases true" : "" + if (!task.memory) { + log.info '[GATK Mutect2] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + // def softClippedOption = params.ignore_soft_clipped_bases ? "--dont-use-soft-clipped-bases true" : "" + """ + # Get raw calls + gatk --java-options "-Xmx${task.memory.toGiga()}g" \ + Mutect2 \ + -R $fasta \ + -I $cram_tumor -tumor ${meta.tumor} \ + -I $cram_normal -normal ${meta.normal} \ + $intervals_command \ + $pon_command \ + $args \ + --germline-resource $germline_resource \ + -O ${prefix}.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/gatk4/mutect2/somatic/meta.yml b/modules/local/gatk4/mutect2/somatic/meta.yml new file mode 100644 index 0000000000..8a0932771f --- /dev/null +++ b/modules/local/gatk4/mutect2/somatic/meta.yml @@ -0,0 +1,41 @@ +name: gatk4_markduplicates +description: This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are defined as originating from a single fragment of DNA. +keywords: + - mutect2 + - cram +tools: + - gatk4: + description: Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard- + tool_dev_url: https://github.com/broadinstitute/gatk + doi: 10.1158/1538-7445.AM2017-3590 + licence: ['BSD-3-clause'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - cram: + type: file + description: Sorted cram file + pattern: "*.{cram}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" + + +authors: + - "@FriederikeHanssen" diff --git a/modules/local/gatk4/mutect2/tumor/main.nf b/modules/local/gatk4/mutect2/tumor/main.nf new file mode 100644 index 0000000000..4c3eabd072 --- /dev/null +++ b/modules/local/gatk4/mutect2/tumor/main.nf @@ -0,0 +1,60 @@ +process GATK4_MUTECT2_TUMOR { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : + 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" + + input: + tuple val(meta), path(cram_normal), path(crai_normal), path(cram_tumor), path(crai_tumor), path(interval) + path fasta_fai + path fasta + path dict + path pon + path pon_tbi + path germline_resource + path germline_resource_tbi + + output: + tuple val(meta), path("*.vcf") , emit: vcf + tuple val(meta), path("*.vcf.stats"), emit: vcf_stats + path "versions.yml" , emit: versions + + script: + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK Mutect2] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" + def intervals_command = intervals_bed ? "-L ${intervals_bed}" : "" + def pon_command = pon ? "--panel-of-normals ${pon}" : "" + // def softClippedOption = params.ignore_soft_clipped_bases ? 
"--dont-use-soft-clipped-bases true" : "" + if (!task.memory) { + log.info '[GATK Mutect2] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + // def softClippedOption = params.ignore_soft_clipped_bases ? "--dont-use-soft-clipped-bases true" : "" + """ + # Get raw calls + gatk --java-options "-Xmx${task.memory.toGiga()}g" \ + Mutect2 \ + -R $fasta \ + -I $cram_tumor -tumor ${meta.tumor} \ + -I $cram_normal -normal ${meta.normal} \ + $intervals_command \ + $pon_command \ + $args \ + --germline-resource $germline_resource \ + -O ${prefix}.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/gatk4/mutect2/tumor/meta.yml b/modules/local/gatk4/mutect2/tumor/meta.yml new file mode 100644 index 0000000000..8a0932771f --- /dev/null +++ b/modules/local/gatk4/mutect2/tumor/meta.yml @@ -0,0 +1,41 @@ +name: gatk4_markduplicates +description: This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are defined as originating from a single fragment of DNA. +keywords: + - mutect2 + - cram +tools: + - gatk4: + description: Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard- + tool_dev_url: https://github.com/broadinstitute/gatk + doi: 10.1158/1538-7445.AM2017-3590 + licence: ['BSD-3-clause'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - cram: + type: file + description: Sorted cram file + pattern: "*.{cram}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" + + +authors: + - "@FriederikeHanssen" From 8c63aee73c5419463146658d2143a50baf83c682 Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 29 Dec 2021 14:22:47 +0100 Subject: [PATCH 18/30] Remove local applybqsr --- modules/local/gatk4/applybqsr/main.nf | 45 -------------------- modules/local/gatk4/applybqsr/meta.yml | 58 -------------------------- 2 files changed, 103 deletions(-) delete mode 100644 modules/local/gatk4/applybqsr/main.nf delete mode 100644 modules/local/gatk4/applybqsr/meta.yml diff --git a/modules/local/gatk4/applybqsr/main.nf b/modules/local/gatk4/applybqsr/main.nf deleted file mode 100644 index 9da6f922e9..0000000000 --- a/modules/local/gatk4/applybqsr/main.nf +++ /dev/null @@ -1,45 +0,0 @@ -process GATK4_APPLYBQSR { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" - - input: - tuple val(meta), path(cram), path(crai), path(bqsr_table), path(intervals_bed) - path fasta - path fasta_fai - path dict - - output: - tuple val(meta), path("*.cram"), emit: cram - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK ApplyBQSR] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - def prefix = task.ext.suffix ? 
"${meta.id}${task.ext.suffix}" : "${meta.id}" - def intervals_command = intervals_bed ? "-L ${intervals_bed}" : "" - """ - gatk ApplyBQSR \\ - -R $fasta \\ - -I $cram \\ - --bqsr-recal-file $bqsr_table \\ - $intervals_command \\ - --tmp-dir . \\ - -O ${prefix}.cram \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/gatk4/applybqsr/meta.yml b/modules/local/gatk4/applybqsr/meta.yml deleted file mode 100644 index 9bf12f09bb..0000000000 --- a/modules/local/gatk4/applybqsr/meta.yml +++ /dev/null @@ -1,58 +0,0 @@ -name: gatk4_applybqsr -description: Apply base quality score recalibration (BQSR) to a bam file -keywords: - - bqsr - - bam -tools: - - gatk4: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - bam: - type: file - description: BAM file from alignment - pattern: "*.{bam}" - - bqsr_table: - type: file - description: Recalibration table from gatk4_baserecalibrator - - fasta: - type: file - description: The reference fasta file - - fastaidx: - type: file - description: Index of reference fasta file - - dict: - type: file - description: GATK sequence dictionary - - intervalsBed: - type: file - description: Bed file with the genomic regions included in the library (optional) - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - version: - type: file - description: File containing software version - pattern: "*.{version.txt}" - - bam: - type: file - description: Recalibrated BAM file - pattern: "*.{bam}" - -authors: - - "@yocra3" From b88ce7418c8946693cd134dc317059c1f5d3cf2a Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 29 Dec 2021 14:45:15 +0100 Subject: [PATCH 19/30] Revert removal of applybqsr lcoal --- workflows/sarek.nf | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/workflows/sarek.nf b/workflows/sarek.nf index ae5e0f1d63..1ec75d2abb 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -165,7 +165,8 @@ ch_multiqc_custom_config = params.multiqc_config ? 
Channel.fromPath(params.multi // SUBWORKFLOWS // -include { FASTQC_TRIMGALORE } from '../subworkflows/nf-core/fastqc_trimgalore' +include { FASTQC_TRIMGALORE } from '../subworkflows/nf-core/fastqc_trimgalore' +include { CREATE_UMI_CONSENSUS } from '../subworkflows/nf-core/fgbio_create_umi_consensus' // // MODULES: Installed directly from nf-core/modules @@ -247,6 +248,12 @@ workflow SAREK { // Get versions from all software used ch_versions = ch_versions.mix(FASTQC_TRIMGALORE.out.versions) + //Since read need additional mapping afterwards, I would argue for haveing the process here + if(params.read_structure){ + CREATE_UMI_CONSENSUS(reads_input, fasta, bwa, params.read_structure, params.group_by_umi_strategy , aligner) + reads_input = BAMTOFASTQ(CREATE_UMI_CONSENSUS.out.consensusbam).out + } + // STEP 1: MAPPING READS TO REFERENCE GENOME GATK4_MAPPING( params.aligner, From 141227a9c78cd379c713d88c4f26f586680f135e Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 29 Dec 2021 14:47:54 +0100 Subject: [PATCH 20/30] Revert removal of applybqsr lcoal --- workflows/sarek.nf | 7 ------- 1 file changed, 7 deletions(-) diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 1ec75d2abb..46bd5e6c1f 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -166,7 +166,6 @@ ch_multiqc_custom_config = params.multiqc_config ? 
Channel.fromPath(params.multi // include { FASTQC_TRIMGALORE } from '../subworkflows/nf-core/fastqc_trimgalore' -include { CREATE_UMI_CONSENSUS } from '../subworkflows/nf-core/fgbio_create_umi_consensus' // // MODULES: Installed directly from nf-core/modules @@ -248,12 +247,6 @@ workflow SAREK { // Get versions from all software used ch_versions = ch_versions.mix(FASTQC_TRIMGALORE.out.versions) - //Since read need additional mapping afterwards, I would argue for haveing the process here - if(params.read_structure){ - CREATE_UMI_CONSENSUS(reads_input, fasta, bwa, params.read_structure, params.group_by_umi_strategy , aligner) - reads_input = BAMTOFASTQ(CREATE_UMI_CONSENSUS.out.consensusbam).out - } - // STEP 1: MAPPING READS TO REFERENCE GENOME GATK4_MAPPING( params.aligner, From 0e62a91a6acf8eaab2f8a58d59e753fd2631865d Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 29 Dec 2021 15:03:00 +0100 Subject: [PATCH 21/30] Remove local baserecak --- modules/local/gatk4/baserecalibrator/main.nf | 48 --------------- modules/local/gatk4/baserecalibrator/meta.yml | 58 ------------------- 2 files changed, 106 deletions(-) delete mode 100644 modules/local/gatk4/baserecalibrator/main.nf delete mode 100644 modules/local/gatk4/baserecalibrator/meta.yml diff --git a/modules/local/gatk4/baserecalibrator/main.nf b/modules/local/gatk4/baserecalibrator/main.nf deleted file mode 100644 index 86a42f8df6..0000000000 --- a/modules/local/gatk4/baserecalibrator/main.nf +++ /dev/null @@ -1,48 +0,0 @@ -process GATK4_BASERECALIBRATOR { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" - - input: - tuple val(meta), path(cram), path(crai), path(intervals_bed) - path fasta - path fasta_fai - path dict - path known_sites - path known_sites_tbi - - output: - tuple val(meta), path("*.table"), emit: table - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK BaseRecalibrator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" - def intervals_command = intervals_bed ? "-L ${intervals_bed}" : "" - def sites_command = known_sites.collect{"--known-sites ${it}"}.join(' ') - """ - gatk BaseRecalibrator \ - -R $fasta \ - -I $cram \ - $sites_command \ - $intervals_command \ - --tmp-dir . \ - $args \ - -O ${prefix}.table - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/gatk4/baserecalibrator/meta.yml b/modules/local/gatk4/baserecalibrator/meta.yml deleted file mode 100644 index 0996dcbe51..0000000000 --- a/modules/local/gatk4/baserecalibrator/meta.yml +++ /dev/null @@ -1,58 +0,0 @@ -name: gatk4_baserecalibrator -description: Generate recalibration table for Base Quality Score Recalibration (BQSR) -keywords: - - sort -tools: - - gatk4: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. 
- homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM file from alignment - pattern: "*.{bam}" - - fasta: - type: file - description: The reference fasta file - - fastaidx: - type: file - description: Index of reference fasta file - - dict: - type: file - description: GATK sequence dictionary - - intervalsBed: - type: file - description: Bed file with the genomic regions included in the library (optional) - - knownSites: - type: file - description: Bed file with the genomic regions included in the library (optional) - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - version: - type: file - description: File containing software version - pattern: "*.{version.txt}" - - table: - type: file - description: Recalibration table from BaseRecalibrator - pattern: "*.{table}" - -authors: - - "@yocra3" From 830321a140d3959a21070eff71370dd693d1670b Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 29 Dec 2021 15:17:54 +0100 Subject: [PATCH 22/30] Remove local gatherbqsr --- modules/local/gatk4/gatherbqsrreports/main.nf | 39 ------------------- 1 file changed, 39 deletions(-) delete mode 100644 modules/local/gatk4/gatherbqsrreports/main.nf diff --git a/modules/local/gatk4/gatherbqsrreports/main.nf b/modules/local/gatk4/gatherbqsrreports/main.nf deleted file mode 100644 index 3284e698ba..0000000000 --- a/modules/local/gatk4/gatherbqsrreports/main.nf +++ /dev/null @@ -1,39 +0,0 @@ -process GATK4_GATHERBQSRREPORTS { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? 
"bioconda::gatk4=4.2.3.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" - - input: - tuple val(meta), path(recal) - - output: - tuple val(meta), path("*.recal.table"), emit: table - path "*.recal.table" , emit: report - path "versions.yml" , emit: versions - - script: - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK GatherBQSRReports] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" - def input = recal.collect{"-I ${it}"}.join(' ') - """ - gatk --java-options -Xmx${task.memory.toGiga()}g \ - GatherBQSRReports \ - ${input} \ - --tmp-dir . \ - -O ${meta.sample}.recal.table - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} From f5f4b42e836befaa16043b2f0e5bf617f6d6d7ed Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 29 Dec 2021 15:56:57 +0100 Subject: [PATCH 23/30] Remove local genotypegvcf --- modules/local/gatk4/genotypegvcf/main.nf | 50 ------------------------ 1 file changed, 50 deletions(-) delete mode 100644 modules/local/gatk4/genotypegvcf/main.nf diff --git a/modules/local/gatk4/genotypegvcf/main.nf b/modules/local/gatk4/genotypegvcf/main.nf deleted file mode 100644 index 0474acf4df..0000000000 --- a/modules/local/gatk4/genotypegvcf/main.nf +++ /dev/null @@ -1,50 +0,0 @@ -process GATK4_GENOTYPEGVCF { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" - - input: - tuple val(meta), path(gvcf), path(intervals_bed) - path fasta - path fasta_fai - path dict - path dbsnp - path dbsnp_tbi - - output: - tuple val(meta), path("*.vcf"), emit: vcf - path "versions.yml" , emit: versions - - script: - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK GenotypeGVCFs] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" - def intervals_command = intervals_bed ? "-L ${intervals_bed}" : "" - def sites_command = dbsnp ? "--D ${dbsnp}" : "" - """ - gatk --java-options -Xmx${task.memory.toGiga()}g \ - IndexFeatureFile \ - -I ${gvcf} - - gatk --java-options -Xmx${task.memory.toGiga()}g \ - GenotypeGVCFs \ - -R ${fasta} \ - ${intervals_command} \ - ${sites_command} \ - -V ${gvcf} \ - -O ${prefix}.vcf - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} From 72b6191fc5d420d69fae55e17ed697277577989e Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 29 Dec 2021 16:02:09 +0100 Subject: [PATCH 24/30] Revert removal --- .../local/gatk4/mutect2/merge_stats/main.nf | 60 ------------------ .../local/gatk4/mutect2/merge_stats/meta.yml | 41 ------------- modules/local/gatk4/mutect2/somatic/main.nf | 61 ------------------- modules/local/gatk4/mutect2/somatic/meta.yml | 41 ------------- modules/local/gatk4/mutect2/tumor/main.nf | 60 ------------------ modules/local/gatk4/mutect2/tumor/meta.yml | 41 ------------- 6 files changed, 304 deletions(-) delete mode 100644 modules/local/gatk4/mutect2/merge_stats/main.nf delete mode 100644 modules/local/gatk4/mutect2/merge_stats/meta.yml delete mode 100644 
modules/local/gatk4/mutect2/somatic/main.nf delete mode 100644 modules/local/gatk4/mutect2/somatic/meta.yml delete mode 100644 modules/local/gatk4/mutect2/tumor/main.nf delete mode 100644 modules/local/gatk4/mutect2/tumor/meta.yml diff --git a/modules/local/gatk4/mutect2/merge_stats/main.nf b/modules/local/gatk4/mutect2/merge_stats/main.nf deleted file mode 100644 index 44f110b40c..0000000000 --- a/modules/local/gatk4/mutect2/merge_stats/main.nf +++ /dev/null @@ -1,60 +0,0 @@ -process GATK4_MUTECT2_MERGE_STATS { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" - - input: - tuple val(meta), path(stats), path(vcf) - path fasta - path fasta_fai - path dict - path germline_resource - path germline_resource_tbi - path pon - path pon_tbi - - output: - tuple val(meta), path("*.vcf") , emit: vcf - tuple val(meta), path("*.vcf.stats"), emit: vcf_stats - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK Mutect2] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" - def intervals_command = intervals_bed ? "-L ${intervals_bed}" : "" - def pon_command = pon ? "--panel-of-normals ${pon}" : "" - // def softClippedOption = params.ignore_soft_clipped_bases ? "--dont-use-soft-clipped-bases true" : "" - if (!task.memory) { - log.info '[GATK Mutect2] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
- } else { - avail_mem = task.memory.giga - } - """ - # Get raw calls - gatk --java-options "-Xmx${task.memory.toGiga()}g" \ - Mutect2 \ - -R $fasta \ - -I $cram_tumor -tumor ${meta.tumor} \ - -I $cram_normal -normal ${meta.normal} \ - $intervals_command \ - $pon_command \ - $args \ - --germline-resource $germline_resource \ - -O ${prefix}.vcf - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/gatk4/mutect2/merge_stats/meta.yml b/modules/local/gatk4/mutect2/merge_stats/meta.yml deleted file mode 100644 index 8a0932771f..0000000000 --- a/modules/local/gatk4/mutect2/merge_stats/meta.yml +++ /dev/null @@ -1,41 +0,0 @@ -name: gatk4_markduplicates -description: This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are defined as originating from a single fragment of DNA. -keywords: - - mutect2 - - cram -tools: - - gatk4: - description: Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard- - tool_dev_url: https://github.com/broadinstitute/gatk - doi: 10.1158/1538-7445.AM2017-3590 - licence: ['BSD-3-clause'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - cram: - type: file - description: Sorted cram file - pattern: "*.{cram}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - version: - type: file - description: File containing software version - pattern: "*.{version.txt}" - - -authors: - - "@FriederikeHanssen" diff --git a/modules/local/gatk4/mutect2/somatic/main.nf b/modules/local/gatk4/mutect2/somatic/main.nf deleted file mode 100644 index cbbba9e773..0000000000 --- a/modules/local/gatk4/mutect2/somatic/main.nf +++ /dev/null @@ -1,61 +0,0 @@ -process GATK4_MUTECT2_MERGE { - //tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" - - input: - tuple val(meta), path(cram_normal), path(crai_normal), path(cram_tumor), path(crai_tumor), path(interval) - path fasta_fai - path fasta - path dict - path pon - path pon_tbi - path germline_resource - path germline_resource_tbi - - output: - tuple val(meta), path("*.vcf") , emit: vcf - tuple val(meta), path("*.vcf.stats"), emit: vcf_stats - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK Mutect2] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" - def intervals_command = intervals_bed ? "-L ${intervals_bed}" : "" - def pon_command = pon ? "--panel-of-normals ${pon}" : "" - // def softClippedOption = params.ignore_soft_clipped_bases ? "--dont-use-soft-clipped-bases true" : "" - if (!task.memory) { - log.info '[GATK Mutect2] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
- } else { - avail_mem = task.memory.giga - } - // def softClippedOption = params.ignore_soft_clipped_bases ? "--dont-use-soft-clipped-bases true" : "" - """ - # Get raw calls - gatk --java-options "-Xmx${task.memory.toGiga()}g" \ - Mutect2 \ - -R $fasta \ - -I $cram_tumor -tumor ${meta.tumor} \ - -I $cram_normal -normal ${meta.normal} \ - $intervals_command \ - $pon_command \ - $args \ - --germline-resource $germline_resource \ - -O ${prefix}.vcf - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/gatk4/mutect2/somatic/meta.yml b/modules/local/gatk4/mutect2/somatic/meta.yml deleted file mode 100644 index 8a0932771f..0000000000 --- a/modules/local/gatk4/mutect2/somatic/meta.yml +++ /dev/null @@ -1,41 +0,0 @@ -name: gatk4_markduplicates -description: This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are defined as originating from a single fragment of DNA. -keywords: - - mutect2 - - cram -tools: - - gatk4: - description: Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard- - tool_dev_url: https://github.com/broadinstitute/gatk - doi: 10.1158/1538-7445.AM2017-3590 - licence: ['BSD-3-clause'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - cram: - type: file - description: Sorted cram file - pattern: "*.{cram}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - version: - type: file - description: File containing software version - pattern: "*.{version.txt}" - - -authors: - - "@FriederikeHanssen" diff --git a/modules/local/gatk4/mutect2/tumor/main.nf b/modules/local/gatk4/mutect2/tumor/main.nf deleted file mode 100644 index 4c3eabd072..0000000000 --- a/modules/local/gatk4/mutect2/tumor/main.nf +++ /dev/null @@ -1,60 +0,0 @@ -process GATK4_MUTECT2_TUMOR { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" - - input: - tuple val(meta), path(cram_normal), path(crai_normal), path(cram_tumor), path(crai_tumor), path(interval) - path fasta_fai - path fasta - path dict - path pon - path pon_tbi - path germline_resource - path germline_resource_tbi - - output: - tuple val(meta), path("*.vcf") , emit: vcf - tuple val(meta), path("*.vcf.stats"), emit: vcf_stats - path "versions.yml" , emit: versions - - script: - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK Mutect2] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" - def intervals_command = intervals_bed ? "-L ${intervals_bed}" : "" - def pon_command = pon ? "--panel-of-normals ${pon}" : "" - // def softClippedOption = params.ignore_soft_clipped_bases ? "--dont-use-soft-clipped-bases true" : "" - if (!task.memory) { - log.info '[GATK Mutect2] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - // def softClippedOption = params.ignore_soft_clipped_bases ? 
"--dont-use-soft-clipped-bases true" : "" - """ - # Get raw calls - gatk --java-options "-Xmx${task.memory.toGiga()}g" \ - Mutect2 \ - -R $fasta \ - -I $cram_tumor -tumor ${meta.tumor} \ - -I $cram_normal -normal ${meta.normal} \ - $intervals_command \ - $pon_command \ - $args \ - --germline-resource $germline_resource \ - -O ${prefix}.vcf - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/gatk4/mutect2/tumor/meta.yml b/modules/local/gatk4/mutect2/tumor/meta.yml deleted file mode 100644 index 8a0932771f..0000000000 --- a/modules/local/gatk4/mutect2/tumor/meta.yml +++ /dev/null @@ -1,41 +0,0 @@ -name: gatk4_markduplicates -description: This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are defined as originating from a single fragment of DNA. -keywords: - - mutect2 - - cram -tools: - - gatk4: - description: Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard- - tool_dev_url: https://github.com/broadinstitute/gatk - doi: 10.1158/1538-7445.AM2017-3590 - licence: ['BSD-3-clause'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - cram: - type: file - description: Sorted cram file - pattern: "*.{cram}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - version: - type: file - description: File containing software version - pattern: "*.{version.txt}" - - -authors: - - "@FriederikeHanssen" From 1a64cb63e36788001ea95e0ec07817cc86e13ea4 Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 29 Dec 2021 16:09:15 +0100 Subject: [PATCH 25/30] Remove more gatk4 --- modules/local/gatk4/markduplicates/main.nf | 42 ----------------- modules/local/gatk4/markduplicates/meta.yml | 50 --------------------- 2 files changed, 92 deletions(-) delete mode 100644 modules/local/gatk4/markduplicates/main.nf delete mode 100644 modules/local/gatk4/markduplicates/meta.yml diff --git a/modules/local/gatk4/markduplicates/main.nf b/modules/local/gatk4/markduplicates/main.nf deleted file mode 100644 index 1ac2e789a7..0000000000 --- a/modules/local/gatk4/markduplicates/main.nf +++ /dev/null @@ -1,42 +0,0 @@ -process GATK4_MARKDUPLICATES { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" - - input: - tuple val(meta), path(bams) - - output: - tuple val(meta), path("*.bam"), path("*.bai"), emit: bam_bai - tuple val(meta), path("*.metrics") , emit: metrics - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK GenotypeGVCFs] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" - def bam_list = bams.collect(){ bam -> "--INPUT ".concat(bam.toString()) }.join(" ") - """ - gatk MarkDuplicates \\ - $bam_list \\ - --METRICS_FILE ${prefix}.metrics \\ - --TMP_DIR . 
\\ - --CREATE_INDEX true \\ - --OUTPUT ${prefix}.bam \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/gatk4/markduplicates/meta.yml b/modules/local/gatk4/markduplicates/meta.yml deleted file mode 100644 index 5777067a4f..0000000000 --- a/modules/local/gatk4/markduplicates/meta.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: gatk4_markduplicates -description: This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are defined as originating from a single fragment of DNA. -keywords: - - markduplicates - - bam - - sort -tools: - - gatk4: - description: Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard- - tool_dev_url: https://github.com/broadinstitute/gatk - doi: 10.1158/1538-7445.AM2017-3590 - licence: ['MIT'] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: Sorted BAM file - pattern: "*.{bam}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - bam: - type: file - description: Marked duplicates BAM file - pattern: "*.{bam}" - - metrics: - type: file - description: Duplicate metrics file generated by GATK - pattern: "*.{metrics.txt}" - -authors: - - "@ajodeh-juma" - - "@FriederikeHanssen" From 0ef0cfa3cff671c25093d1385a53344a19b582ea Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 29 Dec 2021 16:11:18 +0100 Subject: [PATCH 26/30] Remove more gatk4 --- subworkflows/local/germline_variant_calling.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index 26e46e769b..19c053b97c 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -9,9 +9,9 @@ params.concat_gvcf_options = [:] params.concat_haplotypecaller_options = [:] params.strelka_options = [:] -include { GATK4_HAPLOTYPECALLER as HAPLOTYPECALLER } from '../../modules/local/gatk4/haplotypecaller/main' addParams(options: params.haplotypecaller_options) +include { GATK4_HAPLOTYPECALLER as HAPLOTYPECALLER } from '../../modules/nf-core/modules/gatk4/haplotypecaller/main' addParams(options: params.haplotypecaller_options) include { DEEPVARIANT } from '../../modules/local/deepvariant/main' addParams(options: params.deepvariant_options) -include { GATK4_GENOTYPEGVCF as GENOTYPEGVCF } from '../../modules/local/gatk4/genotypegvcf/main' addParams(options: params.genotypegvcf_options) +include { GATK4_GENOTYPEGVCF as GENOTYPEGVCF } from '../../modules/nf-core/modules/gatk4/genotypegvcf/main' addParams(options: params.genotypegvcf_options) include { CONCAT_VCF as CONCAT_GVCF } from '../../modules/local/concat_vcf/main' addParams(options: params.concat_gvcf_options) include { CONCAT_VCF as CONCAT_HAPLOTYPECALLER } from '../../modules/local/concat_vcf/main' 
addParams(options: params.concat_haplotypecaller_options) include { STRELKA_GERMLINE as STRELKA } from '../../modules/nf-core/modules/strelka/germline/main' addParams(options: params.strelka_options) From 36000529627e3943f7a4cd1db0485330e8c863d7 Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 29 Dec 2021 16:20:17 +0100 Subject: [PATCH 27/30] Replace gatk4 modules --- subworkflows/local/germline_variant_calling.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index 19c053b97c..cd4b36aeef 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -11,7 +11,7 @@ params.strelka_options = [:] include { GATK4_HAPLOTYPECALLER as HAPLOTYPECALLER } from '../../modules/nf-core/modules/gatk4/haplotypecaller/main' addParams(options: params.haplotypecaller_options) include { DEEPVARIANT } from '../../modules/local/deepvariant/main' addParams(options: params.deepvariant_options) -include { GATK4_GENOTYPEGVCF as GENOTYPEGVCF } from '../../modules/nf-core/modules/gatk4/genotypegvcf/main' addParams(options: params.genotypegvcf_options) +include { GATK4_GENOTYPEGVCF as GENOTYPEGVCF } from '../../modules/nf-core/modules/gatk4/genotypegvcfs/main' addParams(options: params.genotypegvcf_options) include { CONCAT_VCF as CONCAT_GVCF } from '../../modules/local/concat_vcf/main' addParams(options: params.concat_gvcf_options) include { CONCAT_VCF as CONCAT_HAPLOTYPECALLER } from '../../modules/local/concat_vcf/main' addParams(options: params.concat_haplotypecaller_options) include { STRELKA_GERMLINE as STRELKA } from '../../modules/nf-core/modules/strelka/germline/main' addParams(options: params.strelka_options) From 3908738c12ab4241b3db5c361e79db1186d7848c Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 29 Dec 2021 16:22:22 +0100 Subject: [PATCH 28/30] Fix typo --- subworkflows/local/germline_variant_calling.nf | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index cd4b36aeef..66e4020fc5 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -11,7 +11,7 @@ params.strelka_options = [:] include { GATK4_HAPLOTYPECALLER as HAPLOTYPECALLER } from '../../modules/nf-core/modules/gatk4/haplotypecaller/main' addParams(options: params.haplotypecaller_options) include { DEEPVARIANT } from '../../modules/local/deepvariant/main' addParams(options: params.deepvariant_options) -include { GATK4_GENOTYPEGVCF as GENOTYPEGVCF } from '../../modules/nf-core/modules/gatk4/genotypegvcfs/main' addParams(options: params.genotypegvcf_options) +include { GATK4_GENOTYPEGVCFS as GENOTYPEGVCF } from '../../modules/nf-core/modules/gatk4/genotypegvcfs/main' addParams(options: params.genotypegvcf_options) include { CONCAT_VCF as CONCAT_GVCF } from '../../modules/local/concat_vcf/main' addParams(options: params.concat_gvcf_options) include { CONCAT_VCF as CONCAT_HAPLOTYPECALLER } from '../../modules/local/concat_vcf/main' addParams(options: params.concat_haplotypecaller_options) include { STRELKA_GERMLINE as STRELKA } from '../../modules/nf-core/modules/strelka/germline/main' addParams(options: params.strelka_options) From 785ee372139687dcfbdd2ee2bcc954004cdaab6f Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 29 Dec 2021 16:29:45 +0100 Subject: [PATCH 29/30] more replacements --- modules/local/gatk4/haplotypecaller/main.nf | 52 ---------------- modules/local/gatk4/haplotypecaller/meta.yml | 63 -------------------- 2 files changed, 115 deletions(-) delete mode 100644 modules/local/gatk4/haplotypecaller/main.nf delete mode 100644 modules/local/gatk4/haplotypecaller/meta.yml diff --git a/modules/local/gatk4/haplotypecaller/main.nf b/modules/local/gatk4/haplotypecaller/main.nf deleted file mode 100644 index 6a6e337e20..0000000000 --- 
a/modules/local/gatk4/haplotypecaller/main.nf +++ /dev/null @@ -1,52 +0,0 @@ -process GATK4_HAPLOTYPECALLER { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" - - input: - tuple val(meta), path(cram), path(crai), path(interval) - path fasta - path fasta_fai - path dict - path dbsnp - path dbsnp_tbi - - output: - tuple val(meta), path("*.vcf") , emit: vcf - tuple val(meta), path(interval), path("*.vcf"), emit: interval_vcf - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - def prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" - def intervals_command = intervals_bed ? "-L ${intervals_bed}" : "" - def sites_command = dbsnp ? "--D ${dbsnp}" : "" - //TODO allow ploidy argument here since we allow it for the cnv callers? or is this covered with options? Might unintuitive to use - """ - gatk \\ - --java-options "-Xmx${avail_mem}g" \\ - HaplotypeCaller \\ - -R $fasta \\ - -I $cram \\ - ${sites_command} \\ - ${intervals_command} \\ - -O ${prefix}.vcf \\ - --tmp-dir . 
\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/gatk4/haplotypecaller/meta.yml b/modules/local/gatk4/haplotypecaller/meta.yml deleted file mode 100644 index f0fc3910e3..0000000000 --- a/modules/local/gatk4/haplotypecaller/meta.yml +++ /dev/null @@ -1,63 +0,0 @@ -name: gatk4_haplotypecaller -description: Call germline SNPs and indels via local re-assembly of haplotypes -keywords: - - gatk4 - - haplotypecaller - - haplotype -tools: - - gatk4: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM file - pattern: "*.bam" - - bai: - type: file - description: Index of BAM file - pattern: "*.bam.bai" - - fasta: - type: file - description: The reference fasta file - pattern: "*.fasta" - - fai: - type: file - description: Index of reference fasta file - pattern: "fasta.fai" - - dict: - type: file - description: GATK sequence dictionary - pattern: "*.dict" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - version: - type: file - description: File containing software version - pattern: "*.{version.txt}" - - vcf: - type: file - description: Compressed VCF file - pattern: "*.vcf.gz" - - tbi: - type: file - description: Index of VCF file - pattern: "*.vcf.gz.tbi" - -authors: - - "@suzannejin" From 7f8a537d3599f874a1af2ea91d6920a82dc5449a Mon Sep 17 00:00:00 2001 From: Rike Date: Wed, 29 Dec 2021 16:42:06 +0100 Subject: [PATCH 30/30] more replacements --- .../gatk4/estimatelibrarycomplexity/main.nf | 43 ------------------- 1 file changed, 43 deletions(-) delete mode 100644 modules/local/gatk4/estimatelibrarycomplexity/main.nf diff --git a/modules/local/gatk4/estimatelibrarycomplexity/main.nf b/modules/local/gatk4/estimatelibrarycomplexity/main.nf deleted file mode 100644 index 1d6075c79b..0000000000 --- a/modules/local/gatk4/estimatelibrarycomplexity/main.nf +++ /dev/null @@ -1,43 +0,0 @@ -process GATK4_ESTIMATELIBRARYCOMPLEXITY { - tag "$meta.id" - label 'process_high' - - conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : - 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" - - input: - tuple val(meta), path(bam) - path fasta - path fasta_fai - path dict - - output: - path "*.md.metrics", emit: metrics - path "versions.yml", emit: versions - - script: - def args = task.ext.args ?: '' - def avail_mem = 3 - if (!task.memory) { - log.info '[GATK EstimateLibraryComplexity] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' - } else { - avail_mem = task.memory.giga - } - def prefix = task.ext.suffix ? 
"${meta.id}${task.ext.suffix}" : "${meta.id}" - def bams = bam.collect(){ x -> "-I ".concat(x.toString()) }.join(" ") - """ - gatk EstimateLibraryComplexity \ - ${bams} \ - -O ${prefix}.metrics \ - --REFERENCE_SEQUENCE ${fasta} \ - --VALIDATION_STRINGENCY SILENT \ - --TMP_DIR . $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -}