Skip to content

Commit

Permalink
Update gatk3 modules (nf-core#3660)
Browse files Browse the repository at this point in the history
* update mapdamage2 module

* Add meta for all files in gatk modules
  • Loading branch information
TCLamnidis authored and limrp committed Jul 28, 2023
1 parent 35a1d9d commit 664e3c4
Show file tree
Hide file tree
Showing 9 changed files with 161 additions and 56 deletions.
8 changes: 4 additions & 4 deletions modules/nf-core/gatk/indelrealigner/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ process GATK_INDELREALIGNER {

input:
tuple val(meta), path(bam), path(bai), path(intervals)
path(fasta)
path(fai)
path(dict)
path(known_vcf)
tuple val(meta2), path(fasta)
tuple val(meta3), path(fai)
tuple val(meta4), path(dict)
tuple val(meta5), path(known_vcf)

output:
tuple val(meta), path("*.bam"), path("*.bai"), emit: bam
Expand Down
20 changes: 20 additions & 0 deletions modules/nf-core/gatk/indelrealigner/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,38 @@ input:
type: file
description: Intervals file created by gatk3 RealignerTargetCreator
pattern: "*.{intervals,list}"
- meta2:
type: map
description: |
Groovy Map containing reference information.
e.g. [ id:'test', single_end:false ]
- fasta:
type: file
description: Reference file used to generate BAM file
pattern: "*.{fasta,fa,fna}"
- meta3:
type: map
description: |
Groovy Map containing reference information.
e.g. [ id:'test', single_end:false ]
- fai:
type: file
description: Index of reference file used to generate BAM file
pattern: "*.fai"
- meta4:
type: map
description: |
Groovy Map containing reference information.
e.g. [ id:'test', single_end:false ]
- dict:
type: file
description: GATK dict file for reference
pattern: "*.dict"
- meta5:
type: map
description: |
Groovy Map containing file meta-information for known_vcf.
e.g. [ id:'test', single_end:false ]
- known_vcf:
type: file
description: Optional input VCF file(s) with known indels
Expand Down
14 changes: 7 additions & 7 deletions modules/nf-core/gatk/realignertargetcreator/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ process GATK_REALIGNERTARGETCREATOR {
'biocontainers/gatk:3.5--hdfd78af_11' }"

input:
tuple val(meta), path(input), path(index)
path fasta
path fai
path dict
path known_vcf
tuple val(meta), path(bam), path(bai)
tuple val(meta2), path(fasta)
tuple val(meta3), path(fai)
tuple val(meta4), path(dict)
tuple val(meta5), path(known_vcf)

output:
tuple val(meta), path("*.intervals"), emit: intervals
Expand All @@ -25,7 +25,7 @@ process GATK_REALIGNERTARGETCREATOR {
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def known = known_vcf ? "-known ${known_vcf}" : ""
if ("$input" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!"
if ("$bam" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!"

def avail_mem = 3072
if (!task.memory) {
Expand All @@ -39,7 +39,7 @@ process GATK_REALIGNERTARGETCREATOR {
-Xmx${avail_mem}M \\
-T RealignerTargetCreator \\
-nt ${task.cpus} \\
-I ${input} \\
-I ${bam} \\
-R ${fasta} \\
-o ${prefix}.intervals \\
${known} \\
Expand Down
24 changes: 22 additions & 2 deletions modules/nf-core/gatk/realignertargetcreator/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,26 +20,46 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- input:
- bam:
type: file
description: Sorted and indexed BAM/CRAM/SAM file
pattern: "*.bam"
- index:
- bai:
type: file
description: BAM index file
pattern: "*.bai"
- meta2:
type: map
description: |
Groovy Map containing reference information.
e.g. [ id:'test', single_end:false ]
- fasta:
type: file
description: Reference file used to generate BAM file
pattern: "*.{fasta,fa,fna}"
- meta3:
type: map
description: |
Groovy Map containing reference information.
e.g. [ id:'test', single_end:false ]
- fai:
type: file
description: Index of reference file used to generate BAM file
pattern: "*.fai"
- meta4:
type: map
description: |
Groovy Map containing reference information.
e.g. [ id:'test', single_end:false ]
- dict:
type: file
description: GATK dict file for reference
pattern: "*.dict"
- meta5:
type: map
description: |
Groovy Map containing file meta-information for known_vcf.
e.g. [ id:'test', single_end:false ]
- known_vcf:
type: file
description: Optional input VCF file(s) with known indels
Expand Down
18 changes: 9 additions & 9 deletions modules/nf-core/gatk/unifiedgenotyper/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@ process GATK_UNIFIEDGENOTYPER {
'biocontainers/gatk:3.5--hdfd78af_11' }"

input:
tuple val(meta), path(input), path(index)
path fasta
path fai
path dict
path intervals
path contamination
path dbsnp
path comp
tuple val(meta), path(bam), path(bai)
tuple val(meta2), path(fasta)
tuple val(meta3), path(fai)
tuple val(meta4), path(dict)
tuple val(meta5), path(intervals)
tuple val(meta6), path(contamination)
tuple val(meta7), path(dbsnp)
tuple val(meta8), path(comp)

output:
tuple val(meta), path("*.vcf.gz"), emit: vcf
Expand Down Expand Up @@ -44,7 +44,7 @@ process GATK_UNIFIEDGENOTYPER {
-Xmx${avail_mem}M \\
-nt ${task.cpus} \\
-T UnifiedGenotyper \\
-I ${input} \\
-I ${bam} \\
-R ${fasta} \\
${contamination_file} \\
${dbsnp_file} \\
Expand Down
39 changes: 37 additions & 2 deletions modules/nf-core/gatk/unifiedgenotyper/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,38 +17,73 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- input:
- bam:
type: file
description: Sorted and indexed BAM/CRAM/SAM file
pattern: "*.bam"
- index:
- bai:
type: file
description: BAM index file
pattern: "*.bai"
- meta2:
type: map
description: |
Groovy Map containing reference information.
e.g. [ id:'test', single_end:false ]
- fasta:
type: file
description: Reference file used to generate BAM file
pattern: "*.{fasta,fa,fna}"
- meta3:
type: map
description: |
Groovy Map containing reference information.
e.g. [ id:'test', single_end:false ]
- fai:
type: file
description: Index of reference file used to generate BAM file
pattern: "*.fai"
- meta4:
type: map
description: |
Groovy Map containing reference information.
e.g. [ id:'test', single_end:false ]
- dict:
type: file
description: GATK dict file for reference
pattern: "*.dict"
- meta5:
type: map
description: |
Groovy Map containing file meta-information for the intervals file.
e.g. [ id:'test', single_end:false ]
- intervals:
type: file
description: Bed file with the genomic regions included in the library (optional)
pattern: "*.intervals"
- meta6:
type: map
description: |
Groovy Map containing file meta-information for the contamination file.
e.g. [ id:'test', single_end:false ]
- contamination:
type: file
description: Tab-separated file containing fraction of contamination in sequencing data (per sample) to aggressively remove
pattern: "*"
- meta7:
type: map
description: |
Groovy Map containing file meta-information for the dbsnp file.
e.g. [ id:'test', single_end:false ]
- dbsnp:
type: file
description: VCF file containing known sites (optional)
pattern: "*"
- meta8:
type: map
description: |
Groovy Map containing file meta-information for the VCF comparison file.
e.g. [ id:'test', single_end:false ]
- comp:
type: file
description: Comparison VCF file (optional)
Expand Down
42 changes: 26 additions & 16 deletions tests/modules/nf-core/gatk/indelrealigner/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,36 @@ include { GATK_INDELREALIGNER } from '../../../../../modules/nf-core/gatk/indelr

workflow test_gatk_indelrealigner {


fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)

input_realignertargetcreator = [ [ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
]

GATK_REALIGNERTARGETCREATOR ( input_realignertargetcreator, fasta, fai, dict, [] )
fasta = [
[id: 'test'],
file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
]
fai = [
[id: 'test'],
file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
]
dict = [
[id: 'test'],
file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
]

input_realignertargetcreator = [
[ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
]

GATK_REALIGNERTARGETCREATOR ( input_realignertargetcreator, fasta, fai, dict, [[],[]] )

ch_intervals = GATK_REALIGNERTARGETCREATOR.out.intervals

ch_bams_indelrealigner = Channel.of([ [ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)
])
ch_bams_indelrealigner = Channel.of([
[ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)
])

ch_input_indelrealigner = ch_bams_indelrealigner.mix(ch_intervals).groupTuple(by: 0).map{ [it[0], it[1][0], it[2], it[1][1] ] }

GATK_INDELREALIGNER ( ch_input_indelrealigner, fasta, fai, dict, [] )
GATK_INDELREALIGNER ( ch_input_indelrealigner, fasta, fai, dict, [[],[]] )
}
26 changes: 18 additions & 8 deletions tests/modules/nf-core/gatk/realignertargetcreator/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,23 @@ include { GATK_REALIGNERTARGETCREATOR } from '../../../../../modules/nf-core/gat

workflow test_gatk_realignertargetcreator {

input = [ [ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
]
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
input = [
[ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
]
fasta = [
[id: 'test'],
file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
]
fai = [
[id: 'test'],
file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
]
dict = [
[id: 'test'],
file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
]

GATK_REALIGNERTARGETCREATOR ( input, fasta, fai, dict, [] )
GATK_REALIGNERTARGETCREATOR ( input, fasta, fai, dict, [[],[]] )
}
26 changes: 18 additions & 8 deletions tests/modules/nf-core/gatk/unifiedgenotyper/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,23 @@ include { GATK_UNIFIEDGENOTYPER } from '../../../../../modules/nf-core/gatk/unif

workflow test_gatk_unifiedgenotyper {

input = [ [ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
]
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
input = [
[ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
]
fasta = [
[id: 'test'],
file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
]
fai = [
[id: 'test'],
file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
]
dict = [
[id: 'test'],
file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
]

GATK_UNIFIEDGENOTYPER ( input, fasta, fai, dict, [], [], [], [])
GATK_UNIFIEDGENOTYPER ( input, fasta, fai, dict, [[],[]], [[],[]], [[],[]], [[],[]])
}

0 comments on commit 664e3c4

Please sign in to comment.