Skip to content

Commit

Permalink
Merge pull request #1184 from nf-core/extending_tests_for_sentieon_jo…
Browse files Browse the repository at this point in the history
…int_germline

Extending tests for sentieon joint germline AND avoiding duplicated variants in VQSR vcfs
  • Loading branch information
maxulysse authored Aug 23, 2023
2 parents a34f3f8 + 3006118 commit a456c18
Show file tree
Hide file tree
Showing 11 changed files with 224 additions and 101 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#1159](https://github.com/nf-core/sarek/pull/1159) - ISMB Poster
- [#1173](https://github.com/nf-core/sarek/pull/1173) - CI tests for VQSR track with stub runs
- [#1122](https://github.com/nf-core/sarek/pull/1122) - Add `annotation cache` functionality
- [#1184](https://github.com/nf-core/sarek/pull/1184) - Stub-based CI-test of Sentieon joint-germline variant-calling with VQSR

### Changed

Expand Down Expand Up @@ -44,6 +45,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
fixed grouping joint germline recalibrated VCF ([#1137](https://github.com/nf-core/sarek/pull/1137))
- [#1177](https://github.com/nf-core/sarek/pull/1177) - Fix status inference when using nf-validation plugin
- [#1183](https://github.com/nf-core/sarek/pull/1183) - Add docs for concatentated germline variants
- [#1184](https://github.com/nf-core/sarek/pull/1184) - Fix issue with duplicated variants in VCF from Sentieon-based joint-germline variant-calling with VQSR. (Corresponding to [#966](https://github.com/nf-core/sarek/issues/966) for GATK.)

### Dependencies

Expand Down
3 changes: 1 addition & 2 deletions conf/modules/sentieon_haplotyper.config
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ process {
ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.haplotyper" : "${meta.id}.haplotyper.${intervals.simpleName}" }
ext.when = { params.tools && params.tools.split(',').contains('sentieon_haplotyper') }
publishDir = [
enabled: !params.joint_germline,
mode: params.publish_dir_mode,
path: { "${params.outdir}/variant_calling/"},
pattern: "*{vcf.gz,vcf.gz.tbi}",
Expand All @@ -46,7 +45,7 @@ process {
}

if (params.tools && params.tools.contains('sentieon_haplotyper')) {
withName: '.*BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER:VCF_VARIANT_FILTERING_GATK:FILTERVARIANTTRANCHES' {
withName: '.*FILTERVARIANTTRANCHES' {
ext.prefix = {"${meta.id}.haplotyper"}
ext.args = { "--info-key CNN_1D" }
publishDir = [
Expand Down
18 changes: 7 additions & 11 deletions conf/modules/sentieon_joint_germline.config
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,6 @@ process {
pattern: "*{vcf.gz,vcf.gz.tbi}"
]
}

withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_JOINT_CALLING_GERMLINE_SENTIEON:MERGE_VQSR' {
ext.prefix = "joint_germline_recalibrated"
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/variant_calling/sentieon_haplotyper/joint_variant_calling/"},
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
pattern: "*{vcf.gz,vcf.gz.tbi}"
]
}
}

withName: 'SENTIEON_VARCAL_INDEL' {
Expand All @@ -61,8 +51,14 @@ process {
}

withName: 'SENTIEON_APPLYVARCAL_INDEL' {
ext.prefix = { "${meta.id}_INDEL" }
ext.prefix = { "joint_germline_recalibrated" }
ext.args = '--sensitivity 99.9 --var_type INDEL'
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/variant_calling/sentieon_haplotyper/joint_variant_calling/"},
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
pattern: "*{vcf.gz,vcf.gz.tbi}"
]
}

withName: 'SENTIEON_VARCAL_SNP' {
Expand Down
6 changes: 3 additions & 3 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@
},
"sentieon/applyvarcal": {
"branch": "master",
"git_sha": "0ff77bd14d74332f4f0aaecfb302c51a9b24231a",
"git_sha": "6c9c11ee96796e53a01b4719286acce6af14bc3a",
"installed_by": ["modules"]
},
"sentieon/bwamem": {
Expand All @@ -388,7 +388,7 @@
},
"sentieon/gvcftyper": {
"branch": "master",
"git_sha": "b9172e8c26a3db5009f7872654c44587e254f094",
"git_sha": "6c9c11ee96796e53a01b4719286acce6af14bc3a",
"installed_by": ["modules"]
},
"sentieon/haplotyper": {
Expand All @@ -398,7 +398,7 @@
},
"sentieon/varcal": {
"branch": "master",
"git_sha": "b9172e8c26a3db5009f7872654c44587e254f094",
"git_sha": "6c9c11ee96796e53a01b4719286acce6af14bc3a",
"installed_by": ["modules"]
},
"snpeff/download": {
Expand Down
16 changes: 16 additions & 0 deletions modules/nf-core/sentieon/applyvarcal/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions modules/nf-core/sentieon/gvcftyper/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions modules/nf-core/sentieon/varcal/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

60 changes: 40 additions & 20 deletions subworkflows/local/bam_joint_calling_germline_sentieon/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

include { BCFTOOLS_SORT } from '../../../modules/nf-core/bcftools/sort/main'
include { GATK4_MERGEVCFS as MERGE_GENOTYPEGVCFS } from '../../../modules/nf-core/gatk4/mergevcfs/main'
include { GATK4_MERGEVCFS as MERGE_VQSR } from '../../../modules/nf-core/gatk4/mergevcfs/main'
include { SENTIEON_APPLYVARCAL as SENTIEON_APPLYVARCAL_INDEL } from '../../../modules/nf-core/sentieon/applyvarcal/main'
include { SENTIEON_APPLYVARCAL as SENTIEON_APPLYVARCAL_SNP } from '../../../modules/nf-core/sentieon/applyvarcal/main'
include { SENTIEON_GVCFTYPER } from '../../../modules/nf-core/sentieon/gvcftyper/main'
Expand Down Expand Up @@ -67,7 +66,9 @@ workflow BAM_JOINT_CALLING_GERMLINE_SENTIEON {
fasta,
fai)

//Prepare INDELs and SNPs separately for Sentieons applyvarcal
//Prepare SNPs and INDELs for Sentieon's applyvarcal
// Step 1. : applyvarcal to SNPs
// Step 2. : Use SENTIEON_APPLYVARCAL_SNP output and run ApplyVQSR_INDEL. This avoids duplicate entries in the vcf as described here: https://hpc.nih.gov/training/gatk_tutorial/vqsr.html

// Join results of variant recalibration into a single channel tuple
// Rework meta for variantscalled.csv and annotation tools
Expand All @@ -76,34 +77,53 @@ workflow BAM_JOINT_CALLING_GERMLINE_SENTIEON {
.join(SENTIEON_VARCAL_SNP.out.tranches, failOnDuplicate: true)
.map{ meta, vcf, tbi, recal, index, tranche -> [ meta + [ id:'recalibrated_joint_variant_calling' ], vcf, tbi, recal, index, tranche ] }

// Join results of variant recalibration into a single channel tuple
SENTIEON_APPLYVARCAL_SNP(
vqsr_input_snp,
fasta.map{ fasta -> [ [ id:fasta.baseName ], fasta ] },
fai.map{ fai -> [ [ id:fai.baseName ], fai ] })

// Join results of SENTIEON_APPLYVARCAL_SNP and use as input for SENTIEON_APPLYVARCAL_INDEL to avoid duplicate entries in the result
// Rework meta for variantscalled.csv and annotation tools
vqsr_input_indel = vqsr_input.join(SENTIEON_VARCAL_INDEL.out.recal, failOnDuplicate: true)
vqsr_input_indel = SENTIEON_APPLYVARCAL_SNP.out.vcf.join(SENTIEON_APPLYVARCAL_SNP.out.tbi).map{ meta, vcf, tbi -> [ meta + [ id:'joint_variant_calling' ], vcf, tbi ]}
.join(SENTIEON_VARCAL_INDEL.out.recal, failOnDuplicate: true)
.join(SENTIEON_VARCAL_INDEL.out.idx, failOnDuplicate: true)
.join(SENTIEON_VARCAL_INDEL.out.tranches, failOnDuplicate: true)
.map{ meta, vcf, tbi, recal, index, tranche -> [ meta + [ id:'recalibrated_joint_variant_calling' ], vcf, tbi, recal, index, tranche ] }

SENTIEON_APPLYVARCAL_SNP(
vqsr_input_snp,
fasta,
fai)

SENTIEON_APPLYVARCAL_INDEL(
vqsr_input_indel,
fasta,
fai)
fasta.map{ fasta -> [ [ id:fasta.baseName ], fasta ] },
fai.map{ fai -> [ [ id:fai.baseName ], fai ] })

// The following is an ugly monster to achieve the following:
// When MERGE_GENOTYPEGVCFS and SENTIEON_APPLYVARCAL are run, then use output from SENTIEON_APPLYVARCAL
// When MERGE_GENOTYPEGVCFS and NOT SENTIEON_APPLYVARCAL, then use the output from MERGE_GENOTYPEGVCFS

merge_vcf_for_join = MERGE_GENOTYPEGVCFS.out.vcf.map{meta, vcf -> [[id: 'joint_variant_calling'] , vcf]}
merge_tbi_for_join = MERGE_GENOTYPEGVCFS.out.tbi.map{meta, tbi -> [[id: 'joint_variant_calling'] , tbi]}

// Remap for both to have the same key, if ApplyBQSR is not run, the channel is empty --> populate with empty elements
vqsr_vcf_for_join = SENTIEON_APPLYVARCAL_INDEL.out.vcf.ifEmpty([[:], []]).map{meta, vcf -> [[id: 'joint_variant_calling'] , vcf]}
vqsr_tbi_for_join = SENTIEON_APPLYVARCAL_INDEL.out.tbi.ifEmpty([[:], []]).map{meta, tbi -> [[id: 'joint_variant_calling'] , tbi]}

// Join on metamap
// If both --> meta, vcf_merged, vcf_bqsr
// If not VQSR --> meta, vcf_merged, []
// if the second is empty, use the first
genotype_vcf = merge_vcf_for_join.join(vqsr_vcf_for_join, remainder: true).map{
meta, joint_vcf, recal_vcf ->

vcf_out = recal_vcf ?: joint_vcf

vqsr_snp_vcf = SENTIEON_APPLYVARCAL_SNP.out.vcf
vqsr_indel_vcf = SENTIEON_APPLYVARCAL_INDEL.out.vcf
[[id:"joint_variant_calling", patient:"all_samples", variantcaller:"sentieon_haplotyper"], vcf_out]
}

//Merge VQSR outputs into final VCF
MERGE_VQSR(
vqsr_snp_vcf.mix(vqsr_indel_vcf).groupTuple(),
dict
)
genotype_index = merge_tbi_for_join.join(vqsr_tbi_for_join, remainder: true).map{
meta, joint_tbi, recal_tbi ->

genotype_vcf = MERGE_GENOTYPEGVCFS.out.vcf.mix(MERGE_VQSR.out.vcf)
genotype_index = MERGE_GENOTYPEGVCFS.out.tbi.mix(MERGE_VQSR.out.tbi)
tbi_out = recal_tbi ?: joint_tbi
[[id:"joint_variant_calling", patient:"all_samples", variantcaller:"sentieon_haplotyper"], tbi_out]
}

versions = versions.mix(SENTIEON_GVCFTYPER.out.versions)
versions = versions.mix(SENTIEON_VARCAL_SNP.out.versions)
Expand Down
34 changes: 22 additions & 12 deletions subworkflows/local/bam_variant_calling_germline_all/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -190,22 +190,16 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL {
dbsnp,
dbsnp_tbi,
dbsnp_vqsr,
known_sites_indels,
known_sites_indels_tbi,
known_indels_vqsr,
known_sites_snps,
known_sites_snps_tbi,
known_snps_vqsr,
intervals,
intervals_bed_combined_haplotypec,
(skip_tools && skip_tools.split(',').contains('haplotyper_filter')),
joint_germline,
sentieon_haplotyper_emit_mode)

versions = versions.mix(BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER.out.versions)

vcf_sentieon_haplotyper = BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER.out.vcf
gvcf_sentieon_haplotyper = BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER.out.gvcf
vcf_sentieon_haplotyper = BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER.out.vcf
vcf_tbi_sentieon_haplotyper = BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER.out.vcf_tbi
gvcf_sentieon_haplotyper = BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER.out.gvcf
gvcf_tbi_sentieon_haplotyper = BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER.out.gvcf_tbi

if (joint_germline) {
BAM_JOINT_CALLING_GERMLINE_SENTIEON(
Expand All @@ -223,11 +217,27 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL {
known_sites_snps_tbi,
known_snps_vqsr)

vcf_haplotypecaller = BAM_JOINT_CALLING_GERMLINE_SENTIEON.out.genotype_vcf
vcf_sentieon_haplotyper = BAM_JOINT_CALLING_GERMLINE_SENTIEON.out.genotype_vcf
versions = versions.mix(BAM_JOINT_CALLING_GERMLINE_SENTIEON.out.versions)
} else {

}
// If single sample track, check if filtering should be done
if (!(skip_tools && skip_tools.split(',').contains('haplotyper_filter'))) {

VCF_VARIANT_FILTERING_GATK(
vcf_sentieon_haplotyper.join(vcf_tbi_sentieon_haplotyper, failOnDuplicate: true, failOnMismatch: true),
fasta,
fasta_fai,
dict.map{ meta, dict -> [ dict ] },
intervals_bed_combined_haplotypec,
known_sites_indels.concat(known_sites_snps).flatten().unique().collect(),
known_sites_indels_tbi.concat(known_sites_snps_tbi).flatten().unique().collect())

vcf_sentieon_haplotyper = VCF_VARIANT_FILTERING_GATK.out.filtered_vcf

versions = versions.mix(VCF_VARIANT_FILTERING_GATK.out.versions)
}
}
}

// STRELKA
Expand Down
Loading

0 comments on commit a456c18

Please sign in to comment.