Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extending tests for sentieon joint germline AND avoiding duplicated variants in VQSR vcfs #1184

Merged
merged 19 commits into from
Aug 23, 2023
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
595cfb5
Failed to publish per-sample gvcf when using just one interval
asp8200 Aug 17, 2023
b456435
Adding test for sentieon-based joint germline var calling all interval…
asp8200 Aug 17, 2023
6a82a5d
Adding stubs to some sentieon modules
asp8200 Aug 17, 2023
11c39bb
Moving VCF_VARIANT_FILTERING_GATK from BAM_VARIANT_CALLING_SENTIEON_H…
asp8200 Aug 17, 2023
3439bbc
Extend CI-test of sentieon-based joint-germline subworkflow with stub…
asp8200 Aug 17, 2023
999bf0f
Checking if skip_tools contains haplotyper_filter
asp8200 Aug 17, 2023
a82c529
MERGE_VQSR removed. Instead publishing from SENTIEON_APPLYVARCAL_INDEL
asp8200 Aug 17, 2023
3f48ffd
WIP: Avoid duplicated variants from VQSR vcfs
asp8200 Aug 17, 2023
bb3b083
Call to VCF_VARIANT_FILTERING_GATK moved from BAM_VARIANT_CALLING_SEN…
asp8200 Aug 17, 2023
26e96a0
Changing variantcaller-tag from haplotypecaller to sentieon_haplotype…
asp8200 Aug 17, 2023
81730d3
Using vcf_sentieon_haplotyper instead of vcf_haplotypecaller for Sent…
asp8200 Aug 17, 2023
72edd9a
Fixing pytest for sentieon_joint_germline incl vqsr
asp8200 Aug 18, 2023
a3f06f1
removing redundant option --skip_tools haplotyper_filter from pytests…
asp8200 Aug 18, 2023
82837e0
Moving sentieon-based pytest for joint-germline to the set of manual …
asp8200 Aug 22, 2023
31938f1
Adding stubs to three Sentieon modules
asp8200 Aug 22, 2023
2bbe666
Resolving conflict in modules.json
asp8200 Aug 22, 2023
d703151
Trying to run pytest of sentieon-based joint-germline with VQSR as GHA
asp8200 Aug 22, 2023
61c18ee
Trying to run pytest of sentieon-based joint-germline with VQSR as GH…
asp8200 Aug 22, 2023
3006118
Updating changelog
asp8200 Aug 22, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion conf/modules/sentieon_haplotyper.config
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ process {
ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.haplotyper" : "${meta.id}.haplotyper.${intervals.simpleName}" }
ext.when = { params.tools && params.tools.split(',').contains('sentieon_haplotyper') }
publishDir = [
enabled: !params.joint_germline,
mode: params.publish_dir_mode,
path: { "${params.outdir}/variant_calling/"},
pattern: "*{vcf.gz,vcf.gz.tbi}",
Expand Down
16 changes: 16 additions & 0 deletions modules/nf-core/sentieon/applyvarcal/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions modules/nf-core/sentieon/gvcftyper/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions modules/nf-core/sentieon/varcal/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 23 additions & 11 deletions subworkflows/local/bam_variant_calling_germline_all/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -190,22 +190,16 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL {
dbsnp,
dbsnp_tbi,
dbsnp_vqsr,
known_sites_indels,
known_sites_indels_tbi,
known_indels_vqsr,
known_sites_snps,
known_sites_snps_tbi,
known_snps_vqsr,
intervals,
intervals_bed_combined_haplotypec,
(skip_tools && skip_tools.split(',').contains('haplotyper_filter')),
joint_germline,
sentieon_haplotyper_emit_mode)

versions = versions.mix(BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER.out.versions)

vcf_sentieon_haplotyper = BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER.out.vcf
gvcf_sentieon_haplotyper = BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER.out.gvcf
vcf_sentieon_haplotyper = BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER.out.vcf
vcf_tbi_sentieon_haplotyper = BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER.out.vcf_tbi
gvcf_sentieon_haplotyper = BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER.out.gvcf
gvcf_tbi_sentieon_haplotyper = BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER.out.gvcf_tbi

if (joint_germline) {
BAM_JOINT_CALLING_GERMLINE_SENTIEON(
Expand All @@ -223,11 +217,29 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL {
known_sites_snps_tbi,
known_snps_vqsr)

// TO-DO: Should this VCF from the sentieon haploTYPER really be put into a variable called vcf_haploTYPECALLER?
// The vcf_haplotypecaller is just being sent to annotation in sarek.nf
vcf_haplotypecaller = BAM_JOINT_CALLING_GERMLINE_SENTIEON.out.genotype_vcf
versions = versions.mix(BAM_JOINT_CALLING_GERMLINE_SENTIEON.out.versions)
} else {

}
// If single sample track, check if filtering should be done
if (!(skip_tools && skip_tools.split(',').contains('haplotyper_filter'))) {

VCF_VARIANT_FILTERING_GATK(
vcf_sentieon_haplotyper.join(vcf_tbi_sentieon_haplotyper, failOnDuplicate: true, failOnMismatch: true),
fasta,
fasta_fai,
dict.map{ meta, dict -> [ dict ] },
intervals_bed_combined_haplotypec,
known_sites_indels.concat(known_sites_snps).flatten().unique().collect(),
known_sites_indels_tbi.concat(known_sites_snps_tbi).flatten().unique().collect())

vcf_haplotypecaller = VCF_VARIANT_FILTERING_GATK.out.filtered_vcf

versions = versions.mix(VCF_VARIANT_FILTERING_GATK.out.versions)
}
}
}

// STRELKA
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
include { GATK4_MERGEVCFS as MERGE_SENTIEON_HAPLOTYPER_GVCFS } from '../../../modules/nf-core/gatk4/mergevcfs/main'
include { GATK4_MERGEVCFS as MERGE_SENTIEON_HAPLOTYPER_VCFS } from '../../../modules/nf-core/gatk4/mergevcfs/main'
include { SENTIEON_HAPLOTYPER } from '../../../modules/nf-core/sentieon/haplotyper/main'
include { VCF_VARIANT_FILTERING_GATK } from '../vcf_variant_filtering_gatk/main'

workflow BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER {
take:
Expand All @@ -18,23 +17,15 @@ workflow BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER {
dbsnp // channel: [optional]
dbsnp_tbi // channel: [optional]
dbsnp_vqsr // channel: [optional]
known_sites_indels // channel: [optional]
known_sites_indels_tbi // channel: [optional]
known_indels_vqsr // channel: [optional]
known_sites_snps // channel: [optional]
known_sites_snps_tbi // channel: [optional]
known_snps_vqsr // channel: [optional]
intervals // channel: [mandatory] [ intervals, num_intervals ] or [ [], 0 ] if no intervals
intervals_bed_combined // channel: [mandatory] intervals/target regions in one file unzipped, no_intervals.bed if no_intervals
skip_haplotyper_filter // boolean: [mandatory] [default: false] skip haplotyper filter
joint_germline // boolean: [mandatory] [default: false] joint calling of germline variants
sentieon_haplotyper_emit_mode

main:
versions = Channel.empty()

gvcf = Channel.empty()
vcf = Channel.empty()
gvcf = Channel.empty()
vcf = Channel.empty()
genotype_intervals = Channel.empty()

// Combine cram and intervals for spread and gather strategy
Expand Down Expand Up @@ -73,25 +64,37 @@ workflow BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER {
}

// Figure out if using intervals or no_intervals
SENTIEON_HAPLOTYPER.out.vcf.branch{
intervals: it[0].num_intervals > 1
no_intervals: it[0].num_intervals <= 1
}.set{haplotyper_vcf_branch}

SENTIEON_HAPLOTYPER.out.vcf_tbi.branch{
intervals: it[0].num_intervals > 1
no_intervals: it[0].num_intervals <= 1
}.set{haplotyper_vcf_tbi_branch}

SENTIEON_HAPLOTYPER.out.gvcf.branch{
intervals: it[0].num_intervals > 1
no_intervals: it[0].num_intervals <= 1
}.set{haplotyper_gvcf_branch}

SENTIEON_HAPLOTYPER.out.gvcf_tbi.branch{
intervals: it[0].num_intervals > 1
no_intervals: it[0].num_intervals <= 1
}.set{haplotyper_gvcf_tbi_branch}
haplotyper_vcf_branch = SENTIEON_HAPLOTYPER.out.vcf.map{
meta, vcf -> [ meta - meta.subMap('interval_name'), vcf]
}
.branch{
intervals: it[0].num_intervals > 1
no_intervals: it[0].num_intervals <= 1
}

haplotyper_vcf_tbi_branch = SENTIEON_HAPLOTYPER.out.vcf_tbi.map{
meta, vcf_tbi -> [ meta - meta.subMap('interval_name'), vcf_tbi]
}
.branch{
intervals: it[0].num_intervals > 1
no_intervals: it[0].num_intervals <= 1
}

haplotyper_gvcf_branch = SENTIEON_HAPLOTYPER.out.gvcf.map{
meta, gvcf -> [ meta - meta.subMap('interval_name'), gvcf]
}
.branch{
intervals: it[0].num_intervals > 1
no_intervals: it[0].num_intervals <= 1
}

haplotyper_gvcf_tbi_branch = SENTIEON_HAPLOTYPER.out.gvcf_tbi.map{
meta, gvcf_tbi -> [ meta - meta.subMap('interval_name'), gvcf_tbi]
}
.branch{
intervals: it[0].num_intervals > 1
no_intervals: it[0].num_intervals <= 1
}

vcfs_for_merging = haplotyper_vcf_branch.intervals.map{
meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]}
Expand All @@ -113,25 +116,9 @@ workflow BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER {
MERGE_SENTIEON_HAPLOTYPER_VCFS.out.tbi,
haplotyper_vcf_tbi_branch.no_intervals)

if (!skip_haplotyper_filter) {
VCF_VARIANT_FILTERING_GATK(
haplotyper_vcf.join(
haplotyper_tbi,
failOnDuplicate: true,
failOnMismatch: true),
fasta,
fasta_fai,
dict.map{ meta, dict -> [ dict ] },
intervals_bed_combined,
known_sites_indels.concat(known_sites_snps).flatten().unique().collect(),
known_sites_indels_tbi.concat(known_sites_snps_tbi).flatten().unique().collect())

vcf = VCF_VARIANT_FILTERING_GATK.out.filtered_vcf.map{meta, vcf -> [meta + [variantcaller:"sentieon_haplotyper"], vcf]}
versions = versions.mix(VCF_VARIANT_FILTERING_GATK.out.versions)
} else vcf = haplotyper_vcf

// Remove no longer necessary field: num_intervals
vcf = vcf.map{ meta, vcf -> [ meta - meta.subMap('num_intervals'), vcf ] }
vcf = haplotyper_vcf.map{ meta, vcf -> [ meta - meta.subMap('num_intervals'), vcf ] }
vcf_tbi = haplotyper_tbi.map{ meta, tbi -> [ meta - meta.subMap('num_intervals'), tbi ] }

// GVCFs
// Only when using intervals
Expand All @@ -148,14 +135,20 @@ workflow BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER {
MERGE_SENTIEON_HAPLOTYPER_GVCFS.out.vcf,
haplotyper_gvcf_branch.no_intervals)

gvcf_tbi = Channel.empty().mix(
MERGE_SENTIEON_HAPLOTYPER_GVCFS.out.tbi,
haplotyper_gvcf_tbi_branch.no_intervals)

versions = versions.mix(SENTIEON_HAPLOTYPER.out.versions)
versions = versions.mix(MERGE_SENTIEON_HAPLOTYPER_VCFS.out.versions)
versions = versions.mix(MERGE_SENTIEON_HAPLOTYPER_GVCFS.out.versions)

emit:
versions
vcf
vcf_tbi
gvcf
gvcf_tbi
genotype_intervals // For joint genotyping

}
66 changes: 62 additions & 4 deletions tests/test_sentieon_joint_germline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,9 @@
- path: results/preprocessing/recalibrated/test/test.recal.cram.crai
should_exist: false
- path: results/reports/bcftools/sentieon_haplotyper/joint_variant_calling/joint_germline.bcftools_stats.txt
# md5sum: 404d1208df301a6726f5543245625ef3
- path: results/reports/vcftools/sentieon_haplotyper/joint_variant_calling/joint_germline.FILTER.summary
# md5sum: 87a84b5f8ac3d3cbeeef7d60afcdbfe7
- path: results/reports/vcftools/sentieon_haplotyper/joint_variant_calling/joint_germline.TsTv.count
# md5sum: 974f6922981c87bb017b124aa009f654
- path: results/reports/vcftools/sentieon_haplotyper/joint_variant_calling/joint_germline.TsTv.qual
# md5sum: 1e34357e5848c318f8c2c7d3b041d229
- path: results/variant_calling/sentieon_haplotyper/joint_variant_calling/joint_germline.vcf.gz
# binary file: its md5sum changes between reruns, so no md5sum is checked
- path: results/variant_calling/sentieon_haplotyper/joint_variant_calling/joint_germline.vcf.gz.tbi
Expand All @@ -31,3 +27,65 @@
- path: results/variant_calling/sentieon_haplotyper/testT/testT.haplotyper.g.vcf.gz.tbi
- path: results/haplotyper
should_exist: false

- name: Run joint germline variant calling with sentieon haplotyper all intervals at once
command: nextflow run main.nf -profile test_cache,targeted --input ./tests/csv/3.0/mapped_joint_bam.csv --tools sentieon_haplotyper --step variant_calling --joint_germline --skip_tools haplotyper_filter --outdir results --sentieon_haplotyper_emit_mode gvcf --nucleotides_per_second 100
tags:
- germline
- sentieon_joint_germline
- variant_calling
- sentieon/haplotyper
files:
- path: results/csv/variantcalled.csv
md5sum: 6ec10f6455c2b5290c7f6fc687c529ca
- path: results/multiqc
- path: results/preprocessing/recalibrated/test/test.recal.cram
should_exist: false
- path: results/preprocessing/recalibrated/test/test.recal.cram.crai
should_exist: false
- path: results/reports/bcftools/sentieon_haplotyper/joint_variant_calling/joint_germline.bcftools_stats.txt
- path: results/reports/vcftools/sentieon_haplotyper/joint_variant_calling/joint_germline.FILTER.summary
- path: results/reports/vcftools/sentieon_haplotyper/joint_variant_calling/joint_germline.TsTv.count
- path: results/reports/vcftools/sentieon_haplotyper/joint_variant_calling/joint_germline.TsTv.qual
- path: results/variant_calling/sentieon_haplotyper/joint_variant_calling/joint_germline.vcf.gz
- path: results/variant_calling/sentieon_haplotyper/joint_variant_calling/joint_germline.vcf.gz.tbi
- path: results/variant_calling/sentieon_haplotyper/testN/testN.haplotyper.g.vcf.gz
- path: results/variant_calling/sentieon_haplotyper/testN/testN.haplotyper.g.vcf.gz.tbi
- path: results/variant_calling/sentieon_haplotyper/testT/testT.haplotyper.g.vcf.gz
- path: results/variant_calling/sentieon_haplotyper/testT/testT.haplotyper.g.vcf.gz.tbi
- path: results/haplotyper
should_exist: false

- name: Run joint germline variant calling with sentieon haplotyper with stub for VQSR
command: nextflow run main.nf -profile test_cache,tools_germline --input ./tests/csv/3.0/mapped_joint_bam.csv --tools sentieon_haplotyper --step variant_calling --joint_germline --outdir results --sentieon_haplotyper_emit_mode gvcf -stub-run
tags:
- germline
- sentieon_joint_germline
- variant_calling
- vqsr
files:
- path: results/csv/variantcalled.csv
md5sum: b9d926c0bbb9c31d7659051ce98196d2
- path: results/multiqc
- path: results/preprocessing/recalibrated/test/test.recal.cram
should_exist: false
- path: results/preprocessing/recalibrated/test/test.recal.cram.crai
should_exist: false
- path: results/reports/bcftools/sentieon_haplotyper/joint_variant_calling/joint_germline_recalibrated.bcftools_stats.txt
- path: results/reports/vcftools/sentieon_haplotyper/joint_variant_calling/joint_germline_recalibrated.FILTER.summary
- path: results/reports/vcftools/sentieon_haplotyper/joint_variant_calling/joint_germline_recalibrated.TsTv.count
- path: results/reports/vcftools/sentieon_haplotyper/joint_variant_calling/joint_germline_recalibrated.TsTv.qual
- path: results/reports/bcftools/sentieon_haplotyper/recalibrated_joint_variant_calling/joint_germline_recalibrated.bcftools_stats.txt
- path: results/reports/vcftools/sentieon_haplotyper/recalibrated_joint_variant_calling/joint_germline_recalibrated.FILTER.summary
- path: results/reports/vcftools/sentieon_haplotyper/recalibrated_joint_variant_calling/joint_germline_recalibrated.TsTv.count
- path: results/reports/vcftools/sentieon_haplotyper/recalibrated_joint_variant_calling/joint_germline_recalibrated.TsTv.qual
- path: results/variant_calling/sentieon_haplotyper/joint_variant_calling/joint_germline.vcf.gz
- path: results/variant_calling/sentieon_haplotyper/joint_variant_calling/joint_germline.vcf.gz.tbi
- path: results/variant_calling/sentieon_haplotyper/joint_variant_calling/joint_germline_recalibrated.vcf.gz
- path: results/variant_calling/sentieon_haplotyper/joint_variant_calling/joint_germline_recalibrated.vcf.gz.tbi
- path: results/variant_calling/sentieon_haplotyper/testN/testN.haplotyper.g.vcf.gz
- path: results/variant_calling/sentieon_haplotyper/testN/testN.haplotyper.g.vcf.gz.tbi
- path: results/variant_calling/sentieon_haplotyper/testT/testT.haplotyper.g.vcf.gz
- path: results/variant_calling/sentieon_haplotyper/testT/testT.haplotyper.g.vcf.gz.tbi
- path: results/haplotyper
should_exist: false