Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add skip_tools haplotypecaller_filter #889

Merged
merged 13 commits into from
Dec 12, 2022
2 changes: 1 addition & 1 deletion nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@
"fa_icon": "fas fa-forward",
"description": "Disable specified tools.",
"help_text": "Multiple tools can be specified, separated by commas.\n\n> **NB** `--skip_tools baserecalibrator_report` only prevents the reports from being saved.\n> **NB** `--skip_tools markduplicates_report` does not skip `MarkDuplicates` but prevents the collection of duplicate metrics, which slows down performance.",
"pattern": "^((baserecalibrator|baserecalibrator_report|bcftools|documentation|fastqc|markduplicates|markduplicates_report|mosdepth|multiqc|samtools|vcftools|versions)?,?)*[^,]+$"
"pattern": "^((baserecalibrator|baserecalibrator_report|bcftools|documentation|fastqc|haplotypecaller_filter|markduplicates|markduplicates_report|mosdepth|multiqc|samtools|vcftools|versions)?,?)*[^,]+$"
}
},
"fa_icon": "fas fa-user-cog"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ include { BAM_VARIANT_CALLING_SINGLE_TIDDIT } from '../bam_variant_calling_sin
workflow BAM_VARIANT_CALLING_GERMLINE_ALL {
take:
tools // Mandatory, list of tools to apply
skip_tools // Mandatory, list of tools to skip
cram_recalibrated // channel: [mandatory] cram
bwa // channel: [mandatory] bwa
dbsnp // channel: [mandatory] dbsnp
Expand Down Expand Up @@ -180,7 +181,8 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL {
known_sites_indels_tbi,
known_sites_snps,
known_sites_snps_tbi,
intervals_bed_combined_haplotypec)
intervals_bed_combined_haplotypec,
skip_tools)

haplotypecaller_vcf = BAM_VARIANT_CALLING_HAPLOTYPECALLER.out.filtered_vcf
ch_versions = ch_versions.mix(BAM_VARIANT_CALLING_HAPLOTYPECALLER.out.versions)
Expand Down
47 changes: 25 additions & 22 deletions subworkflows/local/bam_variant_calling_haplotypecaller/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ workflow BAM_VARIANT_CALLING_HAPLOTYPECALLER {
known_sites_snps
known_sites_snps_tbi
intervals_bed_combined // channel: [mandatory] intervals/target regions in one file unzipped, no_intervals.bed if no_intervals

skip_tools

main:

Expand Down Expand Up @@ -126,31 +126,34 @@ workflow BAM_VARIANT_CALLING_HAPLOTYPECALLER {

realigned_bam = BAM_MERGE_INDEX_SAMTOOLS.out.bam_bai

VCF_VARIANT_FILTERING_GATK(haplotypecaller_vcf.join(haplotypecaller_tbi),
fasta,
fasta_fai,
dict,
intervals_bed_combined,
known_sites_indels.concat(known_sites_snps).flatten().unique().collect(),
known_sites_indels_tbi.concat(known_sites_snps_tbi).flatten().unique().collect())

filtered_vcf = VCF_VARIANT_FILTERING_GATK.out.filtered_vcf.map{ meta, vcf-> [
[
patient:meta.patient,
sample:meta.sample,
status:meta.status,
sex:meta.sex,
id:meta.sample,
num_intervals:meta.num_intervals,
variantcaller:"haplotypecaller"
],
vcf
]
if (!(skip_tools && skip_tools.split(',').contains('haplotypecaller_filter'))) {

VCF_VARIANT_FILTERING_GATK(haplotypecaller_vcf.join(haplotypecaller_tbi),
fasta,
fasta_fai,
dict,
intervals_bed_combined,
known_sites_indels.concat(known_sites_snps).flatten().unique().collect(),
known_sites_indels_tbi.concat(known_sites_snps_tbi).flatten().unique().collect())

filtered_vcf = VCF_VARIANT_FILTERING_GATK.out.filtered_vcf.map{ meta, vcf-> [
[
patient:meta.patient,
sample:meta.sample,
status:meta.status,
sex:meta.sex,
id:meta.sample,
num_intervals:meta.num_intervals,
variantcaller:"haplotypecaller"
],
vcf
]
}
ch_versions = ch_versions.mix(VCF_VARIANT_FILTERING_GATK.out.versions)
}

ch_versions = ch_versions.mix(GATK4_HAPLOTYPECALLER.out.versions)
ch_versions = ch_versions.mix(MERGE_HAPLOTYPECALLER.out.versions)
ch_versions = ch_versions.mix(VCF_VARIANT_FILTERING_GATK.out.versions)
}

emit:
Expand Down
28 changes: 28 additions & 0 deletions tests/test_haplotypecaller.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,34 @@
# binary changes md5sums on reruns.
- path: results/haplotypecaller
should_exist: false
# Runs HaplotypeCaller from the variant_calling step with
# `--skip_tools haplotypecaller_filter`. The unfiltered VCF and its index are
# expected in the results, while the filtered VCF and its index must be absent.
- name: Run variant calling on germline sample with haplotypecaller and skip filter
command: nextflow run main.nf -profile test,targeted --input ./tests/csv/3.0/mapped_single_bam.csv --tools haplotypecaller --step variant_calling --skip_tools haplotypecaller_filter
tags:
- germline
- haplotypecaller
- variant_calling
files:
- path: results/csv/variantcalled.csv
md5sum: d7d86e82902a4f57876b2414a4f812a4
- path: results/multiqc
- path: results/preprocessing/converted/test/test.converted.cram
# binary changes md5sums on reruns.
- path: results/preprocessing/converted/test/test.converted.cram.crai
# binary changes md5sums on reruns.
- path: results/preprocessing/recalibrated/test/test.recal.cram
should_exist: false
- path: results/preprocessing/recalibrated/test/test.recal.cram.crai
should_exist: false
# Filtering is skipped in this run, so no filtered VCF or index may be written.
- path: results/variant_calling/haplotypecaller/test/test.haplotypecaller.filtered.vcf.gz
should_exist: false
- path: results/variant_calling/haplotypecaller/test/test.haplotypecaller.filtered.vcf.gz.tbi
should_exist: false
# The unfiltered HaplotypeCaller output must still be produced.
- path: results/variant_calling/haplotypecaller/test/test.haplotypecaller.vcf.gz
# binary changes md5sums on reruns.
- path: results/variant_calling/haplotypecaller/test/test.haplotypecaller.vcf.gz.tbi
# binary changes md5sums on reruns.
- path: results/haplotypecaller
should_exist: false
- name: Run variant calling on germline sample with haplotypecaller without intervals
command: nextflow run main.nf -profile test,targeted --input ./tests/csv/3.0/mapped_single_bam.csv --tools haplotypecaller --step variant_calling --no_intervals
tags:
Expand Down
1 change: 1 addition & 0 deletions workflows/sarek.nf
Original file line number Diff line number Diff line change
Expand Up @@ -954,6 +954,7 @@ workflow SAREK {
// GERMLINE VARIANT CALLING
BAM_VARIANT_CALLING_GERMLINE_ALL(
params.tools,
params.skip_tools,
ch_cram_variant_calling_status_normal,
[[id:"bwa"],[]], //bwa_index for tiddit; not used here
dbsnp,
Expand Down