Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

remove calls to filter Numt and low het sites #7325

Merged
merged 5 commits into from
Aug 18, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 2 additions & 119 deletions scripts/mitochondria_m2_wdl/AlignAndCall.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ workflow AlignAndCall {

input {
File unmapped_bam
Float? autosomal_coverage
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you also delete the max_low_het_sites workflow input?

String base_name

File mt_dict
Expand Down Expand Up @@ -46,7 +45,6 @@ workflow AlignAndCall {
Float? f_score_beta
Boolean compress_output_vcf
Float? verifyBamID
Int? max_low_het_sites

# Read length used for optimization only. If this is too small CollectWgsMetrics might fail, but the results are not
# affected by this number. Default is 151.
Expand Down Expand Up @@ -218,45 +216,13 @@ workflow AlignAndCall {
preemptible_tries = preemptible_tries
}

if ( defined(autosomal_coverage) ) {
call FilterNuMTs {
input:
filtered_vcf = FilterContamination.filtered_vcf,
ref_fasta = mt_fasta,
ref_fai = mt_fasta_index,
ref_dict = mt_dict,
autosomal_coverage = autosomal_coverage,
gatk_override = gatk_override,
gatk_docker_override = gatk_docker_override,
compress = compress_output_vcf,
preemptible_tries = preemptible_tries
}
}

File low_het_vcf = select_first([FilterNuMTs.numt_filtered_vcf, FilterContamination.filtered_vcf])

call FilterLowHetSites {
input:
filtered_vcf = low_het_vcf,
ref_fasta = mt_fasta,
ref_fai = mt_fasta_index,
ref_dict = mt_dict,
max_low_het_sites = max_low_het_sites,
gatk_override = gatk_override,
gatk_docker_override = gatk_docker_override,
compress = compress_output_vcf,
base_name = base_name,
preemptible_tries = preemptible_tries
}


output {
File mt_aligned_bam = AlignToMt.mt_aligned_bam
File mt_aligned_bai = AlignToMt.mt_aligned_bai
File mt_aligned_shifted_bam = AlignToShiftedMt.mt_aligned_bam
File mt_aligned_shifted_bai = AlignToShiftedMt.mt_aligned_bai
File out_vcf = FilterLowHetSites.final_filtered_vcf
File out_vcf_index = FilterLowHetSites.final_filtered_vcf_idx
File out_vcf = FilterContamination.filtered_vcf
File out_vcf_index = FilterContamination.filtered_vcf_idx
File input_vcf_for_haplochecker = SplitMultiAllelicsAndRemoveNonPassSites.vcf_for_haplochecker
File duplicate_metrics = AlignToMt.duplicate_metrics
File coverage_metrics = CollectWgsMetrics.metrics
Expand Down Expand Up @@ -669,86 +635,3 @@ task SplitMultiAllelicsAndRemoveNonPassSites {
preemptible: select_first([preemptible_tries, 5])
}
}

task FilterNuMTs {
input {
File ref_fasta
File ref_fai
File ref_dict
File filtered_vcf
Float? autosomal_coverage
Int? preemptible_tries
File? gatk_override
String? gatk_docker_override
Boolean compress
}

String basename = basename(filtered_vcf, ".vcf")
String output_vcf = basename + ".numt" + if compress then ".vcf.gz" else ".vcf"
String output_vcf_index = output_vcf + if compress then ".tbi" else ".idx"

parameter_meta {
autosomal_coverage: "Median coverage of the autosomes for filtering potential polymorphic NuMT variants"
}

command {
set -e
export GATK_LOCAL_JAR=~{default="/root/gatk.jar" gatk_override}
gatk NuMTFilterTool \
-R ~{ref_fasta} \
-V ~{filtered_vcf} \
-O ~{output_vcf} \
--autosomal-coverage ~{autosomal_coverage}

}
output {
File numt_filtered_vcf = "~{output_vcf}"
File numt_filtered_vcf_idx = "~{output_vcf_index}"
}
runtime {
docker: select_first([gatk_docker_override, "us.gcr.io/broad-gatk/gatk:4.1.7.0"])
memory: "3 MB"
disks: "local-disk 20 HDD"
preemptible: select_first([preemptible_tries, 5])
}
}

task FilterLowHetSites {
input {
File ref_fasta
File ref_fai
File ref_dict
File filtered_vcf
String base_name
Int? max_low_het_sites
Int? preemptible_tries
File? gatk_override
String? gatk_docker_override
Boolean compress
}

String output_vcf = base_name + ".final" + if compress then ".vcf.gz" else ".vcf"
String output_vcf_index = output_vcf + if compress then ".tbi" else ".idx"
Int max_sites = select_first([max_low_het_sites, 3])

command {
set -e
export GATK_LOCAL_JAR=~{default="/root/gatk.jar" gatk_override}
gatk MTLowHeteroplasmyFilterTool \
-R ~{ref_fasta} \
-V ~{filtered_vcf} \
-O ~{output_vcf} \
--max-allowed-low-hets ~{max_sites}

}
output {
File final_filtered_vcf = "~{output_vcf}"
File final_filtered_vcf_idx = "~{output_vcf_index}"
}
runtime {
docker: select_first([gatk_docker_override, "us.gcr.io/broad-gatk/gatk:4.1.7.0"])
memory: "3 MB"
disks: "local-disk 20 HDD"
preemptible: select_first([preemptible_tries, 5])
}
}
5 changes: 0 additions & 5 deletions scripts/mitochondria_m2_wdl/MitochondriaPipeline.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ workflow MitochondriaPipeline {
File wgs_aligned_input_bam_or_cram
File wgs_aligned_input_bam_or_cram_index
String contig_name = "chrM"
Float autosomal_coverage = 30
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

max_low_het_sites should be deleted from this WDL too.


# Read length used for optimization only. If this is too small CollectWgsMetrics might fail, but the results are not
# affected by this number. Default is 151.
Expand Down Expand Up @@ -62,15 +61,13 @@ workflow MitochondriaPipeline {
Float? f_score_beta
Float? verifyBamID
Boolean compress_output_vcf = false
Int? max_low_het_sites

#Optional runtime arguments
Int? preemptible_tries
}

parameter_meta {
wgs_aligned_input_bam_or_cram: "Full WGS hg38 bam or cram"
autosomal_coverage: "Median coverage of full input bam"
out_vcf: "Final VCF of mitochondrial SNPs and INDELs"
vaf_filter_threshold: "Hard threshold for filtering low VAF sites"
f_score_beta: "F-Score beta balances the filtering strategy between recall and precision. The relative weight of recall to precision."
Expand Down Expand Up @@ -103,7 +100,6 @@ workflow MitochondriaPipeline {
call AlignAndCall.AlignAndCall as AlignAndCall {
input:
unmapped_bam = RevertSam.unmapped_bam,
autosomal_coverage = autosomal_coverage,
base_name = base_name,
mt_dict = mt_dict,
mt_fasta = mt_fasta,
Expand Down Expand Up @@ -133,7 +129,6 @@ workflow MitochondriaPipeline {
verifyBamID = verifyBamID,
compress_output_vcf = compress_output_vcf,
max_read_length = max_read_length,
max_low_het_sites = max_low_het_sites,
preemptible_tries = preemptible_tries
}

Expand Down