Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make GATKSVPipelineSingleSample validate in MiniWDL #154

Merged
merged 11 commits on
Jun 15, 2021
5 changes: 3 additions & 2 deletions wdl/DepthPreprocessing.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ task MergeSample {
input {
File gcnv
Array[File] cnmops
Float max_dist = 0.25
Float? max_dist
String sample_id
String sv_pipeline_docker
RuntimeAttr? runtime_attr_override
Expand All @@ -160,14 +160,15 @@ task MergeSample {
output {
File sample_bed = "~{sample_id}.merged.defrag.sorted.bed"
}

command <<<

set -euo pipefail
zcat ~{sep=" " cnmops} | awk -F "\t" -v OFS="\t" '{if ($5=="~{sample_id}") print}' > cnmops.cnv
cat ~{gcnv} cnmops.cnv | sort -k1,1V -k2,2n > ~{sample_id}.bed
bedtools merge -i ~{sample_id}.bed -d 0 -c 4,5,6,7 -o distinct > ~{sample_id}.merged.bed
/opt/sv-pipeline/00_preprocessing/scripts/defragment_cnvs.py \
--max-dist ~{max_dist} ~{sample_id}.merged.bed ~{sample_id}.merged.defrag.bed
--max-dist ~{if defined(max_dist) then max_dist else "0.25"} ~{sample_id}.merged.bed ~{sample_id}.merged.defrag.bed
sort -k1,1V -k2,2n ~{sample_id}.merged.defrag.bed > ~{sample_id}.merged.defrag.sorted.bed

>>>
Expand Down
42 changes: 18 additions & 24 deletions wdl/GATKSVPipelineSingleSample.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ workflow GATKSVPipelineSingleSample {

RuntimeAttr? runtime_attr_filter_large_pesr
RuntimeAttr? runtime_attr_srtest
RuntimeAttr? runtime_attr_split_vcf
RuntimeAttr? runtime_attr_split_vcf_srtest
RuntimeAttr? runtime_attr_merge_allo
RuntimeAttr? runtime_attr_merge_stats
RuntimeAttr? runtime_attr_rewritesrcoords
Expand All @@ -316,13 +316,15 @@ workflow GATKSVPipelineSingleSample {
File bin_exclude

# Common
RuntimeAttr? runtime_attr_split_vcf
illusional marked this conversation as resolved.
Show resolved Hide resolved
RuntimeAttr? runtime_attr_merge_counts
RuntimeAttr? runtime_attr_split_variants
RuntimeAttr? runtime_attr_make_subset_vcf
RuntimeAttr? runtime_attr_rdtest_genotype
RuntimeAttr? runtime_attr_add_genotypes
RuntimeAttr? runtime_attr_concat_vcfs
RuntimeAttr? runtime_attr_genotype_depths_concat_vcfs
RuntimeAttr? runtime_attr_genotype_pesr_concat_vcfs
RuntimeAttr? runtime_attr_split_vcf_module04


# Master
RuntimeAttr? runtime_attr_add_batch
Expand All @@ -339,7 +341,6 @@ workflow GATKSVPipelineSingleSample {

# Depth part 2
RuntimeAttr? runtime_attr_integrate_depth_gq
RuntimeAttr? runtime_attr_concat_vcfs
illusional marked this conversation as resolved.
Show resolved Hide resolved

############################################################
## Module 0506
Expand All @@ -364,7 +365,7 @@ workflow GATKSVPipelineSingleSample {

RuntimeAttr? runtime_override_update_sr_list
RuntimeAttr? runtime_override_merge_pesr_depth
RuntimeAttr? runtime_override_merge_pesr_depth
illusional marked this conversation as resolved.
Show resolved Hide resolved
RuntimeAttr? runtime_override_breakpoint_overlap_filter
RuntimeAttr? runtime_override_integrate_resolved_vcfs
RuntimeAttr? runtime_override_rename_variants

Expand Down Expand Up @@ -413,13 +414,6 @@ workflow GATKSVPipelineSingleSample {

}

if (defined(insert_size)) { Array[Float]? insert_size_input = [select_first([insert_size])]}
if (defined(read_length)) { Array[Int]? read_length_input = [select_first([read_length])]}
if (defined(coverage)) { Array[Float]? coverage_input = [select_first([coverage])]}
if (defined(pf_reads_improper_pairs)) { Array[Int]? pf_reads_improper_pairs_input = [select_first([pf_reads_improper_pairs])]}
if (defined(total_reads)) { Array[Float]? total_reads_input = [select_first([total_reads])]}
if (defined(pct_chimeras)) { Array[Float]? pct_chimeras_input = [select_first([pct_chimeras])]}

String? delly_docker_ = if (!defined(case_delly_vcf) && use_delly) then delly_docker else NONE_STRING_
String? manta_docker_ = if (!defined(case_manta_vcf) && use_manta) then manta_docker else NONE_STRING_
String? melt_docker_ = if (!defined(case_melt_vcf) && use_melt) then melt_docker else NONE_STRING_
Expand Down Expand Up @@ -451,13 +445,13 @@ workflow GATKSVPipelineSingleSample {
manta_mem_gb_per_job=manta_mem_gb_per_job,
melt_standard_vcf_header=melt_standard_vcf_header,
melt_metrics_intervals=melt_metrics_intervals,
insert_size=insert_size_input,
read_length=read_length_input,
coverage=coverage_input,
insert_size=insert_size,
read_length=read_length,
coverage=coverage,
metrics_intervals=metrics_intervals,
pf_reads_improper_pairs=pf_reads_improper_pairs_input,
pct_chimeras=pct_chimeras_input,
total_reads=total_reads_input,
pf_reads_improper_pairs=pf_reads_improper_pairs,
pct_chimeras=pct_chimeras,
total_reads=total_reads,
wham_include_list_bed_file=wham_include_list_bed_file,
sv_pipeline_docker=sv_pipeline_docker,
sv_base_mini_docker=sv_base_mini_docker,
Expand Down Expand Up @@ -778,7 +772,7 @@ workflow GATKSVPipelineSingleSample {
linux_docker = linux_docker,
sv_pipeline_docker = sv_pipeline_docker,
runtime_attr_srtest = runtime_attr_srtest,
runtime_attr_split_vcf = runtime_attr_split_vcf,
runtime_attr_split_vcf = runtime_attr_split_vcf_srtest,
runtime_attr_merge_allo = runtime_attr_merge_allo,
runtime_attr_merge_stats = runtime_attr_merge_stats
}
Expand Down Expand Up @@ -831,13 +825,14 @@ workflow GATKSVPipelineSingleSample {
sv_pipeline_docker=sv_pipeline_docker,
sv_pipeline_rdtest_docker=sv_pipeline_rdtest_docker,
linux_docker=linux_docker,
runtime_attr_split_vcf=runtime_attr_split_vcf,
runtime_attr_split_vcf=runtime_attr_split_vcf_module04,
runtime_attr_merge_counts=runtime_attr_merge_counts,
runtime_attr_split_variants=runtime_attr_split_variants,
runtime_attr_make_subset_vcf=runtime_attr_make_subset_vcf,
runtime_attr_rdtest_genotype=runtime_attr_rdtest_genotype,
runtime_attr_add_genotypes=runtime_attr_add_genotypes,
runtime_attr_concat_vcfs=runtime_attr_concat_vcfs,
runtime_attr_genotype_depths_concat_vcfs=runtime_attr_genotype_depths_concat_vcfs,
runtime_attr_genotype_pesr_concat_vcfs=runtime_attr_genotype_pesr_concat_vcfs,
runtime_attr_add_batch=runtime_attr_add_batch,
runtime_attr_index_vcf=runtime_attr_index_vcf,
runtime_attr_count_pe=runtime_attr_count_pe,
Expand All @@ -847,8 +842,7 @@ workflow GATKSVPipelineSingleSample {
runtime_attr_integrate_gq=runtime_attr_integrate_gq,
runtime_attr_integrate_pesr_gq=runtime_attr_integrate_pesr_gq,
runtime_attr_triple_stream_cat=runtime_attr_triple_stream_cat,
runtime_attr_integrate_depth_gq=runtime_attr_integrate_depth_gq,
runtime_attr_concat_vcfs=runtime_attr_concat_vcfs
runtime_attr_integrate_depth_gq=runtime_attr_integrate_depth_gq
}

call SingleSampleFiltering.ConvertCNVsWithoutDepthSupportToBNDs as ConvertCNVsWithoutDepthSupportToBNDs {
Expand Down Expand Up @@ -911,7 +905,7 @@ workflow GATKSVPipelineSingleSample {

runtime_override_update_sr_list=runtime_override_update_sr_list,
runtime_override_merge_pesr_depth=runtime_override_merge_pesr_depth,
runtime_override_breakpoint_overlap_filter=runtime_override_merge_pesr_depth,
runtime_override_breakpoint_overlap_filter=runtime_override_breakpoint_overlap_filter,
runtime_override_integrate_resolved_vcfs=runtime_override_integrate_resolved_vcfs,
runtime_override_rename_variants=runtime_override_rename_variants,

Expand Down
17 changes: 8 additions & 9 deletions wdl/MakeBincovMatrix.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ workflow MakeBincovMatrix {
input:
count_file = all_count_files[i],
sample = all_samples[i],
binsize = SetBins.binsize,
binsize = SetBins.out_binsize,
bin_locs = SetBins.bin_locs,
disk_overhead_gb = disk_overhead_gb,
sv_base_mini_docker = sv_base_mini_docker,
Expand Down Expand Up @@ -73,7 +73,7 @@ task SetBins {
File count_file
Int? binsize
Array[String]? bincov_matrix_samples
Int disk_overhead_gb = 10
Int? disk_overhead_gb
String sv_base_mini_docker
RuntimeAttr? runtime_attr_override
}
Expand All @@ -84,7 +84,7 @@ task SetBins {
String binsize_output_file_name = "binsize.txt"

Float disk_scale_factor = 10.0
Int disk_gb = disk_overhead_gb + ceil(disk_scale_factor * size(count_file, "GiB"))
Int disk_gb = select_first([disk_overhead_gb, 10]) + ceil(disk_scale_factor * size(count_file, "GiB"))
RuntimeAttr default_attr = object {
cpu_cores: 1,
mem_gb: 2.0,
Expand All @@ -97,7 +97,7 @@ task SetBins {

output {
File bin_locs = bin_file_name
Int binsize = read_int(binsize_output_file_name)
Int out_binsize = read_int(binsize_output_file_name)
File bincov_matrix_header_file = bincov_header_file_name
}

Expand Down Expand Up @@ -166,13 +166,12 @@ task MakeBincovMatrixColumns {
String sample
Int binsize
File bin_locs
Int disk_overhead_gb = 10
Int? disk_overhead_gb = 10
String sv_base_mini_docker
RuntimeAttr? runtime_attr_override
}

Float disk_scale_factor = 10.0
Int disk_gb = disk_overhead_gb + ceil(disk_scale_factor * (size(count_file, "GiB") + size(bin_locs, "GiB")))
Int disk_gb = select_first([disk_overhead_gb, 10])+ ceil(disk_scale_factor * (size(count_file, "GiB") + size(bin_locs, "GiB")))
RuntimeAttr default_attr = object {
cpu_cores: 1,
mem_gb: 2.0,
Expand Down Expand Up @@ -232,15 +231,15 @@ task ZPaste {
input {
Array[File]+ column_files
String matrix_file_name
Int disk_overhead_gb = 10
Int? disk_overhead_gb = 10
Float mem_overhead_gb = 1.0
String sv_base_docker
RuntimeAttr? runtime_attr_override
}

# Only compressed files are stored (if localization_optional, then only output file is stored),
# so this is a reasonably conservative estimate for disk:
Int disk_gb = disk_overhead_gb + ceil(3.0 * size(column_files, "GiB"))
Int disk_gb = select_first([disk_overhead_gb, 10]) + ceil(3.0 * size(column_files, "GiB"))
# Some memory is used up by the named pipes. Not a lot, but allocate in case the batch is huge:
Float mem_gb = mem_overhead_gb + 0.003 * length(column_files)
RuntimeAttr default_attr = object {
Expand Down
2 changes: 0 additions & 2 deletions wdl/MasterVcfQc.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,6 @@ workflow MasterVcfQc {
runtime_override_collect_vids_per_sample=runtime_override_collect_vids_per_sample,
runtime_override_split_samples_list=runtime_override_split_samples_list,
runtime_override_tar_shard_vid_lists=runtime_override_tar_shard_vid_lists,
sv_base_mini_docker=sv_base_mini_docker,
sv_pipeline_docker=sv_pipeline_docker
}

# Plot per-sample stats
Expand Down
30 changes: 12 additions & 18 deletions wdl/Module00a.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,13 @@ workflow Module00a {
# Melt inputs
File? melt_standard_vcf_header # required if run_melt True
File? melt_metrics_intervals
Array[Float]? insert_size
Array[Int]? read_length
Array[Float]? coverage
Float? insert_size
Int? read_length
Float? coverage
File? metrics_intervals
Array[Float]? pct_chimeras
Array[Float]? total_reads
Array[Int]? pf_reads_improper_pairs
Float? pct_chimeras
Float? total_reads
Int? pf_reads_improper_pairs

# Wham inputs
File wham_include_list_bed_file
Expand Down Expand Up @@ -196,12 +196,6 @@ workflow Module00a {
}

if (run_melt) {
Float? insert_size_i = if defined(insert_size) then select_first([insert_size]) else NONE_FLOAT_
Int? read_length_i = if defined(read_length) then select_first([read_length]) else NONE_INT_
Float? coverage_i = if defined(coverage) then select_first([coverage]) else NONE_FLOAT_
Float? pct_chimeras_i = if defined(pct_chimeras) then select_first([pct_chimeras]) else NONE_FLOAT_
Float? total_reads_i = if defined(total_reads) then select_first([total_reads]) else NONE_INT_
Int? pf_reads_improper_pairs_i = if defined(pf_reads_improper_pairs) then select_first([pf_reads_improper_pairs]) else NONE_INT_
call melt.MELT {
input:
bam_or_cram_file = bam_file_,
Expand All @@ -212,13 +206,13 @@ workflow Module00a {
reference_index = reference_index,
reference_version = reference_version,
melt_standard_vcf_header = select_first([melt_standard_vcf_header]),
insert_size = insert_size_i,
read_length = read_length_i,
coverage = coverage_i,
insert_size = insert_size,
read_length = read_length,
coverage = coverage,
wgs_metrics_intervals = melt_metrics_intervals,
pct_chimeras = pct_chimeras_i,
total_reads = total_reads_i,
pf_reads_improper_pairs = pf_reads_improper_pairs_i,
pct_chimeras = pct_chimeras,
total_reads = total_reads,
pf_reads_improper_pairs = pf_reads_improper_pairs,
runtime_attr_coverage = runtime_attr_melt_coverage,
runtime_attr_metrics = runtime_attr_melt_metrics,
samtools_cloud_docker = samtools_cloud_docker,
Expand Down
16 changes: 9 additions & 7 deletions wdl/Module00aBatch.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ workflow Module00aBatch {

File? NONE_FILE_
String? NONE_STRING_
Float? NONE_FLOAT_
Int? NONE_INT_
}

scatter (i in range(length(bam_or_cram_files))) {
Expand Down Expand Up @@ -112,13 +114,13 @@ workflow Module00aBatch {
manta_mem_gb_per_job = manta_mem_gb_per_job,
melt_standard_vcf_header = melt_standard_vcf_header,
melt_metrics_intervals = melt_metrics_intervals,
insert_size = insert_size,
read_length = read_length,
coverage = coverage,
insert_size = if defined(insert_size) then select_first([insert_size])[i] else NONE_FLOAT_,
read_length = if defined(read_length) then select_first([read_length])[i] else NONE_INT_,
coverage = if defined(coverage) then select_first([coverage])[i] else NONE_FLOAT_,
metrics_intervals = metrics_intervals,
pct_chimeras = pct_chimeras,
total_reads = total_reads,
pf_reads_improper_pairs = pf_reads_improper_pairs,
pct_chimeras = if defined(pct_chimeras) then select_first([pct_chimeras])[i] else NONE_FLOAT_,
total_reads = if defined(total_reads) then select_first([total_reads])[i] else NONE_FLOAT_,
pf_reads_improper_pairs = if defined(pf_reads_improper_pairs) then select_first([pf_reads_improper_pairs])[i] else NONE_INT_,
wham_include_list_bed_file = wham_include_list_bed_file,
sv_pipeline_docker = sv_pipeline_docker,
sv_base_mini_docker = sv_base_mini_docker,
Expand Down Expand Up @@ -168,4 +170,4 @@ workflow Module00aBatch {
Array[File?] wham_vcf = Module00a.wham_vcf
Array[File?] wham_index = Module00a.wham_index
}
}
}
1 change: 0 additions & 1 deletion wdl/Module00c.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,6 @@ workflow Module00c {

RuntimeAttr? runtime_attr_merge_vcfs
RuntimeAttr? runtime_attr_baf_gen
RuntimeAttr? runtime_attr_merge_baf
RuntimeAttr? ploidy_score_runtime_attr
RuntimeAttr? ploidy_build_runtime_attr
RuntimeAttr? runtime_attr_subset_ped
Expand Down
11 changes: 4 additions & 7 deletions wdl/Module04.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ workflow Module04 {
RuntimeAttr? runtime_attr_make_subset_vcf
RuntimeAttr? runtime_attr_rdtest_genotype
RuntimeAttr? runtime_attr_add_genotypes
RuntimeAttr? runtime_attr_concat_vcfs
RuntimeAttr? runtime_attr_genotype_depths_concat_vcfs
RuntimeAttr? runtime_attr_genotype_pesr_concat_vcfs

# Master
RuntimeAttr? runtime_attr_add_batch
Expand All @@ -81,21 +82,17 @@ workflow Module04 {
RuntimeAttr? runtime_attr_genotype_train
RuntimeAttr? runtime_attr_generate_cutoff
RuntimeAttr? runtime_attr_update_cutoff
RuntimeAttr? runtime_attr_split_variants
RuntimeAttr? runtime_attr_merge_genotypes

# PESR part 2
RuntimeAttr? runtime_attr_count_pe
RuntimeAttr? runtime_attr_genotype_pe
RuntimeAttr? runtime_attr_count_sr
RuntimeAttr? runtime_attr_genotype_sr
RuntimeAttr? runtime_attr_integrate_gq
RuntimeAttr? runtime_attr_integrate_pesr_gq
RuntimeAttr? runtime_attr_triple_stream_cat

# Depth part 2
RuntimeAttr? runtime_attr_integrate_depth_gq
RuntimeAttr? runtime_attr_concat_vcfs
RuntimeAttr? runtime_attr_merge_regeno_cov_med

}
Expand Down Expand Up @@ -215,7 +212,7 @@ workflow Module04 {
runtime_attr_integrate_pesr_gq = runtime_attr_integrate_pesr_gq,
runtime_attr_add_genotypes = runtime_attr_add_genotypes,
runtime_attr_triple_stream_cat = runtime_attr_triple_stream_cat,
runtime_attr_concat_vcfs = runtime_attr_concat_vcfs
runtime_attr_concat_vcfs = runtime_attr_genotype_pesr_concat_vcfs
}

if (!single_sample_mode) {
Expand Down Expand Up @@ -270,7 +267,7 @@ workflow Module04 {
runtime_attr_make_subset_vcf = runtime_attr_make_subset_vcf,
runtime_attr_integrate_depth_gq = runtime_attr_integrate_depth_gq,
runtime_attr_add_genotypes = runtime_attr_add_genotypes,
runtime_attr_concat_vcfs = runtime_attr_concat_vcfs,
runtime_attr_concat_vcfs = runtime_attr_genotype_depths_concat_vcfs,
runtime_attr_merge_regeno_cov_med = runtime_attr_merge_regeno_cov_med
}
output {
Expand Down
2 changes: 1 addition & 1 deletion wdl/RDTest.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ workflow RDTest {

call tasks02.MergeStats as MergeStats {
input:
stats = flatten([RDTestAutosome.stats, RDTestAllosome.stats]),
stats = flatten([RDTestAutosome.out_stats, RDTestAllosome.out_stats]),
prefix = "${batch}.${algorithm}",
linux_docker = linux_docker,
runtime_attr_override = runtime_attr_merge_stats
Expand Down
2 changes: 1 addition & 1 deletion wdl/RDTestChromosome.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ workflow RDTestChromosome {
}

output {
File stats = MergeStats.merged_stats
File out_stats = MergeStats.merged_stats
}
}

Expand Down
Loading