Skip to content


Merge branch 'release/1.27.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
Walt Shands committed Sep 13, 2018
2 parents 16f6ef9 + bf255db commit 58b98fe
Show file tree
Hide file tree
Showing 7 changed files with 417 additions and 199 deletions.
4 changes: 2 additions & 2 deletions CRAM-no-header-md5sum/CRAM_md5sum_checker_wrapper.wdl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import "" as f1
import "" as f2
import "" as f1
import "" as f2

workflow CRAMMd5sumChecker {
File inputCRAMFile
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import "" as TopMed_aligner
import "" as checker
import "" as TopMed_aligner
import "" as checker

workflow checkerWorkflow {
Int expectedNumofReads
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import "" as TopMed_aligner
import "" as checker
import "" as TopMed_aligner
import "" as checker

workflow checkerWorkflow {
Expand Down
191 changes: 130 additions & 61 deletions aligner/u_of_michigan_aligner/u_of_michigan_aligner.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -30,26 +30,67 @@ workflow TopMedAligner {
File dbSNP_vcf
File dbSNP_vcf_index

Int? preemptible_tries
Int preemptible_tries_default = select_first([preemptible_tries, 3])

# The CRAM to be realigned may have been aligned with a different reference
# genome than what will be used in the alignment step. The pre align step
# must use the reference genome that the CRAM was originally aligned with
# to convert the CRAM to a SAM
File? PreAlign_reference_genome
File PreAlign_reference_genome_default = select_first([PreAlign_reference_genome,ref_fasta])
File? PreAlign_reference_genome_index
File PreAlign_reference_genome_index_default = select_first([PreAlign_reference_genome_index,ref_fasta_index])

Int? PreAlign_preemptible_tries
Int PreAlign_preemptible_tries_default = select_first([PreAlign_preemptible_tries, 3])
Int? PreAlign_max_retries
Int PreAlign_max_retries_default = select_first([PreAlign_max_retries, 3])
Int? PreAlign_CPUs
Int PreAlign_CPUs_default = select_first([PreAlign_CPUs, 1])
Float? PreAlign_mem
Float PreAlign_mem_default = select_first([PreAlign_mem, 6.5])

Int? Align_preemptible_tries
Int Align_preemptible_tries_default = select_first([Align_preemptible_tries, 3])
Int? Align_max_retries
Int Align_max_retries_default = select_first([Align_max_retries, 3])
Int? Align_CPUs
Int Align_CPUs_default = select_first([Align_CPUs, 32])
Float? Align_mem
Float Align_mem_default = select_first([Align_mem, 7])

# Use zero preemptible tries for post alignment because it often takes more than 24
# hours and GCP preemptible nodes are terminated after 24 hours by GCP:
# "Compute Engine always terminates preemptible instances after they run for 24 hours."
# So by using 0 for preemptible tries the task is non-preemptible.
# If preemptible is set to 0, it is treated as false;
# if preemptible is set to a positive integer, it is automatically true.
Int? PostAlign_preemptible_tries
Int PostAlign_preemptible_tries_default = select_first([PostAlign_preemptible_tries, 0])
# If preemptible is 0 and maxRetries is 3, the task can be retried up to 3 times.
# If preemptible is 3 and maxRetries is 3, the task can be retried up to 6 times.
Int? PostAlign_max_retries
Int PostAlign_max_retries_default = select_first([PostAlign_max_retries, 3])
Int? PostAlign_CPUs
Int PostAlign_CPUs_default = select_first([PostAlign_CPUs, 1])
Float? PostAlign_mem
Float PostAlign_mem_default = select_first([PostAlign_mem, 6.5])

Float? PreAlign_mem
Float PreAlign_mem_default = select_first([PreAlign_mem, 6.5])

Float? Align_mem
Float Align_mem_default = select_first([Align_mem, 7])
Boolean? dynamically_calculate_file_size
Boolean dynamically_calculate_disk_requirement = select_first([dynamically_calculate_file_size, true])

Float? PostAlign_mem
Float PostAlign_mem_default = select_first([PostAlign_mem, 6.5])
Float? CRAMandCRAI_disk_size_override
Float CRAMandCRAI_disk_size_override_default = select_first([CRAMandCRAI_disk_size_override, 200])

Float? ReferenceGenome_disk_size_override
Float ReferenceGenome_disk_size_override_default = select_first([ReferenceGenome_disk_size_override, 6.0])

Float? BWT_disk_size_override
Float BWT_disk_size_override_default = select_first([BWT_disk_size_override, 2.0])

Float? dbSNP_disk_size_override
Float dbSNP_disk_size_override_default = select_first([dbSNP_disk_size_override, 2.0])

# Get the file name only with no path and no .cram suffix
String input_cram_name = basename("${input_cram_file}", ".cram")
Expand All @@ -76,33 +117,64 @@ workflow TopMedAligner {
# larger multiplier
Float sort_sam_disk_multiplier = 3.25

# Get the size of the standard reference files as well as the additional reference files needed for BWA
Float ref_size = size(ref_fasta, "GB") + size(ref_fasta_index, "GB")
Float ref_extra_size = size(ref_alt, "GB") + size(ref_bwt, "GB") + size(ref_pac, "GB") + size(ref_ann, "GB") + size(ref_amb, "GB") + size(ref_sa, "GB")
Float dbsnp_size = size(dbSNP_vcf, "GB") + size(dbSNP_vcf_index, "GB")
Float cram_size = size(input_cram_file, "GB") + size(input_crai_file, "GB")
Float fastq_gz_files_size = CRAM_to_fastqgz_multiplier * cram_size

Float PreAlign_ref_size = if (defined(dynamically_calculate_disk_requirement)) then size(PreAlign_reference_genome_default, "GB") + size(PreAlign_reference_genome_index_default, "GB") +
additional_disk else ReferenceGenome_disk_size_override_default + additional_disk

Float ref_size = if (defined(dynamically_calculate_disk_requirement)) then size(ref_fasta, "GB") + size(ref_fasta_index, "GB") +
additional_disk else ReferenceGenome_disk_size_override_default + additional_disk

Float ref_extra_size = if (defined(dynamically_calculate_disk_requirement)) then size(ref_alt, "GB") + size(ref_bwt, "GB") + size(ref_pac, "GB") +
size(ref_ann, "GB") + size(ref_amb, "GB") + size(ref_sa, "GB") +
additional_disk else BWT_disk_size_override_default + additional_disk

Float dbsnp_size =if (defined(dynamically_calculate_disk_requirement)) then size(dbSNP_vcf, "GB") + size(dbSNP_vcf_index, "GB") +
additional_disk else dbSNP_disk_size_override_default + additional_disk

Float cram_and_crai_size = if (defined(dynamically_calculate_disk_requirement)) then size(input_cram_file, "GB") + size(input_crai_file, "GB") +
additional_disk else CRAMandCRAI_disk_size_override_default + additional_disk

Float fastq_gz_files_size = CRAM_to_fastqgz_multiplier * cram_and_crai_size

Float PreAlign_disk_size = PreAlign_ref_size + (bwa_disk_multiplier * cram_and_crai_size) +
(sort_sam_disk_multiplier * cram_and_crai_size) + cram_and_crai_size + additional_disk + fastq_gz_files_size

Float Align_disk_size = ref_size + ref_extra_size + (bwa_disk_multiplier * fastq_gz_files_size) + additional_disk

# The merged cram can be bigger than the summed sizes of the individual aligned crams,
# so account for the output size by multiplying the input size by bwa disk multiplier.
Float PostAlign_disk_size = ref_size + dbsnp_size + cram_and_crai_size +
(sort_sam_disk_multiplier * cram_and_crai_size) + (bwa_disk_multiplier * cram_and_crai_size) + additional_disk

call PreAlign {
input_crai = input_crai_file,
input_cram = input_cram_file,
disk_size = ref_size + (bwa_disk_multiplier * cram_size) + (sort_sam_disk_multiplier * cram_size) + cram_size + additional_disk + fastq_gz_files_size,
ref_fasta = PreAlign_reference_genome_default,
ref_fasta_index = PreAlign_reference_genome_index_default,

disk_size = PreAlign_disk_size,
docker_image = docker_image,
ref_fasta = ref_fasta,
ref_fasta_index = ref_fasta_index,
PreAlign_CPUs_default = PreAlign_CPUs_default,
PreAlign_mem_default = PreAlign_mem_default,
preemptible_tries_default = preemptible_tries_default
CPUs = PreAlign_CPUs_default,
memory = PreAlign_mem_default,
preemptible_tries = PreAlign_preemptible_tries_default,
max_retries = PreAlign_preemptible_tries_default

call Align {
input_list_file = PreAlign.output_list_file,
input_fastq_gz_files = PreAlign.output_fastq_gz_files,

disk_size = ref_size + ref_extra_size + (bwa_disk_multiplier * fastq_gz_files_size) + additional_disk,
disk_size = Align_disk_size,
docker_image = docker_image,
CPUs = Align_CPUs_default,
memory = Align_mem_default,
preemptible_tries = Align_preemptible_tries_default,
max_retries = Align_max_retries_default,

ref_alt = ref_alt,
ref_bwt = ref_bwt,
Expand All @@ -112,33 +184,31 @@ workflow TopMedAligner {
ref_sa = ref_sa,
ref_fasta = ref_fasta,
ref_fasta_index = ref_fasta_index,
Align_CPUs_default = Align_CPUs_default,
Align_mem_default = Align_mem_default,
preemptible_tries_default = preemptible_tries_default


Float CRAMS_files_size = fastq_gz_to_CRAM_multiplier * cram_size

call PostAlign {
input_cram_files = Align.output_cram_files,

# The merged cram can be bigger than the summed sizes of the individual aligned crams,
# so account for the output size by multiplying the input size by bwa disk multiplier.
disk_size = ref_size + dbsnp_size + CRAMS_files_size + (sort_sam_disk_multiplier * CRAMS_files_size) + (bwa_disk_multiplier * CRAMS_files_size) + additional_disk,
disk_size = PostAlign_disk_size,
docker_image = docker_image,
max_retries = PostAlign_max_retries_default,
preemptible_tries = PostAlign_preemptible_tries_default,
CPUs = PostAlign_CPUs_default,
memory = PostAlign_mem_default,

ref_fasta = ref_fasta,
ref_fasta_index = ref_fasta_index,

dbSNP_vcf = dbSNP_vcf,
dbSNP_vcf_index = dbSNP_vcf_index,
PostAlign_CPUs_default = PostAlign_CPUs_default,
PostAlign_mem_default = PostAlign_mem_default,

input_cram_name = input_cram_name,
preemptible_tries_default = preemptible_tries_default


Expand All @@ -152,20 +222,19 @@ workflow TopMedAligner {
File input_crai
File input_cram

Float disk_size
String docker_image

File ref_fasta
File ref_fasta_index

Int PreAlign_CPUs_default
Float PreAlign_mem_default
Float memory
Float disk_size
Int CPUs
Int preemptible_tries
String docker_image
Int max_retries

# Assign a basename to the intermediate files
String pre_output_base = "pre_output_base"

Int preemptible_tries_default

command {

# Set the exit code of a pipeline to that of the rightmost command
Expand Down Expand Up @@ -195,10 +264,11 @@ workflow TopMedAligner {
Array[File] output_fastq_gz_files = glob("${pre_output_base}.*")
runtime {
preemptible: preemptible_tries_default
maxRetries: max_retries
preemptible: preemptible_tries
#memory: "6.5 GB"
memory: sub(PreAlign_mem_default, "\\..*", "") + " GB"
cpu: sub(PreAlign_CPUs_default, "\\..*", "")
memory: sub(memory, "\\..*", "") + " GB"
cpu: sub(CPUs, "\\..*", "")
disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD"
zones: "us-central1-a us-central1-b us-east1-d us-central1-c us-central1-f us-east1-c"
docker: docker_image
Expand All @@ -210,9 +280,6 @@ workflow TopMedAligner {
File input_list_file
Array[File] input_fastq_gz_files

Float disk_size
String docker_image

File ref_alt
File ref_bwt
File ref_pac
Expand All @@ -223,10 +290,13 @@ workflow TopMedAligner {
File ref_fasta
File ref_fasta_index

Int Align_CPUs_default
Float Align_mem_default
Float memory
Float disk_size
Int CPUs
Int preemptible_tries
String docker_image
Int max_retries

Int preemptible_tries_default

# We have to use a trick to make Cromwell
# skip substitution when using the bash ${<variable>} syntax
Expand Down Expand Up @@ -280,20 +350,18 @@ workflow TopMedAligner {
Array[File] output_cram_files = glob("*.cram")
runtime {
preemptible: preemptible_tries_default
memory: sub(Align_mem_default, "\\..*", "") + " GB"
maxRetries: max_retries
preemptible: preemptible_tries
memory: sub(memory, "\\..*", "") + " GB"
#memory: "10 GB"
cpu: sub(Align_CPUs_default, "\\..*", "")
cpu: sub(CPUs, "\\..*", "")
disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD"
zones: "us-central1-a us-central1-b us-east1-d us-central1-c us-central1-f us-east1-c"
docker: docker_image

task PostAlign {
Float disk_size
String docker_image

File ref_fasta
File ref_fasta_index

Expand All @@ -302,10 +370,12 @@ task PostAlign {

Array[File] input_cram_files

Int PostAlign_CPUs_default
Float PostAlign_mem_default

Int preemptible_tries_default
Float memory
Float disk_size
Int CPUs
Int preemptible_tries
String docker_image
Int max_retries

String input_cram_name
String output_cram_file_name = "${input_cram_name}_realigned.cram"
Expand Down Expand Up @@ -350,8 +420,6 @@ task PostAlign {
[[ $rc != 0 ]] && break
rm -f ${dollar}{input_file} ${dollar}{tmp_prefix}*
# Remove the tmp file; no need to remove the input file from the previous task
# rm -f ${dollar}{tmp_prefix}*

if [[ $rc == 0 ]]
Expand All @@ -369,10 +437,11 @@ task PostAlign {
File output_crai_file = "${output_crai_file_name}"
runtime {
preemptible: preemptible_tries_default
maxRetries: max_retries
preemptible: preemptible_tries
#memory: "6.5 GB"
memory: sub(PostAlign_mem_default, "\\..*", "") + " GB"
cpu: sub(PostAlign_CPUs_default, "\\..*", "")
memory: sub(memory, "\\..*", "") + " GB"
cpu: sub(CPUs, "\\..*", "")
disks: "local-disk " + sub(disk_size, "\\..*", "") + " HDD"
zones: "us-central1-a us-central1-b us-east1-d us-central1-c us-central1-f us-east1-c"
docker: docker_image
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import "" as TopMed_variantcaller
import "" as checker
import "" as TopMed_variantcaller
import "" as checker

workflow checkerWorkflow {
File inputTruthVCFFile
Expand Down

0 comments on commit 58b98fe

Please sign in to comment.