Skip to content

Commit

Permalink
tweak dt to make DOI, use dv gpu container
Browse files Browse the repository at this point in the history
  • Loading branch information
jmonlong committed Jun 17, 2022
1 parent 3e54e07 commit 1e77116
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 35 deletions.
15 changes: 15 additions & 0 deletions .dockstore.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,20 @@
version: 1.2
workflows:
- name: GiraffeDeepTrio
subclass: WDL
description: Core VG Giraffe mapping and DeepTrio calling workflow for maternal-paternal-child sample datasets. It takes as input reads in FASTQ format and graphs containing the population-based haplotypes to genotype. The graph files required include the XG, GCSA, GBWT, graph GBWT, Distance and Minimizer indexes. It outputs a VCF file and a BAM file for the child, along with optional RTG and hap.py VCF evaluation if the user provides benchmark truth-set VCFs.
primaryDescriptorPath: /workflows/giraffe_and_deeptrio.wdl
testParameterFiles:
- /params/vg_hprc_deeptrio_map_call.inputs_tiny.gs_url.json
branches:
- master
authors:
- name: Charles Markello
email: cmarkell@ucsc.edu
- name: Jean Monlong
email: jmonlong@ucsc.edu
- name: Adam Novak
email: anovak@soe.ucsc.edu
- name: GiraffeDeepVariant
subclass: WDL
description: Maps raw short reads from a pair of FASTQs or a CRAM file on a pangenome with vg Giraffe and calls variants with DeepVariant.
Expand Down
19 changes: 4 additions & 15 deletions workflows/giraffe_and_deeptrio.mapper.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@ workflow vgGiraffeMap {
File INPUT_READ_FILE_1 # Input sample 1st read pair fastq.gz
File INPUT_READ_FILE_2 # Input sample 2nd read pair fastq.gz
String SAMPLE_NAME # The sample name
# VG Container used in the pipeline (e.g. quay.io/vgteam/vg:v1.16.0)
String VG_CONTAINER = "quay.io/vgteam/vg:v1.36.0"
Int READS_PER_CHUNK = 20000000 # Number of reads contained in each mapping chunk (20000000 for wgs)
String? GIRAFFE_OPTIONS # (OPTIONAL) extra command line options for Giraffe mapper
File PATH_LIST_FILE # Text file where each line is a path name in the XG index, to use instead of CONTIGS. If neither is given, paths are extracted from the XG and subset to chromosome-looking paths.
Expand Down Expand Up @@ -47,7 +45,6 @@ workflow vgGiraffeMap {
input:
in_read_file=INPUT_READ_FILE_1,
in_pair_id="1",
in_vg_container=VG_CONTAINER,
in_reads_per_chunk=READS_PER_CHUNK,
in_split_read_cores=SPLIT_READ_CORES,
in_split_read_disk=SPLIT_READ_DISK
Expand All @@ -56,7 +53,6 @@ workflow vgGiraffeMap {
input:
in_read_file=INPUT_READ_FILE_2,
in_pair_id="2",
in_vg_container=VG_CONTAINER,
in_reads_per_chunk=READS_PER_CHUNK,
in_split_read_cores=SPLIT_READ_CORES,
in_split_read_disk=SPLIT_READ_DISK
Expand All @@ -69,7 +65,6 @@ workflow vgGiraffeMap {
input:
in_xg_file=XG_FILE,
in_path_list_file=PATH_LIST_FILE,
in_vg_container=VG_CONTAINER,
in_extract_disk=MAP_DISK,
in_extract_mem=MAP_MEM
}
Expand All @@ -96,7 +91,6 @@ workflow vgGiraffeMap {
input:
in_left_read_pair_chunk_file=read_pair_chunk_files.left,
in_right_read_pair_chunk_file=read_pair_chunk_files.right,
in_vg_container=VG_CONTAINER,
in_giraffe_options=GIRAFFE_OPTIONS,
in_xg_file=XG_FILE,
in_gbwt_file=GBWT_FILE,
Expand Down Expand Up @@ -256,7 +250,6 @@ task splitReads {
input {
File in_read_file
String in_pair_id
String in_vg_container
Int in_reads_per_chunk
Int in_split_read_cores
Int in_split_read_disk
Expand Down Expand Up @@ -293,7 +286,6 @@ task splitReads {
task extractSubsetPathNames {
input {
File in_xg_file
String in_vg_container
Int in_extract_disk
Int in_extract_mem
}
Expand All @@ -314,15 +306,14 @@ task extractSubsetPathNames {
preemptible: 2
memory: in_extract_mem + " GB"
disks: "local-disk " + in_extract_disk + " SSD"
docker: in_vg_container
docker: "quay.io/vgteam/vg:v1.38.0"
}
}

task extractReference {
input {
File in_xg_file
File in_path_list_file
String in_vg_container
Int in_extract_disk
Int in_extract_mem
}
Expand All @@ -344,7 +335,7 @@ task extractReference {
preemptible: 2
memory: in_extract_mem + " GB"
disks: "local-disk " + in_extract_disk + " SSD"
docker: in_vg_container
docker: "quay.io/vgteam/vg:v1.38.0"
}
}

Expand Down Expand Up @@ -390,7 +381,6 @@ task runVGGIRAFFE {
File in_dist_file
File in_min_file
File in_ref_dict
String in_vg_container
String? in_giraffe_options
String in_sample_name
Int in_map_cores
Expand Down Expand Up @@ -435,7 +425,7 @@ task runVGGIRAFFE {
memory: in_map_mem + " GB"
cpu: in_map_cores
disks: "local-disk " + in_map_disk + " SSD"
docker: in_vg_container
docker: "quay.io/vgteam/vg:v1.38.0"
}
}

Expand Down Expand Up @@ -1060,7 +1050,6 @@ task bgzipMergedVCF {
input {
String in_sample_name
File in_merged_vcf_file
String in_vg_container
Int in_call_disk
Int in_call_mem
}
Expand Down Expand Up @@ -1091,7 +1080,7 @@ task bgzipMergedVCF {
time: 30
memory: in_call_mem + " GB"
disks: "local-disk " + in_call_disk + " SSD"
docker: in_vg_container
docker: "quay.io/vgteam/vg:v1.38.0"
}
}

Expand Down
23 changes: 4 additions & 19 deletions workflows/giraffe_and_deeptrio.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@ workflow vgGiraffeDeeptrio {
String SAMPLE_NAME # The child sample name
String MATERNAL_NAME # The maternal sample name
String PATERNAL_NAME # The paternal sample name
# VG Container used in the pipeline (e.g. quay.io/vgteam/vg:v1.16.0)
String VG_CONTAINER = "quay.io/vgteam/vg:v1.36.0"
Int READS_PER_CHUNK = 20000000 # Number of reads contained in each mapping chunk (20000000 for wgs)
String? GIRAFFE_OPTIONS # (OPTIONAL) extra command line options for Giraffe mapper
Array[String]+? CONTIGS # (OPTIONAL) Desired reference genome contigs, which are all paths in the XG index.
Expand Down Expand Up @@ -77,7 +75,6 @@ workflow vgGiraffeDeeptrio {
call extractSubsetPathNames {
input:
in_xg_file=XG_FILE,
in_vg_container=VG_CONTAINER,
in_extract_disk=MAP_DISK,
in_extract_mem=MAP_MEM
}
Expand All @@ -98,7 +95,6 @@ workflow vgGiraffeDeeptrio {
input:
in_xg_file=XG_FILE,
in_path_list_file=pipeline_path_list_file,
in_vg_container=VG_CONTAINER,
in_extract_disk=MAP_DISK,
in_extract_mem=MAP_MEM
}
Expand All @@ -124,7 +120,6 @@ workflow vgGiraffeDeeptrio {
INPUT_READ_FILE_1=MATERNAL_INPUT_READ_FILE_1,
INPUT_READ_FILE_2=MATERNAL_INPUT_READ_FILE_2,
SAMPLE_NAME=MATERNAL_NAME,
VG_CONTAINER=VG_CONTAINER,
READS_PER_CHUNK=READS_PER_CHUNK,
GIRAFFE_OPTIONS=GIRAFFE_OPTIONS,
PATH_LIST_FILE=pipeline_path_list_file,
Expand Down Expand Up @@ -154,7 +149,6 @@ workflow vgGiraffeDeeptrio {
INPUT_READ_FILE_1=PATERNAL_INPUT_READ_FILE_1,
INPUT_READ_FILE_2=PATERNAL_INPUT_READ_FILE_2,
SAMPLE_NAME=PATERNAL_NAME,
VG_CONTAINER=VG_CONTAINER,
READS_PER_CHUNK=READS_PER_CHUNK,
GIRAFFE_OPTIONS=GIRAFFE_OPTIONS,
PATH_LIST_FILE=pipeline_path_list_file,
Expand Down Expand Up @@ -184,7 +178,6 @@ workflow vgGiraffeDeeptrio {
INPUT_READ_FILE_1=CHILD_INPUT_READ_FILE_1,
INPUT_READ_FILE_2=CHILD_INPUT_READ_FILE_2,
SAMPLE_NAME=SAMPLE_NAME,
VG_CONTAINER=VG_CONTAINER,
READS_PER_CHUNK=READS_PER_CHUNK,
GIRAFFE_OPTIONS=GIRAFFE_OPTIONS,
PATH_LIST_FILE=pipeline_path_list_file,
Expand Down Expand Up @@ -411,7 +404,6 @@ workflow vgGiraffeDeeptrio {
input:
in_sample_name=SAMPLE_NAME,
in_merged_vcf_file=concatVCFChunksChild.output_merged_vcf,
in_vg_container=VG_CONTAINER,
in_call_disk=CALL_DISK,
in_call_mem=CALL_MEM
}
Expand All @@ -431,7 +423,6 @@ workflow vgGiraffeDeeptrio {
input:
in_sample_name=MATERNAL_NAME,
in_merged_vcf_file=concatVCFChunksMaternal.output_merged_vcf,
in_vg_container=VG_CONTAINER,
in_call_disk=CALL_DISK,
in_call_mem=CALL_MEM
}
Expand All @@ -451,7 +442,6 @@ workflow vgGiraffeDeeptrio {
input:
in_sample_name=PATERNAL_NAME,
in_merged_vcf_file=concatVCFChunksPaternal.output_merged_vcf,
in_vg_container=VG_CONTAINER,
in_call_disk=CALL_DISK,
in_call_mem=CALL_MEM
}
Expand Down Expand Up @@ -509,7 +499,6 @@ task splitReads {
input {
File in_read_file
String in_pair_id
String in_vg_container
Int in_reads_per_chunk
Int in_split_read_cores
Int in_split_read_disk
Expand Down Expand Up @@ -546,7 +535,6 @@ task splitReads {
task extractSubsetPathNames {
input {
File in_xg_file
String in_vg_container
Int in_extract_disk
Int in_extract_mem
}
Expand All @@ -567,15 +555,14 @@ task extractSubsetPathNames {
preemptible: 2
memory: in_extract_mem + " GB"
disks: "local-disk " + in_extract_disk + " SSD"
docker: in_vg_container
docker: "quay.io/vgteam/vg:v1.38.0"
}
}

task extractReference {
input {
File in_xg_file
File in_path_list_file
String in_vg_container
Int in_extract_disk
Int in_extract_mem
}
Expand All @@ -597,7 +584,7 @@ task extractReference {
preemptible: 2
memory: in_extract_mem + " GB"
disks: "local-disk " + in_extract_disk + " SSD"
docker: in_vg_container
docker: "quay.io/vgteam/vg:v1.38.0"
}
}

Expand Down Expand Up @@ -643,7 +630,6 @@ task runVGGIRAFFE {
File in_dist_file
File in_min_file
File in_ref_dict
String in_vg_container
String? in_giraffe_options
String in_sample_name
Int in_map_cores
Expand Down Expand Up @@ -688,7 +674,7 @@ task runVGGIRAFFE {
memory: in_map_mem + " GB"
cpu: in_map_cores
disks: "local-disk " + in_map_disk + " SSD"
docker: in_vg_container
docker: "quay.io/vgteam/vg:v1.38.0"
}
}

Expand Down Expand Up @@ -1313,7 +1299,6 @@ task bgzipMergedVCF {
input {
String in_sample_name
File in_merged_vcf_file
String in_vg_container
Int in_call_disk
Int in_call_mem
}
Expand Down Expand Up @@ -1344,7 +1329,7 @@ task bgzipMergedVCF {
time: 30
memory: in_call_mem + " GB"
disks: "local-disk " + in_call_disk + " SSD"
docker: in_vg_container
docker: "quay.io/vgteam/vg:v1.38.0"
}
}

Expand Down
2 changes: 1 addition & 1 deletion workflows/giraffe_and_deepvariant_lite.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -975,7 +975,7 @@ task runDeepVariantCallVariants {
gpuCount: 1
nvidiaDriverVersion: "418.87.00"
disks: "local-disk " + disk_size + " SSD"
docker: "google/deepvariant:1.3.0"
docker: "google/deepvariant:1.3.0-gpu"
}
}

0 comments on commit 1e77116

Please sign in to comment.