From 1e7711611251b8aa90e3f569f4fdcc747817238e Mon Sep 17 00:00:00 2001 From: Jean Monlong Date: Fri, 17 Jun 2022 09:27:02 +0200 Subject: [PATCH] tweak dt to make DOI, use dv gpu container --- .dockstore.yml | 15 ++++++++++++++ workflows/giraffe_and_deeptrio.mapper.wdl | 19 ++++-------------- workflows/giraffe_and_deeptrio.wdl | 23 ++++------------------ workflows/giraffe_and_deepvariant_lite.wdl | 2 +- 4 files changed, 24 insertions(+), 35 deletions(-) diff --git a/.dockstore.yml b/.dockstore.yml index 182f4c1..dfbfa41 100644 --- a/.dockstore.yml +++ b/.dockstore.yml @@ -1,5 +1,20 @@ version: 1.2 workflows: + - name: GiraffeDeepTrio + subclass: WDL + description: Core VG Giraffe mapping and DeepTrio calling workflow for maternal-paternal-child sample datasets. It takes as inputs reads in FASTQ and graphs containing the population-based haplotypes to genotype. The graphs files required include the XG, GCSA, GBWT, graph GBWT, Distance and Minimizer indexes. It outputs a VCF file and BAM file for the child along with optional RTG and hap.py vcf evaluation if the user provides benchmark truth-set VCFs. + primaryDescriptorPath: /workflows/giraffe_and_deeptrio.wdl + testParameterFiles: + - /params/vg_hprc_deeptrio_map_call.inputs_tiny.gs_url.json + branches: + - master + authors: + - name: Charles Markello + email: cmarkell@ucsc.edu + - name: Jean Monlong + email: jmonlong@ucsc.edu + - name: Adam Novak + email: anovak@soe.ucsc.edu - name: GiraffeDeepVariant subclass: WDL description: Maps raw short reads from a pair of FASTQs or a CRAM file on a pangenome with vg Giraffe and call variants with DeepVariant. diff --git a/workflows/giraffe_and_deeptrio.mapper.wdl b/workflows/giraffe_and_deeptrio.mapper.wdl index f4a01b0..36dc2e6 100644 --- a/workflows/giraffe_and_deeptrio.mapper.wdl +++ b/workflows/giraffe_and_deeptrio.mapper.wdl @@ -15,8 +15,6 @@ workflow vgGiraffeMap { File INPUT_READ_FILE_1 # Input sample 1st read pair fastq.gz File INPUT_READ_FILE_2 # Input sample 2nd read pair fastq.gz String SAMPLE_NAME # The sample name - # VG Container used in the pipeline (e.g. quay.io/vgteam/vg:v1.16.0) - String VG_CONTAINER = "quay.io/vgteam/vg:v1.36.0" Int READS_PER_CHUNK = 20000000 # Number of reads contained in each mapping chunk (20000000 for wgs) String? GIRAFFE_OPTIONS # (OPTIONAL) extra command line options for Giraffe mapper File PATH_LIST_FILE # Text file where each line is a path name in the XG index, to use instead of CONTIGS. If neither is given, paths are extracted from the XG and subset to chromosome-looking paths. @@ -47,7 +45,6 @@ workflow vgGiraffeMap { input: in_read_file=INPUT_READ_FILE_1, in_pair_id="1", - in_vg_container=VG_CONTAINER, in_reads_per_chunk=READS_PER_CHUNK, in_split_read_cores=SPLIT_READ_CORES, in_split_read_disk=SPLIT_READ_DISK @@ -56,7 +53,6 @@ workflow vgGiraffeMap { input: in_read_file=INPUT_READ_FILE_2, in_pair_id="2", - in_vg_container=VG_CONTAINER, in_reads_per_chunk=READS_PER_CHUNK, in_split_read_cores=SPLIT_READ_CORES, in_split_read_disk=SPLIT_READ_DISK @@ -69,7 +65,6 @@ workflow vgGiraffeMap { input: in_xg_file=XG_FILE, in_path_list_file=PATH_LIST_FILE, - in_vg_container=VG_CONTAINER, in_extract_disk=MAP_DISK, in_extract_mem=MAP_MEM } @@ -96,7 +91,6 @@ workflow vgGiraffeMap { input: in_left_read_pair_chunk_file=read_pair_chunk_files.left, in_right_read_pair_chunk_file=read_pair_chunk_files.right, - in_vg_container=VG_CONTAINER, in_giraffe_options=GIRAFFE_OPTIONS, in_xg_file=XG_FILE, in_gbwt_file=GBWT_FILE, @@ -256,7 +250,6 @@ task splitReads { input { File in_read_file String in_pair_id - String in_vg_container Int in_reads_per_chunk Int in_split_read_cores Int in_split_read_disk @@ -293,7 +286,6 @@ task splitReads { task extractSubsetPathNames { input { File in_xg_file - String in_vg_container Int in_extract_disk Int in_extract_mem } @@ -314,7 +306,7 @@ task extractSubsetPathNames { preemptible: 2 memory: in_extract_mem + " GB" disks: "local-disk " + in_extract_disk + " SSD" - docker: in_vg_container + docker: "quay.io/vgteam/vg:v1.38.0" } } @@ -322,7 +314,6 @@ task extractReference { input { File in_xg_file File in_path_list_file - String in_vg_container Int in_extract_disk Int in_extract_mem } @@ -344,7 +335,7 @@ task extractReference { preemptible: 2 memory: in_extract_mem + " GB" disks: "local-disk " + in_extract_disk + " SSD" - docker: in_vg_container + docker: "quay.io/vgteam/vg:v1.38.0" } } @@ -390,7 +381,6 @@ task runVGGIRAFFE { File in_dist_file File in_min_file File in_ref_dict - String in_vg_container String? in_giraffe_options String in_sample_name Int in_map_cores @@ -435,7 +425,7 @@ task runVGGIRAFFE { memory: in_map_mem + " GB" cpu: in_map_cores disks: "local-disk " + in_map_disk + " SSD" - docker: in_vg_container + docker: "quay.io/vgteam/vg:v1.38.0" } } @@ -1060,7 +1050,6 @@ task bgzipMergedVCF { input { String in_sample_name File in_merged_vcf_file - String in_vg_container Int in_call_disk Int in_call_mem } @@ -1091,7 +1080,7 @@ task bgzipMergedVCF { time: 30 memory: in_call_mem + " GB" disks: "local-disk " + in_call_disk + " SSD" - docker: in_vg_container + docker: "quay.io/vgteam/vg:v1.38.0" } } diff --git a/workflows/giraffe_and_deeptrio.wdl b/workflows/giraffe_and_deeptrio.wdl index 2e5a92e..ac3501d 100644 --- a/workflows/giraffe_and_deeptrio.wdl +++ b/workflows/giraffe_and_deeptrio.wdl @@ -23,8 +23,6 @@ workflow vgGiraffeDeeptrio { String SAMPLE_NAME # The child sample name String MATERNAL_NAME # The maternal sample name String PATERNAL_NAME # The paternal sample name - # VG Container used in the pipeline (e.g. quay.io/vgteam/vg:v1.16.0) - String VG_CONTAINER = "quay.io/vgteam/vg:v1.36.0" Int READS_PER_CHUNK = 20000000 # Number of reads contained in each mapping chunk (20000000 for wgs) String? GIRAFFE_OPTIONS # (OPTIONAL) extra command line options for Giraffe mapper Array[String]+? CONTIGS # (OPTIONAL) Desired reference genome contigs, which are all paths in the XG index. @@ -77,7 +75,6 @@ workflow vgGiraffeDeeptrio { call extractSubsetPathNames { input: in_xg_file=XG_FILE, - in_vg_container=VG_CONTAINER, in_extract_disk=MAP_DISK, in_extract_mem=MAP_MEM } @@ -98,7 +95,6 @@ workflow vgGiraffeDeeptrio { input: in_xg_file=XG_FILE, in_path_list_file=pipeline_path_list_file, - in_vg_container=VG_CONTAINER, in_extract_disk=MAP_DISK, in_extract_mem=MAP_MEM } @@ -124,7 +120,6 @@ workflow vgGiraffeDeeptrio { INPUT_READ_FILE_1=MATERNAL_INPUT_READ_FILE_1, INPUT_READ_FILE_2=MATERNAL_INPUT_READ_FILE_2, SAMPLE_NAME=MATERNAL_NAME, - VG_CONTAINER=VG_CONTAINER, READS_PER_CHUNK=READS_PER_CHUNK, GIRAFFE_OPTIONS=GIRAFFE_OPTIONS, PATH_LIST_FILE=pipeline_path_list_file, @@ -154,7 +149,6 @@ workflow vgGiraffeDeeptrio { INPUT_READ_FILE_1=PATERNAL_INPUT_READ_FILE_1, INPUT_READ_FILE_2=PATERNAL_INPUT_READ_FILE_2, SAMPLE_NAME=PATERNAL_NAME, - VG_CONTAINER=VG_CONTAINER, READS_PER_CHUNK=READS_PER_CHUNK, GIRAFFE_OPTIONS=GIRAFFE_OPTIONS, PATH_LIST_FILE=pipeline_path_list_file, @@ -184,7 +178,6 @@ workflow vgGiraffeDeeptrio { INPUT_READ_FILE_1=CHILD_INPUT_READ_FILE_1, INPUT_READ_FILE_2=CHILD_INPUT_READ_FILE_2, SAMPLE_NAME=SAMPLE_NAME, - VG_CONTAINER=VG_CONTAINER, READS_PER_CHUNK=READS_PER_CHUNK, GIRAFFE_OPTIONS=GIRAFFE_OPTIONS, PATH_LIST_FILE=pipeline_path_list_file, @@ -411,7 +404,6 @@ workflow vgGiraffeDeeptrio { input: in_sample_name=SAMPLE_NAME, in_merged_vcf_file=concatVCFChunksChild.output_merged_vcf, - in_vg_container=VG_CONTAINER, in_call_disk=CALL_DISK, in_call_mem=CALL_MEM } @@ -431,7 +423,6 @@ workflow vgGiraffeDeeptrio { input: in_sample_name=MATERNAL_NAME, in_merged_vcf_file=concatVCFChunksMaternal.output_merged_vcf, - in_vg_container=VG_CONTAINER, in_call_disk=CALL_DISK, in_call_mem=CALL_MEM } @@ -451,7 +442,6 @@ workflow vgGiraffeDeeptrio { input: in_sample_name=PATERNAL_NAME, in_merged_vcf_file=concatVCFChunksPaternal.output_merged_vcf, - in_vg_container=VG_CONTAINER, in_call_disk=CALL_DISK, in_call_mem=CALL_MEM } @@ -509,7 +499,6 @@ task splitReads { input { File in_read_file String in_pair_id - String in_vg_container Int in_reads_per_chunk Int in_split_read_cores Int in_split_read_disk @@ -546,7 +535,6 @@ task splitReads { task extractSubsetPathNames { input { File in_xg_file - String in_vg_container Int in_extract_disk Int in_extract_mem } @@ -567,7 +555,7 @@ task extractSubsetPathNames { preemptible: 2 memory: in_extract_mem + " GB" disks: "local-disk " + in_extract_disk + " SSD" - docker: in_vg_container + docker: "quay.io/vgteam/vg:v1.38.0" } } @@ -575,7 +563,6 @@ task extractReference { input { File in_xg_file File in_path_list_file - String in_vg_container Int in_extract_disk Int in_extract_mem } @@ -597,7 +584,7 @@ task extractReference { preemptible: 2 memory: in_extract_mem + " GB" disks: "local-disk " + in_extract_disk + " SSD" - docker: in_vg_container + docker: "quay.io/vgteam/vg:v1.38.0" } } @@ -643,7 +630,6 @@ task runVGGIRAFFE { File in_dist_file File in_min_file File in_ref_dict - String in_vg_container String? in_giraffe_options String in_sample_name Int in_map_cores @@ -688,7 +674,7 @@ task runVGGIRAFFE { memory: in_map_mem + " GB" cpu: in_map_cores disks: "local-disk " + in_map_disk + " SSD" - docker: in_vg_container + docker: "quay.io/vgteam/vg:v1.38.0" } } @@ -1313,7 +1299,6 @@ task bgzipMergedVCF { input { String in_sample_name File in_merged_vcf_file - String in_vg_container Int in_call_disk Int in_call_mem } @@ -1344,7 +1329,7 @@ task bgzipMergedVCF { time: 30 memory: in_call_mem + " GB" disks: "local-disk " + in_call_disk + " SSD" - docker: in_vg_container + docker: "quay.io/vgteam/vg:v1.38.0" } } diff --git a/workflows/giraffe_and_deepvariant_lite.wdl b/workflows/giraffe_and_deepvariant_lite.wdl index 7d27d58..6396245 100644 --- a/workflows/giraffe_and_deepvariant_lite.wdl +++ b/workflows/giraffe_and_deepvariant_lite.wdl @@ -975,7 +975,7 @@ task runDeepVariantCallVariants { gpuCount: 1 nvidiaDriverVersion: "418.87.00" disks: "local-disk " + disk_size + " SSD" - docker: "google/deepvariant:1.3.0" + docker: "google/deepvariant:1.3.0-gpu" } }