From 99c0b8fdc5b90ccc9f529e14737133393d9c1742 Mon Sep 17 00:00:00 2001 From: Paul Sud Date: Tue, 1 Mar 2022 13:20:45 -0800 Subject: [PATCH 1/3] PIPE-52-gatk-reference-tarball --- genophase.wdl | 50 ++++------------------- tests/functional/json/test_genophase.json | 9 ---- 2 files changed, 8 insertions(+), 51 deletions(-) diff --git a/genophase.wdl b/genophase.wdl index 6d48671d..7140330a 100644 --- a/genophase.wdl +++ b/genophase.wdl @@ -13,17 +13,10 @@ workflow genophase { input { File reference_fasta Array[File] bams - # From GATK bundle + # .tar.gz archive containing Omni, Mills, Hapmap, and 1000G VCFs + indexes # https://gatk.broadinstitute.org/hc/en-us/articles/360035890811-Resource-bundle # https://console.cloud.google.com/storage/browser/genomics-public-data/resources/broad/hg38/v0/ - File dbsnp_vcf - File dbsnp_vcf_index - File hapmap_vcf_index - File hapmap_vcf - File mills_vcf - File mills_vcf_index - File omni_vcf - File omni_vcf_index + File? gatk_bundle_tar Int? gatk_num_cpus Int? gatk_disk_size_gb Int? gatk_ram_gb @@ -31,8 +24,6 @@ workflow genophase { Int? run_3d_dna_disk_size_gb Int? 
run_3d_dna_ram_gb Boolean no_phasing = false - # Only for testing purposes - Boolean no_bundle = false String docker = "encodedcc/hic-pipeline:1.11.2" String singularity = "docker://encodedcc/hic-pipeline:1.11.2" @@ -66,15 +57,7 @@ workflow genophase { reference_fasta_index = create_reference_fasta_index.fasta_index, sequence_dictionary = create_gatk_references.sequence_dictionary, interval_list = create_gatk_references.interval_list, - mills_vcf = mills_vcf, - omni_vcf = omni_vcf, - hapmap_vcf = hapmap_vcf, - dbsnp_vcf = dbsnp_vcf, - mills_vcf_index = mills_vcf_index, - omni_vcf_index = omni_vcf_index, - hapmap_vcf_index = hapmap_vcf_index, - dbsnp_vcf_index = dbsnp_vcf_index, - no_bundle = no_bundle, + bundle_tar = gatk_bundle_tar, num_cpus = gatk_num_cpus, ram_gb = gatk_ram_gb, disk_size_gb = gatk_disk_size_gb, @@ -154,6 +137,7 @@ task create_gatk_references { } } + task gatk { input { File bam @@ -161,16 +145,7 @@ task gatk { File reference_fasta_index File sequence_dictionary File interval_list - File mills_vcf - File omni_vcf - File hapmap_vcf - File dbsnp_vcf - File mills_vcf_index - File omni_vcf_index - File hapmap_vcf_index - File dbsnp_vcf_index - # Only for testing purposes - Boolean no_bundle = false + File? 
bundle_tar Int num_cpus = 16 Int ram_gb = 128 Int disk_size_gb = 1000 @@ -182,25 +157,16 @@ task gatk { command <<< mkdir bundle - if [[ ~{if(no_bundle) then "0" else "1"} -eq 1 ]] + if [[ ~{if(defined(bundle_tar)) then "0" else "1"} -eq 1 ]] then - mv \ - ~{mills_vcf} \ - ~{omni_vcf} \ - ~{hapmap_vcf} \ - ~{dbsnp_vcf} \ - ~{mills_vcf_index} \ - ~{omni_vcf_index} \ - ~{hapmap_vcf_index} \ - ~{dbsnp_vcf_index} \ - bundle + tar -xvzf ~{bundle_tar} -C bundle fi mkdir reference mv ~{reference_fasta_index} ~{sequence_dictionary} ~{interval_list} reference gzip -dc ~{reference_fasta} > reference/~{basename(reference_fasta, ".gz")} run-gatk-after-juicer2.sh \ -r reference/~{basename(reference_fasta, ".gz")} \ - ~{if !no_bundle then "--gatk-bundle bundle" else ""} \ + ~{if defined(bundle_tar) then "--gatk-bundle bundle" else ""} \ --threads ~{num_cpus} \ ~{bam} gzip -n ~{final_snp_vcf_name} diff --git a/tests/functional/json/test_genophase.json b/tests/functional/json/test_genophase.json index 12733a7a..3bbe8c07 100644 --- a/tests/functional/json/test_genophase.json +++ b/tests/functional/json/test_genophase.json @@ -2,16 +2,7 @@ "genophase.bams": [ "tests/data/sample_subsampled.bam" ], - "genophase.dbsnp_vcf": "tests/data/dummy.txt", - "genophase.dbsnp_vcf_index": "tests/data/dummy.txt", "genophase.gatk_num_cpus": 1, - "genophase.hapmap_vcf": "tests/data/dummy.txt", - "genophase.hapmap_vcf_index": "tests/data/dummy.txt", - "genophase.mills_vcf": "tests/data/dummy.txt", - "genophase.mills_vcf_index": "tests/data/dummy.txt", - "genophase.no_bundle": true, "genophase.no_phasing": true, - "genophase.omni_vcf": "tests/data/dummy.txt", - "genophase.omni_vcf_index": "tests/data/dummy.txt", "genophase.reference_fasta": "https://www.encodeproject.org/files/GRCh38_no_alt_analysis_set_GCA_000001405.15/@@download/GRCh38_no_alt_analysis_set_GCA_000001405.15.fasta.gz" } From fa2b4cae23526faf38cd23a4398f12eb4eb150a4 Mon Sep 17 00:00:00 2001 From: Paul Sud Date: Wed, 2 Mar 2022 09:46:07 
-0800 Subject: [PATCH 2/3] fix bundle unpacking --- genophase.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/genophase.wdl b/genophase.wdl index 7140330a..c9980454 100644 --- a/genophase.wdl +++ b/genophase.wdl @@ -157,7 +157,7 @@ task gatk { command <<< mkdir bundle - if [[ ~{if(defined(bundle_tar)) then "0" else "1"} -eq 1 ]] + if [[ ~{if defined(bundle_tar) then "1" else "0"} -eq 1 ]] then tar -xvzf ~{bundle_tar} -C bundle fi From 0431949aff61f14d86814ea87055db796b5619ee Mon Sep 17 00:00:00 2001 From: Paul Sud Date: Thu, 3 Mar 2022 12:27:31 -0800 Subject: [PATCH 3/3] bump versions --- genophase.wdl | 10 +++++----- hic.wdl | 14 +++++++------- hic_pipeline/__init__.py | 2 +- make_restriction_site_locations.wdl | 10 +++++----- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/genophase.wdl b/genophase.wdl index c9980454..15a48d60 100644 --- a/genophase.wdl +++ b/genophase.wdl @@ -4,9 +4,9 @@ import "./hic.wdl" workflow genophase { meta { - version: "1.11.2" - caper_docker: "encodedcc/hic-pipeline:1.11.2" - caper_singularity: "docker://encodedcc/hic-pipeline:1.11.2" + version: "1.11.3" + caper_docker: "encodedcc/hic-pipeline:1.11.3" + caper_singularity: "docker://encodedcc/hic-pipeline:1.11.3" croo_out_def: "https://raw.githubusercontent.com/ENCODE-DCC/hic-pipeline/dev/croo_out_def.json" } @@ -25,8 +25,8 @@ workflow genophase { Int? 
run_3d_dna_ram_gb Boolean no_phasing = false - String docker = "encodedcc/hic-pipeline:1.11.2" - String singularity = "docker://encodedcc/hic-pipeline:1.11.2" + String docker = "encodedcc/hic-pipeline:1.11.3" + String singularity = "docker://encodedcc/hic-pipeline:1.11.3" } RuntimeEnvironment runtime_environment = { diff --git a/hic.wdl b/hic.wdl index 515d8d24..d9a1cb1d 100644 --- a/hic.wdl +++ b/hic.wdl @@ -19,9 +19,9 @@ struct RuntimeEnvironment { workflow hic { meta { - version: "1.11.2" - caper_docker: "encodedcc/hic-pipeline:1.11.2" - caper_singularity: "docker://encodedcc/hic-pipeline:1.11.2" + version: "1.11.3" + caper_docker: "encodedcc/hic-pipeline:1.11.3" + caper_singularity: "docker://encodedcc/hic-pipeline:1.11.3" croo_out_def: "https://raw.githubusercontent.com/ENCODE-DCC/hic-pipeline/dev/croo_out_def.json" description: "ENCODE Hi-C pipeline, see https://github.com/ENCODE-DCC/hic-pipeline for details." } @@ -65,10 +65,10 @@ workflow hic { Int? create_accessibility_track_disk_size_gb String assembly_name = "undefined" - String docker = "encodedcc/hic-pipeline:1.11.2" - String singularity = "docker://encodedcc/hic-pipeline:1.11.2" - String delta_docker = "encodedcc/hic-pipeline:1.11.2_delta" - String hiccups_docker = "encodedcc/hic-pipeline:1.11.2_hiccups" + String docker = "encodedcc/hic-pipeline:1.11.3" + String singularity = "docker://encodedcc/hic-pipeline:1.11.3" + String delta_docker = "encodedcc/hic-pipeline:1.11.3_delta" + String hiccups_docker = "encodedcc/hic-pipeline:1.11.3_hiccups" } RuntimeEnvironment runtime_environment = { diff --git a/hic_pipeline/__init__.py b/hic_pipeline/__init__.py index 2857d531..a673cd59 100644 --- a/hic_pipeline/__init__.py +++ b/hic_pipeline/__init__.py @@ -1,5 +1,5 @@ __title__ = "hic-pipeline" -__version__ = "1.11.2" +__version__ = "1.11.3" __description__ = "ENCODE Hi-C uniform processing pipeline." 
__url__ = "https://github.com/ENCODE-DCC/hic-pipeline" __uri__ = __url__ diff --git a/make_restriction_site_locations.wdl b/make_restriction_site_locations.wdl index 2c103ee0..e8c7fa7a 100644 --- a/make_restriction_site_locations.wdl +++ b/make_restriction_site_locations.wdl @@ -7,9 +7,9 @@ struct RuntimeEnvironment { workflow make_restriction_site_locations { meta { - version: "1.11.2" - caper_docker: "encodedcc/hic-pipeline:1.11.2" - caper_singularity: "docker://encodedcc/hic-pipeline:1.11.2" + version: "1.11.3" + caper_docker: "encodedcc/hic-pipeline:1.11.3" + caper_singularity: "docker://encodedcc/hic-pipeline:1.11.3" } parameter_meta { @@ -22,8 +22,8 @@ workflow make_restriction_site_locations { File reference_fasta String assembly_name String restriction_enzyme - String docker = "encodedcc/hic-pipeline:1.11.2" - String singularity = "docker://encodedcc/hic-pipeline:1.11.2" + String docker = "encodedcc/hic-pipeline:1.11.3" + String singularity = "docker://encodedcc/hic-pipeline:1.11.3" }