Skip to content

Commit

Permalink
add vqsr cutoffs to GvsExtractCallset wdl; clean up dockstore yml (#7209)
Browse files Browse the repository at this point in the history

* add vqsr cutoffs to GvsExtractCohort wdl; clean up dockstore yml

* remove lod cutoffs from wdl, indicate sensitivity cutoff override

* attempt at filtering arg organization

* fix multiline string formatting

* rename samples to extract table input

* cleanup echos

* update example inputs json

* remove feature branch from dockstore yml

* update interval list to wgs_calling_regions.hg38.noCentromeres.noTelomeres.interval_list
  • Loading branch information
mmorgantaylor authored Apr 21, 2021
1 parent b7b193c commit 1a0041b
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 33 deletions.
4 changes: 0 additions & 4 deletions .dockstore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ workflows:
branches:
- master
- ah_var_store
- mmt_SA_support_and_wdl_renaming
- name: GvsExtractCallset
subclass: WDL
primaryDescriptorPath: /scripts/variantstore/wdl/GvsExtractCallset.wdl
Expand All @@ -75,7 +74,6 @@ workflows:
branches:
- master
- ah_var_store
- mmt_SA_support_and_wdl_renaming
- name: GvsImportGenomes
subclass: WDL
primaryDescriptorPath: /scripts/variantstore/wdl/GvsImportGenomes.wdl
Expand All @@ -85,7 +83,6 @@ workflows:
branches:
- master
- ah_var_store
- mmt_SA_support_and_wdl_renaming
- name: GvsPrepareCallset
subclass: WDL
primaryDescriptorPath: /scripts/variantstore/wdl/GvsPrepareCallset.wdl
Expand All @@ -95,7 +92,6 @@ workflows:
branches:
- master
- ah_var_store
- mmt_SA_support_and_wdl_renaming
- name: MitochondriaPipeline
subclass: WDL
primaryDescriptorPath: /scripts/mitochondria_m2_wdl/MitochondriaPipeline.wdl
Expand Down
17 changes: 8 additions & 9 deletions scripts/variantstore/wdl/GvsExtractCallset.example.inputs.json
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
{
"GvsExtractCallset.fq_cohort_extract_table": "PROJECT_ID.DATASET_NAME.COHORT_TABLE",
"GvsExtractCallset.filter_set_name": "FILTER_SET_NAME",
"GvsExtractCallset.data_project": "PROJECT_ID",
"GvsExtractCallset.fq_samples_to_extract_table": "PROJECT_ID.DATASET_NAME.SAMPLE_TABLE_CONTAINING_SAMPLES_TO_EXTRACT",
"GvsExtractCallset.default_dataset": "DATASET_NAME",
"GvsExtractCallset.output_file_base_name": "OUTPUT_FILE_BASE_NAME",

"GvsExtractCallset.reference": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta",
"GvsExtractCallset.reference_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai",
"GvsExtractCallset.reference_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict",
"GvsExtractCallset.wgs_intervals": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list",
"GvsExtractCallset.wgs_intervals": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.noCentromeres.noTelomeres.interval_list",
"GvsExtractCallset.scatter_count": 50,

"GvsExtractCallset.fq_sample_table": "PROJECT_ID.DATASET_NAME.sample_info",
"GvsExtractCallset.fq_cohort_extract_table": "PROJECT_ID.DATASET_NAME.COHORT_TABLE",
"GvsExtractCallset.fq_filter_set_table": "PROJECT_ID.DATASET_NAME.filter_set_info",
"GvsExtractCallset.filter_set_name": "FILTER_SET_NAME",
"GvsExtractCallset.query_project": "PROJECT_ID",

"GvsExtractCallset.output_file_base_name": "OUTPUT_FILE_BASE_NAME",

"GvsExtractCallset.gatk_override": "gs://broad-dsp-spec-ops/scratch/bigquery-jointcalling/jars/mmt_SA_support_and_wdl_renaming_20210415/gatk-package-4.2.0.0-345-g8a7821a-SNAPSHOT-local.jar"
}
62 changes: 43 additions & 19 deletions scripts/variantstore/wdl/GvsExtractCallset.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ version 1.0

workflow GvsExtractCallset {
input {
String data_project
String default_dataset
String filter_set_name

File wgs_intervals
Int scatter_count
Expand All @@ -10,13 +13,19 @@ workflow GvsExtractCallset {
File reference_index
File reference_dict

String fq_sample_table
String fq_samples_to_extract_table
String fq_cohort_extract_table
String query_project
String? fq_filter_set_info_table
String? fq_filter_set_site_table
String? fq_filter_set_tranches_table
String? filter_set_name
String query_project = data_project

String fq_filter_set_info_table = "~{data_project}.~{default_dataset}.filter_set_info"
String fq_filter_set_site_table = "~{data_project}.~{default_dataset}.filter_set_sites"
String fq_filter_set_tranches_table = "~{data_project}.~{default_dataset}.filter_set_tranches"
Boolean do_not_filter_override = false

# if these are unset, default sensitivity levels will be used
Float? snps_truth_sensitivity_filter_level_override
Float? indels_truth_sensitivity_filter_level_override

File? excluded_intervals
Boolean? emit_pls = false

Expand All @@ -42,14 +51,17 @@ workflow GvsExtractCallset {
reference = reference,
reference_index = reference_index,
reference_dict = reference_dict,
fq_sample_table = fq_sample_table,
fq_samples_to_extract_table = fq_samples_to_extract_table,
intervals = SplitIntervals.interval_files[i],
fq_cohort_extract_table = fq_cohort_extract_table,
read_project_id = query_project,
do_not_filter_override = do_not_filter_override,
fq_filter_set_info_table = fq_filter_set_info_table,
fq_filter_set_site_table = fq_filter_set_site_table,
fq_filter_set_tranches_table = fq_filter_set_tranches_table,
filter_set_name = filter_set_name,
snps_truth_sensitivity_filter_level = snps_truth_sensitivity_filter_level_override,
indels_truth_sensitivity_filter_level = indels_truth_sensitivity_filter_level_override,
excluded_intervals = excluded_intervals,
emit_pls = emit_pls,
service_account_json = service_account_json,
Expand All @@ -72,19 +84,23 @@ task ExtractTask {
File reference_index
File reference_dict

String fq_sample_table
String fq_samples_to_extract_table

File intervals

String fq_cohort_extract_table
String read_project_id
String output_file
String? fq_filter_set_info_table
String? fq_filter_set_site_table
String? fq_filter_set_tranches_table
String? filter_set_name
File? excluded_intervals
String fq_filter_set_info_table
String fq_filter_set_site_table
String fq_filter_set_tranches_table
String filter_set_name
Float? snps_truth_sensitivity_filter_level
Float? indels_truth_sensitivity_filter_level

Boolean do_not_filter_override

File? excluded_intervals
Boolean? emit_pls

# Runtime Options:
Expand All @@ -110,22 +126,30 @@ task ExtractTask {

df -h

if [ ~{do_not_filter_override} = 'true' ]; then
FILTERING_ARGS=''
else
FILTERING_ARGS='--filter-set-info-table ~{fq_filter_set_info_table}
--filter-set-site-table ~{fq_filter_set_site_table}
--tranches-table ~{fq_filter_set_tranches_table}
--filter-set-name ~{filter_set_name}
~{"--snps-truth-sensitivity-filter-level " + snps_truth_sensitivity_filter_level}
~{"--indels-truth-sensitivity-filter-level " + indels_truth_sensitivity_filter_level}'
fi

gatk --java-options "-Xmx9g" \
ExtractCohort \
--mode GENOMES --ref-version 38 --query-mode LOCAL_SORT \
-R "~{reference}" \
-R ~{reference} \
-O ~{output_file} \
--local-sort-max-records-in-ram ~{local_sort_max_records_in_ram} \
--sample-table ~{fq_sample_table} \
--sample-table ~{fq_samples_to_extract_table} \
--cohort-extract-table ~{fq_cohort_extract_table} \
-L ~{intervals} \
~{"-XL " + excluded_intervals} \
--project-id ~{read_project_id} \
~{true='--emit-pls' false='' emit_pls} \
~{"--filter-set-info-table " + fq_filter_set_info_table} \
~{"--filter-set-site-table " + fq_filter_set_site_table} \
~{"--tranches-table " + fq_filter_set_tranches_table} \
~{"--filter-set-name " + filter_set_name}
${FILTERING_ARGS}
>>>

# ------------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"GvsImportGenomes.input_vcfs": "${this.samples.hg38_reblocked_gvcf}",
"GvsImportGenomes.input_vcf_indexes": "${this.samples.hg38_reblocked_gvcf_index}",

"GvsImportGenomes.interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list",
"GvsImportGenomes.interval_list": "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.noCentromeres.noTelomeres.interval_list",
"GvsImportGenomes.drop_state": "SIXTY",

"GvsImportGenomes.project_id": "PROJECT_ID",
Expand Down

0 comments on commit 1a0041b

Please sign in to comment.