Skip to content

Commit

Permalink
VS-695. Updates to run Precision and Sensitivity on VQSR Lite (#8230)
Browse files Browse the repository at this point in the history
* Update GvsCalculatePrecisionAndSensitivity.wdl to allow for the different scales of calibration_sensitivity vs. LOD score.
Also retrieve the score from JointVcfFiltering and store it in BQ and in the VCF.
  • Loading branch information
gbggrant authored Mar 3, 2023
1 parent 3a7f6e2 commit 5645e88
Show file tree
Hide file tree
Showing 11 changed files with 82 additions and 36 deletions.
3 changes: 3 additions & 0 deletions .dockstore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ workflows:
branches:
- master
- ah_var_store
- gg_VS-695_RunPandSForVQSR_Lite
- name: GvsPopulateAltAllele
subclass: WDL
primaryDescriptorPath: /scripts/variantstore/wdl/GvsPopulateAltAllele.wdl
Expand All @@ -118,6 +119,7 @@ workflows:
branches:
- master
- ah_var_store
- gg_VS-695_RunPandSForVQSR_Lite
- name: GvsImportGenomes
subclass: WDL
primaryDescriptorPath: /scripts/variantstore/wdl/GvsImportGenomes.wdl
Expand Down Expand Up @@ -202,6 +204,7 @@ workflows:
branches:
- master
- ah_var_store
- gg_VS-695_RunPandSForVQSR_Lite
- name: GvsQuickstartVcfIntegration
subclass: WDL
primaryDescriptorPath: /scripts/variantstore/wdl/GvsQuickstartVcfIntegration.wdl
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,7 @@ task EvaluateVcf {
~{if all_records then "--all-records" else ""} \
--roc-subset snp,indel \
--vcf-score-field=INFO.~{max_score_field_tag} \
~{if use_classic_VQSR then "--sort-order descending" else "--sort-order ascending"} \
-t human_REF_SDF \
-b ~{truth_vcf} \
-e ~{truth_bed}\
Expand Down
12 changes: 6 additions & 6 deletions scripts/variantstore/wdl/GvsCreateFilterSet.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ workflow GvsCreateFilterSet {
String fq_filter_sites_destination_table = "~{project_id}.~{dataset_name}.filter_set_sites"

String fq_info_destination_table_schema = "filter_set_name:string,type:string,location:integer,ref:string,alt:string,vqslod:float,culprit:string,training_label:string,yng_status:string"
String fq_info_destination_table_vqsr_lite_schema = "filter_set_name:string,type:string,location:integer,ref:string,alt:string,calibration_sensitivity:float,culprit:string,training_label:string,yng_status:string"
String fq_info_destination_table_vqsr_lite_schema = "filter_set_name:string,type:string,location:integer,ref:string,alt:string,calibration_sensitivity:float,score:float,training_label:string,yng_status:string"

call Utils.GetBQTableLastModifiedDatetime as SamplesTableDatetimeCheck {
input:
Expand Down Expand Up @@ -181,7 +181,7 @@ workflow GvsCreateFilterSet {
output_basename = "${filter_set_name}.filtered.scored.indels"
}

call PopulateFilterSetInfo {
call PopulateFilterSetInfo {
input:
gatk_override = gatk_override,
filter_set_name = filter_set_name,
Expand Down Expand Up @@ -425,7 +425,7 @@ task ExtractFilterTask {
>>>

runtime {
docker: "us.gcr.io/broad-dsde-methods/broad-gatk-snapshots:varstore_2023_02_15_7274e012706cb2fa15ed3fb1e12d7e9ae28aa4a1"
docker: "us.gcr.io/broad-dsde-methods/broad-gatk-snapshots:varstore_2023_03_01_b01183576153cf000e17dea32144d332cb7b79a9"
memory: "7 GB"
disks: "local-disk 10 HDD"
bootDiskSizeGb: 15
Expand Down Expand Up @@ -506,7 +506,7 @@ task PopulateFilterSetInfo {
>>>

runtime {
docker: "us.gcr.io/broad-dsde-methods/broad-gatk-snapshots:varstore_2023_02_15_7274e012706cb2fa15ed3fb1e12d7e9ae28aa4a1"
docker: "us.gcr.io/broad-dsde-methods/broad-gatk-snapshots:varstore_2023_03_01_b01183576153cf000e17dea32144d332cb7b79a9"
memory: "3500 MB"
disks: "local-disk 250 HDD"
bootDiskSizeGb: 15
Expand Down Expand Up @@ -562,7 +562,7 @@ task PopulateFilterSetSites {
>>>

runtime {
docker: "us.gcr.io/broad-dsde-methods/broad-gatk-snapshots:varstore_2023_02_15_7274e012706cb2fa15ed3fb1e12d7e9ae28aa4a1"
docker: "us.gcr.io/broad-dsde-methods/broad-gatk-snapshots:varstore_2023_03_01_b01183576153cf000e17dea32144d332cb7b79a9"
memory: "3500 MB"
disks: "local-disk 200 HDD"
bootDiskSizeGb: 15
Expand Down Expand Up @@ -609,7 +609,7 @@ task PopulateFilterSetTranches {
>>>

runtime {
docker: "us.gcr.io/broad-dsde-methods/broad-gatk-snapshots:varstore_2023_02_15_7274e012706cb2fa15ed3fb1e12d7e9ae28aa4a1"
docker: "us.gcr.io/broad-dsde-methods/broad-gatk-snapshots:varstore_2023_03_01_b01183576153cf000e17dea32144d332cb7b79a9"
memory: "3500 MB"
disks: "local-disk 200 HDD"
bootDiskSizeGb: 15
Expand Down
2 changes: 1 addition & 1 deletion scripts/variantstore/wdl/GvsExtractCallset.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ task ExtractTask {
echo ~{interval_index},${OUTPUT_FILE_DEST},${OUTPUT_FILE_BYTES},${OUTPUT_FILE_INDEX_DEST},${OUTPUT_FILE_INDEX_BYTES} >> manifest.txt
>>>
runtime {
docker: "us.gcr.io/broad-dsde-methods/broad-gatk-snapshots:varstore_2023_02_15_7274e012706cb2fa15ed3fb1e12d7e9ae28aa4a1"
docker: "us.gcr.io/broad-dsde-methods/broad-gatk-snapshots:varstore_2023_03_01_b01183576153cf000e17dea32144d332cb7b79a9"
memory: "12 GB"
disks: "local-disk 150 HDD"
bootDiskSizeGb: 15
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ public class SchemaUtils {
public static final String FILTER_SET_NAME = "filter_set_name";
public static final String VQSLOD = "vqslod";
public static final String CALIBRATION_SENSITIVITY = "calibration_sensitivity";
public static final String SCORE = "score";
public static final String YNG_STATUS = "yng_status";

//Tranches table
Expand All @@ -65,7 +66,7 @@ public class SchemaUtils {

public static final List<String> SAMPLE_FIELDS = Arrays.asList(SchemaUtils.SAMPLE_NAME_FIELD_NAME, SchemaUtils.SAMPLE_ID_FIELD_NAME);
public static final List<String> YNG_FIELDS = Arrays.asList(FILTER_SET_NAME, LOCATION_FIELD_NAME, REF_ALLELE_FIELD_NAME, ALT_ALLELE_FIELD_NAME, VQSLOD, YNG_STATUS);
public static final List<String> VQSLITE_YNG_FIELDS = Arrays.asList(FILTER_SET_NAME, LOCATION_FIELD_NAME, REF_ALLELE_FIELD_NAME, ALT_ALLELE_FIELD_NAME, CALIBRATION_SENSITIVITY, YNG_STATUS);
public static final List<String> VQSLITE_YNG_FIELDS = Arrays.asList(FILTER_SET_NAME, LOCATION_FIELD_NAME, REF_ALLELE_FIELD_NAME, ALT_ALLELE_FIELD_NAME, CALIBRATION_SENSITIVITY, SCORE, YNG_STATUS);
public static final List<String> TRANCHE_FIELDS = Arrays.asList(TARGET_TRUTH_SENSITIVITY, MIN_VQSLOD, TRANCHE_FILTER_NAME, TRANCHE_MODEL);

public static final List<String> ALT_ALLELE_FIELDS = Arrays.asList(LOCATION_FIELD_NAME, SAMPLE_ID_FIELD_NAME, REF_ALLELE_FIELD_NAME, "allele", ALT_ALLELE_FIELD_NAME, "allele_pos", CALL_GT, AS_RAW_MQ, RAW_MQ, AS_RAW_MQRankSum, "raw_mqranksum_x_10", AS_QUALapprox, "qual", AS_RAW_ReadPosRankSum, "raw_readposranksum_x_10", AS_SB_TABLE, "SB_REF_PLUS","SB_REF_MINUS","SB_ALT_PLUS","SB_ALT_MINUS", CALL_AD, "ref_ad", "ad");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,14 +141,14 @@ public enum SensitivityFilteringType { GENOTYPE, SITES, NONE }

@Argument(
fullName ="snps-truth-sensitivity-filter-level",
doc = "The truth sensitivity level at which to start filtering SNPs",
doc = "The truth sensitivity level above which to start filtering SNPs",
optional = true
)
private Double truthSensitivitySNPThreshold = FilterSensitivityTools.DEFAULT_TRUTH_SENSITIVITY_THRESHOLD_SNPS / 100;

@Argument(
fullName = "indels-truth-sensitivity-filter-level",
doc = "The truth sensitivity level at which to start filtering INDELs",
doc = "The truth sensitivity level above which to start filtering INDELs",
optional = true
)
private Double truthSensitivityINDELThreshold = FilterSensitivityTools.DEFAULT_TRUTH_SENSITIVITY_THRESHOLD_INDELS / 100;
Expand Down Expand Up @@ -224,6 +224,7 @@ protected static VCFHeader generateVcfHeader(Set<String> sampleNames,
);
headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.REFERENCE_GENOTYPE_QUALITY));
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AS_VQS_SENS_KEY));
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AS_VQS_SCORE_KEY));
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.AS_YNG_STATUS_KEY));


Expand All @@ -247,6 +248,12 @@ protected String[] customCommandLineValidation() {
errors.add("Parameters 'project-id', 'dataset-id', 'call-set-identifier', 'wdl-step', 'wdl-call', and 'shardIdentifier' must be set if 'cost-observability-tablename' is set.");
}
}
if (truthSensitivitySNPThreshold < 0.0 || truthSensitivitySNPThreshold > 1.0) {
errors.add("Parameter 'snps-truth-sensitivity-filter-level' MUST be between 0.0 and 1.0 NOT: " + truthSensitivitySNPThreshold);
}
if (truthSensitivityINDELThreshold < 0.0 || truthSensitivityINDELThreshold > 1.0) {
errors.add("Parameter 'indels-truth-sensitivity-filter-level' MUST be between 0.0 and 1.0 NOT: " + truthSensitivityINDELThreshold);
}
if (!errors.isEmpty()) {
return errors.toArray(new String[0]);
}
Expand Down Expand Up @@ -274,7 +281,8 @@ protected void onStartup() {
}

if (!sensitivityFilteringType.equals(SensitivityFilteringType.NONE)) {
// TODO - put a validation that sensitivity between 0 and 1
logger.info("Passing all SNP variants with VQSLOD >= " + truthSensitivitySNPThreshold);
logger.info("Passing all INDEL variants with VQSLOD >= " + truthSensitivityINDELThreshold);

extraHeaderLines.add(new VCFFilterHeaderLine(GATKVCFConstants.VQS_SENS_FAILURE_SNP,
"Site failed SNP model calibration sensitivity cutoff (" + truthSensitivitySNPThreshold.toString() + ")"));
Expand Down
Loading

0 comments on commit 5645e88

Please sign in to comment.