Skip to content

Commit

Permalink
Fix to quality score calculation for sites with spanning deletions (#…
Browse files Browse the repository at this point in the history
…6859)

* fix off-by one error that was causing the qual calculation to miss probability assigned to homozygous star allele genotypes
* make sure we output all sites if forceOutput it true, even if low-qual 
* updates to handle the case where the only alleles are ref and star
* add update expected results option to GenotypeGVCFsIntegrationTest
* update integration test files
  • Loading branch information
cwhelan authored and mwalker174 committed Nov 3, 2020
1 parent 24273bc commit 1218956
Show file tree
Hide file tree
Showing 60 changed files with 1,758 additions and 1,794 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ public void apply(final Locatable loc, List<VariantContext> variants, ReadsConte

if (regenotypedVC != null) {
final SimpleInterval variantStart = new SimpleInterval(regenotypedVC.getContig(), regenotypedVC.getStart(), regenotypedVC.getStart());
if ((inForceOutputIntervals || !GATKVariantContextUtils.isSpanningDeletionOnly(regenotypedVC)) &&
if ((forceOutput || !GATKVariantContextUtils.isSpanningDeletionOnly(regenotypedVC)) &&
(!onlyOutputCallsStartingInIntervals || intervals.stream().anyMatch(interval -> interval.contains (variantStart)))) {
vcfWriter.add(regenotypedVC);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,7 @@ public VariantContextWriter setupVCFWriter(Set<VCFHeaderLine> defaultToolVCFHead
if ( dbsnp.dbsnp != null ) {
VCFStandardHeaderLines.addStandardInfoLines(headerLines, true, VCFConstants.DBSNP_KEY);
}
headerLines.add(GATKVCFHeaderLines.getFilterLine(GATKVCFConstants.LOW_QUAL_FILTER_NAME));

final Set<String> sampleNameSet = samples.asSetOfSamples();
outputHeader = new VCFHeader(headerLines, new TreeSet<>(sampleNameSet));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ private static int[] genotypeIndicesWithOnlyRefAndSpanDel(final int ploidy, fina
} else {
final int spanDelIndex = alleles.indexOf(Allele.SPAN_DEL);
// allele counts are in the GenotypeLikelihoodCalculator format of {ref index, ref count, span del index, span del count}
return new IndexRange(0, ploidy).mapToInteger(n -> glCalc.alleleCountsToIndex(new int[]{0, ploidy - n, spanDelIndex, n}));
return new IndexRange(0, ploidy + 1).mapToInteger(n -> glCalc.alleleCountsToIndex(new int[]{0, ploidy - n, spanDelIndex, n}));
}
}

Expand Down Expand Up @@ -162,7 +162,7 @@ public AFCalculationResult calculate(final VariantContext vc, final int defaultP
}

// if the VC is biallelic the allele-specific qual equals the variant qual
if (numAlleles == 2) {
if (numAlleles == 2 && !spanningDeletionPresent) {
continue;
}

Expand All @@ -183,7 +183,7 @@ public AFCalculationResult calculate(final VariantContext vc, final int defaultP
}

// for biallelic the allele-specific qual equals the variant qual, and we short-circuited the calculation above
if (numAlleles == 2) {
if (numAlleles == 2 && !spanningDeletionPresent) {
log10POfZeroCountsByAllele[1] = log10PNoVariant;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@

public class GenotypeGVCFsIntegrationTest extends CommandLineProgramTest {

// If true, update the expected outputs in tests that assert an exact match vs. prior output,
// instead of actually running the tests. Can be used with "./gradlew test -Dtest.single=GenotypeGVCFsIntegrationTest"
// to update all of the exact-match tests at once. After you do this, you should look at the
// diffs in the new expected outputs in git to confirm that they are consistent with expectations.
public static final boolean UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS = false;

private static final List<String> NO_EXTRA_ARGS = Collections.emptyList();
private static final String BASE_PAIR_EXPECTED = "gvcf.basepairResolution.gatk3.7_30_ga4f720357.output.vcf";
private static final String b38_reference_20_21 = largeFileTestDir + "Homo_sapiens_assembly38.20.21.fasta";
Expand Down Expand Up @@ -163,6 +169,14 @@ public Object[][] GVCFsWithNewMQFormat() {
};
}

/*
* Make sure that someone didn't leave the UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS toggle turned on
*/
@Test
public void assertThatExpectedOutputUpdateToggleIsDisabled() {
Assert.assertFalse(UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS, "The toggle to update expected outputs should not be left enabled");
}

/*
This test is useful for testing changes in GATK4 versus different versions of GATK3.
To use, set GATK3_PATH to point to a particular version of gatk, then enable this test and run.
Expand Down Expand Up @@ -354,25 +368,28 @@ private void runGenotypeGVCFSAndAssertSomething(File input, File expected, List<
}

private void runGenotypeGVCFSAndAssertSomething(String input, File expected, List<String> additionalArguments, BiConsumer<VariantContext, VariantContext> assertion, String reference) throws IOException {
final File output = createTempFile("genotypegvcf", ".vcf");
final File output = UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS ? expected : createTempFile("genotypegvcf", ".vcf");

final ArgumentsBuilder args = new ArgumentsBuilder();
args.addReference(new File(reference))
.add("V", input)
.addFlag(RMSMappingQuality.RMS_MAPPING_QUALITY_OLD_BEHAVIOR_OVERRIDE_ARGUMENT)
.add(StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, false)
.addOutput(output);

additionalArguments.forEach(args::addRaw);

Utils.resetRandomGenerator();
runCommandLine(args);

final List<VariantContext> expectedVC = VariantContextTestUtils.getVariantContexts(expected);
final List<VariantContext> actualVC = VariantContextTestUtils.getVariantContexts(output);
try {
assertForEachElementInLists(actualVC, expectedVC, assertion);
} catch (final AssertionError error) {
throw error;
if (! UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS) {
final List<VariantContext> expectedVC = VariantContextTestUtils.getVariantContexts(expected);
final List<VariantContext> actualVC = VariantContextTestUtils.getVariantContexts(output);
try {
assertForEachElementInLists(actualVC, expectedVC, assertion);
} catch (final AssertionError error) {
throw error;
}
}
}

Expand Down
Git LFS file not shown
Git LFS file not shown
Original file line number Diff line number Diff line change
Expand Up @@ -916,7 +916,7 @@
20 10068151 . A <NON_REF> . . END=10068155 GT:DP:GQ:MIN_DP:PL 0/0:29:63:28:0,63,945
20 10068156 . A <NON_REF> . . END=10068157 GT:DP:GQ:MIN_DP:PL 0/0:27:57:27:0,57,855
20 10068158 . GTGTATATATATA G,<NON_REF> 97.60 . AS_RAW_BaseQRankSum=|-0.9,1|NaN;AS_RAW_MQ=5282.00|8882.00|0.00;AS_RAW_MQRankSum=|0.3,1|NaN;AS_RAW_ReadPosRankSum=|0.5,1|NaN;AS_SB_TABLE=0,3|2,2|0,0;BaseQRankSum=-0.842;DP=28;ExcessHet=3.0103;MLEAC=1,0;MLEAF=0.500,0.00;MQRankSum=0.328;RAW_MQandDP=89764,28;ReadPosRankSum=0.524 GT:AD:DP:GQ:PL:SB 0/1:3,4,0:7:57:105,0,57,114,69,183:0,3,2,2
20 10068160 . GTATATATATATGTA G,*,<NON_REF> 88.01 . AS_RAW_BaseQRankSum=|||;AS_RAW_MQ=0.00|1682.00|8882.00|0.00;AS_RAW_MQRankSum=|||;AS_RAW_ReadPosRankSum=|||;AS_SB_TABLE=0,0|0,2|2,2|0,0;DP=32;ExcessHet=3.0103;MLEAC=1,1,0;MLEAF=0.500,0.500,0.00;RAW_MQandDP=101405,32 GT:AD:DP:GQ:PL:SB 1/2:0,2,4,0:6:53:706,158,131,98,0,53,489,171,104,458:0,0,2,4
20 10068160 . GTATATATATATGTA G,*,<NON_REF> 56.01 . AS_RAW_BaseQRankSum=|||;AS_RAW_MQ=0.00|1682.00|8882.00|0.00;AS_RAW_MQRankSum=|||;AS_RAW_ReadPosRankSum=|||;AS_SB_TABLE=0,0|0,2|2,2|0,0;DP=32;ExcessHet=3.0103;MLEAC=1,1,0;MLEAF=0.500,0.500,0.00;RAW_MQandDP=101405,32 GT:AD:DP:GQ:PL:SB 1/2:0,2,4,0:6:53:706,158,131,98,0,53,489,171,104,458:0,0,2,4
20 10068175 . T <NON_REF> . . END=10068175 GT:DP:GQ:MIN_DP:PL 0/0:20:18:20:0,18,270
20 10068176 . A <NON_REF> . . END=10068180 GT:DP:GQ:MIN_DP:PL 0/0:19:24:17:0,24,360
20 10068181 . T <NON_REF> . . END=10068185 GT:DP:GQ:MIN_DP:PL 0/0:16:27:16:0,27,405
Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -909,7 +909,7 @@
20 10068151 . A <NON_REF> . . END=10068155 GT:DP:GQ:MIN_DP:PL 0/0:29:63:28:0,63,945
20 10068156 . A <NON_REF> . . END=10068157 GT:DP:GQ:MIN_DP:PL 0/0:27:57:27:0,57,855
20 10068158 . GTGTATATATATA G,<NON_REF> 97.60 . BaseQRankSum=-0.842;DP=28;ExcessHet=3.0103;MLEAC=1,0;MLEAF=0.500,0.00;MQRankSum=0.328;RAW_MQandDP=89764,28;ReadPosRankSum=0.524 GT:AD:DP:GQ:PL:SB 0/1:3,4,0:7:57:105,0,57,114,69,183:0,3,2,2
20 10068160 . GTATATATATATGTA G,*,<NON_REF> 88.01 . DP=32;ExcessHet=3.0103;MLEAC=1,1,0;MLEAF=0.500,0.500,0.00;RAW_MQandDP=101405,32 GT:AD:DP:GQ:PL:SB 1/2:0,2,4,0:6:53:706,158,131,98,0,53,489,171,104,458:0,0,2,4
20 10068160 . GTATATATATATGTA G,*,<NON_REF> 56.01 . DP=32;ExcessHet=3.0103;MLEAC=1,1,0;MLEAF=0.500,0.500,0.00;RAW_MQandDP=101405,32 GT:AD:DP:GQ:PL:SB 1/2:0,2,4,0:6:53:706,158,131,98,0,53,489,171,104,458:0,0,2,4
20 10068175 . T <NON_REF> . . END=10068175 GT:DP:GQ:MIN_DP:PL 0/0:20:18:20:0,18,270
20 10068176 . A <NON_REF> . . END=10068180 GT:DP:GQ:MIN_DP:PL 0/0:19:24:17:0,24,360
20 10068181 . T <NON_REF> . . END=10068185 GT:DP:GQ:MIN_DP:PL 0/0:16:27:16:0,27,405
Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@
20 10067264 . G A 2937.06 . AC=2;AF=1.00;AN=2;DP=73;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.37;QD=29.53;SOR=0.776 GT:AD:DP:GQ:PL 1/1:0,73:73:99:2951,219,0
20 10067722 . A C 2161.06 . AC=2;AF=1.00;AN=2;DP=59;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=32.63;SOR=0.846 GT:AD:DP:GQ:PL 1/1:0,54:54:99:2175,163,0
20 10068158 . GTGTATATATATA G 97.60 . AC=1;AF=0.500;AN=2;BaseQRankSum=-0.842;DP=28;ExcessHet=3.0103;FS=3.680;MLEAC=1;MLEAF=0.500;MQ=56.62;MQRankSum=0.328;QD=13.94;ReadPosRankSum=0.524;SOR=0.061 GT:AD:DP:GQ:PL 0/1:3,4:7:57:105,0,57
20 10068160 . GTATATATATATGTA G,* 88.01 . AC=1,1;AF=0.500,0.500;AN=2;DP=32;ExcessHet=3.0103;FS=0.000;MLEAC=1,1;MLEAF=0.500,0.500;MQ=56.29;QD=14.67;SOR=1.329 GT:AD:DP:GQ:PL 1/2:0,2,4:6:53:706,158,131,98,0,53
20 10068160 . GTATATATATATGTA G,* 56.01 . AC=1,1;AF=0.500,0.500;AN=2;DP=32;ExcessHet=3.0103;FS=0.000;MLEAC=1,1;MLEAF=0.500,0.500;MQ=56.29;QD=9.33;SOR=1.329 GT:AD:DP:GQ:PL 1/2:0,2,4:6:53:706,158,131,98,0,53
20 10068981 . G A 2528.06 . AC=2;AF=1.00;AN=2;DP=65;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=30.36;SOR=0.941 GT:AD:DP:GQ:PL 1/1:0,61:61:99:2542,184,0
20 10070602 . T C 2706.06 . AC=2;AF=1.00;AN=2;DP=76;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=58.78;QD=28.13;SOR=0.749 GT:AD:DP:GQ:PL 1/1:0,72:72:99:2720,216,0
20 10070936 . T A 3646.06 . AC=2;AF=1.00;AN=2;DP=86;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=58.96;QD=27.97;SOR=0.846 GT:AD:DP:GQ:PL 1/1:0,82:82:99:3660,247,0
Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@
20 10067264 . G A 2937.06 . AC=2;AF=1.00;AN=2;DP=73;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.37;QD=33.11;SOR=0.776 GT:AD:DP:GQ:PL 1/1:0,73:73:99:2951,219,0
20 10067722 . A C 2161.06 . AC=2;AF=1.00;AN=2;DP=59;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=30.20;SOR=0.846 GT:AD:DP:GQ:PL 1/1:0,54:54:99:2175,163,0
20 10068158 . GTGTATATATATA G 97.60 . AC=1;AF=0.500;AN=2;BaseQRankSum=-0.842;DP=28;ExcessHet=3.0103;FS=3.680;MLEAC=1;MLEAF=0.500;MQ=56.62;MQRankSum=0.328;QD=13.94;ReadPosRankSum=0.524;SOR=0.061 GT:AD:DP:GQ:PL 0/1:3,4:7:57:105,0,57
20 10068160 . GTATATATATATGTA G,* 88.01 . AC=1,1;AF=0.500,0.500;AN=2;DP=32;ExcessHet=3.0103;FS=0.000;MLEAC=1,1;MLEAF=0.500,0.500;MQ=56.29;QD=14.67;SOR=1.329 GT:AD:DP:GQ:PL 1/2:0,2,4:6:53:706,158,131,98,0,53
20 10068160 . GTATATATATATGTA G,* 56.01 . AC=1,1;AF=0.500,0.500;AN=2;DP=32;ExcessHet=3.0103;FS=0.000;MLEAC=1,1;MLEAF=0.500,0.500;MQ=56.29;QD=9.33;SOR=1.329 GT:AD:DP:GQ:PL 1/2:0,2,4:6:53:706,158,131,98,0,53
20 10068981 . G A 2518.06 . AC=2;AF=1.00;AN=2;DP=65;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=30.49;SOR=0.941 GT:AD:DP:GQ:PL 1/1:0,61:61:99:2532,184,0
20 10070602 . T C 2706.06 . AC=2;AF=1.00;AN=2;DP=76;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=58.78;QD=31.67;SOR=0.749 GT:AD:DP:GQ:PL 1/1:0,72:72:99:2720,216,0
20 10070936 . T A 3646.06 . AC=2;AF=1.00;AN=2;DP=86;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=58.96;QD=29.53;SOR=0.846 GT:AD:DP:GQ:PL 1/1:0,82:82:99:3660,247,0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@
20 10067264 . G A 2937.06 . AC=2;AF=1.00;AN=2;DP=73;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=59.37;QD=33.11;SOR=0.776 GT:AD:DP:GQ:PL 1/1:0,73:73:99:2951,219,0
20 10067722 . A C 2161.06 . AC=2;AF=1.00;AN=2;DP=59;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=30.20;SOR=0.846 GT:AD:DP:GQ:PL 1/1:0,54:54:99:2175,163,0
20 10068158 . GTGTATATATATA G 97.60 . AC=1;AF=0.500;AN=2;BaseQRankSum=-0.842;DP=28;ExcessHet=3.0103;FS=3.680;MLEAC=1;MLEAF=0.500;MQ=56.62;MQRankSum=0.328;QD=13.94;ReadPosRankSum=0.524;SOR=0.061 GT:AD:DP:GQ:PL 0/1:3,4:7:57:105,0,57
20 10068160 . GTATATATATATGTA G,* 88.01 . AC=1,1;AF=0.500,0.500;AN=2;DP=32;ExcessHet=3.0103;FS=0.000;MLEAC=1,1;MLEAF=0.500,0.500;MQ=56.29;QD=14.67;SOR=1.329 GT:AD:DP:GQ:PL 1/2:0,2,4:6:53:706,158,131,98,0,53
20 10068160 . GTATATATATATGTA G,* 56.01 . AC=1,1;AF=0.500,0.500;AN=2;DP=32;ExcessHet=3.0103;FS=0.000;MLEAC=1,1;MLEAF=0.500,0.500;MQ=56.29;QD=9.33;SOR=1.329 GT:AD:DP:GQ:PL 1/2:0,2,4:6:53:706,158,131,98,0,53
20 10068981 . G A 2518.06 . AC=2;AF=1.00;AN=2;DP=65;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=30.49;SOR=0.941 GT:AD:DP:GQ:PL 1/1:0,61:61:99:2532,184,0
20 10070602 . T C 2706.06 . AC=2;AF=1.00;AN=2;DP=76;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=58.78;QD=31.67;SOR=0.749 GT:AD:DP:GQ:PL 1/1:0,72:72:99:2720,216,0
20 10070936 . T A 3646.06 . AC=2;AF=1.00;AN=2;DP=86;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=58.96;QD=29.53;SOR=0.846 GT:AD:DP:GQ:PL 1/1:0,82:82:99:3660,247,0
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading

0 comments on commit 1218956

Please sign in to comment.