Skip to content

Commit

Permalink
[ADAM-1964] Lower point where phred conversions are done using log code.
Browse files Browse the repository at this point in the history
Resolves #1964. There was an underflow somewhere causing round trip from
log space values to Phred to mismatch for values between 164 and 254.
This change lowers the point where we switch from using the precomputed
table to where we use the log math. Also, we add a unit test checking
round trip values using PhredUtils.
  • Loading branch information
Frank Austin Nothaft authored and heuermh committed Mar 23, 2018
1 parent dc3f4a5 commit c15b129
Show file tree
Hide file tree
Showing 7 changed files with 27 additions and 10 deletions.
4 changes: 2 additions & 2 deletions adam-cli/src/test/resources/sorted.lex.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@
##contig=<ID=13,length=249250621>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 NA12891 NA12892
1 14397 . CTGT C 139.12 IndelQD . GT:AD:DP:FT:GQ:PL 0/1:16,4:20:rd:99:120,0,827 0/1:8,2:10:dp;rd:60:60,0,414 0/0:39,0:39:PASS:99:0,116,2114
1 14522 . G A 195.16 VQSRTrancheSNP99.95to100.00 . GT:AD:DP:FT:GQ:PL 0/1:10,5:15:dp:99:99,0,3233 0/1:2,5:7:dp;rd:34:128,0,34 0/0:26,0:26:PASS:78:0,78,783
1 14522 . G A 195.16 VQSRTrancheSNP99.95to100.00 . GT:AD:DP:FT:GQ:PL 0/1:10,5:15:dp:99:99,0,233 0/1:2,5:7:dp;rd:34:128,0,34 0/0:26,0:26:PASS:78:0,78,783
1 63735 rs201888535 CCTA C 2994.09 PASS . GT:AD:DP:FT:GQ:PL 0/0:27,0:27:PASS:79:0,79,1425 0/0:40,0:40:PASS:99:0,117,2120 0/1:23,74:97:rd:99:3034,0,942
13 752721 rs3131972 A G 2486.90 PASS . GT:AD:DP:FT:GQ:PL 1/1:0,27:27:PASS:81:1021,81,0 1/1:0,19:19:dp:57:661,57,0 1/1:0,22:22:PASS:66:831,66,0
13 752791 . A G 2486.90 PASS . GT:AD:DP:FT:GQ:PL:SB 1/1:0,27:27:PASS:81:1021,81,0:0,1,2,3 1/1:0,19:19:dp:57:661,57,0:4,5,6,7 1/1:0,22:22:PASS:66:831,66,0:2,3,4,5
2 19190 . GC G 1186.88 PASS . GT:AD:DP:FT:GQ:PL 0/1:8,14:22:PASS:99:416,0,3233 0/1:18,13:31:PASS:99:353,0,503 0/1:5,15:20:rd:99:457,0,107
2 19190 . GC G 1186.88 PASS . GT:AD:DP:FT:GQ:PL 0/1:8,14:22:PASS:99:416,0,201 0/1:18,13:31:PASS:99:353,0,503 0/1:5,15:20:rd:99:457,0,107
4 changes: 2 additions & 2 deletions adam-cli/src/test/resources/sorted.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@
##contig=<ID=13,length=249250621>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 NA12891 NA12892
1 14397 . CTGT C 139.12 IndelQD . GT:AD:DP:FT:GQ:PL 0/1:16,4:20:rd:99:120,0,827 0/1:8,2:10:dp;rd:60:60,0,414 0/0:39,0:39:PASS:99:0,116,2114
1 14522 . G A 195.16 VQSRTrancheSNP99.95to100.00 . GT:AD:DP:FT:GQ:PL 0/1:10,5:15:dp:99:99,0,3233 0/1:2,5:7:dp;rd:34:128,0,34 0/0:26,0:26:PASS:78:0,78,783
1 14522 . G A 195.16 VQSRTrancheSNP99.95to100.00 . GT:AD:DP:FT:GQ:PL 0/1:10,5:15:dp:99:99,0,233 0/1:2,5:7:dp;rd:34:128,0,34 0/0:26,0:26:PASS:78:0,78,783
1 63735 rs201888535 CCTA C 2994.09 PASS . GT:AD:DP:FT:GQ:PL 0/0:27,0:27:PASS:79:0,79,1425 0/0:40,0:40:PASS:99:0,117,2120 0/1:23,74:97:rd:99:3034,0,942
2 19190 . GC G 1186.88 PASS . GT:AD:DP:FT:GQ:PL 0/1:8,14:22:PASS:99:416,0,3233 0/1:18,13:31:PASS:99:353,0,503 0/1:5,15:20:rd:99:457,0,107
2 19190 . GC G 1186.88 PASS . GT:AD:DP:FT:GQ:PL 0/1:8,14:22:PASS:99:416,0,201 0/1:18,13:31:PASS:99:353,0,503 0/1:5,15:20:rd:99:457,0,107
13 752721 rs3131972 A G 2486.90 PASS . GT:AD:DP:FT:GQ:PL 1/1:0,27:27:PASS:81:1021,81,0 1/1:0,19:19:dp:57:661,57,0 1/1:0,22:22:PASS:66:831,66,0
13 752791 . A G 2486.90 PASS . GT:AD:DP:FT:GQ:PL:SB 1/1:0,27:27:PASS:81:1021,81,0:0,1,2,3 1/1:0,19:19:dp:57:661,57,0:4,5,6,7 1/1:0,22:22:PASS:66:831,66,0:2,3,4,5
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ object PhredUtils extends Serializable {
* @return The input phred score as a log success probability.
*/
def phredToLogProbability(phred: Int): Double = {
if (phred < 255) {
if (phred < 156) {
log(phredToSuccessProbability(phred)).toFloat
} else {
log1p(-exp(phred * MLOG10_DIV10))
Expand Down
4 changes: 2 additions & 2 deletions adam-core/src/test/resources/sorted.lex.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@
##contig=<ID=13,length=249250621>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 NA12891 NA12892
1 14397 . CTGT C 139.12 IndelQD AC=2;AF=0.333;AN=6;BaseQRankSum=1.8;ClippingRankSum=0.138;DP=69;FS=7.786;MLEAC=2;MLEAF=0.333;MQ=26.84;MQ0=0;MQRankSum=-1.906;QD=1.55;ReadPosRankSum=0.384 GT:AD:DP:FT:GQ:PL 0/1:16,4:20:rd:99:120,0,827 0/1:8,2:10:dp;rd:60:60,0,414 0/0:39,0:39:PASS:99:0,116,2114
1 14522 . G A 195.16 VQSRTrancheSNP99.95to100.00 AC=2;AF=0.333;AN=6;BaseQRankSum=2.044;ClippingRankSum=-2.196;DP=48;FS=13.179;MLEAC=2;MLEAF=0.333;MQ=25.89;MQ0=0;MQRankSum=-0.063;QD=8.87;ReadPosRankSum=0.952;VQSLOD=-3.333;culprit=MQ GT:AD:DP:FT:GQ:PL 0/1:10,5:15:dp:99:99,0,3233 0/1:2,5:7:dp;rd:34:128,0,34 0/0:26,0:26:PASS:78:0,78,783
1 14522 . G A 195.16 VQSRTrancheSNP99.95to100.00 AC=2;AF=0.333;AN=6;BaseQRankSum=2.044;ClippingRankSum=-2.196;DP=48;FS=13.179;MLEAC=2;MLEAF=0.333;MQ=25.89;MQ0=0;MQRankSum=-0.063;QD=8.87;ReadPosRankSum=0.952;VQSLOD=-3.333;culprit=MQ GT:AD:DP:FT:GQ:PL 0/1:10,5:15:dp:99:99,0,233 0/1:2,5:7:dp;rd:34:128,0,34 0/0:26,0:26:PASS:78:0,78,783
1 63735 rs201888535 CCTA C 2994.09 PASS AC=1;AF=0.167;AN=6;BaseQRankSum=1.138;ClippingRankSum=0.448;DB;DP=176;FS=13.597;MLEAC=1;MLEAF=0.167;MQ=31.06;MQ0=0;MQRankSum=0.636;QD=9.98;ReadPosRankSum=-1.18 GT:AD:DP:FT:GQ:PL 0/0:27,0:27:PASS:79:0,79,1425 0/0:40,0:40:PASS:99:0,117,2120 0/1:23,74:97:rd:99:3034,0,942
13 752721 rs3131972 A G 2486.90 PASS AC=6;AF=1.0;AN=6;DB;DP=69;FS=0.0;MLEAC=6;MLEAF=1.0;MQ=60.0;MQ0=0;POSITIVE_TRAIN_SITE;QD=31.67;VQSLOD=18.94;culprit=QD GT:AD:DP:FT:GQ:PL 1/1:0,27:27:PASS:81:1021,81,0 1/1:0,19:19:dp:57:661,57,0 1/1:0,22:22:PASS:66:831,66,0
13 752791 . A G 2486.90 PASS AC=6;AF=1.0;AN=6;DB;DP=69;FS=0.0;MLEAC=6;MLEAF=1.0;MQ=60.0;MQ0=0;POSITIVE_TRAIN_SITE;QD=31.67;VQSLOD=18.94;culprit=QD GT:AD:DP:FT:GQ:PL:SB 1/1:0,27:27:PASS:81:1021,81,0:0,1,2,3 1/1:0,19:19:dp:57:661,57,0:4,5,6,7 1/1:0,22:22:PASS:66:831,66,0:2,3,4,5
2 19190 . GC G 1186.88 PASS AC=3;AF=0.5;AN=6;BaseQRankSum=4.157;ClippingRankSum=3.666;DP=74;FS=37.037;MLEAC=3;MLEAF=0.5;MQ=22.26;MQ0=0;MQRankSum=0.195;QD=16.04;ReadPosRankSum=-4.072 GT:AD:DP:FT:GQ:PL 0/1:8,14:22:PASS:99:416,0,3233 0/1:18,13:31:PASS:99:353,0,503 0/1:5,15:20:rd:99:457,0,107
2 19190 . GC G 1186.88 PASS AC=3;AF=0.5;AN=6;BaseQRankSum=4.157;ClippingRankSum=3.666;DP=74;FS=37.037;MLEAC=3;MLEAF=0.5;MQ=22.26;MQ0=0;MQRankSum=0.195;QD=16.04;ReadPosRankSum=-4.072 GT:AD:DP:FT:GQ:PL 0/1:8,14:22:PASS:99:416,0,201 0/1:18,13:31:PASS:99:353,0,503 0/1:5,15:20:rd:99:457,0,107
4 changes: 2 additions & 2 deletions adam-core/src/test/resources/sorted.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@
##contig=<ID=13,length=249250621>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 NA12891 NA12892
1 14397 . CTGT C 139.12 IndelQD AC=2;AF=0.333;AN=6;BaseQRankSum=1.8;ClippingRankSum=0.138;DP=69;FS=7.786;MLEAC=2;MLEAF=0.333;MQ=26.84;MQ0=0;MQRankSum=-1.906;QD=1.55;ReadPosRankSum=0.384 GT:AD:DP:FT:GQ:PL 0/1:16,4:20:rd:99:120,0,827 0/1:8,2:10:dp;rd:60:60,0,414 0/0:39,0:39:PASS:99:0,116,2114
1 14522 . G A 195.16 VQSRTrancheSNP99.95to100.00 AC=2;AF=0.333;AN=6;BaseQRankSum=2.044;ClippingRankSum=-2.196;DP=48;FS=13.179;MLEAC=2;MLEAF=0.333;MQ=25.89;MQ0=0;MQRankSum=-0.063;QD=8.87;ReadPosRankSum=0.952;VQSLOD=-3.333;culprit=MQ GT:AD:DP:FT:GQ:PL 0/1:10,5:15:dp:99:99,0,3233 0/1:2,5:7:dp;rd:34:128,0,34 0/0:26,0:26:PASS:78:0,78,783
1 14522 . G A 195.16 VQSRTrancheSNP99.95to100.00 AC=2;AF=0.333;AN=6;BaseQRankSum=2.044;ClippingRankSum=-2.196;DP=48;FS=13.179;MLEAC=2;MLEAF=0.333;MQ=25.89;MQ0=0;MQRankSum=-0.063;QD=8.87;ReadPosRankSum=0.952;VQSLOD=-3.333;culprit=MQ GT:AD:DP:FT:GQ:PL 0/1:10,5:15:dp:99:99,0,233 0/1:2,5:7:dp;rd:34:128,0,34 0/0:26,0:26:PASS:78:0,78,783
1 63735 rs201888535 CCTA C 2994.09 PASS AC=1;AF=0.167;AN=6;BaseQRankSum=1.138;ClippingRankSum=0.448;DB;DP=176;FS=13.597;MLEAC=1;MLEAF=0.167;MQ=31.06;MQ0=0;MQRankSum=0.636;QD=9.98;ReadPosRankSum=-1.18 GT:AD:DP:FT:GQ:PL 0/0:27,0:27:PASS:79:0,79,1425 0/0:40,0:40:PASS:99:0,117,2120 0/1:23,74:97:rd:99:3034,0,942
2 19190 . GC G 1186.88 PASS AC=3;AF=0.5;AN=6;BaseQRankSum=4.157;ClippingRankSum=3.666;DP=74;FS=37.037;MLEAC=3;MLEAF=0.5;MQ=22.26;MQ0=0;MQRankSum=0.195;QD=16.04;ReadPosRankSum=-4.072 GT:AD:DP:FT:GQ:PL 0/1:8,14:22:PASS:99:416,0,3233 0/1:18,13:31:PASS:99:353,0,503 0/1:5,15:20:rd:99:457,0,107
2 19190 . GC G 1186.88 PASS AC=3;AF=0.5;AN=6;BaseQRankSum=4.157;ClippingRankSum=3.666;DP=74;FS=37.037;MLEAC=3;MLEAF=0.5;MQ=22.26;MQ0=0;MQRankSum=0.195;QD=16.04;ReadPosRankSum=-4.072 GT:AD:DP:FT:GQ:PL 0/1:8,14:22:PASS:99:416,0,201 0/1:18,13:31:PASS:99:353,0,503 0/1:5,15:20:rd:99:457,0,107
13 752721 rs3131972 A G 2486.90 PASS AC=6;AF=1.0;AN=6;DB;DP=69;FS=0.0;MLEAC=6;MLEAF=1.0;MQ=60.0;MQ0=0;POSITIVE_TRAIN_SITE;QD=31.67;VQSLOD=18.94;culprit=QD GT:AD:DP:FT:GQ:PL 1/1:0,27:27:PASS:81:1021,81,0 1/1:0,19:19:dp:57:661,57,0 1/1:0,22:22:PASS:66:831,66,0
13 752791 . A G 2486.90 PASS AC=6;AF=1.0;AN=6;DB;DP=69;FS=0.0;MLEAC=6;MLEAF=1.0;MQ=60.0;MQ0=0;POSITIVE_TRAIN_SITE;QD=31.67;VQSLOD=18.94;culprit=QD GT:AD:DP:FT:GQ:PL:SB 1/1:0,27:27:PASS:81:1021,81,0:0,1,2,3 1/1:0,19:19:dp:57:661,57,0:4,5,6,7 1/1:0,22:22:PASS:66:831,66,0:2,3,4,5
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ class VariantContextConverterSuite extends ADAMFunSuite {
assert(adamGT1.getGenotypeLikelihoods
.map(d => d: scala.Double)
.map(PhredUtils.logProbabilityToPhred)
.sameElements(List(59, 0, 3233)))
.sameElements(List(59, 0, 181)))

assert(adamGT2.getAlleles.sameElements(List(GenotypeAllele.OTHER_ALT, GenotypeAllele.ALT)))
assert(adamGT2.getAlternateReadDepth === 3)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,21 @@ class PhredUtilsSuite extends FunSuite {
val phred = PhredUtils.logProbabilityToPhred(-0.0)
assert(phred === 3233)
}

test("round trip log probabilities") {
def roundTrip(i: Int): Int = {
PhredUtils.logProbabilityToPhred(PhredUtils.phredToLogProbability(i))
}

(0 to 3228).foreach(i => {
assert(i === roundTrip(i))
})

// there is roundoff above 3228 due to floating point underflow
assert(3228 === roundTrip(3229))
assert(3230 === roundTrip(3230))
assert(3230 === roundTrip(3231))
assert(3233 === roundTrip(3232))
assert(3233 === roundTrip(3233))
}
}

0 comments on commit c15b129

Please sign in to comment.