Skip to content

Commit

Permalink
[ADAM-1934] Clip phred values to 3233, instead of Int.MaxValue.
Browse files Browse the repository at this point in the history
Resolves #1934. When phred values underflow log space double precision floating
point numbers, clip them to the lowest phred score representable as a log valued
double (3233), instead of Int.MaxValue.
  • Loading branch information
Frank Austin Nothaft authored and heuermh committed Mar 4, 2018
1 parent babf839 commit 8248447
Show file tree
Hide file tree
Showing 7 changed files with 23 additions and 13 deletions.
4 changes: 2 additions & 2 deletions adam-cli/src/test/resources/sorted.lex.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@
##contig=<ID=13,length=249250621>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 NA12891 NA12892
1 14397 . CTGT C 139.12 IndelQD . GT:AD:DP:FT:GQ:PL 0/1:16,4:20:rd:99:120,0,827 0/1:8,2:10:dp;rd:60:60,0,414 0/0:39,0:39:PASS:99:0,116,2114
1 14522 . G A 195.16 VQSRTrancheSNP99.95to100.00 . GT:AD:DP:FT:GQ:PL 0/1:10,5:15:dp:99:99,0,2147483647 0/1:2,5:7:dp;rd:34:128,0,34 0/0:26,0:26:PASS:78:0,78,783
1 14522 . G A 195.16 VQSRTrancheSNP99.95to100.00 . GT:AD:DP:FT:GQ:PL 0/1:10,5:15:dp:99:99,0,3233 0/1:2,5:7:dp;rd:34:128,0,34 0/0:26,0:26:PASS:78:0,78,783
1 63735 rs201888535 CCTA C 2994.09 PASS . GT:AD:DP:FT:GQ:PL 0/0:27,0:27:PASS:79:0,79,1425 0/0:40,0:40:PASS:99:0,117,2120 0/1:23,74:97:rd:99:3034,0,942
13 752721 rs3131972 A G 2486.90 PASS . GT:AD:DP:FT:GQ:PL 1/1:0,27:27:PASS:81:1021,81,0 1/1:0,19:19:dp:57:661,57,0 1/1:0,22:22:PASS:66:831,66,0
13 752791 . A G 2486.90 PASS . GT:AD:DP:FT:GQ:PL:SB 1/1:0,27:27:PASS:81:1021,81,0:0,1,2,3 1/1:0,19:19:dp:57:661,57,0:4,5,6,7 1/1:0,22:22:PASS:66:831,66,0:2,3,4,5
2 19190 . GC G 1186.88 PASS . GT:AD:DP:FT:GQ:PL 0/1:8,14:22:PASS:99:416,0,2147483647 0/1:18,13:31:PASS:99:353,0,503 0/1:5,15:20:rd:99:457,0,107
2 19190 . GC G 1186.88 PASS . GT:AD:DP:FT:GQ:PL 0/1:8,14:22:PASS:99:416,0,3233 0/1:18,13:31:PASS:99:353,0,503 0/1:5,15:20:rd:99:457,0,107
4 changes: 2 additions & 2 deletions adam-cli/src/test/resources/sorted.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@
##contig=<ID=13,length=249250621>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 NA12891 NA12892
1 14397 . CTGT C 139.12 IndelQD . GT:AD:DP:FT:GQ:PL 0/1:16,4:20:rd:99:120,0,827 0/1:8,2:10:dp;rd:60:60,0,414 0/0:39,0:39:PASS:99:0,116,2114
1 14522 . G A 195.16 VQSRTrancheSNP99.95to100.00 . GT:AD:DP:FT:GQ:PL 0/1:10,5:15:dp:99:99,0,2147483647 0/1:2,5:7:dp;rd:34:128,0,34 0/0:26,0:26:PASS:78:0,78,783
1 14522 . G A 195.16 VQSRTrancheSNP99.95to100.00 . GT:AD:DP:FT:GQ:PL 0/1:10,5:15:dp:99:99,0,3233 0/1:2,5:7:dp;rd:34:128,0,34 0/0:26,0:26:PASS:78:0,78,783
1 63735 rs201888535 CCTA C 2994.09 PASS . GT:AD:DP:FT:GQ:PL 0/0:27,0:27:PASS:79:0,79,1425 0/0:40,0:40:PASS:99:0,117,2120 0/1:23,74:97:rd:99:3034,0,942
2 19190 . GC G 1186.88 PASS . GT:AD:DP:FT:GQ:PL 0/1:8,14:22:PASS:99:416,0,2147483647 0/1:18,13:31:PASS:99:353,0,503 0/1:5,15:20:rd:99:457,0,107
2 19190 . GC G 1186.88 PASS . GT:AD:DP:FT:GQ:PL 0/1:8,14:22:PASS:99:416,0,3233 0/1:18,13:31:PASS:99:353,0,503 0/1:5,15:20:rd:99:457,0,107
13 752721 rs3131972 A G 2486.90 PASS . GT:AD:DP:FT:GQ:PL 1/1:0,27:27:PASS:81:1021,81,0 1/1:0,19:19:dp:57:661,57,0 1/1:0,22:22:PASS:66:831,66,0
13 752791 . A G 2486.90 PASS . GT:AD:DP:FT:GQ:PL:SB 1/1:0,27:27:PASS:81:1021,81,0:0,1,2,3 1/1:0,19:19:dp:57:661,57,0:4,5,6,7 1/1:0,22:22:PASS:66:831,66,0:2,3,4,5
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,9 @@ object PhredUtils extends Serializable {
* @return Returns this probability as a Phred score. If the log value is 0.0,
* we clip the phred score to Int.MaxValue.
*/
def logProbabilityToPhred(p: Double): Int = if (p == 0.0) {
Int.MaxValue
def logProbabilityToPhred(p: Double): Int = if (p == 0.0 || p == -0.0) {
// doubles in log space underflow at the probability equivalent to Phred 3233
3233
} else {
round(M10_DIV_LOG10 * log(-expm1(p))).toInt
}
Expand Down
4 changes: 2 additions & 2 deletions adam-core/src/test/resources/sorted.lex.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@
##contig=<ID=13,length=249250621>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 NA12891 NA12892
1 14397 . CTGT C 139.12 IndelQD AC=2;AF=0.333;AN=6;BaseQRankSum=1.8;ClippingRankSum=0.138;DP=69;FS=7.786;MLEAC=2;MLEAF=0.333;MQ=26.84;MQ0=0;MQRankSum=-1.906;QD=1.55;ReadPosRankSum=0.384 GT:AD:DP:FT:GQ:PL 0/1:16,4:20:rd:99:120,0,827 0/1:8,2:10:dp;rd:60:60,0,414 0/0:39,0:39:PASS:99:0,116,2114
1 14522 . G A 195.16 VQSRTrancheSNP99.95to100.00 AC=2;AF=0.333;AN=6;BaseQRankSum=2.044;ClippingRankSum=-2.196;DP=48;FS=13.179;MLEAC=2;MLEAF=0.333;MQ=25.89;MQ0=0;MQRankSum=-0.063;QD=8.87;ReadPosRankSum=0.952;VQSLOD=-3.333;culprit=MQ GT:AD:DP:FT:GQ:PL 0/1:10,5:15:dp:99:99,0,2147483647 0/1:2,5:7:dp;rd:34:128,0,34 0/0:26,0:26:PASS:78:0,78,783
1 14522 . G A 195.16 VQSRTrancheSNP99.95to100.00 AC=2;AF=0.333;AN=6;BaseQRankSum=2.044;ClippingRankSum=-2.196;DP=48;FS=13.179;MLEAC=2;MLEAF=0.333;MQ=25.89;MQ0=0;MQRankSum=-0.063;QD=8.87;ReadPosRankSum=0.952;VQSLOD=-3.333;culprit=MQ GT:AD:DP:FT:GQ:PL 0/1:10,5:15:dp:99:99,0,3233 0/1:2,5:7:dp;rd:34:128,0,34 0/0:26,0:26:PASS:78:0,78,783
1 63735 rs201888535 CCTA C 2994.09 PASS AC=1;AF=0.167;AN=6;BaseQRankSum=1.138;ClippingRankSum=0.448;DB;DP=176;FS=13.597;MLEAC=1;MLEAF=0.167;MQ=31.06;MQ0=0;MQRankSum=0.636;QD=9.98;ReadPosRankSum=-1.18 GT:AD:DP:FT:GQ:PL 0/0:27,0:27:PASS:79:0,79,1425 0/0:40,0:40:PASS:99:0,117,2120 0/1:23,74:97:rd:99:3034,0,942
13 752721 rs3131972 A G 2486.90 PASS AC=6;AF=1.0;AN=6;DB;DP=69;FS=0.0;MLEAC=6;MLEAF=1.0;MQ=60.0;MQ0=0;POSITIVE_TRAIN_SITE;QD=31.67;VQSLOD=18.94;culprit=QD GT:AD:DP:FT:GQ:PL 1/1:0,27:27:PASS:81:1021,81,0 1/1:0,19:19:dp:57:661,57,0 1/1:0,22:22:PASS:66:831,66,0
13 752791 . A G 2486.90 PASS AC=6;AF=1.0;AN=6;DB;DP=69;FS=0.0;MLEAC=6;MLEAF=1.0;MQ=60.0;MQ0=0;POSITIVE_TRAIN_SITE;QD=31.67;VQSLOD=18.94;culprit=QD GT:AD:DP:FT:GQ:PL:SB 1/1:0,27:27:PASS:81:1021,81,0:0,1,2,3 1/1:0,19:19:dp:57:661,57,0:4,5,6,7 1/1:0,22:22:PASS:66:831,66,0:2,3,4,5
2 19190 . GC G 1186.88 PASS AC=3;AF=0.5;AN=6;BaseQRankSum=4.157;ClippingRankSum=3.666;DP=74;FS=37.037;MLEAC=3;MLEAF=0.5;MQ=22.26;MQ0=0;MQRankSum=0.195;QD=16.04;ReadPosRankSum=-4.072 GT:AD:DP:FT:GQ:PL 0/1:8,14:22:PASS:99:416,0,2147483647 0/1:18,13:31:PASS:99:353,0,503 0/1:5,15:20:rd:99:457,0,107
2 19190 . GC G 1186.88 PASS AC=3;AF=0.5;AN=6;BaseQRankSum=4.157;ClippingRankSum=3.666;DP=74;FS=37.037;MLEAC=3;MLEAF=0.5;MQ=22.26;MQ0=0;MQRankSum=0.195;QD=16.04;ReadPosRankSum=-4.072 GT:AD:DP:FT:GQ:PL 0/1:8,14:22:PASS:99:416,0,3233 0/1:18,13:31:PASS:99:353,0,503 0/1:5,15:20:rd:99:457,0,107
4 changes: 2 additions & 2 deletions adam-core/src/test/resources/sorted.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@
##contig=<ID=13,length=249250621>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 NA12891 NA12892
1 14397 . CTGT C 139.12 IndelQD AC=2;AF=0.333;AN=6;BaseQRankSum=1.8;ClippingRankSum=0.138;DP=69;FS=7.786;MLEAC=2;MLEAF=0.333;MQ=26.84;MQ0=0;MQRankSum=-1.906;QD=1.55;ReadPosRankSum=0.384 GT:AD:DP:FT:GQ:PL 0/1:16,4:20:rd:99:120,0,827 0/1:8,2:10:dp;rd:60:60,0,414 0/0:39,0:39:PASS:99:0,116,2114
1 14522 . G A 195.16 VQSRTrancheSNP99.95to100.00 AC=2;AF=0.333;AN=6;BaseQRankSum=2.044;ClippingRankSum=-2.196;DP=48;FS=13.179;MLEAC=2;MLEAF=0.333;MQ=25.89;MQ0=0;MQRankSum=-0.063;QD=8.87;ReadPosRankSum=0.952;VQSLOD=-3.333;culprit=MQ GT:AD:DP:FT:GQ:PL 0/1:10,5:15:dp:99:99,0,2147483647 0/1:2,5:7:dp;rd:34:128,0,34 0/0:26,0:26:PASS:78:0,78,783
1 14522 . G A 195.16 VQSRTrancheSNP99.95to100.00 AC=2;AF=0.333;AN=6;BaseQRankSum=2.044;ClippingRankSum=-2.196;DP=48;FS=13.179;MLEAC=2;MLEAF=0.333;MQ=25.89;MQ0=0;MQRankSum=-0.063;QD=8.87;ReadPosRankSum=0.952;VQSLOD=-3.333;culprit=MQ GT:AD:DP:FT:GQ:PL 0/1:10,5:15:dp:99:99,0,3233 0/1:2,5:7:dp;rd:34:128,0,34 0/0:26,0:26:PASS:78:0,78,783
1 63735 rs201888535 CCTA C 2994.09 PASS AC=1;AF=0.167;AN=6;BaseQRankSum=1.138;ClippingRankSum=0.448;DB;DP=176;FS=13.597;MLEAC=1;MLEAF=0.167;MQ=31.06;MQ0=0;MQRankSum=0.636;QD=9.98;ReadPosRankSum=-1.18 GT:AD:DP:FT:GQ:PL 0/0:27,0:27:PASS:79:0,79,1425 0/0:40,0:40:PASS:99:0,117,2120 0/1:23,74:97:rd:99:3034,0,942
2 19190 . GC G 1186.88 PASS AC=3;AF=0.5;AN=6;BaseQRankSum=4.157;ClippingRankSum=3.666;DP=74;FS=37.037;MLEAC=3;MLEAF=0.5;MQ=22.26;MQ0=0;MQRankSum=0.195;QD=16.04;ReadPosRankSum=-4.072 GT:AD:DP:FT:GQ:PL 0/1:8,14:22:PASS:99:416,0,2147483647 0/1:18,13:31:PASS:99:353,0,503 0/1:5,15:20:rd:99:457,0,107
2 19190 . GC G 1186.88 PASS AC=3;AF=0.5;AN=6;BaseQRankSum=4.157;ClippingRankSum=3.666;DP=74;FS=37.037;MLEAC=3;MLEAF=0.5;MQ=22.26;MQ0=0;MQRankSum=0.195;QD=16.04;ReadPosRankSum=-4.072 GT:AD:DP:FT:GQ:PL 0/1:8,14:22:PASS:99:416,0,3233 0/1:18,13:31:PASS:99:353,0,503 0/1:5,15:20:rd:99:457,0,107
13 752721 rs3131972 A G 2486.90 PASS AC=6;AF=1.0;AN=6;DB;DP=69;FS=0.0;MLEAC=6;MLEAF=1.0;MQ=60.0;MQ0=0;POSITIVE_TRAIN_SITE;QD=31.67;VQSLOD=18.94;culprit=QD GT:AD:DP:FT:GQ:PL 1/1:0,27:27:PASS:81:1021,81,0 1/1:0,19:19:dp:57:661,57,0 1/1:0,22:22:PASS:66:831,66,0
13 752791 . A G 2486.90 PASS AC=6;AF=1.0;AN=6;DB;DP=69;FS=0.0;MLEAC=6;MLEAF=1.0;MQ=60.0;MQ0=0;POSITIVE_TRAIN_SITE;QD=31.67;VQSLOD=18.94;culprit=QD GT:AD:DP:FT:GQ:PL:SB 1/1:0,27:27:PASS:81:1021,81,0:0,1,2,3 1/1:0,19:19:dp:57:661,57,0:4,5,6,7 1/1:0,22:22:PASS:66:831,66,0:2,3,4,5
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ class VariantContextConverterSuite extends ADAMFunSuite {
assert(adamGT1.getGenotypeLikelihoods
.map(d => d: scala.Double)
.map(PhredUtils.logProbabilityToPhred)
.sameElements(List(59, 0, Int.MaxValue)))
.sameElements(List(59, 0, 3233)))

assert(adamGT2.getAlleles.sameElements(List(GenotypeAllele.OTHER_ALT, GenotypeAllele.ALT)))
assert(adamGT2.getAlternateReadDepth === 3)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,18 @@ class PhredUtilsSuite extends FunSuite {

test("convert high phred score to log and back") {
val logP = PhredUtils.phredToLogProbability(1000)
println(logP)
val phred = PhredUtils.logProbabilityToPhred(logP)
println(phred)
assert(phred === 1000)
}

test("convert overflowing phred score to log and back and clip") {
val logP = PhredUtils.phredToLogProbability(10000)
val phred = PhredUtils.logProbabilityToPhred(logP)
assert(phred === 3233)
}

test("convert negative zero log probability to phred and clip") {
val phred = PhredUtils.logProbabilityToPhred(-0.0)
assert(phred === 3233)
}
}

0 comments on commit 8248447

Please sign in to comment.