From fac7efc51a078b8953560c167acc8722205a16e3 Mon Sep 17 00:00:00 2001 From: xubo245 <601450868@qq.com> Date: Sat, 30 Apr 2016 17:13:03 +0800 Subject: [PATCH 1/2] add maxScore When we map reads to a reference, we need the max score of each read. Moreover, a read may have many seeds and each seed needs extension, so we need the max score of each extension of a seed; then we can take the highest of those max scores and decide the read's mapping location in the reference. I add the maxScores function and the lazy val maxScore --- .../smithwaterman/SmithWaterman.scala | 78 +++++++++++-------- 1 file changed, 45 insertions(+), 33 deletions(-) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWaterman.scala b/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWaterman.scala index 4e2ab4af9b..cba1c53202 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWaterman.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWaterman.scala @@ -24,7 +24,8 @@ abstract class SmithWaterman(xSequence: String, ySequence: String) extends Seria lazy val (scoringMatrix, moveMatrix) = buildScoringMatrix() lazy val (cigarX, cigarY, xStart, yStart) = trackback(scoringMatrix, moveMatrix) - + lazy val maxScore=maxScores(scoringMatrix) + /** * Builds Smith-Waterman scoring matrix. * @@ -48,38 +49,47 @@ abstract class SmithWaterman(xSequence: String, ySequence: String) extends Seria * @return Tuple of (i, j) coordinates. 
*/ private[smithwaterman] final def maxCoordinates(matrix: Array[Array[Double]]): (Int, Int) = { - def maxInCol(col: Array[Double]): (Double, Int) = { - def takeMax(a: (Double, Int), b: (Double, Int)): (Double, Int) = { - if (a._1 > b._1) { - a - } else { - b + var xMax = 0 + var yMax = 0 + var max = Double.MinValue + var x = 0 + while (x < matrix.length) { + var y = 0 + while (y < matrix(x).length) { + if (matrix(x)(y) >= max) { + max = matrix(x)(y) + xMax = x + yMax = y } + y += 1 } - - val c: Array[(Double, Int)] = col.zipWithIndex - - c.reduce(takeMax) + x += 1 } + (yMax, xMax) + } - def maxCol(cols: Array[(Double, Int)]): (Int, Int) = { - def takeMax(a: (Double, Int, Int), b: (Double, Int, Int)): (Double, Int, Int) = { - if (a._1 > b._1) { - a - } else { - b + /** + * Finds max Score of a matrix with highest value. + * + * @param matrix Matrix to score. + * @return maxScore: max score in Matrix. + */ + private[smithwaterman] final def maxScores(matrix: Array[Array[Double]]):Double = { + + var maxScoreReturn = 0 + var max = Double.MinValue + var x = 0 + while (x < matrix.length) { + var y = 0 + while (y < matrix(x).length) { + if (matrix(x)(y) >= max) { + max = matrix(x)(y) } + y += 1 } - - val c: Array[((Double, Int), Int)] = cols.zipWithIndex - - val m: (Double, Int, Int) = c.map(kv => (kv._1._1, kv._1._2, kv._2)) - .reduce(takeMax) - - (m._2, m._3) + x += 1 } - - maxCol(matrix.map(maxInCol)) + max } /** @@ -130,11 +140,12 @@ abstract class SmithWaterman(xSequence: String, ySequence: String) extends Seria * * @see buildScoringMatrix */ - @tailrec private[smithwaterman] final def move(matrix: Array[Array[Char]], - i: Int, - j: Int, - cX: String, - cY: String): (String, String, Int, Int) = { + @tailrec private[smithwaterman] final def move( + matrix: Array[Array[Char]], + i: Int, + j: Int, + cX: String, + cY: String): (String, String, Int, Int) = { if (matrix(i)(j) == 'T') { // return if told to terminate (cigarFromRNNCigar(cX), cigarFromRNNCigar(cY), i, j) @@ 
-160,8 +171,9 @@ abstract class SmithWaterman(xSequence: String, ySequence: String) extends Seria * @param moveMatrix Move matrix to track back on. * @return Tuple of Cigar for X, Y. */ - private[smithwaterman] def trackback(scoreMatrix: Array[Array[Double]], - moveMatrix: Array[Array[Char]]): (Cigar, Cigar, Int, Int) = { + private[smithwaterman] def trackback( + scoreMatrix: Array[Array[Double]], + moveMatrix: Array[Array[Char]]): (Cigar, Cigar, Int, Int) = { assert(scoreMatrix.length == xSequence.length + 1) assert(scoreMatrix.forall(_.length == ySequence.length + 1)) assert(moveMatrix.length == xSequence.length + 1) From 447f4b8a4011cca8674ce47b0f6a33e33ec33129 Mon Sep 17 00:00:00 2001 From: xubo245 <601450868@qq.com> Date: Sat, 21 May 2016 18:24:23 +0800 Subject: [PATCH 2/2] change test to sparkTest change test to sparkTest --- .../org/bdgenomics/adam/rdd/read/MarkDuplicatesSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/MarkDuplicatesSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/MarkDuplicatesSuite.scala index 28eaf933bb..8210b515c8 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/MarkDuplicatesSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/MarkDuplicatesSuite.scala @@ -172,7 +172,7 @@ class MarkDuplicatesSuite extends ADAMFunSuite { assert(dups.size == 10 && dups.forall(p => p.getReadName.startsWith("fragment"))) } - test("quality scores") { + sparkTest("quality scores") { // The ascii value 53 is equal to a phred score of 20 val qual = 53.toChar.toString * 100 val record = AlignmentRecord.newBuilder().setQual(qual).build()