From a505bb97bcd22a1ccfb8ff0c11f6f65a8466eb2f Mon Sep 17 00:00:00 2001 From: Noam Barkai Date: Tue, 1 Mar 2016 17:18:50 +0200 Subject: [PATCH] replace functional for loop in smith waterman with simpler while loop to improve performance --- .../smithwaterman/SmithWaterman.scala | 43 ++++++------------- .../SmithWatermanGapScoringFromFn.scala | 8 +++- .../smithwaterman/SmithWatermanSuite.scala | 10 +++++ 3 files changed, 30 insertions(+), 31 deletions(-) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWaterman.scala b/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWaterman.scala index 65e231f56a..83179a948f 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWaterman.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWaterman.scala @@ -48,38 +48,23 @@ abstract class SmithWaterman(xSequence: String, ySequence: String) extends Seria * @return Tuple of (i, j) coordinates. */ private[smithwaterman] final def maxCoordinates(matrix: Array[Array[Double]]): (Int, Int) = { - def maxInCol(col: Array[Double]): (Double, Int) = { - def takeMax(a: (Double, Int), b: (Double, Int)): (Double, Int) = { - if (a._1 > b._1) { - a - } else { - b - } - } - - val c: Array[(Double, Int)] = col.zipWithIndex - - c.reduce(takeMax) - } - - def maxCol(cols: Array[(Double, Int)]): (Int, Int) = { - def takeMax(a: (Double, Int, Int), b: (Double, Int, Int)): (Double, Int, Int) = { - if (a._1 > b._1) { - a - } else { - b + var xMax = 0 + var yMax = 0 + var max = Double.MinValue + var x = 0 + while (x < matrix.length) { + var y = 0 + while (y < matrix(x).length) { + if (matrix(x)(y) >= max) { + max = matrix(x)(y) + xMax = x + yMax = y } + y += 1 } - - val c: Array[((Double, Int), Int)] = cols.zipWithIndex - - val m: (Double, Int, Int) = c.map(kv => (kv._1._1, kv._1._2, kv._2)) - .reduce(takeMax) - - (m._2, m._3) + x += 1 } - - maxCol(matrix.map(maxInCol)) + (yMax, xMax) } /** diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWatermanGapScoringFromFn.scala b/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWatermanGapScoringFromFn.scala index 3df9e2fb86..ad1344dc4a 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWatermanGapScoringFromFn.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWatermanGapScoringFromFn.scala @@ -46,8 +46,10 @@ abstract class SmithWatermanGapScoringFromFn( } // score matrix - for (i <- 1 to x) { - for (j <- 1 to y) { + var i = 1 + while (i <= x) { + var j = 1 + while (j <= y) { val m = scoreMatrix(i - 1)(j - 1) + scoreFn(i, j, xSequence(i - 1), ySequence(j - 1)) val d = scoreMatrix(i - 1)(j) + scoreFn(i, j, xSequence(i - 1), '_') val in = scoreMatrix(i)(j - 1) + scoreFn(i, j, '_', ySequence(j - 1)) @@ -64,7 +66,9 @@ abstract class SmithWatermanGapScoringFromFn( scoreMatrix(i)(j) = scoreUpdate moveMatrix(i)(j) = moveUpdate + j += 1 } + i += 1 } (scoreMatrix, moveMatrix) diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWatermanSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWatermanSuite.scala index c0b1d2b078..1994e34260 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWatermanSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWatermanSuite.scala @@ -19,6 +19,7 @@ package org.bdgenomics.adam.algorithms.smithwaterman import org.scalatest.FunSuite import scala.math.abs +import scala.util.Random class SmithWatermanSuite extends FunSuite { @@ -237,4 +238,13 @@ class SmithWatermanSuite extends FunSuite { assert(sw.xStart === 8) } + test("smithWaterman - simple alignment") { + val sw = new SmithWatermanConstantGapScoring("AAA", + "AAT", + 1.0, 0.0, -0.333, -0.333) + assert(sw.cigarX.toString === "3M") + assert(sw.cigarY.toString === "3M") + assert(sw.xStart === 0) + } + }