Skip to content

Commit

Permalink
Add a unit test for sorting reads
Browse files Browse the repository at this point in the history
  • Loading branch information
massie committed Dec 11, 2013
1 parent 1b11ff8 commit 0241d91
Showing 1 changed file with 24 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@
package edu.berkeley.cs.amplab.adam.rdd

import org.apache.spark.rdd.RDD
import edu.berkeley.cs.amplab.adam.avro.{ADAMPileup, Base}
import edu.berkeley.cs.amplab.adam.avro.{ADAMRecord, ADAMPileup, Base}
import edu.berkeley.cs.amplab.adam.util.SparkFunSuite
import edu.berkeley.cs.amplab.adam.rdd.AdamContext._
import edu.berkeley.cs.amplab.adam.models.ADAMRod
import scala.util.Random

class AdamRDDFunctionsSuite extends SparkFunSuite {

Expand Down Expand Up @@ -163,4 +163,26 @@ class AdamRDDFunctionsSuite extends SparkFunSuite {
assert(coverage > 1.99 && coverage < 2.01)
}

sparkTest("sorting reads") {
val random = new Random("sorting".hashCode)
val numReadsToCreate = 1000
val reads = for (i <- 0 until numReadsToCreate) yield {
val mapped = random.nextBoolean()
val builder = ADAMRecord.newBuilder().setReadMapped(mapped)
if (mapped) {
builder.setReferenceId(random.nextInt(numReadsToCreate / 10)).setStart(random.nextInt(1000000))
}
builder.build()
}
val rdd = sc.parallelize(reads)
val sortedReads = rdd.adamSortReadsByReferencePosition().collect().zipWithIndex
val (mapped, unmapped) = sortedReads.partition(_._1.getReadMapped)
// Make sure that all the unmapped reads are placed at the end
assert(unmapped.forall(p => p._2 > mapped.takeRight(1)(0)._2))
// Make sure that we appropriately sorted the reads
val expectedSortedReads = mapped.sortWith(
(a, b) => a._1.getReferenceId < b._1.getReferenceId && a._1.getStart < b._1.getStart)
assert(expectedSortedReads === mapped)
}

}

0 comments on commit 0241d91

Please sign in to comment.