Skip to content

Commit

Permalink
Merge pull request #263 from genomebridge/tdanford/referenceLengthFro…
Browse files Browse the repository at this point in the history
…mCigar

Added AdamContext.referenceLengthFromCigar
  • Loading branch information
fnothaft committed Jun 9, 2014
2 parents 596eae3 + 5dbb02e commit 12ab6fc
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,39 @@ import org.bdgenomics.adam.util._
import scala.Some
import scala.collection.immutable.NumericRange
import org.bdgenomics.adam.models.{ ReferenceRegion, ReferencePosition, Attribute }
import java.util.regex.Pattern

object RichADAMRecord {
val CIGAR_CODEC: TextCigarCodec = TextCigarCodec.getSingleton
val ILLUMINA_READNAME_REGEX = "[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*".r

val cigarPattern = Pattern.compile("([0-9]+)([MIDNSHPX=])")

/**
* Parses a CIGAR string, and returns the aligned length with respect to the
* reference genome (i.e. skipping clipping, padding, and insertion operators)
*
* @param cigar The CIGAR string whose reference length is to be measured
* @return A non-negative integer, the sum of the MDNX= operators in the CIGAR string.
*/
def referenceLengthFromCigar(cigar: String): Int = {
val m = cigarPattern.matcher(cigar)
var i = 0
var len: Int = 0
while (i < cigar.length) {
if (m.find(i)) {
val op = m.group(2)
if ("MDNX=".indexOf(op) != -1) {
len += m.group(1).toInt
}
} else {
return len
}
i = m.end()
}
len
}

def apply(record: ADAMRecord) = {
new RichADAMRecord(record)
}
Expand All @@ -39,6 +67,8 @@ class IlluminaOptics(val tile: Long, val x: Long, val y: Long) {}

class RichADAMRecord(val record: ADAMRecord) {

lazy val referenceLength: Int = RichADAMRecord.referenceLengthFromCigar(record.cigar)

lazy val readRegion = ReferenceRegion(this)

// Returns the quality scores as a list of bytes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,16 @@ import org.bdgenomics.adam.models.{ ReferencePosition, TagType, Attribute }

class RichADAMRecordSuite extends FunSuite {

test("referenceLengthFromCigar") {
assert(referenceLengthFromCigar("3M") === 3)
assert(referenceLengthFromCigar("30M") === 30)
assert(referenceLengthFromCigar("10Y") === 0) // should abort when it hits an illegal operator
assert(referenceLengthFromCigar("10M1Y") === 10) // same
assert(referenceLengthFromCigar("10M1I10M") === 20)
assert(referenceLengthFromCigar("10M1D10M") === 21)
assert(referenceLengthFromCigar("1S10M1S") === 10)
}

test("Unclipped Start") {
val recordWithoutClipping = ADAMRecord.newBuilder().setReadMapped(true).setCigar("10M").setStart(42).build()
val recordWithClipping = ADAMRecord.newBuilder().setReadMapped(true).setCigar("2S8M").setStart(42).build()
Expand Down

0 comments on commit 12ab6fc

Please sign in to comment.