Skip to content

Commit

Permalink
Performance improvements to SAM reading and processing (#2280)
Browse files Browse the repository at this point in the history
* Performance fixes to sam reading and processing

* Correct implementation of toString in SequenceDictionary

Co-authored-by: Rahamim, Ben <brahamim@paypal.com>
  • Loading branch information
benraha and Rahamim, Ben authored Nov 5, 2020
1 parent 2d9f1c8 commit 61c4809
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,16 +36,20 @@ case class Attribute(tag: String, tagType: TagType.Value, value: Any) {
val byteSequenceTypes = Array(TagType.NumericByteSequence, TagType.NumericUnsignedByteSequence)
val intSequenceTypes = Array(TagType.NumericIntSequence, TagType.NumericUnsignedIntSequence)
val shortSequenceTypes = Array(TagType.NumericShortSequence, TagType.NumericUnsignedShortSequence)
val sb = new StringBuilder
sb.append(tag)
sb.append(":")
sb.append(tagType)
if (byteSequenceTypes contains tagType) {
"%s:%s,%s".format(tag, tagType, value.asInstanceOf[Array[Byte]].mkString(","))
sb.append(",").append(value.asInstanceOf[Array[Byte]].mkString(",")).toString()
} else if (shortSequenceTypes contains tagType) {
"%s:%s,%s".format(tag, tagType, value.asInstanceOf[Array[Short]].mkString(","))
sb.append(",").append(value.asInstanceOf[Array[Short]].mkString(",")).toString()
} else if (intSequenceTypes contains tagType) {
"%s:%s,%s".format(tag, tagType, value.asInstanceOf[Array[Int]].mkString(","))
sb.append(",").append(value.asInstanceOf[Array[Int]].mkString(",")).toString()
} else if (tagType == TagType.NumericFloatSequence) {
"%s:%s,%s".format(tag, tagType, value.asInstanceOf[Array[Float]].mkString(","))
sb.append(",").append(value.asInstanceOf[Array[Float]].mkString(",")).toString()
} else {
"%s:%s:%s".format(tag, tagType, value.toString)
sb.append(":").append(value).toString()
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ class SequenceDictionary(val records: Vector[SequenceRecord]) extends Serializab
* @return Returns a SAM formatted sequence dictionary.
*/
def toSAMSequenceDictionary: SAMSequenceDictionary = {
  // Single conversion pass: map every record to its SAM counterpart, then
  // expose the resulting collection to the Java constructor through asJava.
  // Only one dictionary is constructed; nothing is built and thrown away.
  new SAMSequenceDictionary(records.map(_.toSAMSequenceRecord).asJava)
}

/**
Expand Down Expand Up @@ -259,7 +259,7 @@ class SequenceDictionary(val records: Vector[SequenceRecord]) extends Serializab
}

/**
 * Renders this dictionary as text.
 *
 * @return The literal prefix `SequenceDictionary{`, followed by the string
 *   form of each record separated by newlines, followed by `}`.
 */
override def toString: String = {
  // mkString assembles prefix, separator-joined records and suffix in one
  // pass, instead of re-copying a growing string for every record.
  records.map(_.toString).mkString("SequenceDictionary{", "\n", "}")
}

private[adam] def toAvro: Seq[Reference] = {
Expand Down

0 comments on commit 61c4809

Please sign in to comment.