Skip to content

Commit

Permalink
feat: raise exception if CollectDuplexSeqMetrics run on consensus BAM (
Browse files Browse the repository at this point in the history
…#1003)

* feat: add isConsensus method

* feat: assert DuplexSeqMetrics collected on grouped BAM

* refactor: move isConsensus

* refactor: move isConsensus check

* fix: remove unused import

* refactor: only import ConsensusTags

* refactor: only check first record

* chore: cleanup imports

* chore: cleanup import and unused val

* fix: cleanup error reporting

* fix: use val instead of def

* refactor: rename isConsensusRead to isFgbioStyleConsensus

* docs: improve doc string accuracy
  • Loading branch information
znorgaard authored Jul 31, 2024
1 parent ba11bfe commit 6ca7733
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,20 @@ class CollectDuplexSeqMetrics
override def execute(): Unit = {
// Build the iterator we'll use based on whether or not we're restricting to a set of intervals
val in = SamSource(input)
val _filteredIterator = in.iterator.filter(r => r.paired && r.mapped && r.mateMapped && r.firstOfPair && !r.secondary && !r.supplementary)
val _filteredIterator = in
.iterator
.filter(r => r.paired && r.mapped && r.mateMapped && r.firstOfPair && !r.secondary && !r.supplementary)
.bufferBetter
// Ensure the records are not consensus records
_filteredIterator.headOption.foreach {
rec =>
val exceptionString = s"Input BAM file to CollectDuplexSeqMetrics ($input) appears to contain consensus sequences. " +
"CollectDuplexSeqMetrics cannot run on consensus BAMs, and instead requires the UMI-grouped BAM generated " +
"by GroupReadsByUmi which is run prior to consensus calling." +
"\nFirst record in $input has consensus SAM tags present:\n$rec"

if (Umis.isFgbioStyleConsensus(rec)) throw new IllegalArgumentException(exceptionString)
}
val iterator = intervals match {
case None => _filteredIterator
case Some(path) =>
Expand Down
12 changes: 12 additions & 0 deletions src/main/scala/com/fulcrumgenomics/umi/Umis.scala
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
package com.fulcrumgenomics.umi

import com.fulcrumgenomics.bam.api.SamRecord
import com.fulcrumgenomics.umi.ConsensusTags
import com.fulcrumgenomics.util.Sequences

object Umis {
Expand Down Expand Up @@ -127,4 +128,15 @@ object Umis {
@inline private def isValidUmiCharacter(ch: Char): Boolean = {
ch == 'A' || ch == 'C' || ch == 'G' || ch == 'T' || ch == 'N' || ch == '-'
}

/** Returns True if the record appears to be a consensus read,
* typically produced by fgbio's CallMolecularConsensusReads or CallDuplexConsensusReads.
*
* @param rec the record to check
* @return boolean indicating if the record is a consensus or not
*/
def isFgbioStyleConsensus(rec: SamRecord): Boolean = {
rec.contains(ConsensusTags.PerRead.RawReadCount) ||
(rec.contains(ConsensusTags.PerRead.AbRawReadCount) && rec.contains(ConsensusTags.PerRead.BaRawReadCount))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,22 @@ class CollectDuplexSeqMetricsTest extends UnitSpec {
duplexFamilies.find(f => f.ab_size == 6 && f.ba_size == 0).get.count shouldBe 1
}

it should "raise an exception if duplex consensus records are provided" in {
val builder = new SamBuilder(readLength=100, sort=Some(SamOrder.TemplateCoordinate))
builder.addPair(
contig=1,
start1=1000,
start2=1100,
attrs=Map(
RX -> "AAA-GGG",
MI -> "1/A",
ConsensusTags.PerRead.AbRawReadCount -> 10,
ConsensusTags.PerRead.BaRawReadCount -> 10
)
)
an[IllegalArgumentException] shouldBe thrownBy { exec(builder) }
}

"CollectDuplexSeqMetrics.updateUmiMetrics" should "not count duplex umis" in collector(duplex=false).foreach { c =>
val builder = new SamBuilder(readLength=10)
builder.addPair(start1=100, start2=200, attrs=Map(RX -> "AAA-CCC", MI -> "1/A"))
Expand Down
18 changes: 17 additions & 1 deletion src/test/scala/com/fulcrumgenomics/umi/UmisTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

package com.fulcrumgenomics.umi

import com.fulcrumgenomics.bam.api.SamRecord
import com.fulcrumgenomics.bam.api.{SamOrder, SamRecord}
import com.fulcrumgenomics.testing.{SamBuilder, UnitSpec}
import org.scalatest.OptionValues

Expand Down Expand Up @@ -111,4 +111,20 @@ class UmisTest extends UnitSpec with OptionValues {
an[Exception] should be thrownBy copyUmiFromReadName(rec=rec("NAME:ACGT-CCKC"))
an[Exception] should be thrownBy copyUmiFromReadName(rec=rec("NAME:CCKC-ACGT"))
}

"Umis.isConsensusRead" should "return false for reads without consensus tags" in {
val builder = new SamBuilder(sort=Some(SamOrder.Coordinate), readLength=10, baseQuality=20)
builder.addFrag(start=100).exists(Umis.isFgbioStyleConsensus) shouldBe false
builder.addPair(start1=100, start2=100, unmapped2=true).exists(Umis.isFgbioStyleConsensus) shouldBe false
}

it should "return true for reads with consensus tags" in {
val builder = new SamBuilder(sort=Some(SamOrder.Coordinate), readLength=10, baseQuality=20)
builder.addFrag(start=10, attrs=Map(ConsensusTags.PerRead.RawReadCount -> 10))
.exists(Umis.isFgbioStyleConsensus) shouldBe true
builder.addFrag(
start=10,
attrs=Map(ConsensusTags.PerRead.AbRawReadCount -> 10, ConsensusTags.PerRead.BaRawReadCount -> 10)
).exists(Umis.isFgbioStyleConsensus) shouldBe true
}
}

0 comments on commit 6ca7733

Please sign in to comment.