Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ADAM-946] Fixes to FlagStat for Samtools concordance issue #954

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ class FlagStat(protected val args: FlagStatArgs) extends BDGSparkCommand[FlagSta
AlignmentRecordField.readInFragment,
AlignmentRecordField.properPair,
AlignmentRecordField.mapq,
AlignmentRecordField.failedVendorQualityChecks
AlignmentRecordField.failedVendorQualityChecks,
AlignmentRecordField.supplementaryAlignment
)

val adamFile: RDD[AlignmentRecord] = sc.loadAlignments(args.inputPath, projection = Some(projection))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,28 +104,25 @@ class SAMRecordConverter extends Serializable with Logging {
builder.setMapq(mapq)
}

// set mapping flags
// oddly enough, it appears that reads can show up with mapping info (mapq, cigar, position)
// even if the read unmapped flag is set...
if (samRecord.getReadUnmappedFlag) {
builder.setReadMapped(false)
} else {
builder.setReadMapped(true)
if (samRecord.getReadNegativeStrandFlag) {
builder.setReadNegativeStrand(true)
}
if (!samRecord.getNotPrimaryAlignmentFlag) {
builder.setPrimaryAlignment(true)
} else {
// if the read is not a primary alignment, it can be either secondary or supplementary
// - secondary: not the best linear alignment
// - supplementary: part of a chimeric alignment
builder.setSupplementaryAlignment(samRecord.getSupplementaryAlignmentFlag)
builder.setSecondaryAlignment(!samRecord.getSupplementaryAlignmentFlag)
}
}
}

// set mapping flags
// oddly enough, it appears that reads can show up with mapping
// info (mapq, cigar, position)
// even if the read unmapped flag is set...

// While the meaning of the ReadMapped, ReadNegativeStand,
// PrimaryAlignmentFlag and SupplementaryAlignmentFlag
// are unclear when the read is not mapped or reference is not defined,
// it is nonetheless favorable to set these flags in the ADAM file
// in same way as they appear in the input BAM inorder to match exactly
// the statistics output by other programs, specifically Samtools Flagstat
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with this


builder.setReadMapped(!samRecord.getReadUnmappedFlag)
builder.setReadNegativeStrand(samRecord.getReadNegativeStrandFlag)
builder.setPrimaryAlignment(!samRecord.getNotPrimaryAlignmentFlag)
builder.setSupplementaryAlignment(samRecord.getSupplementaryAlignmentFlag)

// Position of the mate/next segment
val mateReference: Int = samRecord.getMateReferenceIndex

Expand All @@ -141,7 +138,8 @@ class SAMRecordConverter extends Serializable with Logging {
}
}

// The Avro scheme defines all flags as defaulting to 'false'. We only need to set the flags that are true.
// The Avro scheme defines all flags as defaulting to 'false'. We only
// need to set the flags that are true.
if (samRecord.getFlags != 0) {
if (samRecord.getReadPairedFlag) {
builder.setReadPaired(true)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,14 +103,14 @@ object FlagStat {
1,
primaryDuplicates, secondaryDuplicates,
b2i(b(p.getReadMapped)),
b2i(b(p.getReadPaired)),
b2i(b(p.getReadPaired) && p.getReadInFragment == 0),
b2i(b(p.getReadPaired) && p.getReadInFragment == 1),
b2i(b(p.getReadPaired) && b(p.getProperPair)),
b2i(b(p.getReadPaired) && b(p.getReadMapped) && b(p.getMateMapped)),
b2i(b(p.getReadPaired) && b(p.getReadMapped) && b(!p.getMateMapped)),
b2i(b(mateMappedToDiffChromosome)),
b2i(b(mateMappedToDiffChromosome && i(p.getMapq) >= 5)),
b2i(b(p.getReadPaired) && b(!p.getSupplementaryAlignment) && p.getPrimaryAlignment),
b2i(b(p.getReadPaired) && p.getReadInFragment == 0 && b(!p.getSupplementaryAlignment) && p.getPrimaryAlignment),
b2i(b(p.getReadPaired) && p.getReadInFragment == 1 && b(!p.getSupplementaryAlignment) && p.getPrimaryAlignment),
b2i(b(p.getReadPaired) && b(p.getProperPair) && b(!p.getSupplementaryAlignment) && p.getPrimaryAlignment),
b2i(b(p.getReadPaired) && b(p.getReadMapped) && b(p.getMateMapped) && b(!p.getSupplementaryAlignment) && p.getPrimaryAlignment),
b2i(b(p.getReadPaired) && b(p.getReadMapped) && b(!p.getMateMapped) && b(!p.getSupplementaryAlignment) && p.getPrimaryAlignment),
b2i(b(mateMappedToDiffChromosome) && b(!p.getSupplementaryAlignment) && p.getPrimaryAlignment),
b2i(b(mateMappedToDiffChromosome && i(p.getMapq) >= 5) && b(!p.getSupplementaryAlignment) && p.getPrimaryAlignment),
p.getFailedVendorQualityChecks
)
}.aggregate((FlagStatMetrics.emptyFailedQuality, FlagStatMetrics.emptyPassedQuality))(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,6 @@ private[adam] object DecadentRead extends Logging with Serializable {

@deprecated("Use RichAlignmentRecord wherever possible in new development.", since = "0.18.0")
private[adam] class DecadentRead(val record: RichAlignmentRecord) extends Logging {
// Can't be a primary alignment unless it has been aligned
require(!record.getPrimaryAlignment || record.getReadMapped, "Unaligned read can't be a primary alignment")

// Should have quality scores for all residues
require(record.getQual == null ||
record.getSequence.length == record.qualityScores.length, "sequence and qualityScores must be same length")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ class AlignmentRecordConverterSuite extends FunSuite {
.split('\n')

assert(!secondRecord.getReadMapped)
assert(!secondRecord.getReadNegativeStrand)
assert(secondRecord.getReadNegativeStrand)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given the input data, this seems likely a bug in the test that was previously covered up by fact that for unmapped reads the code previously did not set this flag even if present in input.

assert(secondRecordFastq(0) === "@SRR062634.10448889/1")
assert(secondRecordFastq(1) === secondRecord.getSequence)
assert(secondRecordFastq(2) === "+")
Expand Down