Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modifying conversion code to resolve #112. #260

Merged
merged 1 commit into from
Jun 9, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ class ADAMRecordConverter extends Serializable {
.foreach(v => builder.setReadNegativeStrandFlag(v.booleanValue))
Option(adamRecord.getPrimaryAlignment)
.foreach(v => builder.setNotPrimaryAlignmentFlag(!v.booleanValue))
Option(adamRecord.getSupplementaryAlignment)
.foreach(v => builder.setSupplementaryAlignmentFlag(v.booleanValue))
Option(adamRecord.getStart)
.foreach(s => builder.setAlignmentStart(s.toInt + 1))
Option(adamRecord.getMapq).foreach(v => builder.setMappingQuality(v))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,33 @@ class SAMRecordConverter extends Serializable {
assert(start != 0, "Start cannot equal 0 if contig is set.")
builder.setStart((start - 1).asInstanceOf[Long])

// set read mapped flag
builder.setReadMapped(true)

// set mapping quality
val mapq: Int = samRecord.getMappingQuality

if (mapq != SAMRecord.UNKNOWN_MAPPING_QUALITY) {
builder.setMapq(mapq)
}

// set mapping flags
// oddly enough, it appears that reads can show up with mapping info (mapq, cigar, position)
// even if the read unmapped flag is set...
if (samRecord.getReadUnmappedFlag) {
builder.setReadMapped(false)
} else {
builder.setReadMapped(true)
if (samRecord.getReadNegativeStrandFlag) {
builder.setReadNegativeStrand(true)
}
if (!samRecord.getNotPrimaryAlignmentFlag) {
builder.setPrimaryAlignment(true)
} else {
// if the read is not a primary alignment, it can be either secondary or supplementary
// - secondary: not the best linear alignment
// - supplementary: part of a chimeric alignment
builder.setSupplementaryAlignment(samRecord.getSupplementaryAlignmentFlag)
builder.setSecondaryAlignment(!samRecord.getSupplementaryAlignmentFlag)
}
}
}

// Position of the mate/next segment
Expand Down Expand Up @@ -110,18 +129,9 @@ class SAMRecordConverter extends Serializable {
if (samRecord.getDuplicateReadFlag) {
builder.setDuplicateRead(true)
}
if (samRecord.getReadNegativeStrandFlag) {
builder.setReadNegativeStrand(true)
}
if (!samRecord.getNotPrimaryAlignmentFlag) {
builder.setPrimaryAlignment(true)
}
if (samRecord.getReadFailsVendorQualityCheckFlag) {
builder.setFailedVendorQualityChecks(true)
}
if (samRecord.getReadUnmappedFlag) {
builder.setReadMapped(false)
}
}

if (samRecord.getAttributes != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,13 @@ object DecadentRead {

class DecadentRead(val record: RichADAMRecord) extends Logging {
// Can't be a primary alignment unless it has been aligned
//
// FIXME(#112): This is currently unenforceable; SAMRecordConverter sets PrimaryAlignment on unmapped reads
//require(!record.getPrimaryAlignment || record.getReadMapped, "Unaligned read can't be a primary alignment")
require(!record.getPrimaryAlignment || record.getReadMapped, "Unaligned read can't be a primary alignment")

// Should have quality scores for all residues
require(record.getSequence.length == record.qualityScores.length, "sequence and qualityScores must be same length")

// MapQ should be valid
require(record.getMapq == null || (record.getMapq >= 0 && record.getMapq <= 255), "MapQ must be in [0, 255]")
require(record.getMapq == null || (record.getMapq >= 0 && record.getMapq <= 93), "MapQ must be in [0, 255]")

// Alignment must be valid
require(!record.getReadMapped || record.getStart >= 0, "Invalid alignment start index")
Expand Down
17 changes: 14 additions & 3 deletions adam-format/src/main/resources/avro/adam.avdl
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,25 @@ record ADAMRecord {
union { boolean, null } properPair = false;
union { boolean, null } readMapped = false;
union { boolean, null } mateMapped = false;
union { boolean, null } readNegativeStrand = false;
union { boolean, null } mateNegativeStrand = false;
union { boolean, null } firstOfPair = false;
union { boolean, null } secondOfPair = false;
union { boolean, null } primaryAlignment = false;
union { boolean, null } failedVendorQualityChecks = false;
union { boolean, null } duplicateRead = false;

// alignment flags (all default to false, matching the readMapped default of false)
union { boolean, null } readNegativeStrand = false; // aligned to reverse complement
union { boolean, null } mateNegativeStrand = false; // mate aligned to reverse complement
// primary vs. secondary vs. supplementary:
// - primary is either the best linear alignment or the "first" linear alignment
// in a chimeric alignment
// - supplementary is a non-primary linear alignment in a chimeric alignment
// - secondary is an alignment that is not the best alignment (an alignment that is
// worse than the primary alignment) and that is not a supplementary linear alignment in a
// chimeric alignment (thus not supplementary)
union { boolean, null } primaryAlignment = false;
union { boolean, null } secondaryAlignment = false;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I read the code above correctly, your new "secondaryAlignment" flag is equivalent to "mapped, but not primary or supplementary." Is that right? If so, can you clarify the description in the comment block above?

Also, is there any interaction here with the code in ReadBucket and SingleReadBucket? In particular, there are already fields in those classes with particles like "secondary" in their names -- do they match the use of this "secondaryAlignment" flag?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've updated the comment. I'm not going to touch the ReadBucket/SingleReadBucket code. I think primary/secondary there maps to primary/(secondary || supplemental), but it's not clear. @massie, can you clarify?

union { boolean, null } supplementaryAlignment = false;

// Commonly used optional attributes
union { null, string } mismatchingPositions = null;
union { null, string } origQual = null;
Expand Down