Skip to content

Commit

Permalink
Fix decoding of CRAM Scores read feature during normalization. (#1592)
Browse files Browse the repository at this point in the history
  • Loading branch information
cmnbroad authored Apr 25, 2022
1 parent 70e4259 commit 22aec67
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import htsjdk.samtools.cram.encoding.readfeatures.BaseQualityScore;
import htsjdk.samtools.cram.encoding.readfeatures.ReadBase;
import htsjdk.samtools.cram.encoding.readfeatures.ReadFeature;
import htsjdk.samtools.cram.encoding.readfeatures.Scores;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.utils.ValidationUtils;
Expand Down Expand Up @@ -364,7 +365,7 @@ public void assignReadName() {
* that the record's read bases, quality scores, and mate graph are not stored directly as part of the
* record. Normalization is the process of resolving these values, and is performed at Slice granularity,
* across all records in a Slice.
* (see {@link Slice#normalizeCRAMRecords(List, CRAMReferenceRegion, SubstitutionMatrix)}).
* (see {@link Slice#normalizeCRAMRecords(List, CRAMReferenceRegion)}).
*/
void setIsNormalized() { isNormalized = true; }

Expand All @@ -373,7 +374,7 @@ public void assignReadName() {
* scores, and mate graph are not stored directly as part of the record. These values must be resolved
* through the separate process of normalization, which is performed at Slice granularity (all records in a
* Slice are normalized at the same time).
* (see {@link Slice#normalizeCRAMRecords(List, CRAMReferenceRegion, SubstitutionMatrix)}).
* (see {@link Slice#normalizeCRAMRecords(List, CRAMReferenceRegion)} )}).
* @return true if this record is normalized
*/
public boolean isNormalized() { return isNormalized; }
Expand All @@ -393,6 +394,17 @@ public void resolveQualityScores() {
scores[pos - 1] = ((BaseQualityScore) feature).getQualityScore();
hasMissingScores = false;
break;
case Scores.operator:
final Scores scoresFeature = (Scores) feature;
pos = scoresFeature.getPosition();
System.arraycopy(
scoresFeature.getScores(),
0,
scores,
pos - 1,
scoresFeature.getScores().length);
hasMissingScores = false;
break;
case ReadBase.operator:
pos = feature.getPosition();
scores[pos - 1] = ((ReadBase) feature).getQualityScore();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,13 @@ private Object[][] getReadFeatureTestData() {
// test CRAM file (provided as part of https://github.com/samtools/htsjdk/issues/1379) does not
// use reference-based compression (requires no reference) and uses Bases ('b') and SoftClip ('S')
// feature codes; with the sam file created from the cram via samtools
{testDir + "referenceNotRequired.cram", testDir + "referenceNotRequired.sam", null}
{testDir + "referenceNotRequired.cram", testDir + "referenceNotRequired.sam", null},

// test CRAM file taken from the CRAM test files in hts-specs, has Scores ('q') read feature
// Note: the specs site compliance documentation for this test file says:
// Quality absent, mapped with diff (1005_qual.cram) As 1004_qual.cram but using 'q' instead of a series
// of 'Q' features. [ complex to generate! see CRAM.q.gen.patch ]
{testDir + "1005_qual.cram", testDir + "1005_qual.sam", testDir + "ce.fa" }
};
}

Expand Down
Binary file not shown.
3 changes: 3 additions & 0 deletions src/test/resources/htsjdk/samtools/cram/1005_qual.sam
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
@SQ SN:CHROMOSOME_I LN:1009800 M5:8ede36131e0dbf3417807e48f77f3ebd UR:/nfs/users/nfs_j/jkb/work/samtools_master/hts-specs/test/cram/passed/../ce.fa
match 99 CHROMOSOME_I 1010 40 10S80M10S = 1200 300 RTTTTTCGGGTTTTTTGAAATGAATATCGTAGCTACAGAAACGGTTGTGCACTCATCTGAAAGTTTGTTTTTCTTGTTTTCTTGCACTTTGTGCAGAATR ##########????????????????????????????????????????????????????????????????????????????????CCCCCCCCCC
match 147 CHROMOSOME_I 1210 40 10S80M10S = 1000 -300 YYGTTTTAGAAAAATTATTTTTAAGAATTTTTCATTTTAGGAATATTGTTATTTCAGAAAATAGCTAAATGTGATTTCTGTAATTTTGCCTGCCAAAGYY ##########????????????????????????????????????????????????????????????????????????????????CCCCCCCCCC

0 comments on commit 22aec67

Please sign in to comment.