From 22aec6782b33f8d169a5d1cf63e952126a3f09e0 Mon Sep 17 00:00:00 2001 From: Chris Norman Date: Mon, 25 Apr 2022 18:06:52 -0400 Subject: [PATCH] Fix decoding of CRAM Scores read feature during normalization. (#1592) --- .../cram/structure/CRAMCompressionRecord.java | 16 ++++++++++++++-- .../block/CRAMRecordReadFeaturesTest.java | 8 +++++++- .../htsjdk/samtools/cram/1005_qual.cram | Bin 0 -> 821 bytes .../htsjdk/samtools/cram/1005_qual.sam | 3 +++ 4 files changed, 24 insertions(+), 3 deletions(-) create mode 100644 src/test/resources/htsjdk/samtools/cram/1005_qual.cram create mode 100644 src/test/resources/htsjdk/samtools/cram/1005_qual.sam diff --git a/src/main/java/htsjdk/samtools/cram/structure/CRAMCompressionRecord.java b/src/main/java/htsjdk/samtools/cram/structure/CRAMCompressionRecord.java index 85d8106bd8..6bd584c11e 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/CRAMCompressionRecord.java +++ b/src/main/java/htsjdk/samtools/cram/structure/CRAMCompressionRecord.java @@ -32,6 +32,7 @@ import htsjdk.samtools.cram.encoding.readfeatures.BaseQualityScore; import htsjdk.samtools.cram.encoding.readfeatures.ReadBase; import htsjdk.samtools.cram.encoding.readfeatures.ReadFeature; +import htsjdk.samtools.cram.encoding.readfeatures.Scores; import htsjdk.samtools.util.Log; import htsjdk.samtools.util.SequenceUtil; import htsjdk.utils.ValidationUtils; @@ -364,7 +365,7 @@ public void assignReadName() { * that the record's read bases, quality scores, and mate graph are not stored directly as part of the * record. Normalization is the process of resolving these values, and is performed at Slice granularity, * across all records in a Slice. - * (see {@link Slice#normalizeCRAMRecords(List, CRAMReferenceRegion, SubstitutionMatrix)}). + * (see {@link Slice#normalizeCRAMRecords(List, CRAMReferenceRegion)}). */ void setIsNormalized() { isNormalized = true; } @@ -373,7 +374,7 @@ public void assignReadName() { * scores, and mate graph are not stored directly as part of the record. These values must be resolved * through the separate process of normalization, which is performed at Slice granularity (all records in a * Slice are normalized at the same time). - * (see {@link Slice#normalizeCRAMRecords(List, CRAMReferenceRegion, SubstitutionMatrix)}). + * (see {@link Slice#normalizeCRAMRecords(List, CRAMReferenceRegion)} )}). * @return true if this record is normalized */ public boolean isNormalized() { return isNormalized; } @@ -393,6 +394,17 @@ public void resolveQualityScores() { scores[pos - 1] = ((BaseQualityScore) feature).getQualityScore(); hasMissingScores = false; break; + case Scores.operator: + final Scores scoresFeature = (Scores) feature; + pos = scoresFeature.getPosition(); + System.arraycopy( + scoresFeature.getScores(), + 0, + scores, + pos - 1, + scoresFeature.getScores().length); + hasMissingScores = false; + break; case ReadBase.operator: pos = feature.getPosition(); scores[pos - 1] = ((ReadBase) feature).getQualityScore(); diff --git a/src/test/java/htsjdk/samtools/cram/structure/block/CRAMRecordReadFeaturesTest.java b/src/test/java/htsjdk/samtools/cram/structure/block/CRAMRecordReadFeaturesTest.java index a4f6e224f5..268f5baed0 100644 --- a/src/test/java/htsjdk/samtools/cram/structure/block/CRAMRecordReadFeaturesTest.java +++ b/src/test/java/htsjdk/samtools/cram/structure/block/CRAMRecordReadFeaturesTest.java @@ -54,7 +54,13 @@ private Object[][] getReadFeatureTestData() { // test CRAM file (provided as part of https://github.com/samtools/htsjdk/issues/1379) does not // use reference-based compression (requires no reference) and uses Bases ('b') and SoftClip ('S') // feature codes; with the sam file created from the cram via samtools - {testDir + "referenceNotRequired.cram", testDir + "referenceNotRequired.sam", null} + {testDir + "referenceNotRequired.cram", testDir + "referenceNotRequired.sam", null}, + + // test CRAM file taken from the CRAM test files in hts-specs, has Scores ('q') read feature + // Note: the specs site compliance documentation for this test file says: + // Quality absent, mapped with diff (1005_qual.cram) As 1004_qual.cram but using 'q' instead of a series + // of 'Q' features. [ complex to generate! see CRAM.q.gen.patch ] + {testDir + "1005_qual.cram", testDir + "1005_qual.sam", testDir + "ce.fa" } }; } diff --git a/src/test/resources/htsjdk/samtools/cram/1005_qual.cram b/src/test/resources/htsjdk/samtools/cram/1005_qual.cram new file mode 100644 index 0000000000000000000000000000000000000000..be4297f03790b6625f1a1803a9c73f5ebbce748d GIT binary patch literal 821 zcmb_a-D}fO6hAk)+xoSZiQR+_9|qPTZIjYzYafPLwr0a6U6T6HftIdKnXBDL6DNpB z#{36lqVv^(=tOV~@e5y7P{vSv&dcu`}>`Ha?ZICH9=;f z5G&P-HTVlVjUB^)z2skBc!`X?XfIwvdqP*;x{?+LwXB@gv+_V;$UUs2<2-*n#q)0Y zXgXz-jl{8dB5v^Iu}Y#Ro=ovcqbF5KCMyYJtn5CerMbz9$(=Wiy6Jd@32ve~#$A}I zS2?p-v!w2V48ZkbVY+9;WvWtbM{l*>&QP17iIu^3k}VwIvt`ZBYGrKxJ8 z^8e)wP6m6{31*xBF?Rr`@v4>B51&;xo&W)R*$|@7HhiXc_NmLdeRTzzuyw9w`#Vst&sQs2rE%DMZ@eG*1k@ji zXgLR>BuOCUBvFtAA*Vq;FF7>s07+301Sy|~wbz^5Km#mWvKLOTrlUaHSQdx68eO{u zG|1qzdhb*Z0&Q<;nTudmU+KzY{RYquO0PVa-@Mcne*`q-a_KXBYybUwZ-92XTm(jO zWTSAGe{mCNI1=$N4S2d5_&57=as6EY8{Hkl=Yed-`FjX7J^zqJ)7-Ir$p` CR=0Wp literal 0 HcmV?d00001 diff --git a/src/test/resources/htsjdk/samtools/cram/1005_qual.sam b/src/test/resources/htsjdk/samtools/cram/1005_qual.sam new file mode 100644 index 0000000000..0393ae36fb --- /dev/null +++ b/src/test/resources/htsjdk/samtools/cram/1005_qual.sam @@ -0,0 +1,3 @@ +@SQ SN:CHROMOSOME_I LN:1009800 M5:8ede36131e0dbf3417807e48f77f3ebd UR:/nfs/users/nfs_j/jkb/work/samtools_master/hts-specs/test/cram/passed/../ce.fa +match 99 CHROMOSOME_I 1010 40 10S80M10S = 1200 300 RTTTTTCGGGTTTTTTGAAATGAATATCGTAGCTACAGAAACGGTTGTGCACTCATCTGAAAGTTTGTTTTTCTTGTTTTCTTGCACTTTGTGCAGAATR ##########????????????????????????????????????????????????????????????????????????????????CCCCCCCCCC +match 147 CHROMOSOME_I 1210 40 10S80M10S = 1000 -300 YYGTTTTAGAAAAATTATTTTTAAGAATTTTTCATTTTAGGAATATTGTTATTTCAGAAAATAGCTAAATGTGATTTCTGTAATTTTGCCTGCCAAAGYY ##########????????????????????????????????????????????????????????????????????????????????CCCCCCCCCC