diff --git a/hts.c b/hts.c index a8a8bead2..c688f4c1b 100644 --- a/hts.c +++ b/hts.c @@ -232,6 +232,9 @@ const char *hts_feature_string(void) { } +// Converts ASCII to BAM nibble encoding. +// Note 0123 is treated as ACGT (ABI colourspace encoding) and +// U is treated as T. HTSLIB_EXPORT const unsigned char seq_nt16_table[256] = { 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, @@ -239,9 +242,9 @@ const unsigned char seq_nt16_table[256] = { 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 1, 2, 4, 8, 15,15,15,15, 15,15,15,15, 15, 0 /*=*/,15,15, 15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15, - 15,15, 5, 6, 8,15, 7, 9, 15,10,15,15, 15,15,15,15, + 15,15, 5, 6, 8, 8, 7, 9, 15,10,15,15, 15,15,15,15, 15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15, - 15,15, 5, 6, 8,15, 7, 9, 15,10,15,15, 15,15,15,15, + 15,15, 5, 6, 8, 8, 7, 9, 15,10,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, diff --git a/htslib/hts.h b/htslib/hts.h index 08bf71c2c..de41a14cd 100644 --- a/htslib/hts.h +++ b/htslib/hts.h @@ -455,6 +455,7 @@ int hts_parse_opt_list(htsFormat *opt, const char *str); The input character may be either an IUPAC ambiguity code, '=' for 0, or '0'/'1'/'2'/'3' for a result of 1/2/4/8. The result is encoded as 1/2/4/8 for A/C/G/T or combinations of these bits for ambiguous bases. +Additionally, RNA 'U' (upper or lower case) is treated as 'T' (8). 
*/ HTSLIB_EXPORT extern const unsigned char seq_nt16_table[256]; diff --git a/test/compare_sam.pl b/test/compare_sam.pl index 499cb2390..27319888f 100755 --- a/test/compare_sam.pl +++ b/test/compare_sam.pl @@ -163,6 +163,9 @@ $ln1[9] = uc($ln1[9]); $ln2[9] = uc($ln2[9]); + # RNA U to T is an expected change + $ln1[9] =~ s/U/T/g; + # Cram will populate a sequence string that starts as "*" $ln2[9] = "*" if ($ln1[9] eq "*"); diff --git a/test/xx#u.sam b/test/xx#u.sam new file mode 100644 index 000000000..338a9273f --- /dev/null +++ b/test/xx#u.sam @@ -0,0 +1,3 @@ +@SQ SN:xx LN:20 +a1 99 xx 1 1 16M = 11 20 =ACMGRSVTWYHKDBN **************** +b1 99 xx 1 1 16M = 11 20 =ACMGRSVUWYHKDBN ****************