diff --git a/sam_mods.c b/sam_mods.c index fe8db85f77..3263903ce4 100644 --- a/sam_mods.c +++ b/sam_mods.c @@ -245,7 +245,7 @@ int bam_parse_basemod2(const bam1_t *b, hts_base_mod_state *state, } uint8_t *mi = bam_aux_get(b, "MN"); - if (mi && bam_aux2i(mi) != b->core.l_qseq) { + if (mi && bam_aux2i(mi) != b->core.l_qseq && b->core.l_qseq) { // bam_aux2i with set errno = EINVAL and return 0 if the tag // isn't integer, but 0 will be a seq-length mismatch anyway so // triggers an error here too. @@ -359,7 +359,7 @@ int bam_parse_basemod2(const bam1_t *b, hts_base_mod_state *state, if (!cp_end) { // empty list delta = INT_MAX; - cp_end = cp+1; + cp_end = cp; } } // Now delta is first in list or computed remainder, @@ -426,6 +426,10 @@ int bam_parse_basemod2(const bam1_t *b, hts_base_mod_state *state, } } } + if (ml && ml != ml_end) { + hts_log_error("%s: Too many entries in ML tag", bam_get_qname(b)); + return -1; + } state->nmods = mod_num; @@ -544,8 +548,24 @@ int bam_mods_at_next_pos(const bam1_t *b, hts_base_mod_state *state, */ int bam_next_basemod(const bam1_t *b, hts_base_mod_state *state, hts_base_mod *mods, int n_mods, int *pos) { - if (state->seq_pos >= b->core.l_qseq) + if (state->seq_pos >= b->core.l_qseq) { + // Spots +ve orientation run-overs. + // The -ve orientation is spotted in bam_parse_basemod2 + int i; + for (i = 0; i < state->nmods; i++) { + // Check if any remaining items in MM after hitting the end + // of the sequence. + if (!b->core.l_qseq) + continue; + + if (state->MMcount[i] < 0x7f000000 || + (*state->MM[i]!=0 && *state->MM[i]!=';')) { + hts_log_warning("MM tag refers to bases beyond sequence length"); + return -1; + } + } return 0; + } // Look through state->MMcount arrays to see when the next lowest is // per base type; @@ -579,18 +599,6 @@ int bam_next_basemod(const bam1_t *b, hts_base_mod_state *state, } *pos = state->seq_pos = i; - if (i >= b->core.l_qseq) { - // Check for more MM elements than bases present. - for (i = 0; i < state->nmods; i++) { - if (!(b->core.flag & BAM_FREVERSE) && - state->MMcount[i] < 0x7f000000) { - hts_log_warning("MM tag refers to bases beyond sequence length"); - return -1; - } - } - return 0; - } - if (b->core.flag & BAM_FREVERSE) { for (i = 0; i < state->nmods; i++) state->MMcount[i] -= freq[seqi_rc[state->canonical[i]]]; diff --git a/test/base_mods/MM-chebi.sam b/test/base_mods/MM-chebi.sam index 0ec8b9ddb0..475a7d5994 100644 --- a/test/base_mods/MM-chebi.sam +++ b/test/base_mods/MM-chebi.sam @@ -1,2 +1,2 @@ @CO Separate m, h and N modifications -* 0 * 0 0 * * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA * Mm:Z:C+m,2,2,1,4,1;C+76792,6,7;N+n,15; Ml:B:C,102,128,153,179,204,161,33,212,169 +* 0 * 0 0 * * 0 0 AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA * Mm:Z:C+m,2,2,1,4,1;C+76792,6,7;N+n,15; Ml:B:C,102,128,153,179,204,161,33,212