Skip to content

Commit

Permalink
Add bam_aux_first()/bam_aux_next() tagged aux field iterator API
Browse files Browse the repository at this point in the history
Add new API functions for iterating through a BAM record's aux fields,
inline accessor methods for field tag and type (or code can continue
to use s-2 and *s), and a variant of bam_aux_del() that returns the
(updated) iterator to the following field (for use in iterator-based
loops that delete fields).
  • Loading branch information
jmarshall committed Nov 9, 2021
1 parent 2f2318e commit 326e3d2
Show file tree
Hide file tree
Showing 2 changed files with 113 additions and 14 deletions.
50 changes: 49 additions & 1 deletion htslib/sam.h
Original file line number Diff line number Diff line change
Expand Up @@ -1428,7 +1428,6 @@ int sam_passes_filter(const sam_hdr_t *h, const bam1_t *b,

/// Converts a BAM aux tag to SAM format
/*
* @param b Pointer to the bam record
* @param key Two letter tag key
* @param type Single letter type code: ACcSsIifHZB.
* @param tag Tag data pointer, in BAM format
Expand Down Expand Up @@ -1616,6 +1615,29 @@ static inline const uint8_t *sam_format_aux1(const uint8_t *key,
return NULL;
}

/// Return a pointer to a BAM record's first aux field
/** @param b Pointer to the BAM record
@return Aux field pointer, or NULL if the record has none
When NULL is returned, errno will also be set to ENOENT. ("Aux field pointers"
point to the TYPE byte within the auxiliary data for that field; but in general
it is unnecessary for user code to be aware of this.)
*/
HTSLIB_EXPORT
uint8_t *bam_aux_first(const bam1_t *b);

/// Return a pointer to a BAM record's next aux field
/** @param b Pointer to the BAM record
@param s Aux field pointer, as returned by bam_aux_first()/_next()/_get()
@return Pointer to the next aux field, or NULL if already last or error
Whenever NULL is returned, errno will also be set: ENOENT if @p s was the
record's last aux field; otherwise EINVAL, indicating that the BAM record's
aux data is corrupt.
*/
HTSLIB_EXPORT
uint8_t *bam_aux_next(const bam1_t *b, const uint8_t *s);

/// Return a pointer to an aux record
/** @param b Pointer to the bam record
@param tag Desired aux tag
Expand All @@ -1628,6 +1650,19 @@ static inline const uint8_t *sam_format_aux1(const uint8_t *key,
HTSLIB_EXPORT
uint8_t *bam_aux_get(const bam1_t *b, const char tag[2]);

/// Locate the two tag characters belonging to an aux field
/** @param s Aux field pointer, as returned by bam_aux_first()/_next()/_get()
@return Pointer to the first of the two tag characters; the tag is NOT
        NUL-terminated, so only the two bytes at [0] and [1] are the tag

The tag occupies the two bytes immediately before the byte that @p s
points to.
*/
static inline
const char *bam_aux_tag(const uint8_t *s)
{
    const uint8_t *tag_start = s - 2;
    return (const char *) tag_start;
}

/// Read the type character of an aux field
/** @param s Aux field pointer, as returned by bam_aux_first()/_next()/_get()
@return The type character: one of cCsSiI/fd/A/Z/H/B

The type character is the byte that @p s itself points to.
*/
static inline char bam_aux_type(const uint8_t *s)
{
    return s[0];
}

/// Return a SAM formatting string containing a BAM tag
/** @param b Pointer to the bam record
@param tag Desired aux tag
Expand Down Expand Up @@ -1739,6 +1774,19 @@ int bam_aux_append(bam1_t *b, const char tag[2], char type, int len, const uint8
HTSLIB_EXPORT
int bam_aux_del(bam1_t *b, uint8_t *s);

/// Delete tag data from a bam record
/** @param b The bam record to update
@param s Pointer to the aux field to delete, as returned by
bam_aux_first()/_next()/_get()
@return Pointer to the following aux field, or NULL if none or on error
Whenever NULL is returned, errno will also be set: ENOENT if the aux field
deleted was the record's last one; otherwise EINVAL, indicating that the
BAM record's aux data is corrupt.
*/
HTSLIB_EXPORT
uint8_t *bam_aux_erase(bam1_t *b, uint8_t *s);

/// Update or add a string-type tag
/* @param b The bam record to update
@param tag Tag identifier
Expand Down
77 changes: 64 additions & 13 deletions sam.c
Original file line number Diff line number Diff line change
Expand Up @@ -4508,7 +4508,52 @@ static inline uint8_t *skip_aux(uint8_t *s, uint8_t *end)
}
}

/* Return an aux field pointer for the first aux field of record b.
 *
 * The returned pointer addresses the field's type byte, i.e. it is two
 * bytes past the start of the aux data (the two tag characters come first).
 *
 * Returns NULL with errno set to ENOENT when the record has no aux field.
 * A field needs at least its 2-byte tag plus a type byte, so an aux area
 * of two bytes or fewer is also reported as "no field": returning s+2 in
 * that case would hand the caller a pointer at or beyond the end of the
 * record's data.
 */
uint8_t *bam_aux_first(const bam1_t *b)
{
    uint8_t *s = bam_get_aux(b);
    uint8_t *end = b->data + b->l_data;
    // Need tag (2 bytes) + type (1 byte) for s+2 to be dereferenceable
    if (end - s <= 2) { errno = ENOENT; return NULL; }
    return s+2;
}

/* Return an aux field pointer for the field following s in record b.
 *
 * s is an aux field pointer (addressing a field's type byte); the result
 * addresses the type byte of the next field.
 *
 * Returns NULL and sets errno when there is no next field:
 *   ENOENT  s was the last field (or s was NULL);
 *   EINVAL  skip_aux() returned NULL for s, which is treated here as
 *           corrupt aux data and is also logged.
 * A following field needs at least its 2-byte tag plus a type byte, so
 * two or fewer trailing bytes are reported as ENOENT rather than
 * returning a pointer at or beyond the end of the record's data.
 */
uint8_t *bam_aux_next(const bam1_t *b, const uint8_t *s)
{
    uint8_t *end = b->data + b->l_data;
    // A NULL iterator is treated as already being at the end of the data
    uint8_t *next = s? skip_aux((uint8_t *) s, end) : end;
    if (next == NULL) goto bad_aux;
    // Need tag (2 bytes) + type (1 byte) for next+2 to be dereferenceable
    if (end - next <= 2) { errno = ENOENT; return NULL; }
    return next+2;

bad_aux:
    hts_log_error("Corrupted aux data for read %s", bam_get_qname(b));
    errno = EINVAL;
    return NULL;
}

/* Find the aux field of record b whose two tag characters equal tag[0..1].
 *
 * Walks the fields with bam_aux_first()/bam_aux_next() and stops at the
 * first match. Before returning a match, its value is checked with
 * skip_aux(): a NULL result, or a 'Z'/'H' value whose final byte is not
 * NUL, is logged as corrupt and reported as NULL with errno = EINVAL.
 *
 * If no field matches, NULL is returned with errno left as set by the
 * iterator functions.
 */
uint8_t *bam_aux_get(const bam1_t *b, const char tag[2])
{
    uint8_t *field = bam_aux_first(b);

    while (field) {
        if (field[-2] == tag[0] && field[-1] == tag[1]) {
            // Check the tag value is valid and complete
            uint8_t *value_end = skip_aux(field, b->data + b->l_data);
            int is_string = (*field == 'Z' || *field == 'H');

            if (value_end == NULL || (is_string && value_end[-1] != '\0')) {
                hts_log_error("Corrupted aux data for read %s", bam_get_qname(b));
                errno = EINVAL;
                return NULL;
            }
            return field;
        }
        field = bam_aux_next(b, field);
    }

    // errno now as set by bam_aux_first()/bam_aux_next()
    return NULL;
}

#if 0
uint8_t *bam_aux_get_old(const bam1_t *b, const char tag[2])
{
uint8_t *s, *end, *t = (uint8_t *) tag;
uint16_t y = (uint16_t) t[0]<<8 | t[1];
Expand Down Expand Up @@ -4540,24 +4585,30 @@ uint8_t *bam_aux_get(const bam1_t *b, const char tag[2])
errno = EINVAL;
return NULL;
}
#endif

// s MUST BE an aux field pointer returned by bam_aux_first(), bam_aux_next(), or bam_aux_get()
int bam_aux_del(bam1_t *b, uint8_t *s)
uint8_t *bam_aux_erase(bam1_t *b, uint8_t *s)
{
uint8_t *p, *aux;
int l_aux = bam_get_l_aux(b);
aux = bam_get_aux(b);
p = s - 2;
s = skip_aux(s, aux + l_aux);
if (s == NULL) goto bad_aux;
memmove(p, s, l_aux - (s - aux));
b->l_data -= s - p;
return 0;
uint8_t *end = b->data + b->l_data;
uint8_t *next = skip_aux(s, end);
if (next == NULL) goto bad_aux;

bad_aux:
memmove(s-2, next, end - next);
b->l_data -= next - (s-2);

if (next >= end) { errno = ENOENT; return NULL; }
return s;

bad_aux:
hts_log_error("Corrupted aux data for read %s", bam_get_qname(b));
errno = EINVAL;
return -1;
return NULL;
}

/* Delete the aux field s from record b, reporting status as an int.
 *
 * Delegates to bam_aux_erase(). A NULL result from it with errno == ENOENT
 * counts as success here; a NULL result with any other errno yields -1.
 * Returns 0 on success, -1 otherwise.
 */
int bam_aux_del(bam1_t *b, uint8_t *s)
{
    if (bam_aux_erase(b, s) != NULL)
        return 0;

    return (errno == ENOENT) ? 0 : -1;
}

int bam_aux_update_str(bam1_t *b, const char tag[2], int len, const char *data)
Expand Down

0 comments on commit 326e3d2

Please sign in to comment.