Skip to content

Commit

Permalink
Add SAM header iterator methods [DRAFT]
Browse files Browse the repository at this point in the history
  • Loading branch information
jmarshall committed Nov 9, 2021
1 parent 2f2318e commit 24406ca
Show file tree
Hide file tree
Showing 3 changed files with 145 additions and 2 deletions.
65 changes: 64 additions & 1 deletion header.c
Original file line number Diff line number Diff line change
Expand Up @@ -1271,6 +1271,18 @@ int sam_hdr_rebuild(sam_hdr_t *bh) {
return 0;
}

/*
 * Returns an iterator pointing to the first line of the header.
 *
 * If the header has no hrecs structure yet, sam_hdr_fill_hrecs() is called
 * to create it before the first line is looked up.
 *
 * Returns NULL if bh is NULL, if sam_hdr_fill_hrecs() fails, or if
 * hrecs->first_line is itself NULL.
 */
sam_hdr_line_t *sam_hdr_first_line(sam_hdr_t *bh) {
    if (!bh) return NULL;

    if (!bh->hrecs) {
        if (sam_hdr_fill_hrecs(bh) != 0) return NULL;
    }

    /* Defensive: do not dereference hrecs if it is still unset. */
    return bh->hrecs ? bh->hrecs->first_line : NULL;
}

/*
 * Advances an iterator to the following header line.
 *
 * The end of the iteration is detected by line->global_next arriving back
 * at hrecs->first_line, in which case NULL is returned.
 *
 * Returns NULL as well if bh, bh->hrecs or line is NULL, so that passing an
 * exhausted (NULL) iterator is harmless.
 */
sam_hdr_line_t *sam_hdr_next_line(sam_hdr_t *bh, sam_hdr_line_t *line) {
    if (!bh || !bh->hrecs || !line) return NULL;

    sam_hdr_line_t *next = line->global_next;
    return (next != bh->hrecs->first_line) ? next : NULL;
}

/*
* Appends a formatted line to an existing SAM header.
* Line is a full SAM header record, eg "@SQ\tSN:foo\tLN:100", with
Expand Down Expand Up @@ -1346,6 +1358,13 @@ int sam_hdr_add_line(sam_hdr_t *bh, const char *type, ...) {
return ret;
}

/*
 * Appends the formatted text of the header line pointed to by an iterator
 * to a kstring, using build_header_line().
 *
 * Returns  0 on success;
 *         -1 if line is NULL;
 *         -2 if ks is NULL or build_header_line() reports a failure.
 */
int sam_hdr_format_line_append(const sam_hdr_line_t *line, kstring_t *ks) {
    if (!line) return -1;
    if (!ks) return -2;

    if (build_header_line(line, ks) < 0) return -2;
    return 0;
}

/*
* Returns a complete line of formatted text for a specific head type/ID
* combination. If ID_key is NULL then it returns the first line of the specified
Expand Down Expand Up @@ -1399,6 +1418,27 @@ int sam_hdr_find_line_pos(sam_hdr_t *bh, const char *type,
return 0;
}

/*
 * Remove a line from the header via an iterator.
 *
 * @PG lines are refused: a warning is logged and NULL is returned.
 *
 * Returns an iterator pointing to the line that followed the removed one.
 * Returns NULL if the removed line was the last one, and also on any
 * failure (NULL arguments, no hrecs structure, @PG line, or an error from
 * sam_hrecs_remove_line() / rebuild_target_arrays()).
 */

sam_hdr_line_t *sam_hdr_remove_line(sam_hdr_t *bh, sam_hdr_line_t *line) {
    if (!bh || !bh->hrecs || !line) return NULL;

    if (line->type == TYPEKEY("PG")) {
        hts_log_warning("Removing PG lines is not supported!");
        return NULL;
    }

    /* Work out the successor first, while the line is still linked in. */
    sam_hdr_line_t *next = sam_hdr_next_line(bh, line);

    /* Two-letter type code unpacked from line->type, NUL-terminated so it
       is a valid C string for the callee. */
    char type[3] = { line->type >> 8, line->type & 0xff, '\0' };
    if (sam_hrecs_remove_line(bh->hrecs, type, line) < 0) return NULL;

    if (bh->hrecs->refs_changed >= 0 && rebuild_target_arrays(bh) != 0) return NULL;
    if (bh->hrecs->dirty) redact_header_text(bh);
    return next;
}

/*
* Remove a line from the header by specifying a tag:value that uniquely
* identifies a line, i.e. the @SQ line containing "SN:ref1".
Expand Down Expand Up @@ -1857,6 +1897,19 @@ const char *sam_hdr_line_name(sam_hdr_t *bh,

/* ==== Key:val level methods ==== */

/*
 * Looks up a key on the header line pointed to by an iterator and writes
 * its value into ks, replacing any previous content of ks.
 *
 * The value is taken from the tag text starting at offset 3, i.e. after
 * the first three characters of the stored tag string.
 *
 * Returns  0 on success;
 *         -1 if the key is not found or the stored tag is shorter than
 *            three characters;
 *         -2 if line, key or ks is NULL, or if writing to ks fails.
 */
int sam_hdr_find_tag(const sam_hdr_line_t *line,
                     const char *key,
                     kstring_t *ks) {
    if (!line || !key || !ks) return -2;

    sam_hrec_tag_t *tag = sam_hrecs_find_key(line, key, NULL);
    if (!tag || tag->len < 3) return -1;

    ks_clear(ks);
    if (kputsn(&tag->str[3], tag->len-3, ks) < 0) return -2;
    return 0;
}

int sam_hdr_find_tag_id(sam_hdr_t *bh,
const char *type,
const char *ID_key,
Expand Down Expand Up @@ -1889,6 +1942,16 @@ int sam_hdr_find_tag_id(sam_hdr_t *bh,
return 0;
}

/*
 * Removes a key from the header line pointed to by an iterator.
 *
 * Returns the result of sam_hrecs_remove_key(); the public documentation
 * for this function describes that as 1 if the key was removed, 0 if it
 * was not present and -1 on error. -1 is also returned if bh, bh->hrecs,
 * line or key is NULL.
 */
int sam_hdr_remove_tag(sam_hdr_t *bh,
                       sam_hdr_line_t *line,
                       const char *key) {
    if (!bh || !bh->hrecs || !line || !key) return -1;

    int ret = sam_hrecs_remove_key(bh->hrecs, line, key);

    /* Redact on every non-error outcome: a successful removal is reported
       as a positive value, so testing for ret == 0 alone would skip the
       redaction exactly when the line has changed. */
    if (ret >= 0 && bh->hrecs->dirty) redact_header_text(bh);
    return ret;
}

int sam_hdr_find_tag_pos(sam_hdr_t *bh,
const char *type,
int pos,
Expand Down Expand Up @@ -2572,7 +2635,7 @@ static int sam_hrecs_update(sam_hrecs_t *hrecs, sam_hrec_type_t *type, ...) {
* Returns the tag pointer on success
* NULL on failure
*/
sam_hrec_tag_t *sam_hrecs_find_key(sam_hrec_type_t *type,
sam_hrec_tag_t *sam_hrecs_find_key(const sam_hrec_type_t *type,
const char *key,
sam_hrec_tag_t **prev) {
sam_hrec_tag_t *tag, *p = NULL;
Expand Down
2 changes: 1 addition & 1 deletion header.h
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ void sam_hrecs_free(sam_hrecs_t *hrecs);
sam_hrec_type_t *sam_hrecs_find_type_id(sam_hrecs_t *hrecs, const char *type,
const char *ID_key, const char *ID_value);

sam_hrec_tag_t *sam_hrecs_find_key(sam_hrec_type_t *type,
sam_hrec_tag_t *sam_hrecs_find_key(const sam_hrec_type_t *type,
const char *key,
sam_hrec_tag_t **prev);

Expand Down
80 changes: 80 additions & 0 deletions htslib/sam.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ DEALINGS IN THE SOFTWARE. */
extern "C" {
#endif

struct sam_hrec_type_s;

/// Highest SAM format version supported by this library
#define SAM_FORMAT_VERSION "1.6"

Expand Down Expand Up @@ -458,6 +460,22 @@ int sam_hdr_nref(const sam_hdr_t *h);

/* ==== Line level methods ==== */

/*! @typedef
* @abstract Opaque type used as an iterator over header lines.
*/
typedef struct sam_hrec_type_s sam_hdr_line_t;

/// Return an iterator pointing to the first header line
HTSLIB_EXPORT
sam_hdr_line_t *sam_hdr_first_line(sam_hdr_t *h);

/// Return an iterator pointing to the next header line
/*!
* @return An iterator pointing to the next line, or NULL if there is none.
*/
HTSLIB_EXPORT
sam_hdr_line_t *sam_hdr_next_line(sam_hdr_t *h, sam_hdr_line_t *line);

/// Add formatted lines to an existing header.
/*!
* @param lines Full SAM header record, eg "@SQ\tSN:foo\tLN:100", with
Expand Down Expand Up @@ -490,6 +508,36 @@ int sam_hdr_add_lines(sam_hdr_t *h, const char *lines, size_t len);
HTSLIB_EXPORT
int sam_hdr_add_line(sam_hdr_t *h, const char *type, ...);

/// Returns a complete line of formatted text for the line pointed to.
/*!
* @param line Iterator pointing to a header line
* @param ks kstring to which to append the result
* @return 0 on success;
* -1 if @p line does not point to a header line
* -2 on other failures
*
* Puts a complete line of formatted text for a specific line into @p ks.
* Appends the text to the existing content in @p ks, if any.
*/
HTSLIB_EXPORT
int sam_hdr_format_line_append(const sam_hdr_line_t *line, kstring_t *ks);

/// Formats the header line pointed to by an iterator into a kstring.
/*!
 * @param line   Iterator pointing to a header line
 * @param ks     kstring that receives the result
 * @return       0 on success;
 *              -1 if @p line does not point to a header line
 *              -2 on other failures
 *
 * Clears @p ks with ks_clear() and then passes it to
 * sam_hdr_format_line_append(), so any existing content in @p ks is
 * overwritten rather than extended.
 */
static inline int sam_hdr_format_line(const sam_hdr_line_t *line, kstring_t *ks)
{
    kstring_t *dest = ks_clear(ks);
    return sam_hdr_format_line_append(line, dest);
}

/// Returns a complete line of formatted text for a given type and ID.
/*!
* @param type Type of the searched line. Eg. "SQ"
Expand Down Expand Up @@ -528,6 +576,14 @@ HTSLIB_EXPORT
int sam_hdr_find_line_pos(sam_hdr_t *h, const char *type,
int pos, kstring_t *ks);

/// Remove line pointed to by iterator from a header
/*!
* @param line Iterator pointing to a header line
* @return An iterator pointing to the following line, or NULL on error or if the removed line was the last line
*/
HTSLIB_EXPORT
sam_hdr_line_t *sam_hdr_remove_line(sam_hdr_t *h, sam_hdr_line_t *line);

/// Remove a line with given type / id from a header
/*!
* @param type Type of the searched line. Eg. "SQ"
Expand Down Expand Up @@ -679,6 +735,21 @@ const char *sam_hdr_line_name(sam_hdr_t *bh, const char *type, int pos);

/* ==== Key:val level methods ==== */

/// Return the value associated with a key for a header line identified by iterator
/*!
* @param line Iterator pointing to a header line
* @param key Key of the searched tag. Eg. "LN"
* @param ks kstring where the value will be written
* @return 0 on success
* -1 if the requested tag does not exist
* -2 on other errors
*
* Looks for a specific key in the SAM header line pointed to by @p line and writes the
* associated value into @p ks. Any pre-existing content in @p ks will be overwritten.
*/
HTSLIB_EXPORT
int sam_hdr_find_tag(const sam_hdr_line_t *line, const char *key, kstring_t *ks);

/// Return the value associated with a key for a header line identified by ID_key:ID_val
/*!
* @param type Type of the line to which the tag belongs. Eg. "SQ"
Expand Down Expand Up @@ -716,6 +787,15 @@ int sam_hdr_find_tag_id(sam_hdr_t *h, const char *type, const char *ID_key, cons
HTSLIB_EXPORT
int sam_hdr_find_tag_pos(sam_hdr_t *h, const char *type, int pos, const char *key, kstring_t *ks);

/// Remove the key from the line pointed to by the iterator.
/*!
* @param line Iterator pointing to a header line
* @param key Key of the targeted tag. Eg. "M5"
* @return 1 if the key was removed; 0 if it was not present; -1 on error
*/
HTSLIB_EXPORT
int sam_hdr_remove_tag(sam_hdr_t *h, sam_hdr_line_t *line, const char *key);

/// Remove the key from the line identified by type, ID_key and ID_value.
/*!
* @param type Type of the line to which the tag belongs. Eg. "SQ"
Expand Down

0 comments on commit 24406ca

Please sign in to comment.