Skip to content

Commit

Permalink
Allow kseq_t to read sequences larger than 2^31 bytes
Browse files (browse the repository at this point in the history)
  • Loading branch information
milot-mirdita committed Jun 15, 2023
1 parent 3e43617 commit 07ca4a7
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 20 deletions.
19 changes: 11 additions & 8 deletions lib/ksw2/kseq.h
Original file line number | Diff line number | Diff line change
Expand Up @@ -31,6 +31,7 @@
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>

#define KS_SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r
#define KS_SEP_TAB 1 // isspace() && !' '
Expand All @@ -39,8 +40,9 @@

#define __KS_TYPE(type_t) \
typedef struct __kstream_t { \
char *buf; \
int begin, end, is_eof; \
char *buf; \
int64_t begin, end; \
int is_eof; \
size_t cur_buf_pos; \
size_t newline; \
type_t f; \
Expand Down Expand Up @@ -94,13 +96,13 @@ typedef struct __kstring_t {
#endif

#define __KS_GETUNTIL(__read, __bufsize) \
static int ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append) \
static int64_t ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append) \
{ \
int gotany = 0; \
if (dret) *dret = 0; \
str->l = append? str->l : 0; \
for (;;) { \
int i; \
int64_t i; \
if (ks_err(ks)) return -3; \
if (ks->begin >= ks->end) { \
if (!ks->is_eof) { \
Expand Down Expand Up @@ -146,7 +148,7 @@ typedef struct __kstring_t {
str->s[str->l] = '\0'; \
return str->l; \
} \
static inline int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
static inline int64_t ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
{ return ks_getuntil2(ks, delimiter, str, dret, 0); }

#define KSTREAM_INIT(type_t, __read, __bufsize) \
Expand Down Expand Up @@ -182,9 +184,10 @@ typedef struct __kstring_t {
-3 error reading stream
*/
#define __KSEQ_READ(SCOPE) \
SCOPE int kseq_read(kseq_t *seq) \
SCOPE int64_t kseq_read(kseq_t *seq) \
{ \
int c,r; \
int c; \
int64_t r; \
kstream_t *ks = seq->f; \
ks->newline = 0; \
if (seq->last_char == 0) { /* then jump to the next header line */ \
Expand Down Expand Up @@ -255,6 +258,6 @@ typedef struct __kstring_t {
__KSEQ_TYPE(type_t) \
extern kseq_t *kseq_init(type_t fd); \
void kseq_destroy(kseq_t *ks); \
int kseq_read(kseq_t *seq);
int64_t kseq_read(kseq_t *seq);

#endif
15 changes: 5 additions & 10 deletions src/commons/KSeqWrapper.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -17,8 +17,7 @@ KSeqFile::KSeqFile(const char* fileName) {

bool KSeqFile::ReadEntry() {
KSEQFILE::kseq_t* s = (KSEQFILE::kseq_t*) seq;
int result = KSEQFILE::kseq_read(s);
if (result < 0)
if (KSEQFILE::kseq_read(s) < 0)
return false;
entry.headerOffset = s->headerOffset;
entry.sequenceOffset = s->sequenceOffset;
Expand Down Expand Up @@ -51,8 +50,7 @@ KSeqStream::KSeqStream() {

bool KSeqStream::ReadEntry() {
KSEQSTREAM::kseq_t* s = (KSEQSTREAM::kseq_t*) seq;
int result = KSEQSTREAM::kseq_read(s);
if (result < 0)
if (KSEQSTREAM::kseq_read(s) < 0)
return false;

entry.name = s->name;
Expand Down Expand Up @@ -90,8 +88,7 @@ KSeqGzip::KSeqGzip(const char* fileName) {

bool KSeqGzip::ReadEntry() {
KSEQGZIP::kseq_t* s = (KSEQGZIP::kseq_t*) seq;
int result = KSEQGZIP::kseq_read(s);
if (result < 0)
if (KSEQGZIP::kseq_read(s) < 0)
return false;

entry.name = s->name;
Expand Down Expand Up @@ -135,8 +132,7 @@ KSeqBzip::KSeqBzip(const char* fileName) {

bool KSeqBzip::ReadEntry() {
KSEQBZIP::kseq_t* s = (KSEQBZIP::kseq_t*) seq;
int result = KSEQBZIP::kseq_read(s);
if (result < 0)
if (KSEQBZIP::kseq_read(s) < 0)
return false;

entry.name = s->name;
Expand Down Expand Up @@ -209,8 +205,7 @@ KSeqBuffer::KSeqBuffer(const char* buffer, size_t length) {

bool KSeqBuffer::ReadEntry() {
KSEQBUFFER::kseq_t* s = (KSEQBUFFER::kseq_t*) seq;
int result = KSEQBUFFER::kseq_read(s);
if (result < 0)
if (KSEQBUFFER::kseq_read(s) < 0)
return false;
entry.headerOffset = s->headerOffset;
entry.sequenceOffset = s->sequenceOffset;
Expand Down
3 changes: 1 addition & 2 deletions src/test/TestAlignmentPerformance.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -36,9 +36,8 @@ std::vector<std::string> readData(std::string fasta_filename){
FILE* fasta_file = fopen(fasta_filename.c_str(), "r");
if(fasta_file == NULL) {std::cout << "Could not open " << fasta_filename<<std::endl; EXIT(1); }
seq = kseq_init(fileno(fasta_file));
int l;
size_t entries_num = 0;
while ((l = kseq_read(seq)) >= 0) {
while (kseq_read(seq) >= 0) {
if (entries_num > 1000)
break;
if (seq->seq.l > 500) {
Expand Down

0 comments on commit 07ca4a7

Please sign in to comment.