Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make repeated seeks possible #1363

Merged
merged 1 commit into from
Jan 4, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 21 additions & 3 deletions synced_bcf_reader.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ static int _regions_add(bcf_sr_regions_t *reg, const char *chr, hts_pos_t start,
static bcf_sr_regions_t *_regions_init_string(const char *str);
static int _regions_match_alleles(bcf_sr_regions_t *reg, int als_idx, bcf1_t *rec);
static void _regions_sort_and_merge(bcf_sr_regions_t *reg);
static int _bcf_sr_regions_overlap(bcf_sr_regions_t *reg, const char *seq, hts_pos_t start, hts_pos_t end, int missed_reg_handler);

char *bcf_sr_strerror(int errnum)
{
Expand Down Expand Up @@ -838,6 +839,7 @@ static void bcf_sr_seek_start(bcf_srs_t *readers)
for (i=0; i<reg->nseqs; i++)
reg->regs[i].creg = -1;
reg->iseq = 0;
reg->prev_seq = -1;
}


Expand All @@ -851,8 +853,18 @@ int bcf_sr_seek(bcf_srs_t *readers, const char *seq, hts_pos_t pos)
bcf_sr_seek_start(readers);
return 0;
}
bcf_sr_regions_overlap(readers->regions, seq, pos, pos);

int i, nret = 0;

// Need to position both the readers and the regions. The latter is a bit of a mess
// because we can have in memory or external regions. The safe way is:
// - reset all regions as if they were not read from at all (bcf_sr_seek_start)
// - find the requested iseq (stored in the seq_hash)
// - position regions to the requested position (bcf_sr_regions_overlap)
bcf_sr_seek_start(readers);
if ( khash_str2int_get(readers->regions->seq_hash, seq, &i)>=0 ) readers->regions->iseq = i;
_bcf_sr_regions_overlap(readers->regions, seq, pos, pos, 0);

for (i=0; i<readers->nreaders; i++)
{
nret += _reader_seek(&readers->readers[i],seq,pos,MAX_CSI_COOR-1);
Expand Down Expand Up @@ -1406,14 +1418,20 @@ static int _regions_match_alleles(bcf_sr_regions_t *reg, int als_idx, bcf1_t *re
}

int bcf_sr_regions_overlap(bcf_sr_regions_t *reg, const char *seq, hts_pos_t start, hts_pos_t end)
{
return _bcf_sr_regions_overlap(reg,seq,start,end,1);
}

static int _bcf_sr_regions_overlap(bcf_sr_regions_t *reg, const char *seq, hts_pos_t start, hts_pos_t end, int missed_reg_handler)
{
int iseq;
if ( khash_str2int_get(reg->seq_hash, seq, &iseq)<0 ) return -1; // no such sequence
if ( missed_reg_handler && !reg->missed_reg_handler ) missed_reg_handler = 0;

if ( reg->prev_seq==-1 || iseq!=reg->prev_seq || reg->prev_start > start ) // new chromosome or after a seek
{
// flush regions left on previous chromosome
if ( reg->missed_reg_handler && reg->prev_seq!=-1 && reg->iseq!=-1 )
if ( missed_reg_handler && reg->prev_seq!=-1 && reg->iseq!=-1 )
bcf_sr_regions_flush(reg);

bcf_sr_regions_seek(reg, seq);
Expand All @@ -1427,7 +1445,7 @@ int bcf_sr_regions_overlap(bcf_sr_regions_t *reg, const char *seq, hts_pos_t sta
{
if ( bcf_sr_regions_next(reg) < 0 ) return -2; // no more regions left
if ( reg->iseq != iseq ) return -1; // does not overlap any regions
if ( reg->missed_reg_handler && reg->end < start ) reg->missed_reg_handler(reg, reg->missed_reg_data);
if ( missed_reg_handler && reg->end < start ) reg->missed_reg_handler(reg, reg->missed_reg_data);
}
if ( reg->start <= end ) return 0; // region overlap
return -1; // no overlap
Expand Down