Skip to content

Commit

Permalink
bug fix
Browse files Browse the repository at this point in the history
  • Loading branch information
chhylp123 committed May 2, 2021
1 parent 9c205c8 commit 24e5453
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 29 deletions.
1 change: 0 additions & 1 deletion CommandLines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,6 @@ void init_opt(hifiasm_opt_t* asm_opt)
asm_opt->purge_level_trio = 0;
asm_opt->purge_simi_rate_l2 = 0.75;
asm_opt->purge_simi_rate_l3 = 0.55;
///asm_opt->purge_simi_rate_hic = 0.85;
asm_opt->purge_overlap_len = 1;
///asm_opt->purge_overlap_len_hic = 50;
asm_opt->recover_atg_cov_min = -1024;
Expand Down
3 changes: 2 additions & 1 deletion CommandLines.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include <pthread.h>
#include <stdint.h>

#define HA_VERSION "0.15.1-r333"
#define HA_VERSION "0.15.1-r334"

#define VERBOSE 0

Expand Down Expand Up @@ -86,6 +86,7 @@ typedef struct {
float purge_simi_rate_l2;
float purge_simi_rate_l3;
float purge_simi_thres;

///float purge_simi_rate_hic;

long long small_pop_bubble_size;
Expand Down
29 changes: 8 additions & 21 deletions Overlaps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11200,7 +11200,7 @@ uint32_t only_len)
break;
}
}
if(k == nv) fprintf(stderr, "ERROR\n");
if(k == nv) fprintf(stderr, "ERROR-set_utg_offset\n");
}

p_v = v; len += l;
Expand Down Expand Up @@ -11272,8 +11272,8 @@ void refine_u_trans_t(u_trans_hit_t *q, kv_ca_buf_t* cb)
if(si != cb->n && ei != cb->n) break;
}

if(si == 0 || ei == 0) fprintf(stderr, "ERROR\n");
if(si >= cb->n || ei >= cb->n) fprintf(stderr, "ERROR\n");
if(si == 0 || ei == 0) fprintf(stderr, "ERROR-si-ei-0\n");
if(si >= cb->n || ei >= cb->n) fprintf(stderr, "ERROR-si-ei-1\n");
si--; ei--;

if(s < cb->a[si].c_x_p || ((si + 1) < cb->n && s >= cb->a[si + 1].c_x_p))
Expand Down Expand Up @@ -11452,7 +11452,7 @@ uint32_t get_u_trans_hit(u_trans_hit_idx *t, u_trans_hit_t *hit)
break;
}
}
if(k == nv) fprintf(stderr, "ERROR\n");
if(k == nv) fprintf(stderr, "ERROR-nv\n");
}

///[t->cBeg, t->cEnd)
Expand Down Expand Up @@ -12762,22 +12762,14 @@ long long gap_fuzz, bub_label_t* b_mask_t)
if((asm_opt.flag & HA_F_VERBOSE_GFA)) write_trans_chain(cov->t_ch, output_file_name);
}

hic_analysis(ug, sg, cov?cov->t_ch:t_ch);





char* gfa_name = (char*)malloc(strlen(output_file_name)+25);
sprintf(gfa_name, "%s.clean_d_utg.noseq.gfa", output_file_name);
FILE* output_file = fopen(gfa_name, "w");
ma_ug_print_simple(ug, &R_INF, sg, coverage_cut, sources, ruIndex, "utg", output_file);
fclose(output_file);
free(gfa_name);




hic_analysis(ug, sg, cov?cov->t_ch:t_ch);

if(cov) destory_hap_cov_t(&cov);
if(t_ch) destory_trans_chain(&t_ch);
Expand Down Expand Up @@ -16593,13 +16585,13 @@ void chain_origin_trans_uid_s_bubble(buf_t *pri, buf_t* aux, uint32_t beg, uint3
for (i = 0; i < nv; ++i)
{
if(av[i].del) continue;
if(av[i].v == pri_v) priEnd = pri_len - av[i].ol - 1;
if(av[i].v == aux_v) auxEnd = aux_len - av[i].ol - 1;
if(av[i].v == pri_v) priEnd = ((pri_len > av[i].ol)? (pri_len - av[i].ol - 1) : 0);
if(av[i].v == aux_v) auxEnd = ((aux_len > av[i].ol)? (aux_len - av[i].ol - 1) : 0);
}

if(priBeg == (uint32_t)-1 || priEnd == (uint32_t)-1 || auxBeg == (uint32_t)-1 || auxEnd == (uint32_t)-1)
{
fprintf(stderr, "ERROR\n");
fprintf(stderr, "ERROR-s_bubble\n");
}

cov->u_buffer.a.n = cov->tailIndex.a.n = 0;
Expand Down Expand Up @@ -27630,11 +27622,6 @@ bub_label_t* b_mask_t)
ma_ug_t *ug = NULL;
ug = ma_ug_gen_primary(sg, PRIMARY_LABLE);

// FILE* output_file = fopen("straw-debug.noseq.gfa", "w");
// ma_ug_print_simple(ug, &R_INF, sg, coverage_cut, sources, ruIndex, "utg", output_file);
// fclose(output_file);


hap_cov_t *cov = NULL;
asg_t *copy_sg = copy_read_graph(sg);
ma_ug_t *copy_ug = copy_untig_graph(ug);
Expand Down
70 changes: 64 additions & 6 deletions Purge_Dups.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "rcut.h"

KDQ_INIT(uint64_t)
KSORT_INIT_GENERIC(uint64_t)

uint8_t debug_enable = 0;

Expand Down Expand Up @@ -2423,7 +2424,7 @@ uint64_t* position_index, uint32_t xUid, uint32_t yUid, ma_utg_t* xReads, ma_utg
ma_hit_t_alloc* sources, ma_hit_t_alloc* reverse_sources, asg_t *read_g, R_to_U* ruIndex, ma_sub_t *coverage_cut,
float Hap_rate, int is_local, int max_hang, int min_ovlp, uint64_t cov_threshold, kvec_asg_arc_t_offset* u_buffer,
kvec_t_i32_warp* tailIndex, kvec_t_i32_warp* prevIndex, hap_cov_t *cov, long long* r_x_pos_beg, long long* r_x_pos_end,
long long* r_y_pos_beg, long long* r_y_pos_end)
long long* r_y_pos_beg, long long* r_y_pos_end, float *sim)
{
uint32_t max_count = 0, min_count = 0, flag;
uint32_t xLen = xReads->n, xIndex;
Expand Down Expand Up @@ -2537,6 +2538,7 @@ long long* r_y_pos_beg, long long* r_y_pos_end)

get_pair_hap_similarity_by_base(xReads, read_g, yUid, reverse_sources, ruIndex,
*r_x_pos_beg, *r_x_pos_end, &xLeftMatch, &xLeftTotal);
(*sim) = ((double)xLeftMatch)/((double)xLeftTotal);
if(xLeftMatch == 0 || xLeftTotal == 0 || xLeftMatch <= xLeftTotal*Hap_rate)
{
return NON_PLOID;
Expand Down Expand Up @@ -2842,6 +2844,63 @@ int filter_secondary_chain(long long max_score, long long cur_score, double rate
return 1;
}


void filter_secondary_ovlp(kvec_hap_overlaps *x, kvec_t_u64_warp *a, float sim_flt, float ovlp_flt)
{
if(sim_flt == 0 || ovlp_flt == 0 || x->a.n == 0) return;
#define f_ovlp(s_0, e_0, s_1, e_1) ((MIN((e_0), (e_1)) > MAX((s_0), (s_1)))? MIN((e_0), (e_1)) - MAX((s_0), (s_1)):0)
uint32_t i, m, k;
uint64_t t, ovlp;
hap_overlaps *p = NULL;
a->a.n = 0;
for (i = 0; i < x->a.n; i++)
{
if(x->a.a[i].s < sim_flt) continue;
t = x->a.a[i].x_beg_pos; t<<=32; t |= x->a.a[i].x_end_pos;
kv_push(uint64_t, a->a, t);
}

if(a->a.n == 0) return;
ks_introsort_uint64_t(a->a.n, a->a.a);

for (i = m = 1; i < a->a.n; ++i)
{
t = a->a.a[m-1];
ovlp = f_ovlp(t>>32, (uint32_t)t, a->a.a[i]>>32, (uint32_t)a->a.a[i]);
if(ovlp == 0)
{
a->a.a[m] = a->a.a[i];
m++;
}
else
{
t = MIN(a->a.a[m-1]>>32, a->a.a[i]>>32);
t<<=32;
t |= MAX((uint32_t)a->a.a[m-1], (uint32_t)a->a.a[i]);
a->a.a[m-1] = t;
}
}
a->a.n = m;

for (i = m = 0; i < x->a.n; i++)
{
p = &(x->a.a[i]);
if(p->s < sim_flt)
{
for (k = ovlp = 0; k < a->a.n; k++)
{
ovlp += f_ovlp(p->x_beg_pos, p->x_end_pos, a->a.a[k]>>32, (uint32_t)a->a.a[k]);
if(ovlp >= ovlp_flt*(p->x_end_pos-p->x_beg_pos)) break;
}
if(k < a->a.n) continue;
if(ovlp >= ovlp_flt*(p->x_end_pos-p->x_beg_pos)) continue;
}
x->a.a[m] = x->a.a[i];
m++;
}
x->a.n = m;
}

static void hap_alignment_advance_worker(void *_data, long eid, int tid)
{
hap_alignment_struct_pip* hap_buf = (hap_alignment_struct_pip*)_data;
Expand All @@ -2852,7 +2911,7 @@ static void hap_alignment_advance_worker(void *_data, long eid, int tid)
R_to_U* ruIndex = hap_buf->ruIndex;
ma_sub_t *coverage_cut = hap_buf->coverage_cut;
uint64_t* position_index = hap_buf->position_index;
float Hap_rate = hap_buf->Hap_rate;
float Hap_rate = hap_buf->Hap_rate/**MIN(hap_buf->Hap_rate, 0.2)**/, sim;
int max_hang = hap_buf->max_hang;
int min_ovlp = hap_buf->min_ovlp;
float chain_rate = hap_buf->chain_rate;
Expand Down Expand Up @@ -3017,7 +3076,7 @@ static void hap_alignment_advance_worker(void *_data, long eid, int tid)
if(calculate_pair_hap_similarity_advance(&(u_can->a.a[k]), position_index, xUid, yUid,
xReads, yReads, sources, reverse_sources, read_g, ruIndex, coverage_cut, Hap_rate,
(asm_opt.purge_level_primary<=2? 0:1), max_hang, min_ovlp, cov_threshold, u_buffer,
score_vc, prevIndex_vec, cov, &r_x_pos_beg, &r_x_pos_end, &r_y_pos_beg, &r_y_pos_end)!=PLOID)
score_vc, prevIndex_vec, cov, &r_x_pos_beg, &r_x_pos_end, &r_y_pos_beg, &r_y_pos_end, &sim)!=PLOID)
{
continue;
}
Expand Down Expand Up @@ -3050,6 +3109,7 @@ static void hap_alignment_advance_worker(void *_data, long eid, int tid)
hap_align.xUid = xUid;
hap_align.yUid = yUid;
hap_align.status = SELF_EXIST;
hap_align.s = sim;
kv_push(hap_overlaps, all_ovlp->x[hap_align.xUid].a, hap_align);
}
/**
Expand Down Expand Up @@ -3091,7 +3151,7 @@ static void hap_alignment_advance_worker(void *_data, long eid, int tid)
all_ovlp->x[xUid].a.n = m + 1;
}
}

// filter_secondary_ovlp(&all_ovlp->x[xUid], u_vecs, hap_buf->Hap_rate, 0.7);
}

int get_specific_hap_overlap(kvec_hap_overlaps* x, uint32_t qn, uint32_t tn)
Expand Down Expand Up @@ -4513,7 +4573,6 @@ void print_all_purge_ovlp(ma_ug_t *ug, hap_overlaps_list* all_ovlp, const char*
for (v = 0; v < all_ovlp->num; v++)
{
uId = v;
///if(uId != 96 && uId != 272) continue;
for (i = 0; i < all_ovlp->x[uId].a.n; i++)
{
print_hap_paf(ug, &(all_ovlp->x[uId].a.a[i]));
Expand Down Expand Up @@ -5242,7 +5301,6 @@ uint32_t just_coverage, hap_cov_t *cov, uint32_t collect_p_trans, uint32_t colle
if(asm_opt.polyploidy <= 2)
{
mc_solve(&all_ovlp, cov->t_ch, NULL, ug, read_g, 0.8, R_INF.trio_flag, 1, NULL, 1, NULL, NULL);
///pt_solve(&all_ovlp, cov->t_ch, ug, read_g, 0.8, R_INF.trio_flag);
}

if(collect_p_trans && collect_p_trans_f == 1)
Expand Down
1 change: 1 addition & 0 deletions Purge_Dups.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ typedef struct {
uint32_t yUid;
uint32_t weight;
long long score;
float s;
}hap_overlaps;

typedef struct {
Expand Down
15 changes: 15 additions & 0 deletions hic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15410,6 +15410,20 @@ void resolve_tangles_hic(ha_ug_index *idx, bubble_type *bub, kvec_pe_hit *hits,
ta->idx.n = ta->n = 0;
}


void print_kv_u_trans_t(kv_u_trans_t *ta)
{
uint32_t i;
u_trans_t *p = NULL;
for (i = 0; i < ta->n; i++)
{
p = &(ta->a[i]);
fprintf(stderr, "q-utg%.6ul\tqs(%u)\tqe(%u)\tt-utg%.6ul\tts(%u)\tte(%u)\trev(%u)\tw(%f)\tf(%u)\n",
p->qn+1, p->qs, p->qe, p->tn+1, p->ts, p->te, p->rev, p->nw, p->f);
}
fprintf(stderr, "[M::%s::] \n", __func__);
}

int hic_short_align(const enzyme *fn1, const enzyme *fn2, ha_ug_index* idx)
{
double index_time = yak_realtime();
Expand All @@ -15434,6 +15448,7 @@ int hic_short_align(const enzyme *fn1, const enzyme *fn2, ha_ug_index* idx)
///debug_hc_hits_v14(&sl.hits, asm_opt.output_file_name, sl.idx);
////dedup_hits(&(sl.hits), sl.idx);
///write_hc_hits_v14(&sl.hits, asm_opt.output_file_name);
// print_kv_u_trans_t(&(idx->t_ch->k_trans));

hc_links link;
init_hc_links(&link, idx->ug->g->n_seq, idx->t_ch);
Expand Down

0 comments on commit 24e5453

Please sign in to comment.