Commit 3239112

Add code to put long segs back to original reads, but output seems to be wrong??
joydddd committed Feb 23, 2024
1 parent 474e746 commit 3239112
Showing 5 changed files with 41 additions and 13 deletions.
26 changes: 26 additions & 0 deletions gpu/debug.c
@@ -180,6 +180,32 @@ void debug_print_score(const int64_t *p, const int32_t *score, int64_t n) {
     read_idx++;
 }
 
+
+void debug_print_score_rel_p(const uint16_t *p, const int32_t *score, int64_t n) {
+    static FILE *fout_score = NULL;
+    static int read_idx = 0;
+    if (fout_score == NULL) {
+        char fout_score_filename[50];
+        strcpy(fout_score_filename, debug_folder);
+        strcat(fout_score_filename, ".score.out");
+        if ((fout_score = fopen(fout_score_filename, "w+")) == NULL) {
+            fprintf(stderr, "[Error]: Cannot create score output file: %s\n",
+                    fout_score_filename);
+            exit(1);
+        }
+        fprintf(stderr, "[Info] Writing score to file %s\n",
+                fout_score_filename);
+        fprintf(fout_score, "@@@<qname\tqlen\n");
+    }
+    fprintf(fout_score, "<%d\t\n", read_idx);
+    fprintf(fout_score, "#%ld\n", n);
+    for (int64_t i = 0; i < n; ++i) {
+        fprintf(fout_score, "%d,%u\t", score[i], (unsigned int)p[i]);
+    }
+    fprintf(fout_score, "\n");
+    read_idx++;
+}
+
 void debug_print_chain(mm128_t *a, uint64_t *u, int32_t n_u, char* qname) {
     static FILE *fout_chain = NULL;
     if (fout_chain == NULL) {
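
A minimal sketch of how the new dump routine might be driven, assuming it is called once per read after the device-to-host copy, the same way debug_print_score is. The host-buffer field names f and p are taken from the copies in gpu/plmem.cu; the call site itself and n_anchors are hypothetical:

    /* Hypothetical call site: dump one read's chaining results after D2H. */
    const uint16_t *rel_p = stream.host_mems[uid].p;   /* 16-bit relative predecessors */
    const int32_t  *score = stream.host_mems[uid].f;   /* chaining scores */
    debug_print_score_rel_p(rel_p, score, n_anchors);  /* n_anchors: anchors in this read */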
1 change: 1 addition & 0 deletions gpu/debug.h
@@ -60,6 +60,7 @@ void debug_output_meta(const char debug_folder[], input_meta_t *meta);
 void debug_print_successor_range(int32_t *range, int64_t n);
 int debug_print_cut(const size_t *cut, size_t max_cut, size_t n, size_t offset, char* qname);
 void debug_print_score(const int64_t *p, const int32_t *score, int64_t n);
+void debug_print_score_rel_p(const uint16_t *p, const int32_t *score, int64_t n);
 void debug_print_chain(mm128_t* a, uint64_t *u, int32_t n_u, char* qname);
 void debug_print_regs(mm_reg1_t *regs, int n_u, char *qname);
 void debug_print_segs(seg_t *segs, chain_read_t *reads, int num_segs, int num_reads);
17 changes: 9 additions & 8 deletions gpu/plchain.cu
@@ -439,8 +439,6 @@ void plchain_cal_sc_pair_density(size_t total_n, size_t num_cut, deviceMemPtr* d

 #ifdef DEBUG_CHECK
 void plchain_debug_analysis_short(stream_ptr_t stream, int uid, float throughput[]){
-    // TODO: analysis multiple or current host mems
-    // TODO: this needs to be recalculated
     cudaStreamSynchronize(stream.cudastream);
     size_t total_n = stream.host_mems[uid].total_n;
     chain_read_t* reads = stream.reads;
@@ -642,8 +640,9 @@ int plchain_finish_batch(streamSetup_t stream_setup, int stream_id, Misc misc, v
 #endif
 
 
-    seg_t* long_segs = stream_setup.streams[stream_id].long_mem.long_segs;
+    seg_t* long_segs = stream_setup.streams[stream_id].long_mem.long_segs_og_idx;
     size_t long_seg_idx = 0;
+    size_t long_i = 0;
     for (int uid = 0; uid < MICRO_BATCH; uid++) {
         // reorg long results back into each host mem ptr
         // NOTE: this is the number of long segs up to this microbatch
@@ -654,7 +653,13 @@
 #endif // DEBUG_PRINT
         size_t total_n_long_segs = 0;
         for (; long_seg_idx < long_segs_num; long_seg_idx++) {
-            // TODO: write long_segs + long_seg_idx to f/p
+            for (size_t i = long_segs[long_seg_idx].start_idx;
+                 i < long_segs[long_seg_idx].end_idx; i++, long_i++) {
+                stream_setup.streams[stream_id].host_mems[uid].f[i] =
+                    stream_setup.streams[stream_id].long_mem.f_long[long_i];
+                stream_setup.streams[stream_id].host_mems[uid].p[i] =
+                    stream_setup.streams[stream_id].long_mem.p_long[long_i];
+            }
             total_n_long_segs += long_segs[long_seg_idx].end_idx - long_segs[long_seg_idx].start_idx;
         }

@@ -673,7 +678,6 @@ int plchain_finish_batch(streamSetup_t stream_setup, int stream_id, Misc misc, v
 #endif // DEBUG_PRINT
     }
 
-
 #ifdef DEBUG_PRINT
     fprintf(stderr, "----------------------------------------------------------------------------\n ");
     for (int uid = 0; uid < MICRO_BATCH; uid++) fprintf(stderr, " Short%d", uid);
@@ -1029,9 +1033,6 @@ void finish_stream_gpu(const mm_idx_t *mi, const mm_mapopt_t *opt, chain_read_t*

     n_read = plchain_finish_batch(stream_setup, t, misc, km);
     reads = stream_setup.streams[t].reads;
-#ifdef DEBUG_PRINT
-    fprintf(stderr, "[Debug] %s finish (%s:%d) n_read %d\n", __func__, __FILE__, __LINE__, n_read);
-#endif // DEBUG_PRINT
     stream_setup.streams[t].busy = false;
 
     for (int i = 0; i < n_read; i++) {
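
For reference, the new write-back in plchain_finish_batch, condensed into a standalone sketch. The names f, p, f_long, p_long, long_segs_num and long_segs_og_idx are the same fields, hoisted out of stream_setup for readability; this restates the committed loop rather than adding behavior:

    /* Long results come back packed consecutively in f_long/p_long, in the
     * order the long segments were emitted; long_segs_og_idx[k] remembers
     * where segment k originally lived in its micro-batch's flat arrays. */
    size_t long_i = 0;                              /* cursor into f_long/p_long */
    for (size_t k = 0; k < long_segs_num; ++k) {
        seg_t s = long_segs_og_idx[k];              /* original [start_idx, end_idx) */
        for (size_t i = s.start_idx; i < s.end_idx; ++i, ++long_i) {
            f[i] = f_long[long_i];                  /* scatter score back in place */
            p[i] = p_long[long_i];                  /* and its relative predecessor */
        }
    }

One invariant worth checking against the "output seems to be wrong??" in the title: start_idx/end_idx must be relative to the owning micro-batch uid, because long_i keeps running across all micro-batches while i rewinds to each segment's original range.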
8 changes: 4 additions & 4 deletions gpu/plmem.cu
@@ -49,7 +49,7 @@ void plmem_malloc_long_mem(longMemPtr *long_mem, size_t buffer_size_long) {
     fprintf(stderr, "[Info] Host Malloc Pinned Memory Size %.2f GB (long seg)\n", (float)host_mem_size / OneG);
 #endif
     // data array
-    cudaMallocHost((void**)&long_mem->long_segs, buffer_size_long / (MM_LONG_SEG_CUTOFF * MM_CUT_SIZE) * sizeof(seg_t));
+    cudaMallocHost((void**)&long_mem->long_segs_og_idx, buffer_size_long / (MM_LONG_SEG_CUTOFF * MM_CUT_SIZE) * sizeof(seg_t));
     cudaMallocHost((void**)&long_mem->f_long, buffer_size_long * sizeof(int32_t));
     cudaMallocHost((void**)&long_mem->p_long, buffer_size_long * sizeof(uint16_t));
     cudaMallocHost((void**)&long_mem->total_long_segs_num, sizeof(unsigned int));
@@ -72,7 +72,7 @@ void plmem_free_host_mem(hostMemPtr *host_mem) {
 }
 
 void plmem_free_long_mem(longMemPtr *long_mem) {
-    cudaFreeHost(long_mem->long_segs);
+    cudaFreeHost(long_mem->long_segs_og_idx);
     cudaFreeHost(long_mem->f_long);
     cudaFreeHost(long_mem->p_long);
     cudaFreeHost(long_mem->total_long_segs_num);
@@ -314,7 +314,7 @@ void plmem_async_d2h_memcpy(stream_ptr_t *stream_ptrs) {
     cudaMemcpyAsync(host_mem->p, dev_mem->d_p,
                     sizeof(uint16_t) * host_mem->total_n,
                     cudaMemcpyDeviceToHost, *stream);
-    cudaMemcpyAsync(long_mem->long_segs, dev_mem->d_long_seg_og,
+    cudaMemcpyAsync(long_mem->long_segs_og_idx, dev_mem->d_long_seg_og,
                     dev_mem->buffer_size_long / (MM_LONG_SEG_CUTOFF * MM_CUT_SIZE) * sizeof(seg_t),
                     cudaMemcpyDeviceToHost, *stream);
     cudaMemcpyAsync(host_mem->long_segs_num, dev_mem->d_long_seg_count,
Expand Down Expand Up @@ -348,7 +348,7 @@ void plmem_async_d2h_long_memcpy(stream_ptr_t *stream_ptrs) {
     longMemPtr *long_mem = &stream_ptrs->long_mem;
     deviceMemPtr *dev_mem = &stream_ptrs->dev_mem;
     cudaStream_t *stream = &stream_ptrs->cudastream;
-    cudaMemcpyAsync(long_mem->long_segs, dev_mem->d_long_seg_og,
+    cudaMemcpyAsync(long_mem->long_segs_og_idx, dev_mem->d_long_seg_og,
                     dev_mem->buffer_size_long / (MM_LONG_SEG_CUTOFF * MM_CUT_SIZE) * sizeof(seg_t),
                     cudaMemcpyDeviceToHost, *stream);
     // cudaMemcpyAsync(&long_mem->total_long_segs_num, dev_mem->d_long_seg_count,
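
The long_segs_og_idx sizing repeated in these copies matches the array-size comment in plmem.cuh ("number of cuts in the batch / long_seg_cut"). My reading of the bound, as a sketch: a segment only qualifies as long once it spans at least MM_LONG_SEG_CUTOFF cuts of MM_CUT_SIZE anchors each, so a buffer of buffer_size_long anchors can yield at most

    /* Upper bound on long segments per batch (reading of the sizing above). */
    size_t max_long_segs = dev_mem->buffer_size_long / (MM_LONG_SEG_CUTOFF * MM_CUT_SIZE);

entries, which (times sizeof(seg_t)) is exactly the byte count passed to both the cudaMallocHost and the cudaMemcpyAsync calls.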
2 changes: 1 addition & 1 deletion gpu/plmem.cuh
@@ -45,7 +45,7 @@ typedef struct {

 typedef struct {
     // array size: number of cuts in the batch / long_seg_cut
-    seg_t *long_segs;
+    seg_t *long_segs_og_idx; // start & end idx of long segs in the original micro batch
     unsigned int *total_long_segs_num; // sum of mini batch long_segs_num
     size_t *total_long_segs_n; // number of anchors in all the long segs
     int32_t *f_long; // score for long segs
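
seg_t itself is not touched by this diff; from the way plchain_finish_batch dereferences it, a shape like the following is assumed (field names match the usage, but the exact types are a guess):

    /* Assumed layout, not shown in this commit: the write-back only needs
     * each long segment's original anchor index range. */
    typedef struct {
        size_t start_idx;   /* first anchor of the segment in the flat batch arrays */
        size_t end_idx;     /* one past the last anchor */
    } seg_t;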
