Skip to content

Commit

Permalink
Merge commit '1e8781a' into gpu_kernel
Browse files Browse the repository at this point in the history
  • Loading branch information
joydddd committed Oct 11, 2023
2 parents 1d5c636 + 1e8781a commit 247a17f
Show file tree
Hide file tree
Showing 7 changed files with 28 additions and 19 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,5 @@ ncu
nsys
profile_output*
workloads
.cmake/**
.depend
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
CFLAGS= -O2 -g -DNDEBUG
CDEBUG_FLAGS= -g -Wall -Wc++-compat -Wextra -O2 #-O0 -DNDEBUG
CDEBUG_FLAGS= -g -Wall -Wc++-compat -Wextra -DDEBUG_PRINT -O2 #-O0 -DNDEBUG
CPPFLAGS= -DHAVE_KALLOC -D__AMD_SPLIT_KERNELS__ # -Wno-unused-but-set-variable -Wno-unused-variable
INCLUDES= -I .
OBJS= kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o options.o index.o \
Expand Down Expand Up @@ -40,6 +40,7 @@ ifneq ($(DEBUG),) # turn on debug flags
endif
ifneq ($(DEBUG_ANALYSIS),) # turn on debug flags
CFLAGS = $(CDEBUG_FLAGS)
CFLAGS += -DDEBUG_CHECK -DDEBUG_VERBOSE
endif

.PHONY:all extra clean depend # profile
Expand Down
10 changes: 9 additions & 1 deletion gpu/debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ void debug_output_anchors(const char debug_folder[], chain_read_t *in) {

/* Write Sequence Name and Length, rep_len*/
fprintf(f_anchors, "<%s\t%d\n", in->seq.name, in->seq.len);
fprintf(f_anchors, "*%d\n", in->seq.rep_len);
fprintf(f_anchors, "*%d\n", in->rep_len);

/* Read Number of Anchors */
fprintf(f_anchors, "#%d\n", in->n);
Expand All @@ -45,6 +45,8 @@ void debug_output_anchors(const char debug_folder[], chain_read_t *in) {
fprintf(f_anchors, "\n");
}

// DEBUG: not used
#if 0
void debug_output_score(const char debug_folder[], chain_read_t *in) {
static FILE *f_score = NULL;
if (!f_score) {
Expand Down Expand Up @@ -73,6 +75,7 @@ void debug_output_score(const char debug_folder[], chain_read_t *in) {
}
fprintf(f_score, "\n");
}
#endif

void debug_output_meta(const char debug_folder[], input_meta_t *meta) {
static FILE *f_metaout = NULL;
Expand Down Expand Up @@ -234,6 +237,10 @@ void debug_print_regs(mm_reg1_t* regs, int n_u, char* qname){
///////////// check functions ///////////////////////////////
///////////////////////////////////////////////////////////////////////////
#ifdef DEBUG_CHECK


// DEBUG: used with gold-standard input score and range. SCORE CHECK
#if 0
/**
* Read Plaintext input file for Chaining scores from <debug_folder>.score
* Allocate and Populate chain_read_t.f, chain_read_t.p
Expand Down Expand Up @@ -373,6 +380,7 @@ int debug_check_score(const int64_t *p, const int32_t *f, const int64_t *p_gold,
readid++;
return rt;
}
#endif // used only if we have gold-standard input


void debug_check_range(const int32_t* range, size_t n){
Expand Down
2 changes: 1 addition & 1 deletion gpu/debug.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
extern "C" {
#endif

extern char debug_folder[];
const char debug_folder[] = "debug";

// #define ITER_LIMIT 10000
// #define MAX_READ_NUM 100000
Expand Down
11 changes: 6 additions & 5 deletions gpu/gpu.mk
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,23 @@ CONFIG += $(if $(LONG_CUT),-DMM_LONG_SEG_CUTOFF=\($(LONG_CUT)\))
###################################################
CU_SRC = $(wildcard gpu/*.cu)
CU_OBJS = $(CU_SRC:%.cu=%.o)
C_SRC = $(wildcard gpu/*.c)
OBJS += $(C_SRC:%.c=%.o)
INCLUDES += -I gpu

###################################################
############ CUDA Compile ###################
###################################################
NVCC = nvcc
CUDAFLAGS = -rdc=true -lineinfo
CUDATESTFLAG = -G -DDEBUG_PRINT
CUDATESTFLAG = -G

###################################################
############ HIP Compile ###################
###################################################
HIPCC = hipcc
HIPFLAGS = -DUSEHIP -Rpass-analysis=kernel-resource-usage
HIPTESTFLAGS = -g -DDEBUG_PRINT
HIPFLAGS = -DUSEHIP
HIPTESTFLAGS = -g -Rpass-analysis=kernel-resource-usage

###################################################
############ DEBUG Options ###################
Expand All @@ -46,15 +48,14 @@ endif

ifneq ($(DEBUG_ANALYSIS),)
GPU_FLAGS += $(GPU_TESTFL)
GPU_FLAGS += -DDEBUG_CHECK -DDEBUG_VERBOSE
endif


%.o: %.cu
$(GPU_CC) -c $(GPU_FLAGS) $(CFLAGS) $(CPPFLAGS) $(INCLUDES) $(CONFIG) $< -o $@

cleangpu:
rm -f $(CU_OBJS)
rm -f gpu/*.o

# profile:CFLAGS += -pg -g3
# profile:all
Expand Down
16 changes: 7 additions & 9 deletions gpu/plchain.cu
Original file line number Diff line number Diff line change
Expand Up @@ -426,11 +426,13 @@ void plchain_debug_analysis(stream_ptr_t stream){

fprintf(stderr, "[DEBUG] total segs: %lu, short:%lu mid: %u long: %u\n", cut_num, cut_num - num_mid_seg - num_long_seg, num_mid_seg, num_long_seg);

// DEBUG: check range w.r.t. input and range violations
#if defined(DEBUG_CHECK) && 0

int32_t* range = (int32_t*)malloc(sizeof(int32_t) * total_n);
cudaMemcpy(range, dev_mem->d_range, sizeof(int32_t) * total_n,
cudaMemcpyDeviceToHost);
size_t* cut = (size_t*)malloc(sizeof(size_t) * cut_num);
cudaMemcpy(cut, dev_mem->d_cut, sizeof(size_t) * cut_num,
cudaMemcpyDeviceToHost);

// Check range w.r.t input (MAKE SURE INPUT RANGE EXISTS)
#if 0
Expand All @@ -447,22 +449,18 @@ void plchain_debug_analysis(stream_ptr_t stream){

// DEBUG: Check violation of cut
#if defined(DEBUG_CHECK) && 0
size_t* cut = (size_t*)malloc(sizeof(size_t) * cut_num);
cudaMemcpy(cut, dev_mem->d_cut, sizeof(size_t) * cut_num,
cudaMemcpyDeviceToHost);
for (int readid = 0, cid = 0, idx = 0; readid < dev_mem->size; readid++) {
// DEBUG: Print cuts
#if defined(DEBUG_VERBOSE) && 0
debug_print_cut(cut + cid, cut_num - cid, reads[readid].n, idx, reads[readid].seq.name);
#endif
cid += debug_check_cut(cut + cid, range, cut_num - cid,
reads[readid].n, idx);
cid += debug_check_cut(cut + cid, range, cut_num - cid, reads[readid].n, idx);
idx += reads[readid].n;
}
free(cut);
#endif

free(cut);
free(range);
#endif // DEBUG_CHECK

// DEBUG: Calculate workload distribution
#if defined(DEBUG_VERBOSE) && 1
Expand Down
4 changes: 2 additions & 2 deletions gpu_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
"num_streams": 1,
"min_n": 512,
"//min_n": "queries with less anchors will be handled on cpu",
"max_total_n": 126844000,
"max_read": 8120009,
"//max_total_n": 126844000,
"//max_read": 8120009,
"avg_read_n": 20000,
"//avg_read_n": "expect average number of anchors per read",
"range_kernel": {
Expand Down

0 comments on commit 247a17f

Please sign in to comment.