Skip to content

Commit

Permalink
Configure aac to maximize memory usage
Browse files Browse the repository at this point in the history
  • Loading branch information
xenshinu committed Mar 2, 2024
1 parent eed2640 commit 12c3fc0
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 9 deletions.
2 changes: 1 addition & 1 deletion aac_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
"mid_blockdim": 64,
"//blockdim config": "options are not used: static config specified at compile time (make ... LONG_BLOCK_SIZE=1024)",
"short_griddim": 16128,
"long_griddim": 208,
"long_griddim": 150,
"mid_griddim": 16128
}
}
15 changes: 8 additions & 7 deletions gpu/plchain.cu
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ void plchain_backtracking(hostMemPtr *host_mem, chain_read_t *reads, Misc misc,

uint16_t* p_hostmem = host_mem->p;
int32_t* f = host_mem->f;
// FIXME: DISABLED BACKTRACK, REMOVE THE RETURN HERE
return;
for (int i = 0; i < n_read; i++) {
int64_t* p;
KMALLOC(km, p, reads[i].n);
Expand Down Expand Up @@ -803,24 +805,23 @@ void plchain_cal_score_async(chain_read_t **reads_, int *n_read_, Misc misc, str
#ifdef USEHIP
roctxRangePop();
#endif

plmem_async_h2d_short_memcpy(&stream_setup.streams[stream_id], uid);
// step3: range selection
#ifdef DEBUG_PRINT
cudaEventRecord(stream_setup.streams[stream_id].short_kernel_start_event[uid],
stream_setup.streams[stream_id].cudastream);
#endif // DEBUG_PRINT
plmem_async_h2d_short_memcpy(&stream_setup.streams[stream_id], uid);
// step3: range selection
plrange_async_range_selection(&stream_setup.streams[stream_id].dev_mem,
&stream_setup.streams[stream_id].cudastream);
// step4: score generation for short and mid segs
plscore_async_short_mid_forward_dp(&stream_setup.streams[stream_id].dev_mem,
&stream_setup.streams[stream_id].cudastream);
// step5: copy short and mid results back
plmem_async_d2h_short_memcpy(&stream_setup.streams[stream_id], uid);
#ifdef DEBUG_PRINT
cudaEventRecord(stream_setup.streams[stream_id].short_kernel_stop_event[uid],
stream_setup.streams[stream_id].cudastream);
#endif // DEBUG_PRINT
// step5: copy short and mid results back
plmem_async_d2h_short_memcpy(&stream_setup.streams[stream_id], uid);
// update index
read_start = read_end;

Expand Down Expand Up @@ -872,9 +873,9 @@ void plchain_cal_score_async(chain_read_t **reads_, int *n_read_, Misc misc, str
stream_setup.streams[stream_id].cudastream);
plscore_async_long_forward_dp(&stream_setup.streams[stream_id].dev_mem,
&stream_setup.streams[stream_id].cudastream);
plmem_async_d2h_long_memcpy(&stream_setup.streams[stream_id]);
cudaEventRecord(stream_setup.streams[stream_id].stopevent,
stream_setup.streams[stream_id].cudastream);
plmem_async_d2h_long_memcpy(&stream_setup.streams[stream_id]);
stream_setup.streams[stream_id].busy = true;
cudaCheck();
}
Expand Down Expand Up @@ -922,7 +923,7 @@ void chain_blocking_gpu(const mm_idx_t *mi, const mm_mapopt_t *opt, chain_read_t
// void chain_stream_gpu(const input_meta_t* meta, chain_read_t**in_arr_, int *n_read_) {
// static int batchid = 0;
// Misc misc = build_misc(INT64_MAX);
// chain_stream_gpu(in_arr_, n_read_, misc, stream_setup, batchid);
// plchain_cal_score_launch(in_arr_, n_read_, misc, stream_setup, batchid);
// batchid++;
// if (in_arr_){
// int n_read = *n_read_;
Expand Down
4 changes: 4 additions & 0 deletions gpu/plscore.cu
Original file line number Diff line number Diff line change
Expand Up @@ -599,6 +599,10 @@ void plscore_async_long_forward_dp(deviceMemPtr* dev_mem, cudaStream_t* stream)
size_t buffer_size_long = dev_mem->buffer_size_long;
dim3 longDimGrid(score_kernel_config.long_griddim, 1, 1);

#ifdef DEBUG_CHECK
fprintf(stderr, "[Info] %s (%s:%d) Long Grid Dim = %d\n", __func__, __FILE__, __LINE__, longDimGrid.x);
#endif

#ifdef __LONG_BLOCK_SIZE__
// fprintf(stderr, "long block size: %d\n", __LONG_BLOCK_SIZE__);
score_generation_long_map<__LONG_BLOCK_SIZE__><<<longDimGrid, dim3(__LONG_BLOCK_SIZE__, 1, 1), 0, *stream>>>(
Expand Down
13 changes: 12 additions & 1 deletion scripts/acc_integrated.slurm
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#SBATCH --ntasks-per-node=1 # Number of tasks (processes) per node
#SBATCH --cpus-per-task=16 # Number of CPU cores per task
#SBATCH --mem=500g # Memory per node
#SBATCH --time=01:40:00 # Maximum execution time (HH:MM:SS)
#SBATCH --time=10:40:00 # Maximum execution time (HH:MM:SS)
#SBATCH --output=slurm_output/sample_sbatch_job.%j.out # Output file
#SBATCH --error=slurm_output/sample_sbatch_job.%j.err # Error file

Expand Down Expand Up @@ -39,7 +39,18 @@ cd $MM2_ROOT
# export AMD_LOG_LEVEL=4
make clean
make MICRO_BATCH=5 GPU_CONFIG=aac_config.json SHORT_BLOCK_SIZE=64 LONG_BLOCK_SIZE=1024 MID_BLOCK_SIZE=512 MID_CUT=1 LONG_CUT=40 DEBUG=1 DEBUG_ANALYSIS=1
./minimap2 -K 2000000000 -t 1 --max-chain-skip=2147483647 --gpu-chain /shareddata/umich_folder/data/ONT/hg38.mmi /shareddata/umich_folder/data/ONT/long_read_600M.fa
echo "Exit: $?"
./minimap2 -K 2000000000 -t 1 --max-chain-skip=2147483647 --gpu-chain /shareddata/umich_folder/data/ONT/hg38.mmi /shareddata/umich_folder/data/ONT/random_5GBases_10kto20k.fa
echo "Exit: $?"
./minimap2 -K 2000000000 -t 1 --max-chain-skip=2147483647 --gpu-chain /shareddata/umich_folder/data/ONT/hg38.mmi /shareddata/umich_folder/data/ONT/random_5GBases_40kto50k.fa
echo "Exit: $?"
./minimap2 -K 2000000000 -t 1 --max-chain-skip=2147483647 --gpu-chain /shareddata/umich_folder/data/ONT/hg38.mmi /shareddata/umich_folder/data/ONT/random_5GBases_10kto300k.fa
echo "Exit: $?"
./minimap2 -K 2000000000 -t 1 --max-chain-skip=2147483647 --gpu-chain /shareddata/umich_folder/data/ONT/hg38.mmi /shareddata/umich_folder/data/ONT/random_5GBases_90kto100k.fa
echo "Exit: $?"
./minimap2 -K 2000000000 -t 1 --max-chain-skip=2147483647 --gpu-chain /shareddata/umich_folder/data/ONT/hg38.mmi /shareddata/umich_folder/data/ONT/random_5GBases_100kto300k.fa
echo "Exit: $?"
# ./minimap2 -K 2000000000 -t 1 --max-chain-skip=2147483647 --gpu-chain /shareddata/umich_folder/data/ONT/hg38.mmi /shareddata/umich_folder/data/ONT/random_4GBases_10kto300k.fa


Expand Down

0 comments on commit 12c3fc0

Please sign in to comment.