forked from lh3/minimap2
-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
219 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,3 +18,4 @@ ncu | |
nsys | ||
profile_output* | ||
workloads | ||
.depend |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
#!/bin/bash
# Sweep kernel build parameters and profile each resulting minimap2 build
# with omniperf, once per query data set.
#
# For every (DATA_SET, MID_BLOCK_SIZE, MID_CUT, LONG_CUT, GPU_CONFIG)
# combination the binary is rebuilt and then profiled.
# `make` is invoked in the current directory, so run this script from the
# directory that holds the Makefile.
WORKSPACE_DIR=/shared/prod/home/liuxs/bioinfo/minimap2
EXE_PATH=$WORKSPACE_DIR
CONFIG_PATH=$WORKSPACE_DIR
DATA_PATH=/shared/prod/home/liuxs/bioinfo/Profile_mm2/data
# NOTE: currently `-t n_thread`, the n_thread must be equal to the num_streams in gpu_config.json
N_THREAD=$(sed -n 's/.*"num_streams": \([0-9]*\).*/\1/p' "${CONFIG_PATH}/gpu_config.json")
# Without a value, `-t` would consume the following option as its argument.
if [ -z "${N_THREAD}" ]; then
    echo "Could not read num_streams from ${CONFIG_PATH}/gpu_config.json" >&2
    exit 1
fi
# Parameter values to sweep over
MID_BLOCK_SIZES=( 64 )
MID_CUTS=( 1 )
LONG_CUTS=( 50 100 )
GPU_CONFIGS=( gpu_config.json )
DATA_SETS=( 1kto300k 200kto300k )

# Iterate over every data set and every build-parameter combination
for DATA_SET in "${DATA_SETS[@]}"
do
    QUERY_FILE=$DATA_PATH/random_500MBases_${DATA_SET}.fa
    for MID_BLOCK_SIZE in "${MID_BLOCK_SIZES[@]}"
    do
        for MID_CUT in "${MID_CUTS[@]}"
        do
            for LONG_CUT in "${LONG_CUTS[@]}"
            do
                for GPU_CONFIG in "${GPU_CONFIGS[@]}"
                do
                    echo "Executing with MID_BLOCK_SIZE=${MID_BLOCK_SIZE} MID_CUT=${MID_CUT} LONG_CUT=${LONG_CUT}"

                    make clean

                    # Do not profile when the build fails: the binary would be
                    # missing or stale (presumably removed by `make clean`).
                    if ! make GPU_CONFIG="${GPU_CONFIG}" SHORT_BLOCK_SIZE=64 MID_BLOCK_SIZE="${MID_BLOCK_SIZE}" LONG_BLOCK_SIZE=1024 MID_CUT="${MID_CUT}" LONG_CUT="${LONG_CUT}"; then
                        echo "Build failed for MID_BLOCK_SIZE=${MID_BLOCK_SIZE} MID_CUT=${MID_CUT} LONG_CUT=${LONG_CUT} GPU_CONFIG=${GPU_CONFIG}; skipping" >&2
                        continue
                    fi

                    # The report name does not include MID_CUT or GPU_CONFIG, and
                    # test_logs.out is truncated on every run.
                    omniperf profile -n "integrated_${MID_BLOCK_SIZE}_${LONG_CUT}_${DATA_SET}_report" --device 0 -- "${EXE_PATH}/minimap2" "${DATA_PATH}/hg38.mmi" -t "${N_THREAD}" --max-chain-skip=2147483647 --gpu-chain "${QUERY_FILE}" > test_logs.out
                done
            done
        done
    done
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
#!/bin/bash
#SBATCH --job-name=minimap2                             # Job name
#SBATCH --partition=1CN128C8G2H_2IB_MI210_RHEL8         # Specify the MI210 GPU partition or queue
#SBATCH --gres=gpu:1                                    # Request 1 GPU
#SBATCH --nodes=1                                       # Number of nodes
#SBATCH --ntasks-per-node=1                             # Number of tasks (processes) per node
#SBATCH --cpus-per-task=16                              # Number of CPU cores per task
#SBATCH --mem=0                                         # 0 = request all memory on the node
#SBATCH --time=00:40:00                                 # Maximum execution time (HH:MM:SS)
#SBATCH --output=slurm_output/sample_sbatch_job.%j.out  # Output file
#SBATCH --error=slurm_output/sample_sbatch_job.%j.err   # Error file

# Slurm batch job: rebuild minimap2 with fixed kernel parameters and collect
# rocprof statistics for one GPU-chaining run.
# NOTE(review): slurm_output/ must already exist in the submission directory;
# Slurm is not expected to create it - confirm on this cluster.

# Load necessary modules (if required)
source /etc/profile.d/modules.sh
# `scl enable <collection> bash` only starts a child shell; it does not change
# this script's environment. Source the collection instead so the toolset is
# active for the `make` below.
source scl_source enable gcc-toolset-11
module unuse /shared/apps/modules/ubuntu/modulefiles
module use /shared/apps/modules/rhel8/modulefiles
module unuse /shared/apps/modules/rhel9/modulefiles
module unuse /shared/apps/modules/sles15sp4/modulefiles
module unuse /shared/apps/modules/centos8/modulefiles
module unuse /shared/apps/modules/rocky9/modulefiles

module load rocm-5.4.3
# export AMD_LOG_LEVEL=4
# Build in the workspace; stop if it cannot be entered so `make clean` never
# runs in the wrong directory.
cd /shared/prod/home/liuxs/bioinfo/minimap2/ || exit 1
make clean
# Abort on a failed build instead of profiling a missing or stale binary.
make GPU_CONFIG=gpu_config.json SHORT_BLOCK_SIZE=64 LONG_BLOCK_SIZE=1024 MID_BLOCK_SIZE=512 MID_CUT=1 LONG_CUT=100 || exit 1
# Alternative workloads (uncomment one to run without rocprof):
# ./minimap2 -t 1 --max-chain-skip=2147483647 --gpu-chain /shareddata/umich_folder/data/ONT/hg38.mmi /shareddata/umich_folder/data/ONT/reads_4f452f4a-d82a-4580-981b-32d14b997217.fa
# ./minimap2 -t 1 --max-chain-skip=2147483647 --gpu-chain /shareddata/umich_folder/data/ONT/hg38.mmi /shareddata/umich_folder/data/ONT/random_500MBases_200kto300k.fa
# Make sure the directory for the rocprof CSV exists.
mkdir -p rocprof_output
# NOTE(review): the output file is named long_seg but the input is the
# short_seg read set - confirm the intended name.
rocprof --stats -o rocprof_output/long_seg.${SLURM_JOB_ID}.csv ./minimap2 -t 1 --max-chain-skip=2147483647 --gpu-chain /shareddata/umich_folder/data/ONT/hg38.mmi /shareddata/umich_folder/data/ONT/short_seg_reads_from_1kto10k_distri.fa
# rocprof --stats -o rocprof_output/long_seg.${SLURM_JOB_ID}.csv ./minimap2 -t 1 --max-chain-skip=2147483647 --gpu-chain /shareddata/umich_folder/data/ONT/hg38.mmi /shareddata/umich_folder/data/ONT/reads_4f452f4a-d82a-4580-981b-32d14b997217.fa

# Optional: You can add post-processing commands here

# End of the script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
#!/bin/bash
# Sweep kernel build parameters, and for each build run minimap2 on every
# query data set, saving the program's stderr under profile_output/.
#
# `make` is invoked in the current directory, so run this script from the
# directory that holds the Makefile.
WORKSPACE_DIR=/shared/prod/home/liuxs/bioinfo/minimap2
EXE_PATH=$WORKSPACE_DIR
CONFIG_PATH=$WORKSPACE_DIR
# DATA_PATH=/shared/prod/home/liuxs/bioinfo/Profile_mm2/data
DATA_PATH=/shareddata/umich_folder/data/ONT
# NOTE: currently `-t n_thread`, the n_thread must be equal to the num_streams in gpu_config.json
N_THREAD=$(sed -n 's/.*"num_streams": \([0-9]*\).*/\1/p' "${CONFIG_PATH}/gpu_config.json")
# Without a value, `-t` would consume the following option as its argument.
if [ -z "${N_THREAD}" ]; then
    echo "Could not read num_streams from ${CONFIG_PATH}/gpu_config.json" >&2
    exit 1
fi
# Parameter values to sweep over
MID_BLOCK_SIZES=( 512 )
MID_CUTS=( 1 )
LONG_CUTS=( 100 )
GPU_CONFIGS=( gpu_config.json )
# DATA_SETS=( 50kto100k )
# DATA_SETS=( 1kto5k 1kto10k 1kto50k 1kto300k 200kto300k 1kto20k 1kto30k 1kto70k 1kto200k 10kto50k 10kto100k 50kto100k)
# DATA_SETS=( 1kto300k 50kto300k 100kto300k 150kto300k 200kto300k 250kto300k 1kto200k 20kto200k 50kto200k 70kto200k 100kto200k 130kto200k 150kto200k 170kto200k)
DATA_SETS=( 1kto5k 9kto10k 10kto20k 20kto30k 40kto50k 90kto100k 110kto120k 140kto150k 180kto200k 200kto250k 200kto300k )

# The per-run stderr captures are written here; create it up front so the
# redirection below cannot fail on a missing directory.
mkdir -p profile_output

# Iterate over every build-parameter combination
for MID_BLOCK_SIZE in "${MID_BLOCK_SIZES[@]}"
do
    for MID_CUT in "${MID_CUTS[@]}"
    do
        for LONG_CUT in "${LONG_CUTS[@]}"
        do
            # The build must happen inside the GPU_CONFIG loop: GPU_CONFIG is
            # only defined there, and it is passed to `make`.
            for GPU_CONFIG in "${GPU_CONFIGS[@]}"
            do
                echo "Executing with MID_BLOCK_SIZE=${MID_BLOCK_SIZE} MID_CUT=${MID_CUT} LONG_CUT=${LONG_CUT}"
                make clean
                # Skip the runs when the build fails instead of executing a
                # missing or stale binary.
                if ! make GPU_CONFIG="${GPU_CONFIG}" SHORT_BLOCK_SIZE=64 MID_BLOCK_SIZE="${MID_BLOCK_SIZE}" LONG_BLOCK_SIZE=1024 MID_CUT="${MID_CUT}" LONG_CUT="${LONG_CUT}"; then
                    echo "Build failed for MID_BLOCK_SIZE=${MID_BLOCK_SIZE} MID_CUT=${MID_CUT} LONG_CUT=${LONG_CUT} GPU_CONFIG=${GPU_CONFIG}; skipping" >&2
                    continue
                fi

                for DATA_SET in "${DATA_SETS[@]}"
                do
                    QUERY_FILE=$DATA_PATH/random_500MBases_${DATA_SET}.fa
                    echo "Executing on dataset ${DATA_SET}"
                    # The file name does not include MID_CUT or GPU_CONFIG.
                    filename="profile_output/data-${DATA_SET}_profile_${N_THREAD}_midblk-${MID_BLOCK_SIZE}_cut-${LONG_CUT}"
                    # stdout is overwritten on every run; stderr is kept per run.
                    "${EXE_PATH}/minimap2" "${DATA_PATH}/hg38.mmi" -t "${N_THREAD}" --max-chain-skip=2147483647 --gpu-chain "${QUERY_FILE}" > test.out 2> "$filename"
                done
            done
        done
    done
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
#!/bin/bash
#SBATCH --job-name=minimap2                             # Job name
#SBATCH --partition=1CN128C8G2H_2IB_MI210_RHEL8         # Specify the MI210 GPU partition or queue
#SBATCH --gres=gpu:1                                    # Request 1 GPU
#SBATCH --nodes=1                                       # Number of nodes
#SBATCH --ntasks-per-node=1                             # Number of tasks (processes) per node
#SBATCH --cpus-per-task=16                              # Number of CPU cores per task
#SBATCH --mem=0                                         # 0 = request all memory on the node
#SBATCH --time=00:40:00                                 # Maximum execution time (HH:MM:SS)
#SBATCH --output=slurm_output/sample_sbatch_job.%j.out  # Output file
#SBATCH --error=slurm_output/sample_sbatch_job.%j.err   # Error file

# Slurm batch job: rebuild minimap2 with fixed kernel parameters and run one
# GPU-chaining workload; program output goes to the Slurm output/error files.
# NOTE(review): slurm_output/ must already exist in the submission directory;
# Slurm is not expected to create it - confirm on this cluster.

# Load necessary modules (if required)
source /etc/profile.d/modules.sh
# `scl enable <collection> bash` only starts a child shell; it does not change
# this script's environment. Source the collection instead so the toolset is
# active for the `make` below.
source scl_source enable gcc-toolset-11
module unuse /shared/apps/modules/ubuntu/modulefiles
module use /shared/apps/modules/rhel8/modulefiles
module unuse /shared/apps/modules/rhel9/modulefiles
module unuse /shared/apps/modules/sles15sp4/modulefiles
module unuse /shared/apps/modules/centos8/modulefiles
module unuse /shared/apps/modules/rocky9/modulefiles

module load rocm-5.4.3
# export AMD_LOG_LEVEL=4
# Build in the workspace; stop if it cannot be entered so `make clean` never
# runs in the wrong directory.
cd /shared/prod/home/liuxs/bioinfo/minimap2/ || exit 1
make clean
# Abort on a failed build instead of running a missing or stale binary.
make GPU_CONFIG=gpu_config.json SHORT_BLOCK_SIZE=64 LONG_BLOCK_SIZE=1024 MID_BLOCK_SIZE=512 MID_CUT=1 LONG_CUT=100 || exit 1
# Alternative workloads are kept commented out next to the active one.
# ./minimap2 -t 1 --max-chain-skip=2147483647 --gpu-chain /shareddata/umich_folder/data/ONT/hg38.mmi /shareddata/umich_folder/data/ONT/reads_4f452f4a-d82a-4580-981b-32d14b997217.fa
./minimap2 -t 1 --max-chain-skip=2147483647 --gpu-chain /shareddata/umich_folder/data/ONT/hg38.mmi /shareddata/umich_folder/data/ONT/random_500MBases_200kto300k.fa
# ./minimap2 -t 1 --max-chain-skip=2147483647 --gpu-chain /shareddata/umich_folder/data/ONT/hg38.mmi /shareddata/umich_folder/data/ONT/short_seg_reads_from_1kto10k_distri.fa

# Optional: You can add post-processing commands here

# End of the script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
import re | ||
import argparse | ||
import matplotlib.pyplot as plt | ||
|
||
# Define a function to extract segments from a line | ||
def extract_segments(line):
    """Return the integer that follows ``long segments`` in *line*.

    Returns None when the line does not contain the pattern. Lines whose
    count exceeds 1,000,000 are also echoed to stdout so unusually large
    values are visible while parsing.
    """
    found = re.search(r'long segments (\d+)', line)
    if found is None:
        return None
    count = int(found.group(1))
    if count > 1000000:
        print(line)
    return count
|
||
# Define a function to extract runtime from a line | ||
def extract_runtime(line):
    """Return the runtime in milliseconds reported on *line*, or None.

    The line must contain ``last launch runtime: <number> ms``. The number
    may be written with or without a fractional part, so both ``3.5 ms``
    and ``12 ms`` are accepted; a value without a decimal point would
    otherwise be silently dropped from the total.
    """
    match = re.search(r'last launch runtime: (\d+(?:\.\d+)?) ms', line)
    if match:
        return float(match.group(1))
    return None
|
||
|
||
# Script body: parse segment counts and kernel runtimes from two log files,
# then save a histogram of the segment counts annotated with the overall
# throughput.

# Values collected from the input files, one entry per matching line
segment_counts = []
runtimes = []

# Create an argument parser to get the input file names from the command line
parser = argparse.ArgumentParser(description='Compute and plot a histogram of segments from an output file.')
parser.add_argument('output_file', help='Path to the output file containing segment data')
parser.add_argument('runtime_file', help='Path to the file containing runtime data')
args = parser.parse_args()

# Read the output file specified in the command line argument
with open(args.output_file, 'r') as file:
    for line in file:
        segments = extract_segments(line)
        if segments is None:
            continue
        segment_counts.append(segments)

# Read the runtime file specified in the command line argument
with open(args.runtime_file, 'r') as runtime_file:
    for line in runtime_file:
        runtime = extract_runtime(line)
        if runtime is None:
            continue
        runtimes.append(runtime)

# Calculate the total number of segments and the total runtime
total_segments = sum(segment_counts)
total_runtime = sum(runtimes)
if total_runtime > 0:
    throughput = total_segments / total_runtime  # anchors/ms
else:
    # No runtime lines matched (or they sum to zero): the throughput is
    # undefined, so report NaN instead of raising ZeroDivisionError and
    # losing the histogram.
    print(f'warning: no runtime data found in {args.runtime_file}; throughput is undefined')
    throughput = float('nan')

# Create a histogram
plt.hist(segment_counts, bins=200, edgecolor='k')
plt.xlabel('Segments')
plt.ylabel('Frequency')
plt.title(f'Total Segments: {total_segments}, throughput: {throughput} anchors/ms')
plt.grid(True)

# Save the figure next to the input file; the full input path (extension
# included) is used as the prefix of the image name
output_filename = args.output_file  # .split('.')[0]  # Remove the file extension
plt.savefig(f'{output_filename}_segment_histogram.png')

# Display the histogram
# plt.show()