Commit: Delete gpu/.depend
joydddd committed Sep 27, 2023
2 parents d334c9e + ccddf5c commit 1d5c636
Showing 6 changed files with 219 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -18,3 +18,4 @@ ncu
nsys
profile_output*
workloads
.depend
38 changes: 38 additions & 0 deletions scripts/aac_integrated.sh
@@ -0,0 +1,38 @@
#!/bin/bash
WORKSPACE_DIR=/shared/prod/home/liuxs/bioinfo/minimap2
EXE_PATH=$WORKSPACE_DIR
CONFIG_PATH=$WORKSPACE_DIR
DATA_PATH=/shared/prod/home/liuxs/bioinfo/Profile_mm2/data
# NOTE: the value passed to `-t` must equal "num_streams" in gpu_config.json,
# so extract it from the config file directly.
N_THREAD=$(sed -n 's/.*"num_streams": \([0-9]*\).*/\1/p' ${CONFIG_PATH}/gpu_config.json)
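# (Added sketch, not in the original script) Fail fast if num_streams could
# not be parsed, rather than launching minimap2 with an empty -t argument.
if [ -z "${N_THREAD}" ]; then
    echo "Error: could not read num_streams from ${CONFIG_PATH}/gpu_config.json" >&2
    exit 1
fi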
# Parameter arrays to sweep: block sizes, cut thresholds, GPU configs, datasets
MID_BLOCK_SIZES=( 64 )
MID_CUTS=( 1 )
LONG_CUTS=( 50 100 )
GPU_CONFIGS=( gpu_config.json )
DATA_SETS=( 1kto300k 200kto300k )

# Sweep all parameter combinations
for DATA_SET in "${DATA_SETS[@]}"
do
    QUERY_FILE=$DATA_PATH/random_500MBases_${DATA_SET}.fa
    for MID_BLOCK_SIZE in "${MID_BLOCK_SIZES[@]}"
    do
        for MID_CUT in "${MID_CUTS[@]}"
        do
            for LONG_CUT in "${LONG_CUTS[@]}"
            do
                for GPU_CONFIG in "${GPU_CONFIGS[@]}"
                do
                    echo "Executing with MID_BLOCK_SIZE=${MID_BLOCK_SIZE} MID_CUT=${MID_CUT} LONG_CUT=${LONG_CUT}"

                    # Rebuild minimap2 with the current kernel parameters
                    make clean
                    make GPU_CONFIG=${GPU_CONFIG} SHORT_BLOCK_SIZE=64 MID_BLOCK_SIZE=${MID_BLOCK_SIZE} LONG_BLOCK_SIZE=1024 MID_CUT=${MID_CUT} LONG_CUT=${LONG_CUT}

                    # Profile the GPU-chaining run with omniperf
                    omniperf profile -n integrated_${MID_BLOCK_SIZE}_${LONG_CUT}_${DATA_SET}_report --device 0 -- ${EXE_PATH}/minimap2 ${DATA_PATH}/hg38.mmi -t ${N_THREAD} --max-chain-skip=2147483647 --gpu-chain ${QUERY_FILE} > test_logs.out
                done
            done
        done
    done
done
36 changes: 36 additions & 0 deletions scripts/aac_rocprof.slurm
@@ -0,0 +1,36 @@
#!/bin/bash
#SBATCH --job-name=minimap2 # Job name
#SBATCH --partition=1CN128C8G2H_2IB_MI210_RHEL8 # Specify the MI210 GPU partition or queue
#SBATCH --gres=gpu:1 # Request 1 GPU
#SBATCH --nodes=1 # Number of nodes
#SBATCH --ntasks-per-node=1 # Number of tasks (processes) per node
#SBATCH --cpus-per-task=16 # Number of CPU cores per task
#SBATCH --mem=0 # Memory per node
#SBATCH --time=00:40:00 # Maximum execution time (HH:MM:SS)
#SBATCH --output=slurm_output/sample_sbatch_job.%j.out # Output file
#SBATCH --error=slurm_output/sample_sbatch_job.%j.err # Error file

# Load necessary modules (if required)
source /etc/profile.d/modules.sh
# Enable the gcc-toolset-11 software collection for this shell.
# (`scl enable gcc-toolset-11 bash` would spawn a subshell and leave this
# script's environment unchanged, so source the SCL environment instead.)
source scl_source enable gcc-toolset-11
module unuse /shared/apps/modules/ubuntu/modulefiles
module use /shared/apps/modules/rhel8/modulefiles
module unuse /shared/apps/modules/rhel9/modulefiles
module unuse /shared/apps/modules/sles15sp4/modulefiles
module unuse /shared/apps/modules/centos8/modulefiles
module unuse /shared/apps/modules/rocky9/modulefiles

module load rocm-5.4.3
# export AMD_LOG_LEVEL=4
# Build and run minimap2 with GPU chaining under rocprof
cd /shared/prod/home/liuxs/bioinfo/minimap2/
make clean
make GPU_CONFIG=gpu_config.json SHORT_BLOCK_SIZE=64 LONG_BLOCK_SIZE=1024 MID_BLOCK_SIZE=512 MID_CUT=1 LONG_CUT=100
# ./minimap2 -t 1 --max-chain-skip=2147483647 --gpu-chain /shareddata/umich_folder/data/ONT/hg38.mmi /shareddata/umich_folder/data/ONT/reads_4f452f4a-d82a-4580-981b-32d14b997217.fa
# ./minimap2 -t 1 --max-chain-skip=2147483647 --gpu-chain /shareddata/umich_folder/data/ONT/hg38.mmi /shareddata/umich_folder/data/ONT/random_500MBases_200kto300k.fa
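# (Added sketch, not in the original script) rocprof writes its CSVs under
# rocprof_output/ below; create the directory so -o cannot fail on a fresh
# checkout.
mkdir -p rocprof_output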
rocprof --stats -o rocprof_output/long_seg.${SLURM_JOB_ID}.csv ./minimap2 -t 1 --max-chain-skip=2147483647 --gpu-chain /shareddata/umich_folder/data/ONT/hg38.mmi /shareddata/umich_folder/data/ONT/short_seg_reads_from_1kto10k_distri.fa
# rocprof --stats -o rocprof_output/long_seg.${SLURM_JOB_ID}.csv ./minimap2 -t 1 --max-chain-skip=2147483647 --gpu-chain /shareddata/umich_folder/data/ONT/hg38.mmi /shareddata/umich_folder/data/ONT/reads_4f452f4a-d82a-4580-981b-32d14b997217.fa

# Optional: You can add post-processing commands here

# End of the script
42 changes: 42 additions & 0 deletions scripts/aac_timing_all.sh
@@ -0,0 +1,42 @@
#!/bin/bash
WORKSPACE_DIR=/shared/prod/home/liuxs/bioinfo/minimap2
EXE_PATH=$WORKSPACE_DIR
CONFIG_PATH=$WORKSPACE_DIR
# DATA_PATH=/shared/prod/home/liuxs/bioinfo/Profile_mm2/data
DATA_PATH=/shareddata/umich_folder/data/ONT
# NOTE: the value passed to `-t` must equal "num_streams" in gpu_config.json,
# so extract it from the config file directly.
N_THREAD=$(sed -n 's/.*"num_streams": \([0-9]*\).*/\1/p' ${CONFIG_PATH}/gpu_config.json)
# Parameter arrays to sweep: block sizes, cut thresholds, GPU configs, datasets
MID_BLOCK_SIZES=( 512 )
MID_CUTS=( 1 )
LONG_CUTS=( 100 )
GPU_CONFIGS=( gpu_config.json )
# DATA_SETS=( 50kto100k )
# DATA_SETS=( 1kto5k 1kto10k 1kto50k 1kto300k 200kto300k 1kto20k 1kto30k 1kto70k 1kto200k 10kto50k 10kto100k 50kto100k)
# DATA_SETS=( 1kto300k 50kto300k 100kto300k 150kto300k 200kto300k 250kto300k 1kto200k 20kto200k 50kto200k 70kto200k 100kto200k 130kto200k 150kto200k 170kto200k)
DATA_SETS=( 1kto5k 9kto10k 10kto20k 20kto30k 40kto50k 90kto100k 110kto120k 140kto150k 180kto200k 200kto250k 200kto300k )
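# (Added sketch, not in the original script) Per-dataset timing files are
# written under profile_output/ in the inner loop; create it up front so the
# 2> redirect cannot fail on a fresh checkout.
mkdir -p profile_output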

# Sweep all parameter combinations
for MID_BLOCK_SIZE in "${MID_BLOCK_SIZES[@]}"
do
    for MID_CUT in "${MID_CUTS[@]}"
    do
        for LONG_CUT in "${LONG_CUTS[@]}"
        do
            for GPU_CONFIG in "${GPU_CONFIGS[@]}"
            do
                echo "Executing with MID_BLOCK_SIZE=${MID_BLOCK_SIZE} MID_CUT=${MID_CUT} LONG_CUT=${LONG_CUT}"

                # Rebuild with the current parameters. The GPU_CONFIG loop must
                # enclose the make; in the original, make ran before the loop,
                # so ${GPU_CONFIG} was unset at build time.
                make clean
                make GPU_CONFIG=${GPU_CONFIG} SHORT_BLOCK_SIZE=64 MID_BLOCK_SIZE=${MID_BLOCK_SIZE} LONG_BLOCK_SIZE=1024 MID_CUT=${MID_CUT} LONG_CUT=${LONG_CUT}

                for DATA_SET in "${DATA_SETS[@]}"
                do
                    QUERY_FILE=$DATA_PATH/random_500MBases_${DATA_SET}.fa
                    echo "Executing on dataset ${DATA_SET}"
                    filename="profile_output/data-${DATA_SET}_profile_${N_THREAD}_midblk-${MID_BLOCK_SIZE}_cut-${LONG_CUT}"
                    # Timing output goes to stderr; capture it per dataset
                    ${EXE_PATH}/minimap2 ${DATA_PATH}/hg38.mmi -t ${N_THREAD} --max-chain-skip=2147483647 --gpu-chain ${QUERY_FILE} > test.out 2> $filename
                done
            done
        done
    done
done
36 changes: 36 additions & 0 deletions scripts/acc_integrated.slurm
@@ -0,0 +1,36 @@
#!/bin/bash
#SBATCH --job-name=minimap2 # Job name
#SBATCH --partition=1CN128C8G2H_2IB_MI210_RHEL8 # Specify the MI210 GPU partition or queue
#SBATCH --gres=gpu:1 # Request 1 GPU
#SBATCH --nodes=1 # Number of nodes
#SBATCH --ntasks-per-node=1 # Number of tasks (processes) per node
#SBATCH --cpus-per-task=16 # Number of CPU cores per task
#SBATCH --mem=0 # Memory per node
#SBATCH --time=00:40:00 # Maximum execution time (HH:MM:SS)
#SBATCH --output=slurm_output/sample_sbatch_job.%j.out # Output file
#SBATCH --error=slurm_output/sample_sbatch_job.%j.err # Error file

# Load necessary modules (if required)
source /etc/profile.d/modules.sh
# Enable the gcc-toolset-11 software collection for this shell.
# (`scl enable gcc-toolset-11 bash` would spawn a subshell and leave this
# script's environment unchanged, so source the SCL environment instead.)
source scl_source enable gcc-toolset-11
module unuse /shared/apps/modules/ubuntu/modulefiles
module use /shared/apps/modules/rhel8/modulefiles
module unuse /shared/apps/modules/rhel9/modulefiles
module unuse /shared/apps/modules/sles15sp4/modulefiles
module unuse /shared/apps/modules/centos8/modulefiles
module unuse /shared/apps/modules/rocky9/modulefiles

module load rocm-5.4.3
# export AMD_LOG_LEVEL=4
# Build and run minimap2 with GPU chaining
cd /shared/prod/home/liuxs/bioinfo/minimap2/
make clean
make GPU_CONFIG=gpu_config.json SHORT_BLOCK_SIZE=64 LONG_BLOCK_SIZE=1024 MID_BLOCK_SIZE=512 MID_CUT=1 LONG_CUT=100
# ./minimap2 -t 1 --max-chain-skip=2147483647 --gpu-chain /shareddata/umich_folder/data/ONT/hg38.mmi /shareddata/umich_folder/data/ONT/reads_4f452f4a-d82a-4580-981b-32d14b997217.fa
./minimap2 -t 1 --max-chain-skip=2147483647 --gpu-chain /shareddata/umich_folder/data/ONT/hg38.mmi /shareddata/umich_folder/data/ONT/random_500MBases_200kto300k.fa
# ./minimap2 -t 1 --max-chain-skip=2147483647 --gpu-chain /shareddata/umich_folder/data/ONT/hg38.mmi /shareddata/umich_folder/data/ONT/short_seg_reads_from_1kto10k_distri.fa


# Optional: You can add post-processing commands here

# End of the script
66 changes: 66 additions & 0 deletions scripts/parse_seg.py
@@ -0,0 +1,66 @@
import re
import argparse
import matplotlib.pyplot as plt

# Define a function to extract segments from a line
def extract_segments(line):
    match = re.search(r'long segments (\d+)', line)
    if match:
        if int(match.group(1)) > 1000000:
            print(line)
        return int(match.group(1))
    return None

# Define a function to extract runtime from a line
def extract_runtime(line):
    match = re.search(r'last launch runtime: (\d+\.\d+) ms', line)
    if match:
        return float(match.group(1))
    return None


# Initialize variables to store segment counts
segment_counts = []
runtimes = []

# Create an argument parser to get the output file name from the command line
parser = argparse.ArgumentParser(description='Compute and plot a histogram of segments from an output file.')
parser.add_argument('output_file', help='Path to the output file containing segment data')
parser.add_argument('runtime_file', help='Path to the file containing runtime data')
args = parser.parse_args()

# Read the output file specified in the command line argument
with open(args.output_file, 'r') as file:
    for line in file:
        segments = extract_segments(line)
        if segments is None:
            continue
        segment_counts.append(segments)


# Read the runtime file specified in the command line argument
with open(args.runtime_file, 'r') as runtime_file:
    for line in runtime_file:
        runtime = extract_runtime(line)
        if runtime is None:
            continue
        runtimes.append(runtime)
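
# (Added sketch, not in the original script) Guard against empty inputs:
# if neither regex matched anything, the throughput division below would
# raise ZeroDivisionError on an empty runtime list.
if not segment_counts or not runtimes:
    raise SystemExit('No matching lines found; check the input files.')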

# Calculate the total number of segments
total_segments = sum(segment_counts)
total_runtime = sum(runtimes)
throughput = total_segments / total_runtime # anchors/ms

# Create a histogram
plt.hist(segment_counts, bins=200, edgecolor='k')
plt.xlabel('Segments')
plt.ylabel('Frequency')
plt.title(f'Total Segments: {total_segments}, throughput: {throughput} anchors/ms')
plt.grid(True)

# Save the figure with an appropriate name based on the input file name
output_filename = args.output_file #.split('.')[0] # Remove the file extension
plt.savefig(f'{output_filename}_segment_histogram.png')

# Display the histogram
# plt.show()
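
# Example invocation (hypothetical file names, not part of this commit):
#   python parse_seg.py test_logs.out runtime_logs.out
# produces test_logs.out_segment_histogram.png alongside the segment log.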
