#!/bin/bash
#
# Submit a benchmarking job (and, in accuracy_only mode, a dependent evaluation
# job) for the VL2L benchmark to Slurm.

# Fail fast: abort on command errors, on unset variables, and on a failure in
# any stage of a pipeline.
# Command tracing (-x) is deliberately left off: the trace would print the
# dataset access token to stderr. Run the script with `bash -x` when debugging.
set -eu
set -o pipefail

# Every option starts from its DEFAULT_* value and can be overridden on the
# command line. An empty default marks an option the caller must supply.

# Container image to run the benchmark (required).
DEFAULT_CONTAINER_IMAGE=""
container_image=${DEFAULT_CONTAINER_IMAGE}

# Access token for the dataset (required).
DEFAULT_DATASET_TOKEN=""
dataset_token=${DEFAULT_DATASET_TOKEN}

# HuggingFace repo ID of the model to benchmark.
DEFAULT_MODEL_REPO_ID=Qwen/Qwen3-VL-235B-A22B-Instruct
model_repo_id=${DEFAULT_MODEL_REPO_ID}

# Benchmark scenario.
DEFAULT_SCENARIO=offline
scenario=${DEFAULT_SCENARIO}

# Benchmark mode; the evaluation job is only submitted for accuracy_only.
DEFAULT_MODE=accuracy_only
mode=${DEFAULT_MODE}

# Host directory backing the container's cache directory (required).
DEFAULT_CACHE_HOST_DIR=""
cache_host_dir=${DEFAULT_CACHE_HOST_DIR}

# Host directory receiving the results; defaults to ./outputs under the
# directory the script is invoked from.
DEFAULT_OUTPUT_HOST_DIR="$(pwd)/outputs"
output_host_dir=${DEFAULT_OUTPUT_HOST_DIR}

# Slurm account used for both jobs (required).
DEFAULT_SLURM_ACCOUNT=""
slurm_account=${DEFAULT_SLURM_ACCOUNT}

# Slurm partition for the benchmarking job (required).
DEFAULT_BENCHMARK_SLURM_PARTITION=""
benchmark_slurm_partition=${DEFAULT_BENCHMARK_SLURM_PARTITION}

# Slurm partition for the evaluation job (required).
DEFAULT_EVALUATE_SLURM_PARTITION=""
evaluate_slurm_partition=${DEFAULT_EVALUATE_SLURM_PARTITION}

#######################################
# Print the usage text, optionally followed by a message, and exit the script.
# Globals:
#   DEFAULT_* (read) - shown as the default value of each option.
# Arguments:
#   $1 - message printed after the usage text; nothing is printed when empty.
#   $2 - exit status for the script (0 when omitted or empty).
# Outputs:
#   Usage text on stdout; the message, if any, on stderr.
#######################################
function _exit_with_help_msg() {
  local message=${1-}
  local exit_code=${2:-0}
  local highlight=""
  local reset=""

  # The backticks around .cache are escaped on purpose: inside an unquoted
  # here-document a bare backtick pair is a command substitution, which would
  # try to execute ".cache" and drop the word from the help text.
  cat << EOF
Submit a benchmarking (and optionally, an evaluation) job(s) for the VL2L benchmark.

Usage: ${BASH_SOURCE[0]}
    [-ci | --container-image] Container image to run the benchmark (default: ${DEFAULT_CONTAINER_IMAGE}).
    [-dt | --dataset-token] Access token for the Shopify Global Catalogue dataset (default: ${DEFAULT_DATASET_TOKEN}).
    [-mri | --model-repo-id] HuggingFace repo ID of the model to benchmark (default: ${DEFAULT_MODEL_REPO_ID}).
    [-s | --scenario] Benchmark scenario (default: ${DEFAULT_SCENARIO}).
    [-m | --mode] Benchmark mode (default: ${DEFAULT_MODE}).
    [-chd | --cache-host-dir] Host directory of the \`.cache\` directory to which HuggingFace will dump the dataset and the model checkpoint, and vLLM will dump compilation artifacts (default: ${DEFAULT_CACHE_HOST_DIR}).
    [-ohd | --output-host-dir] Host directory to which the benchmark and evaluation results will be dumped (default: ${DEFAULT_OUTPUT_HOST_DIR}).
    [-sa | --slurm-account] Slurm account for submitting the benchmark and evaluation jobs (default: ${DEFAULT_SLURM_ACCOUNT}).
    [-bsp | --benchmark-slurm-partition] Slurm partition for submitting the benchmarking job; usually a partition with nodes that have GPUs (default: ${DEFAULT_BENCHMARK_SLURM_PARTITION}).
    [-esp | --evaluate-slurm-partition] Slurm partition for submitting the evaluation job; usually a partition with nodes that have CPUs only (default: ${DEFAULT_EVALUATE_SLURM_PARTITION}).
    [-h | --help] Print this help message.
EOF

  if [[ -n "${message}" ]]; then
    # Highlight only when stderr is a terminal, and tolerate tput being
    # missing or failing (e.g. TERM unset) so the message is never lost.
    if [[ -t 2 ]] && command -v tput > /dev/null 2>&1; then
      highlight=$(tput bold setab 1 2> /dev/null) || highlight=""
      reset=$(tput sgr0 2> /dev/null) || reset=""
    fi
    printf '%s%s%s\n' "${highlight}" "${message}" "${reset}" >&2
  fi

  exit "${exit_code}"
}

#######################################
# Parse the command line into the option variables.
# Each value-taking option accepts both "--option value" and "--option=value".
# Globals:
#   container_image, dataset_token, model_repo_id, scenario, mode,
#   cache_host_dir, output_host_dir, slurm_account,
#   benchmark_slurm_partition, evaluate_slurm_partition (written)
# Arguments:
#   The script's command-line arguments.
# Returns:
#   0 when every argument was consumed. Exits through _exit_with_help_msg with
#   status 0 on -h/--help, and with status 1 on an unknown option or when a
#   value-taking option is the last argument.
#######################################
function _parse_args() {
  local arg option value target

  while [[ $# -gt 0 ]]; do
    arg=$1

    if [[ "${arg}" == "-h" || "${arg}" == "--help" ]]; then
      _exit_with_help_msg "" 0
    fi

    # Option name: everything before the first "=", or the whole argument
    # when it has no "=".
    option=${arg%%=*}
    case "${option}" in
      -ci | --container-image) target=container_image ;;
      -dt | --dataset-token) target=dataset_token ;;
      -mri | --model-repo-id) target=model_repo_id ;;
      -s | --scenario) target=scenario ;;
      -m | --mode) target=mode ;;
      -chd | --cache-host-dir) target=cache_host_dir ;;
      -ohd | --output-host-dir) target=output_host_dir ;;
      -sa | --slurm-account) target=slurm_account ;;
      -bsp | --benchmark-slurm-partition) target=benchmark_slurm_partition ;;
      -esp | --evaluate-slurm-partition) target=evaluate_slurm_partition ;;
      *) _exit_with_help_msg "[ERROR] Unknown option: ${arg}" 1 ;;
    esac

    if [[ "${arg}" == *=* ]]; then
      # Inline form: the value is everything after the first "=", so values
      # that themselves contain "=" are kept intact.
      value=${arg#*=}
      shift
    elif [[ $# -ge 2 ]]; then
      value=$2
      shift 2
    else
      # Without this guard, `set -u` would abort on $2 with a bare
      # "unbound variable" error instead of the usage text.
      _exit_with_help_msg "[ERROR] Option ${arg} requires a value." 1
    fi

    # Assign to the global named by ${target}.
    printf -v "${target}" '%s' "${value}"
  done
}

_parse_args "$@"

#######################################
# Abort with the usage text when a mandatory option has no value.
# Arguments:
#   $1 - current value of the option.
#   $2 - how the option is spelled on the command line, used in the message.
# Returns:
#   0 when the value is non-empty; otherwise exits through
#   _exit_with_help_msg with status 1.
#######################################
function _require_option() {
  local value=$1
  local flags=$2

  if [[ -z "${value}" ]]; then
    _exit_with_help_msg "[ERROR] ${flags} is required." 1
  fi
}

# Options without a usable default must be supplied by the caller.
_require_option "${container_image}" "-ci or --container-image"
_require_option "${dataset_token}" "-dt or --dataset-token"
_require_option "${cache_host_dir}" "-chd or --cache-host-dir"
_require_option "${slurm_account}" "-sa or --slurm-account"
_require_option "${benchmark_slurm_partition}" "-bsp or --benchmark-slurm-partition"
_require_option "${evaluate_slurm_partition}" "-esp or --evaluate-slurm-partition"

# Container-side counterparts of the cache and output host directories, handed
# to the job scripts through the environment.
# NOTE(review): presumably the mount points used inside the container; confirm
# against benchmark.sh and evaluate.sh.
cache_container_dir=/root/.cache
output_container_dir=/outputs

#######################################
# Submit the benchmarking job and, in accuracy_only mode, an evaluation job
# that may only start once the benchmarking job has finished successfully.
# Globals:
#   cache_host_dir, cache_container_dir, output_host_dir,
#   output_container_dir, container_image, scenario, mode, dataset_token,
#   model_repo_id, slurm_account, benchmark_slurm_partition,
#   evaluate_slurm_partition (read)
# Outputs:
#   Whatever sbatch prints for the evaluation job, when one is submitted.
# Returns:
#   Non-zero if the output directory cannot be created or sbatch fails.
#######################################
function _submit_jobs() {
  local shared_exports
  local benchmark_job_id

  mkdir -p "${output_host_dir}"

  # Variables both job scripts receive. The whole --export list has to reach
  # sbatch as a single word, hence one quoted string.
  # NOTE(review): sbatch splits the list on commas, so a value containing a
  # comma would be cut short.
  shared_exports="CACHE_HOST_DIR=${cache_host_dir}"
  shared_exports+=",CACHE_CONTAINER_DIR=${cache_container_dir}"
  shared_exports+=",OUTPUT_HOST_DIR=${output_host_dir}"
  shared_exports+=",OUTPUT_CONTAINER_DIR=${output_container_dir}"
  shared_exports+=",CONTAINER_IMAGE=${container_image}"

  benchmark_job_id=$(
    sbatch --parsable \
      --export="${shared_exports},SCENARIO=${scenario},MODE=${mode},DATASET_TOKEN=${dataset_token},MODEL_REPO_ID=${model_repo_id}" \
      --account="${slurm_account}" \
      --partition="${benchmark_slurm_partition}" \
      benchmark.sh
  )
  # --parsable prints "jobid" or "jobid;cluster"; only the job ID is valid in
  # a --dependency specification.
  benchmark_job_id=${benchmark_job_id%%;*}

  if [[ "${mode}" == "accuracy_only" ]]; then
    sbatch \
      --dependency=afterok:"${benchmark_job_id}" \
      --export="${shared_exports},DATASET_TOKEN=${dataset_token}" \
      --account="${slurm_account}" \
      --partition="${evaluate_slurm_partition}" \
      evaluate.sh
  fi
}

_submit_jobs