Skip to content

Commit bb0254a

Browse files
committed
Merge branch 'main' into luka/custom-op-matching-2
# Conflicts: # tests/utils_/test_utils.py Signed-off-by: Luka Govedič <lgovedic@redhat.com>
2 parents 465ce58 + 136a17f commit bb0254a

File tree

142 files changed

+2087
-1310
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

142 files changed

+2087
-1310
lines changed
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
# For vllm script, with -t option (tensor parallel size).
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m HandH1998/QQQ-Llama-3-8b-g128 -b 32 -l 1000 -f 5 -t 1
model_name: "HandH1998/QQQ-Llama-3-8b-g128"
tasks:
- name: "gsm8k"
  metrics:
  - name: "exact_match,strict-match"
    value: 0.419
  - name: "exact_match,flexible-extract"
    value: 0.416
limit: 1000
num_fewshot: 5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
# For vllm script, with -t option (tensor parallel size).
# NOTE: the chartqa baseline script only accepts -m/-l/-t (no -b batch flag).
# bash .buildkite/lm-eval-harness/run-lm-eval-chartqa-vllm-vlm-baseline.sh -m meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 -l 100 -t 8
model_name: "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
backend: "vllm-vlm"
tasks:
- name: "chartqa"
  metrics:
  - name: "relaxed_accuracy,none"
    value: 0.90
limit: 100
num_fewshot: 0
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
# For vllm script, with -t option (tensor parallel size).
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 -b 32 -l 250 -t 8 -f 5
model_name: "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
backend: "vllm-vlm"
tasks:
- name: "mmlu_pro"
  metrics:
  - name: "exact_match,custom-extract"
    value: 0.80
limit: 250 # will run on 250 * 14 subjects = 3500 samples
num_fewshot: 5

.buildkite/lm-eval-harness/configs/Qwen2.5-VL-3B-Instruct-FP8-dynamic.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m RedHatAI/Qwen2.5-VL-3B-Instruct-FP8-Dynamic -b auto -l 1319 -f 5 -t 1
1+
# For vllm script, with -t option (tensor parallel size)
2+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m RedHatAI/Qwen2.5-VL-3B-Instruct-FP8-Dynamic -l 1319 -t 1
23
model_name: "RedHatAI/Qwen2.5-VL-3B-Instruct-FP8-Dynamic"
34
tasks:
45
- name: "gsm8k"
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
# For vllm script, with -t option (tensor parallel size).
# bash .buildkite/lm-eval-harness/run-lm-eval-chartqa-vllm-vlm-baseline.sh -m Qwen/Qwen2.5-VL-7B-Instruct -l 2500 -t 1

model_name: "Qwen/Qwen2.5-VL-7B-Instruct"
backend: "vllm-vlm"
tasks:
- name: "chartqa"
  metrics:
  - name: "relaxed_accuracy,none"
    value: 0.855
limit: 2500
num_fewshot: 0
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Meta-Llama-4-Maverick-17B-128E-Instruct-FP8.yaml
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Meta-Llama-4-Maverick-17B-128E-Instruct-FP8-MM.yaml
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Qwen2.5-VL-7B-Instruct.yaml
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
#!/bin/bash
# We can use this script to compute baseline accuracy on chartqa for vllm.
#
# Make sure you have lm-eval-harness installed:
# pip install lm-eval==0.4.9

usage() {
    echo
    echo "Runs lm eval harness on ChartQA using multimodal vllm."
    echo "This pathway is intended to be used to create baselines for "
    echo "our correctness tests in vllm's CI."
    echo
    echo "usage: ${0} <options>"
    echo
    echo "  -m    - huggingface stub or local directory of the model"
    echo "  -l    - limit number of samples to run"
    echo "  -t    - tensor parallel size to run at"
    echo
}

while getopts "m:l:t:" OPT; do
  case ${OPT} in
    m )
        MODEL="$OPTARG"
        ;;
    l )
        LIMIT="$OPTARG"
        ;;
    t )
        TP_SIZE="$OPTARG"
        ;;
    \? )
        usage
        exit 1
        ;;
  esac
done

# All three options are required; fail early with usage instead of launching
# lm_eval with an empty model stub, limit, or tensor-parallel size.
if [[ -z "${MODEL:-}" || -z "${LIMIT:-}" || -z "${TP_SIZE:-}" ]]; then
  usage
  exit 1
fi

lm_eval --model vllm-vlm \
  --model_args "pretrained=$MODEL,tensor_parallel_size=$TP_SIZE" \
  --tasks chartqa \
  --batch_size auto \
  --apply_chat_template \
  --limit "$LIMIT"

.buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh

100644100755
File mode changed.

0 commit comments

Comments
 (0)