
Commit ee013d7

Merge remote-tracking branch 'origin/main' into remove-lora-additional-vocabulary

2 parents: 44f75d4 + 136a17f

2,037 files changed: +196,461 -156,885 lines

.buildkite/check-wheel-size.py
Lines changed: 2 additions & 2 deletions

@@ -5,11 +5,11 @@
 import sys
 import zipfile

-# Read the VLLM_MAX_SIZE_MB environment variable, defaulting to 450 MiB
+# Read the VLLM_MAX_SIZE_MB environment variable, defaulting to 500 MiB
 # Note that we have 800 MiB quota, please use it wisely.
 # See https://github.com/pypi/support/issues/6326 .
 # Please also sync the value with the one in Dockerfile.
-VLLM_MAX_SIZE_MB = int(os.environ.get("VLLM_MAX_SIZE_MB", 450))
+VLLM_MAX_SIZE_MB = int(os.environ.get("VLLM_MAX_SIZE_MB", 500))


 def print_top_10_largest_files(zip_file):
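Only the default changes here: the limit is still read from the VLLM_MAX_SIZE_MB environment variable, so CI can override it per run. A minimal sketch of such an override; the positional wheel-directory argument is an assumption about the invocation, not something shown in this diff:

# Hypothetical invocation: override the 500 MiB default for one run.
# "dist" as the wheel directory is an assumed argument, not from the diff;
# with the variable unset, the script falls back to 500.
VLLM_MAX_SIZE_MB=550 python3 .buildkite/check-wheel-size.py dist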
Lines changed: 12 additions & 0 deletions

@@ -0,0 +1,12 @@
+# For vllm script, with -t option (tensor parallel size).
+# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m HandH1998/QQQ-Llama-3-8b-g128 -b 32 -l 1000 -f 5 -t 1
+model_name: "HandH1998/QQQ-Llama-3-8b-g128"
+tasks:
+- name: "gsm8k"
+  metrics:
+  - name: "exact_match,strict-match"
+    value: 0.419
+  - name: "exact_match,flexible-extract"
+    value: 0.416
+limit: 1000
+num_fewshot: 5
Lines changed: 11 additions & 0 deletions

@@ -0,0 +1,11 @@
+# For vllm script, with -t option (tensor parallel size).
+# bash .buildkite/lm-eval-harness/run-lm-eval-chartqa-vllm-vlm-baseline.sh -m meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 -l 100 -t 8
+model_name: "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
+backend: "vllm-vlm"
+tasks:
+- name: "chartqa"
+  metrics:
+  - name: "relaxed_accuracy,none"
+    value: 0.90
+limit: 100
+num_fewshot: 0
Lines changed: 11 additions & 0 deletions

@@ -0,0 +1,11 @@
+# For vllm script, with -t option (tensor parallel size).
+# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 -b 32 -l 250 -t 8 -f 5
+model_name: "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
+backend: "vllm-vlm"
+tasks:
+- name: "mmlu_pro"
+  metrics:
+  - name: "exact_match,custom-extract"
+    value: 0.80
+limit: 250 # will run on 250 * 14 subjects = 3500 samples
+num_fewshot: 5

.buildkite/lm-eval-harness/configs/Qwen2.5-VL-3B-Instruct-FP8-dynamic.yaml
Lines changed: 2 additions & 1 deletion

@@ -1,4 +1,5 @@
-# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m RedHatAI/Qwen2.5-VL-3B-Instruct-FP8-Dynamic -b auto -l 1319 -f 5 -t 1
+# For vllm script, with -t option (tensor parallel size)
+# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m RedHatAI/Qwen2.5-VL-3B-Instruct-FP8-Dynamic -l 1319 -t 1
 model_name: "RedHatAI/Qwen2.5-VL-3B-Instruct-FP8-Dynamic"
 tasks:
 - name: "gsm8k"
Lines changed: 12 additions & 0 deletions

@@ -0,0 +1,12 @@
+# For vllm script, with -t option (tensor parallel size).
+# bash .buildkite/lm-eval-harness/run-lm-eval-chartqa-vllm-vlm-baseline.sh -m Qwen/Qwen2.5-VL-7B-Instruct -l 2500 -t 1
+
+model_name: "Qwen/Qwen2.5-VL-7B-Instruct"
+backend: "vllm-vlm"
+tasks:
+- name: "chartqa"
+  metrics:
+  - name: "relaxed_accuracy,none"
+    value: 0.855
+limit: 2500
+num_fewshot: 0
Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
+Meta-Llama-4-Maverick-17B-128E-Instruct-FP8.yaml

Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
+Meta-Llama-4-Maverick-17B-128E-Instruct-FP8-MM.yaml

Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
+Qwen2.5-VL-7B-Instruct.yaml
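These one-line additions register the new configs with test lists that CI iterates over. Each config's "value" fields act as accuracy baselines for vllm's correctness tests (the script below describes itself as creating such baselines). A hypothetical sketch of the comparison step; the variable names, sample values, and the 5% relative tolerance are all assumptions, not taken from this commit:

# Hypothetical baseline check: fail if the measured metric falls more
# than rtol below the recorded baseline (all values assumed here).
baseline=0.855   # "value" field from a config
measured=0.861   # metric reported by lm_eval
rtol=0.05        # assumed tolerance, not from this commit
awk -v b="$baseline" -v m="$measured" -v r="$rtol" \
    'BEGIN { exit !(m >= b * (1 - r)) }' \
  && echo "PASS: $measured within tolerance of $baseline" \
  || echo "FAIL: $measured below baseline $baseline"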
.buildkite/lm-eval-harness/run-lm-eval-chartqa-vllm-vlm-baseline.sh
Lines changed: 44 additions & 0 deletions

@@ -0,0 +1,44 @@
+#!/bin/bash
+# We can use this script to compute baseline accuracy on chartqa for vllm.
+#
+# Make sure you have lm-eval-harness installed:
+# pip install lm-eval==0.4.9
+
+usage() {
+    echo
+    echo "Runs lm eval harness on ChartQA using multimodal vllm."
+    echo "This pathway is intended to be used to create baselines for"
+    echo "our correctness tests in vllm's CI."
+    echo
+    echo "usage: ${0} <options>"
+    echo
+    echo "  -m    - huggingface stub or local directory of the model"
+    echo "  -l    - limit number of samples to run"
+    echo "  -t    - tensor parallel size to run at"
+    echo
+}
+
+while getopts "m:l:t:" OPT; do
+  case ${OPT} in
+    m )
+        MODEL="$OPTARG"
+        ;;
+    l )
+        LIMIT="$OPTARG"
+        ;;
+    t )
+        TP_SIZE="$OPTARG"
+        ;;
+    \? )
+        usage
+        exit 1
+        ;;
+  esac
+done
+
+lm_eval --model vllm-vlm \
+  --model_args "pretrained=$MODEL,tensor_parallel_size=$TP_SIZE" \
+  --tasks chartqa \
+  --batch_size auto \
+  --apply_chat_template \
+  --limit "$LIMIT"
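The Qwen2.5-VL-7B config above records the invocation that produced its baseline, which doubles as a usage example for this script (run from the repository root):

# Reproduce the ChartQA baseline for Qwen2.5-VL-7B-Instruct;
# command taken from the config's own comment.
bash .buildkite/lm-eval-harness/run-lm-eval-chartqa-vllm-vlm-baseline.sh \
  -m Qwen/Qwen2.5-VL-7B-Instruct -l 2500 -t 1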
