
Commit 27b6f72

Merge branch 'main' into add-record-function

Signed-off-by: Dayeol Lee <dayeolee@gmail.com>

2 parents 181b452 + 878fd5a

File tree

2,618 files changed: +319,519 / −212,235 lines

Note: large commits have some content hidden by default, so some file names and contents in this diff are not shown.

.buildkite/check-wheel-size.py

Lines changed: 4 additions & 4 deletions
@@ -5,11 +5,11 @@
 import sys
 import zipfile
 
-# Read the VLLM_MAX_SIZE_MB environment variable, defaulting to 400 MiB
-# Note that we have 400 MiB quota, please use it wisely.
-# See https://github.com/pypi/support/issues/3792 .
+# Read the VLLM_MAX_SIZE_MB environment variable, defaulting to 500 MiB
+# Note that we have 800 MiB quota, please use it wisely.
+# See https://github.com/pypi/support/issues/6326 .
 # Please also sync the value with the one in Dockerfile.
-VLLM_MAX_SIZE_MB = int(os.environ.get("VLLM_MAX_SIZE_MB", 400))
+VLLM_MAX_SIZE_MB = int(os.environ.get("VLLM_MAX_SIZE_MB", 500))
 
 
 def print_top_10_largest_files(zip_file):
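For context, VLLM_MAX_SIZE_MB feeds a wheel-size gate along these lines. This is a minimal standalone sketch, not the repo's exact script; check_wheel_size is a hypothetical name, though the unchanged print_top_10_largest_files suggests the real script also reports the largest archive members:

import os
import sys
import zipfile

# Hypothetical, self-contained sketch of the size gate this constant feeds.
VLLM_MAX_SIZE_MB = int(os.environ.get("VLLM_MAX_SIZE_MB", 500))

def check_wheel_size(wheel_path):
    # Compare the on-disk wheel size against the configured budget.
    size_mb = os.path.getsize(wheel_path) / (1024 * 1024)
    if size_mb > VLLM_MAX_SIZE_MB:
        print(f"{wheel_path} is {size_mb:.1f} MiB, over the {VLLM_MAX_SIZE_MB} MiB limit")
        # Report the largest archive members to show what grew.
        with zipfile.ZipFile(wheel_path) as zf:
            largest = sorted(zf.infolist(), key=lambda i: i.file_size, reverse=True)
            for info in largest[:10]:
                print(f"  {info.filename}: {info.file_size / (1024 * 1024):.1f} MiB")
        return 1
    print(f"{wheel_path} is {size_mb:.1f} MiB, within the {VLLM_MAX_SIZE_MB} MiB limit")
    return 0

if __name__ == "__main__":
    sys.exit(check_wheel_size(sys.argv[1]))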
(new file, name hidden)
Lines changed: 12 additions & 0 deletions

@@ -0,0 +1,12 @@
+# For vllm script, with -t option (tensor parallel size).
+# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m HandH1998/QQQ-Llama-3-8b-g128 -b 32 -l 1000 -f 5 -t 1
+model_name: "HandH1998/QQQ-Llama-3-8b-g128"
+tasks:
+- name: "gsm8k"
+  metrics:
+  - name: "exact_match,strict-match"
+    value: 0.419
+  - name: "exact_match,flexible-extract"
+    value: 0.416
+limit: 1000
+num_fewshot: 5
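The shape of these configs suggests the harness loads them and compares measured lm-eval scores against the recorded baseline values. A minimal sketch under that assumption; RTOL and check_results are illustrative names, not the repo's actual API:

import yaml  # PyYAML

RTOL = 0.05  # assumed tolerance

def check_results(config_path, measured):
    # measured maps (task_name, metric_name) -> score from an lm-eval run.
    with open(config_path) as f:
        cfg = yaml.safe_load(f)
    for task in cfg["tasks"]:
        for metric in task["metrics"]:
            expected = metric["value"]
            got = measured[(task["name"], metric["name"])]
            assert abs(got - expected) <= RTOL, (
                f"{task['name']}/{metric['name']}: got {got}, expected {expected}"
            )

# Example:
# check_results("QQQ-Llama-3-8b-g128.yaml",
#               {("gsm8k", "exact_match,strict-match"): 0.42,
#                ("gsm8k", "exact_match,flexible-extract"): 0.42})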
(new file, name hidden)
Lines changed: 12 additions & 0 deletions

@@ -0,0 +1,12 @@
+# For hf script, without -t option (tensor parallel size).
+# bash .buildkite/lm-eval-harness/run-lm-eval-chartqa-vllm-vlm-baseline.sh -m meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 -l 100 -t 8
+model_name: "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
+backend: "vllm-vlm"
+tasks:
+- name: "chartqa"
+  metrics:
+  - name: "relaxed_accuracy,none"
+    # TODO(zhewenl): model card is 0.90, but the actual score is 0.80.
+    value: 0.80
+limit: 100
+num_fewshot: 0
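This config introduces a backend key ("vllm-vlm") alongside the usual task fields. One plausible wiring, sketched under assumption rather than taken from the repo, is to select the lm-eval model backend from that key when building the command:

def lm_eval_cmd(cfg, tp_size):
    # Sketch: multimodal configs like this one would select "vllm-vlm";
    # the default backend is assumed, not confirmed by this diff.
    backend = cfg.get("backend", "vllm")
    return [
        "lm_eval",
        "--model", backend,
        "--model_args", f"pretrained={cfg['model_name']},tensor_parallel_size={tp_size}",
        "--tasks", ",".join(t["name"] for t in cfg["tasks"]),
        "--num_fewshot", str(cfg["num_fewshot"]),
        "--limit", str(cfg["limit"]),
    ]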
(new file, name hidden)
Lines changed: 10 additions & 0 deletions

@@ -0,0 +1,10 @@
+# For hf script, without -t option (tensor parallel size).
+# bash .buildkite/lm-eval-harness/run-lm-eval-mmlupro-vllm-baseline.sh -m meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 -l 250 -t 8 -f 5
+model_name: "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8"
+tasks:
+- name: "mmlu_pro"
+  metrics:
+  - name: "exact_match,custom-extract"
+    value: 0.80
+limit: 250 # will run on 250 * 14 subjects = 3500 samples
+num_fewshot: 5

.buildkite/lm-eval-harness/configs/Qwen2.5-VL-3B-Instruct-FP8-dynamic.yaml

Lines changed: 2 additions & 1 deletion

@@ -1,4 +1,5 @@
-# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m RedHatAI/Qwen2.5-VL-3B-Instruct-FP8-Dynamic -b auto -l 1319 -f 5 -t 1
+# For vllm script, with -t option (tensor parallel size)
+# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m RedHatAI/Qwen2.5-VL-3B-Instruct-FP8-Dynamic -l 1319 -t 1
 model_name: "RedHatAI/Qwen2.5-VL-3B-Instruct-FP8-Dynamic"
 tasks:
 - name: "gsm8k"
(new file, name hidden)
Lines changed: 12 additions & 0 deletions

@@ -0,0 +1,12 @@
+# For vllm script, with -t option (tensor parallel size).
+# bash .buildkite/lm-eval-harness/run-lm-eval-chartqa-vllm-vlm-baseline.sh -m Qwen/Qwen2.5-VL-7B-Instruct -l 2500 -t 1
+
+model_name: "Qwen/Qwen2.5-VL-7B-Instruct"
+backend: "vllm-vlm"
+tasks:
+- name: "chartqa"
+  metrics:
+  - name: "relaxed_accuracy,none"
+    value: 0.855
+limit: 2500
+num_fewshot: 0
(new file, name hidden)
Lines changed: 14 additions & 0 deletions

@@ -0,0 +1,14 @@
+model_name: "Qwen/Qwen3-235B-A22B-Instruct-2507-FP8"
+tasks:
+- name: "mmlu_pro"
+  metrics:
+  - name: "exact_match,custom-extract"
+    value: 0.82
+limit: 250 # will run on 250 * 14 subjects = 3500 samples
+num_fewshot: 5
+enforce_eager: false # we use false to speed up the eval process
+kv_cache_dtype: fp8 # we use fp8 to speed up the eval process
+max_model_len: 40960
+apply_chat_template: true
+fewshot_as_multiturn: true
+gen_kwargs: "temperature=0,top_p=1,top_k=0,max_gen_toks=5632,until=<|ENDANSWER|>"
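This config adds engine-level knobs (enforce_eager, kv_cache_dtype, max_model_len) on top of the usual task fields. A plausible way to consume them, sketched under assumption, is to fold them into the model_args string handed to lm-eval's vllm backend; build_model_args and ENGINE_KEYS are hypothetical names:

import yaml  # PyYAML

# Assumed subset of config keys that map directly to vLLM engine arguments.
ENGINE_KEYS = ("enforce_eager", "kv_cache_dtype", "max_model_len")

def build_model_args(config_path, tp_size):
    with open(config_path) as f:
        cfg = yaml.safe_load(f)
    args = [f"pretrained={cfg['model_name']}", f"tensor_parallel_size={tp_size}"]
    for key in ENGINE_KEYS:
        if key in cfg:
            args.append(f"{key}={cfg[key]}")
    return ",".join(args)

# build_model_args("Qwen3-235B-A22B-Instruct-2507-FP8.yaml", 8) would yield
# "pretrained=Qwen/Qwen3-235B-A22B-Instruct-2507-FP8,tensor_parallel_size=8,
#  enforce_eager=False,kv_cache_dtype=fp8,max_model_len=40960"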
(new file, name hidden)
Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
+Qwen3-235B-A22B-Instruct-2507-FP8.yaml

(new file, name hidden)
Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
+Meta-Llama-4-Maverick-17B-128E-Instruct-FP8-MM.yaml

(new file, name hidden)
Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
+Qwen2.5-VL-7B-Instruct.yaml
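These three one-line additions read like model-list text files that a driver script iterates, mapping each entry back to a config file. A small sketch assuming that layout; configs_from_list and the directory structure are illustrative, not confirmed by this diff:

from pathlib import Path

def configs_from_list(list_path):
    # Assumed layout: list files live beside a sibling "configs" directory.
    configs_dir = Path(list_path).parent.parent / "configs"
    for line in Path(list_path).read_text().splitlines():
        name = line.strip()
        if name and not name.startswith("#"):
            yield configs_dir / name

# for cfg in configs_from_list(".buildkite/lm-eval-harness/model-lists/large-models.txt"):
#     print(cfg)  # one config path per listed model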
