
Commit 2bc5a58

Add latency benchmarks for pytorch models
1 parent 4bde5d3

1 file changed, 54 insertions(+), 0 deletions(-)

vllm-benchmarks/benchmarks/cuda/latency-tests.json

@@ -138,5 +138,59 @@
             "num_iters": 15,
             "max_model_len": 8192
         }
+    },
+    {
+        "test_name": "latency_gemma3_12b_it_fp8",
+        "parameters": {
+            "model": "pytorch/gemma-3-12b-it-FP8",
+            "load_format": "dummy",
+            "num_iters_warmup": 5,
+            "num_iters": 15
+        }
+    },
+    {
+        "test_name": "latency_gemma3_12b_it_int4",
+        "parameters": {
+            "model": "pytorch/gemma-3-12b-it-INT4",
+            "load_format": "dummy",
+            "num_iters_warmup": 5,
+            "num_iters": 15
+        }
+    },
+    {
+        "test_name": "latency_gemma3_12b_it_awq_int4",
+        "parameters": {
+            "model": "pytorch/gemma-3-12b-it-AWQ-INT4",
+            "load_format": "dummy",
+            "num_iters_warmup": 5,
+            "num_iters": 15
+        }
+    },
+    {
+        "test_name": "latency_gemma3_27b_it_fp8",
+        "parameters": {
+            "model": "pytorch/gemma-3-27b-it-FP8",
+            "load_format": "dummy",
+            "num_iters_warmup": 5,
+            "num_iters": 15
+        }
+    },
+    {
+        "test_name": "latency_gemma3_27b_it_int4",
+        "parameters": {
+            "model": "pytorch/gemma-3-27b-it-INT4",
+            "load_format": "dummy",
+            "num_iters_warmup": 5,
+            "num_iters": 15
+        }
+    },
+    {
+        "test_name": "latency_gemma3_27b_it_awq_int4",
+        "parameters": {
+            "model": "pytorch/gemma-3-27b-it-AWQ-INT4",
+            "load_format": "dummy",
+            "num_iters_warmup": 5,
+            "num_iters": 15
+        }
     }
 ]
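
Each added entry follows the schema already used elsewhere in latency-tests.json: a test_name plus a parameters object whose keys correspond to options of vLLM's latency benchmark script. As a rough illustration only (not part of this commit), a runner could expand each entry into a command line along the lines of the sketch below; the benchmarks/benchmark_latency.py path and the underscore-to-dash flag mapping are assumptions.

# Hypothetical sketch, not part of this commit: expand each entry in
# latency-tests.json into a benchmark_latency.py invocation.
# The script path and the underscore-to-dash flag mapping are assumptions.
import json
import shlex

def build_latency_command(test: dict) -> list[str]:
    # e.g. {"num_iters_warmup": 5} -> ["--num-iters-warmup", "5"]
    args = ["python", "benchmarks/benchmark_latency.py"]
    for key, value in test["parameters"].items():
        args += [f"--{key.replace('_', '-')}", str(value)]
    return args

if __name__ == "__main__":
    with open("vllm-benchmarks/benchmarks/cuda/latency-tests.json") as f:
        tests = json.load(f)
    for test in tests:
        print(f"{test['test_name']}: {shlex.join(build_latency_command(test))}")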
