vllm-benchmarks/benchmarks/cuda
1 file changed, 54 insertions(+)

@@ -138,5 +138,59 @@
             "num_iters": 15,
             "max_model_len": 8192
         }
+    },
+    {
+        "test_name": "latency_gemma3_12b_it_fp8",
+        "parameters": {
+            "model": "pytorch/gemma-3-12b-it-FP8",
+            "load_format": "dummy",
+            "num_iters_warmup": 5,
+            "num_iters": 15
+        }
+    },
+    {
+        "test_name": "latency_gemma3_12b_it_int4",
+        "parameters": {
+            "model": "pytorch/gemma-3-12b-it-INT4",
+            "load_format": "dummy",
+            "num_iters_warmup": 5,
+            "num_iters": 15
+        }
+    },
+    {
+        "test_name": "latency_gemma3_12b_it_awq_int4",
+        "parameters": {
+            "model": "pytorch/gemma-3-12b-it-AWQ-INT4",
+            "load_format": "dummy",
+            "num_iters_warmup": 5,
+            "num_iters": 15
+        }
+    },
+    {
+        "test_name": "latency_gemma3_27b_it_fp8",
+        "parameters": {
+            "model": "pytorch/gemma-3-27b-it-FP8",
+            "load_format": "dummy",
+            "num_iters_warmup": 5,
+            "num_iters": 15
+        }
+    },
+    {
+        "test_name": "latency_gemma3_27b_it_int4",
+        "parameters": {
+            "model": "pytorch/gemma-3-27b-it-INT4",
+            "load_format": "dummy",
+            "num_iters_warmup": 5,
+            "num_iters": 15
+        }
+    },
+    {
+        "test_name": "latency_gemma3_27b_it_awq_int4",
+        "parameters": {
+            "model": "pytorch/gemma-3-27b-it-AWQ-INT4",
+            "load_format": "dummy",
+            "num_iters_warmup": 5,
+            "num_iters": 15
+        }
     }
 ]