vllm-benchmarks/benchmarks/cuda
3 files changed, +48 -2 lines

@@ -2,8 +2,8 @@ name: vLLM Benchmark
 
 on:
   schedule:
-    # Run every 6 hours
-    - cron: '0 */6 * * *'
+    # Run every 12 hours
+    - cron: '0 */12 * * *'
   workflow_dispatch:
     inputs:
       vllm_branch:
@@ -105,5 +105,27 @@
             "num_iters": 15,
             "max_model_len": 8192
         }
+    },
+    {
+        "test_name": "latency_gemma_3_27b_it_tp8",
+        "parameters": {
+            "model": "google/gemma-3-27b-it",
+            "tensor_parallel_size": 8,
+            "load_format": "dummy",
+            "num_iters_warmup": 5,
+            "num_iters": 15,
+            "max_model_len": 8192
+        }
+    },
+    {
+        "test_name": "latency_qwen3_30b_a3b_tp8",
+        "parameters": {
+            "model": "Qwen/Qwen3-30B-A3B",
+            "tensor_parallel_size": 8,
+            "load_format": "dummy",
+            "num_iters_warmup": 5,
+            "num_iters": 15,
+            "max_model_len": 8192
+        }
     }
 ]
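For reference, each latency entry corresponds to the flags of vLLM's benchmark_latency.py script. The sketch below is a rough illustration, not this repository's actual runner; the script path and the underscore-to-dash flag mapping are assumptions about how the harness consumes these entries.

# Minimal sketch (assumed harness behavior): expand one of the new latency
# entries into a benchmark_latency.py command line.
import shlex

entry = {
    "test_name": "latency_gemma_3_27b_it_tp8",
    "parameters": {
        "model": "google/gemma-3-27b-it",
        "tensor_parallel_size": 8,
        "load_format": "dummy",
        "num_iters_warmup": 5,
        "num_iters": 15,
        "max_model_len": 8192,
    },
}

cmd = ["python", "benchmarks/benchmark_latency.py"]
for key, value in entry["parameters"].items():
    # JSON keys use underscores; turn them into CLI-style flags with dashes.
    cmd += [f"--{key.replace('_', '-')}", str(value)]

print(shlex.join(cmd))
# -> "python benchmarks/benchmark_latency.py --model google/gemma-3-27b-it
#     --tensor-parallel-size 8 --load-format dummy --num-iters-warmup 5
#     --num-iters 15 --max-model-len 8192" (printed on one line)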
@@ -115,5 +115,29 @@
             "backend": "vllm",
             "max_model_len": 8192
         }
+    },
+    {
+        "test_name": "throughput_gemma_3_27b_it_tp8",
+        "parameters": {
+            "model": "google/gemma-3-27b-it",
+            "tensor_parallel_size": 8,
+            "load_format": "dummy",
+            "dataset": "./ShareGPT_V3_unfiltered_cleaned_split.json",
+            "num_prompts": 200,
+            "backend": "vllm",
+            "max_model_len": 8192
+        }
+    },
+    {
+        "test_name": "throughput_qwen3_30b_a3b_tp8",
+        "parameters": {
+            "model": "Qwen/Qwen3-30B-A3B",
+            "tensor_parallel_size": 8,
+            "load_format": "dummy",
+            "dataset": "./ShareGPT_V3_unfiltered_cleaned_split.json",
+            "num_prompts": 200,
+            "backend": "vllm",
+            "max_model_len": 8192
+        }
     }
 ]
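Since both JSON lists keep growing, a quick well-formedness check can catch a missing comma or a duplicate test name before the scheduled run picks the file up. A small sketch follows, assuming the throughput list lives in a file such as throughput-tests.json; that file name and the check itself are not part of this PR.

# Sanity-check sketch (assumption, not part of this PR): verify every
# throughput entry has a unique test_name and the expected parameters.
import json

REQUIRED = {"model", "tensor_parallel_size", "load_format",
            "dataset", "num_prompts", "backend", "max_model_len"}

with open("throughput-tests.json") as f:  # hypothetical file name
    tests = json.load(f)

names = [t["test_name"] for t in tests]
assert len(names) == len(set(names)), "duplicate test_name entries"

for t in tests:
    missing = REQUIRED - set(t["parameters"])
    assert not missing, f"{t['test_name']} is missing {sorted(missing)}"

print(f"{len(tests)} throughput tests look well-formed")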