11# SPDX-License-Identifier: Apache-2.0
22# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33
4+ import time
45from dataclasses import dataclass , field
56from typing import Optional
67
@@ -58,6 +59,7 @@ def reset(self):
5859 self .num_draft_tokens : list [int ] = []
5960 self .num_accepted_tokens : list [int ] = []
6061 self .accepted_tokens_per_pos_lists : list [list [int ]] = []
62+ self .last_log_time = time .monotonic ()
6163
6264 def observe (self , spec_decoding_stats : SpecDecodingStats ):
6365 self .num_drafts .append (spec_decoding_stats .num_drafts )
@@ -73,6 +75,13 @@ def log(self, log_fn=logger.info):
7375 num_drafts = np .sum (self .num_drafts )
7476 num_draft_tokens = np .sum (self .num_draft_tokens )
7577 num_accepted_tokens = np .sum (self .num_accepted_tokens )
78+ draft_throughput = 0
79+ accepted_throughput = 0
80+
81+ elapsed_time = time .monotonic () - self .last_log_time
82+ if elapsed_time > 0 :
83+ draft_throughput = num_draft_tokens / elapsed_time
84+ accepted_throughput = num_accepted_tokens / elapsed_time
7685
7786 draft_acceptance_rate = (num_accepted_tokens / num_draft_tokens *
7887 100 if num_draft_tokens > 0 else float ("nan" ))
@@ -86,16 +95,20 @@ def log(self, log_fn=logger.info):
8695
8796 log_fn (
8897 "SpecDecoding metrics: "
89- "Draft acceptance rate: %.1f%%, "
9098 "Mean acceptance length: %.2f, "
99+ "Accepted throughput: %.2f tokens/s, "
100+ "Drafted throughput: %.2f tokens/s, "
91101 "Accepted: %d tokens, "
92102 "Drafted: %d tokens, "
93- "Per-position acceptance rate: %s" ,
94- draft_acceptance_rate ,
103+ "Per-position acceptance rate: %s, "
104+ "Avg Draft acceptance rate: %.1f%%" ,
95105 mean_acceptance_length ,
106+ accepted_throughput ,
107+ draft_throughput ,
96108 num_accepted_tokens ,
97109 num_draft_tokens ,
98110 rates_str ,
111+ draft_acceptance_rate ,
99112 )
100113 self .reset ()
101114
0 commit comments