Skip to content

Commit 79ac59f

Browse files
authored
Update Spec Decode metrics to include drafted and accepted token throughput (#24127)
Signed-off-by: Andrew Xia <axia@meta.com>
1 parent: b971f91 · commit: 79ac59f

File tree

1 file changed

+16
-3
lines changed

1 file changed

+16
-3
lines changed

vllm/v1/spec_decode/metrics.py

Lines changed: 16 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33

4+
import time
45
from dataclasses import dataclass, field
56
from typing import Optional
67

@@ -58,6 +59,7 @@ def reset(self):
5859
self.num_draft_tokens: list[int] = []
5960
self.num_accepted_tokens: list[int] = []
6061
self.accepted_tokens_per_pos_lists: list[list[int]] = []
62+
self.last_log_time = time.monotonic()
6163

6264
def observe(self, spec_decoding_stats: SpecDecodingStats):
6365
self.num_drafts.append(spec_decoding_stats.num_drafts)
@@ -73,6 +75,13 @@ def log(self, log_fn=logger.info):
7375
num_drafts = np.sum(self.num_drafts)
7476
num_draft_tokens = np.sum(self.num_draft_tokens)
7577
num_accepted_tokens = np.sum(self.num_accepted_tokens)
78+
draft_throughput = 0
79+
accepted_throughput = 0
80+
81+
elapsed_time = time.monotonic() - self.last_log_time
82+
if elapsed_time > 0:
83+
draft_throughput = num_draft_tokens / elapsed_time
84+
accepted_throughput = num_accepted_tokens / elapsed_time
7685

7786
draft_acceptance_rate = (num_accepted_tokens / num_draft_tokens *
7887
100 if num_draft_tokens > 0 else float("nan"))
@@ -86,16 +95,20 @@ def log(self, log_fn=logger.info):
8695

8796
log_fn(
8897
"SpecDecoding metrics: "
89-
"Draft acceptance rate: %.1f%%, "
9098
"Mean acceptance length: %.2f, "
99+
"Accepted throughput: %.2f tokens/s, "
100+
"Drafted throughput: %.2f tokens/s, "
91101
"Accepted: %d tokens, "
92102
"Drafted: %d tokens, "
93-
"Per-position acceptance rate: %s",
94-
draft_acceptance_rate,
103+
"Per-position acceptance rate: %s, "
104+
"Avg Draft acceptance rate: %.1f%%",
95105
mean_acceptance_length,
106+
accepted_throughput,
107+
draft_throughput,
96108
num_accepted_tokens,
97109
num_draft_tokens,
98110
rates_str,
111+
draft_acceptance_rate,
99112
)
100113
self.reset()
101114

0 commit comments

Comments
 (0)