Skip to content

Commit c332158

Browse files
committed
rename + comment
1 parent 4a1ea83 commit c332158

File tree

1 file changed

+13
-9
lines changed

1 file changed

+13
-9
lines changed

lib/llm/src/http/service/metrics.rs

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,12 @@ pub struct InflightGuard {
4545
request_type: RequestType,
4646
status: Status,
4747
timer: Instant,
48-
first_token: bool,
49-
last_response: Option<Duration>,
48+
// we use is_first_token to distinguish TTFT from ITL. It is true by default and
49+
// flips to false when the first token is returned and TTFT is published.
50+
is_first_token: bool,
51+
// we track the last response time so that ITL for the newly returned tokens can
52+
// be computed.
53+
last_response_time: Option<Duration>,
5054
osl: usize,
5155
}
5256

@@ -296,8 +300,8 @@ impl InflightGuard {
296300
request_type,
297301
status: Status::Error,
298302
timer,
299-
first_token: true,
300-
last_response: None,
303+
is_first_token: true,
304+
last_response_time: None,
301305
osl: 0,
302306
}
303307
}
@@ -315,10 +319,10 @@ impl InflightGuard {
315319
}
316320

317321
pub(crate) fn observe_response(&mut self, isl: usize, num_tokens: usize) {
318-
if self.first_token {
322+
if self.is_first_token {
319323
// NOTE: when there are multiple tokens in the first response,
320324
// we use the full response time as TTFT and ignore the ITL
321-
self.first_token = false;
325+
self.is_first_token = false;
322326

323327
// Publish TTFT
324328
let ttft = self.timer.elapsed().as_secs_f64();
@@ -337,8 +341,8 @@ impl InflightGuard {
337341

338342
let current_duration = self.timer.elapsed();
339343

340-
if let Some(last_response) = self.last_response {
341-
let response_duration = current_duration - last_response;
344+
if let Some(last_response_time) = self.last_response_time {
345+
let response_duration = current_duration - last_response_time;
342346
let itl = response_duration.as_secs_f64() / num_tokens as f64;
343347
for _ in 0..num_tokens {
344348
self.metrics
@@ -348,7 +352,7 @@ impl InflightGuard {
348352
}
349353
}
350354

351-
self.last_response = Some(current_duration);
355+
self.last_response_time = Some(current_duration);
352356
}
353357
}
354358

0 commit comments

Comments
 (0)