@@ -45,8 +45,12 @@ pub struct InflightGuard {
4545 request_type : RequestType ,
4646 status : Status ,
4747 timer : Instant ,
48- first_token : bool ,
49- last_response : Option < Duration > ,
48+ // We use is_first_token to distinguish TTFT from ITL. It is true by default and
49+ // flips to false once the first token is returned and TTFT is published.
50+ is_first_token : bool ,
51+ // we track the last response time so that ITL for the newly returned tokens can
52+ // be computed.
53+ last_response_time : Option < Duration > ,
5054 osl : usize ,
5155}
5256
@@ -296,8 +300,8 @@ impl InflightGuard {
296300 request_type,
297301 status : Status :: Error ,
298302 timer,
299- first_token : true ,
300- last_response : None ,
303+ is_first_token : true ,
304+ last_response_time : None ,
301305 osl : 0 ,
302306 }
303307 }
@@ -315,10 +319,10 @@ impl InflightGuard {
315319 }
316320
317321 pub ( crate ) fn observe_response ( & mut self , isl : usize , num_tokens : usize ) {
318- if self . first_token {
322+ if self . is_first_token {
319323 // NOTE: when there are multiple tokens in the first response,
320324 // we use the full response time as TTFT and ignore the ITL
321- self . first_token = false ;
325+ self . is_first_token = false ;
322326
323327 // Publish TTFT
324328 let ttft = self . timer . elapsed ( ) . as_secs_f64 ( ) ;
@@ -337,8 +341,8 @@ impl InflightGuard {
337341
338342 let current_duration = self . timer . elapsed ( ) ;
339343
340- if let Some ( last_response ) = self . last_response {
341- let response_duration = current_duration - last_response ;
344+ if let Some ( last_response_time ) = self . last_response_time {
345+ let response_duration = current_duration - last_response_time ;
342346 let itl = response_duration. as_secs_f64 ( ) / num_tokens as f64 ;
343347 for _ in 0 ..num_tokens {
344348 self . metrics
@@ -348,7 +352,7 @@ impl InflightGuard {
348352 }
349353 }
350354
351- self . last_response = Some ( current_duration) ;
355+ self . last_response_time = Some ( current_duration) ;
352356 }
353357}
354358
0 commit comments