Skip to content

Commit 27e1be0

Browse files
committed
use comment field
1 parent cfd12d7 commit 27e1be0

File tree

8 files changed

+40
-55
lines changed

8 files changed

+40
-55
lines changed

lib/engines/mistralrs/src/lib.rs

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -418,9 +418,6 @@ impl
418418
id: None,
419419
data: Some(delta),
420420
event: None,
421-
chunk_tokens: None,
422-
input_tokens: None,
423-
output_tokens: None,
424421
comment: None,
425422
};
426423
yield ann;
@@ -585,9 +582,6 @@ impl AsyncEngine<SingleIn<CompletionRequest>, ManyOut<Annotated<CompletionRespon
585582
id: None,
586583
data: Some(inner),
587584
event: None,
588-
chunk_tokens: None,
589-
input_tokens: None,
590-
output_tokens: None,
591585
comment: None,
592586
};
593587
yield ann;

lib/llm/src/engines.rs

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -202,15 +202,15 @@ impl
202202
let response = NvCreateChatCompletionStreamResponse {
203203
inner,
204204
};
205-
yield Annotated{ id: Some(id.to_string()), data: Some(response), event: None, chunk_tokens: None, input_tokens: None, output_tokens: None, comment: None };
205+
yield Annotated{ id: Some(id.to_string()), data: Some(response), event: None, comment: None };
206206
id += 1;
207207
}
208208

209209
let inner = deltas.create_choice(0, None, Some(async_openai::types::FinishReason::Stop), None);
210210
let response = NvCreateChatCompletionStreamResponse {
211211
inner,
212212
};
213-
yield Annotated { id: Some(id.to_string()), data: Some(response), event: None, chunk_tokens: None, input_tokens: None, output_tokens: None, comment: None };
213+
yield Annotated { id: Some(id.to_string()), data: Some(response), event: None, comment: None };
214214
};
215215

216216
Ok(ResponseStream::new(Box::pin(output), ctx))
@@ -234,11 +234,11 @@ impl AsyncEngine<SingleIn<CompletionRequest>, ManyOut<Annotated<CompletionRespon
234234
for c in chars_string.chars() {
235235
tokio::time::sleep(*TOKEN_ECHO_DELAY).await;
236236
let response = deltas.create_choice(0, Some(c.to_string()), None);
237-
yield Annotated{ id: Some(id.to_string()), data: Some(response), event: None, chunk_tokens: None, input_tokens: None, output_tokens: None, comment: None };
237+
yield Annotated{ id: Some(id.to_string()), data: Some(response), event: None, comment: None };
238238
id += 1;
239239
}
240240
let response = deltas.create_choice(0, None, Some("stop".to_string()));
241-
yield Annotated { id: Some(id.to_string()), data: Some(response), event: None, chunk_tokens: None, input_tokens: None, output_tokens: None, comment: None };
241+
yield Annotated { id: Some(id.to_string()), data: Some(response), event: None, comment: None };
242242

243243
};
244244

lib/llm/src/http/service/openai.rs

Lines changed: 29 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -514,13 +514,36 @@ fn process_event_converter<T: Serialize>(
514514
event = event.event(msg);
515515
}
516516

517-
if let Some(osl) = annotated.output_tokens {
518-
response_collector.observe_current_osl(osl);
519-
}
517+
// Parse token information from comment field for metrics
518+
if let Some(comments) = &annotated.comment {
519+
let mut output_tokens = None;
520+
let mut input_tokens = None;
521+
let mut chunk_tokens = None;
522+
523+
for comment in comments {
524+
if let Some(value) = comment.strip_prefix("chunk_tokens: ") {
525+
if let Ok(tokens) = value.parse::<usize>() {
526+
chunk_tokens = Some(tokens);
527+
}
528+
} else if let Some(value) = comment.strip_prefix("input_tokens: ") {
529+
if let Ok(tokens) = value.parse::<usize>() {
530+
input_tokens = Some(tokens);
531+
}
532+
} else if let Some(value) = comment.strip_prefix("output_tokens: ") {
533+
if let Ok(tokens) = value.parse::<usize>() {
534+
output_tokens = Some(tokens);
535+
}
536+
}
537+
}
538+
539+
if let Some(osl) = output_tokens {
540+
response_collector.observe_current_osl(osl);
541+
}
520542

521-
if let Some(isl) = annotated.input_tokens {
522-
if let Some(chunk_tokens) = annotated.chunk_tokens {
523-
response_collector.observe_response(isl, chunk_tokens);
543+
if let Some(isl) = input_tokens {
544+
if let Some(chunk_tokens) = chunk_tokens {
545+
response_collector.observe_response(isl, chunk_tokens);
546+
}
524547
}
525548
}
526549

lib/llm/src/preprocessor.rs

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -251,9 +251,13 @@ impl OpenAIPreprocessor {
251251
.map_err(|e| e.to_string())
252252
});
253253

254-
response.chunk_tokens = Some(chunk_tokens);
255-
response.input_tokens = Some(isl);
256-
response.output_tokens = Some(current_osl);
254+
// Store token information in comment field
255+
let token_info = vec![
256+
format!("chunk_tokens: {}", chunk_tokens),
257+
format!("input_tokens: {}", isl),
258+
format!("output_tokens: {}", current_osl),
259+
];
260+
response.comment = Some(token_info);
257261

258262
tracing::trace!(
259263
request_id = inner.context.id(),

lib/llm/src/protocols/codec.rs

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -118,9 +118,6 @@ where
118118
data,
119119
id: value.id,
120120
event: value.event,
121-
chunk_tokens: None,
122-
input_tokens: None,
123-
output_tokens: None,
124121
comment: value.comments,
125122
})
126123
}

lib/llm/src/protocols/openai/chat_completions/aggregator.rs

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -284,9 +284,6 @@ mod tests {
284284
data: Some(data),
285285
id: Some("test_id".to_string()),
286286
event: None,
287-
chunk_tokens: None,
288-
input_tokens: None,
289-
output_tokens: None,
290287
comment: None,
291288
}
292289
}
@@ -430,9 +427,6 @@ mod tests {
430427
data: Some(data),
431428
id: Some("test_id".to_string()),
432429
event: None,
433-
chunk_tokens: None,
434-
input_tokens: None,
435-
output_tokens: None,
436430
comment: None,
437431
};
438432
let stream = Box::pin(stream::iter(vec![annotated_delta]));

lib/llm/src/protocols/openai/completions/aggregator.rs

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -205,9 +205,6 @@ mod tests {
205205
}),
206206
id: Some("test_id".to_string()),
207207
event: None,
208-
chunk_tokens: None,
209-
input_tokens: None,
210-
output_tokens: None,
211208
comment: None,
212209
}
213210
}
@@ -317,9 +314,6 @@ mod tests {
317314
}),
318315
id: Some("test_id".to_string()),
319316
event: None,
320-
chunk_tokens: None,
321-
input_tokens: None,
322-
output_tokens: None,
323317
comment: None,
324318
};
325319

lib/runtime/src/protocols/annotated.rs

Lines changed: 0 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -37,12 +37,6 @@ pub struct Annotated<R> {
3737
#[serde(skip_serializing_if = "Option::is_none")]
3838
pub event: Option<String>,
3939
#[serde(skip_serializing_if = "Option::is_none")]
40-
pub chunk_tokens: Option<usize>,
41-
#[serde(skip_serializing_if = "Option::is_none")]
42-
pub input_tokens: Option<usize>,
43-
#[serde(skip_serializing_if = "Option::is_none")]
44-
pub output_tokens: Option<usize>,
45-
#[serde(skip_serializing_if = "Option::is_none")]
4640
pub comment: Option<Vec<String>>,
4741
}
4842

@@ -53,9 +47,6 @@ impl<R> Annotated<R> {
5347
data: None,
5448
id: None,
5549
event: Some("error".to_string()),
56-
chunk_tokens: None,
57-
input_tokens: None,
58-
output_tokens: None,
5950
comment: Some(vec![error]),
6051
}
6152
}
@@ -66,9 +57,6 @@ impl<R> Annotated<R> {
6657
data: Some(data),
6758
id: None,
6859
event: None,
69-
chunk_tokens: None,
70-
input_tokens: None,
71-
output_tokens: None,
7260
comment: None,
7361
}
7462
}
@@ -84,9 +72,6 @@ impl<R> Annotated<R> {
8472
data: None,
8573
id: None,
8674
event: Some(name.into()),
87-
chunk_tokens: None,
88-
input_tokens: None,
89-
output_tokens: None,
9075
comment: Some(vec![serde_json::to_string(value)?]),
9176
})
9277
}
@@ -122,9 +107,6 @@ impl<R> Annotated<R> {
122107
data,
123108
id: self.id,
124109
event: self.event,
125-
chunk_tokens: self.chunk_tokens,
126-
input_tokens: self.input_tokens,
127-
output_tokens: self.output_tokens,
128110
comment: self.comment,
129111
}
130112
}
@@ -140,9 +122,6 @@ impl<R> Annotated<R> {
140122
data,
141123
id: self.id,
142124
event: self.event,
143-
chunk_tokens: self.chunk_tokens,
144-
input_tokens: self.input_tokens,
145-
output_tokens: self.output_tokens,
146125
comment: self.comment,
147126
},
148127
Err(e) => Annotated::from_error(e),

0 commit comments

Comments
 (0)