Skip to content

Commit 9d2323e

Browse files
committed
feat: metrics for non-streaming
1 parent b4189c6 commit 9d2323e

File tree

1 file changed

+25
-2
lines changed

1 file changed

+25
-2
lines changed

lib/llm/src/http/service/openai.rs

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,13 @@ async fn completions(
290290

291291
Ok(sse_stream.into_response())
292292
} else {
293-
// TODO: report ISL/OSL for non-streaming requests
293+
// Process the stream to collect metrics for non-streaming requests
294+
let stream = stream.map(move |response| {
295+
// Process metrics but return the original response for aggregation
296+
process_metrics_only(&response, &mut response_collector);
297+
response
298+
});
299+
294300
let response = NvCreateCompletionResponse::from_annotated_stream(stream)
295301
.await
296302
.map_err(|e| {
@@ -515,7 +521,13 @@ async fn chat_completions(
515521

516522
Ok(sse_stream.into_response())
517523
} else {
518-
// TODO: report ISL/OSL for non-streaming requests
524+
// Process the stream to collect metrics for non-streaming requests
525+
let stream = stream.map(move |response| {
526+
// Process metrics but return the original response for aggregation
527+
process_metrics_only(&response, &mut response_collector);
528+
response
529+
});
530+
519531
let response = NvCreateChatCompletionResponse::from_annotated_stream(stream)
520532
.await
521533
.map_err(|e| {
@@ -911,6 +923,17 @@ impl<T> From<Annotated<T>> for EventConverter<T> {
911923
}
912924
}
913925

926+
fn process_metrics_only<T>(
927+
annotated: &Annotated<T>,
928+
response_collector: &mut ResponseMetricCollector,
929+
) {
930+
// update metrics
931+
if let Ok(Some(metrics)) = LLMMetricAnnotation::from_annotation(annotated) {
932+
response_collector.observe_current_osl(metrics.output_tokens);
933+
response_collector.observe_response(metrics.input_tokens, metrics.chunk_tokens);
934+
}
935+
}
936+
914937
fn process_event_converter<T: Serialize>(
915938
annotated: EventConverter<T>,
916939
response_collector: &mut ResponseMetricCollector,

0 commit comments

Comments
 (0)