@@ -290,7 +290,13 @@ async fn completions(
290290
291291 Ok ( sse_stream. into_response ( ) )
292292 } else {
293- // TODO: report ISL/OSL for non-streaming requests
293+ // Process the stream to collect metrics for non-streaming requests
294+ let stream = stream. map ( move |response| {
295+ // Process metrics but return the original response for aggregation
296+ process_metrics_only ( & response, & mut response_collector) ;
297+ response
298+ } ) ;
299+
294300 let response = NvCreateCompletionResponse :: from_annotated_stream ( stream)
295301 . await
296302 . map_err ( |e| {
@@ -515,7 +521,13 @@ async fn chat_completions(
515521
516522 Ok ( sse_stream. into_response ( ) )
517523 } else {
518- // TODO: report ISL/OSL for non-streaming requests
524+ // Process the stream to collect metrics for non-streaming requests
525+ let stream = stream. map ( move |response| {
526+ // Process metrics but return the original response for aggregation
527+ process_metrics_only ( & response, & mut response_collector) ;
528+ response
529+ } ) ;
530+
519531 let response = NvCreateChatCompletionResponse :: from_annotated_stream ( stream)
520532 . await
521533 . map_err ( |e| {
@@ -911,6 +923,17 @@ impl<T> From<Annotated<T>> for EventConverter<T> {
911923 }
912924}
913925
926+ fn process_metrics_only < T > (
927+ annotated : & Annotated < T > ,
928+ response_collector : & mut ResponseMetricCollector ,
929+ ) {
930+ // update metrics
931+ if let Ok ( Some ( metrics) ) = LLMMetricAnnotation :: from_annotation ( annotated) {
932+ response_collector. observe_current_osl ( metrics. output_tokens ) ;
933+ response_collector. observe_response ( metrics. input_tokens , metrics. chunk_tokens ) ;
934+ }
935+ }
936+
914937fn process_event_converter < T : Serialize > (
915938 annotated : EventConverter < T > ,
916939 response_collector : & mut ResponseMetricCollector ,
0 commit comments