@@ -37,6 +37,7 @@ use crate::protocols::openai::{
3737 completions:: { NvCreateCompletionRequest , NvCreateCompletionResponse } ,
3838 embeddings:: { NvCreateEmbeddingRequest , NvCreateEmbeddingResponse } ,
3939 responses:: { NvCreateResponse , NvResponse } ,
40+ ParsingOptions ,
4041} ;
4142use crate :: request_template:: RequestTemplate ;
4243use crate :: types:: Annotated ;
@@ -194,6 +195,13 @@ fn get_or_create_request_id(primary: Option<&str>, headers: &HeaderMap) -> Strin
194195 uuid. to_string ( )
195196}
196197
198+ fn get_parsing_options ( state : & Arc < service_v2:: State > , model : & str ) -> ParsingOptions {
199+ let tool_call_parser = state. manager ( ) . get_model_tool_call_parser ( model) ;
200+ let reasoning_parser = None ; // TODO: Implement reasoning parser
201+
202+ ParsingOptions :: new ( tool_call_parser, reasoning_parser)
203+ }
204+
197205/// OpenAI Completions Request Handler
198206///
199207/// This method will handle the incoming request for the `/v1/completions` endpoint. The endpoint is a "source"
@@ -267,6 +275,8 @@ async fn completions(
267275 . get_completions_engine ( model)
268276 . map_err ( |_| ErrorMessage :: model_not_found ( ) ) ?;
269277
278+ let parsing_options = get_parsing_options ( & state, model) ;
279+
270280 let mut inflight_guard =
271281 state
272282 . metrics_clone ( )
@@ -325,7 +335,7 @@ async fn completions(
325335 process_metrics_only ( response, & mut response_collector) ;
326336 } ) ;
327337
328- let response = NvCreateCompletionResponse :: from_annotated_stream ( stream)
338+ let response = NvCreateCompletionResponse :: from_annotated_stream ( stream, parsing_options )
329339 . await
330340 . map_err ( |e| {
331341 tracing:: error!(
@@ -494,6 +504,8 @@ async fn chat_completions(
494504 . get_chat_completions_engine ( model)
495505 . map_err ( |_| ErrorMessage :: model_not_found ( ) ) ?;
496506
507+ let parsing_options = get_parsing_options ( & state, model) ;
508+
497509 let mut inflight_guard =
498510 state
499511 . metrics_clone ( )
@@ -553,19 +565,20 @@ async fn chat_completions(
553565 process_metrics_only ( response, & mut response_collector) ;
554566 } ) ;
555567
556- let response = NvCreateChatCompletionResponse :: from_annotated_stream ( stream)
557- . await
558- . map_err ( |e| {
559- tracing:: error!(
560- request_id,
561- "Failed to fold chat completions stream for: {:?}" ,
562- e
563- ) ;
564- ErrorMessage :: internal_server_error ( & format ! (
565- "Failed to fold chat completions stream: {}" ,
566- e
567- ) )
568- } ) ?;
568+ let response =
569+ NvCreateChatCompletionResponse :: from_annotated_stream ( stream, parsing_options. clone ( ) )
570+ . await
571+ . map_err ( |e| {
572+ tracing:: error!(
573+ request_id,
574+ "Failed to fold chat completions stream for: {:?}" ,
575+ e
576+ ) ;
577+ ErrorMessage :: internal_server_error ( & format ! (
578+ "Failed to fold chat completions stream: {}" ,
579+ e
580+ ) )
581+ } ) ?;
569582
570583 inflight_guard. mark_ok ( ) ;
571584 Ok ( Json ( response) . into_response ( ) )
@@ -726,6 +739,8 @@ async fn responses(
726739 . get_chat_completions_engine ( model)
727740 . map_err ( |_| ErrorMessage :: model_not_found ( ) ) ?;
728741
742+ let parsing_options = get_parsing_options ( & state, model) ;
743+
729744 let mut inflight_guard =
730745 state
731746 . metrics_clone ( )
@@ -742,19 +757,20 @@ async fn responses(
742757 . map_err ( |e| ErrorMessage :: from_anyhow ( e, "Failed to generate completions" ) ) ?;
743758
744759 // TODO: handle streaming, currently just unary
745- let response = NvCreateChatCompletionResponse :: from_annotated_stream ( stream)
746- . await
747- . map_err ( |e| {
748- tracing:: error!(
749- request_id,
750- "Failed to fold chat completions stream for: {:?}" ,
751- e
752- ) ;
753- ErrorMessage :: internal_server_error ( & format ! (
754- "Failed to fold chat completions stream: {}" ,
755- e
756- ) )
757- } ) ?;
760+ let response =
761+ NvCreateChatCompletionResponse :: from_annotated_stream ( stream, parsing_options. clone ( ) )
762+ . await
763+ . map_err ( |e| {
764+ tracing:: error!(
765+ request_id,
766+ "Failed to fold chat completions stream for: {:?}" ,
767+ e
768+ ) ;
769+ ErrorMessage :: internal_server_error ( & format ! (
770+ "Failed to fold chat completions stream: {}" ,
771+ e
772+ ) )
773+ } ) ?;
758774
759775 // Convert NvCreateChatCompletionResponse --> NvResponse
760776 let response: NvResponse = response. try_into ( ) . map_err ( |e| {
0 commit comments