11// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22// SPDX-License-Identifier: Apache-2.0
3- //
4- // Licensed under the Apache License, Version 2.0 (the "License");
5- // you may not use this file except in compliance with the License.
6- // You may obtain a copy of the License at
7- //
8- // http://www.apache.org/licenses/LICENSE-2.0
9- //
10- // Unless required by applicable law or agreed to in writing, software
11- // distributed under the License is distributed on an "AS IS" BASIS,
12- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13- // See the License for the specific language governing permissions and
14- // limitations under the License.
153
164use std:: env;
175use std:: sync:: Arc ;
@@ -21,32 +9,17 @@ use std::time::Duration;
219use async_stream:: stream;
2210use async_trait:: async_trait;
2311
24- use dynamo_runtime:: component:: Client ;
2512use dynamo_runtime:: engine:: { AsyncEngine , AsyncEngineContextProvider , ResponseStream } ;
26- use dynamo_runtime:: pipeline:: Operator as _;
27- use dynamo_runtime:: pipeline:: PushRouter ;
28- use dynamo_runtime:: pipeline:: RouterMode ;
29- use dynamo_runtime:: pipeline:: SegmentSource ;
30- use dynamo_runtime:: pipeline:: ServiceBackend ;
31- use dynamo_runtime:: pipeline:: Source as _;
3213use dynamo_runtime:: pipeline:: { Error , ManyOut , SingleIn } ;
3314use dynamo_runtime:: protocols:: annotated:: Annotated ;
3415
35- use crate :: backend:: Backend ;
3616use crate :: backend:: ExecutionContext ;
37- use crate :: kv_router:: KvPushRouter ;
38- use crate :: kv_router:: KvRouter ;
39- use crate :: migration:: Migration ;
40- use crate :: model_card:: ModelDeploymentCard ;
41- use crate :: preprocessor:: OpenAIPreprocessor ;
4217use crate :: preprocessor:: PreprocessedRequest ;
4318use crate :: protocols:: common:: llm_backend:: LLMEngineOutput ;
4419use crate :: protocols:: openai:: {
4520 chat_completions:: { NvCreateChatCompletionRequest , NvCreateChatCompletionStreamResponse } ,
4621 completions:: { prompt_to_string, NvCreateCompletionRequest , NvCreateCompletionResponse } ,
4722} ;
48- use crate :: types:: openai:: chat_completions:: OpenAIChatCompletionsStreamingEngine ;
49- use crate :: types:: openai:: completions:: OpenAICompletionsStreamingEngine ;
5023use crate :: types:: openai:: embeddings:: NvCreateEmbeddingRequest ;
5124use crate :: types:: openai:: embeddings:: NvCreateEmbeddingResponse ;
5225
@@ -436,89 +409,3 @@ impl
436409 self . 0 . handle_chat ( req) . await
437410 }
438411}
439-
440- pub async fn build_chat_completions (
441- card : & ModelDeploymentCard ,
442- client : & Client ,
443- router_mode : RouterMode ,
444- chooser : Option < Arc < KvRouter > > ,
445- ) -> anyhow:: Result < OpenAIChatCompletionsStreamingEngine > {
446- let frontend = SegmentSource :: <
447- SingleIn < NvCreateChatCompletionRequest > ,
448- ManyOut < Annotated < NvCreateChatCompletionStreamResponse > > ,
449- > :: new ( ) ;
450- let preprocessor = OpenAIPreprocessor :: new ( card. clone ( ) ) . await ?. into_operator ( ) ;
451- let backend = Backend :: from_mdc ( card. clone ( ) ) . await ?. into_operator ( ) ;
452- let migration = Migration :: from_mdc ( card. clone ( ) ) . await ?. into_operator ( ) ;
453- let router = PushRouter :: < PreprocessedRequest , Annotated < LLMEngineOutput > > :: from_client (
454- client. clone ( ) ,
455- router_mode,
456- )
457- . await ?;
458- let service_backend = match router_mode {
459- RouterMode :: Random | RouterMode :: RoundRobin | RouterMode :: Direct ( _) => {
460- ServiceBackend :: from_engine ( Arc :: new ( router) )
461- }
462- RouterMode :: KV => {
463- let Some ( chooser) = chooser else {
464- anyhow:: bail!( "RouterMode::KV requires KVRouter to not be null" ) ;
465- } ;
466- let kv_push_router = KvPushRouter :: new ( router, chooser) ;
467- ServiceBackend :: from_engine ( Arc :: new ( kv_push_router) )
468- }
469- } ;
470-
471- let chat_engine = frontend
472- . link ( preprocessor. forward_edge ( ) ) ?
473- . link ( backend. forward_edge ( ) ) ?
474- . link ( migration. forward_edge ( ) ) ?
475- . link ( service_backend) ?
476- . link ( migration. backward_edge ( ) ) ?
477- . link ( backend. backward_edge ( ) ) ?
478- . link ( preprocessor. backward_edge ( ) ) ?
479- . link ( frontend) ?;
480- Ok ( chat_engine)
481- }
482-
483- pub async fn build_completions (
484- card : & ModelDeploymentCard ,
485- client : & Client ,
486- router_mode : RouterMode ,
487- chooser : Option < Arc < KvRouter > > ,
488- ) -> anyhow:: Result < OpenAICompletionsStreamingEngine > {
489- let frontend = SegmentSource :: <
490- SingleIn < NvCreateCompletionRequest > ,
491- ManyOut < Annotated < NvCreateCompletionResponse > > ,
492- > :: new ( ) ;
493- let preprocessor = OpenAIPreprocessor :: new ( card. clone ( ) ) . await ?. into_operator ( ) ;
494- let backend = Backend :: from_mdc ( card. clone ( ) ) . await ?. into_operator ( ) ;
495- let migration = Migration :: from_mdc ( card. clone ( ) ) . await ?. into_operator ( ) ;
496- let router = PushRouter :: < PreprocessedRequest , Annotated < LLMEngineOutput > > :: from_client (
497- client. clone ( ) ,
498- router_mode,
499- )
500- . await ?;
501- let service_backend = match router_mode {
502- RouterMode :: Random | RouterMode :: RoundRobin | RouterMode :: Direct ( _) => {
503- ServiceBackend :: from_engine ( Arc :: new ( router) )
504- }
505- RouterMode :: KV => {
506- let Some ( chooser) = chooser else {
507- anyhow:: bail!( "RouterMode::KV requires KVRouter to not be null" ) ;
508- } ;
509- let kv_push_router = KvPushRouter :: new ( router, chooser) ;
510- ServiceBackend :: from_engine ( Arc :: new ( kv_push_router) )
511- }
512- } ;
513-
514- let completions_engine = frontend
515- . link ( preprocessor. forward_edge ( ) ) ?
516- . link ( backend. forward_edge ( ) ) ?
517- . link ( migration. forward_edge ( ) ) ?
518- . link ( service_backend) ?
519- . link ( migration. backward_edge ( ) ) ?
520- . link ( backend. backward_edge ( ) ) ?
521- . link ( preprocessor. backward_edge ( ) ) ?
522- . link ( frontend) ?;
523- Ok ( completions_engine)
524- }
0 commit comments