File tree Expand file tree Collapse file tree 8 files changed +24
-25
lines changed Expand file tree Collapse file tree 8 files changed +24
-25
lines changed Original file line number Diff line number Diff line change @@ -148,12 +148,12 @@ async fn engine_for(
148148 // Auto-discover backends
149149 Ok ( EngineConfig :: Dynamic ( Box :: new ( local_model) ) )
150150 }
151- Output :: Echo => Ok ( EngineConfig :: StaticFull {
151+ Output :: Echo => Ok ( EngineConfig :: InProcessText {
152152 model : Box :: new ( local_model) ,
153153 engine : dynamo_llm:: engines:: make_echo_engine ( ) ,
154154 } ) ,
155155 #[ cfg( feature = "mistralrs" ) ]
156- Output :: MistralRs => Ok ( EngineConfig :: StaticFull {
156+ Output :: MistralRs => Ok ( EngineConfig :: InProcessText {
157157 engine : dynamo_engine_mistralrs:: make_engine ( & local_model) . await ?,
158158 model : Box :: new ( local_model) ,
159159 } ) ,
@@ -164,7 +164,7 @@ async fn engine_for(
164164 let engine =
165165 dynamo_llm:: mocker:: engine:: make_mocker_engine ( drt, endpoint, args) . await ?;
166166
167- Ok ( EngineConfig :: StaticCore {
167+ Ok ( EngineConfig :: InProcessTokens {
168168 engine,
169169 model : Box :: new ( local_model) ,
170170 is_prefill : false ,
Original file line number Diff line number Diff line change @@ -253,7 +253,7 @@ async fn select_engine(
253253 let inner = match args. engine_type {
254254 EngineType :: Echo => {
255255 // There is no validation for the echo engine
256- RsEngineConfig :: StaticFull {
256+ RsEngineConfig :: InProcessText {
257257 model : Box :: new ( local_model) ,
258258 engine : dynamo_llm:: engines:: make_echo_engine ( ) ,
259259 }
@@ -284,7 +284,7 @@ async fn select_engine(
284284 )
285285 . await ?;
286286
287- RsEngineConfig :: StaticCore {
287+ RsEngineConfig :: InProcessTokens {
288288 engine,
289289 model : Box :: new ( local_model) ,
290290 is_prefill : args. is_prefill ,
Original file line number Diff line number Diff line change @@ -51,14 +51,14 @@ pub enum EngineConfig {
5151 /// Remote networked engines that we discover via etcd
5252 Dynamic ( Box < LocalModel > ) ,
5353
54- /// A Full service engine does it's own tokenization and prompt formatting.
55- StaticFull {
54+ /// A Text engine receives text and does its own tokenization and prompt formatting.
55+ InProcessText {
5656 engine : Arc < dyn StreamingEngine > ,
5757 model : Box < LocalModel > ,
5858 } ,
5959
60- /// A core engine expects to be wrapped with pre/post processors that handle tokenization.
61- StaticCore {
60+ /// A Tokens engine receives tokens and expects to be wrapped with pre/post processors that handle tokenization.
61+ InProcessTokens {
6262 engine : ExecutionContext ,
6363 model : Box < LocalModel > ,
6464 is_prefill : bool ,
@@ -70,8 +70,8 @@ impl EngineConfig {
7070 use EngineConfig :: * ;
7171 match self {
7272 Dynamic ( lm) => lm,
73- StaticFull { model, .. } => model,
74- StaticCore { model, .. } => model,
73+ InProcessText { model, .. } => model,
74+ InProcessTokens { model, .. } => model,
7575 }
7676 }
7777}
Original file line number Diff line number Diff line change @@ -93,7 +93,7 @@ pub async fn prepare_engine(
9393 request_template : local_model. request_template ( ) ,
9494 } )
9595 }
96- EngineConfig :: StaticFull { engine, model, .. } => {
96+ EngineConfig :: InProcessText { engine, model, .. } => {
9797 let service_name = model. service_name ( ) . to_string ( ) ;
9898 tracing:: debug!( "Model: {service_name} with engine pre-processing" ) ;
9999 let engine = Arc :: new ( StreamingEngineAdapter :: new ( engine) ) ;
@@ -105,7 +105,7 @@ pub async fn prepare_engine(
105105 card : Some ( model. into_card ( ) ) ,
106106 } )
107107 }
108- EngineConfig :: StaticCore {
108+ EngineConfig :: InProcessTokens {
109109 engine : inner_engine,
110110 model,
111111 ..
Original file line number Diff line number Diff line change @@ -38,7 +38,7 @@ pub async fn run(
3838 let endpoint = component. endpoint ( & endpoint_id. name ) ;
3939
4040 let rt_fut: Pin < Box < dyn Future < Output = _ > + Send + ' static > > = match engine_config {
41- EngineConfig :: StaticFull { engine, mut model } => {
41+ EngineConfig :: InProcessText { engine, mut model } => {
4242 let engine = Arc :: new ( StreamingEngineAdapter :: new ( engine) ) ;
4343 let ingress_chat = Ingress :: <
4444 Context < NvCreateChatCompletionRequest > ,
@@ -51,7 +51,7 @@ pub async fn run(
5151
5252 Box :: pin ( fut_chat)
5353 }
54- EngineConfig :: StaticCore {
54+ EngineConfig :: InProcessTokens {
5555 engine : inner_engine,
5656 mut model,
5757 is_prefill,
@@ -127,7 +127,7 @@ mod integration_tests {
127127 . await
128128 . map_err ( |e| anyhow:: anyhow!( "Failed to create distributed runtime: {}" , e) ) ?;
129129
130- let engine_config = EngineConfig :: StaticFull {
130+ let engine_config = EngineConfig :: InProcessText {
131131 engine : crate :: engines:: make_echo_engine ( ) ,
132132 model : Box :: new (
133133 crate :: local_model:: LocalModelBuilder :: default ( )
Original file line number Diff line number Diff line change @@ -45,7 +45,7 @@ pub async fn run(
4545 . await ?;
4646 grpc_service
4747 }
48- EngineConfig :: StaticFull { engine, model, .. } => {
48+ EngineConfig :: InProcessText { engine, model, .. } => {
4949 let grpc_service = grpc_service_builder. build ( ) ?;
5050 let engine = Arc :: new ( StreamingEngineAdapter :: new ( engine) ) ;
5151 let manager = grpc_service. model_manager ( ) ;
@@ -54,7 +54,7 @@ pub async fn run(
5454 manager. add_chat_completions_model ( model. service_name ( ) , checksum, engine) ?;
5555 grpc_service
5656 }
57- EngineConfig :: StaticCore {
57+ EngineConfig :: InProcessTokens {
5858 engine : inner_engine,
5959 model,
6060 ..
Original file line number Diff line number Diff line change @@ -87,7 +87,7 @@ pub async fn run(
8787 . await ?;
8888 http_service
8989 }
90- EngineConfig :: StaticFull { engine, model, .. } => {
90+ EngineConfig :: InProcessText { engine, model, .. } => {
9191 let http_service = http_service_builder. build ( ) ?;
9292 let engine = Arc :: new ( StreamingEngineAdapter :: new ( engine) ) ;
9393 let manager = http_service. model_manager ( ) ;
@@ -101,7 +101,7 @@ pub async fn run(
101101 }
102102 http_service
103103 }
104- EngineConfig :: StaticCore {
104+ EngineConfig :: InProcessTokens {
105105 engine : inner_engine,
106106 model,
107107 ..
Original file line number Diff line number Diff line change @@ -321,7 +321,7 @@ mod integration_tests {
321321 . unwrap ( ) ;
322322
323323 // Create EngineConfig with EchoEngine
324- let engine_config = EngineConfig :: StaticFull {
324+ let engine_config = EngineConfig :: InProcessText {
325325 engine : make_echo_engine ( ) ,
326326 model : Box :: new ( local_model. clone ( ) ) ,
327327 } ;
@@ -355,9 +355,8 @@ mod integration_tests {
355355 model_watcher. watch ( discovery_stream, None ) . await ;
356356 } ) ;
357357
358- // Set up the engine following the StaticFull pattern from http.rs
359- let EngineConfig :: StaticFull { engine, model, .. } = engine_config else {
360- panic ! ( "Expected StaticFull config" ) ;
358+ let EngineConfig :: InProcessText { engine, model, .. } = engine_config else {
359+ panic ! ( "Expected InProcessText config" ) ;
361360 } ;
362361
363362 let card = local_model. card ( ) . clone ( ) ;
@@ -373,7 +372,7 @@ mod integration_tests {
373372 let test_component = namespace. component ( "test-mdc-component" ) . unwrap ( ) ;
374373 let test_endpoint = test_component. endpoint ( "test-mdc-endpoint" ) ;
375374
376- // This will store the MDC in etcd for discovery
375+ // This will store the MDC in the key-value store for discovery
377376 local_model
378377 . attach (
379378 & test_endpoint,
You can’t perform that action at this time.
0 commit comments