Skip to content

Commit 0fc5273

Browse files
authored
refactor(llm): Rename EngineConfig::Static to InProcess (#4585)
Signed-off-by: Graham King <grahamk@nvidia.com>
1 parent a77558d commit 0fc5273

File tree

8 files changed

+24
-25
lines changed

8 files changed

+24
-25
lines changed

launch/dynamo-run/src/lib.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -148,12 +148,12 @@ async fn engine_for(
148148
// Auto-discover backends
149149
Ok(EngineConfig::Dynamic(Box::new(local_model)))
150150
}
151-
Output::Echo => Ok(EngineConfig::StaticFull {
151+
Output::Echo => Ok(EngineConfig::InProcessText {
152152
model: Box::new(local_model),
153153
engine: dynamo_llm::engines::make_echo_engine(),
154154
}),
155155
#[cfg(feature = "mistralrs")]
156-
Output::MistralRs => Ok(EngineConfig::StaticFull {
156+
Output::MistralRs => Ok(EngineConfig::InProcessText {
157157
engine: dynamo_engine_mistralrs::make_engine(&local_model).await?,
158158
model: Box::new(local_model),
159159
}),
@@ -164,7 +164,7 @@ async fn engine_for(
164164
let engine =
165165
dynamo_llm::mocker::engine::make_mocker_engine(drt, endpoint, args).await?;
166166

167-
Ok(EngineConfig::StaticCore {
167+
Ok(EngineConfig::InProcessTokens {
168168
engine,
169169
model: Box::new(local_model),
170170
is_prefill: false,

lib/bindings/python/rust/llm/entrypoint.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ async fn select_engine(
253253
let inner = match args.engine_type {
254254
EngineType::Echo => {
255255
// There is no validation for the echo engine
256-
RsEngineConfig::StaticFull {
256+
RsEngineConfig::InProcessText {
257257
model: Box::new(local_model),
258258
engine: dynamo_llm::engines::make_echo_engine(),
259259
}
@@ -284,7 +284,7 @@ async fn select_engine(
284284
)
285285
.await?;
286286

287-
RsEngineConfig::StaticCore {
287+
RsEngineConfig::InProcessTokens {
288288
engine,
289289
model: Box::new(local_model),
290290
is_prefill: args.is_prefill,

lib/llm/src/entrypoint.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,14 +51,14 @@ pub enum EngineConfig {
5151
/// Remote networked engines that we discover via etcd
5252
Dynamic(Box<LocalModel>),
5353

54-
/// A Full service engine does it's own tokenization and prompt formatting.
55-
StaticFull {
54+
/// A Text engine receives text and does its own tokenization and prompt formatting.
55+
InProcessText {
5656
engine: Arc<dyn StreamingEngine>,
5757
model: Box<LocalModel>,
5858
},
5959

60-
/// A core engine expects to be wrapped with pre/post processors that handle tokenization.
61-
StaticCore {
60+
/// A Tokens engine receives tokens and expects to be wrapped with pre/post processors that handle tokenization.
61+
InProcessTokens {
6262
engine: ExecutionContext,
6363
model: Box<LocalModel>,
6464
is_prefill: bool,
@@ -70,8 +70,8 @@ impl EngineConfig {
7070
use EngineConfig::*;
7171
match self {
7272
Dynamic(lm) => lm,
73-
StaticFull { model, .. } => model,
74-
StaticCore { model, .. } => model,
73+
InProcessText { model, .. } => model,
74+
InProcessTokens { model, .. } => model,
7575
}
7676
}
7777
}

lib/llm/src/entrypoint/input/common.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ pub async fn prepare_engine(
9393
request_template: local_model.request_template(),
9494
})
9595
}
96-
EngineConfig::StaticFull { engine, model, .. } => {
96+
EngineConfig::InProcessText { engine, model, .. } => {
9797
let service_name = model.service_name().to_string();
9898
tracing::debug!("Model: {service_name} with engine pre-processing");
9999
let engine = Arc::new(StreamingEngineAdapter::new(engine));
@@ -105,7 +105,7 @@ pub async fn prepare_engine(
105105
card: Some(model.into_card()),
106106
})
107107
}
108-
EngineConfig::StaticCore {
108+
EngineConfig::InProcessTokens {
109109
engine: inner_engine,
110110
model,
111111
..

lib/llm/src/entrypoint/input/endpoint.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ pub async fn run(
3838
let endpoint = component.endpoint(&endpoint_id.name);
3939

4040
let rt_fut: Pin<Box<dyn Future<Output = _> + Send + 'static>> = match engine_config {
41-
EngineConfig::StaticFull { engine, mut model } => {
41+
EngineConfig::InProcessText { engine, mut model } => {
4242
let engine = Arc::new(StreamingEngineAdapter::new(engine));
4343
let ingress_chat = Ingress::<
4444
Context<NvCreateChatCompletionRequest>,
@@ -51,7 +51,7 @@ pub async fn run(
5151

5252
Box::pin(fut_chat)
5353
}
54-
EngineConfig::StaticCore {
54+
EngineConfig::InProcessTokens {
5555
engine: inner_engine,
5656
mut model,
5757
is_prefill,
@@ -127,7 +127,7 @@ mod integration_tests {
127127
.await
128128
.map_err(|e| anyhow::anyhow!("Failed to create distributed runtime: {}", e))?;
129129

130-
let engine_config = EngineConfig::StaticFull {
130+
let engine_config = EngineConfig::InProcessText {
131131
engine: crate::engines::make_echo_engine(),
132132
model: Box::new(
133133
crate::local_model::LocalModelBuilder::default()

lib/llm/src/entrypoint/input/grpc.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ pub async fn run(
4545
.await?;
4646
grpc_service
4747
}
48-
EngineConfig::StaticFull { engine, model, .. } => {
48+
EngineConfig::InProcessText { engine, model, .. } => {
4949
let grpc_service = grpc_service_builder.build()?;
5050
let engine = Arc::new(StreamingEngineAdapter::new(engine));
5151
let manager = grpc_service.model_manager();
@@ -54,7 +54,7 @@ pub async fn run(
5454
manager.add_chat_completions_model(model.service_name(), checksum, engine)?;
5555
grpc_service
5656
}
57-
EngineConfig::StaticCore {
57+
EngineConfig::InProcessTokens {
5858
engine: inner_engine,
5959
model,
6060
..

lib/llm/src/entrypoint/input/http.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ pub async fn run(
8787
.await?;
8888
http_service
8989
}
90-
EngineConfig::StaticFull { engine, model, .. } => {
90+
EngineConfig::InProcessText { engine, model, .. } => {
9191
let http_service = http_service_builder.build()?;
9292
let engine = Arc::new(StreamingEngineAdapter::new(engine));
9393
let manager = http_service.model_manager();
@@ -101,7 +101,7 @@ pub async fn run(
101101
}
102102
http_service
103103
}
104-
EngineConfig::StaticCore {
104+
EngineConfig::InProcessTokens {
105105
engine: inner_engine,
106106
model,
107107
..

lib/llm/tests/http_metrics.rs

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,7 @@ mod integration_tests {
321321
.unwrap();
322322

323323
// Create EngineConfig with EchoEngine
324-
let engine_config = EngineConfig::StaticFull {
324+
let engine_config = EngineConfig::InProcessText {
325325
engine: make_echo_engine(),
326326
model: Box::new(local_model.clone()),
327327
};
@@ -355,9 +355,8 @@ mod integration_tests {
355355
model_watcher.watch(discovery_stream, None).await;
356356
});
357357

358-
// Set up the engine following the StaticFull pattern from http.rs
359-
let EngineConfig::StaticFull { engine, model, .. } = engine_config else {
360-
panic!("Expected StaticFull config");
358+
let EngineConfig::InProcessText { engine, model, .. } = engine_config else {
359+
panic!("Expected InProcessText config");
361360
};
362361

363362
let card = local_model.card().clone();
@@ -373,7 +372,7 @@ mod integration_tests {
373372
let test_component = namespace.component("test-mdc-component").unwrap();
374373
let test_endpoint = test_component.endpoint("test-mdc-endpoint");
375374

376-
// This will store the MDC in etcd for discovery
375+
// This will store the MDC in the key-value store for discovery
377376
local_model
378377
.attach(
379378
&test_endpoint,

0 commit comments

Comments
 (0)