Skip to content

Commit 48d31b0

Browse files
grahamking authored and atchernych committed
chore(dynamo-run): Refactor to library (#1687)
Move much of what was in the `dynamo-run` crate into `dynamo-llm` so that everyone can use it.

Example usage:

1. Create a `LocalModel`:
```
let local_model = LocalModelBuilder::default()
    .model_path("Qwen/Qwen3-0.6B")
    .http_port(8080)
    .build().await?;
```

2. Make an engine:
```
let engine_config = EngineConfig::StaticFull {
    engine: dynamo_engine_mistralrs::make_engine(&local_model).await?,
    model: Box::new(local_model),
};
```

3. Connect it to an input and run it:
```
dynamo_llm::entrypoint::input::run_input(Input::Http, runtime, engine_config).await?;
```

For #1647

Code Rabbit summary, thanks:

* Introduced a flexible builder pattern for local model configuration, allowing advanced customization and easier initialization.
* Added new input modes and unified input handling, supporting interactive chat, HTTP server, batch file, and distributed endpoint modes.
* Centralized engine configuration and routing, enabling more extensible and maintainable engine management.
* Simplified and modularized the codebase by moving input and engine logic into dedicated modules.
* Replaced direct model construction with an asynchronous builder for improved clarity and extensibility.
* Streamlined configuration and validation for flags and router settings.
* Added validation to prevent incompatible input and output combinations in endpoint and dynamic modes.
1 parent 8e32a6b commit 48d31b0

File tree

41 files changed

+808
-699
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+808
-699
lines changed

Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

components/router/src/main.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ struct Args {
4747

4848
/// Block size for the router
4949
#[arg(long)]
50-
block_size: usize,
50+
block_size: u32,
5151
}
5252

5353
fn main() -> Result<()> {
@@ -88,7 +88,7 @@ impl WorkerSelector for CustomWorkerSelector {
8888
&self,
8989
workers: &ProcessedEndpoints,
9090
request: &SchedulingRequest,
91-
block_size: usize,
91+
block_size: u32,
9292
) -> Result<WorkerSelectionResult, KvSchedulerError> {
9393
// customize logic here
9494
// F12 into [DefaultWorkerSelector] to see the original logic

launch/dynamo-run/Cargo.toml

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,6 @@ anyhow = { workspace = true }
3434
async-stream = { workspace = true }
3535
async-trait = { workspace = true }
3636
futures = { workspace = true }
37-
humantime = { workspace = true }
3837
libc = { workspace = true }
3938
serde = { workspace = true }
4039
serde_json = { workspace = true }
@@ -47,7 +46,6 @@ uuid = { workspace = true }
4746

4847
async-openai = { workspace = true }
4948
clap = { version = "4.5", features = ["derive", "env"] }
50-
dialoguer = { version = "0.11", default-features = false, features = ["editor", "history"] }
5149
futures-util = { version = "0.3" }
5250
regex = "1"
5351

launch/dynamo-run/src/flags.rs

Lines changed: 61 additions & 60 deletions
Original file line number | Diff line number | Diff line change
@@ -17,9 +17,13 @@ use std::collections::HashMap;
1717
use std::path::PathBuf;
1818

1919
use clap::ValueEnum;
20+
use dynamo_llm::entrypoint::RouterConfig;
2021
use dynamo_llm::kv_router::KvRouterConfig;
22+
use dynamo_llm::local_model::LocalModel;
2123
use dynamo_runtime::pipeline::RouterMode as RuntimeRouterMode;
2224

25+
use crate::Output;
26+
2327
/// Required options depend on the in and out choices
2428
#[derive(clap::Parser, Debug, Clone)]
2529
#[command(version, about, long_about = None)]
@@ -125,11 +129,11 @@ pub struct Flags {
125129
/// context length (e.g. Llama 4).
126130
/// Defaults to the model's max, which is usually model_max_length in tokenizer_config.json.
127131
#[arg(long)]
128-
pub context_length: Option<usize>,
132+
pub context_length: Option<u32>,
129133

130134
/// KV cache block size (vllm only)
131135
#[arg(long)]
132-
pub kv_cache_block_size: Option<usize>,
136+
pub kv_cache_block_size: Option<u32>,
133137

134138
/// Additional engine-specific arguments from a JSON file.
135139
/// Contains a mapping of parameter names to values.
@@ -154,66 +158,63 @@ pub struct Flags {
154158
}
155159

156160
impl Flags {
157-
/// Get KV router configuration
158-
pub fn kv_router_config(&self) -> KvRouterConfig {
159-
KvRouterConfig::new(
160-
self.kv_overlap_score_weight,
161-
self.kv_gpu_cache_usage_weight,
162-
self.kv_waiting_requests_weight,
163-
)
161+
/// For each Output variant, check if it would be able to run.
162+
/// This takes validation out of the main engine creation path.
163+
pub fn validate(&self, local_model: &LocalModel, out_opt: &Output) -> anyhow::Result<()> {
164+
match out_opt {
165+
Output::Dynamic => {
166+
if self.context_length.is_some() {
167+
anyhow::bail!("'--context-length' flag should only be used on the worker node, not on the ingress");
168+
}
169+
if self.kv_cache_block_size.is_some() {
170+
anyhow::bail!("'--kv-cache-block-size' flag should only be used on the worker node, not on the ingress");
171+
}
172+
}
173+
Output::EchoFull => {}
174+
Output::EchoCore => {
175+
if !local_model.card().has_tokenizer() {
176+
anyhow::bail!(
177+
"out=echo_core need to find the tokenizer. Pass flag --model-path <path>"
178+
);
179+
};
180+
}
181+
#[cfg(feature = "mistralrs")]
182+
Output::MistralRs => {}
183+
Output::SgLang => {
184+
if !local_model.path().is_dir() {
185+
// TODO GGUF support for sglang: https://github.com/ai-dynamo/dynamo/issues/572
186+
anyhow::bail!("`--model-path should point at a HuggingFace repo checkout");
187+
}
188+
}
189+
Output::Vllm => {
190+
if self.base_gpu_id != 0 {
191+
anyhow::bail!("vllm does not support base_gpu_id. Set environment variable CUDA_VISIBLE_DEVICES instead.");
192+
}
193+
}
194+
Output::Trtllm => {
195+
if self.base_gpu_id != 0 {
196+
anyhow::bail!("TRTLLM does not support base_gpu_id. Set environment variable CUDA_VISIBLE_DEVICES instead.");
197+
}
198+
}
199+
#[cfg(feature = "llamacpp")]
200+
Output::LlamaCpp => {
201+
if !local_model.path().is_file() {
202+
anyhow::bail!("--model-path should refer to a GGUF file. llama_cpp does not support safetensors.");
203+
}
204+
}
205+
}
206+
Ok(())
164207
}
165208

166-
/// Convert the flags back to a command line. Including only the non-null values, but
167-
/// include the defaults. Includes the canonicalized model path and normalized model name.
168-
///
169-
/// Used to pass arguments to python engines via `pystr` and `pytok`.
170-
pub fn as_vec(&self, path: &str, name: &str) -> Vec<String> {
171-
let mut out = vec![
172-
"--model-path".to_string(),
173-
path.to_string(),
174-
"--model-name".to_string(),
175-
name.to_string(),
176-
"--http-port".to_string(),
177-
self.http_port.to_string(),
178-
// Default 1
179-
"--tensor-parallel-size".to_string(),
180-
self.tensor_parallel_size.to_string(),
181-
// Default 0
182-
"--base-gpu-id".to_string(),
183-
self.base_gpu_id.to_string(),
184-
// Default 1
185-
"--num-nodes".to_string(),
186-
self.num_nodes.to_string(),
187-
// Default 0
188-
"--node-rank".to_string(),
189-
self.node_rank.to_string(),
190-
];
191-
if let Some(model_config_path) = self.model_config.as_ref() {
192-
out.push("--model-config".to_string());
193-
out.push(model_config_path.display().to_string());
194-
}
195-
if let Some(leader) = self.leader_addr.as_ref() {
196-
out.push("--leader-addr".to_string());
197-
out.push(leader.to_string());
198-
}
199-
if let Some(extra_engine_args) = self.extra_engine_args.as_ref() {
200-
out.push("--extra-engine-args".to_string());
201-
out.push(extra_engine_args.display().to_string());
202-
}
203-
if let Some(weight) = self.kv_overlap_score_weight {
204-
out.push("--kv-overlap-score-weight".to_string());
205-
out.push(weight.to_string());
206-
}
207-
if let Some(weight) = self.kv_gpu_cache_usage_weight {
208-
out.push("--kv-gpu-cache-usage-weight".to_string());
209-
out.push(weight.to_string());
210-
}
211-
if let Some(weight) = self.kv_waiting_requests_weight {
212-
out.push("--kv-waiting-requests-weight".to_string());
213-
out.push(weight.to_string());
214-
}
215-
out.extend(self.last.clone());
216-
out
209+
pub fn router_config(&self) -> RouterConfig {
210+
RouterConfig::new(
211+
self.router_mode.into(),
212+
KvRouterConfig::new(
213+
self.kv_overlap_score_weight,
214+
self.kv_gpu_cache_usage_weight,
215+
self.kv_waiting_requests_weight,
216+
),
217+
)
217218
}
218219

219220
/// Load extra engine arguments from a JSON file

launch/dynamo-run/src/input.rs

Lines changed: 0 additions & 20 deletions
This file was deleted.

0 commit comments

Comments
 (0)