@@ -17,9 +17,13 @@ use std::collections::HashMap;
1717use std:: path:: PathBuf ;
1818
1919use clap:: ValueEnum ;
20+ use dynamo_llm:: entrypoint:: RouterConfig ;
2021use dynamo_llm:: kv_router:: KvRouterConfig ;
22+ use dynamo_llm:: local_model:: LocalModel ;
2123use dynamo_runtime:: pipeline:: RouterMode as RuntimeRouterMode ;
2224
25+ use crate :: Output ;
26+
2327/// Required options depend on the in and out choices
2428#[ derive( clap:: Parser , Debug , Clone ) ]
2529#[ command( version, about, long_about = None ) ]
@@ -154,66 +158,63 @@ pub struct Flags {
154158}
155159
156160impl Flags {
157- /// Get KV router configuration
158- pub fn kv_router_config ( & self ) -> KvRouterConfig {
159- KvRouterConfig :: new (
160- self . kv_overlap_score_weight ,
161- self . kv_gpu_cache_usage_weight ,
162- self . kv_waiting_requests_weight ,
163- )
161+ /// For each Output variant, check if it would be able to run.
162+ /// This takes validation out of the main engine creation path.
163+ pub fn validate ( & self , local_model : & LocalModel , out_opt : & Output ) -> anyhow:: Result < ( ) > {
164+ match out_opt {
165+ Output :: Dynamic => {
166+ if self . context_length . is_some ( ) {
167+ anyhow:: bail!( "'--content-length' flag should only be used on the worker node, not on the ingress" ) ;
168+ }
169+ if self . kv_cache_block_size . is_some ( ) {
170+ anyhow:: bail!( "'--kv-cache-block-size' flag should only be used on the worker node, not on the ingress" ) ;
171+ }
172+ }
173+ Output :: EchoFull => { }
174+ Output :: EchoCore => {
175+ if !local_model. card ( ) . has_tokenizer ( ) {
176+ anyhow:: bail!(
177+ "out=echo_core need to find the tokenizer. Pass flag --model-path <path>"
178+ ) ;
179+ } ;
180+ }
181+ #[ cfg( feature = "mistralrs" ) ]
182+ Output :: MistralRs => { }
183+ Output :: SgLang => {
184+ if !local_model. path ( ) . is_dir ( ) {
185+ // TODO GGUF support for sglang: https://github.com/ai-dynamo/dynamo/issues/572
186+ anyhow:: bail!( "`--model-path should point at a HuggingFace repo checkout" ) ;
187+ }
188+ }
189+ Output :: Vllm => {
190+ if self . base_gpu_id != 0 {
191+ anyhow:: bail!( "vllm does not support base_gpu_id. Set environment variable CUDA_VISIBLE_DEVICES instead." ) ;
192+ }
193+ }
194+ Output :: Trtllm => {
195+ if self . base_gpu_id != 0 {
196+ anyhow:: bail!( "TRTLLM does not support base_gpu_id. Set environment variable CUDA_VISIBLE_DEVICES instead." ) ;
197+ }
198+ }
199+ #[ cfg( feature = "llamacpp" ) ]
200+ Output :: LlamaCpp => {
201+ if !local_model. path ( ) . is_file ( ) {
202+ anyhow:: bail!( "--model-path should refer to a GGUF file. llama_cpp does not support safetensors." ) ;
203+ }
204+ }
205+ }
206+ Ok ( ( ) )
164207 }
165208
166- /// Convert the flags back to a command line. Including only the non-null values, but
167- /// include the defaults. Includes the canonicalized model path and normalized model name.
168- ///
169- /// Used to pass arguments to python engines via `pystr` and `pytok`.
170- pub fn as_vec ( & self , path : & str , name : & str ) -> Vec < String > {
171- let mut out = vec ! [
172- "--model-path" . to_string( ) ,
173- path. to_string( ) ,
174- "--model-name" . to_string( ) ,
175- name. to_string( ) ,
176- "--http-port" . to_string( ) ,
177- self . http_port. to_string( ) ,
178- // Default 1
179- "--tensor-parallel-size" . to_string( ) ,
180- self . tensor_parallel_size. to_string( ) ,
181- // Default 0
182- "--base-gpu-id" . to_string( ) ,
183- self . base_gpu_id. to_string( ) ,
184- // Default 1
185- "--num-nodes" . to_string( ) ,
186- self . num_nodes. to_string( ) ,
187- // Default 0
188- "--node-rank" . to_string( ) ,
189- self . node_rank. to_string( ) ,
190- ] ;
191- if let Some ( model_config_path) = self . model_config . as_ref ( ) {
192- out. push ( "--model-config" . to_string ( ) ) ;
193- out. push ( model_config_path. display ( ) . to_string ( ) ) ;
194- }
195- if let Some ( leader) = self . leader_addr . as_ref ( ) {
196- out. push ( "--leader-addr" . to_string ( ) ) ;
197- out. push ( leader. to_string ( ) ) ;
198- }
199- if let Some ( extra_engine_args) = self . extra_engine_args . as_ref ( ) {
200- out. push ( "--extra-engine-args" . to_string ( ) ) ;
201- out. push ( extra_engine_args. display ( ) . to_string ( ) ) ;
202- }
203- if let Some ( weight) = self . kv_overlap_score_weight {
204- out. push ( "--kv-overlap-score-weight" . to_string ( ) ) ;
205- out. push ( weight. to_string ( ) ) ;
206- }
207- if let Some ( weight) = self . kv_gpu_cache_usage_weight {
208- out. push ( "--kv-gpu-cache-usage-weight" . to_string ( ) ) ;
209- out. push ( weight. to_string ( ) ) ;
210- }
211- if let Some ( weight) = self . kv_waiting_requests_weight {
212- out. push ( "--kv-waiting-requests-weight" . to_string ( ) ) ;
213- out. push ( weight. to_string ( ) ) ;
214- }
215- out. extend ( self . last . clone ( ) ) ;
216- out
209+ pub fn router_config ( & self ) -> RouterConfig {
210+ RouterConfig :: new (
211+ self . router_mode . into ( ) ,
212+ KvRouterConfig :: new (
213+ self . kv_overlap_score_weight ,
214+ self . kv_gpu_cache_usage_weight ,
215+ self . kv_waiting_requests_weight ,
216+ ) ,
217+ )
217218 }
218219
219220 /// Load extra engine arguments from a JSON file
0 commit comments