11# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22# SPDX-License-Identifier: Apache-2.0
33
4- # Example cli using the Python bindings.
5- # Usage: `python cli.py text mistralrs --model-path <your-model>`.
6- # If `--model-path` not provided defaults to Qwen3 0.6B.
7- # Must be in a virtualenv with the bindings (or wheel) installed.
4+ # Example CLI using the Python bindings, similar to `dynamo-run`.
5+ # Usage: `python cli.py in=text out=mistralrs <your-model>`.
6+ # Must be in a virtualenv with the Dynamo bindings (or wheel) installed.
87
98import argparse
109import asyncio
1817
1918
2019def parse_args ():
21- """
22- Parses command-line arguments for the program.
23- """
20+ in_mode = "text"
21+ out_mode = "echo"
22+ batch_file = None # Specific to in_mode="batch"
23+
24+ # List to hold arguments that argparse will process (flags and model path)
25+ argparse_args = []
26+
27+ # --- Step 1: Manual Pre-parsing for 'in=' and 'out=' ---
28+ # Iterate through sys.argv[1:] to extract in= and out=
29+ # and collect remaining arguments for argparse.
30+ for arg in sys .argv [1 :]:
31+ if arg .startswith ("in=" ):
32+ in_val = arg [len ("in=" ) :]
33+ if in_val .startswith ("batch:" ):
34+ in_mode = "batch"
35+ batch_file = in_val [len ("batch:" ) :]
36+ else :
37+ in_mode = in_val
38+ elif arg .startswith ("out=" ):
39+ out_mode = arg [len ("out=" ) :]
40+ else :
41+ # This argument is not 'in=' or 'out=', so it's either a flag or the model path
42+ argparse_args .append (arg )
43+
44+ # --- Step 2: Argparse for flags and the model path ---
2445 parser = argparse .ArgumentParser (
25- description = "Run a Dynamo LLM engine with configurable parameters. " ,
26- formatter_class = argparse .ArgumentDefaultsHelpFormatter , # Show default values in help
46+ description = "Dynamo CLI: Connect inputs to an engine " ,
47+ formatter_class = argparse .RawTextHelpFormatter , # To preserve multi-line help formatting
2748 )
2849
29- # Positional arguments (replacing sys.argv[1] and sys.argv[2])
30- parser .add_argument (
31- "input_source" ,
32- type = str ,
33- help = "Input source for the engine: 'text', 'http', 'stdin', 'batch:file.jsonl', 'dyn://<name>'" ,
34- )
35- parser .add_argument (
36- "output_type" ,
37- type = str ,
38- help = "Output type (engine type): 'echo', 'mistralrs', 'llamacpp', 'dyn'" ,
39- )
40-
41- # Optional arguments corresponding to EntrypointArgs fields
42- # model_path: Option<PathBuf>
43- parser .add_argument (
44- "--model-path" ,
45- type = Path ,
46- default = Path ("Qwen/Qwen3-0.6B" ),
47- help = "Path to the model directory." ,
48- )
4950 # model_name: Option<String>
5051 parser .add_argument ("--model-name" , type = str , help = "Name of the model to load." )
5152 # model_config: Option<PathBuf>
@@ -69,8 +70,45 @@ def parse_args():
6970 # http_port: Option<u16>
7071 parser .add_argument ("--http-port" , type = int , help = "HTTP port for the engine (u16)." )
7172
72- args = parser .parse_args ()
73- return args
73+ # TODO: --tensor-parallel-size is parsed but not yet forwarded to EntrypointArgs in run().
74+ parser .add_argument (
75+ "--tensor-parallel-size" ,
76+ type = int ,
77+ help = "Tensor parallel size for the model (e.g., 4)." ,
78+ )
79+
80+ # Add the positional model argument.
81+ # It's made optional (nargs='?') because its requirement depends on 'out_mode',
82+ # which is handled in post-parsing validation.
83+ parser .add_argument (
84+ "model" ,
85+ nargs = "?" , # Make it optional for argparse, we'll validate manually
86+ help = "Path to the model (e.g., Qwen/Qwen3-0.6B).\n " "Required unless out=dyn." ,
87+ )
88+
89+ # Parse the arguments that were not 'in=' or 'out='
90+ flags = parser .parse_args (argparse_args )
91+
92+ # --- Step 3: Post-parsing Validation and Final Assignment ---
93+
94+ # Validate 'batch' mode requires a file path
95+ if in_mode == "batch" and not batch_file :
96+ parser .error ("Batch mode requires a file path: in=batch:FILE" )
97+
98+ # Validate model path requirement based on 'out_mode'
99+ if out_mode != "dyn" and flags .model is None :
100+ parser .error ("Model path is required unless out=dyn." )
101+
102+ # Consolidate all parsed arguments into a dictionary
103+ parsed_args = {
104+ "in_mode" : in_mode ,
105+ "out_mode" : out_mode ,
106+ "batch_file" : batch_file , # Will be None if in_mode is not "batch"
107+ "model_path" : flags .model ,
108+ "flags" : flags ,
109+ }
110+
111+ return parsed_args
74112
75113
76114async def run ():
@@ -79,39 +117,39 @@ async def run():
79117
80118 args = parse_args ()
81119
82- input = args .input_source
83- output = args .output_type
84-
85120 engine_type_map = {
86121 "echo" : EngineType .Echo ,
87122 "mistralrs" : EngineType .MistralRs ,
88123 "llamacpp" : EngineType .LlamaCpp ,
89124 "dyn" : EngineType .Dynamic ,
90125 }
91- engine_type = engine_type_map .get (output )
126+ out_mode = args ["out_mode" ]
127+ engine_type = engine_type_map .get (out_mode )
92128 if engine_type is None :
93- print (f"Unsupported output type: { output } " )
129+ print (f"Unsupported output type: { out_mode } " )
94130 sys .exit (1 )
95131
96132 # TODO: The "vllm", "sglang" and "trtllm" cases should call Python directly
97133
98- entrypoint_kwargs = {"model_path" : args .model_path }
99- if args .model_name is not None :
100- entrypoint_kwargs ["model_name" ] = args .model_name
101- if args .model_config is not None :
102- entrypoint_kwargs ["model_config" ] = args .model_config
103- if args .context_length is not None :
104- entrypoint_kwargs ["context_length" ] = args .context_length
105- if args .template_file is not None :
106- entrypoint_kwargs ["template_file" ] = args .template_file
107- if args .kv_cache_block_size is not None :
108- entrypoint_kwargs ["kv_cache_block_size" ] = args .kv_cache_block_size
109- if args .http_port is not None :
110- entrypoint_kwargs ["http_port" ] = args .http_port
134+ entrypoint_kwargs = {"model_path" : args ["model_path" ]}
135+
136+ flags = args ["flags" ]
137+ if flags .model_name is not None :
138+ entrypoint_kwargs ["model_name" ] = flags .model_name
139+ if flags .model_config is not None :
140+ entrypoint_kwargs ["model_config" ] = flags .model_config
141+ if flags .context_length is not None :
142+ entrypoint_kwargs ["context_length" ] = flags .context_length
143+ if flags .template_file is not None :
144+ entrypoint_kwargs ["template_file" ] = flags .template_file
145+ if flags .kv_cache_block_size is not None :
146+ entrypoint_kwargs ["kv_cache_block_size" ] = flags .kv_cache_block_size
147+ if flags .http_port is not None :
148+ entrypoint_kwargs ["http_port" ] = flags .http_port
111149
112150 e = EntrypointArgs (engine_type , ** entrypoint_kwargs )
113151 engine = await make_engine (runtime , e )
114- await run_input (runtime , input , engine )
152+ await run_input (runtime , args [ "in_mode" ] , engine )
115153
116154
117155if __name__ == "__main__" :
0 commit comments