Skip to content

Commit 5a33bce

Browse files
committed
fix: Make cli example more like dynamo-run
1 parent 4e882ac commit 5a33bce

File tree

2 files changed

+91
-51
lines changed

2 files changed

+91
-51
lines changed

examples/cli/cli.py

Lines changed: 88 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
# SPDX-License-Identifier: Apache-2.0
33

4-
# Example cli using the Python bindings.
5-
# Usage: `python cli.py text mistralrs --model-path <your-model>`.
6-
# If `--model-path` not provided defaults to Qwen3 0.6B.
7-
# Must be in a virtualenv with the bindings (or wheel) installed.
4+
# Example cli using the Python bindings, similar to `dynamo-run`.
5+
# Usage: `python cli.py in=text out=mistralrs <your-model>`.
6+
# Must be in a virtualenv with the Dynamo bindings (or wheel) installed.
87

98
import argparse
109
import asyncio
@@ -18,34 +17,36 @@
1817

1918

2019
def parse_args():
21-
"""
22-
Parses command-line arguments for the program.
23-
"""
20+
in_mode = "text"
21+
out_mode = "echo"
22+
batch_file = None # Specific to in_mode="batch"
23+
24+
# List to hold arguments that argparse will process (flags and model path)
25+
argparse_args = []
26+
27+
# --- Step 1: Manual Pre-parsing for 'in=' and 'out=' ---
28+
# Iterate through sys.argv[1:] to extract in= and out=
29+
# and collect remaining arguments for argparse.
30+
for arg in sys.argv[1:]:
31+
if arg.startswith("in="):
32+
in_val = arg[len("in=") :]
33+
if in_val.startswith("batch:"):
34+
in_mode = "batch"
35+
batch_file = in_val[len("batch:") :]
36+
else:
37+
in_mode = in_val
38+
elif arg.startswith("out="):
39+
out_mode = arg[len("out=") :]
40+
else:
41+
# This argument is not 'in=' or 'out=', so it's either a flag or the model path
42+
argparse_args.append(arg)
43+
44+
# --- Step 2: Argparse for flags and the model path ---
2445
parser = argparse.ArgumentParser(
25-
description="Run a Dynamo LLM engine with configurable parameters.",
26-
formatter_class=argparse.ArgumentDefaultsHelpFormatter, # Show default values in help
46+
description="Dynamo CLI: Connect inputs to an engine",
47+
formatter_class=argparse.RawTextHelpFormatter, # To preserve multi-line help formatting
2748
)
2849

29-
# Positional arguments (replacing sys.argv[1] and sys.argv[2])
30-
parser.add_argument(
31-
"input_source",
32-
type=str,
33-
help="Input source for the engine: 'text', 'http', 'stdin', 'batch:file.jsonl', 'dyn://<name>'",
34-
)
35-
parser.add_argument(
36-
"output_type",
37-
type=str,
38-
help="Output type (engine type): 'echo', 'mistralrs', 'llamacpp', 'dyn'",
39-
)
40-
41-
# Optional arguments corresponding to EntrypointArgs fields
42-
# model_path: Option<PathBuf>
43-
parser.add_argument(
44-
"--model-path",
45-
type=Path,
46-
default=Path("Qwen/Qwen3-0.6B"),
47-
help="Path to the model directory.",
48-
)
4950
# model_name: Option<String>
5051
parser.add_argument("--model-name", type=str, help="Name of the model to load.")
5152
# model_config: Option<PathBuf>
@@ -69,8 +70,45 @@ def parse_args():
6970
# http_port: Option<u16>
7071
parser.add_argument("--http-port", type=int, help="HTTP port for the engine (u16).")
7172

72-
args = parser.parse_args()
73-
return args
73+
# TODO: Not yet used here
74+
parser.add_argument(
75+
"--tensor-parallel-size",
76+
type=int,
77+
help="Tensor parallel size for the model (e.g., 4).",
78+
)
79+
80+
# Add the positional model argument.
81+
# It's made optional (nargs='?') because its requirement depends on 'out_mode',
82+
# which is handled in post-parsing validation.
83+
parser.add_argument(
84+
"model",
85+
nargs="?", # Make it optional for argparse, we'll validate manually
86+
help="Path to the model (e.g., Qwen/Qwen3-0.6B).\n" "Required unless out=dyn.",
87+
)
88+
89+
# Parse the arguments that were not 'in=' or 'out='
90+
flags = parser.parse_args(argparse_args)
91+
92+
# --- Step 3: Post-parsing Validation and Final Assignment ---
93+
94+
# Validate 'batch' mode requires a file path
95+
if in_mode == "batch" and not batch_file:
96+
parser.error("Batch mode requires a file path: in=batch:FILE")
97+
98+
# Validate model path requirement based on 'out_mode'
99+
if out_mode != "dyn" and flags.model is None:
100+
parser.error("Model path is required unless out=dyn.")
101+
102+
# Consolidate all parsed arguments into a dictionary
103+
parsed_args = {
104+
"in_mode": in_mode,
105+
"out_mode": out_mode,
106+
"batch_file": batch_file, # Will be None if in_mode is not "batch"
107+
"model_path": flags.model,
108+
"flags": flags,
109+
}
110+
111+
return parsed_args
74112

75113

76114
async def run():
@@ -79,39 +117,39 @@ async def run():
79117

80118
args = parse_args()
81119

82-
input = args.input_source
83-
output = args.output_type
84-
85120
engine_type_map = {
86121
"echo": EngineType.Echo,
87122
"mistralrs": EngineType.MistralRs,
88123
"llamacpp": EngineType.LlamaCpp,
89124
"dyn": EngineType.Dynamic,
90125
}
91-
engine_type = engine_type_map.get(output)
126+
out_mode = args["out_mode"]
127+
engine_type = engine_type_map.get(out_mode)
92128
if engine_type is None:
93-
print(f"Unsupported output type: {output}")
129+
print(f"Unsupported output type: {out_mode}")
94130
sys.exit(1)
95131

96132
# TODO: The "vllm", "sglang" and "trtllm" cases should call Python directly
97133

98-
entrypoint_kwargs = {"model_path": args.model_path}
99-
if args.model_name is not None:
100-
entrypoint_kwargs["model_name"] = args.model_name
101-
if args.model_config is not None:
102-
entrypoint_kwargs["model_config"] = args.model_config
103-
if args.context_length is not None:
104-
entrypoint_kwargs["context_length"] = args.context_length
105-
if args.template_file is not None:
106-
entrypoint_kwargs["template_file"] = args.template_file
107-
if args.kv_cache_block_size is not None:
108-
entrypoint_kwargs["kv_cache_block_size"] = args.kv_cache_block_size
109-
if args.http_port is not None:
110-
entrypoint_kwargs["http_port"] = args.http_port
134+
entrypoint_kwargs = {"model_path": args["model_path"]}
135+
136+
flags = args["flags"]
137+
if flags.model_name is not None:
138+
entrypoint_kwargs["model_name"] = flags.model_name
139+
if flags.model_config is not None:
140+
entrypoint_kwargs["model_config"] = flags.model_config
141+
if flags.context_length is not None:
142+
entrypoint_kwargs["context_length"] = flags.context_length
143+
if flags.template_file is not None:
144+
entrypoint_kwargs["template_file"] = flags.template_file
145+
if flags.kv_cache_block_size is not None:
146+
entrypoint_kwargs["kv_cache_block_size"] = flags.kv_cache_block_size
147+
if flags.http_port is not None:
148+
entrypoint_kwargs["http_port"] = flags.http_port
111149

112150
e = EntrypointArgs(engine_type, **entrypoint_kwargs)
113151
engine = await make_engine(runtime, e)
114-
await run_input(runtime, input, engine)
152+
await run_input(runtime, args["in_mode"], engine)
115153

116154

117155
if __name__ == "__main__":

lib/bindings/python/README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,11 @@ maturin develop --uv
4949
5. Experimental: To allow using mistral.rs and llama.cpp via the bindings, build with feature flags:
5050

5151
```
52-
maturin develop --features mistralrs,llamacpp
52+
maturin develop --features mistralrs,llamacpp --release
5353
```
5454

55+
`--release` is optional. The build takes longer, but the resulting library is significantly faster.
56+
5557
See `examples/cli/cli.py` for usage.
5658

5759
They will both be built for CUDA by default. If you see a runtime error `CUDA_ERROR_STUB_LIBRARY` this is because

0 commit comments

Comments
 (0)