Skip to content

Commit 29b8160

Browse files
committed
feat(pystr): Pass command line arguments
Command line arguments are passed to the python engine like this: ``` dynamo-run out=pystr:my_python_engine.py -- -n 42 --custom-arg Orange --yes ``` The python engine has two options for receiving those command line arguments (both of which are optional). In both cases the argument list will include some standard ones as well as anything after the `--`. This input: ``` dynamo-run out=pystr:my_engine.py /opt/models/Llama-3.2-3B-Instruct/ --model-name llama3.2 --tensor-parallel-size 4 -- -n 1 ``` 1. In `sys.argv`: ``` async def generate(request): .. as before .. if __name__ == "__main__": print(f"MAIN: {sys.argv}") ``` Produces this output: ``` MAIN: ['my_engine.py', '--model-path', '/opt/models/Llama-3.2-3B-Instruct/', '--model-name', 'llama3.2', '--http-port', '8080', '--tensor-parallel-size', '4', '--base-gpu-id', '0', '--num-nodes', '1', '--node-rank', '0', '-n', '1'] ``` This form allows quick iteration on the engine setup. 2. In an `initialize` function: ``` async def generate(request): .. as before .. def initialize(args: list[str]) -> None: print(f"initialize: {args}") ``` Produces this output: ``` initialize: ['--model-path', '/opt/models/Llama-3.2-3B-Instruct/', '--model-name', 'llama3.2', '--http-port', '8080', '--tensor-parallel-size', '4', '--base-gpu-id', '0', '--num-nodes', '1', '--node-rank', '0', '-n', '1'] ``` Note how in both cases the `-n 1` arguments from after the `--` are included. Flags `--leader-addr` and `--model-config` will also be added if provided to `dynamo-run`.
1 parent d9cf9d0 commit 29b8160

File tree

4 files changed

+146
-22
lines changed

4 files changed

+146
-22
lines changed

launch/dynamo-run/README.md

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,56 @@ async def generate(request):
211211
yield {"id":"1","choices":[{"index":0,"delta":{"content":"","role":"assistant"},"finish_reason":"stop"}],"created":1841762283,"model":"Llama-3.2-1B-Instruct","system_fingerprint":"local","object":"chat.completion.chunk"}
212212
```
213213

214+
Command line arguments are passed to the python engine like this:
215+
```
216+
dynamo-run out=pystr:my_python_engine.py -- -n 42 --custom-arg Orange --yes
217+
```
218+
219+
The python engine has two options for receiving those command line arguments (both of which are optional).
220+
221+
In both cases the argument list will include some standard ones as well as anything after the `--`.
222+
223+
This input:
224+
```
225+
dynamo-run out=pystr:my_engine.py /opt/models/Llama-3.2-3B-Instruct/ --model-name llama3.2 --tensor-parallel-size 4 -- -n 1
226+
```
227+
228+
1. In `sys.argv`:
229+
230+
```
231+
async def generate(request):
232+
.. as before ..
233+
234+
if __name__ == "__main__":
235+
print(f"MAIN: {sys.argv}")
236+
```
237+
238+
Produces this output:
239+
```
240+
MAIN: ['my_engine.py', '--model-path', '/opt/models/Llama-3.2-3B-Instruct/', '--model-name', 'llama3.2', '--http-port', '8080', '--tensor-parallel-size', '4', '--base-gpu-id', '0', '--num-nodes', '1', '--node-rank', '0', '-n', '1']
241+
```
242+
243+
This form allows quick iteration on the engine setup.
244+
245+
2. In an `initialize` function:
246+
247+
```
248+
async def generate(request):
249+
.. as before ..
250+
251+
def initialize(args: list[str]) -> None:
252+
print(f"initialize: {args}")
253+
```
254+
255+
Produces this output:
256+
```
257+
initialize: ['--model-path', '/opt/models/Llama-3.2-3B-Instruct/', '--model-name', 'llama3.2', '--http-port', '8080', '--tensor-parallel-size', '4', '--base-gpu-id', '0', '--num-nodes', '1', '--node-rank', '0', '-n', '1']
258+
```
259+
260+
Note how in both cases the `-n 1` arguments from after the `--` are included.
261+
262+
Flags `--leader-addr` and `--model-config` will also be added if provided to `dynamo-run`.
263+
214264
### Dynamo does the pre-processing
215265

216266
If the Python engine wants to receive and return tokens - the prompt templating and tokenization is already done - run it like this:
@@ -250,6 +300,8 @@ async def generate(request):
250300
yield {"token_ids":[13]}
251301
```
252302

303+
`pytok` supports the same ways of passing command line arguments as `pystr` - `initialize` or `main` with `sys.argv`.
304+
253305
## trtllm
254306

255307
TensorRT-LLM. Requires `clang` and `libclang-dev`.

launch/dynamo-run/src/flags.rs

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,7 @@ pub struct Flags {
9393

9494
/// Internal use only.
9595
// Start the python vllm engine sub-process.
96-
#[arg(long)]
97-
#[clap(hide = true, default_value = "false")]
96+
#[arg(long, hide = true, default_value = "false")]
9897
pub internal_vllm_process: bool,
9998

10099
/// Internal use only.
@@ -104,9 +103,52 @@ pub struct Flags {
104103
/// - the node rank (0 for first host, 1 for second host, etc)
105104
/// - the workers' rank (globally unique)
106105
/// - the GPU to use (locally unique)
107-
#[arg(long)]
108-
#[clap(hide = true, value_parser = parse_sglang_flags)]
106+
#[arg(long, hide = true, value_parser = parse_sglang_flags)]
109107
pub internal_sglang_process: Option<SgLangFlags>,
108+
109+
/// Everything after a `--`.
110+
/// These are the command line arguments to the python engine when using `pystr` or `pytok`.
111+
#[arg(index = 2, last = true, hide = true, allow_hyphen_values = true)]
112+
pub last: Vec<String>,
113+
}
114+
115+
impl Flags {
116+
/// Convert the flags back to a command line. Including only the non-null values, but
117+
/// include the defaults. Includes the canonicalized model path and normalized model name.
118+
///
119+
/// Used to pass arguments to python engines via `pystr` and `pytok`.
120+
pub fn as_vec(&self, path: &str, name: &str) -> Vec<String> {
121+
let mut out = vec![
122+
"--model-path".to_string(),
123+
path.to_string(),
124+
"--model-name".to_string(),
125+
name.to_string(),
126+
"--http-port".to_string(),
127+
self.http_port.to_string(),
128+
// Default 1
129+
"--tensor-parallel-size".to_string(),
130+
self.tensor_parallel_size.to_string(),
131+
// Default 0
132+
"--base-gpu-id".to_string(),
133+
self.base_gpu_id.to_string(),
134+
// Default 1
135+
"--num-nodes".to_string(),
136+
self.num_nodes.to_string(),
137+
// Default 0
138+
"--node-rank".to_string(),
139+
self.node_rank.to_string(),
140+
];
141+
if let Some(model_config_path) = self.model_config.as_ref() {
142+
out.push("--model-config".to_string());
143+
out.push(model_config_path.display().to_string());
144+
}
145+
if let Some(leader) = self.leader_addr.as_ref() {
146+
out.push("--leader-addr".to_string());
147+
out.push(leader.to_string());
148+
}
149+
out.extend(self.last.clone());
150+
out
151+
}
110152
}
111153

112154
#[derive(Debug, Clone, Copy)]

launch/dynamo-run/src/lib.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,8 @@ pub async fn run(
8282
// Turn relative paths into absolute paths
8383
let model_path = flags
8484
.model_path_pos
85-
.or(flags.model_path_flag)
85+
.clone()
86+
.or(flags.model_path_flag.clone())
8687
.and_then(|p| {
8788
if p.exists() {
8889
p.canonicalize().ok()
@@ -93,6 +94,7 @@ pub async fn run(
9394
// Serve the model under the name provided, or the name of the GGUF file or HF repo.
9495
let model_name = flags
9596
.model_name
97+
.clone()
9698
.or_else(|| {
9799
model_path
98100
.as_ref()
@@ -338,8 +340,9 @@ pub async fn run(
338340
let Some(model_name) = model_name else {
339341
anyhow::bail!("Provide model service name as `--model-name <this>`");
340342
};
343+
let py_args = flags.as_vec(&path_str, &model_name);
341344
let p = std::path::PathBuf::from(path_str);
342-
let engine = python::make_string_engine(cancel_token.clone(), &p).await?;
345+
let engine = python::make_string_engine(cancel_token.clone(), &p, py_args).await?;
343346
EngineConfig::StaticFull {
344347
service_name: model_name,
345348
engine,
@@ -354,8 +357,9 @@ pub async fn run(
354357
let Some(model_name) = model_name else {
355358
unreachable!("If we have a card we must have a model name");
356359
};
360+
let py_args = flags.as_vec(&path_str, &model_name);
357361
let p = std::path::PathBuf::from(path_str);
358-
let engine = python::make_token_engine(cancel_token.clone(), &p).await?;
362+
let engine = python::make_token_engine(cancel_token.clone(), &p, py_args).await?;
359363
EngineConfig::StaticCore {
360364
service_name: model_name.clone(),
361365
engine,

lib/llm/src/engines/python.rs

Lines changed: 41 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
use std::ffi::CStr;
1717
use std::{path::Path, sync::Arc};
1818

19+
use anyhow::Context;
1920
use dynamo_runtime::pipeline::error as pipeline_error;
2021
pub use dynamo_runtime::{
2122
error,
@@ -43,23 +44,23 @@ const PY_IMPORT: &CStr = cr#"
4344
import importlib.util
4445
import sys
4546
46-
module_name = file_path.split("/")[-1].replace(".py", "")
47-
spec = importlib.util.spec_from_file_location(module_name, file_path)
48-
47+
spec = importlib.util.spec_from_file_location("__main__", file_path)
4948
module = importlib.util.module_from_spec(spec)
5049
51-
sys.modules[module_name] = module
50+
sys.argv = sys_argv
51+
sys.modules["__main__"] = module
5252
spec.loader.exec_module(module)
5353
"#;
5454

5555
/// An engine that takes and returns strings, feeding them to a python written engine
5656
pub async fn make_string_engine(
5757
cancel_token: CancellationToken,
5858
py_file: &Path,
59+
py_args: Vec<String>,
5960
) -> pipeline_error::Result<OpenAIChatCompletionsStreamingEngine> {
6061
pyo3::prepare_freethreaded_python();
6162

62-
let engine = new_engine(cancel_token, py_file).await?;
63+
let engine = new_engine(cancel_token, py_file, py_args).await?;
6364
let engine: OpenAIChatCompletionsStreamingEngine = Arc::new(engine);
6465
Ok(engine)
6566
}
@@ -68,10 +69,11 @@ pub async fn make_string_engine(
6869
pub async fn make_token_engine(
6970
cancel_token: CancellationToken,
7071
py_file: &Path,
72+
py_args: Vec<String>,
7173
) -> pipeline_error::Result<ExecutionContext> {
7274
pyo3::prepare_freethreaded_python();
7375

74-
let engine = new_engine(cancel_token, py_file).await?;
76+
let engine = new_engine(cancel_token, py_file, py_args).await?;
7577
let engine: ExecutionContext = Arc::new(engine);
7678
Ok(engine)
7779
}
@@ -86,13 +88,28 @@ pub struct PythonServerStreamingEngine {
8688
async fn new_engine(
8789
cancel_token: CancellationToken,
8890
py_file: &Path,
91+
py_args: Vec<String>,
8992
) -> anyhow::Result<PythonServerStreamingEngine> {
9093
let (tx, rx) = tokio::sync::oneshot::channel();
9194
tokio::task::spawn_blocking(move || run_asyncio(tx));
9295
let event_loop = rx.await?;
9396

94-
let user_module = python_file_to_module(py_file)?;
95-
let generator = Python::with_gil(|py| user_module.getattr(py, "generate").unwrap());
97+
let user_module = python_file_to_module(py_file, py_args.clone())
98+
.with_context(|| py_file.display().to_string())?;
99+
let generator = Python::with_gil(|py| {
100+
if let Ok(initialize) = user_module.getattr(py, "initialize") {
101+
initialize
102+
.call1(py, (py_args,))
103+
.inspect_err(|err| {
104+
println!();
105+
err.display(py);
106+
})
107+
.with_context(|| "Failed calling python engine's initialize(args)")?;
108+
};
109+
user_module
110+
.getattr(py, "generate")
111+
.with_context(|| "generate")
112+
})?;
96113
Ok(PythonServerStreamingEngine::new(
97114
cancel_token,
98115
Arc::new(generator),
@@ -127,16 +144,25 @@ fn run_asyncio(tx: Sender<Arc<PyObject>>) {
127144
});
128145
}
129146

130-
fn python_file_to_module(p: &Path) -> Result<PyObject> {
147+
fn python_file_to_module(p: &Path, mut py_args: Vec<String>) -> Result<PyObject> {
148+
if let Some(filename) = p.file_name() {
149+
py_args.insert(0, filename.to_string_lossy().to_string());
150+
};
131151
let module: PyObject = Python::with_gil(|py| {
132-
let globals = [("file_path", p.display().to_string())]
152+
let py_file_path: PyObject = p.display().to_string().into_pyobject(py).unwrap().into();
153+
let py_sys_argv: PyObject = py_args.into_pyobject(py).unwrap().into();
154+
let globals = [("file_path", py_file_path), ("sys_argv", py_sys_argv)]
133155
.into_py_dict(py)
134-
.unwrap();
156+
.context("into_py_dict")?;
135157
let locals = PyDict::new(py);
136-
py.run(PY_IMPORT, Some(&globals), Some(&locals)).unwrap();
137-
let module = locals.get_item("module").unwrap().unwrap();
138-
module.extract().unwrap()
139-
});
158+
py.run(PY_IMPORT, Some(&globals), Some(&locals))
159+
.context("PY_IMPORT")?;
160+
let module = locals
161+
.get_item("module")
162+
.unwrap()
163+
.context("get module after import")?;
164+
module.extract().context("extract")
165+
})?;
140166
Ok(module)
141167
}
142168

0 commit comments

Comments
 (0)