Skip to content

Commit 4b8c890

Browse files
committed
feat(frontend): dynamo frontend alternative to dynamo-run
Instead of `dynamo-run in=http out=dyn` we can now do either: - `dynamo frontend [--http-port 8080]` OR - `python frontend.py`. No need to build or install the `dynamo-run` Rust binary. It is bundled in `pip install ai-dynamo`. This will have the same performance as `dynamo-run` because it uses the Rust library. In time we could split the HTTP server, pre-processor and router into separate bindings and call them here, allowing easy customization.
1 parent 26d7a61 commit 4b8c890

File tree

2 files changed

+73
-0
lines changed

2 files changed

+73
-0
lines changed

deploy/sdk/src/dynamo/sdk/cli/cli.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
from dynamo.sdk.cli.build import build
2626
from dynamo.sdk.cli.env import env
27+
from dynamo.sdk.cli.frontend import frontend
2728
from dynamo.sdk.cli.run import run
2829
from dynamo.sdk.cli.serve import serve
2930

@@ -75,6 +76,10 @@ def main(
7576
add_help_option=False,
7677
)(run)
7778
cli.command()(build)
79+
cli.command(
80+
context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
81+
add_help_option=False,
82+
)(frontend)
7883

7984
if __name__ == "__main__":
8085
cli()
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
# Start a frontend node. This runs:
5+
# - OpenAI HTTP server.
6+
# - Auto-discovery: Watches etcd for engine/worker registration (via `register_llm`).
7+
# - Pre-processor: Prompt templating and tokenization.
8+
# - Router, defaulting to round-robin (TODO: Add flags to enable KV routing).
9+
10+
import argparse
11+
import asyncio
12+
from typing import Optional
13+
14+
import typer
15+
import uvloop
16+
17+
from dynamo.llm import EngineType, EntrypointArgs, make_engine, run_input
18+
from dynamo.runtime import DistributedRuntime
19+
20+
21+
# Called as `dynamo frontend`
22+
def frontend(
    ctx: typer.Context, http_port: int = 8080, kv_cache_block_size: Optional[int] = None
):
    """Start an OpenAI compatible HTTP server"""
    # Entry point for the `dynamo frontend` sub-command.
    #
    # `ctx` is accepted but never read in this function; both remaining
    # parameters are forwarded unchanged to the shared async `main()`.
    # The docstring above is left untouched because the CLI framework may
    # surface it as the command's help text.
    entrypoint = main(http_port=http_port, kv_cache_block_size=kv_cache_block_size)

    # uvloop.run() drives the coroutine to completion on a uvloop event loop.
    uvloop.run(entrypoint)
28+
29+
30+
# Called as `python frontend.py`
31+
def cli():
    """Start an OpenAI compatible HTTP server.

    Entry point used when the module is executed directly
    (`python frontend.py`). Parses `--http-port` and `--kv-cache-block-size`
    from the command line and hands them to the shared async `main()`.
    """
    parser = argparse.ArgumentParser(
        description="Dynamo Frontend: HTTP+Pre-processor+Router",
        formatter_class=argparse.RawTextHelpFormatter,  # To preserve multi-line help formatting
    )
    parser.add_argument(
        "--kv-cache-block-size", type=int, help="KV cache block size (u32)."
    )
    # Explicit default: without it argparse yields None when the flag is
    # omitted, and passing http_port=None below would override main()'s own
    # default of 8080. This keeps `python frontend.py` consistent with the
    # `dynamo frontend` command, which also defaults to 8080.
    parser.add_argument(
        "--http-port",
        type=int,
        default=8080,
        help="HTTP port for the engine (u16).",
    )
    flags = parser.parse_args()

    uvloop.run(
        main(http_port=flags.http_port, kv_cache_block_size=flags.kv_cache_block_size)
    )
46+
47+
48+
async def main(http_port=8080, kv_cache_block_size=None):
    """Build the dynamic engine and serve it over HTTP.

    Shared by both launchers in this module (`frontend` and `cli`).

    Args:
        http_port: Port forwarded to `EntrypointArgs` as `http_port`.
        kv_cache_block_size: Forwarded to `EntrypointArgs` only when truthy;
            `None` (and 0) leave the setting out entirely.
    """
    # If subprocesses are ever found not to stop on exit, an atexit cleanup
    # hook (e.g. `atexit.register(cleanup_subprocess)`) may be needed here.
    # Seems OK so far.

    # NOTE(review): the meaning of the second positional argument (False) is
    # not visible from this file - confirm against DistributedRuntime.
    loop = asyncio.get_running_loop()
    runtime = DistributedRuntime(loop, False)

    options = dict(http_port=http_port)
    # Truthiness test (not `is not None`): falsy values are simply omitted.
    if kv_cache_block_size:
        options.update(kv_cache_block_size=kv_cache_block_size)

    # out=dyn
    entrypoint_args = EntrypointArgs(EngineType.Dynamic, **options)
    engine = await make_engine(runtime, entrypoint_args)

    # in=http
    await run_input(runtime, "http", engine)
65+
66+
67+
# Script entry point: `python frontend.py` goes through the argparse-based cli().
if __name__ == "__main__":
    cli()

0 commit comments

Comments
 (0)