Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions components/ingress/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Dynamo ingress / frontend node.

Usage: `python -m dynamo.ingress [--http-port <port>]`. Port defaults to 8080.

This runs an OpenAI compliant HTTP server, a pre-processor, and a router in a single process. Engines / workers are auto-discovered when they call `register_llm`.

Requires `etcd` and `nats-server -js`.

This is the same as `dynamo-run in=http out=dyn`.
Empty file.
7 changes: 7 additions & 0 deletions components/ingress/src/dynamo/ingress/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from dynamo.ingress.main import main

if __name__ == "__main__":
main()
61 changes: 61 additions & 0 deletions components/ingress/src/dynamo/ingress/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Usage: `python -m dynamo.ingress [args]`
#
# Start a frontend node. This runs:
# - OpenAI HTTP server.
# - Auto-discovery: Watches etcd for engine/worker registration (via `register_llm`).
# - Pre-processor: Prompt templating and tokenization.
# - Router, defaulting to round-robin (TODO: Add flags to enable KV routing).

import argparse
import asyncio

import uvloop

from dynamo.llm import EngineType, EntrypointArgs, make_engine, run_input
from dynamo.runtime import DistributedRuntime


def parse_args():
parser = argparse.ArgumentParser(
description="Dynamo Frontend: HTTP+Pre-processor+Router",
formatter_class=argparse.RawTextHelpFormatter, # To preserve multi-line help formatting
)
parser.add_argument(
"--kv-cache-block-size", type=int, help="KV cache block size (u32)."
)
parser.add_argument(
"--http-port", type=int, default=8080, help="HTTP port for the engine (u16)."
)
flags = parser.parse_args()

kwargs = {"http_port": flags.http_port}
if flags.kv_cache_block_size is not None:
kwargs["kv_cache_block_size"] = flags.kv_cache_block_size

return kwargs


async def async_main():
runtime = DistributedRuntime(asyncio.get_running_loop(), False)
flags = parse_args()

# out=dyn
e = EntrypointArgs(EngineType.Dynamic, **flags)
engine = await make_engine(runtime, e)

# in=http
try:
await run_input(runtime, "http", engine)
except asyncio.exceptions.CancelledError:
pass


def main():
uvloop.run(async_main())


if __name__ == "__main__":
main()
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.build.targets.wheel]
packages = ["deploy/sdk/src/dynamo", "components/planner/src/dynamo"]
packages = ["deploy/sdk/src/dynamo", "components/planner/src/dynamo", "components/ingress/src/dynamo"]

# This section is for including the binaries in the wheel package
# but doesn't make them executable scripts in the venv bin directory
Expand Down
2 changes: 0 additions & 2 deletions tests/serve/test_dynamo_serve.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,6 @@ def __init__(
(f"http://localhost:{port}/v1/models", self._check_model)
]
health_check_ports = [port]
env = None

self.port = port
self.graph = graph
Expand All @@ -305,7 +304,6 @@ def __init__(
"from multiprocessing.spawn",
],
log_dir=request.node.name,
env=env, # Pass the environment variables
)

def _check_model(self, response):
Expand Down
Loading