chore: split frontend + runtime launch message to accurately represent container purpose (#4537)

nv-tusharma · saturley-hall · web-flow · commit f2769d8823ec · 2025-11-21T13:30:44.000-08:00
Signed-off-by: Tushar Sharma <tusharma@nvidia.com>
Co-authored-by: Harrison Saturley-Hall <hsaturleyhal@nvidia.com>
diff --git a/container/Dockerfile b/container/Dockerfile
@@ -421,7 +421,7 @@ RUN uv pip install \
     && UV_GIT_LFS=1 uv pip install --no-cache .
 
 # Setup launch banner in common directory accessible to all users
-RUN --mount=type=bind,source=./container/launch_message.txt,target=/opt/dynamo/launch_message.txt \
+RUN --mount=type=bind,source=./container/launch_message/runtime.txt,target=/opt/dynamo/launch_message.txt \
     sed '/^#\s/d' /opt/dynamo/launch_message.txt > /opt/dynamo/.launch_screen
 
 # Setup environment for all users
diff --git a/container/Dockerfile.frontend b/container/Dockerfile.frontend
@@ -40,7 +40,7 @@ ENV DYNAMO_HOME=/opt/dynamo
 WORKDIR /
 COPY --chown=dynamo: --from=epp /epp /epp
 
-COPY --chown=dynamo: container/launch_message.txt /opt/dynamo/.launch_screen
+COPY --chown=dynamo: container/launch_message/frontend.txt /opt/dynamo/.launch_screen
 # Copy tests, benchmarks, deploy and components with correct ownership
 COPY --chown=dynamo: tests /workspace/tests
 COPY --chown=dynamo: examples /workspace/examples
diff --git a/container/Dockerfile.sglang b/container/Dockerfile.sglang
@@ -362,10 +362,9 @@ RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requi
 
-## Copy attribution files and launch banner with correct ownership
+## Copy attribution files with correct ownership
 COPY --chown=dynamo: ATTRIBUTION* LICENSE /workspace/
-COPY --chown=dynamo: container/launch_message.txt /workspace/launch_message.txt
 
 # Setup launch banner in common directory accessible to all users
-RUN --mount=type=bind,source=./container/launch_message.txt,target=/opt/dynamo/launch_message.txt \
+RUN --mount=type=bind,source=./container/launch_message/runtime.txt,target=/opt/dynamo/launch_message.txt \
     sed '/^#\s/d' /opt/dynamo/launch_message.txt > /opt/dynamo/.launch_screen
 
 # Setup environment for all users
diff --git a/container/Dockerfile.trtllm b/container/Dockerfile.trtllm
@@ -336,7 +336,7 @@ COPY --chown=dynamo: recipes/ /workspace/recipes/
 COPY --chown=dynamo: ATTRIBUTION* LICENSE /workspace/
 
 # Setup launch banner in common directory accessible to all users
-RUN --mount=type=bind,source=./container/launch_message.txt,target=/opt/dynamo/launch_message.txt \
+RUN --mount=type=bind,source=./container/launch_message/runtime.txt,target=/opt/dynamo/launch_message.txt \
     sed '/^#\s/d' /opt/dynamo/launch_message.txt > /opt/dynamo/.launch_screen
 
 # Setup environment for all users
diff --git a/container/Dockerfile.vllm b/container/Dockerfile.vllm
@@ -296,7 +296,7 @@ COPY --chown=dynamo: . /workspace/
 COPY --chown=dynamo: ATTRIBUTION* LICENSE /workspace/
 
 # Setup launch banner in common directory accessible to all users
-RUN --mount=type=bind,source=./container/launch_message.txt,target=/opt/dynamo/launch_message.txt \
+RUN --mount=type=bind,source=./container/launch_message/runtime.txt,target=/opt/dynamo/launch_message.txt \
     sed '/^#\s/d' /opt/dynamo/launch_message.txt > /opt/dynamo/.launch_screen
 
 # Setup environment for all users
diff --git a/container/launch_message/frontend.txt b/container/launch_message/frontend.txt
@@ -0,0 +1,66 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+                         @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+                         @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+                         @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+                    @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+               @@@@@@@@@@@@@@@     @@@@@@@@@@@@@@@@@@@@@@@@@
+            @@@@@@@@@@   @@@@@@@@@@    @@@@@@@@@@@@@@@@@@@@@
+         @@@@@@@@     @@@@@@@@@@@@@@@@   @@@@@@@@@@@@@@@@@@@
+       @@@@@@@    @@@@@@@@      @@@@@@@    @@@@@@@@@@@@@@@@@
+     @@@@@@@@   @@@@@@@  @@@@      @@@@@@    @@@@@@@@@@@@@@@
+     @@@@@@@   @@@@@@    @@@@@@   @@@@@@@   @@@@@@@@@@@@@@@@
+      @@@@@@@  @@@@@@    @@@@@@@@@@@@@@    @@@@@@@@@@@@@@@@@
+       @@@@@@   @@@@@@   @@@@@@@@@@@@    @@@@@@@@@@@@@@@@@@@
+        @@@@@@@  @@@@@@@ @@@@@@@@@@   @@@@@@@@@      @@@@@@@
+          @@@@@@   @@@@@@@@@@@@@    @@@@@@@@         @@@@@@@
+            @@@@@@    @@@@     @@@@@@@@@@          @@@@@@@@@
+              @@@@@@@    @@@@@@@@@@@@@        @@@@@@@@@@@@@@
+                @@@@@@@@@@@@@@@@@        @@@@@@@@@@@@@@@@@@@
+                    @@@@@@       @@@@@@@@@@@@@@@@@@@@@@@@@@@
+                         @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+                         @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+                         @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+ @@@@@@@@@     @@@@      @@@@ @@@@  @@@@@@@@       @@@@       @@@@@
+@@@@@@@@@@@@@  @@@@@    @@@@@ @@@@@ @@@@@@@@@@@@@  @@@@@     @@@@@@@
+@@@@@@@@@@@@@@ @@@@@@  @@@@@  @@@@@ @@@@@@@@@@@@@@ @@@@@    @@@@@@@@@
+@@@@@    @@@@@@@@@@@@  @@@@@  @@@@@ @@@@@    @@@@@ @@@@@   @@@@@ @@@@@
+@@@@@     @@@@@ @@@@@@@@@@@   @@@@@ @@@@@    @@@@@ @@@@@  @@@@@  @@@@@@
+@@@@@     @@@@@  @@@@@@@@@@   @@@@@ @@@@@   @@@@@@ @@@@@  @@@@@@@@@@@@@
+@@@@@     @@@@@  @@@@@@@@@    @@@@@ @@@@@@@@@@@@@@ @@@@@ @@@@@@@@@@@@@@@
+@@@@@     @@@@@   @@@@@@@     @@@@@ @@@@@@@@@@@@@  @@@@@@@@@@@     @@@@@@
+ @@@       @@@      @@@@       @@@   @@@@@@@        @@   @@@         @@@  ®
+
+Dynamo: A Datacenter Scale Distributed Inference Serving Framework
+
+This is a framework-less image designed to deploy and run CPU-bound frontend
+components without CUDA or backend engine dependencies (vllm/sglang/trtllm).
+
+The frontend container includes:
+- HTTP API service
+- Preprocessor
+- Router
+- Endpoint Picker (EPP) for Gateway API Inference Extension
+
+Benefits:
+- Minimal dependencies for purely CPU-based processes
+- Fast deployment for integration testing on GPU-constrained clusters
+- Can spin up frontend with mock workers for rapid testing
+
+Quick Start:
+
+Start mocker with custom configuration:
+> python -m dynamo.mocker \
+  --model-path TinyLlama/TinyLlama-1.1B-Chat-v1.0 \
+  --num-gpu-blocks-override 8192 \
+  --block-size 16 \
+  --speedup-ratio 10.0 \
+  --max-num-seqs 512 \
+  --num-workers 4 \
+  --enable-prefix-caching
+
+Start frontend server:
+> python -m dynamo.frontend --http-port 8000
+
+
diff --git a/container/launch_message/runtime.txt b/container/launch_message/runtime.txt
@@ -1,14 +1,5 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-# http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
                           @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
                           @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
                           @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@ -47,9 +38,8 @@ This is a minimum runtime container for interacting with Dynamo via our CLI
 tools.
 
 Try the following to begin interacting with a model:
-> dynamo --help
 > python -m dynamo.frontend [--http-port 8000]
-> python -m dynamo.vllm --model Qwen/Qwen2.5-3B-Instruct
+> python -m dynamo.vllm --model Qwen/Qwen2.5-3B-Instruct  # or dynamo.sglang / dynamo.trtllm
 
 To run more complete deployment examples, instances of etcd and nats need to be
 accessible within the container. This is generally done by connecting to