Skip to content

Commit be2b3b4

Browse files
authored
Merge branch 'main' into ryan/connector-dev
2 parents: 105e68d + dbb4caa; commit: be2b3b4

File tree

6 files changed

+5
-49
lines changed

6 files changed

+5
-49
lines changed

components/backends/sglang/README.md

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -94,12 +94,6 @@ cd $DYNAMO_ROOT/components/backends/sglang
9494

9595
### Aggregated Serving with KV Routing
9696

97-
> [!NOTE]
98-
> The current implementation of `components/backends/sglang/src/dynamo/sglang/worker/main.py` publishes _placeholder_ engine metrics to keep the Dynamo KV-router happy. Real-time metrics will be surfaced directly from the SGLang engine once the following pull requests are merged:
99-
> • Dynamo: [ai-dynamo/dynamo #1465](https://github.com/ai-dynamo/dynamo/pull/1465)_feat: receive kvmetrics from sglang scheduler_.
100-
>
101-
> After these are in, the TODOs in `main.py` will be resolved and the placeholder logic removed.
102-
10397
```bash
10498
cd $DYNAMO_ROOT/components/backends/sglang
10599
./launch/agg_router.sh

components/backends/sglang/slurm_jobs/scripts/gb200.sh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -86,7 +86,7 @@ if [ "$mode" = "prefill" ]; then
8686
SGLANG_USE_MESSAGE_QUEUE_BROADCASTER=0 \
8787
SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK=1 \
8888
PYTHONUNBUFFERED=1 \
89-
python3 components/worker.py \
89+
python3 -m dynamo.sglang.worker \
9090
--served-model-name deepseek-ai/DeepSeek-R1 \
9191
--model-path /model/ \
9292
--skip-tokenizer-init \
@@ -188,7 +188,7 @@ elif [ "$mode" = "decode" ]; then
188188
SGLANG_USE_MESSAGE_QUEUE_BROADCASTER=0 \
189189
SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK=1 \
190190
PYTHONUNBUFFERED=1 \
191-
python3 components/decode_worker.py \
191+
python3 -m dynamo.sglang.decode_worker \
192192
--served-model-name deepseek-ai/DeepSeek-R1 \
193193
--model-path /model/ \
194194
--skip-tokenizer-init \

components/backends/sglang/slurm_jobs/scripts/h100.sh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,7 @@ fi
7070
if [ "$mode" = "prefill" ]; then
7171
if [ "$cmd" = "dynamo" ]; then
7272
# H100 dynamo prefill command
73-
python3 components/worker.py \
73+
python3 -m dynamo.sglang.worker \
7474
--model-path /model/ \
7575
--served-model-name deepseek-ai/DeepSeek-R1 \
7676
--skip-tokenizer-init \
@@ -131,7 +131,7 @@ if [ "$mode" = "prefill" ]; then
131131
elif [ "$mode" = "decode" ]; then
132132
if [ "$cmd" = "dynamo" ]; then
133133
# H100 dynamo decode command
134-
python3 components/decode_worker.py \
134+
python3 -m dynamo.sglang.decode_worker \
135135
--model-path /model/ \
136136
--served-model-name deepseek-ai/DeepSeek-R1 \
137137
--skip-tokenizer-init \

components/backends/trtllm/kv-cache-tranfer.md

Lines changed: 1 addition & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -61,10 +61,4 @@ To enable NIXL for KV cache transfer in disaggregated serving:
6161
4. **Send the request:**
6262
See [client](./README.md#client) section to learn how to send the request to deployment.
6363

64-
**Important:** Ensure that ETCD and NATS services are running before starting the service.
65-
66-
The container will automatically configure the appropriate environment variables (`TRTLLM_USE_NIXL_KVCACHE=1`) when built with the NIXL flag. The same container image can be used to use UCX for KV cache transfer.
67-
```bash
68-
unset TRTLLM_USE_NIXL_KVCACHE
69-
export TRTLLM_USE_UCX_KVCACHE=1
70-
```
64+
**Important:** Ensure that ETCD and NATS services are running before starting the service.

components/backends/trtllm/multinode/start_trtllm_worker.sh

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -22,13 +22,6 @@ if [[ -z ${ENGINE_CONFIG} ]]; then
2222
exit 1
2323
fi
2424

25-
# NOTE: When this script is run directly from srun, the environment variables
26-
# for TRTLLM KV cache are not set. So we need to set them here.
27-
# Related issue: https://github.com/ai-dynamo/dynamo/issues/1743
28-
if [[ -z ${TRTLLM_USE_UCX_KVCACHE} ]] && [[ -z ${TRTLLM_USE_NIXL_KVCACHE} ]]; then
29-
export TRTLLM_USE_UCX_KVCACHE=1
30-
fi
31-
3225
EXTRA_ARGS=""
3326
if [[ -n ${DISAGGREGATION_MODE} ]]; then
3427
EXTRA_ARGS+="--disaggregation-mode ${DISAGGREGATION_MODE} "

container/Dockerfile.tensorrt_llm

Lines changed: 0 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -332,26 +332,6 @@ RUN pip install dist/ai_dynamo_runtime*cp312*.whl && \
332332
pip install dist/ai_dynamo*any.whl
333333

334334
ENV DYNAMO_HOME=/workspace
335-
336-
# Use UCX for TRTLLM KV Cache Transfer
337-
ARG TRTLLM_USE_NIXL_KVCACHE_EXPERIMENTAL
338-
ENV TRTLLM_USE_UCX_KVCACHE=1
339-
340-
# Create a script that sets the environment variables and source it.
341-
# If TRTLLM_USE_NIXL_KVCACHE_EXPERIMENTAL is set to 1, unset TRTLLM_USE_UCX_KVCACHE
342-
# and set TRTLLM_USE_NIXL_KVCACHE to 1.
343-
RUN echo '#!/bin/bash' > /usr/local/bin/set_trtllm_env.sh && \
344-
if [ "$TRTLLM_USE_NIXL_KVCACHE_EXPERIMENTAL" = "1" ]; then \
345-
echo 'unset TRTLLM_USE_UCX_KVCACHE' >> /usr/local/bin/set_trtllm_env.sh; \
346-
echo 'export TRTLLM_USE_NIXL_KVCACHE=1' >> /usr/local/bin/set_trtllm_env.sh; \
347-
else \
348-
echo '# TRTLLM_USE_UCX_KVCACHE should already be set to 1' >> /usr/local/bin/set_trtllm_env.sh; \
349-
fi && \
350-
chmod +x /usr/local/bin/set_trtllm_env.sh
351-
352-
# Source the script in bashrc
353-
RUN echo 'source /usr/local/bin/set_trtllm_env.sh' >> /root/.bashrc
354-
355335
# Copy launch banner
356336
RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
357337
sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
@@ -501,11 +481,6 @@ RUN uv pip install --extra-index-url "${TENSORRTLLM_INDEX_URL}" \
501481
"${TENSORRTLLM_PIP_WHEEL}" && \
502482
uv pip install ai-dynamo nixl --find-links wheelhouse
503483

504-
# Setup TRTLLM environment variables, same as in dev image
505-
ENV TRTLLM_USE_UCX_KVCACHE=1
506-
COPY --from=dev /usr/local/bin/set_trtllm_env.sh /usr/local/bin/set_trtllm_env.sh
507-
RUN echo 'source /usr/local/bin/set_trtllm_env.sh' >> /root/.bashrc
508-
509484
# Copy benchmarks, backends and tests for CI
510485
# TODO: Remove this once we have a functional CI image built on top of the runtime image
511486
COPY tests /workspace/tests

0 commit comments

Comments
 (0)