Skip to content

Commit d5d1a52

Browse files
committed
Merge branch 'main' into phendricks/refactor-choice-and-finish-reason
2 parents 55eeabc + c95031e commit d5d1a52

File tree

36 files changed

+606
-123
lines changed

36 files changed

+606
-123
lines changed

CODEOWNERS

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ Cargo.toml @ryanolson @grahamking @paulhendricks @biswapanda @tmonty12 @guanluo
2222
/examples/hello_world/ @alec-flowers @biswapanda @grahamking @hhzhang16 @ishandhanani @julienmancuso @kkranen @mohammedabdulwahhab @nnshah1 @tedzhouhk
2323
/examples/llm/ @alec-flowers @biswapanda @grahamking @guanluo @hhzhang16 @ishandhanani @julienmancuso @kkranen @mohammedabdulwahhab @nnshah1 @piotrm-nvidia @ptarasiewiczNV @rmccorm4 @tanmayv25 @tedzhouhk
2424
/examples/multimodal/ @indrajit96 @krishung5 @whoisj
25-
/examples/sglang/ @biswapanda @ishandhanani @tedzhouhk
25+
/examples/sglang/ @biswapanda @ishandhanani @tedzhouhk @rmccorm4
2626
/examples/tensorrt_llm/ @guanluo @richardhuo-nv @rmccorm4 @tanmayv25
2727
/examples/vllm_v0/ @alec-flowers @tedzhouhk
2828
/examples/vllm_v1/ @biswapanda @ptarasiewiczNV @tedzhouhk

Cargo.lock

Lines changed: 38 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

components/planner/src/dynamo/planner/local_connector.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,13 @@
3232

3333

3434
class LocalConnector(PlannerConnector):
35-
def __init__(self, namespace: str, runtime: DistributedRuntime, backend: str):
35+
def __init__(self, namespace: str, runtime: DistributedRuntime):
3636
"""
3737
Initialize LocalConnector and connect to CircusController.
3838
3939
Args:
4040
namespace: The Dynamo namespace
4141
runtime: The DistributedRuntime instance
42-
backend: The backend to use ("vllm_v0", "vllm_v1")
4342
"""
4443
self.namespace = namespace
4544
self.runtime = runtime

container/Dockerfile.sglang-deepep

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,22 +35,62 @@ ARG ARCH_ALT=x86_64
3535

3636
WORKDIR /sgl-workspace
3737

38+
# Install UCX dependencies
39+
RUN apt-get update -y && \
40+
apt-get install -y --no-install-recommends \
41+
--reinstall libibverbs-dev rdma-core ibverbs-utils libibumad-dev \
42+
libnuma-dev librdmacm-dev ibverbs-providers \
43+
autoconf libtool
44+
45+
# Build UCX from source
46+
ARG NIXL_UCX_REF=v1.19.x
47+
RUN rm -rf /opt/hpcx/ucx && \
48+
rm -rf /usr/local/ucx && \
49+
cd /usr/local/src && \
50+
git clone https://github.com/openucx/ucx.git && \
51+
cd ucx && \
52+
git checkout $NIXL_UCX_REF && \
53+
./autogen.sh && ./configure \
54+
--prefix=/usr/local/ucx \
55+
--enable-shared \
56+
--disable-static \
57+
--disable-doxygen-doc \
58+
--enable-optimizations \
59+
--enable-cma \
60+
--enable-devel-headers \
61+
--with-cuda=/usr/local/cuda \
62+
--with-verbs \
63+
--with-efa \
64+
--with-dm \
65+
--with-gdrcopy=/usr/local \
66+
--enable-mt && \
67+
make -j && \
68+
make -j install-strip && \
69+
ldconfig
70+
71+
ENV LD_LIBRARY_PATH=/usr/lib:/usr/local/ucx/lib:$LD_LIBRARY_PATH
72+
3873
# Pinning to NIXL 0.2.1 right now
3974
# TODO: investigate pip install failure with 0.3.0 release
4075
ARG NIXL_COMMIT="5e4c179ee850d482a83cb2a211e0947e46281060"
41-
RUN git clone https://github.com/ai-dynamo/nixl.git && cd nixl && git checkout ${NIXL_COMMIT} &&pip install --break-system-packages . --config-settings=setup-args="-Ducx_path=/opt/hpcx/ucx"
76+
RUN git clone https://github.com/ai-dynamo/nixl.git && cd nixl && git checkout ${NIXL_COMMIT} && pip install --break-system-packages . --config-settings=setup-args="-Ducx_path=/usr/local/ucx"
4277

4378
WORKDIR /sgl-workspace
4479

4580
RUN pip uninstall --break-system-packages -y sglang
4681
RUN rm -rf sglang
47-
# 0.4.7
48-
RUN pip install --break-system-packages "sglang==0.4.7"
82+
# Pin to 0.4.7.post1: 0.4.8 has a bug with CUDA graphs and the decode worker
83+
# https://github.com/sgl-project/sglang/issues/7511
84+
RUN pip install --break-system-packages "sglang==0.4.7.post1"
85+
86+
# Allow forceful shutdown of inflight requests
87+
ENV SGL_FORCE_SHUTDOWN=1
4988

5089
WORKDIR /sgl-workspace
5190
# https://github.com/ai-dynamo/dynamo/pull/1510
5291
ARG DYNAMO_COMMIT="382e3aedc421b3b3abc338062b332b54b5aa8529"
53-
RUN git clone https://github.com/ai-dynamo/dynamo.git && cd dynamo && git checkout ${DYNAMO_COMMIT}
92+
ARG DYNAMO_BRANCH="ishan/cmpl-token-id"
93+
RUN git clone https://github.com/ai-dynamo/dynamo.git && cd dynamo && git checkout ${DYNAMO_BRANCH}
5494

5595
# install dynamo in editable mode
5696
WORKDIR /sgl-workspace/dynamo

container/build_trtllm_wheel.sh

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,17 @@ cp $MAIN_DIR/deps/tensorrt_llm/install_nixl.sh docker/common/install_nixl.sh
9090
sed -i "s/NIXL_COMMIT=\"[^\"]*\"/NIXL_COMMIT=\"${NIXL_COMMIT}\"/" docker/common/install_nixl.sh
9191

9292

93-
# Need to build in the Triton Devel Image for NIXL support.
94-
make -C docker tritondevel_build
95-
make -C docker wheel_build DEVEL_IMAGE=tritondevel BUILD_WHEEL_OPTS='--extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl'
93+
94+
95+
if [ "$ARCH" = "amd64" ]; then
96+
# Need to build in the Triton Devel Image for NIXL support.
97+
make -C docker tritondevel_build
98+
make -C docker wheel_build DEVEL_IMAGE=tritondevel BUILD_WHEEL_OPTS='--extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl'
99+
else
100+
# NIXL backend is not supported on arm64 for TensorRT-LLM.
101+
# See here: https://github.com/NVIDIA/TensorRT-LLM/blob/main/docker/common/install_nixl.sh
102+
make -C docker wheel_build
103+
fi
96104

97105
# Copy the wheel to the host
98106
mkdir -p $OUTPUT_DIR

deploy/cloud/api-store/Earthfile

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ uv-base:
2424
COPY +uv-source/uv /bin/uv
2525
RUN uv venv
2626
ENV PATH="/app/.venv/bin:$PATH"
27-
RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
2827
WORKDIR /app
2928
COPY uv.lock pyproject.toml README.md /app
3029
RUN uv sync --frozen --no-install-project --no-dev --no-install-workspace --no-editable

deploy/sdk/src/dynamo/sdk/lib/config.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,9 @@ def add_to_args(args: list[str], key: str, value):
105105
# Convert to CLI format
106106
if isinstance(value, bool):
107107
if value:
108-
args.append(f"--{arg_key}")
108+
args.extend([f"--{arg_key}", "true"])
109+
else:
110+
args.extend([f"--{arg_key}", "false"])
109111
elif isinstance(value, dict):
110112
args.extend([f"--{arg_key}", json.dumps(value)])
111113
else:

deploy/sdk/src/dynamo/sdk/tests/test_config.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,3 +140,41 @@ def test_service_config_override_common_configs():
140140
assert f"--{key}" in vllm_worker_args
141141

142142
assert vllm_worker_args[vllm_worker_args.index("--block-size") + 1] == "128"
143+
144+
145+
def test_explicit_boolean_arguments():
    """Check that boolean options are emitted as ``--flag true`` / ``--flag false``."""
    # Clear the singleton before installing the configuration for this test.
    ServiceConfig._instance = None

    # JSON service configuration mixing true and false boolean options.
    os.environ[
        "DYNAMO_SERVICE_CONFIG"
    ] = """
    {
        "VllmWorker": {
            "enable-prefix-caching": true,
            "disable-sliding-window": false,
            "enforce-eager": true
        }
    }
    """

    worker_args = ServiceConfig.get_instance().as_args("VllmWorker")

    # Each flag must be present and immediately followed by its explicit
    # string value, for both the true and the false case.
    expected_pairs = (
        ("--enable-prefix-caching", "true"),
        ("--disable-sliding-window", "false"),
        ("--enforce-eager", "true"),
    )
    for flag, expected_value in expected_pairs:
        assert flag in worker_args
        flag_position = worker_args.index(flag)
        assert worker_args[flag_position + 1] == expected_value

examples/sglang/README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -106,12 +106,12 @@ Dynamo supports SGLang's implementation of wide expert parallelism and large sca
106106

107107
Steps to run:
108108

109-
1. Build the SGLang DeepEP container
109+
1. Build the SGLang DeepEP container.
110110

111111
```bash
112-
git clone https://github.com/sgl-project/sglang.git
112+
git clone -b v0.4.8 https://github.com/sgl-project/sglang.git
113113
cd sglang/docker
114-
docker build -f Dockerfile.deepep -t deepep .
114+
docker build -f Dockerfile -t deepep .
115115
```
116116

117117
You will now have a `deepep:latest` image.

examples/sglang/components/decode_worker.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,9 @@ def __init__(self):
4545
@endpoint()
4646
async def generate(self, req: DisaggPreprocessedRequest):
4747
g = await self.engine.async_generate(
48-
input_ids=req.request.token_ids,
48+
input_ids=req.request.token_ids
49+
if req.request.batch_token_ids is None
50+
else req.request.batch_token_ids,
4951
sampling_params=req.sampling_params,
5052
stream=True,
5153
bootstrap_host=req.bootstrap_host,

0 commit comments

Comments
 (0)