diff --git a/README.md b/README.md index 8bcd092c14..759a9187d8 100644 --- a/README.md +++ b/README.md @@ -176,7 +176,6 @@ uv pip install ai-dynamo[sglang] Run the backend/worker like this: ``` -# Note the '.worker' in the module path for SGLang python -m dynamo.sglang.worker --help ``` diff --git a/components/backends/sglang/docs/dsr1-wideep-h100.md b/components/backends/sglang/docs/dsr1-wideep-h100.md index 5a6a21799f..d766bc3edf 100644 --- a/components/backends/sglang/docs/dsr1-wideep-h100.md +++ b/components/backends/sglang/docs/dsr1-wideep-h100.md @@ -9,15 +9,13 @@ Dynamo supports SGLang's implementation of wide expert parallelism and large sca ## Instructions -1. Build the SGLang DeepEP container. +1. Pull the SGLang container. ```bash -git clone -b v0.4.9.post2 https://github.com/sgl-project/sglang.git -cd sglang/docker -docker build -f Dockerfile -t sgl-widepep . +docker pull lmsysorg/sglang:latest ``` -You will now have a `sgl-widepep:latest` image +You can also pull a specific tag from the [lmsys dockerhub](https://hub.docker.com/r/lmsysorg/sglang/tags) 2. Build the Dynamo container diff --git a/container/Dockerfile.sglang b/container/Dockerfile.sglang index eca8b77ee9..c3190e7c27 100644 --- a/container/Dockerfile.sglang +++ b/container/Dockerfile.sglang @@ -27,7 +27,7 @@ ARG ARCH=amd64 ARG ARCH_ALT=x86_64 # Make sure to update the dependency version in pyproject.toml when updating this -ARG SGLANG_VERSION="0.4.9.post1" +ARG SGLANG_VERSION="0.4.9.post6" ################################## ########## Base Image ############ diff --git a/container/Dockerfile.sglang-wideep b/container/Dockerfile.sglang-wideep index 1cd7b684f2..0bbcb3af23 100644 --- a/container/Dockerfile.sglang-wideep +++ b/container/Dockerfile.sglang-wideep @@ -13,9 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# Note this container is built from a local dockerfile -# Please see instructions in examples/sglang/README.md -FROM sgl-wideep:latest +# This should be pinned to the sglang version that is installed with Dynamo +# in the pyproject.toml +FROM lmsysorg/sglang:v0.4.9.post6-cu126 # Add NIXL build dependencies RUN apt-get update -y && \ @@ -80,10 +80,7 @@ WORKDIR /sgl-workspace ENV SGL_FORCE_SHUTDOWN=1 WORKDIR /sgl-workspace -# include flush cache endpoint and server support -# https://github.com/ai-dynamo/dynamo/pull/1769 -ARG DYNAMO_COMMIT="bd91dca6141e05bcfbe9bd4dea54cc58b9e37d75" -RUN git clone https://github.com/ai-dynamo/dynamo.git && cd dynamo && git checkout ${DYNAMO_COMMIT} +RUN git clone https://github.com/ai-dynamo/dynamo.git # install dynamo in editable mode WORKDIR /sgl-workspace/dynamo @@ -126,8 +123,6 @@ RUN cargo build --release RUN cd lib/bindings/python && pip install --break-system-packages -e . && cd ../../.. RUN pip install --break-system-packages -e . -ENV PYTHONPATH=/sgl-workspace/dynamo/components/planner/src:/sgl-workspace/dynamo/examples/sglang:$PYTHONPATH - RUN wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/v2.10.28/nats-server-v2.10.28-${ARCH}.deb && \ dpkg -i nats-server-v2.10.28-${ARCH}.deb && rm nats-server-v2.10.28-${ARCH}.deb @@ -168,7 +163,7 @@ ENV PATH=/sgl-workspace/perf_analyzer/build/perf_analyzer/src/perf-analyzer-buil RUN pip install --break-system-packages genai-perf -COPY examples/sglang/configs/deepseek_r1/wideep/* /sgl-workspace/dynamo/examples/sglang/configs/ -COPY examples/sglang/utils/benchmarking/* /sgl-workspace/dynamo/examples/sglang/utils/ +# https://pypi.org/project/sglang-router/0.1.5 is latest +RUN pip install --break-system-packages sglang-router==0.1.5 -WORKDIR /sgl-workspace/dynamo/examples/sglang +WORKDIR /sgl-workspace/dynamo/components/backends/sglang diff --git a/pyproject.toml b/pyproject.toml index 245711df19..b15ec28cf2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,10 +73,7 @@ 
vllm = [ sglang = [ "uvloop", "nixl", - "sglang[runtime_common]==0.4.9.post1", - "einops", - "sgl-kernel==0.2.4", - "sentencepiece", + "sglang[all]==0.4.9.post6", ] llama_cpp = [