Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions .github/workflows/trigger_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,9 @@ jobs:
filters: |
vllm:
- 'container/Dockerfile.vllm'
- 'container/Dockerfile.vllm_v1'
- 'examples/vllm/**'
- 'examples/python_rs/llm/**'
- 'container/deps/requirements.vllm.txt'
- 'container/deps/vllm/**'
- 'components/backends/vllm/**'
- 'tests/serve/test_vllm.py'
trtllm:
- 'container/Dockerfile.tensorrt_llm'
Expand All @@ -65,7 +63,7 @@ jobs:
sglang:
- 'container/Dockerfile.sglang'
- 'container/Dockerfile.sglang-deepep'
- 'examples/sglang/**'
- 'components/backends/sglang/**'
- 'container/build.sh'
- name: Check if Validation Workflow has run
id: check_workflow
Expand Down
96 changes: 96 additions & 0 deletions components/backends/sglang/deploy/agg.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Aggregated SGLang deployment: a Frontend service plus a single
# SGLangDecodeWorker, both placed in the `sglang-agg` Dynamo namespace.
# NOTE(review): the nesting below was rebuilt from a flattened copy using the
# standard Kubernetes probe/resources layout — confirm against the CRD schema.
apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeployment
metadata:
  name: sglang-agg
spec:
  services:
    # Frontend service (componentType: main).
    Frontend:
      # Liveness: HTTP GET /health on port 8000.
      livenessProbe:
        httpGet:
          path: /health
          port: 8000
        initialDelaySeconds: 60
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      # Readiness: the probe command is `exit 0`, so it always succeeds.
      # NOTE(review): looks like a placeholder rather than a real check — confirm.
      readinessProbe:
        exec:
          command:
            - /bin/sh
            - -c
            - 'exit 0'
        initialDelaySeconds: 60
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      dynamoNamespace: sglang-agg
      componentType: main
      replicas: 1
      # Requests equal limits for both CPU and memory.
      resources:
        requests:
          cpu: '5'
          memory: '10Gi'
        limits:
          cpu: '5'
          memory: '10Gi'
      extraPodSpec:
        mainContainer:
          # NOTE(review): image reference looks like a placeholder to be
          # replaced before deploying — confirm.
          image: my-registry/sglang-runtime:my-tag
          workingDir: /workspace/components/backends/sglang
          command:
            - 'sh'
            - '-c'
          # One shell string: run clear_namespace first and start the frontend
          # only if that succeeds (`&&`). The folded scalar joins the two lines
          # with a single space.
          # NOTE(review): this clears namespace `dynamo`, while dynamoNamespace
          # above is `sglang-agg` — confirm the difference is intended.
          args:
            - >-
              python3 -m dynamo.sglang.utils.clear_namespace --namespace dynamo
              && python3 -m dynamo.frontend
    # Worker service (componentType: worker) requesting one GPU.
    SGLangDecodeWorker:
      # presumably supplies a Hugging Face token to the pod; verify the secret's
      # contents against the cluster setup.
      envFromSecret: hf-token-secret
      # Both probes run `exit 0`, so neither can ever fail.
      # NOTE(review): looks like placeholders — confirm. Unlike the other probes
      # in this file, this liveness probe sets no initialDelaySeconds.
      livenessProbe:
        exec:
          command:
            - /bin/sh
            - -c
            - 'exit 0'
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      readinessProbe:
        exec:
          command:
            - /bin/sh
            - -c
            - 'exit 0'
        initialDelaySeconds: 60
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      dynamoNamespace: sglang-agg
      componentType: worker
      replicas: 1
      # Requests equal limits for CPU, memory and GPU.
      resources:
        requests:
          cpu: '10'
          memory: '20Gi'
          gpu: '1'
        limits:
          cpu: '10'
          memory: '20Gi'
          gpu: '1'
      extraPodSpec:
        mainContainer:
          # NOTE(review): image reference looks like a placeholder — confirm.
          image: my-registry/sglang-runtime:my-tag
          workingDir: /workspace/components/backends/sglang
          # No `command` is set for this container; the whole command line,
          # starting with `python3`, is passed as args.
          # NOTE(review): how these args are executed depends on the image's
          # entrypoint — confirm.
          # --model-path and --served-model-name carry the same value.
          args:
            - 'python3'
            - '-m'
            - 'dynamo.sglang.worker'
            - '--model-path'
            - 'deepseek-ai/DeepSeek-R1-Distill-Llama-8B'
            - '--served-model-name'
            - 'deepseek-ai/DeepSeek-R1-Distill-Llama-8B'
            - '--page-size'
            - '16'
            - '--tp'
            - '1'
            - '--trust-remote-code'
            - '--skip-tokenizer-init'
2 changes: 1 addition & 1 deletion components/backends/sglang/launch/agg.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ python3 -m dynamo.sglang.worker \
--page-size 16 \
--tp 1 \
--trust-remote-code \
--skip-tokenizer-init \
--skip-tokenizer-init
2 changes: 1 addition & 1 deletion components/backends/sglang/launch/agg_router.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ python3 -m dynamo.sglang.worker \
--page-size 16 \
--tp 1 \
--trust-remote-code \
--skip-tokenizer-init \
--skip-tokenizer-init
3 changes: 3 additions & 0 deletions container/Dockerfile.sglang
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,9 @@ RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/la
sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
echo "cat ~/.launch_screen" >> ~/.bashrc

# Once UX refactor is merged, we can remove these files
# Python components will have been pip installed and packaged in wheel
COPY components/ /workspace/components/
# Copy benchmarks, examples, and tests for CI
# TODO: Remove this once we have a functional CI image built on top of the runtime image
COPY tests /workspace/tests
Expand Down
Loading