diff --git a/.github/workflows/trigger_ci.yml b/.github/workflows/trigger_ci.yml
index 3935546ebf..d0d0d937a8 100644
--- a/.github/workflows/trigger_ci.yml
+++ b/.github/workflows/trigger_ci.yml
@@ -47,11 +47,9 @@ jobs:
           filters: |
             vllm:
               - 'container/Dockerfile.vllm'
-              - 'container/Dockerfile.vllm_v1'
-              - 'examples/vllm/**'
-              - 'examples/python_rs/llm/**'
               - 'container/deps/requirements.vllm.txt'
               - 'container/deps/vllm/**'
+              - 'components/backends/vllm/**'
               - 'tests/serve/test_vllm.py'
             trtllm:
               - 'container/Dockerfile.tensorrt_llm'
@@ -65,7 +63,7 @@ jobs:
             sglang:
               - 'container/Dockerfile.sglang'
               - 'container/Dockerfile.sglang-deepep'
-              - 'examples/sglang/**'
+              - 'components/backends/sglang/**'
               - 'container/build.sh'
       - name: Check if Validation Workflow has run
         id: check_workflow
diff --git a/components/backends/sglang/deploy/agg.yaml b/components/backends/sglang/deploy/agg.yaml
new file mode 100644
index 0000000000..b030716a11
--- /dev/null
+++ b/components/backends/sglang/deploy/agg.yaml
@@ -0,0 +1,96 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: nvidia.com/v1alpha1
+kind: DynamoGraphDeployment
+metadata:
+  name: sglang-agg
+spec:
+  services:
+    Frontend:
+      livenessProbe:
+        httpGet:
+          path: /health
+          port: 8000
+        initialDelaySeconds: 60
+        periodSeconds: 60
+        timeoutSeconds: 30
+        failureThreshold: 10
+      readinessProbe:
+        exec:
+          command:
+            - /bin/sh
+            - -c
+            - "exit 0"
+        initialDelaySeconds: 60
+        periodSeconds: 60
+        timeoutSeconds: 30
+        failureThreshold: 10
+      dynamoNamespace: sglang-agg
+      componentType: main
+      replicas: 1
+      resources:
+        requests:
+          cpu: "5"
+          memory: "10Gi"
+        limits:
+          cpu: "5"
+          memory: "10Gi"
+      extraPodSpec:
+        mainContainer:
+          image: my-registry/sglang-runtime:my-tag
+          workingDir: /workspace/components/backends/sglang
+          command: ["sh", "-c"]
+          args:
+            - "python3 -m dynamo.sglang.utils.clear_namespace --namespace dynamo && python3 -m dynamo.frontend"
+    SGLangDecodeWorker:
+      envFromSecret: hf-token-secret
+      livenessProbe:
+        exec:
+          command:
+            - /bin/sh
+            - -c
+            - "exit 0"
+        periodSeconds: 60
+        timeoutSeconds: 30
+        failureThreshold: 10
+      readinessProbe:
+        exec:
+          command:
+            - /bin/sh
+            - -c
+            - "exit 0"
+        initialDelaySeconds: 60
+        periodSeconds: 60
+        timeoutSeconds: 30
+        failureThreshold: 10
+      dynamoNamespace: sglang-agg
+      componentType: worker
+      replicas: 1
+      resources:
+        requests:
+          cpu: "10"
+          memory: "20Gi"
+          gpu: "1"
+        limits:
+          cpu: "10"
+          memory: "20Gi"
+          gpu: "1"
+      extraPodSpec:
+        mainContainer:
+          image: my-registry/sglang-runtime:my-tag
+          workingDir: /workspace/components/backends/sglang
+          args:
+            - "python3"
+            - "-m"
+            - "dynamo.sglang.worker"
+            - "--model-path"
+            - "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
+            - "--served-model-name"
+            - "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
+            - "--page-size"
+            - "16"
+            - "--tp"
+            - "1"
+            - "--trust-remote-code"
+            - "--skip-tokenizer-init"
diff --git a/components/backends/sglang/launch/agg.sh b/components/backends/sglang/launch/agg.sh
index 0050acfceb..94153ba9cb 100755
--- a/components/backends/sglang/launch/agg.sh
+++ b/components/backends/sglang/launch/agg.sh
@@ -25,4 +25,4 @@ python3 -m dynamo.sglang.worker \
   --page-size 16 \
   --tp 1 \
   --trust-remote-code \
-  --skip-tokenizer-init \
+  --skip-tokenizer-init
diff --git a/components/backends/sglang/launch/agg_router.sh b/components/backends/sglang/launch/agg_router.sh
index cde133e06a..b45509235c 100755
--- a/components/backends/sglang/launch/agg_router.sh
+++ b/components/backends/sglang/launch/agg_router.sh
@@ -25,4 +25,4 @@ python3 -m dynamo.sglang.worker \
   --page-size 16 \
   --tp 1 \
   --trust-remote-code \
-  --skip-tokenizer-init \
+  --skip-tokenizer-init
diff --git a/container/Dockerfile.sglang b/container/Dockerfile.sglang
index b421759071..f484ad3e49 100644
--- a/container/Dockerfile.sglang
+++ b/container/Dockerfile.sglang
@@ -463,6 +463,9 @@ RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/la
     sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
     echo "cat ~/.launch_screen" >> ~/.bashrc
 
+# Once UX refactor is merged, we can remove these files
+# Python components will have been pip installed and packaged in wheel
+COPY components/ /workspace/components/
 # Copy benchmarks, examples, and tests for CI
 # TODO: Remove this once we have a functional CI image built on top of the runtime image
 COPY tests /workspace/tests