File tree Expand file tree Collapse file tree 3 files changed +86
-2
lines changed
components/backends/sglang Expand file tree Collapse file tree 3 files changed +86
-2
lines changed Original file line number Diff line number Diff line change 1+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+ # SPDX-License-Identifier: Apache-2.0
3+
# Aggregated SGLang deployment: one Frontend service and one SGLang worker
# service, both declared under the `sglang-agg` dynamoNamespace.
# NOTE(review): nesting below was reconstructed from a flattened listing using
# the standard Kubernetes probe/resources layout — confirm against the CRD.
apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeployment
metadata:
  name: sglang-agg
spec:
  services:
    Frontend:
      # Liveness is checked over HTTP on /health, port 8000.
      livenessProbe:
        httpGet:
          path: /health
          port: 8000
        initialDelaySeconds: 60
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      # Readiness runs `exit 0`, so it always succeeds once the initial delay
      # has elapsed; it does not inspect the application.
      readinessProbe:
        exec:
          command:
            - /bin/sh
            - -c
            - "exit 0"
        initialDelaySeconds: 60
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      dynamoNamespace: sglang-agg
      componentType: main
      replicas: 1
      # Requests and limits are set to the same values.
      resources:
        requests:
          cpu: "5"
          memory: "10Gi"
        limits:
          cpu: "5"
          memory: "10Gi"
      extraPodSpec:
        mainContainer:
          image: my-registry/sglang-runtime:my-tag
          workingDir: /workspace/components/backends/sglang
          command: ["sh", "-c"]
          args:
            # Runs clear_namespace first; the frontend starts only if that
            # step exits successfully (`&&`).
            # NOTE(review): clear_namespace targets `dynamo` while
            # dynamoNamespace above is `sglang-agg` — confirm this is intended.
            - "python3 -m dynamo.sglang.utils.clear_namespace --namespace dynamo && python3 -m dynamo.frontend"
    SGLangDecodeWorker:
      envFromSecret: hf-token-secret
      # Liveness runs `exit 0`, so it always succeeds.
      # NOTE(review): unlike the other probes in this file, no
      # initialDelaySeconds is set here — confirm the omission is deliberate.
      livenessProbe:
        exec:
          command:
            - /bin/sh
            - -c
            - "exit 0"
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      # Readiness runs `exit 0`, so it always succeeds once the initial delay
      # has elapsed; it does not wait for the model to load.
      readinessProbe:
        exec:
          command:
            - /bin/sh
            - -c
            - "exit 0"
        initialDelaySeconds: 60
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      dynamoNamespace: sglang-agg
      componentType: worker
      replicas: 1
      # Requests and limits are set to the same values, including one GPU.
      resources:
        requests:
          cpu: "10"
          memory: "20Gi"
          gpu: "1"
        limits:
          cpu: "10"
          memory: "20Gi"
          gpu: "1"
      extraPodSpec:
        mainContainer:
          image: my-registry/sglang-runtime:my-tag
          workingDir: /workspace/components/backends/sglang
          # NOTE(review): no `command` is set here (Frontend sets
          # ["sh", "-c"]); the single-string argument below presumably relies
          # on a default shell command supplied elsewhere — confirm.
          args:
            - "python3 -m dynamo.sglang.worker --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B --page-size 16 --tp 1 --trust-remote-code --skip-tokenizer-init"
Original file line number Diff line number Diff line change @@ -25,4 +25,4 @@ python3 -m dynamo.sglang.worker \
2525 --page-size 16 \
2626 --tp 1 \
2727 --trust-remote-code \
28- --skip-tokenizer-init \
28+ --skip-tokenizer-init
Original file line number Diff line number Diff line change @@ -25,4 +25,4 @@ python3 -m dynamo.sglang.worker \
2525 --page-size 16 \
2626 --tp 1 \
2727 --trust-remote-code \
28- --skip-tokenizer-init \
28+ --skip-tokenizer-init
You can’t perform that action at this time.
0 commit comments