Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions components/backends/sglang/deploy/agg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@ spec:
extraPodSpec:
mainContainer:
image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command: ["sh", "-c"]
args:
- "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-agg && python3 -m dynamo.frontend --http-port=8000"
SGLangDecodeWorker:
envFromSecret: hf-token-secret
dynamoNamespace: sglang-agg
Expand Down
7 changes: 3 additions & 4 deletions components/backends/sglang/deploy/agg_router.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,9 @@ spec:
extraPodSpec:
mainContainer:
image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command: ["sh", "-c"]
args:
- "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-agg-router && python3 -m dynamo.frontend --http-port=8000 --router-mode kv"
envs:
- name: DYN_ROUTER_MODE
value: kv
SGLangDecodeWorker:
envFromSecret: hf-token-secret
dynamoNamespace: sglang-agg-router
Expand Down
4 changes: 0 additions & 4 deletions components/backends/sglang/deploy/disagg-multinode.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,6 @@ spec:
extraPodSpec:
mainContainer:
image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command: ["sh", "-c"]
args:
- "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-disagg-multinode && python3 -m dynamo.frontend --http-port=8000"
decode:
multinode:
nodeCount: 2
Expand Down
4 changes: 0 additions & 4 deletions components/backends/sglang/deploy/disagg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@ spec:
extraPodSpec:
mainContainer:
image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command: ["sh", "-c"]
args:
- "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-disagg && python3 -m dynamo.frontend --http-port=8000"
SGLangDecodeWorker:
envFromSecret: hf-token-secret
dynamoNamespace: sglang-disagg
Expand Down
4 changes: 0 additions & 4 deletions components/backends/sglang/deploy/disagg_planner.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,6 @@ spec:
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1
workingDir: /workspace/components/backends/sglang
command: ["sh", "-c"]
args:
- "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-disagg && python3 -m dynamo.frontend --http-port=8000"
Planner:
dynamoNamespace: dynamo
envFromSecret: hf-token-secret
Expand Down
6 changes: 0 additions & 6 deletions components/backends/trtllm/deploy/agg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,6 @@ spec:
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
workingDir: /workspace/components/backends/trtllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000"
TRTLLMWorker:
envFromSecret: hf-token-secret
dynamoNamespace: trtllm-agg
Expand Down
9 changes: 3 additions & 6 deletions components/backends/trtllm/deploy/agg_router.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,9 @@ spec:
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
workingDir: /workspace/components/backends/trtllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000 --router-mode kv"
envs:
- name: DYN_ROUTER_MODE
value: kv
TRTLLMWorker:
envFromSecret: hf-token-secret
dynamoNamespace: trtllm-agg-router
Expand Down
6 changes: 0 additions & 6 deletions components/backends/trtllm/deploy/disagg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,6 @@ spec:
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
workingDir: /workspace/components/backends/trtllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000"
TRTLLMPrefillWorker:
dynamoNamespace: trtllm-disagg
envFromSecret: hf-token-secret
Expand Down
9 changes: 3 additions & 6 deletions components/backends/trtllm/deploy/disagg_router.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,9 @@ spec:
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
workingDir: /workspace/components/backends/trtllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000 --router-mode kv"
envs:
- name: DYN_ROUTER_MODE
value: kv
TRTLLMPrefillWorker:
dynamoNamespace: trtllm-v1-disagg-router
envFromSecret: hf-token-secret
Expand Down
6 changes: 0 additions & 6 deletions components/backends/vllm/deploy/agg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,6 @@ spec:
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000"
VllmDecodeWorker:
envFromSecret: hf-token-secret
dynamoNamespace: vllm-agg
Expand Down
9 changes: 3 additions & 6 deletions components/backends/vllm/deploy/agg_router.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,9 @@ spec:
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000 --router-mode kv"
envs:
- name: DYN_ROUTER_MODE
value: kv
VllmDecodeWorker:
envFromSecret: hf-token-secret
dynamoNamespace: vllm-agg-router
Expand Down
6 changes: 0 additions & 6 deletions components/backends/vllm/deploy/disagg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,6 @@ spec:
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000"
VllmDecodeWorker:
dynamoNamespace: vllm-disagg
envFromSecret: hf-token-secret
Expand Down
6 changes: 0 additions & 6 deletions components/backends/vllm/deploy/disagg_planner.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,6 @@ spec:
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0814-02
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000"
Planner:
dynamoNamespace: vllm-disagg-planner
envFromSecret: hf-token-secret
Expand Down
9 changes: 3 additions & 6 deletions components/backends/vllm/deploy/disagg_router.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,9 @@ spec:
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000 --router-mode kv"
envs:
- name: DYN_ROUTER_MODE
value: kv
VllmDecodeWorker:
dynamoNamespace: vllm-v1-disagg-router
envFromSecret: hf-token-secret
Expand Down
9 changes: 6 additions & 3 deletions components/frontend/src/dynamo/frontend/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,10 @@ def parse_args():
"--kv-cache-block-size", type=int, help="KV cache block size (u32)."
)
parser.add_argument(
"--http-port", type=int, default=8080, help="HTTP port for the engine (u16)."
"--http-port",
type=int,
default=int(os.environ.get("DYN_HTTP_PORT", "8080")),
help="HTTP port for the engine (u16). Can be set via DYN_HTTP_PORT env var.",
)
parser.add_argument(
"--tls-cert-path",
Expand All @@ -106,8 +109,8 @@ def parse_args():
"--router-mode",
type=str,
choices=["round-robin", "random", "kv"],
default="round-robin",
help="How to route the request",
default=os.environ.get("DYN_ROUTER_MODE", "round-robin"),
help="How to route the request. Can be set via DYN_ROUTER_MODE env var.",
)
parser.add_argument(
"--kv-overlap-score-weight",
Expand Down
8 changes: 8 additions & 0 deletions deploy/cloud/operator/internal/dynamo/component_frontend.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ func (f *FrontendDefaults) GetBaseContainer(context ComponentContext) (corev1.Co
// Frontend doesn't need backend-specific config
container := f.getCommonContainer(context)

// Set default command and args
container.Command = []string{"python3"}
container.Args = []string{"-m", "dynamo.frontend"}

// Add HTTP port
container.Ports = []corev1.ContainerPort{
{
Expand Down Expand Up @@ -71,6 +75,10 @@ func (f *FrontendDefaults) GetBaseContainer(context ComponentContext) (corev1.Co
Name: commonconsts.EnvDynamoServicePort,
Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort),
},
{
Name: "DYN_HTTP_PORT", // TODO: need to reconcile DYNAMO_PORT and DYN_HTTP_PORT
Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort),
},
}...)

return container, nil
Expand Down
93 changes: 93 additions & 0 deletions deploy/cloud/operator/internal/dynamo/graph_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1286,6 +1286,10 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
},
},
Env: []corev1.EnvVar{
{
Name: "DYN_HTTP_PORT",
Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort),
},
{
Name: "DYNAMO_POD_GANG_SET_REPLICAS",
Value: "1",
Expand Down Expand Up @@ -2036,6 +2040,10 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
},
},
Env: []corev1.EnvVar{
{
Name: "DYN_HTTP_PORT",
Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort),
},
{
Name: "DYNAMO_POD_GANG_SET_REPLICAS",
Value: "1",
Expand Down Expand Up @@ -2798,6 +2806,10 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
},
},
Env: []corev1.EnvVar{
{
Name: "DYN_HTTP_PORT",
Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort),
},
{
Name: "DYNAMO_POD_GANG_SET_REPLICAS",
Value: "1",
Expand Down Expand Up @@ -4243,6 +4255,87 @@ func XTestGenerateGrovePodGangSet_StartsAfterDependencies(t *testing.T) {
}
}

// TestGenerateBasePodSpec_Frontend checks the pod spec that
// GenerateBasePodSpec builds for a frontend component with no overrides:
// the first container must run "python3 -m dynamo.frontend" and must carry
// every environment variable listed in the test case's wantEnvVars.
func TestGenerateBasePodSpec_Frontend(t *testing.T) {
	secretsRetriever := &mockSecretsRetriever{}
	controllerConfig := controller_common.Config{}
	dynamoDeployment := &v1alpha1.DynamoGraphDeployment{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "test-deployment",
			Namespace: "default",
		},
	}

	tests := []struct {
		name             string
		component        *v1alpha1.DynamoComponentDeploymentOverridesSpec
		backendFramework BackendFramework
		wantEnvVars      map[string]string
		wantErr          bool
	}{
		{
			name: "frontend with default command",
			component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
				DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
					ComponentType: commonconsts.ComponentTypeFrontend,
				},
			},
			backendFramework: BackendFrameworkVLLM,
			wantEnvVars: map[string]string{
				"DYN_HTTP_PORT": fmt.Sprintf("%d", commonconsts.DynamoServicePort),
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			podSpec, err := GenerateBasePodSpec(
				tt.component,
				tt.backendFramework,
				secretsRetriever,
				dynamoDeployment.Name,
				dynamoDeployment.Namespace,
				RoleMain,
				1,
				controllerConfig,
				commonconsts.MultinodeDeploymentTypeGrove,
				"test-service",
			)

			if (err != nil) != tt.wantErr {
				t.Errorf("GenerateBasePodSpec() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if tt.wantErr {
				return
			}

			// Fail cleanly instead of panicking with an index-out-of-range
			// if the generated spec unexpectedly has no containers.
			if len(podSpec.Containers) == 0 {
				t.Fatalf("GenerateBasePodSpec() returned no containers")
			}
			container := &podSpec.Containers[0]

			// Check command and args
			wantCommand := []string{"python3"}
			wantArgs := []string{"-m", "dynamo.frontend"}
			if !reflect.DeepEqual(container.Command, wantCommand) {
				t.Errorf("GenerateBasePodSpec() command = %v, want %v",
					container.Command, wantCommand)
			}
			if !reflect.DeepEqual(container.Args, wantArgs) {
				t.Errorf("GenerateBasePodSpec() args = %v, want %v",
					container.Args, wantArgs)
			}

			// Check environment variables. Index them by name first so each
			// expectation is a single lookup.
			envVars := make(map[string]string, len(container.Env))
			for _, env := range container.Env {
				envVars[env.Name] = env.Value
			}
			for k, want := range tt.wantEnvVars {
				got, ok := envVars[k]
				if !ok {
					// Report absence explicitly; otherwise a missing variable
					// is indistinguishable from one set to the empty string.
					t.Errorf("GenerateBasePodSpec() env var %s is missing, want %v", k, want)
					continue
				}
				if got != want {
					t.Errorf("GenerateBasePodSpec() env var %s = %v, want %v",
						k, got, want)
				}
			}
		})
	}
}

func TestGenerateBasePodSpec_PlannerServiceAccount(t *testing.T) {
secretsRetriever := &mockSecretsRetriever{}
controllerConfig := controller_common.Config{}
Expand Down
Loading