diff --git a/components/backends/sglang/deploy/agg.yaml b/components/backends/sglang/deploy/agg.yaml index 81087a4293..58fcd4dc87 100644 --- a/components/backends/sglang/deploy/agg.yaml +++ b/components/backends/sglang/deploy/agg.yaml @@ -14,10 +14,6 @@ spec: extraPodSpec: mainContainer: image: my-registry/sglang-runtime:my-tag - workingDir: /workspace/components/backends/sglang - command: ["sh", "-c"] - args: - - "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-agg && python3 -m dynamo.frontend --http-port=8000" SGLangDecodeWorker: envFromSecret: hf-token-secret dynamoNamespace: sglang-agg diff --git a/components/backends/sglang/deploy/agg_router.yaml b/components/backends/sglang/deploy/agg_router.yaml index 865e7fcba7..7bc8902370 100644 --- a/components/backends/sglang/deploy/agg_router.yaml +++ b/components/backends/sglang/deploy/agg_router.yaml @@ -14,10 +14,9 @@ spec: extraPodSpec: mainContainer: image: my-registry/sglang-runtime:my-tag - workingDir: /workspace/components/backends/sglang - command: ["sh", "-c"] - args: - - "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-agg-router && python3 -m dynamo.frontend --http-port=8000 --router-mode kv" + envs: + - name: DYN_ROUTER_MODE + value: kv SGLangDecodeWorker: envFromSecret: hf-token-secret dynamoNamespace: sglang-agg-router diff --git a/components/backends/sglang/deploy/disagg-multinode.yaml b/components/backends/sglang/deploy/disagg-multinode.yaml index 24f2131c97..e5ef9e951d 100644 --- a/components/backends/sglang/deploy/disagg-multinode.yaml +++ b/components/backends/sglang/deploy/disagg-multinode.yaml @@ -23,10 +23,6 @@ spec: extraPodSpec: mainContainer: image: my-registry/sglang-runtime:my-tag - workingDir: /workspace/components/backends/sglang - command: ["sh", "-c"] - args: - - "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-disagg-multinode && python3 -m dynamo.frontend --http-port=8000" decode: multinode: nodeCount: 2 diff --git a/components/backends/sglang/deploy/disagg.yaml b/components/backends/sglang/deploy/disagg.yaml index a00943e3c5..dc0eac62bf 100644 --- a/components/backends/sglang/deploy/disagg.yaml +++ b/components/backends/sglang/deploy/disagg.yaml @@ -14,10 +14,6 @@ spec: extraPodSpec: mainContainer: image: my-registry/sglang-runtime:my-tag - workingDir: /workspace/components/backends/sglang - command: ["sh", "-c"] - args: - - "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-disagg && python3 -m dynamo.frontend --http-port=8000" SGLangDecodeWorker: envFromSecret: hf-token-secret dynamoNamespace: sglang-disagg diff --git a/components/backends/sglang/deploy/disagg_planner.yaml b/components/backends/sglang/deploy/disagg_planner.yaml index 0db81857c7..dcae295ae8 100644 --- a/components/backends/sglang/deploy/disagg_planner.yaml +++ b/components/backends/sglang/deploy/disagg_planner.yaml @@ -21,10 +21,6 @@ spec: extraPodSpec: mainContainer: image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1 - workingDir: /workspace/components/backends/sglang - command: ["sh", "-c"] - args: - - "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-disagg && python3 -m dynamo.frontend --http-port=8000" Planner: dynamoNamespace: dynamo envFromSecret: hf-token-secret diff --git a/components/backends/trtllm/deploy/agg.yaml b/components/backends/trtllm/deploy/agg.yaml index 5118a6100f..76e9dc8a14 100644 --- a/components/backends/trtllm/deploy/agg.yaml +++ b/components/backends/trtllm/deploy/agg.yaml @@ -14,12 +14,6 @@ spec: extraPodSpec: mainContainer: image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17 - workingDir: /workspace/components/backends/trtllm - command: - - /bin/sh - - -c - args: - - "python3 -m dynamo.frontend --http-port 8000" TRTLLMWorker: envFromSecret: hf-token-secret dynamoNamespace: trtllm-agg diff --git a/components/backends/trtllm/deploy/agg_router.yaml b/components/backends/trtllm/deploy/agg_router.yaml index 9a273390c6..2ca97b6a49 100644 --- a/components/backends/trtllm/deploy/agg_router.yaml +++ b/components/backends/trtllm/deploy/agg_router.yaml @@ -14,12 +14,9 @@ spec: extraPodSpec: mainContainer: image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17 - workingDir: /workspace/components/backends/trtllm - command: - - /bin/sh - - -c - args: - - "python3 -m dynamo.frontend --http-port 8000 --router-mode kv" + envs: + - name: DYN_ROUTER_MODE + value: kv TRTLLMWorker: envFromSecret: hf-token-secret dynamoNamespace: trtllm-agg-router diff --git a/components/backends/trtllm/deploy/disagg.yaml b/components/backends/trtllm/deploy/disagg.yaml index 43bea0f387..5c38e30094 100644 --- a/components/backends/trtllm/deploy/disagg.yaml +++ b/components/backends/trtllm/deploy/disagg.yaml @@ -14,12 +14,6 @@ spec: extraPodSpec: mainContainer: image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17 - workingDir: /workspace/components/backends/trtllm - command: - - /bin/sh - - -c - args: - - "python3 -m dynamo.frontend --http-port 8000" TRTLLMPrefillWorker: dynamoNamespace: trtllm-disagg envFromSecret: hf-token-secret diff --git a/components/backends/trtllm/deploy/disagg_router.yaml b/components/backends/trtllm/deploy/disagg_router.yaml index 24fda37cc7..cc455fe7a7 100644 --- a/components/backends/trtllm/deploy/disagg_router.yaml +++ b/components/backends/trtllm/deploy/disagg_router.yaml @@ -14,12 +14,9 @@ spec: extraPodSpec: mainContainer: image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17 - workingDir: /workspace/components/backends/trtllm - command: - - /bin/sh - - -c - args: - - "python3 -m dynamo.frontend --http-port 8000 --router-mode kv" + envs: + - name: DYN_ROUTER_MODE + value: kv TRTLLMPrefillWorker: dynamoNamespace: trtllm-v1-disagg-router envFromSecret: hf-token-secret diff --git a/components/backends/vllm/deploy/agg.yaml b/components/backends/vllm/deploy/agg.yaml index 5bd74a48dd..95350332bd 100644 --- a/components/backends/vllm/deploy/agg.yaml +++ b/components/backends/vllm/deploy/agg.yaml @@ -14,12 +14,6 @@ spec: extraPodSpec: mainContainer: image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17 - workingDir: /workspace/components/backends/vllm - command: - - /bin/sh - - -c - args: - - "python3 -m dynamo.frontend --http-port 8000" VllmDecodeWorker: envFromSecret: hf-token-secret dynamoNamespace: vllm-agg diff --git a/components/backends/vllm/deploy/agg_router.yaml b/components/backends/vllm/deploy/agg_router.yaml index 675951cfed..4b8abaaae4 100644 --- a/components/backends/vllm/deploy/agg_router.yaml +++ b/components/backends/vllm/deploy/agg_router.yaml @@ -14,12 +14,9 @@ spec: extraPodSpec: mainContainer: image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17 - workingDir: /workspace/components/backends/vllm - command: - - /bin/sh - - -c - args: - - "python3 -m dynamo.frontend --http-port 8000 --router-mode kv" + envs: + - name: DYN_ROUTER_MODE + value: kv VllmDecodeWorker: envFromSecret: hf-token-secret dynamoNamespace: vllm-agg-router diff --git a/components/backends/vllm/deploy/disagg.yaml b/components/backends/vllm/deploy/disagg.yaml index b2e78f6227..20f1c857c7 100644 --- a/components/backends/vllm/deploy/disagg.yaml +++ b/components/backends/vllm/deploy/disagg.yaml @@ -14,12 +14,6 @@ spec: extraPodSpec: mainContainer: image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17 - workingDir: /workspace/components/backends/vllm - command: - - /bin/sh - - -c - args: - - "python3 -m dynamo.frontend --http-port 8000" VllmDecodeWorker: dynamoNamespace: vllm-disagg envFromSecret: hf-token-secret diff --git a/components/backends/vllm/deploy/disagg_planner.yaml b/components/backends/vllm/deploy/disagg_planner.yaml index 8ba69074a9..c1f5097139 100644 --- a/components/backends/vllm/deploy/disagg_planner.yaml +++ b/components/backends/vllm/deploy/disagg_planner.yaml @@ -21,12 +21,6 @@ spec: extraPodSpec: mainContainer: image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0814-02 - workingDir: /workspace/components/backends/vllm - command: - - /bin/sh - - -c - args: - - "python3 -m dynamo.frontend --http-port 8000" Planner: dynamoNamespace: vllm-disagg-planner envFromSecret: hf-token-secret diff --git a/components/backends/vllm/deploy/disagg_router.yaml b/components/backends/vllm/deploy/disagg_router.yaml index 18cb757a98..0434b55cea 100644 --- a/components/backends/vllm/deploy/disagg_router.yaml +++ b/components/backends/vllm/deploy/disagg_router.yaml @@ -14,12 +14,9 @@ spec: extraPodSpec: mainContainer: image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17 - workingDir: /workspace/components/backends/vllm - command: - - /bin/sh - - -c - args: - - "python3 -m dynamo.frontend --http-port 8000 --router-mode kv" + envs: + - name: DYN_ROUTER_MODE + value: kv VllmDecodeWorker: dynamoNamespace: vllm-v1-disagg-router envFromSecret: hf-token-secret diff --git a/components/frontend/src/dynamo/frontend/main.py b/components/frontend/src/dynamo/frontend/main.py index eb8e081880..f8960cf0f5 100644 --- a/components/frontend/src/dynamo/frontend/main.py +++ b/components/frontend/src/dynamo/frontend/main.py @@ -88,7 +88,10 @@ def parse_args(): "--kv-cache-block-size", type=int, help="KV cache block size (u32)." ) parser.add_argument( - "--http-port", type=int, default=8080, help="HTTP port for the engine (u16)." + "--http-port", + type=int, + default=int(os.environ.get("DYN_HTTP_PORT", "8080")), + help="HTTP port for the engine (u16). Can be set via DYN_HTTP_PORT env var.", ) parser.add_argument( "--tls-cert-path", @@ -106,8 +109,8 @@ def parse_args(): "--router-mode", type=str, choices=["round-robin", "random", "kv"], - default="round-robin", - help="How to route the request", + default=os.environ.get("DYN_ROUTER_MODE", "round-robin"), + help="How to route the request. Can be set via DYN_ROUTER_MODE env var.", ) parser.add_argument( "--kv-overlap-score-weight", diff --git a/deploy/cloud/operator/internal/dynamo/component_frontend.go b/deploy/cloud/operator/internal/dynamo/component_frontend.go index a3bbdccb3e..fec5328482 100644 --- a/deploy/cloud/operator/internal/dynamo/component_frontend.go +++ b/deploy/cloud/operator/internal/dynamo/component_frontend.go @@ -26,6 +26,10 @@ func (f *FrontendDefaults) GetBaseContainer(context ComponentContext) (corev1.Co // Frontend doesn't need backend-specific config container := f.getCommonContainer(context) + // Set default command and args + container.Command = []string{"python3"} + container.Args = []string{"-m", "dynamo.frontend"} + // Add HTTP port container.Ports = []corev1.ContainerPort{ { @@ -71,6 +75,10 @@ func (f *FrontendDefaults) GetBaseContainer(context ComponentContext) (corev1.Co Name: commonconsts.EnvDynamoServicePort, Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort), }, + { + Name: "DYN_HTTP_PORT", // TODO: need to reconcile DYNAMO_PORT and DYN_HTTP_PORT + Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort), + }, }...) return container, nil diff --git a/deploy/cloud/operator/internal/dynamo/graph_test.go b/deploy/cloud/operator/internal/dynamo/graph_test.go index 5e66d1801f..496c34efef 100644 --- a/deploy/cloud/operator/internal/dynamo/graph_test.go +++ b/deploy/cloud/operator/internal/dynamo/graph_test.go @@ -1286,6 +1286,10 @@ func TestGenerateGrovePodGangSet(t *testing.T) { }, }, Env: []corev1.EnvVar{ + { + Name: "DYN_HTTP_PORT", + Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort), + }, { Name: "DYNAMO_POD_GANG_SET_REPLICAS", Value: "1", @@ -2036,6 +2040,10 @@ func TestGenerateGrovePodGangSet(t *testing.T) { }, }, Env: []corev1.EnvVar{ + { + Name: "DYN_HTTP_PORT", + Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort), + }, { Name: "DYNAMO_POD_GANG_SET_REPLICAS", Value: "1", @@ -2798,6 +2806,10 @@ func TestGenerateGrovePodGangSet(t *testing.T) { }, }, Env: []corev1.EnvVar{ + { + Name: "DYN_HTTP_PORT", + Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort), + }, { Name: "DYNAMO_POD_GANG_SET_REPLICAS", Value: "1", @@ -4243,6 +4255,87 @@ func XTestGenerateGrovePodGangSet_StartsAfterDependencies(t *testing.T) { } } +func TestGenerateBasePodSpec_Frontend(t *testing.T) { + secretsRetriever := &mockSecretsRetriever{} + controllerConfig := controller_common.Config{} + dynamoDeployment := &v1alpha1.DynamoGraphDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-deployment", + Namespace: "default", + }, + } + + tests := []struct { + name string + component *v1alpha1.DynamoComponentDeploymentOverridesSpec + backendFramework BackendFramework + wantEnvVars map[string]string + wantErr bool + }{ + { + name: "frontend with default command", + component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{ + DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{ + ComponentType: commonconsts.ComponentTypeFrontend, + }, + }, + backendFramework: BackendFrameworkVLLM, + wantEnvVars: map[string]string{ + "DYN_HTTP_PORT": fmt.Sprintf("%d", commonconsts.DynamoServicePort), + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + podSpec, err := GenerateBasePodSpec( + tt.component, + tt.backendFramework, + secretsRetriever, + dynamoDeployment.Name, + dynamoDeployment.Namespace, + RoleMain, + 1, + controllerConfig, + commonconsts.MultinodeDeploymentTypeGrove, + "test-service", + ) + + if (err != nil) != tt.wantErr { + t.Errorf("GenerateBasePodSpec() error = %v, wantErr %v", err, tt.wantErr) + return + } + if tt.wantErr { + return + } + + // Check command and args + wantCommand := []string{"python3"} + wantArgs := []string{"-m", "dynamo.frontend"} + if !reflect.DeepEqual(podSpec.Containers[0].Command, wantCommand) { + t.Errorf("GenerateBasePodSpec() command = %v, want %v", + podSpec.Containers[0].Command, wantCommand) + } + if !reflect.DeepEqual(podSpec.Containers[0].Args, wantArgs) { + t.Errorf("GenerateBasePodSpec() args = %v, want %v", + podSpec.Containers[0].Args, wantArgs) + } + + // Check environment variables + envVars := make(map[string]string) + for _, env := range podSpec.Containers[0].Env { + envVars[env.Name] = env.Value + } + for k, v := range tt.wantEnvVars { + if envVars[k] != v { + t.Errorf("GenerateBasePodSpec() env var %s = %v, want %v", + k, envVars[k], v) + } + } + }) + } +} + func TestGenerateBasePodSpec_PlannerServiceAccount(t *testing.T) { secretsRetriever := &mockSecretsRetriever{} controllerConfig := controller_common.Config{}