You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboard. Expand all lines: tests/planner/README.md
+16-7Lines changed: 16 additions & 7 deletions
Display the source diff
Display the rich diff
Original file line number
Diff line number
Diff line change
@@ -133,7 +133,7 @@ The fourth plot, similar to the third plot, shows the actual decode throughput,
133
133
134
134
## Scaling Tests
135
135
136
-
This directory contains comprehensive tests for validating the SLA planner's scaling behavior. The tests validate both the replica calculation logic and end-to-end scaling behavior.
136
+
This directory contains comprehensive tests for validating the SLA planner's scaling behavior. The tests validate both the replica calculation logic and end-to-end scaling behavior. The scaling test uses a graduated load approach rather than dataset files, as it proved more reliable for metric generation and scaling triggers.
137
137
138
138
### Test Types
139
139
@@ -166,13 +166,22 @@ To save results to `tests/planner/e2e_scaling_results` instead of `/tmp`:
166
166
./run_scaling_test.sh --save-results
167
167
```
168
168
169
-
### Test Scenario
169
+
**E2E Test Deployment Management:**
170
+
- If no deployment exists: creates, tests, and cleans up deployment
171
+
- If deployment exists: uses existing deployment and preserves it
172
+
- Perfect for development workflows where you want to keep deployments running between tests
170
173
171
-
The main test scenario validates scaling for **H200 with 1P1D configuration**:
Copy file name to clipboard. Expand all lines: tests/planner/run_scaling_test.sh
+10-25Lines changed: 10 additions & 25 deletions
Original file line number
Diff line number
Diff line change
@@ -100,7 +100,7 @@ check_existing_deployment() {
100
100
# Check if the DynamoGraphDeployment is ready
101
101
local status=$(kubectl get dynamographdeployment "$DEPLOYMENT_NAME" -n "$NAMESPACE" -o jsonpath='{.status.state}')
102
102
if [ "$status"="successful" ];then
103
-
# Check if frontend pod is running (main indicator)
103
+
# Check if frontend pod is running
104
104
if kubectl get pods -n "$NAMESPACE" -l "nvidia.com/dynamo-component-type=frontend,nvidia.com/dynamo-namespace=vllm-disagg-planner" --field-selector=status.phase=Running | grep -q .;then
105
105
log_success "Existing deployment is ready"
106
106
return 0
@@ -135,7 +135,6 @@ deploy_planner() {
135
135
exit 1
136
136
fi
137
137
138
-
# Wait for DynamoGraphDeployment to be processed
139
138
log_info "Waiting for DynamoGraphDeployment to be processed..."
140
139
if kubectl wait --for=condition=Ready dynamographdeployment/"$DEPLOYMENT_NAME" -n "$NAMESPACE" --timeout=600s;then
141
140
log_success "DynamoGraphDeployment is ready"
@@ -144,10 +143,8 @@ deploy_planner() {
144
143
exit 1
145
144
fi
146
145
147
-
# Wait for pods to be running (this may take a while for image pulls)
148
146
log_info "Waiting for pods to be running (this may take several minutes for image pulls)..."
149
147
150
-
# Wait for frontend pod (main component we need for testing)
151
148
log_info "Waiting for frontend pod..."
152
149
if kubectl wait --for=condition=Ready pod -l "nvidia.com/dynamo-component-type=frontend,nvidia.com/dynamo-namespace=vllm-disagg-planner" -n "$NAMESPACE" --timeout=900s;then
153
150
log_success "Frontend pod is ready"
@@ -156,12 +153,10 @@ deploy_planner() {
156
153
exit 1
157
154
fi
158
155
159
-
# Wait a bit more for all pods to be fully running
160
156
log_info "Waiting for all pods to be running..."
161
157
sleep 30
162
158
}
163
159
164
-
# Setup port forwarding
165
160
setup_port_forward() {
166
161
log_info "Setting up port forwarding..."
167
162
@@ -172,10 +167,8 @@ setup_port_forward() {
172
167
sleep 2
173
168
fi
174
169
175
-
# Start port forwarding to frontend service directly
176
170
local frontend_service="vllm-disagg-planner-frontend"
177
171
178
-
# Check if the frontend service exists
179
172
if! kubectl get service "$frontend_service" -n "$NAMESPACE"&> /dev/null;then
180
173
log_error "Frontend service '$frontend_service' not found"
0 commit comments