
Commit 98f3b33

Fix HPA race condition by reading deployment replicas instead of HPA status (#4214)
When HPA is enabled, read the current Deployment.Spec.Replicas directly instead of HPA.Status.DesiredReplicas, which is eventually consistent and lags behind deployment changes. This prevents the controller from overwriting HPA's replica count with stale values, eliminating pod churn and connection drops. Fixes race condition where HPA scales down → NGF reads stale HPA status → NGF overwrites deployment with old replica count → pods restart.
1 parent bd30d48 commit 98f3b33

File tree

2 files changed: +194 additions, -80 deletions

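Before the diffs, a minimal sketch to make the race in the commit message concrete. This is an illustration distilled from the patch, not code from the commit: readCurrentReplicas is a hypothetical name, and the sketch assumes the controller-runtime client and Kubernetes API types the provisioner already uses.

package example

import (
	"context"

	appsv1 "k8s.io/api/apps/v1"
	"k8s.io/apimachinery/pkg/types"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// The failure sequence, condensed:
//
//	t0: HPA scales down and writes Deployment.Spec.Replicas = 3 (was 8)
//	t1: controller reconciles and reads HPA.Status.DesiredReplicas, which is
//	    still 8 because the HPA updates its status only after the spec write
//	t2: controller writes Deployment.Spec.Replicas = 8, undoing the scale-down
//	t3: terminating pods are recreated: churn and dropped connections
//
// readCurrentReplicas (hypothetical name) shows the safe read: take the value
// the HPA just wrote to the live Deployment, never the HPA status.
func readCurrentReplicas(ctx context.Context, c client.Client, nn types.NamespacedName) *int32 {
	dep := &appsv1.Deployment{}
	if err := c.Get(ctx, nn, dep); err != nil {
		// Deployment not created yet (or read failed): the caller falls back
		// to the configured replica count, as determineReplicas does below.
		return nil
	}
	return dep.Spec.Replicas
}

The patch below packages this read, together with the fallback to the configured count, as determineReplicas.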

internal/controller/provisioner/objects.go

Lines changed: 56 additions & 20 deletions
@@ -686,33 +686,69 @@ func (p *NginxProvisioner) buildNginxDeployment(
 		}
 	}
 
-	var replicas *int32
-	if deploymentCfg.Replicas != nil {
-		replicas = deploymentCfg.Replicas
-	}
-
-	if isAutoscalingEnabled(&deploymentCfg) {
-		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
-		defer cancel()
-
-		hpa := &autoscalingv2.HorizontalPodAutoscaler{}
-		err := p.k8sClient.Get(ctx, types.NamespacedName{
-			Namespace: objectMeta.Namespace,
-			Name:      objectMeta.Name,
-		}, hpa)
-		if err == nil && hpa.Status.DesiredReplicas > 0 {
-			// overwrite with HPA's desiredReplicas
-			replicas = helpers.GetPointer(hpa.Status.DesiredReplicas)
-		}
-	}
-
+	// Determine replica count based on HPA status
+	replicas := p.determineReplicas(objectMeta, deploymentCfg)
 	if replicas != nil {
 		deployment.Spec.Replicas = replicas
 	}
 
 	return deployment, nil
 }
 
+// determineReplicas determines the appropriate replica count for a deployment based on HPA status.
+//
+// HPA Replicas Management Strategy:
+//
+// When an HPA is managing a deployment, we must read the current deployment's replicas
+// from the cluster and use that value, rather than trying to set our own value or read
+// from HPA.Status.DesiredReplicas (which is eventually consistent and stale).
+//
+// Why we can't use HPA.Status.DesiredReplicas:
+//   - HPA.Status updates lag behind Deployment.Spec.Replicas changes
+//   - When HPA scales down: HPA writes Deployment.Spec → then updates its own Status
+//   - If we read Status during this window, we get the OLD value and overwrite HPA's new value
+//   - This creates a race condition causing pod churn
+//
+// Our approach:
+//   - When HPA exists: Read current deployment replicas from cluster and use that
+//   - When HPA doesn't exist yet: Set replicas for initial deployment creation
+//   - When HPA exists but Deployment doesn't exist yet: Set replicas for initial deployment creation
+//   - When HPA is disabled: Set replicas normally.
+func (p *NginxProvisioner) determineReplicas(
+	objectMeta metav1.ObjectMeta,
+	deploymentCfg ngfAPIv1alpha2.DeploymentSpec,
+) *int32 {
+	replicas := deploymentCfg.Replicas
+
+	if !isAutoscalingEnabled(&deploymentCfg) {
+		return replicas
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	hpa := &autoscalingv2.HorizontalPodAutoscaler{}
+	err := p.k8sClient.Get(ctx, types.NamespacedName{
+		Namespace: objectMeta.Namespace,
+		Name:      objectMeta.Name,
+	}, hpa)
+	if err != nil {
+		return replicas
+	}
+
+	existingDeployment := &appsv1.Deployment{}
+	err = p.k8sClient.Get(ctx, types.NamespacedName{
+		Namespace: objectMeta.Namespace,
+		Name:      objectMeta.Name,
+	}, existingDeployment)
+
+	if err == nil && existingDeployment.Spec.Replicas != nil {
+		replicas = existingDeployment.Spec.Replicas
+	}
+
+	return replicas
+}
+
 // applyPatches applies the provided patches to the given object.
 func applyPatches(obj client.Object, patches []ngfAPIv1alpha2.Patch) error {
 	if len(patches) == 0 {
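Note the design choice: determineReplicas still issues a Get for the HPA, but only to confirm autoscaling is actually in effect; the authoritative replica value comes from the live Deployment, and any read error simply falls back to the configured count. The lag being worked around is also directly observable. The following is a hypothetical diagnostic along the same lines, not part of this commit (checkHPALag and its logging are illustrative assumptions):

package example

import (
	"context"
	"log"

	appsv1 "k8s.io/api/apps/v1"
	autoscalingv2 "k8s.io/api/autoscaling/v2"
	"k8s.io/apimachinery/pkg/types"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// checkHPALag reports when HPA.Status.DesiredReplicas trails the replica
// count the HPA has already written to Deployment.Spec.Replicas. The window
// where the two disagree is where the old code picked up a stale value.
func checkHPALag(ctx context.Context, c client.Client, nn types.NamespacedName) {
	hpa := &autoscalingv2.HorizontalPodAutoscaler{}
	if err := c.Get(ctx, nn, hpa); err != nil {
		return // no HPA: nothing to compare
	}
	dep := &appsv1.Deployment{}
	if err := c.Get(ctx, nn, dep); err != nil || dep.Spec.Replicas == nil {
		return // no deployment yet: the initial-creation path
	}
	if hpa.Status.DesiredReplicas != *dep.Spec.Replicas {
		log.Printf("HPA status (%d) lags deployment spec (%d) for %s",
			hpa.Status.DesiredReplicas, *dep.Spec.Replicas, nn)
	}
}

During a scale-down, the log line fires in the gap between the HPA's spec write and its status update, which is exactly when the old code read a stale DesiredReplicas.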

internal/controller/provisioner/objects_test.go

Lines changed: 138 additions & 60 deletions
@@ -392,78 +392,156 @@ func TestBuildNginxResourceObjects_NginxProxyConfig(t *testing.T) {
 
 func TestBuildNginxResourceObjects_DeploymentReplicasFromHPA(t *testing.T) {
 	t.Parallel()
-	g := NewWithT(t)
 
-	// Create a fake HPA with status.desiredReplicas set
-	hpa := &autoscalingv2.HorizontalPodAutoscaler{
-		ObjectMeta: metav1.ObjectMeta{
-			Name:      "gw-nginx",
-			Namespace: "default",
+	tests := []struct {
+		currentReplicas  *int32
+		configReplicas   *int32
+		name             string
+		description      string
+		expectedValue    int32
+		hpaExists        bool
+		deploymentExists bool
+		expectedNil      bool
+	}{
+		{
+			name:             "HPA exists - use current deployment replicas",
+			hpaExists:        true,
+			deploymentExists: true,
+			currentReplicas:  helpers.GetPointer(int32(8)),
+			configReplicas:   helpers.GetPointer(int32(5)),
+			expectedNil:      false,
+			expectedValue:    8,
+			description:      "When HPA exists, read current deployment replicas (set by HPA)",
 		},
-		Status: autoscalingv2.HorizontalPodAutoscalerStatus{
-			DesiredReplicas: 7,
+		{
+			name:             "HPA does not exist - use configured replicas",
+			hpaExists:        false,
+			deploymentExists: false,
+			configReplicas:   helpers.GetPointer(int32(3)),
+			expectedNil:      false,
+			expectedValue:    3,
+			description:      "When HPA doesn't exist yet (initial creation), use configured replicas",
 		},
-	}
-
-	agentTLSSecret := &corev1.Secret{
-		ObjectMeta: metav1.ObjectMeta{
-			Name:      agentTLSTestSecretName,
-			Namespace: ngfNamespace,
+		{
+			name:             "HPA enabled but doesn't exist, no configured replicas",
+			hpaExists:        false,
+			deploymentExists: false,
+			configReplicas:   nil,
+			expectedNil:      true,
+			description:      "When HPA enabled but doesn't exist and no replicas configured, don't set replicas",
 		},
-		Data: map[string][]byte{"tls.crt": []byte("tls")},
 	}
 
-	fakeClient := fake.NewFakeClient(agentTLSSecret, hpa)
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+			g := NewWithT(t)
 
-	provisioner := &NginxProvisioner{
-		cfg: Config{
-			GatewayPodConfig: &config.GatewayPodConfig{
-				Namespace: ngfNamespace,
-				Version:   "1.0.0",
-				Image:     "ngf-image",
-			},
-			AgentTLSSecretName: agentTLSTestSecretName,
-		},
-		baseLabelSelector: metav1.LabelSelector{
-			MatchLabels: map[string]string{"app": "nginx"},
-		},
-		k8sClient: fakeClient,
-	}
+			agentTLSSecret := &corev1.Secret{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      agentTLSTestSecretName,
+					Namespace: ngfNamespace,
+				},
+				Data: map[string][]byte{"tls.crt": []byte("tls")},
+			}
 
-	gateway := &gatewayv1.Gateway{
-		ObjectMeta: metav1.ObjectMeta{
-			Name:      "gw",
-			Namespace: "default",
-		},
-		Spec: gatewayv1.GatewaySpec{
-			Listeners: []gatewayv1.Listener{{Port: 80}},
-		},
-	}
+			var fakeClient client.Client
+			switch {
+			case tc.hpaExists && tc.deploymentExists:
+				// Create a fake HPA and existing deployment
+				hpa := &autoscalingv2.HorizontalPodAutoscaler{
+					ObjectMeta: metav1.ObjectMeta{
+						Name:      "gw-nginx",
+						Namespace: "default",
+					},
+					Status: autoscalingv2.HorizontalPodAutoscalerStatus{
+						DesiredReplicas: 7,
+					},
+				}
+				existingDeployment := &appsv1.Deployment{
+					ObjectMeta: metav1.ObjectMeta{
+						Name:      "gw-nginx",
+						Namespace: "default",
+					},
+					Spec: appsv1.DeploymentSpec{
+						Replicas: tc.currentReplicas,
+						Selector: &metav1.LabelSelector{
+							MatchLabels: map[string]string{"app": "nginx"},
+						},
+					},
+				}
+				fakeClient = fake.NewFakeClient(agentTLSSecret, hpa, existingDeployment)
+			case tc.hpaExists:
+				hpa := &autoscalingv2.HorizontalPodAutoscaler{
+					ObjectMeta: metav1.ObjectMeta{
+						Name:      "gw-nginx",
+						Namespace: "default",
+					},
+					Status: autoscalingv2.HorizontalPodAutoscalerStatus{
+						DesiredReplicas: 7,
+					},
+				}
+				fakeClient = fake.NewFakeClient(agentTLSSecret, hpa)
+			default:
+				fakeClient = fake.NewFakeClient(agentTLSSecret)
+			}
 
-	resourceName := "gw-nginx"
-	nProxyCfg := &graph.EffectiveNginxProxy{
-		Kubernetes: &ngfAPIv1alpha2.KubernetesSpec{
-			Deployment: &ngfAPIv1alpha2.DeploymentSpec{
-				Replicas:    nil, // Should be overridden by HPA
-				Autoscaling: &ngfAPIv1alpha2.AutoscalingSpec{Enable: true},
-			},
-		},
-	}
+			provisioner := &NginxProvisioner{
+				cfg: Config{
+					GatewayPodConfig: &config.GatewayPodConfig{
+						Namespace: ngfNamespace,
+						Version:   "1.0.0",
+						Image:     "ngf-image",
+					},
+					AgentTLSSecretName: agentTLSTestSecretName,
+				},
+				baseLabelSelector: metav1.LabelSelector{
+					MatchLabels: map[string]string{"app": "nginx"},
+				},
+				k8sClient: fakeClient,
+			}
 
-	objects, err := provisioner.buildNginxResourceObjects(resourceName, gateway, nProxyCfg)
-	g.Expect(err).ToNot(HaveOccurred())
+			gateway := &gatewayv1.Gateway{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "gw",
+					Namespace: "default",
+				},
+				Spec: gatewayv1.GatewaySpec{
+					Listeners: []gatewayv1.Listener{{Port: 80}},
+				},
+			}
 
-	// Find the deployment object
-	var deployment *appsv1.Deployment
-	for _, obj := range objects {
-		if d, ok := obj.(*appsv1.Deployment); ok {
-			deployment = d
-			break
-		}
+			resourceName := "gw-nginx"
+			nProxyCfg := &graph.EffectiveNginxProxy{
+				Kubernetes: &ngfAPIv1alpha2.KubernetesSpec{
+					Deployment: &ngfAPIv1alpha2.DeploymentSpec{
+						Replicas:    tc.configReplicas,
+						Autoscaling: &ngfAPIv1alpha2.AutoscalingSpec{Enable: true},
+					},
+				},
+			}
+
+			objects, err := provisioner.buildNginxResourceObjects(resourceName, gateway, nProxyCfg)
+			g.Expect(err).ToNot(HaveOccurred())
+
+			// Find the deployment object
+			var deployment *appsv1.Deployment
+			for _, obj := range objects {
+				if d, ok := obj.(*appsv1.Deployment); ok {
+					deployment = d
+					break
+				}
+			}
+			g.Expect(deployment).ToNot(BeNil())
+
+			if tc.expectedNil {
+				g.Expect(deployment.Spec.Replicas).To(BeNil(), tc.description)
+			} else {
+				g.Expect(deployment.Spec.Replicas).ToNot(BeNil(), tc.description)
+				g.Expect(*deployment.Spec.Replicas).To(Equal(tc.expectedValue), tc.description)
+			}
+		})
 	}
-	g.Expect(deployment).ToNot(BeNil())
-	g.Expect(deployment.Spec.Replicas).ToNot(BeNil())
-	g.Expect(*deployment.Spec.Replicas).To(Equal(int32(7)))
 }
 
 func TestBuildNginxResourceObjects_Plus(t *testing.T) {
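The table-driven cases above can be exercised in isolation with go test's -run filter, e.g. go test ./internal/controller/provisioner -run TestBuildNginxResourceObjects_DeploymentReplicasFromHPA (package path assumed from the file tree above).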
