This repository has been archived by the owner on Sep 2, 2024. It is now read-only.

Even spread for HA implementation
Signed-off-by: Ansu Varghese <avarghese@us.ibm.com>
aavarghese committed Apr 29, 2021
1 parent 77c511a commit 2acb50a
Showing 12 changed files with 323 additions and 68 deletions.
22 changes: 21 additions & 1 deletion config/source/multi/deployments/adapter.yaml
@@ -20,7 +20,7 @@ metadata:
   labels:
     kafka.eventing.knative.dev/release: devel
 spec:
-  replicas: 1
+  replicas: 3
   selector:
     matchLabels: &labels
       control-plane: kafkasource-mt-adapter
@@ -42,6 +42,11 @@ spec:
             valueFrom:
               fieldRef:
                 fieldPath: metadata.name
+          - name: NODE_NAME
+            valueFrom:
+              fieldRef:
+                fieldPath: spec.nodeName
+
           # The maximum number of messages per second, per vreplica
           - name: VREPLICA_LIMITS_MPS
@@ -80,3 +85,18 @@ spec:
               containerPort: 8008

       terminationGracePeriodSeconds: 10
+      affinity:
+        podAntiAffinity:
+          preferredDuringSchedulingIgnoredDuringExecution:
+            - podAffinityTerm:
+                labelSelector:
+                  matchLabels:
+                    control-plane: kafkasource-mt-adapter
+                topologyKey: kubernetes.io/hostname
+              weight: 50
+            - podAffinityTerm:
+                labelSelector:
+                  matchLabels:
+                    control-plane: kafkasource-mt-adapter
+                topologyKey: topology.kubernetes.io/zone
+              weight: 50
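
Aside: a minimal sketch (not code from this commit) of how the adapter could resolve the zone of the node it runs on, assuming the NODE_NAME downward-API variable added above and the nodes read access granted in clusterrole.yaml below. The package and helper names are hypothetical.

package adapter // hypothetical package name

import (
    "context"
    "fmt"
    "os"

    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/client-go/kubernetes"
)

const zoneLabel = "topology.kubernetes.io/zone"

// zoneFromNode returns the zone of the node this pod is scheduled on.
// NODE_NAME is injected via the downward API (spec.nodeName) as in the
// diff above; reading the Node object needs the new "nodes" RBAC rule.
func zoneFromNode(ctx context.Context, kc kubernetes.Interface) (string, error) {
    nodeName := os.Getenv("NODE_NAME")
    if nodeName == "" {
        return "", fmt.Errorf("NODE_NAME environment variable is not set")
    }
    node, err := kc.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
    if err != nil {
        return "", fmt.Errorf("failed to get node %s: %w", nodeName, err)
    }
    return node.Labels[zoneLabel], nil
}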
10 changes: 9 additions & 1 deletion config/source/multi/deployments/controller.yaml
@@ -47,12 +47,20 @@ spec:

           # How often (in seconds) the autoscaler tries to scale down the statefulset.
           - name: AUTOSCALER_REFRESH_PERIOD
-            value: '10'
+            value: '100'

           # The number of virtual replicas this pod can handle.
           - name: POD_CAPACITY
             value: '100'

+          # The number of zones in a multi-zone cluster, for spreading vreplica placement with HA
+          - name: SCHEDULE_SPREAD_ZONES
+            value: '3'
+
+          # The scheduling policy type for placing vreplicas on pods (a boolean value temporarily set to true for even spread across zones)
+          - name: SCHEDULE_POLICY_TYPE
+            value: 'true'
+
           resources:
             requests:
               cpu: 20m
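
Aside: the controller code that consumes these two variables is not part of this excerpt. A hedged sketch of how they might be parsed and handed to the scheduler, whose state builder gains a policy flag per the test changes further down; the names and defaults here are illustrative.

package scheduler // hypothetical package name

import (
    "log"
    "os"
    "strconv"
)

// schedulerEnv holds illustrative parsed values of the two new variables.
type schedulerEnv struct {
    spreadZones  int32 // SCHEDULE_SPREAD_ZONES: zones available for spreading vreplicas
    evenSpreadHA bool  // SCHEDULE_POLICY_TYPE: "true" selects even spread across zones
}

func loadSchedulerEnv() schedulerEnv {
    env := schedulerEnv{spreadZones: 1, evenSpreadHA: false}
    if v := os.Getenv("SCHEDULE_SPREAD_ZONES"); v != "" {
        if n, err := strconv.ParseInt(v, 10, 32); err == nil {
            env.spreadZones = int32(n)
        } else {
            log.Printf("ignoring invalid SCHEDULE_SPREAD_ZONES %q: %v", v, err)
        }
    }
    if v := os.Getenv("SCHEDULE_POLICY_TYPE"); v != "" {
        if b, err := strconv.ParseBool(v); err == nil {
            env.evenSpreadHA = b
        } else {
            log.Printf("ignoring invalid SCHEDULE_POLICY_TYPE %q: %v", v, err)
        }
    }
    return env
}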
1 change: 1 addition & 0 deletions config/source/multi/roles/clusterrole.yaml
@@ -93,6 +93,7 @@ rules:
       - events
       - configmaps
       - secrets
+      - nodes
     verbs: *everything

   # let the webhook label the appropriate namespace
3 changes: 3 additions & 0 deletions pkg/apis/duck/v1alpha1/placement_types.go
@@ -50,6 +50,9 @@ type Placement struct {
    // PodName is the name of the pod where the resource is placed
    PodName string `json:"podName,omitempty"`

+   // ZoneName is the name of the zone where the pod is located
+   ZoneName string `json:"zoneName,omitempty"`
+
    // VReplicas is the number of virtual replicas assigned to in the pod
    VReplicas int32 `json:"vreplicas,omitempty"`
 }
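
Aside: the new ZoneName field lets placements be accounted per zone as well as per pod. A minimal sketch (not code from this commit) of the kind of tally an even-spread policy needs; the helper and its local mirror of the Placement shape are illustrative.

package scheduler // hypothetical package name

// placement mirrors the duck-typed Placement shown above.
type placement struct {
    PodName   string
    ZoneName  string
    VReplicas int32
}

// leastLoadedZone tallies vreplicas per zone and returns the zone with the
// fewest, so the next vreplica can be placed there for an even spread.
func leastLoadedZone(placements []placement, zones []string) string {
    counts := make(map[string]int32, len(zones))
    for _, z := range zones {
        counts[z] = 0
    }
    for _, p := range placements {
        counts[p.ZoneName] += p.VReplicas
    }
    best := ""
    bestCount := int32(-1)
    for _, z := range zones {
        if bestCount < 0 || counts[z] < bestCount {
            best, bestCount = z, counts[z]
        }
    }
    return best
}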
6 changes: 4 additions & 2 deletions pkg/common/scheduler/statefulset/autoscaler.go
@@ -124,8 +124,10 @@ func (a *autoscaler) doautoscale(ctx context.Context, attemptScaleDown bool, pen
    // The number of replicas may be lower than the last ordinal, for instance
    // when the statefulset is manually scaled down. In that case, replicas above
    // scale.Spec.Replicas have not been considered when scheduling vreplicas.
-   // Adjust accordingly
-   pending -= state.freeCapacity()
+   // Adjust accordingly (applicable only for maxFillUp scheduling policy and not for HA)
+   if !state.schedulePolicy {
+       pending -= state.freeCapacity()
+   }

    // Still need more?
    if pending > 0 {
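
Aside: an illustrative calculation of what the guard above implies (the rest of doautoscale is not in this excerpt): under the default max-fill-up policy, free capacity on existing pods offsets pending vreplicas before scaling up, while the even-spread HA policy skips that offset so the statefulset keeps scaling out and replicas stay spread.

package scheduler // hypothetical package name; a sketch, not the commit's doautoscale

// desiredReplicas shows the arithmetic implied by the guarded adjustment:
// each pod absorbs at most podCapacity vreplicas, and free capacity is only
// subtracted when the even-spread HA policy is disabled.
func desiredReplicas(current, pending, freeCapacity, podCapacity int32, evenSpreadHA bool) int32 {
    if !evenSpreadHA {
        pending -= freeCapacity
    }
    if pending <= 0 {
        return current
    }
    // Round up to whole pods.
    return current + (pending+podCapacity-1)/podCapacity
}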
4 changes: 2 additions & 2 deletions pkg/common/scheduler/statefulset/autoscaler_test.go
@@ -188,7 +188,7 @@ func TestAutoscaler(t *testing.T) {
            ctx, _ := setupFakeContext(t)

            vpodClient := tscheduler.NewVPodClient()
-           stateAccessor := newStateBuilder(logging.FromContext(ctx), vpodClient.List, 10)
+           stateAccessor := newStateBuilder(logging.FromContext(ctx), vpodClient.List, 10, false)

            sfsClient := kubeclient.Get(ctx).AppsV1().StatefulSets(testNs)
            _, err := sfsClient.Create(ctx, makeStatefulset(ctx, testNs, sfsName, tc.replicas), metav1.CreateOptions{})
@@ -231,7 +231,7 @@ func TestAutoscalerScaleDownToZero(t *testing.T) {
    })

    vpodClient := tscheduler.NewVPodClient()
-   stateAccessor := newStateBuilder(logging.FromContext(ctx), vpodClient.List, 10)
+   stateAccessor := newStateBuilder(logging.FromContext(ctx), vpodClient.List, 10, false)

    sfsClient := kubeclient.Get(ctx).AppsV1().StatefulSets(testNs)
    _, err := sfsClient.Create(ctx, makeStatefulset(ctx, testNs, sfsName, 10), metav1.CreateOptions{})
