Skip to content

Commit

Permalink
Merge pull request #496 from hardikdr/feature/autoscaler-annotation
Browse files Browse the repository at this point in the history
Inhibit scale-down by autoscaler during roll-outs.
  • Loading branch information
hardikdr authored Sep 1, 2020
2 parents 90f8b67 + f5a0478 commit 91c52d5
Show file tree
Hide file tree
Showing 14 changed files with 1,561 additions and 33 deletions.
1 change: 1 addition & 0 deletions cmd/machine-controller-manager/app/controllermanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@ func StartControllers(s *options.MCMServer,
s.NodeConditions,
s.BootstrapTokenAuthExtraGroups,
s.DeleteMigratedMachineClass,
s.AutoscalerScaleDownAnnotationDuringRollout,
)
if err != nil {
return err
Expand Down
3 changes: 3 additions & 0 deletions cmd/machine-controller-manager/app/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ func NewMCMServer() *MCMServer {
KubeAPIBurst: 30,
LeaderElection: leaderelectionconfig.DefaultLeaderElectionConfiguration(),
ControllerStartInterval: metav1.Duration{Duration: 0 * time.Second},
AutoscalerScaleDownAnnotationDuringRollout: true,
SafetyOptions: machineconfig.SafetyOptions{
SafetyUp: 2,
SafetyDown: 1,
Expand Down Expand Up @@ -115,6 +116,8 @@ func (s *MCMServer) AddFlags(fs *pflag.FlagSet) {
fs.StringVar(&s.BootstrapTokenAuthExtraGroups, "bootstrap-token-auth-extra-groups", s.BootstrapTokenAuthExtraGroups, "Comma-separated list of groups to set bootstrap token's \"auth-extra-groups\" field to")
fs.BoolVar(&s.DeleteMigratedMachineClass, "delete-migrated-machine-class", false, "Deletes any (provider specific) machine class that has the machine.sapcloud.io/migrated annotation")

fs.BoolVar(&s.AutoscalerScaleDownAnnotationDuringRollout, "autoscaler-scaldown-annotation-during-rollout", true, "Add cluster autoscaler scale-down disabled annotation during roll-out.")

leaderelectionconfig.BindFlags(&s.LeaderElection, fs)
// TODO: DefaultFeatureGate is global and it adds all k8s flags
// utilfeature.DefaultFeatureGate.AddFlag(fs)
Expand Down
59 changes: 31 additions & 28 deletions pkg/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,32 +81,34 @@ func NewController(
nodeConditions string,
bootstrapTokenAuthExtraGroups string,
deleteMigratedMachineClass bool,
autoscalerScaleDownAnnotationDuringRollout bool,
) (Controller, error) {
controller := &controller{
namespace: namespace,
controlMachineClient: controlMachineClient,
controlCoreClient: controlCoreClient,
targetCoreClient: targetCoreClient,
recorder: recorder,
expectations: NewUIDTrackingContExpectations(NewContExpectations()),
secretQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "secret"),
nodeQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "node"),
openStackMachineClassQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "openstackmachineclass"),
awsMachineClassQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "awsmachineclass"),
azureMachineClassQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "azuremachineclass"),
gcpMachineClassQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "gcpmachineclass"),
alicloudMachineClassQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "alicloudmachineclass"),
packetMachineClassQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "packetmachineclass"),
machineQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "machine"),
machineSetQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "machineset"),
machineDeploymentQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "machinedeployment"),
machineSafetyOrphanVMsQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "machinesafetyorphanvms"),
machineSafetyOvershootingQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "machinesafetyovershooting"),
machineSafetyAPIServerQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "machinesafetyapiserver"),
safetyOptions: safetyOptions,
nodeConditions: nodeConditions,
bootstrapTokenAuthExtraGroups: bootstrapTokenAuthExtraGroups,
deleteMigratedMachineClass: deleteMigratedMachineClass,
namespace: namespace,
controlMachineClient: controlMachineClient,
controlCoreClient: controlCoreClient,
targetCoreClient: targetCoreClient,
recorder: recorder,
expectations: NewUIDTrackingContExpectations(NewContExpectations()),
secretQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "secret"),
nodeQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "node"),
openStackMachineClassQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "openstackmachineclass"),
awsMachineClassQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "awsmachineclass"),
azureMachineClassQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "azuremachineclass"),
gcpMachineClassQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "gcpmachineclass"),
alicloudMachineClassQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "alicloudmachineclass"),
packetMachineClassQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "packetmachineclass"),
machineQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "machine"),
machineSetQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "machineset"),
machineDeploymentQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "machinedeployment"),
machineSafetyOrphanVMsQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "machinesafetyorphanvms"),
machineSafetyOvershootingQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "machinesafetyovershooting"),
machineSafetyAPIServerQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "machinesafetyapiserver"),
safetyOptions: safetyOptions,
nodeConditions: nodeConditions,
bootstrapTokenAuthExtraGroups: bootstrapTokenAuthExtraGroups,
deleteMigratedMachineClass: deleteMigratedMachineClass,
autoscalerScaleDownAnnotationDuringRollout: autoscalerScaleDownAnnotationDuringRollout,
}

controller.internalExternalScheme = runtime.NewScheme()
Expand Down Expand Up @@ -398,10 +400,11 @@ type Controller interface {

// controller is a concrete Controller.
type controller struct {
namespace string
nodeConditions string
bootstrapTokenAuthExtraGroups string
deleteMigratedMachineClass bool
namespace string
nodeConditions string
bootstrapTokenAuthExtraGroups string
deleteMigratedMachineClass bool
autoscalerScaleDownAnnotationDuringRollout bool

controlMachineClient machineapi.MachineV1alpha1Interface
controlCoreClient kubernetes.Interface
Expand Down
128 changes: 128 additions & 0 deletions pkg/controller/controller_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,12 @@ import (
"sync/atomic"
"time"

"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/validation"

"github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1"
machineapi "github.com/gardener/machine-controller-manager/pkg/client/clientset/versioned/typed/machine/v1alpha1"
annotationsutils "github.com/gardener/machine-controller-manager/pkg/util/annotations"
conditionutils "github.com/gardener/machine-controller-manager/pkg/util/conditions"
hashutil "github.com/gardener/machine-controller-manager/pkg/util/hash"
taintutils "github.com/gardener/machine-controller-manager/pkg/util/taints"
Expand Down Expand Up @@ -96,6 +98,13 @@ var Backoff = wait.Backoff{
Jitter: 1.0,
}

// UpdateAnnotationBackoff is the retry schedule handed to
// clientretry.RetryOnConflict by the node-annotation helpers in this file:
// at most 5 attempts, starting from a 100ms base delay with a jitter factor
// of 1.0. Factor is left unset, so it keeps its zero value.
var UpdateAnnotationBackoff = wait.Backoff{
	Duration: 100 * time.Millisecond,
	Jitter:   1.0,
	Steps:    5,
}

var (
// KeyFunc is the variable that stores the function that retrieves the object key from an object
KeyFunc = cache.DeletionHandlingMetaNamespaceKeyFunc
Expand Down Expand Up @@ -1111,3 +1120,122 @@ func ComputeHash(template *v1alpha1.MachineTemplateSpec, collisionCount *int32)

return machineTemplateSpecHasher.Sum32()
}

// AddOrUpdateAnnotationOnNode adds the given annotations to the node named
// nodeName, or updates them when they are already present.
//
// A nil annotations map is a no-op. The node is fetched inside a
// conflict-retry loop driven by UpdateAnnotationBackoff, and an Update call is
// only issued when annotationsutils.AddOrUpdateAnnotation reports that
// something changed. A node that cannot be found is logged and treated as
// success; every other error is returned to the caller.
func AddOrUpdateAnnotationOnNode(c clientset.Interface, nodeName string, annotations map[string]string) error {
	if annotations == nil {
		return nil
	}

	firstTry := true
	return clientretry.RetryOnConflict(UpdateAnnotationBackoff, func() error {
		var err error
		var oldNode *v1.Node
		// First we try getting node from the API server cache, as it's cheaper. If it fails
		// we get it from etcd to be sure to have fresh data.
		if firstTry {
			oldNode, err = c.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{ResourceVersion: "0"})
			firstTry = false
		} else {
			oldNode, err = c.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
		}
		if errors.IsNotFound(err) {
			klog.Warningf("Node %s not found while updating annotation. Err: %v", nodeName, err)
			return nil
		}
		if err != nil {
			return err
		}

		newNode, updated, err := annotationsutils.AddOrUpdateAnnotation(oldNode, annotations)
		// Surface helper failures instead of silently reporting success.
		if err != nil {
			return err
		}
		if !updated {
			return nil
		}
		return UpdateNodeAnnotations(c, nodeName, oldNode, newNode)
	})
}

// UpdateNodeAnnotations persists the annotations of newNode onto the node
// represented by oldNode using the nodes Update() method.
//
// The object sent to the API server is a deep copy of oldNode whose
// Annotations field is replaced by newNode.Annotations; neither argument is
// modified. nodeName is only used in the error message.
//
// A Conflict error from the API server is returned unwrapped so that callers
// running inside clientretry.RetryOnConflict can still recognise it and retry.
// Any other failure is returned wrapped with context.
func UpdateNodeAnnotations(c clientset.Interface, nodeName string, oldNode *v1.Node, newNode *v1.Node) error {
	newNodeClone := oldNode.DeepCopy()
	newNodeClone.Annotations = newNode.Annotations

	_, err := c.CoreV1().Nodes().Update(newNodeClone)
	if err != nil {
		// Wrapping with fmt.Errorf would hide the API status from
		// errors.IsConflict and defeat the retry loop of the callers.
		if errors.IsConflict(err) {
			return err
		}
		return fmt.Errorf("failed to create or update annotations for node %q: %v", nodeName, err)
	}

	return nil
}

// RemoveAnnotationsOffNode removes the given annotations from the node named
// nodeName. It is meant for cleaning up annotations temporarily added to a
// node.
//
// Nothing is done, and no API call is made, when annotations is nil or
// nodeName is empty. The node is fetched inside a conflict-retry loop driven
// by UpdateAnnotationBackoff, and an Update call is only issued when
// annotationsutils.RemoveAnnotation reports that something changed. A node
// that cannot be found is logged and treated as success; every other error is
// returned to the caller.
func RemoveAnnotationsOffNode(c clientset.Interface, nodeName string, annotations map[string]string) error {
	// Short circuit when there is nothing to remove or no node to act on,
	// to limit API calls.
	if annotations == nil || nodeName == "" {
		return nil
	}

	firstTry := true
	return clientretry.RetryOnConflict(UpdateAnnotationBackoff, func() error {
		var err error
		var oldNode *v1.Node
		// First we try getting node from the API server cache, as it's cheaper. If it fails
		// we get it from etcd to be sure to have fresh data.
		if firstTry {
			oldNode, err = c.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{ResourceVersion: "0"})
			firstTry = false
		} else {
			oldNode, err = c.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
		}
		if errors.IsNotFound(err) {
			klog.Warningf("Node %s not found while removing annotation. Err: %v", nodeName, err)
			return nil
		}
		if err != nil {
			return err
		}

		// Remove the annotations from the node.
		newNode, updated, err := annotationsutils.RemoveAnnotation(oldNode, annotations)
		// Surface helper failures instead of silently reporting success.
		if err != nil {
			return err
		}
		if !updated {
			return nil
		}
		return UpdateNodeAnnotations(c, nodeName, oldNode, newNode)
	})
}

// GetAnnotationsFromNode fetches the node named nodeName and returns its
// annotation map.
//
// An empty nodeName yields (nil, nil) without contacting the API server. A
// node that cannot be found is logged as a warning and also yields (nil, nil).
// Any other lookup error is returned as-is.
func GetAnnotationsFromNode(c clientset.Interface, nodeName string) (map[string]string, error) {
	// No node name means nothing to look up; skip the API call.
	if nodeName == "" {
		return nil, nil
	}

	fetched, getErr := c.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{})
	switch {
	case errors.IsNotFound(getErr):
		klog.Warningf("Node %s not found while fetching annotation. Err: %v", nodeName, getErr)
		return nil, nil
	case getErr != nil:
		return nil, getErr
	}

	return fetched.Annotations, nil
}
Loading

0 comments on commit 91c52d5

Please sign in to comment.