Skip to content

Commit

Permalink
aws: add scale_in_protection configuration (#807)
Browse files Browse the repository at this point in the history
When `scale_in_protection` is enabled, the AWS target plugin skips
instances that are protected from scale-in operations [1].

[1] https://docs.aws.amazon.com/autoscaling/ec2/userguide/ec2-auto-scaling-instance-protection.html
  • Loading branch information
lgfa29 authored Jan 2, 2024
1 parent 324fbc6 commit ac51534
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 8 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
IMPROVEMENTS:
* agent: Add `BlockQueryWaitTime` config option for Nomad API connectivity [[GH-755](https://github.com/hashicorp/nomad-autoscaler/pull/755)]
* metrics: Add `policy_id` and `target_name` labels to `scale.invoke.success_count` and `scale.invoke.error_count` metrics [[GH-814](https://github.com/hashicorp/nomad-autoscaler/pull/814)]
* plugin/target/aws: Add `scale_in_protection` configuration [[GH-807](https://github.com/hashicorp/nomad-autoscaler/pull/807)]
* scaleutils: Add new node filter option `node_pool` to select nodes by their node pool value [[GH-810](https://github.com/hashicorp/nomad-autoscaler/pull/810)]

BUG FIXES:
Expand Down
40 changes: 32 additions & 8 deletions plugins/builtin/target/aws-asg/plugin/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"context"
"errors"
"fmt"
"strconv"
"time"

"github.com/aws/aws-sdk-go-v2/aws"
Expand Down Expand Up @@ -96,8 +97,7 @@ func (t *TargetPlugin) scaleOut(ctx context.Context, asg *types.AutoScalingGroup
DesiredCapacity: aws.Int32(int32(count)),
}

// Ignore the response from Send() as its empty.

// Ignore the response from UpdateAutoScalingGroup() as it is empty.
_, err := t.asg.UpdateAutoScalingGroup(ctx, &input)
if err != nil {
return fmt.Errorf("failed to update Autoscaling Group: %v", err)
Expand All @@ -112,19 +112,43 @@ func (t *TargetPlugin) scaleOut(ctx context.Context, asg *types.AutoScalingGroup
}

func (t *TargetPlugin) scaleIn(ctx context.Context, asg *types.AutoScalingGroup, num int64, config map[string]string) error {
// Check if policy overrides the plugin configuration for
// scale_in_protection.
scaleInProtection := t.scaleInProtectionEnabled
if str, ok := config[configKeyScaleInProtection]; ok {
b, err := strconv.ParseBool(str)
if err != nil {
return fmt.Errorf("failed to parse %s value from policy: %w", configKeyScaleInProtection, err)
}
scaleInProtection = b
}

// Create a logger for this action to pre-populate useful information we
// would like on all log lines.
log := t.logger.With("action", "scale_in", "asg_name", *asg.AutoScalingGroupName)
log := t.logger.With(
"action", "scale_in",
"asg_name", *asg.AutoScalingGroupName,
"scale_in_protection", scaleInProtection,
)

// Find instance IDs in the target ASG and perform pre-scale tasks.
remoteIDs := []string{}
for _, inst := range asg.Instances {
if *inst.HealthStatus == "Healthy" && inst.LifecycleState == types.LifecycleStateInService {
log.Debug("found healthy instance", "instance_id", *inst.InstanceId)
remoteIDs = append(remoteIDs, *inst.InstanceId)
} else {
log.Debug("skipping instance", "instance_id", *inst.InstanceId, "health_status", *inst.HealthStatus, "lifecycle_state", inst.LifecycleState)
skip := *inst.HealthStatus != "Healthy" ||
inst.LifecycleState != types.LifecycleStateInService ||
(scaleInProtection && *inst.ProtectedFromScaleIn)
if skip {
log.Debug("skipping instance",
"instance_id", *inst.InstanceId,
"health_status", *inst.HealthStatus,
"lifecycle_state", inst.LifecycleState,
"protected_from_scale_in", *inst.ProtectedFromScaleIn,
)
continue
}

log.Debug("found eligible instance", "instance_id", *inst.InstanceId)
remoteIDs = append(remoteIDs, *inst.InstanceId)
}

ids, err := t.clusterUtils.RunPreScaleInTasksWithRemoteCheck(ctx, config, remoteIDs, int(num))
Expand Down
11 changes: 11 additions & 0 deletions plugins/builtin/target/aws-asg/plugin/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ const (
configKeyASGName = "aws_asg_name"
configKeyCredentialProvider = "aws_credential_provider"
configKeyRetryAttempts = "retry_attempts"
configKeyScaleInProtection = "scale_in_protection"

// configValues are the default values used when a configuration key is not
// supplied by the operator that are specific to the plugin.
Expand Down Expand Up @@ -68,6 +69,10 @@ type TargetPlugin struct {
// given ASG state should be retried.
retryAttempts int

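// scaleInProtectionEnabled is the plugin-level default for whether
// instances that are protected from scale-in are skipped when scaling in.
// A policy may override it with its own scale_in_protection value.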
scaleInProtectionEnabled bool

// clusterUtils provides general cluster scaling utilities for querying the
// state of node pools and performing scaling tasks.
clusterUtils *scaleutils.ClusterScaleUtils
Expand Down Expand Up @@ -105,6 +110,12 @@ func (t *TargetPlugin) SetConfig(config map[string]string) error {
}
t.retryAttempts = retryLimit

scaleInProtection, err := strconv.ParseBool(getConfigValue(config, configKeyScaleInProtection, "false"))
if err != nil {
return err
}
t.scaleInProtectionEnabled = scaleInProtection

return nil
}

Expand Down

0 comments on commit ac51534

Please sign in to comment.