Skip to content

Commit

Permalink
add env var to disable leaked ENI cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
jdn5126 committed May 5, 2023
1 parent 7b64c73 commit 236eaa2
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 25 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -658,6 +658,14 @@ Default: empty

Specify the EC2 endpoint to use. This is useful if you are using a custom endpoint for EC2. For example, if you are using a proxy for EC2, you can set this to the proxy endpoint. Any kind of URL or IP address is valid such as `https://localhost:8080` or `http://ec2.us-west-2.customaws.com`. If this is not set, the default EC2 endpoint will be used.

#### `DISABLE_LEAKED_ENI_CLEANUP` (v1.13.0+)

Type: Boolean as a String

Default: `false`

On IPv4 clusters, IPAMD schedules an hourly background task per node that cleans up leaked ENIs. Setting this environment variable to `true` disables that job. The primary motivation to disable this task is to decrease the amount of EC2 API calls made from each node.

### VPC CNI Feature Matrix

IP Mode | Secondary IP Mode | Prefix Delegation | Security Groups Per Pod | WARM & MIN IP/Prefix Targets | External SNAT
Expand Down
5 changes: 2 additions & 3 deletions pkg/awsutils/awsutils.go
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ func (i instrumentedIMDS) GetMetadataWithContext(ctx context.Context, p string)
}

// New creates an EC2InstanceMetadataCache
func New(useCustomNetworking, disableENIProvisioning, v4Enabled, v6Enabled bool, eventRecorder *eventrecorder.EventRecorder) (*EC2InstanceMetadataCache, error) {
func New(useCustomNetworking, disableLeakedENICleanup, v4Enabled, v6Enabled bool, eventRecorder *eventrecorder.EventRecorder) (*EC2InstanceMetadataCache, error) {
//ctx is passed to initWithEC2Metadata func to cancel spawned go-routines when tests are run
ctx := context.Background()

Expand Down Expand Up @@ -437,7 +437,7 @@ func New(useCustomNetworking, disableENIProvisioning, v4Enabled, v6Enabled bool,
}

// Clean up leaked ENIs in the background
if !disableENIProvisioning {
if !disableLeakedENICleanup {
go wait.Forever(cache.cleanUpLeakedENIs, time.Hour)
}

Expand Down Expand Up @@ -1807,7 +1807,6 @@ func (cache *EC2InstanceMetadataCache) getLeakedENIs() ([]*ec2.NetworkInterface,
}

err := cache.getENIsFromPaginatedDescribeNetworkInterfaces(input, filterFn)

if err != nil {
return nil, errors.Wrap(err, "awsutils: unable to obtain filtered list of network interfaces")
}
Expand Down
39 changes: 19 additions & 20 deletions pkg/ipamd/ipamd.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,12 @@ const (
// should not manage it in any form.
eniNoManageTagKey = "node.k8s.amazonaws.com/no_manage"

// disableENIProvisioning is used to specify that ENI doesn't need to be synced during initializing a pod.
// disableENIProvisioning is used to specify that ENIs do not need to be synced during initializing a pod.
envDisableENIProvisioning = "DISABLE_NETWORK_RESOURCE_PROVISIONING"

// disableLeakedENICleanup is used to specify that the task checking and cleaning up leaked ENIs should not be run.
envDisableLeakedENICleanup = "DISABLE_LEAKED_ENI_CLEANUP"

// Specify where ipam should persist its current IP<->container allocations.
envBackingStorePath = "AWS_VPC_K8S_CNI_BACKING_STORE"
defaultBackingStorePath = "/var/run/aws-node/ipam.json"
Expand Down Expand Up @@ -382,7 +385,6 @@ func (c *IPAMContext) inInsufficientCidrCoolingPeriod() bool {
func New(rawK8SClient client.Client, cachedK8SClient client.Client) (*IPAMContext, error) {
prometheusRegister()
c := &IPAMContext{}

c.rawK8SClient = rawK8SClient
c.cachedK8SClient = cachedK8SClient
c.networkClient = networkutils.New()
Expand All @@ -391,10 +393,10 @@ func New(rawK8SClient client.Client, cachedK8SClient client.Client) (*IPAMContex
c.enablePrefixDelegation = usePrefixDelegation()
c.enableIPv4 = isIPv4Enabled()
c.enableIPv6 = isIPv6Enabled()
c.disableENIProvisioning = disableENIProvisioning()
disableLeakedENICleanup := c.disableENIProvisioning || disableLeakedENICleanup()

c.disableENIProvisioning = disablingENIProvisioning()

client, err := awsutils.New(c.useCustomNetworking, c.disableENIProvisioning, c.enableIPv4, c.enableIPv6, c.eventRecorder)
client, err := awsutils.New(c.useCustomNetworking, disableLeakedENICleanup, c.enableIPv4, c.enableIPv6, c.eventRecorder)
if err != nil {
return nil, errors.Wrap(err, "ipamd: can not initialize with AWS SDK interface")
}
Expand Down Expand Up @@ -759,18 +761,16 @@ func (c *IPAMContext) tryFreeENI() {
// tryUnassignIPsorPrefixesFromAll determines if there are IPs to free when we have extra IPs beyond the target and warmIPTargetDefined
// is enabled, deallocate extra IP addresses
func (c *IPAMContext) tryUnassignCidrsFromAll() {

_, over, warmTargetDefined := c.datastoreTargetState()

//WARM IP targets not defined then check if WARM_PREFIX_TARGET is defined.
// If WARM IP targets are not defined, check if WARM_PREFIX_TARGET is defined.
if !warmTargetDefined {
over = c.computeExtraPrefixesOverWarmTarget()
}

if over > 0 {
eniInfos := c.dataStore.GetENIInfos()
for eniID := range eniInfos.ENIs {
//Either returns prefixes or IPs [Cidrs]
// Either returns prefixes or IPs [Cidrs]
cidrs := c.dataStore.FindFreeableCidrs(eniID)
if cidrs == nil {
log.Errorf("Error finding unassigned IPs for ENI %s", eniID)
Expand All @@ -781,15 +781,14 @@ func (c *IPAMContext) tryUnassignCidrsFromAll() {
// this ENI. In that case we should only free the number of available Cidrs.
numFreeable := min(over, len(cidrs))
cidrs = cidrs[:numFreeable]

if len(cidrs) == 0 {
continue
}

// Delete IPs from datastore
var deletedCidrs []datastore.CidrInfo
for _, toDelete := range cidrs {
// Don't force the delete, since a freeable Cidrs might have been assigned to a pod
// Do not force the delete, since a freeable Cidr might have been assigned to a pod
// before we get around to deleting it.
err := c.dataStore.DelIPv4CidrFromStore(eniID, toDelete.Cidr, false /* force */)
if err != nil {
Expand All @@ -801,7 +800,7 @@ func (c *IPAMContext) tryUnassignCidrsFromAll() {
}
}

// Deallocate Cidrs from the instance if they aren't used by pods.
// Deallocate Cidrs from the instance if they are not used by pods.
c.DeallocCidrs(eniID, deletedCidrs)
}
}
Expand Down Expand Up @@ -887,7 +886,6 @@ func (c *IPAMContext) tryAllocateENI(ctx context.Context) error {

if c.useCustomNetworking {
eniCfg, err := eniconfig.MyENIConfig(ctx, c.cachedK8SClient)

if err != nil {
log.Errorf("Failed to get pod ENI config")
return err
Expand All @@ -909,7 +907,6 @@ func (c *IPAMContext) tryAllocateENI(ctx context.Context) error {
}

resourcesToAllocate := c.GetENIResourcesToAllocate()

_, err = c.awsClient.AllocIPAddresses(eni, resourcesToAllocate)
if err != nil {
log.Warnf("Failed to allocate %d IP addresses on an ENI: %v", resourcesToAllocate, err)
Expand Down Expand Up @@ -1771,10 +1768,15 @@ func getMinimumIPTarget() int {
return noMinimumIPTarget
}

func disablingENIProvisioning() bool {
func disableENIProvisioning() bool {
return getEnvBoolWithDefault(envDisableENIProvisioning, false)
}

func disableLeakedENICleanup() bool {
// If IPv6 is enabled, leaked ENI cleanup is disabled since no ENIs are attached
return isIPv6Enabled() || getEnvBoolWithDefault(envDisableLeakedENICleanup, false)
}

func enablePodENI() bool {
return getEnvBoolWithDefault(envEnablePodENI, false)
}
Expand Down Expand Up @@ -1832,7 +1834,6 @@ func (c *IPAMContext) filterUnmanagedENIs(enis []awsutils.ENIMetadata) []awsutil
// accounting for the MINIMUM_IP_TARGET
// With prefix delegation this function determines the number of Prefixes `short` or `over`
func (c *IPAMContext) datastoreTargetState() (short int, over int, enabled bool) {

if c.warmIPTarget == noWarmIPTarget && c.minimumIPTarget == noMinimumIPTarget {
// there is no WARM_IP_TARGET defined and no MINIMUM_IP_TARGET, fallback to use all IP addresses on ENI
return 0, 0, false
Expand All @@ -1854,10 +1855,8 @@ func (c *IPAMContext) datastoreTargetState() (short int, over int, enabled bool)
over = max(min(over, stats.TotalIPs-c.minimumIPTarget), 0)

if c.enablePrefixDelegation {

//short : number of IPs short to reach warm targets
//over : number of IPs over the warm targets

// short : number of IPs short to reach warm targets
// over : number of IPs over the warm targets
_, numIPsPerPrefix, _ := datastore.GetPrefixDelegationDefaults()
// Number of prefixes IPAMD is short of to achieve warm targets
shortPrefix := datastore.DivCeil(short, numIPsPerPrefix)
Expand Down
4 changes: 2 additions & 2 deletions pkg/ipamd/ipamd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1503,11 +1503,11 @@ func TestDisablingENIProvisioning(t *testing.T) {
defer m.ctrl.Finish()

_ = os.Setenv(envDisableENIProvisioning, "true")
disabled := disablingENIProvisioning()
disabled := disableENIProvisioning()
assert.True(t, disabled)

_ = os.Unsetenv(envDisableENIProvisioning)
disabled = disablingENIProvisioning()
disabled = disableENIProvisioning()
assert.False(t, disabled)
}

Expand Down

0 comments on commit 236eaa2

Please sign in to comment.