Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: upstream Azure managed instance cache refactor #7116

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package azure

import (
"context"
"strings"

"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2022-08-01/compute"
"github.com/Azure/go-autorest/autorest/azure"

"k8s.io/klog/v2"
"sigs.k8s.io/cloud-provider-azure/pkg/retry"
)

// When Azure Dedicated Host is enabled or using isolated vm skus, force deleting a VMSS fails with the following error:
//
// "predominantErrorDetail": {
// "innererror": {
// "internalErrorCode": "OperationNotAllowedOnResourceThatManagesUpdatesWithMaintenanceControl"
// },
// "code": "OperationNotAllowed",
// "message": "Operation 'ForceDelete' is not allowed on resource 'aks-newnp-11436513-vmss' since it manages updates using maintenance control."
// },
//
// A programmatically way to determine if a VM size is isolated or not has not been found. The isolated VM documentation:
// https://docs.microsoft.com/en-us/azure/virtual-machines/isolation
// has the current list of isolated VM sizes, but new isolated VM size could be introduced in the future.
//
// As a result of not being able to find out if a VM size is isolated or not, we'll do the following:
// - if scaleSet has isolated vm size or dedicated host, disable forDelete
// - else use forceDelete
// - if new isolated sku were added or dedicatedHost was not updated properly, this forceDelete call will fail with above error.
// In that case, call normal delete (fall-back)

var isolatedVMSizes = map[string]bool{
strings.ToLower("Standard_E80ids_v4"): true,
strings.ToLower("Standard_E80is_v4"): true,
strings.ToLower("Standard_E104i_v5"): true,
strings.ToLower("Standard_E104is_v5"): true,
strings.ToLower("Standard_E104id_v5"): true,
strings.ToLower("Standard_E104ids_v5"): true,
strings.ToLower("Standard_M192is_v2"): true,
strings.ToLower("Standard_M192ims_v2"): true,
strings.ToLower("Standard_M192ids_v2"): true,
strings.ToLower("Standard_M192idms_v2"): true,
strings.ToLower("Standard_F72s_v2"): true,
strings.ToLower("Standard_M128ms"): true,
}

func (scaleSet *ScaleSet) deleteInstances(ctx context.Context, requiredIds *compute.VirtualMachineScaleSetVMInstanceRequiredIDs, commonAsgId string) (*azure.Future, *retry.Error) {
scaleSet.instanceMutex.Lock()
defer scaleSet.instanceMutex.Unlock()

skuName := scaleSet.getSKU()
resourceGroup := scaleSet.manager.config.ResourceGroup
forceDelete := shouldForceDelete(skuName, scaleSet)
future, rerr := scaleSet.manager.azClient.virtualMachineScaleSetsClient.DeleteInstancesAsync(ctx, resourceGroup, commonAsgId, *requiredIds, forceDelete)
if forceDelete && isOperationNotAllowed(rerr) {
klog.Infof("falling back to normal delete for instances %v for %s", requiredIds.InstanceIds, scaleSet.Name)
return scaleSet.manager.azClient.virtualMachineScaleSetsClient.DeleteInstancesAsync(ctx, resourceGroup, commonAsgId, *requiredIds, false)
}
return future, rerr
}

func shouldForceDelete(skuName string, scaleSet *ScaleSet) bool {
return scaleSet.enableForceDelete && !isolatedVMSizes[strings.ToLower(skuName)] && !scaleSet.dedicatedHost
}

func isOperationNotAllowed(rerr *retry.Error) bool {
return rerr != nil && rerr.ServiceErrorCode() == retry.OperationNotAllowed
}
Comment on lines +84 to +86
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should move these to azure sdk for go extensions

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mean we have an equivalent of this in a library?
Also, I don't think small things like this is necessary to be abstracted in the first place.

But, whatever idea we have, let's save it for post-defork quality improvement. It doesn't look worth it to do that now---in this case.

Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package azure

import (
"net/http"
"testing"

"github.com/Azure/go-autorest/autorest/azure"
"github.com/stretchr/testify/assert"
"sigs.k8s.io/cloud-provider-azure/pkg/retry"
)

func TestShouldForceDelete(t *testing.T) {
skuName := "test-vmssSku"

t.Run("should return true", func(t *testing.T) {
scaleSet := &ScaleSet{}
scaleSet.enableForceDelete = true
assert.Equal(t, shouldForceDelete(skuName, scaleSet), true)
})

t.Run("should return false because of dedicated hosts", func(t *testing.T) {
scaleSet := &ScaleSet{}
scaleSet.enableForceDelete = true
scaleSet.dedicatedHost = true
assert.Equal(t, shouldForceDelete(skuName, scaleSet), false)
})

t.Run("should return false because of isolated sku", func(t *testing.T) {
scaleSet := &ScaleSet{}
scaleSet.enableForceDelete = true
skuName = "Standard_F72s_v2" // belongs to the map isolatedVMSizes
assert.Equal(t, shouldForceDelete(skuName, scaleSet), false)
})

}

func TestIsOperationNotAllowed(t *testing.T) {
t.Run("should return false because it's not OperationNotAllowed error", func(t *testing.T) {
error := &retry.Error{
HTTPStatusCode: http.StatusBadRequest,
}
assert.Equal(t, isOperationNotAllowed(error), false)
})

t.Run("should return false because error is nil", func(t *testing.T) {
assert.Equal(t, isOperationNotAllowed(nil), false)
})

t.Run("should return true if error is OperationNotAllowed", func(t *testing.T) {
sre := &azure.ServiceError{
Code: retry.OperationNotAllowed,
Message: "error-message",
}
error := &retry.Error{
RawError: sre,
}
assert.Equal(t, isOperationNotAllowed(error), false)
})

// It is difficult to condition the case where return error matched expected error string for forceDelete and the
// function should return true.

}
8 changes: 4 additions & 4 deletions cluster-autoscaler/cloudprovider/azure/azure_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -703,8 +703,8 @@ func TestGetFilteredAutoscalingGroupsVmss(t *testing.T) {
enableForceDelete: manager.config.EnableForceDelete,
curSize: 3,
sizeRefreshPeriod: manager.azureCache.refreshInterval,
instancesRefreshPeriod: defaultVmssInstancesRefreshPeriod,
getVmssSizeRefreshPeriod: time.Duration(VmssSizeRefreshPeriodDefault) * time.Second,
getVmssSizeRefreshPeriod: time.Duration(manager.azureCache.refreshInterval) * time.Second,
InstanceCache: InstanceCache{instancesRefreshPeriod: defaultVmssInstancesRefreshPeriod},
}}
assert.True(t, assert.ObjectsAreEqualValues(expectedAsgs, asgs), "expected %#v, but found: %#v", expectedAsgs, asgs)
}
Expand Down Expand Up @@ -751,8 +751,8 @@ func TestGetFilteredAutoscalingGroupsVmssWithConfiguredSizes(t *testing.T) {
enableForceDelete: manager.config.EnableForceDelete,
curSize: 3,
sizeRefreshPeriod: manager.azureCache.refreshInterval,
instancesRefreshPeriod: defaultVmssInstancesRefreshPeriod,
getVmssSizeRefreshPeriod: time.Duration(VmssSizeRefreshPeriodDefault) * time.Second,
getVmssSizeRefreshPeriod: time.Duration(manager.azureCache.refreshInterval) * time.Second,
InstanceCache: InstanceCache{instancesRefreshPeriod: defaultVmssInstancesRefreshPeriod},
}}
assert.True(t, assert.ObjectsAreEqualValues(expectedAsgs, asgs), "expected %#v, but found: %#v", expectedAsgs, asgs)
}
Expand Down
Loading
Loading