Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use ProviderID as a fallback for fetching the VM, InitializeMachine returns Uninitialized error code if VM is not found. #173

Merged
merged 7 commits into from
Sep 13, 2024
50 changes: 26 additions & 24 deletions pkg/aws/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ func (d *Driver) CreateMachine(_ context.Context, req *driver.CreateMachineReque

// Check if the MachineClass is for the supported cloud provider
if req.MachineClass.Provider != ProviderAWS {
err = fmt.Errorf("Requested for Provider '%s', we only support '%s'", req.MachineClass.Provider, ProviderAWS)
err = fmt.Errorf("requested for Provider '%s', we only support '%s'", req.MachineClass.Provider, ProviderAWS)
return nil, status.Error(codes.InvalidArgument, err.Error())
}

Expand Down Expand Up @@ -259,18 +259,20 @@ func (d *Driver) InitializeMachine(_ context.Context, request *driver.Initialize
if err != nil {
return nil, err
}
instances, err := d.getInstancesFromMachineName(request.Machine.Name, providerSpec, request.Secret)
svc, err := d.createSVC(request.Secret, providerSpec.Region)
if err != nil {
return nil, status.Error(codes.Uninitialized, err.Error())
}
instances, err := d.getMatchingInstancesForMachine(request.Machine, svc, providerSpec.Tags)
if err != nil {
if isNotFoundError(err) {
klog.Errorf("Could not get matching instance for uninitialized machine %q from provider: %s", request.Machine.Name, err)
return nil, status.Error(codes.Uninitialized, err.Error())
}
return nil, err
}
targetInstance := instances[0]
providerID := encodeInstanceID(providerSpec.Region, *targetInstance.InstanceId)

svc, err := d.createSVC(request.Secret, providerSpec.Region)
if err != nil {
return nil, status.Error(codes.Uninitialized, err.Error())
}

// if SrcAnDstCheckEnabled is false then disable the SrcAndDestCheck on running NAT instance
if providerSpec.SrcAndDstChecksEnabled != nil && !*providerSpec.SrcAndDstChecksEnabled && *targetInstance.SourceDestCheck {
klog.V(3).Infof("Disabling SourceDestCheck on VM %q associated with machine %s", providerID, request.Machine.Name)
Expand All @@ -279,7 +281,6 @@ func (d *Driver) InitializeMachine(_ context.Context, request *driver.Initialize
return nil, status.Error(codes.Uninitialized, err.Error())
}
}

for i, netIf := range providerSpec.NetworkInterfaces {
for _, instanceNetIf := range targetInstance.NetworkInterfaces {
if netIf.Ipv6PrefixCount != nil && *instanceNetIf.Attachment.DeviceIndex == int64(i) {
Expand Down Expand Up @@ -333,7 +334,7 @@ func (d *Driver) DeleteMachine(_ context.Context, req *driver.DeleteMachineReque

// Check if the MachineClass is for the supported cloud provider
if req.MachineClass.Provider != ProviderAWS {
err = fmt.Errorf("Requested for Provider '%s', we only support '%s'", req.MachineClass.Provider, ProviderAWS)
err = fmt.Errorf("requested for Provider '%s', we only support '%s'", req.MachineClass.Provider, ProviderAWS)
return nil, status.Error(codes.InvalidArgument, err.Error())
}

Expand Down Expand Up @@ -368,10 +369,9 @@ func (d *Driver) DeleteMachine(_ context.Context, req *driver.DeleteMachineReque

} else {
// ProviderID doesn't exist, hence check for any existing machine and then delete if exists
instances, err = d.getInstancesFromMachineName(req.Machine.Name, providerSpec, req.Secret)
instances, err = getMachineInstancesByTagsAndStatus(svc, req.Machine.Name, providerSpec.Tags)
if err != nil {
errorStatus, ok := status.FromError(err)
if ok && errorStatus.Code() == codes.NotFound {
if isNotFoundError(err) {
klog.V(3).Infof("No matching VM found. Termination successful for machine object %q", req.Machine.Name)
return &driver.DeleteMachineResponse{}, nil
}
Expand Down Expand Up @@ -403,24 +403,27 @@ func (d *Driver) GetMachineStatus(_ context.Context, req *driver.GetMachineStatu

// Check if the MachineClass is for the supported cloud provider
if req.MachineClass.Provider != ProviderAWS {
err = fmt.Errorf("Requested for Provider '%s', we only support '%s'", req.MachineClass.Provider, ProviderAWS)
err = fmt.Errorf("requested for Provider '%s', we only support '%s'", req.MachineClass.Provider, ProviderAWS)
return nil, status.Error(codes.InvalidArgument, err.Error())
}

// Log messages to track start and end of request
klog.V(3).Infof("Get request has been recieved for %q", req.Machine.Name)

providerSpec, err := decodeProviderSpecAndSecret(machineClass, secret)
if err != nil {
return nil, err
}

instances, err := d.getInstancesFromMachineName(req.Machine.Name, providerSpec, secret)
svc, err := d.createSVC(secret, providerSpec.Region)
if err != nil {
return nil, status.Error(codes.Internal, err.Error())
}

instances, err := d.getMatchingInstancesForMachine(req.Machine, svc, providerSpec.Tags)
if err != nil {
return nil, err
} else if len(instances) > 1 {
instanceIDs := []string{}
instanceIDs := make([]string, 0, len(instances))
for _, instance := range instances {
instanceIDs = append(instanceIDs, *instance.InstanceId)
}
Expand All @@ -430,22 +433,21 @@ func (d *Driver) GetMachineStatus(_ context.Context, req *driver.GetMachineStatu
}

requiredInstance := instances[0]
response := &driver.GetMachineStatusResponse{
NodeName: *requiredInstance.PrivateDnsName,
ProviderID: encodeInstanceID(providerSpec.Region, *requiredInstance.InstanceId),
}

// if SrcAnDstCheckEnabled is false then check attribute on instance and return Uninitialized error if not matching.
if providerSpec.SrcAndDstChecksEnabled != nil && !*providerSpec.SrcAndDstChecksEnabled {
if *requiredInstance.SourceDestCheck {
msg := fmt.Sprintf("VM %q associated with machine %q has SourceDestCheck=%t despite providerSpec.SrcAndDstChecksEnabled=%t",
*requiredInstance.InstanceId, req.Machine.Name, *requiredInstance.SourceDestCheck, *providerSpec.SrcAndDstChecksEnabled)
klog.Warning(msg)
return nil, status.Error(codes.Uninitialized, msg)
return response, status.Error(codes.Uninitialized, msg)
}
}

response := &driver.GetMachineStatusResponse{
NodeName: *requiredInstance.PrivateDnsName,
ProviderID: encodeInstanceID(providerSpec.Region, *requiredInstance.InstanceId),
}

klog.V(3).Infof("Machine get request has been processed successfully for %q", req.Machine.Name)
return response, nil
}
Expand All @@ -461,7 +463,7 @@ func (d *Driver) ListMachines(_ context.Context, req *driver.ListMachinesRequest

// Check if the MachineClass is for the supported cloud provider
if req.MachineClass.Provider != ProviderAWS {
err = fmt.Errorf("Requested for Provider '%s', we only support '%s'", req.MachineClass.Provider, ProviderAWS)
err = fmt.Errorf("requested for Provider '%s', we only support '%s'", req.MachineClass.Provider, ProviderAWS)
return nil, status.Error(codes.InvalidArgument, err.Error())
}

Expand Down
22 changes: 11 additions & 11 deletions pkg/aws/core_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@ import (
)

const (
awsAccessKeyIDIsMissing = "machine codes error: code = [InvalidArgument] message = [Error while validating ProviderSpec secretRef.AWSAccessKeyID: Required value: Mention atleast providerAccessKeyId or accessKeyID]"
awsSecretAccessKeyIsMissing = "machine codes error: code = [InvalidArgument] message = [Error while validating ProviderSpec secretRef.AWSSecretAccessKey: Required value: Mention atleast providerSecretAccessKey or secretAccessKey]"
awsSecretAccessKeyNUserDataAreMissing = "machine codes error: code = [InvalidArgument] message = [Error while validating ProviderSpec [secretRef.AWSSecretAccessKey: Required value: Mention atleast providerSecretAccessKey or secretAccessKey, secretRef.userData: Required value: Mention userData]]"
regionNAMIMissing = "machine codes error: code = [InvalidArgument] message = [Error while validating ProviderSpec [providerSpec.ami: Required value: AMI is required, providerSpec.region: Required value: Region is required]]"
userDataIsMissing = "machine codes error: code = [InvalidArgument] message = [Error while validating ProviderSpec secretRef.userData: Required value: Mention userData]"
awsAccessKeyIDIsMissing = "machine codes error: code = [InvalidArgument] message = [error while validating ProviderSpec secretRef.AWSAccessKeyID: Required value: Mention atleast providerAccessKeyId or accessKeyID]"
awsSecretAccessKeyIsMissing = "machine codes error: code = [InvalidArgument] message = [error while validating ProviderSpec secretRef.AWSSecretAccessKey: Required value: Mention atleast providerSecretAccessKey or secretAccessKey]"
awsSecretAccessKeyNUserDataAreMissing = "machine codes error: code = [InvalidArgument] message = [error while validating ProviderSpec [secretRef.AWSSecretAccessKey: Required value: Mention atleast providerSecretAccessKey or secretAccessKey, secretRef.userData: Required value: Mention userData]]"
regionNAMIMissing = "machine codes error: code = [InvalidArgument] message = [error while validating ProviderSpec [providerSpec.ami: Required value: AMI is required, providerSpec.region: Required value: Region is required]]"
userDataIsMissing = "machine codes error: code = [InvalidArgument] message = [error while validating ProviderSpec secretRef.userData: Required value: Mention userData]"
)

var _ = Describe("MachineServer", func() {
Expand Down Expand Up @@ -114,7 +114,7 @@ var _ = Describe("MachineServer", func() {
},
expect: expect{
errToHaveOccurred: true,
errMessage: "machine codes error: code = [InvalidArgument] message = [Requested for Provider 'azure', we only support 'AWS']",
errMessage: "machine codes error: code = [InvalidArgument] message = [requested for Provider 'azure', we only support 'AWS']",
},
}),
Entry("Machine creation request with IAM ARN", &data{
Expand Down Expand Up @@ -191,7 +191,7 @@ var _ = Describe("MachineServer", func() {
},
expect: expect{
errToHaveOccurred: true,
errMessage: "machine codes error: code = [InvalidArgument] message = [Error while validating ProviderSpec providerSpec.capacityReservation: Required value: CapacityReservationResourceGroupArn or CapacityReservationId are optional but only one should be used]",
errMessage: "machine codes error: code = [InvalidArgument] message = [error while validating ProviderSpec providerSpec.capacityReservation: Required value: CapacityReservationResourceGroupArn or CapacityReservationId are optional but only one should be used]",
},
}),
Entry("Machine creation request for an AWS Capacity Reservation Group with capacityReservationPreference only", &data{
Expand Down Expand Up @@ -681,7 +681,7 @@ var _ = Describe("MachineServer", func() {
},
expect: expect{
errToHaveOccurred: true,
errMessage: "machine codes error: code = [InvalidArgument] message = [Requested for Provider 'azure', we only support 'AWS']",
errMessage: "machine codes error: code = [InvalidArgument] message = [requested for Provider 'azure', we only support 'AWS']",
},
}),
Entry("providerAccessKeyId missing for secret", &data{
Expand Down Expand Up @@ -891,7 +891,7 @@ var _ = Describe("MachineServer", func() {
},
expect: expect{
errToHaveOccurred: true,
errMessage: "machine codes error: code = [InvalidArgument] message = [Requested for Provider 'azure', we only support 'AWS']",
errMessage: "machine codes error: code = [InvalidArgument] message = [requested for Provider 'azure', we only support 'AWS']",
},
}),
Entry("providerAccessKeyId missing for secret", &data{
Expand Down Expand Up @@ -1093,7 +1093,7 @@ var _ = Describe("MachineServer", func() {
},
expect: expect{
errToHaveOccurred: true,
errMessage: "machine codes error: code = [InvalidArgument] message = [Requested for Provider 'azure', we only support 'AWS']",
errMessage: "machine codes error: code = [InvalidArgument] message = [requested for Provider 'azure', we only support 'AWS']",
},
}),
Entry("Unexpected end of JSON input", &data{
Expand Down Expand Up @@ -1198,7 +1198,7 @@ var _ = Describe("MachineServer", func() {
},
expect: expect{
errToHaveOccurred: true,
errMessage: "machine codes error: code = [InvalidArgument] message = [Error while validating ProviderSpec providerSpec.tags[]: Required value: Tag required of the form kubernetes.io/cluster/****]",
errMessage: "machine codes error: code = [InvalidArgument] message = [error while validating ProviderSpec providerSpec.tags[]: Required value: Tag required of the form kubernetes.io/cluster/****]",
},
}),
Entry("Cloud provider returned error while describing instance", &data{
Expand Down
73 changes: 51 additions & 22 deletions pkg/aws/core_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ func decodeProviderSpecAndSecret(machineClass *v1alpha1.MachineClass, secret *co
// Validate the Spec and Secrets
validationErr := validation.ValidateAWSProviderSpec(providerSpec, secret, field.NewPath("providerSpec"))
if validationErr.ToAggregate() != nil && len(validationErr.ToAggregate().Errors()) > 0 {
err = fmt.Errorf("Error while validating ProviderSpec %v", validationErr.ToAggregate().Error())
err = fmt.Errorf("error while validating ProviderSpec %v", validationErr.ToAggregate().Error())
klog.V(2).Infof("Validation of AWSMachineClass failed %s", err)

return nil, status.Error(codes.InvalidArgument, err.Error())
Expand All @@ -79,27 +79,18 @@ func disableSrcAndDestCheck(svc ec2iface.EC2API, instanceID *string) error {
return nil
}

// getInstancesFromMachineName extracts AWS Instance object from given machine name
func (d *Driver) getInstancesFromMachineName(machineName string, providerSpec *api.AWSProviderSpec, secret *corev1.Secret) ([]*ec2.Instance, error) {
func getMachineInstancesByTagsAndStatus(svc ec2iface.EC2API, machineName string, providerSpecTags map[string]string) ([]*ec2.Instance, error) {
unmarshall marked this conversation as resolved.
Show resolved Hide resolved
var (
clusterName string
nodeRole string
instances []*ec2.Instance
)

svc, err := d.createSVC(secret, providerSpec.Region)
if err != nil {
return nil, status.Error(codes.Internal, err.Error())
}

for key := range providerSpec.Tags {
for key := range providerSpecTags {
if strings.Contains(key, "kubernetes.io/cluster/") {
clusterName = key
} else if strings.Contains(key, "kubernetes.io/role/") {
nodeRole = key
}
}

input := ec2.DescribeInstancesInput{
Filters: []*ec2.Filter{
{
Expand Down Expand Up @@ -131,41 +122,79 @@ func (d *Driver) getInstancesFromMachineName(machineName string, providerSpec *a
},
},
}

runResult, err := svc.DescribeInstances(&input)

if err != nil {
klog.Errorf("AWS plugin is returning error while describe instances request is sent: %s", err)
return nil, status.Error(codes.Internal, err.Error())
} else {
var instances []*ec2.Instance
for _, reservation := range runResult.Reservations {
instances = append(instances, reservation.Instances...)
}
return instances, nil
}
}

for _, reservation := range runResult.Reservations {
instances = append(instances, reservation.Instances...)
// getMatchingInstancesForMachine extracts AWS Instance object for a given machine
func (d *Driver) getMatchingInstancesForMachine(machine *v1alpha1.Machine, svc ec2iface.EC2API, providerSpecTags map[string]string) (instances []*ec2.Instance, err error) {
instances, err = getMachineInstancesByTagsAndStatus(svc, machine.Name, providerSpecTags)
if err != nil {
return nil, err
}
if len(instances) == 0 {
unmarshall marked this conversation as resolved.
Show resolved Hide resolved
errMessage := "AWS plugin is returning no VM instances backing this machine object"
return nil, status.Error(codes.NotFound, errMessage)
//if getMachineInstancesByTagsAndStatus does not return any instances, try fetching matching instances using ProviderID
klog.V(3).Infof("No VM instances found for machine %s using tags/status. Now looking for VM using providerID", machine.Name)
if machine.Spec.ProviderID == "" {
unmarshall marked this conversation as resolved.
Show resolved Hide resolved
return nil, status.Error(codes.NotFound, "No ProviderID found on the machine")
}
_, instanceID, err := decodeRegionAndInstanceID(machine.Spec.ProviderID)
if err != nil {
return nil, status.Error(codes.InvalidArgument, err.Error())
}
runResult, err := getInstanceByID(svc, instanceID)
if err != nil {
return nil, err
}
for _, reservation := range runResult.Reservations {
instances = append(instances, reservation.Instances...)
}
if len(instances) == 0 {
errMessage := "AWS plugin is returning no VM instances backing this machine object"
unmarshall marked this conversation as resolved.
Show resolved Hide resolved
return nil, status.Error(codes.NotFound, errMessage)
}
}

return instances, nil
}

func confirmInstanceByID(svc ec2iface.EC2API, instanceID string) (bool, error) {
func getInstanceByID(svc ec2iface.EC2API, instanceID string) (*ec2.DescribeInstancesOutput, error) {
input := ec2.DescribeInstancesInput{
InstanceIds: []*string{&instanceID},
}
instances, err := svc.DescribeInstances(&input)
if err != nil {
if awserror.IsInstanceIDNotFound(err) {
errMessage := "AWS plugin is returning no VM instances backing this machine object"
return nil, status.Error(codes.NotFound, errMessage)
}
klog.Errorf("AWS plugin is returning error while describe instances request is sent: %s", err)
return nil, status.Error(codes.Internal, err.Error())
}
return instances, err
}

_, err := svc.DescribeInstances(&input)
func confirmInstanceByID(svc ec2iface.EC2API, instanceID string) (bool, error) {
_, err := getInstanceByID(svc, instanceID)
if err != nil {
return false, err
}

return true, nil
}

func (d *Driver) generateBlockDevices(blockDevices []api.AWSBlockDeviceMappingSpec, rootDeviceName *string) ([]*ec2.BlockDeviceMapping, error) {
// If not blockDevices are passed, return an error.
if len(blockDevices) == 0 {
return nil, fmt.Errorf("No block devices passed")
return nil, fmt.Errorf("no block devices passed")
}

var blkDeviceMappings []*ec2.BlockDeviceMapping
Expand Down
2 changes: 1 addition & 1 deletion pkg/aws/core_util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ var _ = Describe("CoreUtils", func() {

Expect(disksGenerated).To(Equal(expectedDisks))
Expect(err).To(HaveOccurred())
Expect(err).To(Equal(fmt.Errorf("No block devices passed")))
Expect(err).To(Equal(fmt.Errorf("no block devices passed")))
})

It("should not encrypt blockDevices by default", func() {
Expand Down
6 changes: 6 additions & 0 deletions pkg/aws/errors/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,9 @@ func GetMCMErrorCodeForTerminateInstances(err error) codes.Code {
return codes.Internal
}
}

// IsInstanceIDNotFound checks if the provider returned an InstanceIDNotFound error
func IsInstanceIDNotFound(err error) bool {
awsErr := err.(awserr.Error)
return awsErr.Code() == InstanceIDNotFound
}
14 changes: 13 additions & 1 deletion pkg/aws/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ import (
"strings"
"time"

"github.com/gardener/machine-controller-manager/pkg/util/provider/machinecodes/codes"
"github.com/gardener/machine-controller-manager/pkg/util/provider/machinecodes/status"

"github.com/aws/aws-sdk-go/service/ec2/ec2iface"
backoff "github.com/cenkalti/backoff/v4"
corev1 "k8s.io/api/core/v1"
Expand All @@ -29,7 +32,7 @@ func encodeInstanceID(region, instanceID string) string {
func decodeRegionAndInstanceID(id string) (string, string, error) {
splitProviderID := strings.Split(id, "/")
if len(splitProviderID) < 2 {
err := fmt.Errorf("Unable to decode provider-ID")
err := fmt.Errorf("unable to decode provider-ID")
return "", "", err
}
return splitProviderID[len(splitProviderID)-2], splitProviderID[len(splitProviderID)-1], nil
Expand All @@ -45,6 +48,15 @@ func (d *Driver) createSVC(secret *corev1.Secret, region string) (ec2iface.EC2AP
return svc, nil
}

// Function returns true only if error code equals codes.NotFound
func isNotFoundError(err error) bool {
unmarshall marked this conversation as resolved.
Show resolved Hide resolved
errorStatus, ok := status.FromError(err)
if ok && errorStatus.Code() == codes.NotFound {
return true
}
return false
}

// kubernetesVolumeIDToEBSVolumeID translates Kubernetes volume ID to EBS volume ID
// KubernetsVolumeID forms:
// - aws://<zone>/<awsVolumeId>
Expand Down