-
Notifications
You must be signed in to change notification settings - Fork 205
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
BUG 1875598: Ensure the Virtual Machine provider state is set to Unknown when Failed #696
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,6 +27,7 @@ import ( | |
corev1 "k8s.io/api/core/v1" | ||
apierrors "k8s.io/apimachinery/pkg/api/errors" | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" | ||
"k8s.io/apimachinery/pkg/runtime" | ||
"k8s.io/client-go/kubernetes" | ||
"k8s.io/client-go/rest" | ||
|
@@ -434,7 +435,16 @@ func (r *ReconcileMachine) setPhase(machine *machinev1.Machine, phase string, er | |
} | ||
|
||
// Since we may have mutated the local copy of the machine above, we need to calculate baseToPatch here. | ||
// Any updates to the status must be done after this point. | ||
baseToPatch := client.MergeFrom(machine.DeepCopy()) | ||
|
||
if phase == phaseFailed { | ||
if err := r.overrideFailedMachineProviderStatusState(machine); err != nil { | ||
klog.Errorf("Failed to update machine provider status %q: %v", machine.GetName(), err) | ||
return err | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No need to fail the whole run here, we should log the error and continue processing. Nothing about this added behavior for setting subsequent status. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've had a look through the kinds of errors that we could get returned from this function and I don't think in a running environment we would ever actually see them. To me, they all seem to be that they would exist only from programming errors (https://github.com/kubernetes/apimachinery/blob/94222d04a59075a01fddedd696037db9e61db6e9/pkg/runtime/converter.go#L404), digging into this, an error could come up if either the object passed is nil (Which is already being checked), or if the type of object cannot be converted there is a possibility for an error. In either of these cases we should see this during unit testing so I think this is better to leave as it is, else we may miss our programming errors as we make changes to this in the future |
||
} | ||
} | ||
|
||
machine.Status.Phase = &phase | ||
machine.Status.ErrorMessage = nil | ||
now := metav1.Now() | ||
|
@@ -462,6 +472,45 @@ func (r *ReconcileMachine) patchFailedMachineInstanceAnnotation(machine *machine | |
return nil | ||
} | ||
|
||
// overrideFailedMachineProviderStatusState patches the state of the VM in the provider status if it is set. | ||
// Not all providers set a state, but AWS, Azure, GCP and vSphere do. | ||
// If the machine has gone into the Failed phase, and the providerStatus has already been set, | ||
// the VM is in an unknown state. This function overrides the state. | ||
func (r *ReconcileMachine) overrideFailedMachineProviderStatusState(machine *machinev1.Machine) error { | ||
if machine.Status.ProviderStatus == nil { | ||
return nil | ||
} | ||
|
||
// instanceState is used by AWS, GCP and vSphere; vmState is used by Azure. | ||
const instanceStateField = "instanceState" | ||
const vmStateField = "vmState" | ||
|
||
providerStatus, err := runtime.DefaultUnstructuredConverter.ToUnstructured(machine.Status.ProviderStatus) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will this work correctly if ProviderStatus is nil? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is a check on L#480 which prevents us getting this far if the providerStatus is nil, hence that scenario is avoided (and if the check is removed, the tests fail) |
||
if err != nil { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this really prevent us from doing the rest? |
||
return fmt.Errorf("could not covert provider status to unstructured: %v", err) | ||
} | ||
|
||
// if the instanceState is set already, update it to unknown | ||
if _, found, err := unstructured.NestedString(providerStatus, instanceStateField); err == nil && found { | ||
if err := unstructured.SetNestedField(providerStatus, unknownInstanceState, instanceStateField); err != nil { | ||
return fmt.Errorf("could not set %s: %v", instanceStateField, err) | ||
} | ||
} | ||
|
||
// if the vmState is set already, update it to unknown | ||
if _, found, err := unstructured.NestedString(providerStatus, vmStateField); err == nil && found { | ||
if err := unstructured.SetNestedField(providerStatus, unknownInstanceState, vmStateField); err != nil { | ||
return fmt.Errorf("could not set %s: %v", instanceStateField, err) | ||
} | ||
} | ||
|
||
if err := runtime.DefaultUnstructuredConverter.FromUnstructured(providerStatus, machine.Status.ProviderStatus); err != nil { | ||
return fmt.Errorf("could not convert provider status from unstructured: %v", err) | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func machineIsProvisioned(machine *machinev1.Machine) bool { | ||
return len(machine.Status.Addresses) > 0 || stringPointerDeref(machine.Spec.ProviderID) != "" | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
shouldn't this reuse the if block in line 429?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we use the block from above, then it's before the
baseToPatch
is created and as such, the patching mechanism thinks that the changes were there already. This modification has to come afterbaseToPatch
is created, but the other one has to come before it, because it is modifying the spec.I added the line on 438 to try and make this more obvious