Skip to content

Commit

Permalink
fix: attempt to delete unknown replica instances on cleanup
Browse files (browse the repository at this point in the history)
Longhorn 6552

Signed-off-by: Eric Weber <eric.weber@suse.com>
  • Loading branch information
ejweber committed Sep 3, 2024
1 parent 44d2dae commit f75f01e
Showing 1 changed file with 28 additions and 4 deletions.
32 changes: 28 additions & 4 deletions controller/replica_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,8 @@ func (rc *ReplicaController) DeleteInstance(obj interface{}) (err error) {
}
}

if im.Status.CurrentState != longhorn.InstanceManagerStateRunning {
if shouldSkip, skipReason := shouldSkipReplicaDeletion(im.Status.CurrentState); shouldSkip {
log.Infof("Skipping deleting replica %v since %s", r.Name, skipReason)
return nil
}

Expand All @@ -555,14 +556,15 @@ func (rc *ReplicaController) DeleteInstance(obj interface{}) (err error) {
defer func() {
if err != nil {
log.WithError(err).Warnf("Failed to delete replica process %v", r.Name)
if isDelinquent {
log.Warnf("Ignored the failure of deleting replica process %v because the RWX volume is currently delinquent", r.Name)
if canIgnore, ignoreReason := canIgnoreReplicaDeletionFailure(im.Status.CurrentState,
isDelinquent); canIgnore {
log.Warnf("Ignored the failure to delete replica process %v because %s", r.Name, ignoreReason)
err = nil
}
}
}()

c, err := engineapi.NewInstanceManagerClient(im, false)
c, err := engineapi.NewInstanceManagerClient(im, true)
if err != nil {
return err
}
Expand Down Expand Up @@ -888,3 +890,25 @@ func hasMatchingReplica(replica *longhorn.Replica, replicas map[string]*longhorn
}
return false
}

// shouldSkipReplicaDeletion decides, from the instance manager state alone,
// whether replica instance deletion should be skipped.
//
// It returns (false, "") when the instance manager is running or in an
// unknown state. For every other state it returns true together with a
// human-readable reason naming that state, suitable for a log message.
func shouldSkipReplicaDeletion(imState longhorn.InstanceManagerState) (canSkip bool, reason string) {
	switch imState {
	case longhorn.InstanceManagerStateRunning, longhorn.InstanceManagerStateUnknown:
		// A running instance manager can be asked to delete the instance. An
		// unknown one still deserves at least one deletion attempt.
		return false, ""
	default:
		return true, fmt.Sprintf("instance manager is in %v state", imState)
	}
}

// canIgnoreReplicaDeletionFailure reports whether a failed replica instance
// deletion may be treated as non-fatal, along with the reason why.
//
// The failure is ignorable when the instance manager is in an unknown state
// (checked first) or when isDelinquent is true. In all other cases it returns
// (false, "").
func canIgnoreReplicaDeletionFailure(imState longhorn.InstanceManagerState, isDelinquent bool) (canIgnore bool, reason string) {
	switch {
	case imState == longhorn.InstanceManagerStateUnknown:
		// Deletion against an unknown instance manager is best effort only.
		return true, fmt.Sprintf("instance manager is in %v state", imState)
	case isDelinquent:
		return true, "the RWX volume is delinquent"
	}

	return false, ""
}

0 comments on commit f75f01e

Please sign in to comment.