diff --git a/prog/kube-utils/main.go b/prog/kube-utils/main.go index fd77a8597b..5926394d2b 100644 --- a/prog/kube-utils/main.go +++ b/prog/kube-utils/main.go @@ -105,7 +105,7 @@ func reclaimRemovedPeers(kube kubernetes.Interface, weave weaveClient, cml *conf common.Log.Warnln("[kube-peers] not removing myself", peer) continue } - changed, err := reclaimPeer(weave, cml, peer.PeerName, myPeerName) + changed, err := reclaimPeer(weave, cml, storedPeerList, peer.PeerName, myPeerName) if err != nil { return err } @@ -125,17 +125,28 @@ func reclaimRemovedPeers(kube kubernetes.Interface, weave weaveClient, cml *conf // actions the reclaim. // Return a bool to show whether we attempted to change anything, // and an error if something went wrong. -func reclaimPeer(weave weaveClient, cml *configMapAnnotations, peerName string, myPeerName string) (changed bool, err error) { +func reclaimPeer(weave weaveClient, cml *configMapAnnotations, storedPeerList *peerList, peerName string, myPeerName string) (changed bool, err error) { common.Log.Debugln("[kube-peers] Preparing to remove disappeared peer", peerName) okToRemove := false + nonExistentPeer := false + // 3. Check if there is an existing annotation with key X - if existingAnnotation, found := cml.GetAnnotation(KubePeersPrefix + peerName); found { + existingAnnotation, found := cml.GetAnnotation(KubePeersPrefix + peerName) + if found { common.Log.Debugln("[kube-peers] Existing annotation", existingAnnotation) // 4. If annotation already contains my identity, ok; if existingAnnotation == myPeerName { okToRemove = true + } else { + // handle an edge case where peer claimed to own the action to reclaim but no longer + // exists hence lock persists forever + if !storedPeerList.contains(existingAnnotation) { + nonExistentPeer = true + common.Log.Debugln("[kube-peers] Existing annotation", existingAnnotation, " has a non-existent peer so owning the reclaim action") + } } - } else { + } + if !found || nonExistentPeer { // 5. 
If non-existent, write an annotation with key X and contents "my identity" common.Log.Debugln("[kube-peers] Noting I plan to remove ", peerName) if err := cml.UpdateAnnotation(KubePeersPrefix+peerName, myPeerName); err == nil { diff --git a/prog/kube-utils/peerlist.go b/prog/kube-utils/peerlist.go index 2573d5a576..f034b2300c 100644 --- a/prog/kube-utils/peerlist.go +++ b/prog/kube-utils/peerlist.go @@ -26,7 +26,7 @@ type peerInfo struct { NodeName string // Kubernetes node name } -func (pl peerList) contains(peerName string) bool { +func (pl *peerList) contains(peerName string) bool { for _, peer := range pl.Peers { if peer.PeerName == peerName { return true diff --git a/prog/kube-utils/peerlist_test.go b/prog/kube-utils/peerlist_test.go index 0a52349421..77866f693b 100644 --- a/prog/kube-utils/peerlist_test.go +++ b/prog/kube-utils/peerlist_test.go @@ -137,7 +137,7 @@ func TestPeerListFuzz(t *testing.T) { found := storedPeerList.contains(peerName(i)) require.True(t, found, "peer %d not found in stored list", i) - _, err = reclaimPeer(mockWeave{}, cml, peerName(i), fmt.Sprintf("deleter-%d", i)) + _, err = reclaimPeer(mockWeave{}, cml, storedPeerList, peerName(i), fmt.Sprintf("deleter-%d", i)) require.NoError(t, err) storedPeerList, err = cml.GetPeerList()