@@ -788,25 +788,7 @@ func (e *EgressIPController) addPodEgressIPAssignments(ni util.NetInfo, name str
788788 if len (statusAssignments ) == 0 {
789789 return nil
790790 }
791- // We need to proceed with add only under two conditions
792- // 1) egressNode present in at least one status is local to this zone
793- // (NOTE: The relation between egressIPName and nodeName is 1:1 i.e in the same object the given node will be present only in one status)
794- // 2) the pod being added is local to this zone
795- proceed := false
796- for _ , status := range statusAssignments {
797- e .nodeZoneState .LockKey (status .Node )
798- isLocalZoneEgressNode , loadedEgressNode := e .nodeZoneState .Load (status .Node )
799- if loadedEgressNode && isLocalZoneEgressNode {
800- proceed = true
801- e .nodeZoneState .UnlockKey (status .Node )
802- break
803- }
804- e .nodeZoneState .UnlockKey (status .Node )
805- }
806- if ! proceed && ! e .isPodScheduledinLocalZone (pod ) {
807- return nil // nothing to do if none of the status nodes are local to this master and pod is also remote
808- }
809- var remainingAssignments []egressipv1.EgressIPStatusItem
791+ var remainingAssignments , staleAssignments []egressipv1.EgressIPStatusItem
810792 nadName := ni .GetNetworkName ()
811793 if ni .IsSecondary () {
812794 nadNames := ni .GetNADs ()
@@ -842,9 +824,16 @@ func (e *EgressIPController) addPodEgressIPAssignments(ni util.NetInfo, name str
842824 // podState.egressIPName can be empty if no re-routes were found in
843825 // syncPodAssignmentCache for the existing pod, we will treat this case as a new add
844826 for _ , status := range statusAssignments {
845- if exists := podState .egressStatuses .contains (status ); ! exists {
827+ // Add the status if it's not already in the cache, or if it exists but is in pending state
828+ // (meaning it was populated during EIP sync and needs to be processed for the pod).
829+ if value , exists := podState .egressStatuses .statusMap [status ]; ! exists || value == egressStatusStatePending {
846830 remainingAssignments = append (remainingAssignments , status )
847831 }
832+ // Detect stale EIP status entries (same EgressIP reassigned to a different node)
833+ // and queue the outdated entry for cleanup.
834+ if staleStatus := podState .egressStatuses .hasStaleEIPStatus (status ); staleStatus != nil {
835+ staleAssignments = append (staleAssignments , * staleStatus )
836+ }
848837 }
849838 podState .podIPs = podIPs
850839 podState .egressIPName = name
@@ -866,6 +855,32 @@ func (e *EgressIPController) addPodEgressIPAssignments(ni util.NetInfo, name str
866855 podState .standbyEgressIPNames .Insert (name )
867856 return nil
868857 }
858+ for _ , staleStatus := range staleAssignments {
859+ klog .V (2 ).Infof ("Deleting stale pod egress IP status: %v for EgressIP: %s and pod: %s/%s/%v" , staleStatus , name , pod .Namespace , pod .Name , podIPNets )
860+ err = e .deletePodEgressIPAssignments (ni , name , []egressipv1.EgressIPStatusItem {staleStatus }, pod )
861+ if err != nil {
862+ klog .Warningf ("Failed to delete stale EgressIP status %s/%v for pod %s: %v" , name , staleStatus , podKey , err )
863+ }
864+ delete (podState .egressStatuses .statusMap , staleStatus )
865+ }
866+ // We need to proceed with add only under two conditions
867+ // 1) egressNode present in at least one status is local to this zone
868+ // (NOTE: The relation between egressIPName and nodeName is 1:1 i.e in the same object the given node will be present only in one status)
869+ // 2) the pod being added is local to this zone
870+ proceed := false
871+ for _ , status := range statusAssignments {
872+ e .nodeZoneState .LockKey (status .Node )
873+ isLocalZoneEgressNode , loadedEgressNode := e .nodeZoneState .Load (status .Node )
874+ if loadedEgressNode && isLocalZoneEgressNode {
875+ proceed = true
876+ e .nodeZoneState .UnlockKey (status .Node )
877+ break
878+ }
879+ e .nodeZoneState .UnlockKey (status .Node )
880+ }
881+ if ! proceed && ! e .isPodScheduledinLocalZone (pod ) {
882+ return nil // nothing to do if none of the status nodes are local to this master and pod is also remote
883+ }
869884 for _ , status := range remainingAssignments {
870885 klog .V (2 ).Infof ("Adding pod egress IP status: %v for EgressIP: %s and pod: %s/%s/%v" , status , name , pod .Namespace , pod .Name , podIPNets )
871886 nodesToLock := []string {status .Node , pod .Spec .NodeName }
@@ -1155,6 +1170,8 @@ type egressIPCache struct {
11551170 egressLocalNodesCache sets.Set [string ]
11561171 // egressIP IP -> assigned node name
11571172 egressIPIPToNodeCache map [string ]string
1173+ // egressIP name -> egress IP -> assigned node name
1174+ egressIPToAssignedNodes map [string ]map [string ]string
11581175 // node name -> network name -> redirect IPs
11591176 egressNodeRedirectsCache nodeNetworkRedirects
11601177 // network name -> OVN cluster router name
@@ -1594,6 +1611,14 @@ func (e *EgressIPController) syncPodAssignmentCache(egressIPCache egressIPCache)
15941611 }
15951612 }
15961613
1614+ // populate podState.egressStatuses with assigned node for active egressIP IPs.
1615+ if podState .egressIPName == egressIPName {
1616+ for egressIPIP , nodeName := range egressIPCache .egressIPToAssignedNodes [egressIPName ] {
1617+ podState .egressStatuses .statusMap [egressipv1.EgressIPStatusItem {
1618+ EgressIP : egressIPIP , Node : nodeName }] = egressStatusStatePending
1619+ }
1620+ }
1621+
15971622 e .podAssignment .Store (podKey , podState )
15981623 return nil
15991624 }); err != nil {
@@ -1975,6 +2000,9 @@ func (e *EgressIPController) generateCacheForEgressIP() (egressIPCache, error) {
19752000 // egressIP IP -> node name. Assigned node for EIP.
19762001 egressIPIPNodeCache := make (map [string ]string , 0 )
19772002 cache .egressIPIPToNodeCache = egressIPIPNodeCache
2003+ // egressIP name -> egressIP IP -> node name.
2004+ egressIPToAssignedNodes := make (map [string ]map [string ]string , 0 )
2005+ cache .egressIPToAssignedNodes = egressIPToAssignedNodes
19782006 cache .markCache = make (map [string ]string )
19792007 egressIPs , err := e .watchFactory .GetEgressIPs ()
19802008 if err != nil {
@@ -1988,13 +2016,15 @@ func (e *EgressIPController) generateCacheForEgressIP() (egressIPCache, error) {
19882016 cache .markCache [egressIP .Name ] = mark .String ()
19892017 egressIPsCache [egressIP .Name ] = make (map [string ]selectedPods , 0 )
19902018 egressIPNameNodesCache [egressIP .Name ] = make ([]string , 0 , len (egressIP .Status .Items ))
2019+ egressIPToAssignedNodes [egressIP .Name ] = make (map [string ]string , 0 )
19912020 for _ , status := range egressIP .Status .Items {
19922021 eipIP := net .ParseIP (status .EgressIP )
19932022 if eipIP == nil {
19942023 klog .Errorf ("Failed to parse EgressIP %s IP %q from status" , egressIP .Name , status .EgressIP )
19952024 continue
19962025 }
19972026 egressIPIPNodeCache [eipIP .String ()] = status .Node
2027+ egressIPToAssignedNodes [egressIP .Name ][eipIP .String ()] = status .Node
19982028 if localZoneNodes .Has (status .Node ) {
19992029 egressLocalNodesCache .Insert (status .Node )
20002030 }
@@ -2251,9 +2281,18 @@ func InitClusterEgressPolicies(nbClient libovsdbclient.Client, addressSetFactory
22512281 return nil
22522282}
22532283
2284+ // egressStatusStatePending marks entries populated during EIP sync and
2285+ // indicates they must be reconciled again for the pod.
2286+ const egressStatusStatePending = "pending"
2287+
22542288type statusMap map [egressipv1.EgressIPStatusItem ]string
22552289
22562290type egressStatuses struct {
2291+ // statusMap tracks per EIP status assignment for a pod.
2292+ // Key: egressipv1.EgressIPStatusItem {EgressIP, Node}
2293+ // Values:
2294+ // "" -> applied/reconciled
2295+ // egressStatusStatePending -> populated during EIP sync, pending reconcile.
22572296 statusMap
22582297}
22592298
@@ -2265,6 +2304,21 @@ func (e egressStatuses) contains(potentialStatus egressipv1.EgressIPStatusItem)
22652304 return false
22662305}
22672306
2307+ // hasStaleEIPStatus checks for stale EIP status entries already in cache.
2308+ // This addresses the race condition where an EIP is reassigned to a different node
2309+ // but the cache still contains the old assignment, leading to stale SNAT/LRP entries.
2310+ func (e egressStatuses ) hasStaleEIPStatus (potentialStatus egressipv1.EgressIPStatusItem ) * egressipv1.EgressIPStatusItem {
2311+ var staleStatus * egressipv1.EgressIPStatusItem
2312+ for status := range e .statusMap {
2313+ if status .EgressIP == potentialStatus .EgressIP &&
2314+ status .Node != potentialStatus .Node {
2315+ staleStatus = & egressipv1.EgressIPStatusItem {EgressIP : status .EgressIP , Node : status .Node }
2316+ break
2317+ }
2318+ }
2319+ return staleStatus
2320+ }
2321+
22682322func (e egressStatuses ) delete (deleteStatus egressipv1.EgressIPStatusItem ) {
22692323 delete (e .statusMap , deleteStatus )
22702324}
0 commit comments