@@ -209,8 +209,7 @@ func (d *daemon) getIbSriovNetwork(networkID string) (string, *utils.IbSriovCniS
209209	// Check if this network's resource is managed by this daemon 
210210	resourceName  :=  netAttInfo .Annotations ["k8s.v1.cni.cncf.io/resourceName" ]
211211	if  resourceName  ==  ""  ||  ! d .config .IsManagedResource (resourceName ) {
212- 		// TODO(Nik) dev qol, check if someone else manages this resource or if it is orphan 
213- 		// checkResourceOwner(networkNamespace, networkName) 
212+ 		// TODO(Nik) qol, check if someone else manages this resource or if it is orphan 
214213		return  "" , nil , fmt .Errorf ("network %s uses resource %s which is not managed by this daemon" , networkName , resourceName )
215214	}
216215
@@ -254,8 +253,8 @@ func getPodNetworkInfo(netName string, pod *kapi.Pod, netMap networksMap) (*podN
254253
255254// addPodFinalizer adds the GUID cleanup finalizer to a pod 
256255func  (d  * daemon ) addPodFinalizer (pod  * kapi.Pod , networkName  string ) error  {
256+ 	podFinalizer  :=  fmt .Sprintf ("%s-%s" , PodGUIDFinalizer , networkName )
257257	return  wait .ExponentialBackoff (backoffValues , func () (bool , error ) {
258- 		podFinalizer  :=  fmt .Sprintf ("%s-%s" , PodGUIDFinalizer , networkName )
259258		if  err  :=  d .kubeClient .AddFinalizerToPod (pod , podFinalizer ); err  !=  nil  {
260259			log .Warn ().Msgf ("failed to add finalizer to pod %s/%s: %v" ,
261260				pod .Namespace , pod .Name , err )
@@ -267,8 +266,8 @@ func (d *daemon) addPodFinalizer(pod *kapi.Pod, networkName string) error {
267266
268267// removePodFinalizer removes the GUID cleanup finalizer from a pod 
269268func  (d  * daemon ) removePodFinalizer (pod  * kapi.Pod , networkName  string ) error  {
269+ 	podFinalizer  :=  fmt .Sprintf ("%s-%s" , PodGUIDFinalizer , networkName )
270270	return  wait .ExponentialBackoff (backoffValues , func () (bool , error ) {
271- 		podFinalizer  :=  fmt .Sprintf ("%s-%s" , PodGUIDFinalizer , networkName )
272271		if  err  :=  d .kubeClient .RemoveFinalizerFromPod (pod , podFinalizer ); err  !=  nil  {
273272			log .Warn ().Msgf ("failed to remove finalizer from pod %s/%s: %v" ,
274273				pod .Namespace , pod .Name , err )
@@ -480,44 +479,104 @@ func syncGUIDPool(smClient plugins.SubnetManagerClient, guidPool guid.Pool) erro
480479
481480// Update and set Pod's network annotation. 
482481// If failed to update annotation, pod's GUID added into the list to be removed from Pkey. 
483- func  (d  * daemon ) updatePodNetworkAnnotation (pi  * podNetworkInfo , removedList  * []net.HardwareAddr ) error   {
482+ func  (d  * daemon ) updatePodNetworkAnnotation (pi  * podNetworkInfo , removedList  * []net.HardwareAddr ) {
484483	if  pi .ibNetwork .CNIArgs  ==  nil  {
485484		pi .ibNetwork .CNIArgs  =  & map [string ]interface {}{}
486485	}
487486
488487	(* pi .ibNetwork .CNIArgs )[utils .InfiniBandAnnotation ] =  utils .ConfiguredInfiniBandPod 
489- 	netAnnotations , err  :=  json .Marshal (pi .networks )
490- 	if  err  !=  nil  {
491- 		return  fmt .Errorf ("failed to dump networks %+v of pod into json with error: %v" , pi .networks , err )
492- 	}
493- 
494- 	pi .pod .Annotations [v1 .NetworkAttachmentAnnot ] =  string (netAnnotations )
495488
496489	// Try to set pod's annotations in backoff loop 
497- 	if  err  =  wait .ExponentialBackoff (backoffValues , func () (bool , error ) {
498- 		log .Info ().Msgf ("updatePodNetworkAnnotation(): Updating pod annotation for pod: %s with anootation: %s" , pi .pod .Name , pi .pod .Annotations )
490+ 	if  err  :=  wait .ExponentialBackoff (backoffValues , func () (bool , error ) {
491+ 
492+ 		// Get latest annotations state to avoid conflicts 
493+ 		latestPodAnnotations , networks , err  :=  d .getLatestPodAnnotations (pi .pod )
494+ 		if  err  !=  nil  {
495+ 			log .Warn ().Msgf ("failed to get latest pod annotations for %s/%s: %v" , pi .pod .Namespace , pi .pod .Name , err )
496+ 			return  false , nil 
497+ 		}
498+ 
499+ 		targetNetwork , err  :=  utils .GetPodNetwork (networks , pi .ibNetwork .Name )
500+ 		if  err  !=  nil  {
501+ 			return  false , fmt .Errorf ("failed to locate network %s in pod %s/%s annotations: %v" , pi .ibNetwork .Name , pi .pod .Namespace , pi .pod .Name , err )
502+ 		}
503+ 
504+ 		err  =  updateInfiniBandNetwork (targetNetwork , pi .ibNetwork )
505+ 		if  err  !=  nil  {
506+ 			return  false , fmt .Errorf ("failed to update infiniband network for pod %s/%s: %v" , pi .pod .Namespace , pi .pod .Name , err )
507+ 		}
508+ 
509+ 		netAnnotations , err  :=  json .Marshal (networks )
510+ 		if  err  !=  nil  {
511+ 			return  false , fmt .Errorf ("failed to marshal updated networks for pod %s/%s: %v" , pi .pod .Namespace , pi .pod .Name , err )
512+ 		}
513+ 
514+ 		if  latestPodAnnotations  ==  nil  {
515+ 			return  false , fmt .Errorf ("latestPodAnnotations is nil for pod %s/%s" , pi .pod .Namespace , pi .pod .Name )
516+ 		}
517+ 
518+ 		latestPodAnnotations [v1 .NetworkAttachmentAnnot ] =  string (netAnnotations )
519+ 		pi .pod .Annotations  =  latestPodAnnotations 
520+ 
521+ 		log .Info ().Msgf ("updatePodNetworkAnnotation(): Updating pod annotation for pod: %s/%s" , pi .pod .Namespace , pi .pod .Name )
499522		if  err  =  d .kubeClient .SetAnnotationsOnPod (pi .pod , pi .pod .Annotations ); err  !=  nil  {
500523			if  kerrors .IsNotFound (err ) {
501524				return  false , err 
502525			}
503- 			log .Warn ().Msgf ("failed to update pod annotations with err: %v" , err )
526+ 			if  kerrors .IsConflict (err ) {
527+ 				log .Warn ().Msgf ("conflict while updating pod annotations for %s/%s, will retry" , pi .pod .Namespace , pi .pod .Name )
528+ 				return  false , nil 
529+ 			}
530+ 			log .Warn ().Msgf ("failed to update pod annotations for %s/%s with err: %v" , pi .pod .Namespace , pi .pod .Name , err )
504531			return  false , nil 
505532		}
506- 		log .Info ().Msgf ("updatePodNetworkAnnotation(): Success on updating pod annotation for pod: %s with anootation: %s" , pi .pod .Name , pi .pod .Annotations )
533+ 
534+ 		log .Info ().Msgf ("updatePodNetworkAnnotation(): Success on updating pod annotation for pod: %s/%s with annotations: %s" , pi .pod .Namespace , pi .pod .Name , pi .pod .Annotations )
507535		return  true , nil 
508536	}); err  !=  nil  {
509- 		log .Error ().Msgf ("failed to update pod annotations"  )
537+ 		log .Error ().Msgf ("failed to update pod annotations for %s/%s with error: %v"  ,  pi . pod . Namespace ,  pi . pod . Name ,  err )
510538
511539		if  err  =  d .guidPool .ReleaseGUID (pi .addr .String ()); err  !=  nil  {
512- 			log .Warn ().Msgf ("failed to release guid \" %s\"  from removed pod \" %s\"  in namespace " + 
513- 				"\" %s\"  with error: %v" , pi .addr .String (), pi .pod .Name , pi .pod .Namespace , err )
540+ 			log .Warn ().Msgf ("failed to release guid \" %s\"  from removed pod \" %s\"  in namespace \" %s\"  with error: %v" , pi .addr .String (), pi .pod .Name , pi .pod .Namespace , err )
514541		} else  {
515542			delete (d .guidPodNetworkMap , pi .addr .String ())
516543		}
517544
518545		* removedList  =  append (* removedList , pi .addr )
519546	}
520547
548+ 	return 
549+ }
550+ 
551+ // Retrieves the latest annotations for a pod and returns the annotations and the pod networks. 
552+ func  (d  * daemon ) getLatestPodAnnotations (pod  * kapi.Pod ) (map [string ]string , []* v1.NetworkSelectionElement , error ) {
553+ 	latestPod , err  :=  d .kubeClient .GetPod (pod .Namespace , pod .Name )
554+ 	if  err  !=  nil  {
555+ 		return  nil , nil , err 
556+ 	}
557+ 
558+ 	networks , err  :=  netAttUtils .ParsePodNetworkAnnotation (latestPod )
559+ 	if  err  !=  nil  {
560+ 		return  nil , nil , err 
561+ 	}
562+ 
563+ 	return  latestPod .Annotations , networks , nil 
564+ }
565+ 
566+ // Replaces target network with source network, erroring if source is already configured. 
567+ func  updateInfiniBandNetwork (target  * v1.NetworkSelectionElement , source  * v1.NetworkSelectionElement ) error  {
568+ 	if  target  ==  nil  ||  source  ==  nil  {
569+ 		return  fmt .Errorf ("target or source network is nil" )
570+ 	}
571+ 
572+ 	if  target .CNIArgs  !=  nil  {
573+ 		if  (* target .CNIArgs )[utils .InfiniBandAnnotation ] ==  utils .ConfiguredInfiniBandPod  {
574+ 			return  fmt .Errorf ("target network is already configured" )
575+ 		}
576+ 	}
577+ 
578+ 	target .InfinibandGUIDRequest  =  source .InfinibandGUIDRequest 
579+ 	target .CNIArgs  =  source .CNIArgs 
521580	return  nil 
522581}
523582
@@ -609,10 +668,7 @@ func (d *daemon) AddPeriodicUpdate() {
609668		var  removedGUIDList  []net.HardwareAddr 
610669		for  _ , pi  :=  range  passedPods  {
611670			log .Info ().Msgf ("Updating annotations for the pod %s, network %s" , pi .pod .Name , pi .ibNetwork .Name )
612- 			err  =  d .updatePodNetworkAnnotation (pi , & removedGUIDList )
613- 			if  err  !=  nil  {
614- 				log .Error ().Msgf ("%v" , err )
615- 			}
671+ 			d .updatePodNetworkAnnotation (pi , & removedGUIDList )
616672		}
617673
618674		if  ibCniSpec .PKey  !=  ""  &&  len (removedGUIDList ) !=  0  {
0 commit comments