From d14d1319f537180bd43190db00d89d7ae3dbb195 Mon Sep 17 00:00:00 2001 From: Piotr Skamruk Date: Wed, 25 Apr 2018 18:22:35 +0200 Subject: [PATCH 1/3] Correct the way of CNI ADD handling --- pkg/manager/runtime.go | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/pkg/manager/runtime.go b/pkg/manager/runtime.go index 0edc26e3c..17c784458 100644 --- a/pkg/manager/runtime.go +++ b/pkg/manager/runtime.go @@ -123,7 +123,7 @@ func (v *VirtletRuntimeService) RunPodSandbox(ctx context.Context, in *kubeapi.R if status.State == kubeapi.PodSandboxState_SANDBOX_READY { return &kubeapi.RunPodSandboxResponse{ PodSandboxId: podID, - }, err + }, nil } } @@ -149,14 +149,22 @@ func (v *VirtletRuntimeService) RunPodSandbox(ctx context.Context, in *kubeapi.R fdPayload := &tapmanager.GetFDPayload{Description: pnd} csnBytes, err := v.fdManager.AddFDs(podID, fdPayload) if err != nil { - // this will cause kubelet to delete the pod sandbox and then retry - // its creation - state = kubeapi.PodSandboxState_SANDBOX_NOTREADY - glog.Errorf("Error adding pod %s (%s) to CNI network: %v", podName, podID, err) + // Try to cleanup cni e.x. in case of multiple plugins behind cni genie. + // While one of them failed, other could already allocated some resources + // so give them a try to do cleanup. + if fdErr := v.fdManager.ReleaseFDs(podID); err != nil { + glog.Errorf("Error removing pod %s (%s) from CNI network: %v", podName, podID, fdErr) + } + + return nil, fmt.Errorf("Error adding pod %s (%s) to CNI network: %v", podName, podID, err) } psi, err := metadata.NewPodSandboxInfo(config, csnBytes, state, v.clock) if err != nil { + // cleanup cni if we could not add pod to metadata store to prevent resource leaking + if fdErr := v.fdManager.ReleaseFDs(podID); err != nil { + glog.Errorf("Error removing pod %s (%s) from CNI network: %v", podName, podID, fdErr) + } return nil, err } @@ -166,14 +174,16 @@ func (v *VirtletRuntimeService) RunPodSandbox(ctx context.Context, in *kubeapi.R return psi, nil }, ); storeErr != nil { + // cleanup cni if we could not add pod to metadata store to prevent resource leaking + if err := v.fdManager.ReleaseFDs(podID); err != nil { + glog.Errorf("Error removing pod %s (%s) from CNI network: %v", podName, podID, err) + } return nil, storeErr } - // If we don't return PodSandboxId upon RunPodSandbox, kubelet will not retry - // RunPodSandbox for this pod after CNI failure return &kubeapi.RunPodSandboxResponse{ PodSandboxId: podID, - }, err + }, nil } // StopPodSandbox implements StopPodSandbox method of CRI. From 89509f455775ae15f7d270f65e66a5b4bdbcdadb Mon Sep 17 00:00:00 2001 From: Piotr Skamruk Date: Wed, 25 Apr 2018 21:00:29 +0200 Subject: [PATCH 2/3] Add defered cleanups --- pkg/tapmanager/tapfdsource.go | 36 +++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/pkg/tapmanager/tapfdsource.go b/pkg/tapmanager/tapfdsource.go index d4da3cc69..5eec4ab13 100644 --- a/pkg/tapmanager/tapfdsource.go +++ b/pkg/tapmanager/tapfdsource.go @@ -131,10 +131,27 @@ func (s *TapFDSource) GetFDs(key string, data []byte) ([]int, []byte, error) { return nil, nil, fmt.Errorf("error creating new netns for pod %s (%s): %v", pnd.PodName, pnd.PodID, err) } + weHadAnError := false + podAddedToNetwork := false + defer func() { + if weHadAnError { + if podAddedToNetwork { + if err := s.cniClient.RemoveSandboxFromNetwork(pnd.PodID, pnd.PodName, pnd.PodNs); err != nil { + glog.Errorf("Error while emergency removal of pod from cni network due to previous other error: %v", err) + } + } + if err := cni.DestroyNetNS(pnd.PodID); err != nil { + glog.Errorf("Error while emergency removal of netns: %v", err) + } + } + }() + netConfig, err := s.cniClient.AddSandboxToNetwork(pnd.PodID, pnd.PodName, pnd.PodNs) if err != nil { + weHadAnError = true return nil, nil, fmt.Errorf("error adding pod %s (%s) to CNI network: %v", pnd.PodName, pnd.PodID, err) } + podAddedToNetwork = true glog.V(3).Infof("CNI configuration for pod %s (%s): %s", pnd.PodName, pnd.PodID, spew.Sdump(netConfig)) if netConfig == nil { @@ -152,6 +169,7 @@ func (s *TapFDSource) GetFDs(key string, data []byte) ([]int, []byte, error) { var csn *network.ContainerSideNetwork if err := s.setupNetNS(key, pnd, func(netNSPath string, allLinks []netlink.Link) (*network.ContainerSideNetwork, error) { if netConfig, err = nettools.ValidateAndFixCNIResult(netConfig, netNSPath, allLinks); err != nil { + weHadAnError = true return nil, fmt.Errorf("error fixing cni configuration: %v", err) } if err := nettools.FixCalicoNetworking(netConfig, s.getDummyNetwork); err != nil { @@ -174,14 +192,14 @@ func (s *TapFDSource) GetFDs(key string, data []byte) ([]int, []byte, error) { } return csn, nil }); err != nil { - // TODO: do some cleanup on configured interfaces if there is an error + weHadAnError = true return nil, nil, err } for _, iface := range csn.Interfaces { if iface.Type == network.InterfaceTypeVF { if err := nettools.SetMacOnVf(iface.PCIAddress, iface.HardwareAddr); err != nil { - // TODO: do some cleanup on configured interfaces if there is an error + weHadAnError = true return nil, nil, err } } @@ -206,6 +224,16 @@ func (s *TapFDSource) Release(key string) error { return fmt.Errorf("failed to open network namespace at %q: %v", netNSPath, err) } + // Try to be idempotent even if there will be any other error during next functions calls. + // This can lead to lead to leaking resources in multiple cni plugins case, but unblocks + // a possibility to RunPodSandbox once again, after failed attempt. Without that - next + // attempt will fail with info about alredy existing netns so it can not be created. + defer func() { + if err := cni.DestroyNetNS(pn.pnd.PodID); err != nil { + glog.Errorf("Error when removing network namespace for pod sandbox %q: %v", pn.pnd.PodID, err) + } + }() + if err := nettools.ReconstructVFs(pn.csn, vmNS); err != nil { return fmt.Errorf("failed to reconstruct SR-IOV devices: %v", err) } @@ -224,10 +252,6 @@ func (s *TapFDSource) Release(key string) error { return fmt.Errorf("error removing pod sandbox %q from CNI network: %v", pn.pnd.PodID, err) } - if err := cni.DestroyNetNS(pn.pnd.PodID); err != nil { - return fmt.Errorf("error when removing network namespace for pod sandbox %q: %v", pn.pnd.PodID, err) - } - delete(s.fdMap, key) return nil } From 2f14e9db776af290b54afad38f18a59014aef02a Mon Sep 17 00:00:00 2001 From: Piotr Skamruk Date: Mon, 7 May 2018 16:13:51 +0200 Subject: [PATCH 3/3] Address comments from review --- pkg/manager/runtime.go | 4 +--- pkg/tapmanager/tapfdsource.go | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/pkg/manager/runtime.go b/pkg/manager/runtime.go index 17c784458..537549546 100644 --- a/pkg/manager/runtime.go +++ b/pkg/manager/runtime.go @@ -149,9 +149,7 @@ func (v *VirtletRuntimeService) RunPodSandbox(ctx context.Context, in *kubeapi.R fdPayload := &tapmanager.GetFDPayload{Description: pnd} csnBytes, err := v.fdManager.AddFDs(podID, fdPayload) if err != nil { - // Try to cleanup cni e.x. in case of multiple plugins behind cni genie. - // While one of them failed, other could already allocated some resources - // so give them a try to do cleanup. + // Try to clean up CNI netns (this may be necessary e.g. in case of multiple CNI plugins with CNI Genie) if fdErr := v.fdManager.ReleaseFDs(podID); err != nil { glog.Errorf("Error removing pod %s (%s) from CNI network: %v", podName, podID, fdErr) } diff --git a/pkg/tapmanager/tapfdsource.go b/pkg/tapmanager/tapfdsource.go index 5eec4ab13..097e52393 100644 --- a/pkg/tapmanager/tapfdsource.go +++ b/pkg/tapmanager/tapfdsource.go @@ -131,24 +131,24 @@ func (s *TapFDSource) GetFDs(key string, data []byte) ([]int, []byte, error) { return nil, nil, fmt.Errorf("error creating new netns for pod %s (%s): %v", pnd.PodName, pnd.PodID, err) } - weHadAnError := false + gotError := false podAddedToNetwork := false defer func() { - if weHadAnError { + if gotError { if podAddedToNetwork { if err := s.cniClient.RemoveSandboxFromNetwork(pnd.PodID, pnd.PodName, pnd.PodNs); err != nil { - glog.Errorf("Error while emergency removal of pod from cni network due to previous other error: %v", err) + glog.Errorf("Error removing a pod from the pod network after failed network setup: %v", err) } } if err := cni.DestroyNetNS(pnd.PodID); err != nil { - glog.Errorf("Error while emergency removal of netns: %v", err) + glog.Errorf("Error removing netns after failed network setup: %v", err) } } }() netConfig, err := s.cniClient.AddSandboxToNetwork(pnd.PodID, pnd.PodName, pnd.PodNs) if err != nil { - weHadAnError = true + gotError = true return nil, nil, fmt.Errorf("error adding pod %s (%s) to CNI network: %v", pnd.PodName, pnd.PodID, err) } podAddedToNetwork = true @@ -169,7 +169,7 @@ func (s *TapFDSource) GetFDs(key string, data []byte) ([]int, []byte, error) { var csn *network.ContainerSideNetwork if err := s.setupNetNS(key, pnd, func(netNSPath string, allLinks []netlink.Link) (*network.ContainerSideNetwork, error) { if netConfig, err = nettools.ValidateAndFixCNIResult(netConfig, netNSPath, allLinks); err != nil { - weHadAnError = true + gotError = true return nil, fmt.Errorf("error fixing cni configuration: %v", err) } if err := nettools.FixCalicoNetworking(netConfig, s.getDummyNetwork); err != nil { @@ -192,14 +192,14 @@ func (s *TapFDSource) GetFDs(key string, data []byte) ([]int, []byte, error) { } return csn, nil }); err != nil { - weHadAnError = true + gotError = true return nil, nil, err } for _, iface := range csn.Interfaces { if iface.Type == network.InterfaceTypeVF { if err := nettools.SetMacOnVf(iface.PCIAddress, iface.HardwareAddr); err != nil { - weHadAnError = true + gotError = true return nil, nil, err } } @@ -224,10 +224,10 @@ func (s *TapFDSource) Release(key string) error { return fmt.Errorf("failed to open network namespace at %q: %v", netNSPath, err) } - // Try to be idempotent even if there will be any other error during next functions calls. - // This can lead to lead to leaking resources in multiple cni plugins case, but unblocks - // a possibility to RunPodSandbox once again, after failed attempt. Without that - next - // attempt will fail with info about alredy existing netns so it can not be created. + // Try to keep this function idempotent even if there are errors during the following calls. + // This can cause some resource leaks in multiple CNI case but makes it possible + // to call `RunPodSandbox` again after a failed attempt. Failing to do so would cause + // the next `RunPodSandbox` call to fail due to the netns already being present. defer func() { if err := cni.DestroyNetNS(pn.pnd.PodID); err != nil { glog.Errorf("Error when removing network namespace for pod sandbox %q: %v", pn.pnd.PodID, err)