bugfix: avoid the deadlock when failed to remove invalid sandbox

Signed-off-by: YaoZengzeng <yaozengzeng@zju.edu.cn>
AliyunContainerService · Aug 13, 2018 · fa7315a · fa7315a
1 parent 218807c
commit fa7315a
Show file tree

Hide file tree

Showing 4 changed files with 91 additions and 2 deletions.
diff --git a/cri/v1alpha1/cri.go b/cri/v1alpha1/cri.go
@@ -207,7 +207,10 @@ func (c *CriManager) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
 	defer func() {
 		// If running sandbox failed, clean up the container.
 		if retErr != nil {
-			c.ContainerMgr.Remove(ctx, id, &apitypes.ContainerRemoveOptions{Volumes: true, Force: true})
+			err := c.ContainerMgr.Remove(ctx, id, &apitypes.ContainerRemoveOptions{Volumes: true, Force: true})
+			if err != nil {
+				logrus.Errorf("failed to remove the container when running sandbox failed: %v", err)
+			}
 		}
 	}()
 
@@ -462,6 +465,11 @@ func (c *CriManager) ListPodSandbox(ctx context.Context, r *runtime.ListPodSandb
 		return nil, fmt.Errorf("failed to list sandbox: %v", err)
 	}
 
+	sandboxList, err = c.filterInvalidSandboxes(ctx, sandboxList)
+	if err != nil {
+		return nil, fmt.Errorf("failed to filter invalid sandboxes: %v", err)
+	}
+
 	sandboxes := make([]*runtime.PodSandbox, 0, len(sandboxList))
 	for _, s := range sandboxList {
 		sandbox, err := toCriSandbox(s)

diff --git a/cri/v1alpha1/cri_utils.go b/cri/v1alpha1/cri_utils.go
@@ -313,6 +313,43 @@ func toCriSandbox(c *mgr.Container) (*runtime.PodSandbox, error) {
 	}, nil
 }
 
+// It has the possibility that we failed to run the sandbox and it is not being cleaned up.
+// Kubelet will use list to get the sandboxes, but will not get the status of the failed pod
+// whose meta data has not been put into the Sandbox Store. And Kubelet will keep trying to
+// get the status of the failed pod and won't create a new one to replace it. It's a DEAD LOCK.
+// Actually Kubelet should not know the existence of invalid pod whose meta data won't be in the
+// Sandbox Store. So we could avoid the DEAD LOCK mentioned above.
+func (c *CriManager) filterInvalidSandboxes(ctx context.Context, sandboxes []*mgr.Container) ([]*mgr.Container, error) {
+	validSandboxes, err := c.SandboxStore.Keys()
+	if err != nil {
+		return nil, err
+	}
+
+	var result []*mgr.Container
+	for _, sandbox := range sandboxes {
+		exist := false
+		for _, id := range validSandboxes {
+			if sandbox.ID == id {
+				exist = true
+				break
+			}
+		}
+		if exist {
+			result = append(result, sandbox)
+			continue
+		}
+
+		status := sandbox.State.Status
+		// NOTE: what if the worst case that we failed to remove the sandbox and
+		// it is still running?
+		if status != apitypes.StatusRunning && status != apitypes.StatusCreated {
+			// Remove invalid sandbox.
+			c.ContainerMgr.Remove(ctx, sandbox.ID, &apitypes.ContainerRemoveOptions{Volumes: true, Force: true})
+		}
+	}
+	return result, nil
+}
+
 func filterCRISandboxes(sandboxes []*runtime.PodSandbox, filter *runtime.PodSandboxFilter) []*runtime.PodSandbox {
 	if filter == nil {
 		return sandboxes

diff --git a/cri/v1alpha2/cri.go b/cri/v1alpha2/cri.go
@@ -205,7 +205,10 @@ func (c *CriManager) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
 	defer func() {
 		// If running sandbox failed, clean up the container.
 		if retErr != nil {
-			c.ContainerMgr.Remove(ctx, id, &apitypes.ContainerRemoveOptions{Volumes: true, Force: true})
+			err := c.ContainerMgr.Remove(ctx, id, &apitypes.ContainerRemoveOptions{Volumes: true, Force: true})
+			if err != nil {
+				logrus.Errorf("failed to remove the container when running sandbox failed: %v", err)
+			}
 		}
 	}()
 
@@ -468,6 +471,11 @@ func (c *CriManager) ListPodSandbox(ctx context.Context, r *runtime.ListPodSandb
 		return nil, fmt.Errorf("failed to list sandbox: %v", err)
 	}
 
+	sandboxList, err = c.filterInvalidSandboxes(ctx, sandboxList)
+	if err != nil {
+		return nil, fmt.Errorf("failed to filter invalid sandboxes: %v", err)
+	}
+
 	sandboxes := make([]*runtime.PodSandbox, 0, len(sandboxList))
 	for _, s := range sandboxList {
 		sandbox, err := toCriSandbox(s)

diff --git a/cri/v1alpha2/cri_utils.go b/cri/v1alpha2/cri_utils.go
@@ -308,6 +308,42 @@ func toCriSandbox(c *mgr.Container) (*runtime.PodSandbox, error) {
 	}, nil
 }
 
+// It has the possibility that we failed to run the sandbox and it is not being cleaned up.
+// Kubelet will use list to get the sandboxes, but will not get the status of the failed pod
+// whose meta data has not been put into the Sandbox Store. And Kubelet will keep trying to
+// get the status of the failed pod and won't create a new one to replace it. It's a DEAD LOCK.
+// Actually Kubelet should not know the existence of invalid pod whose meta data won't be in the
+// Sandbox Store. So we could avoid the DEAD LOCK mentioned above.
+func (c *CriManager) filterInvalidSandboxes(ctx context.Context, sandboxes []*mgr.Container) ([]*mgr.Container, error) {
+	validSandboxes, err := c.SandboxStore.Keys()
+	if err != nil {
+		return nil, err
+	}
+
+	var result []*mgr.Container
+	for _, sandbox := range sandboxes {
+		exist := false
+		for _, id := range validSandboxes {
+			if sandbox.ID == id {
+				exist = true
+				break
+			}
+		}
+		if exist {
+			result = append(result, sandbox)
+			continue
+		}
+
+		status := sandbox.State.Status
+		// NOTE: what if the worst case that we failed to remove the sandbox and
+		// it is still running?
+		if status != apitypes.StatusRunning && status != apitypes.StatusCreated {
+			c.ContainerMgr.Remove(ctx, sandbox.ID, &apitypes.ContainerRemoveOptions{Volumes: true, Force: true})
+		}
+	}
+	return result, nil
+}
+
 func filterCRISandboxes(sandboxes []*runtime.PodSandbox, filter *runtime.PodSandboxFilter) []*runtime.PodSandbox {
 	if filter == nil {
 		return sandboxes