From 6bdbb00af9de42baf0684fe1d6dceb6ae5c1248d Mon Sep 17 00:00:00 2001 From: Peter Hunt Date: Wed, 26 Aug 2020 09:52:03 -0400 Subject: [PATCH] Remove potentially unhealthy symlink only for dead containers Signed-off-by: Ted Yu Signed-off-by: Peter Hunt --- .../pkg/kubelet/kuberuntime/kuberuntime_gc.go | 26 +++++++++++++++++++ .../pkg/kubelet/kuberuntime/legacy.go | 20 ++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc.go index a66a07370cbb..cba577912d4a 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_gc.go @@ -338,9 +338,35 @@ func (cgc *containerGC) evictPodLogsDirectories(allSourcesReady bool) error { logSymlinks, _ := osInterface.Glob(filepath.Join(legacyContainerLogsDir, fmt.Sprintf("*.%s", legacyLogSuffix))) for _, logSymlink := range logSymlinks { if _, err := osInterface.Stat(logSymlink); os.IsNotExist(err) { + if containerID, err := getContainerIDFromLegacyLogSymlink(logSymlink); err == nil { + status, err := cgc.manager.runtimeService.ContainerStatus(containerID) + if err != nil { + // TODO: we should handle container not found (i.e. container was deleted) case differently + // once https://github.com/kubernetes/kubernetes/issues/63336 is resolved + glog.Infof("Error getting ContainerStatus for containerID %q: %v", containerID, err) + } else if status.State != runtimeapi.ContainerState_CONTAINER_EXITED { + // Here is how container log rotation works (see containerLogManager#rotateLatestLog): + // + // 1. rename current log to rotated log file whose filename contains current timestamp (fmt.Sprintf("%s.%s", log, timestamp)) + // 2. reopen the container log + // 3. if #2 fails, rename rotated log file back to container log + // + // There is small but indeterministic amount of time during which log file doesn't exist (between steps #1 and #2, between #1 and #3). + // Hence the symlink may be deemed unhealthy during that period. + // See https://github.com/kubernetes/kubernetes/issues/52172 + // + // We only remove unhealthy symlink for dead containers + glog.V(5).Infof("Container %q is still running, not removing symlink %q.", containerID, logSymlink) + continue + } + } else { + glog.V(4).Infof("unable to obtain container Id: %v", err) + } err := osInterface.Remove(logSymlink) if err != nil { glog.Errorf("Failed to remove container log dead symlink %q: %v", logSymlink, err) + } else { + glog.V(4).Infof("removed symlink %s", logSymlink) } } } diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/legacy.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/legacy.go index 2a00ad012f1d..e87de25a7e96 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/legacy.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/legacy.go @@ -19,6 +19,7 @@ package kuberuntime import ( "fmt" "path" + "strings" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" ) @@ -44,6 +45,25 @@ func legacyLogSymlink(containerID string, containerName, podName, podNamespace s containerName, containerID) } +// getContainerIDFromLegacyLogSymlink returns error if container Id cannot be parsed +func getContainerIDFromLegacyLogSymlink(logSymlink string) (string, error) { + parts := strings.Split(logSymlink, "-") + if len(parts) == 0 { + return "", fmt.Errorf("unable to find separator in %q", logSymlink) + } + containerIDWithSuffix := parts[len(parts)-1] + suffix := fmt.Sprintf(".%s", legacyLogSuffix) + if !strings.HasSuffix(containerIDWithSuffix, suffix) { + return "", fmt.Errorf("%q doesn't end with %q", logSymlink, suffix) + } + containerIDWithoutSuffix := strings.TrimSuffix(containerIDWithSuffix, suffix) + // container can be retrieved with container Id as short as 6 characters + if len(containerIDWithoutSuffix) < 6 { + return "", fmt.Errorf("container Id %q is too short", containerIDWithoutSuffix) + } + return containerIDWithoutSuffix, nil +} + func logSymlink(containerLogsDir, podFullName, containerName, dockerId string) string { suffix := fmt.Sprintf(".%s", legacyLogSuffix) logPath := fmt.Sprintf("%s_%s-%s", podFullName, containerName, dockerId)