Skip to content

Commit

Permalink
Kubernetes tests: Make waiting for pod robust to invalid response types.
Browse files Browse the repository at this point in the history
If an invalid type is encountered over the watch channel, this logs a
warning about it and falls back to the poll-based method, which is also
made faster in such a case.

Also add rate-limited logging (at most once every 5 minutes) in case a
pod has still not reached the expected phase after 3 minutes of waiting.

(Diffbased)

PiperOrigin-RevId: 700795608
  • Loading branch information
EtiennePerot authored and gvisor-bot committed Nov 27, 2024
1 parent 92c1208 commit 00dcbab
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 7 deletions.
5 changes: 4 additions & 1 deletion test/kubernetes/benchmarks/abslbuild.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"path"
"strings"
"testing"
"time"

k8s "gvisor.dev/gvisor/test/kubernetes"
"gvisor.dev/gvisor/test/kubernetes/benchmarks/profiling"
Expand Down Expand Up @@ -118,7 +119,9 @@ func BuildABSL(ctx context.Context, t *testing.T, k8sCtx k8sctx.KubernetesContex
}
defer cluster.DeletePod(ctx, pod)

containerDuration, err := benchmetric.GetTimedContainerDuration(ctx, cluster, pod, name)
waitDeadlineCtx, cancel := context.WithTimeout(ctx, 30*time.Minute)
containerDuration, err := benchmetric.GetTimedContainerDuration(waitDeadlineCtx, cluster, pod, name)
cancel()
if err != nil {
t.Fatalf("Failed to get container duration: %v", err)
}
Expand Down
1 change: 1 addition & 0 deletions test/kubernetes/testcluster/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ go_library(
"//visibility:public",
],
deps = [
"//pkg/log",
"//pkg/sync",
"//test/kubernetes:test_range_config_go_proto",
"@io_k8s_api//apps/v1:go_default_library",
Expand Down
32 changes: 26 additions & 6 deletions test/kubernetes/testcluster/testcluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (

"golang.org/x/sync/errgroup"
cspb "google.golang.org/genproto/googleapis/container/v1"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sync"
testpb "gvisor.dev/gvisor/test/kubernetes/test_range_config_go_proto"
appsv1 "k8s.io/api/apps/v1"
Expand Down Expand Up @@ -437,22 +438,38 @@ func (t *TestCluster) doWaitForPod(ctx context.Context, pod *v13.Pod, phase v13.
if err != nil {
return fmt.Errorf("watch: %w", err)
}
podLogger := log.BasicRateLimitedLogger(5 * time.Minute)
incompatibleTypeLogger := log.BasicRateLimitedLogger(5 * time.Minute)
startLogTime := time.Now().Add(3 * time.Minute)

var p *v13.Pod
gotIncompatibleType := false
pollCh := time.NewTicker(10 * time.Second)
defer pollCh.Stop()
pollLoop:
for {
select {
case <-ctx.Done():
return ctx.Err()
case <-pollCh.C:
p, err = t.GetPod(ctx, pod)
if err != nil {
return fmt.Errorf("failed to poll pod: %w", err)
}
case e := <-w.ResultChan():
var ok bool
p, ok = e.Object.(*v13.Pod)
if !ok {
return fmt.Errorf("invalid object watched: %T", p)
}
case <-time.After(10 * time.Second):
p, err = t.GetPod(ctx, pod)
if err != nil {
return fmt.Errorf("failed to poll pod: %w", err)
if !gotIncompatibleType {
log.Warningf("Received unexpected type of watched pod: got %T (%v), expected %T; falling back to polling-based wait.", e.Object, e.Object, p)
gotIncompatibleType = true
pollCh = time.NewTicker(250 * time.Millisecond)
defer pollCh.Stop()
} else {
incompatibleTypeLogger.Infof("Received another unexpected type of watched pod: got %T (%v), expected %T.", e.Object, e.Object, p)
}
time.Sleep(10 * time.Millisecond) // Avoid busy-looping when `w.ResultChan()` is closed.
continue pollLoop
}
}
if ctx.Err() != nil {
Expand All @@ -474,6 +491,9 @@ func (t *TestCluster) doWaitForPod(ctx context.Context, pod *v13.Pod, phase v13.
case phase:
return nil
}
if time.Now().After(startLogTime) {
podLogger.Infof("Still waiting for pod %q after %v; pod status: %v", pod.GetName(), time.Since(startLogTime), p.Status)
}
}
}

Expand Down

0 comments on commit 00dcbab

Please sign in to comment.