diff --git a/pkg/gameservers/controller.go b/pkg/gameservers/controller.go index fde5019556..e6faaf191c 100644 --- a/pkg/gameservers/controller.go +++ b/pkg/gameservers/controller.go @@ -867,6 +867,10 @@ func (c *Controller) syncGameServerRequestReadyState(ctx context.Context, gs *ag break } } + // Verify that we found the game server container - we may have a stale cache where pod is missing ContainerStatuses. + if _, ok := gsCopy.ObjectMeta.Annotations[agonesv1.GameServerReadyContainerIDAnnotation]; !ok { + return nil, workerqueue.NewDebugError(fmt.Errorf("game server container for GameServer %s in namespace %s not present in pod status, try again", gsCopy.ObjectMeta.Name, gsCopy.ObjectMeta.Namespace)) + } // Also update the pod with the same annotation, so we can check if the Pod data is up-to-date, now and also in the HealthController. // But if it is already set, then ignore it, since we only need to do this one time. diff --git a/pkg/gameservers/controller_test.go b/pkg/gameservers/controller_test.go index 9b3ffe8d32..6ad5e03115 100644 --- a/pkg/gameservers/controller_test.go +++ b/pkg/gameservers/controller_test.go @@ -1495,6 +1495,39 @@ func TestControllerSyncGameServerRequestReadyState(t *testing.T) { assert.False(t, podUpdated, "Pod was updated") }) + t.Run("GameServer whose pod is missing ContainerStatuses, so should retry and not update", func(t *testing.T) { + c, m := newFakeController() + + gsFixture := &agonesv1.GameServer{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "default"}, + Spec: newSingleContainerSpec(), Status: agonesv1.GameServerStatus{State: agonesv1.GameServerStateRequestReady}} + gsFixture.ApplyDefaults() + gsFixture.Status.NodeName = nodeName + pod, err := gsFixture.Pod(agtesting.FakeAPIHooks{}) + assert.Nil(t, err) + gsUpdated := false + podUpdated := false + + m.KubeClient.AddReactor("list", "pods", func(action k8stesting.Action) (bool, runtime.Object, error) { + return true, &corev1.PodList{Items: []corev1.Pod{*pod}}, nil + }) + m.AgonesClient.AddReactor("update", "gameservers", func(action k8stesting.Action) (bool, runtime.Object, error) { + gsUpdated = true + return true, nil, nil + }) + m.KubeClient.AddReactor("update", "pods", func(action k8stesting.Action) (bool, runtime.Object, error) { + podUpdated = true + return true, nil, nil + }) + + ctx, cancel := agtesting.StartInformers(m, c.podSynced) + defer cancel() + + _, err = c.syncGameServerRequestReadyState(ctx, gsFixture) + assert.EqualError(t, err, "game server container for GameServer test in namespace default not present in pod status, try again") + assert.False(t, gsUpdated, "GameServer was updated") + assert.False(t, podUpdated, "Pod was updated") + }) + t.Run("GameServer with non zero deletion datetime", func(t *testing.T) { testWithNonZeroDeletionTimestamp(t, func(c *Controller, fixture *agonesv1.GameServer) (*agonesv1.GameServer, error) { return c.syncGameServerRequestReadyState(context.Background(), fixture)