Skip to content

Commit

Permalink
Hope to reduce e2e flakiness (#2348)
Browse files Browse the repository at this point in the history
A few things added to this PR to help combat/diagnose the e2e
test flakines:

* Enhanced logging. Await for Fleet condition functions now log which
test they are part of, and do dumps of gameserver state and events if a
Fleet does not match the criteria.
* New logging methods, such as the ability to dump all an object's
events.
* Retry on UDP message. Didn't seem to need this before (or only very
rarely), but apparently do now.

🤞🏻 this makes a difference!
  • Loading branch information
markmandel authored Oct 29, 2021
1 parent fc6e787 commit 296fbb7
Show file tree
Hide file tree
Showing 6 changed files with 133 additions and 95 deletions.
8 changes: 4 additions & 4 deletions test/e2e/controller/crash_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import (
"time"

agonesv1 "agones.dev/agones/pkg/apis/agones/v1"
"github.com/sirupsen/logrus"
e2eframework "agones.dev/agones/test/e2e/framework"
"github.com/stretchr/testify/assert"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand All @@ -29,11 +29,11 @@ import (
)

func TestGameServerUnhealthyAfterDeletingPodWhileControllerDown(t *testing.T) {
logger := logrus.WithField("test", t.Name())
logger := e2eframework.TestLogger(t)
gs := framework.DefaultGameServer(defaultNs)
ctx := context.Background()

readyGs, err := framework.CreateGameServerAndWaitUntilReady(defaultNs, gs)
readyGs, err := framework.CreateGameServerAndWaitUntilReady(t, defaultNs, gs)
if err != nil {
t.Fatalf("Could not get a GameServer ready: %v", err)
}
Expand All @@ -52,7 +52,7 @@ func TestGameServerUnhealthyAfterDeletingPodWhileControllerDown(t *testing.T) {
err = podClient.Delete(ctx, pod.ObjectMeta.Name, metav1.DeleteOptions{})
assert.NoError(t, err)

_, err = framework.WaitForGameServerState(readyGs, agonesv1.GameServerStateUnhealthy, 3*time.Minute)
_, err = framework.WaitForGameServerState(t, readyGs, agonesv1.GameServerStateUnhealthy, 3*time.Minute)
assert.NoError(t, err)
logger.Info("waiting for Agones controller to come back to running")
assert.NoError(t, waitForAgonesControllerRunning(ctx))
Expand Down
30 changes: 15 additions & 15 deletions test/e2e/fleet_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ func TestFleetScaleUpEditAndScaleDown(t *testing.T) {
framework.AssertFleetCondition(t, flt, e2e.FleetReadyCount(targetScale))
gsa := framework.CreateAndApplyAllocation(t, flt)

framework.AssertFleetCondition(t, flt, func(fleet *agonesv1.Fleet) bool {
framework.AssertFleetCondition(t, flt, func(log *logrus.Entry, fleet *agonesv1.Fleet) bool {
return fleet.Status.AllocatedReplicas == 1
})

Expand Down Expand Up @@ -210,7 +210,7 @@ func TestFleetScaleUpEditAndScaleDown(t *testing.T) {

framework.AssertFleetCondition(t, flt, e2e.FleetReadyCount(1))

framework.AssertFleetCondition(t, flt, func(fleet *agonesv1.Fleet) bool {
framework.AssertFleetCondition(t, flt, func(log *logrus.Entry, fleet *agonesv1.Fleet) bool {
return fleet.Status.AllocatedReplicas == 0
})
})
Expand Down Expand Up @@ -334,7 +334,7 @@ func TestFleetRollingUpdate(t *testing.T) {

framework.AssertFleetCondition(t, flt, e2e.FleetReadyCount(1))

framework.AssertFleetCondition(t, flt, func(fleet *agonesv1.Fleet) bool {
framework.AssertFleetCondition(t, flt, func(log *logrus.Entry, fleet *agonesv1.Fleet) bool {
return fleet.Status.AllocatedReplicas == 0
})
})
Expand Down Expand Up @@ -432,7 +432,7 @@ func TestScaleFleetUpAndDownWithGameServerAllocation(t *testing.T) {
gsa, err = framework.AgonesClient.AllocationV1().GameServerAllocations(framework.Namespace).Create(ctx, gsa, metav1.CreateOptions{})
assert.Nil(t, err)
assert.Equal(t, allocationv1.GameServerAllocationAllocated, gsa.Status.State)
framework.AssertFleetCondition(t, flt, func(fleet *agonesv1.Fleet) bool {
framework.AssertFleetCondition(t, flt, func(log *logrus.Entry, fleet *agonesv1.Fleet) bool {
return fleet.Status.AllocatedReplicas == 1
})

Expand All @@ -452,7 +452,7 @@ func TestScaleFleetUpAndDownWithGameServerAllocation(t *testing.T) {
assert.Nil(t, err)
framework.AssertFleetCondition(t, flt, e2e.FleetReadyCount(1))

framework.AssertFleetCondition(t, flt, func(fleet *agonesv1.Fleet) bool {
framework.AssertFleetCondition(t, flt, func(log *logrus.Entry, fleet *agonesv1.Fleet) bool {
return fleet.Status.AllocatedReplicas == 0
})
})
Expand Down Expand Up @@ -553,7 +553,7 @@ func TestUpdateGameServerConfigurationInFleet(t *testing.T) {
gsa, err = framework.AgonesClient.AllocationV1().GameServerAllocations(framework.Namespace).Create(ctx, gsa, metav1.CreateOptions{})
assert.Nil(t, err, "cloud not create gameserver allocation")
assert.Equal(t, allocationv1.GameServerAllocationAllocated, gsa.Status.State)
framework.AssertFleetCondition(t, flt, func(fleet *agonesv1.Fleet) bool {
framework.AssertFleetCondition(t, flt, func(log *logrus.Entry, fleet *agonesv1.Fleet) bool {
return fleet.Status.AllocatedReplicas == 1
})

Expand Down Expand Up @@ -604,7 +604,7 @@ func TestReservedGameServerInFleet(t *testing.T) {
assert.NoError(t, err)

// make sure counts are correct
framework.AssertFleetCondition(t, flt, func(fleet *agonesv1.Fleet) bool {
framework.AssertFleetCondition(t, flt, func(log *logrus.Entry, fleet *agonesv1.Fleet) bool {
return fleet.Status.ReadyReplicas == 2 && fleet.Status.ReservedReplicas == 1
})

Expand All @@ -613,9 +613,9 @@ func TestReservedGameServerInFleet(t *testing.T) {
framework.AssertFleetCondition(t, flt, e2e.FleetReadyCount(0))

// one should be left behind
framework.AssertFleetCondition(t, flt, func(fleet *agonesv1.Fleet) bool {
framework.AssertFleetCondition(t, flt, func(log *logrus.Entry, fleet *agonesv1.Fleet) bool {
result := fleet.Status.ReservedReplicas == 1
logrus.WithField("reserved", fleet.Status.ReservedReplicas).WithField("result", result).Info("waiting for 1 reserved replica")
log.WithField("reserved", fleet.Status.ReservedReplicas).WithField("result", result).Info("waiting for 1 reserved replica")
return result
})

Expand Down Expand Up @@ -1114,7 +1114,7 @@ func TestFleetWithLongLabelsAnnotations(t *testing.T) {
assert.Error(t, err)
statusErr, ok = err.(*k8serrors.StatusError)
assert.True(t, ok)
assert.Len(t, statusErr.Status().Details.Causes, 1)
require.Len(t, statusErr.Status().Details.Causes, 1)
assert.Equal(t, "annotations", statusErr.Status().Details.Causes[0].Field)
assert.Equal(t, metav1.CauseTypeFieldValueInvalid, statusErr.Status().Details.Causes[0].Type)

Expand Down Expand Up @@ -1309,13 +1309,13 @@ func TestFleetAggregatedPlayerStatus(t *testing.T) {
flt, err := client.Fleets(framework.Namespace).Create(ctx, flt.DeepCopy(), metav1.CreateOptions{})
assert.NoError(t, err)

framework.AssertFleetCondition(t, flt, func(fleet *agonesv1.Fleet) bool {
framework.AssertFleetCondition(t, flt, func(log *logrus.Entry, fleet *agonesv1.Fleet) bool {
if fleet.Status.Players == nil {
logrus.WithField("status", fleet.Status).Info("No Players")
log.WithField("status", fleet.Status).Info("No Players")
return false
}

logrus.WithField("status", fleet.Status).Info("Checking Capacity")
log.WithField("status", fleet.Status).Info("Checking Capacity")
return fleet.Status.Players.Capacity == 30
})

Expand Down Expand Up @@ -1356,8 +1356,8 @@ func TestFleetAggregatedPlayerStatus(t *testing.T) {
}
}

framework.AssertFleetCondition(t, flt, func(fleet *agonesv1.Fleet) bool {
logrus.WithField("players", fleet.Status.Players).WithField("totalCapacity", totalCapacity).
framework.AssertFleetCondition(t, flt, func(log *logrus.Entry, fleet *agonesv1.Fleet) bool {
log.WithField("players", fleet.Status.Players).WithField("totalCapacity", totalCapacity).
WithField("totalPlayers", totalPlayers).Info("Checking Capacity")
// since UDP packets might fail, we might get an extra player, so we'll check for that.
return (fleet.Status.Players.Capacity == int64(totalCapacity)) && (fleet.Status.Players.Count >= int64(totalPlayers))
Expand Down
18 changes: 9 additions & 9 deletions test/e2e/fleetautoscaler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ func TestAutoscalerBasicFunctions(t *testing.T) {

// do an allocation and watch the fleet scale up
gsa := framework.CreateAndApplyAllocation(t, flt)
framework.AssertFleetCondition(t, flt, func(fleet *agonesv1.Fleet) bool {
framework.AssertFleetCondition(t, flt, func(log *logrus.Entry, fleet *agonesv1.Fleet) bool {
return fleet.Status.AllocatedReplicas == 1
})

Expand All @@ -115,7 +115,7 @@ func TestAutoscalerBasicFunctions(t *testing.T) {
assert.True(t, fas.Status.AbleToScale, "Could not get AbleToScale status")

// check that we are able to scale
framework.WaitForFleetAutoScalerCondition(t, fas, func(fas *autoscalingv1.FleetAutoscaler) bool {
framework.WaitForFleetAutoScalerCondition(t, fas, func(log *logrus.Entry, fas *autoscalingv1.FleetAutoscaler) bool {
return !fas.Status.ScalingLimited
})

Expand All @@ -124,15 +124,15 @@ func TestAutoscalerBasicFunctions(t *testing.T) {
assert.Nil(t, err, "could not patch fleetautoscaler")

// check that we are not able to scale
framework.WaitForFleetAutoScalerCondition(t, fas, func(fas *autoscalingv1.FleetAutoscaler) bool {
framework.WaitForFleetAutoScalerCondition(t, fas, func(log *logrus.Entry, fas *autoscalingv1.FleetAutoscaler) bool {
return fas.Status.ScalingLimited
})

// delete the allocated GameServer and watch the fleet scale down
gp := int64(1)
err = stable.GameServers(framework.Namespace).Delete(ctx, gsa.Status.GameServerName, metav1.DeleteOptions{GracePeriodSeconds: &gp})
assert.Nil(t, err)
framework.AssertFleetCondition(t, flt, func(fleet *agonesv1.Fleet) bool {
framework.AssertFleetCondition(t, flt, func(log *logrus.Entry, fleet *agonesv1.Fleet) bool {
return fleet.Status.AllocatedReplicas == 0 &&
fleet.Status.ReadyReplicas == 1 &&
fleet.Status.Replicas == 1
Expand Down Expand Up @@ -236,7 +236,7 @@ func TestFleetAutoScalerRollingUpdate(t *testing.T) {
assert.True(t, fas.Status.AbleToScale, "Could not get AbleToScale status")

// check that we are able to scale
framework.WaitForFleetAutoScalerCondition(t, fas, func(fas *autoscalingv1.FleetAutoscaler) bool {
framework.WaitForFleetAutoScalerCondition(t, fas, func(log *logrus.Entry, fas *autoscalingv1.FleetAutoscaler) bool {
return !fas.Status.ScalingLimited
})

Expand Down Expand Up @@ -471,11 +471,11 @@ func TestAutoscalerWebhook(t *testing.T) {
assert.FailNow(t, "Failed creating autoscaler, aborting TestAutoscalerWebhook")
}
framework.CreateAndApplyAllocation(t, flt)
framework.AssertFleetCondition(t, flt, func(fleet *agonesv1.Fleet) bool {
framework.AssertFleetCondition(t, flt, func(log *logrus.Entry, fleet *agonesv1.Fleet) bool {
return fleet.Status.AllocatedReplicas == 1
})

framework.AssertFleetCondition(t, flt, func(fleet *agonesv1.Fleet) bool {
framework.AssertFleetCondition(t, flt, func(log *logrus.Entry, fleet *agonesv1.Fleet) bool {
return fleet.Status.Replicas > initialReplicasCount
})

Expand Down Expand Up @@ -701,11 +701,11 @@ func TestFleetAutoscalerTLSWebhook(t *testing.T) {
assert.FailNow(t, "Failed creating autoscaler, aborting TestTlsWebhook")
}
framework.CreateAndApplyAllocation(t, flt)
framework.AssertFleetCondition(t, flt, func(fleet *agonesv1.Fleet) bool {
framework.AssertFleetCondition(t, flt, func(log *logrus.Entry, fleet *agonesv1.Fleet) bool {
return fleet.Status.AllocatedReplicas == 1
})

framework.AssertFleetCondition(t, flt, func(fleet *agonesv1.Fleet) bool {
framework.AssertFleetCondition(t, flt, func(log *logrus.Entry, fleet *agonesv1.Fleet) bool {
return fleet.Status.Replicas > initialReplicasCount
})
}
Expand Down
Loading

0 comments on commit 296fbb7

Please sign in to comment.