Skip to content

Commit

Permalink
Deleting Unhealthy game servers
Browse files Browse the repository at this point in the history
  • Loading branch information
Dimitris Gkanatsios authored and dgkanatsios committed Jun 16, 2022
1 parent ddc175c commit c3a451f
Show file tree
Hide file tree
Showing 18 changed files with 3,177 additions and 2,719 deletions.
2 changes: 1 addition & 1 deletion cmd/e2e/build_crashing_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ var _ = Describe("Crashing Build", func() {

Eventually(func(g Gomega) {
var gsList mpsv1alpha1.GameServerList
err := kubeClient.List(ctx, &gsList, client.MatchingLabels{"BuildName": testBuildCrashingName})
err := kubeClient.List(ctx, &gsList, client.MatchingLabels{LabelBuildName: testBuildCrashingName})
Expect(err).ToNot(HaveOccurred())
Expect(len(gsList.Items)).To(Equal(2))
gs := gsList.Items[0]
Expand Down
2 changes: 1 addition & 1 deletion cmd/e2e/build_host_network_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ var _ = Describe("Build with hostnetwork", func() {

Eventually(func(g Gomega) {
var gsList mpsv1alpha1.GameServerList
err := kubeClient.List(ctx, &gsList, client.MatchingLabels{"BuildName": testBuildWithHostNetworkName})
err := kubeClient.List(ctx, &gsList, client.MatchingLabels{LabelBuildName: testBuildWithHostNetworkName})
Expect(err).ToNot(HaveOccurred())
Expect(len(gsList.Items)).To(Equal(3))
gs := gsList.Items[0]
Expand Down
2 changes: 1 addition & 1 deletion cmd/e2e/build_sleep_before_readyforplayers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ var _ = Describe("Build which sleeps before calling GSDK ReadyForPlayers", func(

Eventually(func(g Gomega) {
var gsList mpsv1alpha1.GameServerList
err := kubeClient.List(ctx, &gsList, client.MatchingLabels{"BuildName": testBuildSleepBeforeReadyForPlayersName})
err := kubeClient.List(ctx, &gsList, client.MatchingLabels{LabelBuildName: testBuildSleepBeforeReadyForPlayersName})
Expect(err).ToNot(HaveOccurred())
Expect(len(gsList.Items)).To(Equal(3))
gs := gsList.Items[0]
Expand Down
314 changes: 314 additions & 0 deletions cmd/e2e/build_unhealthy_gameservers_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,314 @@
package main

import (
"context"
"crypto/tls"

"github.com/google/uuid"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
mpsv1alpha1 "github.com/playfab/thundernetes/pkg/operator/api/v1alpha1"
"k8s.io/client-go/kubernetes"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
)

// this file contains tests to verify that Unhealthy GameServers get deleted
// this test checks if StandingBy and Active GameServers that are marked as Unhealthy are properly deleted.
// The flow is:
//   - create a GameServerBuild, wait for 2 StandingBy GameServers, then scale StandingBy to 3
//   - mark all 3 StandingBy GameServers as Unhealthy and wait until 3 Healthy GameServers with new names exist
//   - allocate one GameServer, mark the Active one as Unhealthy and wait until it is gone
var _ = Describe("Regular GameServerBuild", func() {
	testBuildWithUnhealthyGameServersName := "unhealthygameservers"
	testBuildWithUnhealthyGameServersID := "8512e812-c82f-4b45-86c5-9d2b1ae3d6f6"
	It("should delete the Unhealthy GameServers and replace them with Healthy ones", func() {
		cert, err := tls.LoadX509KeyPair(certFile, keyFile)
		Expect(err).ToNot(HaveOccurred())

		ctx := context.Background()
		kubeConfig := ctrl.GetConfigOrDie()
		kubeClient, err := createKubeClient(kubeConfig)
		Expect(err).ToNot(HaveOccurred())
		err = kubeClient.Create(ctx, createE2eBuild(testBuildWithUnhealthyGameServersName, testBuildWithUnhealthyGameServersID, img))
		Expect(err).ToNot(HaveOccurred())

		coreClient, err := kubernetes.NewForConfig(kubeConfig)
		Expect(err).ToNot(HaveOccurred())

		Eventually(func(g Gomega) {
			state := buildState{
				buildName:       testBuildWithUnhealthyGameServersName,
				buildID:         testBuildWithUnhealthyGameServersID,
				standingByCount: 2,
				podRunningCount: 2,
				gsbHealth:       mpsv1alpha1.BuildHealthy,
			}
			g.Expect(verifyGameServerBuildOverall(ctx, kubeClient, state)).To(Succeed())
		}, timeout, interval).Should(Succeed())

		// update the standingBy to 3
		gsb := &mpsv1alpha1.GameServerBuild{}
		err = kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithUnhealthyGameServersName, Namespace: testNamespace}, gsb)
		Expect(err).ToNot(HaveOccurred())
		gsbPatch := client.MergeFrom(gsb.DeepCopy())
		gsb.Spec.StandingBy = 3
		err = kubeClient.Patch(ctx, gsb, gsbPatch)
		Expect(err).ToNot(HaveOccurred())

		Eventually(func(g Gomega) {
			state := buildState{
				buildName:       testBuildWithUnhealthyGameServersName,
				buildID:         testBuildWithUnhealthyGameServersID,
				standingByCount: 3,
				podRunningCount: 3,
				gsbHealth:       mpsv1alpha1.BuildHealthy,
			}
			g.Expect(verifyGameServerBuildOverall(ctx, kubeClient, state)).To(Succeed())
		}, timeout, interval).Should(Succeed())

		// get the names of the game servers so we can mark them as Unhealthy
		// and later make sure that they disappeared
		var gsList mpsv1alpha1.GameServerList
		err = kubeClient.List(ctx, &gsList, client.MatchingLabels{LabelBuildName: testBuildWithUnhealthyGameServersName})
		Expect(err).ToNot(HaveOccurred())
		Expect(gsList.Items).To(HaveLen(3))

		// set of the names of the GameServers that are about to be marked as Unhealthy
		gsNames := make(map[string]struct{}, len(gsList.Items))
		for i := range gsList.Items {
			gsNames[gsList.Items[i].Name] = struct{}{}
		}

		// mark these gameservers as Unhealthy
		// under normal circumstances, this can happen if they never send a heartbeat, if they are late in sending a heartbeat
		// or if they mark themselves as Unhealthy via the relevant GSDK call
		// check NodeAgent for relevant code
		for i := range gsList.Items {
			// index into the slice instead of taking the address of the range variable
			gs := &gsList.Items[i]
			gsPatch := client.MergeFrom(gs.DeepCopy())
			gs.Status.Health = mpsv1alpha1.GameServerUnhealthy
			err = kubeClient.Status().Patch(ctx, gs, gsPatch)
			Expect(err).ToNot(HaveOccurred())
		}

		// make sure 3 new servers were created to replace the ones that were deleted
		Eventually(func(g Gomega) {
			state := buildState{
				buildName:       testBuildWithUnhealthyGameServersName,
				buildID:         testBuildWithUnhealthyGameServersID,
				standingByCount: 3,
				podRunningCount: 3,
				gsbHealth:       mpsv1alpha1.BuildHealthy,
			}
			g.Expect(verifyGameServerBuildOverall(ctx, kubeClient, state)).To(Succeed())

			// get the names of the new game servers
			// err is local to the closure so retries do not write to the outer variable
			var newGsList mpsv1alpha1.GameServerList
			err := kubeClient.List(ctx, &newGsList, client.MatchingLabels{LabelBuildName: testBuildWithUnhealthyGameServersName})
			g.Expect(err).ToNot(HaveOccurred())
			g.Expect(newGsList.Items).To(HaveLen(3))

			// make sure they have different names than the ones that were deleted
			for _, gs := range newGsList.Items {
				g.Expect(gs.Status.Health).To(Equal(mpsv1alpha1.GameServerHealthy))
				g.Expect(gsNames).ToNot(HaveKey(gs.Name))
			}
		}, timeout, interval).Should(Succeed())

		// allocate a game server
		sessionID := uuid.New().String()
		err = allocate(testBuildWithUnhealthyGameServersID, sessionID, cert)
		Expect(err).ToNot(HaveOccurred())

		// so we now should have 1 active and 3 standingBy
		Eventually(func(g Gomega) {
			state := buildState{
				buildName:       testBuildWithUnhealthyGameServersName,
				buildID:         testBuildWithUnhealthyGameServersID,
				standingByCount: 3,
				activeCount:     1,
				podRunningCount: 4,
				gsbHealth:       mpsv1alpha1.BuildHealthy,
			}
			g.Expect(verifyGameServerBuildOverall(ctx, kubeClient, state)).To(Succeed())
		}, timeout, interval).Should(Succeed())

		Expect(validateThatAllocatedServersHaveReadyForPlayersUnblocked(ctx, kubeClient, coreClient, testBuildWithUnhealthyGameServersID, 1)).To(Succeed())

		// get the active game server so we can mark it as Unhealthy
		// and later make sure that it was deleted
		// a fresh list is used so nothing from the earlier List call is carried over
		var allGsList mpsv1alpha1.GameServerList
		err = kubeClient.List(ctx, &allGsList, client.MatchingLabels{LabelBuildName: testBuildWithUnhealthyGameServersName})
		Expect(err).ToNot(HaveOccurred())
		Expect(allGsList.Items).To(HaveLen(4))

		// get the active game server
		var activeGs mpsv1alpha1.GameServer
		for i := range allGsList.Items {
			if allGsList.Items[i].Status.State == mpsv1alpha1.GameServerStateActive {
				activeGs = allGsList.Items[i]
				break
			}
		}
		// an empty name means no Active GameServer was found in the list
		Expect(activeGs.Name).ToNot(BeEmpty())

		// mark this Active GameServer as Unhealthy
		// to verify that the controller is deleting GameServers that are Active but they turn Unhealthy
		activeGsPatch := client.MergeFrom(activeGs.DeepCopy())
		activeGs.Status.Health = mpsv1alpha1.GameServerUnhealthy
		err = kubeClient.Status().Patch(ctx, &activeGs, activeGsPatch)
		Expect(err).ToNot(HaveOccurred())

		// make sure the active was deleted and we have 3 standingBy
		Eventually(func(g Gomega) {
			state := buildState{
				buildName:       testBuildWithUnhealthyGameServersName,
				buildID:         testBuildWithUnhealthyGameServersID,
				standingByCount: 3,
				podRunningCount: 3,
				gsbHealth:       mpsv1alpha1.BuildHealthy,
			}
			g.Expect(verifyGameServerBuildOverall(ctx, kubeClient, state)).To(Succeed())

			// get the names of the game servers
			var remainingGsList mpsv1alpha1.GameServerList
			err := kubeClient.List(ctx, &remainingGsList, client.MatchingLabels{LabelBuildName: testBuildWithUnhealthyGameServersName})
			g.Expect(err).ToNot(HaveOccurred())
			g.Expect(remainingGsList.Items).To(HaveLen(3))

			// make sure they have different names than the active that we deleted
			for _, gs := range remainingGsList.Items {
				g.Expect(gs.Status.Health).To(Equal(mpsv1alpha1.GameServerHealthy))
				g.Expect(gs.Name).ToNot(Equal(activeGs.Name))
			}
		}, timeout, interval).Should(Succeed())
	})
})

// this test verifies that GameServers that do not call the ReadyForPlayers() GSDK method (thus, they stay stuck in Initializing state)
// and are marked as Unhealthy for any reason (e.g. missing heartbeat)
// will eventually be deleted and replaced by new GameServers with different names
var _ = Describe("GameServerBuild with Unhealthy GameServers without ReadyForPlayers", func() {
	testBuildUnhealthyGameServersWithoutReadyForPlayersName := "withoutreadyforplayersunhealthy"
	testBuildUnhealthyGameServersWithoutReadyForPlayersID := "85ffe8da-c82f-a12e-86c5-9d2b7652d6f8"
	It("should delete unhealthy GameServers and replace them with healthy ones", func() {
		ctx := context.Background()
		kubeConfig := ctrl.GetConfigOrDie()
		kubeClient, err := createKubeClient(kubeConfig)
		Expect(err).ToNot(HaveOccurred())
		err = kubeClient.Create(ctx, createBuildWithoutReadyForPlayers(testBuildUnhealthyGameServersWithoutReadyForPlayersName, testBuildUnhealthyGameServersWithoutReadyForPlayersID, img))
		Expect(err).ToNot(HaveOccurred())

		// wait until the GameServerBuild exists and has 2 GameServers in Initializing state
		Eventually(func(g Gomega) {
			gsb := &mpsv1alpha1.GameServerBuild{}
			err := kubeClient.Get(ctx, client.ObjectKey{Name: testBuildUnhealthyGameServersWithoutReadyForPlayersName, Namespace: testNamespace}, gsb)
			g.Expect(err).ToNot(HaveOccurred())
			state := buildState{
				buildName:         testBuildUnhealthyGameServersWithoutReadyForPlayersName,
				buildID:           testBuildUnhealthyGameServersWithoutReadyForPlayersID,
				initializingCount: 2,
				standingByCount:   0,
				podRunningCount:   2,
				gsbHealth:         mpsv1alpha1.BuildHealthy,
			}
			g.Expect(verifyGameServerBuildOverall(ctx, kubeClient, state)).To(Succeed())
		}, timeout, interval).Should(Succeed())

		// get the names of the game servers so we can mark them as Unhealthy
		// and later make sure that they disappeared
		var gsList mpsv1alpha1.GameServerList
		err = kubeClient.List(ctx, &gsList, client.MatchingLabels{LabelBuildName: testBuildUnhealthyGameServersWithoutReadyForPlayersName})
		Expect(err).ToNot(HaveOccurred())
		Expect(gsList.Items).To(HaveLen(2))

		// set of the names of the GameServers that are about to be marked as Unhealthy
		gsNames := make(map[string]struct{}, len(gsList.Items))
		for i := range gsList.Items {
			gsNames[gsList.Items[i].Name] = struct{}{}
		}

		// mark these gameservers as unhealthy
		for i := range gsList.Items {
			// index into the slice instead of taking the address of the range variable
			gs := &gsList.Items[i]
			gsPatch := client.MergeFrom(gs.DeepCopy())
			gs.Status.Health = mpsv1alpha1.GameServerUnhealthy
			err = kubeClient.Status().Patch(ctx, gs, gsPatch)
			Expect(err).ToNot(HaveOccurred())
		}

		// make sure 2 more servers were created to replace the ones that were deleted
		Eventually(func(g Gomega) {
			state := buildState{
				buildName:         testBuildUnhealthyGameServersWithoutReadyForPlayersName,
				buildID:           testBuildUnhealthyGameServersWithoutReadyForPlayersID,
				initializingCount: 2,
				standingByCount:   0,
				podRunningCount:   2,
				gsbHealth:         mpsv1alpha1.BuildHealthy,
			}
			g.Expect(verifyGameServerBuildOverall(ctx, kubeClient, state)).To(Succeed())

			// get the names of the new game servers
			// err is local to the closure so retries do not write to the outer variable
			var newGsList mpsv1alpha1.GameServerList
			err := kubeClient.List(ctx, &newGsList, client.MatchingLabels{LabelBuildName: testBuildUnhealthyGameServersWithoutReadyForPlayersName})
			g.Expect(err).ToNot(HaveOccurred())
			g.Expect(newGsList.Items).To(HaveLen(2))

			// make sure they have different names than the ones that were deleted
			for _, gs := range newGsList.Items {
				g.Expect(gs.Status.Health).To(Equal(mpsv1alpha1.GameServerHealthy))
				g.Expect(gsNames).ToNot(HaveKey(gs.Name))
			}
		}, timeout, interval).Should(Succeed())
	})
})

// this test verifies that GameServers that do not integrate with GSDK and are marked as Unhealthy
// will eventually be deleted, and that the GameServerBuild itself is reported as Unhealthy
// once at least 2 crashes have been counted
var _ = Describe("GameServerBuild with GameServers without Gsdk", func() {
	testBuildWithoutGsdkName := "withoutgsdk"
	testBuildWithoutGsdkID := "8511e8da-c82f-a12e-86c5-9d2b76528356"
	It("should delete unhealthy GameServers and replace them with healthy ones", func() {
		ctx := context.Background()
		kubeConfig := ctrl.GetConfigOrDie()
		kubeClient, err := createKubeClient(kubeConfig)
		Expect(err).ToNot(HaveOccurred())
		err = kubeClient.Create(ctx, createBuildWithoutGsdk(testBuildWithoutGsdkName, testBuildWithoutGsdkID, img))
		Expect(err).ToNot(HaveOccurred())

		// get the names of the game servers so we can mark them as Unhealthy
		// this is essentially simulating the NodeAgent marking these GameServers as unhealthy
		// because they didn't send a heartbeat within a period of time
		var gsList mpsv1alpha1.GameServerList
		Eventually(func(g Gomega) {
			// list into a fresh value on every attempt and publish it only after
			// the assertions passed, so a failed attempt never leaks a partial list
			var current mpsv1alpha1.GameServerList
			err := kubeClient.List(ctx, &current, client.MatchingLabels{LabelBuildName: testBuildWithoutGsdkName})
			g.Expect(err).ToNot(HaveOccurred())
			g.Expect(current.Items).To(HaveLen(2))
			gsList = current
		}, timeout, interval).Should(Succeed())

		// actually mark these gameservers as unhealthy
		for i := range gsList.Items {
			// index into the slice instead of taking the address of the range variable
			gs := &gsList.Items[i]
			gsPatch := client.MergeFrom(gs.DeepCopy())
			gs.Status.Health = mpsv1alpha1.GameServerUnhealthy
			err = kubeClient.Status().Patch(ctx, gs, gsPatch)
			Expect(err).ToNot(HaveOccurred())
		}

		// make sure that GameServerBuild is Unhealthy (since CrashesToMarkUnhealthy threshold was reached)
		Eventually(func(g Gomega) {
			gsb := &mpsv1alpha1.GameServerBuild{}
			err := kubeClient.Get(ctx, client.ObjectKey{Name: testBuildWithoutGsdkName, Namespace: testNamespace}, gsb)
			g.Expect(err).ToNot(HaveOccurred())
			g.Expect(gsb.Status.Health).To(Equal(mpsv1alpha1.BuildUnhealthy))
			// BeNumerically reports the actual count on failure instead of just "false"
			g.Expect(gsb.Status.CrashesCount).To(BeNumerically(">=", 2))
		}, timeout, interval).Should(Succeed())
	})
})

// createBuildWithoutGsdk returns the GameServerBuild produced by createTestBuild
// with two adjustments: CrashesToMarkUnhealthy is set to 2 and the command of the
// first container is replaced with a plain shell "sleep 3600".
// NOTE(review): presumably the sleep command is what keeps the container from
// talking to GSDK, since the regular game server process is never started - confirm.
func createBuildWithoutGsdk(buildName, buildID, img string) *mpsv1alpha1.GameServerBuild {
	build := createTestBuild(buildName, buildID, img)
	build.Spec.CrashesToMarkUnhealthy = 2

	// only the first container of the Pod template is modified
	firstContainer := &build.Spec.Template.Spec.Containers[0]
	firstContainer.Command = []string{"/bin/sh", "-c", "sleep 3600"}

	return build
}
4 changes: 2 additions & 2 deletions cmd/e2e/build_without_readyforplayers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
)

var _ = Describe("Build without ReadyForPlayers GSDK call", func() {
var _ = Describe("GameServerBuild without ReadyForPlayers GSDK call", func() {
testBuildWithoutReadyForPlayers := "withoutreadyforplayers"
testWithoutReadyForPlayersBuildID := "85ffe8da-c82f-4035-86c5-9d2b5f42d6f8"
It("should have GameServers stuck in Initializing", func() {
Expand Down Expand Up @@ -112,7 +112,7 @@ var _ = Describe("Build without ReadyForPlayers GSDK call", func() {

Eventually(func(g Gomega) {
var gsList mpsv1alpha1.GameServerList
err := kubeClient.List(ctx, &gsList, client.MatchingLabels{"BuildName": testBuildWithoutReadyForPlayers})
err := kubeClient.List(ctx, &gsList, client.MatchingLabels{LabelBuildName: testBuildWithoutReadyForPlayers})
Expect(err).ToNot(HaveOccurred())
Expect(len(gsList.Items)).To(Equal(2))
gs := gsList.Items[0]
Expand Down
3 changes: 2 additions & 1 deletion cmd/e2e/utilities_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ const (
testNamespace = "e2e"
connectedPlayersCount = 3 // this should be the same as in the netcore sample
LabelBuildID = "BuildID"
LabelBuildName = "BuildName"
invalidStatusCode string = "invalid status code"
containerName string = "netcore-sample" // this must be the same as the GameServer name
nodeAgentName string = "nodeagent"
Expand Down Expand Up @@ -314,7 +315,7 @@ func verifyPodsInHostNetwork(ctx context.Context, kubeClient client.Client, gsb
var pods = corev1.PodList{}
opts := []client.ListOption{
client.InNamespace(gsb.Namespace),
client.MatchingLabels{"BuildName": gsb.Name, "BuildID": gsb.Spec.BuildID},
client.MatchingLabels{LabelBuildName: gsb.Name, "BuildID": gsb.Spec.BuildID},
}

if err := kubeClient.List(ctx, &pods, opts...); err != nil {
Expand Down
Loading

0 comments on commit c3a451f

Please sign in to comment.