Skip to content

Commit

Permalink
refactor(runtime): make maxSkew configurable
Browse files Browse the repository at this point in the history
Read the TopologySpreadConstraints maxSkew from the YAML config
so we can tune based on cluster/game. By default it will be set to 5.
Using a low value lead to underutilized nodes which impacts in cost
  • Loading branch information
hspedro committed Oct 15, 2024
1 parent bde7835 commit 5f86120
Show file tree
Hide file tree
Showing 9 changed files with 31 additions and 16 deletions.
1 change: 1 addition & 0 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ adapters:
inCluster: false
masterUrl: "https://127.0.0.1:6443"
kubeconfig: "./kubeconfig/kubeconfig.yaml"
maxSkew: 5
grpc:
keepAlive:
time: 30s
Expand Down
2 changes: 1 addition & 1 deletion internal/adapters/runtime/kubernetes/game_room.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ const (
)

func (k *kubernetes) CreateGameRoomInstance(ctx context.Context, scheduler *entities.Scheduler, gameRoomName string, gameRoomSpec game_room.Spec) (*game_room.Instance, error) {
pod, err := convertGameRoomSpec(*scheduler, gameRoomName, gameRoomSpec)
pod, err := convertGameRoomSpec(*scheduler, gameRoomName, gameRoomSpec, k.config)
if err != nil {
return nil, errors.NewErrInvalidArgument("invalid game room spec: %s", err)
}
Expand Down
4 changes: 2 additions & 2 deletions internal/adapters/runtime/kubernetes/game_room_convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ var invalidPodWaitingStates = []string{
"RunContainerError",
}

func convertGameRoomSpec(scheduler entities.Scheduler, gameRoomName string, gameRoomSpec game_room.Spec) (*v1.Pod, error) {
func convertGameRoomSpec(scheduler entities.Scheduler, gameRoomName string, gameRoomSpec game_room.Spec, config KubernetesConfig) (*v1.Pod, error) {
defaultAnnotations := map[string]string{safeToEvictAnnotation: safeToEvictValue}
defaultLabels := map[string]string{
maestroLabelKey: maestroLabelValue,
Expand Down Expand Up @@ -95,7 +95,7 @@ func convertGameRoomSpec(scheduler entities.Scheduler, gameRoomName string, game
// 5. Add a conversion function
TopologySpreadConstraints: []v1.TopologySpreadConstraint{
{
MaxSkew: 1,
MaxSkew: int32(config.MaxSkew),
TopologyKey: "topology.kubernetes.io/zone",
WhenUnsatisfiable: v1.ScheduleAnyway,
LabelSelector: &metav1.LabelSelector{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -675,7 +675,7 @@ func TestConvertGameSpec(t *testing.T) {
// Name: test.scheduler,
// Annotations: test.expectedPod.ObjectMeta.Annotations,
//}
res, err := convertGameRoomSpec(test.scheduler, test.roomName, test.gameSpec)
res, err := convertGameRoomSpec(test.scheduler, test.roomName, test.gameSpec, KubernetesConfig{MaxSkew: 1})
if test.withError {
require.Error(t, err)
return
Expand Down
6 changes: 3 additions & 3 deletions internal/adapters/runtime/kubernetes/game_room_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ func TestGameRoomCreation(t *testing.T) {
ctx := context.Background()
gameRoomName := "some-game-room-name"
client := test.GetKubernetesClientSet(t, kubernetesContainer)
kubernetesRuntime := New(client)
kubernetesRuntime := New(client, KubernetesConfig{MaxSkew: 1})

t.Run("successfully create a room", func(t *testing.T) {
t.Parallel()
Expand Down Expand Up @@ -98,7 +98,7 @@ func TestGameRoomDeletion(t *testing.T) {
ctx := context.Background()
client := test.GetKubernetesClientSet(t, kubernetesContainer)
gameRoomName := "some-game-room"
kubernetesRuntime := New(client)
kubernetesRuntime := New(client, KubernetesConfig{MaxSkew: 1})

t.Run("successfully delete a room", func(t *testing.T) {
t.Parallel()
Expand Down Expand Up @@ -155,7 +155,7 @@ func TestCreateGameRoomName(t *testing.T) {
t.Parallel()
ctx := context.Background()
client := test.GetKubernetesClientSet(t, kubernetesContainer)
kubernetesRuntime := New(client)
kubernetesRuntime := New(client, KubernetesConfig{MaxSkew: 1})

t.Run("When scheduler name is greater than max name length minus randomLength", func(t *testing.T) {
t.Run("return the name with randomLength", func(t *testing.T) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ func TestGameRoomsWatch(t *testing.T) {
t.Parallel()
ctx := context.Background()
client := test.GetKubernetesClientSet(t, kubernetesContainer)
kubernetesRuntime := New(client)
kubernetesRuntime := New(client, KubernetesConfig{MaxSkew: 1})

scheduler := &entities.Scheduler{Name: "watch-room-addition"}
err := kubernetesRuntime.CreateScheduler(ctx, scheduler)
Expand Down Expand Up @@ -96,7 +96,7 @@ func TestGameRoomsWatch(t *testing.T) {
t.Parallel()
ctx := context.Background()
client := test.GetKubernetesClientSet(t, kubernetesContainer)
kubernetesRuntime := New(client)
kubernetesRuntime := New(client, KubernetesConfig{MaxSkew: 1})

scheduler := &entities.Scheduler{Name: "watch-room-ready"}
err := kubernetesRuntime.CreateScheduler(ctx, scheduler)
Expand Down Expand Up @@ -158,7 +158,7 @@ func TestGameRoomsWatch(t *testing.T) {
t.Parallel()
ctx := context.Background()
client := test.GetKubernetesClientSet(t, kubernetesContainer)
kubernetesRuntime := New(client)
kubernetesRuntime := New(client, KubernetesConfig{MaxSkew: 1})

scheduler := &entities.Scheduler{Name: "watch-room-error"}
err := kubernetesRuntime.CreateScheduler(ctx, scheduler)
Expand Down Expand Up @@ -213,7 +213,7 @@ func TestGameRoomsWatch(t *testing.T) {
t.Parallel()
ctx := context.Background()
client := test.GetKubernetesClientSet(t, kubernetesContainer)
kubernetesRuntime := New(client)
kubernetesRuntime := New(client, KubernetesConfig{MaxSkew: 1})

scheduler := &entities.Scheduler{Name: "watch-room-delete"}
err := kubernetesRuntime.CreateScheduler(ctx, scheduler)
Expand Down
13 changes: 12 additions & 1 deletion internal/adapters/runtime/kubernetes/kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,27 @@ import (

var _ ports.Runtime = (*kubernetes)(nil)

const DefaultMaxSkew = 5

type KubernetesConfig struct {
MaxSkew int
}

type kubernetes struct {
clientSet kube.Interface
logger *zap.Logger
eventRecorder record.EventRecorder
config KubernetesConfig
}

func New(clientSet kube.Interface) *kubernetes {
func New(clientSet kube.Interface, config KubernetesConfig) *kubernetes {
if config.MaxSkew <= 0 {
config.MaxSkew = DefaultMaxSkew
}
k := &kubernetes{
clientSet: clientSet,
logger: zap.L().With(zap.String(logs.LogFieldRuntime, "kubernetes")),
config: config,
}

eventBroadcaster := record.NewBroadcaster()
Expand Down
6 changes: 3 additions & 3 deletions internal/adapters/runtime/kubernetes/scheduler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ import (
func TestSchedulerCreation(t *testing.T) {
ctx := context.Background()
client := test.GetKubernetesClientSet(t, kubernetesContainer)
kubernetesRuntime := New(client)
kubernetesRuntime := New(client, KubernetesConfig{MaxSkew: 1})

t.Run("create single scheduler", func(t *testing.T) {
scheduler := &entities.Scheduler{Name: "single-scheduler-test", PdbMaxUnavailable: "5%"}
Expand All @@ -66,7 +66,7 @@ func TestSchedulerCreation(t *testing.T) {
func TestSchedulerDeletion(t *testing.T) {
ctx := context.Background()
client := test.GetKubernetesClientSet(t, kubernetesContainer)
kubernetesRuntime := New(client)
kubernetesRuntime := New(client, KubernetesConfig{MaxSkew: 1})

t.Run("delete scheduler", func(t *testing.T) {
scheduler := &entities.Scheduler{Name: "delete-scheduler-test", PdbMaxUnavailable: "5%"}
Expand All @@ -92,7 +92,7 @@ func TestSchedulerDeletion(t *testing.T) {
func TestPDBCreationAndDeletion(t *testing.T) {
ctx := context.Background()
client := test.GetKubernetesClientSet(t, kubernetesContainer)
kubernetesRuntime := New(client)
kubernetesRuntime := New(client, KubernetesConfig{MaxSkew: 1})

t.Run("create pdb from scheduler without autoscaling", func(t *testing.T) {
if !kubernetesRuntime.isPDBSupported() {
Expand Down
5 changes: 4 additions & 1 deletion internal/service/adapters.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ const (
runtimeKubernetesInClusterPath = "adapters.runtime.kubernetes.inCluster"
runtimeKubernetesQPS = "adapters.runtime.kubernetes.qps"
runtimeKubernetesBurst = "adapters.runtime.kubernetes.burst"
runtimeKubernetesMaxSkew = "adapters.runtime.kubernetes.maxSkew"
// Redis operation storage
operationStorageRedisURLPath = "adapters.operationStorage.redis.url"
operationLeaseStorageRedisURLPath = "adapters.operationLeaseStorage.redis.url"
Expand Down Expand Up @@ -146,7 +147,9 @@ func NewRuntimeKubernetes(c config.Config) (ports.Runtime, error) {
return nil, fmt.Errorf("failed to initialize Kubernetes runtime: %w", err)
}

return kubernetesRuntime.New(clientSet), nil
return kubernetesRuntime.New(clientSet, kubernetesRuntime.KubernetesConfig{
MaxSkew: c.GetInt(runtimeKubernetesMaxSkew),
}), nil
}

// NewOperationStorageRedis instantiates redis as operation storage.
Expand Down

0 comments on commit 5f86120

Please sign in to comment.