Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Players in-game metric for when PlayerTracking is enabled #2765

Merged
merged 17 commits into from
Oct 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 22 additions & 4 deletions pkg/metrics/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,11 +348,29 @@ func (c *Controller) recordGameServerStatusChanges(old, next interface{}) {
if !ok {
return
}
if newGs.Status.State != oldGs.Status.State {
fleetName := newGs.Labels[agonesv1.FleetNameLabel]
if fleetName == "" {
fleetName = noneValue

fleetName := newGs.Labels[agonesv1.FleetNameLabel]
if fleetName == "" {
fleetName = noneValue
}

if runtime.FeatureEnabled(runtime.FeaturePlayerTracking) &&
newGs.Status.Players != nil &&
oldGs.Status.Players != nil {

if newGs.Status.Players.Count != oldGs.Status.Players.Count {
recordWithTags(context.Background(), []tag.Mutator{tag.Upsert(keyFleetName, fleetName),
tag.Upsert(keyName, newGs.GetName()), tag.Upsert(keyNamespace, newGs.GetNamespace())}, gameServerPlayerConnectedTotal.M(newGs.Status.Players.Count))
}

if newGs.Status.Players.Capacity-newGs.Status.Players.Count != oldGs.Status.Players.Capacity-oldGs.Status.Players.Count {
recordWithTags(context.Background(), []tag.Mutator{tag.Upsert(keyFleetName, fleetName),
tag.Upsert(keyName, newGs.GetName()), tag.Upsert(keyNamespace, newGs.GetNamespace())}, gameServerPlayerCapacityTotal.M(newGs.Status.Players.Capacity-newGs.Status.Players.Count))
}

}

if newGs.Status.State != oldGs.Status.State {
recordWithTags(context.Background(), []tag.Mutator{tag.Upsert(keyType, string(newGs.Status.State)),
tag.Upsert(keyFleetName, fleetName), tag.Upsert(keyNamespace, newGs.GetNamespace())}, gameServerTotalStats.M(1))

Expand Down
46 changes: 32 additions & 14 deletions pkg/metrics/controller_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ const (
fleetAutoscalersLimitedName = "fleet_autoscalers_limited"
gameServersCountName = "gameservers_count"
gameServersTotalName = "gameservers_total"
gameServersPlayerConnectedTotalName = "gameserver_player_connected_total"
gameServersPlayerCapacityTotalName = "gameserver_player_capacity_total"
nodeCountName = "nodes_count"
gameServersNodeCountName = "gameservers_node_count"
gameServerStateDurationName = "gameserver_state_duration"
Expand All @@ -40,21 +42,23 @@ var (
fleetAutoscalerViews = []string{fleetAutoscalerBufferLimitName, fleetAutoscalterBufferSizeName, fleetAutoscalerCurrentReplicaCountName,
fleetAutoscalersDesiredReplicaCountName, fleetAutoscalersAbleToScaleName, fleetAutoscalersLimitedName}
// fleetViews are metric views associated with Fleets
fleetViews = append([]string{fleetReplicaCountName, gameServersCountName, gameServersTotalName, gameServerStateDurationName}, fleetAutoscalerViews...)
fleetViews = append([]string{fleetReplicaCountName, gameServersCountName, gameServersTotalName, gameServersPlayerConnectedTotalName, gameServersPlayerCapacityTotalName, gameServerStateDurationName}, fleetAutoscalerViews...)

stateDurationSeconds = []float64{0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384}
fleetsReplicasCountStats = stats.Int64("fleets/replicas_count", "The count of replicas per fleet", "1")
fasBufferLimitsCountStats = stats.Int64("fas/buffer_limits", "The buffer limits of autoscalers", "1")
fasBufferSizeStats = stats.Int64("fas/buffer_size", "The buffer size value of autoscalers", "1")
fasCurrentReplicasStats = stats.Int64("fas/current_replicas_count", "The current replicas cout as seen by autoscalers", "1")
fasDesiredReplicasStats = stats.Int64("fas/desired_replicas_count", "The desired replicas cout as seen by autoscalers", "1")
fasAbleToScaleStats = stats.Int64("fas/able_to_scale", "The fleet autoscaler can access the fleet to scale (0 indicates false, 1 indicates true)", "1")
fasLimitedStats = stats.Int64("fas/limited", "The fleet autoscaler is capped (0 indicates false, 1 indicates true)", "1")
gameServerCountStats = stats.Int64("gameservers/count", "The count of gameservers", "1")
gameServerTotalStats = stats.Int64("gameservers/total", "The total of gameservers", "1")
nodesCountStats = stats.Int64("nodes/count", "The count of nodes in the cluster", "1")
gsPerNodesCountStats = stats.Int64("gameservers_node/count", "The count of gameservers per node in the cluster", "1")
gsStateDurationSec = stats.Float64("gameservers_state/duration", "The duration of gameservers to be in a particular state", stats.UnitSeconds)
stateDurationSeconds = []float64{0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384}
fleetsReplicasCountStats = stats.Int64("fleets/replicas_count", "The count of replicas per fleet", "1")
fasBufferLimitsCountStats = stats.Int64("fas/buffer_limits", "The buffer limits of autoscalers", "1")
fasBufferSizeStats = stats.Int64("fas/buffer_size", "The buffer size value of autoscalers", "1")
fasCurrentReplicasStats = stats.Int64("fas/current_replicas_count", "The current replicas cout as seen by autoscalers", "1")
fasDesiredReplicasStats = stats.Int64("fas/desired_replicas_count", "The desired replicas cout as seen by autoscalers", "1")
fasAbleToScaleStats = stats.Int64("fas/able_to_scale", "The fleet autoscaler can access the fleet to scale (0 indicates false, 1 indicates true)", "1")
fasLimitedStats = stats.Int64("fas/limited", "The fleet autoscaler is capped (0 indicates false, 1 indicates true)", "1")
gameServerCountStats = stats.Int64("gameservers/count", "The count of gameservers", "1")
gameServerTotalStats = stats.Int64("gameservers/total", "The total of gameservers", "1")
gameServerPlayerConnectedTotal = stats.Int64("gameservers/player_connected", "The total number of players connected to gameservers", "1")
gameServerPlayerCapacityTotal = stats.Int64("gameservers/player_capacity", "The available player capacity for gameservers", "1")
nodesCountStats = stats.Int64("nodes/count", "The count of nodes in the cluster", "1")
gsPerNodesCountStats = stats.Int64("gameservers_node/count", "The count of gameservers per node in the cluster", "1")
gsStateDurationSec = stats.Float64("gameservers_state/duration", "The duration of gameservers to be in a particular state", stats.UnitSeconds)

stateViews = []*view.View{
{
Expand Down Expand Up @@ -120,6 +124,20 @@ var (
Aggregation: view.Count(),
TagKeys: []tag.Key{keyType, keyFleetName, keyNamespace},
},
{
Name: gameServersPlayerConnectedTotalName,
Measure: gameServerPlayerConnectedTotal,
Description: "The current amount of players connected in gameservers",
Aggregation: view.LastValue(),
TagKeys: []tag.Key{keyFleetName, keyName, keyNamespace},
},
{
Name: gameServersPlayerCapacityTotalName,
Measure: gameServerPlayerCapacityTotal,
Description: "The available player capacity per gameserver",
Aggregation: view.LastValue(),
TagKeys: []tag.Key{keyFleetName, keyName, keyNamespace},
},
{
Name: nodeCountName,
Measure: nodesCountStats,
Expand Down
80 changes: 80 additions & 0 deletions pkg/metrics/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,86 @@ func TestControllerGameServerCount(t *testing.T) {
})
}

// TestControllerGameServerPlayerConnectedCount checks that updates to a
// GameServer's Status.Players.Count are exported through the
// gameServersPlayerConnectedTotalName view, tagged with fleet, GameServer name
// and namespace, and that the exported value is the latest count (4).
func TestControllerGameServerPlayerConnectedCount(t *testing.T) {
	// NOTE(review): presumably this turns on the PlayerTracking gate that the
	// metric recording depends on — confirm against the runtime package.
	runtime.EnableAllFeatures()
	resetMetrics()
	exporter := &metricExporter{}
	reader := metricexport.NewReader()

	c := newFakeController()
	defer c.close()

	gs1 := gameServerWithFleetAndState("test-fleet", agonesv1.GameServerStateReady)
	gs1.Status.Players = &agonesv1.PlayerStatus{
		Count: 0,
	}

	// playerCountIs builds a require.Eventually condition that reports whether
	// the lister's copy of gs1 currently has the wanted player count.
	playerCountIs := func(want int64) func() bool {
		return func() bool {
			gs, err := c.gameServerLister.GameServers(gs1.ObjectMeta.Namespace).Get(gs1.ObjectMeta.Name)
			assert.NoError(t, err)
			// assert.NoError is non-fatal, so execution continues after a
			// lookup failure; bail out here instead of dereferencing a nil
			// GameServer (or nil Players) and panicking the test binary.
			if err != nil || gs == nil || gs.Status.Players == nil {
				return false
			}
			return gs.Status.Players.Count == want
		}
	}

	// First update: 0 -> 1 connected players.
	c.gsWatch.Add(gs1)
	gs1 = gs1.DeepCopy()
	gs1.Status.Players.Count = 1
	c.gsWatch.Modify(gs1)

	c.run(t)
	require.True(t, c.sync())
	require.Eventually(t, playerCountIs(1), 5*time.Second, time.Second)
	c.collect()

	// Second update: 1 -> 4 connected players.
	gs1 = gs1.DeepCopy()
	gs1.Status.Players.Count = 4
	c.gsWatch.Modify(gs1)

	c.run(t)
	require.True(t, c.sync())
	require.Eventually(t, playerCountIs(4), 5*time.Second, time.Second)
	c.collect()

	// Only the most recent count is expected in the exported data.
	reader.ReadAndExport(exporter)
	assertMetricData(t, exporter, gameServersPlayerConnectedTotalName, []expectedMetricData{
		{labels: []string{"test-fleet", gs1.GetName(), defaultNs}, val: int64(4)},
	})
}

// TestControllerGameServerPlayerCapacityCount checks that the
// gameServersPlayerCapacityTotalName view exports the remaining player
// capacity of a GameServer: with Capacity 4 and Count going from 0 to 1, the
// expected exported value is 3.
func TestControllerGameServerPlayerCapacityCount(t *testing.T) {
	// NOTE(review): presumably this turns on the PlayerTracking gate that the
	// metric recording depends on — confirm against the runtime package.
	runtime.EnableAllFeatures()
	resetMetrics()
	exporter := &metricExporter{}
	reader := metricexport.NewReader()

	c := newFakeController()
	defer c.close()

	gs1 := gameServerWithFleetAndState("test-fleet", agonesv1.GameServerStateReady)
	gs1.Status.Players = &agonesv1.PlayerStatus{
		Capacity: 4,
		Count:    0,
	}
	c.gsWatch.Add(gs1)
	gs1 = gs1.DeepCopy()
	gs1.Status.Players.Count = 1
	c.gsWatch.Modify(gs1)

	c.run(t)
	require.True(t, c.sync())
	require.Eventually(t, func() bool {
		gs, err := c.gameServerLister.GameServers(gs1.ObjectMeta.Namespace).Get(gs1.ObjectMeta.Name)
		assert.NoError(t, err)
		// assert.NoError is non-fatal, so execution continues after a lookup
		// failure; bail out here instead of dereferencing a nil GameServer
		// (or nil Players) and panicking the test binary.
		if err != nil || gs == nil || gs.Status.Players == nil {
			return false
		}
		return gs.Status.Players.Count == 1
	}, 5*time.Second, time.Second)
	c.collect()

	// Capacity (4) minus connected players (1) leaves 3 free slots.
	reader.ReadAndExport(exporter)
	assertMetricData(t, exporter, gameServersPlayerCapacityTotalName, []expectedMetricData{
		{labels: []string{"test-fleet", gs1.GetName(), defaultNs}, val: int64(3)},
	})
}

func TestControllerGameServersTotal(t *testing.T) {
resetMetrics()
exporter := &metricExporter{}
Expand Down
35 changes: 35 additions & 0 deletions site/content/en/docs/Guides/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ Follow the [Stackdriver Installation steps](#stackdriver-installation) to see yo

## Metrics available

{{% feature expiryVersion="1.28.0" %}}
| Name | Description | Type |
|-------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------|
| agones_gameservers_count | The number of gameservers per fleet and status | gauge |
Expand Down Expand Up @@ -67,7 +68,41 @@ Follow the [Stackdriver Installation steps](#stackdriver-installation) to see yo
| agones_k8s_client_workqueue_retries_total | Total number of items retried to the work queue | counter |
| agones_k8s_client_workqueue_longest_running_processor | How long the longest running workqueue processor has been running in microseconds | gauge |
| agones_k8s_client_workqueue_unfinished_work_seconds | How long unfinished work has been sitting in the workqueue in seconds | gauge |
{{% /feature %}}

{{% feature publishVersion="1.28.0" %}}
| Name | Description | Type |
|-------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------|
| agones_gameservers_count | The number of gameservers per fleet and status | gauge |
| agones_gameserver_allocations_duration_seconds | The distribution of gameserver allocation requests latencies | histogram |
| agones_gameservers_total | The total of gameservers per fleet and status | counter |
| agones_gameserver_player_connected_total | The total number of players connected to gameservers (Only available when [player tracking]({{< relref "player-tracking.md" >}}) is enabled) | gauge |
| agones_gameserver_player_capacity_total | The available capacity for players on gameservers (Only available when [player tracking]({{< relref "player-tracking.md" >}}) is enabled) | gauge |
| agones_fleets_replicas_count | The number of replicas per fleet (total, desired, ready, reserved, allocated) | gauge |
| agones_fleet_autoscalers_able_to_scale | The fleet autoscaler can access the fleet to scale (0 indicates false, 1 indicates true) | gauge |
| agones_fleet_autoscalers_buffer_limits | The limits of buffer based fleet autoscalers (min, max) | gauge |
| agones_fleet_autoscalers_buffer_size | The buffer size of fleet autoscalers (count or percentage) | gauge |
| agones_fleet_autoscalers_current_replicas_count | The current replicas count as seen by autoscalers | gauge |
| agones_fleet_autoscalers_desired_replicas_count | The desired replicas count as seen by autoscalers | gauge |
| agones_fleet_autoscalers_limited | The fleet autoscaler is capped (0 indicates false, 1 indicates true) | gauge |
| agones_gameservers_node_count | The distribution of gameservers per node | histogram |
| agones_nodes_count | The count of nodes empty and with gameservers | gauge |
| agones_gameservers_state_duration | The distribution of gameserver state duration in seconds. Note: this metric could have some missing samples by design. Do not use the `_total` counter as the real value for state changes. | histogram |
| agones_k8s_client_http_request_total | The total of HTTP requests to the Kubernetes API by status code | counter |
| agones_k8s_client_http_request_duration_seconds | The distribution of HTTP requests latencies to the Kubernetes API by status code | histogram |
| agones_k8s_client_cache_list_total | The total number of list operations for client-go caches | counter |
| agones_k8s_client_cache_list_duration_seconds | Duration of a Kubernetes list API call in seconds | histogram |
| agones_k8s_client_cache_list_items | Count of items in a list from the Kubernetes API | histogram |
| agones_k8s_client_cache_watches_total | The total number of watch operations for client-go caches | counter |
| agones_k8s_client_cache_last_resource_version | Last resource version from the Kubernetes API | gauge |
| agones_k8s_client_workqueue_depth | Current depth of the work queue | gauge |
| agones_k8s_client_workqueue_latency_seconds | How long an item stays in the work queue | histogram |
| agones_k8s_client_workqueue_items_total | Total number of items added to the work queue | counter |
| agones_k8s_client_workqueue_work_duration_seconds | How long processing an item from the work queue takes | histogram |
| agones_k8s_client_workqueue_retries_total | Total number of items retried to the work queue | counter |
| agones_k8s_client_workqueue_longest_running_processor | How long the longest running workqueue processor has been running in microseconds | gauge |
| agones_k8s_client_workqueue_unfinished_work_seconds | How long unfinished work has been sitting in the workqueue in seconds | gauge |
{{% /feature %}}

### Dropping Metric Labels

Expand Down