Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

resource_control: unify label name to group_name (#7547) #7656

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions client/resource_group/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ func (c *ResourceGroupsController) Start(ctx context.Context) {
continue
}
if _, ok := c.groupsController.LoadAndDelete(group.Name); ok {
resourceGroupStatusGauge.DeleteLabelValues(group.Name)
resourceGroupStatusGauge.DeleteLabelValues(group.Name, group.Name)
}
} else {
// A compacted prev-kv means there must have been a delete event before this event,
Expand Down Expand Up @@ -431,7 +431,7 @@ func (c *ResourceGroupsController) tryGetResourceGroup(ctx context.Context, name
// Check again to prevent initializing the same resource group concurrently.
tmp, loaded := c.groupsController.LoadOrStore(group.GetName(), gc)
if !loaded {
resourceGroupStatusGauge.WithLabelValues(name).Set(1)
resourceGroupStatusGauge.WithLabelValues(name, group.Name).Set(1)
log.Info("[resource group controller] create resource group cost controller", zap.String("name", group.GetName()))
}
return tmp.(*groupCostController), nil
Expand All @@ -448,7 +448,7 @@ func (c *ResourceGroupsController) cleanUpResourceGroup() {
if equalRU(latestConsumption, *gc.run.consumption) {
if gc.tombstone {
c.groupsController.Delete(resourceGroupName)
resourceGroupStatusGauge.DeleteLabelValues(resourceGroupName)
resourceGroupStatusGauge.DeleteLabelValues(resourceGroupName, resourceGroupName)
return true
}
gc.tombstone = true
Expand Down Expand Up @@ -713,11 +713,11 @@ func newGroupCostController(
name: group.Name,
mainCfg: mainCfg,
mode: group.GetMode(),
successfulRequestDuration: successfulRequestDuration.WithLabelValues(group.Name),
failedLimitReserveDuration: failedLimitReserveDuration.WithLabelValues(group.Name),
failedRequestCounter: failedRequestCounter.WithLabelValues(group.Name),
requestRetryCounter: requestRetryCounter.WithLabelValues(group.Name),
tokenRequestCounter: resourceGroupTokenRequestCounter.WithLabelValues(group.Name),
successfulRequestDuration: successfulRequestDuration.WithLabelValues(group.Name, group.Name),
failedLimitReserveDuration: failedLimitReserveDuration.WithLabelValues(group.Name, group.Name),
failedRequestCounter: failedRequestCounter.WithLabelValues(group.Name, group.Name),
requestRetryCounter: requestRetryCounter.WithLabelValues(group.Name, group.Name),
tokenRequestCounter: resourceGroupTokenRequestCounter.WithLabelValues(group.Name, group.Name),
calculators: []ResourceCalculator{
newKVCalculator(mainCfg),
newSQLCalculator(mainCfg),
Expand Down
16 changes: 9 additions & 7 deletions client/resource_group/controller/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ const (
requestSubsystem = "request"
tokenRequestSubsystem = "token_request"

resourceGroupNameLabel = "name"
// TODO: remove the old "name" label in 8.x; "resource_group" is its replacement.
resourceGroupNameLabel = "name"
newResourceGroupNameLabel = "resource_group"
)

var (
Expand All @@ -31,7 +33,7 @@ var (
Subsystem: "resource_group",
Name: "status",
Help: "Status of the resource group.",
}, []string{resourceGroupNameLabel})
}, []string{resourceGroupNameLabel, newResourceGroupNameLabel})

successfulRequestDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Expand All @@ -40,7 +42,7 @@ var (
Name: "success",
Buckets: []float64{.005, .01, .05, .1, .5, 1, 5, 10, 20, 25, 30}, // 0.005 ~ 30
Help: "Bucketed histogram of wait duration of successful request.",
}, []string{resourceGroupNameLabel})
}, []string{resourceGroupNameLabel, newResourceGroupNameLabel})

failedLimitReserveDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Expand All @@ -49,23 +51,23 @@ var (
Name: "limit_reserve_time_failed",
Buckets: []float64{.005, .01, .05, .1, .5, 1, 5, 10, 20, 25, 30}, // 0.005 ~ 30
Help: "Bucketed histogram of wait duration of failed request.",
}, []string{resourceGroupNameLabel})
}, []string{resourceGroupNameLabel, newResourceGroupNameLabel})

failedRequestCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: namespace,
Subsystem: requestSubsystem,
Name: "fail",
Help: "Counter of failed request.",
}, []string{resourceGroupNameLabel})
}, []string{resourceGroupNameLabel, newResourceGroupNameLabel})

requestRetryCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: namespace,
Subsystem: requestSubsystem,
Name: "retry",
Help: "Counter of retry time for request.",
}, []string{resourceGroupNameLabel})
}, []string{resourceGroupNameLabel, newResourceGroupNameLabel})

tokenRequestDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Expand All @@ -81,7 +83,7 @@ var (
Subsystem: tokenRequestSubsystem,
Name: "resource_group",
Help: "Counter of token request by every resource group.",
}, []string{resourceGroupNameLabel})
}, []string{resourceGroupNameLabel, newResourceGroupNameLabel})
)

var (
Expand Down
40 changes: 20 additions & 20 deletions pkg/mcs/resourcemanager/server/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -373,15 +373,15 @@

var (
name = consumptionInfo.resourceGroupName
rruMetrics = readRequestUnitCost.WithLabelValues(name, ruLabelType)
wruMetrics = writeRequestUnitCost.WithLabelValues(name, ruLabelType)
sqlLayerRuMetrics = sqlLayerRequestUnitCost.WithLabelValues(name)
readByteMetrics = readByteCost.WithLabelValues(name, ruLabelType)
writeByteMetrics = writeByteCost.WithLabelValues(name, ruLabelType)
kvCPUMetrics = kvCPUCost.WithLabelValues(name, ruLabelType)
sqlCPUMetrics = sqlCPUCost.WithLabelValues(name, ruLabelType)
readRequestCountMetrics = requestCount.WithLabelValues(name, readTypeLabel)
writeRequestCountMetrics = requestCount.WithLabelValues(name, writeTypeLabel)
rruMetrics = readRequestUnitCost.WithLabelValues(name, name, ruLabelType)
wruMetrics = writeRequestUnitCost.WithLabelValues(name, name, ruLabelType)
sqlLayerRuMetrics = sqlLayerRequestUnitCost.WithLabelValues(name, name)
readByteMetrics = readByteCost.WithLabelValues(name, name, ruLabelType)
writeByteMetrics = writeByteCost.WithLabelValues(name, name, ruLabelType)
kvCPUMetrics = kvCPUCost.WithLabelValues(name, name, ruLabelType)
sqlCPUMetrics = sqlCPUCost.WithLabelValues(name, name, ruLabelType)
readRequestCountMetrics = requestCount.WithLabelValues(name, name, readTypeLabel)
writeRequestCountMetrics = requestCount.WithLabelValues(name, name, writeTypeLabel)
)
// RU info.
if consumption.RRU > 0 {
Expand Down Expand Up @@ -419,16 +419,16 @@
// Clean up the metrics that have not been updated for a long time.
for name, lastTime := range m.consumptionRecord {
if time.Since(lastTime) > metricsCleanupTimeout {
readRequestUnitCost.DeleteLabelValues(name)
writeRequestUnitCost.DeleteLabelValues(name)
sqlLayerRequestUnitCost.DeleteLabelValues(name)
readByteCost.DeleteLabelValues(name)
writeByteCost.DeleteLabelValues(name)
kvCPUCost.DeleteLabelValues(name)
sqlCPUCost.DeleteLabelValues(name)
requestCount.DeleteLabelValues(name, readTypeLabel)
requestCount.DeleteLabelValues(name, writeTypeLabel)
availableRUCounter.DeleteLabelValues(name)
readRequestUnitCost.DeleteLabelValues(name, name)
writeRequestUnitCost.DeleteLabelValues(name, name)
sqlLayerRequestUnitCost.DeleteLabelValues(name, name)
readByteCost.DeleteLabelValues(name, name)
writeByteCost.DeleteLabelValues(name, name)
kvCPUCost.DeleteLabelValues(name, name)
sqlCPUCost.DeleteLabelValues(name, name)
requestCount.DeleteLabelValues(name, name, readTypeLabel)
requestCount.DeleteLabelValues(name, name, writeTypeLabel)
availableRUCounter.DeleteLabelValues(name, name)

Check warning on line 431 in pkg/mcs/resourcemanager/server/manager.go

View check run for this annotation

Codecov / codecov/patch

pkg/mcs/resourcemanager/server/manager.go#L422-L431

Added lines #L422 - L431 were not covered by tests
delete(m.consumptionRecord, name)
}
}
Expand All @@ -442,7 +442,7 @@
if ru < 0 {
ru = 0
}
availableRUCounter.WithLabelValues(name).Set(ru)
availableRUCounter.WithLabelValues(name, name).Set(ru)

Check warning on line 445 in pkg/mcs/resourcemanager/server/manager.go

View check run for this annotation

Codecov / codecov/patch

pkg/mcs/resourcemanager/server/manager.go#L445

Added line #L445 was not covered by tests
}
m.RUnlock()
}
Expand Down
41 changes: 21 additions & 20 deletions pkg/mcs/resourcemanager/server/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,18 @@ package server
import "github.com/prometheus/client_golang/prometheus"

const (
namespace = "resource_manager"
serverSubsystem = "server"
ruSubsystem = "resource_unit"
resourceSubsystem = "resource"
resourceGroupNameLabel = "name"
typeLabel = "type"
readTypeLabel = "read"
writeTypeLabel = "write"
backgroundTypeLabel = "background"
tiflashTypeLabel = "ap"
defaultTypeLabel = "tp"
namespace = "resource_manager"
serverSubsystem = "server"
ruSubsystem = "resource_unit"
resourceSubsystem = "resource"
resourceGroupNameLabel = "name"
typeLabel = "type"
readTypeLabel = "read"
writeTypeLabel = "write"
backgroundTypeLabel = "background"
tiflashTypeLabel = "ap"
defaultTypeLabel = "tp"
newResourceGroupNameLabel = "resource_group"
)

var (
Expand All @@ -47,21 +48,21 @@ var (
Subsystem: ruSubsystem,
Name: "read_request_unit_sum",
Help: "Counter of the read request unit cost for all resource groups.",
}, []string{resourceGroupNameLabel, typeLabel})
}, []string{resourceGroupNameLabel, newResourceGroupNameLabel, typeLabel})
writeRequestUnitCost = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: namespace,
Subsystem: ruSubsystem,
Name: "write_request_unit_sum",
Help: "Counter of the write request unit cost for all resource groups.",
}, []string{resourceGroupNameLabel, typeLabel})
}, []string{resourceGroupNameLabel, newResourceGroupNameLabel, typeLabel})
sqlLayerRequestUnitCost = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: namespace,
Subsystem: ruSubsystem,
Name: "sql_layer_request_unit_sum",
Help: "The number of the sql layer request unit cost for all resource groups.",
}, []string{resourceGroupNameLabel})
}, []string{resourceGroupNameLabel, newResourceGroupNameLabel})

// Resource cost metrics.
readByteCost = prometheus.NewCounterVec(
Expand All @@ -70,43 +71,43 @@ var (
Subsystem: resourceSubsystem,
Name: "read_byte_sum",
Help: "Counter of the read byte cost for all resource groups.",
}, []string{resourceGroupNameLabel, typeLabel})
}, []string{resourceGroupNameLabel, newResourceGroupNameLabel, typeLabel})
writeByteCost = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: namespace,
Subsystem: resourceSubsystem,
Name: "write_byte_sum",
Help: "Counter of the write byte cost for all resource groups.",
}, []string{resourceGroupNameLabel, typeLabel})
}, []string{resourceGroupNameLabel, newResourceGroupNameLabel, typeLabel})
kvCPUCost = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: namespace,
Subsystem: resourceSubsystem,
Name: "kv_cpu_time_ms_sum",
Help: "Counter of the KV CPU time cost in milliseconds for all resource groups.",
}, []string{resourceGroupNameLabel, typeLabel})
}, []string{resourceGroupNameLabel, newResourceGroupNameLabel, typeLabel})
sqlCPUCost = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: namespace,
Subsystem: resourceSubsystem,
Name: "sql_cpu_time_ms_sum",
Help: "Counter of the SQL CPU time cost in milliseconds for all resource groups.",
}, []string{resourceGroupNameLabel, typeLabel})
}, []string{resourceGroupNameLabel, newResourceGroupNameLabel, typeLabel})
requestCount = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: namespace,
Subsystem: resourceSubsystem,
Name: "request_count",
Help: "The number of read/write requests for all resource groups.",
}, []string{resourceGroupNameLabel, typeLabel})
}, []string{resourceGroupNameLabel, newResourceGroupNameLabel, typeLabel})

availableRUCounter = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: ruSubsystem,
Name: "available_ru",
Help: "Counter of the available RU for all resource groups.",
}, []string{resourceGroupNameLabel})
}, []string{resourceGroupNameLabel, newResourceGroupNameLabel})
)

func init() {
Expand Down
Loading