diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b1365a10da..a0085dce5bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,7 @@ * [ENHANCEMENT] Querier: Add context error check when merging slices from ingesters for GetLabel operations. #5837 * [ENHANCEMENT] Ring: Add experimental `-ingester.tokens-generator-strategy=minimize-spread` flag to enable the new minimize spread token generator strategy. #5855 * [ENHANCEMENT] Query Frontend: Ensure error response returned by Query Frontend follows Prometheus API error response format. #5811 +* [ENHANCEMENT] Ring Status Page: Add `Ownership Diff From Expected` column in the ring table to indicate the extent to which the ownership of a specific ingester differs from the expected ownership. #5889 * [BUGFIX] Distributor: Do not use label with empty values for sharding #5717 * [BUGFIX] Query Frontend: queries with negative offset should check whether it is cacheable or not. #5719 * [BUGFIX] Redis Cache: pass `cache_size` config correctly. #5734 diff --git a/pkg/ring/http.go b/pkg/ring/http.go index 81615afb439..9c6be69431b 100644 --- a/pkg/ring/http.go +++ b/pkg/ring/http.go @@ -38,6 +38,7 @@ const pageContent = ` Last Heartbeat Tokens Ownership + Ownership Diff From Expected Actions @@ -56,6 +57,7 @@ const pageContent = ` {{ .HeartbeatTimestamp }} {{ .NumTokens }} {{ .Ownership }}% + {{ .DiffOwnership }}% {{ end }} @@ -114,6 +116,7 @@ type ingesterDesc struct { Tokens []uint32 `json:"tokens"` NumTokens int `json:"-"` Ownership float64 `json:"-"` + DiffOwnership float64 `json:"-"` } type httpResponse struct { @@ -153,7 +156,6 @@ func (r *Ring) ServeHTTP(w http.ResponseWriter, req *http.Request) { storageLastUpdate := r.KVClient.LastUpdateTime(r.key) var ingesters []ingesterDesc - _, owned := r.countTokens() for _, id := range ingesterIDs { ing := r.ringDesc.Ingesters[id] heartbeatTimestamp := time.Unix(ing.Timestamp, 0) @@ -168,6 +170,21 @@ func (r *Ring) ServeHTTP(w http.ResponseWriter, req *http.Request) { registeredTimestamp = ing.GetRegisteredAt().String() } + var ownership float64 + var deltaOwnership float64 + + if r.cfg.ZoneAwarenessEnabled { + numTokens, ownedByAz := r.countTokensByAz() + ownership = (float64(ownedByAz[id]) / float64(math.MaxUint32+1)) * 100 + expectedOwnership := 1 / float64(len(numTokens[ing.Zone])) * 100 + deltaOwnership = (1 - expectedOwnership/ownership) * 100 + } else { + _, owned := r.countTokens() + ownership = (float64(owned[id]) / float64(math.MaxUint32+1)) * 100 + expectedOwnership := 1 / float64(len(owned)) * 100 + deltaOwnership = (1 - expectedOwnership/ownership) * 100 + } + ingesters = append(ingesters, ingesterDesc{ ID: id, State: state, @@ -177,7 +194,8 @@ func (r *Ring) ServeHTTP(w http.ResponseWriter, req *http.Request) { Tokens: ing.Tokens, Zone: ing.Zone, NumTokens: len(ing.Tokens), - Ownership: (float64(owned[id]) / float64(math.MaxUint32)) * 100, + Ownership: ownership, + DiffOwnership: deltaOwnership, }) } diff --git a/pkg/ring/ring.go b/pkg/ring/ring.go index 71bb82a7977..aa6c0ab94ac 100644 --- a/pkg/ring/ring.go +++ b/pkg/ring/ring.go @@ -590,22 +590,42 @@ func (r *Ring) GetReplicationSetForOperation(op Operation) (ReplicationSet, erro }, nil } +func (r *Ring) countTokensByAz() (map[string]map[string]uint32, map[string]int64) { + numTokens := map[string]map[string]uint32{} + owned := map[string]int64{} + + for zone, zonalTokens := range r.ringDesc.getTokensByZone() { + numTokens[zone] = map[string]uint32{} + for i := 1; i <= len(zonalTokens); i++ { + index := i % len(zonalTokens) + diff := tokenDistance(zonalTokens[i-1], zonalTokens[index]) + info := r.ringInstanceByToken[zonalTokens[index]] + owned[info.InstanceID] = owned[info.InstanceID] + diff + numTokens[zone][info.InstanceID] = numTokens[zone][info.InstanceID] + 1 + } + } + + // Set to 0 the number of owned tokens by instances which don't have tokens yet. + for id, info := range r.ringDesc.Ingesters { + if _, ok := owned[id]; !ok { + owned[id] = 0 + numTokens[info.Zone][id] = 0 + } + } + + return numTokens, owned +} + // countTokens returns the number of tokens and tokens within the range for each instance. // The ring read lock must be already taken when calling this function. -func (r *Ring) countTokens() (map[string]uint32, map[string]uint32) { - owned := map[string]uint32{} +func (r *Ring) countTokens() (map[string]uint32, map[string]int64) { + owned := map[string]int64{} numTokens := map[string]uint32{} - for i, token := range r.ringTokens { - var diff uint32 - - // Compute how many tokens are within the range. - if i+1 == len(r.ringTokens) { - diff = (math.MaxUint32 - token) + r.ringTokens[0] - } else { - diff = r.ringTokens[i+1] - token - } + for i := 1; i <= len(r.ringTokens); i++ { // Compute how many tokens are within the range. + index := i % len(r.ringTokens) + diff := tokenDistance(r.ringTokens[i-1], r.ringTokens[index]) - info := r.ringInstanceByToken[token] + info := r.ringInstanceByToken[r.ringTokens[index]] numTokens[info.InstanceID] = numTokens[info.InstanceID] + 1 owned[info.InstanceID] = owned[info.InstanceID] + diff } @@ -662,7 +682,7 @@ func (r *Ring) updateRingMetrics(compareResult CompareResult) { r.reportedOwners = make(map[string]struct{}) numTokens, ownedRange := r.countTokens() for id, totalOwned := range ownedRange { - r.memberOwnershipGaugeVec.WithLabelValues(id).Set(float64(totalOwned) / float64(math.MaxUint32)) + r.memberOwnershipGaugeVec.WithLabelValues(id).Set(float64(totalOwned) / float64(math.MaxUint32+1)) r.numTokensGaugeVec.WithLabelValues(id).Set(float64(numTokens[id])) delete(prevOwners, id) r.reportedOwners[id] = struct{}{} diff --git a/pkg/ring/ring_test.go b/pkg/ring/ring_test.go index 82bb096d020..08a1864195f 100644 --- a/pkg/ring/ring_test.go +++ b/pkg/ring/ring_test.go @@ -3008,8 +3008,8 @@ func TestUpdateMetrics(t *testing.T) { err = testutil.GatherAndCompare(registry, bytes.NewBufferString(` # HELP ring_member_ownership_percent The percent ownership of the ring by member # TYPE ring_member_ownership_percent gauge - ring_member_ownership_percent{member="A",name="test"} 0.500000000349246 - ring_member_ownership_percent{member="B",name="test"} 0.49999999965075403 + ring_member_ownership_percent{member="A",name="test"} 0.49999999976716936 + ring_member_ownership_percent{member="B",name="test"} 0.5000000002328306 # HELP ring_members Number of members in the ring # TYPE ring_members gauge ring_members{name="test",state="ACTIVE"} 2 @@ -3060,8 +3060,8 @@ func TestUpdateMetricsWithRemoval(t *testing.T) { err = testutil.GatherAndCompare(registry, bytes.NewBufferString(` # HELP ring_member_ownership_percent The percent ownership of the ring by member # TYPE ring_member_ownership_percent gauge - ring_member_ownership_percent{member="A",name="test"} 0.500000000349246 - ring_member_ownership_percent{member="B",name="test"} 0.49999999965075403 + ring_member_ownership_percent{member="A",name="test"} 0.49999999976716936 + ring_member_ownership_percent{member="B",name="test"} 0.5000000002328306 # HELP ring_members Number of members in the ring # TYPE ring_members gauge ring_members{name="test",state="ACTIVE"} 2