Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Receive: make tsdb stats limit configurable #6437

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re

### Added

- [#6437](https://github.com/thanos-io/thanos/pull/6437) Receive: make tenant stats limit configurable
- [#6369](https://github.com/thanos-io/thanos/pull/6369) Receive: add az-aware replication support for Ketama algorithm
- [#6185](https://github.com/thanos-io/thanos/pull/6185) Tracing: tracing in OTLP support configuring service_name.
- [#6192](https://github.com/thanos-io/thanos/pull/6192) Store: add flag `bucket-web-label` to select the label to use as timeline title in web UI
Expand Down
2 changes: 1 addition & 1 deletion docs/components/receive.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ The [Thanos Receive Controller](https://github.com/observatorium/thanos-receive-

## TSDB stats

Thanos Receive supports getting TSDB stats using the `/api/v1/status/tsdb` endpoint. Use the `THANOS-TENANT` HTTP header to get stats for individual Tenants. The output format of the endpoint is compatible with [Prometheus API](https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats).
Thanos Receive supports getting TSDB stats using the `/api/v1/status/tsdb` endpoint. Use the `THANOS-TENANT` HTTP header to get stats for individual tenants. Use the `limit` query parameter to set the number of stats to return (the default is 10). The output format of the endpoint is compatible with the [Prometheus API](https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats).

Note that each Thanos Receive will only expose local stats and replicated series will not be included in the response.

Expand Down
29 changes: 27 additions & 2 deletions pkg/receive/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"fmt"
"io"
stdlog "log"
"math"
"net"
"net/http"
"path"
Expand Down Expand Up @@ -54,12 +55,16 @@ const (
DefaultTenantHeader = "THANOS-TENANT"
// DefaultTenant is the default value used for when no tenant is passed via the tenant header.
DefaultTenant = "default-tenant"
// DefaultStatsLimit is the default value used for limiting tenant stats.
DefaultStatsLimit = 10
// DefaultTenantLabel is the default label-name used for when no tenant is passed via the tenant header.
DefaultTenantLabel = "tenant_id"
// DefaultReplicaHeader is the default header used to designate the replica count of a write request.
DefaultReplicaHeader = "THANOS-REPLICA"
// AllTenantsQueryParam is the query parameter for getting TSDB stats for all tenants.
AllTenantsQueryParam = "all_tenants"
// LimitStatsQueryParam is the query parameter for limiting the number of returned TSDB stats.
LimitStatsQueryParam = "limit"
// Labels for metrics.
labelSuccess = "success"
labelError = "error"
Expand Down Expand Up @@ -280,6 +285,21 @@ func (h *Handler) testReady(f http.HandlerFunc) http.HandlerFunc {
}
}

// getStatsLimitParameter extracts the TSDB stats limit from the request's
// LimitStatsQueryParam query parameter.
//
// It returns DefaultStatsLimit when the parameter is absent or empty. It
// returns an error when the value is not a base-10 integer, when it does not
// fit into an int, or when it is negative.
func getStatsLimitParameter(r *http.Request) (int, error) {
	rawLimit := r.URL.Query().Get(LimitStatsQueryParam)
	if rawLimit == "" {
		return DefaultStatsLimit, nil
	}

	// Parse as a 64-bit value so that the explicit range check below is the
	// one that rejects values that overflow int on 32-bit platforms. Values
	// that do not even fit into an int64 are rejected by ParseInt itself.
	parsed, err := strconv.ParseInt(rawLimit, 10, 64)
	if err != nil {
		return 0, fmt.Errorf("unable to parse '%s' parameter: %w", LimitStatsQueryParam, err)
	}
	if parsed > math.MaxInt {
		return 0, fmt.Errorf("'%s' parameter is larger than %d", LimitStatsQueryParam, math.MaxInt)
	}
	// A negative number of stats is meaningless; reject it here as bad input
	// instead of forwarding it to the TSDB.
	if parsed < 0 {
		return 0, fmt.Errorf("'%s' parameter must not be negative, got %d", LimitStatsQueryParam, parsed)
	}
	return int(parsed), nil
}

func (h *Handler) getStats(r *http.Request, statsByLabelName string) ([]statusapi.TenantStats, *api.ApiError) {
if !h.isReady() {
return nil, &api.ApiError{Typ: api.ErrorInternal, Err: fmt.Errorf("service unavailable")}
Expand All @@ -292,15 +312,20 @@ func (h *Handler) getStats(r *http.Request, statsByLabelName string) ([]statusap
return nil, &api.ApiError{Typ: api.ErrorBadData, Err: err}
}

statsLimit, err := getStatsLimitParameter(r)
if err != nil {
return nil, &api.ApiError{Typ: api.ErrorBadData, Err: err}
}

if getAllTenantStats {
return h.options.TSDBStats.TenantStats(statsByLabelName), nil
return h.options.TSDBStats.TenantStats(statsLimit, statsByLabelName), nil
}

if tenantID == "" {
tenantID = h.options.DefaultTenantID
}

return h.options.TSDBStats.TenantStats(statsByLabelName, tenantID), nil
return h.options.TSDBStats.TenantStats(statsLimit, statsByLabelName, tenantID), nil
}

// Close stops the Handler.
Expand Down
48 changes: 48 additions & 0 deletions pkg/receive/handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"path/filepath"
"runtime"
"runtime/pprof"
"strconv"
"strings"
"sync"
"testing"
Expand Down Expand Up @@ -1520,3 +1521,50 @@ func TestRelabel(t *testing.T) {
})
}
}

func TestGetStatsLimitParameter(t *testing.T) {
t.Run("invalid limit parameter, not integer", func(t *testing.T) {
r, err := http.NewRequest(http.MethodGet, "http://0:0", nil)
testutil.Ok(t, err)

q := r.URL.Query()
q.Add(LimitStatsQueryParam, "abc")
r.URL.RawQuery = q.Encode()

_, err = getStatsLimitParameter(r)
testutil.NotOk(t, err)
})
t.Run("invalid limit parameter, too large", func(t *testing.T) {
r, err := http.NewRequest(http.MethodGet, "http://0:0", nil)
testutil.Ok(t, err)

q := r.URL.Query()
q.Add(LimitStatsQueryParam, strconv.FormatUint(math.MaxInt+1, 10))
r.URL.RawQuery = q.Encode()

_, err = getStatsLimitParameter(r)
testutil.NotOk(t, err)
})
t.Run("not present returns default", func(t *testing.T) {
r, err := http.NewRequest(http.MethodGet, "http://0:0", nil)
testutil.Ok(t, err)

limit, err := getStatsLimitParameter(r)
testutil.Ok(t, err)
testutil.Equals(t, limit, DefaultStatsLimit)
})
t.Run("if present and valid, the parameter is returned", func(t *testing.T) {
r, err := http.NewRequest(http.MethodGet, "http://0:0", nil)
testutil.Ok(t, err)

const givenLimit = 20

q := r.URL.Query()
q.Add(LimitStatsQueryParam, strconv.FormatUint(givenLimit, 10))
r.URL.RawQuery = q.Encode()

limit, err := getStatsLimitParameter(r)
testutil.Ok(t, err)
testutil.Equals(t, limit, givenLimit)
})
}
6 changes: 3 additions & 3 deletions pkg/receive/multitsdb.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ import (
type TSDBStats interface {
// TenantStats returns TSDB head stats for the given tenants.
// If no tenantIDs are provided, stats for all tenants are returned.
TenantStats(statsByLabelName string, tenantIDs ...string) []status.TenantStats
TenantStats(limit int, statsByLabelName string, tenantIDs ...string) []status.TenantStats
}

type MultiTSDB struct {
Expand Down Expand Up @@ -518,7 +518,7 @@ func (t *MultiTSDB) TSDBExemplars() map[string]*exemplars.TSDB {
return res
}

func (t *MultiTSDB) TenantStats(statsByLabelName string, tenantIDs ...string) []status.TenantStats {
func (t *MultiTSDB) TenantStats(limit int, statsByLabelName string, tenantIDs ...string) []status.TenantStats {
t.mtx.RLock()
defer t.mtx.RUnlock()
if len(tenantIDs) == 0 {
Expand All @@ -545,7 +545,7 @@ func (t *MultiTSDB) TenantStats(statsByLabelName string, tenantIDs ...string) []
if db == nil {
return
}
stats := db.Head().Stats(statsByLabelName, 10)
stats := db.Head().Stats(statsByLabelName, limit)

mu.Lock()
defer mu.Unlock()
Expand Down
2 changes: 1 addition & 1 deletion pkg/receive/multitsdb_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -568,7 +568,7 @@ func TestMultiTSDBStats(t *testing.T) {
testutil.Ok(t, appendSample(m, "baz", time.Now()))
testutil.Equals(t, 3, len(m.TSDBLocalClients()))

stats := m.TenantStats(labels.MetricName, test.tenants...)
stats := m.TenantStats(10, labels.MetricName, test.tenants...)
testutil.Equals(t, test.expectedStats, len(stats))
})
}
Expand Down