Add max-chunks-bytes-per-query limiter #4216

Merged May 27, 2021 (10 commits).
Changes from 3 commits.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -8,6 +8,7 @@
- `-alertmanager.receivers-firewall.block.private-addresses` renamed to `-alertmanager.receivers-firewall-block-private-addresses`
* [CHANGE] Change default value of `-server.grpc.keepalive.min-time-between-pings` to `10s` and `-server.grpc.keepalive.ping-without-stream-allowed` to `true`. #4168
* [FEATURE] Querier: Added new `-querier.max-fetched-series-per-query` flag. When Cortex is running with blocks storage, the max series per query limit is enforced in the querier and applies to unique series received from ingesters and store-gateway (long-term storage). #4179
* [FEATURE] Querier: Added new `-querier.max-chunk-bytes-per-query` flag. When Cortex is running with blocks storage, the max chunk bytes limit is enforced in the querier and counts the size, in bytes, of chunks returned from ingesters and blocks storage. #4216
* [FEATURE] Alertmanager: Added rate-limits to notifiers. Rate limits used by all integrations can be configured using `-alertmanager.notification-rate-limit`, while per-integration rate limits can be specified via `-alertmanager.notification-rate-limit-per-integration` parameter. Both shared and per-integration limits can be overwritten using overrides mechanism. These limits are applied on individual (per-tenant) alertmanagers. Rate-limited notifications are failed notifications. It is possible to monitor rate-limited notifications via new `cortex_alertmanager_notification_rate_limited_total` metric. #4135 #4163
* [ENHANCEMENT] Alertmanager: introduced new metrics to monitor operation when using `-alertmanager.sharding-enabled`: #4149
* `cortex_alertmanager_state_fetch_replica_state_total`
2 changes: 1 addition & 1 deletion pkg/distributor/distributor_test.go
@@ -951,7 +951,7 @@ func TestDistributor_QueryStream_ShouldReturnErrorIfMaxSeriesPerQueryLimitIsReac

limits := &validation.Limits{}
flagext.DefaultValues(limits)
ctx = limiter.AddQueryLimiterToContext(ctx, limiter.NewQueryLimiter(maxSeriesLimit))
ctx = limiter.AddQueryLimiterToContext(ctx, limiter.NewQueryLimiter(maxSeriesLimit, 0))
// Prepare distributors.
ds, _, r, _ := prepare(t, prepConfig{
numIngesters: 3,
6 changes: 6 additions & 0 deletions pkg/distributor/query.go
@@ -232,10 +232,16 @@ func (d *Distributor) queryIngesterStream(ctx context.Context, userID string, re
return nil, validation.LimitError(fmt.Sprintf(errMaxChunksPerQueryLimit, util.LabelMatchersToString(matchers), chunksLimit))
}
}

for _, series := range resp.Chunkseries {
if limitErr := queryLimiter.AddSeries(series.Labels); limitErr != nil {
return nil, limitErr
}
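// Each chunk's Size() is its protobuf-encoded size in bytes, which counts against the per-query chunk bytes limit.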
for _, chunk := range series.Chunks {
if chunkBytesLimitErr := queryLimiter.AddChunkBytes(chunk.Size()); chunkBytesLimitErr != nil {
return nil, chunkBytesLimitErr
}
}
}
for _, series := range resp.Timeseries {
if limitErr := queryLimiter.AddSeries(series.Labels); limitErr != nil {
7 changes: 7 additions & 0 deletions pkg/querier/blocks_store_queryable.go
@@ -56,6 +56,7 @@ const (
var (
errNoStoreGatewayAddress = errors.New("no store-gateway address configured")
errMaxChunksPerQueryLimit = "the query hit the max number of chunks limit while fetching chunks from store-gateways for %s (limit: %d)"
errMaxChunkBytesHit = "The query hit the max chunk bytes limit (limit: %d)"
)

// BlocksStoreSet is the interface used to get the clients to query series on a set of blocks.
@@ -626,6 +627,12 @@ func (q *blocksStoreQuerier) fetchSeriesFromStores(
return validation.LimitError(fmt.Sprintf(errMaxChunksPerQueryLimit, util.LabelMatchersToString(matchers), maxChunksLimit))
}
}

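// As in the distributor, each returned chunk's encoded size counts against the per-query limit.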
for _, c := range s.Chunks {
if chunkBytesLimitErr := queryLimiter.AddChunkBytes(c.Size()); chunkBytesLimitErr != nil {
return chunkBytesLimitErr
}
}
}

if w := resp.GetWarning(); w != "" {
22 changes: 20 additions & 2 deletions pkg/querier/blocks_store_queryable_test.go
@@ -51,7 +51,7 @@ func TestBlocksStoreQuerier_Select(t *testing.T) {
metricNameLabel = labels.Label{Name: labels.MetricName, Value: metricName}
series1Label = labels.Label{Name: "series", Value: "1"}
series2Label = labels.Label{Name: "series", Value: "2"}
noOpQueryLimiter = limiter.NewQueryLimiter(0)
noOpQueryLimiter = limiter.NewQueryLimiter(0, 0)
)

type valueResult struct {
@@ -507,9 +507,27 @@ func TestBlocksStoreQuerier_Select(t *testing.T) {
},
},
limits: &blocksStoreLimitsMock{},
queryLimiter: limiter.NewQueryLimiter(1),
queryLimiter: limiter.NewQueryLimiter(1, 0),
expectedErr: validation.LimitError(fmt.Sprintf("The query hit the max number of series limit (limit: %d)", 1)),
},
"max chunk bytes per query limit hit while fetching chunks": {
finderResult: bucketindex.Blocks{
{ID: block1},
{ID: block2},
},
storeSetResponses: []interface{}{
map[BlocksStoreClient][]ulid.ULID{
&storeGatewayClientMock{remoteAddr: "1.1.1.1", mockedSeriesResponses: []*storepb.SeriesResponse{
mockSeriesResponse(labels.Labels{metricNameLabel, series1Label}, minT, 1),
mockSeriesResponse(labels.Labels{metricNameLabel, series1Label}, minT+1, 2),
mockHintsResponse(block1, block2),
}}: {block1, block2},
},
},
limits: &blocksStoreLimitsMock{maxChunksPerQuery: 1},
queryLimiter: limiter.NewQueryLimiter(0, 8),
expectedErr: validation.LimitError(fmt.Sprintf(errMaxChunkBytesHit, 8)),
},
}

for testName, testData := range tests {
2 changes: 1 addition & 1 deletion pkg/querier/querier.go
@@ -224,7 +224,7 @@ func NewQueryable(distributor QueryableWithFilter, stores []QueryableWithFilter,
return nil, err
}

ctx = limiter.AddQueryLimiterToContext(ctx, limiter.NewQueryLimiter(limits.MaxFetchedSeriesPerQuery(userID)))
ctx = limiter.AddQueryLimiterToContext(ctx, limiter.NewQueryLimiter(limits.MaxFetchedSeriesPerQuery(userID), limits.MaxChunkBytesPerQuery(userID)))

mint, maxt, err = validateQueryTimeRange(ctx, userID, mint, maxt, limits, cfg.MaxQueryIntoFuture)
if err == errEmptyTimeRange {
30 changes: 24 additions & 6 deletions pkg/util/limiter/query_limiter.go
@@ -6,6 +6,7 @@ import (
"sync"

"github.com/prometheus/common/model"
"go.uber.org/atomic"

"github.com/cortexproject/cortex/pkg/cortexpb"
"github.com/cortexproject/cortex/pkg/ingester/client"
@@ -15,25 +16,32 @@
type queryLimiterCtxKey struct{}

var (
ctxKey = &queryLimiterCtxKey{}
errMaxSeriesHit = "The query hit the max number of series limit (limit: %d)"
ctxKey = &queryLimiterCtxKey{}
errMaxSeriesHit = "The query hit the max number of series limit (limit: %d)"
errMaxChunkBytesHit = "The query hit the max chunk bytes limit (limit: %d)"
)

type QueryLimiter struct {
uniqueSeriesMx sync.Mutex
uniqueSeries map[model.Fingerprint]struct{}

maxSeriesPerQuery int
chunkBytesCount *atomic.Int32

maxSeriesPerQuery int
maxChunkBytesPerQuery int
Review thread on the counter and limit types:

Contributor Author: This limits us to 2GB (2^31 - 1 bytes) per query. Is it worth making this an unsigned int, which is about 4GB (2^32 bytes) per query, or a 64-bit number?

Contributor: int64 please. 4GB is not that much. We may have use cases setting higher limits.

Contributor: On 64-bit systems, int is 64-bit, so this is fine. Note that Cortex officially doesn't support 32-bit systems.

Contributor: I would be explicit like we do everywhere else.

Contributor Author: Should we also pass in an int64 at the config/limit.go level? Or is leaving NewQueryLimiter(int, int) and casting the maxChunkBytes value to an int64 ok?

Contributor (replying to "I would be explicit like we do everywhere else"): I don't think we're explicit "everywhere else". I think it would make sense to use int here simply because we cannot fit more than the max of int into memory anyway (this applies to both 32-bit and 64-bit platforms).

Contributor: To your question Tyler, if you go with the int64 route, you will need to "extend" that everywhere to avoid losing precision somewhere (i.e. in NewQueryLimiter too).

Contributor: Ok. Let's not block on this and keep int.
}

// NewQueryLimiter makes a new per-query limiter. Each query limiter
// is configured using the `maxSeriesPerQuery` and `maxChunkBytesPerQuery` limits.
func NewQueryLimiter(maxSeriesPerQuery int) *QueryLimiter {
func NewQueryLimiter(maxSeriesPerQuery int, maxChunkBytesPerQuery int) *QueryLimiter {
return &QueryLimiter{
uniqueSeriesMx: sync.Mutex{},
uniqueSeries: map[model.Fingerprint]struct{}{},

maxSeriesPerQuery: maxSeriesPerQuery,
chunkBytesCount: atomic.NewInt32(0),

maxSeriesPerQuery: maxSeriesPerQuery,
maxChunkBytesPerQuery: maxChunkBytesPerQuery,
}
}

Expand All @@ -47,7 +55,7 @@ func QueryLimiterFromContextWithFallback(ctx context.Context) *QueryLimiter {
ql, ok := ctx.Value(ctxKey).(*QueryLimiter)
if !ok {
// If there's no limiter return a new unlimited limiter as a fallback
ql = NewQueryLimiter(0)
ql = NewQueryLimiter(0, 0)
}
return ql
}
@@ -77,3 +85,13 @@ func (ql *QueryLimiter) uniqueSeriesCount() int {
defer ql.uniqueSeriesMx.Unlock()
return len(ql.uniqueSeries)
}

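// AddChunkBytes adds the given number of chunk bytes to the running per-query total
// and returns a limit error once the total exceeds maxChunkBytesPerQuery.
// A limit of 0 disables the check.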
func (ql *QueryLimiter) AddChunkBytes(bytes int) error {
if ql.maxChunkBytesPerQuery == 0 {
return nil
}
if ql.chunkBytesCount.Add(int32(bytes)) > int32(ql.maxChunkBytesPerQuery) {
return validation.LimitError(fmt.Sprintf(errMaxChunkBytesHit, ql.maxChunkBytesPerQuery))
}
return nil
}
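AddChunkBytes uses an add-then-compare pattern on an atomic counter instead of a mutex. A minimal, self-contained sketch of the same idea, with illustrative names and the standard library's sync/atomic in place of go.uber.org/atomic:

```go
package main

import (
	"fmt"
	"sync/atomic"
)

// byteLimiter is an illustrative stand-in for QueryLimiter's chunk-bytes accounting.
type byteLimiter struct {
	count int32 // running total of chunk bytes, updated atomically
	limit int32 // 0 disables the limit
}

func (l *byteLimiter) add(n int) error {
	if l.limit == 0 {
		return nil
	}
	// Add first, then compare: concurrent fetchers may overshoot the limit
	// slightly before the error propagates, but the check itself is race-free.
	if atomic.AddInt32(&l.count, int32(n)) > l.limit {
		return fmt.Errorf("the query hit the max chunk bytes limit (limit: %d)", l.limit)
	}
	return nil
}

func main() {
	l := &byteLimiter{limit: 100}
	fmt.Println(l.add(100)) // <nil>: the limit is inclusive
	fmt.Println(l.add(1))   // error: running total 101 exceeds the limit
}
```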
15 changes: 12 additions & 3 deletions pkg/util/limiter/query_limiter_test.go
@@ -25,7 +25,7 @@ func TestQueryLimiter_AddSeries_ShouldReturnNoErrorOnLimitNotExceeded(t *testing
labels.MetricName: metricName + "_2",
"series2": "1",
})
limiter = NewQueryLimiter(100)
limiter = NewQueryLimiter(100, 0)
)
err := limiter.AddSeries(cortexpb.FromLabelsToLabelAdapters(series1))
assert.NoError(t, err)
@@ -53,14 +53,23 @@ func TestQueryLimiter_AddSeriers_ShouldReturnErrorOnLimitExceeded(t *testing.T)
labels.MetricName: metricName + "_2",
"series2": "1",
})
limiter = NewQueryLimiter(1)
limiter = NewQueryLimiter(1, 0)
)
err := limiter.AddSeries(cortexpb.FromLabelsToLabelAdapters(series1))
require.NoError(t, err)
err = limiter.AddSeries(cortexpb.FromLabelsToLabelAdapters(series2))
require.Error(t, err)
}

func TestQueryLimiter_AddChunkBytes(t *testing.T) {
var limiter = NewQueryLimiter(0, 100)

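// The limit is inclusive: exactly maxChunkBytesPerQuery bytes succeeds, one more byte fails.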
err := limiter.AddChunkBytes(100)
require.NoError(t, err)
err = limiter.AddChunkBytes(1)
require.Error(t, err)
}

func BenchmarkQueryLimiter_AddSeries(b *testing.B) {
const (
metricName = "test_metric"
@@ -75,7 +84,7 @@
}
b.ResetTimer()

limiter := NewQueryLimiter(b.N + 1)
limiter := NewQueryLimiter(b.N+1, 0)
for _, s := range series {
err := limiter.AddSeries(cortexpb.FromLabelsToLabelAdapters(s))
assert.NoError(b, err)
8 changes: 8 additions & 0 deletions pkg/util/validation/limits.go
@@ -75,6 +75,7 @@ type Limits struct {
MaxChunksPerQueryFromStore int `yaml:"max_chunks_per_query" json:"max_chunks_per_query"` // TODO Remove in Cortex 1.12.
MaxChunksPerQuery int `yaml:"max_fetched_chunks_per_query" json:"max_fetched_chunks_per_query"`
MaxFetchedSeriesPerQuery int `yaml:"max_fetched_series_per_query" json:"max_fetched_series_per_query"`
MaxChunkBytesPerQuery int `yaml:"max_chunk_bytes_per_query" json:"max_chunk_bytes_per_query"`
MaxQueryLookback model.Duration `yaml:"max_query_lookback" json:"max_query_lookback"`
MaxQueryLength model.Duration `yaml:"max_query_length" json:"max_query_length"`
MaxQueryParallelism int `yaml:"max_query_parallelism" json:"max_query_parallelism"`
@@ -147,6 +148,7 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) {
f.IntVar(&l.MaxChunksPerQueryFromStore, "store.query-chunk-limit", 2e6, "Deprecated. Use -querier.max-fetched-chunks-per-query CLI flag and its respective YAML config option instead. Maximum number of chunks that can be fetched in a single query. This limit is enforced when fetching chunks from the long-term storage only. When running the Cortex chunks storage, this limit is enforced in the querier and ruler, while when running the Cortex blocks storage this limit is enforced in the querier, ruler and store-gateway. 0 to disable.")
f.IntVar(&l.MaxChunksPerQuery, "querier.max-fetched-chunks-per-query", 0, "Maximum number of chunks that can be fetched in a single query from ingesters and long-term storage: the total number of actual fetched chunks could be 2x the limit, being independently applied when querying ingesters and long-term storage. This limit is enforced in the ingester (if chunks streaming is enabled), querier, ruler and store-gateway. Takes precedence over the deprecated -store.query-chunk-limit. 0 to disable.")
f.IntVar(&l.MaxFetchedSeriesPerQuery, "querier.max-fetched-series-per-query", 0, "The maximum number of unique series for which a query can fetch samples from each ingesters and blocks storage. This limit is enforced in the querier only when running Cortex with blocks storage. 0 to disable")
f.IntVar(&l.MaxChunkBytesPerQuery, "querier.max-chunk-bytes-per-query", 0, "The maximum size of all chunks in bytes that a query can fetch from each ingester and blocks storage. This limit is enforced in the querier only when running Cortex with blocks storage. 0 to disable.")
f.Var(&l.MaxQueryLength, "store.max-query-length", "Limit the query time range (end - start time). This limit is enforced in the query-frontend (on the received query), in the querier (on the query possibly split by the query-frontend) and in the chunks storage. 0 to disable.")
f.Var(&l.MaxQueryLookback, "querier.max-query-lookback", "Limit how long back data (series and metadata) can be queried, up until <lookback> duration ago. This limit is enforced in the query-frontend, querier and ruler. If the requested time range is outside the allowed range, the request will not fail but will be manipulated to only query data within the allowed time range. 0 to disable.")
f.IntVar(&l.MaxQueryParallelism, "querier.max-query-parallelism", 14, "Maximum number of split queries will be scheduled in parallel by the frontend.")
@@ -394,6 +396,12 @@ func (o *Overrides) MaxFetchedSeriesPerQuery(userID string) int {
return o.getOverridesForUser(userID).MaxFetchedSeriesPerQuery
}

// MaxChunkBytesPerQuery returns the maximum number of chunk bytes allowed per query when
// fetching chunks from ingesters and blocks storage.
func (o *Overrides) MaxChunkBytesPerQuery(userID string) int {
return o.getOverridesForUser(userID).MaxChunkBytesPerQuery
}

// MaxQueryLookback returns the max lookback period of queries.
func (o *Overrides) MaxQueryLookback(userID string) time.Duration {
return time.Duration(o.getOverridesForUser(userID).MaxQueryLookback)
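End to end, the new limit flows from the per-tenant Limits into a per-query QueryLimiter carried on the request context. A sketch of that flow against this PR's API (the limit values are illustrative):

```go
package main

import (
	"context"
	"fmt"

	"github.com/cortexproject/cortex/pkg/util/limiter"
)

func main() {
	// NewQueryLimiter(maxSeriesPerQuery, maxChunkBytesPerQuery); 0 disables a limit.
	// In the querier the second argument comes from limits.MaxChunkBytesPerQuery(userID).
	ql := limiter.NewQueryLimiter(0, 64)
	ctx := limiter.AddQueryLimiterToContext(context.Background(), ql)

	// The distributor and the blocks store querier retrieve the limiter from the
	// context and account every chunk they receive against it.
	got := limiter.QueryLimiterFromContextWithFallback(ctx)
	fmt.Println(got.AddChunkBytes(32)) // <nil>
	fmt.Println(got.AddChunkBytes(33)) // limit error: running total 65 > 64
}
```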