Skip to content

Commit

Permalink
Query Frontend: dynamic horizontal query sharding (#5658)
Browse files Browse the repository at this point in the history
* Adding new parameter for more dynamic horizontal query sharding

Signed-off-by: Pedro Tanaka <pedro.tanaka@shopify.com>

* Update docs for query FE

Signed-off-by: Pedro Tanaka <pedro.tanaka@shopify.com>

* Adding new parameters and testing validation

Signed-off-by: Pedro Tanaka <pedro.tanaka@shopify.com>

* Using new parameters instead of changing behavior for existing ones

Signed-off-by: Pedro Tanaka <pedro.tanaka@shopify.com>

* Adding changelog entry for changes

Signed-off-by: Pedro Tanaka <pedro.tanaka@shopify.com>

* Fixing problem on config validation and adding more test cases

Signed-off-by: Pedro Tanaka <pedro.tanaka@shopify.com>

* Fixing linting

Signed-off-by: Pedro Tanaka <pedro.tanaka@shopify.com>

* Change name of cli arguments and improve validation

Signed-off-by: Pedro Tanaka <pedro.tanaka@shopify.com>

* Fixing changelog entry

Signed-off-by: Pedro Tanaka <pedro.tanaka@shopify.com>

* Final touches on docs

Signed-off-by: Pedro Tanaka <pedro.tanaka@shopify.com>

* Adding params names to changelog

Signed-off-by: Pedro Tanaka <pedro.tanaka@shopify.com>

* Fixing CR comments

Signed-off-by: Pedro Tanaka <pedro.tanaka@shopify.com>

* Adding additional check for minimal split interval

Signed-off-by: Pedro Tanaka <pedro.tanaka@shopify.com>

Signed-off-by: Pedro Tanaka <pedro.tanaka@shopify.com>
  • Loading branch information
pedro-stanaka authored Sep 14, 2022
1 parent fb11fc7 commit 0392dd7
Show file tree
Hide file tree
Showing 7 changed files with 249 additions and 12 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re
### Added
* [#5654](https://github.com/thanos-io/thanos/pull/5654) Query: add `--grpc-compression` flag that controls the compression used in gRPC client. With the flag it is now possible to compress the traffic between Query and StoreAPI nodes - you get lower network usage in exchange for a bit higher CPU/RAM usage.
- [#5650](https://github.com/thanos-io/thanos/pull/5650) Query Frontend: Add sharded queries metrics.
- [#5658](https://github.com/thanos-io/thanos/pull/5658) Query Frontend: Introduce new optional parameters (`query-range.min-split-interval`, `query-range.max-split-interval`, `query-range.horizontal-shards`) to implement more dynamic horizontal query splitting.

### Changed

Expand Down
11 changes: 11 additions & 0 deletions cmd/thanos/query_frontend.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,17 @@ func registerQueryFrontend(app *extkingpin.App) {
cmd.Flag("query-range.split-interval", "Split query range requests by an interval and execute in parallel, it should be greater than 0 when query-range.response-cache-config is configured.").
Default("24h").DurationVar(&cfg.QueryRangeConfig.SplitQueriesByInterval)

cmd.Flag("query-range.min-split-interval", "Split query range requests above this interval in query-range.horizontal-shards requests of equal range. "+
"Using this parameter is not allowed with query-range.split-interval. "+
"One should also set query-range.split-min-horizontal-shards to a value greater than 1 to enable splitting.").
Default("0").DurationVar(&cfg.QueryRangeConfig.MinQuerySplitInterval)

cmd.Flag("query-range.max-split-interval", "Split query range below this interval in query-range.horizontal-shards. Queries with a range longer than this value will be split in multiple requests of this length.").
Default("0").DurationVar(&cfg.QueryRangeConfig.MaxQuerySplitInterval)

cmd.Flag("query-range.horizontal-shards", "Split queries in this many requests when query duration is below query-range.max-split-interval.").
Default("0").Int64Var(&cfg.QueryRangeConfig.HorizontalShards)

cmd.Flag("query-range.max-retries-per-request", "Maximum number of retries for a single query range request; beyond this, the downstream error is returned.").
Default("5").IntVar(&cfg.QueryRangeConfig.MaxRetries)

Expand Down
17 changes: 17 additions & 0 deletions docs/components/query-frontend.md
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,10 @@ Flags:
and end with their step for better
cache-ability. Note: Grafana dashboards do that
by default.
--query-range.horizontal-shards=0
Split queries in this many requests when query
duration is below
query-range.max-split-interval.
--query-range.max-query-length=0
Limit the query time range (end - start time)
in the query-frontend, 0 disables it.
Expand All @@ -290,6 +294,19 @@ Flags:
Maximum number of retries for a single query
range request; beyond this, the downstream
error is returned.
--query-range.max-split-interval=0
Split query range below this interval in
query-range.horizontal-shards. Queries with a
range longer than this value will be split in
multiple requests of this length.
--query-range.min-split-interval=0
Split query range requests above this interval
in query-range.horizontal-shards requests of
equal range. Using this parameter is not
allowed with query-range.split-interval. One
should also set
query-range.split-min-horizontal-shards to a
value greater than 1 to enable splitting.
--query-range.partial-response
Enable partial response for query range
requests if no partial_response param is
Expand Down
43 changes: 41 additions & 2 deletions pkg/queryfrontend/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,9 @@ type QueryRangeConfig struct {
AlignRangeWithStep bool
RequestDownsampled bool
SplitQueriesByInterval time.Duration
MinQuerySplitInterval time.Duration
MaxQuerySplitInterval time.Duration
HorizontalShards int64
MaxRetries int
Limits *cortexvalidation.Limits
}
Expand All @@ -242,14 +245,25 @@ type LabelsConfig struct {
// Validate a fully initialized config.
func (cfg *Config) Validate() error {
if cfg.QueryRangeConfig.ResultsCacheConfig != nil {
if cfg.QueryRangeConfig.SplitQueriesByInterval <= 0 {
return errors.New("split queries interval should be greater than 0 when caching is enabled")
if cfg.QueryRangeConfig.SplitQueriesByInterval <= 0 && !cfg.isDynamicSplitSet() {
return errors.New("split queries or split threshold interval should be greater than 0 when caching is enabled")
}
if err := cfg.QueryRangeConfig.ResultsCacheConfig.Validate(querier.Config{}); err != nil {
return errors.Wrap(err, "invalid ResultsCache config for query_range tripperware")
}
}

if cfg.isDynamicSplitSet() && cfg.isStaticSplitSet() {
return errors.New("split queries interval and dynamic query split interval cannot be set at the same time")
}

if cfg.isDynamicSplitSet() {

if err := cfg.validateDynamicSplitParams(); err != nil {
return err
}
}

if cfg.LabelsConfig.ResultsCacheConfig != nil {
if cfg.LabelsConfig.SplitQueriesByInterval <= 0 {
return errors.New("split queries interval should be greater than 0 when caching is enabled")
Expand All @@ -269,3 +283,28 @@ func (cfg *Config) Validate() error {

return nil
}

func (cfg *Config) validateDynamicSplitParams() error {
if cfg.QueryRangeConfig.HorizontalShards <= 0 {
return errors.New("min horizontal shards should be greater than 0 when query split threshold is enabled")
}

if cfg.QueryRangeConfig.MaxQuerySplitInterval <= 0 {
return errors.New("max query split interval should be greater than 0 when query split threshold is enabled")
}

if cfg.QueryRangeConfig.MinQuerySplitInterval <= 0 {
return errors.New("min query split interval should be greater than 0 when query split threshold is enabled")
}
return nil
}

func (cfg *Config) isStaticSplitSet() bool {
return cfg.QueryRangeConfig.SplitQueriesByInterval != 0
}

func (cfg *Config) isDynamicSplitSet() bool {
return cfg.QueryRangeConfig.MinQuerySplitInterval > 0 ||
cfg.QueryRangeConfig.HorizontalShards > 0 ||
cfg.QueryRangeConfig.MaxQuerySplitInterval > 0
}
110 changes: 110 additions & 0 deletions pkg/queryfrontend/config_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
// Copyright (c) The Thanos Authors.
// Licensed under the Apache License 2.0.

package queryfrontend

import (
"fmt"
"testing"
"time"

"github.com/thanos-io/thanos/internal/cortex/chunk/cache"
"github.com/thanos-io/thanos/internal/cortex/querier/queryrange"
"github.com/thanos-io/thanos/pkg/testutil"
)

func TestConfig_Validate(t *testing.T) {

type testCase struct {
name string
config Config
err string
}

testCases := []testCase{
{
name: "invalid query range options",
config: Config{
QueryRangeConfig: QueryRangeConfig{
SplitQueriesByInterval: 10 * time.Hour,
HorizontalShards: 10,
MinQuerySplitInterval: 1 * time.Hour,
MaxQuerySplitInterval: day,
},
},
err: "split queries interval and dynamic query split interval cannot be set at the same time",
},
{
name: "invalid parameters for dynamic query range split",
config: Config{
QueryRangeConfig: QueryRangeConfig{
SplitQueriesByInterval: 0,
HorizontalShards: 0,
MinQuerySplitInterval: 1 * time.Hour,
},
},
err: "min horizontal shards should be greater than 0 when query split threshold is enabled",
},
{
name: "invalid parameters for dynamic query range split - 2",
config: Config{
QueryRangeConfig: QueryRangeConfig{
SplitQueriesByInterval: 0,
HorizontalShards: 10,
MaxQuerySplitInterval: 0,
MinQuerySplitInterval: 1 * time.Hour,
},
},
err: "max query split interval should be greater than 0 when query split threshold is enabled",
},
{
name: "invalid parameters for dynamic query range split - 3",
config: Config{
QueryRangeConfig: QueryRangeConfig{
SplitQueriesByInterval: 0,
HorizontalShards: 10,
MaxQuerySplitInterval: 1 * time.Hour,
MinQuerySplitInterval: 0,
},
LabelsConfig: LabelsConfig{
DefaultTimeRange: day,
},
},
err: "min query split interval should be greater than 0 when query split threshold is enabled",
},
{
name: "valid config with caching",
config: Config{
DownstreamURL: "localhost:8080",
QueryRangeConfig: QueryRangeConfig{
SplitQueriesByInterval: 10 * time.Hour,
HorizontalShards: 0,
MaxQuerySplitInterval: 0,
MinQuerySplitInterval: 0,
ResultsCacheConfig: &queryrange.ResultsCacheConfig{
CacheConfig: cache.Config{},
Compression: "",
CacheQueryableSamplesStats: false,
},
},
LabelsConfig: LabelsConfig{
DefaultTimeRange: day,
},
},
err: "",
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
err := tc.config.Validate()
if tc.err != "" {
testutil.NotOk(t, err)
testutil.Equals(t, tc.err, err.Error())
} else {
testutil.Ok(t, err)
fmt.Println(err)
}
})
}
}
28 changes: 24 additions & 4 deletions pkg/queryfrontend/roundtrip.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,11 +179,9 @@ func newQueryRangeTripperware(
)
}

queryIntervalFn := func(_ queryrange.Request) time.Duration {
return config.SplitQueriesByInterval
}
if config.SplitQueriesByInterval != 0 || config.MinQuerySplitInterval != 0 {
queryIntervalFn := dynamicIntervalFn(config)

if config.SplitQueriesByInterval != 0 {
queryRangeMiddleware = append(
queryRangeMiddleware,
queryrange.InstrumentMiddleware("split_by_interval", m),
Expand Down Expand Up @@ -237,6 +235,28 @@ func newQueryRangeTripperware(
}, nil
}

func dynamicIntervalFn(config QueryRangeConfig) queryrange.IntervalFn {
return func(r queryrange.Request) time.Duration {
// Use static interval, by default.
if config.SplitQueriesByInterval != 0 {
return config.SplitQueriesByInterval
}

queryInterval := time.Duration(r.GetEnd()-r.GetStart()) * time.Millisecond
// If the query is multiple of max interval, we use the max interval to split.
if queryInterval/config.MaxQuerySplitInterval >= 2 {
return config.MaxQuerySplitInterval
}

if queryInterval > config.MinQuerySplitInterval {
// If the query duration is less than max interval, we split it equally in HorizontalShards.
return time.Duration(queryInterval.Milliseconds()/config.HorizontalShards) * time.Millisecond
}

return config.MinQuerySplitInterval
}
}

// newLabelsTripperware returns a Tripperware for labels and series requests
// configured with middlewares of split by interval and retry.
func newLabelsTripperware(
Expand Down
51 changes: 45 additions & 6 deletions pkg/queryfrontend/roundtrip_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,12 +241,15 @@ func TestRoundTripSplitIntervalMiddleware(t *testing.T) {
labelsCodec := NewThanosLabelsCodec(true, 2*time.Hour)

for _, tc := range []struct {
name string
splitInterval time.Duration
req queryrange.Request
codec queryrange.Codec
handlerFunc func(bool) (*int, http.Handler)
expected int
name string
splitInterval time.Duration
querySplitThreshold time.Duration
maxSplitInterval time.Duration
minHorizontalShards int64
req queryrange.Request
codec queryrange.Codec
handlerFunc func(bool) (*int, http.Handler)
expected int
}{
{
name: "split interval == 0, disable split",
Expand Down Expand Up @@ -280,6 +283,39 @@ func TestRoundTripSplitIntervalMiddleware(t *testing.T) {
splitInterval: 1 * time.Hour,
expected: 2,
},
{
name: "split to 4 requests, due to min horizontal shards",
req: testRequest,
handlerFunc: promqlResults,
codec: queryRangeCodec,
splitInterval: 0,
querySplitThreshold: 30 * time.Minute,
maxSplitInterval: 4 * time.Hour,
minHorizontalShards: 4,
expected: 4,
},
{
name: "split to 2 requests, due to maxSplitInterval",
req: testRequest,
handlerFunc: promqlResults,
codec: queryRangeCodec,
splitInterval: 0,
querySplitThreshold: 30 * time.Minute,
maxSplitInterval: 1 * time.Hour,
minHorizontalShards: 4,
expected: 2,
},
{
name: "split to 2 requests, due to maxSplitInterval",
req: testRequest,
handlerFunc: promqlResults,
codec: queryRangeCodec,
splitInterval: 0,
querySplitThreshold: 2 * time.Hour,
maxSplitInterval: 4 * time.Hour,
minHorizontalShards: 4,
expected: 1,
},
{
name: "labels request won't be split",
req: testLabelsRequest,
Expand Down Expand Up @@ -320,6 +356,9 @@ func TestRoundTripSplitIntervalMiddleware(t *testing.T) {
QueryRangeConfig: QueryRangeConfig{
Limits: defaultLimits,
SplitQueriesByInterval: tc.splitInterval,
MinQuerySplitInterval: tc.querySplitThreshold,
MaxQuerySplitInterval: tc.maxSplitInterval,
HorizontalShards: tc.minHorizontalShards,
},
LabelsConfig: LabelsConfig{
Limits: defaultLimits,
Expand Down

0 comments on commit 0392dd7

Please sign in to comment.