Skip to content

Commit

Permalink
Add limit for store gateway downloaded bytes (#5179)
Browse files Browse the repository at this point in the history
* add limit for store gateway downloaded bytes

Signed-off-by: Ben Ye <benye@amazon.com>

* update docs

Signed-off-by: Ben Ye <benye@amazon.com>

* update docs

Signed-off-by: Ben Ye <benye@amazon.com>

* update changelog

Signed-off-by: Ben Ye <benye@amazon.com>

---------

Signed-off-by: Ben Ye <benye@amazon.com>
  • Loading branch information
yeya24 authored Apr 28, 2023
1 parent 1b16ce3 commit 169a062
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## master / unreleased
* [CHANGE] Alertmanager: Validating new fields on the PagerDuty AM config. #5290
* [CHANGE] Ingester: Creating label `native-histogram-sample` on the `cortex_discarded_samples_total` to keep track of discarded native histogram samples. #5289
* [FEATURE] Store Gateway: Add `max_downloaded_bytes_per_request` to limit max bytes to download per store gateway request. #5179
* [BUGFIX] Ruler: Validate if rule group can be safely converted back to rule group yaml from protobuf message #5265
* [BUGFIX] Querier: Convert gRPC `ResourceExhausted` status code from store gateway to 422 limit error. #5286
* [BUGFIX] Alertmanager: Route web-ui requests to the alertmanager distributor when sharding is enabled. #5293
Expand Down
5 changes: 5 additions & 0 deletions docs/configuration/config-file-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -2950,6 +2950,11 @@ The `limits_config` configures default and per-tenant limits imposed by Cortex s
# CLI flag: -store-gateway.tenant-shard-size
[store_gateway_tenant_shard_size: <int> | default = 0]

# The maximum number of data bytes to download per gRPC request in Store
# Gateway, including Series/LabelNames/LabelValues requests. 0 to disable.
# CLI flag: -store-gateway.max-downloaded-bytes-per-request
[max_downloaded_bytes_per_request: <int> | default = 0]

# Delete blocks containing samples older than the specified retention period. 0
# to disable.
# CLI flag: -compactor.blocks-retention-period
Expand Down
12 changes: 11 additions & 1 deletion pkg/storegateway/bucket_stores.go
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,7 @@ func (u *BucketStores) getOrCreateStore(userID string) (*store.BucketStore, erro
u.syncDirForUser(userID),
newChunksLimiterFactory(u.limits, userID),
newSeriesLimiterFactory(u.limits, userID),
store.NewBytesLimiterFactory(0),
newBytesLimiterFactory(u.limits, userID),
u.partitioner,
u.cfg.BucketStore.BlockSyncConcurrency,
false, // No need to enable backward compatibility with Thanos pre 0.8.0 queriers
Expand Down Expand Up @@ -637,3 +637,13 @@ func newSeriesLimiterFactory(limits *validation.Overrides, userID string) store.
}
}
}

// newBytesLimiterFactory returns a store.BytesLimiterFactory bound to the given tenant.
//
// The tenant's MaxDownloadedBytesPerRequest limit is read each time the returned
// factory is invoked rather than once up front, because limit overrides could be
// live reloaded between limiter instantiations.
func newBytesLimiterFactory(limits *validation.Overrides, userID string) store.BytesLimiterFactory {
	factory := func(failedCounter prometheus.Counter) store.BytesLimiter {
		// Resolve the current limit for this tenant at instantiation time.
		maxBytes := uint64(limits.MaxDownloadedBytesPerRequest(userID))
		return &limiter{limiter: store.NewLimiter(maxBytes, failedCounter)}
	}
	return factory
}
10 changes: 9 additions & 1 deletion pkg/util/validation/limits.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ type Limits struct {
RulerMaxRuleGroupsPerTenant int `yaml:"ruler_max_rule_groups_per_tenant" json:"ruler_max_rule_groups_per_tenant"`

// Store-gateway.
StoreGatewayTenantShardSize int `yaml:"store_gateway_tenant_shard_size" json:"store_gateway_tenant_shard_size"`
StoreGatewayTenantShardSize int `yaml:"store_gateway_tenant_shard_size" json:"store_gateway_tenant_shard_size"`
MaxDownloadedBytesPerRequest int `yaml:"max_downloaded_bytes_per_request" json:"max_downloaded_bytes_per_request"`

// Compactor.
CompactorBlocksRetentionPeriod model.Duration `yaml:"compactor_blocks_retention_period" json:"compactor_blocks_retention_period"`
Expand Down Expand Up @@ -182,6 +183,7 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) {

// Store-gateway.
f.IntVar(&l.StoreGatewayTenantShardSize, "store-gateway.tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used. Must be set when the store-gateway sharding is enabled with the shuffle-sharding strategy. When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant.")
f.IntVar(&l.MaxDownloadedBytesPerRequest, "store-gateway.max-downloaded-bytes-per-request", 0, "The maximum number of data bytes to download per gRPC request in Store Gateway, including Series/LabelNames/LabelValues requests. 0 to disable.")

// Alertmanager.
f.Var(&l.AlertmanagerReceiversBlockCIDRNetworks, "alertmanager.receivers-firewall-block-cidr-networks", "Comma-separated list of network CIDRs to block in Alertmanager receiver integrations.")
Expand Down Expand Up @@ -430,6 +432,12 @@ func (o *Overrides) MaxFetchedDataBytesPerQuery(userID string) int {
return o.GetOverridesForUser(userID).MaxFetchedDataBytesPerQuery
}

// MaxDownloadedBytesPerRequest returns the maximum number of bytes that may be
// downloaded for a single gRPC request in Store Gateway on behalf of userID.
// The count includes any data fetched from cache or object storage.
func (o *Overrides) MaxDownloadedBytesPerRequest(userID string) int {
	tenantLimits := o.GetOverridesForUser(userID)
	return tenantLimits.MaxDownloadedBytesPerRequest
}

// MaxQueryLookback returns the max lookback period of queries.
func (o *Overrides) MaxQueryLookback(userID string) time.Duration {
return time.Duration(o.GetOverridesForUser(userID).MaxQueryLookback)
Expand Down
32 changes: 32 additions & 0 deletions pkg/util/validation/limits_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -564,3 +564,35 @@ tenant2:
require.Equal(t, 3, ov.MaxExemplars("tenant2"))
require.Equal(t, 5, ov.MaxExemplars("tenant3"))
}

// TestMaxDownloadedBytesPerRequestOverridesPerTenant checks that
// Overrides.MaxDownloadedBytesPerRequest returns the per-tenant override when
// one is configured and falls back to the base limit otherwise.
func TestMaxDownloadedBytesPerRequestOverridesPerTenant(t *testing.T) {
	SetDefaultLimitsForYAMLUnmarshalling(Limits{
		MaxLabelNameLength: 100,
	})

	// Base limit applied to every tenant that has no override.
	baseYAML := `
max_downloaded_bytes_per_request: 5`

	// Each tenant's limit must be nested (indented) under its tenant key so the
	// document decodes as map[tenant]*Limits.
	overridesYAML := `
tenant1:
  max_downloaded_bytes_per_request: 1
tenant2:
  max_downloaded_bytes_per_request: 3
`

	l := Limits{}
	err := yaml.UnmarshalStrict([]byte(baseYAML), &l)
	require.NoError(t, err)

	overrides := map[string]*Limits{}
	err = yaml.Unmarshal([]byte(overridesYAML), &overrides)
	require.NoError(t, err, "parsing overrides")

	tl := newMockTenantLimits(overrides)

	ov, err := NewOverrides(l, tl)
	require.NoError(t, err)

	// tenant1 and tenant2 have explicit overrides; tenant3 has none and gets the base value.
	require.Equal(t, 1, ov.MaxDownloadedBytesPerRequest("tenant1"))
	require.Equal(t, 3, ov.MaxDownloadedBytesPerRequest("tenant2"))
	require.Equal(t, 5, ov.MaxDownloadedBytesPerRequest("tenant3"))
}

0 comments on commit 169a062

Please sign in to comment.