Skip to content

Commit

Permalink
rule: add concurrent evals functionality (thanos-io#7835)
Browse files Browse the repository at this point in the history
Expose the new concurrent evaluation functionality from Ruler.

Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
  • Loading branch information
GiedriusS authored and jnyi committed Oct 17, 2024
1 parent 342b0a2 commit c6295e5
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 20 deletions.
48 changes: 28 additions & 20 deletions cmd/thanos/rule.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,16 +98,17 @@ type ruleConfig struct {

rwConfig *extflag.PathOrContent

resendDelay time.Duration
evalInterval time.Duration
outageTolerance time.Duration
forGracePeriod time.Duration
ruleFiles []string
objStoreConfig *extflag.PathOrContent
dataDir string
lset labels.Labels
ignoredLabelNames []string
storeRateLimits store.SeriesSelectLimits
resendDelay time.Duration
evalInterval time.Duration
outageTolerance time.Duration
forGracePeriod time.Duration
ruleFiles []string
objStoreConfig *extflag.PathOrContent
dataDir string
lset labels.Labels
ignoredLabelNames []string
storeRateLimits store.SeriesSelectLimits
ruleConcurrentEval int64

extendedFunctionsEnabled bool
}
Expand Down Expand Up @@ -156,6 +157,7 @@ func registerRule(app *extkingpin.App) {
Default("10m").DurationVar(&conf.forGracePeriod)
cmd.Flag("restore-ignored-label", "Label names to be ignored when restoring alerts from the remote storage. This is only used in stateless mode.").
StringsVar(&conf.ignoredLabelNames)
cmd.Flag("rule-concurrent-evaluation", "How many rules can be evaluated concurrently. Default is 1.").Default("1").Int64Var(&conf.ruleConcurrentEval)

cmd.Flag("grpc-query-endpoint", "Addresses of Thanos gRPC query API servers (repeatable). The scheme may be prefixed with 'dns+' or 'dnssrv+' to detect Thanos API servers through respective DNS lookups.").
PlaceHolder("<endpoint>").StringsVar(&conf.grpcQueryEndpoints)
Expand Down Expand Up @@ -625,22 +627,28 @@ func runRule(
alertQ.Push(res)
}

managerOpts := rules.ManagerOptions{
NotifyFunc: notifyFunc,
Logger: logger,
Appendable: appendable,
ExternalURL: nil,
Queryable: queryable,
ResendDelay: conf.resendDelay,
OutageTolerance: conf.outageTolerance,
ForGracePeriod: conf.forGracePeriod,
}
if conf.ruleConcurrentEval > 1 {
managerOpts.MaxConcurrentEvals = conf.ruleConcurrentEval
managerOpts.ConcurrentEvalsEnabled = true
}

ctx, cancel := context.WithCancel(context.Background())
logger = log.With(logger, "component", "rules")
ruleMgr = thanosrules.NewManager(
tracing.ContextWithTracer(ctx, tracer),
reg,
conf.dataDir,
rules.ManagerOptions{
NotifyFunc: notifyFunc,
Logger: logger,
Appendable: appendable,
ExternalURL: nil,
Queryable: queryable,
ResendDelay: conf.resendDelay,
OutageTolerance: conf.outageTolerance,
ForGracePeriod: conf.forGracePeriod,
},
managerOpts,
queryFuncCreator(logger, queryClients, promClients, grpcEndpointSet, metrics.duplicatedQuery, metrics.ruleEvalWarnings, conf.query.httpMethod, conf.query.doNotAddThanosParams),
conf.lset,
// In our case the querying URL is the external URL because in Prometheus
Expand Down
3 changes: 3 additions & 0 deletions docs/components/rule.md
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,9 @@ Flags:
Label names to be ignored when restoring alerts
from the remote storage. This is only used in
stateless mode.
--rule-concurrent-evaluation=1
How many rules can be evaluated concurrently.
Default is 1.
--rule-file=rules/ ... Rule files that should be used by rule
manager. Can be in glob format (repeated).
Note that rules are not automatically detected,
Expand Down

0 comments on commit c6295e5

Please sign in to comment.