diff --git a/CHANGELOG.md b/CHANGELOG.md index 041e331e..5559c751 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## v0.7.0 + +### Added + +- Cache each Prometheus server responses to minimize the number of API calls. + ## v0.6.6 ### Fixed diff --git a/internal/checks/alerts_count.go b/internal/checks/alerts_count.go index 8f0fb0b0..c1723634 100644 --- a/internal/checks/alerts_count.go +++ b/internal/checks/alerts_count.go @@ -13,11 +13,9 @@ const ( AlertsCheckName = "alerts/count" ) -func NewAlertsCheck(name, uri string, timeout, lookBack, step, resolve time.Duration) AlertsCheck { +func NewAlertsCheck(prom *promapi.Prometheus, lookBack, step, resolve time.Duration) AlertsCheck { return AlertsCheck{ - name: name, - uri: uri, - timeout: timeout, + prom: prom, lookBack: lookBack, step: step, resolve: resolve, @@ -25,16 +23,14 @@ func NewAlertsCheck(name, uri string, timeout, lookBack, step, resolve time.Dura } type AlertsCheck struct { - name string - uri string - timeout time.Duration + prom *promapi.Prometheus lookBack time.Duration step time.Duration resolve time.Duration } func (c AlertsCheck) String() string { - return fmt.Sprintf("%s(%s)", AlertsCheckName, c.name) + return fmt.Sprintf("%s(%s)", AlertsCheckName, c.prom.Name()) } func (c AlertsCheck) Check(rule parser.Rule) (problems []Problem) { @@ -49,13 +45,13 @@ func (c AlertsCheck) Check(rule parser.Rule) (problems []Problem) { end := time.Now() start := end.Add(-1 * c.lookBack) - qr, err := promapi.RangeQuery(c.uri, c.timeout, rule.AlertingRule.Expr.Value.Value, start, end, c.step, nil) + qr, err := c.prom.RangeQuery(rule.AlertingRule.Expr.Value.Value, start, end, c.step) if err != nil { problems = append(problems, Problem{ Fragment: rule.AlertingRule.Expr.Value.Value, Lines: rule.AlertingRule.Expr.Lines(), Reporter: AlertsCheckName, - Text: fmt.Sprintf("query using %s failed with: %s", c.name, err), + Text: fmt.Sprintf("query using %s failed with: %s", c.prom.Name(), err), Severity: Bug, }) return @@ -104,7 +100,7 @@ func (c AlertsCheck) Check(rule parser.Rule) (problems []Problem) { Fragment: rule.AlertingRule.Expr.Value.Value, Lines: lines, Reporter: AlertsCheckName, - Text: fmt.Sprintf("query using %s would trigger %d alert(s) in the last %s", c.name, alerts, promapi.HumanizeDuration(delta)), + Text: fmt.Sprintf("query using %s would trigger %d alert(s) in the last %s", c.prom.Name(), alerts, promapi.HumanizeDuration(delta)), Severity: Information, }) return diff --git a/internal/checks/alerts_count_test.go b/internal/checks/alerts_count_test.go index 1eb6e264..e1af511e 100644 --- a/internal/checks/alerts_count_test.go +++ b/internal/checks/alerts_count_test.go @@ -8,6 +8,7 @@ import ( "time" "github.com/cloudflare/pint/internal/checks" + "github.com/cloudflare/pint/internal/promapi" "github.com/rs/zerolog" ) @@ -85,17 +86,17 @@ func TestAlertsCheck(t *testing.T) { { description: "ignores recording rules", content: "- record: foo\n expr: up == 0\n", - checker: checks.NewAlertsCheck("prom", "http://localhost", time.Second*5, time.Hour*24, time.Minute, time.Minute*5), + checker: checks.NewAlertsCheck(promapi.NewPrometheus("prom", "http://localhost", time.Second*5), time.Hour*24, time.Minute, time.Minute*5), }, { description: "ignores rules with syntax errors", content: "- alert: Foo Is Down\n expr: sum(\n", - checker: checks.NewAlertsCheck("prom", "http://localhost", time.Second*5, time.Hour*24, time.Minute, time.Minute*5), + checker: checks.NewAlertsCheck(promapi.NewPrometheus("prom", "http://localhost", time.Second*5), time.Hour*24, time.Minute, time.Minute*5), }, { description: "bad request", content: content, - checker: checks.NewAlertsCheck("prom", srv.URL+"/400/", time.Second*5, time.Hour*24, time.Minute, time.Minute*5), + checker: checks.NewAlertsCheck(promapi.NewPrometheus("prom", srv.URL+"/400/", time.Second*5), time.Hour*24, time.Minute, time.Minute*5), problems: []checks.Problem{ { Fragment: `up{job="foo"} == 0`, @@ -109,7 +110,7 @@ func TestAlertsCheck(t *testing.T) { { description: "empty response", content: content, - checker: checks.NewAlertsCheck("prom", srv.URL+"/empty/", time.Second*5, time.Hour*24, time.Minute, time.Minute*5), + checker: checks.NewAlertsCheck(promapi.NewPrometheus("prom", srv.URL+"/empty/", time.Second*5), time.Hour*24, time.Minute, time.Minute*5), problems: []checks.Problem{ { Fragment: `up{job="foo"} == 0`, @@ -123,7 +124,7 @@ func TestAlertsCheck(t *testing.T) { { description: "multiple alerts", content: content, - checker: checks.NewAlertsCheck("prom", srv.URL+"/alerts/", time.Second*5, time.Hour*24, time.Minute, time.Minute*5), + checker: checks.NewAlertsCheck(promapi.NewPrometheus("prom", srv.URL+"/alerts/", time.Second*5), time.Hour*24, time.Minute, time.Minute*5), problems: []checks.Problem{ { Fragment: `up{job="foo"} == 0`, @@ -137,7 +138,7 @@ func TestAlertsCheck(t *testing.T) { { description: "for: 10m", content: "- alert: Foo Is Down\n for: 10m\n expr: up{job=\"foo\"} == 0\n", - checker: checks.NewAlertsCheck("prom", srv.URL+"/alerts/", time.Second*5, time.Hour*24, time.Minute*6, time.Minute*10), + checker: checks.NewAlertsCheck(promapi.NewPrometheus("prom", srv.URL+"/alerts/", time.Second*5), time.Hour*24, time.Minute*6, time.Minute*10), problems: []checks.Problem{ { Fragment: `up{job="foo"} == 0`, @@ -154,7 +155,7 @@ func TestAlertsCheck(t *testing.T) { - alert: foo expr: '{__name__="up", job="foo"} == 0' `, - checker: checks.NewAlertsCheck("prom", srv.URL+"/alerts/", time.Second*5, time.Hour*24, time.Minute*6, time.Minute*10), + checker: checks.NewAlertsCheck(promapi.NewPrometheus("prom", srv.URL+"/alerts/", time.Second*5), time.Hour*24, time.Minute*6, time.Minute*10), problems: []checks.Problem{ { Fragment: `{__name__="up", job="foo"} == 0`, @@ -171,7 +172,7 @@ func TestAlertsCheck(t *testing.T) { - alert: foo expr: '{__name__=~"(up|foo)", job="foo"} == 0' `, - checker: checks.NewAlertsCheck("prom", srv.URL+"/alerts/", time.Second*5, time.Hour*24, time.Minute*6, time.Minute*10), + checker: checks.NewAlertsCheck(promapi.NewPrometheus("prom", srv.URL+"/alerts/", time.Second*5), time.Hour*24, time.Minute*6, time.Minute*10), problems: []checks.Problem{ { Fragment: `{__name__=~"(up|foo)", job="foo"} == 0`, diff --git a/internal/checks/promql_rate.go b/internal/checks/promql_rate.go index 3687bde8..68f1f98b 100644 --- a/internal/checks/promql_rate.go +++ b/internal/checks/promql_rate.go @@ -14,18 +14,16 @@ const ( RateCheckName = "promql/rate" ) -func NewRateCheck(name, uri string, timeout time.Duration) RateCheck { - return RateCheck{name: name, uri: uri, timeout: timeout} +func NewRateCheck(prom *promapi.Prometheus) RateCheck { + return RateCheck{prom: prom} } type RateCheck struct { - name string - uri string - timeout time.Duration + prom *promapi.Prometheus } func (c RateCheck) String() string { - return fmt.Sprintf("%s(%s)", RateCheckName, c.name) + return fmt.Sprintf("%s(%s)", RateCheckName, c.prom.Name()) } func (c RateCheck) Check(rule parser.Rule) (problems []Problem) { @@ -42,7 +40,7 @@ func (c RateCheck) Check(rule parser.Rule) (problems []Problem) { Fragment: expr.Value.Value, Lines: expr.Lines(), Reporter: RateCheckName, - Text: fmt.Sprintf("failed to query %s prometheus config: %s", c.name, err), + Text: fmt.Sprintf("failed to query %s prometheus config: %s", c.prom.Name(), err), Severity: Bug, }) return @@ -64,7 +62,7 @@ func (c RateCheck) Check(rule parser.Rule) (problems []Problem) { func (c RateCheck) getScrapeInterval() (interval time.Duration, err error) { var cfg *promapi.PrometheusConfig - cfg, err = promapi.Config(c.uri, c.timeout) + cfg, err = c.prom.Config() if err != nil { return } @@ -88,14 +86,14 @@ func (c RateCheck) checkNode(node *parser.PromQLNode, scrapeInterval time.Durati if m.Range < scrapeInterval*time.Duration(minIntervals) { p := exprProblem{ expr: node.Expr, - text: fmt.Sprintf("duration for %s() must be at least %d x scrape_interval, %s is using %s scrape_interval", n.Func.Name, minIntervals, c.name, promapi.HumanizeDuration(scrapeInterval)), + text: fmt.Sprintf("duration for %s() must be at least %d x scrape_interval, %s is using %s scrape_interval", n.Func.Name, minIntervals, c.prom.Name(), promapi.HumanizeDuration(scrapeInterval)), severity: Bug, } problems = append(problems, p) } else if m.Range < scrapeInterval*time.Duration(recIntervals) { p := exprProblem{ expr: node.Expr, - text: fmt.Sprintf("duration for %s() is recommended to be at least %d x scrape_interval, %s is using %s scrape_interval", n.Func.Name, recIntervals, c.name, promapi.HumanizeDuration(scrapeInterval)), + text: fmt.Sprintf("duration for %s() is recommended to be at least %d x scrape_interval, %s is using %s scrape_interval", n.Func.Name, recIntervals, c.prom.Name(), promapi.HumanizeDuration(scrapeInterval)), severity: Warning, } problems = append(problems, p) diff --git a/internal/checks/promql_rate_test.go b/internal/checks/promql_rate_test.go index 3f6004b3..3061e98e 100644 --- a/internal/checks/promql_rate_test.go +++ b/internal/checks/promql_rate_test.go @@ -7,6 +7,7 @@ import ( "time" "github.com/cloudflare/pint/internal/checks" + "github.com/cloudflare/pint/internal/promapi" "github.com/rs/zerolog" ) @@ -46,12 +47,12 @@ func TestRateCheck(t *testing.T) { { description: "ignores rules with syntax errors", content: "- record: foo\n expr: sum(foo) without(\n", - checker: checks.NewRateCheck("prom", srv.URL, time.Second), + checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL, time.Second)), }, { description: "rate < 2x scrape_interval", content: "- record: foo\n expr: rate(foo[1m])\n", - checker: checks.NewRateCheck("prom", srv.URL+"/1m/", time.Second), + checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/1m/", time.Second)), problems: []checks.Problem{ { Fragment: "rate(foo[1m])", @@ -65,7 +66,7 @@ func TestRateCheck(t *testing.T) { { description: "rate < 4x scrape_interval", content: "- record: foo\n expr: rate(foo[3m])\n", - checker: checks.NewRateCheck("prom", srv.URL+"/1m/", time.Second), + checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/1m/", time.Second)), problems: []checks.Problem{ { Fragment: "rate(foo[3m])", @@ -79,12 +80,12 @@ func TestRateCheck(t *testing.T) { { description: "rate == 4x scrape interval", content: "- record: foo\n expr: rate(foo[2m])\n", - checker: checks.NewRateCheck("prom", srv.URL+"/30s/", time.Second), + checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/30s/", time.Second)), }, { description: "irate < 2x scrape_interval", content: "- record: foo\n expr: irate(foo[1m])\n", - checker: checks.NewRateCheck("prom", srv.URL+"/1m/", time.Second), + checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/1m/", time.Second)), problems: []checks.Problem{ { Fragment: "irate(foo[1m])", @@ -98,7 +99,7 @@ func TestRateCheck(t *testing.T) { { description: "irate < 3x scrape_interval", content: "- record: foo\n expr: irate(foo[2m])\n", - checker: checks.NewRateCheck("prom", srv.URL+"/1m/", time.Second), + checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/1m/", time.Second)), problems: []checks.Problem{ { Fragment: "irate(foo[2m])", @@ -115,7 +116,7 @@ func TestRateCheck(t *testing.T) { - record: foo expr: irate({__name__="foo"}[5m]) `, - checker: checks.NewRateCheck("prom", srv.URL+"/1m/", time.Second), + checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/1m/", time.Second)), }, { description: "irate{__name__=~} > 3x scrape_interval", @@ -123,7 +124,7 @@ func TestRateCheck(t *testing.T) { - record: foo expr: irate({__name__=~"(foo|bar)_total"}[5m]) `, - checker: checks.NewRateCheck("prom", srv.URL+"/1m/", time.Second), + checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/1m/", time.Second)), }, { description: "irate{__name__} < 3x scrape_interval", @@ -131,7 +132,7 @@ func TestRateCheck(t *testing.T) { - record: foo expr: irate({__name__="foo"}[2m]) `, - checker: checks.NewRateCheck("prom", srv.URL+"/1m/", time.Second), + checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/1m/", time.Second)), problems: []checks.Problem{ { Fragment: `irate({__name__="foo"}[2m])`, @@ -148,7 +149,7 @@ func TestRateCheck(t *testing.T) { - record: foo expr: irate({__name__=~"(foo|bar)_total"}[2m]) `, - checker: checks.NewRateCheck("prom", srv.URL+"/1m/", time.Second), + checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/1m/", time.Second)), problems: []checks.Problem{ { Fragment: `irate({__name__=~"(foo|bar)_total"}[2m])`, @@ -162,17 +163,17 @@ func TestRateCheck(t *testing.T) { { description: "irate == 3x scrape interval", content: "- record: foo\n expr: irate(foo[3m])\n", - checker: checks.NewRateCheck("prom", srv.URL+"/1m/", time.Second), + checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/1m/", time.Second)), }, { description: "valid range selector", content: "- record: foo\n expr: foo[1m]\n", - checker: checks.NewRateCheck("prom", srv.URL+"/1m/", time.Second), + checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/1m/", time.Second)), }, { description: "nested invalid rate", content: "- record: foo\n expr: sum(rate(foo[3m])) / sum(rate(bar[1m]))\n", - checker: checks.NewRateCheck("prom", srv.URL+"/1m/", time.Second), + checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/1m/", time.Second)), problems: []checks.Problem{ { Fragment: "rate(foo[3m])", @@ -193,7 +194,7 @@ func TestRateCheck(t *testing.T) { { description: "500 error from Prometheus API", content: "- record: foo\n expr: rate(foo[5m])\n", - checker: checks.NewRateCheck("prom", srv.URL+"/error/", time.Second), + checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/error/", time.Second)), problems: []checks.Problem{ { Fragment: "rate(foo[5m])", @@ -207,7 +208,7 @@ func TestRateCheck(t *testing.T) { { description: "invalid status", content: "- record: foo\n expr: rate(foo[5m])\n", - checker: checks.NewRateCheck("prom", srv.URL, time.Second), + checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL, time.Second)), problems: []checks.Problem{ { Fragment: "rate(foo[5m])", @@ -221,7 +222,7 @@ func TestRateCheck(t *testing.T) { { description: "invalid YAML", content: "- record: foo\n expr: rate(foo[5m])\n", - checker: checks.NewRateCheck("prom", srv.URL+"/badYaml/", time.Second), + checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/badYaml/", time.Second)), problems: []checks.Problem{ { Fragment: "rate(foo[5m])", @@ -235,12 +236,12 @@ func TestRateCheck(t *testing.T) { { description: "irate == 3 x default 1m", content: "- record: foo\n expr: irate(foo[3m])\n", - checker: checks.NewRateCheck("prom", srv.URL+"/default/", time.Second), + checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/default/", time.Second)), }, { description: "irate < 3 x default 1m", content: "- record: foo\n expr: irate(foo[2m])\n", - checker: checks.NewRateCheck("prom", srv.URL+"/default/", time.Second), + checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/default/", time.Second)), problems: []checks.Problem{ { Fragment: "irate(foo[2m])", diff --git a/internal/checks/promql_vectormatching.go b/internal/checks/promql_vectormatching.go index 0ee04cc7..41e20e94 100644 --- a/internal/checks/promql_vectormatching.go +++ b/internal/checks/promql_vectormatching.go @@ -5,7 +5,6 @@ import ( "reflect" "sort" "strings" - "time" "github.com/cloudflare/pint/internal/parser" "github.com/cloudflare/pint/internal/promapi" @@ -17,14 +16,12 @@ const ( VectorMatchingCheckName = "promql/vector_matching" ) -func NewVectorMatchingCheck(name, uri string, timeout time.Duration) VectorMatchingCheck { - return VectorMatchingCheck{name: name, uri: uri, timeout: timeout} +func NewVectorMatchingCheck(prom *promapi.Prometheus) VectorMatchingCheck { + return VectorMatchingCheck{prom: prom} } type VectorMatchingCheck struct { - name string - uri string - timeout time.Duration + prom *promapi.Prometheus } func (c VectorMatchingCheck) String() string { @@ -53,7 +50,7 @@ func (c VectorMatchingCheck) Check(rule parser.Rule) (problems []Problem) { func (c VectorMatchingCheck) checkNode(node *parser.PromQLNode) (problems []exprProblem) { if n, ok := node.Node.(*promParser.BinaryExpr); ok && n.VectorMatching != nil && n.Op != promParser.LOR && n.Op != promParser.LUNLESS { q := fmt.Sprintf("count(%s)", node.Expr) - qr, err := promapi.Query(c.uri, c.timeout, q, &q) + qr, err := c.prom.Query(q) if err != nil || len(qr.Series) != 0 { goto NEXT } @@ -122,7 +119,7 @@ NEXT: } func (c VectorMatchingCheck) seriesLabels(query string, ignored ...model.LabelName) ([]string, error) { - qr, err := promapi.Query(c.uri, c.timeout, query, &query) + qr, err := c.prom.Query(query) if err != nil { return nil, err } diff --git a/internal/checks/promql_vectormatching_test.go b/internal/checks/promql_vectormatching_test.go index ce7a4b72..389fecdb 100644 --- a/internal/checks/promql_vectormatching_test.go +++ b/internal/checks/promql_vectormatching_test.go @@ -7,6 +7,7 @@ import ( "time" "github.com/cloudflare/pint/internal/checks" + "github.com/cloudflare/pint/internal/promapi" ) func TestVectorMatchingCheck(t *testing.T) { @@ -161,12 +162,12 @@ func TestVectorMatchingCheck(t *testing.T) { { description: "ignores rules with syntax errors", content: "- record: foo\n expr: sum(foo) without(\n", - checker: checks.NewVectorMatchingCheck("prom", srv.URL, time.Second*1), + checker: checks.NewVectorMatchingCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*1)), }, { description: "one to one matching", content: "- record: foo\n expr: foo_with_notfound / bar\n", - checker: checks.NewVectorMatchingCheck("prom", srv.URL, time.Second*1), + checker: checks.NewVectorMatchingCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*1)), problems: []checks.Problem{ { Fragment: "foo_with_notfound / bar", @@ -180,37 +181,37 @@ func TestVectorMatchingCheck(t *testing.T) { { description: "ignore left query errors", content: "- record: foo\n expr: xxx / foo\n", - checker: checks.NewVectorMatchingCheck("prom", srv.URL, time.Second*1), + checker: checks.NewVectorMatchingCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*1)), }, { description: "ignore righ query errors", content: "- record: foo\n expr: foo / xxx\n", - checker: checks.NewVectorMatchingCheck("prom", srv.URL, time.Second*1), + checker: checks.NewVectorMatchingCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*1)), }, { description: "ignore missing left series", content: "- record: foo\n expr: missing / foo\n", - checker: checks.NewVectorMatchingCheck("prom", srv.URL, time.Second*1), + checker: checks.NewVectorMatchingCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*1)), }, { description: "ignore missing or vector", content: "- record: foo\n expr: sum(missing or vector(0))\n", - checker: checks.NewVectorMatchingCheck("prom", srv.URL, time.Second*1), + checker: checks.NewVectorMatchingCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*1)), }, { description: "ignore present or vector", content: "- record: foo\n expr: sum(foo or vector(0))\n", - checker: checks.NewVectorMatchingCheck("prom", srv.URL, time.Second*1), + checker: checks.NewVectorMatchingCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*1)), }, { description: "ignore missing right series", content: "- record: foo\n expr: foo / missing\n", - checker: checks.NewVectorMatchingCheck("prom", srv.URL, time.Second*1), + checker: checks.NewVectorMatchingCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*1)), }, { description: "ignore with mismatched series", content: "- record: foo\n expr: foo / ignoring(xxx) app_registry\n", - checker: checks.NewVectorMatchingCheck("prom", srv.URL, time.Second*1), + checker: checks.NewVectorMatchingCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*1)), problems: []checks.Problem{ { Fragment: "foo / ignoring(xxx) app_registry", @@ -224,7 +225,7 @@ func TestVectorMatchingCheck(t *testing.T) { { description: "one to one matching with on() - both missing", content: "- record: foo\n expr: foo / on(notfound) bar\n", - checker: checks.NewVectorMatchingCheck("prom", srv.URL, time.Second*1), + checker: checks.NewVectorMatchingCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*1)), problems: []checks.Problem{ { Fragment: "foo / on(notfound) bar", @@ -238,32 +239,32 @@ func TestVectorMatchingCheck(t *testing.T) { { description: "one to one matching with ignoring() - both missing", content: "- record: foo\n expr: foo / ignoring(notfound) foo\n", - checker: checks.NewVectorMatchingCheck("prom", srv.URL, time.Second*1), + checker: checks.NewVectorMatchingCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*1)), }, { description: "one to one matching with ignoring() - both present", content: "- record: foo\n expr: foo_with_notfound / ignoring(notfound) foo_with_notfound\n", - checker: checks.NewVectorMatchingCheck("prom", srv.URL, time.Second*1), + checker: checks.NewVectorMatchingCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*1)), }, { description: "one to one matching with ignoring() - left missing", content: "- record: foo\n expr: foo / ignoring(notfound) foo_with_notfound\n", - checker: checks.NewVectorMatchingCheck("prom", srv.URL, time.Second*1), + checker: checks.NewVectorMatchingCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*1)), }, { description: "one to one matching with ignoring() - right missing", content: "- record: foo\n expr: foo_with_notfound / ignoring(notfound) foo\n", - checker: checks.NewVectorMatchingCheck("prom", srv.URL, time.Second*1), + checker: checks.NewVectorMatchingCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*1)), }, { description: "one to one matching with ignoring() - mismatch", content: "- record: foo\n expr: foo_with_notfound / ignoring(notfound) bar\n", - checker: checks.NewVectorMatchingCheck("prom", srv.URL, time.Second*1), + checker: checks.NewVectorMatchingCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*1)), }, { description: "one to one matching with on() - left missing", content: "- record: foo\n expr: foo / on(notfound) bar_with_notfound\n", - checker: checks.NewVectorMatchingCheck("prom", srv.URL, time.Second*1), + checker: checks.NewVectorMatchingCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*1)), problems: []checks.Problem{ { Fragment: "foo / on(notfound) bar_with_notfound", @@ -277,7 +278,7 @@ func TestVectorMatchingCheck(t *testing.T) { { description: "one to one matching with on() - right missing", content: "- record: foo\n expr: foo_with_notfound / on(notfound) bar\n", - checker: checks.NewVectorMatchingCheck("prom", srv.URL, time.Second*1), + checker: checks.NewVectorMatchingCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*1)), problems: []checks.Problem{ { Fragment: "foo_with_notfound / on(notfound) bar", @@ -291,7 +292,7 @@ func TestVectorMatchingCheck(t *testing.T) { { description: "nested query", content: "- alert: foo\n expr: (memory_bytes / ignoring(job) (memory_limit > 0)) * on(app_name) group_left(a,b,c) app_registry\n", - checker: checks.NewVectorMatchingCheck("prom", srv.URL, time.Second*1), + checker: checks.NewVectorMatchingCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*1)), problems: []checks.Problem{ { Fragment: "(memory_bytes / ignoring(job) (memory_limit > 0)) * on(app_name) group_left(a,b,c) app_registry", @@ -308,7 +309,7 @@ func TestVectorMatchingCheck(t *testing.T) { - record: foo expr: '{__name__="foo_with_notfound"} / ignoring(notfound) foo_with_notfound' `, - checker: checks.NewVectorMatchingCheck("prom", srv.URL, time.Second*1), + checker: checks.NewVectorMatchingCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*1)), }, } runTests(t, testCases) diff --git a/internal/checks/query_cost.go b/internal/checks/query_cost.go index 355b0f56..de249924 100644 --- a/internal/checks/query_cost.go +++ b/internal/checks/query_cost.go @@ -2,7 +2,6 @@ package checks import ( "fmt" - "time" "github.com/cloudflare/pint/internal/parser" "github.com/cloudflare/pint/internal/promapi" @@ -12,11 +11,9 @@ const ( CostCheckName = "query/cost" ) -func NewCostCheck(name, uri string, timeout time.Duration, bps, maxSeries int, severity Severity) CostCheck { +func NewCostCheck(prom *promapi.Prometheus, bps, maxSeries int, severity Severity) CostCheck { return CostCheck{ - name: name, - uri: uri, - timeout: timeout, + prom: prom, bytesPerSample: bps, maxSeries: maxSeries, severity: severity, @@ -24,16 +21,14 @@ func NewCostCheck(name, uri string, timeout time.Duration, bps, maxSeries int, s } type CostCheck struct { - name string - uri string - timeout time.Duration + prom *promapi.Prometheus bytesPerSample int maxSeries int severity Severity } func (c CostCheck) String() string { - return fmt.Sprintf("%s(%s)", CostCheckName, c.name) + return fmt.Sprintf("%s(%s)", CostCheckName, c.prom.Name()) } func (c CostCheck) Check(rule parser.Rule) (problems []Problem) { @@ -44,13 +39,13 @@ func (c CostCheck) Check(rule parser.Rule) (problems []Problem) { } query := fmt.Sprintf("count(%s)", expr.Value.Value) - qr, err := promapi.Query(c.uri, c.timeout, query, nil) + qr, err := c.prom.Query(query) if err != nil { problems = append(problems, Problem{ Fragment: expr.Value.Value, Lines: expr.Lines(), Reporter: CostCheckName, - Text: fmt.Sprintf("query using %s failed with: %s", c.name, err), + Text: fmt.Sprintf("query using %s failed with: %s", c.prom.Name(), err), Severity: Bug, }) return @@ -77,7 +72,7 @@ func (c CostCheck) Check(rule parser.Rule) (problems []Problem) { Fragment: expr.Value.Value, Lines: expr.Lines(), Reporter: CostCheckName, - Text: fmt.Sprintf("query using %s completed in %.2fs returning %d result(s)%s%s", c.name, qr.DurationSeconds, series, estimate, above), + Text: fmt.Sprintf("query using %s completed in %.2fs returning %d result(s)%s%s", c.prom.Name(), qr.DurationSeconds, series, estimate, above), Severity: severity, }) return diff --git a/internal/checks/query_cost_test.go b/internal/checks/query_cost_test.go index 5c9e8d8a..52919059 100644 --- a/internal/checks/query_cost_test.go +++ b/internal/checks/query_cost_test.go @@ -9,6 +9,7 @@ import ( "time" "github.com/cloudflare/pint/internal/checks" + "github.com/cloudflare/pint/internal/promapi" "github.com/google/go-cmp/cmp" "github.com/rs/zerolog" @@ -79,12 +80,12 @@ func TestCostCheck(t *testing.T) { { description: "ignores rules with syntax errors", content: "- record: foo\n expr: sum(foo) without(\n", - checker: checks.NewCostCheck("prom", "http://localhost", time.Second*5, 4096, 0, checks.Bug), + checker: checks.NewCostCheck(promapi.NewPrometheus("prom", "http://localhost", time.Second*5), 4096, 0, checks.Bug), }, { description: "empty response", content: content, - checker: checks.NewCostCheck("prom", srv.URL+"/empty/", time.Second*5, 4096, 0, checks.Bug), + checker: checks.NewCostCheck(promapi.NewPrometheus("prom", srv.URL+"/empty/", time.Second*5), 4096, 0, checks.Bug), problems: []checks.Problem{ { Fragment: "sum(foo)", @@ -98,7 +99,7 @@ func TestCostCheck(t *testing.T) { { description: "response timeout", content: content, - checker: checks.NewCostCheck("prom", srv.URL+"/empty/", time.Millisecond*5, 4096, 0, checks.Bug), + checker: checks.NewCostCheck(promapi.NewPrometheus("prom", srv.URL+"/empty/", time.Millisecond*5), 4096, 0, checks.Bug), problems: []checks.Problem{ { Fragment: "sum(foo)", @@ -112,7 +113,7 @@ func TestCostCheck(t *testing.T) { { description: "bad request", content: content, - checker: checks.NewCostCheck("prom", srv.URL+"/400/", time.Second*5, 4096, 0, checks.Bug), + checker: checks.NewCostCheck(promapi.NewPrometheus("prom", srv.URL+"/400/", time.Second*5), 4096, 0, checks.Bug), problems: []checks.Problem{ { Fragment: "sum(foo)", @@ -126,7 +127,7 @@ func TestCostCheck(t *testing.T) { { description: "1 result", content: content, - checker: checks.NewCostCheck("prom", srv.URL+"/1/", time.Second*5, 4096, 0, checks.Bug), + checker: checks.NewCostCheck(promapi.NewPrometheus("prom", srv.URL+"/1/", time.Second*5), 4096, 0, checks.Bug), problems: []checks.Problem{ { Fragment: "sum(foo)", @@ -140,7 +141,7 @@ func TestCostCheck(t *testing.T) { { description: "7 results", content: content, - checker: checks.NewCostCheck("prom", srv.URL+"/7/", time.Second*5, 101, 0, checks.Bug), + checker: checks.NewCostCheck(promapi.NewPrometheus("prom", srv.URL+"/7/", time.Second*5), 101, 0, checks.Bug), problems: []checks.Problem{ { Fragment: "sum(foo)", @@ -154,7 +155,7 @@ func TestCostCheck(t *testing.T) { { description: "7 result with MB", content: content, - checker: checks.NewCostCheck("prom", srv.URL+"/7/", time.Second*5, 1024*1024, 0, checks.Bug), + checker: checks.NewCostCheck(promapi.NewPrometheus("prom", srv.URL+"/7/", time.Second*5), 1024*1024, 0, checks.Bug), problems: []checks.Problem{ { Fragment: "sum(foo)", @@ -168,7 +169,7 @@ func TestCostCheck(t *testing.T) { { description: "7 results with 1 series max (1KB bps)", content: content, - checker: checks.NewCostCheck("prom", srv.URL+"/7/", time.Second*5, 1024, 1, checks.Bug), + checker: checks.NewCostCheck(promapi.NewPrometheus("prom", srv.URL+"/7/", time.Second*5), 1024, 1, checks.Bug), problems: []checks.Problem{ { Fragment: "sum(foo)", @@ -182,7 +183,7 @@ func TestCostCheck(t *testing.T) { { description: "7 results with 5 series max", content: content, - checker: checks.NewCostCheck("prom", srv.URL+"/7/", time.Second*5, 0, 5, checks.Bug), + checker: checks.NewCostCheck(promapi.NewPrometheus("prom", srv.URL+"/7/", time.Second*5), 0, 5, checks.Bug), problems: []checks.Problem{ { Fragment: "sum(foo)", @@ -196,7 +197,7 @@ func TestCostCheck(t *testing.T) { { description: "7 results with 5 series max / infi", content: content, - checker: checks.NewCostCheck("prom", srv.URL+"/7/", time.Second*5, 0, 5, checks.Information), + checker: checks.NewCostCheck(promapi.NewPrometheus("prom", srv.URL+"/7/", time.Second*5), 0, 5, checks.Information), problems: []checks.Problem{ { Fragment: "sum(foo)", @@ -213,7 +214,7 @@ func TestCostCheck(t *testing.T) { - record: foo expr: 'sum({__name__="foo"})' `, - checker: checks.NewCostCheck("prom", srv.URL+"/7/", time.Second*5, 101, 0, checks.Bug), + checker: checks.NewCostCheck(promapi.NewPrometheus("prom", srv.URL+"/7/", time.Second*5), 101, 0, checks.Bug), problems: []checks.Problem{ { Fragment: `sum({__name__="foo"})`, diff --git a/internal/checks/query_series.go b/internal/checks/query_series.go index 8a041b49..ae57cdc0 100644 --- a/internal/checks/query_series.go +++ b/internal/checks/query_series.go @@ -2,7 +2,6 @@ package checks import ( "fmt" - "time" "github.com/cloudflare/pint/internal/parser" "github.com/cloudflare/pint/internal/promapi" @@ -15,18 +14,16 @@ const ( SeriesCheckName = "query/series" ) -func NewSeriesCheck(name, uri string, timeout time.Duration) SeriesCheck { - return SeriesCheck{name: name, uri: uri, timeout: timeout} +func NewSeriesCheck(prom *promapi.Prometheus) SeriesCheck { + return SeriesCheck{prom: prom} } type SeriesCheck struct { - name string - uri string - timeout time.Duration + prom *promapi.Prometheus } func (c SeriesCheck) String() string { - return fmt.Sprintf("%s(%s)", SeriesCheckName, c.name) + return fmt.Sprintf("%s(%s)", SeriesCheckName, c.prom.Name()) } func (c SeriesCheck) Check(rule parser.Rule) (problems []Problem) { @@ -58,13 +55,13 @@ func (c SeriesCheck) Check(rule parser.Rule) (problems []Problem) { func (c SeriesCheck) countSeries(expr parser.PromQLExpr, selector promParser.VectorSelector) (problems []Problem) { q := fmt.Sprintf("count(%s)", selector.String()) - qr, err := promapi.Query(c.uri, c.timeout, q, &q) + qr, err := c.prom.Query(q) if err != nil { problems = append(problems, Problem{ Fragment: selector.String(), Lines: expr.Lines(), Reporter: SeriesCheckName, - Text: fmt.Sprintf("query using %s failed with: %s", c.name, err), + Text: fmt.Sprintf("query using %s failed with: %s", c.prom.Name(), err), Severity: Bug, }) return @@ -92,7 +89,7 @@ func (c SeriesCheck) countSeries(expr parser.PromQLExpr, selector promParser.Vec Fragment: selector.String(), Lines: expr.Lines(), Reporter: SeriesCheckName, - Text: fmt.Sprintf("query using %s completed without any results for %s", c.name, selector.String()), + Text: fmt.Sprintf("query using %s completed without any results for %s", c.prom.Name(), selector.String()), Severity: Warning, }) return diff --git a/internal/checks/query_series_test.go b/internal/checks/query_series_test.go index c21cf4cb..c8cf443c 100644 --- a/internal/checks/query_series_test.go +++ b/internal/checks/query_series_test.go @@ -7,6 +7,7 @@ import ( "time" "github.com/cloudflare/pint/internal/checks" + "github.com/cloudflare/pint/internal/promapi" "github.com/rs/zerolog" ) @@ -108,12 +109,12 @@ func TestSeriesCheck(t *testing.T) { { description: "ignores rules with syntax errors", content: "- record: foo\n expr: sum(foo) without(\n", - checker: checks.NewSeriesCheck("prom", srv.URL, time.Second*5), + checker: checks.NewSeriesCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*5)), }, { description: "bad response", content: "- record: foo\n expr: sum(foo)\n", - checker: checks.NewSeriesCheck("prom", srv.URL, time.Second*5), + checker: checks.NewSeriesCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*5)), problems: []checks.Problem{ { Fragment: "foo", @@ -127,7 +128,7 @@ func TestSeriesCheck(t *testing.T) { { description: "simple query", content: "- record: foo\n expr: sum(notfound)\n", - checker: checks.NewSeriesCheck("prom", srv.URL, time.Second*5), + checker: checks.NewSeriesCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*5)), problems: []checks.Problem{ { Fragment: "notfound", @@ -141,7 +142,7 @@ func TestSeriesCheck(t *testing.T) { { description: "complex query", content: "- record: foo\n expr: sum(found_7 * on (job) sum(sum(notfound))) / found_7\n", - checker: checks.NewSeriesCheck("prom", srv.URL, time.Second*5), + checker: checks.NewSeriesCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*5)), problems: []checks.Problem{ { Fragment: "notfound", @@ -155,7 +156,7 @@ func TestSeriesCheck(t *testing.T) { { description: "complex query / bug", content: "- record: foo\n expr: sum(found_7 * on (job) sum(sum(notfound))) / found_7\n", - checker: checks.NewSeriesCheck("prom", srv.URL, time.Second*5), + checker: checks.NewSeriesCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*5)), problems: []checks.Problem{ { Fragment: "notfound", @@ -189,17 +190,17 @@ func TestSeriesCheck(t *testing.T) { ) for: 5m `, - checker: checks.NewSeriesCheck("prom", srv.URL, time.Second*5), + checker: checks.NewSeriesCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*5)), }, { description: "offset", content: "- record: foo\n expr: node_filesystem_readonly{mountpoint!=\"\"} offset 5m\n", - checker: checks.NewSeriesCheck("prom", srv.URL, time.Second*5), + checker: checks.NewSeriesCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*5)), }, { description: "series found, label missing", content: "- record: foo\n expr: found{job=\"notfound\"}\n", - checker: checks.NewSeriesCheck("prom", srv.URL, time.Second*5), + checker: checks.NewSeriesCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*5)), problems: []checks.Problem{ { Fragment: `found{job="notfound"}`, @@ -213,7 +214,7 @@ func TestSeriesCheck(t *testing.T) { { description: "series missing, label missing", content: "- record: foo\n expr: notfound{job=\"notfound\"}\n", - checker: checks.NewSeriesCheck("prom", srv.URL, time.Second*5), + checker: checks.NewSeriesCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*5)), problems: []checks.Problem{ { Fragment: "notfound", @@ -230,7 +231,7 @@ func TestSeriesCheck(t *testing.T) { - record: foo expr: '{__name__="notfound", job="bar"}' `, - checker: checks.NewSeriesCheck("prom", srv.URL, time.Second*5), + checker: checks.NewSeriesCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*5)), problems: []checks.Problem{ { Fragment: `{__name__="notfound",job="bar"}`, @@ -248,7 +249,7 @@ func TestSeriesCheck(t *testing.T) { - record: foo expr: count(notfound) == 0 `, - checker: checks.NewSeriesCheck("prom", srv.URL, time.Second*5), + checker: checks.NewSeriesCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*5)), }, { description: "series missing but check disabled, labels", @@ -257,7 +258,7 @@ func TestSeriesCheck(t *testing.T) { - record: foo expr: count(notfound{job="foo"}) == 0 `, - checker: checks.NewSeriesCheck("prom", srv.URL, time.Second*5), + checker: checks.NewSeriesCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*5)), }, { description: "series missing but check disabled, negative labels", @@ -266,7 +267,7 @@ func TestSeriesCheck(t *testing.T) { - record: foo expr: count(notfound{job!="foo"}) == 0 `, - checker: checks.NewSeriesCheck("prom", srv.URL, time.Second*5), + checker: checks.NewSeriesCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*5)), }, { description: "series missing, disabled comment for labels", @@ -275,7 +276,7 @@ func TestSeriesCheck(t *testing.T) { - record: foo expr: count(notfound) == 0 `, - checker: checks.NewSeriesCheck("prom", srv.URL, time.Second*5), + checker: checks.NewSeriesCheck(promapi.NewPrometheus("prom", srv.URL, time.Second*5)), problems: []checks.Problem{ { Fragment: `notfound`, diff --git a/internal/config/config.go b/internal/config/config.go index 22e9ac85..fa6d82c9 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -9,6 +9,7 @@ import ( "github.com/cloudflare/pint/internal/checks" "github.com/cloudflare/pint/internal/parser" + "github.com/cloudflare/pint/internal/promapi" "github.com/hashicorp/hcl/v2/hclsimple" "github.com/prometheus/common/model" @@ -16,11 +17,12 @@ import ( ) type Config struct { - CI *CI `hcl:"ci,block" json:"ci,omitempty"` - Repository *Repository `hcl:"repository,block" json:"repository,omitempty"` - Prometheus []PrometheusConfig `hcl:"prometheus,block" json:"prometheus,omitempty"` - Checks *Checks `hcl:"checks,block" json:"checks,omitempty"` - Rules []Rule `hcl:"rule,block" json:"rules,omitempty"` + CI *CI `hcl:"ci,block" json:"ci,omitempty"` + Repository *Repository `hcl:"repository,block" json:"repository,omitempty"` + Prometheus []PrometheusConfig `hcl:"prometheus,block" json:"prometheus,omitempty"` + Checks *Checks `hcl:"checks,block" json:"checks,omitempty"` + Rules []Rule `hcl:"rule,block" json:"rules,omitempty"` + prometheusServers []*promapi.Prometheus } func (cfg *Config) SetDisabledChecks(offline bool, l []string) { @@ -56,7 +58,7 @@ func (cfg Config) String() string { return string(content) } -func (cfg Config) GetChecksForRule(path string, r parser.Rule) []checks.RuleChecker { +func (cfg *Config) GetChecksForRule(path string, r parser.Rule) []checks.RuleChecker { enabled := []checks.RuleChecker{} if isEnabled(cfg.Checks.Enabled, cfg.Checks.Disabled, checks.SyntaxCheckName, r) { @@ -75,12 +77,16 @@ func (cfg Config) GetChecksForRule(path string, r parser.Rule) []checks.RuleChec enabled = append(enabled, checks.NewTemplateCheck()) } - proms := []PrometheusConfig{} + proms := []*promapi.Prometheus{} for _, prom := range cfg.Prometheus { if !prom.isEnabledForPath(path) { continue } - proms = append(proms, prom) + for _, p := range cfg.prometheusServers { + if p.Name() == prom.Name { + proms = append(proms, p) + } + } } for _, rule := range cfg.Rules { @@ -170,6 +176,8 @@ func Load(path string, failOnMissing bool) (cfg Config, err error) { if err = prom.validate(); err != nil { return cfg, err } + timeout, _ := parseDuration(prom.Timeout) + cfg.prometheusServers = append(cfg.prometheusServers, promapi.NewPrometheus(prom.Name, prom.URI, timeout)) } for _, rule := range cfg.Rules { diff --git a/internal/config/rule.go b/internal/config/rule.go index bd251149..4d024043 100644 --- a/internal/config/rule.go +++ b/internal/config/rule.go @@ -7,6 +7,7 @@ import ( "github.com/cloudflare/pint/internal/checks" "github.com/cloudflare/pint/internal/parser" + "github.com/cloudflare/pint/internal/promapi" "github.com/rs/zerolog/log" ) @@ -155,7 +156,7 @@ type Rule struct { Reject []RejectSettings `hcl:"reject,block" json:"reject,omitempty"` } -func (rule Rule) resolveChecks(path string, r parser.Rule, enabledChecks, disabledChecks []string, proms []PrometheusConfig) []checks.RuleChecker { +func (rule Rule) resolveChecks(path string, r parser.Rule, enabledChecks, disabledChecks []string, prometheusServers []*promapi.Prometheus) []checks.RuleChecker { enabled := []checks.RuleChecker{} if rule.Ignore != nil && rule.Ignore.IsMatch(path, r) { @@ -187,31 +188,27 @@ func (rule Rule) resolveChecks(path string, r parser.Rule, enabledChecks, disabl } if isEnabled(enabledChecks, disabledChecks, checks.RateCheckName, r) { - for _, prom := range proms { - timeout, _ := parseDuration(prom.Timeout) - enabled = append(enabled, checks.NewRateCheck(prom.Name, prom.URI, timeout)) + for _, prom := range prometheusServers { + enabled = append(enabled, checks.NewRateCheck(prom)) } } if isEnabled(enabledChecks, disabledChecks, checks.SeriesCheckName, r) { - for _, prom := range proms { - timeout, _ := parseDuration(prom.Timeout) - enabled = append(enabled, checks.NewSeriesCheck(prom.Name, prom.URI, timeout)) + for _, prom := range prometheusServers { + enabled = append(enabled, checks.NewSeriesCheck(prom)) } } if isEnabled(enabledChecks, disabledChecks, checks.VectorMatchingCheckName, r) { - for _, prom := range proms { - timeout, _ := parseDuration(prom.Timeout) - enabled = append(enabled, checks.NewVectorMatchingCheck(prom.Name, prom.URI, timeout)) + for _, prom := range prometheusServers { + enabled = append(enabled, checks.NewVectorMatchingCheck(prom)) } } if rule.Cost != nil && isEnabled(enabledChecks, disabledChecks, checks.CostCheckName, r) { severity := rule.Cost.getSeverity(checks.Bug) - for _, prom := range proms { - timeout, _ := parseDuration(prom.Timeout) - enabled = append(enabled, checks.NewCostCheck(prom.Name, prom.URI, timeout, rule.Cost.BytesPerSample, rule.Cost.MaxSeries, severity)) + for _, prom := range prometheusServers { + enabled = append(enabled, checks.NewCostCheck(prom, rule.Cost.BytesPerSample, rule.Cost.MaxSeries, severity)) } } @@ -249,9 +246,8 @@ func (rule Rule) resolveChecks(path string, r parser.Rule, enabledChecks, disabl if rule.Alerts.Resolve != "" { qResolve, _ = parseDuration(rule.Alerts.Resolve) } - for _, prom := range proms { - timeout, _ := parseDuration(prom.Timeout) - enabled = append(enabled, checks.NewAlertsCheck(prom.Name, prom.URI, timeout, qRange, qStep, qResolve)) + for _, prom := range prometheusServers { + enabled = append(enabled, checks.NewAlertsCheck(prom, qRange, qStep, qResolve)) } } diff --git a/internal/promapi/config.go b/internal/promapi/config.go index 2d452dd2..aea21140 100644 --- a/internal/promapi/config.go +++ b/internal/promapi/config.go @@ -5,8 +5,6 @@ import ( "fmt" "time" - "github.com/prometheus/client_golang/api" - v1 "github.com/prometheus/client_golang/api/prometheus/v1" "github.com/rs/zerolog/log" "gopkg.in/yaml.v3" ) @@ -22,27 +20,31 @@ type PrometheusConfig struct { Global ConfigSectionGlobal `yaml:"global"` } -func Config(uri string, timeout time.Duration) (*PrometheusConfig, error) { - log.Debug().Str("uri", uri).Msg("Query Prometheus configuration") +func (p *Prometheus) Config() (*PrometheusConfig, error) { + log.Debug().Str("uri", p.uri).Msg("Query Prometheus configuration") - client, err := api.NewClient(api.Config{Address: uri}) - if err != nil { - return nil, err + key := "/api/v1/status/config" + p.lock.Lock(key) + defer p.lock.Unlock((key)) + + if v, ok := p.cache.Load(key); ok { + log.Debug().Str("key", key).Str("uri", p.uri).Msg("Config cache hit") + cfg := v.(PrometheusConfig) + return &cfg, nil } - v1api := v1.NewAPI(client) - ctx, cancel := context.WithTimeout(context.Background(), timeout) + ctx, cancel := context.WithTimeout(context.Background(), p.timeout) defer cancel() - resp, err := v1api.Config(ctx) + resp, err := p.api.Config(ctx) if err != nil { - log.Error().Err(err).Str("uri", uri).Msg("Failed to query Prometheus configuration") + log.Error().Err(err).Str("uri", p.uri).Msg("Failed to query Prometheus configuration") return nil, fmt.Errorf("failed to query Prometheus config: %w", err) } var cfg PrometheusConfig if err = yaml.Unmarshal([]byte(resp.YAML), &cfg); err != nil { - return nil, fmt.Errorf("failed to decode config data in /api/v1/status/config response: %w", err) + return nil, fmt.Errorf("failed to decode config data in %s response: %w", key, err) } if cfg.Global.ScrapeInterval == 0 { @@ -55,5 +57,8 @@ func Config(uri string, timeout time.Duration) (*PrometheusConfig, error) { cfg.Global.EvaluationInterval = time.Minute } + log.Debug().Str("key", key).Str("uri", p.uri).Msg("Config cache miss") + p.cache.Store(key, cfg) + return &cfg, nil } diff --git a/internal/promapi/prometheus.go b/internal/promapi/prometheus.go new file mode 100644 index 00000000..024bfa9f --- /dev/null +++ b/internal/promapi/prometheus.go @@ -0,0 +1,48 @@ +package promapi + +import ( + "sync" + "time" + + "github.com/cloudflare/pint/internal/keylock" + "github.com/prometheus/client_golang/api" + v1 "github.com/prometheus/client_golang/api/prometheus/v1" +) + +type Prometheus struct { + name string + uri string + api v1.API + timeout time.Duration + cache *sync.Map + lock *keylock.PartitionLocker +} + +func NewPrometheus(name, uri string, timeout time.Duration) *Prometheus { + client, err := api.NewClient(api.Config{Address: uri}) + if err != nil { + // config validation should prevent this from ever happening + // panic so we don't need to return an error and it's easier to + // use this code in tests + panic(err) + } + return &Prometheus{ + name: name, + uri: uri, + api: v1.NewAPI(client), + timeout: timeout, + cache: &sync.Map{}, + lock: keylock.NewPartitionLocker((&sync.Mutex{})), + } +} + +func (p *Prometheus) Name() string { + return p.name +} + +func (p *Prometheus) ClearCache() { + p.cache.Range(func(key interface{}, value interface{}) bool { + p.cache.Delete(key) + return true + }) +} diff --git a/internal/promapi/query.go b/internal/promapi/query.go index d4ec01a0..a024b0c1 100644 --- a/internal/promapi/query.go +++ b/internal/promapi/query.go @@ -3,56 +3,46 @@ package promapi import ( "context" "fmt" - "sync" "time" - "github.com/cloudflare/pint/internal/keylock" - - "github.com/prometheus/client_golang/api" - v1 "github.com/prometheus/client_golang/api/prometheus/v1" "github.com/prometheus/common/model" "github.com/rs/zerolog/log" ) -var km = keylock.NewPartitionLocker((&sync.Mutex{})) - type QueryResult struct { Series model.Vector DurationSeconds float64 } -func Query(uri string, timeout time.Duration, expr string, lockKey *string) (*QueryResult, error) { - log.Debug().Str("uri", uri).Str("query", expr).Msg("Scheduling prometheus query") - key := uri - if lockKey != nil { - key = *lockKey - } - km.Lock(key) - defer km.Unlock((key)) +func (p *Prometheus) Query(expr string) (*QueryResult, error) { + log.Debug().Str("uri", p.uri).Str("query", expr).Msg("Scheduling prometheus query") - log.Debug().Str("uri", uri).Str("query", expr).Msg("Query started") + lockKey := "/api/v1/query" + p.lock.Lock(lockKey) + defer p.lock.Unlock((lockKey)) - client, err := api.NewClient(api.Config{Address: uri}) - if err != nil { - log.Error().Err(err).Msg("Failed to setup new Prometheus API client") - return nil, err + if v, ok := p.cache.Load(expr); ok { + log.Debug().Str("key", expr).Str("uri", p.uri).Msg("Query cache hit") + r := v.(QueryResult) + return &r, nil } - v1api := v1.NewAPI(client) - ctx, cancel := context.WithTimeout(context.Background(), timeout) + log.Debug().Str("uri", p.uri).Str("query", expr).Msg("Query started") + + ctx, cancel := context.WithTimeout(context.Background(), p.timeout) defer cancel() start := time.Now() - result, _, err := v1api.Query(ctx, expr, start) + result, _, err := p.api.Query(ctx, expr, start) duration := time.Since(start) log.Debug(). - Str("uri", uri). + Str("uri", p.uri). Str("query", expr). Str("duration", HumanizeDuration(duration)). Msg("Query completed") if err != nil { log.Error().Err(err). - Str("uri", uri). + Str("uri", p.uri). Str("query", expr). Msg("Query failed") return nil, err @@ -67,10 +57,13 @@ func Query(uri string, timeout time.Duration, expr string, lockKey *string) (*Qu vectorVal := result.(model.Vector) qr.Series = vectorVal default: - log.Error().Err(err).Str("uri", uri).Str("query", expr).Msgf("Query returned unknown result type: %v", result) + log.Error().Err(err).Str("uri", p.uri).Str("query", expr).Msgf("Query returned unknown result type: %v", result) return nil, fmt.Errorf("unknown result type: %v", result) } - log.Debug().Str("uri", uri).Str("query", expr).Int("series", len(qr.Series)).Msg("Parsed response") + log.Debug().Str("uri", p.uri).Str("query", expr).Int("series", len(qr.Series)).Msg("Parsed response") + + log.Debug().Str("key", expr).Str("uri", p.uri).Msg("Query cache miss") + p.cache.Store(expr, qr) return &qr, nil } diff --git a/internal/promapi/range.go b/internal/promapi/range.go index 82526efa..fbcf9d30 100644 --- a/internal/promapi/range.go +++ b/internal/promapi/range.go @@ -8,7 +8,6 @@ import ( "strings" "time" - "github.com/prometheus/client_golang/api" v1 "github.com/prometheus/client_golang/api/prometheus/v1" "github.com/prometheus/common/model" "github.com/rs/zerolog/log" @@ -21,34 +20,30 @@ type RangeQueryResult struct { DurationSeconds float64 } -func RangeQuery(uri string, timeout time.Duration, expr string, start, end time.Time, step time.Duration, lockKey *string) (*RangeQueryResult, error) { - key := uri - if lockKey != nil { - key = *lockKey - } +func (p *Prometheus) RangeQuery(expr string, start, end time.Time, step time.Duration) (*RangeQueryResult, error) { log.Debug(). - Str("key", key). - Str("uri", uri). + Str("uri", p.uri). Str("query", expr). Time("start", start). Time("end", end). Str("step", HumanizeDuration(step)). Msg("Scheduling prometheus range query") - km.Lock(key) - defer km.Unlock((key)) - - log.Debug().Str("uri", uri).Str("query", expr).Msg("Range query started") + lockKey := "/api/v1/query/range" + p.lock.Lock(lockKey) + defer p.lock.Unlock((lockKey)) - client, err := api.NewClient(api.Config{Address: uri}) - if err != nil { - log.Error().Err(err).Msg("Failed to setup new Prometheus API client") - return nil, err + cacheKey := strings.Join([]string{expr, start.String(), end.String(), step.String()}, "\n") + if v, ok := p.cache.Load(cacheKey); ok { + log.Debug().Str("key", cacheKey).Str("uri", p.uri).Msg("Range query cache hit") + r := v.(RangeQueryResult) + return &r, nil } - v1api := v1.NewAPI(client) - ctx, cancel := context.WithTimeout(context.Background(), timeout) + log.Debug().Str("uri", p.uri).Str("query", expr).Msg("Range query started") + + ctx, cancel := context.WithTimeout(context.Background(), p.timeout) defer cancel() r := v1.Range{ @@ -57,21 +52,19 @@ func RangeQuery(uri string, timeout time.Duration, expr string, start, end time. Step: step, } qstart := time.Now() - result, _, err := v1api.QueryRange(ctx, expr, r) + result, _, err := p.api.QueryRange(ctx, expr, r) duration := time.Since(qstart) log.Debug(). - Str("uri", uri). + Str("uri", p.uri). Str("query", expr). Str("duration", HumanizeDuration(duration)). Msg("Range query completed") if err != nil { - log.Error().Err(err).Str("uri", uri).Str("query", expr).Msg("Range query failed") + log.Error().Err(err).Str("uri", p.uri).Str("query", expr).Msg("Range query failed") if isRetryable(err) { delta := end.Sub(start) / 2 log.Warn().Str("delta", HumanizeDuration(delta)).Msg("Retrying request with smaller range") - b, _ := start.MarshalText() - newKey := fmt.Sprintf("%s/retry/%s", key, string(b)) - return RangeQuery(uri, timeout, expr, start.Add(delta), end, step, &newKey) + return p.RangeQuery(expr, start.Add(delta), end, step) } return nil, err } @@ -87,10 +80,13 @@ func RangeQuery(uri string, timeout time.Duration, expr string, start, end time. samples := result.(model.Matrix) qr.Samples = samples default: - log.Error().Err(err).Str("uri", uri).Str("query", expr).Msgf("Range query returned unknown result type: %v", result) + log.Error().Err(err).Str("uri", p.uri).Str("query", expr).Msgf("Range query returned unknown result type: %v", result) return nil, fmt.Errorf("unknown result type: %v", result) } - log.Debug().Str("uri", uri).Str("query", expr).Int("samples", len(qr.Samples)).Msg("Parsed range response") + log.Debug().Str("uri", p.uri).Str("query", expr).Int("samples", len(qr.Samples)).Msg("Parsed range response") + + log.Debug().Str("key", cacheKey).Str("uri", p.uri).Msg("Range query cache miss") + p.cache.Store(cacheKey, qr) return &qr, nil }