Skip to content

Commit

Permalink
Cache Prometheus server responses
Browse files Browse the repository at this point in the history
  • Loading branch information
prymitive committed Jan 17, 2022
1 parent 697bfb6 commit fbc4a0b
Show file tree
Hide file tree
Showing 17 changed files with 250 additions and 210 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## v0.7.0

### Added

- Cache each Prometheus server's responses to minimize the number of API calls.

## v0.6.6

### Fixed
Expand Down
18 changes: 7 additions & 11 deletions internal/checks/alerts_count.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,28 +13,24 @@ const (
AlertsCheckName = "alerts/count"
)

func NewAlertsCheck(name, uri string, timeout, lookBack, step, resolve time.Duration) AlertsCheck {
func NewAlertsCheck(prom *promapi.Prometheus, lookBack, step, resolve time.Duration) AlertsCheck {
return AlertsCheck{
name: name,
uri: uri,
timeout: timeout,
prom: prom,
lookBack: lookBack,
step: step,
resolve: resolve,
}
}

type AlertsCheck struct {
name string
uri string
timeout time.Duration
prom *promapi.Prometheus
lookBack time.Duration
step time.Duration
resolve time.Duration
}

func (c AlertsCheck) String() string {
return fmt.Sprintf("%s(%s)", AlertsCheckName, c.name)
return fmt.Sprintf("%s(%s)", AlertsCheckName, c.prom.Name())
}

func (c AlertsCheck) Check(rule parser.Rule) (problems []Problem) {
Expand All @@ -49,13 +45,13 @@ func (c AlertsCheck) Check(rule parser.Rule) (problems []Problem) {
end := time.Now()
start := end.Add(-1 * c.lookBack)

qr, err := promapi.RangeQuery(c.uri, c.timeout, rule.AlertingRule.Expr.Value.Value, start, end, c.step, nil)
qr, err := c.prom.RangeQuery(rule.AlertingRule.Expr.Value.Value, start, end, c.step)
if err != nil {
problems = append(problems, Problem{
Fragment: rule.AlertingRule.Expr.Value.Value,
Lines: rule.AlertingRule.Expr.Lines(),
Reporter: AlertsCheckName,
Text: fmt.Sprintf("query using %s failed with: %s", c.name, err),
Text: fmt.Sprintf("query using %s failed with: %s", c.prom.Name(), err),
Severity: Bug,
})
return
Expand Down Expand Up @@ -104,7 +100,7 @@ func (c AlertsCheck) Check(rule parser.Rule) (problems []Problem) {
Fragment: rule.AlertingRule.Expr.Value.Value,
Lines: lines,
Reporter: AlertsCheckName,
Text: fmt.Sprintf("query using %s would trigger %d alert(s) in the last %s", c.name, alerts, promapi.HumanizeDuration(delta)),
Text: fmt.Sprintf("query using %s would trigger %d alert(s) in the last %s", c.prom.Name(), alerts, promapi.HumanizeDuration(delta)),
Severity: Information,
})
return
Expand Down
17 changes: 9 additions & 8 deletions internal/checks/alerts_count_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"time"

"github.com/cloudflare/pint/internal/checks"
"github.com/cloudflare/pint/internal/promapi"

"github.com/rs/zerolog"
)
Expand Down Expand Up @@ -85,17 +86,17 @@ func TestAlertsCheck(t *testing.T) {
{
description: "ignores recording rules",
content: "- record: foo\n expr: up == 0\n",
checker: checks.NewAlertsCheck("prom", "http://localhost", time.Second*5, time.Hour*24, time.Minute, time.Minute*5),
checker: checks.NewAlertsCheck(promapi.NewPrometheus("prom", "http://localhost", time.Second*5), time.Hour*24, time.Minute, time.Minute*5),
},
{
description: "ignores rules with syntax errors",
content: "- alert: Foo Is Down\n expr: sum(\n",
checker: checks.NewAlertsCheck("prom", "http://localhost", time.Second*5, time.Hour*24, time.Minute, time.Minute*5),
checker: checks.NewAlertsCheck(promapi.NewPrometheus("prom", "http://localhost", time.Second*5), time.Hour*24, time.Minute, time.Minute*5),
},
{
description: "bad request",
content: content,
checker: checks.NewAlertsCheck("prom", srv.URL+"/400/", time.Second*5, time.Hour*24, time.Minute, time.Minute*5),
checker: checks.NewAlertsCheck(promapi.NewPrometheus("prom", srv.URL+"/400/", time.Second*5), time.Hour*24, time.Minute, time.Minute*5),
problems: []checks.Problem{
{
Fragment: `up{job="foo"} == 0`,
Expand All @@ -109,7 +110,7 @@ func TestAlertsCheck(t *testing.T) {
{
description: "empty response",
content: content,
checker: checks.NewAlertsCheck("prom", srv.URL+"/empty/", time.Second*5, time.Hour*24, time.Minute, time.Minute*5),
checker: checks.NewAlertsCheck(promapi.NewPrometheus("prom", srv.URL+"/empty/", time.Second*5), time.Hour*24, time.Minute, time.Minute*5),
problems: []checks.Problem{
{
Fragment: `up{job="foo"} == 0`,
Expand All @@ -123,7 +124,7 @@ func TestAlertsCheck(t *testing.T) {
{
description: "multiple alerts",
content: content,
checker: checks.NewAlertsCheck("prom", srv.URL+"/alerts/", time.Second*5, time.Hour*24, time.Minute, time.Minute*5),
checker: checks.NewAlertsCheck(promapi.NewPrometheus("prom", srv.URL+"/alerts/", time.Second*5), time.Hour*24, time.Minute, time.Minute*5),
problems: []checks.Problem{
{
Fragment: `up{job="foo"} == 0`,
Expand All @@ -137,7 +138,7 @@ func TestAlertsCheck(t *testing.T) {
{
description: "for: 10m",
content: "- alert: Foo Is Down\n for: 10m\n expr: up{job=\"foo\"} == 0\n",
checker: checks.NewAlertsCheck("prom", srv.URL+"/alerts/", time.Second*5, time.Hour*24, time.Minute*6, time.Minute*10),
checker: checks.NewAlertsCheck(promapi.NewPrometheus("prom", srv.URL+"/alerts/", time.Second*5), time.Hour*24, time.Minute*6, time.Minute*10),
problems: []checks.Problem{
{
Fragment: `up{job="foo"} == 0`,
Expand All @@ -154,7 +155,7 @@ func TestAlertsCheck(t *testing.T) {
- alert: foo
expr: '{__name__="up", job="foo"} == 0'
`,
checker: checks.NewAlertsCheck("prom", srv.URL+"/alerts/", time.Second*5, time.Hour*24, time.Minute*6, time.Minute*10),
checker: checks.NewAlertsCheck(promapi.NewPrometheus("prom", srv.URL+"/alerts/", time.Second*5), time.Hour*24, time.Minute*6, time.Minute*10),
problems: []checks.Problem{
{
Fragment: `{__name__="up", job="foo"} == 0`,
Expand All @@ -171,7 +172,7 @@ func TestAlertsCheck(t *testing.T) {
- alert: foo
expr: '{__name__=~"(up|foo)", job="foo"} == 0'
`,
checker: checks.NewAlertsCheck("prom", srv.URL+"/alerts/", time.Second*5, time.Hour*24, time.Minute*6, time.Minute*10),
checker: checks.NewAlertsCheck(promapi.NewPrometheus("prom", srv.URL+"/alerts/", time.Second*5), time.Hour*24, time.Minute*6, time.Minute*10),
problems: []checks.Problem{
{
Fragment: `{__name__=~"(up|foo)", job="foo"} == 0`,
Expand Down
18 changes: 8 additions & 10 deletions internal/checks/promql_rate.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,16 @@ const (
RateCheckName = "promql/rate"
)

func NewRateCheck(name, uri string, timeout time.Duration) RateCheck {
return RateCheck{name: name, uri: uri, timeout: timeout}
func NewRateCheck(prom *promapi.Prometheus) RateCheck {
return RateCheck{prom: prom}
}

type RateCheck struct {
name string
uri string
timeout time.Duration
prom *promapi.Prometheus
}

func (c RateCheck) String() string {
return fmt.Sprintf("%s(%s)", RateCheckName, c.name)
return fmt.Sprintf("%s(%s)", RateCheckName, c.prom.Name())
}

func (c RateCheck) Check(rule parser.Rule) (problems []Problem) {
Expand All @@ -42,7 +40,7 @@ func (c RateCheck) Check(rule parser.Rule) (problems []Problem) {
Fragment: expr.Value.Value,
Lines: expr.Lines(),
Reporter: RateCheckName,
Text: fmt.Sprintf("failed to query %s prometheus config: %s", c.name, err),
Text: fmt.Sprintf("failed to query %s prometheus config: %s", c.prom.Name(), err),
Severity: Bug,
})
return
Expand All @@ -64,7 +62,7 @@ func (c RateCheck) Check(rule parser.Rule) (problems []Problem) {

func (c RateCheck) getScrapeInterval() (interval time.Duration, err error) {
var cfg *promapi.PrometheusConfig
cfg, err = promapi.Config(c.uri, c.timeout)
cfg, err = c.prom.Config()
if err != nil {
return
}
Expand All @@ -88,14 +86,14 @@ func (c RateCheck) checkNode(node *parser.PromQLNode, scrapeInterval time.Durati
if m.Range < scrapeInterval*time.Duration(minIntervals) {
p := exprProblem{
expr: node.Expr,
text: fmt.Sprintf("duration for %s() must be at least %d x scrape_interval, %s is using %s scrape_interval", n.Func.Name, minIntervals, c.name, promapi.HumanizeDuration(scrapeInterval)),
text: fmt.Sprintf("duration for %s() must be at least %d x scrape_interval, %s is using %s scrape_interval", n.Func.Name, minIntervals, c.prom.Name(), promapi.HumanizeDuration(scrapeInterval)),
severity: Bug,
}
problems = append(problems, p)
} else if m.Range < scrapeInterval*time.Duration(recIntervals) {
p := exprProblem{
expr: node.Expr,
text: fmt.Sprintf("duration for %s() is recommended to be at least %d x scrape_interval, %s is using %s scrape_interval", n.Func.Name, recIntervals, c.name, promapi.HumanizeDuration(scrapeInterval)),
text: fmt.Sprintf("duration for %s() is recommended to be at least %d x scrape_interval, %s is using %s scrape_interval", n.Func.Name, recIntervals, c.prom.Name(), promapi.HumanizeDuration(scrapeInterval)),
severity: Warning,
}
problems = append(problems, p)
Expand Down
37 changes: 19 additions & 18 deletions internal/checks/promql_rate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"time"

"github.com/cloudflare/pint/internal/checks"
"github.com/cloudflare/pint/internal/promapi"
"github.com/rs/zerolog"
)

Expand Down Expand Up @@ -46,12 +47,12 @@ func TestRateCheck(t *testing.T) {
{
description: "ignores rules with syntax errors",
content: "- record: foo\n expr: sum(foo) without(\n",
checker: checks.NewRateCheck("prom", srv.URL, time.Second),
checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL, time.Second)),
},
{
description: "rate < 2x scrape_interval",
content: "- record: foo\n expr: rate(foo[1m])\n",
checker: checks.NewRateCheck("prom", srv.URL+"/1m/", time.Second),
checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/1m/", time.Second)),
problems: []checks.Problem{
{
Fragment: "rate(foo[1m])",
Expand All @@ -65,7 +66,7 @@ func TestRateCheck(t *testing.T) {
{
description: "rate < 4x scrape_interval",
content: "- record: foo\n expr: rate(foo[3m])\n",
checker: checks.NewRateCheck("prom", srv.URL+"/1m/", time.Second),
checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/1m/", time.Second)),
problems: []checks.Problem{
{
Fragment: "rate(foo[3m])",
Expand All @@ -79,12 +80,12 @@ func TestRateCheck(t *testing.T) {
{
description: "rate == 4x scrape interval",
content: "- record: foo\n expr: rate(foo[2m])\n",
checker: checks.NewRateCheck("prom", srv.URL+"/30s/", time.Second),
checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/30s/", time.Second)),
},
{
description: "irate < 2x scrape_interval",
content: "- record: foo\n expr: irate(foo[1m])\n",
checker: checks.NewRateCheck("prom", srv.URL+"/1m/", time.Second),
checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/1m/", time.Second)),
problems: []checks.Problem{
{
Fragment: "irate(foo[1m])",
Expand All @@ -98,7 +99,7 @@ func TestRateCheck(t *testing.T) {
{
description: "irate < 3x scrape_interval",
content: "- record: foo\n expr: irate(foo[2m])\n",
checker: checks.NewRateCheck("prom", srv.URL+"/1m/", time.Second),
checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/1m/", time.Second)),
problems: []checks.Problem{
{
Fragment: "irate(foo[2m])",
Expand All @@ -115,23 +116,23 @@ func TestRateCheck(t *testing.T) {
- record: foo
expr: irate({__name__="foo"}[5m])
`,
checker: checks.NewRateCheck("prom", srv.URL+"/1m/", time.Second),
checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/1m/", time.Second)),
},
{
description: "irate{__name__=~} > 3x scrape_interval",
content: `
- record: foo
expr: irate({__name__=~"(foo|bar)_total"}[5m])
`,
checker: checks.NewRateCheck("prom", srv.URL+"/1m/", time.Second),
checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/1m/", time.Second)),
},
{
description: "irate{__name__} < 3x scrape_interval",
content: `
- record: foo
expr: irate({__name__="foo"}[2m])
`,
checker: checks.NewRateCheck("prom", srv.URL+"/1m/", time.Second),
checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/1m/", time.Second)),
problems: []checks.Problem{
{
Fragment: `irate({__name__="foo"}[2m])`,
Expand All @@ -148,7 +149,7 @@ func TestRateCheck(t *testing.T) {
- record: foo
expr: irate({__name__=~"(foo|bar)_total"}[2m])
`,
checker: checks.NewRateCheck("prom", srv.URL+"/1m/", time.Second),
checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/1m/", time.Second)),
problems: []checks.Problem{
{
Fragment: `irate({__name__=~"(foo|bar)_total"}[2m])`,
Expand All @@ -162,17 +163,17 @@ func TestRateCheck(t *testing.T) {
{
description: "irate == 3x scrape interval",
content: "- record: foo\n expr: irate(foo[3m])\n",
checker: checks.NewRateCheck("prom", srv.URL+"/1m/", time.Second),
checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/1m/", time.Second)),
},
{
description: "valid range selector",
content: "- record: foo\n expr: foo[1m]\n",
checker: checks.NewRateCheck("prom", srv.URL+"/1m/", time.Second),
checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/1m/", time.Second)),
},
{
description: "nested invalid rate",
content: "- record: foo\n expr: sum(rate(foo[3m])) / sum(rate(bar[1m]))\n",
checker: checks.NewRateCheck("prom", srv.URL+"/1m/", time.Second),
checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/1m/", time.Second)),
problems: []checks.Problem{
{
Fragment: "rate(foo[3m])",
Expand All @@ -193,7 +194,7 @@ func TestRateCheck(t *testing.T) {
{
description: "500 error from Prometheus API",
content: "- record: foo\n expr: rate(foo[5m])\n",
checker: checks.NewRateCheck("prom", srv.URL+"/error/", time.Second),
checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/error/", time.Second)),
problems: []checks.Problem{
{
Fragment: "rate(foo[5m])",
Expand All @@ -207,7 +208,7 @@ func TestRateCheck(t *testing.T) {
{
description: "invalid status",
content: "- record: foo\n expr: rate(foo[5m])\n",
checker: checks.NewRateCheck("prom", srv.URL, time.Second),
checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL, time.Second)),
problems: []checks.Problem{
{
Fragment: "rate(foo[5m])",
Expand All @@ -221,7 +222,7 @@ func TestRateCheck(t *testing.T) {
{
description: "invalid YAML",
content: "- record: foo\n expr: rate(foo[5m])\n",
checker: checks.NewRateCheck("prom", srv.URL+"/badYaml/", time.Second),
checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/badYaml/", time.Second)),
problems: []checks.Problem{
{
Fragment: "rate(foo[5m])",
Expand All @@ -235,12 +236,12 @@ func TestRateCheck(t *testing.T) {
{
description: "irate == 3 x default 1m",
content: "- record: foo\n expr: irate(foo[3m])\n",
checker: checks.NewRateCheck("prom", srv.URL+"/default/", time.Second),
checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/default/", time.Second)),
},
{
description: "irate < 3 x default 1m",
content: "- record: foo\n expr: irate(foo[2m])\n",
checker: checks.NewRateCheck("prom", srv.URL+"/default/", time.Second),
checker: checks.NewRateCheck(promapi.NewPrometheus("prom", srv.URL+"/default/", time.Second)),
problems: []checks.Problem{
{
Fragment: "irate(foo[2m])",
Expand Down
Loading

0 comments on commit fbc4a0b

Please sign in to comment.