Skip to content

Commit

Permalink
*: add metrics to the reloader package
Browse files Browse the repository at this point in the history
Signed-off-by: Simon Pasquier <spasquie@redhat.com>
  • Loading branch information
simonpasquier committed Apr 24, 2020
1 parent 8af5266 commit 4c8057a
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ We use *breaking* word for marking changes that are not backward compatible (rel
### Added

- [#2502](https://github.com/thanos-io/thanos/pull/2502) Added `hints` field to `SeriesResponse`. Hints is an opaque data structure that can be used to carry additional information from the store and its content is implementation-specific.
- [#2521](https://github.com/thanos-io/thanos/pull/2521) Sidecar: add `thanos_sidecar_reloader_reloads_failed_total`, `thanos_sidecar_reloader_reloads_total`, `thanos_sidecar_reloader_watch_errors_total`, `thanos_sidecar_reloader_watch_events_total` and `thanos_sidecar_reloader_watches` metrics.

### Changed

Expand Down
1 change: 1 addition & 0 deletions cmd/thanos/sidecar.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ func registerSidecar(m map[string]setupFunc, app *kingpin.Application) {
m[component.Sidecar.String()] = func(g *run.Group, logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, _ <-chan struct{}, _ bool) error {
rl := reloader.New(
log.With(logger, "component", "reloader"),
extprom.WrapRegistererWithPrefix("thanos_sidecar_", reg),
reloader.ReloadURLFromBase(*promURL),
*reloaderCfgFile,
*reloaderCfgOutputFile,
Expand Down
1 change: 1 addition & 0 deletions pkg/reloader/example_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ func ExampleReloader() {
log.Fatal(err)
}
rl := reloader.New(
nil,
nil,
reloader.ReloadURLFromBase(u),
"/path/to/cfg",
Expand Down
50 changes: 47 additions & 3 deletions pkg/reloader/reloader.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ import (
"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/thanos-io/thanos/pkg/runutil"
)

Expand All @@ -89,6 +91,12 @@ type Reloader struct {

lastCfgHash []byte
lastRuleHash []byte

reloads prometheus.Counter
reloadErrors prometheus.Counter
watches prometheus.Gauge
watchEvents prometheus.Counter
watchErrors prometheus.Counter
}

var firstGzipBytes = []byte{0x1f, 0x8b, 0x08}
Expand All @@ -98,19 +106,51 @@ var firstGzipBytes = []byte{0x1f, 0x8b, 0x08}
// If cfgOutputFile is not empty the config file will be decompressed if needed, environment variables
// will be substituted and the output written into the given path. Prometheus should then use
// cfgOutputFile as its config file path.
func New(logger log.Logger, reloadURL *url.URL, cfgFile string, cfgOutputFile string, ruleDirs []string) *Reloader {
func New(logger log.Logger, reg prometheus.Registerer, reloadURL *url.URL, cfgFile string, cfgOutputFile string, ruleDirs []string) *Reloader {
if logger == nil {
logger = log.NewNopLogger()
}
return &Reloader{
r := &Reloader{
logger: logger,
reloadURL: reloadURL,
cfgFile: cfgFile,
cfgOutputFile: cfgOutputFile,
ruleDirs: ruleDirs,
watchInterval: 3 * time.Minute,
retryInterval: 5 * time.Second,

reloads: promauto.With(reg).NewCounter(
prometheus.CounterOpts{
Name: "reloader_reloads_total",
Help: "Total number of reload requests.",
},
),
reloadErrors: promauto.With(reg).NewCounter(
prometheus.CounterOpts{
Name: "reloader_reloads_failed_total",
Help: "Total number of reload requests that failed.",
},
),
watches: promauto.With(reg).NewGauge(
prometheus.GaugeOpts{
Name: "reloader_watches",
Help: "Number of resources watched by the reloader.",
},
),
watchEvents: promauto.With(reg).NewCounter(
prometheus.CounterOpts{
Name: "reloader_watch_events_total",
Help: "Total number of events received by the reloader from the watcher.",
},
),
watchErrors: promauto.With(reg).NewCounter(
prometheus.CounterOpts{
Name: "reloader_watch_errors_total",
Help: "Total number of errors received by the reloader from the watcher.",
},
),
}
return r
}

// We cannot detect everything via watch. Watch interval controls how often we re-read given dirs non-recursively.
Expand Down Expand Up @@ -154,6 +194,7 @@ func (r *Reloader) Watch(ctx context.Context) error {
tick := time.NewTicker(r.watchInterval)
defer tick.Stop()

r.watches.Set(float64(len(watchables)))
level.Info(r.logger).Log(
"msg", "started watching config file and non-recursively rule dirs for changes",
"cfg", r.cfgFile,
Expand All @@ -166,11 +207,12 @@ func (r *Reloader) Watch(ctx context.Context) error {
return nil
case <-tick.C:
case event := <-watcher.Events:
// TODO(bwplotka): Add metric if we are not cycling CPU here too much.
r.watchEvents.Inc()
if _, ok := watchables[filepath.Dir(event.Name)]; !ok {
continue
}
case err := <-watcher.Errors:
r.watchErrors.Inc()
level.Error(r.logger).Log("msg", "watch error", "err", err)
continue
}
Expand Down Expand Up @@ -280,7 +322,9 @@ func (r *Reloader) apply(ctx context.Context) error {
defer cancel()

if err := runutil.RetryWithLog(r.logger, r.retryInterval, retryCtx.Done(), func() error {
r.reloads.Inc()
if err := r.triggerReload(ctx); err != nil {
r.reloadErrors.Inc()
return errors.Wrap(err, "trigger reload")
}

Expand Down
4 changes: 2 additions & 2 deletions pkg/reloader/reloader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ func TestReloader_ConfigApply(t *testing.T) {
input = path.Join(dir, "in", "cfg.yaml.tmpl")
output = path.Join(dir, "out", "cfg.yaml")
)
reloader := New(nil, reloadURL, input, output, nil)
reloader := New(nil, nil, reloadURL, input, output, nil)
reloader.watchInterval = 9999 * time.Hour // Disable interval to test watch logic only.
reloader.retryInterval = 100 * time.Millisecond

Expand Down Expand Up @@ -205,7 +205,7 @@ func TestReloader_RuleApply(t *testing.T) {
testutil.Ok(t, os.Mkdir(path.Join(dir2, "rule-dir"), os.ModePerm))
testutil.Ok(t, os.Symlink(path.Join(dir2, "rule-dir"), path.Join(dir, "rule-dir")))

reloader := New(nil, reloadURL, "", "", []string{dir, path.Join(dir, "rule-dir")})
reloader := New(nil, nil, reloadURL, "", "", []string{dir, path.Join(dir, "rule-dir")})
reloader.watchInterval = 100 * time.Millisecond
reloader.retryInterval = 100 * time.Millisecond

Expand Down

0 comments on commit 4c8057a

Please sign in to comment.