diff --git a/.mdox.validate.yaml b/.mdox.validate.yaml index 9e67c44663..bd334deb39 100644 --- a/.mdox.validate.yaml +++ b/.mdox.validate.yaml @@ -42,6 +42,5 @@ validators: type: 'ignore' - regex: 'twitter\.com' type: 'ignore' - # 500 when requested my mdox in GH actions. - - regex: 'outshift\.cisco\.com' + - regex: 'outshift\.cisco\.com\/blog\/multi-cluster-monitoring' type: 'ignore' diff --git a/CHANGELOG.md b/CHANGELOG.md index a8acfeefa7..836d42549d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re ### Added +- [#7429](https://github.com/thanos-io/thanos/pull/7429): Reloader: introduce `TolerateEnvVarExpansionErrors` to allow suppressing errors when expanding environment variables in the configuration file. When set, this will ensure that the reloader won't consider the operation to fail when an unset environment variable is encountered. Note that all unset environment variables are left as is, whereas all set environment variables are expanded as usual. - [#7317](https://github.com/thanos-io/thanos/pull/7317) Tracing: allow specifying resource attributes for the OTLP configuration. - [#7367](https://github.com/thanos-io/thanos/pull/7367) Store Gateway: log request ID in request logs. - [#7361](https://github.com/thanos-io/thanos/pull/7361) Query: *breaking :warning:* pass query stats from remote execution from server to client. We changed the protobuf of the QueryAPI, if you use `query.mode=distributed` you need to update your client (upper level Queriers) first, before updating leaf Queriers (servers). diff --git a/pkg/reloader/reloader.go b/pkg/reloader/reloader.go index 0821b90fed..77c6b05d70 100644 --- a/pkg/reloader/reloader.go +++ b/pkg/reloader/reloader.go @@ -75,7 +75,7 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/minio/sha256-simd" - ps "github.com/mitchellh/go-ps" + "github.com/mitchellh/go-ps" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" @@ -87,14 +87,15 @@ import ( // It optionally substitutes environment variables in the configuration. // Referenced environment variables must be of the form `$(var)` (not `$var` or `${var}`). type Reloader struct { - logger log.Logger - cfgFile string - cfgOutputFile string - cfgDirs []CfgDirOption - watchInterval time.Duration - retryInterval time.Duration - watchedDirs []string - watcher *watcher + logger log.Logger + cfgFile string + cfgOutputFile string + cfgDirs []CfgDirOption + tolerateEnvVarExpansionErrors bool + retryInterval time.Duration + watchInterval time.Duration + watchedDirs []string + watcher *watcher tr TriggerReloader @@ -104,13 +105,14 @@ type Reloader struct { lastCfgDirFiles []map[string]struct{} forceReload bool - reloads prometheus.Counter - reloadErrors prometheus.Counter - lastReloadSuccess prometheus.Gauge - lastReloadSuccessTimestamp prometheus.Gauge - configApplyErrors prometheus.Counter - configApply prometheus.Counter - reloaderInfo *prometheus.GaugeVec + reloads prometheus.Counter + reloadErrors prometheus.Counter + lastReloadSuccess prometheus.Gauge + lastReloadSuccessTimestamp prometheus.Gauge + configApplyErrors prometheus.Counter + configEnvVarExpansionErrors prometheus.Gauge + configApply prometheus.Counter + reloaderInfo *prometheus.GaugeVec } // TriggerReloader reloads the configuration of the process. @@ -172,6 +174,9 @@ type Options struct { // RetryInterval controls how often the reloader retries a reloading of the // configuration in case the reload operation returned an error. RetryInterval time.Duration + // TolerateEnvVarExpansionErrors suppresses errors when expanding environment variables in the config file, and + // leaves the unset variables as is. All found environment variables are still expanded. + TolerateEnvVarExpansionErrors bool } var firstGzipBytes = []byte{0x1f, 0x8b, 0x08} @@ -183,15 +188,16 @@ func New(logger log.Logger, reg prometheus.Registerer, o *Options) *Reloader { logger = log.NewNopLogger() } r := &Reloader{ - logger: logger, - cfgFile: o.CfgFile, - cfgOutputFile: o.CfgOutputFile, - cfgDirs: o.CfgDirs, - lastCfgDirFiles: make([]map[string]struct{}, len(o.CfgDirs)), - watcher: newWatcher(logger, reg, o.DelayInterval), - watchedDirs: o.WatchedDirs, - watchInterval: o.WatchInterval, - retryInterval: o.RetryInterval, + logger: logger, + cfgFile: o.CfgFile, + cfgOutputFile: o.CfgOutputFile, + cfgDirs: o.CfgDirs, + lastCfgDirFiles: make([]map[string]struct{}, len(o.CfgDirs)), + watcher: newWatcher(logger, reg, o.DelayInterval), + watchedDirs: o.WatchedDirs, + watchInterval: o.WatchInterval, + retryInterval: o.RetryInterval, + tolerateEnvVarExpansionErrors: o.TolerateEnvVarExpansionErrors, reloads: promauto.With(reg).NewCounter( prometheus.CounterOpts{ @@ -229,6 +235,12 @@ func New(logger log.Logger, reg prometheus.Registerer, o *Options) *Reloader { Help: "Total number of config apply operations that failed.", }, ), + configEnvVarExpansionErrors: promauto.With(reg).NewGauge( + prometheus.GaugeOpts{ + Name: "reloader_config_environment_variable_expansion_errors", + Help: "Number of environment variable expansions that failed during the last operation.", + }, + ), reloaderInfo: promauto.With(reg).NewGaugeVec( prometheus.GaugeOpts{ Name: "reloader_info", @@ -348,7 +360,7 @@ func (r *Reloader) Watch(ctx context.Context) error { } } -func normalize(logger log.Logger, inputFile, outputFile string) error { +func (r *Reloader) normalize(inputFile, outputFile string) error { b, err := os.ReadFile(inputFile) if err != nil { return errors.Wrap(err, "read file") @@ -360,7 +372,7 @@ func normalize(logger log.Logger, inputFile, outputFile string) error { if err != nil { return errors.Wrap(err, "create gzip reader") } - defer runutil.CloseWithLogOnErr(logger, zr, "gzip reader close") + defer runutil.CloseWithLogOnErr(r.logger, zr, "gzip reader close") b, err = io.ReadAll(zr) if err != nil { @@ -368,7 +380,7 @@ func normalize(logger log.Logger, inputFile, outputFile string) error { } } - b, err = expandEnv(b) + b, err = r.expandEnv(b) if err != nil { return errors.Wrap(err, "expand environment variables") } @@ -402,7 +414,7 @@ func (r *Reloader) apply(ctx context.Context) error { } cfgHash = h.Sum(nil) if r.cfgOutputFile != "" { - if err := normalize(r.logger, r.cfgFile, r.cfgOutputFile); err != nil { + if err := r.normalize(r.cfgFile, r.cfgOutputFile); err != nil { return err } } @@ -446,7 +458,7 @@ func (r *Reloader) apply(ctx context.Context) error { outFile := filepath.Join(outDir, targetFile.Name()) cfgDirFiles[outFile] = struct{}{} - if err := normalize(r.logger, path, outFile); err != nil { + if err := r.normalize(path, outFile); err != nil { return errors.Wrapf(err, "move file: %s", path) } } @@ -692,21 +704,30 @@ func RuntimeInfoURLFromBase(u *url.URL) *url.URL { var envRe = regexp.MustCompile(`\$\(([a-zA-Z_0-9]+)\)`) -func expandEnv(b []byte) (r []byte, err error) { - r = envRe.ReplaceAllFunc(b, func(n []byte) []byte { +func (r *Reloader) expandEnv(b []byte) (replaced []byte, err error) { + configEnvVarExpansionErrorsCount := 0 + replaced = envRe.ReplaceAllFunc(b, func(n []byte) []byte { if err != nil { return nil } + m := n n = n[2 : len(n)-1] v, ok := os.LookupEnv(string(n)) if !ok { - err = errors.Errorf("found reference to unset environment variable %q", n) + configEnvVarExpansionErrorsCount++ + errStr := errors.Errorf("found reference to unset environment variable %q", n) + if r.tolerateEnvVarExpansionErrors { + level.Warn(r.logger).Log("msg", "expand environment variable", "err", errStr) + return m + } + err = errStr return nil } return []byte(v) }) - return r, err + r.configEnvVarExpansionErrors.Set(float64(configEnvVarExpansionErrorsCount)) + return replaced, err } type watcher struct { diff --git a/pkg/reloader/reloader_test.go b/pkg/reloader/reloader_test.go index c2448b6ea1..35ecb03905 100644 --- a/pkg/reloader/reloader_test.go +++ b/pkg/reloader/reloader_test.go @@ -97,10 +97,42 @@ config: testutil.NotOk(t, err) testutil.Assert(t, strings.HasSuffix(err.Error(), `found reference to unset environment variable "TEST_RELOADER_THANOS_ENV"`), "expect error since there envvars are not set.") + // Don't fail with unset variables. + ctx2, cancel2 := context.WithTimeout(ctx, 10*time.Second) + + // Enable suppressing environment variables expansion errors. + reloader.tolerateEnvVarExpansionErrors = true + + // Set an environment variable while leaving the other unset, so as to ensure we don't break the flow when an unset + // variable is found. + testutil.Ok(t, os.Setenv("TEST_RELOADER_THANOS_ENV2", "3")) + err = reloader.Watch(ctx2) + cancel2() + + // Restore state. + reloader.tolerateEnvVarExpansionErrors = false + testutil.Ok(t, os.Unsetenv("TEST_RELOADER_THANOS_ENV2")) + + // The environment variable expansion errors should be suppressed, but recorded. + testutil.Equals(t, 1.0, promtest.ToFloat64(reloader.configEnvVarExpansionErrors)) + + // All environment variables expansion errors should be suppressed. + testutil.Ok(t, err) + + // Config should consist on unset as well as set variables. + f, err := os.ReadFile(output) + testutil.Ok(t, err) + testutil.Equals(t, ` +config: + a: 1 + b: $(TEST_RELOADER_THANOS_ENV) + c: 3 +`, string(f)) + testutil.Ok(t, os.Setenv("TEST_RELOADER_THANOS_ENV", "2")) testutil.Ok(t, os.Setenv("TEST_RELOADER_THANOS_ENV2", "3")) - rctx, cancel2 := context.WithCancel(ctx) + rctx, cancel3 := context.WithCancel(ctx) g := sync.WaitGroup{} g.Add(1) go func() { @@ -159,7 +191,7 @@ config: } } } - cancel2() + cancel3() g.Wait() testutil.Ok(t, os.Unsetenv("TEST_RELOADER_THANOS_ENV"))