Smooth out spikes in rate of chunk flush ops #3191

Merged 4 commits on Sep 30, 2020
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -60,6 +60,7 @@
* [ENHANCEMENT] Blocksconvert – Scanner: metrics for tracking progress. #3222
* [ENHANCEMENT] Blocksconvert – Builder: retry block upload before giving up. #3245
* [ENHANCEMENT] Hash ring: added instance registered timestamp to the ring. #3248
* [ENHANCEMENT] Reduce tail latency by smoothing out spikes in rate of chunk flush operations. #3191
* [BUGFIX] No-longer-needed ingester operations for queries triggered by queriers and rulers are now canceled. #3178
* [BUGFIX] Ruler: directories in the configured `rules-path` will be removed on startup and shutdown in order to ensure they don't persist between runs. #3195
* [BUGFIX] Handle hash-collisions in the query path. #3192
25 changes: 25 additions & 0 deletions pkg/ingester/flush.go
@@ -10,6 +10,7 @@ import (
ot "github.com/opentracing/opentracing-go"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/pkg/labels"
"golang.org/x/time/rate"

"github.com/cortexproject/cortex/pkg/chunk"
"github.com/cortexproject/cortex/pkg/util"
@@ -19,6 +20,8 @@ const (
// Backoff for retrying 'immediate' flushes. Only counts for queue
// position, not wallclock time.
flushBackoff = 1 * time.Second
// Lower bound on flushes per check period for rate-limiter
minFlushes = 100
)

// Flush triggers a flush of all the chunks and closes the flush queues.
@@ -94,6 +97,25 @@ func (i *Ingester) sweepUsers(immediate bool) {
}

i.metrics.oldestUnflushedChunkTimestamp.Set(float64(oldest.Unix()))
i.setFlushRate()
}

// Compute a rate that spreads calls to the store over nearly all of the flush period;
// for example, with 600 items in the queue and a 1-minute period we will send 10.5 per second
// (10/s plus the 5% fudge factor). Note that if the store can't keep up with this rate,
// the limiter makes no practical difference.
func (i *Ingester) setFlushRate() {
totalQueueLength := 0
for _, q := range i.flushQueues {
totalQueueLength += q.Length()
}
const fudge = 1.05 // aim to finish a little bit before the end of the period
flushesPerSecond := float64(totalQueueLength) / i.cfg.FlushCheckPeriod.Seconds() * fudge
// Avoid going very slowly with tiny queues
if flushesPerSecond*i.cfg.FlushCheckPeriod.Seconds() < minFlushes {
flushesPerSecond = minFlushes / i.cfg.FlushCheckPeriod.Seconds()
}
level.Debug(util.Logger).Log("msg", "computed flush rate", "rate", flushesPerSecond)
i.flushRateLimiter.SetLimit(rate.Limit(flushesPerSecond))
}

type flushReason int8
@@ -235,6 +257,9 @@ func (i *Ingester) flushLoop(j int) {
}
op := o.(*flushOp)

if !op.immediate {
_ = i.flushRateLimiter.Wait(context.Background())
}
outcome, err := i.flushUserSeries(j, op.userID, op.fp, op.immediate)
i.metrics.seriesDequeuedOutcome.WithLabelValues(outcome.String()).Inc()
if err != nil {
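
For illustration, the rate computation in setFlushRate above can be reproduced as a small standalone sketch. This is a hypothetical, self-contained program (not part of the PR) that mirrors the same arithmetic, using the 600-item / 1-minute example from the comment:

package main

import (
	"fmt"
	"time"
)

// Constants mirroring the diff above: a 5% fudge factor and a lower bound
// of 100 flushes per check period.
const (
	fudge      = 1.05
	minFlushes = 100
)

// computeFlushRate reproduces the arithmetic of setFlushRate for a given
// total queue length and flush check period.
func computeFlushRate(totalQueueLength int, checkPeriod time.Duration) float64 {
	flushesPerSecond := float64(totalQueueLength) / checkPeriod.Seconds() * fudge
	// Clamp tiny queues so flushing never crawls along at a near-zero rate.
	if flushesPerSecond*checkPeriod.Seconds() < minFlushes {
		flushesPerSecond = minFlushes / checkPeriod.Seconds()
	}
	return flushesPerSecond
}

func main() {
	// 600 queued items over a 1-minute period: 10/s, or 10.5/s with the fudge factor.
	fmt.Println(computeFlushRate(600, time.Minute)) // 10.5
	// A tiny queue is clamped to the floor of 100 flushes per period, i.e. ~1.67/s.
	fmt.Println(computeFlushRate(10, time.Minute))
}
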
28 changes: 17 additions & 11 deletions pkg/ingester/ingester.go
@@ -18,6 +18,7 @@ import (
tsdb_record "github.com/prometheus/prometheus/tsdb/record"
"github.com/weaveworks/common/httpgrpc"
"github.com/weaveworks/common/user"
"golang.org/x/time/rate"
"google.golang.org/grpc/codes"

cortex_chunk "github.com/cortexproject/cortex/pkg/chunk"
@@ -141,6 +142,9 @@ type Ingester struct {
flushQueues []*util.PriorityQueue
flushQueuesDone sync.WaitGroup

// Spread out calls to the chunk store over the flush period
flushRateLimiter *rate.Limiter

// This should never be nil.
wal WAL
// To be passed to the WAL.
@@ -196,11 +200,12 @@ func New(cfg Config, clientConfig client.Config, limits *validation.Overrides, c
clientConfig: clientConfig,
metrics: newIngesterMetrics(registerer, true, cfg.ActiveSeriesMetricsEnabled),

limits: limits,
chunkStore: chunkStore,
flushQueues: make([]*util.PriorityQueue, cfg.ConcurrentFlushes),
usersMetadata: map[string]*userMetricsMetadata{},
registerer: registerer,
limits: limits,
chunkStore: chunkStore,
flushQueues: make([]*util.PriorityQueue, cfg.ConcurrentFlushes),
flushRateLimiter: rate.NewLimiter(rate.Inf, 1),
usersMetadata: map[string]*userMetricsMetadata{},
registerer: registerer,
}

var err error
@@ -275,12 +280,13 @@ func NewForFlusher(cfg Config, chunkStore ChunkStore, limits *validation.Overrid
}

i := &Ingester{
cfg: cfg,
metrics: newIngesterMetrics(registerer, true, false),
chunkStore: chunkStore,
flushQueues: make([]*util.PriorityQueue, cfg.ConcurrentFlushes),
wal: &noopWAL{},
limits: limits,
cfg: cfg,
metrics: newIngesterMetrics(registerer, true, false),
chunkStore: chunkStore,
flushQueues: make([]*util.PriorityQueue, cfg.ConcurrentFlushes),
flushRateLimiter: rate.NewLimiter(rate.Inf, 1),
wal: &noopWAL{},
limits: limits,
}

i.BasicService = services.NewBasicService(i.startingForFlusher, i.loopForFlusher, i.stopping)
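
Both constructors initialize flushRateLimiter with rate.NewLimiter(rate.Inf, 1), so flushes are effectively unthrottled until the first sweep computes a real rate via SetLimit. A minimal standalone sketch of that behaviour, assuming only the golang.org/x/time/rate API used in the diff:

package main

import (
	"context"
	"fmt"
	"time"

	"golang.org/x/time/rate"
)

func main() {
	// An infinite rate means Wait returns immediately, so flushing runs at
	// full speed before the first sweep sets a concrete limit.
	limiter := rate.NewLimiter(rate.Inf, 1)

	start := time.Now()
	for i := 0; i < 5; i++ {
		_ = limiter.Wait(context.Background())
	}
	fmt.Println("unlimited:", time.Since(start)) // effectively zero

	// Once a sweep has computed a rate, SetLimit starts pacing subsequent Waits,
	// as setFlushRate does with the computed flushes-per-second value.
	limiter.SetLimit(rate.Limit(10)) // assume a computed rate of 10 ops/second
	start = time.Now()
	for i := 0; i < 5; i++ {
		_ = limiter.Wait(context.Background())
	}
	fmt.Println("limited:", time.Since(start)) // roughly 400-500ms as calls are paced
}
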