From 86a749bb784f4fe073e37f02e38d314c1af0e1bd Mon Sep 17 00:00:00 2001 From: George Robinson Date: Thu, 9 May 2024 10:45:32 +0100 Subject: [PATCH] Add ingester_chunks_flush_failures_total This commit adds a new metric to pkg/ingester/metrics.go to count the total number of flush failures. Loki operators should alert on this metric so that periods of recurring flush failures are caught before the unflushed chunks buffered on ingesters increase memory pressure. Signed-off-by: George Robinson --- pkg/ingester/flush.go | 1 + pkg/ingester/metrics.go | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/pkg/ingester/flush.go b/pkg/ingester/flush.go index f9904ca8409e5..00aad05475495 100644 --- a/pkg/ingester/flush.go +++ b/pkg/ingester/flush.go @@ -372,6 +372,7 @@ func (i *Ingester) encodeChunk(ctx context.Context, ch *chunk.Chunk, desc *chunk // chunk to have another opportunity to be flushed. func (i *Ingester) flushChunk(ctx context.Context, ch *chunk.Chunk) error { if err := i.store.Put(ctx, []chunk.Chunk{*ch}); err != nil { + i.metrics.chunksFlushFailures.Inc() return fmt.Errorf("store put chunk: %w", err) } i.metrics.flushedChunksStats.Inc(1) diff --git a/pkg/ingester/metrics.go b/pkg/ingester/metrics.go index 8b005860555f1..756eba0ebea74 100644 --- a/pkg/ingester/metrics.go +++ b/pkg/ingester/metrics.go @@ -47,6 +47,7 @@ type ingesterMetrics struct { chunkSizePerTenant *prometheus.CounterVec chunkAge prometheus.Histogram chunkEncodeTime prometheus.Histogram + chunksFlushFailures prometheus.Counter chunksFlushedPerReason *prometheus.CounterVec chunkLifespan prometheus.Histogram flushedChunksStats *analytics.Counter @@ -232,6 +233,11 @@ func newIngesterMetrics(r prometheus.Registerer, metricsNamespace string) *inges // 10ms to 10s. 
Buckets: prometheus.ExponentialBuckets(0.01, 4, 6), }), + chunksFlushFailures: promauto.With(r).NewCounter(prometheus.CounterOpts{ + Namespace: constants.Loki, + Name: "ingester_chunks_flush_failures_total", + Help: "Total number of flush failures.", + }), chunksFlushedPerReason: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ Namespace: constants.Loki, Name: "ingester_chunks_flushed_total",