Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
* [ENHANCEMENT] Ingester: Add support for exporting native histogram cost attribution metrics (`cortex_ingester_attributed_active_native_histogram_series` and `cortex_ingester_attributed_active_native_histogram_buckets`) with labels specified by customers to a custom Prometheus registry. #10892
* [ENHANCEMENT] Store-gateway: Download sparse headers uploaded by compactors. Compactors have to be configured with `-compactor.upload-sparse-index-headers=true` option. #10879
* [ENHANCEMENT] Compactor: Upload block index file and multiple segment files concurrently. Concurrency scales linearly with block size up to `-compactor.max-per-block-upload-concurrency`. #10947
* [ENHANCEMENT] Ingester: Add per-user `cortex_ingester_tsdb_wal_replay_unknown_refs_total` and `cortex_ingester_tsdb_wbl_replay_unknown_refs_total` metrics to track unknown series references during WAL/WBL replay. #10981
* [BUGFIX] OTLP: Fix response body and Content-Type header to align with spec. #10852
* [BUGFIX] Compactor: fix issue where block becomes permanently stuck when the Compactor's block cleanup job partially deletes a block. #10888
* [BUGFIX] Storage: fix intermittent failures in S3 upload retries. #10952
Expand Down
19 changes: 17 additions & 2 deletions pkg/ingester/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,9 @@ type tsdbMetrics struct {
memSeriesCreatedTotal *prometheus.Desc
memSeriesRemovedTotal *prometheus.Desc

tsdbWalReplayUnknownRefsTotal *prometheus.Desc
tsdbWblReplayUnknownRefsTotal *prometheus.Desc

headPostingsForMatchersCacheMetrics *tsdb.PostingsForMatchersCacheMetrics
blockPostingsForMatchersCacheMetrics *tsdb.PostingsForMatchersCacheMetrics

Expand Down Expand Up @@ -707,6 +710,15 @@ func newTSDBMetrics(r prometheus.Registerer, logger log.Logger) *tsdbMetrics {
"The total number of series that were removed per user.",
[]string{"user"}, nil),

tsdbWalReplayUnknownRefsTotal: prometheus.NewDesc(
"cortex_ingester_tsdb_wal_replay_unknown_refs_total",
"Total number of unknown series references encountered during WAL replay.",
[]string{"user", "type"}, nil),
tsdbWblReplayUnknownRefsTotal: prometheus.NewDesc(
"cortex_ingester_tsdb_wbl_replay_unknown_refs_total",
"Total number of unknown series references encountered during WBL replay.",
[]string{"user", "type"}, nil),

headPostingsForMatchersCacheMetrics: tsdb.NewPostingsForMatchersCacheMetrics(prometheus.WrapRegistererWithPrefix("cortex_ingester_tsdb_head_", r)),
blockPostingsForMatchersCacheMetrics: tsdb.NewPostingsForMatchersCacheMetrics(prometheus.WrapRegistererWithPrefix("cortex_ingester_tsdb_block_", r)),
}
Expand Down Expand Up @@ -762,6 +774,9 @@ func (sm *tsdbMetrics) Describe(out chan<- *prometheus.Desc) {
out <- sm.memSeries
out <- sm.memSeriesCreatedTotal
out <- sm.memSeriesRemovedTotal

out <- sm.tsdbWalReplayUnknownRefsTotal
out <- sm.tsdbWblReplayUnknownRefsTotal
}

func (sm *tsdbMetrics) Collect(out chan<- prometheus.Metric) {
Expand Down Expand Up @@ -804,12 +819,12 @@ func (sm *tsdbMetrics) Collect(out chan<- prometheus.Metric) {
data.SendSumOfGaugesPerTenant(out, sm.tsdbExemplarSeriesInStorage, "prometheus_tsdb_exemplar_series_with_exemplars_in_storage")
data.SendSumOfGaugesPerTenant(out, sm.tsdbExemplarLastTs, "prometheus_tsdb_exemplar_last_exemplars_timestamp_seconds")
data.SendSumOfCounters(out, sm.tsdbExemplarsOutOfOrder, "prometheus_tsdb_exemplar_out_of_order_exemplars_total")

data.SendSumOfCountersPerTenant(out, sm.tsdbOOOAppendedSamples, "prometheus_tsdb_head_out_of_order_samples_appended_total")

data.SendSumOfGauges(out, sm.memSeries, "prometheus_tsdb_head_series")
data.SendSumOfCountersPerTenant(out, sm.memSeriesCreatedTotal, "prometheus_tsdb_head_series_created_total")
data.SendSumOfCountersPerTenant(out, sm.memSeriesRemovedTotal, "prometheus_tsdb_head_series_removed_total")
data.SendSumOfCountersPerTenant(out, sm.tsdbWalReplayUnknownRefsTotal, "prometheus_tsdb_wal_replay_unknown_refs_total", dskit_metrics.WithLabels("type"))
data.SendSumOfCountersPerTenant(out, sm.tsdbWblReplayUnknownRefsTotal, "prometheus_tsdb_wbl_replay_unknown_refs_total", dskit_metrics.WithLabels("type"))
}

func (sm *tsdbMetrics) setRegistryForUser(userID string, registry *prometheus.Registry) {
Expand Down
44 changes: 42 additions & 2 deletions pkg/ingester/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ func TestTSDBMetrics(t *testing.T) {
# HELP cortex_ingester_tsdb_exemplar_out_of_order_exemplars_total Total number of out-of-order exemplar ingestion failed attempts.
# TYPE cortex_ingester_tsdb_exemplar_out_of_order_exemplars_total counter
cortex_ingester_tsdb_exemplar_out_of_order_exemplars_total 9

# HELP cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage Number of TSDB series with exemplars currently in storage.
# TYPE cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage gauge
cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{user="user1"} 1
Expand Down Expand Up @@ -289,6 +289,21 @@ func TestTSDBMetrics(t *testing.T) {
cortex_ingester_tsdb_block_postings_for_matchers_cache_evictions_total{reason="max-items-reached"} 0
cortex_ingester_tsdb_block_postings_for_matchers_cache_evictions_total{reason="ttl-expired"} 0
cortex_ingester_tsdb_block_postings_for_matchers_cache_evictions_total{reason="unknown"} 0

# HELP cortex_ingester_tsdb_wal_replay_unknown_refs_total Total number of unknown series references encountered during WAL replay.
# TYPE cortex_ingester_tsdb_wal_replay_unknown_refs_total counter
cortex_ingester_tsdb_wal_replay_unknown_refs_total{type="series", user="user1"} 12345
cortex_ingester_tsdb_wal_replay_unknown_refs_total{type="samples", user="user1"} 24690
cortex_ingester_tsdb_wal_replay_unknown_refs_total{type="series", user="user2"} 85787
cortex_ingester_tsdb_wal_replay_unknown_refs_total{type="samples", user="user2"} 171574
cortex_ingester_tsdb_wal_replay_unknown_refs_total{type="series", user="user3"} 999
cortex_ingester_tsdb_wal_replay_unknown_refs_total{type="samples", user="user3"} 1998

# HELP cortex_ingester_tsdb_wbl_replay_unknown_refs_total Total number of unknown series references encountered during WBL replay.
# TYPE cortex_ingester_tsdb_wbl_replay_unknown_refs_total counter
cortex_ingester_tsdb_wbl_replay_unknown_refs_total{type="exemplars", user="user1"} 12345
cortex_ingester_tsdb_wbl_replay_unknown_refs_total{type="exemplars", user="user2"} 85787
cortex_ingester_tsdb_wbl_replay_unknown_refs_total{type="exemplars", user="user3"} 999
`))
require.NoError(t, err)
}
Expand Down Expand Up @@ -488,7 +503,7 @@ func TestTSDBMetricsWithRemoval(t *testing.T) {
# HELP cortex_ingester_tsdb_exemplar_out_of_order_exemplars_total Total number of out-of-order exemplar ingestion failed attempts.
# TYPE cortex_ingester_tsdb_exemplar_out_of_order_exemplars_total counter
cortex_ingester_tsdb_exemplar_out_of_order_exemplars_total 9

# HELP cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage Number of TSDB series with exemplars currently in storage.
# TYPE cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage gauge
cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{user="user1"} 1
Expand Down Expand Up @@ -557,6 +572,18 @@ func TestTSDBMetricsWithRemoval(t *testing.T) {
cortex_ingester_tsdb_block_postings_for_matchers_cache_evictions_total{reason="max-items-reached"} 0
cortex_ingester_tsdb_block_postings_for_matchers_cache_evictions_total{reason="ttl-expired"} 0
cortex_ingester_tsdb_block_postings_for_matchers_cache_evictions_total{reason="unknown"} 0

# HELP cortex_ingester_tsdb_wal_replay_unknown_refs_total Total number of unknown series references encountered during WAL replay.
# TYPE cortex_ingester_tsdb_wal_replay_unknown_refs_total counter
cortex_ingester_tsdb_wal_replay_unknown_refs_total{type="series", user="user1"} 12345
cortex_ingester_tsdb_wal_replay_unknown_refs_total{type="samples", user="user1"} 24690
cortex_ingester_tsdb_wal_replay_unknown_refs_total{type="series", user="user2"} 85787
cortex_ingester_tsdb_wal_replay_unknown_refs_total{type="samples", user="user2"} 171574

# HELP cortex_ingester_tsdb_wbl_replay_unknown_refs_total Total number of unknown series references encountered during WBL replay.
# TYPE cortex_ingester_tsdb_wbl_replay_unknown_refs_total counter
cortex_ingester_tsdb_wbl_replay_unknown_refs_total{type="exemplars", user="user1"} 12345
cortex_ingester_tsdb_wbl_replay_unknown_refs_total{type="exemplars", user="user2"} 85787
`))
require.NoError(t, err)
}
Expand Down Expand Up @@ -815,5 +842,18 @@ func populateTSDBMetrics(base float64) *prometheus.Registry {
})
chunksMmappedTotal.Add(30 * base)

tsdbWalReplayUnknownRefsTotal := promauto.With(r).NewCounterVec(prometheus.CounterOpts{
Name: "prometheus_tsdb_wal_replay_unknown_refs_total",
Help: "Total number of unknown series references encountered during WAL replay.",
}, []string{"type"})
tsdbWalReplayUnknownRefsTotal.WithLabelValues("series").Add(base)
tsdbWalReplayUnknownRefsTotal.WithLabelValues("samples").Add(base * 2)

tsdbWblReplayUnknownRefsTotal := promauto.With(r).NewCounterVec(prometheus.CounterOpts{
Name: "prometheus_tsdb_wbl_replay_unknown_refs_total",
Help: "Total number of unknown series references encountered during WBL replay.",
}, []string{"type"})
tsdbWblReplayUnknownRefsTotal.WithLabelValues("exemplars").Add(base)

return r
}
Loading