From 3c37062cd04f7f96c6ea9761fedaa25b7e1fbde5 Mon Sep 17 00:00:00 2001 From: Thomas Hartland Date: Thu, 27 Jun 2024 15:37:58 +0100 Subject: [PATCH 1/4] compact: Update filtered blocks list before second downsample pass If the second downsampling pass is given the same filteredMetas list as the first pass, it will create duplicates of blocks created in the first pass. It will also not be able to do further downsampling e.g 5m->1h using blocks created in the first pass, as it will not be aware of them. The metadata was already being synced before the second pass, but not updated into the filteredMetas list. Signed-off-by: Thomas Hartland --- cmd/thanos/compact.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index 43fecdebc1..06d3ffaaa3 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -488,6 +488,14 @@ func runCompact( return errors.Wrap(err, "sync before second pass of downsampling") } + // Regenerate the filtered list of blocks after the sync, + // to include the blocks created by the first pass. + filteredMetas = sy.Metas() + noDownsampleBlocks = noDownsampleMarkerFilter.NoDownsampleMarkedBlocks() + for ul := range noDownsampleBlocks { + delete(filteredMetas, ul) + } + if err := downsampleBucket( ctx, logger, From de859e1f5e241a9be2489ea4512a86d25dbf15e9 Mon Sep 17 00:00:00 2001 From: Thomas Hartland Date: Thu, 27 Jun 2024 16:03:57 +0100 Subject: [PATCH 2/4] Update changelog Signed-off-by: Thomas Hartland --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b961d65ea..c0abb551fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re - [#7326](https://github.com/thanos-io/thanos/pull/7326) Query: fixing exemplars proxy when querying stores with multiple tenants. - [#7403](https://github.com/thanos-io/thanos/pull/7403) Sidecar: fix startup sequence - [#7484](https://github.com/thanos-io/thanos/pull/7484) Proxy: fix panic in lazy response set +- [#7492](https://github.com/thanos-io/thanos/pull/7492) Compactor: update filtered blocks list before second downsample pass. ### Added From 4fae4fc3b4c461e6c322c1a95c7d832aa899a7ab Mon Sep 17 00:00:00 2001 From: Thomas Hartland Date: Fri, 28 Jun 2024 13:15:11 +0100 Subject: [PATCH 3/4] e2e/compact: Fix number of blocks cleaned assertion The value was increased in 2ed48f7 to fix the test, with the reasoning that the hardcoded value must have been taken from a run of the CI that didn't reach the max value due to CI worker lag. More likely the real reason is that commit 68bef3f the day before had caused blocks to be duplicated during downsampling. The duplicate block is immediately marked for deletion, causing an extra +1 in the number of blocks cleaned. Subtracting one from the value again now that the block duplication issue is fixed. Signed-off-by: Thomas Hartland --- test/e2e/compact_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e/compact_test.go b/test/e2e/compact_test.go index eb0ceb0054..6df722dcea 100644 --- a/test/e2e/compact_test.go +++ b/test/e2e/compact_test.go @@ -766,7 +766,7 @@ func testCompactWithStoreGateway(t *testing.T, penaltyDedup bool) { // NOTE: We cannot assert on intermediate `thanos_blocks_meta_` metrics as those are gauge and change dynamically due to many // compaction groups. Wait for at least first compaction iteration (next is in 5m). testutil.Ok(t, c.WaitSumMetricsWithOptions(e2emon.Greater(0), []string{"thanos_compact_iterations_total"}, e2emon.WaitMissingMetrics())) - testutil.Ok(t, c.WaitSumMetricsWithOptions(e2emon.Equals(19), []string{"thanos_compact_blocks_cleaned_total"}, e2emon.WaitMissingMetrics())) + testutil.Ok(t, c.WaitSumMetricsWithOptions(e2emon.Equals(18), []string{"thanos_compact_blocks_cleaned_total"}, e2emon.WaitMissingMetrics())) testutil.Ok(t, c.WaitSumMetricsWithOptions(e2emon.Equals(0), []string{"thanos_compact_block_cleanup_failures_total"}, e2emon.WaitMissingMetrics())) testutil.Ok(t, c.WaitSumMetricsWithOptions(e2emon.Equals(0), []string{"thanos_compact_aborted_partial_uploads_deletion_attempts_total"}, e2emon.WaitMissingMetrics())) testutil.Ok(t, c.WaitSumMetricsWithOptions(e2emon.Equals(0), []string{"thanos_compact_group_compactions_total"}, e2emon.WaitMissingMetrics())) From bdcf63bd3657f90dbb56100e6c7a68f1d432135b Mon Sep 17 00:00:00 2001 From: Thomas Hartland Date: Fri, 28 Jun 2024 17:36:27 +0100 Subject: [PATCH 4/4] e2e/compact: Revert change to downsample count assertion Combined with the previous commit this effectively reverts all of 2ed48f7, in which two assertions were changed to (unknowingly) account for a bug which had just been introduced in the downsampling code, causing duplicate blocks. This assertion change I am less sure on the reasoning for, but after running through the e2e tests several times locally, it is consistent that the only downsampling happens in the "compact-working" step, and so all other steps would report 0 for their total downsamples metric. Signed-off-by: Thomas Hartland --- test/e2e/compact_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e/compact_test.go b/test/e2e/compact_test.go index 6df722dcea..54c04fe91a 100644 --- a/test/e2e/compact_test.go +++ b/test/e2e/compact_test.go @@ -775,7 +775,7 @@ func testCompactWithStoreGateway(t *testing.T, penaltyDedup bool) { testutil.Ok(t, c.WaitSumMetricsWithOptions(e2emon.Equals(7), []string{"thanos_compact_group_compaction_runs_started_total"}, e2emon.WaitMissingMetrics())) testutil.Ok(t, c.WaitSumMetricsWithOptions(e2emon.Equals(7), []string{"thanos_compact_group_compaction_runs_completed_total"}, e2emon.WaitMissingMetrics())) - testutil.Ok(t, c.WaitSumMetricsWithOptions(e2emon.Equals(2), []string{"thanos_compact_downsample_total"}, e2emon.WaitMissingMetrics())) + testutil.Ok(t, c.WaitSumMetricsWithOptions(e2emon.Equals(0), []string{"thanos_compact_downsample_total"}, e2emon.WaitMissingMetrics())) testutil.Ok(t, c.WaitSumMetricsWithOptions(e2emon.Equals(0), []string{"thanos_compact_downsample_failures_total"}, e2emon.WaitMissingMetrics())) testutil.Ok(t, str.WaitSumMetricsWithOptions(