From fbe571e3e3bbb3b18e63c4c0ca40c90a3074dcbe Mon Sep 17 00:00:00 2001 From: Abhijat Malviya Date: Mon, 9 Sep 2024 11:23:15 +0530 Subject: [PATCH 1/2] cst/inv: Fix unchecked optional access --- src/v/cloud_storage/inventory/inv_consumer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/v/cloud_storage/inventory/inv_consumer.cc b/src/v/cloud_storage/inventory/inv_consumer.cc index 7291ec7a0f475..ea192653dda03 100644 --- a/src/v/cloud_storage/inventory/inv_consumer.cc +++ b/src/v/cloud_storage/inventory/inv_consumer.cc @@ -113,7 +113,7 @@ inventory_consumer::process_paths(fragmented_vector paths) { void inventory_consumer::process_path(ss::sstring path) { if (auto maybe_ntp = ntp_from_path(path); - _ntps.contains(maybe_ntp.value())) { + maybe_ntp.has_value() && _ntps.contains(maybe_ntp.value())) { const auto& ntp = maybe_ntp.value(); auto hash = xxhash_64(path.data(), path.size()); From d709a8f2ec316c34353978c5bfb48f66b4cb9c54 Mon Sep 17 00:00:00 2001 From: Abhijat Malviya Date: Mon, 9 Sep 2024 14:18:47 +0530 Subject: [PATCH 2/2] cst/inv: Allow uuids in segment expression match Since the segment prefix was recently changed to contain either the cluster UUID or the regular hash, the path matching expression is adjusted to also accept UUIDs.
--- src/v/cloud_storage/inventory/inv_consumer.cc | 5 +++-- src/v/cloud_storage/inventory/tests/inv_consumer_tests.cc | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/v/cloud_storage/inventory/inv_consumer.cc b/src/v/cloud_storage/inventory/inv_consumer.cc index ea192653dda03..790605be93cff 100644 --- a/src/v/cloud_storage/inventory/inv_consumer.cc +++ b/src/v/cloud_storage/inventory/inv_consumer.cc @@ -34,8 +34,9 @@ namespace ranges = std::ranges; namespace views = std::views; namespace { -// hash-string/ns/tp/partition_rev/.* -const RE2 path_expr{"^[[:xdigit:]]+/(.*?)/(.*?)/(\\d+)_\\d+/.*?"}; +// hash-string/ns/tp/partition_rev/.* OR +// cluster-uuid/ns/tp/partition_rev/.* +const RE2 path_expr{"^[[:xdigit:]-]+/(.*?)/(.*?)/(\\d+)_\\d+/.*?"}; // Holds hashes for a given NTP in memory before they will be flushed to disk. // One of these structures is held per NTP in a map keyed by the NTP itself. diff --git a/src/v/cloud_storage/inventory/tests/inv_consumer_tests.cc b/src/v/cloud_storage/inventory/tests/inv_consumer_tests.cc index 941b73f73fa31..ab19c7ccb048f 100644 --- a/src/v/cloud_storage/inventory/tests/inv_consumer_tests.cc +++ b/src/v/cloud_storage/inventory/tests/inv_consumer_tests.cc @@ -43,6 +43,8 @@ TEST(Consumer, ParseNTPFromPath) { std::vector

test_data{ {"a0a6eeb8/kafka/topic-x/999_24/178-188-1574137-1-v1.log.1", std::make_optional(make_ntp("kafka", "topic-x", 999))}, + {"d10492a6-2408-418e-9b6b-051697c5255b/k/t/1_24/---", + std::make_optional(make_ntp("k", "t", 1))}, {"a/k/t/1_24/---", std::make_optional(make_ntp("k", "t", 1))}, // Bad hex m {"m/k/t/1_24/---", std::nullopt},