From fa35e2c4a0fd858054c6fa11988a22d16ffc2244 Mon Sep 17 00:00:00 2001 From: Shinichi Umegane Date: Thu, 23 Jan 2025 18:35:44 +0900 Subject: [PATCH] WIP: Add blob_file class and update related classes as part of BLOB support - Added `blob_file` class to manage persistent BLOB data. - Updated existing classes to integrate with the new `blob_file` class. - Added `blob_pool` class as a placeholder with a dummy implementation. - Commit represents an intermediate step in implementing complete BLOB support. --- include/limestone/api/blob_file.h | 25 +++- include/limestone/api/datastore.h | 7 +- src/limestone/blob_file.cpp | 37 ++++++ src/limestone/blob_file_resolver.h | 112 ++++++++++++++++++ src/limestone/blob_pool_impl.cpp | 44 +++++++ src/limestone/blob_pool_impl.h | 64 ++++++++++ src/limestone/datastore.cpp | 19 +++ test/CMakeLists.txt | 1 + .../blob/blob_file_resolver_test.cpp | 102 ++++++++++++++++ test/limestone/blob/blob_file_test.cpp | 43 +++++++ 10 files changed, 448 insertions(+), 6 deletions(-) create mode 100644 src/limestone/blob_file.cpp create mode 100644 src/limestone/blob_file_resolver.h create mode 100644 src/limestone/blob_pool_impl.cpp create mode 100644 src/limestone/blob_pool_impl.h create mode 100644 test/limestone/blob/blob_file_resolver_test.cpp create mode 100644 test/limestone/blob/blob_file_test.cpp diff --git a/include/limestone/api/blob_file.h b/include/limestone/api/blob_file.h index 1c3b075f..47ee9fd8 100644 --- a/include/limestone/api/blob_file.h +++ b/include/limestone/api/blob_file.h @@ -15,26 +15,41 @@ */ #pragma once +#include + namespace limestone::api { -/** - * @brief represents a BLOB file that can provide persistent BLOB data. - */ class blob_file { +private: + boost::filesystem::path blob_path_; + bool available_ = false; + public: + /** + * @brief Constructor to create a BLOB file instance. + * @param path Path to the BLOB file. + * @param available Initial availability status of the BLOB file (default: false). 
+ */ + explicit blob_file(boost::filesystem::path const& path, bool available = false); /** - * @brief retrieves the path to the BLOB file. + * @brief Retrieves the path to the BLOB file. * @returns BLOB file path */ [[nodiscard]] boost::filesystem::path const& path() const noexcept; /** - * @brief returns whether this BLOB file is available. + * @brief Returns whether this BLOB file is available. * @return true if this is available * @return false otherwise */ [[nodiscard]] explicit operator bool() const noexcept; + + /** + * @brief Sets the availability status of the BLOB file. + * @param available New availability status. + */ + void set_availability(bool available) noexcept; }; } // namespace limestone::api \ No newline at end of file diff --git a/include/limestone/api/datastore.h b/include/limestone/api/datastore.h index e01fd510..896d58f8 100644 --- a/include/limestone/api/datastore.h +++ b/include/limestone/api/datastore.h @@ -42,7 +42,8 @@ #include namespace limestone::internal { - class compaction_catalog; + class compaction_catalog; + class blob_file_resolver; } namespace limestone::api { @@ -450,6 +451,10 @@ class datastore { virtual void write_epoch_to_file(epoch_id_type epoch_id); int epoch_write_counter = 0; + + std::unique_ptr blob_file_resolver_; + + std::atomic next_blob_id_{0}; }; } // namespace limestone::api diff --git a/src/limestone/blob_file.cpp b/src/limestone/blob_file.cpp new file mode 100644 index 00000000..dfb8569a --- /dev/null +++ b/src/limestone/blob_file.cpp @@ -0,0 +1,37 @@ +/* + * Copyright 2022-2025 Project Tsurugi. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +namespace limestone::api { + +blob_file::blob_file(boost::filesystem::path const& path, bool available) + : blob_path_(path), available_(available) {} + +boost::filesystem::path const& blob_file::path() const noexcept { + return blob_path_; +} + +blob_file::operator bool() const noexcept { + return available_; +} + +void blob_file::set_availability(bool available) noexcept { + available_ = available; +} + +} // namespace limestone::api diff --git a/src/limestone/blob_file_resolver.h b/src/limestone/blob_file_resolver.h new file mode 100644 index 00000000..15a89c02 --- /dev/null +++ b/src/limestone/blob_file_resolver.h @@ -0,0 +1,112 @@ +/* + * Copyright 2022-2025 Project Tsurugi. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace limestone::internal { + +using limestone::api::blob_id_type; +using limestone::api::blob_file; + +/** + * @brief Resolves file paths for given BLOB IDs with precomputed directory caching. + */ +class blob_file_resolver { +public: + /** + * @brief Constructs a blob_file_resolver with the given base directory. + * + * The BLOB files are assumed to be stored under `<base_directory>/blob/`. + * + * @param base_directory The base directory for storing BLOB files. + * @param directory_count The number of subdirectories to distribute files into. + * @param hash_function The function used to map `blob_id` to a directory index. + */ + explicit blob_file_resolver( + boost::filesystem::path base_directory, + std::size_t directory_count = 100, + std::function hash_function = [](blob_id_type id) { return id; }) + : blob_directory_(std::move(base_directory) / "blob"), + directory_count_(directory_count), + hash_function_(std::move(hash_function)) { + // Precompute and cache all directory paths + precompute_directory_cache(); + } + + /** + * @brief Resolves the file path for the given BLOB ID. + * + * @param blob_id The ID of the BLOB. + * @return The resolved file path. + */ + [[nodiscard]] boost::filesystem::path resolve_path(blob_id_type blob_id) const { + // Calculate directory index + std::size_t directory_index = hash_function_(blob_id) % directory_count_; + + // Retrieve precomputed directory path + const boost::filesystem::path& subdirectory = directory_cache_[directory_index]; + + // Generate the file name + std::ostringstream file_name; + file_name << std::hex << std::setw(16) << std::setfill('0') << blob_id << ".blob"; + + return subdirectory / file_name.str(); + } + + /** + * @brief Resolves the BLOB file for the given BLOB ID. + * + * @param blob_id The ID of the BLOB. + * @param available Initial availability status of the BLOB file (default: false). 
+ * @return A blob_file instance corresponding to the BLOB ID. + */ + [[nodiscard]] blob_file resolve_blob_file(blob_id_type blob_id, bool available = false) const { + boost::filesystem::path file_path = resolve_path(blob_id); + return blob_file(file_path, available); + } + +private: + /** + * @brief Precomputes all directory paths and stores them in the cache. + */ + void precompute_directory_cache() { + directory_cache_.reserve(directory_count_); + for (std::size_t i = 0; i < directory_count_; ++i) { + std::ostringstream dir_name; + dir_name << "dir_" << std::setw(2) << std::setfill('0') << i; + directory_cache_.emplace_back(blob_directory_ / dir_name.str()); + } + } + + boost::filesystem::path blob_directory_; // Full path to the `blob` directory + std::size_t directory_count_; // Number of directories for distribution + std::function hash_function_; // Hash function to map blob_id to directory index + + std::vector directory_cache_; // Precomputed cache for directory paths +}; + +} // namespace limestone::internal + diff --git a/src/limestone/blob_pool_impl.cpp b/src/limestone/blob_pool_impl.cpp new file mode 100644 index 00000000..94244cd1 --- /dev/null +++ b/src/limestone/blob_pool_impl.cpp @@ -0,0 +1,44 @@ +/* + * Copyright 2022-2025 Project Tsurugi. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "blob_pool_impl.h" + +namespace limestone::internal { + +blob_pool_impl::blob_pool_impl(std::function id_generator, + limestone::internal::blob_file_resolver& resolver) + : id_generator_(std::move(id_generator)), resolver_(resolver) {} + +blob_id_type blob_pool_impl::generate_blob_id() { + return id_generator_(); +} + +void blob_pool_impl::release() { + // Empty implementation (placeholder) +} + +blob_id_type blob_pool_impl::register_file(boost::filesystem::path const& /*file*/, bool /*is_temporary_file*/) { + return generate_blob_id(); // Dummy: returns a newly generated ID +} + +blob_id_type blob_pool_impl::register_data(std::string_view /*data*/) { + return generate_blob_id(); // Dummy: returns a newly generated ID +} + +blob_id_type blob_pool_impl::duplicate_data(blob_id_type /*reference*/) { + return generate_blob_id(); // Dummy: returns a newly generated ID +} + +} // namespace limestone::internal diff --git a/src/limestone/blob_pool_impl.h b/src/limestone/blob_pool_impl.h new file mode 100644 index 00000000..6d3d1ea2 --- /dev/null +++ b/src/limestone/blob_pool_impl.h @@ -0,0 +1,64 @@ +/* + * Copyright 2022-2025 Project Tsurugi. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include "blob_file_resolver.h" + +namespace limestone::internal { + +using namespace limestone::api; + + +/** + * @brief Implementation of the blob_pool interface. 
+ */ +class blob_pool_impl : public blob_pool { +public: + /** + * @brief Constructs a blob_pool_impl instance with the given ID generator and blob_file_resolver. + * @param id_generator A callable object that generates unique IDs of type blob_id_type. + * @param resolver Reference to a blob_file_resolver instance. + */ + explicit blob_pool_impl(std::function id_generator, + limestone::internal::blob_file_resolver& resolver); + + void release() override; + + [[nodiscard]] blob_id_type register_file(boost::filesystem::path const& file, + bool is_temporary_file) override; + + [[nodiscard]] blob_id_type register_data(std::string_view data) override; + + [[nodiscard]] blob_id_type duplicate_data(blob_id_type reference) override; + + +private: + /** + * @brief Generates a unique ID for a BLOB. + * + * @return A unique ID of type blob_id_type. + */ + [[nodiscard]] blob_id_type generate_blob_id(); + + std::function id_generator_; // Callable object for ID generation + + blob_file_resolver& resolver_; // reference to a blob_file_resolver instance +}; + +} // namespace limestone::internal \ No newline at end of file diff --git a/src/limestone/datastore.cpp b/src/limestone/datastore.cpp index fa63c392..df82521e 100644 --- a/src/limestone/datastore.cpp +++ b/src/limestone/datastore.cpp @@ -32,6 +32,8 @@ #include "log_entry.h" #include "online_compaction.h" #include "compaction_catalog.h" +#include "blob_file_resolver.h" +#include "blob_pool_impl.h" namespace limestone::api { using namespace limestone::internal; @@ -109,6 +111,8 @@ datastore::datastore(configuration const& conf) : location_(conf.data_locations_ recover_max_parallelism_ = conf.recover_max_parallelism_; LOG(INFO) << "/:limestone:config:datastore setting the number of recover process thread = " << recover_max_parallelism_; + blob_file_resolver_ = std::make_unique(location_); + VLOG_LP(log_debug) << "datastore is created, location = " << location_.string(); } catch (...) 
{ HANDLE_EXCEPTION_AND_ABORT(); @@ -697,5 +701,20 @@ void datastore::compact_with_online() { TRACE_END; } +std::unique_ptr datastore::acquire_blob_pool() { + // Store the ID generation logic as a lambda function in a variable + auto id_generator = [this]() { + return next_blob_id_.fetch_add(1, std::memory_order_relaxed); + }; + + // Pass the lambda function as a constructor argument to create blob_pool_impl + return std::make_unique(id_generator, *blob_file_resolver_); +} + +blob_file datastore::get_blob_file(blob_id_type reference) { + check_after_ready(static_cast(__func__)); + return blob_file_resolver_->resolve_blob_file(reference, true); +} + } // namespace limestone::api diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 0555ec06..fbf05702 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -30,6 +30,7 @@ endfunction (add_test_executable) file(GLOB SRCS "limestone/api/*.cpp" + "limestone/blob/*.cpp" "limestone/log/*.cpp" "limestone/epoch/*.cpp" "limestone/utils/*.cpp" diff --git a/test/limestone/blob/blob_file_resolver_test.cpp b/test/limestone/blob/blob_file_resolver_test.cpp new file mode 100644 index 00000000..fa24e3cf --- /dev/null +++ b/test/limestone/blob/blob_file_resolver_test.cpp @@ -0,0 +1,102 @@ +#include +#include +#include "blob_file_resolver.h" + +namespace limestone::testing { + +using limestone::api::blob_id_type; + +constexpr const char* base_directory = "/tmp/blob_file_resolver_test"; + +class blob_file_resolver_test : public ::testing::Test { +protected: + virtual void SetUp() { + // Remove and recreate the test directory + if (system(("rm -rf " + std::string(base_directory)).c_str()) != 0) { + std::cerr << "cannot remove directory" << std::endl; + } + if (system(("mkdir -p " + std::string(base_directory)).c_str()) != 0) { + std::cerr << "cannot make directory" << std::endl; + } + + resolver_ = std::make_unique( + boost::filesystem::path(base_directory), 10 /* directory count */); + } + + virtual void TearDown() { + 
resolver_.reset(); + if (system(("rm -rf " + std::string(base_directory)).c_str()) != 0) { + std::cerr << "cannot remove directory" << std::endl; + } + } + + std::unique_ptr resolver_; +}; + +TEST_F(blob_file_resolver_test, resolves_correct_path) { + // Test that paths are resolved correctly + blob_id_type blob_id = 123456; + + auto path = resolver_->resolve_path(blob_id); + + // Expected path + std::ostringstream dir_name; + dir_name << "dir_" << std::setw(2) << std::setfill('0') << (blob_id % 10); // Mod 10 for directory count + boost::filesystem::path expected_path = boost::filesystem::path(base_directory) / "blob" / dir_name.str(); + expected_path /= "000000000001e240.blob"; // Blob ID in hex: 123456 = 1e240 + + ASSERT_EQ(path, expected_path); +} + +TEST_F(blob_file_resolver_test, handles_multiple_blob_ids) { + // Test multiple blob IDs resolve to correct paths + for (blob_id_type blob_id = 0; blob_id < 100; ++blob_id) { + auto path = resolver_->resolve_path(blob_id); + + std::ostringstream dir_name; + dir_name << "dir_" << std::setw(2) << std::setfill('0') << (blob_id % 10); // Mod 10 for directory count + boost::filesystem::path expected_path = boost::filesystem::path(base_directory) / "blob" / dir_name.str(); + std::ostringstream file_name; + file_name << std::hex << std::setw(16) << std::setfill('0') << blob_id << ".blob"; + expected_path /= file_name.str(); + + ASSERT_EQ(path, expected_path); + } +} + +TEST_F(blob_file_resolver_test, resolves_blob_file) { + // Test that blob_file is resolved correctly + blob_id_type blob_id = 123456; + bool initial_availability = true; + + auto blob = resolver_->resolve_blob_file(blob_id, initial_availability); + + // Expected path + std::ostringstream dir_name; + dir_name << "dir_" << std::setw(2) << std::setfill('0') << (blob_id % 10); // Mod 10 for directory count + boost::filesystem::path expected_path = boost::filesystem::path(base_directory) / "blob" / dir_name.str(); + expected_path /= "000000000001e240.blob"; // 
Blob ID in hex: 123456 = 1e240 + + ASSERT_EQ(blob.path(), expected_path); + ASSERT_EQ(static_cast(blob), initial_availability); +} + +TEST_F(blob_file_resolver_test, resolves_multiple_blob_files) { + // Test multiple blob IDs resolve to correct blob_file instances + for (blob_id_type blob_id = 0; blob_id < 100; ++blob_id) { + bool initial_availability = (blob_id % 2 == 0); // Alternate availability + auto blob = resolver_->resolve_blob_file(blob_id, initial_availability); + + std::ostringstream dir_name; + dir_name << "dir_" << std::setw(2) << std::setfill('0') << (blob_id % 10); // Mod 10 for directory count + boost::filesystem::path expected_path = boost::filesystem::path(base_directory) / "blob" / dir_name.str(); + std::ostringstream file_name; + file_name << std::hex << std::setw(16) << std::setfill('0') << blob_id << ".blob"; + expected_path /= file_name.str(); + + ASSERT_EQ(blob.path(), expected_path); + ASSERT_EQ(static_cast(blob), initial_availability); + } +} + +} // namespace limestone::testing diff --git a/test/limestone/blob/blob_file_test.cpp b/test/limestone/blob/blob_file_test.cpp new file mode 100644 index 00000000..ef6e4b52 --- /dev/null +++ b/test/limestone/blob/blob_file_test.cpp @@ -0,0 +1,43 @@ +#include +#include +#include + +namespace limestone::api { + +TEST(blob_file_test, constructor_with_default_availability) { + boost::filesystem::path test_path("/path/to/blob"); + blob_file blob(test_path); + + EXPECT_EQ(blob.path(), test_path); + EXPECT_FALSE(static_cast(blob)); +} + +TEST(blob_file_test, constructor_with_availability) { + boost::filesystem::path test_path("/path/to/blob"); + blob_file blob(test_path, true); + + EXPECT_EQ(blob.path(), test_path); + EXPECT_TRUE(static_cast(blob)); +} + +TEST(blob_file_test, set_availability) { + boost::filesystem::path test_path("/path/to/blob"); + blob_file blob(test_path); + + EXPECT_FALSE(static_cast(blob)); + + blob.set_availability(true); + EXPECT_TRUE(static_cast(blob)); + + 
blob.set_availability(false); + EXPECT_FALSE(static_cast(blob)); +} + +TEST(blob_file_test, path_returns_correct_value) { + boost::filesystem::path test_path("/path/to/blob"); + blob_file blob(test_path); + + EXPECT_EQ(blob.path(), test_path); +} + +} // namespace limestone::api