From 75147f3ac71fadf5f72e9c5349b3052b69927992 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Fri, 1 Mar 2024 06:56:27 +0000 Subject: [PATCH 01/22] S3-plain based disk supporting directory rename Fixes: https://github.com/ClickHouse/ClickHouse/issues/58347 --- programs/keeper/CMakeLists.txt | 3 + src/Common/ObjectStorageKeyGenerator.cpp | 9 +- src/Common/ObjectStorageKeyGenerator.h | 5 +- .../CommonPathPrefixKeyGenerator.cpp | 69 ++++++ .../CommonPathPrefixKeyGenerator.h | 26 +++ .../ObjectStorages/DiskObjectStorage.cpp | 30 ++- src/Disks/ObjectStorages/DiskObjectStorage.h | 3 +- .../DiskObjectStorageTransaction.cpp | 17 +- src/Disks/ObjectStorages/IMetadataOperation.h | 18 ++ src/Disks/ObjectStorages/IObjectStorage.cpp | 9 +- src/Disks/ObjectStorages/IObjectStorage.h | 9 + .../MetadataOperationsHolder.cpp | 94 ++++++++ .../ObjectStorages/MetadataOperationsHolder.h | 26 +++ .../MetadataStorageFromDisk.cpp | 84 +------ .../ObjectStorages/MetadataStorageFromDisk.h | 11 +- ...dataStorageFromDiskTransactionOperations.h | 13 +- .../MetadataStorageFromPlainObjectStorage.cpp | 218 +++++++++++++++--- .../MetadataStorageFromPlainObjectStorage.h | 32 ++- ...torageFromPlainObjectStorageOperations.cpp | 168 ++++++++++++++ ...aStorageFromPlainObjectStorageOperations.h | 72 ++++++ .../ObjectStorages/ObjectStorageFactory.cpp | 36 ++- .../RegisterDiskObjectStorage.cpp | 1 + .../ObjectStorages/S3/S3ObjectStorage.cpp | 10 +- src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 10 +- .../S3/S3PlainRewritableObjectStorage.h | 23 ++ src/Storages/MergeTree/MergeTreeData.cpp | 2 +- .../03008_s3_plain_rewritable.reference | 1 + .../0_stateless/03008_s3_plain_rewritable.sql | 18 ++ 28 files changed, 833 insertions(+), 184 deletions(-) create mode 100644 src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp create mode 100644 src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h create mode 100644 src/Disks/ObjectStorages/IMetadataOperation.h create mode 100644 src/Disks/ObjectStorages/MetadataOperationsHolder.cpp create mode 100644 src/Disks/ObjectStorages/MetadataOperationsHolder.h create mode 100644 src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp create mode 100644 src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h create mode 100644 src/Disks/ObjectStorages/S3/S3PlainRewritableObjectStorage.h create mode 100644 tests/queries/0_stateless/03008_s3_plain_rewritable.reference create mode 100644 tests/queries/0_stateless/03008_s3_plain_rewritable.sql diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 70e0f229fd48..889644658785 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -121,6 +121,8 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/DiskType.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/IObjectStorage.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataOperationsHolder.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataFromDiskTransactionState.cpp @@ -137,6 +139,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3Capabilities.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/diskSettings.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/DiskS3Utils.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageFactory.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFactory.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp diff --git a/src/Common/ObjectStorageKeyGenerator.cpp b/src/Common/ObjectStorageKeyGenerator.cpp index 7b4507a3abcf..e9212c3f04d1 100644 --- a/src/Common/ObjectStorageKeyGenerator.cpp +++ b/src/Common/ObjectStorageKeyGenerator.cpp @@ -14,10 +14,7 @@ class GeneratorWithTemplate : public DB::IObjectStorageKeysGenerator , re_gen(key_template) { } - DB::ObjectStorageKey generate(const String &) const override - { - return DB::ObjectStorageKey::createAsAbsolute(re_gen.generate()); - } + DB::ObjectStorageKey generate(const String &, bool) const override { return DB::ObjectStorageKey::createAsAbsolute(re_gen.generate()); } private: String key_template; @@ -32,7 +29,7 @@ class GeneratorWithPrefix : public DB::IObjectStorageKeysGenerator : key_prefix(std::move(key_prefix_)) {} - DB::ObjectStorageKey generate(const String &) const override + DB::ObjectStorageKey generate(const String &, bool) const override { /// Path to store the new S3 object. @@ -63,7 +60,7 @@ class GeneratorAsIsWithPrefix : public DB::IObjectStorageKeysGenerator : key_prefix(std::move(key_prefix_)) {} - DB::ObjectStorageKey generate(const String & path) const override + DB::ObjectStorageKey generate(const String & path, bool) const override { return DB::ObjectStorageKey::createAsRelative(key_prefix, path); } diff --git a/src/Common/ObjectStorageKeyGenerator.h b/src/Common/ObjectStorageKeyGenerator.h index 29f2a4a22c25..11da039b33b3 100644 --- a/src/Common/ObjectStorageKeyGenerator.h +++ b/src/Common/ObjectStorageKeyGenerator.h @@ -1,7 +1,7 @@ #pragma once -#include "ObjectStorageKey.h" #include +#include "ObjectStorageKey.h" namespace DB { @@ -9,8 +9,9 @@ namespace DB class IObjectStorageKeysGenerator { public: - virtual ObjectStorageKey generate(const String & path) const = 0; virtual ~IObjectStorageKeysGenerator() = default; + + virtual ObjectStorageKey generate(const String & path, bool is_directory) const = 0; }; using ObjectStorageKeysGeneratorPtr = std::shared_ptr; diff --git a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp new file mode 100644 index 000000000000..7a3c2a7a8470 --- /dev/null +++ b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp @@ -0,0 +1,69 @@ +#include "CommonPathPrefixKeyGenerator.h" + +#include +#include + +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ +extern const int LOGICAL_ERROR; +} + +CommonPathPrefixKeyGenerator::CommonPathPrefixKeyGenerator(String key_prefix_, std::weak_ptr path_map_) + : key_prefix(key_prefix_), path_map(std::move(path_map_)) +{ +} + +ObjectStorageKey CommonPathPrefixKeyGenerator::generate(const String & path, bool is_directory) const +{ + auto result = getLongestPrefix(path); + + const auto & unrealized_parts = std::get<1>(result); + + if (unrealized_parts.size() >= 2) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can not find object key prefix for path {}", path); + + auto key = std::filesystem::path(std::get<0>(result)); + if (unrealized_parts.empty()) + return ObjectStorageKey::createAsRelative(std::move(key)); + + constexpr size_t part_size = 8; + if (is_directory) + key /= getRandomASCIIString(part_size); + else + key /= unrealized_parts.front(); + + return ObjectStorageKey::createAsRelative(key); +} + +std::tuple> CommonPathPrefixKeyGenerator::getLongestPrefix(const std::string & path) const +{ + std::filesystem::path p(path); + std::deque dq; + + auto ptr = path_map.lock(); + + while (p != p.root_path()) + { + auto it = ptr->find(p / ""); + if (it != ptr->end()) + { + std::vector vec(std::make_move_iterator(dq.begin()), std::make_move_iterator(dq.end())); + return std::make_tuple(it->second, std::move(vec)); + } + + if (!p.filename().empty()) + dq.push_front(p.filename()); + + p = p.parent_path(); + } + + return {key_prefix, std::vector(std::make_move_iterator(dq.begin()), std::make_move_iterator(dq.end()))}; +} + +} diff --git a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h new file mode 100644 index 000000000000..e14f45c500f0 --- /dev/null +++ b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h @@ -0,0 +1,26 @@ +#pragma once + +#include + +#include + +namespace DB +{ + +class CommonPathPrefixKeyGenerator : public IObjectStorageKeysGenerator +{ +public: + using PathMap = std::unordered_map; + + explicit CommonPathPrefixKeyGenerator(String key_prefix_, std::weak_ptr path_map_); + + ObjectStorageKey generate(const String & path, bool is_directory) const override; + +private: + std::tuple> getLongestPrefix(const String & path) const; + + String key_prefix; + std::weak_ptr path_map; +}; + +} diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index c43845116ddd..06abec715679 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -112,20 +112,21 @@ size_t DiskObjectStorage::getFileSize(const String & path) const return metadata_storage->getFileSize(path); } +void DiskObjectStorage::moveDirectory(const String & from_path, const String & to_path) +{ + if (send_metadata) + sendMoveMetadata(from_path, to_path); + + auto transaction = createObjectStorageTransaction(); + transaction->moveDirectory(from_path, to_path); + transaction->commit(); +} + void DiskObjectStorage::moveFile(const String & from_path, const String & to_path, bool should_send_metadata) { if (should_send_metadata) - { - auto revision = metadata_helper->revision_counter + 1; - metadata_helper->revision_counter += 1; - - const ObjectAttributes object_metadata { - {"from_path", from_path}, - {"to_path", to_path} - }; - metadata_helper->createFileOperationObject("rename", revision, object_metadata); - } + sendMoveMetadata(from_path, to_path); auto transaction = createObjectStorageTransaction(); transaction->moveFile(from_path, to_path); @@ -409,6 +410,15 @@ bool DiskObjectStorage::tryReserve(UInt64 bytes) return false; } +void DiskObjectStorage::sendMoveMetadata(const String & from_path, const String & to_path) +{ + chassert(send_metadata); + auto revision = metadata_helper->revision_counter + 1; + metadata_helper->revision_counter += 1; + + const ObjectAttributes object_metadata{{"from_path", from_path}, {"to_path", to_path}}; + metadata_helper->createFileOperationObject("rename", revision, object_metadata); +} bool DiskObjectStorage::supportsCache() const { diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index 88c5e3203b86..9702573b875f 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -112,7 +112,7 @@ friend class DiskObjectStorageRemoteMetadataRestoreHelper; void clearDirectory(const String & path) override; - void moveDirectory(const String & from_path, const String & to_path) override { moveFile(from_path, to_path); } + void moveDirectory(const String & from_path, const String & to_path) override; void removeDirectory(const String & path) override; @@ -228,6 +228,7 @@ friend class DiskObjectStorageRemoteMetadataRestoreHelper; std::mutex reservation_mutex; bool tryReserve(UInt64 bytes); + void sendMoveMetadata(const String & from_path, const String & to_path); const bool send_metadata; diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp index d25add625e89..4e364e44624c 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp @@ -507,7 +507,7 @@ struct CopyFileObjectStorageOperation final : public IDiskObjectStorageOperation std::string to_path; StoredObjects created_objects; - IObjectStorage& destination_object_storage; + IObjectStorage & destination_object_storage; CopyFileObjectStorageOperation( IObjectStorage & object_storage_, @@ -714,7 +714,7 @@ std::unique_ptr DiskObjectStorageTransaction::writeFile { /// Otherwise we will produce lost blobs which nobody points to /// WriteOnce storages are not affected by the issue - if (!tx->object_storage.isWriteOnce() && tx->metadata_storage.exists(path)) + if (!tx->object_storage.isPlain() && tx->metadata_storage.exists(path)) tx->object_storage.removeObjectsIfExist(tx->metadata_storage.getStorageObjects(path)); tx->metadata_transaction->createMetadataFile(path, key_, count); @@ -747,10 +747,9 @@ std::unique_ptr DiskObjectStorageTransaction::writeFile { /// Otherwise we will produce lost blobs which nobody points to /// WriteOnce storages are not affected by the issue - if (!object_storage_tx->object_storage.isWriteOnce() && object_storage_tx->metadata_storage.exists(path)) + if (!object_storage_tx->object_storage.isPlain() && object_storage_tx->metadata_storage.exists(path)) { - object_storage_tx->object_storage.removeObjectsIfExist( - object_storage_tx->metadata_storage.getStorageObjects(path)); + object_storage_tx->object_storage.removeObjectsIfExist(object_storage_tx->metadata_storage.getStorageObjects(path)); } tx->createMetadataFile(path, key_, count); @@ -877,14 +876,14 @@ void DiskObjectStorageTransaction::createFile(const std::string & path) void DiskObjectStorageTransaction::copyFile(const std::string & from_file_path, const std::string & to_file_path, const ReadSettings & read_settings, const WriteSettings & write_settings) { - operations_to_execute.emplace_back( - std::make_unique(object_storage, metadata_storage, object_storage, read_settings, write_settings, from_file_path, to_file_path)); + operations_to_execute.emplace_back(std::make_unique( + object_storage, metadata_storage, object_storage, read_settings, write_settings, from_file_path, to_file_path)); } void MultipleDisksObjectStorageTransaction::copyFile(const std::string & from_file_path, const std::string & to_file_path, const ReadSettings & read_settings, const WriteSettings & write_settings) { - operations_to_execute.emplace_back( - std::make_unique(object_storage, metadata_storage, destination_object_storage, read_settings, write_settings, from_file_path, to_file_path)); + operations_to_execute.emplace_back(std::make_unique( + object_storage, metadata_storage, destination_object_storage, read_settings, write_settings, from_file_path, to_file_path)); } void DiskObjectStorageTransaction::commit() diff --git a/src/Disks/ObjectStorages/IMetadataOperation.h b/src/Disks/ObjectStorages/IMetadataOperation.h new file mode 100644 index 000000000000..e4b241ad3889 --- /dev/null +++ b/src/Disks/ObjectStorages/IMetadataOperation.h @@ -0,0 +1,18 @@ +#pragma once + +#include + +namespace DB +{ + +struct IMetadataOperation +{ + virtual void execute(std::unique_lock & metadata_lock) = 0; + virtual void undo() = 0; + virtual void finalize() { } + virtual ~IMetadataOperation() = default; +}; + +using MetadataOperationPtr = std::unique_ptr; + +} diff --git a/src/Disks/ObjectStorages/IObjectStorage.cpp b/src/Disks/ObjectStorages/IObjectStorage.cpp index 78fbdcaddfa0..5fd9695ec9e4 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.cpp +++ b/src/Disks/ObjectStorages/IObjectStorage.cpp @@ -1,11 +1,12 @@ -#include #include -#include +#include +#include +#include #include #include -#include #include -#include +#include +#include namespace DB diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index fde97d82ad1e..8ac4e609ffa8 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -83,6 +83,9 @@ using ObjectKeysWithMetadata = std::vector; class IObjectStorageIterator; using ObjectStorageIteratorPtr = std::shared_ptr; +class IObjectStorageKeysGenerator; +using ObjectStorageKeysGeneratorPtr = std::shared_ptr; + /// Base class for all object storages which implement some subset of ordinary filesystem operations. /// /// Examples of object storages are S3, Azure Blob Storage, HDFS. @@ -207,6 +210,10 @@ class IObjectStorage /// Generate blob name for passed absolute local path. /// Path can be generated either independently or based on `path`. virtual ObjectStorageKey generateObjectKeyForPath(const std::string & path) const = 0; + virtual std::string generateObjectKeyPrefixForDirectoryPath(const std::string & /* path */) const + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented"); + } /// Get unique id for passed absolute path in object storage. virtual std::string getUniqueId(const std::string & path) const { return path; } @@ -226,6 +233,8 @@ class IObjectStorage virtual WriteSettings patchSettings(const WriteSettings & write_settings) const; + virtual void setKeysGenerator(ObjectStorageKeysGeneratorPtr) { } + #if USE_AZURE_BLOB_STORAGE virtual std::shared_ptr getAzureBlobStorageClient() { diff --git a/src/Disks/ObjectStorages/MetadataOperationsHolder.cpp b/src/Disks/ObjectStorages/MetadataOperationsHolder.cpp new file mode 100644 index 000000000000..f051f62fa419 --- /dev/null +++ b/src/Disks/ObjectStorages/MetadataOperationsHolder.cpp @@ -0,0 +1,94 @@ +#include "MetadataOperationsHolder.h" + +#include + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int FS_METADATA_ERROR; +} + +void MetadataOperationsHolder::rollback(size_t until_pos) +{ + /// Otherwise everything is alright + if (state == MetadataFromDiskTransactionState::FAILED) + { + for (int64_t i = until_pos; i >= 0; --i) + { + try + { + operations[i]->undo(); + } + catch (Exception & ex) + { + state = MetadataFromDiskTransactionState::PARTIALLY_ROLLED_BACK; + ex.addMessage(fmt::format("While rolling back operation #{}", i)); + throw; + } + } + } + else + { + /// Nothing to do, transaction committed or not even started to commit + } +} + +void MetadataOperationsHolder::addOperation(MetadataOperationPtr && operation) +{ + if (state != MetadataFromDiskTransactionState::PREPARING) + throw Exception( + ErrorCodes::FS_METADATA_ERROR, + "Cannot add operations to transaction in {} state, it should be in {} state", + toString(state), + toString(MetadataFromDiskTransactionState::PREPARING)); + + operations.emplace_back(std::move(operation)); +} + +void MetadataOperationsHolder::commitImpl(SharedMutex & metadata_mutex) +{ + if (state != MetadataFromDiskTransactionState::PREPARING) + throw Exception( + ErrorCodes::FS_METADATA_ERROR, + "Cannot commit transaction in {} state, it should be in {} state", + toString(state), + toString(MetadataFromDiskTransactionState::PREPARING)); + + { + std::unique_lock lock(metadata_mutex); + for (size_t i = 0; i < operations.size(); ++i) + { + try + { + operations[i]->execute(lock); + } + catch (Exception & ex) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + ex.addMessage(fmt::format("While committing metadata operation #{}", i)); + state = MetadataFromDiskTransactionState::FAILED; + rollback(i); + throw; + } + } + } + + /// Do it in "best effort" mode + for (size_t i = 0; i < operations.size(); ++i) + { + try + { + operations[i]->finalize(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__, fmt::format("Failed to finalize operation #{}", i)); + } + } + + state = MetadataFromDiskTransactionState::COMMITTED; +} + +} diff --git a/src/Disks/ObjectStorages/MetadataOperationsHolder.h b/src/Disks/ObjectStorages/MetadataOperationsHolder.h new file mode 100644 index 000000000000..dc090675e4e1 --- /dev/null +++ b/src/Disks/ObjectStorages/MetadataOperationsHolder.h @@ -0,0 +1,26 @@ +#pragma once + +#include +#include + +/** + * Implementations for transactional operations with metadata used by MetadataStorageFromDisk. + */ + +namespace DB +{ + +class MetadataOperationsHolder +{ +private: + std::vector operations; + MetadataFromDiskTransactionState state{MetadataFromDiskTransactionState::PREPARING}; + + void rollback(size_t until_pos); + +protected: + void addOperation(MetadataOperationPtr && operation); + void commitImpl(SharedMutex & metadata_mutex); +}; + +} diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp b/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp index 9b9c4eb388cb..ab9528884192 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp @@ -10,14 +10,8 @@ namespace DB { -namespace ErrorCodes -{ - extern const int FS_METADATA_ERROR; -} - MetadataStorageFromDisk::MetadataStorageFromDisk(DiskPtr disk_, String compatible_key_prefix_) - : disk(disk_) - , compatible_key_prefix(compatible_key_prefix_) + : disk(disk_), compatible_key_prefix(compatible_key_prefix_) { } @@ -158,83 +152,9 @@ const IMetadataStorage & MetadataStorageFromDiskTransaction::getStorageForNonTra return metadata_storage; } -void MetadataStorageFromDiskTransaction::addOperation(MetadataOperationPtr && operation) -{ - if (state != MetadataFromDiskTransactionState::PREPARING) - throw Exception( - ErrorCodes::FS_METADATA_ERROR, - "Cannot add operations to transaction in {} state, it should be in {} state", - toString(state), toString(MetadataFromDiskTransactionState::PREPARING)); - - operations.emplace_back(std::move(operation)); -} - void MetadataStorageFromDiskTransaction::commit() { - if (state != MetadataFromDiskTransactionState::PREPARING) - throw Exception( - ErrorCodes::FS_METADATA_ERROR, - "Cannot commit transaction in {} state, it should be in {} state", - toString(state), toString(MetadataFromDiskTransactionState::PREPARING)); - - { - std::unique_lock lock(metadata_storage.metadata_mutex); - for (size_t i = 0; i < operations.size(); ++i) - { - try - { - operations[i]->execute(lock); - } - catch (Exception & ex) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - ex.addMessage(fmt::format("While committing metadata operation #{}", i)); - state = MetadataFromDiskTransactionState::FAILED; - rollback(i); - throw; - } - } - } - - /// Do it in "best effort" mode - for (size_t i = 0; i < operations.size(); ++i) - { - try - { - operations[i]->finalize(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__, fmt::format("Failed to finalize operation #{}", i)); - } - } - - state = MetadataFromDiskTransactionState::COMMITTED; -} - -void MetadataStorageFromDiskTransaction::rollback(size_t until_pos) -{ - /// Otherwise everything is alright - if (state == MetadataFromDiskTransactionState::FAILED) - { - for (int64_t i = until_pos; i >= 0; --i) - { - try - { - operations[i]->undo(); - } - catch (Exception & ex) - { - state = MetadataFromDiskTransactionState::PARTIALLY_ROLLED_BACK; - ex.addMessage(fmt::format("While rolling back operation #{}", i)); - throw; - } - } - } - else - { - /// Nothing to do, transaction committed or not even started to commit - } + MetadataOperationsHolder::commitImpl(metadata_storage.metadata_mutex); } void MetadataStorageFromDiskTransaction::writeStringToFile( diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDisk.h b/src/Disks/ObjectStorages/MetadataStorageFromDisk.h index 7059d8e9a6a8..5dca40afc59f 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDisk.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromDisk.h @@ -6,6 +6,7 @@ #include #include #include +#include #include namespace DB @@ -74,18 +75,11 @@ class MetadataStorageFromDisk final : public IMetadataStorage DiskObjectStorageMetadataPtr readMetadataUnlocked(const std::string & path, std::shared_lock & lock) const; }; -class MetadataStorageFromDiskTransaction final : public IMetadataTransaction +class MetadataStorageFromDiskTransaction final : public IMetadataTransaction, private MetadataOperationsHolder { private: const MetadataStorageFromDisk & metadata_storage; - std::vector operations; - MetadataFromDiskTransactionState state{MetadataFromDiskTransactionState::PREPARING}; - - void addOperation(MetadataOperationPtr && operation); - - void rollback(size_t until_pos); - public: explicit MetadataStorageFromDiskTransaction(const MetadataStorageFromDisk & metadata_storage_) : metadata_storage(metadata_storage_) @@ -135,7 +129,6 @@ class MetadataStorageFromDiskTransaction final : public IMetadataTransaction UnlinkMetadataFileOperationOutcomePtr unlinkMetadata(const std::string & path) override; - }; diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h index e8fda177b950..4c5fa0f6e78f 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include @@ -14,17 +14,6 @@ class IDisk; * Implementations for transactional operations with metadata used by MetadataStorageFromDisk. */ -struct IMetadataOperation -{ - virtual void execute(std::unique_lock & metadata_lock) = 0; - virtual void undo() = 0; - virtual void finalize() {} - virtual ~IMetadataOperation() = default; -}; - -using MetadataOperationPtr = std::unique_ptr; - - struct SetLastModifiedOperation final : public IMetadataOperation { SetLastModifiedOperation(const std::string & path_, Poco::Timestamp new_timestamp_, IDisk & disk_); diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp index 4b8fc74e9563..f3dd3daf8e2b 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp @@ -1,26 +1,80 @@ #include "MetadataStorageFromPlainObjectStorage.h" #include +#include #include + +#include +#include #include +#include #include -#include -#include +#include "CommonPathPrefixKeyGenerator.h" +#include +#include +#include namespace DB { -MetadataStorageFromPlainObjectStorage::MetadataStorageFromPlainObjectStorage( - ObjectStoragePtr object_storage_, - String storage_path_prefix_) +namespace +{ + +std::filesystem::path normalizePath(const std::filesystem::path & path) +{ + return std::filesystem::path(path).lexically_normal(); +} + +std::filesystem::path normalizeDirectoryPath(const std::filesystem::path & path) +{ + return normalizePath(path) / ""; +} + +MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::string & root, ObjectStoragePtr object_storage) +{ + constexpr auto PREFIX_PATH_FILE_NAME = "prefix.path"; + MetadataStorageFromPlainObjectStorage::PathMap result; + + RelativePathsWithMetadata files; + object_storage->listObjects(root, files, 0); + for (const auto & file : files) + { + auto remote_path = std::filesystem::path(file.relative_path); + if (remote_path.filename() != PREFIX_PATH_FILE_NAME) + continue; + + StoredObject object{file.relative_path}; + + auto read_buf = object_storage->readObject(object, {}); + String content; + readString(content, *read_buf); + + result.emplace(content, remote_path.parent_path().string()); + } + return result; +} + +} + +MetadataStorageFromPlainObjectStorage::MetadataStorageFromPlainObjectStorage(ObjectStoragePtr object_storage_, String storage_path_prefix_) : object_storage(object_storage_) , storage_path_prefix(std::move(storage_path_prefix_)) + , path_map(std::make_shared(loadPathPrefixMap(object_storage->getCommonKeyPrefix(), object_storage))) { + LOG_TRACE( + getLogger("MetadataStorageFromPlainObjectStorage"), "MetadataStorageFromPlainObjectStorage::MetadataStorageFromPlainObjectStorage"); + + if (!object_storage->isWriteOnce()) + { + auto keys_gen = std::make_shared(object_storage->getCommonKeyPrefix(), path_map); + object_storage->setKeysGenerator(keys_gen); + } } + MetadataTransactionPtr MetadataStorageFromPlainObjectStorage::createTransaction() { - return std::make_shared(*this); + return std::make_shared(*this, object_storage); } const std::string & MetadataStorageFromPlainObjectStorage::getPath() const @@ -44,8 +98,19 @@ bool MetadataStorageFromPlainObjectStorage::isFile(const std::string & path) con bool MetadataStorageFromPlainObjectStorage::isDirectory(const std::string & path) const { - auto object_key = object_storage->generateObjectKeyForPath(path); - std::string directory = object_key.serialize(); + auto key_prefix = [&] -> std::optional + { + if (object_storage->isWriteOnce()) + return object_storage->generateObjectKeyForPath(path).serialize(); + + auto it = path_map->find(normalizeDirectoryPath(path)); + return it == path_map->end() ? std::nullopt : std::make_optional(it->second); + }(); + + if (!key_prefix) + return false; + + std::string directory = key_prefix.value(); if (!directory.ends_with('/')) directory += '/'; return object_storage->existsOrHasAnyChild(directory); @@ -60,37 +125,82 @@ uint64_t MetadataStorageFromPlainObjectStorage::getFileSize(const String & path) return 0; } -std::vector MetadataStorageFromPlainObjectStorage::listDirectory(const std::string & path) const +namespace { - auto object_key = object_storage->generateObjectKeyForPath(path); - - RelativePathsWithMetadata files; - std::string abs_key = object_key.serialize(); - if (!abs_key.ends_with('/')) - abs_key += '/'; - - object_storage->listObjects(abs_key, files, 0); - std::vector result; - for (const auto & path_size : files) +std::vector getDirectChildrenOnWriteOnceDisk(const std::string & storage_key, const RelativePathsWithMetadata & remote_paths) +{ + std::unordered_set duplicates_filter; + for (const auto & elem : remote_paths) { - result.push_back(path_size.relative_path); + const auto & path = elem.relative_path; + chassert(path.find(storage_key) == 0); + const auto child_pos = storage_key.size(); + /// string::npos is ok. + const auto slash_pos = path.find('/', child_pos); + if (slash_pos == std::string::npos) + duplicates_filter.emplace(path.substr(child_pos)); + else + duplicates_filter.emplace(path.substr(child_pos, slash_pos - child_pos)); } + return std::vector(std::make_move_iterator(duplicates_filter.begin()), std::make_move_iterator(duplicates_filter.end())); +} +std::vector getDirectChildrenOnRewritableDisk( + const std::string & storage_key, + const RelativePathsWithMetadata & remote_paths, + const std::string & local_path, + const MetadataStorageFromPlainObjectStorage::PathMap & local_path_prefixes) +{ + /// Subdirectories. std::unordered_set duplicates_filter; - for (auto & row : result) + for (const auto & [prefix, _] : local_path_prefixes) + { + if (!prefix.starts_with(local_path)) + continue; + + auto slash_num = count(prefix.begin() + local_path.size(), prefix.end(), '/'); + if (slash_num != 1) + continue; + + chassert(prefix.back() == '/'); + duplicates_filter.emplace(std::string(prefix.begin() + local_path.size(), prefix.end() - 1)); + } + + /// File names. + for (const auto & elem : remote_paths) { - chassert(row.starts_with(abs_key)); - row.erase(0, abs_key.size()); - auto slash_pos = row.find_first_of('/'); - if (slash_pos != std::string::npos) - row.erase(slash_pos, row.size() - slash_pos); - duplicates_filter.insert(row); + const auto & path = elem.relative_path; + chassert(path.find(storage_key) == 0); + const auto child_pos = storage_key.size(); + + if (path.find('/', child_pos) == std::string::npos) + duplicates_filter.emplace(path.substr(child_pos)); } - return std::vector(duplicates_filter.begin(), duplicates_filter.end()); + return std::vector(std::make_move_iterator(duplicates_filter.begin()), std::make_move_iterator(duplicates_filter.end())); +} +} + +std::vector MetadataStorageFromPlainObjectStorage::listDirectory(const std::string & path) const +{ + auto key_prefix = object_storage->isWriteOnce() ? object_storage->generateObjectKeyForPath(path).serialize() + : object_storage->generateObjectKeyPrefixForDirectoryPath(path); + + RelativePathsWithMetadata files; + std::string abs_key = key_prefix; + if (!abs_key.ends_with('/')) + abs_key += '/'; + + object_storage->listObjects(abs_key, files, 0); + + if (object_storage->isWriteOnce()) + return getDirectChildrenOnWriteOnceDisk(abs_key, files); + else + return getDirectChildrenOnRewritableDisk(abs_key, files, path, *path_map); } + DirectoryIteratorPtr MetadataStorageFromPlainObjectStorage::iterateDirectory(const std::string & path) const { /// Required for MergeTree @@ -122,18 +232,58 @@ void MetadataStorageFromPlainObjectStorageTransaction::unlinkFile(const std::str void MetadataStorageFromPlainObjectStorageTransaction::removeDirectory(const std::string & path) { + /// TODO: Make transactional. for (auto it = metadata_storage.iterateDirectory(path); it->isValid(); it->next()) metadata_storage.object_storage->removeObject(StoredObject(it->path())); + if (!metadata_storage.object_storage->isWriteOnce()) + addOperation(std::make_unique( + path, *metadata_storage.path_map, object_storage)); +} + +void MetadataStorageFromPlainObjectStorageTransaction::createDirectory(const std::string & path) +{ + if (metadata_storage.object_storage->isWriteOnce()) + return; + + auto op = std::make_unique( + normalizeDirectoryPath(path), *metadata_storage.path_map, object_storage); + addOperation(std::move(op)); } -void MetadataStorageFromPlainObjectStorageTransaction::createDirectory(const std::string &) +void MetadataStorageFromPlainObjectStorageTransaction::createDirectoryRecursive(const std::string & path) { - /// Noop. It is an Object Storage not a filesystem. + if (metadata_storage.object_storage->isWriteOnce()) + return; + + auto p = normalizeDirectoryPath(path); + std::vector paths_created; + + while (true) + { + paths_created.push_back(p); + if (!p.has_parent_path()) + break; + + p = p.parent_path(); + } + + for (auto path_to_create : paths_created | std::views::reverse) + { + auto op = std::make_unique( + path_to_create / "", *metadata_storage.path_map, object_storage); + addOperation(std::move(op)); + } } -void MetadataStorageFromPlainObjectStorageTransaction::createDirectoryRecursive(const std::string &) + +void MetadataStorageFromPlainObjectStorageTransaction::moveDirectory(const std::string & path_from, const std::string & path_to) { - /// Noop. It is an Object Storage not a filesystem. + if (metadata_storage.object_storage->isWriteOnce()) + return; + + addOperation(std::make_unique( + normalizeDirectoryPath(path_from), normalizeDirectoryPath(path_to), *metadata_storage.path_map, object_storage)); } + void MetadataStorageFromPlainObjectStorageTransaction::addBlobToMetadata( const std::string &, ObjectStorageKey /* object_key */, uint64_t /* size_in_bytes */) { @@ -146,4 +296,8 @@ UnlinkMetadataFileOperationOutcomePtr MetadataStorageFromPlainObjectStorageTrans return std::make_shared(UnlinkMetadataFileOperationOutcome{0}); } +void MetadataStorageFromPlainObjectStorageTransaction::commit() +{ + MetadataOperationsHolder::commitImpl(metadata_storage.metadata_mutex); +} } diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h index 09623716b1f1..cff7d48f368f 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h @@ -3,8 +3,9 @@ #include #include #include -#include +#include +#include namespace DB { @@ -25,12 +26,19 @@ using UnlinkMetadataFileOperationOutcomePtr = std::shared_ptr; + private: friend class MetadataStorageFromPlainObjectStorageTransaction; ObjectStoragePtr object_storage; String storage_path_prefix; + mutable SharedMutex metadata_mutex; + std::shared_ptr path_map; + public: MetadataStorageFromPlainObjectStorage(ObjectStoragePtr object_storage_, String storage_path_prefix_); @@ -71,21 +79,28 @@ class MetadataStorageFromPlainObjectStorage final : public IMetadataStorage bool supportsStat() const override { return false; } }; -class MetadataStorageFromPlainObjectStorageTransaction final : public IMetadataTransaction +class MetadataStorageFromPlainObjectStorageTransaction final : public IMetadataTransaction, private MetadataOperationsHolder { private: - const MetadataStorageFromPlainObjectStorage & metadata_storage; + MetadataStorageFromPlainObjectStorage & metadata_storage; + ObjectStoragePtr object_storage; std::vector operations; public: - explicit MetadataStorageFromPlainObjectStorageTransaction(const MetadataStorageFromPlainObjectStorage & metadata_storage_) - : metadata_storage(metadata_storage_) + explicit MetadataStorageFromPlainObjectStorageTransaction( + MetadataStorageFromPlainObjectStorage & metadata_storage_, ObjectStoragePtr object_storage_) + : metadata_storage(metadata_storage_), object_storage(object_storage_) {} const IMetadataStorage & getStorageForNonTransactionalReads() const override; void addBlobToMetadata(const std::string & path, ObjectStorageKey object_key, uint64_t size_in_bytes) override; + void setLastModified(const String &, const Poco::Timestamp &) override + { + /// Noop + } + void createEmptyMetadataFile(const std::string & /* path */) override { /// No metadata, no need to create anything. @@ -100,15 +115,14 @@ class MetadataStorageFromPlainObjectStorageTransaction final : public IMetadataT void createDirectoryRecursive(const std::string & path) override; + void moveDirectory(const std::string & /* path_from */, const std::string & /* path_to */) override; + void unlinkFile(const std::string & path) override; void removeDirectory(const std::string & path) override; UnlinkMetadataFileOperationOutcomePtr unlinkMetadata(const std::string & path) override; - void commit() override - { - /// TODO: rewrite with transactions - } + void commit() override; bool supportsChmod() const override { return false; } }; diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp new file mode 100644 index 000000000000..c36687412441 --- /dev/null +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp @@ -0,0 +1,168 @@ +#include "MetadataStorageFromPlainObjectStorageOperations.h" + +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int FILE_DOESNT_EXIST; +extern const int FILE_ALREADY_EXISTS; +extern const int INCORRECT_DATA; +; +}; + +namespace +{ + +constexpr auto PREFIX_PATH_FILE_NAME = "prefix.path"; + +} + +MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::MetadataStorageFromPlainObjectStorageCreateDirectoryOperation( + std::filesystem::path && path_, MetadataStorageFromPlainObjectStorage::PathMap & path_map_, ObjectStoragePtr object_storage_) + : path(std::move(path_)), path_map(path_map_), object_storage(object_storage_) +{ +} + +void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std::unique_lock &) +{ + if (path_map.contains(path)) + return; + + LOG_TRACE(getLogger("MetadataStorageFromPlainObjectStorageCreateDirectoryOperation"), "Creating metadata for directory '{}'", path); + + auto key_prefix = object_storage->generateObjectKeyPrefixForDirectoryPath(path / ""); + auto object_key = ObjectStorageKey::createAsRelative(key_prefix, PREFIX_PATH_FILE_NAME); + + auto object = StoredObject(object_key.serialize(), path / PREFIX_PATH_FILE_NAME); + auto buf = object_storage->writeObject( + object, + WriteMode::Rewrite, + /* object_attributes */ std::nullopt, + /* buf_size */ 4096, + /* settings */ {}); + + write_created = true; + + path_map.emplace(path, std::move(key_prefix)); + + writeString(path.string(), *buf); + buf->finalize(); +} + +void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::undo() +{ + if (!write_created) + return; + + auto node = path_map.extract(path); + auto object_key = ObjectStorageKey::createAsRelative(node.mapped(), PREFIX_PATH_FILE_NAME); + object_storage->removeObject(StoredObject(object_key.serialize(), path)); +} + +MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::MetadataStorageFromPlainObjectStorageMoveDirectoryOperation( + std::filesystem::path && path_from_, + std::filesystem::path && path_to_, + MetadataStorageFromPlainObjectStorage::PathMap & path_map_, + ObjectStoragePtr object_storage_) + : path_from(std::move(path_from_)), path_to(std::move(path_to_)), path_map(path_map_), object_storage(object_storage_) +{ +} + +std::unique_ptr MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::write( + const std::filesystem::path & from, const std::filesystem::path & to, bool validate_content) +{ + auto from_it = path_map.find(from); + if (from_it == path_map.end()) + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Metadata object for the source path '{}' does not exist", from); + + if (path_map.contains(to)) + throw Exception(ErrorCodes::FILE_ALREADY_EXISTS, "Metadata object for the destination path '{}' already exists", to); + + auto object_key = ObjectStorageKey::createAsRelative(from_it->second, PREFIX_PATH_FILE_NAME); + + auto object = StoredObject(object_key.serialize(), path_from / PREFIX_PATH_FILE_NAME); + + if (validate_content) + { + std::string data; + auto readBuf = object_storage->readObject(object, {}); + readStringUntilEOF(data, *readBuf); + chassert(data == path_from); + if (data != path_from) + throw Exception( + ErrorCodes::INCORRECT_DATA, + "Incorrect data for object key {}, expected {}, got {}", + object_key.serialize(), + path_from, + data); + } + + auto write_buf = object_storage->writeObject( + object, + WriteMode::Rewrite, + std::nullopt, + /*buf_size*/ 4096, + /*settings*/ {}); + + return write_buf; +} + +void MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::execute(std::unique_lock & /* metadata_lock */) +{ + LOG_TRACE( + getLogger("MetadataStorageFromPlainObjectStorageMoveDirectoryOperation"), "Moving directory '{}' to '{}'", path_from, path_to); + + auto write_buf = write(path_from, path_to, /* validate_content */ true); + write_created = true; + writeString(path_to.string(), *write_buf); + write_buf->finalize(); + + path_map.emplace(path_to, path_map.extract(path_from).mapped()); + write_finalized = true; +} + +void MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::undo() +{ + if (write_finalized) + path_map.emplace(path_from, path_map.extract(path_to).mapped()); + + if (write_created) + { + auto write_buf = write(path_to, path_from, /* verify_content */ false); + writeString(path_from.string(), *write_buf); + write_buf->finalize(); + } +} + +MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation( + std::filesystem::path && path_, MetadataStorageFromPlainObjectStorage::PathMap & path_map_, ObjectStoragePtr object_storage_) + : path(std::move(path_)), path_map(path_map_), object_storage(object_storage_) +{ +} + +void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::execute(std::unique_lock & /* metadata_lock */) +{ + auto path_it = path_map.find(path); + if (path_it == path_map.end()) + return; + + LOG_TRACE(getLogger("MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation"), "Removing directory '{}'", path); + + auto object_key = ObjectStorageKey::createAsRelative(path_it->second, PREFIX_PATH_FILE_NAME); + auto object = StoredObject(path_it->second, path / PREFIX_PATH_FILE_NAME); + object_storage->removeObject(object); + path_map.erase(path_it); +} + +/// TODO +void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::undo() +{ +} + +} diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h new file mode 100644 index 000000000000..84189a492439 --- /dev/null +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h @@ -0,0 +1,72 @@ +#pragma once + +#include +#include + +#include +#include + +namespace DB +{ + +class MetadataStorageFromPlainObjectStorageCreateDirectoryOperation final : public IMetadataOperation +{ +private: + std::filesystem::path path; + MetadataStorageFromPlainObjectStorage::PathMap & path_map; + ObjectStoragePtr object_storage; + + bool write_created = false; + +public: + // Assuming that paths are normalized. + MetadataStorageFromPlainObjectStorageCreateDirectoryOperation( + std::filesystem::path && path_, MetadataStorageFromPlainObjectStorage::PathMap & path_map_, ObjectStoragePtr object_storage_); + + void execute(std::unique_lock & metadata_lock) override; + void undo() override; +}; + +class MetadataStorageFromPlainObjectStorageMoveDirectoryOperation final : public IMetadataOperation +{ +private: + std::filesystem::path path_from; + std::filesystem::path path_to; + MetadataStorageFromPlainObjectStorage::PathMap & path_map; + ObjectStoragePtr object_storage; + + bool write_created = false; + bool write_finalized = false; + + std::unique_ptr + write(const std::filesystem::path & from, const std::filesystem::path & to, bool validate_content); + +public: + MetadataStorageFromPlainObjectStorageMoveDirectoryOperation( + std::filesystem::path && path_from_, + std::filesystem::path && path_to_, + MetadataStorageFromPlainObjectStorage::PathMap & path_map_, + ObjectStoragePtr object_storage_); + + void execute(std::unique_lock & metadata_lock) override; + + void undo() override; +}; + +class MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation final : public IMetadataOperation +{ +private: + std::filesystem::path path; + + MetadataStorageFromPlainObjectStorage::PathMap & path_map; + ObjectStoragePtr object_storage; + +public: + MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation( + std::filesystem::path && path_, MetadataStorageFromPlainObjectStorage::PathMap & path_map_, ObjectStoragePtr object_storage_); + + void execute(std::unique_lock & metadata_lock) override; + void undo() override; +}; + +} diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index 1fec0c4b6735..e80bcd85718a 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -1,9 +1,10 @@ #include "config.h" #include #if USE_AWS_S3 +#include #include +#include #include -#include #endif #if USE_HDFS && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) #include @@ -210,6 +211,38 @@ void registerS3PlainObjectStorage(ObjectStorageFactory & factory) return object_storage; }); } + +void registerS3PlainRewritableObjectStorage(ObjectStorageFactory & factory) +{ + static constexpr auto disk_type = "s3_plain_rewritable"; + + factory.registerObjectStorageType( + disk_type, + [](const std::string & name, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + const ContextPtr & context, + bool /* skip_access_check */) -> ObjectStoragePtr + { + /// send_metadata changes the filenames (includes revision), while + /// s3_plain do not care about this, and expect that the file name + /// will not be changed. + if (config.getBool(config_prefix + ".send_metadata", false)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "s3_plain_rewritable does not supports send_metadata"); + + auto uri = getS3URI(config, config_prefix, context); + auto s3_capabilities = getCapabilitiesFromConfig(config, config_prefix); + auto settings = getSettings(config, config_prefix, context); + auto client = getClient(config, config_prefix, context, *settings); + auto key_generator = getKeyGenerator(uri, config, config_prefix); + + auto object_storage = std::make_shared( + std::move(client), std::move(settings), uri, s3_capabilities, key_generator, name); + + return object_storage; + }); +} + #endif #if USE_HDFS && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) @@ -317,6 +350,7 @@ void registerObjectStorages() #if USE_AWS_S3 registerS3ObjectStorage(factory); registerS3PlainObjectStorage(factory); + registerS3PlainRewritableObjectStorage(factory); #endif #if USE_HDFS && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) diff --git a/src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp b/src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp index 669a01029517..433ff00146b3 100644 --- a/src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp @@ -53,6 +53,7 @@ void registerDiskObjectStorage(DiskFactory & factory, bool global_skip_access_ch #if USE_AWS_S3 factory.registerDiskType("s3", creator); /// For compatibility factory.registerDiskType("s3_plain", creator); /// For compatibility + factory.registerDiskType("s3_plain_rewritable", creator); #endif #if USE_HDFS factory.registerDiskType("hdfs", creator); /// For compatibility diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 3211332021e6..78847242b04f 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -568,10 +568,16 @@ ObjectStorageKey S3ObjectStorage::generateObjectKeyForPath(const std::string & p { if (!key_generator) throw Exception(ErrorCodes::LOGICAL_ERROR, "Key generator is not set"); - return key_generator->generate(path); -} + return key_generator->generate(path, /* is_directory */ false); +} +std::string S3ObjectStorage::generateObjectKeyPrefixForDirectoryPath(const std::string & path) const +{ + if (!key_generator) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Key generator is not set"); + return key_generator->generate(path, /* is_directory */ true).serialize(); +} } #endif diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index 4ece98c5ec44..2d88195117f4 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -43,8 +43,6 @@ struct S3ObjectStorageSettings class S3ObjectStorage : public IObjectStorage { private: - friend class S3PlainObjectStorage; - S3ObjectStorage( const char * logger_name, std::unique_ptr && client_, @@ -54,11 +52,11 @@ class S3ObjectStorage : public IObjectStorage ObjectStorageKeysGeneratorPtr key_generator_, const String & disk_name_) : uri(uri_) - , key_generator(std::move(key_generator_)) , disk_name(disk_name_) , client(std::move(client_)) , s3_settings(std::move(s3_settings_)) , s3_capabilities(s3_capabilities_) + , key_generator(std::move(key_generator_)) , log(getLogger(logger_name)) { } @@ -161,9 +159,12 @@ class S3ObjectStorage : public IObjectStorage bool supportParallelWrite() const override { return true; } ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override; + std::string generateObjectKeyPrefixForDirectoryPath(const std::string & path) const override; bool isReadOnly() const override { return s3_settings.get()->read_only; } + void setKeysGenerator(ObjectStorageKeysGeneratorPtr gen) override { key_generator = gen; } + private: void setNewSettings(std::unique_ptr && s3_settings_); @@ -172,13 +173,14 @@ class S3ObjectStorage : public IObjectStorage const S3::URI uri; - ObjectStorageKeysGeneratorPtr key_generator; std::string disk_name; MultiVersion client; MultiVersion s3_settings; S3Capabilities s3_capabilities; + ObjectStorageKeysGeneratorPtr key_generator; + LoggerPtr log; }; diff --git a/src/Disks/ObjectStorages/S3/S3PlainRewritableObjectStorage.h b/src/Disks/ObjectStorages/S3/S3PlainRewritableObjectStorage.h new file mode 100644 index 000000000000..7f43f32370af --- /dev/null +++ b/src/Disks/ObjectStorages/S3/S3PlainRewritableObjectStorage.h @@ -0,0 +1,23 @@ +#pragma once + +#include + +namespace DB +{ + +class S3PlainRewritableObjectStorage : public S3ObjectStorage +{ +public: + template + explicit S3PlainRewritableObjectStorage(Args &&... args) : S3ObjectStorage(std::forward(args)...) + { + } + + std::string getName() const override { return "S3PlainRewritableObjectStorage"; } + + bool isWriteOnce() const override { return false; } + + bool isPlain() const override { return true; } +}; + +} diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 814db3172b48..b3f9167b1254 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1672,7 +1672,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optionaliterateDirectory(relative_data_path); it->isValid(); it->next()) { /// Skip temporary directories, file 'format_version.txt' and directory 'detached'. - if (startsWith(it->name(), "tmp") || it->name() == MergeTreeData::FORMAT_VERSION_FILE_NAME + if (startsWith(it->name(), "tmp") || it->name() == MergeTreeData::FORMAT_VERSION_FILE_NAME || it->name() == "prefix.path" || it->name() == DETACHED_DIR_NAME) continue; diff --git a/tests/queries/0_stateless/03008_s3_plain_rewritable.reference b/tests/queries/0_stateless/03008_s3_plain_rewritable.reference new file mode 100644 index 000000000000..ee8b8c9fe3de --- /dev/null +++ b/tests/queries/0_stateless/03008_s3_plain_rewritable.reference @@ -0,0 +1 @@ +10000006 diff --git a/tests/queries/0_stateless/03008_s3_plain_rewritable.sql b/tests/queries/0_stateless/03008_s3_plain_rewritable.sql new file mode 100644 index 000000000000..2a7b0b8db4fe --- /dev/null +++ b/tests/queries/0_stateless/03008_s3_plain_rewritable.sql @@ -0,0 +1,18 @@ +-- Tags: no-fasttest +-- Tag: no-fasttest -- requires S3 + +drop table if exists test_s3; +create table test_s3 (a Int32) engine = MergeTree() order by a +settings disk=disk(name='s3_plain_rewritable', + type = s3_plain_rewritable, + endpoint = 'http://localhost:11111/test/s3_plain_rewritable/', + access_key_id = clickhouse, + secret_access_key = clickhouse, + send_metadata = false, skip_access_check=true + ); + +insert into test_s3 (*) values (1), (2), (3), (4), (5), (6); +insert into test_s3 (*) select * from numbers(10000000); + +select count(*) from test_s3 LIMIT 10; + From 7916792baaca3ae20c59f91b189950c5352f908d Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Mon, 18 Mar 2024 01:01:17 +0000 Subject: [PATCH 02/22] Do not create directory metadata recursively --- .../CommonPathPrefixKeyGenerator.cpp | 36 ++++---- .../CommonPathPrefixKeyGenerator.h | 4 +- .../MetadataStorageFromPlainObjectStorage.cpp | 85 +++++++++---------- .../MetadataStorageFromPlainObjectStorage.h | 5 +- 4 files changed, 63 insertions(+), 67 deletions(-) diff --git a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp index 7a3c2a7a8470..249284761b50 100644 --- a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp +++ b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp @@ -1,6 +1,5 @@ #include "CommonPathPrefixKeyGenerator.h" -#include #include #include @@ -9,39 +8,36 @@ namespace DB { -namespace ErrorCodes -{ -extern const int LOGICAL_ERROR; -} CommonPathPrefixKeyGenerator::CommonPathPrefixKeyGenerator(String key_prefix_, std::weak_ptr path_map_) - : key_prefix(key_prefix_), path_map(std::move(path_map_)) + : storage_key_prefix(key_prefix_), path_map(std::move(path_map_)) { } ObjectStorageKey CommonPathPrefixKeyGenerator::generate(const String & path, bool is_directory) const { - auto result = getLongestPrefix(path); - - const auto & unrealized_parts = std::get<1>(result); + const auto & [object_key_prefix, suffix_parts] = getLongestObjectKeyPrefix(path); - if (unrealized_parts.size() >= 2) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Can not find object key prefix for path {}", path); - - auto key = std::filesystem::path(std::get<0>(result)); - if (unrealized_parts.empty()) + auto key = std::filesystem::path(object_key_prefix.empty() ? storage_key_prefix : object_key_prefix); + if (suffix_parts.empty()) return ObjectStorageKey::createAsRelative(std::move(key)); - constexpr size_t part_size = 8; - if (is_directory) - key /= getRandomASCIIString(part_size); + if (!is_directory || object_key_prefix.empty()) + for (const auto & part : suffix_parts) + key /= part; else - key /= unrealized_parts.front(); + { + for (size_t i = 0; i + 1 < suffix_parts.size(); ++i) + key /= suffix_parts[i]; + + constexpr size_t part_size = 16; + key /= getRandomASCIIString(part_size); + } return ObjectStorageKey::createAsRelative(key); } -std::tuple> CommonPathPrefixKeyGenerator::getLongestPrefix(const std::string & path) const +std::tuple> CommonPathPrefixKeyGenerator::getLongestObjectKeyPrefix(const std::string & path) const { std::filesystem::path p(path); std::deque dq; @@ -63,7 +59,7 @@ std::tuple> CommonPathPrefixKeyGenerator:: p = p.parent_path(); } - return {key_prefix, std::vector(std::make_move_iterator(dq.begin()), std::make_move_iterator(dq.end()))}; + return {std::string(), std::vector(std::make_move_iterator(dq.begin()), std::make_move_iterator(dq.end()))}; } } diff --git a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h index e14f45c500f0..9eafb60317e0 100644 --- a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h +++ b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h @@ -17,9 +17,9 @@ class CommonPathPrefixKeyGenerator : public IObjectStorageKeysGenerator ObjectStorageKey generate(const String & path, bool is_directory) const override; private: - std::tuple> getLongestPrefix(const String & path) const; + std::tuple> getLongestObjectKeyPrefix(const String & path) const; - String key_prefix; + String storage_key_prefix; std::weak_ptr path_map; }; diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp index f3dd3daf8e2b..c7257487b7c4 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp @@ -86,7 +86,7 @@ bool MetadataStorageFromPlainObjectStorage::exists(const std::string & path) con { /// NOTE: exists() cannot be used here since it works only for existing /// key, and does not work for some intermediate path. - auto object_key = object_storage->generateObjectKeyForPath(path); + auto object_key = getObjectKeyForPath(path); return object_storage->existsOrHasAnyChild(object_key.serialize()); } @@ -98,21 +98,9 @@ bool MetadataStorageFromPlainObjectStorage::isFile(const std::string & path) con bool MetadataStorageFromPlainObjectStorage::isDirectory(const std::string & path) const { - auto key_prefix = [&] -> std::optional - { - if (object_storage->isWriteOnce()) - return object_storage->generateObjectKeyForPath(path).serialize(); - - auto it = path_map->find(normalizeDirectoryPath(path)); - return it == path_map->end() ? std::nullopt : std::make_optional(it->second); - }(); + auto key_prefix = getObjectKeyForPath(path).serialize(); + auto directory = std::filesystem::path(std::move(key_prefix)) / ""; - if (!key_prefix) - return false; - - std::string directory = key_prefix.value(); - if (!directory.ends_with('/')) - directory += '/'; return object_storage->existsOrHasAnyChild(directory); } @@ -152,30 +140,47 @@ std::vector getDirectChildrenOnRewritableDisk( const std::string & local_path, const MetadataStorageFromPlainObjectStorage::PathMap & local_path_prefixes) { - /// Subdirectories. + using PathMap = MetadataStorageFromPlainObjectStorage::PathMap; + std::unordered_set duplicates_filter; - for (const auto & [prefix, _] : local_path_prefixes) + + /// Map remote paths into local subdirectories. + std::unordered_map reversed; + for (const auto & [k, v] : local_path_prefixes) { - if (!prefix.starts_with(local_path)) + if (!k.starts_with(local_path)) continue; - auto slash_num = count(prefix.begin() + local_path.size(), prefix.end(), '/'); + auto slash_num = count(k.begin() + local_path.size(), k.end(), '/'); if (slash_num != 1) continue; - chassert(prefix.back() == '/'); - duplicates_filter.emplace(std::string(prefix.begin() + local_path.size(), prefix.end() - 1)); + chassert(k.back() == '/'); + reversed.emplace(v, std::string(k.begin() + local_path.size(), k.end() - 1)); } - /// File names. for (const auto & elem : remote_paths) { const auto & path = elem.relative_path; chassert(path.find(storage_key) == 0); const auto child_pos = storage_key.size(); - if (path.find('/', child_pos) == std::string::npos) + auto slash_pos = path.find('/', child_pos); + + /// File names. + if (slash_pos == std::string::npos) duplicates_filter.emplace(path.substr(child_pos)); + /// Subdirectories. + else + { + auto it = reversed.find(path.substr(0, slash_pos)); + /// Mapped subdirectories. + if (it != reversed.end()) + duplicates_filter.emplace(it->second); + /// The remote subdirectory name is the same as the local subdirectory. + else + duplicates_filter.emplace(path.substr(child_pos, slash_pos - child_pos)); + } } return std::vector(std::make_move_iterator(duplicates_filter.begin()), std::make_move_iterator(duplicates_filter.end())); @@ -218,6 +223,18 @@ StoredObjects MetadataStorageFromPlainObjectStorage::getStorageObjects(const std return {StoredObject(object_key.serialize(), path, object_size)}; } +ObjectStorageKey MetadataStorageFromPlainObjectStorage::getObjectKeyForPath(const std::string & path) const +{ + if (!object_storage->isWriteOnce()) + { + auto it = path_map->find(normalizeDirectoryPath(path)); + if (it != path_map->end()) + return ObjectStorageKey::createAsRelative(it->second); + } + + return object_storage->generateObjectKeyForPath(path); +} + const IMetadataStorage & MetadataStorageFromPlainObjectStorageTransaction::getStorageForNonTransactionalReads() const { return metadata_storage; @@ -252,27 +269,7 @@ void MetadataStorageFromPlainObjectStorageTransaction::createDirectory(const std void MetadataStorageFromPlainObjectStorageTransaction::createDirectoryRecursive(const std::string & path) { - if (metadata_storage.object_storage->isWriteOnce()) - return; - - auto p = normalizeDirectoryPath(path); - std::vector paths_created; - - while (true) - { - paths_created.push_back(p); - if (!p.has_parent_path()) - break; - - p = p.parent_path(); - } - - for (auto path_to_create : paths_created | std::views::reverse) - { - auto op = std::make_unique( - path_to_create / "", *metadata_storage.path_map, object_storage); - addOperation(std::move(op)); - } + return createDirectory(path); } void MetadataStorageFromPlainObjectStorageTransaction::moveDirectory(const std::string & path_from, const std::string & path_to) diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h index cff7d48f368f..6427ef2e79aa 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h @@ -77,6 +77,9 @@ class MetadataStorageFromPlainObjectStorage final : public IMetadataStorage bool supportsChmod() const override { return false; } bool supportsStat() const override { return false; } + +private: + ObjectStorageKey getObjectKeyForPath(const std::string & path) const; }; class MetadataStorageFromPlainObjectStorageTransaction final : public IMetadataTransaction, private MetadataOperationsHolder @@ -86,6 +89,7 @@ class MetadataStorageFromPlainObjectStorageTransaction final : public IMetadataT ObjectStoragePtr object_storage; std::vector operations; + public: explicit MetadataStorageFromPlainObjectStorageTransaction( MetadataStorageFromPlainObjectStorage & metadata_storage_, ObjectStoragePtr object_storage_) @@ -126,5 +130,4 @@ class MetadataStorageFromPlainObjectStorageTransaction final : public IMetadataT bool supportsChmod() const override { return false; } }; - } From b4375131cbb4c8361b65af275c62e6ca81551db1 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Wed, 20 Mar 2024 06:45:35 +0000 Subject: [PATCH 03/22] concurrency control --- .../CommonPathPrefixKeyGenerator.cpp | 7 ++- .../CommonPathPrefixKeyGenerator.h | 7 ++- .../MetadataStorageFromPlainObjectStorage.cpp | 54 +++++++++---------- .../MetadataStorageFromPlainObjectStorage.h | 3 -- ...torageFromPlainObjectStorageOperations.cpp | 8 +-- ...aStorageFromPlainObjectStorageOperations.h | 6 ++- 6 files changed, 45 insertions(+), 40 deletions(-) diff --git a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp index 249284761b50..159be00e9c7f 100644 --- a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp +++ b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp @@ -9,8 +9,9 @@ namespace DB { -CommonPathPrefixKeyGenerator::CommonPathPrefixKeyGenerator(String key_prefix_, std::weak_ptr path_map_) - : storage_key_prefix(key_prefix_), path_map(std::move(path_map_)) +CommonPathPrefixKeyGenerator::CommonPathPrefixKeyGenerator( + String key_prefix_, SharedMutex & shared_mutex_, std::weak_ptr path_map_) + : storage_key_prefix(key_prefix_), shared_mutex(shared_mutex_), path_map(std::move(path_map_)) { } @@ -42,6 +43,8 @@ std::tuple> CommonPathPrefixKeyGenerator:: std::filesystem::path p(path); std::deque dq; + std::shared_lock lock(shared_mutex); + auto ptr = path_map.lock(); while (p != p.root_path()) diff --git a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h index 9eafb60317e0..6e13713a7bb8 100644 --- a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h +++ b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include @@ -12,14 +13,16 @@ class CommonPathPrefixKeyGenerator : public IObjectStorageKeysGenerator public: using PathMap = std::unordered_map; - explicit CommonPathPrefixKeyGenerator(String key_prefix_, std::weak_ptr path_map_); + explicit CommonPathPrefixKeyGenerator(String key_prefix_, SharedMutex & shared_mutex_, std::weak_ptr path_map_); ObjectStorageKey generate(const String & path, bool is_directory) const override; private: std::tuple> getLongestObjectKeyPrefix(const String & path) const; - String storage_key_prefix; + const String storage_key_prefix; + + SharedMutex & shared_mutex; std::weak_ptr path_map; }; diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp index c7257487b7c4..a6512af9fe5c 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp @@ -66,7 +66,7 @@ MetadataStorageFromPlainObjectStorage::MetadataStorageFromPlainObjectStorage(Obj if (!object_storage->isWriteOnce()) { - auto keys_gen = std::make_shared(object_storage->getCommonKeyPrefix(), path_map); + auto keys_gen = std::make_shared(object_storage->getCommonKeyPrefix(), metadata_mutex, path_map); object_storage->setKeysGenerator(keys_gen); } } @@ -86,7 +86,7 @@ bool MetadataStorageFromPlainObjectStorage::exists(const std::string & path) con { /// NOTE: exists() cannot be used here since it works only for existing /// key, and does not work for some intermediate path. - auto object_key = getObjectKeyForPath(path); + auto object_key = object_storage->generateObjectKeyForPath(path); return object_storage->existsOrHasAnyChild(object_key.serialize()); } @@ -98,7 +98,7 @@ bool MetadataStorageFromPlainObjectStorage::isFile(const std::string & path) con bool MetadataStorageFromPlainObjectStorage::isDirectory(const std::string & path) const { - auto key_prefix = getObjectKeyForPath(path).serialize(); + auto key_prefix = object_storage->generateObjectKeyForPath(path).serialize(); auto directory = std::filesystem::path(std::move(key_prefix)) / ""; return object_storage->existsOrHasAnyChild(directory); @@ -138,7 +138,8 @@ std::vector getDirectChildrenOnRewritableDisk( const std::string & storage_key, const RelativePathsWithMetadata & remote_paths, const std::string & local_path, - const MetadataStorageFromPlainObjectStorage::PathMap & local_path_prefixes) + const MetadataStorageFromPlainObjectStorage::PathMap & local_path_prefixes, + SharedMutex & shared_mutex) { using PathMap = MetadataStorageFromPlainObjectStorage::PathMap; @@ -146,17 +147,21 @@ std::vector getDirectChildrenOnRewritableDisk( /// Map remote paths into local subdirectories. std::unordered_map reversed; - for (const auto & [k, v] : local_path_prefixes) + { - if (!k.starts_with(local_path)) - continue; + std::shared_lock lock(shared_mutex); + for (const auto & [k, v] : local_path_prefixes) + { + if (!k.starts_with(local_path)) + continue; - auto slash_num = count(k.begin() + local_path.size(), k.end(), '/'); - if (slash_num != 1) - continue; + auto slash_num = count(k.begin() + local_path.size(), k.end(), '/'); + if (slash_num != 1) + continue; - chassert(k.back() == '/'); - reversed.emplace(v, std::string(k.begin() + local_path.size(), k.end() - 1)); + chassert(k.back() == '/'); + reversed.emplace(v, std::string(k.begin() + local_path.size(), k.end() - 1)); + } } for (const auto & elem : remote_paths) @@ -189,8 +194,7 @@ std::vector getDirectChildrenOnRewritableDisk( std::vector MetadataStorageFromPlainObjectStorage::listDirectory(const std::string & path) const { - auto key_prefix = object_storage->isWriteOnce() ? object_storage->generateObjectKeyForPath(path).serialize() - : object_storage->generateObjectKeyPrefixForDirectoryPath(path); + auto key_prefix = object_storage->generateObjectKeyForPath(path).serialize(); RelativePathsWithMetadata files; std::string abs_key = key_prefix; @@ -202,7 +206,7 @@ std::vector MetadataStorageFromPlainObjectStorage::listDirectory(co if (object_storage->isWriteOnce()) return getDirectChildrenOnWriteOnceDisk(abs_key, files); else - return getDirectChildrenOnRewritableDisk(abs_key, files, path, *path_map); + return getDirectChildrenOnRewritableDisk(abs_key, files, path, *path_map, metadata_mutex); } @@ -223,18 +227,6 @@ StoredObjects MetadataStorageFromPlainObjectStorage::getStorageObjects(const std return {StoredObject(object_key.serialize(), path, object_size)}; } -ObjectStorageKey MetadataStorageFromPlainObjectStorage::getObjectKeyForPath(const std::string & path) const -{ - if (!object_storage->isWriteOnce()) - { - auto it = path_map->find(normalizeDirectoryPath(path)); - if (it != path_map->end()) - return ObjectStorageKey::createAsRelative(it->second); - } - - return object_storage->generateObjectKeyForPath(path); -} - const IMetadataStorage & MetadataStorageFromPlainObjectStorageTransaction::getStorageForNonTransactionalReads() const { return metadata_storage; @@ -249,12 +241,15 @@ void MetadataStorageFromPlainObjectStorageTransaction::unlinkFile(const std::str void MetadataStorageFromPlainObjectStorageTransaction::removeDirectory(const std::string & path) { - /// TODO: Make transactional. + /// TODO: Should it be handled by the disk transaction? for (auto it = metadata_storage.iterateDirectory(path); it->isValid(); it->next()) metadata_storage.object_storage->removeObject(StoredObject(it->path())); + if (!metadata_storage.object_storage->isWriteOnce()) + { addOperation(std::make_unique( path, *metadata_storage.path_map, object_storage)); + } } void MetadataStorageFromPlainObjectStorageTransaction::createDirectory(const std::string & path) @@ -262,8 +257,9 @@ void MetadataStorageFromPlainObjectStorageTransaction::createDirectory(const std if (metadata_storage.object_storage->isWriteOnce()) return; + auto key_prefix = object_storage->generateObjectKeyPrefixForDirectoryPath(std::filesystem::path(path) / ""); auto op = std::make_unique( - normalizeDirectoryPath(path), *metadata_storage.path_map, object_storage); + normalizeDirectoryPath(path), std::move(key_prefix), *metadata_storage.path_map, object_storage); addOperation(std::move(op)); } diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h index 6427ef2e79aa..b78137f4729e 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h @@ -77,9 +77,6 @@ class MetadataStorageFromPlainObjectStorage final : public IMetadataStorage bool supportsChmod() const override { return false; } bool supportsStat() const override { return false; } - -private: - ObjectStorageKey getObjectKeyForPath(const std::string & path) const; }; class MetadataStorageFromPlainObjectStorageTransaction final : public IMetadataTransaction, private MetadataOperationsHolder diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp index c36687412441..8b28db3bb532 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp @@ -24,8 +24,11 @@ constexpr auto PREFIX_PATH_FILE_NAME = "prefix.path"; } MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::MetadataStorageFromPlainObjectStorageCreateDirectoryOperation( - std::filesystem::path && path_, MetadataStorageFromPlainObjectStorage::PathMap & path_map_, ObjectStoragePtr object_storage_) - : path(std::move(path_)), path_map(path_map_), object_storage(object_storage_) + std::filesystem::path && path_, + std::string && key_prefix_, + MetadataStorageFromPlainObjectStorage::PathMap & path_map_, + ObjectStoragePtr object_storage_) + : path(std::move(path_)), key_prefix(key_prefix_), path_map(path_map_), object_storage(object_storage_) { } @@ -36,7 +39,6 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std: LOG_TRACE(getLogger("MetadataStorageFromPlainObjectStorageCreateDirectoryOperation"), "Creating metadata for directory '{}'", path); - auto key_prefix = object_storage->generateObjectKeyPrefixForDirectoryPath(path / ""); auto object_key = ObjectStorageKey::createAsRelative(key_prefix, PREFIX_PATH_FILE_NAME); auto object = StoredObject(object_key.serialize(), path / PREFIX_PATH_FILE_NAME); diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h index 84189a492439..c853a3bfd842 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h @@ -13,6 +13,7 @@ class MetadataStorageFromPlainObjectStorageCreateDirectoryOperation final : publ { private: std::filesystem::path path; + std::string key_prefix; MetadataStorageFromPlainObjectStorage::PathMap & path_map; ObjectStoragePtr object_storage; @@ -21,7 +22,10 @@ class MetadataStorageFromPlainObjectStorageCreateDirectoryOperation final : publ public: // Assuming that paths are normalized. MetadataStorageFromPlainObjectStorageCreateDirectoryOperation( - std::filesystem::path && path_, MetadataStorageFromPlainObjectStorage::PathMap & path_map_, ObjectStoragePtr object_storage_); + std::filesystem::path && path_, + std::string && key_prefix_, + MetadataStorageFromPlainObjectStorage::PathMap & path_map_, + ObjectStoragePtr object_storage_); void execute(std::unique_lock & metadata_lock) override; void undo() override; From d1e5a09b1896a6092bc2d09a0c72d83d689925a1 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Mon, 25 Mar 2024 00:50:55 +0000 Subject: [PATCH 04/22] better transaction rollback --- .../MetadataStorageFromPlainObjectStorage.cpp | 11 ++--- ...torageFromPlainObjectStorageOperations.cpp | 40 ++++++++++++++----- ...aStorageFromPlainObjectStorageOperations.h | 6 ++- 3 files changed, 40 insertions(+), 17 deletions(-) diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp index a6512af9fe5c..560798159870 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp @@ -241,11 +241,12 @@ void MetadataStorageFromPlainObjectStorageTransaction::unlinkFile(const std::str void MetadataStorageFromPlainObjectStorageTransaction::removeDirectory(const std::string & path) { - /// TODO: Should it be handled by the disk transaction? - for (auto it = metadata_storage.iterateDirectory(path); it->isValid(); it->next()) - metadata_storage.object_storage->removeObject(StoredObject(it->path())); - - if (!metadata_storage.object_storage->isWriteOnce()) + if (metadata_storage.object_storage->isWriteOnce()) + { + for (auto it = metadata_storage.iterateDirectory(path); it->isValid(); it->next()) + metadata_storage.object_storage->removeObject(StoredObject(it->path())); + } + else { addOperation(std::make_unique( path, *metadata_storage.path_map, object_storage)); diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp index 8b28db3bb532..e74c6487ff4b 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp @@ -55,16 +55,20 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std: writeString(path.string(), *buf); buf->finalize(); + + write_finalized = true; } void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::undo() { - if (!write_created) - return; + if (write_finalized) + path_map.erase(path); - auto node = path_map.extract(path); - auto object_key = ObjectStorageKey::createAsRelative(node.mapped(), PREFIX_PATH_FILE_NAME); - object_storage->removeObject(StoredObject(object_key.serialize(), path)); + if (write_created) + { + auto object_key = ObjectStorageKey::createAsRelative(key_prefix, PREFIX_PATH_FILE_NAME); + object_storage->removeObject(StoredObject(object_key.serialize(), path / PREFIX_PATH_FILE_NAME)); + } } MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::MetadataStorageFromPlainObjectStorageMoveDirectoryOperation( @@ -76,7 +80,7 @@ MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::MetadataStorageFrom { } -std::unique_ptr MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::write( +std::unique_ptr MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::moveObject( const std::filesystem::path & from, const std::filesystem::path & to, bool validate_content) { auto from_it = path_map.find(from); @@ -95,7 +99,6 @@ std::unique_ptr MetadataStorageFromPlainObjectStorageMo std::string data; auto readBuf = object_storage->readObject(object, {}); readStringUntilEOF(data, *readBuf); - chassert(data == path_from); if (data != path_from) throw Exception( ErrorCodes::INCORRECT_DATA, @@ -120,7 +123,7 @@ void MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::execute(std::u LOG_TRACE( getLogger("MetadataStorageFromPlainObjectStorageMoveDirectoryOperation"), "Moving directory '{}' to '{}'", path_from, path_to); - auto write_buf = write(path_from, path_to, /* validate_content */ true); + auto write_buf = moveObject(path_from, path_to, /* validate_content */ true); write_created = true; writeString(path_to.string(), *write_buf); write_buf->finalize(); @@ -136,7 +139,7 @@ void MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::undo() if (write_created) { - auto write_buf = write(path_to, path_from, /* verify_content */ false); + auto write_buf = moveObject(path_to, path_from, /* verify_content */ false); writeString(path_from.string(), *write_buf); write_buf->finalize(); } @@ -156,15 +159,30 @@ void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::execute(std: LOG_TRACE(getLogger("MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation"), "Removing directory '{}'", path); - auto object_key = ObjectStorageKey::createAsRelative(path_it->second, PREFIX_PATH_FILE_NAME); + key_prefix = path_it->second; + auto object_key = ObjectStorageKey::createAsRelative(key_prefix, PREFIX_PATH_FILE_NAME); auto object = StoredObject(path_it->second, path / PREFIX_PATH_FILE_NAME); object_storage->removeObject(object); path_map.erase(path_it); } -/// TODO void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::undo() { + if (!removed) + return; + + auto object_key = ObjectStorageKey::createAsRelative(key_prefix, PREFIX_PATH_FILE_NAME); + auto object = StoredObject(object_key.serialize(), path / PREFIX_PATH_FILE_NAME); + auto buf = object_storage->writeObject( + object, + WriteMode::Rewrite, + /* object_attributes */ std::nullopt, + /* buf_size */ 4096, + /* settings */ {}); + writeString(path.string(), *buf); + buf->finalize(); + + path_map.emplace(path, std::move(key_prefix)); } } diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h index c853a3bfd842..d2cbbf5df2af 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h @@ -18,6 +18,7 @@ class MetadataStorageFromPlainObjectStorageCreateDirectoryOperation final : publ ObjectStoragePtr object_storage; bool write_created = false; + bool write_finalized = false; public: // Assuming that paths are normalized. @@ -43,7 +44,7 @@ class MetadataStorageFromPlainObjectStorageMoveDirectoryOperation final : public bool write_finalized = false; std::unique_ptr - write(const std::filesystem::path & from, const std::filesystem::path & to, bool validate_content); + moveObject(const std::filesystem::path & from, const std::filesystem::path & to, bool validate_content); public: MetadataStorageFromPlainObjectStorageMoveDirectoryOperation( @@ -65,6 +66,9 @@ class MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation final : publ MetadataStorageFromPlainObjectStorage::PathMap & path_map; ObjectStoragePtr object_storage; + std::string key_prefix; + bool removed = false; + public: MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation( std::filesystem::path && path_, MetadataStorageFromPlainObjectStorage::PathMap & path_map_, ObjectStoragePtr object_storage_); From a67a8299ce3a8a5afc266733b650c5cb44578a8e Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Mon, 1 Apr 2024 21:42:48 +0000 Subject: [PATCH 05/22] Do not list prefix.path in listDirectory --- .../MetadataStorageFromPlainObjectStorage.cpp | 29 ++++++++++++------- ...torageFromPlainObjectStorageOperations.cpp | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- 3 files changed, 20 insertions(+), 13 deletions(-) diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp index 560798159870..e90bb6685ffb 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp @@ -20,6 +20,8 @@ namespace DB namespace { +constexpr auto PREFIX_PATH_FILE_NAME = "prefix.path"; + std::filesystem::path normalizePath(const std::filesystem::path & path) { return std::filesystem::path(path).lexically_normal(); @@ -32,7 +34,6 @@ std::filesystem::path normalizeDirectoryPath(const std::filesystem::path & path) MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::string & root, ObjectStoragePtr object_storage) { - constexpr auto PREFIX_PATH_FILE_NAME = "prefix.path"; MetadataStorageFromPlainObjectStorage::PathMap result; RelativePathsWithMetadata files; @@ -146,7 +147,7 @@ std::vector getDirectChildrenOnRewritableDisk( std::unordered_set duplicates_filter; /// Map remote paths into local subdirectories. - std::unordered_map reversed; + std::unordered_map remote_to_local_subdir; { std::shared_lock lock(shared_mutex); @@ -160,10 +161,11 @@ std::vector getDirectChildrenOnRewritableDisk( continue; chassert(k.back() == '/'); - reversed.emplace(v, std::string(k.begin() + local_path.size(), k.end() - 1)); + remote_to_local_subdir.emplace(v, std::string(k.begin() + local_path.size(), k.end() - 1)); } } + auto skip_list = std::set{PREFIX_PATH_FILE_NAME}; for (const auto & elem : remote_paths) { const auto & path = elem.relative_path; @@ -172,15 +174,19 @@ std::vector getDirectChildrenOnRewritableDisk( auto slash_pos = path.find('/', child_pos); - /// File names. if (slash_pos == std::string::npos) - duplicates_filter.emplace(path.substr(child_pos)); - /// Subdirectories. + { + /// File names. + auto filename = path.substr(child_pos); + if (!skip_list.contains(filename)) + duplicates_filter.emplace(std::move(filename)); + } else { - auto it = reversed.find(path.substr(0, slash_pos)); + /// Subdirectories. + auto it = remote_to_local_subdir.find(path.substr(0, slash_pos)); /// Mapped subdirectories. - if (it != reversed.end()) + if (it != remote_to_local_subdir.end()) duplicates_filter.emplace(it->second); /// The remote subdirectory name is the same as the local subdirectory. else @@ -249,7 +255,7 @@ void MetadataStorageFromPlainObjectStorageTransaction::removeDirectory(const std else { addOperation(std::make_unique( - path, *metadata_storage.path_map, object_storage)); + normalizeDirectoryPath(path), *metadata_storage.path_map, object_storage)); } } @@ -258,9 +264,10 @@ void MetadataStorageFromPlainObjectStorageTransaction::createDirectory(const std if (metadata_storage.object_storage->isWriteOnce()) return; - auto key_prefix = object_storage->generateObjectKeyPrefixForDirectoryPath(std::filesystem::path(path) / ""); + auto normalized_path = normalizeDirectoryPath(path); + auto key_prefix = object_storage->generateObjectKeyPrefixForDirectoryPath(normalized_path); auto op = std::make_unique( - normalizeDirectoryPath(path), std::move(key_prefix), *metadata_storage.path_map, object_storage); + std::move(normalized_path), std::move(key_prefix), *metadata_storage.path_map, object_storage); addOperation(std::move(op)); } diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp index e74c6487ff4b..3cafee047e88 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp @@ -161,7 +161,7 @@ void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::execute(std: key_prefix = path_it->second; auto object_key = ObjectStorageKey::createAsRelative(key_prefix, PREFIX_PATH_FILE_NAME); - auto object = StoredObject(path_it->second, path / PREFIX_PATH_FILE_NAME); + auto object = StoredObject(object_key.serialize(), path / PREFIX_PATH_FILE_NAME); object_storage->removeObject(object); path_map.erase(path_it); } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index b3f9167b1254..814db3172b48 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1672,7 +1672,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optionaliterateDirectory(relative_data_path); it->isValid(); it->next()) { /// Skip temporary directories, file 'format_version.txt' and directory 'detached'. - if (startsWith(it->name(), "tmp") || it->name() == MergeTreeData::FORMAT_VERSION_FILE_NAME || it->name() == "prefix.path" + if (startsWith(it->name(), "tmp") || it->name() == MergeTreeData::FORMAT_VERSION_FILE_NAME || it->name() == DETACHED_DIR_NAME) continue; From cb3cf73be20e55b28b81fab0260004bdc7979a27 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Wed, 27 Mar 2024 02:12:26 +0000 Subject: [PATCH 06/22] more tests --- .../03008_s3_plain_rewritable.reference | 12 +++++++- .../0_stateless/03008_s3_plain_rewritable.sql | 28 +++++++++++-------- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/tests/queries/0_stateless/03008_s3_plain_rewritable.reference b/tests/queries/0_stateless/03008_s3_plain_rewritable.reference index ee8b8c9fe3de..35c782ee432c 100644 --- a/tests/queries/0_stateless/03008_s3_plain_rewritable.reference +++ b/tests/queries/0_stateless/03008_s3_plain_rewritable.reference @@ -1 +1,11 @@ -10000006 +1000006 +0 0 +1 1 +1 2 +2 2 +2 2 +3 1 +3 3 +4 4 +4 7 +5 5 diff --git a/tests/queries/0_stateless/03008_s3_plain_rewritable.sql b/tests/queries/0_stateless/03008_s3_plain_rewritable.sql index 2a7b0b8db4fe..345820b69a6b 100644 --- a/tests/queries/0_stateless/03008_s3_plain_rewritable.sql +++ b/tests/queries/0_stateless/03008_s3_plain_rewritable.sql @@ -1,18 +1,22 @@ -- Tags: no-fasttest -- Tag: no-fasttest -- requires S3 -drop table if exists test_s3; -create table test_s3 (a Int32) engine = MergeTree() order by a -settings disk=disk(name='s3_plain_rewritable', - type = s3_plain_rewritable, - endpoint = 'http://localhost:11111/test/s3_plain_rewritable/', - access_key_id = clickhouse, - secret_access_key = clickhouse, - send_metadata = false, skip_access_check=true - ); +drop table if exists test_mt; +create table test_mt (a Int32, b Int64) engine = MergeTree() order by a +settings disk = disk( + type = s3_plain_rewritable, + endpoint = 'http://localhost:11111/test/test_mt/', + access_key_id = clickhouse, + secret_access_key = clickhouse); -insert into test_s3 (*) values (1), (2), (3), (4), (5), (6); -insert into test_s3 (*) select * from numbers(10000000); +insert into test_mt (*) values (1, 2), (2, 2), (3, 1), (4, 7), (5, 10), (6, 12); +insert into test_mt (*) select number, number from numbers_mt(1000000); -select count(*) from test_s3 LIMIT 10; +select count(*) from test_mt; +select (*) from test_mt order by tuple(a, b) limit 10; +-- File moving is not supported. +alter table test_mt update b = 0 where a % 2 = 1; --{ serverError NOT_IMPLEMENTED } + +alter table test_mt add column c Int64 after b; --{ serverError BAD_GET } +alter table test_mt drop column b; --{ serverError BAD_GET } From a3ce6162c44a48caea4e2e59087925ffc77044f5 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Thu, 28 Mar 2024 03:48:57 +0000 Subject: [PATCH 07/22] add integration test --- .../test_s3_plain_rewritable/__init__.py | 0 .../configs/storage_conf.xml | 21 +++++ .../test_s3_plain_rewritable/test.py | 78 +++++++++++++++++++ 3 files changed, 99 insertions(+) create mode 100644 tests/integration/test_s3_plain_rewritable/__init__.py create mode 100644 tests/integration/test_s3_plain_rewritable/configs/storage_conf.xml create mode 100644 tests/integration/test_s3_plain_rewritable/test.py diff --git a/tests/integration/test_s3_plain_rewritable/__init__.py b/tests/integration/test_s3_plain_rewritable/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/integration/test_s3_plain_rewritable/configs/storage_conf.xml b/tests/integration/test_s3_plain_rewritable/configs/storage_conf.xml new file mode 100644 index 000000000000..1e4641fc8b22 --- /dev/null +++ b/tests/integration/test_s3_plain_rewritable/configs/storage_conf.xml @@ -0,0 +1,21 @@ + + + + + s3_plain_rewritable + http://minio1:9001/root/data/ + minio + minio123 + + + + + +
+ disk_s3_plain_rewritable +
+
+
+
+
+
diff --git a/tests/integration/test_s3_plain_rewritable/test.py b/tests/integration/test_s3_plain_rewritable/test.py new file mode 100644 index 000000000000..ac35da01897d --- /dev/null +++ b/tests/integration/test_s3_plain_rewritable/test.py @@ -0,0 +1,78 @@ +import pytest +import random +import string + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + main_configs=["configs/storage_conf.xml"], + with_minio=True, + stay_alive=True, +) + +insert_values = [ + "(0,'data'),(1,'data')", + ",".join( + f"({i},'{''.join(random.choices(string.ascii_lowercase, k=5))}')" + for i in range(10) + ), +] + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_insert(): + for index, value in enumerate(insert_values): + node.query( + """ + CREATE TABLE test_{} ( + id Int64, + data String + ) ENGINE=MergeTree() + ORDER BY id + SETTINGS storage_policy='s3_plain_rewritable' + """.format( + index + ) + ) + + node.query("INSERT INTO test_{} VALUES {}".format(index, value)) + assert ( + node.query("SELECT * FROM test_{} ORDER BY id FORMAT Values".format(index)) + == value + ) + + +def test_restart(): + for index, value in enumerate(insert_values): + assert ( + node.query("SELECT * FROM test_{} ORDER BY id FORMAT Values".format(index)) + == value + ) + node.restart_clickhouse() + + for index, value in enumerate(insert_values): + assert ( + node.query("SELECT * FROM test_{} ORDER BY id FORMAT Values".format(index)) + == value + ) + + +def test_drop(): + for index, value in enumerate(insert_values): + node.query("DROP TABLE IF EXISTS test_{} SYNC".format(index)) + + it = cluster.minio_client.list_objects( + cluster.minio_bucket, "data/", recursive=True + ) + + assert len(list(it)) == 0 From 5e716bcfb894ca3797b346ce12b52ec8604c2221 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Sun, 7 Apr 2024 06:07:12 +0000 Subject: [PATCH 08/22] extend system logs integration tests --- .../configs/config.d/disks.xml | 15 +++++++++- ...logs_engine_s3_plain_rewritable_policy.xml | 6 ++++ .../test_system_logs/test_system_logs.py | 28 +++++++++++++++++++ 3 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 tests/integration/test_system_logs/configs/config.d/system_logs_engine_s3_plain_rewritable_policy.xml diff --git a/tests/integration/test_system_logs/configs/config.d/disks.xml b/tests/integration/test_system_logs/configs/config.d/disks.xml index 90a1b110326e..fc41e62eb4be 100644 --- a/tests/integration/test_system_logs/configs/config.d/disks.xml +++ b/tests/integration/test_system_logs/configs/config.d/disks.xml @@ -7,6 +7,12 @@ /var/lib/clickhouse2/ + + s3_plain_rewritable + http://minio1:9001/root/data/ + minio + minio123 +
@@ -23,6 +29,13 @@ + + + + disk3 + + + - \ No newline at end of file + diff --git a/tests/integration/test_system_logs/configs/config.d/system_logs_engine_s3_plain_rewritable_policy.xml b/tests/integration/test_system_logs/configs/config.d/system_logs_engine_s3_plain_rewritable_policy.xml new file mode 100644 index 000000000000..3c14e130f617 --- /dev/null +++ b/tests/integration/test_system_logs/configs/config.d/system_logs_engine_s3_plain_rewritable_policy.xml @@ -0,0 +1,6 @@ + + + + Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day SETTINGS storage_policy='s3_plain_rewritable', ttl_only_drop_parts=1 + + diff --git a/tests/integration/test_system_logs/test_system_logs.py b/tests/integration/test_system_logs/test_system_logs.py index 72249cd64ee6..1c45e69957b2 100644 --- a/tests/integration/test_system_logs/test_system_logs.py +++ b/tests/integration/test_system_logs/test_system_logs.py @@ -35,6 +35,18 @@ ) +node4 = cluster.add_instance( + "node4", + base_config_dir="configs", + main_configs=[ + "configs/config.d/system_logs_engine_s3_plain_rewritable_policy.xml", + "configs/config.d/disks.xml", + ], + with_minio=True, + stay_alive=True, +) + + @pytest.fixture(scope="module", autouse=True) def start_cluster(): try: @@ -78,6 +90,22 @@ def test_system_logs_engine_expr(start_cluster): ) +def test_system_logs_engine_s3_plain_rw_expr(start_cluster): + node4.query("SET log_query_threads = 1") + node4.query("SELECT count() FROM system.tables") + node4.query("SYSTEM FLUSH LOGS") + + # Check 'engine_full' of system.query_log. + expected = "MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + toIntervalDay(30) SETTINGS storage_policy = \\'s3_plain_rewritable\\', ttl_only_drop_parts = 1" + assert expected in node4.query( + "SELECT engine_full FROM system.tables WHERE database='system' and name='query_log'" + ) + node4.restart_clickhouse() + assert expected in node4.query( + "SELECT engine_full FROM system.tables WHERE database='system' and name='query_log'" + ) + + def test_system_logs_settings_expr(start_cluster): node3.query("SET log_query_threads = 1") node3.query("SELECT count() FROM system.tables") From 8c99b0d5eb7bb850da5a3750954f7847eb61ca20 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Mon, 8 Apr 2024 22:40:22 +0000 Subject: [PATCH 09/22] docs --- .../table-engines/mergetree-family/mergetree.md | 1 + docs/en/operations/storing-data.md | 17 +++++++++++++++++ .../aspell-ignore/en/aspell-dict.txt | 1 + 3 files changed, 19 insertions(+) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 1bbd995d1895..886c29e755ed 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -769,6 +769,7 @@ In addition to local block devices, ClickHouse supports these storage types: - [`web` for read-only from web](#web-storage) - [`cache` for local caching](/docs/en/operations/storing-data.md/#using-local-cache) - [`s3_plain` for backups to S3](/docs/en/operations/backup#backuprestore-using-an-s3-disk) +- [`s3_plain_rewritable` for immutable, non-replicated tables in S3](/docs/en/operations/storing-data.md#s3-plain-rewritable-storage) ## Using Multiple Block Devices for Data Storage {#table_engine-mergetree-multiple-volumes} diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index 2c642dd2f0b5..a402a731933f 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -341,6 +341,23 @@ Configuration: ``` +### Using S3 Plain Rewritable Storage {#s3-plain-rewritable-storage} +A new disk type `s3_plain_rewritable` was introduced in `24.4`. +Similar to the `s3_plain` disk type, it does not require additional storage for metadata files; instead, metadata is stored in S3. +Unlike `s3_plain` disk type, `s3_plain_rewritable` allows executing merges and supports INSERT operations. +[Mutations](/docs/en/sql-reference/statements/alter#mutations) and replication of tables are not supported. + +A use case for this disk type are non-replicated `MergeTree` tables. e.g., system tables. + +Configuration: +``` xml + + s3_plain_rewritable + https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ + 1 + +``` + ### Using Azure Blob Storage {#azure-blob-storage} `MergeTree` family table engines can store data to [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) using a disk with type `azure_blob_storage`. diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 54ff4d1d0adc..0fcc5dd49547 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -2321,6 +2321,7 @@ retentions rethrow retransmit retriable +rewritable reverseUTF rightPad rightPadUTF From 01ee500b066f6498da37705814e224d49995829c Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Mon, 8 Apr 2024 23:11:18 +0000 Subject: [PATCH 10/22] improvements, cleanups, comments --- .../CommonPathPrefixKeyGenerator.cpp | 4 +++ .../CommonPathPrefixKeyGenerator.h | 10 ++++++++ .../ObjectStorages/MetadataOperationsHolder.h | 5 +++- .../MetadataStorageFromPlainObjectStorage.cpp | 25 +++++++++++++------ ...torageFromPlainObjectStorageOperations.cpp | 10 ++++---- .../ObjectStorages/ObjectStorageFactory.cpp | 9 ++++--- 6 files changed, 46 insertions(+), 17 deletions(-) diff --git a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp index 159be00e9c7f..e321c8a3c5ad 100644 --- a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp +++ b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.cpp @@ -20,12 +20,16 @@ ObjectStorageKey CommonPathPrefixKeyGenerator::generate(const String & path, boo const auto & [object_key_prefix, suffix_parts] = getLongestObjectKeyPrefix(path); auto key = std::filesystem::path(object_key_prefix.empty() ? storage_key_prefix : object_key_prefix); + + /// The longest prefix is the same as path, meaning that the path is already mapped. if (suffix_parts.empty()) return ObjectStorageKey::createAsRelative(std::move(key)); + /// File and top-level directory paths are mapped as is. if (!is_directory || object_key_prefix.empty()) for (const auto & part : suffix_parts) key /= part; + /// Replace the last part of the directory path with a pseudorandom suffix. else { for (size_t i = 0; i + 1 < suffix_parts.size(); ++i) diff --git a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h index 6e13713a7bb8..c0622c3e63a6 100644 --- a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h +++ b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h @@ -8,6 +8,15 @@ namespace DB { +/// Object storage key generator used specifically with the +/// MetadataStorageFromPlainObjectStorage if multiple writes are allowed. + +/// It searches for the local (metadata) path in a pre-loaded path map. +/// If no such path exists, it searches for the parent path, until it is found +/// or no parent path exists. +/// +/// The key generator ensures that the original directory hierarchy is +/// preserved, which is required for the MergeTree family. class CommonPathPrefixKeyGenerator : public IObjectStorageKeysGenerator { public: @@ -18,6 +27,7 @@ class CommonPathPrefixKeyGenerator : public IObjectStorageKeysGenerator ObjectStorageKey generate(const String & path, bool is_directory) const override; private: + /// Longest key prefix and unresolved parts of the source path. std::tuple> getLongestObjectKeyPrefix(const String & path) const; const String storage_key_prefix; diff --git a/src/Disks/ObjectStorages/MetadataOperationsHolder.h b/src/Disks/ObjectStorages/MetadataOperationsHolder.h index dc090675e4e1..e264df5e0949 100644 --- a/src/Disks/ObjectStorages/MetadataOperationsHolder.h +++ b/src/Disks/ObjectStorages/MetadataOperationsHolder.h @@ -1,10 +1,13 @@ #pragma once #include + +/// TODO: rename to MetadataStorageTransactionState. #include /** - * Implementations for transactional operations with metadata used by MetadataStorageFromDisk. + * Implementations for transactional operations with metadata used by MetadataStorageFromDisk + * and MetadataStorageFromPlainObjectStorage. */ namespace DB diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp index e90bb6685ffb..61bbfc07b79d 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp @@ -46,11 +46,23 @@ MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::stri StoredObject object{file.relative_path}; - auto read_buf = object_storage->readObject(object, {}); - String content; - readString(content, *read_buf); - - result.emplace(content, remote_path.parent_path().string()); + auto read_buf = object_storage->readObject(object); + String local_path; + readStringUntilEOF(local_path, *read_buf); + + chassert(remote_path.has_parent_path()); + auto res = result.emplace(local_path, remote_path.parent_path()); + + /// This can happen if table replication is enabled, then the same local path is written + /// in `prefix.path` of each replica. + /// TODO: should replicated tables (e.g., RMT) be explicitly disallowed? + if (!res.second) + LOG_WARNING( + getLogger("MetadataStorageFromPlainObjectStorage"), + "The local path '{}' is already mapped to a remote path '{}', ignoring: '{}'", + local_path, + res.first->second, + remote_path.parent_path().string()); } return result; } @@ -62,9 +74,6 @@ MetadataStorageFromPlainObjectStorage::MetadataStorageFromPlainObjectStorage(Obj , storage_path_prefix(std::move(storage_path_prefix_)) , path_map(std::make_shared(loadPathPrefixMap(object_storage->getCommonKeyPrefix(), object_storage))) { - LOG_TRACE( - getLogger("MetadataStorageFromPlainObjectStorage"), "MetadataStorageFromPlainObjectStorage::MetadataStorageFromPlainObjectStorage"); - if (!object_storage->isWriteOnce()) { auto keys_gen = std::make_shared(object_storage->getCommonKeyPrefix(), metadata_mutex, path_map); diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp index 3cafee047e88..827e2cd6fb87 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp @@ -46,7 +46,7 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std: object, WriteMode::Rewrite, /* object_attributes */ std::nullopt, - /* buf_size */ 4096, + /* buf_size */ DBMS_DEFAULT_BUFFER_SIZE, /* settings */ {}); write_created = true; @@ -97,7 +97,7 @@ std::unique_ptr MetadataStorageFromPlainObjectStorageMo if (validate_content) { std::string data; - auto readBuf = object_storage->readObject(object, {}); + auto readBuf = object_storage->readObject(object); readStringUntilEOF(data, *readBuf); if (data != path_from) throw Exception( @@ -111,8 +111,8 @@ std::unique_ptr MetadataStorageFromPlainObjectStorageMo auto write_buf = object_storage->writeObject( object, WriteMode::Rewrite, - std::nullopt, - /*buf_size*/ 4096, + /* object_attributes */ std::nullopt, + /*buf_size*/ DBMS_DEFAULT_BUFFER_SIZE, /*settings*/ {}); return write_buf; @@ -177,7 +177,7 @@ void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::undo() object, WriteMode::Rewrite, /* object_attributes */ std::nullopt, - /* buf_size */ 4096, + /* buf_size */ DBMS_DEFAULT_BUFFER_SIZE, /* settings */ {}); writeString(path.string(), *buf); buf->finalize(); diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index e80bcd85718a..958c6400a7d5 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -222,11 +222,10 @@ void registerS3PlainRewritableObjectStorage(ObjectStorageFactory & factory) const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, const ContextPtr & context, - bool /* skip_access_check */) -> ObjectStoragePtr + bool skip_access_check) -> ObjectStoragePtr { /// send_metadata changes the filenames (includes revision), while - /// s3_plain do not care about this, and expect that the file name - /// will not be changed. + /// s3_plain_rewritable does not support file renaming. if (config.getBool(config_prefix + ".send_metadata", false)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "s3_plain_rewritable does not supports send_metadata"); @@ -239,6 +238,10 @@ void registerS3PlainRewritableObjectStorage(ObjectStorageFactory & factory) auto object_storage = std::make_shared( std::move(client), std::move(settings), uri, s3_capabilities, key_generator, name); + /// NOTE: should we still perform this check for clickhouse-disks? + if (!skip_access_check) + checkS3Capabilities(*dynamic_cast(object_storage.get()), s3_capabilities, name); + return object_storage; }); } From 89f28f3c18f6d8d19b0f5d59c12548674d20bdda Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Wed, 24 Apr 2024 20:50:14 +0000 Subject: [PATCH 11/22] explicitly disallow ALTERs and mutations for plain --- src/Disks/IDisk.h | 3 ++ .../ObjectStorages/DiskObjectStorage.cpp | 5 ++++ src/Disks/ObjectStorages/DiskObjectStorage.h | 2 ++ src/Storages/MergeTree/MergeTreeData.cpp | 13 +++++++- .../03008_s3_plain_rewritable.reference | 22 +++++++------- .../0_stateless/03008_s3_plain_rewritable.sql | 30 ++++++++++++++----- 6 files changed, 56 insertions(+), 19 deletions(-) diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index fd5298588c59..83cf3d53057c 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -363,6 +363,9 @@ class IDisk : public Space virtual bool isWriteOnce() const { return false; } + /// Whether this disk support mutations. + virtual bool isMutable() const { return true; } + /// Check if disk is broken. Broken disks will have 0 space and cannot be used. virtual bool isBroken() const { return false; } diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index 06abec715679..70eb0ec6706a 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -435,6 +435,11 @@ bool DiskObjectStorage::isWriteOnce() const return object_storage->isWriteOnce(); } +bool DiskObjectStorage::isMutable() const +{ + return !isWriteOnce() && !object_storage->isPlain(); +} + DiskObjectStoragePtr DiskObjectStorage::createDiskObjectStorage() { const auto config_prefix = "storage_configuration.disks." + name; diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index 9702573b875f..8a7e52cee6a9 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -183,6 +183,8 @@ friend class DiskObjectStorageRemoteMetadataRestoreHelper; /// MergeTree table on this disk. bool isWriteOnce() const override; + bool isMutable() const override; + /// Get structure of object storage this disk works with. Examples: /// DiskObjectStorage(S3ObjectStorage) /// DiskObjectStorage(CachedObjectStorage(S3ObjectStorage)) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 814db3172b48..c7c2b940152c 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2964,6 +2964,10 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Experimental Inverted Index feature is not enabled (turn on setting 'allow_experimental_inverted_index')"); + for (const auto & disk : getDisks()) + if (!disk->isMutable()) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "ALTER TABLE is not supported for immutable disk '{}'", disk->getName()); + /// Set of columns that shouldn't be altered. NameSet columns_alter_type_forbidden; @@ -3334,7 +3338,9 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context void MergeTreeData::checkMutationIsPossible(const MutationCommands & /*commands*/, const Settings & /*settings*/) const { - /// Some validation will be added + for (const auto & disk : getDisks()) + if (!disk->isMutable()) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Mutations are not supported for immutable disk '{}'", disk->getName()); } MergeTreeDataPartFormat MergeTreeData::choosePartFormat(size_t bytes_uncompressed, size_t rows_count) const @@ -4824,6 +4830,11 @@ void MergeTreeData::removePartContributionToColumnAndSecondaryIndexSizes(const D void MergeTreeData::checkAlterPartitionIsPossible( const PartitionCommands & commands, const StorageMetadataPtr & /*metadata_snapshot*/, const Settings & settings, ContextPtr local_context) const { + for (const auto & disk : getDisks()) + if (!disk->isMutable()) + throw Exception( + ErrorCodes::SUPPORT_IS_DISABLED, "ALTER TABLE PARTITION is not supported for immutable disk '{}'", disk->getName()); + for (const auto & command : commands) { if (command.type == PartitionCommand::DROP_DETACHED_PARTITION diff --git a/tests/queries/0_stateless/03008_s3_plain_rewritable.reference b/tests/queries/0_stateless/03008_s3_plain_rewritable.reference index 35c782ee432c..9aa9873514a9 100644 --- a/tests/queries/0_stateless/03008_s3_plain_rewritable.reference +++ b/tests/queries/0_stateless/03008_s3_plain_rewritable.reference @@ -1,11 +1,11 @@ -1000006 -0 0 -1 1 -1 2 -2 2 -2 2 -3 1 -3 3 -4 4 -4 7 -5 5 +10006 +0 0 0 +1 1 1 +1 2 0 +2 2 2 +2 2 2 +3 1 9 +3 3 3 +4 4 4 +4 7 7 +5 5 5 diff --git a/tests/queries/0_stateless/03008_s3_plain_rewritable.sql b/tests/queries/0_stateless/03008_s3_plain_rewritable.sql index 345820b69a6b..1bfd8118875e 100644 --- a/tests/queries/0_stateless/03008_s3_plain_rewritable.sql +++ b/tests/queries/0_stateless/03008_s3_plain_rewritable.sql @@ -2,21 +2,37 @@ -- Tag: no-fasttest -- requires S3 drop table if exists test_mt; -create table test_mt (a Int32, b Int64) engine = MergeTree() order by a +create table test_mt (a Int32, b Int64, c Int64) engine = MergeTree() partition by intDiv(a, 500) order by tuple(a, b) settings disk = disk( type = s3_plain_rewritable, endpoint = 'http://localhost:11111/test/test_mt/', access_key_id = clickhouse, secret_access_key = clickhouse); -insert into test_mt (*) values (1, 2), (2, 2), (3, 1), (4, 7), (5, 10), (6, 12); -insert into test_mt (*) select number, number from numbers_mt(1000000); +insert into test_mt (*) values (1, 2, 0), (2, 2, 2), (3, 1, 9), (4, 7, 7), (5, 10, 2), (6, 12, 5); +insert into test_mt (*) select number, number, number from numbers_mt(10000); select count(*) from test_mt; select (*) from test_mt order by tuple(a, b) limit 10; --- File moving is not supported. -alter table test_mt update b = 0 where a % 2 = 1; --{ serverError NOT_IMPLEMENTED } +optimize table test_mt final; -alter table test_mt add column c Int64 after b; --{ serverError BAD_GET } -alter table test_mt drop column b; --{ serverError BAD_GET } +alter table test_mt add projection test_mt_projection ( + select * order by b); -- { serverError SUPPORT_IS_DISABLED } + +alter table test_mt update c = 0 where a % 2 = 1; -- { serverError SUPPORT_IS_DISABLED } +alter table test_mt add column d Int64 after c; -- { serverError SUPPORT_IS_DISABLED } +alter table test_mt drop column c; -- { serverError SUPPORT_IS_DISABLED } + +detach table test_mt; +attach table test_mt; + +drop table if exists test_mt_dst; + +create table test_mt_dst (a Int32, b Int64, c Int64) engine = MergeTree() partition by intDiv(a, 500) order by tuple(a, b) +settings disk = disk( + type = s3_plain_rewritable, + endpoint = 'http://localhost:11111/test/test_mt/', + access_key_id = clickhouse, + secret_access_key = clickhouse); +alter table test_mt move partition 0 to table test_mt_dst; -- { serverError SUPPORT_IS_DISABLED } From 70d55aa618e97227cba6d9b377f829314528dea7 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Thu, 25 Apr 2024 16:54:44 -0700 Subject: [PATCH 12/22] Update src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h index c0622c3e63a6..68a390a51b64 100644 --- a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h +++ b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h @@ -20,6 +20,7 @@ namespace DB class CommonPathPrefixKeyGenerator : public IObjectStorageKeysGenerator { public: + /// Local to remote path map. using PathMap = std::unordered_map; explicit CommonPathPrefixKeyGenerator(String key_prefix_, SharedMutex & shared_mutex_, std::weak_ptr path_map_); From 4f6a3e27b79c0f599edc5f3586878d563b367460 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Thu, 25 Apr 2024 16:55:29 -0700 Subject: [PATCH 13/22] Update src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- .../MetadataStorageFromPlainObjectStorageOperations.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp index 827e2cd6fb87..2ee6961274d0 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp @@ -13,7 +13,6 @@ namespace ErrorCodes extern const int FILE_DOESNT_EXIST; extern const int FILE_ALREADY_EXISTS; extern const int INCORRECT_DATA; -; }; namespace From 36a1cae9102e0c7e8e894e5a1ef8316e678c6fd2 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Fri, 26 Apr 2024 00:09:12 +0000 Subject: [PATCH 14/22] address feedback - pt.1 --- src/Disks/ObjectStorages/IMetadataStorage.h | 2 +- src/Disks/ObjectStorages/IObjectStorage.h | 2 +- .../MetadataStorageFromPlainObjectStorage.cpp | 2 +- .../MetadataStorageFromPlainObjectStorage.h | 2 +- ...torageFromPlainObjectStorageOperations.cpp | 43 ++++++++++--------- ...aStorageFromPlainObjectStorageOperations.h | 2 +- .../test_s3_plain_rewritable/test.py | 3 ++ 7 files changed, 31 insertions(+), 25 deletions(-) diff --git a/src/Disks/ObjectStorages/IMetadataStorage.h b/src/Disks/ObjectStorages/IMetadataStorage.h index f95db2e1eee2..af026bdb0959 100644 --- a/src/Disks/ObjectStorages/IMetadataStorage.h +++ b/src/Disks/ObjectStorages/IMetadataStorage.h @@ -145,7 +145,7 @@ class IMetadataTransaction : private boost::noncopyable virtual ~IMetadataTransaction() = default; -private: +protected: [[noreturn]] static void throwNotImplemented() { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Operation is not implemented"); diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 8ac4e609ffa8..e2f041e3715e 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -212,7 +212,7 @@ class IObjectStorage virtual ObjectStorageKey generateObjectKeyForPath(const std::string & path) const = 0; virtual std::string generateObjectKeyPrefixForDirectoryPath(const std::string & /* path */) const { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented"); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'generateObjectKeyPrefixForDirectoryPath' is not implemented"); } /// Get unique id for passed absolute path in object storage. diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp index 61bbfc07b79d..54cc96effee7 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp @@ -288,7 +288,7 @@ void MetadataStorageFromPlainObjectStorageTransaction::createDirectoryRecursive( void MetadataStorageFromPlainObjectStorageTransaction::moveDirectory(const std::string & path_from, const std::string & path_to) { if (metadata_storage.object_storage->isWriteOnce()) - return; + throwNotImplemented(); addOperation(std::make_unique( normalizeDirectoryPath(path_from), normalizeDirectoryPath(path_to), *metadata_storage.path_map, object_storage)); diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h index b78137f4729e..3820fd893b53 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h @@ -116,7 +116,7 @@ class MetadataStorageFromPlainObjectStorageTransaction final : public IMetadataT void createDirectoryRecursive(const std::string & path) override; - void moveDirectory(const std::string & /* path_from */, const std::string & /* path_to */) override; + void moveDirectory(const std::string & path_from, const std::string & path_to) override; void unlinkFile(const std::string & path) override; void removeDirectory(const std::string & path) override; diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp index 2ee6961274d0..c3fa4ef1ec54 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp @@ -50,7 +50,8 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std: write_created = true; - path_map.emplace(path, std::move(key_prefix)); + [[maybe_unused]] auto result = path_map.emplace(path, std::move(key_prefix)); + chassert(result.second); writeString(path.string(), *buf); buf->finalize(); @@ -60,14 +61,14 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std: void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::undo() { + auto object_key = ObjectStorageKey::createAsRelative(key_prefix, PREFIX_PATH_FILE_NAME); if (write_finalized) - path_map.erase(path); - - if (write_created) { - auto object_key = ObjectStorageKey::createAsRelative(key_prefix, PREFIX_PATH_FILE_NAME); + path_map.erase(path); object_storage->removeObject(StoredObject(object_key.serialize(), path / PREFIX_PATH_FILE_NAME)); } + else if (write_created) + object_storage->removeObjectIfExists(StoredObject(object_key.serialize(), path / PREFIX_PATH_FILE_NAME)); } MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::MetadataStorageFromPlainObjectStorageMoveDirectoryOperation( @@ -79,31 +80,31 @@ MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::MetadataStorageFrom { } -std::unique_ptr MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::moveObject( - const std::filesystem::path & from, const std::filesystem::path & to, bool validate_content) +std::unique_ptr MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::createWriteBuf( + const std::filesystem::path & expected_path, const std::filesystem::path & new_path, bool validate_content) { - auto from_it = path_map.find(from); - if (from_it == path_map.end()) - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Metadata object for the source path '{}' does not exist", from); + auto expected_it = path_map.find(expected_path); + if (expected_it == path_map.end()) + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Metadata object for the expected (source) path '{}' does not exist", expected_path); - if (path_map.contains(to)) - throw Exception(ErrorCodes::FILE_ALREADY_EXISTS, "Metadata object for the destination path '{}' already exists", to); + if (path_map.contains(new_path)) + throw Exception(ErrorCodes::FILE_ALREADY_EXISTS, "Metadata object for the new (destination) path '{}' already exists", new_path); - auto object_key = ObjectStorageKey::createAsRelative(from_it->second, PREFIX_PATH_FILE_NAME); + auto object_key = ObjectStorageKey::createAsRelative(expected_it->second, PREFIX_PATH_FILE_NAME); - auto object = StoredObject(object_key.serialize(), path_from / PREFIX_PATH_FILE_NAME); + auto object = StoredObject(object_key.serialize(), expected_path / PREFIX_PATH_FILE_NAME); if (validate_content) { std::string data; - auto readBuf = object_storage->readObject(object); - readStringUntilEOF(data, *readBuf); + auto read_buf = object_storage->readObject(object); + readStringUntilEOF(data, *read_buf); if (data != path_from) throw Exception( ErrorCodes::INCORRECT_DATA, "Incorrect data for object key {}, expected {}, got {}", object_key.serialize(), - path_from, + expected_path, data); } @@ -122,12 +123,14 @@ void MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::execute(std::u LOG_TRACE( getLogger("MetadataStorageFromPlainObjectStorageMoveDirectoryOperation"), "Moving directory '{}' to '{}'", path_from, path_to); - auto write_buf = moveObject(path_from, path_to, /* validate_content */ true); + auto write_buf = createWriteBuf(path_from, path_to, /* validate_content */ true); write_created = true; writeString(path_to.string(), *write_buf); write_buf->finalize(); - path_map.emplace(path_to, path_map.extract(path_from).mapped()); + [[maybe_unused]] auto result = path_map.emplace(path_to, path_map.extract(path_from).mapped()); + chassert(result.second); + write_finalized = true; } @@ -138,7 +141,7 @@ void MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::undo() if (write_created) { - auto write_buf = moveObject(path_to, path_from, /* verify_content */ false); + auto write_buf = createWriteBuf(path_to, path_from, /* verify_content */ false); writeString(path_from.string(), *write_buf); write_buf->finalize(); } diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h index d2cbbf5df2af..0f9ef07568f1 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h @@ -44,7 +44,7 @@ class MetadataStorageFromPlainObjectStorageMoveDirectoryOperation final : public bool write_finalized = false; std::unique_ptr - moveObject(const std::filesystem::path & from, const std::filesystem::path & to, bool validate_content); + createWriteBuf(const std::filesystem::path & expected_path, const std::filesystem::path & new_path, bool validate_content); public: MetadataStorageFromPlainObjectStorageMoveDirectoryOperation( diff --git a/tests/integration/test_s3_plain_rewritable/test.py b/tests/integration/test_s3_plain_rewritable/test.py index ac35da01897d..5e27a690f1fc 100644 --- a/tests/integration/test_s3_plain_rewritable/test.py +++ b/tests/integration/test_s3_plain_rewritable/test.py @@ -30,6 +30,7 @@ def start_cluster(): cluster.shutdown() +@pytest.mark.order(0) def test_insert(): for index, value in enumerate(insert_values): node.query( @@ -52,6 +53,7 @@ def test_insert(): ) +@pytest.mark.order(1) def test_restart(): for index, value in enumerate(insert_values): assert ( @@ -67,6 +69,7 @@ def test_restart(): ) +@pytest.mark.order(2) def test_drop(): for index, value in enumerate(insert_values): node.query("DROP TABLE IF EXISTS test_{} SYNC".format(index)) From 4a7f28f6bd95e198d3efcfb37b08e8785f1f72af Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Fri, 26 Apr 2024 01:34:14 +0000 Subject: [PATCH 15/22] address feedback - pt.2 --- src/Disks/ObjectStorages/IObjectStorage.h | 4 +++- .../MetadataStorageFromPlainObjectStorage.cpp | 2 +- src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp | 6 ++++-- src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 2 +- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index e2f041e3715e..eae31af9d448 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -210,7 +210,9 @@ class IObjectStorage /// Generate blob name for passed absolute local path. /// Path can be generated either independently or based on `path`. virtual ObjectStorageKey generateObjectKeyForPath(const std::string & path) const = 0; - virtual std::string generateObjectKeyPrefixForDirectoryPath(const std::string & /* path */) const + + /// Object key prefix for local paths in the directory 'path'. + virtual ObjectStorageKey generateObjectKeyPrefixForDirectoryPath(const std::string & /* path */) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'generateObjectKeyPrefixForDirectoryPath' is not implemented"); } diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp index 54cc96effee7..2491bacac811 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp @@ -274,7 +274,7 @@ void MetadataStorageFromPlainObjectStorageTransaction::createDirectory(const std return; auto normalized_path = normalizeDirectoryPath(path); - auto key_prefix = object_storage->generateObjectKeyPrefixForDirectoryPath(normalized_path); + auto key_prefix = object_storage->generateObjectKeyPrefixForDirectoryPath(normalized_path).serialize(); auto op = std::make_unique( std::move(normalized_path), std::move(key_prefix), *metadata_storage.path_map, object_storage); addOperation(std::move(op)); diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 78847242b04f..2f0d93907ae6 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -1,4 +1,5 @@ #include +#include "Common/ObjectStorageKey.h" #if USE_AWS_S3 @@ -572,11 +573,12 @@ ObjectStorageKey S3ObjectStorage::generateObjectKeyForPath(const std::string & p return key_generator->generate(path, /* is_directory */ false); } -std::string S3ObjectStorage::generateObjectKeyPrefixForDirectoryPath(const std::string & path) const +ObjectStorageKey S3ObjectStorage::generateObjectKeyPrefixForDirectoryPath(const std::string & path) const { if (!key_generator) throw Exception(ErrorCodes::LOGICAL_ERROR, "Key generator is not set"); - return key_generator->generate(path, /* is_directory */ true).serialize(); + + return key_generator->generate(path, /* is_directory */ true); } } diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index 2d88195117f4..ff66b00e47ce 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -159,7 +159,7 @@ class S3ObjectStorage : public IObjectStorage bool supportParallelWrite() const override { return true; } ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override; - std::string generateObjectKeyPrefixForDirectoryPath(const std::string & path) const override; + ObjectStorageKey generateObjectKeyPrefixForDirectoryPath(const std::string & path) const override; bool isReadOnly() const override { return s3_settings.get()->read_only; } From 802ee27b1b86b340549b2149c67260bd6c56d32b Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Fri, 26 Apr 2024 03:49:07 +0000 Subject: [PATCH 16/22] address feedback - pt.3 non-functional changes --- programs/keeper/CMakeLists.txt | 2 +- .../MetadataFromDiskTransactionState.cpp | 23 ------------------- .../MetadataOperationsHolder.cpp | 16 ++++++------- .../ObjectStorages/MetadataOperationsHolder.h | 4 ++-- .../ObjectStorages/MetadataStorageFromDisk.h | 2 +- .../MetadataStorageFromPlainObjectStorage.h | 2 +- .../MetadataStorageTransactionState.cpp | 23 +++++++++++++++++++ ...te.h => MetadataStorageTransactionState.h} | 5 ++-- .../MetadataStorageFromStaticFilesWebServer.h | 4 ++-- 9 files changed, 40 insertions(+), 41 deletions(-) delete mode 100644 src/Disks/ObjectStorages/MetadataFromDiskTransactionState.cpp create mode 100644 src/Disks/ObjectStorages/MetadataStorageTransactionState.cpp rename src/Disks/ObjectStorages/{MetadataFromDiskTransactionState.h => MetadataStorageTransactionState.h} (53%) diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 889644658785..51529036ed5b 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -125,7 +125,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataFromDiskTransactionState.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageTransactionState.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorage.cpp diff --git a/src/Disks/ObjectStorages/MetadataFromDiskTransactionState.cpp b/src/Disks/ObjectStorages/MetadataFromDiskTransactionState.cpp deleted file mode 100644 index f6915370b101..000000000000 --- a/src/Disks/ObjectStorages/MetadataFromDiskTransactionState.cpp +++ /dev/null @@ -1,23 +0,0 @@ -#include -#include - -namespace DB -{ - -std::string toString(MetadataFromDiskTransactionState state) -{ - switch (state) - { - case MetadataFromDiskTransactionState::PREPARING: - return "PREPARING"; - case MetadataFromDiskTransactionState::FAILED: - return "FAILED"; - case MetadataFromDiskTransactionState::COMMITTED: - return "COMMITTED"; - case MetadataFromDiskTransactionState::PARTIALLY_ROLLED_BACK: - return "PARTIALLY_ROLLED_BACK"; - } - UNREACHABLE(); -} - -} diff --git a/src/Disks/ObjectStorages/MetadataOperationsHolder.cpp b/src/Disks/ObjectStorages/MetadataOperationsHolder.cpp index f051f62fa419..3023700631c3 100644 --- a/src/Disks/ObjectStorages/MetadataOperationsHolder.cpp +++ b/src/Disks/ObjectStorages/MetadataOperationsHolder.cpp @@ -13,7 +13,7 @@ extern const int FS_METADATA_ERROR; void MetadataOperationsHolder::rollback(size_t until_pos) { /// Otherwise everything is alright - if (state == MetadataFromDiskTransactionState::FAILED) + if (state == MetadataStorageTransactionState::FAILED) { for (int64_t i = until_pos; i >= 0; --i) { @@ -23,7 +23,7 @@ void MetadataOperationsHolder::rollback(size_t until_pos) } catch (Exception & ex) { - state = MetadataFromDiskTransactionState::PARTIALLY_ROLLED_BACK; + state = MetadataStorageTransactionState::PARTIALLY_ROLLED_BACK; ex.addMessage(fmt::format("While rolling back operation #{}", i)); throw; } @@ -37,24 +37,24 @@ void MetadataOperationsHolder::rollback(size_t until_pos) void MetadataOperationsHolder::addOperation(MetadataOperationPtr && operation) { - if (state != MetadataFromDiskTransactionState::PREPARING) + if (state != MetadataStorageTransactionState::PREPARING) throw Exception( ErrorCodes::FS_METADATA_ERROR, "Cannot add operations to transaction in {} state, it should be in {} state", toString(state), - toString(MetadataFromDiskTransactionState::PREPARING)); + toString(MetadataStorageTransactionState::PREPARING)); operations.emplace_back(std::move(operation)); } void MetadataOperationsHolder::commitImpl(SharedMutex & metadata_mutex) { - if (state != MetadataFromDiskTransactionState::PREPARING) + if (state != MetadataStorageTransactionState::PREPARING) throw Exception( ErrorCodes::FS_METADATA_ERROR, "Cannot commit transaction in {} state, it should be in {} state", toString(state), - toString(MetadataFromDiskTransactionState::PREPARING)); + toString(MetadataStorageTransactionState::PREPARING)); { std::unique_lock lock(metadata_mutex); @@ -68,7 +68,7 @@ void MetadataOperationsHolder::commitImpl(SharedMutex & metadata_mutex) { tryLogCurrentException(__PRETTY_FUNCTION__); ex.addMessage(fmt::format("While committing metadata operation #{}", i)); - state = MetadataFromDiskTransactionState::FAILED; + state = MetadataStorageTransactionState::FAILED; rollback(i); throw; } @@ -88,7 +88,7 @@ void MetadataOperationsHolder::commitImpl(SharedMutex & metadata_mutex) } } - state = MetadataFromDiskTransactionState::COMMITTED; + state = MetadataStorageTransactionState::COMMITTED; } } diff --git a/src/Disks/ObjectStorages/MetadataOperationsHolder.h b/src/Disks/ObjectStorages/MetadataOperationsHolder.h index e264df5e0949..7c5988a70f8d 100644 --- a/src/Disks/ObjectStorages/MetadataOperationsHolder.h +++ b/src/Disks/ObjectStorages/MetadataOperationsHolder.h @@ -3,7 +3,7 @@ #include /// TODO: rename to MetadataStorageTransactionState. -#include +#include /** * Implementations for transactional operations with metadata used by MetadataStorageFromDisk @@ -17,7 +17,7 @@ class MetadataOperationsHolder { private: std::vector operations; - MetadataFromDiskTransactionState state{MetadataFromDiskTransactionState::PREPARING}; + MetadataStorageTransactionState state{MetadataStorageTransactionState::PREPARING}; void rollback(size_t until_pos); diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDisk.h b/src/Disks/ObjectStorages/MetadataStorageFromDisk.h index 5dca40afc59f..1346a4dcf93b 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDisk.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromDisk.h @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h index 3820fd893b53..3f5d2f8b2606 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include diff --git a/src/Disks/ObjectStorages/MetadataStorageTransactionState.cpp b/src/Disks/ObjectStorages/MetadataStorageTransactionState.cpp new file mode 100644 index 000000000000..245578b5d9e8 --- /dev/null +++ b/src/Disks/ObjectStorages/MetadataStorageTransactionState.cpp @@ -0,0 +1,23 @@ +#include +#include + +namespace DB +{ + +std::string toString(MetadataStorageTransactionState state) +{ + switch (state) + { + case MetadataStorageTransactionState::PREPARING: + return "PREPARING"; + case MetadataStorageTransactionState::FAILED: + return "FAILED"; + case MetadataStorageTransactionState::COMMITTED: + return "COMMITTED"; + case MetadataStorageTransactionState::PARTIALLY_ROLLED_BACK: + return "PARTIALLY_ROLLED_BACK"; + } + UNREACHABLE(); +} + +} diff --git a/src/Disks/ObjectStorages/MetadataFromDiskTransactionState.h b/src/Disks/ObjectStorages/MetadataStorageTransactionState.h similarity index 53% rename from src/Disks/ObjectStorages/MetadataFromDiskTransactionState.h rename to src/Disks/ObjectStorages/MetadataStorageTransactionState.h index 3dc4c610e3a7..fb05d185a373 100644 --- a/src/Disks/ObjectStorages/MetadataFromDiskTransactionState.h +++ b/src/Disks/ObjectStorages/MetadataStorageTransactionState.h @@ -4,7 +4,7 @@ namespace DB { -enum class MetadataFromDiskTransactionState +enum class MetadataStorageTransactionState { PREPARING, FAILED, @@ -12,6 +12,5 @@ enum class MetadataFromDiskTransactionState PARTIALLY_ROLLED_BACK, }; -std::string toString(MetadataFromDiskTransactionState state); - +std::string toString(MetadataStorageTransactionState state); } diff --git a/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.h b/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.h index b720a9c91f3e..35271d7192c3 100644 --- a/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.h +++ b/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.h @@ -1,9 +1,9 @@ #pragma once +#include #include -#include +#include #include -#include namespace DB From d1217af3895190cd5314da014e8d7eb4c76a91b3 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Fri, 26 Apr 2024 04:25:35 +0000 Subject: [PATCH 17/22] address feedback - pt.4 --- src/Disks/ObjectStorages/IMetadataOperation.h | 3 +- .../MetadataOperationsHolder.cpp | 7 ++-- .../ObjectStorages/MetadataOperationsHolder.h | 6 +-- .../ObjectStorages/MetadataStorageFromDisk.h | 2 +- ...taStorageFromDiskTransactionOperations.cpp | 40 +++++++++---------- ...dataStorageFromDiskTransactionOperations.h | 31 +++++++------- .../MetadataStorageFromPlainObjectStorage.h | 2 +- ...torageFromPlainObjectStorageOperations.cpp | 6 +-- ...aStorageFromPlainObjectStorageOperations.h | 6 +-- 9 files changed, 52 insertions(+), 51 deletions(-) diff --git a/src/Disks/ObjectStorages/IMetadataOperation.h b/src/Disks/ObjectStorages/IMetadataOperation.h index e4b241ad3889..c4f62c6f0bff 100644 --- a/src/Disks/ObjectStorages/IMetadataOperation.h +++ b/src/Disks/ObjectStorages/IMetadataOperation.h @@ -1,5 +1,6 @@ #pragma once +#include #include namespace DB @@ -8,7 +9,7 @@ namespace DB struct IMetadataOperation { virtual void execute(std::unique_lock & metadata_lock) = 0; - virtual void undo() = 0; + virtual void undo(std::unique_lock & metadata_lock) = 0; virtual void finalize() { } virtual ~IMetadataOperation() = default; }; diff --git a/src/Disks/ObjectStorages/MetadataOperationsHolder.cpp b/src/Disks/ObjectStorages/MetadataOperationsHolder.cpp index 3023700631c3..4bd816895500 100644 --- a/src/Disks/ObjectStorages/MetadataOperationsHolder.cpp +++ b/src/Disks/ObjectStorages/MetadataOperationsHolder.cpp @@ -10,7 +10,7 @@ namespace ErrorCodes extern const int FS_METADATA_ERROR; } -void MetadataOperationsHolder::rollback(size_t until_pos) +void MetadataOperationsHolder::rollback(std::unique_lock & lock, size_t until_pos) { /// Otherwise everything is alright if (state == MetadataStorageTransactionState::FAILED) @@ -19,7 +19,7 @@ void MetadataOperationsHolder::rollback(size_t until_pos) { try { - operations[i]->undo(); + operations[i]->undo(lock); } catch (Exception & ex) { @@ -69,7 +69,7 @@ void MetadataOperationsHolder::commitImpl(SharedMutex & metadata_mutex) tryLogCurrentException(__PRETTY_FUNCTION__); ex.addMessage(fmt::format("While committing metadata operation #{}", i)); state = MetadataStorageTransactionState::FAILED; - rollback(i); + rollback(lock, i); throw; } } @@ -90,5 +90,4 @@ void MetadataOperationsHolder::commitImpl(SharedMutex & metadata_mutex) state = MetadataStorageTransactionState::COMMITTED; } - } diff --git a/src/Disks/ObjectStorages/MetadataOperationsHolder.h b/src/Disks/ObjectStorages/MetadataOperationsHolder.h index 7c5988a70f8d..8997f40b9a22 100644 --- a/src/Disks/ObjectStorages/MetadataOperationsHolder.h +++ b/src/Disks/ObjectStorages/MetadataOperationsHolder.h @@ -1,9 +1,9 @@ #pragma once +#include #include - -/// TODO: rename to MetadataStorageTransactionState. #include +#include /** * Implementations for transactional operations with metadata used by MetadataStorageFromDisk @@ -19,7 +19,7 @@ class MetadataOperationsHolder std::vector operations; MetadataStorageTransactionState state{MetadataStorageTransactionState::PREPARING}; - void rollback(size_t until_pos); + void rollback(std::unique_lock & lock, size_t until_pos); protected: void addOperation(MetadataOperationPtr && operation); diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDisk.h b/src/Disks/ObjectStorages/MetadataStorageFromDisk.h index 1346a4dcf93b..df16bf76a3c3 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDisk.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromDisk.h @@ -5,9 +5,9 @@ #include #include -#include #include #include +#include namespace DB { diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp index 1357acdfc66d..194a735f64f0 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp @@ -32,7 +32,7 @@ void SetLastModifiedOperation::execute(std::unique_lock &) disk.setLastModified(path, new_timestamp); } -void SetLastModifiedOperation::undo() +void SetLastModifiedOperation::undo(std::unique_lock &) { disk.setLastModified(path, old_timestamp); } @@ -50,7 +50,7 @@ void ChmodOperation::execute(std::unique_lock &) disk.chmod(path, mode); } -void ChmodOperation::undo() +void ChmodOperation::undo(std::unique_lock &) { disk.chmod(path, old_mode); } @@ -68,7 +68,7 @@ void UnlinkFileOperation::execute(std::unique_lock &) disk.removeFile(path); } -void UnlinkFileOperation::undo() +void UnlinkFileOperation::undo(std::unique_lock &) { auto buf = disk.writeFile(path); writeString(prev_data, *buf); @@ -86,7 +86,7 @@ void CreateDirectoryOperation::execute(std::unique_lock &) disk.createDirectory(path); } -void CreateDirectoryOperation::undo() +void CreateDirectoryOperation::undo(std::unique_lock &) { disk.removeDirectory(path); } @@ -112,7 +112,7 @@ void CreateDirectoryRecursiveOperation::execute(std::unique_lock &) disk.createDirectory(path_to_create); } -void CreateDirectoryRecursiveOperation::undo() +void CreateDirectoryRecursiveOperation::undo(std::unique_lock &) { for (const auto & path_created : paths_created) disk.removeDirectory(path_created); @@ -129,7 +129,7 @@ void RemoveDirectoryOperation::execute(std::unique_lock &) disk.removeDirectory(path); } -void RemoveDirectoryOperation::undo() +void RemoveDirectoryOperation::undo(std::unique_lock &) { disk.createDirectory(path); } @@ -149,7 +149,7 @@ void RemoveRecursiveOperation::execute(std::unique_lock &) disk.moveDirectory(path, temp_path); } -void RemoveRecursiveOperation::undo() +void RemoveRecursiveOperation::undo(std::unique_lock &) { if (disk.isFile(temp_path)) disk.moveFile(temp_path, path); @@ -187,10 +187,10 @@ void CreateHardlinkOperation::execute(std::unique_lock & lock) disk.createHardLink(path_from, path_to); } -void CreateHardlinkOperation::undo() +void CreateHardlinkOperation::undo(std::unique_lock & lock) { if (write_operation) - write_operation->undo(); + write_operation->undo(lock); disk.removeFile(path_to); } @@ -206,7 +206,7 @@ void MoveFileOperation::execute(std::unique_lock &) disk.moveFile(path_from, path_to); } -void MoveFileOperation::undo() +void MoveFileOperation::undo(std::unique_lock &) { disk.moveFile(path_to, path_from); } @@ -223,7 +223,7 @@ void MoveDirectoryOperation::execute(std::unique_lock &) disk.moveDirectory(path_from, path_to); } -void MoveDirectoryOperation::undo() +void MoveDirectoryOperation::undo(std::unique_lock &) { disk.moveDirectory(path_to, path_from); } @@ -244,7 +244,7 @@ void ReplaceFileOperation::execute(std::unique_lock &) disk.replaceFile(path_from, path_to); } -void ReplaceFileOperation::undo() +void ReplaceFileOperation::undo(std::unique_lock &) { disk.moveFile(path_to, path_from); disk.moveFile(temp_path_to, path_to); @@ -275,7 +275,7 @@ void WriteFileOperation::execute(std::unique_lock &) buf->finalize(); } -void WriteFileOperation::undo() +void WriteFileOperation::undo(std::unique_lock &) { if (!existed) { @@ -303,10 +303,10 @@ void AddBlobOperation::execute(std::unique_lock & metadata_lock) write_operation->execute(metadata_lock); } -void AddBlobOperation::undo() +void AddBlobOperation::undo(std::unique_lock & lock) { if (write_operation) - write_operation->undo(); + write_operation->undo(lock); } void UnlinkMetadataFileOperation::execute(std::unique_lock & metadata_lock) @@ -325,17 +325,17 @@ void UnlinkMetadataFileOperation::execute(std::unique_lock & metada unlink_operation->execute(metadata_lock); } -void UnlinkMetadataFileOperation::undo() +void UnlinkMetadataFileOperation::undo(std::unique_lock & lock) { /// Operations MUST be reverted in the reversed order, so /// when we apply operation #1 (write) and operation #2 (unlink) /// we should revert #2 and only after it #1. Otherwise #1 will overwrite /// file with incorrect data. if (unlink_operation) - unlink_operation->undo(); + unlink_operation->undo(lock); if (write_operation) - write_operation->undo(); + write_operation->undo(lock); /// Update outcome to reflect the fact that we have restored the file. outcome->num_hardlinks++; @@ -349,10 +349,10 @@ void SetReadonlyFileOperation::execute(std::unique_lock & metadata_ write_operation->execute(metadata_lock); } -void SetReadonlyFileOperation::undo() +void SetReadonlyFileOperation::undo(std::unique_lock & lock) { if (write_operation) - write_operation->undo(); + write_operation->undo(lock); } } diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h index 4c5fa0f6e78f..3df29833f443 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h @@ -20,7 +20,7 @@ struct SetLastModifiedOperation final : public IMetadataOperation void execute(std::unique_lock & metadata_lock) override; - void undo() override; + void undo(std::unique_lock & metadata_lock) override; private: std::string path; @@ -35,7 +35,7 @@ struct ChmodOperation final : public IMetadataOperation void execute(std::unique_lock & metadata_lock) override; - void undo() override; + void undo(std::unique_lock & metadata_lock) override; private: std::string path; @@ -51,7 +51,7 @@ struct UnlinkFileOperation final : public IMetadataOperation void execute(std::unique_lock & metadata_lock) override; - void undo() override; + void undo(std::unique_lock & metadata_lock) override; private: std::string path; @@ -66,7 +66,7 @@ struct CreateDirectoryOperation final : public IMetadataOperation void execute(std::unique_lock & metadata_lock) override; - void undo() override; + void undo(std::unique_lock & metadata_lock) override; private: std::string path; @@ -80,7 +80,7 @@ struct CreateDirectoryRecursiveOperation final : public IMetadataOperation void execute(std::unique_lock & metadata_lock) override; - void undo() override; + void undo(std::unique_lock & metadata_lock) override; private: std::string path; @@ -95,7 +95,7 @@ struct RemoveDirectoryOperation final : public IMetadataOperation void execute(std::unique_lock & metadata_lock) override; - void undo() override; + void undo(std::unique_lock & metadata_lock) override; private: std::string path; @@ -108,7 +108,7 @@ struct RemoveRecursiveOperation final : public IMetadataOperation void execute(std::unique_lock & metadata_lock) override; - void undo() override; + void undo(std::unique_lock & metadata_lock) override; void finalize() override; @@ -124,7 +124,8 @@ struct WriteFileOperation final : public IMetadataOperation void execute(std::unique_lock & metadata_lock) override; - void undo() override; + void undo(std::unique_lock & metadata_lock) override; + private: std::string path; IDisk & disk; @@ -143,7 +144,7 @@ struct CreateHardlinkOperation final : public IMetadataOperation void execute(std::unique_lock & metadata_lock) override; - void undo() override; + void undo(std::unique_lock & metadata_lock) override; private: std::string path_from; @@ -160,7 +161,7 @@ struct MoveFileOperation final : public IMetadataOperation void execute(std::unique_lock & metadata_lock) override; - void undo() override; + void undo(std::unique_lock & metadata_lock) override; private: std::string path_from; @@ -175,7 +176,7 @@ struct MoveDirectoryOperation final : public IMetadataOperation void execute(std::unique_lock & metadata_lock) override; - void undo() override; + void undo(std::unique_lock & metadata_lock) override; private: std::string path_from; @@ -190,7 +191,7 @@ struct ReplaceFileOperation final : public IMetadataOperation void execute(std::unique_lock & metadata_lock) override; - void undo() override; + void undo(std::unique_lock & metadata_lock) override; void finalize() override; @@ -218,7 +219,7 @@ struct AddBlobOperation final : public IMetadataOperation void execute(std::unique_lock & metadata_lock) override; - void undo() override; + void undo(std::unique_lock & metadata_lock) override; private: std::string path; @@ -246,7 +247,7 @@ struct UnlinkMetadataFileOperation final : public IMetadataOperation void execute(std::unique_lock & metadata_lock) override; - void undo() override; + void undo(std::unique_lock & metadata_lock) override; private: std::string path; @@ -271,7 +272,7 @@ struct SetReadonlyFileOperation final : public IMetadataOperation void execute(std::unique_lock & metadata_lock) override; - void undo() override; + void undo(std::unique_lock & metadata_lock) override; private: std::string path; diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h index 3f5d2f8b2606..5e6a5b13ec4b 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h @@ -2,8 +2,8 @@ #include #include -#include #include +#include #include diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp index c3fa4ef1ec54..a28f4e7a8821 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp @@ -59,7 +59,7 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::execute(std: write_finalized = true; } -void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::undo() +void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::undo(std::unique_lock &) { auto object_key = ObjectStorageKey::createAsRelative(key_prefix, PREFIX_PATH_FILE_NAME); if (write_finalized) @@ -134,7 +134,7 @@ void MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::execute(std::u write_finalized = true; } -void MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::undo() +void MetadataStorageFromPlainObjectStorageMoveDirectoryOperation::undo(std::unique_lock &) { if (write_finalized) path_map.emplace(path_from, path_map.extract(path_to).mapped()); @@ -168,7 +168,7 @@ void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::execute(std: path_map.erase(path_it); } -void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::undo() +void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::undo(std::unique_lock &) { if (!removed) return; diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h index 0f9ef07568f1..4b196f787fd7 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h @@ -29,7 +29,7 @@ class MetadataStorageFromPlainObjectStorageCreateDirectoryOperation final : publ ObjectStoragePtr object_storage_); void execute(std::unique_lock & metadata_lock) override; - void undo() override; + void undo(std::unique_lock & metadata_lock) override; }; class MetadataStorageFromPlainObjectStorageMoveDirectoryOperation final : public IMetadataOperation @@ -55,7 +55,7 @@ class MetadataStorageFromPlainObjectStorageMoveDirectoryOperation final : public void execute(std::unique_lock & metadata_lock) override; - void undo() override; + void undo(std::unique_lock & metadata_lock) override; }; class MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation final : public IMetadataOperation @@ -74,7 +74,7 @@ class MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation final : publ std::filesystem::path && path_, MetadataStorageFromPlainObjectStorage::PathMap & path_map_, ObjectStoragePtr object_storage_); void execute(std::unique_lock & metadata_lock) override; - void undo() override; + void undo(std::unique_lock & metadata_lock) override; }; } From 24d5abba10483b1cfd86bce078b7c79a9ab47657 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Fri, 26 Apr 2024 20:51:25 +0000 Subject: [PATCH 18/22] extract plain_rewritable metadata type Make PlainRewritableObjectStorage generic; Support config type ``` object_storage s3 plain_rewritable https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ 1 ``` --- programs/keeper/CMakeLists.txt | 1 + src/Disks/DiskType.cpp | 2 + src/Disks/DiskType.h | 1 + src/Disks/ObjectStorages/IMetadataStorage.h | 2 +- .../ObjectStorages/MetadataStorageFactory.cpp | 16 ++ .../MetadataStorageFromPlainObjectStorage.cpp | 165 +++--------------- .../MetadataStorageFromPlainObjectStorage.h | 10 +- ...torageFromPlainRewritableObjectStorage.cpp | 141 +++++++++++++++ ...aStorageFromPlainRewritableObjectStorage.h | 26 +++ .../ObjectStorages/ObjectStorageFactory.cpp | 113 ++++++------ .../PlainRewritableObjectStorage.h | 24 +++ .../RegisterDiskObjectStorage.cpp | 7 +- .../S3/S3PlainRewritableObjectStorage.h | 23 --- .../0_stateless/03008_s3_plain_rewritable.sql | 11 +- 14 files changed, 313 insertions(+), 229 deletions(-) create mode 100644 src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp create mode 100644 src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h create mode 100644 src/Disks/ObjectStorages/PlainRewritableObjectStorage.h delete mode 100644 src/Disks/ObjectStorages/S3/S3PlainRewritableObjectStorage.h diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 51529036ed5b..b811868333b8 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -124,6 +124,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataOperationsHolder.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageTransactionState.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp diff --git a/src/Disks/DiskType.cpp b/src/Disks/DiskType.cpp index 1778ae8025b5..448e173a30f3 100644 --- a/src/Disks/DiskType.cpp +++ b/src/Disks/DiskType.cpp @@ -16,6 +16,8 @@ MetadataStorageType metadataTypeFromString(const String & type) return MetadataStorageType::Local; if (check_type == "plain") return MetadataStorageType::Plain; + if (check_type == "plain_rewritable") + return MetadataStorageType::PlainRewritable; if (check_type == "web") return MetadataStorageType::StaticWeb; diff --git a/src/Disks/DiskType.h b/src/Disks/DiskType.h index 36fe4d83004c..8659f3962707 100644 --- a/src/Disks/DiskType.h +++ b/src/Disks/DiskType.h @@ -28,6 +28,7 @@ enum class MetadataStorageType None, Local, Plain, + PlainRewritable, StaticWeb, }; diff --git a/src/Disks/ObjectStorages/IMetadataStorage.h b/src/Disks/ObjectStorages/IMetadataStorage.h index af026bdb0959..168160f61a62 100644 --- a/src/Disks/ObjectStorages/IMetadataStorage.h +++ b/src/Disks/ObjectStorages/IMetadataStorage.h @@ -229,7 +229,7 @@ class IMetadataStorage : private boost::noncopyable /// object_storage_path is absolute. virtual StoredObjects getStorageObjects(const std::string & path) const = 0; -private: +protected: [[noreturn]] static void throwNotImplemented() { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Operation is not implemented"); diff --git a/src/Disks/ObjectStorages/MetadataStorageFactory.cpp b/src/Disks/ObjectStorages/MetadataStorageFactory.cpp index adc1f84372c8..4a3e8a37d288 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFactory.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFactory.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD #include #endif @@ -118,6 +119,20 @@ void registerPlainMetadataStorage(MetadataStorageFactory & factory) }); } +void registerPlainRewritableMetadataStorage(MetadataStorageFactory & factory) +{ + factory.registerMetadataStorageType( + "plain_rewritable", + [](const std::string & /* name */, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + ObjectStoragePtr object_storage) -> MetadataStoragePtr + { + auto key_compatibility_prefix = getObjectKeyCompatiblePrefix(*object_storage, config, config_prefix); + return std::make_shared(object_storage, key_compatibility_prefix); + }); +} + #ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD void registerMetadataStorageFromStaticFilesWebServer(MetadataStorageFactory & factory) { @@ -137,6 +152,7 @@ void registerMetadataStorages() auto & factory = MetadataStorageFactory::instance(); registerMetadataStorageFromDisk(factory); registerPlainMetadataStorage(factory); + registerPlainRewritableMetadataStorage(factory); #ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD registerMetadataStorageFromStaticFilesWebServer(factory); #endif diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp index 2491bacac811..963d3059b5b6 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp @@ -3,15 +3,9 @@ #include #include -#include -#include #include -#include -#include -#include "CommonPathPrefixKeyGenerator.h" #include -#include #include namespace DB @@ -20,8 +14,6 @@ namespace DB namespace { -constexpr auto PREFIX_PATH_FILE_NAME = "prefix.path"; - std::filesystem::path normalizePath(const std::filesystem::path & path) { return std::filesystem::path(path).lexically_normal(); @@ -32,56 +24,14 @@ std::filesystem::path normalizeDirectoryPath(const std::filesystem::path & path) return normalizePath(path) / ""; } -MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::string & root, ObjectStoragePtr object_storage) -{ - MetadataStorageFromPlainObjectStorage::PathMap result; - - RelativePathsWithMetadata files; - object_storage->listObjects(root, files, 0); - for (const auto & file : files) - { - auto remote_path = std::filesystem::path(file.relative_path); - if (remote_path.filename() != PREFIX_PATH_FILE_NAME) - continue; - - StoredObject object{file.relative_path}; - - auto read_buf = object_storage->readObject(object); - String local_path; - readStringUntilEOF(local_path, *read_buf); - - chassert(remote_path.has_parent_path()); - auto res = result.emplace(local_path, remote_path.parent_path()); - - /// This can happen if table replication is enabled, then the same local path is written - /// in `prefix.path` of each replica. - /// TODO: should replicated tables (e.g., RMT) be explicitly disallowed? - if (!res.second) - LOG_WARNING( - getLogger("MetadataStorageFromPlainObjectStorage"), - "The local path '{}' is already mapped to a remote path '{}', ignoring: '{}'", - local_path, - res.first->second, - remote_path.parent_path().string()); - } - return result; -} - } MetadataStorageFromPlainObjectStorage::MetadataStorageFromPlainObjectStorage(ObjectStoragePtr object_storage_, String storage_path_prefix_) : object_storage(object_storage_) , storage_path_prefix(std::move(storage_path_prefix_)) - , path_map(std::make_shared(loadPathPrefixMap(object_storage->getCommonKeyPrefix(), object_storage))) { - if (!object_storage->isWriteOnce()) - { - auto keys_gen = std::make_shared(object_storage->getCommonKeyPrefix(), metadata_mutex, path_map); - object_storage->setKeysGenerator(keys_gen); - } } - MetadataTransactionPtr MetadataStorageFromPlainObjectStorage::createTransaction() { return std::make_shared(*this, object_storage); @@ -123,90 +73,6 @@ uint64_t MetadataStorageFromPlainObjectStorage::getFileSize(const String & path) return 0; } -namespace -{ - -std::vector getDirectChildrenOnWriteOnceDisk(const std::string & storage_key, const RelativePathsWithMetadata & remote_paths) -{ - std::unordered_set duplicates_filter; - for (const auto & elem : remote_paths) - { - const auto & path = elem.relative_path; - chassert(path.find(storage_key) == 0); - const auto child_pos = storage_key.size(); - /// string::npos is ok. - const auto slash_pos = path.find('/', child_pos); - if (slash_pos == std::string::npos) - duplicates_filter.emplace(path.substr(child_pos)); - else - duplicates_filter.emplace(path.substr(child_pos, slash_pos - child_pos)); - } - return std::vector(std::make_move_iterator(duplicates_filter.begin()), std::make_move_iterator(duplicates_filter.end())); -} - -std::vector getDirectChildrenOnRewritableDisk( - const std::string & storage_key, - const RelativePathsWithMetadata & remote_paths, - const std::string & local_path, - const MetadataStorageFromPlainObjectStorage::PathMap & local_path_prefixes, - SharedMutex & shared_mutex) -{ - using PathMap = MetadataStorageFromPlainObjectStorage::PathMap; - - std::unordered_set duplicates_filter; - - /// Map remote paths into local subdirectories. - std::unordered_map remote_to_local_subdir; - - { - std::shared_lock lock(shared_mutex); - for (const auto & [k, v] : local_path_prefixes) - { - if (!k.starts_with(local_path)) - continue; - - auto slash_num = count(k.begin() + local_path.size(), k.end(), '/'); - if (slash_num != 1) - continue; - - chassert(k.back() == '/'); - remote_to_local_subdir.emplace(v, std::string(k.begin() + local_path.size(), k.end() - 1)); - } - } - - auto skip_list = std::set{PREFIX_PATH_FILE_NAME}; - for (const auto & elem : remote_paths) - { - const auto & path = elem.relative_path; - chassert(path.find(storage_key) == 0); - const auto child_pos = storage_key.size(); - - auto slash_pos = path.find('/', child_pos); - - if (slash_pos == std::string::npos) - { - /// File names. - auto filename = path.substr(child_pos); - if (!skip_list.contains(filename)) - duplicates_filter.emplace(std::move(filename)); - } - else - { - /// Subdirectories. - auto it = remote_to_local_subdir.find(path.substr(0, slash_pos)); - /// Mapped subdirectories. - if (it != remote_to_local_subdir.end()) - duplicates_filter.emplace(it->second); - /// The remote subdirectory name is the same as the local subdirectory. - else - duplicates_filter.emplace(path.substr(child_pos, slash_pos - child_pos)); - } - } - - return std::vector(std::make_move_iterator(duplicates_filter.begin()), std::make_move_iterator(duplicates_filter.end())); -} -} - std::vector MetadataStorageFromPlainObjectStorage::listDirectory(const std::string & path) const { auto key_prefix = object_storage->generateObjectKeyForPath(path).serialize(); @@ -218,13 +84,9 @@ std::vector MetadataStorageFromPlainObjectStorage::listDirectory(co object_storage->listObjects(abs_key, files, 0); - if (object_storage->isWriteOnce()) - return getDirectChildrenOnWriteOnceDisk(abs_key, files); - else - return getDirectChildrenOnRewritableDisk(abs_key, files, path, *path_map, metadata_mutex); + return getDirectChildrenOnDisk(abs_key, files, path); } - DirectoryIteratorPtr MetadataStorageFromPlainObjectStorage::iterateDirectory(const std::string & path) const { /// Required for MergeTree @@ -242,6 +104,25 @@ StoredObjects MetadataStorageFromPlainObjectStorage::getStorageObjects(const std return {StoredObject(object_key.serialize(), path, object_size)}; } +std::vector MetadataStorageFromPlainObjectStorage::getDirectChildrenOnDisk( + const std::string & storage_key, const RelativePathsWithMetadata & remote_paths, const std::string & /* local_path */) const +{ + std::unordered_set duplicates_filter; + for (const auto & elem : remote_paths) + { + const auto & path = elem.relative_path; + chassert(path.find(storage_key) == 0); + const auto child_pos = storage_key.size(); + /// string::npos is ok. + const auto slash_pos = path.find('/', child_pos); + if (slash_pos == std::string::npos) + duplicates_filter.emplace(path.substr(child_pos)); + else + duplicates_filter.emplace(path.substr(child_pos, slash_pos - child_pos)); + } + return std::vector(std::make_move_iterator(duplicates_filter.begin()), std::make_move_iterator(duplicates_filter.end())); +} + const IMetadataStorage & MetadataStorageFromPlainObjectStorageTransaction::getStorageForNonTransactionalReads() const { return metadata_storage; @@ -264,7 +145,7 @@ void MetadataStorageFromPlainObjectStorageTransaction::removeDirectory(const std else { addOperation(std::make_unique( - normalizeDirectoryPath(path), *metadata_storage.path_map, object_storage)); + normalizeDirectoryPath(path), *metadata_storage.getPathMap(), object_storage)); } } @@ -276,7 +157,7 @@ void MetadataStorageFromPlainObjectStorageTransaction::createDirectory(const std auto normalized_path = normalizeDirectoryPath(path); auto key_prefix = object_storage->generateObjectKeyPrefixForDirectoryPath(normalized_path).serialize(); auto op = std::make_unique( - std::move(normalized_path), std::move(key_prefix), *metadata_storage.path_map, object_storage); + std::move(normalized_path), std::move(key_prefix), *metadata_storage.getPathMap(), object_storage); addOperation(std::move(op)); } @@ -291,7 +172,7 @@ void MetadataStorageFromPlainObjectStorageTransaction::moveDirectory(const std:: throwNotImplemented(); addOperation(std::make_unique( - normalizeDirectoryPath(path_from), normalizeDirectoryPath(path_to), *metadata_storage.path_map, object_storage)); + normalizeDirectoryPath(path_from), normalizeDirectoryPath(path_to), *metadata_storage.getPathMap(), object_storage)); } void MetadataStorageFromPlainObjectStorageTransaction::addBlobToMetadata( diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h index 5e6a5b13ec4b..0c30d9b48d32 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h @@ -24,7 +24,7 @@ using UnlinkMetadataFileOperationOutcomePtr = std::shared_ptr path_map; public: MetadataStorageFromPlainObjectStorage(ObjectStoragePtr object_storage_, String storage_path_prefix_); @@ -77,6 +77,12 @@ class MetadataStorageFromPlainObjectStorage final : public IMetadataStorage bool supportsChmod() const override { return false; } bool supportsStat() const override { return false; } + +protected: + virtual std::shared_ptr getPathMap() const { throwNotImplemented(); } + + virtual std::vector getDirectChildrenOnDisk( + const std::string & storage_key, const RelativePathsWithMetadata & remote_paths, const std::string & local_path) const; }; class MetadataStorageFromPlainObjectStorageTransaction final : public IMetadataTransaction, private MetadataOperationsHolder diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp new file mode 100644 index 000000000000..4f987118fc50 --- /dev/null +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp @@ -0,0 +1,141 @@ +#include + +#include +#include +#include +#include "CommonPathPrefixKeyGenerator.h" + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +namespace +{ + +constexpr auto PREFIX_PATH_FILE_NAME = "prefix.path"; + +MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::string & root, ObjectStoragePtr object_storage) +{ + MetadataStorageFromPlainObjectStorage::PathMap result; + + RelativePathsWithMetadata files; + object_storage->listObjects(root, files, 0); + for (const auto & file : files) + { + auto remote_path = std::filesystem::path(file.relative_path); + if (remote_path.filename() != PREFIX_PATH_FILE_NAME) + continue; + + StoredObject object{file.relative_path}; + + auto read_buf = object_storage->readObject(object); + String local_path; + readStringUntilEOF(local_path, *read_buf); + + chassert(remote_path.has_parent_path()); + auto res = result.emplace(local_path, remote_path.parent_path()); + + /// This can happen if table replication is enabled, then the same local path is written + /// in `prefix.path` of each replica. + /// TODO: should replicated tables (e.g., RMT) be explicitly disallowed? + if (!res.second) + LOG_WARNING( + getLogger("MetadataStorageFromPlainObjectStorage"), + "The local path '{}' is already mapped to a remote path '{}', ignoring: '{}'", + local_path, + res.first->second, + remote_path.parent_path().string()); + } + return result; +} + +std::vector getDirectChildrenOnRewritableDisk( + const std::string & storage_key, + const RelativePathsWithMetadata & remote_paths, + const std::string & local_path, + const MetadataStorageFromPlainObjectStorage::PathMap & local_path_prefixes, + SharedMutex & shared_mutex) +{ + using PathMap = MetadataStorageFromPlainObjectStorage::PathMap; + + std::unordered_set duplicates_filter; + + /// Map remote paths into local subdirectories. + std::unordered_map remote_to_local_subdir; + + { + std::shared_lock lock(shared_mutex); + for (const auto & [k, v] : local_path_prefixes) + { + if (!k.starts_with(local_path)) + continue; + + auto slash_num = count(k.begin() + local_path.size(), k.end(), '/'); + if (slash_num != 1) + continue; + + chassert(k.back() == '/'); + remote_to_local_subdir.emplace(v, std::string(k.begin() + local_path.size(), k.end() - 1)); + } + } + + auto skip_list = std::set{PREFIX_PATH_FILE_NAME}; + for (const auto & elem : remote_paths) + { + const auto & path = elem.relative_path; + chassert(path.find(storage_key) == 0); + const auto child_pos = storage_key.size(); + + auto slash_pos = path.find('/', child_pos); + + if (slash_pos == std::string::npos) + { + /// File names. + auto filename = path.substr(child_pos); + if (!skip_list.contains(filename)) + duplicates_filter.emplace(std::move(filename)); + } + else + { + /// Subdirectories. + auto it = remote_to_local_subdir.find(path.substr(0, slash_pos)); + /// Mapped subdirectories. + if (it != remote_to_local_subdir.end()) + duplicates_filter.emplace(it->second); + /// The remote subdirectory name is the same as the local subdirectory. + else + duplicates_filter.emplace(path.substr(child_pos, slash_pos - child_pos)); + } + } + + return std::vector(std::make_move_iterator(duplicates_filter.begin()), std::make_move_iterator(duplicates_filter.end())); +} + +} + +MetadataStorageFromPlainRewritableObjectStorage::MetadataStorageFromPlainRewritableObjectStorage( + ObjectStoragePtr object_storage_, String storage_path_prefix_) + : MetadataStorageFromPlainObjectStorage(object_storage_, storage_path_prefix_) + , path_map(std::make_shared(loadPathPrefixMap(object_storage->getCommonKeyPrefix(), object_storage))) +{ + if (object_storage->isWriteOnce()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "MetadataStorageFromPlainRewritableObjectStorage is not compatible with write-once storage '{}'", + object_storage->getName()); + + auto keys_gen = std::make_shared(object_storage->getCommonKeyPrefix(), metadata_mutex, path_map); + object_storage->setKeysGenerator(keys_gen); +} + +std::vector MetadataStorageFromPlainRewritableObjectStorage::getDirectChildrenOnDisk( + const std::string & storage_key, const RelativePathsWithMetadata & remote_paths, const std::string & local_path) const +{ + return getDirectChildrenOnRewritableDisk(storage_key, remote_paths, local_path, *getPathMap(), metadata_mutex); +} + +} diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h new file mode 100644 index 000000000000..4415a68c24ea --- /dev/null +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h @@ -0,0 +1,26 @@ +#pragma once + +#include + +#include + +namespace DB +{ + +class MetadataStorageFromPlainRewritableObjectStorage final : public MetadataStorageFromPlainObjectStorage +{ +private: + std::shared_ptr path_map; + +public: + MetadataStorageFromPlainRewritableObjectStorage(ObjectStoragePtr object_storage_, String storage_path_prefix_); + + MetadataStorageType getType() const override { return MetadataStorageType::PlainRewritable; } + +protected: + std::shared_ptr getPathMap() const override { return path_map; } + std::vector getDirectChildrenOnDisk( + const std::string & storage_key, const RelativePathsWithMetadata & remote_paths, const std::string & local_path) const override; +}; + +} diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index 958c6400a7d5..7b949db268b7 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -1,9 +1,10 @@ -#include "config.h" +#include #include +#include "Disks/DiskType.h" +#include "config.h" #if USE_AWS_S3 #include #include -#include #include #endif #if USE_HDFS && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) @@ -21,6 +22,7 @@ #endif #include #include +#include #include #include @@ -36,36 +38,50 @@ namespace ErrorCodes extern const int UNKNOWN_ELEMENT_IN_CONFIG; extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; } namespace { - bool isPlainStorage( - ObjectStorageType type, - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix) - { - auto compatibility_hint = MetadataStorageFactory::getCompatibilityMetadataTypeHint(type); - auto metadata_type = MetadataStorageFactory::getMetadataType(config, config_prefix, compatibility_hint); - return metadataTypeFromString(metadata_type) == MetadataStorageType::Plain; - } - template - ObjectStoragePtr createObjectStorage( - ObjectStorageType type, - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - Args && ...args) +bool isCompatibleWithMetadataStorage( + ObjectStorageType storage_type, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + MetadataStorageType target_metadata_type) +{ + auto compatibility_hint = MetadataStorageFactory::getCompatibilityMetadataTypeHint(storage_type); + auto metadata_type = MetadataStorageFactory::getMetadataType(config, config_prefix, compatibility_hint); + return metadataTypeFromString(metadata_type) == target_metadata_type; +} + +bool isPlainStorage(ObjectStorageType type, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix) +{ + return isCompatibleWithMetadataStorage(type, config, config_prefix, MetadataStorageType::Plain); +} + +bool isPlainRewritableStorage(ObjectStorageType type, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix) +{ + return isCompatibleWithMetadataStorage(type, config, config_prefix, MetadataStorageType::PlainRewritable); +} + +template +ObjectStoragePtr createObjectStorage( + ObjectStorageType type, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, Args &&... args) +{ + if (isPlainStorage(type, config, config_prefix)) + return std::make_shared>(std::forward(args)...); + else if (isPlainRewritableStorage(type, config, config_prefix)) { - if (isPlainStorage(type, config, config_prefix)) - { - return std::make_shared>(std::forward(args)...); - } - else - { - return std::make_shared(std::forward(args)...); - } + /// TODO(jkartseva@): Test support for generic disk type + if (type != ObjectStorageType::S3) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "plain_rewritable metadata storage support is implemented only for S3"); + + return std::make_shared>(std::forward(args)...); } + else + return std::make_shared(std::forward(args)...); +} } ObjectStorageFactory & ObjectStorageFactory::instance() @@ -77,10 +93,7 @@ ObjectStorageFactory & ObjectStorageFactory::instance() void ObjectStorageFactory::registerObjectStorageType(const std::string & type, Creator creator) { if (!registry.emplace(type, creator).second) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "ObjectStorageFactory: the metadata type '{}' is not unique", type); - } + throw Exception(ErrorCodes::LOGICAL_ERROR, "ObjectStorageFactory: the metadata type '{}' is not unique", type); } ObjectStoragePtr ObjectStorageFactory::create( @@ -92,13 +105,9 @@ ObjectStoragePtr ObjectStorageFactory::create( { std::string type; if (config.has(config_prefix + ".object_storage_type")) - { type = config.getString(config_prefix + ".object_storage_type"); - } else if (config.has(config_prefix + ".type")) /// .type -- for compatibility. - { type = config.getString(config_prefix + ".type"); - } else { throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Expected `object_storage_type` in config"); @@ -235,7 +244,7 @@ void registerS3PlainRewritableObjectStorage(ObjectStorageFactory & factory) auto client = getClient(config, config_prefix, context, *settings); auto key_generator = getKeyGenerator(uri, config, config_prefix); - auto object_storage = std::make_shared( + auto object_storage = std::make_shared>( std::move(client), std::move(settings), uri, s3_capabilities, key_generator, name); /// NOTE: should we still perform this check for clickhouse-disks? @@ -251,26 +260,26 @@ void registerS3PlainRewritableObjectStorage(ObjectStorageFactory & factory) #if USE_HDFS && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) void registerHDFSObjectStorage(ObjectStorageFactory & factory) { - factory.registerObjectStorageType("hdfs", []( - const std::string & /* name */, - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - const ContextPtr & context, - bool /* skip_access_check */) -> ObjectStoragePtr - { - auto uri = context->getMacros()->expand(config.getString(config_prefix + ".endpoint")); - checkHDFSURL(uri); - if (uri.back() != '/') - throw Exception(ErrorCodes::BAD_ARGUMENTS, "HDFS path must ends with '/', but '{}' doesn't.", uri); + factory.registerObjectStorageType( + "hdfs", + [](const std::string & /* name */, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + const ContextPtr & context, + bool /* skip_access_check */) -> ObjectStoragePtr + { + auto uri = context->getMacros()->expand(config.getString(config_prefix + ".endpoint")); + checkHDFSURL(uri); + if (uri.back() != '/') + throw Exception(ErrorCodes::BAD_ARGUMENTS, "HDFS path must ends with '/', but '{}' doesn't.", uri); - std::unique_ptr settings = std::make_unique( - config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), - config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000), - context->getSettingsRef().hdfs_replication - ); + std::unique_ptr settings = std::make_unique( + config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), + config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000), + context->getSettingsRef().hdfs_replication); - return createObjectStorage(ObjectStorageType::HDFS, config, config_prefix, uri, std::move(settings), config); - }); + return createObjectStorage(ObjectStorageType::HDFS, config, config_prefix, uri, std::move(settings), config); + }); } #endif diff --git a/src/Disks/ObjectStorages/PlainRewritableObjectStorage.h b/src/Disks/ObjectStorages/PlainRewritableObjectStorage.h new file mode 100644 index 000000000000..d71e995b4907 --- /dev/null +++ b/src/Disks/ObjectStorages/PlainRewritableObjectStorage.h @@ -0,0 +1,24 @@ +#pragma once + +#include + +namespace DB +{ + +template +class PlainRewritableObjectStorage : public BaseObjectStorage +{ +public: + template + explicit PlainRewritableObjectStorage(Args &&... args) : BaseObjectStorage(std::forward(args)...) + { + } + + std::string getName() const override { return "PlainRewritable" + BaseObjectStorage::getName(); } + + bool isWriteOnce() const override { return false; } + + bool isPlain() const override { return true; } +}; + +} diff --git a/src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp b/src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp index 433ff00146b3..0963dd37974e 100644 --- a/src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp @@ -29,7 +29,10 @@ void registerDiskObjectStorage(DiskFactory & factory, bool global_skip_access_ch if (!config.has(config_prefix + ".metadata_type")) { if (object_storage->isPlain()) - compatibility_metadata_type_hint = "plain"; + if (object_storage->isWriteOnce()) + compatibility_metadata_type_hint = "plain"; + else + compatibility_metadata_type_hint = "plain_rewritable"; else compatibility_metadata_type_hint = MetadataStorageFactory::getCompatibilityMetadataTypeHint(object_storage->getType()); } @@ -53,7 +56,7 @@ void registerDiskObjectStorage(DiskFactory & factory, bool global_skip_access_ch #if USE_AWS_S3 factory.registerDiskType("s3", creator); /// For compatibility factory.registerDiskType("s3_plain", creator); /// For compatibility - factory.registerDiskType("s3_plain_rewritable", creator); + factory.registerDiskType("s3_plain_rewritable", creator); // For compatibility #endif #if USE_HDFS factory.registerDiskType("hdfs", creator); /// For compatibility diff --git a/src/Disks/ObjectStorages/S3/S3PlainRewritableObjectStorage.h b/src/Disks/ObjectStorages/S3/S3PlainRewritableObjectStorage.h deleted file mode 100644 index 7f43f32370af..000000000000 --- a/src/Disks/ObjectStorages/S3/S3PlainRewritableObjectStorage.h +++ /dev/null @@ -1,23 +0,0 @@ -#pragma once - -#include - -namespace DB -{ - -class S3PlainRewritableObjectStorage : public S3ObjectStorage -{ -public: - template - explicit S3PlainRewritableObjectStorage(Args &&... args) : S3ObjectStorage(std::forward(args)...) - { - } - - std::string getName() const override { return "S3PlainRewritableObjectStorage"; } - - bool isWriteOnce() const override { return false; } - - bool isPlain() const override { return true; } -}; - -} diff --git a/tests/queries/0_stateless/03008_s3_plain_rewritable.sql b/tests/queries/0_stateless/03008_s3_plain_rewritable.sql index 1bfd8118875e..af02ebbd8748 100644 --- a/tests/queries/0_stateless/03008_s3_plain_rewritable.sql +++ b/tests/queries/0_stateless/03008_s3_plain_rewritable.sql @@ -2,8 +2,9 @@ -- Tag: no-fasttest -- requires S3 drop table if exists test_mt; -create table test_mt (a Int32, b Int64, c Int64) engine = MergeTree() partition by intDiv(a, 500) order by tuple(a, b) +create table test_mt (a Int32, b Int64, c Int64) engine = MergeTree() partition by intDiv(a, 1000) order by tuple(a, b) settings disk = disk( + name = s3_plain_rewritable, type = s3_plain_rewritable, endpoint = 'http://localhost:11111/test/test_mt/', access_key_id = clickhouse, @@ -29,10 +30,6 @@ attach table test_mt; drop table if exists test_mt_dst; -create table test_mt_dst (a Int32, b Int64, c Int64) engine = MergeTree() partition by intDiv(a, 500) order by tuple(a, b) -settings disk = disk( - type = s3_plain_rewritable, - endpoint = 'http://localhost:11111/test/test_mt/', - access_key_id = clickhouse, - secret_access_key = clickhouse); +create table test_mt_dst (a Int32, b Int64, c Int64) engine = MergeTree() partition by intDiv(a, 1000) order by tuple(a, b) +settings disk = 's3_plain_rewritable'; alter table test_mt move partition 0 to table test_mt_dst; -- { serverError SUPPORT_IS_DISABLED } From c1d62dd8a1ea61aad3f526135dde5a461baa6d71 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Fri, 26 Apr 2024 22:56:23 +0000 Subject: [PATCH 19/22] documentation for unbundled configuration --- docs/en/operations/storing-data.md | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index a402a731933f..389c917d4276 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -28,7 +28,7 @@ Starting from 24.1 clickhouse version, it is possible to use a new configuration It requires to specify: 1. `type` equal to `object_storage` 2. `object_storage_type`, equal to one of `s3`, `azure_blob_storage` (or just `azure` from `24.3`), `hdfs`, `local_blob_storage` (or just `local` from `24.3`), `web`. -Optionally, `metadata_type` can be specified (it is equal to `local` by default), but it can also be set to `plain`, `web`. +Optionally, `metadata_type` can be specified (it is equal to `local` by default), but it can also be set to `plain`, `web` and, starting from `24.4`, `plain_rewritable`. Usage of `plain` metadata type is described in [plain storage section](/docs/en/operations/storing-data.md/#storing-data-on-webserver), `web` metadata type can be used only with `web` object storage type, `local` metadata type stores metadata files locally (each metadata files contains mapping to files in object storage and some additional meta information about them). E.g. configuration option @@ -347,7 +347,9 @@ Similar to the `s3_plain` disk type, it does not require additional storage for Unlike `s3_plain` disk type, `s3_plain_rewritable` allows executing merges and supports INSERT operations. [Mutations](/docs/en/sql-reference/statements/alter#mutations) and replication of tables are not supported. -A use case for this disk type are non-replicated `MergeTree` tables. e.g., system tables. +A use case for this disk type are non-replicated `MergeTree` tables. Although the `s3` disk type is suitable for non-replicated +MergeTree tables, you may opt for the `s3_plain_rewritable` disk type if you do not require local metadata for the table and are +willing to accept a limited set of operations. This could be useful, for example, for system tables. Configuration: ``` xml @@ -358,6 +360,17 @@ Configuration: ``` +is equal to +``` xml + + object_storage + s3 + plain_rewritable + https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ + 1 + +``` + ### Using Azure Blob Storage {#azure-blob-storage} `MergeTree` family table engines can store data to [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) using a disk with type `azure_blob_storage`. From 508a42bc8f6c9bcbeec7f71ac700f43a19818fca Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Sat, 27 Apr 2024 03:08:25 +0000 Subject: [PATCH 20/22] use ordered map for path map --- src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h | 7 ++++--- .../ObjectStorages/MetadataStorageFromPlainObjectStorage.h | 4 ++-- .../MetadataStorageFromPlainRewritableObjectStorage.cpp | 6 ++++-- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h index 68a390a51b64..fb1140de908a 100644 --- a/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h +++ b/src/Disks/ObjectStorages/CommonPathPrefixKeyGenerator.h @@ -3,7 +3,8 @@ #include #include -#include +#include +#include namespace DB { @@ -20,8 +21,8 @@ namespace DB class CommonPathPrefixKeyGenerator : public IObjectStorageKeysGenerator { public: - /// Local to remote path map. - using PathMap = std::unordered_map; + /// Local to remote path map. Leverages filesystem::path comparator for paths. + using PathMap = std::map; explicit CommonPathPrefixKeyGenerator(String key_prefix_, SharedMutex & shared_mutex_, std::weak_ptr path_map_); diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h index 0c30d9b48d32..f290a7622058 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h @@ -5,7 +5,7 @@ #include #include -#include +#include namespace DB { @@ -28,7 +28,7 @@ class MetadataStorageFromPlainObjectStorage : public IMetadataStorage { public: /// Local path prefixes mapped to storage key prefixes. - using PathMap = std::unordered_map; + using PathMap = std::map; private: friend class MetadataStorageFromPlainObjectStorageTransaction; diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp index 4f987118fc50..d910dae80b4b 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp @@ -69,10 +69,12 @@ std::vector getDirectChildrenOnRewritableDisk( { std::shared_lock lock(shared_mutex); - for (const auto & [k, v] : local_path_prefixes) + auto end_it = local_path_prefixes.end(); + for (auto it = local_path_prefixes.lower_bound(local_path); it != end_it; ++it) { + const auto & [k, v] = std::make_tuple(it->first.string(), it->second); if (!k.starts_with(local_path)) - continue; + break; auto slash_num = count(k.begin() + local_path.size(), k.end(), '/'); if (slash_num != 1) From 3c1207ed4de6222add178634550e4c9c53987457 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Sat, 27 Apr 2024 05:35:56 +0000 Subject: [PATCH 21/22] remove path normalization --- .../MetadataStorageFromPlainObjectStorage.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp index 963d3059b5b6..071b2ff4613f 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp @@ -14,14 +14,9 @@ namespace DB namespace { -std::filesystem::path normalizePath(const std::filesystem::path & path) -{ - return std::filesystem::path(path).lexically_normal(); -} - std::filesystem::path normalizeDirectoryPath(const std::filesystem::path & path) { - return normalizePath(path) / ""; + return path / ""; } } From dc955589626b3ea02616649b31df71adeae31c0e Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Mon, 29 Apr 2024 19:48:17 +0000 Subject: [PATCH 22/22] method rename --- src/Disks/IDisk.h | 3 +-- src/Disks/ObjectStorages/DiskObjectStorage.cpp | 2 +- src/Disks/ObjectStorages/DiskObjectStorage.h | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 6 +++--- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 83cf3d53057c..b84d60c45918 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -363,8 +363,7 @@ class IDisk : public Space virtual bool isWriteOnce() const { return false; } - /// Whether this disk support mutations. - virtual bool isMutable() const { return true; } + virtual bool supportsHardLinks() const { return true; } /// Check if disk is broken. Broken disks will have 0 space and cannot be used. virtual bool isBroken() const { return false; } diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index 70eb0ec6706a..f6980d1e8f18 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -435,7 +435,7 @@ bool DiskObjectStorage::isWriteOnce() const return object_storage->isWriteOnce(); } -bool DiskObjectStorage::isMutable() const +bool DiskObjectStorage::supportsHardLinks() const { return !isWriteOnce() && !object_storage->isPlain(); } diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index 8a7e52cee6a9..787937af8467 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -183,7 +183,7 @@ friend class DiskObjectStorageRemoteMetadataRestoreHelper; /// MergeTree table on this disk. bool isWriteOnce() const override; - bool isMutable() const override; + bool supportsHardLinks() const override; /// Get structure of object storage this disk works with. Examples: /// DiskObjectStorage(S3ObjectStorage) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index c7c2b940152c..0fa786c0c576 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2965,7 +2965,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context "Experimental Inverted Index feature is not enabled (turn on setting 'allow_experimental_inverted_index')"); for (const auto & disk : getDisks()) - if (!disk->isMutable()) + if (!disk->supportsHardLinks()) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "ALTER TABLE is not supported for immutable disk '{}'", disk->getName()); /// Set of columns that shouldn't be altered. @@ -3339,7 +3339,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context void MergeTreeData::checkMutationIsPossible(const MutationCommands & /*commands*/, const Settings & /*settings*/) const { for (const auto & disk : getDisks()) - if (!disk->isMutable()) + if (!disk->supportsHardLinks()) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Mutations are not supported for immutable disk '{}'", disk->getName()); } @@ -4831,7 +4831,7 @@ void MergeTreeData::checkAlterPartitionIsPossible( const PartitionCommands & commands, const StorageMetadataPtr & /*metadata_snapshot*/, const Settings & settings, ContextPtr local_context) const { for (const auto & disk : getDisks()) - if (!disk->isMutable()) + if (!disk->supportsHardLinks()) throw Exception( ErrorCodes::SUPPORT_IS_DISABLED, "ALTER TABLE PARTITION is not supported for immutable disk '{}'", disk->getName());