diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/read.cpp b/ydb/core/tx/columnshard/blobs_action/abstract/read.cpp index 21a76b4fb7df..baba7ec7b6c9 100644 --- a/ydb/core/tx/columnshard/blobs_action/abstract/read.cpp +++ b/ydb/core/tx/columnshard/blobs_action/abstract/read.cpp @@ -10,24 +10,10 @@ void IBlobsReadingAction::StartReading(std::vector&& ranges) { for (auto&& i : ranges) { Counters->OnRequest(i.Size); } - std::sort(ranges.begin(), ranges.end()); THashSet result; - std::optional currentRange; - std::vector currentList; - for (auto&& br : ranges) { - if (!currentRange) { - currentRange = br; - } else if (!currentRange->TryGlueWithNext(br)) { - result.emplace(*currentRange); - Groups.emplace(*currentRange, std::move(currentList)); - currentRange = br; - currentList.clear(); - } - currentList.emplace_back(br); - } - if (currentRange) { - result.emplace(*currentRange); - Groups.emplace(*currentRange, std::move(currentList)); + Groups = GroupBlobsForOptimization(std::move(ranges)); + for (auto&& [range, _] :Groups) { + result.emplace(range); } return DoStartReading(std::move(result)); } diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/read.h b/ydb/core/tx/columnshard/blobs_action/abstract/read.h index b5833f0ba446..2a916305f87a 100644 --- a/ydb/core/tx/columnshard/blobs_action/abstract/read.h +++ b/ydb/core/tx/columnshard/blobs_action/abstract/read.h @@ -71,6 +71,54 @@ class TActionReadBlobs { } }; +class TBlobsGlueing { +public: + class TSequentialGluePolicy { + public: + bool Glue(TBlobRange& currentRange, const TBlobRange& addRange) const { + return currentRange.TryGlueWithNext(addRange); + } + }; + + class TBlobGluePolicy { + private: + const ui64 BlobLimitSize = 8LLU << 20; + public: + TBlobGluePolicy(const ui64 blobLimitSize) + : BlobLimitSize(blobLimitSize) + { + } + + bool Glue(TBlobRange& currentRange, const TBlobRange& addRange) const { + return currentRange.TryGlueSameBlob(addRange, BlobLimitSize); + } + }; + + template + static THashMap> GroupRanges(std::vector&& ranges, const TGluePolicy& policy) { + std::sort(ranges.begin(), ranges.end()); + THashMap> result; + std::optional currentRange; + std::vector currentList; + for (auto&& br : ranges) { + if (!currentRange) { + currentRange = br; + } + else if (!policy.Glue(*currentRange, br)) { + result.emplace(*currentRange, std::move(currentList)); + currentRange = br; + currentList.clear(); + } + currentList.emplace_back(br); + } + if (currentRange) { + result.emplace(*currentRange, std::move(currentList)); + } + return result; + } + +}; + class IBlobsReadingAction: public ICommonBlobsAction { public: using TErrorStatus = TConclusionSpecialStatus; @@ -91,6 +139,7 @@ class IBlobsReadingAction: public ICommonBlobsAction { protected: virtual void DoStartReading(THashSet&& range) = 0; void StartReading(std::vector&& ranges); + virtual THashMap> GroupBlobsForOptimization(std::vector&& ranges) const = 0; public: const THashMap>& GetGroups() const { diff --git a/ydb/core/tx/columnshard/blobs_action/bs/read.h b/ydb/core/tx/columnshard/blobs_action/bs/read.h index 31a9dccb524f..1bd6bbc03dfc 100644 --- a/ydb/core/tx/columnshard/blobs_action/bs/read.h +++ b/ydb/core/tx/columnshard/blobs_action/bs/read.h @@ -10,6 +10,9 @@ class TReadingAction: public IBlobsReadingAction { const TActorId BlobCacheActorId; protected: virtual void DoStartReading(THashSet&& ranges) override; + virtual THashMap> GroupBlobsForOptimization(std::vector&& ranges) const override { + return TBlobsGlueing::GroupRanges(std::move(ranges), TBlobsGlueing::TSequentialGluePolicy()); + } public: TReadingAction(const TString& storageId, const TActorId& blobCacheActorId) diff --git a/ydb/core/tx/columnshard/blobs_action/tier/read.h b/ydb/core/tx/columnshard/blobs_action/tier/read.h index 3e7baee57c3d..e4742cc285ee 100644 --- a/ydb/core/tx/columnshard/blobs_action/tier/read.h +++ b/ydb/core/tx/columnshard/blobs_action/tier/read.h @@ -11,6 +11,9 @@ class TReadingAction: public IBlobsReadingAction { const NWrappers::NExternalStorage::IExternalStorageOperator::TPtr ExternalStorageOperator; protected: virtual void DoStartReading(THashSet&& ranges) override; + virtual THashMap> GroupBlobsForOptimization(std::vector&& ranges) const override { + return TBlobsGlueing::GroupRanges(std::move(ranges), TBlobsGlueing::TBlobGluePolicy(8LLU << 20)); + } public: TReadingAction(const TString& storageId, const NWrappers::NExternalStorage::IExternalStorageOperator::TPtr& storageOperator) diff --git a/ydb/core/tx/columnshard/common/blob.h b/ydb/core/tx/columnshard/common/blob.h index 87be46ba542c..9aa06bd3d558 100644 --- a/ydb/core/tx/columnshard/common/blob.h +++ b/ydb/core/tx/columnshard/common/blob.h @@ -193,7 +193,7 @@ struct TBlobRange { bool operator<(const TBlobRange& br) const { if (BlobId != br.BlobId) { - return BlobId.Hash() < br.BlobId.Hash(); + return BlobId.GetLogoBlobId().Compare(br.BlobId.GetLogoBlobId()) < 0; } else if (Offset != br.Offset) { return Offset < br.Offset; } else { @@ -209,6 +209,21 @@ struct TBlobRange { return BlobId == br.BlobId && br.Offset + br.Size == Offset; } + bool TryGlueSameBlob(const TBlobRange& br, const ui64 limit) { + if (GetBlobId() != br.GetBlobId()) { + return false; + } + const ui32 right = std::max(Offset + Size, br.Offset + br.Size); + const ui32 offset = std::min(Offset, br.Offset); + const ui32 size = right - offset; + if (size > limit) { + return false; + } + Size = size; + Offset = offset; + return true; + } + bool TryGlueWithNext(const TBlobRange& br) { if (!br.IsNextRangeFor(*this)) { return false;