From dc266edbeae6a74b43d7362b65bdd2d78b1768a2 Mon Sep 17 00:00:00 2001 From: Qi Chen Date: Wed, 21 May 2025 15:07:09 +0800 Subject: [PATCH] [Optimize] Optimize stripe footer multiple reads. --- c++/src/Reader.cc | 9 ++------- c++/src/StripeStream.cc | 5 +++-- c++/src/StripeStream.hh | 8 +++++--- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc index fcfe3309803..70fa56eec2b 100644 --- a/c++/src/Reader.cc +++ b/c++/src/Reader.cc @@ -679,7 +679,7 @@ namespace orc { return std::unique_ptr(new StripeInformationImpl( stripeInfo.offset(), stripeInfo.indexlength(), stripeInfo.datalength(), stripeInfo.footerlength(), stripeInfo.numberofrows(), contents->stream.get(), - *contents->pool, contents->compression, contents->blockSize, contents->readerMetrics)); + *contents->pool, contents->compression, contents->blockSize, contents->readerMetrics, nullptr)); } FileVersion ReaderImpl::getFormatVersion() const { @@ -1185,7 +1185,7 @@ namespace orc { currentStripeInfo.offset(), currentStripeInfo.indexlength(), currentStripeInfo.datalength(), currentStripeInfo.footerlength(), currentStripeInfo.numberofrows(), contents->stream.get(), *contents->pool, - contents->compression, contents->blockSize, contents->readerMetrics)); + contents->compression, contents->blockSize, contents->readerMetrics, &currentStripeFooter)); contents->stream->beforeReadStripe(std::move(currentStripeInformation), selectedColumns); if (sargsApplier) { @@ -1216,11 +1216,6 @@ namespace orc { if (stringDictFilter != nullptr) { std::list dictFilterColumnNames; - std::unique_ptr currentStripeInformation(new StripeInformationImpl( - currentStripeInfo.offset(), currentStripeInfo.indexlength(), - currentStripeInfo.datalength(), currentStripeInfo.footerlength(), - currentStripeInfo.numberofrows(), contents->stream.get(), *contents->pool, - contents->compression, contents->blockSize, contents->readerMetrics)); 
stringDictFilter->fillDictFilterColumnNames(std::move(currentStripeInformation), dictFilterColumnNames); std::unordered_map columnIdToNameMap; diff --git a/c++/src/StripeStream.cc b/c++/src/StripeStream.cc index 1f43da4f243..91acb3cddf0 100644 --- a/c++/src/StripeStream.cc +++ b/c++/src/StripeStream.cc @@ -130,13 +130,14 @@ namespace orc { } void StripeInformationImpl::ensureStripeFooterLoaded() const { - if (stripeFooter.get() == nullptr) { + if (stripeFooter == nullptr && managedStripeFooter.get() == nullptr) { std::unique_ptr pbStream = createDecompressor(compression, std::make_unique( stream, offset + indexLength + dataLength, footerLength, memory), blockSize, memory, metrics); - stripeFooter = std::make_unique(); + managedStripeFooter = std::make_unique(); + stripeFooter = managedStripeFooter.get(); if (!stripeFooter->ParseFromZeroCopyStream(pbStream.get())) { throw ParseError("Failed to parse the stripe footer"); } diff --git a/c++/src/StripeStream.hh b/c++/src/StripeStream.hh index 74bebda6f25..74fabb06e26 100644 --- a/c++/src/StripeStream.hh +++ b/c++/src/StripeStream.hh @@ -132,15 +132,16 @@ namespace orc { MemoryPool& memory; CompressionKind compression; uint64_t blockSize; - mutable std::unique_ptr stripeFooter; ReaderMetrics* metrics; + mutable proto::StripeFooter* stripeFooter; + mutable std::unique_ptr managedStripeFooter; void ensureStripeFooterLoaded() const; public: StripeInformationImpl(uint64_t _offset, uint64_t _indexLength, uint64_t _dataLength, uint64_t _footerLength, uint64_t _numRows, InputStream* _stream, MemoryPool& _memory, CompressionKind _compression, uint64_t _blockSize, - ReaderMetrics* _metrics) + ReaderMetrics* _metrics, proto::StripeFooter* _stripeFooter) : offset(_offset), indexLength(_indexLength), dataLength(_dataLength), @@ -150,7 +151,8 @@ namespace orc { memory(_memory), compression(_compression), blockSize(_blockSize), - metrics(_metrics) { + metrics(_metrics), + stripeFooter(_stripeFooter) { // PASS }