From 0178fe21c6e0bcd5b936679d294d92e91efb4894 Mon Sep 17 00:00:00 2001 From: kakachen Date: Wed, 21 May 2025 11:13:21 +0800 Subject: [PATCH] [Optimize] Optimize stripe footer multiple reads. --- c++/src/Reader.cc | 9 ++------- c++/src/StripeStream.cc | 5 +++-- c++/src/StripeStream.hh | 8 +++++--- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc index 4c0144da89e..13e1b82d4e3 100644 --- a/c++/src/Reader.cc +++ b/c++/src/Reader.cc @@ -716,7 +716,7 @@ namespace orc { return std::unique_ptr(new StripeInformationImpl( stripeInfo.offset(), stripeInfo.indexlength(), stripeInfo.datalength(), stripeInfo.footerlength(), stripeInfo.numberofrows(), contents->stream.get(), - *contents->pool, contents->compression, contents->blockSize, contents->readerMetrics)); + *contents->pool, contents->compression, contents->blockSize, contents->readerMetrics, nullptr)); } FileVersion ReaderImpl::getFormatVersion() const { @@ -1228,7 +1228,7 @@ namespace orc { currentStripeInfo.offset(), currentStripeInfo.indexlength(), currentStripeInfo.datalength(), currentStripeInfo.footerlength(), currentStripeInfo.numberofrows(), contents->stream.get(), *contents->pool, - contents->compression, contents->blockSize, contents->readerMetrics)); + contents->compression, contents->blockSize, contents->readerMetrics, &currentStripeFooter)); streams.clear(); contents->stream->beforeReadStripe(std::move(currentStripeInformation), selectedColumns, streams); @@ -1266,11 +1266,6 @@ namespace orc { if (stringDictFilter != nullptr) { std::list dictFilterColumnNames; - std::unique_ptr currentStripeInformation(new StripeInformationImpl( - currentStripeInfo.offset(), currentStripeInfo.indexlength(), - currentStripeInfo.datalength(), currentStripeInfo.footerlength(), - currentStripeInfo.numberofrows(), contents->stream.get(), *contents->pool, - contents->compression, contents->blockSize, contents->readerMetrics)); 
stringDictFilter->fillDictFilterColumnNames(std::move(currentStripeInformation), dictFilterColumnNames); std::unordered_map columnIdToNameMap; diff --git a/c++/src/StripeStream.cc b/c++/src/StripeStream.cc index 8efa23efa86..56cde6bfe8d 100644 --- a/c++/src/StripeStream.cc +++ b/c++/src/StripeStream.cc @@ -133,13 +133,14 @@ namespace orc { } void StripeInformationImpl::ensureStripeFooterLoaded() const { - if (stripeFooter.get() == nullptr) { + if (stripeFooter == nullptr && managedStripeFooter.get() == nullptr) { std::unique_ptr pbStream = createDecompressor(compression, std::make_unique( stream, offset + indexLength + dataLength, footerLength, memory), blockSize, memory, metrics); - stripeFooter = std::make_unique<proto::StripeFooter>(); + managedStripeFooter = std::make_unique<proto::StripeFooter>(); + stripeFooter = managedStripeFooter.get(); if (!stripeFooter->ParseFromZeroCopyStream(pbStream.get())) { throw ParseError("Failed to parse the stripe footer"); } diff --git a/c++/src/StripeStream.hh b/c++/src/StripeStream.hh index 57e51ef76f0..296305091a1 100644 --- a/c++/src/StripeStream.hh +++ b/c++/src/StripeStream.hh @@ -134,7 +134,8 @@ namespace orc { MemoryPool& memory; CompressionKind compression; uint64_t blockSize; - mutable std::unique_ptr<proto::StripeFooter> stripeFooter; + mutable proto::StripeFooter* stripeFooter; + mutable std::unique_ptr<proto::StripeFooter> managedStripeFooter; ReaderMetrics* metrics; void ensureStripeFooterLoaded() const; @@ -142,7 +143,7 @@ namespace orc { StripeInformationImpl(uint64_t _offset, uint64_t _indexLength, uint64_t _dataLength, uint64_t _footerLength, uint64_t _numRows, InputStream* _stream, MemoryPool& _memory, CompressionKind _compression, uint64_t _blockSize, - ReaderMetrics* _metrics) + ReaderMetrics* _metrics, proto::StripeFooter* _stripeFooter) : offset(_offset), indexLength(_indexLength), dataLength(_dataLength), @@ -152,7 +153,8 @@ namespace orc { memory(_memory), compression(_compression), blockSize(_blockSize), - metrics(_metrics) { + metrics(_metrics), + stripeFooter(_stripeFooter) { // 
PASS }