diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java index 17a5a76951f6b..659cbf8c8d18a 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java @@ -20,6 +20,7 @@ import org.apache.hudi.common.engine.HoodieReaderContext; import org.apache.hudi.common.model.HoodieFileFormat; +import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType; import org.apache.hudi.common.schema.HoodieSchema; @@ -34,11 +35,10 @@ import org.apache.hudi.storage.HoodieStorage; import org.apache.hudi.storage.StorageConfiguration; import org.apache.hudi.storage.StoragePath; +import org.apache.hudi.storage.StoragePathInfo; import org.apache.hudi.storage.inline.InLineFSUtils; import org.apache.avro.generic.IndexedRecord; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -57,8 +57,6 @@ * base file format. */ public class HoodieHFileDataBlock extends HoodieDataBlock { - private static final Logger LOG = LoggerFactory.getLogger(HoodieHFileDataBlock.class); - private final Option compressionCodec; // This path is used for constructing HFile reader context, which should not be // interpreted as the actual file path for the HFile data blocks @@ -164,12 +162,13 @@ protected ClosableIterator> lookupRecords(List sorte blockContentLoc.getLogFile().getPath().toUri().getScheme(), blockContentLoc.getContentPositionInLogFile(), blockContentLoc.getBlockSize()); + StoragePathInfo storagePathInfo = getStoragePathInfo(inlinePath, blockContentLoc); HoodieStorage inlineStorage = getBlockContentLocation().get().getStorage().newInstance(inlinePath, inlineConf); try (final HoodieAvroHFileReaderImplBase reader = (HoodieAvroHFileReaderImplBase) HoodieIOFactory .getIOFactory(inlineStorage) .getReaderFactory(HoodieRecordType.AVRO) - .getFileReader(ConfigUtils.DEFAULT_HUDI_CONFIG_FOR_READER, inlinePath, HoodieFileFormat.HFILE, Option.of(getSchemaFromHeader()))) { + .getFileReader(ConfigUtils.DEFAULT_HUDI_CONFIG_FOR_READER, storagePathInfo, HoodieFileFormat.HFILE, Option.of(getSchemaFromHeader()))) { // Get writer's schema from the header final ClosableIterator> recordIterator = fullKey ? reader.getRecordsByKeysIterator(sortedKeys, readerSchema) : @@ -190,13 +189,14 @@ protected ClosableIterator lookupEngineRecords(List sortedKeys, b blockContentLoc.getLogFile().getPath().toUri().getScheme(), blockContentLoc.getContentPositionInLogFile(), blockContentLoc.getBlockSize()); + StoragePathInfo storagePathInfo = getStoragePathInfo(inlinePath, blockContentLoc); HoodieStorage inlineStorage = blockContentLoc.getStorage().newInstance(inlinePath, inlineConf); try (final HoodieAvroHFileReaderImplBase reader = (HoodieAvroHFileReaderImplBase) HoodieIOFactory .getIOFactory(inlineStorage) .getReaderFactory(HoodieRecordType.AVRO) .getFileReader(ConfigUtils.DEFAULT_HUDI_CONFIG_FOR_READER, - inlinePath, + storagePathInfo, HoodieFileFormat.HFILE, Option.of(getSchemaFromHeader()))) { // Get writer's schema from the header @@ -204,4 +204,16 @@ protected ClosableIterator lookupEngineRecords(List sortedKeys, b reader.getEngineRecordsByKeyPrefixIterator(sortedKeys, readerSchema)); } } + + private static StoragePathInfo getStoragePathInfo(StoragePath inlinePath, HoodieLogBlockContentLocation blockContentLoc) { + HoodieLogFile logFile = blockContentLoc.getLogFile(); + StoragePathInfo pathInfo = logFile.getPathInfo(); + return new StoragePathInfo( + inlinePath, + blockContentLoc.getBlockSize(), + false, + pathInfo == null ? 0 : pathInfo.getBlockReplication(), + pathInfo == null ? 0L : pathInfo.getBlockSize(), + pathInfo == null ? 0L : pathInfo.getModificationTime()); + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HFileReaderFactory.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HFileReaderFactory.java index 3e80f63bb4111..7cb53021f6c96 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HFileReaderFactory.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HFileReaderFactory.java @@ -107,7 +107,7 @@ private SeekableDataInputStream createInputStream(long fileSize) throws IOExcept StoragePath path = fileSource.asLeft(); byte[] buffer; try (SeekableDataInputStream stream = storage.openSeekable(path, false)) { - buffer = new byte[(int) storage.getPathInfo(path).getLength()]; + buffer = new byte[(int) fileSize]; stream.readFully(buffer); } return new ByteArraySeekableDataInputStream(new ByteBufferBackedInputStream(buffer)); diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java index 5f83a5a6cc426..72e6129ec736c 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java @@ -516,7 +516,7 @@ private ClosableIterator readSliceWithFilter(Predicate predicate, if (fileSlice.getBaseFile().isPresent()) { HoodieConfig fileGroupReaderConfig = new HoodieConfig(fileGroupReaderProps); baseFileReader = (HoodieAvroFileReader) HoodieIOFactory.getIOFactory(getStorage()).getReaderFactory(HoodieRecord.HoodieRecordType.AVRO) - .getFileReader(fileGroupReaderConfig, fileSlice.getBaseFile().get().getStoragePath(), metadataMetaClient.getTableConfig().getBaseFileFormat(), Option.empty()); + .getFileReader(fileGroupReaderConfig, fileSlice.getBaseFile().get().getPathInfo(), metadataMetaClient.getTableConfig().getBaseFileFormat(), Option.empty()); } return Pair.of(baseFileReader, buildReusableRecordBufferLoader(fileSlice, latestMetadataInstantTime, instantRange)); } catch (IOException ex) { diff --git a/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHFileReaderFactory.java b/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHFileReaderFactory.java index a0a98575d4cd6..ed02990bb9b06 100644 --- a/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHFileReaderFactory.java +++ b/hudi-common/src/test/java/org/apache/hudi/io/storage/TestHFileReaderFactory.java @@ -99,7 +99,7 @@ void testCreateHFileReader_FileSizeBelowThreshold_ShouldUseContentCache() throws assertInstanceOf(HFileReaderImpl.class, result); // Verify that content was downloaded (cache was used) - verify(mockStorage, times(2)).getPathInfo(mockPath); // Once for size determination, once for download + verify(mockStorage, times(1)).getPathInfo(mockPath); // Once for size determination, which is reused for download verify(mockStorage, times(1)).openSeekable(mockPath, false); // For content download verify(mockInputStream, times(1)).readFully(any(byte[].class)); } diff --git a/hudi-hadoop-common/src/main/java/org/apache/hudi/io/storage/hadoop/HoodieAvroFileReaderFactory.java b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/storage/hadoop/HoodieAvroFileReaderFactory.java index ce670c51114ee..a0512e84522d1 100644 --- a/hudi-hadoop-common/src/main/java/org/apache/hudi/io/storage/hadoop/HoodieAvroFileReaderFactory.java +++ b/hudi-hadoop-common/src/main/java/org/apache/hudi/io/storage/hadoop/HoodieAvroFileReaderFactory.java @@ -70,11 +70,11 @@ protected HoodieFileReader newHFileFileReader(HoodieConfig hoodieConfig, HoodieStorage storage, byte[] content, Option schemaOption) throws IOException { - HFileReaderFactory readerFactory = HFileReaderFactory.builder() + HFileReaderFactory.Builder readerFactoryBuilder = HFileReaderFactory.builder() .withStorage(storage).withProps(hoodieConfig.getProps()) - .withContent(content).build(); + .withContent(content); return HoodieNativeAvroHFileReader.builder() - .readerFactory(readerFactory).path(path).schema(schemaOption).build(); + .readerFactory(readerFactoryBuilder.build()).path(path).schema(schemaOption).build(); } @Override