diff --git a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroParquetReader.java b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroParquetReader.java
index aefbf7b8bb..805d7579c6 100644
--- a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroParquetReader.java
+++ b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroParquetReader.java
@@ -192,9 +192,9 @@ public Builder withCompatibility(boolean enableCompatibility) {
     @Override
     protected ReadSupport getReadSupport() {
       if (isReflect) {
-        conf.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY, false);
+        configuration.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY, false);
       } else {
-        conf.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY, enableCompatibility);
+        configuration.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY, enableCompatibility);
       }
       return new AvroReadSupport(model);
     }
diff --git a/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java b/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java
index d9a172cecf..e37ee12483 100644
--- a/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java
+++ b/parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java
@@ -86,7 +86,9 @@ private Binary() {}
 
   public abstract ByteBuffer toByteBuffer();
 
-  public abstract short get2BytesLittleEndian();
+  public short get2BytesLittleEndian() {
+    throw new UnsupportedOperationException("Not implemented");
+  }
 
   @Override
   public boolean equals(Object obj) {
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/CodecFactory.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/CodecFactory.java
index f0775484c5..f1041a83b8 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/CodecFactory.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/CodecFactory.java
@@ -51,9 +51,13 @@ public class CodecFactory implements CompressionCodecFactory {
   private final Map compressors = new HashMap<>();
   private final Map decompressors = new HashMap<>();
 
-  protected final ParquetConfiguration configuration;
+  protected final ParquetConfiguration conf;
   protected final int pageSize;
 
+  // May be null if parquetConfiguration is not an instance of org.apache.parquet.conf.HadoopParquetConfiguration
+  @Deprecated
+  protected final Configuration configuration;
+
   static final BytesDecompressor NO_OP_DECOMPRESSOR = new BytesDecompressor() {
     @Override
     public void decompress(ByteBuffer input, int compressedSize, ByteBuffer output, int decompressedSize) {
@@ -115,7 +119,12 @@ public CodecFactory(Configuration configuration, int pageSize) {
    * decompressors this parameter has no impact on the function of the factory
    */
   public CodecFactory(ParquetConfiguration configuration, int pageSize) {
-    this.configuration = configuration;
+    if (configuration instanceof HadoopParquetConfiguration) {
+      this.configuration = ((HadoopParquetConfiguration) configuration).getConfiguration();
+    } else {
+      this.configuration = null;
+    }
+    this.conf = configuration;
     this.pageSize = pageSize;
   }
@@ -293,7 +302,7 @@ protected CompressionCodec getCodec(CompressionCodecName codecName) {
         codecClass = new Configuration(false).getClassLoader().loadClass(codecClassName);
       }
       codec = (CompressionCodec)
-          ReflectionUtils.newInstance(codecClass, ConfigurationUtil.createHadoopConfiguration(configuration));
+          ReflectionUtils.newInstance(codecClass, ConfigurationUtil.createHadoopConfiguration(conf));
       CODEC_BY_NAME.put(codecCacheKey, codec);
       return codec;
     } catch (ClassNotFoundException e) {
@@ -305,13 +314,13 @@ private String cacheKey(CompressionCodecName codecName) {
     String level = null;
     switch (codecName) {
       case GZIP:
-        level = configuration.get("zlib.compress.level");
+        level = conf.get("zlib.compress.level");
         break;
      case BROTLI:
-        level = configuration.get("compression.brotli.quality");
+        level = conf.get("compression.brotli.quality");
        break;
      case ZSTD:
-        level = configuration.get("parquet.compression.codec.zstd.level");
+        level = conf.get("parquet.compression.codec.zstd.level");
        break;
      default:
        // compression level is not supported; ignore it
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java
index 5599d2509b..795063e5c8 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java
@@ -50,6 +50,7 @@ import org.apache.parquet.crypto.ModuleCipherFactory.ModuleType;
 import org.apache.parquet.format.BlockCipher;
 import org.apache.parquet.format.converter.ParquetMetadataConverter;
+import org.apache.parquet.hadoop.CodecFactory.BytesCompressor;
 import org.apache.parquet.hadoop.metadata.ColumnPath;
 import org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder;
 import org.apache.parquet.internal.column.columnindex.OffsetIndexBuilder;
@@ -513,6 +514,20 @@ public void writeBloomFilter(BloomFilter bloomFilter) {
           new HashMap();
   private final MessageType schema;
 
+  @Deprecated
+  public ColumnChunkPageWriteStore(
+      BytesCompressor compressor,
+      MessageType schema,
+      ByteBufferAllocator allocator,
+      int columnIndexTruncateLength) {
+    this(
+        (BytesInputCompressor) compressor,
+        schema,
+        allocator,
+        columnIndexTruncateLength,
+        ParquetProperties.DEFAULT_PAGE_WRITE_CHECKSUM_ENABLED);
+  }
+
   public ColumnChunkPageWriteStore(
       BytesInputCompressor compressor,
       MessageType schema,
@@ -526,6 +541,16 @@ public ColumnChunkPageWriteStore(
         ParquetProperties.DEFAULT_PAGE_WRITE_CHECKSUM_ENABLED);
   }
 
+  @Deprecated
+  public ColumnChunkPageWriteStore(
+      BytesCompressor compressor,
+      MessageType schema,
+      ByteBufferAllocator allocator,
+      int columnIndexTruncateLength,
+      boolean pageWriteChecksumEnabled) {
+    this((BytesInputCompressor) compressor, schema, allocator, columnIndexTruncateLength, pageWriteChecksumEnabled);
+  }
+
   public ColumnChunkPageWriteStore(
       BytesInputCompressor compressor,
       MessageType schema,
@@ -550,6 +575,25 @@ public ColumnChunkPageWriteStore(
     }
   }
 
+  @Deprecated
+  public ColumnChunkPageWriteStore(
+      BytesCompressor compressor,
+      MessageType schema,
+      ByteBufferAllocator allocator,
+      int columnIndexTruncateLength,
+      boolean pageWriteChecksumEnabled,
+      InternalFileEncryptor fileEncryptor,
+      int rowGroupOrdinal) {
+    this(
+        (BytesInputCompressor) compressor,
+        schema,
+        allocator,
+        columnIndexTruncateLength,
+        pageWriteChecksumEnabled,
+        fileEncryptor,
+        rowGroupOrdinal);
+  }
+
   public ColumnChunkPageWriteStore(
       BytesInputCompressor compressor,
       MessageType schema,
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/DirectCodecFactory.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/DirectCodecFactory.java
index 523e57dbf4..b2b5233eeb 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/DirectCodecFactory.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/DirectCodecFactory.java
@@ -410,9 +410,9 @@ private class ZstdCompressor extends BaseCompressor {
 
     ZstdCompressor() {
       context = new ZstdCompressCtx();
-      context.setLevel(configuration.getInt(
+      context.setLevel(conf.getInt(
           ZstandardCodec.PARQUET_COMPRESS_ZSTD_LEVEL, ZstandardCodec.DEFAULT_PARQUET_COMPRESS_ZSTD_LEVEL));
-      context.setWorkers(configuration.getInt(
+      context.setWorkers(conf.getInt(
           ZstandardCodec.PARQUET_COMPRESS_ZSTD_WORKERS, ZstandardCodec.DEFAULTPARQUET_COMPRESS_ZSTD_WORKERS));
     }
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetReader.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetReader.java
index 47647f10a2..9ca1202eb9 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetReader.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetReader.java
@@ -196,17 +196,21 @@ public static class Builder {
     private final Path path;
     private Filter filter = null;
     private ByteBufferAllocator allocator = new HeapByteBufferAllocator();
-    protected ParquetConfiguration conf;
+    protected ParquetConfiguration configuration;
     private ParquetReadOptions.Builder optionsBuilder;
 
+    // May be null if parquetConfiguration is not an instance of org.apache.parquet.conf.HadoopParquetConfiguration
+    @Deprecated
+    protected Configuration conf;
+
     @Deprecated
     private Builder(ReadSupport readSupport, Path path) {
       this.readSupport = Objects.requireNonNull(readSupport, "readSupport cannot be null");
       this.file = null;
       this.path = Objects.requireNonNull(path, "path cannot be null");
-      Configuration hadoopConf = new Configuration();
-      this.conf = new HadoopParquetConfiguration(hadoopConf);
-      this.optionsBuilder = HadoopReadOptions.builder(hadoopConf, path);
+      this.conf = new Configuration();
+      this.configuration = new HadoopParquetConfiguration(this.conf);
+      this.optionsBuilder = HadoopReadOptions.builder(this.conf, path);
     }
 
     @Deprecated
@@ -214,9 +218,9 @@ protected Builder(Path path) {
       this.readSupport = null;
       this.file = null;
       this.path = Objects.requireNonNull(path, "path cannot be null");
-      Configuration hadoopConf = new Configuration();
-      this.conf = new HadoopParquetConfiguration(hadoopConf);
-      this.optionsBuilder = HadoopReadOptions.builder(hadoopConf, path);
+      this.conf = new Configuration();
+      this.configuration = new HadoopParquetConfiguration(this.conf);
+      this.optionsBuilder = HadoopReadOptions.builder(this.conf, path);
     }
 
     protected Builder(InputFile file) {
@@ -225,9 +229,9 @@ protected Builder(InputFile file) {
      this.readSupport = null;
      this.file = Objects.requireNonNull(file, "file cannot be null");
      this.path = null;
      if (file instanceof HadoopInputFile) {
        HadoopInputFile hadoopFile = (HadoopInputFile) file;
-        Configuration hadoopConf = hadoopFile.getConfiguration();
-        this.conf = new HadoopParquetConfiguration(hadoopConf);
-        optionsBuilder = HadoopReadOptions.builder(hadoopConf, hadoopFile.getPath());
+        this.conf = hadoopFile.getConfiguration();
+        this.configuration = new HadoopParquetConfiguration(this.conf);
+        optionsBuilder = HadoopReadOptions.builder(this.conf, hadoopFile.getPath());
      } else {
        optionsBuilder = ParquetReadOptions.builder(new HadoopParquetConfiguration());
      }
@@ -237,11 +241,11 @@ protected Builder(InputFile file, ParquetConfiguration conf) {
      this.readSupport = null;
      this.file = Objects.requireNonNull(file, "file cannot be null");
      this.path = null;
-      this.conf = conf;
+      this.configuration = conf;
      if (file instanceof HadoopInputFile) {
+        this.conf = ConfigurationUtil.createHadoopConfiguration(conf);
        HadoopInputFile hadoopFile = (HadoopInputFile) file;
-        optionsBuilder = HadoopReadOptions.builder(
-            ConfigurationUtil.createHadoopConfiguration(conf), hadoopFile.getPath());
+        optionsBuilder = HadoopReadOptions.builder(this.conf, hadoopFile.getPath());
      } else {
        optionsBuilder = ParquetReadOptions.builder(conf);
      }
@@ -249,7 +253,8 @@ protected Builder(InputFile file, ParquetConfiguration conf) {
 
    // when called, resets options to the defaults from conf
    public Builder withConf(Configuration conf) {
-      this.conf = new HadoopParquetConfiguration(Objects.requireNonNull(conf, "conf cannot be null"));
+      this.conf = Objects.requireNonNull(conf, "conf cannot be null");
+      this.configuration = new HadoopParquetConfiguration(this.conf);
 
      // previous versions didn't use the builder, so may set filter before conf. this maintains
      // compatibility for filter. other options are reset by a new conf.
@@ -262,7 +267,7 @@ public Builder withConf(Configuration conf) {
    }
 
    public Builder withConf(ParquetConfiguration conf) {
-      this.conf = conf;
+      this.configuration = conf;
      this.optionsBuilder = ParquetReadOptions.builder(conf);
      if (filter != null) {
        optionsBuilder.withRecordFilter(filter);
      }
@@ -383,7 +388,7 @@ public ParquetReader build() throws IOException {
      ParquetReadOptions options = optionsBuilder.withAllocator(allocator).build();
 
      if (path != null) {
-        Configuration hadoopConf = ConfigurationUtil.createHadoopConfiguration(conf);
+        Configuration hadoopConf = ConfigurationUtil.createHadoopConfiguration(configuration);
        FileSystem fs = path.getFileSystem(hadoopConf);
        FileStatus stat = fs.getFileStatus(path);
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetRecordWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetRecordWriter.java
index 0e2f49eaee..51528b10be 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetRecordWriter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetRecordWriter.java
@@ -27,6 +27,7 @@ import org.apache.parquet.column.ParquetProperties;
 import org.apache.parquet.column.ParquetProperties.WriterVersion;
 import org.apache.parquet.compression.CompressionCodecFactory.BytesInputCompressor;
+import org.apache.parquet.hadoop.CodecFactory.BytesCompressor;
 import org.apache.parquet.hadoop.api.WriteSupport;
 import org.apache.parquet.hadoop.metadata.CompressionCodecName;
 import org.apache.parquet.schema.MessageType;
@@ -43,6 +44,33 @@ public class ParquetRecordWriter extends RecordWriter {
   private final MemoryManager memoryManager;
   private final CodecFactory codecFactory;
 
+  @Deprecated
+  public ParquetRecordWriter(
+      ParquetFileWriter w,
+      WriteSupport writeSupport,
+      MessageType schema,
+      Map extraMetaData,
+      int blockSize,
+      int pageSize,
+      BytesCompressor compressor,
+      int dictionaryPageSize,
+      boolean enableDictionary,
+      boolean validating,
+      WriterVersion writerVersion) {
+    this(
+        w,
+        writeSupport,
+        schema,
+        extraMetaData,
+        blockSize,
+        pageSize,
+        (BytesInputCompressor) compressor,
+        dictionaryPageSize,
+        enableDictionary,
+        validating,
+        writerVersion);
+  }
+
   /**
    * @param w the file to write to
    * @param writeSupport the class to convert incoming records
@@ -81,6 +109,35 @@ public ParquetRecordWriter(
     this.codecFactory = null;
   }
 
+  @Deprecated
+  public ParquetRecordWriter(
+      ParquetFileWriter w,
+      WriteSupport writeSupport,
+      MessageType schema,
+      Map extraMetaData,
+      long blockSize,
+      int pageSize,
+      BytesCompressor compressor,
+      int dictionaryPageSize,
+      boolean enableDictionary,
+      boolean validating,
+      WriterVersion writerVersion,
+      MemoryManager memoryManager) {
+    this(
+        w,
+        writeSupport,
+        schema,
+        extraMetaData,
+        blockSize,
+        pageSize,
+        (BytesInputCompressor) compressor,
+        dictionaryPageSize,
+        enableDictionary,
+        validating,
+        writerVersion,
+        memoryManager);
+  }
+
   /**
    * @param w the file to write to
    * @param writeSupport the class to convert incoming records
diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftType.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftType.java
index 3988c03f26..264790333a 100644
--- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftType.java
+++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftType.java
@@ -134,7 +134,9 @@ public interface StateVisitor {
 
     R visit(StringType stringType, S state);
 
-    R visit(UUIDType uuidType, S state);
+    default R visit(UUIDType uuidType, S state) {
+      throw new UnsupportedOperationException("Not implemented");
+    }
   }
 
   /**
@@ -166,7 +168,9 @@ public interface TypeVisitor {
 
     void visit(StringType stringType);
 
-    void visit(UUIDType uuidType);
+    default void visit(UUIDType uuidType) {
+      throw new UnsupportedOperationException("Not implemented");
+    }
   }
 
   /**
diff --git a/pom.xml b/pom.xml
index a5625da6ea..a81ae1630e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -69,13 +69,13 @@
     com.fasterxml.jackson
     2.17.0
     2.17.0
-    0.18.2
+    0.21.0
     1.3.2
     2.30.0
     shaded.parquet
     3.3.6
     2.10.0
-    1.13.0
+    1.13.1
     thrift
     ${thrift.executable}
     2.12.17
@@ -577,39 +577,14 @@ ${shade.prefix}
-              org.apache.parquet.hadoop.CodecFactory
-              org.apache.parquet.hadoop.ParquetReader
-              org.apache.parquet.thrift.projection.deprecated.PathGlobPattern
-
-              org.apache.parquet.hadoop.ColumnChunkPageWriteStore
-              org.apache.parquet.hadoop.ParquetRecordWriter
-
-              org.apache.parquet.conf.PlainParquetConfiguration#getClass(java.lang.String,java.lang.Class,java.lang.Class)
-              org.apache.parquet.conf.ParquetConfiguration#getClass(java.lang.String,java.lang.Class,java.lang.Class)
-              org.apache.parquet.hadoop.util.SerializationUtil#readObjectFromConfAsBase64(java.lang.String,org.apache.parquet.conf.ParquetConfiguration)
-
-              org.apache.parquet.hadoop.util.wrapped.io.FutureIO#awaitFuture(java.util.concurrent.Future,long,java.util.concurrent.TimeUnit)
-              org.apache.parquet.hadoop.util.wrapped.io.FutureIO#raiseInnerCause(java.util.concurrent.ExecutionException)
-              org.apache.parquet.conf.HadoopParquetConfiguration#getClass(java.lang.String,java.lang.Class,java.lang.Class)
-              org.apache.parquet.avro.AvroParquetReader#builder(org.apache.parquet.io.InputFile,org.apache.parquet.conf.ParquetConfiguration)
-              org.apache.parquet.hadoop.thrift.TBaseWriteSupport#setThriftClass(org.apache.parquet.conf.ParquetConfiguration,java.lang.Class)
-              org.apache.parquet.proto.ProtoParquetReader#builder(org.apache.hadoop.fs.Path,boolean)
-              org.apache.parquet.proto.ProtoParquetReader#builder(org.apache.parquet.io.InputFile,boolean)
-
+              org.apache.parquet.column.values.bytestreamsplit.ByteStreamSplitValuesReader#gatherElementDataFromStreams(byte[])
-
-              org.apache.parquet.arrow.schema.SchemaMapping
-
+              org.apache.parquet.arrow.schema.SchemaMapping$TypeMappingVisitor#visit(org.apache.parquet.arrow.schema.SchemaMapping$MapTypeMapping)
               org.apache.parquet.avro.AvroReadSupport#AVRO_REQUESTED_PROJECTION
               org.apache.parquet.avro.AvroReadSupport#AVRO_DATA_SUPPLIER
               org.apache.parquet.hadoop.ParquetFileReader#PARQUET_READ_PARALLELISM
-
-              org.apache.parquet.thrift.struct.ThriftType
-
-              org.apache.parquet.io.api.Binary#get2BytesLittleEndian()
-              org.apache.parquet.schema.LogicalTypeAnnotation$Float16LogicalTypeAnnotation#accept(org.apache.parquet.schema.LogicalTypeAnnotation$LogicalTypeAnnotationVisitor)
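
Usage note (not part of the patch): with the ParquetReader.Builder changes above, the deprecated Hadoop-backed field (conf) and the ParquetConfiguration field (configuration) are kept in sync, so both withConf overloads remain usable. A minimal sketch, assuming the Avro binding and a locally available file; the file path, record type, and class name below are illustrative only:

import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.avro.AvroParquetReader;
import org.apache.parquet.conf.PlainParquetConfiguration;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.util.HadoopInputFile;

public class WithConfExample {
  public static void main(String[] args) throws Exception {
    Configuration hadoopConf = new Configuration();
    // Illustrative path; replace with a real Parquet file.
    HadoopInputFile in = HadoopInputFile.fromPath(new Path("/tmp/example.parquet"), hadoopConf);

    // Pre-existing Hadoop-based overload: withConf(Configuration).
    try (ParquetReader<GenericRecord> reader = AvroParquetReader.<GenericRecord>builder(in)
        .withConf(hadoopConf)
        .build()) {
      GenericRecord first = reader.read();
    }

    // Hadoop-free overload added alongside it: withConf(ParquetConfiguration).
    try (ParquetReader<GenericRecord> reader = AvroParquetReader.<GenericRecord>builder(in)
        .withConf(new PlainParquetConfiguration())
        .build()) {
      GenericRecord first = reader.read();
    }
  }
}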
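
Similarly, a minimal sketch of how the reworked CodecFactory constructor populates the deprecated protected Hadoop Configuration field only when the supplied ParquetConfiguration wraps one; constructing CodecFactory directly is unusual (it normally comes from the writer/reader internals), the page size is arbitrary, and PlainParquetConfiguration's no-arg constructor is assumed:

import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.conf.HadoopParquetConfiguration;
import org.apache.parquet.conf.PlainParquetConfiguration;
import org.apache.parquet.hadoop.CodecFactory;

public class CodecFactoryConfigurationSketch {
  public static void main(String[] args) {
    int pageSize = 1024 * 1024;

    // Wraps a Hadoop Configuration: per the diff, the deprecated 'configuration' field
    // is unwrapped via HadoopParquetConfiguration.getConfiguration() and stays non-null.
    CodecFactory hadoopBacked = new CodecFactory(new HadoopParquetConfiguration(new Configuration()), pageSize);

    // Hadoop-free configuration: the deprecated field is left null and 'conf' carries the codec settings.
    CodecFactory hadoopFree = new CodecFactory(new PlainParquetConfiguration(), pageSize);

    hadoopBacked.release();
    hadoopFree.release();
  }
}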