Skip to content

Commit 9c8fde0

Browse files
wgtmac and RustedBones
authored
PARQUET-2468: ParquetMetadata must convert to json (#1349) (#1360)
Co-authored-by: Michel Davit <michel@davit.fr>
1 parent 78a36be commit 9c8fde0

File tree

8 files changed

+100
-13
lines changed

8 files changed

+100
-13
lines changed

parquet-hadoop/pom.xml

+10
Original file line numberDiff line numberDiff line change
@@ -118,11 +118,21 @@
118118
<artifactId>jackson-core</artifactId>
119119
<version>${jackson.version}</version>
120120
</dependency>
121+
<dependency>
122+
<groupId>${jackson.groupId}</groupId>
123+
<artifactId>jackson-annotations</artifactId>
124+
<version>${jackson.version}</version>
125+
</dependency>
121126
<dependency>
122127
<groupId>${jackson.groupId}</groupId>
123128
<artifactId>jackson-databind</artifactId>
124129
<version>${jackson-databind.version}</version>
125130
</dependency>
131+
<dependency>
132+
<groupId>${jackson.datatype.groupId}</groupId>
133+
<artifactId>jackson-datatype-jdk8</artifactId>
134+
<version>${jackson-modules-java8.version}</version>
135+
</dependency>
126136
<dependency>
127137
<groupId>org.xerial.snappy</groupId>
128138
<artifactId>snappy-java</artifactId>

parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java

+4
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import static org.apache.parquet.column.Encoding.RLE_DICTIONARY;
2323
import static org.apache.parquet.format.Util.readColumnMetaData;
2424

25+
import com.fasterxml.jackson.annotation.JsonIgnore;
2526
import java.io.ByteArrayInputStream;
2627
import java.io.IOException;
2728
import java.util.Set;
@@ -338,6 +339,7 @@ public ColumnPath getPath() {
338339
* @deprecated will be removed in 2.0.0. Use {@link #getPrimitiveType()} instead.
339340
*/
340341
@Deprecated
342+
@JsonIgnore
341343
public PrimitiveTypeName getType() {
342344
decryptIfNeeded();
343345
return properties.getType();
@@ -380,13 +382,15 @@ public PrimitiveType getPrimitiveType() {
380382
/**
381383
* @return the stats for this column
382384
*/
385+
@JsonIgnore
383386
public abstract Statistics getStatistics();
384387

385388
/**
386389
* Method should be considered private
387390
*
388391
* @return the size stats for this column
389392
*/
393+
@JsonIgnore
390394
public SizeStatistics getSizeStatistics() {
391395
throw new UnsupportedOperationException("SizeStatistics is not implemented");
392396
}

parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkProperties.java

+2
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
*/
1919
package org.apache.parquet.hadoop.metadata;
2020

21+
import com.fasterxml.jackson.annotation.JsonIgnore;
2122
import java.util.Arrays;
2223
import java.util.Set;
2324
import org.apache.parquet.column.Encoding;
@@ -76,6 +77,7 @@ public ColumnPath getPath() {
7677
* @deprecated will be removed in 2.0.0. Use {@link #getPrimitiveType()} instead.
7778
*/
7879
@Deprecated
80+
@JsonIgnore
7981
public PrimitiveTypeName getType() {
8082
return type.getPrimitiveTypeName();
8183
}

parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/FileMetaData.java

+2
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import static java.util.Collections.unmodifiableMap;
2222

23+
import com.fasterxml.jackson.annotation.JsonIgnore;
2324
import java.io.Serializable;
2425
import java.util.Map;
2526
import java.util.Objects;
@@ -109,6 +110,7 @@ public String getCreatedBy() {
109110
return createdBy;
110111
}
111112

113+
@JsonIgnore
112114
public InternalFileDecryptor getFileDecryptor() {
113115
return fileDecryptor;
114116
}

parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ParquetMetadata.java

+25-10
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@
1919
package org.apache.parquet.hadoop.metadata;
2020

2121
import com.fasterxml.jackson.databind.ObjectMapper;
22+
import com.fasterxml.jackson.databind.ObjectWriter;
23+
import com.fasterxml.jackson.databind.SerializationFeature;
24+
import com.fasterxml.jackson.datatype.jdk8.Jdk8Module;
2225
import java.io.IOException;
2326
import java.io.StringReader;
2427
import java.io.StringWriter;
@@ -32,6 +35,14 @@ public class ParquetMetadata {
3235

3336
private static final ObjectMapper objectMapper = new ObjectMapper();
3437

38+
static {
39+
// Disable FAIL_ON_EMPTY_BEANS on the ObjectMapper. With this feature enabled, parquet-cascading tests fail,
40+
// because LogicalTypeAnnotation implementations are classes without any property.
41+
objectMapper.disable(SerializationFeature.FAIL_ON_EMPTY_BEANS);
42+
// Add support for Java 8 Optional
43+
objectMapper.registerModule(new Jdk8Module());
44+
}
45+
3546
/**
3647
* @param parquetMetaData an instance of parquet metadata to convert
3748
* @return the json representation
@@ -50,19 +61,23 @@ public static String toPrettyJSON(ParquetMetadata parquetMetaData) {
5061

5162
private static String toJSON(ParquetMetadata parquetMetaData, boolean isPrettyPrint) {
5263
try (StringWriter stringWriter = new StringWriter()) {
64+
Object objectToPrint;
65+
if (parquetMetaData.getFileMetaData() == null
66+
|| parquetMetaData.getFileMetaData().getEncryptionType()
67+
== FileMetaData.EncryptionType.UNENCRYPTED) {
68+
objectToPrint = parquetMetaData;
69+
} else {
70+
objectToPrint = parquetMetaData.getFileMetaData();
71+
}
72+
73+
ObjectWriter writer;
5374
if (isPrettyPrint) {
54-
Object objectToPrint;
55-
if (parquetMetaData.getFileMetaData() == null
56-
|| parquetMetaData.getFileMetaData().getEncryptionType()
57-
== FileMetaData.EncryptionType.UNENCRYPTED) {
58-
objectToPrint = parquetMetaData;
59-
} else {
60-
objectToPrint = parquetMetaData.getFileMetaData();
61-
}
62-
objectMapper.writerWithDefaultPrettyPrinter().writeValue(stringWriter, objectToPrint);
75+
writer = objectMapper.writerWithDefaultPrettyPrinter();
6376
} else {
64-
objectMapper.writeValue(stringWriter, parquetMetaData);
77+
writer = objectMapper.writer();
6578
}
79+
80+
writer.writeValue(stringWriter, objectToPrint);
6681
return stringWriter.toString();
6782
} catch (IOException e) {
6883
throw new RuntimeException(e);

parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java

+38-3
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,10 @@
8787
import org.apache.parquet.column.statistics.LongStatistics;
8888
import org.apache.parquet.column.statistics.SizeStatistics;
8989
import org.apache.parquet.column.statistics.Statistics;
90+
import org.apache.parquet.crypto.DecryptionPropertiesFactory;
91+
import org.apache.parquet.crypto.EncryptionPropertiesFactory;
92+
import org.apache.parquet.crypto.FileDecryptionProperties;
93+
import org.apache.parquet.crypto.InternalFileDecryptor;
9094
import org.apache.parquet.example.Paper;
9195
import org.apache.parquet.example.data.Group;
9296
import org.apache.parquet.example.data.simple.SimpleGroup;
@@ -635,18 +639,49 @@ public void randomTestFilterMetaData() {
635639
}
636640

637641
@Test
638-
public void testNullFieldMetadataDebugLogging() {
642+
public void testFieldMetadataDebugLogging() {
639643
MessageType schema = parseMessageType("message test { optional binary some_null_field; }");
640644
org.apache.parquet.hadoop.metadata.FileMetaData fileMetaData =
641-
new org.apache.parquet.hadoop.metadata.FileMetaData(schema, new HashMap<String, String>(), null);
642-
List<BlockMetaData> blockMetaDataList = new ArrayList<BlockMetaData>();
645+
new org.apache.parquet.hadoop.metadata.FileMetaData(
646+
schema,
647+
new HashMap<>(),
648+
null,
649+
org.apache.parquet.hadoop.metadata.FileMetaData.EncryptionType.UNENCRYPTED,
650+
null);
651+
List<BlockMetaData> blockMetaDataList = new ArrayList<>();
643652
BlockMetaData blockMetaData = new BlockMetaData();
644653
blockMetaData.addColumn(createColumnChunkMetaData());
645654
blockMetaDataList.add(blockMetaData);
646655
ParquetMetadata metadata = new ParquetMetadata(fileMetaData, blockMetaDataList);
647656
ParquetMetadata.toJSON(metadata);
648657
}
649658

659+
@Test
660+
public void testEncryptedFieldMetadataDebugLogging() {
661+
Configuration conf = new Configuration();
662+
conf.set(
663+
EncryptionPropertiesFactory.CRYPTO_FACTORY_CLASS_PROPERTY_NAME,
664+
"org.apache.parquet.crypto.SampleDecryptionPropertiesFactory");
665+
DecryptionPropertiesFactory decryptionPropertiesFactory = DecryptionPropertiesFactory.loadFactory(conf);
666+
FileDecryptionProperties decryptionProperties =
667+
decryptionPropertiesFactory.getFileDecryptionProperties(conf, null);
668+
669+
MessageType schema = parseMessageType("message test { optional binary some_null_field; }");
670+
671+
org.apache.parquet.hadoop.metadata.FileMetaData fileMetaData =
672+
new org.apache.parquet.hadoop.metadata.FileMetaData(
673+
schema,
674+
new HashMap<>(),
675+
null,
676+
org.apache.parquet.hadoop.metadata.FileMetaData.EncryptionType.ENCRYPTED_FOOTER,
677+
new InternalFileDecryptor(decryptionProperties));
678+
679+
List<BlockMetaData> blockMetaDataList = new ArrayList<>();
680+
ParquetMetadata metadata = new ParquetMetadata(fileMetaData, blockMetaDataList);
681+
ParquetMetadata.toJSON(metadata);
682+
System.out.println(ParquetMetadata.toPrettyJSON(metadata));
683+
}
684+
650685
@Test
651686
public void testMetadataToJson() {
652687
ParquetMetadata metadata = new ParquetMetadata(null, null);

parquet-jackson/pom.xml

+18
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,22 @@
3838
<artifactId>jackson-core</artifactId>
3939
<version>${jackson.version}</version>
4040
</dependency>
41+
<dependency>
42+
<groupId>${jackson.groupId}</groupId>
43+
<artifactId>jackson-annotations</artifactId>
44+
<version>${jackson.version}</version>
45+
</dependency>
4146
<dependency>
4247
<groupId>com.fasterxml.jackson.core</groupId>
4348
<artifactId>jackson-databind</artifactId>
4449
<version>${jackson-databind.version}</version>
4550
</dependency>
51+
<!-- Add support for Java 8 Optional -->
52+
<dependency>
53+
<groupId>com.fasterxml.jackson.datatype</groupId>
54+
<artifactId>jackson-datatype-jdk8</artifactId>
55+
<version>${jackson-modules-java8.version}</version>
56+
</dependency>
4657
</dependencies>
4758

4859
<properties>
@@ -70,6 +81,7 @@
7081
<artifactSet>
7182
<includes>
7283
<include>${jackson.groupId}:*</include>
84+
<include>${jackson.datatype.groupId}:*</include>
7385
</includes>
7486
</artifactSet>
7587
<filters>
@@ -79,6 +91,12 @@
7991
<include>**</include>
8092
</includes>
8193
</filter>
94+
<filter>
95+
<artifact>${jackson.datatype.groupId}:*</artifact>
96+
<includes>
97+
<include>**</include>
98+
</includes>
99+
</filter>
82100
</filters>
83101
<relocations>
84102
<relocation>

pom.xml

+1
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@
6868
<jackson.package>com.fasterxml.jackson</jackson.package>
6969
<jackson.version>2.17.0</jackson.version>
7070
<jackson-databind.version>2.17.0</jackson-databind.version>
71+
<jackson-modules-java8.version>2.17.0</jackson-modules-java8.version>
7172
<japicmp.version>0.21.0</japicmp.version>
7273
<javax.annotation.version>1.3.2</javax.annotation.version>
7374
<spotless.version>2.30.0</spotless.version>

0 commit comments

Comments
 (0)