Skip to content

Commit 0bf2b45

Browse files
committed
PARQUET-212: Read non-thrift files if a Thrift class is supplied.
Parquet-thrift can now read files not written by parquet-thrift if an appropriate Thrift class is supplied. This adds a check to derive the necessary StructType from a class. Previously, attempting to read a file without Thrift metadata in its properties would return a null ThriftMetaData and throw an NPE. This also updates the logic in ThriftMetaData.fromExtraMetaData to avoid an NPE when the class is present but the descriptor property is not.
1 parent 4e148dc commit 0bf2b45

File tree

2 files changed

+28
-7
lines changed

2 files changed

+28
-7
lines changed

parquet-thrift/src/main/java/org/apache/parquet/hadoop/thrift/ThriftReadSupport.java

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -203,17 +203,16 @@ private void initThriftClassFromMultipleFiles(Map<String, Set<String>> fileMetad
203203
}
204204

205205
@SuppressWarnings("unchecked")
206-
private void initThriftClass(Map<String, String> fileMetadata, Configuration conf) throws ClassNotFoundException {
206+
private void initThriftClass(ThriftMetaData metadata, Configuration conf) throws ClassNotFoundException {
207207
if (thriftClass != null) {
208208
return;
209209
}
210210
String className = conf.get(THRIFT_READ_CLASS_KEY, null);
211211
if (className == null) {
212-
final ThriftMetaData metaData = ThriftMetaData.fromExtraMetaData(fileMetadata);
213-
if (metaData == null) {
212+
if (metadata == null) {
214213
throw new ParquetDecodingException("Could not read file as the Thrift class is not provided and could not be resolved from the file");
215214
}
216-
thriftClass = (Class<T>)metaData.getThriftClass();
215+
thriftClass = (Class<T>)metadata.getThriftClass();
217216
} else {
218217
thriftClass = (Class<T>)Class.forName(className);
219218
}
@@ -225,7 +224,12 @@ public RecordMaterializer<T> prepareForRead(Configuration configuration,
225224
org.apache.parquet.hadoop.api.ReadSupport.ReadContext readContext) {
226225
ThriftMetaData thriftMetaData = ThriftMetaData.fromExtraMetaData(keyValueMetaData);
227226
try {
228-
initThriftClass(keyValueMetaData, configuration);
227+
initThriftClass(thriftMetaData, configuration);
228+
229+
// if there was no metadata in the file, get it from the requested class
230+
if (thriftMetaData == null) {
231+
thriftMetaData = ThriftMetaData.fromThriftClass(thriftClass);
232+
}
229233

230234
String converterClassName = configuration.get(RECORD_CONVERTER_CLASS_KEY, RECORD_CONVERTER_DEFAULT);
231235
@SuppressWarnings("unchecked")

parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftMetaData.java

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import org.apache.parquet.hadoop.BadConfigurationException;
2424
import org.apache.parquet.thrift.struct.ThriftType;
2525
import org.apache.parquet.thrift.struct.ThriftType.StructType;
26+
import org.apache.thrift.TBase;
2627

2728
/**
2829
*
@@ -86,19 +87,35 @@ public StructType getDescriptor() {
8687
* Reads ThriftMetadata from the parquet file footer.
8788
*
8889
* @param extraMetaData extraMetaData field of the parquet footer
89-
* @return
90+
* @return the ThriftMetaData used to write a data file, or null if either the Thrift class name or the descriptor is missing
9091
*/
9192
public static ThriftMetaData fromExtraMetaData(
9293
Map<String, String> extraMetaData) {
9394
final String thriftClassName = extraMetaData.get(THRIFT_CLASS);
9495
final String thriftDescriptorString = extraMetaData.get(THRIFT_DESCRIPTOR);
95-
if (thriftClassName == null && thriftDescriptorString == null) {
96+
if (thriftClassName == null || thriftDescriptorString == null) {
9697
return null;
9798
}
9899
final StructType descriptor = parseDescriptor(thriftDescriptorString);
99100
return new ThriftMetaData(thriftClassName, descriptor);
100101
}
101102

103+
/**
104+
* Creates ThriftMetaData from a Thrift-generated class.
105+
*
106+
* @param thriftClass a Thrift-generated class
107+
* @return ThriftMetaData for the given class, or null if the class is null or is not a TBase
108+
*/
109+
@SuppressWarnings("unchecked")
110+
public static ThriftMetaData fromThriftClass(Class<?> thriftClass) {
111+
if (thriftClass != null && TBase.class.isAssignableFrom(thriftClass)) {
112+
Class<? extends TBase<?, ?>> tClass = (Class<? extends TBase<?, ?>>) thriftClass;
113+
StructType descriptor = new ThriftSchemaConverter().toStructType(tClass);
114+
return new ThriftMetaData(thriftClass.getName(), descriptor);
115+
}
116+
return null;
117+
}
118+
102119
private static StructType parseDescriptor(String json) {
103120
try {
104121
return (StructType)ThriftType.fromJSON(json);

0 commit comments

Comments
 (0)