Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@ public AvroWriteSupport(MessageType schema, Schema avroSchema,
this.model = model;
}

@Override
public String getName() {
return "avro";
}

/**
* @see org.apache.parquet.avro.AvroParquetOutputFormat#setSchema(org.apache.hadoop.mapreduce.Job, org.apache.avro.Schema)
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ public class TupleWriteSupport extends WriteSupport<TupleEntry> {
private MessageType rootSchema;
public static final String PARQUET_CASCADING_SCHEMA = "parquet.cascading.schema";

@Override
public String getName() {
return "cascading";
}

@Override
public WriteContext init(Configuration configuration) {
String schema = configuration.get(PARQUET_CASCADING_SCHEMA);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,10 @@ public void close() throws IOException, InterruptedException {
flushRowGroupToStore();
FinalizedWriteContext finalWriteContext = writeSupport.finalizeWrite();
Map<String, String> finalMetadata = new HashMap<String, String>(extraMetaData);
String modelName = writeSupport.getName();
if (modelName != null) {
finalMetadata.put(ParquetWriter.OBJECT_MODEL_NAME_PROP, modelName);
}
finalMetadata.putAll(finalWriteContext.getExtraMetaData());
parquetFileWriter.end(finalMetadata);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ public class ParquetWriter<T> implements Closeable {
public static final WriterVersion DEFAULT_WRITER_VERSION =
WriterVersion.PARQUET_1_0;

public static final String OBJECT_MODEL_NAME_PROP = "writer.model.name";

// max size (bytes) to write as padding and the min size of a row group
public static final int MAX_PADDING_SIZE_DEFAULT = 0;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@ public void write(T record) {
delegate.write(record);
}

@Override
public String getName() {
return delegate.getName();
}

@Override
public WriteSupport.FinalizedWriteContext finalizeWrite() {
return delegate.finalizeWrite();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,18 @@ public Map<String, String> getExtraMetaData() {
*/
public abstract void write(T record);

/**
* Called to get a name to identify the WriteSupport object model.
* If not null, this is added to the file footer metadata.
* <p>
* Defining this method will be required in a future API version.
*
* @return a String name for file metadata.
*/
public String getName() {
return null;
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we could also add a constructor parameter. (and keep the original default constructor for compatibility)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and mark the old one deprecated

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My reasoning is that using the constructor requires updates in a lot more places and it would be easier to overlook cases that need it.


/**
* called once in the end after the last record was written
* @return information to be added in the file
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ public GroupWriteSupport() {
this.extraMetaData = extraMetaData;
}

@Override
public String getName() {
return "example";
}

@Override
public org.apache.parquet.hadoop.api.WriteSupport.WriteContext init(Configuration configuration) {
// if present, prefer the schema passed to the constructor
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,9 @@ public void test() throws Exception {
}
}
}
assertEquals("Object model property should be example",
"example", footer.getFileMetaData().getKeyValueMetaData()
.get(ParquetWriter.OBJECT_MODEL_NAME_PROP));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ public TupleWriteSupport(Schema pigSchema) {
this.rootPigSchema = pigSchema;
}

@Override
public String getName() {
return "pig";
}

public Schema getPigSchema() {
return rootPigSchema;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ public ProtoWriteSupport(Class<? extends Message> protobufClass) {
this.protoMessage = protobufClass;
}

@Override
public String getName() {
return "protobuf";
}

public static void setSchema(Configuration configuration, Class<? extends Message> protoClass) {
configuration.setClass(PB_CLASS_WRITE, protoClass, Message.class);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ public ScroogeWriteSupport(Class<T> thriftClass) {
super(thriftClass);
}

@Override
public String getName() {
return "scrooge";
}

@Override
protected StructType getThriftStruct() {
ScroogeStructConverter schemaConverter = new ScroogeStructConverter();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ public TBaseWriteSupport(Class<T> thriftClass) {
super(thriftClass);
}

@Override
public String getName() {
return "thrift";
}

@Override
protected StructType getThriftStruct() {
return ThriftSchemaConverter.toStructType(thriftClass);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@ public ThriftBytesWriteSupport(TProtocolFactory protocolFactory, Class<? extends
}
}

@Override
public String getName() {
return "thrift";
}

@Override
public WriteContext init(Configuration configuration) {
if (this.protocolFactory == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,11 @@ public ThriftWriteSupport(Class<T> thriftClass) {
this.writeSupport = new TBaseWriteSupport(thriftClass);
}

@Override
public String getName() {
return writeSupport.getName();
}

@Override
public WriteContext init(Configuration configuration) {
return this.writeSupport.init(configuration);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@ public TupleToThriftWriteSupport(String className) {
this.className = className;
}

@Override
public String getName() {
return "thrift";
}

@SuppressWarnings({"rawtypes", "unchecked"})
@Override
public WriteContext init(Configuration configuration) {
Expand Down