Skip to content

Commit 43c5bdd

Browse files
author
Piyush Narang
committed
Keep avro on char sequence
1 parent 2d50c8c commit 43c5bdd

File tree

3 files changed

+38
-13
lines changed

3 files changed

+38
-13
lines changed

parquet-avro/src/main/java/org/apache/parquet/avro/AvroWriteSupport.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -364,7 +364,7 @@ private Binary fromAvroString(Object value) {
364364
Utf8 utf8 = (Utf8) value;
365365
return Binary.fromReusedByteArray(utf8.getBytes(), 0, utf8.getByteLength());
366366
}
367-
return Binary.fromString(value.toString());
367+
return Binary.fromCharSequence((CharSequence) value);
368368
}
369369

370370
private static GenericData getDataModel(Configuration conf) {

parquet-avro/src/test/java/org/apache/parquet/avro/TestReflectLogicalTypes.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -426,7 +426,7 @@ public void testWriteNullableUUID() throws IOException {
426426
read(REFLECT, nullableUuidStringSchema, test));
427427
}
428428

429-
// @Test(expected = ClassCastException.class)
429+
@Test(expected = ClassCastException.class)
430430
public void testWriteUUIDMissingLogicalType() throws IOException {
431431
Schema uuidSchema = SchemaBuilder.record(RecordWithUUID.class.getName())
432432
.fields().requiredString("uuid").endRecord();

parquet-column/src/main/java/org/apache/parquet/io/api/Binary.java

Lines changed: 36 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ public void writeTo(DataOutput out) throws IOException {
214214

215215
}
216216

217-
private static class FromStringBinary extends ByteArrayBackedBinary {
217+
private static class FromStringBinary extends ByteBufferBackedBinary {
218218
public FromStringBinary(String value) {
219219
// reused is false, because we do not hold on to the buffer after
220220
// conversion, and nobody else has a handle to it
@@ -226,23 +226,44 @@ public String toString() {
226226
return "Binary{\"" + toStringUsingUTF8() + "\"}";
227227
}
228228

229-
private static final ThreadLocal<CharsetEncoder> ENCODER =
230-
new ThreadLocal<CharsetEncoder>() {
231-
@Override
232-
protected CharsetEncoder initialValue() {
233-
return StandardCharsets.UTF_8.newEncoder();
234-
}
235-
};
236-
237-
private static byte[] encodeUTF8(String value) {
229+
private static ByteBuffer encodeUTF8(String value) {
238230
try {
239-
return value.getBytes("UTF-8");
231+
return ByteBuffer.wrap(value.getBytes("UTF-8"));
240232
} catch (UnsupportedEncodingException e) {
241233
throw new ParquetEncodingException("UTF-8 not supported.", e);
242234
}
243235
}
244236
}
245237

238+
private static class FromCharSequenceBinary extends ByteBufferBackedBinary {
239+
public FromCharSequenceBinary(CharSequence value) {
240+
// reused is false, because we do not hold on to the buffer after
241+
// conversion, and nobody else has a handle to it
242+
super(encodeUTF8(value), false);
243+
}
244+
245+
@Override
246+
public String toString() {
247+
return "Binary{\"" + toStringUsingUTF8() + "\"}";
248+
}
249+
250+
private static final ThreadLocal<CharsetEncoder> ENCODER =
251+
new ThreadLocal<CharsetEncoder>() {
252+
@Override
253+
protected CharsetEncoder initialValue() {
254+
return StandardCharsets.UTF_8.newEncoder();
255+
}
256+
};
257+
258+
private static ByteBuffer encodeUTF8(CharSequence value) {
259+
try {
260+
return ENCODER.get().encode(CharBuffer.wrap(value));
261+
} catch (CharacterCodingException e) {
262+
throw new ParquetEncodingException("UTF-8 not supported.", e);
263+
}
264+
}
265+
}
266+
246267
public static Binary fromReusedByteArray(final byte[] value, final int offset, final int length) {
247268
return new ByteArraySliceBackedBinary(value, offset, length, true);
248269
}
@@ -569,6 +590,10 @@ public static Binary fromString(String value) {
569590
return new FromStringBinary(value);
570591
}
571592

593+
public static Binary fromCharSequence(CharSequence value) {
594+
return new FromCharSequenceBinary(value);
595+
}
596+
572597
/**
573598
* @see {@link Arrays#hashCode(byte[])}
574599
* @param array

0 commit comments

Comments
 (0)