From 4ad9fe952c734b7278ae8b1fc907de093ad9bee5 Mon Sep 17 00:00:00 2001 From: James Duong Date: Tue, 7 Nov 2023 12:32:22 -0800 Subject: [PATCH] GH-38614: [Java] Add writer helper methods to more writer classes - Rename from writeTo$type to write$type for consistency with other methods - Add new helper methods to PromotableWriter - Add new helper methods to complex writers such as list and union --- .../templates/AbstractFieldWriter.java | 11 +- .../AbstractPromotableFieldWriter.java | 32 +++ .../codegen/templates/ComplexWriters.java | 16 +- .../templates/UnionFixedSizeListWriter.java | 56 +++++ .../codegen/templates/UnionListWriter.java | 37 +++ .../main/codegen/templates/UnionWriter.java | 36 +++ .../vector/complex/impl/PromotableWriter.java | 63 +++++- .../arrow/vector/TestFixedSizeListVector.java | 92 +++++++- .../complex/impl/TestPromotableWriter.java | 197 ++++++++++++++++ .../complex/writer/TestComplexWriter.java | 211 ++++++++++++++++++ .../complex/writer/TestSimpleWriter.java | 16 +- 11 files changed, 741 insertions(+), 26 deletions(-) diff --git a/java/vector/src/main/codegen/templates/AbstractFieldWriter.java b/java/vector/src/main/codegen/templates/AbstractFieldWriter.java index bb4ee45eaa0..6c2368117f7 100644 --- a/java/vector/src/main/codegen/templates/AbstractFieldWriter.java +++ b/java/vector/src/main/codegen/templates/AbstractFieldWriter.java @@ -27,6 +27,9 @@ /* * This class is generated using freemarker and the ${.template_name} template. + * Note that changes to the AbstractFieldWriter template should also get reflected in the + * AbstractPromotableFieldWriter, ComplexWriters, UnionFixedSizeListWriter, UnionListWriter + * and UnionWriter templates and the PromotableWriter concrete code. */ @SuppressWarnings("unused") abstract class AbstractFieldWriter extends AbstractBaseWriter implements FieldWriter { @@ -125,19 +128,19 @@ public void write(${name}Holder holder) { <#if minor.class?ends_with("VarBinary")> - public void writeTo${minor.class}(byte[] value) { + public void write${minor.class}(byte[] value) { fail("${name}"); } - public void writeTo${minor.class}(byte[] value, int offset, int length) { + public void write${minor.class}(byte[] value, int offset, int length) { fail("${name}"); } - public void writeTo${minor.class}(ByteBuffer value) { + public void write${minor.class}(ByteBuffer value) { fail("${name}"); } - public void writeTo${minor.class}(ByteBuffer value, int offset, int length) { + public void write${minor.class}(ByteBuffer value, int offset, int length) { fail("${name}"); } diff --git a/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java b/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java index 2f963a9df0d..59f9fb5b809 100644 --- a/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java +++ b/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java @@ -221,6 +221,38 @@ public void write(${name}Holder holder) { } + <#if minor.class?ends_with("VarBinary")> + @Override + public void write${minor.class}(byte[] value) { + getWriter(MinorType.${name?upper_case}).write${minor.class}(value); + } + + @Override + public void write${minor.class}(byte[] value, int offset, int length) { + getWriter(MinorType.${name?upper_case}).write${minor.class}(value, offset, length); + } + + @Override + public void write${minor.class}(ByteBuffer value) { + getWriter(MinorType.${name?upper_case}).write${minor.class}(value); + } + + @Override + public void write${minor.class}(ByteBuffer value, int offset, int length) { + getWriter(MinorType.${name?upper_case}).write${minor.class}(value, offset, length); + } + <#elseif minor.class?ends_with("VarChar")> + @Override + public void write${minor.class}(Text value) { + getWriter(MinorType.${name?upper_case}).write${minor.class}(value); + } + + @Override + public void write${minor.class}(String value) { + getWriter(MinorType.${name?upper_case}).write${minor.class}(value); + } + + public void writeNull() { } diff --git a/java/vector/src/main/codegen/templates/ComplexWriters.java b/java/vector/src/main/codegen/templates/ComplexWriters.java index 51a52a6e307..2e3caae1f0f 100644 --- a/java/vector/src/main/codegen/templates/ComplexWriters.java +++ b/java/vector/src/main/codegen/templates/ComplexWriters.java @@ -194,22 +194,22 @@ public void writeNull() { <#if minor.class?ends_with("VarBinary")> - public void writeTo${minor.class}(byte[] value) { + public void write${minor.class}(byte[] value) { vector.setSafe(idx(), value); vector.setValueCount(idx() + 1); } - public void writeTo${minor.class}(byte[] value, int offset, int length) { + public void write${minor.class}(byte[] value, int offset, int length) { vector.setSafe(idx(), value, offset, length); vector.setValueCount(idx() + 1); } - public void writeTo${minor.class}(ByteBuffer value) { + public void write${minor.class}(ByteBuffer value) { vector.setSafe(idx(), value, 0, value.remaining()); vector.setValueCount(idx() + 1); } - public void writeTo${minor.class}(ByteBuffer value, int offset, int length) { + public void write${minor.class}(ByteBuffer value, int offset, int length) { vector.setSafe(idx(), value, offset, length); vector.setValueCount(idx() + 1); } @@ -259,13 +259,13 @@ public interface ${eName}Writer extends BaseWriter { <#if minor.class?ends_with("VarBinary")> - public void writeTo${minor.class}(byte[] value); + public void write${minor.class}(byte[] value); - public void writeTo${minor.class}(byte[] value, int offset, int length); + public void write${minor.class}(byte[] value, int offset, int length); - public void writeTo${minor.class}(ByteBuffer value); + public void write${minor.class}(ByteBuffer value); - public void writeTo${minor.class}(ByteBuffer value, int offset, int length); + public void write${minor.class}(ByteBuffer value, int offset, int length); <#if minor.class?ends_with("VarChar")> diff --git a/java/vector/src/main/codegen/templates/UnionFixedSizeListWriter.java b/java/vector/src/main/codegen/templates/UnionFixedSizeListWriter.java index 55c661bfc60..3436e3a9676 100644 --- a/java/vector/src/main/codegen/templates/UnionFixedSizeListWriter.java +++ b/java/vector/src/main/codegen/templates/UnionFixedSizeListWriter.java @@ -295,6 +295,62 @@ public void writeBigEndianBytesToDecimal256(byte[] value, ArrowType arrowType) { <#assign name = minor.class?cap_first /> <#assign fields = minor.fields!type.fields /> <#assign uncappedName = name?uncap_first/> + <#if minor.class?ends_with("VarBinary")> + @Override + public void write${minor.class}(byte[] value) { + if (writer.idx() >= (idx() + 1) * listSize) { + throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize)); + } + writer.write${minor.class}(value); + writer.setPosition(writer.idx() + 1); + } + + @Override + public void write${minor.class}(byte[] value, int offset, int length) { + if (writer.idx() >= (idx() + 1) * listSize) { + throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize)); + } + writer.write${minor.class}(value, offset, length); + writer.setPosition(writer.idx() + 1); + } + + @Override + public void write${minor.class}(ByteBuffer value) { + if (writer.idx() >= (idx() + 1) * listSize) { + throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize)); + } + writer.write${minor.class}(value); + writer.setPosition(writer.idx() + 1); + } + + @Override + public void write${minor.class}(ByteBuffer value, int offset, int length) { + if (writer.idx() >= (idx() + 1) * listSize) { + throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize)); + } + writer.write${minor.class}(value, offset, length); + writer.setPosition(writer.idx() + 1); + } + <#elseif minor.class?ends_with("VarChar")> + @Override + public void write${minor.class}(Text value) { + if (writer.idx() >= (idx() + 1) * listSize) { + throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize)); + } + writer.write${minor.class}(value); + writer.setPosition(writer.idx() + 1); + } + + @Override + public void write${minor.class}(String value) { + if (writer.idx() >= (idx() + 1) * listSize) { + throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize)); + } + writer.write${minor.class}(value); + writer.setPosition(writer.idx() + 1); + } + + <#if !minor.typeParams?? > @Override public void write${name}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, ) { diff --git a/java/vector/src/main/codegen/templates/UnionListWriter.java b/java/vector/src/main/codegen/templates/UnionListWriter.java index fac75a9ce56..5c0565ee271 100644 --- a/java/vector/src/main/codegen/templates/UnionListWriter.java +++ b/java/vector/src/main/codegen/templates/UnionListWriter.java @@ -274,6 +274,43 @@ public void write(${name}Holder holder) { writer.write${name}(<#list fields as field>holder.${field.name}<#if field_has_next>, ); writer.setPosition(writer.idx()+1); } + + + <#if minor.class?ends_with("VarBinary")> + @Override + public void write${minor.class}(byte[] value) { + writer.write${minor.class}(value); + writer.setPosition(writer.idx() + 1); + } + + @Override + public void write${minor.class}(byte[] value, int offset, int length) { + writer.write${minor.class}(value, offset, length); + writer.setPosition(writer.idx() + 1); + } + + @Override + public void write${minor.class}(ByteBuffer value) { + writer.write${minor.class}(value); + writer.setPosition(writer.idx() + 1); + } + + @Override + public void write${minor.class}(ByteBuffer value, int offset, int length) { + writer.write${minor.class}(value, offset, length); + writer.setPosition(writer.idx() + 1); + } + <#elseif minor.class?ends_with("VarChar")> + @Override + public void write${minor.class}(Text value) { + writer.write${minor.class}(value); + writer.setPosition(writer.idx() + 1); + } + + public void write${minor.class}(String value) { + writer.write${minor.class}(value); + writer.setPosition(writer.idx() + 1); + } diff --git a/java/vector/src/main/codegen/templates/UnionWriter.java b/java/vector/src/main/codegen/templates/UnionWriter.java index 4efd1026cac..08dbf24324b 100644 --- a/java/vector/src/main/codegen/templates/UnionWriter.java +++ b/java/vector/src/main/codegen/templates/UnionWriter.java @@ -302,6 +302,42 @@ public void write(${name}Holder holder) { get${name}Writer(arrowType).setPosition(idx()); get${name}Writer(arrowType).writeBigEndianBytesTo${name}(value, arrowType); } + <#elseif minor.class?ends_with("VarBinary")> + @Override + public void write${minor.class}(byte[] value) { + get${name}Writer().setPosition(idx()); + get${name}Writer().write${minor.class}(value); + } + + @Override + public void write${minor.class}(byte[] value, int offset, int length) { + get${name}Writer().setPosition(idx()); + get${name}Writer().write${minor.class}(value, offset, length); + } + + @Override + public void write${minor.class}(ByteBuffer value) { + get${name}Writer().setPosition(idx()); + get${name}Writer().write${minor.class}(value); + } + + @Override + public void write${minor.class}(ByteBuffer value, int offset, int length) { + get${name}Writer().setPosition(idx()); + get${name}Writer().write${minor.class}(value, offset, length); + } + <#elseif minor.class?ends_with("VarChar")> + @Override + public void write${minor.class}(${friendlyType} value) { + get${name}Writer().setPosition(idx()); + get${name}Writer().write${minor.class}(value); + } + + @Override + public void write${minor.class}(String value) { + get${name}Writer().setPosition(idx()); + get${name}Writer().write${minor.class}(value); + } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java index d99efceae3e..f7be277f592 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java @@ -18,6 +18,7 @@ package org.apache.arrow.vector.complex.impl; import java.math.BigDecimal; +import java.nio.ByteBuffer; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.vector.FieldVector; @@ -37,6 +38,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.Text; import org.apache.arrow.vector.util.TransferPair; /** @@ -378,7 +380,66 @@ public void writeBigEndianBytesToDecimal256(byte[] value, ArrowType arrowType) { /*bitWidth=*/256)).writeBigEndianBytesToDecimal256(value, arrowType); } - + @Override + public void writeVarBinary(byte[] value) { + getWriter(MinorType.VARBINARY).writeVarBinary(value); + } + + @Override + public void writeVarBinary(byte[] value, int offset, int length) { + getWriter(MinorType.VARBINARY).writeVarBinary(value, offset, length); + } + + @Override + public void writeVarBinary(ByteBuffer value) { + getWriter(MinorType.VARBINARY).writeVarBinary(value); + } + + @Override + public void writeVarBinary(ByteBuffer value, int offset, int length) { + getWriter(MinorType.VARBINARY).writeVarBinary(value, offset, length); + } + + @Override + public void writeLargeVarBinary(byte[] value) { + getWriter(MinorType.LARGEVARBINARY).writeLargeVarBinary(value); + } + + @Override + public void writeLargeVarBinary(byte[] value, int offset, int length) { + getWriter(MinorType.LARGEVARBINARY).writeLargeVarBinary(value, offset, length); + } + + @Override + public void writeLargeVarBinary(ByteBuffer value) { + getWriter(MinorType.LARGEVARBINARY).writeLargeVarBinary(value); + } + + @Override + public void writeLargeVarBinary(ByteBuffer value, int offset, int length) { + getWriter(MinorType.LARGEVARBINARY).writeLargeVarBinary(value, offset, length); + } + + @Override + public void writeVarChar(Text value) { + getWriter(MinorType.VARCHAR).writeVarChar(value); + } + + @Override + public void writeVarChar(String value) { + getWriter(MinorType.VARCHAR).writeVarChar(value); + } + + @Override + public void writeLargeVarChar(Text value) { + getWriter(MinorType.LARGEVARCHAR).writeLargeVarChar(value); + } + + @Override + public void writeLargeVarChar(String value) { + getWriter(MinorType.LARGEVARCHAR).writeLargeVarChar(value); + } + @Override public void allocate() { getWriter().allocate(); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java index 9d7e413a739..0023b1dddb8 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java @@ -24,6 +24,7 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import java.math.BigDecimal; +import java.nio.ByteBuffer; import java.util.Arrays; import java.util.List; @@ -37,6 +38,7 @@ import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.Text; import org.apache.arrow.vector.util.TransferPair; import org.junit.After; import org.junit.Assert; @@ -457,18 +459,98 @@ public void testVectorWithNulls() { assertEquals(4, vector1.getValueCount()); - List realValue1 = vector1.getObject(0); + List realValue1 = vector1.getObject(0); assertEquals(values1, realValue1); - List realValue2 = vector1.getObject(1); + List realValue2 = vector1.getObject(1); assertEquals(values2, realValue2); - List realValue3 = vector1.getObject(2); + List realValue3 = vector1.getObject(2); assertEquals(values3, realValue3); - List realValue4 = vector1.getObject(3); + List realValue4 = vector1.getObject(3); assertEquals(values4, realValue4); } } - private int[] convertListToIntArray(List list) { + @Test + public void testWriteVarCharHelpers() throws Exception { + try (final FixedSizeListVector vector = FixedSizeListVector.empty("vector", /*listSize=*/4, allocator)) { + + UnionFixedSizeListWriter writer = vector.getWriter(); + writer.allocate(); + + writer.startList(); + writer.writeVarChar("row1,1"); + writer.writeVarChar(new Text("row1,2")); + writer.writeNull(); + writer.writeNull(); + writer.endList(); + + assertEquals("row1,1", vector.getObject(0).get(0).toString()); + assertEquals("row1,2", vector.getObject(0).get(1).toString()); + } + } + + @Test + public void testWriteLargeVarCharHelpers() throws Exception { + try (final FixedSizeListVector vector = FixedSizeListVector.empty("vector", /*listSize=*/4, allocator)) { + + UnionFixedSizeListWriter writer = vector.getWriter(); + writer.allocate(); + + writer.startList(); + writer.writeLargeVarChar("row1,1"); + writer.writeLargeVarChar(new Text("row1,2")); + writer.writeNull(); + writer.writeNull(); + writer.endList(); + + assertEquals("row1,1", vector.getObject(0).get(0).toString()); + assertEquals("row1,2", vector.getObject(0).get(1).toString()); + } + } + + @Test + public void testWriteVarBinaryHelpers() throws Exception { + try (final FixedSizeListVector vector = FixedSizeListVector.empty("vector", /*listSize=*/4, allocator)) { + + UnionFixedSizeListWriter writer = vector.getWriter(); + writer.allocate(); + + writer.startList(); + writer.writeVarBinary("row1,1".getBytes()); + writer.writeVarBinary("row1,2".getBytes(), 0, "row1,2".getBytes().length); + writer.writeVarBinary(ByteBuffer.wrap("row1,3".getBytes())); + writer.writeVarBinary(ByteBuffer.wrap("row1,4".getBytes()), 0, "row1,4".getBytes().length); + writer.endList(); + + assertEquals("row1,1", new String((byte[]) (vector.getObject(0).get(0)))); + assertEquals("row1,2", new String((byte[]) (vector.getObject(0).get(1)))); + assertEquals("row1,3", new String((byte[]) (vector.getObject(0).get(2)))); + assertEquals("row1,4", new String((byte[]) (vector.getObject(0).get(3)))); + } + } + + @Test + public void testWriteLargeVarBinaryHelpers() throws Exception { + try (final FixedSizeListVector vector = FixedSizeListVector.empty("vector", /*listSize=*/4, allocator)) { + + UnionFixedSizeListWriter writer = vector.getWriter(); + writer.allocate(); + + writer.startList(); + writer.writeLargeVarBinary("row1,1".getBytes()); + writer.writeLargeVarBinary("row1,2".getBytes(), 0, "row1,2".getBytes().length); + writer.writeLargeVarBinary(ByteBuffer.wrap("row1,3".getBytes())); + writer.writeLargeVarBinary(ByteBuffer.wrap("row1,4".getBytes()), 0, "row1,4".getBytes().length); + writer.endList(); + + assertEquals("row1,1", new String((byte[]) (vector.getObject(0).get(0)))); + assertEquals("row1,2", new String((byte[]) (vector.getObject(0).get(1)))); + assertEquals("row1,3", new String((byte[]) (vector.getObject(0).get(2)))); + assertEquals("row1,4", new String((byte[]) (vector.getObject(0).get(3)))); + } + } + + private int[] convertListToIntArray(List list) { int[] values = new int[list.size()]; for (int i = 0; i < list.size(); i++) { values[i] = (int) list.get(i); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java index 1068f7c030e..4c8c96a0d74 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java @@ -28,6 +28,10 @@ import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.DirtyRootAllocator; +import org.apache.arrow.vector.LargeVarBinaryVector; +import org.apache.arrow.vector.LargeVarCharVector; +import org.apache.arrow.vector.VarBinaryVector; +import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.NonNullableStructVector; import org.apache.arrow.vector.complex.StructVector; @@ -43,6 +47,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.Text; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -392,4 +397,196 @@ public void testNoPromoteFixedSizeBinaryToUnionWithNull() throws Exception { buf.close(); } } + + @Test + public void testPromoteLargeVarCharHelpersOnStruct() throws Exception { + try (final NonNullableStructVector container = NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); + final StructVector v = container.addOrGetStruct("test"); + final PromotableWriter writer = new PromotableWriter(v, container)) { + container.allocateNew(); + + writer.start(); + writer.setPosition(0); + writer.largeVarChar("c").writeLargeVarChar(new Text("foo")); + writer.setPosition(1); + writer.largeVarChar("c").writeLargeVarChar("foo2"); + writer.end(); + + final LargeVarCharVector uv = v.getChild("c", LargeVarCharVector.class); + assertEquals("foo", uv.getObject(0).toString()); + assertEquals("foo2", uv.getObject(1).toString()); + } + } + + @Test + public void testPromoteVarCharHelpersOnStruct() throws Exception { + try (final NonNullableStructVector container = NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); + final StructVector v = container.addOrGetStruct("test"); + final PromotableWriter writer = new PromotableWriter(v, container)) { + container.allocateNew(); + + writer.start(); + writer.setPosition(0); + writer.varChar("c").writeVarChar(new Text("foo")); + writer.setPosition(1); + writer.varChar("c").writeVarChar("foo2"); + writer.end(); + + final VarCharVector uv = v.getChild("c", VarCharVector.class); + assertEquals("foo", uv.getObject(0).toString()); + assertEquals("foo2", uv.getObject(1).toString()); + } + } + + @Test + public void testPromoteVarCharHelpersDirect() throws Exception { + try (final NonNullableStructVector container = NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); + final StructVector v = container.addOrGetStruct("test"); + final PromotableWriter writer = new PromotableWriter(v, container)) { + container.allocateNew(); + + writer.start(); + writer.setPosition(0); + writer.writeVarChar(new Text("foo")); + writer.setPosition(1); + writer.writeVarChar("foo2"); + writer.end(); + + // The "test" vector in the parent container should have been replaced with a UnionVector. + UnionVector promotedVector = container.getChild("test", UnionVector.class); + VarCharVector vector = promotedVector.getVarCharVector(); + assertEquals("foo", vector.getObject(0).toString()); + assertEquals("foo2", vector.getObject(1).toString()); + } + } + + @Test + public void testPromoteLargeVarCharHelpersDirect() throws Exception { + try (final NonNullableStructVector container = NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); + final StructVector v = container.addOrGetStruct("test"); + final PromotableWriter writer = new PromotableWriter(v, container)) { + container.allocateNew(); + + writer.start(); + writer.setPosition(0); + writer.writeLargeVarChar(new Text("foo")); + writer.setPosition(1); + writer.writeLargeVarChar("foo2"); + writer.end(); + + // The "test" vector in the parent container should have been replaced with a UnionVector. + UnionVector promotedVector = container.getChild("test", UnionVector.class); + LargeVarCharVector vector = promotedVector.getLargeVarCharVector(); + assertEquals("foo", vector.getObject(0).toString()); + assertEquals("foo2", vector.getObject(1).toString()); + } + } + + @Test + public void testPromoteVarBinaryHelpersOnStruct() throws Exception { + try (final NonNullableStructVector container = NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); + final StructVector v = container.addOrGetStruct("test"); + final PromotableWriter writer = new PromotableWriter(v, container)) { + container.allocateNew(); + + writer.start(); + writer.setPosition(0); + writer.varBinary("c").writeVarBinary("row1".getBytes()); + writer.setPosition(1); + writer.varBinary("c").writeVarBinary("row2".getBytes(), 0, "row2".getBytes().length); + writer.setPosition(2); + writer.varBinary("c").writeVarBinary(ByteBuffer.wrap("row3".getBytes())); + writer.setPosition(3); + writer.varBinary("c").writeVarBinary(ByteBuffer.wrap("row4".getBytes()), 0, "row4".getBytes().length); + writer.end(); + + final VarBinaryVector uv = v.getChild("c", VarBinaryVector.class); + assertEquals("row1", new String(uv.get(0))); + assertEquals("row2", new String(uv.get(1))); + assertEquals("row3", new String(uv.get(2))); + assertEquals("row4", new String(uv.get(3))); + } + } + + @Test + public void testPromoteVarBinaryHelpersDirect() throws Exception { + try (final NonNullableStructVector container = NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); + final StructVector v = container.addOrGetStruct("test"); + final PromotableWriter writer = new PromotableWriter(v, container)) { + container.allocateNew(); + + writer.start(); + writer.setPosition(0); + writer.writeVarBinary("row1".getBytes()); + writer.setPosition(1); + writer.writeVarBinary("row2".getBytes(), 0, "row2".getBytes().length); + writer.setPosition(2); + writer.writeVarBinary(ByteBuffer.wrap("row3".getBytes())); + writer.setPosition(3); + writer.writeVarBinary(ByteBuffer.wrap("row4".getBytes()), 0, "row4".getBytes().length); + writer.end(); + + // The "test" vector in the parent container should have been replaced with a UnionVector. + UnionVector promotedVector = container.getChild("test", UnionVector.class); + VarBinaryVector uv = promotedVector.getVarBinaryVector(); + assertEquals("row1", new String(uv.get(0))); + assertEquals("row2", new String(uv.get(1))); + assertEquals("row3", new String(uv.get(2))); + assertEquals("row4", new String(uv.get(3))); + } + } + + @Test + public void testPromoteLargeVarBinaryHelpersOnStruct() throws Exception { + try (final NonNullableStructVector container = NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); + final StructVector v = container.addOrGetStruct("test"); + final PromotableWriter writer = new PromotableWriter(v, container)) { + container.allocateNew(); + + writer.start(); + writer.setPosition(0); + writer.largeVarBinary("c").writeLargeVarBinary("row1".getBytes()); + writer.setPosition(1); + writer.largeVarBinary("c").writeLargeVarBinary("row2".getBytes(), 0, "row2".getBytes().length); + writer.setPosition(2); + writer.largeVarBinary("c").writeLargeVarBinary(ByteBuffer.wrap("row3".getBytes())); + writer.setPosition(3); + writer.largeVarBinary("c").writeLargeVarBinary(ByteBuffer.wrap("row4".getBytes()), 0, "row4".getBytes().length); + writer.end(); + + final LargeVarBinaryVector uv = v.getChild("c", LargeVarBinaryVector.class); + assertEquals("row1", new String(uv.get(0))); + assertEquals("row2", new String(uv.get(1))); + assertEquals("row3", new String(uv.get(2))); + assertEquals("row4", new String(uv.get(3))); + } + } + + @Test + public void testPromoteLargeVarBinaryHelpersDirect() throws Exception { + try (final NonNullableStructVector container = NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator); + final StructVector v = container.addOrGetStruct("test"); + final PromotableWriter writer = new PromotableWriter(v, container)) { + container.allocateNew(); + + writer.start(); + writer.setPosition(0); + writer.writeLargeVarBinary("row1".getBytes()); + writer.setPosition(1); + writer.writeLargeVarBinary("row2".getBytes(), 0, "row2".getBytes().length); + writer.setPosition(2); + writer.writeLargeVarBinary(ByteBuffer.wrap("row3".getBytes())); + writer.setPosition(3); + writer.writeLargeVarBinary(ByteBuffer.wrap("row4".getBytes()), 0, "row4".getBytes().length); + writer.end(); + + // The "test" vector in the parent container should have been replaced with a UnionVector. + UnionVector promotedVector = container.getChild("test", UnionVector.class); + LargeVarBinaryVector uv = promotedVector.getLargeVarBinaryVector(); + assertEquals("row1", new String(uv.get(0))); + assertEquals("row2", new String(uv.get(1))); + assertEquals("row3", new String(uv.get(2))); + assertEquals("row4", new String(uv.get(3))); + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java index 6f7f5abd30a..96d39e85f1f 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java @@ -20,6 +20,7 @@ import static org.junit.Assert.*; import java.math.BigDecimal; +import java.nio.ByteBuffer; import java.time.LocalDateTime; import java.util.ArrayList; import java.util.HashSet; @@ -35,7 +36,11 @@ import org.apache.arrow.vector.Float4Vector; import org.apache.arrow.vector.Float8Vector; import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.LargeVarBinaryVector; +import org.apache.arrow.vector.LargeVarCharVector; import org.apache.arrow.vector.SchemaChangeCallBack; +import org.apache.arrow.vector.VarBinaryVector; +import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.NonNullableStructVector; @@ -1667,4 +1672,210 @@ public void testMapWithStructKey() { assertEquals(1, mapReader.value().readInteger().intValue()); } } + + @Test + public void structWriterVarCharHelpers() { + try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { + ComplexWriter writer = new ComplexWriterImpl("root", parent, false, true); + StructWriter rootWriter = writer.rootAsStruct(); + rootWriter.start(); + rootWriter.setPosition(0); + rootWriter.varChar("c").writeVarChar(new Text("row1")); + rootWriter.setPosition(1); + rootWriter.varChar("c").writeVarChar("row2"); + rootWriter.end(); + + VarCharVector vector = parent.getChild("root", StructVector.class).getChild("c", VarCharVector.class); + + assertEquals("row1", vector.getObject(0).toString()); + assertEquals("row2", vector.getObject(1).toString()); + } + } + + @Test + public void structWriterLargeVarCharHelpers() { + try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { + ComplexWriter writer = new ComplexWriterImpl("root", parent, false, true); + StructWriter rootWriter = writer.rootAsStruct(); + rootWriter.start(); + rootWriter.setPosition(0); + rootWriter.largeVarChar("c").writeLargeVarChar(new Text("row1")); + rootWriter.setPosition(1); + rootWriter.largeVarChar("c").writeLargeVarChar("row2"); + rootWriter.end(); + + LargeVarCharVector vector = parent.getChild("root", StructVector.class).getChild("c", + LargeVarCharVector.class); + + assertEquals("row1", vector.getObject(0).toString()); + assertEquals("row2", vector.getObject(1).toString()); + } + } + + @Test + public void structWriterVarBinaryHelpers() { + try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { + ComplexWriter writer = new ComplexWriterImpl("root", parent, false, true); + StructWriter rootWriter = writer.rootAsStruct(); + rootWriter.start(); + rootWriter.setPosition(0); + rootWriter.varBinary("c").writeVarBinary("row1".getBytes()); + rootWriter.setPosition(1); + rootWriter.varBinary("c").writeVarBinary("row2".getBytes(), 0, "row2".getBytes().length); + rootWriter.setPosition(2); + rootWriter.varBinary("c").writeVarBinary(ByteBuffer.wrap("row3".getBytes())); + rootWriter.setPosition(3); + rootWriter.varBinary("c").writeVarBinary(ByteBuffer.wrap("row4".getBytes()), 0, "row4".getBytes().length); + rootWriter.end(); + + VarBinaryVector uv = parent.getChild("root", StructVector.class).getChild("c", VarBinaryVector.class); + + assertEquals("row1", new String(uv.get(0))); + assertEquals("row2", new String(uv.get(1))); + assertEquals("row3", new String(uv.get(2))); + assertEquals("row4", new String(uv.get(3))); + } + } + + @Test + public void structWriterLargeVarBinaryHelpers() { + try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { + ComplexWriter writer = new ComplexWriterImpl("root", parent, false, true); + StructWriter rootWriter = writer.rootAsStruct(); + rootWriter.start(); + rootWriter.setPosition(0); + rootWriter.largeVarBinary("c").writeLargeVarBinary("row1".getBytes()); + rootWriter.setPosition(1); + rootWriter.largeVarBinary("c").writeLargeVarBinary("row2".getBytes(), 0, "row2".getBytes().length); + rootWriter.setPosition(2); + rootWriter.largeVarBinary("c").writeLargeVarBinary(ByteBuffer.wrap("row3".getBytes())); + rootWriter.setPosition(3); + rootWriter.largeVarBinary("c").writeLargeVarBinary(ByteBuffer.wrap("row4".getBytes()), 0, + "row4".getBytes().length); + rootWriter.end(); + + LargeVarBinaryVector uv = parent.getChild("root", StructVector.class).getChild("c", + LargeVarBinaryVector.class); + + assertEquals("row1", new String(uv.get(0))); + assertEquals("row2", new String(uv.get(1))); + assertEquals("row3", new String(uv.get(2))); + assertEquals("row4", new String(uv.get(3))); + } + } + + @Test + public void listVarCharHelpers() { + try (ListVector listVector = ListVector.empty("list", allocator)) { + listVector.allocateNew(); + UnionListWriter listWriter = new UnionListWriter(listVector); + listWriter.startList(); + listWriter.writeVarChar("row1"); + listWriter.writeVarChar(new Text("row2")); + listWriter.endList(); + listWriter.setValueCount(1); + assertEquals("row1", listVector.getObject(0).get(0).toString()); + assertEquals("row2", listVector.getObject(0).get(1).toString()); + } + } + + @Test + public void listLargeVarCharHelpers() { + try (ListVector listVector = ListVector.empty("list", allocator)) { + listVector.allocateNew(); + UnionListWriter listWriter = new UnionListWriter(listVector); + listWriter.startList(); + listWriter.writeLargeVarChar("row1"); + listWriter.writeLargeVarChar(new Text("row2")); + listWriter.endList(); + listWriter.setValueCount(1); + assertEquals("row1", listVector.getObject(0).get(0).toString()); + assertEquals("row2", listVector.getObject(0).get(1).toString()); + } + } + + @Test + public void listVarBinaryHelpers() { + try (ListVector listVector = ListVector.empty("list", allocator)) { + listVector.allocateNew(); + UnionListWriter listWriter = new UnionListWriter(listVector); + listWriter.startList(); + listWriter.writeVarBinary("row1".getBytes()); + listWriter.writeVarBinary("row2".getBytes(), 0, "row2".getBytes().length); + listWriter.writeVarBinary(ByteBuffer.wrap("row3".getBytes())); + listWriter.writeVarBinary(ByteBuffer.wrap("row4".getBytes()), 0, "row4".getBytes().length); + listWriter.endList(); + listWriter.setValueCount(1); + assertEquals("row1", new String((byte[]) listVector.getObject(0).get(0))); + assertEquals("row2", new String((byte[]) listVector.getObject(0).get(1))); + assertEquals("row3", new String((byte[]) listVector.getObject(0).get(2))); + assertEquals("row4", new String((byte[]) listVector.getObject(0).get(3))); + } + } + + @Test + public void listLargeVarBinaryHelpers() { + try (ListVector listVector = ListVector.empty("list", allocator)) { + listVector.allocateNew(); + UnionListWriter listWriter = new UnionListWriter(listVector); + listWriter.startList(); + listWriter.writeLargeVarBinary("row1".getBytes()); + listWriter.writeLargeVarBinary("row2".getBytes(), 0, "row2".getBytes().length); + listWriter.writeLargeVarBinary(ByteBuffer.wrap("row3".getBytes())); + listWriter.writeLargeVarBinary(ByteBuffer.wrap("row4".getBytes()), 0, "row4".getBytes().length); + listWriter.endList(); + listWriter.setValueCount(1); + assertEquals("row1", new String((byte[]) listVector.getObject(0).get(0))); + assertEquals("row2", new String((byte[]) listVector.getObject(0).get(1))); + assertEquals("row3", new String((byte[]) listVector.getObject(0).get(2))); + assertEquals("row4", new String((byte[]) listVector.getObject(0).get(3))); + } + } + + @Test + public void unionWithVarCharAndBinaryHelpers() throws Exception { + try (UnionVector vector = new UnionVector("union", allocator, /* field type */ null, /* call-back */ null)) { + UnionWriter unionWriter = new UnionWriter(vector); + unionWriter.allocate(); + unionWriter.start(); + unionWriter.setPosition(0); + unionWriter.writeVarChar("row1"); + unionWriter.setPosition(1); + unionWriter.writeVarChar(new Text("row2")); + unionWriter.setPosition(2); + unionWriter.writeLargeVarChar("row3"); + unionWriter.setPosition(3); + unionWriter.writeLargeVarChar(new Text("row4")); + unionWriter.setPosition(4); + unionWriter.writeVarBinary("row5".getBytes()); + unionWriter.setPosition(5); + unionWriter.writeVarBinary("row6".getBytes(), 0, "row6".getBytes().length); + unionWriter.setPosition(6); + unionWriter.writeVarBinary(ByteBuffer.wrap("row7".getBytes())); + unionWriter.setPosition(7); + unionWriter.writeVarBinary(ByteBuffer.wrap("row8".getBytes()), 0, "row8".getBytes().length); + unionWriter.setPosition(8); + unionWriter.writeLargeVarBinary("row9".getBytes()); + unionWriter.setPosition(9); + unionWriter.writeLargeVarBinary("row10".getBytes(), 0, "row10".getBytes().length); + unionWriter.setPosition(10); + unionWriter.writeLargeVarBinary(ByteBuffer.wrap("row11".getBytes())); + unionWriter.setPosition(11); + unionWriter.writeLargeVarBinary(ByteBuffer.wrap("row12".getBytes()), 0, "row12".getBytes().length); + unionWriter.end(); + + assertEquals("row1", new String(vector.getVarCharVector().get(0))); + assertEquals("row2", new String(vector.getVarCharVector().get(1))); + assertEquals("row3", new String(vector.getLargeVarCharVector().get(2))); + assertEquals("row4", new String(vector.getLargeVarCharVector().get(3))); + assertEquals("row5", new String(vector.getVarBinaryVector().get(4))); + assertEquals("row6", new String(vector.getVarBinaryVector().get(5))); + assertEquals("row7", new String(vector.getVarBinaryVector().get(6))); + assertEquals("row8", new String(vector.getVarBinaryVector().get(7))); + assertEquals("row9", new String(vector.getLargeVarBinaryVector().get(8))); + assertEquals("row10", new String(vector.getLargeVarBinaryVector().get(9))); + assertEquals("row11", new String(vector.getLargeVarBinaryVector().get(10))); + assertEquals("row12", new String(vector.getLargeVarBinaryVector().get(11))); + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java index ef918b13fb6..27b8f1796ee 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java @@ -54,7 +54,7 @@ public void testWriteByteArrayToVarBinary() throws Exception { try (VarBinaryVector vector = new VarBinaryVector("test", allocator); VarBinaryWriter writer = new VarBinaryWriterImpl(vector)) { byte[] input = new byte[] { 0x01, 0x02 }; - writer.writeToVarBinary(input); + writer.writeVarBinary(input); byte[] result = vector.get(0); Assert.assertArrayEquals(input, result); } @@ -65,7 +65,7 @@ public void testWriteByteArrayWithOffsetToVarBinary() throws Exception { try (VarBinaryVector vector = new VarBinaryVector("test", allocator); VarBinaryWriter writer = new VarBinaryWriterImpl(vector)) { byte[] input = new byte[] { 0x01, 0x02 }; - writer.writeToVarBinary(input, 1, 1); + writer.writeVarBinary(input, 1, 1); byte[] result = vector.get(0); Assert.assertArrayEquals(new byte[] { 0x02 }, result); } @@ -77,7 +77,7 @@ public void testWriteByteBufferToVarBinary() throws Exception { VarBinaryWriter writer = new VarBinaryWriterImpl(vector)) { byte[] input = new byte[] { 0x01, 0x02 }; ByteBuffer buffer = ByteBuffer.wrap(input); - writer.writeToVarBinary(buffer); + writer.writeVarBinary(buffer); byte[] result = vector.get(0); Assert.assertArrayEquals(input, result); } @@ -89,7 +89,7 @@ public void testWriteByteBufferWithOffsetToVarBinary() throws Exception { VarBinaryWriter writer = new VarBinaryWriterImpl(vector)) { byte[] input = new byte[] { 0x01, 0x02 }; ByteBuffer buffer = ByteBuffer.wrap(input); - writer.writeToVarBinary(buffer, 1, 1); + writer.writeVarBinary(buffer, 1, 1); byte[] result = vector.get(0); Assert.assertArrayEquals(new byte[] { 0x02 }, result); } @@ -100,7 +100,7 @@ public void testWriteByteArrayToLargeVarBinary() throws Exception { try (LargeVarBinaryVector vector = new LargeVarBinaryVector("test", allocator); LargeVarBinaryWriter writer = new LargeVarBinaryWriterImpl(vector)) { byte[] input = new byte[] { 0x01, 0x02 }; - writer.writeToLargeVarBinary(input); + writer.writeLargeVarBinary(input); byte[] result = vector.get(0); Assert.assertArrayEquals(input, result); } @@ -111,7 +111,7 @@ public void testWriteByteArrayWithOffsetToLargeVarBinary() throws Exception { try (LargeVarBinaryVector vector = new LargeVarBinaryVector("test", allocator); LargeVarBinaryWriter writer = new LargeVarBinaryWriterImpl(vector)) { byte[] input = new byte[] { 0x01, 0x02 }; - writer.writeToLargeVarBinary(input, 1, 1); + writer.writeLargeVarBinary(input, 1, 1); byte[] result = vector.get(0); Assert.assertArrayEquals(new byte[] { 0x02 }, result); } @@ -123,7 +123,7 @@ public void testWriteByteBufferToLargeVarBinary() throws Exception { LargeVarBinaryWriter writer = new LargeVarBinaryWriterImpl(vector)) { byte[] input = new byte[] { 0x01, 0x02 }; ByteBuffer buffer = ByteBuffer.wrap(input); - writer.writeToLargeVarBinary(buffer); + writer.writeLargeVarBinary(buffer); byte[] result = vector.get(0); Assert.assertArrayEquals(input, result); } @@ -135,7 +135,7 @@ public void testWriteByteBufferWithOffsetToLargeVarBinary() throws Exception { LargeVarBinaryWriter writer = new LargeVarBinaryWriterImpl(vector)) { byte[] input = new byte[] { 0x01, 0x02 }; ByteBuffer buffer = ByteBuffer.wrap(input); - writer.writeToLargeVarBinary(buffer, 1, 1); + writer.writeLargeVarBinary(buffer, 1, 1); byte[] result = vector.get(0); Assert.assertArrayEquals(new byte[] { 0x02 }, result); }