From a5d92713c4ab9bb290b46a4acef4f8396fb86a21 Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Fri, 7 Jul 2023 12:15:06 +0300 Subject: [PATCH] GH-36375: [Java] Added creating MapWriter in ComplexWriter. (#36351) (#32) Added new method rootAsMap() to ComplexWriter and implement it in ComplexWriterImpl for supporting map type. Previously in dremio side: When i trying to return map like output ComplexWrite with this code: org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter mapWriter = out.rootAsList().map(false); mapWriter.startMap(); for (java.util.Map.Entry element : map.entrySet()) { mapWriter.startEntry(); mapWriter.key().integer().writeInt((Integer) element.getKey()); mapWriter.value().integer().writeInt((Integer) element.getValue()); mapWriter.endEntry(); } mapWriter.endMap(); It use UnionMapWriter and generate schema like: EXPR$0: Map(false)<$data$: Union(Sparse, [1, 39]) not null, map: Map(false) not null>>> But in OutputDerivation impl class where i should create output Complete type List children = Arrays.asList( CompleteType.INT.toField("key", false), CompleteType.INT.toField("value", false)); return new CompleteType(CompleteType.MAP.getType(), CompleteType.struct(children).toField(MapVector.DATA_VECTOR_NAME, false)); (This is only one valid case, because MapVector.initializeChildrenFromFields()) return EXPR$0::map I found a place where it start using union - PromotableWriter.promoteToUnion. And in the end i have SCHEMA_CHANGE ERROR: Schema changed during projection. Schema was schema(EXPR$0::map) but then changed to schema(EXPR$0::map, map::map>) * Closes: #36375 Authored-by: Ivan Chesnov Signed-off-by: David Li --- .../main/codegen/templates/BaseWriter.java | 1 + .../complex/impl/ComplexWriterImpl.java | 37 ++++++- .../complex/writer/TestComplexWriter.java | 104 ++++++++++++++++++ 3 files changed, 141 insertions(+), 1 deletion(-) diff --git a/java/vector/src/main/codegen/templates/BaseWriter.java b/java/vector/src/main/codegen/templates/BaseWriter.java index 4d63fb73e98c8..3b35d22692e68 100644 --- a/java/vector/src/main/codegen/templates/BaseWriter.java +++ b/java/vector/src/main/codegen/templates/BaseWriter.java @@ -106,6 +106,7 @@ public interface ComplexWriter { void copyReader(FieldReader reader); StructWriter rootAsStruct(); ListWriter rootAsList(); + MapWriter rootAsMap(boolean keysSorted); void setPosition(int index); void setValueCount(int count); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java index 13b26bb67dace..8d2694b6df887 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java @@ -19,6 +19,7 @@ import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.NonNullableStructVector; import org.apache.arrow.vector.complex.StateTool; import org.apache.arrow.vector.complex.StructVector; @@ -32,6 +33,7 @@ public class ComplexWriterImpl extends AbstractFieldWriter implements ComplexWri private NullableStructWriter structRoot; private UnionListWriter listRoot; + private UnionMapWriter mapRoot; private final NonNullableStructVector container; Mode mode = Mode.INIT; @@ -39,7 +41,7 @@ public class ComplexWriterImpl extends AbstractFieldWriter implements ComplexWri private final boolean unionEnabled; private final NullableStructWriterFactory nullableStructWriterFactory; - private enum Mode { INIT, STRUCT, LIST } + private enum Mode { INIT, STRUCT, LIST, MAP } /** * Constructs a new instance. @@ -107,6 +109,9 @@ public void clear() { case LIST: listRoot.clear(); break; + case MAP: + mapRoot.clear(); + break; default: break; } @@ -121,6 +126,9 @@ public void setValueCount(int count) { case LIST: listRoot.setValueCount(count); break; + case MAP: + mapRoot.setValueCount(count); + break; default: break; } @@ -136,6 +144,9 @@ public void setPosition(int index) { case LIST: listRoot.setPosition(index); break; + case MAP: + mapRoot.setPosition(index); + break; default: break; } @@ -223,5 +234,29 @@ public ListWriter rootAsList() { return listRoot; } + @Override + public MapWriter rootAsMap(boolean keysSorted) { + switch (mode) { + case INIT: + int vectorCount = container.size(); + // TODO allow dictionaries in complex types + MapVector mapVector = container.addOrGetMap(name, keysSorted); + if (container.size() > vectorCount) { + mapVector.allocateNew(); + } + mapRoot = new UnionMapWriter(mapVector); + mapRoot.setPosition(idx()); + mode = Mode.MAP; + break; + + case MAP: + break; + + default: + check(Mode.INIT, Mode.STRUCT); + } + + return mapRoot; + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java index 55041496653a6..e42926b6163d0 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java @@ -1401,4 +1401,108 @@ public void testStructOfList() { Assert.assertEquals(0, size); } } + + @Test + public void testMap() { + try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { + ComplexWriter writer = new ComplexWriterImpl("root", parent); + MapWriter mapWriter = writer.rootAsMap(false); + for (int i = 0; i < COUNT; i++) { + mapWriter.startMap(); + for (int j = 0; j < i % 7; j++) { + mapWriter.startEntry(); + if (j % 2 == 0) { + mapWriter.key().integer().writeInt(j); + mapWriter.value().integer().writeInt(j + 1); + } else { + IntHolder keyHolder = new IntHolder(); + keyHolder.value = j; + IntHolder valueHolder = new IntHolder(); + valueHolder.value = j + 1; + mapWriter.key().integer().write(keyHolder); + mapWriter.value().integer().write(valueHolder); + } + mapWriter.endEntry(); + } + mapWriter.endMap(); + } + writer.setValueCount(COUNT); + UnionMapReader mapReader = (UnionMapReader) new SingleStructReaderImpl(parent).reader("root"); + for (int i = 0; i < COUNT; i++) { + mapReader.setPosition(i); + for (int j = 0; j < i % 7; j++) { + mapReader.next(); + assertEquals(j, mapReader.key().readInteger().intValue()); + assertEquals(j + 1, mapReader.value().readInteger().intValue()); + } + } + } + } + + @Test + public void testMapWithNulls() { + try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { + ComplexWriter writer = new ComplexWriterImpl("root", parent); + MapWriter mapWriter = writer.rootAsMap(false); + mapWriter.startMap(); + mapWriter.startEntry(); + mapWriter.key().integer().writeNull(); + mapWriter.value().integer().writeInt(1); + mapWriter.endEntry(); + mapWriter.endMap(); + writer.setValueCount(1); + UnionMapReader mapReader = (UnionMapReader) new SingleStructReaderImpl(parent).reader("root"); + Assert.assertNull(mapReader.key().readInteger()); + assertEquals(1, mapReader.value().readInteger().intValue()); + } + } + + @Test + public void testMapWithListKey() { + try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { + ComplexWriter writer = new ComplexWriterImpl("root", parent); + MapWriter mapWriter = writer.rootAsMap(false); + mapWriter.startMap(); + mapWriter.startEntry(); + mapWriter.key().list().startList(); + for (int i = 0; i < 3; i++) { + mapWriter.key().list().integer().writeInt(i); + } + mapWriter.key().list().endList(); + mapWriter.value().integer().writeInt(1); + mapWriter.endEntry(); + mapWriter.endMap(); + writer.setValueCount(1); + UnionMapReader mapReader = (UnionMapReader) new SingleStructReaderImpl(parent).reader("root"); + mapReader.key().next(); + assertEquals(0, mapReader.key().reader().readInteger().intValue()); + mapReader.key().next(); + assertEquals(1, mapReader.key().reader().readInteger().intValue()); + mapReader.key().next(); + assertEquals(2, mapReader.key().reader().readInteger().intValue()); + assertEquals(1, mapReader.value().readInteger().intValue()); + } + } + + @Test + public void testMapWithStructKey() { + try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) { + ComplexWriter writer = new ComplexWriterImpl("root", parent); + MapWriter mapWriter = writer.rootAsMap(false); + mapWriter.startMap(); + mapWriter.startEntry(); + mapWriter.key().struct().start(); + mapWriter.key().struct().integer("value1").writeInt(1); + mapWriter.key().struct().integer("value2").writeInt(2); + mapWriter.key().struct().end(); + mapWriter.value().integer().writeInt(1); + mapWriter.endEntry(); + mapWriter.endMap(); + writer.setValueCount(1); + UnionMapReader mapReader = (UnionMapReader) new SingleStructReaderImpl(parent).reader("root"); + assertEquals(1, mapReader.key().reader("value1").readInteger().intValue()); + assertEquals(2, mapReader.key().reader("value2").readInteger().intValue()); + assertEquals(1, mapReader.value().readInteger().intValue()); + } + } }