From cd1b7111fdb49703a2f59a241fe799a0ec86ebc1 Mon Sep 17 00:00:00 2001 From: Julien Le Dem Date: Thu, 22 Sep 2016 11:04:58 -0700 Subject: [PATCH] ARROW-257: Add a typeids Vector to Union type --- format/Message.fbs | 5 +++ .../src/main/codegen/data/ArrowTypes.tdd | 2 +- .../src/main/codegen/templates/ArrowType.java | 38 +++++++++++++++---- .../main/codegen/templates/UnionVector.java | 7 +++- .../org/apache/arrow/vector/types/Types.java | 2 +- .../apache/arrow/vector/pojo/TestConvert.java | 5 ++- 6 files changed, 45 insertions(+), 14 deletions(-) diff --git a/format/Message.fbs b/format/Message.fbs index 657904a7032..0d32bf0eb52 100644 --- a/format/Message.fbs +++ b/format/Message.fbs @@ -23,8 +23,13 @@ table List { enum UnionMode:short { Sparse, Dense } +/// A union is a complex type with children in Field. +/// By default, ids in the type vector refer to the offsets in the children. +/// Optionally, typeIds provides an indirection between the child offset and the type id: +/// for each child, typeIds[offset] is the id used in the type vector. table Union { mode: UnionMode; + typeIds: [ int ]; // optional, describes the type id of each child. 
} table Int { diff --git a/java/vector/src/main/codegen/data/ArrowTypes.tdd b/java/vector/src/main/codegen/data/ArrowTypes.tdd index 5cb43bed2b6..7f6dc6ccfa4 100644 --- a/java/vector/src/main/codegen/data/ArrowTypes.tdd +++ b/java/vector/src/main/codegen/data/ArrowTypes.tdd @@ -30,7 +30,7 @@ }, { name: "Union", - fields: [{name: "mode", type: short}] + fields: [{name: "mode", type: short}, {name: "typeIds", type: "int[]"}] }, { name: "Int", diff --git a/java/vector/src/main/codegen/templates/ArrowType.java b/java/vector/src/main/codegen/templates/ArrowType.java index 29dee20040a..30f2c68efe0 100644 --- a/java/vector/src/main/codegen/templates/ArrowType.java +++ b/java/vector/src/main/codegen/templates/ArrowType.java @@ -33,12 +33,23 @@ import java.util.Objects; +/** + * Arrow types + **/ public abstract class ArrowType { public abstract byte getTypeType(); public abstract int getType(FlatBufferBuilder builder); public abstract T accept(ArrowTypeVisitor visitor); + /** + * to visit the ArrowTypes + * + * type.accept(new ArrowTypeVisitor() { + * ... 
+ * }); + * + */ public static interface ArrowTypeVisitor { <#list arrowTypes.types as type> T visit(${type.name} type); @@ -55,9 +66,7 @@ public static class ${name} extends ArrowType { <#list fields as field> - <#assign fieldName = field.name> - <#assign fieldType = field.type> - ${fieldType} ${fieldName}; + ${field.type} ${field.name}; <#if type.fields?size != 0> @@ -79,6 +88,9 @@ public int getType(FlatBufferBuilder builder) { <#if field.type == "String"> int ${field.name} = builder.createString(this.${field.name}); + <#if field.type == "int[]"> + int ${field.name} = org.apache.arrow.flatbuf.${type.name}.create${field.name?cap_first}Vector(builder, this.${field.name}); + org.apache.arrow.flatbuf.${type.name}.start${type.name}(builder); <#list type.fields as field> @@ -96,7 +108,7 @@ public int getType(FlatBufferBuilder builder) { public String toString() { return "${name}{" <#list fields as field> - + ", " + ${field.name} + + <#if field.type == "int[]">java.util.Arrays.toString(${field.name})<#else>${field.name}<#if field_has_next> + ", " + "}"; } @@ -115,8 +127,7 @@ public boolean equals(Object obj) { return true; <#else> ${type.name} that = (${type.name}) obj; - return - <#list type.fields as field>Objects.equals(this.${field.name}, that.${field.name}) <#if field_has_next>&&<#else>; + return <#list type.fields as field>Objects.deepEquals(this.${field.name}, that.${field.name}) <#if field_has_next>&&<#else>; } @@ -134,9 +145,20 @@ public static org.apache.arrow.vector.types.pojo.ArrowType getTypeForField(org.a <#assign name = type.name> <#assign nameLower = type.name?lower_case> <#assign fields = type.fields> - case Type.${type.name}: + case Type.${type.name}: { org.apache.arrow.flatbuf.${type.name} ${nameLower}Type = (org.apache.arrow.flatbuf.${type.name}) field.type(new org.apache.arrow.flatbuf.${type.name}()); - return new ${type.name}(<#list type.fields as field>${nameLower}Type.${field.name}()<#if field_has_next>, ); + <#list type.fields as field> + <#if 
field.type == "int[]"> + ${field.type} ${field.name} = new int[${nameLower}Type.${field.name}Length()]; + for (int i = 0; i< ${field.name}.length; ++i) { + ${field.name}[i] = ${nameLower}Type.${field.name}(i); + } + <#else> + ${field.type} ${field.name} = ${nameLower}Type.${field.name}(); + + + return new ${type.name}(<#list type.fields as field>${field.name}<#if field_has_next>, ); + } default: throw new UnsupportedOperationException("Unsupported type: " + field.typeType()); diff --git a/java/vector/src/main/codegen/templates/UnionVector.java b/java/vector/src/main/codegen/templates/UnionVector.java index 3014bbba9d5..b14314d2b0d 100644 --- a/java/vector/src/main/codegen/templates/UnionVector.java +++ b/java/vector/src/main/codegen/templates/UnionVector.java @@ -232,10 +232,13 @@ public void clear() { @Override public Field getField() { List childFields = new ArrayList<>(); - for (ValueVector v : internalMap.getChildren()) { + List children = internalMap.getChildren(); + int[] typeIds = new int[children.size()]; + for (ValueVector v : children) { + typeIds[childFields.size()] = v.getMinorType().ordinal(); childFields.add(v.getField()); } - return new Field(name, true, new ArrowType.Union(Sparse), childFields); + return new Field(name, true, new ArrowType.Union(Sparse, typeIds), childFields); } @Override diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java index 66ef7562ced..f7f34808630 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java @@ -472,7 +472,7 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { return new UnionListWriter((ListVector) vector); } }, - UNION(new Union(UnionMode.Sparse)) { + UNION(new Union(UnionMode.Sparse, null)) { @Override public Field getField() { throw new UnsupportedOperationException("Cannot get simple field for Union type"); diff 
--git a/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java b/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java index 448117d84dc..ed740cd0f1b 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java @@ -22,11 +22,12 @@ import static org.junit.Assert.assertEquals; import org.apache.arrow.flatbuf.UnionMode; +import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint; import org.apache.arrow.vector.types.pojo.ArrowType.Int; import org.apache.arrow.vector.types.pojo.ArrowType.List; -import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp; import org.apache.arrow.vector.types.pojo.ArrowType.Struct_; +import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp; import org.apache.arrow.vector.types.pojo.ArrowType.Union; import org.apache.arrow.vector.types.pojo.ArrowType.Utf8; import org.apache.arrow.vector.types.pojo.Field; @@ -78,7 +79,7 @@ public void nestedSchema() { childrenBuilder.add(new Field("child4", true, new List(), ImmutableList.of( new Field("child4.1", true, Utf8.INSTANCE, null) ))); - childrenBuilder.add(new Field("child5", true, new Union(UnionMode.Sparse), ImmutableList.of( + childrenBuilder.add(new Field("child5", true, new Union(UnionMode.Sparse, new int[] { MinorType.TIMESTAMP.ordinal(), MinorType.FLOAT8.ordinal() } ), ImmutableList.of( new Field("child5.1", true, new Timestamp("UTC"), null), new Field("child5.2", true, new FloatingPoint(DOUBLE), ImmutableList.of()) )));