diff --git a/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/VectorizedSparkOrcReaders.java b/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/VectorizedSparkOrcReaders.java index d626e01191..26cc731cb6 100644 --- a/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/VectorizedSparkOrcReaders.java +++ b/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/VectorizedSparkOrcReaders.java @@ -432,11 +432,11 @@ public ColumnVector getChild(int ordinal) { } private static class UnionConverter implements Converter { - private final Types.StructType structType; + private final Type type; private final List<Converter> optionConverters; private UnionConverter(Type type, List<Converter> optionConverters) { - this.structType = type.asStructType(); + this.type = type; this.optionConverters = optionConverters; } @@ -444,24 +444,30 @@ private UnionConverter(Type type, List<Converter> optionConverters) { public ColumnVector convert(org.apache.orc.storage.ql.exec.vector.ColumnVector vector, int batchSize, long batchOffsetInFile) { UnionColumnVector unionColumnVector = (UnionColumnVector) vector; - List<Types.NestedField> fields = structType.fields(); - List<ColumnVector> fieldVectors = Lists.newArrayListWithExpectedSize(fields.size()); - - LongColumnVector longColumnVector = new LongColumnVector(); - longColumnVector.vector = Arrays.stream(unionColumnVector.tags).asLongStream().toArray(); + if (optionConverters.size() > 1) { + // the case of complex union with multiple types + List<Types.NestedField> fields = type.asStructType().fields(); + List<ColumnVector> fieldVectors = Lists.newArrayListWithExpectedSize(fields.size()); + + LongColumnVector longColumnVector = new LongColumnVector(); + longColumnVector.vector = Arrays.stream(unionColumnVector.tags).asLongStream().toArray(); + + fieldVectors.add(new PrimitiveOrcColumnVector(Types.IntegerType.get(), batchSize, longColumnVector, + OrcValueReaders.ints(), batchOffsetInFile)); + for (int i = 0; i < fields.size() - 1; i += 1) { + fieldVectors.add(optionConverters.get(i).convert(unionColumnVector.fields[i], batchSize, batchOffsetInFile)); + } - fieldVectors.add(new PrimitiveOrcColumnVector(Types.IntegerType.get(), batchSize, longColumnVector, - OrcValueReaders.ints(), batchOffsetInFile)); - for (int i = 0; i < fields.size() - 1; i += 1) { - fieldVectors.add(optionConverters.get(i).convert(unionColumnVector.fields[i], batchSize, batchOffsetInFile)); + return new BaseOrcColumnVector(type.asStructType(), batchSize, vector) { + @Override + public ColumnVector getChild(int ordinal) { + return fieldVectors.get(ordinal); + } + }; + } else { + // the case of single type union + return optionConverters.get(0).convert(unionColumnVector.fields[0], batchSize, batchOffsetInFile); } - - return new BaseOrcColumnVector(structType, batchSize, vector) { - @Override - public ColumnVector getChild(int ordinal) { - return fieldVectors.get(ordinal); - } - }; } } } diff --git a/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcUnions.java b/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcUnions.java index edcc006dd1..93e95750e5 100644 --- a/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcUnions.java +++ b/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkOrcUnions.java @@ -293,7 +293,6 @@ public void testSingleTypeUnion() throws IOException { } // Test vectorized reader - /* try (CloseableIterable<ColumnarBatch> reader = ORC.read(Files.localInput(orcFile)) .project(expectedSchema) .createBatchedReaderFunc(readOrcSchema -> @@ -304,7 +303,6 @@ public void testSingleTypeUnion() throws IOException { assertEquals(expectedSchema, expectedFirstRow, actualRowsIt.next()); assertEquals(expectedSchema, expectedSecondRow, actualRowsIt.next()); } - */ } @Test @@ -366,7 +364,6 @@ public void testSingleTypeUnionOfStruct() throws IOException { } // Test vectorized reader - /* try (CloseableIterable<ColumnarBatch> reader = ORC.read(Files.localInput(orcFile)) .project(expectedSchema) 
.createBatchedReaderFunc(readOrcSchema -> @@ -377,7 +374,6 @@ public void testSingleTypeUnionOfStruct() throws IOException { assertEquals(expectedSchema, expectedFirstRow, actualRowsIt.next()); assertEquals(expectedSchema, expectedSecondRow, actualRowsIt.next()); } - */ } @Test @@ -441,7 +437,6 @@ public void testDeepNestedSingleTypeUnion() throws IOException { } // Test vectorized reader - /* try (CloseableIterable<ColumnarBatch> reader = ORC.read(Files.localInput(orcFile)) .project(expectedSchema) .createBatchedReaderFunc(readOrcSchema -> @@ -452,7 +447,6 @@ public void testDeepNestedSingleTypeUnion() throws IOException { assertEquals(expectedSchema, expectedFirstRow, actualRowsIt.next()); assertEquals(expectedSchema, expectedSecondRow, actualRowsIt.next()); } - */ } private Iterator<InternalRow> batchesToRows(Iterator<ColumnarBatch> batches) {