Skip to content

Commit

Permalink
Added an interface for Iterable ValueVectors
Browse files Browse the repository at this point in the history
* The new interface indicates that a ValueVector is iterable
* Contains default methods for getting an Iterator and Iterable
  • Loading branch information
normanj-bitquill committed Jun 12, 2024
1 parent e6e37b2 commit b795f12
Show file tree
Hide file tree
Showing 51 changed files with 169 additions and 88 deletions.
5 changes: 5 additions & 0 deletions java/dataset/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,11 @@ under the License.
<version>2.15.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<resources>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package org.apache.arrow.dataset.substrait;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThrows;
import static org.junit.Assert.assertTrue;
Expand All @@ -38,11 +39,14 @@
import org.apache.arrow.dataset.scanner.Scanner;
import org.apache.arrow.dataset.source.Dataset;
import org.apache.arrow.dataset.source.DatasetFactory;
import org.apache.arrow.vector.ValueIterableVector;
import org.apache.arrow.vector.ipc.ArrowReader;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.arrow.vector.types.pojo.Schema;
import org.apache.arrow.vector.util.Text;
import org.hamcrest.collection.IsIterableContainingInOrder;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
Expand Down Expand Up @@ -289,13 +293,13 @@ public void testRunExtendedExpressionsFilter() throws Exception {
int rowcount = 0;
while (reader.loadNextBatch()) {
rowcount += reader.getVectorSchemaRoot().getRowCount();
assertTrue(reader.getVectorSchemaRoot().getVector("id").toString().equals("[19, 1, 11]"));
assertTrue(
reader
.getVectorSchemaRoot()
.getVector("name")
.toString()
.equals("[value_19, value_1, value_11]"));
final ValueIterableVector<Integer> idVector =
(ValueIterableVector<Integer>) reader.getVectorSchemaRoot().getVector("id");
assertThat(idVector.getValueIterable(), IsIterableContainingInOrder.contains(19, 1, 11));
final ValueIterableVector<Text> nameVector =
(ValueIterableVector<Text>) reader.getVectorSchemaRoot().getVector("name");
assertThat(nameVector.getValueIterable(), IsIterableContainingInOrder.contains(
new Text("value_19"), new Text("value_1"), new Text("value_11")));
}
assertEquals(3, rowcount);
}
Expand Down Expand Up @@ -442,20 +446,16 @@ public void testRunExtendedExpressionsProjection() throws Exception {
assertEquals(schema.getFields(), reader.getVectorSchemaRoot().getSchema().getFields());
int rowcount = 0;
while (reader.loadNextBatch()) {
assertTrue(
reader
.getVectorSchemaRoot()
.getVector("add_two_to_column_a")
.toString()
.equals("[21, 3, 13, 23, 47]"));
assertTrue(
reader
.getVectorSchemaRoot()
.getVector("concat_column_a_and_b")
.toString()
.equals(
"[value_19 - value_19, value_1 - value_1, value_11 - value_11, "
+ "value_21 - value_21, value_45 - value_45]"));
final ValueIterableVector<Integer> sumVector =
(ValueIterableVector<Integer>) reader.getVectorSchemaRoot().getVector("add_two_to_column_a");
assertThat(sumVector.getValueIterable(), IsIterableContainingInOrder.contains(21, 3, 13, 23, 47));
final ValueIterableVector<Text> nameVector =
(ValueIterableVector<Text>) reader.getVectorSchemaRoot().getVector("concat_column_a_and_b");
assertThat(nameVector.getValueIterable(),
IsIterableContainingInOrder.contains(
new Text("value_19 - value_19"), new Text("value_1 - value_1"),
new Text("value_11 - value_11"), new Text("value_21 - value_21"),
new Text("value_45 - value_45")));
rowcount += reader.getVectorSchemaRoot().getRowCount();
}
assertEquals(5, rowcount);
Expand Down Expand Up @@ -506,12 +506,10 @@ public void testRunExtendedExpressionsProjectionWithFilterInsteadOfProjectionExc
assertEquals(schema.getFields(), reader.getVectorSchemaRoot().getSchema().getFields());
int rowcount = 0;
while (reader.loadNextBatch()) {
assertTrue(
reader
.getVectorSchemaRoot()
.getVector("filter_id_lower_than_20")
.toString()
.equals("[true, true, true, false, false]"));
final ValueIterableVector<Boolean> booleanVector =
(ValueIterableVector<Boolean>) reader.getVectorSchemaRoot().getVector("filter_id_lower_than_20");
assertThat(booleanVector.getValueIterable(),
IsIterableContainingInOrder.contains(true, true, true, false, false));
rowcount += reader.getVectorSchemaRoot().getRowCount();
}
assertEquals(5, rowcount);
Expand Down Expand Up @@ -617,18 +615,15 @@ public void testRunExtendedExpressionsProjectAndFilter() throws Exception {
assertEquals(schema.getFields(), reader.getVectorSchemaRoot().getSchema().getFields());
int rowcount = 0;
while (reader.loadNextBatch()) {
assertTrue(
reader
.getVectorSchemaRoot()
.getVector("add_two_to_column_a")
.toString()
.equals("[21, 3, 13]"));
assertTrue(
reader
.getVectorSchemaRoot()
.getVector("concat_column_a_and_b")
.toString()
.equals("[value_19 - value_19, value_1 - value_1, value_11 - value_11]"));
final ValueIterableVector<Integer> sumVector =
(ValueIterableVector<Integer>) reader.getVectorSchemaRoot().getVector("add_two_to_column_a");
assertThat(sumVector.getValueIterable(), IsIterableContainingInOrder.contains(21, 3, 13));
final ValueIterableVector<Text> nameVector =
(ValueIterableVector<Text>) reader.getVectorSchemaRoot().getVector("concat_column_a_and_b");
assertThat(nameVector.getValueIterable(),
IsIterableContainingInOrder.contains(
new Text("value_19 - value_19"), new Text("value_1 - value_1"),
new Text("value_11 - value_11")));
rowcount += reader.getVectorSchemaRoot().getRowCount();
}
assertEquals(3, rowcount);
Expand Down
4 changes: 3 additions & 1 deletion java/vector/src/main/codegen/templates/DenseUnionVector.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.arrow.vector.BaseValueVector;
import org.apache.arrow.vector.BitVectorHelper;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.ValueIterableVector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.complex.AbstractStructVector;
import org.apache.arrow.vector.complex.ListVector;
Expand Down Expand Up @@ -62,6 +63,7 @@
import org.apache.arrow.vector.util.CallBack;
import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
import org.apache.arrow.vector.BaseValueVector;
import org.apache.arrow.vector.ValueIterableVector;
import org.apache.arrow.vector.util.OversizedAllocationException;
import org.apache.arrow.util.Preconditions;

Expand All @@ -84,7 +86,7 @@
* each time the vector is accessed.
* Source code generated using FreeMarker template ${.template_name}
*/
public class DenseUnionVector extends AbstractContainerVector implements FieldVector {
public class DenseUnionVector extends AbstractContainerVector implements FieldVector, ValueIterableVector<Object> {
int valueCount;

NonNullableStructVector internalStruct;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
* BigIntVector implements a fixed width vector (8 bytes) of integer values which could be null. A
* validity buffer (bit vector) is maintained to track which elements in the vector are null.
*/
public final class BigIntVector extends BaseFixedWidthVector implements BaseIntVector {
public final class BigIntVector extends BaseFixedWidthVector implements BaseIntVector, ValueIterableVector<Long> {
public static final byte TYPE_WIDTH = 8;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
* BitVector implements a fixed width (1 bit) vector of boolean values which could be null. Each
* value in the vector corresponds to a single bit in the underlying data stream backing the vector.
*/
public final class BitVector extends BaseFixedWidthVector {
public final class BitVector extends BaseFixedWidthVector implements ValueIterableVector<Boolean> {

private static final int HASH_CODE_FOR_ZERO = 17;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
* DateDayVector implements a fixed width (4 bytes) vector of date values which could be null. A
* validity buffer (bit vector) is maintained to track which elements in the vector are null.
*/
public final class DateDayVector extends BaseFixedWidthVector {
public final class DateDayVector extends BaseFixedWidthVector implements ValueIterableVector<Integer> {

public static final byte TYPE_WIDTH = 4;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
* DateMilliVector implements a fixed width vector (8 bytes) of date values which could be null. A
* validity buffer (bit vector) is maintained to track which elements in the vector are null.
*/
public final class DateMilliVector extends BaseFixedWidthVector {
public final class DateMilliVector extends BaseFixedWidthVector implements ValueIterableVector<LocalDateTime> {
public static final byte TYPE_WIDTH = 8;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
* null. A validity buffer (bit vector) is maintained to track which elements in the vector are
* null.
*/
public final class Decimal256Vector extends BaseFixedWidthVector {
public final class Decimal256Vector extends BaseFixedWidthVector implements ValueIterableVector<BigDecimal> {
public static final int MAX_PRECISION = 76;
public static final byte TYPE_WIDTH = 32;
private static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
* DecimalVector implements a fixed width vector (16 bytes) of decimal values which could be null. A
* validity buffer (bit vector) is maintained to track which elements in the vector are null.
*/
public final class DecimalVector extends BaseFixedWidthVector {
public final class DecimalVector extends BaseFixedWidthVector implements ValueIterableVector<BigDecimal> {
public static final int MAX_PRECISION = 38;
public static final byte TYPE_WIDTH = 16;
private static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
* duration values which could be null. A validity buffer (bit vector) is maintained to track which
* elements in the vector are null.
*/
public final class DurationVector extends BaseFixedWidthVector {
public final class DurationVector extends BaseFixedWidthVector implements ValueIterableVector<Duration> {
public static final byte TYPE_WIDTH = 8;

private final TimeUnit unit;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
* FixedSizeBinaryVector implements a fixed width vector of binary values which could be null. A
* validity buffer (bit vector) is maintained to track which elements in the vector are null.
*/
public class FixedSizeBinaryVector extends BaseFixedWidthVector {
public class FixedSizeBinaryVector extends BaseFixedWidthVector implements ValueIterableVector<byte[]> {
private final int byteWidth;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@
* Float2Vector implements a fixed width (2 bytes) vector of short values which could be null. A
* validity buffer (bit vector) is maintained to track which elements in the vector are null.
*/
public final class Float2Vector extends BaseFixedWidthVector implements FloatingPointVector {
public final class Float2Vector extends BaseFixedWidthVector implements FloatingPointVector,
ValueIterableVector<Short> {
public static final byte TYPE_WIDTH = 2;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@
* Float4Vector implements a fixed width vector (4 bytes) of float values which could be null. A
* validity buffer (bit vector) is maintained to track which elements in the vector are null.
*/
public final class Float4Vector extends BaseFixedWidthVector implements FloatingPointVector {
public final class Float4Vector extends BaseFixedWidthVector implements FloatingPointVector,
ValueIterableVector<Float> {
public static final byte TYPE_WIDTH = 4;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@
* Float8Vector implements a fixed width vector (8 bytes) of double values which could be null. A
* validity buffer (bit vector) is maintained to track which elements in the vector are null.
*/
public final class Float8Vector extends BaseFixedWidthVector implements FloatingPointVector {
public final class Float8Vector extends BaseFixedWidthVector implements FloatingPointVector,
ValueIterableVector<Double> {
public static final byte TYPE_WIDTH = 8;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
* IntVector implements a fixed width (4 bytes) vector of integer values which could be null. A
* validity buffer (bit vector) is maintained to track which elements in the vector are null.
*/
public final class IntVector extends BaseFixedWidthVector implements BaseIntVector {
public final class IntVector extends BaseFixedWidthVector implements BaseIntVector, ValueIterableVector<Integer> {
public static final byte TYPE_WIDTH = 4;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
* values which could be null. A validity buffer (bit vector) is maintained to track which elements
* in the vector are null.
*/
public final class IntervalDayVector extends BaseFixedWidthVector {
public final class IntervalDayVector extends BaseFixedWidthVector implements ValueIterableVector<Duration> {
public static final byte TYPE_WIDTH = 8;
private static final byte MILLISECOND_OFFSET = 4;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@
* <p>Month, day and nanoseconds are independent from one another and there is no specific limits
* imposed on their values.
*/
public final class IntervalMonthDayNanoVector extends BaseFixedWidthVector {
public final class IntervalMonthDayNanoVector extends BaseFixedWidthVector
implements ValueIterableVector<PeriodDuration> {
public static final byte TYPE_WIDTH = 16;
private static final byte DAY_OFFSET = 4;
private static final byte NANOSECOND_OFFSET = 8;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
* values which could be null. A validity buffer (bit vector) is maintained to track which elements
* in the vector are null.
*/
public final class IntervalYearVector extends BaseFixedWidthVector {
public final class IntervalYearVector extends BaseFixedWidthVector implements ValueIterableVector<Period> {
public static final byte TYPE_WIDTH = 4;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
* NULL. A validity buffer (bit vector) is maintained to track which elements in the vector are
* null. The size of the underlying buffer can be over 2GB.
*/
public final class LargeVarBinaryVector extends BaseLargeVariableWidthVector {
public final class LargeVarBinaryVector extends BaseLargeVariableWidthVector implements ValueIterableVector<byte[]> {

/**
* Instantiate a LargeVarBinaryVector. This doesn't allocate any memory for the data in vector.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
*
* <p>The offset width of this vector is 8, so the underlying buffer can be larger than 2GB.
*/
public final class LargeVarCharVector extends BaseLargeVariableWidthVector {
public final class LargeVarCharVector extends BaseLargeVariableWidthVector implements ValueIterableVector<Text> {

/**
* Instantiate a LargeVarCharVector. This doesn't allocate any memory for the data in vector.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
import org.apache.arrow.vector.util.TransferPair;

/** A null type vector. */
public class NullVector implements FieldVector {
public class NullVector implements FieldVector, ValueIterableVector<Object> {

private int valueCount;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
* SmallIntVector implements a fixed width (2 bytes) vector of short values which could be null. A
* validity buffer (bit vector) is maintained to track which elements in the vector are null.
*/
public final class SmallIntVector extends BaseFixedWidthVector implements BaseIntVector {
public final class SmallIntVector extends BaseFixedWidthVector implements BaseIntVector, ValueIterableVector<Short> {
public static final byte TYPE_WIDTH = 2;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
* which could be null. A validity buffer (bit vector) is maintained to track which elements in the
* vector are null.
*/
public final class TimeMicroVector extends BaseFixedWidthVector {
public final class TimeMicroVector extends BaseFixedWidthVector implements ValueIterableVector<Long> {
public static final byte TYPE_WIDTH = 8;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
* which could be null. A validity buffer (bit vector) is maintained to track which elements in the
* vector are null.
*/
public final class TimeMilliVector extends BaseFixedWidthVector {
public final class TimeMilliVector extends BaseFixedWidthVector implements ValueIterableVector<LocalDateTime> {
public static final byte TYPE_WIDTH = 4;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
* which could be null. A validity buffer (bit vector) is maintained to track which elements in the
* vector are null.
*/
public final class TimeNanoVector extends BaseFixedWidthVector {
public final class TimeNanoVector extends BaseFixedWidthVector implements ValueIterableVector<Long> {
public static final byte TYPE_WIDTH = 8;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
* could be null. A validity buffer (bit vector) is maintained to track which elements in the vector
* are null.
*/
public final class TimeSecVector extends BaseFixedWidthVector {
public final class TimeSecVector extends BaseFixedWidthVector implements ValueIterableVector<Integer> {
public static final byte TYPE_WIDTH = 4;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
* resolution) values which could be null. A validity buffer (bit vector) is maintained to track
* which elements in the vector are null.
*/
public final class TimeStampMicroTZVector extends TimeStampVector {
public final class TimeStampMicroTZVector extends TimeStampVector implements ValueIterableVector<Long> {
private final String timeZone;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
* resolution) values which could be null. A validity buffer (bit vector) is maintained to track
* which elements in the vector are null.
*/
public final class TimeStampMicroVector extends TimeStampVector {
public final class TimeStampMicroVector extends TimeStampVector implements ValueIterableVector<LocalDateTime> {

/**
* Instantiate a TimeStampMicroVector. This doesn't allocate any memory for the data in vector.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
* resolution) values which could be null. A validity buffer (bit vector) is maintained to track
* which elements in the vector are null.
*/
public final class TimeStampMilliTZVector extends TimeStampVector {
public final class TimeStampMilliTZVector extends TimeStampVector implements ValueIterableVector<Long> {
private final String timeZone;

/**
Expand Down
Loading

0 comments on commit b795f12

Please sign in to comment.