diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py index f4522b8f5da..343bb321baa 100644 --- a/dev/archery/archery/integration/datagen.py +++ b/dev/archery/archery/integration/datagen.py @@ -1523,7 +1523,6 @@ def _temp_path(): generate_nested_large_offsets_case() .skip_category('Go') - .skip_category('Java') # TODO(ARROW-6111) .skip_category('JS') .skip_category('Rust'), diff --git a/java/vector/src/main/codegen/data/ArrowTypes.tdd b/java/vector/src/main/codegen/data/ArrowTypes.tdd index 8361e2ac009..4d2a540f572 100644 --- a/java/vector/src/main/codegen/data/ArrowTypes.tdd +++ b/java/vector/src/main/codegen/data/ArrowTypes.tdd @@ -30,6 +30,11 @@ fields: [], complex: true }, + { + name: "LargeList", + fields: [], + complex: true + }, { name: "FixedSizeList", fields: [{name: "listSize", type: int}], diff --git a/java/vector/src/main/codegen/templates/ComplexCopier.java b/java/vector/src/main/codegen/templates/ComplexCopier.java index 60a932d69d9..af78022c943 100644 --- a/java/vector/src/main/codegen/templates/ComplexCopier.java +++ b/java/vector/src/main/codegen/templates/ComplexCopier.java @@ -52,6 +52,7 @@ private static void writeValue(FieldReader reader, FieldWriter writer) { switch (mt) { case LIST: + case LARGELIST: case FIXED_SIZE_LIST: if (reader.isSet()) { writer.startList(); diff --git a/java/vector/src/main/codegen/templates/UnionListWriter.java b/java/vector/src/main/codegen/templates/UnionListWriter.java index ba60e27c5cc..a2664436acc 100644 --- a/java/vector/src/main/codegen/templates/UnionListWriter.java +++ b/java/vector/src/main/codegen/templates/UnionListWriter.java @@ -23,13 +23,15 @@ import java.math.BigDecimal; <@pp.dropOutputFile /> -<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/UnionListWriter.java" /> +<#list ["List", "LargeList"] as listName> +<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/Union${listName}Writer.java" /> <#include "/@includes/license.ftl" /> package org.apache.arrow.vector.complex.impl; +import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; <#include "/@includes/vv_imports.ftl" /> /* @@ -37,24 +39,28 @@ */ @SuppressWarnings("unused") -public class UnionListWriter extends AbstractFieldWriter { +public class Union${listName}Writer extends AbstractFieldWriter { - protected ListVector vector; + protected ${listName}Vector vector; protected PromotableWriter writer; private boolean inStruct = false; private String structName; + <#if listName == "LargeList"> + private static final long OFFSET_WIDTH = 8; + <#else> private static final int OFFSET_WIDTH = 4; + - public UnionListWriter(ListVector vector) { + public Union${listName}Writer(${listName}Vector vector) { this(vector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance()); } - public UnionListWriter(ListVector vector, NullableStructWriterFactory nullableStructWriterFactory) { + public Union${listName}Writer(${listName}Vector vector, NullableStructWriterFactory nullableStructWriterFactory) { this.vector = vector; this.writer = new PromotableWriter(vector.getDataVector(), vector, nullableStructWriterFactory); } - public UnionListWriter(ListVector vector, AbstractFieldWriter parent) { + public Union${listName}Writer(${listName}Vector vector, AbstractFieldWriter parent) { this(vector); } @@ -92,6 +98,7 @@ public void close() throws Exception { public void setPosition(int index) { super.setPosition(index); } + <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> <#assign fields = minor.fields!type.fields /> <#assign uncappedName = name?uncap_first/> @@ -149,6 +156,19 @@ public StructWriter struct(String name) { return structWriter; } + <#if listName == "LargeList"> + @Override + public void startList() { + vector.startNewValue(idx()); + writer.setPosition(checkedCastToInt(vector.getOffsetBuffer().getLong(((long) idx() + 1L) * OFFSET_WIDTH))); + } + + @Override + public void endList() { + vector.getOffsetBuffer().setLong(((long) idx() + 1L) * OFFSET_WIDTH, writer.idx()); + setPosition(idx() + 1); + } + <#else> @Override public void startList() { vector.startNewValue(idx()); @@ -160,6 +180,7 @@ public void endList() { vector.getOffsetBuffer().setInt((idx() + 1) * OFFSET_WIDTH, writer.idx()); setPosition(idx() + 1); } + @Override public void start() { @@ -224,3 +245,4 @@ public void write(${name}Holder holder) { } + diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java b/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java index b4b4887bc42..d731b02d360 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java @@ -20,6 +20,7 @@ import static io.netty.util.internal.PlatformDependent.getByte; import static io.netty.util.internal.PlatformDependent.getInt; import static io.netty.util.internal.PlatformDependent.getLong; +import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BoundsChecking; @@ -37,6 +38,20 @@ public class BitVectorHelper { private BitVectorHelper() {} + /** + * Get the index of byte corresponding to bit index in validity buffer. + */ + public static long byteIndex(long absoluteBitIndex) { + return absoluteBitIndex >> 3; + } + + /** + * Get the relative index of bit within the byte in validity buffer. + */ + public static int bitIndex(long absoluteBitIndex) { + return checkedCastToInt(absoluteBitIndex & 7); + } + /** * Get the index of byte corresponding to bit index in validity buffer. */ @@ -57,11 +72,11 @@ public static int bitIndex(int absoluteBitIndex) { * @param validityBuffer validity buffer of the vector * @param index index to be set */ - public static void setBit(ArrowBuf validityBuffer, int index) { + public static void setBit(ArrowBuf validityBuffer, long index) { // it can be observed that some logic is duplicate of the logic in setValidityBit. // this is because JIT cannot always remove the if branch in setValidityBit, // so we give a dedicated implementation for setting bits. - final int byteIndex = byteIndex(index); + final long byteIndex = byteIndex(index); final int bitIndex = bitIndex(index); // the byte is promoted to an int, because according to Java specification, @@ -76,7 +91,7 @@ public static void setBit(ArrowBuf validityBuffer, int index) { /** * Set the bit at provided index to 1. * - * @deprecated Please use {@link BitVectorHelper#setBit(ArrowBuf, int)} instead.. + * @deprecated Please use {@link BitVectorHelper#setBit(ArrowBuf, long)} instead.. */ @Deprecated public static void setValidityBitToOne(ArrowBuf validityBuffer, int index) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java index a3999050c32..191597ad877 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java @@ -110,6 +110,15 @@ public TypeLayout visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) return new TypeLayout(vectors); } + @Override + public TypeLayout visit(ArrowType.LargeList type) { + List vectors = asList( + BufferLayout.validityVector(), + BufferLayout.largeOffsetBuffer() + ); + return new TypeLayout(vectors); + } + @Override public TypeLayout visit(FixedSizeList type) { List vectors = asList( @@ -295,6 +304,12 @@ public Integer visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) { return 2; } + @Override + public Integer visit(ArrowType.LargeList type) { + // validity buffer + offset buffer + return 2; + } + @Override public Integer visit(FixedSizeList type) { // validity buffer diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java index 85337f4e091..6805d7caf8e 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java @@ -17,6 +17,8 @@ package org.apache.arrow.vector.compare; +import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; + import java.util.List; import java.util.function.BiFunction; @@ -30,6 +32,7 @@ import org.apache.arrow.vector.complex.BaseRepeatedValueVector; import org.apache.arrow.vector.complex.DenseUnionVector; import org.apache.arrow.vector.complex.FixedSizeListVector; +import org.apache.arrow.vector.complex.LargeListVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.NonNullableStructVector; import org.apache.arrow.vector.complex.UnionVector; @@ -171,6 +174,14 @@ public Boolean visit(FixedSizeListVector left, Range range) { return compareFixedSizeListVectors(range); } + @Override + public Boolean visit(LargeListVector left, Range range) { + if (!validate(left)) { + return false; + } + return compareLargeListVectors(range); + } + @Override public Boolean visit(NonNullableStructVector left, Range range) { if (!validate(left)) { @@ -494,4 +505,46 @@ protected boolean compareFixedSizeListVectors(Range range) { } return true; } + + protected boolean compareLargeListVectors(Range range) { + LargeListVector leftVector = (LargeListVector) left; + LargeListVector rightVector = (LargeListVector) right; + + RangeEqualsVisitor innerVisitor = + createInnerVisitor(leftVector.getDataVector(), rightVector.getDataVector(), /*type comparator*/ null); + Range innerRange = new Range(); + + for (int i = 0; i < range.getLength(); i++) { + int leftIndex = range.getLeftStart() + i; + int rightIndex = range.getRightStart() + i; + + boolean isNull = leftVector.isNull(leftIndex); + if (isNull != rightVector.isNull(rightIndex)) { + return false; + } + + long offsetWidth = LargeListVector.OFFSET_WIDTH; + + if (!isNull) { + final long startIndexLeft = leftVector.getOffsetBuffer().getLong((long) leftIndex * offsetWidth); + final long endIndexLeft = leftVector.getOffsetBuffer().getLong((long) (leftIndex + 1) * offsetWidth); + + final long startIndexRight = rightVector.getOffsetBuffer().getLong((long) rightIndex * offsetWidth); + final long endIndexRight = rightVector.getOffsetBuffer().getLong((long) (rightIndex + 1) * offsetWidth); + + if ((endIndexLeft - startIndexLeft) != (endIndexRight - startIndexRight)) { + return false; + } + + innerRange = innerRange // TODO revisit these casts when long indexing is finished + .setRightStart(checkedCastToInt(startIndexRight)) + .setLeftStart(checkedCastToInt(startIndexLeft)) + .setLength(checkedCastToInt(endIndexLeft - startIndexLeft)); + if (!innerVisitor.rangeEquals(innerRange)) { + return false; + } + } + } + return true; + } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java index ef115b7ba9f..95db7924cd1 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java @@ -27,6 +27,7 @@ import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.complex.DenseUnionVector; import org.apache.arrow.vector.complex.FixedSizeListVector; +import org.apache.arrow.vector.complex.LargeListVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.NonNullableStructVector; import org.apache.arrow.vector.complex.UnionVector; @@ -93,6 +94,11 @@ public Boolean visit(FixedSizeListVector left, Void value) { return compareField(left.getField(), right.getField()); } + @Override + public Boolean visit(LargeListVector left, Void value) { + return compareField(left.getField(), right.getField()); + } + @Override public Boolean visit(NonNullableStructVector left, Void value) { return compareField(left.getField(), right.getField()); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorVisitor.java index a33c56a5736..14f3434736e 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorVisitor.java @@ -23,6 +23,7 @@ import org.apache.arrow.vector.NullVector; import org.apache.arrow.vector.complex.DenseUnionVector; import org.apache.arrow.vector.complex.FixedSizeListVector; +import org.apache.arrow.vector.complex.LargeListVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.NonNullableStructVector; import org.apache.arrow.vector.complex.UnionVector; @@ -44,6 +45,8 @@ public interface VectorVisitor { OUT visit(FixedSizeListVector left, IN value); + OUT visit(LargeListVector left, IN value); + OUT visit(NonNullableStructVector left, IN value); OUT visit(UnionVector left, IN value); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java new file mode 100644 index 00000000000..f437f32a733 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java @@ -0,0 +1,1004 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector.complex; + +import static java.util.Collections.singletonList; +import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; +import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; +import static org.apache.arrow.util.Preconditions.checkNotNull; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.OutOfMemoryException; +import org.apache.arrow.memory.util.ArrowBufPointer; +import org.apache.arrow.memory.util.ByteFunctionHelpers; +import org.apache.arrow.memory.util.CommonUtil; +import org.apache.arrow.memory.util.hash.ArrowBufHasher; +import org.apache.arrow.util.Preconditions; +import org.apache.arrow.vector.AddOrGetResult; +import org.apache.arrow.vector.BaseFixedWidthVector; +import org.apache.arrow.vector.BaseValueVector; +import org.apache.arrow.vector.BaseVariableWidthVector; +import org.apache.arrow.vector.BitVectorHelper; +import org.apache.arrow.vector.BufferBacked; +import org.apache.arrow.vector.DensityAwareVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.NullVector; +import org.apache.arrow.vector.UInt4Vector; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.ZeroVector; +import org.apache.arrow.vector.compare.VectorVisitor; +import org.apache.arrow.vector.complex.impl.ComplexCopier; +import org.apache.arrow.vector.complex.impl.UnionLargeListReader; +import org.apache.arrow.vector.complex.impl.UnionLargeListWriter; +import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.ipc.message.ArrowFieldNode; +import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.CallBack; +import org.apache.arrow.vector.util.JsonStringArrayList; +import org.apache.arrow.vector.util.OversizedAllocationException; +import org.apache.arrow.vector.util.SchemaChangeRuntimeException; +import org.apache.arrow.vector.util.TransferPair; + +/** + * A list vector contains lists of a specific type of elements. Its structure contains 3 elements. + *
    + *
  1. A validity buffer.
  2. + *
  3. An offset buffer, that denotes lists boundaries.
  4. + *
  5. A child data vector that contains the elements of lists.
  6. + *
+ * + * This is the LargeList variant of list, it has a 64-bit wide offset + * + *

+ * WARNING: Currently Arrow in Java doesn't support 64-bit vectors. This class + * follows the expected behaviour of a LargeList but doesn't actually support allocating + * a 64-bit vector. It has little use until 64-bit vectors are supported and should be used + * with caution. + * todo review checkedCastToInt usage in this class. + * Once int64 indexed vectors are supported these checks aren't needed. + *

+ */ +public class LargeListVector extends BaseValueVector implements RepeatedValueVector, FieldVector, PromotableVector { + + public static LargeListVector empty(String name, BufferAllocator allocator) { + return new LargeListVector(name, allocator, FieldType.nullable(ArrowType.LargeList.INSTANCE), null); + } + + public static final FieldVector DEFAULT_DATA_VECTOR = ZeroVector.INSTANCE; + public static final String DATA_VECTOR_NAME = "$data$"; + + public static final byte OFFSET_WIDTH = 8; + protected ArrowBuf offsetBuffer; + protected FieldVector vector; + protected final CallBack callBack; + protected int valueCount; + protected long offsetAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH; + private final String name; + + protected String defaultDataVectorName = DATA_VECTOR_NAME; + protected ArrowBuf validityBuffer; + protected UnionLargeListReader reader; + private final FieldType fieldType; + private int validityAllocationSizeInBytes; + + /** + * The maximum index that is actually set. + */ + private int lastSet; + + /** + * Constructs a new instance. + * + * @param name The name of the instance. + * @param allocator The allocator to use for allocating/reallocating buffers. + * @param fieldType The type of this list. + * @param callBack A schema change callback. + */ + public LargeListVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { + super(allocator); + this.name = name; + this.validityBuffer = allocator.getEmpty(); + this.fieldType = checkNotNull(fieldType); + this.callBack = callBack; + this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); + this.lastSet = -1; + this.offsetBuffer = allocator.getEmpty(); + this.vector = vector == null ? DEFAULT_DATA_VECTOR : vector; + this.valueCount = 0; + } + + @Override + public void initializeChildrenFromFields(List children) { + if (children.size() != 1) { + throw new IllegalArgumentException("Lists have only one child. Found: " + children); + } + Field field = children.get(0); + AddOrGetResult addOrGetVector = addOrGetVector(field.getFieldType()); + if (!addOrGetVector.isCreated()) { + throw new IllegalArgumentException("Child vector already existed: " + addOrGetVector.getVector()); + } + + addOrGetVector.getVector().initializeChildrenFromFields(field.getChildren()); + } + + @Override + public void setInitialCapacity(int numRecords) { + validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); + offsetAllocationSizeInBytes = (long) (numRecords + 1) * OFFSET_WIDTH; + if (vector instanceof BaseFixedWidthVector || vector instanceof BaseVariableWidthVector) { + vector.setInitialCapacity(numRecords * RepeatedValueVector.DEFAULT_REPEAT_PER_RECORD); + } else { + vector.setInitialCapacity(numRecords); + } + } + + /** + * Specialized version of setInitialCapacity() for ListVector. This is + * used by some callers when they want to explicitly control and be + * conservative about memory allocated for inner data vector. This is + * very useful when we are working with memory constraints for a query + * and have a fixed amount of memory reserved for the record batch. In + * such cases, we are likely to face OOM or related problems when + * we reserve memory for a record batch with value count x and + * do setInitialCapacity(x) such that each vector allocates only + * what is necessary and not the default amount but the multiplier + * forces the memory requirement to go beyond what was needed. + * + * @param numRecords value count + * @param density density of ListVector. Density is the average size of + * list per position in the List vector. For example, a + * density value of 10 implies each position in the list + * vector has a list of 10 values. + * A density value of 0.1 implies out of 10 positions in + * the list vector, 1 position has a list of size 1 and + * remaining positions are null (no lists) or empty lists. + * This helps in tightly controlling the memory we provision + * for inner data vector. + */ + @Override + public void setInitialCapacity(int numRecords, double density) { + validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); + if ((numRecords * density) >= Integer.MAX_VALUE) { + throw new OversizedAllocationException("Requested amount of memory is more than max allowed"); + } + + offsetAllocationSizeInBytes = (numRecords + 1) * OFFSET_WIDTH; + + int innerValueCapacity = Math.max((int) (numRecords * density), 1); + + if (vector instanceof DensityAwareVector) { + ((DensityAwareVector) vector).setInitialCapacity(innerValueCapacity, density); + } else { + vector.setInitialCapacity(innerValueCapacity); + } + } + + /** + * Get the density of this ListVector. + * @return density + */ + public double getDensity() { + if (valueCount == 0) { + return 0.0D; + } + final long startOffset = offsetBuffer.getLong(0L); + final long endOffset = offsetBuffer.getLong((long) valueCount * OFFSET_WIDTH); + final double totalListSize = endOffset - startOffset; + return totalListSize / valueCount; + } + + @Override + public List getChildrenFromFields() { + return singletonList(getDataVector()); + } + + /** + * Load the buffers of this vector with provided source buffers. + * The caller manages the source buffers and populates them before invoking + * this method. + * @param fieldNode the fieldNode indicating the value count + * @param ownBuffers the buffers for this Field (own buffers only, children not included) + */ + @Override + public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { + if (ownBuffers.size() != 2) { + throw new IllegalArgumentException("Illegal buffer count, expected " + 2 + ", got: " + ownBuffers.size()); + } + + ArrowBuf bitBuffer = ownBuffers.get(0); + ArrowBuf offBuffer = ownBuffers.get(1); + + validityBuffer.getReferenceManager().release(); + validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator); + offsetBuffer.getReferenceManager().release(); + offsetBuffer = offBuffer.getReferenceManager().retain(offBuffer, allocator); + + validityAllocationSizeInBytes = checkedCastToInt(validityBuffer.capacity()); + offsetAllocationSizeInBytes = offsetBuffer.capacity(); + + lastSet = fieldNode.getLength() - 1; + valueCount = fieldNode.getLength(); + } + + /** + * Get the buffers belonging to this vector. + * @return the inner buffers. + */ + @Override + public List getFieldBuffers() { + List result = new ArrayList<>(2); + setReaderAndWriterIndex(); + result.add(validityBuffer); + result.add(offsetBuffer); + + return result; + } + + /** + * Set the reader and writer indexes for the inner buffers. + */ + private void setReaderAndWriterIndex() { + validityBuffer.readerIndex(0); + offsetBuffer.readerIndex(0); + if (valueCount == 0) { + validityBuffer.writerIndex(0); + offsetBuffer.writerIndex(0); + } else { + validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount)); + offsetBuffer.writerIndex((valueCount + 1) * OFFSET_WIDTH); + } + } + + @Override + @Deprecated + public List getFieldInnerVectors() { + throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers"); + } + + /** + * Same as {@link #allocateNewSafe()}. + */ + @Override + public void allocateNew() throws OutOfMemoryException { + if (!allocateNewSafe()) { + throw new OutOfMemoryException("Failure while allocating memory"); + } + } + + /** + * Allocate memory for the vector. We internally use a default value count + * of 4096 to allocate memory for at least these many elements in the + * vector. + * + * @return false if memory allocation fails, true otherwise. + */ + public boolean allocateNewSafe() { + boolean success = false; + try { + /* we are doing a new allocation -- release the current buffers */ + clear(); + /* allocate validity buffer */ + allocateValidityBuffer(validityAllocationSizeInBytes); + /* allocate offset and data buffer */ + boolean dataAlloc = false; + try { + allocateOffsetBuffer(offsetAllocationSizeInBytes); + dataAlloc = vector.allocateNewSafe(); + } catch (Exception e) { + e.printStackTrace(); + clear(); + return false; + } finally { + if (!dataAlloc) { + clear(); + } + } + success = dataAlloc; + } finally { + if (!success) { + clear(); + return false; + } + } + return true; + } + + private void allocateValidityBuffer(final long size) { + final int curSize = (int) size; + validityBuffer = allocator.buffer(curSize); + validityBuffer.readerIndex(0); + validityAllocationSizeInBytes = curSize; + validityBuffer.setZero(0, validityBuffer.capacity()); + } + + protected void allocateOffsetBuffer(final long size) { + offsetBuffer = allocator.buffer(size); + offsetBuffer.readerIndex(0); + offsetAllocationSizeInBytes = size; + offsetBuffer.setZero(0, offsetBuffer.capacity()); + } + + /** + * Resize the vector to increase the capacity. The internal behavior is to + * double the current value capacity. + */ + @Override + public void reAlloc() { + /* reallocate the validity buffer */ + reallocValidityBuffer(); + /* reallocate the offset and data */ + reallocOffsetBuffer(); + vector.reAlloc(); + } + + private void reallocValidityAndOffsetBuffers() { + reallocOffsetBuffer(); + reallocValidityBuffer(); + } + + protected void reallocOffsetBuffer() { + final long currentBufferCapacity = offsetBuffer.capacity(); + long newAllocationSize = currentBufferCapacity * 2; + if (newAllocationSize == 0) { + if (offsetAllocationSizeInBytes > 0) { + newAllocationSize = offsetAllocationSizeInBytes; + } else { + newAllocationSize = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH * 2; + } + } + + newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); + newAllocationSize = Math.min(newAllocationSize, (long) (OFFSET_WIDTH) * Integer.MAX_VALUE); + assert newAllocationSize >= 1; + + if (newAllocationSize > MAX_ALLOCATION_SIZE || newAllocationSize <= offsetBuffer.capacity()) { + throw new OversizedAllocationException("Unable to expand the buffer"); + } + + final ArrowBuf newBuf = allocator.buffer(newAllocationSize); + newBuf.setBytes(0, offsetBuffer, 0, currentBufferCapacity); + newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); + offsetBuffer.getReferenceManager().release(1); + offsetBuffer = newBuf; + offsetAllocationSizeInBytes = newAllocationSize; + } + + private void reallocValidityBuffer() { + final int currentBufferCapacity = checkedCastToInt(validityBuffer.capacity()); + long newAllocationSize = currentBufferCapacity * 2; + if (newAllocationSize == 0) { + if (validityAllocationSizeInBytes > 0) { + newAllocationSize = validityAllocationSizeInBytes; + } else { + newAllocationSize = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION) * 2; + } + } + newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); + assert newAllocationSize >= 1; + + if (newAllocationSize > MAX_ALLOCATION_SIZE) { + throw new OversizedAllocationException("Unable to expand the buffer"); + } + + final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize); + newBuf.setBytes(0, validityBuffer, 0, currentBufferCapacity); + newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); + validityBuffer.getReferenceManager().release(1); + validityBuffer = newBuf; + validityAllocationSizeInBytes = (int) newAllocationSize; + } + + /** + * Same as {@link #copyFrom(int, int, ValueVector)} except that + * it handles the case when the capacity of the vector needs to be expanded + * before copy. + * @param inIndex position to copy from in source vector + * @param outIndex position to copy to in this vector + * @param from source vector + */ + @Override + public void copyFromSafe(int inIndex, int outIndex, ValueVector from) { + copyFrom(inIndex, outIndex, from); + } + + /** + * Copy a cell value from a particular index in source vector to a particular + * position in this vector. + * @param inIndex position to copy from in source vector + * @param outIndex position to copy to in this vector + * @param from source vector + */ + @Override + public void copyFrom(int inIndex, int outIndex, ValueVector from) { + Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); + FieldReader in = from.getReader(); + in.setPosition(inIndex); + UnionLargeListWriter out = getWriter(); + out.setPosition(outIndex); + ComplexCopier.copy(in, out); + } + + @Override + public UInt4Vector getOffsetVector() { + throw new UnsupportedOperationException("There is no inner offset vector"); + } + + /** + * Get the inner data vector for this list vector. + * @return data vector + */ + @Override + public FieldVector getDataVector() { + return vector; + } + + @Override + public TransferPair getTransferPair(String ref, BufferAllocator allocator) { + return getTransferPair(ref, allocator, null); + } + + @Override + public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { + return new TransferImpl(ref, allocator, callBack); + } + + @Override + public TransferPair makeTransferPair(ValueVector target) { + return new TransferImpl((LargeListVector) target); + } + + @Override + public long getValidityBufferAddress() { + return (validityBuffer.memoryAddress()); + } + + @Override + public long getDataBufferAddress() { + throw new UnsupportedOperationException(); + } + + @Override + public long getOffsetBufferAddress() { + return (offsetBuffer.memoryAddress()); + } + + @Override + public ArrowBuf getValidityBuffer() { + return validityBuffer; + } + + @Override + public ArrowBuf getDataBuffer() { + throw new UnsupportedOperationException(); + } + + @Override + public ArrowBuf getOffsetBuffer() { + return offsetBuffer; + } + + @Override + public int getValueCount() { + return valueCount; + } + + @Override + public int hashCode(int index) { + return hashCode(index, null); + } + + @Override + public int hashCode(int index, ArrowBufHasher hasher) { + if (isSet(index) == 0) { + return ArrowBufPointer.NULL_HASH_CODE; + } + int hash = 0; + final long start = offsetBuffer.getLong((long) index * OFFSET_WIDTH); + final long end = offsetBuffer.getLong(((long) index + 1L) * OFFSET_WIDTH); + for (long i = start; i < end; i++) { + hash = ByteFunctionHelpers.combineHash(hash, vector.hashCode(checkedCastToInt(i), hasher)); + } + return hash; + } + + @Override + public OUT accept(VectorVisitor visitor, IN value) { + return visitor.visit(this, value); + } + + public UnionLargeListWriter getWriter() { + return new UnionLargeListWriter(this); + } + + protected void replaceDataVector(FieldVector v) { + vector.clear(); + vector = v; + } + + @Override + public UnionVector promoteToUnion() { + UnionVector vector = new UnionVector("$data$", allocator, callBack); + replaceDataVector(vector); + invalidateReader(); + if (callBack != null) { + callBack.doWork(); + } + return vector; + } + + private class TransferImpl implements TransferPair { + + LargeListVector to; + TransferPair dataTransferPair; + + public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) { + this(new LargeListVector(name, allocator, fieldType, callBack)); + } + + public TransferImpl(LargeListVector to) { + this.to = to; + to.addOrGetVector(vector.getField().getFieldType()); + if (to.getDataVector() instanceof ZeroVector) { + to.addOrGetVector(vector.getField().getFieldType()); + } + dataTransferPair = getDataVector().makeTransferPair(to.getDataVector()); + } + + /** + * Transfer this vector'data to another vector. The memory associated + * with this vector is transferred to the allocator of target vector + * for accounting and management purposes. + */ + @Override + public void transfer() { + to.clear(); + dataTransferPair.transfer(); + to.validityBuffer = transferBuffer(validityBuffer, to.allocator); + to.offsetBuffer = transferBuffer(offsetBuffer, to.allocator); + to.lastSet = lastSet; + if (valueCount > 0) { + to.setValueCount(valueCount); + } + clear(); + } + + /** + * Slice this vector at desired index and length and transfer the + * corresponding data to the target vector. + * @param startIndex start position of the split in source vector. + * @param length length of the split. + */ + @Override + public void splitAndTransfer(int startIndex, int length) { + Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount, + "Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount); + final long startPoint = offsetBuffer.getLong((long) startIndex * OFFSET_WIDTH); + final long sliceLength = offsetBuffer.getLong((long) (startIndex + length) * OFFSET_WIDTH) - startPoint; + to.clear(); + to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH); + /* splitAndTransfer offset buffer */ + for (int i = 0; i < length + 1; i++) { + final long relativeOffset = offsetBuffer.getLong((long) (startIndex + i) * OFFSET_WIDTH) - startPoint; + to.offsetBuffer.setLong((long) i * OFFSET_WIDTH, relativeOffset); + } + /* splitAndTransfer validity buffer */ + splitAndTransferValidityBuffer(startIndex, length, to); + /* splitAndTransfer data buffer */ + dataTransferPair.splitAndTransfer(checkedCastToInt(startPoint), checkedCastToInt(sliceLength)); + to.lastSet = length - 1; + to.setValueCount(length); + } + + /* + * transfer the validity. + */ + private void splitAndTransferValidityBuffer(int startIndex, int length, LargeListVector target) { + int firstByteSource = BitVectorHelper.byteIndex(startIndex); + int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); + int byteSizeTarget = getValidityBufferSizeFromCount(length); + int offset = startIndex % 8; + + if (length > 0) { + if (offset == 0) { + // slice + if (target.validityBuffer != null) { + target.validityBuffer.getReferenceManager().release(); + } + target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); + target.validityBuffer.getReferenceManager().retain(1); + } else { + /* Copy data + * When the first bit starts from the middle of a byte (offset != 0), + * copy data from src BitVector. + * Each byte in the target is composed by a part in i-th byte, + * another part in (i+1)-th byte. + */ + target.allocateValidityBuffer(byteSizeTarget); + + for (int i = 0; i < byteSizeTarget - 1; i++) { + byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer, firstByteSource + i, offset); + byte b2 = BitVectorHelper.getBitsFromNextByte(validityBuffer, firstByteSource + i + 1, offset); + + target.validityBuffer.setByte(i, (b1 + b2)); + } + + /* Copying the last piece is done in the following manner: + * if the source vector has 1 or more bytes remaining, we copy + * the last piece as a byte formed by shifting data + * from the current byte and the next byte. + * + * if the source vector has no more bytes remaining + * (we are at the last byte), we copy the last piece as a byte + * by shifting data from the current byte. + */ + if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { + byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer, + firstByteSource + byteSizeTarget - 1, offset); + byte b2 = BitVectorHelper.getBitsFromNextByte(validityBuffer, + firstByteSource + byteSizeTarget, offset); + + target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); + } else { + byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer, + firstByteSource + byteSizeTarget - 1, offset); + target.validityBuffer.setByte(byteSizeTarget - 1, b1); + } + } + } + } + + @Override + public ValueVector getTo() { + return to; + } + + @Override + public void copyValueSafe(int from, int to) { + this.to.copyFrom(from, to, LargeListVector.this); + } + } + + @Override + public UnionLargeListReader getReader() { + if (reader == null) { + reader = new UnionLargeListReader(this); + } + return reader; + } + + /** + * Initialize the data vector (and execute callback) if it hasn't already been done, + * returns the data vector. + */ + public AddOrGetResult addOrGetVector(FieldType fieldType) { + boolean created = false; + if (vector instanceof NullVector) { + vector = fieldType.createNewSingleVector(defaultDataVectorName, allocator, callBack); + // returned vector must have the same field + created = true; + if (callBack != null && + // not a schema change if changing from ZeroVector to ZeroVector + (fieldType.getType().getTypeID() != ArrowType.ArrowTypeID.Null)) { + callBack.doWork(); + } + } + + if (vector.getField().getType().getTypeID() != fieldType.getType().getTypeID()) { + final String msg = String.format("Inner vector type mismatch. Requested type: [%s], actual type: [%s]", + fieldType.getType().getTypeID(), vector.getField().getType().getTypeID()); + throw new SchemaChangeRuntimeException(msg); + } + + invalidateReader(); + return new AddOrGetResult<>((T) vector, created); + } + + /** + * Get the size (number of bytes) of underlying buffers used by this + * vector. + * @return size of underlying buffers. + */ + @Override + public int getBufferSize() { + if (valueCount == 0) { + return 0; + } + final int offsetBufferSize = (valueCount + 1) * OFFSET_WIDTH; + final int validityBufferSize = getValidityBufferSizeFromCount(valueCount); + return offsetBufferSize + validityBufferSize + vector.getBufferSize(); + } + + @Override + public int getBufferSizeFor(int valueCount) { + if (valueCount == 0) { + return 0; + } + final int validityBufferSize = getValidityBufferSizeFromCount(valueCount); + long innerVectorValueCount = offsetBuffer.getLong((long) valueCount * OFFSET_WIDTH); + + return ((valueCount + 1) * OFFSET_WIDTH) + + vector.getBufferSizeFor(checkedCastToInt(innerVectorValueCount)) + + validityBufferSize; + } + + @Override + public Field getField() { + return new Field(getName(), fieldType, Collections.singletonList(getDataVector().getField())); + } + + @Override + public MinorType getMinorType() { + return MinorType.LARGELIST; + } + + @Override + public String getName() { + return name; + } + + @Override + public void clear() { + offsetBuffer = releaseBuffer(offsetBuffer); + vector.clear(); + valueCount = 0; + super.clear(); + validityBuffer = releaseBuffer(validityBuffer); + lastSet = -1; + } + + @Override + public void reset() { + offsetBuffer.setZero(0, offsetBuffer.capacity()); + vector.reset(); + valueCount = 0; + validityBuffer.setZero(0, validityBuffer.capacity()); + lastSet = -1; + } + + /** + * Return the underlying buffers associated with this vector. Note that this doesn't + * impact the reference counts for this buffer so it only should be used for in-context + * access. Also note that this buffer changes regularly thus + * external classes shouldn't hold a reference to it (unless they change it). + * + * @param clear Whether to clear vector before returning; the buffers will still be refcounted + * but the returned array will be the only reference to them + * @return The underlying {@link ArrowBuf buffers} that is used by this + * vector instance. + */ + @Override + public ArrowBuf[] getBuffers(boolean clear) { + setReaderAndWriterIndex(); + final ArrowBuf[] buffers; + if (getBufferSize() == 0) { + buffers = new ArrowBuf[0]; + } else { + List list = new ArrayList<>(); + list.add(offsetBuffer); + list.add(validityBuffer); + list.addAll(Arrays.asList(vector.getBuffers(false))); + buffers = list.toArray(new ArrowBuf[list.size()]); + } + if (clear) { + for (ArrowBuf buffer : buffers) { + buffer.getReferenceManager().retain(); + } + clear(); + } + return buffers; + } + + protected void invalidateReader() { + reader = null; + } + + /** + * Get the element in the list vector at a particular index. + * @param index position of the element + * @return Object at given position + */ + @Override + public Object getObject(int index) { + if (isSet(index) == 0) { + return null; + } + final List vals = new JsonStringArrayList<>(); + final long start = offsetBuffer.getLong((long) index * OFFSET_WIDTH); + final long end = offsetBuffer.getLong(((long) index + 1L) * OFFSET_WIDTH); + final ValueVector vv = getDataVector(); + for (long i = start; i < end; i++) { + vals.add(vv.getObject(checkedCastToInt(i))); + } + + return vals; + } + + /** + * Check if element at given index is null. + * + * @param index position of element + * @return true if element at given index is null, false otherwise + */ + @Override + public boolean isNull(int index) { + return (isSet(index) == 0); + } + + /** + * Check if element at given index is empty list. + * @param index position of element + * @return true if element at given index is empty list or NULL, false otherwise + */ + public boolean isEmpty(int index) { + if (isNull(index)) { + return true; + } else { + final long start = offsetBuffer.getLong((long) index * OFFSET_WIDTH); + final long end = offsetBuffer.getLong(((long) index + 1L) * OFFSET_WIDTH); + return start == end; + } + } + + /** + * Same as {@link #isNull(int)}. + * + * @param index position of element + * @return 1 if element at given index is not null, 0 otherwise + */ + public int isSet(int index) { + final int byteIndex = index >> 3; + final byte b = validityBuffer.getByte(byteIndex); + final int bitIndex = index & 7; + return (b >> bitIndex) & 0x01; + } + + /** + * Get the number of elements that are null in the vector. + * + * @return the number of null elements. + */ + @Override + public int getNullCount() { + return BitVectorHelper.getNullCount(validityBuffer, valueCount); + } + + /** + * Get the current value capacity for the vector. + * @return number of elements that vector can hold. + */ + @Override + public int getValueCapacity() { + return getValidityAndOffsetValueCapacity(); + } + + protected int getOffsetBufferValueCapacity() { + return checkedCastToInt(offsetBuffer.capacity() / OFFSET_WIDTH); + } + + private int getValidityAndOffsetValueCapacity() { + final int offsetValueCapacity = Math.max(getOffsetBufferValueCapacity() - 1, 0); + return Math.min(offsetValueCapacity, getValidityBufferValueCapacity()); + } + + private int getValidityBufferValueCapacity() { + return capAtMaxInt(validityBuffer.capacity() * 8); + } + + /** + * Sets the list at index to be not-null. Reallocates validity buffer if index + * is larger than current capacity. + */ + public void setNotNull(int index) { + while (index >= getValidityAndOffsetValueCapacity()) { + reallocValidityAndOffsetBuffers(); + } + BitVectorHelper.setBit(validityBuffer, index); + lastSet = index; + } + + /** + * Start a new value in the list vector. + * + * @param index index of the value to start + */ + public long startNewValue(long index) { + while (index >= getValidityAndOffsetValueCapacity()) { + reallocValidityAndOffsetBuffers(); + } + for (int i = lastSet + 1; i <= index; i++) { + final long currentOffset = offsetBuffer.getLong((long) i * OFFSET_WIDTH); + offsetBuffer.setLong(((long) i + 1L) * OFFSET_WIDTH, currentOffset); + } + BitVectorHelper.setBit(validityBuffer, index); + lastSet = checkedCastToInt(index); + return offsetBuffer.getLong(((long) lastSet + 1L) * OFFSET_WIDTH); + } + + /** + * End the current value. + * + * @param index index of the value to end + * @param size number of elements in the list that was written + */ + public void endValue(int index, long size) { + final long currentOffset = offsetBuffer.getLong(((long) index + 1L) * OFFSET_WIDTH); + offsetBuffer.setLong(((long) index + 1L) * OFFSET_WIDTH, currentOffset + size); + } + + /** + * Sets the value count for the vector. + * + *

+ * Important note: The underlying vector does not support 64-bit + * allocations yet. This may throw if attempting to hold larger + * than what a 32-bit vector can store. + *

+ * + * @param valueCount value count + */ + @Override + public void setValueCount(int valueCount) { + this.valueCount = valueCount; + if (valueCount > 0) { + while (valueCount > getValidityAndOffsetValueCapacity()) { + /* check if validity and offset buffers need to be re-allocated */ + reallocValidityAndOffsetBuffers(); + } + for (int i = lastSet + 1; i < valueCount; i++) { + /* fill the holes with offsets */ + final long currentOffset = offsetBuffer.getLong((long) i * OFFSET_WIDTH); + offsetBuffer.setLong(((long) i + 1L) * OFFSET_WIDTH, currentOffset); + } + } + /* valueCount for the data vector is the current end offset */ + final long childValueCount = (valueCount == 0) ? 0 : + offsetBuffer.getLong(((long) lastSet + 1L) * OFFSET_WIDTH); + /* set the value count of data vector and this will take care of + * checking whether data buffer needs to be reallocated. + * TODO: revisit when 64-bit vectors are supported + */ + Preconditions.checkArgument(childValueCount <= Integer.MAX_VALUE || childValueCount >= Integer.MIN_VALUE, + "LargeListVector doesn't yet support 64-bit allocations: %s", childValueCount); + vector.setValueCount((int) childValueCount); + } + + public void setLastSet(int value) { + lastSet = value; + } + + public int getLastSet() { + return lastSet; + } + + public long getElementStartIndex(int index) { + return offsetBuffer.getLong((long) index * OFFSET_WIDTH); + } + + public long getElementEndIndex(int index) { + return offsetBuffer.getLong(((long) index + 1L) * OFFSET_WIDTH); + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java index d6cc5e3ccd9..6f40836e06b 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java @@ -25,6 +25,7 @@ import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.complex.AbstractStructVector; import org.apache.arrow.vector.complex.FixedSizeListVector; +import org.apache.arrow.vector.complex.LargeListVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.complex.UnionVector; @@ -49,6 +50,7 @@ public class PromotableWriter extends AbstractPromotableFieldWriter { private final AbstractStructVector parentContainer; private final ListVector listVector; private final FixedSizeListVector fixedListVector; + private final LargeListVector largeListVector; private final NullableStructWriterFactory nullableStructWriterFactory; private int position; private static final int MAX_DECIMAL_PRECISION = 38; @@ -87,6 +89,7 @@ public PromotableWriter( this.parentContainer = parentContainer; this.listVector = null; this.fixedListVector = null; + this.largeListVector = null; this.nullableStructWriterFactory = nullableStructWriterFactory; init(v); } @@ -111,6 +114,16 @@ public PromotableWriter(ValueVector v, FixedSizeListVector fixedListVector) { this(v, fixedListVector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance()); } + /** + * Constructs a new instance. + * + * @param v The vector to initialize the writer with. + * @param largeListVector The vector that serves as a parent of v. + */ + public PromotableWriter(ValueVector v, LargeListVector largeListVector) { + this(v, largeListVector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance()); + } + /** * Constructs a new instance. * @@ -125,6 +138,7 @@ public PromotableWriter( this.listVector = listVector; this.parentContainer = null; this.fixedListVector = null; + this.largeListVector = null; this.nullableStructWriterFactory = nullableStructWriterFactory; init(v); } @@ -143,6 +157,26 @@ public PromotableWriter( this.fixedListVector = fixedListVector; this.parentContainer = null; this.listVector = null; + this.largeListVector = null; + this.nullableStructWriterFactory = nullableStructWriterFactory; + init(v); + } + + /** + * Constructs a new instance. + * + * @param v The vector to initialize the writer with. + * @param largeListVector The vector that serves as a parent of v. + * @param nullableStructWriterFactory The factory to create the delegate writer. + */ + public PromotableWriter( + ValueVector v, + LargeListVector largeListVector, + NullableStructWriterFactory nullableStructWriterFactory) { + this.largeListVector = largeListVector; + this.fixedListVector = null; + this.parentContainer = null; + this.listVector = null; this.nullableStructWriterFactory = nullableStructWriterFactory; init(v); } @@ -223,8 +257,14 @@ protected FieldWriter getWriter(MinorType type, ArrowType arrowType) { arrowType = type.getType(); } FieldType fieldType = new FieldType(addVectorAsNullable, arrowType, null, null); - ValueVector v = listVector != null ? listVector.addOrGetVector(fieldType).getVector() : - fixedListVector.addOrGetVector(fieldType).getVector(); + ValueVector v; + if (listVector != null) { + v = listVector.addOrGetVector(fieldType).getVector(); + } else if (fixedListVector != null) { + v = fixedListVector.addOrGetVector(fieldType).getVector(); + } else { + v = largeListVector.addOrGetVector(fieldType).getVector(); + } v.allocateNew(); setWriter(v); writer.setPosition(position); @@ -260,6 +300,8 @@ private FieldWriter promoteToUnion() { unionVector = listVector.promoteToUnion(); } else if (fixedListVector != null) { unionVector = fixedListVector.promoteToUnion(); + } else if (largeListVector != null) { + unionVector = largeListVector.promoteToUnion(); } unionVector.addVector((FieldVector) tp.getTo()); writer = new UnionWriter(unionVector, nullableStructWriterFactory); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java new file mode 100644 index 00000000000..faf088b5598 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector.complex.impl; + +import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; + +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.complex.LargeListVector; +import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.complex.writer.FieldWriter; +import org.apache.arrow.vector.holders.UnionHolder; +import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; + +/** + * {@link FieldReader} for list of union types. + */ +public class UnionLargeListReader extends AbstractFieldReader { + + private LargeListVector vector; + private ValueVector data; + private long index; + private static final long OFFSET_WIDTH = 8L; + + public UnionLargeListReader(LargeListVector vector) { + this.vector = vector; + this.data = vector.getDataVector(); + } + + @Override + public Field getField() { + return vector.getField(); + } + + @Override + public boolean isSet() { + return !vector.isNull(idx()); + } + + private long currentOffset; + private long maxOffset; + + @Override + public void setPosition(int index) { + super.setPosition(index); + currentOffset = vector.getOffsetBuffer().getLong((long) index * OFFSET_WIDTH) - 1; + maxOffset = vector.getOffsetBuffer().getLong(((long) index + 1L) * OFFSET_WIDTH); + } + + @Override + public FieldReader reader() { + return data.getReader(); + } + + @Override + public Object readObject() { + return vector.getObject(idx()); + } + + @Override + public MinorType getMinorType() { + return MinorType.LARGELIST; + } + + @Override + public void read(int index, UnionHolder holder) { + setPosition(index); + for (int i = -1; i < index; i++) { + next(); + } + holder.reader = data.getReader(); + holder.isSet = data.getReader().isSet() ? 1 : 0; + } + + @Override + public int size() { + int size = checkedCastToInt(maxOffset - currentOffset - 1); //todo revisit when int64 vectors are done + return size < 0 ? 0 : size; + } + + @Override + public boolean next() { + if (currentOffset + 1 < maxOffset) { + data.getReader().setPosition(checkedCastToInt(++currentOffset)); // todo revisit when int64 vectors are done + return true; + } else { + return false; + } + } + + public void copyAsValue(UnionLargeListWriter writer) { + ComplexCopier.copy(this, (FieldWriter) writer); + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java index 18d8bbe73ac..54f47007fec 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java @@ -562,7 +562,9 @@ private ArrowBuf readIntoBuffer(BufferAllocator allocator, BufferType bufferType if (bufferType.equals(VALIDITY)) { reader = helper.BIT; } else if (bufferType.equals(OFFSET)) { - if (type == Types.MinorType.LARGEVARCHAR || type == Types.MinorType.LARGEVARBINARY) { + if (type == Types.MinorType.LARGELIST || + type == Types.MinorType.LARGEVARCHAR || + type == Types.MinorType.LARGEVARBINARY) { reader = helper.INT8; } else { reader = helper.INT4; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java index 40d20842d5d..0ec427e5a03 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java @@ -63,6 +63,7 @@ import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.complex.DenseUnionVector; import org.apache.arrow.vector.complex.FixedSizeListVector; +import org.apache.arrow.vector.complex.LargeListVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.StructVector; @@ -101,6 +102,7 @@ import org.apache.arrow.vector.complex.impl.UInt2WriterImpl; import org.apache.arrow.vector.complex.impl.UInt4WriterImpl; import org.apache.arrow.vector.complex.impl.UInt8WriterImpl; +import org.apache.arrow.vector.complex.impl.UnionLargeListWriter; import org.apache.arrow.vector.complex.impl.UnionListWriter; import org.apache.arrow.vector.complex.impl.UnionWriter; import org.apache.arrow.vector.complex.impl.VarBinaryWriterImpl; @@ -610,6 +612,17 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { return new UnionListWriter((ListVector) vector); } }, + LARGELIST(ArrowType.LargeList.INSTANCE) { + @Override + public FieldVector getNewVector(Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new LargeListVector(field.getName(), allocator, field.getFieldType(), schemaChangeCallback); + } + + @Override + public FieldWriter getNewFieldWriter(ValueVector vector) { + return new UnionLargeListWriter((LargeListVector) vector); + } + }, FIXED_SIZE_LIST(null) { @Override public FieldVector getNewVector( @@ -817,6 +830,11 @@ public MinorType visit(Map type) { return MinorType.MAP; } + @Override + public MinorType visit(ArrowType.LargeList type) { + return MinorType.LARGELIST; + } + @Override public MinorType visit(Int type) { switch (type.getBitWidth()) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java index e0fd13ea55b..4993a73c285 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java @@ -17,6 +17,8 @@ package org.apache.arrow.vector.util; +import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; + import java.util.HashSet; import org.apache.arrow.util.Preconditions; @@ -30,6 +32,7 @@ import org.apache.arrow.vector.compare.VectorVisitor; import org.apache.arrow.vector.complex.DenseUnionVector; import org.apache.arrow.vector.complex.FixedSizeListVector; +import org.apache.arrow.vector.complex.LargeListVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.NonNullableStructVector; import org.apache.arrow.vector.complex.UnionVector; @@ -141,14 +144,14 @@ public ValueVector visit(BaseLargeVariableWidthVector left, Void value) { @Override public ValueVector visit(ListVector deltaVector, Void value) { Preconditions.checkArgument(typeVisitor.equals(deltaVector), - "The targetVector to append must have the same type as the targetVector being appended"); + "The targetVector to append must have the same type as the targetVector being appended"); int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount(); int targetListSize = targetVector.getOffsetBuffer().getInt( - (long) targetVector.getValueCount() * ListVector.OFFSET_WIDTH); + (long) targetVector.getValueCount() * ListVector.OFFSET_WIDTH); int deltaListSize = deltaVector.getOffsetBuffer().getInt( - (long) deltaVector.getValueCount() * ListVector.OFFSET_WIDTH); + (long) deltaVector.getValueCount() * ListVector.OFFSET_WIDTH); ListVector targetListVector = (ListVector) targetVector; @@ -161,6 +164,58 @@ public ValueVector visit(ListVector deltaVector, Void value) { targetVector.reAlloc(); } + // append validity buffer + BitVectorHelper.concatBits( + targetVector.getValidityBuffer(), targetVector.getValueCount(), + deltaVector.getValidityBuffer(), deltaVector.getValueCount(), targetVector.getValidityBuffer()); + + // append offset buffer + PlatformDependent.copyMemory(deltaVector.getOffsetBuffer().memoryAddress() + ListVector.OFFSET_WIDTH, + targetVector.getOffsetBuffer().memoryAddress() + (targetVector.getValueCount() + 1) * + ListVector.OFFSET_WIDTH, + (long) deltaVector.getValueCount() * ListVector.OFFSET_WIDTH); + + // increase each offset from the second buffer + for (int i = 0; i < deltaVector.getValueCount(); i++) { + int oldOffset = targetVector.getOffsetBuffer().getInt( + (long) (targetVector.getValueCount() + 1 + i) * ListVector.OFFSET_WIDTH); + targetVector.getOffsetBuffer().setInt((long) (targetVector.getValueCount() + 1 + i) * ListVector.OFFSET_WIDTH, + oldOffset + targetListSize); + } + targetListVector.setLastSet(newValueCount - 1); + + // append underlying vectors + VectorAppender innerAppender = new VectorAppender(targetListVector.getDataVector()); + deltaVector.getDataVector().accept(innerAppender, null); + + targetVector.setValueCount(newValueCount); + return targetVector; + } + + @Override + public ValueVector visit(LargeListVector deltaVector, Void value) { + Preconditions.checkArgument(typeVisitor.equals(deltaVector), + "The targetVector to append must have the same type as the targetVector being appended"); + + int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount(); + + long targetListSize = targetVector.getOffsetBuffer().getLong( + (long) targetVector.getValueCount() * LargeListVector.OFFSET_WIDTH); + long deltaListSize = deltaVector.getOffsetBuffer().getLong( + (long) deltaVector.getValueCount() * LargeListVector.OFFSET_WIDTH); + + ListVector targetListVector = (ListVector) targetVector; + + // make sure the underlying vector has value count set + // todo recheck these casts when int64 vectors are supported + targetListVector.getDataVector().setValueCount(checkedCastToInt(targetListSize)); + deltaVector.getDataVector().setValueCount(checkedCastToInt(deltaListSize)); + + // make sure there is enough capacity + while (targetVector.getValueCapacity() < newValueCount) { + targetVector.reAlloc(); + } + // append validity buffer BitVectorHelper.concatBits( targetVector.getValidityBuffer(), targetVector.getValueCount(), @@ -169,15 +224,15 @@ public ValueVector visit(ListVector deltaVector, Void value) { // append offset buffer PlatformDependent.copyMemory(deltaVector.getOffsetBuffer().memoryAddress() + ListVector.OFFSET_WIDTH, targetVector.getOffsetBuffer().memoryAddress() + (targetVector.getValueCount() + 1) * - ListVector.OFFSET_WIDTH, + LargeListVector.OFFSET_WIDTH, (long) deltaVector.getValueCount() * ListVector.OFFSET_WIDTH); // increase each offset from the second buffer for (int i = 0; i < deltaVector.getValueCount(); i++) { - int oldOffset = targetVector.getOffsetBuffer().getInt( - (long) (targetVector.getValueCount() + 1 + i) * ListVector.OFFSET_WIDTH); - targetVector.getOffsetBuffer().setInt((long) (targetVector.getValueCount() + 1 + i) * ListVector.OFFSET_WIDTH, - oldOffset + targetListSize); + long oldOffset = targetVector.getOffsetBuffer().getLong( + (long) (targetVector.getValueCount() + 1 + i) * LargeListVector.OFFSET_WIDTH); + targetVector.getOffsetBuffer().setLong((long) (targetVector.getValueCount() + 1 + i) * + LargeListVector.OFFSET_WIDTH, oldOffset + targetListSize); } targetListVector.setLastSet(newValueCount - 1); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java index 756995ad76f..3e44c262d75 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java @@ -29,6 +29,7 @@ import org.apache.arrow.vector.compare.VectorVisitor; import org.apache.arrow.vector.complex.DenseUnionVector; import org.apache.arrow.vector.complex.FixedSizeListVector; +import org.apache.arrow.vector.complex.LargeListVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.NonNullableStructVector; import org.apache.arrow.vector.complex.UnionVector; @@ -135,6 +136,45 @@ public Void visit(ListVector vector, Void value) { return dataVector.accept(this, null); } + @Override + public Void visit(LargeListVector vector, Void value) { + + FieldVector dataVector = vector.getDataVector(); + + if (vector.getValueCount() > 0) { + + ArrowBuf offsetBuf = vector.getOffsetBuffer(); + long minBufferSize = (vector.getValueCount() + 1) * LargeListVector.OFFSET_WIDTH; + + if (offsetBuf.capacity() < minBufferSize) { + throw new IllegalArgumentException(String.format("offsetBuffer too small in vector of type %s" + + " and valueCount %s : expected at least %s byte(s), got %s", + vector.getField().getType().toString(), + vector.getValueCount(), minBufferSize, offsetBuf.capacity())); + } + + long firstOffset = vector.getOffsetBuffer().getLong(0); + long lastOffset = vector.getOffsetBuffer().getLong(vector.getValueCount() * LargeListVector.OFFSET_WIDTH); + + if (firstOffset < 0 || lastOffset < 0) { + throw new IllegalArgumentException("Negative offsets in list vector"); + } + + long dataExtent = lastOffset - firstOffset; + + if (dataExtent > 0 && (dataVector.getDataBuffer() == null || dataVector.getDataBuffer().capacity() == 0)) { + throw new IllegalArgumentException("valueBuffer is null or capacity is 0"); + } + + if (dataExtent > dataVector.getValueCount()) { + throw new IllegalArgumentException(String.format("Length spanned by list offsets (%s) larger than" + + " data vector valueCount (length %s)", dataExtent, dataVector.getValueCount())); + } + } + + return dataVector.accept(this, null); + } + @Override public Void visit(FixedSizeListVector vector, Void value) { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java new file mode 100644 index 00000000000..9a731fe099d --- /dev/null +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java @@ -0,0 +1,982 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.complex.BaseRepeatedValueVector; +import org.apache.arrow.vector.complex.LargeListVector; +import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.impl.UnionLargeListWriter; +import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.TransferPair; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class TestLargeListVector { + + private BufferAllocator allocator; + + @Before + public void init() { + allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); + } + + @After + public void terminate() throws Exception { + allocator.close(); + } + + @Test + public void testCopyFrom() throws Exception { + try (LargeListVector inVector = LargeListVector.empty("input", allocator); + LargeListVector outVector = LargeListVector.empty("output", allocator)) { + UnionLargeListWriter writer = inVector.getWriter(); + writer.allocate(); + + // populate input vector with the following records + // [1, 2, 3] + // null + // [] + writer.setPosition(0); // optional + writer.startList(); + writer.bigInt().writeBigInt(1); + writer.bigInt().writeBigInt(2); + writer.bigInt().writeBigInt(3); + writer.endList(); + + writer.setPosition(2); + writer.startList(); + writer.endList(); + + writer.setValueCount(3); + + // copy values from input to output + outVector.allocateNew(); + for (int i = 0; i < 3; i++) { + outVector.copyFrom(i, i, inVector); + } + outVector.setValueCount(3); + + // assert the output vector is correct + FieldReader reader = outVector.getReader(); + Assert.assertTrue("shouldn't be null", reader.isSet()); + reader.setPosition(1); + Assert.assertFalse("should be null", reader.isSet()); + reader.setPosition(2); + Assert.assertTrue("shouldn't be null", reader.isSet()); + + + /* index 0 */ + Object result = outVector.getObject(0); + ArrayList resultSet = (ArrayList) result; + assertEquals(3, resultSet.size()); + assertEquals(new Long(1), (Long) resultSet.get(0)); + assertEquals(new Long(2), (Long) resultSet.get(1)); + assertEquals(new Long(3), (Long) resultSet.get(2)); + + /* index 1 */ + result = outVector.getObject(1); + assertNull(result); + + /* index 2 */ + result = outVector.getObject(2); + resultSet = (ArrayList) result; + assertEquals(0, resultSet.size()); + + /* 3+0+0/3 */ + assertEquals(1.0D, inVector.getDensity(), 0); + } + } + + @Test + public void testSetLastSetUsage() throws Exception { + try (LargeListVector listVector = LargeListVector.empty("input", allocator)) { + + /* Explicitly add the dataVector */ + MinorType type = MinorType.BIGINT; + listVector.addOrGetVector(FieldType.nullable(type.getType())); + + /* allocate memory */ + listVector.allocateNew(); + + /* get inner buffers; validityBuffer and offsetBuffer */ + + ArrowBuf validityBuffer = listVector.getValidityBuffer(); + ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); + + /* get the underlying data vector -- BigIntVector */ + BigIntVector dataVector = (BigIntVector) listVector.getDataVector(); + + /* check current lastSet */ + assertEquals(-1L, listVector.getLastSet()); + + int index = 0; + int offset = 0; + + /* write [10, 11, 12] to the list vector at index 0 */ + BitVectorHelper.setBit(validityBuffer, index); + dataVector.setSafe(0, 1, 10); + dataVector.setSafe(1, 1, 11); + dataVector.setSafe(2, 1, 12); + offsetBuffer.setInt((index + 1) * LargeListVector.OFFSET_WIDTH, 3); + + index += 1; + + /* write [13, 14] to the list vector at index 1 */ + BitVectorHelper.setBit(validityBuffer, index); + dataVector.setSafe(3, 1, 13); + dataVector.setSafe(4, 1, 14); + offsetBuffer.setInt((index + 1) * LargeListVector.OFFSET_WIDTH, 5); + + index += 1; + + /* write [15, 16, 17] to the list vector at index 2 */ + BitVectorHelper.setBit(validityBuffer, index); + dataVector.setSafe(5, 1, 15); + dataVector.setSafe(6, 1, 16); + dataVector.setSafe(7, 1, 17); + offsetBuffer.setInt((index + 1) * LargeListVector.OFFSET_WIDTH, 8); + + /* check current lastSet */ + assertEquals(-1L, listVector.getLastSet()); + + /* set lastset and arbitrary valuecount for list vector. + * + * NOTE: if we don't execute setLastSet() before setLastValueCount(), then + * the latter will corrupt the offsetBuffer and thus the accessor will not + * retrieve the correct values from underlying dataBuffer. Run the test + * by commenting out next line and we should see failures from 5th assert + * onwards. This is why doing setLastSet() is important before setValueCount() + * once the vector has been loaded. + * + * Another important thing to remember is the value of lastSet itself. + * Even though the listVector has elements till index 2 only, the lastSet should + * be set as 3. This is because the offsetBuffer has valid offsets filled till index 3. + * If we do setLastSet(2), the offsetBuffer at index 3 will contain incorrect value + * after execution of setValueCount(). + * + * correct state of the listVector + * bitvector {1, 1, 1, 0, 0.... } + * offsetvector {0, 3, 5, 8, 8, 8.....} + * datavector { [10, 11, 12], + * [13, 14], + * [15, 16, 17] + * } + * + * if we don't do setLastSet() before setValueCount --> incorrect state + * bitvector {1, 1, 1, 0, 0.... } + * offsetvector {0, 0, 0, 0, 0, 0.....} + * datavector { [10, 11, 12], + * [13, 14], + * [15, 16, 17] + * } + * + * if we do setLastSet(2) before setValueCount --> incorrect state + * bitvector {1, 1, 1, 0, 0.... } + * offsetvector {0, 3, 5, 5, 5, 5.....} + * datavector { [10, 11, 12], + * [13, 14], + * [15, 16, 17] + * } + */ + listVector.setLastSet(2); + listVector.setValueCount(10); + + /* (3+2+3)/10 */ + assertEquals(0.8D, listVector.getDensity(), 0); + + index = 0; + offset = offsetBuffer.getInt(index * LargeListVector.OFFSET_WIDTH); + assertEquals(Integer.toString(0), Integer.toString(offset)); + + Object actual = dataVector.getObject(offset); + assertEquals(new Long(10), (Long) actual); + offset++; + actual = dataVector.getObject(offset); + assertEquals(new Long(11), (Long) actual); + offset++; + actual = dataVector.getObject(offset); + assertEquals(new Long(12), (Long) actual); + + index++; + offset = offsetBuffer.getInt(index * LargeListVector.OFFSET_WIDTH); + assertEquals(Integer.toString(3), Integer.toString(offset)); + + actual = dataVector.getObject(offset); + assertEquals(new Long(13), (Long) actual); + offset++; + actual = dataVector.getObject(offset); + assertEquals(new Long(14), (Long) actual); + + index++; + offset = offsetBuffer.getInt(index * LargeListVector.OFFSET_WIDTH); + assertEquals(Integer.toString(5), Integer.toString(offset)); + + actual = dataVector.getObject(offset); + assertEquals(new Long(15), (Long) actual); + offset++; + actual = dataVector.getObject(offset); + assertEquals(new Long(16), (Long) actual); + offset++; + actual = dataVector.getObject(offset); + assertEquals(new Long(17), (Long) actual); + + index++; + offset = offsetBuffer.getInt(index * LargeListVector.OFFSET_WIDTH); + assertEquals(Integer.toString(8), Integer.toString(offset)); + + actual = dataVector.getObject(offset); + assertNull(actual); + } + } + + @Test + public void testSplitAndTransfer() throws Exception { + try (LargeListVector listVector = LargeListVector.empty("sourceVector", allocator)) { + + /* Explicitly add the dataVector */ + MinorType type = MinorType.BIGINT; + listVector.addOrGetVector(FieldType.nullable(type.getType())); + + UnionLargeListWriter listWriter = listVector.getWriter(); + + /* allocate memory */ + listWriter.allocate(); + + /* populate data */ + listWriter.setPosition(0); + listWriter.startList(); + listWriter.bigInt().writeBigInt(10); + listWriter.bigInt().writeBigInt(11); + listWriter.bigInt().writeBigInt(12); + listWriter.endList(); + + listWriter.setPosition(1); + listWriter.startList(); + listWriter.bigInt().writeBigInt(13); + listWriter.bigInt().writeBigInt(14); + listWriter.endList(); + + listWriter.setPosition(2); + listWriter.startList(); + listWriter.bigInt().writeBigInt(15); + listWriter.bigInt().writeBigInt(16); + listWriter.bigInt().writeBigInt(17); + listWriter.bigInt().writeBigInt(18); + listWriter.endList(); + + listWriter.setPosition(3); + listWriter.startList(); + listWriter.bigInt().writeBigInt(19); + listWriter.endList(); + + listWriter.setPosition(4); + listWriter.startList(); + listWriter.bigInt().writeBigInt(20); + listWriter.bigInt().writeBigInt(21); + listWriter.bigInt().writeBigInt(22); + listWriter.bigInt().writeBigInt(23); + listWriter.endList(); + + listVector.setValueCount(5); + + assertEquals(4, listVector.getLastSet()); + + /* get offset buffer */ + final ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); + + /* get dataVector */ + BigIntVector dataVector = (BigIntVector) listVector.getDataVector(); + + /* check the vector output */ + + int index = 0; + int offset = 0; + Object actual = null; + + /* index 0 */ + assertFalse(listVector.isNull(index)); + offset = offsetBuffer.getInt(index * LargeListVector.OFFSET_WIDTH); + assertEquals(Integer.toString(0), Integer.toString(offset)); + + actual = dataVector.getObject(offset); + assertEquals(new Long(10), (Long) actual); + offset++; + actual = dataVector.getObject(offset); + assertEquals(new Long(11), (Long) actual); + offset++; + actual = dataVector.getObject(offset); + assertEquals(new Long(12), (Long) actual); + + /* index 1 */ + index++; + assertFalse(listVector.isNull(index)); + offset = offsetBuffer.getInt(index * LargeListVector.OFFSET_WIDTH); + assertEquals(Integer.toString(3), Integer.toString(offset)); + + actual = dataVector.getObject(offset); + assertEquals(new Long(13), (Long) actual); + offset++; + actual = dataVector.getObject(offset); + assertEquals(new Long(14), (Long) actual); + + /* index 2 */ + index++; + assertFalse(listVector.isNull(index)); + offset = offsetBuffer.getInt(index * LargeListVector.OFFSET_WIDTH); + assertEquals(Integer.toString(5), Integer.toString(offset)); + + actual = dataVector.getObject(offset); + assertEquals(new Long(15), (Long) actual); + offset++; + actual = dataVector.getObject(offset); + assertEquals(new Long(16), (Long) actual); + offset++; + actual = dataVector.getObject(offset); + assertEquals(new Long(17), (Long) actual); + offset++; + actual = dataVector.getObject(offset); + assertEquals(new Long(18), (Long) actual); + + /* index 3 */ + index++; + assertFalse(listVector.isNull(index)); + offset = offsetBuffer.getInt(index * LargeListVector.OFFSET_WIDTH); + assertEquals(Integer.toString(9), Integer.toString(offset)); + + actual = dataVector.getObject(offset); + assertEquals(new Long(19), (Long) actual); + + /* index 4 */ + index++; + assertFalse(listVector.isNull(index)); + offset = offsetBuffer.getInt(index * LargeListVector.OFFSET_WIDTH); + assertEquals(Integer.toString(10), Integer.toString(offset)); + + actual = dataVector.getObject(offset); + assertEquals(new Long(20), (Long) actual); + offset++; + actual = dataVector.getObject(offset); + assertEquals(new Long(21), (Long) actual); + offset++; + actual = dataVector.getObject(offset); + assertEquals(new Long(22), (Long) actual); + offset++; + actual = dataVector.getObject(offset); + assertEquals(new Long(23), (Long) actual); + + /* index 5 */ + index++; + assertTrue(listVector.isNull(index)); + offset = offsetBuffer.getInt(index * LargeListVector.OFFSET_WIDTH); + assertEquals(Integer.toString(14), Integer.toString(offset)); + + /* do split and transfer */ + try (LargeListVector toVector = LargeListVector.empty("toVector", allocator)) { + + TransferPair transferPair = listVector.makeTransferPair(toVector); + + int[][] transferLengths = {{0, 2}, {3, 1}, {4, 1}}; + + for (final int[] transferLength : transferLengths) { + int start = transferLength[0]; + int splitLength = transferLength[1]; + + int dataLength1 = 0; + int dataLength2 = 0; + + int offset1 = 0; + int offset2 = 0; + + transferPair.splitAndTransfer(start, splitLength); + + /* get offsetBuffer of toVector */ + final ArrowBuf toOffsetBuffer = toVector.getOffsetBuffer(); + + /* get dataVector of toVector */ + BigIntVector dataVector1 = (BigIntVector) toVector.getDataVector(); + + for (int i = 0; i < splitLength; i++) { + dataLength1 = offsetBuffer.getInt((start + i + 1) * LargeListVector.OFFSET_WIDTH) - + offsetBuffer.getInt((start + i) * LargeListVector.OFFSET_WIDTH); + dataLength2 = toOffsetBuffer.getInt((i + 1) * LargeListVector.OFFSET_WIDTH) - + toOffsetBuffer.getInt(i * LargeListVector.OFFSET_WIDTH); + + assertEquals("Different data lengths at index: " + i + " and start: " + start, + dataLength1, dataLength2); + + offset1 = offsetBuffer.getInt((start + i) * LargeListVector.OFFSET_WIDTH); + offset2 = toOffsetBuffer.getInt(i * LargeListVector.OFFSET_WIDTH); + + for (int j = 0; j < dataLength1; j++) { + assertEquals("Different data at indexes: " + offset1 + " and " + offset2, + dataVector.getObject(offset1), dataVector1.getObject(offset2)); + + offset1++; + offset2++; + } + } + } + } + } + } + + @Test + public void testNestedLargeListVector() throws Exception { + try (LargeListVector listVector = LargeListVector.empty("sourceVector", allocator)) { + + UnionLargeListWriter listWriter = listVector.getWriter(); + + /* allocate memory */ + listWriter.allocate(); + + /* the dataVector that backs a listVector will also be a + * listVector for this test. + */ + + /* write one or more inner lists at index 0 */ + listWriter.setPosition(0); + listWriter.startList(); + + listWriter.list().startList(); + listWriter.list().bigInt().writeBigInt(50); + listWriter.list().bigInt().writeBigInt(100); + listWriter.list().bigInt().writeBigInt(200); + listWriter.list().endList(); + + listWriter.list().startList(); + listWriter.list().bigInt().writeBigInt(75); + listWriter.list().bigInt().writeBigInt(125); + listWriter.list().bigInt().writeBigInt(150); + listWriter.list().bigInt().writeBigInt(175); + listWriter.list().endList(); + + listWriter.endList(); + + /* write one or more inner lists at index 1 */ + listWriter.setPosition(1); + listWriter.startList(); + + listWriter.list().startList(); + listWriter.list().bigInt().writeBigInt(10); + listWriter.list().endList(); + + listWriter.list().startList(); + listWriter.list().bigInt().writeBigInt(15); + listWriter.list().bigInt().writeBigInt(20); + listWriter.list().endList(); + + listWriter.list().startList(); + listWriter.list().bigInt().writeBigInt(25); + listWriter.list().bigInt().writeBigInt(30); + listWriter.list().bigInt().writeBigInt(35); + listWriter.list().endList(); + + listWriter.endList(); + + assertEquals(1, listVector.getLastSet()); + + listVector.setValueCount(2); + + assertEquals(2, listVector.getValueCount()); + + /* get listVector value at index 0 -- the value itself is a listvector */ + Object result = listVector.getObject(0); + ArrayList> resultSet = (ArrayList>) result; + ArrayList list; + + assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */ + assertEquals(3, resultSet.get(0).size()); /* size of first inner list */ + assertEquals(4, resultSet.get(1).size()); /* size of second inner list */ + + list = resultSet.get(0); + assertEquals(new Long(50), list.get(0)); + assertEquals(new Long(100), list.get(1)); + assertEquals(new Long(200), list.get(2)); + + list = resultSet.get(1); + assertEquals(new Long(75), list.get(0)); + assertEquals(new Long(125), list.get(1)); + assertEquals(new Long(150), list.get(2)); + assertEquals(new Long(175), list.get(3)); + + /* get listVector value at index 1 -- the value itself is a listvector */ + result = listVector.getObject(1); + resultSet = (ArrayList>) result; + + assertEquals(3, resultSet.size()); /* 3 inner lists at index 1 */ + assertEquals(1, resultSet.get(0).size()); /* size of first inner list */ + assertEquals(2, resultSet.get(1).size()); /* size of second inner list */ + assertEquals(3, resultSet.get(2).size()); /* size of third inner list */ + + list = resultSet.get(0); + assertEquals(new Long(10), list.get(0)); + + list = resultSet.get(1); + assertEquals(new Long(15), list.get(0)); + assertEquals(new Long(20), list.get(1)); + + list = resultSet.get(2); + assertEquals(new Long(25), list.get(0)); + assertEquals(new Long(30), list.get(1)); + assertEquals(new Long(35), list.get(2)); + + /* check underlying bitVector */ + assertFalse(listVector.isNull(0)); + assertFalse(listVector.isNull(1)); + + /* check underlying offsets */ + final ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); + + /* listVector has 2 lists at index 0 and 3 lists at index 1 */ + assertEquals(0, offsetBuffer.getInt(0 * LargeListVector.OFFSET_WIDTH)); + assertEquals(2, offsetBuffer.getInt(1 * LargeListVector.OFFSET_WIDTH)); + assertEquals(5, offsetBuffer.getInt(2 * LargeListVector.OFFSET_WIDTH)); + } + } + + @Test + public void testNestedLargeListVector1() throws Exception { + try (LargeListVector listVector = LargeListVector.empty("sourceVector", allocator)) { + + MinorType listType = MinorType.LIST; + MinorType scalarType = MinorType.BIGINT; + + listVector.addOrGetVector(FieldType.nullable(listType.getType())); + + ListVector innerList1 = (ListVector) listVector.getDataVector(); + innerList1.addOrGetVector(FieldType.nullable(listType.getType())); + + ListVector innerList2 = (ListVector) innerList1.getDataVector(); + innerList2.addOrGetVector(FieldType.nullable(listType.getType())); + + ListVector innerList3 = (ListVector) innerList2.getDataVector(); + innerList3.addOrGetVector(FieldType.nullable(listType.getType())); + + ListVector innerList4 = (ListVector) innerList3.getDataVector(); + innerList4.addOrGetVector(FieldType.nullable(listType.getType())); + + ListVector innerList5 = (ListVector) innerList4.getDataVector(); + innerList5.addOrGetVector(FieldType.nullable(listType.getType())); + + ListVector innerList6 = (ListVector) innerList5.getDataVector(); + innerList6.addOrGetVector(FieldType.nullable(scalarType.getType())); + + listVector.setInitialCapacity(128); + } + } + + @Test + public void testNestedLargeListVector2() throws Exception { + try (LargeListVector listVector = LargeListVector.empty("sourceVector", allocator)) { + listVector.setInitialCapacity(1); + UnionLargeListWriter listWriter = listVector.getWriter(); + /* allocate memory */ + listWriter.allocate(); + + /* write one or more inner lists at index 0 */ + listWriter.setPosition(0); + listWriter.startList(); + + listWriter.list().startList(); + listWriter.list().bigInt().writeBigInt(50); + listWriter.list().bigInt().writeBigInt(100); + listWriter.list().bigInt().writeBigInt(200); + listWriter.list().endList(); + + listWriter.list().startList(); + listWriter.list().bigInt().writeBigInt(75); + listWriter.list().bigInt().writeBigInt(125); + listWriter.list().endList(); + + listWriter.endList(); + + /* write one or more inner lists at index 1 */ + listWriter.setPosition(1); + listWriter.startList(); + + listWriter.list().startList(); + listWriter.list().bigInt().writeBigInt(15); + listWriter.list().bigInt().writeBigInt(20); + listWriter.list().endList(); + + listWriter.list().startList(); + listWriter.list().bigInt().writeBigInt(25); + listWriter.list().bigInt().writeBigInt(30); + listWriter.list().bigInt().writeBigInt(35); + listWriter.list().endList(); + + listWriter.endList(); + + assertEquals(1, listVector.getLastSet()); + + listVector.setValueCount(2); + + assertEquals(2, listVector.getValueCount()); + + /* get listVector value at index 0 -- the value itself is a listvector */ + Object result = listVector.getObject(0); + ArrayList> resultSet = (ArrayList>) result; + ArrayList list; + + assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */ + assertEquals(3, resultSet.get(0).size()); /* size of first inner list */ + assertEquals(2, resultSet.get(1).size()); /* size of second inner list */ + + list = resultSet.get(0); + assertEquals(new Long(50), list.get(0)); + assertEquals(new Long(100), list.get(1)); + assertEquals(new Long(200), list.get(2)); + + list = resultSet.get(1); + assertEquals(new Long(75), list.get(0)); + assertEquals(new Long(125), list.get(1)); + + /* get listVector value at index 1 -- the value itself is a listvector */ + result = listVector.getObject(1); + resultSet = (ArrayList>) result; + + assertEquals(2, resultSet.size()); /* 3 inner lists at index 1 */ + assertEquals(2, resultSet.get(0).size()); /* size of first inner list */ + assertEquals(3, resultSet.get(1).size()); /* size of second inner list */ + + list = resultSet.get(0); + assertEquals(new Long(15), list.get(0)); + assertEquals(new Long(20), list.get(1)); + + list = resultSet.get(1); + assertEquals(new Long(25), list.get(0)); + assertEquals(new Long(30), list.get(1)); + assertEquals(new Long(35), list.get(2)); + + /* check underlying bitVector */ + assertFalse(listVector.isNull(0)); + assertFalse(listVector.isNull(1)); + + /* check underlying offsets */ + final ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); + + /* listVector has 2 lists at index 0 and 3 lists at index 1 */ + assertEquals(0, offsetBuffer.getInt(0 * LargeListVector.OFFSET_WIDTH)); + assertEquals(2, offsetBuffer.getInt(1 * LargeListVector.OFFSET_WIDTH)); + assertEquals(4, offsetBuffer.getInt(2 * LargeListVector.OFFSET_WIDTH)); + } + } + + @Test + public void testGetBufferAddress() throws Exception { + try (LargeListVector listVector = LargeListVector.empty("vector", allocator)) { + + UnionLargeListWriter listWriter = listVector.getWriter(); + boolean error = false; + + listWriter.allocate(); + + listWriter.setPosition(0); + listWriter.startList(); + listWriter.bigInt().writeBigInt(50); + listWriter.bigInt().writeBigInt(100); + listWriter.bigInt().writeBigInt(200); + listWriter.endList(); + + listWriter.setPosition(1); + listWriter.startList(); + listWriter.bigInt().writeBigInt(250); + listWriter.bigInt().writeBigInt(300); + listWriter.endList(); + + listVector.setValueCount(2); + + /* check listVector contents */ + Object result = listVector.getObject(0); + ArrayList resultSet = (ArrayList) result; + assertEquals(3, resultSet.size()); + assertEquals(new Long(50), resultSet.get(0)); + assertEquals(new Long(100), resultSet.get(1)); + assertEquals(new Long(200), resultSet.get(2)); + + result = listVector.getObject(1); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(new Long(250), resultSet.get(0)); + assertEquals(new Long(300), resultSet.get(1)); + + List buffers = listVector.getFieldBuffers(); + + long bitAddress = listVector.getValidityBufferAddress(); + long offsetAddress = listVector.getOffsetBufferAddress(); + + try { + long dataAddress = listVector.getDataBufferAddress(); + } catch (UnsupportedOperationException ue) { + error = true; + } finally { + assertTrue(error); + } + + assertEquals(2, buffers.size()); + assertEquals(bitAddress, buffers.get(0).memoryAddress()); + assertEquals(offsetAddress, buffers.get(1).memoryAddress()); + + /* (3+2)/2 */ + assertEquals(2.5, listVector.getDensity(), 0); + } + } + + @Test + public void testConsistentChildName() throws Exception { + try (LargeListVector listVector = LargeListVector.empty("sourceVector", allocator)) { + String emptyListStr = listVector.getField().toString(); + assertTrue(emptyListStr.contains(LargeListVector.DATA_VECTOR_NAME)); + + listVector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + String emptyVectorStr = listVector.getField().toString(); + assertTrue(emptyVectorStr.contains(LargeListVector.DATA_VECTOR_NAME)); + } + } + + @Test + public void testSetInitialCapacity() { + try (final LargeListVector vector = LargeListVector.empty("", allocator)) { + vector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + + /** + * use the default multiplier of 5, + * 512 * 5 => 2560 * 4 => 10240 bytes => 16KB => 4096 value capacity. + */ + vector.setInitialCapacity(512); + vector.allocateNew(); + assertEquals(512, vector.getValueCapacity()); + assertTrue(vector.getDataVector().getValueCapacity() >= 512 * 5); + + /* use density as 4 */ + vector.setInitialCapacity(512, 4); + vector.allocateNew(); + assertEquals(512, vector.getValueCapacity()); + assertTrue(vector.getDataVector().getValueCapacity() >= 512 * 4); + + /** + * inner value capacity we pass to data vector is 512 * 0.1 => 51 + * For an int vector this is 204 bytes of memory for data buffer + * and 7 bytes for validity buffer. + * and with power of 2 allocation, we allocate 256 bytes and 8 bytes + * for the data buffer and validity buffer of the inner vector. Thus + * value capacity of inner vector is 64 + */ + vector.setInitialCapacity(512, 0.1); + vector.allocateNew(); + assertEquals(512, vector.getValueCapacity()); + assertTrue(vector.getDataVector().getValueCapacity() >= 51); + + /** + * inner value capacity we pass to data vector is 512 * 0.01 => 5 + * For an int vector this is 20 bytes of memory for data buffer + * and 1 byte for validity buffer. + * and with power of 2 allocation, we allocate 32 bytes and 1 bytes + * for the data buffer and validity buffer of the inner vector. Thus + * value capacity of inner vector is 8 + */ + vector.setInitialCapacity(512, 0.01); + vector.allocateNew(); + assertEquals(512, vector.getValueCapacity()); + assertTrue(vector.getDataVector().getValueCapacity() >= 5); + + /** + * inner value capacity we pass to data vector is 5 * 0.1 => 0 + * which is then rounded off to 1. So we pass value count as 1 + * to the inner int vector. + * the offset buffer of the list vector is allocated for 6 values + * which is 24 bytes and then rounded off to 32 bytes (8 values) + * the validity buffer of the list vector is allocated for 5 + * values which is 1 byte. This is why value capacity of the list + * vector is 7 as we take the min of validity buffer value capacity + * and offset buffer value capacity. + */ + vector.setInitialCapacity(5, 0.1); + vector.allocateNew(); + assertEquals(7, vector.getValueCapacity()); + assertTrue(vector.getDataVector().getValueCapacity() >= 1); + } + } + + @Test + public void testClearAndReuse() { + try (final LargeListVector vector = LargeListVector.empty("list", allocator)) { + BigIntVector bigIntVector = + (BigIntVector) vector.addOrGetVector(FieldType.nullable(MinorType.BIGINT.getType())).getVector(); + vector.setInitialCapacity(10); + vector.allocateNew(); + + vector.startNewValue(0); + bigIntVector.setSafe(0, 7); + vector.endValue(0, 1); + vector.startNewValue(1); + bigIntVector.setSafe(1, 8); + vector.endValue(1, 1); + vector.setValueCount(2); + + Object result = vector.getObject(0); + ArrayList resultSet = (ArrayList) result; + assertEquals(new Long(7), resultSet.get(0)); + + result = vector.getObject(1); + resultSet = (ArrayList) result; + assertEquals(new Long(8), resultSet.get(0)); + + // Clear and release the buffers to trigger a realloc when adding next value + vector.clear(); + + // The list vector should reuse a buffer when reallocating the offset buffer + vector.startNewValue(0); + bigIntVector.setSafe(0, 7); + vector.endValue(0, 1); + vector.startNewValue(1); + bigIntVector.setSafe(1, 8); + vector.endValue(1, 1); + vector.setValueCount(2); + + result = vector.getObject(0); + resultSet = (ArrayList) result; + assertEquals(new Long(7), resultSet.get(0)); + + result = vector.getObject(1); + resultSet = (ArrayList) result; + assertEquals(new Long(8), resultSet.get(0)); + } + } + + @Test + public void testWriterGetField() { + try (final LargeListVector vector = LargeListVector.empty("list", allocator)) { + + UnionLargeListWriter writer = vector.getWriter(); + writer.allocate(); + + //set some values + writer.startList(); + writer.integer().writeInt(1); + writer.integer().writeInt(2); + writer.endList(); + vector.setValueCount(2); + + Field expectedDataField = new Field(BaseRepeatedValueVector.DATA_VECTOR_NAME, + FieldType.nullable(new ArrowType.Int(32, true)), null); + Field expectedField = new Field(vector.getName(), FieldType.nullable(ArrowType.LargeList.INSTANCE), + Arrays.asList(expectedDataField)); + + assertEquals(expectedField, writer.getField()); + } + } + + @Test + public void testClose() throws Exception { + try (final LargeListVector vector = LargeListVector.empty("list", allocator)) { + + UnionLargeListWriter writer = vector.getWriter(); + writer.allocate(); + + //set some values + writer.startList(); + writer.integer().writeInt(1); + writer.integer().writeInt(2); + writer.endList(); + vector.setValueCount(2); + + assertTrue(vector.getBufferSize() > 0); + assertTrue(vector.getDataVector().getBufferSize() > 0); + + writer.close(); + assertEquals(0, vector.getBufferSize()); + assertEquals(0, vector.getDataVector().getBufferSize()); + } + } + + @Test + public void testGetBufferSizeFor() { + try (final LargeListVector vector = LargeListVector.empty("list", allocator)) { + + UnionLargeListWriter writer = vector.getWriter(); + writer.allocate(); + + //set some values + writeIntValues(writer, new int[] {1, 2}); + writeIntValues(writer, new int[] {3, 4}); + writeIntValues(writer, new int[] {5, 6}); + writeIntValues(writer, new int[] {7, 8, 9, 10}); + writeIntValues(writer, new int[] {11, 12, 13, 14}); + writer.setValueCount(5); + + IntVector dataVector = (IntVector) vector.getDataVector(); + int[] indices = new int[] {0, 2, 4, 6, 10, 14}; + + for (int valueCount = 1; valueCount <= 5; valueCount++) { + int validityBufferSize = BitVectorHelper.getValidityBufferSize(valueCount); + int offsetBufferSize = (valueCount + 1) * LargeListVector.OFFSET_WIDTH; + + int expectedSize = validityBufferSize + offsetBufferSize + dataVector.getBufferSizeFor(indices[valueCount]); + assertEquals(expectedSize, vector.getBufferSizeFor(valueCount)); + } + } + } + + @Test + public void testIsEmpty() { + try (final LargeListVector vector = LargeListVector.empty("list", allocator)) { + UnionLargeListWriter writer = vector.getWriter(); + writer.allocate(); + + // set values [1,2], null, [], [5,6] + writeIntValues(writer, new int[] {1, 2}); + writer.setPosition(2); + writeIntValues(writer, new int[] {}); + writeIntValues(writer, new int[] {5, 6}); + writer.setValueCount(4); + + assertFalse(vector.isEmpty(0)); + assertTrue(vector.isNull(1)); + assertTrue(vector.isEmpty(1)); + assertFalse(vector.isNull(2)); + assertTrue(vector.isEmpty(2)); + assertFalse(vector.isEmpty(3)); + } + } + + private void writeIntValues(UnionLargeListWriter writer, int[] values) { + writer.startList(); + for (int v: values) { + writer.integer().writeInt(v); + } + writer.endList(); + } +}