Skip to content

Commit

Permalink
apacheGH-30866: [Java] fix SplitAndTransfer throws for (0,0) if vecto…
Browse files Browse the repository at this point in the history
…r empty (apache#41066)

This is addresses https://issues.apache.org/jira/browse/ARROW-15382 and is reopening of apache#12250 (which I asked to be reopened).

I tried to address all the comments from the previous discussion, added some more tests and fixed an issue in the old commit.
* GitHub Issue: apache#30866

Authored-by: Finn Völkel <finn.volkel@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
  • Loading branch information
FiV0 authored and lriggs committed Sep 6, 2024
1 parent 9aa5992 commit b7db21e
Show file tree
Hide file tree
Showing 5 changed files with 140 additions and 31 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -729,16 +729,14 @@ public void transferTo(BaseLargeVariableWidthVector target) {
*/
public void splitAndTransferTo(int startIndex, int length,
BaseLargeVariableWidthVector target) {
Preconditions.checkArgument(startIndex >= 0 && startIndex < valueCount,
"Invalid startIndex: %s", startIndex);
Preconditions.checkArgument(startIndex + length <= valueCount,
"Invalid length: %s", length);
Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount,
"Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount);
compareTypes(target, "splitAndTransferTo");
target.clear();
splitAndTransferValidityBuffer(startIndex, length, target);
splitAndTransferOffsetBuffer(startIndex, length, target);
target.setLastSet(length - 1);
if (length > 0) {
splitAndTransferValidityBuffer(startIndex, length, target);
splitAndTransferOffsetBuffer(startIndex, length, target);
target.setLastSet(length - 1);
target.setValueCount(length);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -774,10 +774,10 @@ public void splitAndTransferTo(int startIndex, int length,
"Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount);
compareTypes(target, "splitAndTransferTo");
target.clear();
splitAndTransferValidityBuffer(startIndex, length, target);
splitAndTransferOffsetBuffer(startIndex, length, target);
target.setLastSet(length - 1);
if (length > 0) {
splitAndTransferValidityBuffer(startIndex, length, target);
splitAndTransferOffsetBuffer(startIndex, length, target);
target.setLastSet(length - 1);
target.setValueCount(length);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -527,21 +527,23 @@ public void transfer() {
public void splitAndTransfer(int startIndex, int length) {
Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount,
"Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount);
final int startPoint = offsetBuffer.getInt(startIndex * OFFSET_WIDTH);
final int sliceLength = offsetBuffer.getInt((startIndex + length) * OFFSET_WIDTH) - startPoint;
to.clear();
to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH);
/* splitAndTransfer offset buffer */
for (int i = 0; i < length + 1; i++) {
final int relativeOffset = offsetBuffer.getInt((startIndex + i) * OFFSET_WIDTH) - startPoint;
to.offsetBuffer.setInt(i * OFFSET_WIDTH, relativeOffset);
if (length > 0) {
final int startPoint = offsetBuffer.getInt(startIndex * OFFSET_WIDTH);
final int sliceLength = offsetBuffer.getInt((startIndex + length) * OFFSET_WIDTH) - startPoint;
to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH);
/* splitAndTransfer offset buffer */
for (int i = 0; i < length + 1; i++) {
final int relativeOffset = offsetBuffer.getInt((startIndex + i) * OFFSET_WIDTH) - startPoint;
to.offsetBuffer.setInt(i * OFFSET_WIDTH, relativeOffset);
}
/* splitAndTransfer validity buffer */
splitAndTransferValidityBuffer(startIndex, length, to);
/* splitAndTransfer data buffer */
dataTransferPair.splitAndTransfer(startPoint, sliceLength);
to.lastSet = length - 1;
to.setValueCount(length);
}
/* splitAndTransfer validity buffer */
splitAndTransferValidityBuffer(startIndex, length, to);
/* splitAndTransfer data buffer */
dataTransferPair.splitAndTransfer(startPoint, sliceLength);
to.lastSet = length - 1;
to.setValueCount(length);
}

/*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ public void testInvalidStartIndex() {
IllegalArgumentException.class,
() -> tp.splitAndTransfer(valueCount, 10));

assertEquals("Invalid startIndex: 500", e.getMessage());
assertEquals("Invalid parameters startIndex: 500, length: 10 for valueCount: 500", e.getMessage());
}
}

Expand All @@ -184,7 +184,7 @@ public void testInvalidLength() {
IllegalArgumentException.class,
() -> tp.splitAndTransfer(0, valueCount * 2));

assertEquals("Invalid length: 1000", e.getMessage());
assertEquals("Invalid parameters startIndex: 0, length: 1000 for valueCount: 500", e.getMessage());
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.complex.DenseUnionVector;
import org.apache.arrow.vector.complex.FixedSizeListVector;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.MapVector;
Expand All @@ -48,7 +49,7 @@ public class TestSplitAndTransfer {
public void init() {
allocator = new RootAllocator(Long.MAX_VALUE);
}

@After
public void terminate() throws Exception {
allocator.close();
Expand All @@ -64,21 +65,130 @@ private void populateVarcharVector(final VarCharVector vector, int valueCount, S
}
vector.setValueCount(valueCount);
}


private void populateIntVector(final IntVector vector, int valueCount) {
for (int i = 0; i < valueCount; i++) {
vector.set(i, i);
}
vector.setValueCount(valueCount);
}

private void populateDenseUnionVector(final DenseUnionVector vector, int valueCount) {
VarCharVector varCharVector = vector.addOrGet("varchar", FieldType.nullable(new ArrowType.Utf8()),
VarCharVector.class);
BigIntVector intVector = vector.addOrGet("int",
FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class);

for (int i = 0; i < valueCount; i++) {
vector.setTypeId(i, (byte) (i % 2));
if (i % 2 == 0) {
final String s = String.format("%010d", i);
varCharVector.setSafe(i / 2, s.getBytes(StandardCharsets.UTF_8));
} else {
intVector.setSafe(i / 2, i);
}
}
vector.setValueCount(valueCount);
}

@Test
public void testWithEmptyVector() {
// MapVector use TransferImpl from ListVector
ListVector listVector = ListVector.empty("", allocator);
TransferPair transferPair = listVector.getTransferPair(allocator);
transferPair.splitAndTransfer(0, 0);
assertEquals(0, transferPair.getTo().getValueCount());
// BaseFixedWidthVector
IntVector intVector = new IntVector("", allocator);
transferPair = intVector.getTransferPair(allocator);
transferPair.splitAndTransfer(0, 0);
assertEquals(0, transferPair.getTo().getValueCount());
// BaseVariableWidthVector
VarCharVector varCharVector = new VarCharVector("", allocator);
transferPair = varCharVector.getTransferPair(allocator);
transferPair.splitAndTransfer(0, 0);
assertEquals(0, transferPair.getTo().getValueCount());
// BaseLargeVariableWidthVector
LargeVarCharVector largeVarCharVector = new LargeVarCharVector("", allocator);
transferPair = largeVarCharVector.getTransferPair(allocator);
transferPair.splitAndTransfer(0, 0);
assertEquals(0, transferPair.getTo().getValueCount());
// StructVector
StructVector structVector = StructVector.empty("", allocator);
transferPair = structVector.getTransferPair(allocator);
transferPair.splitAndTransfer(0, 0);
assertEquals(0, transferPair.getTo().getValueCount());
// FixedSizeListVector
FixedSizeListVector fixedSizeListVector = FixedSizeListVector.empty("", 0, allocator);
transferPair = fixedSizeListVector.getTransferPair(allocator);
transferPair.splitAndTransfer(0, 0);
assertEquals(0, transferPair.getTo().getValueCount());
// FixedSizeBinaryVector
FixedSizeBinaryVector fixedSizeBinaryVector = new FixedSizeBinaryVector("", allocator, 4);
transferPair = fixedSizeBinaryVector.getTransferPair(allocator);
transferPair.splitAndTransfer(0, 0);
assertEquals(0, transferPair.getTo().getValueCount());
// UnionVector
UnionVector unionVector = UnionVector.empty("", allocator);
transferPair = unionVector.getTransferPair(allocator);
transferPair.splitAndTransfer(0, 0);
assertEquals(0, transferPair.getTo().getValueCount());
// DenseUnionVector
DenseUnionVector duv = DenseUnionVector.empty("", allocator);
transferPair = duv.getTransferPair(allocator);
transferPair.splitAndTransfer(0, 0);
assertEquals(0, transferPair.getTo().getValueCount());

// non empty from vector

// BaseFixedWidthVector
IntVector fromIntVector = new IntVector("", allocator);
fromIntVector.allocateNew(100);
populateIntVector(fromIntVector, 100);
transferPair = fromIntVector.getTransferPair(allocator);
IntVector toIntVector = (IntVector) transferPair.getTo();
transferPair.splitAndTransfer(0, 0);
assertEquals(0, toIntVector.getValueCount());

transferPair.splitAndTransfer(50, 0);
assertEquals(0, toIntVector.getValueCount());

transferPair.splitAndTransfer(100, 0);
assertEquals(0, toIntVector.getValueCount());
fromIntVector.clear();
toIntVector.clear();

// DenseUnionVector
DenseUnionVector fromDuv = DenseUnionVector.empty("", allocator);
populateDenseUnionVector(fromDuv, 100);
transferPair = fromDuv.getTransferPair(allocator);
DenseUnionVector toDUV = (DenseUnionVector) transferPair.getTo();
transferPair.splitAndTransfer(0, 0);
assertEquals(0, toDUV.getValueCount());

transferPair.splitAndTransfer(50, 0);
assertEquals(0, toDUV.getValueCount());

transferPair.splitAndTransfer(100, 0);
assertEquals(0, toDUV.getValueCount());
fromDuv.clear();
toDUV.clear();
}

@Test /* VarCharVector */
public void test() throws Exception {
try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) {
varCharVector.allocateNew(10000, 1000);

final int valueCount = 500;
final String[] compareArray = new String[valueCount];

populateVarcharVector(varCharVector, valueCount, compareArray);

final TransferPair tp = varCharVector.getTransferPair(allocator);
final VarCharVector newVarCharVector = (VarCharVector) tp.getTo();
final int[][] startLengths = {{0, 201}, {201, 0}, {201, 200}, {401, 99}};

for (final int[] startLength : startLengths) {
final int start = startLength[0];
final int length = startLength[1];
Expand Down Expand Up @@ -428,5 +538,4 @@ public void testMapVectorZeroStartIndexAndLength() {
newMapVector.clear();
}
}

}

0 comments on commit b7db21e

Please sign in to comment.