Skip to content

Commit

Permalink
GH-25659: [Java] Add DefaultVectorComparators for Large types (#37887)
Browse files Browse the repository at this point in the history
### Rationale for this change
Support additional vector types in DefaultVectorComparators to make arrow-algorithm easier to use.

### What changes are included in this PR?
Add DefaultVectorComparators for large vector types (LargeVarCharVector and LargeVarBinaryVector).

### Are these changes tested?
Yes.

### Are there any user-facing changes?
No.
* Closes: #25659

Authored-by: James Duong <duong.james@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
  • Loading branch information
jduo authored Sep 26, 2023
1 parent 517d849 commit e038498
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
import org.apache.arrow.memory.util.ArrowBufPointer;
import org.apache.arrow.memory.util.ByteFunctionHelpers;
import org.apache.arrow.vector.BaseFixedWidthVector;
import org.apache.arrow.vector.BaseVariableWidthVector;
import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.BitVector;
import org.apache.arrow.vector.DateDayVector;
Expand All @@ -50,6 +49,7 @@
import org.apache.arrow.vector.UInt4Vector;
import org.apache.arrow.vector.UInt8Vector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.VariableWidthVector;
import org.apache.arrow.vector.complex.BaseRepeatedValueVector;

/**
Expand Down Expand Up @@ -112,7 +112,7 @@ public static <T extends ValueVector> VectorValueComparator<T> createDefaultComp
} else if (vector instanceof TimeStampVector) {
return (VectorValueComparator<T>) new TimeStampComparator();
}
} else if (vector instanceof BaseVariableWidthVector) {
} else if (vector instanceof VariableWidthVector) {
return (VectorValueComparator<T>) new VariableWidthComparator();
} else if (vector instanceof BaseRepeatedValueVector) {
VectorValueComparator<?> innerComparator =
Expand Down Expand Up @@ -675,14 +675,14 @@ public VectorValueComparator<TimeStampVector> createNew() {
}

/**
* Default comparator for {@link org.apache.arrow.vector.BaseVariableWidthVector}.
* Default comparator for {@link org.apache.arrow.vector.VariableWidthVector}.
* The comparison is in lexicographic order, with nulls coming first.
*/
public static class VariableWidthComparator extends VectorValueComparator<BaseVariableWidthVector> {
public static class VariableWidthComparator extends VectorValueComparator<VariableWidthVector> {

private ArrowBufPointer reusablePointer1 = new ArrowBufPointer();
private final ArrowBufPointer reusablePointer1 = new ArrowBufPointer();

private ArrowBufPointer reusablePointer2 = new ArrowBufPointer();
private final ArrowBufPointer reusablePointer2 = new ArrowBufPointer();

@Override
public int compare(int index1, int index2) {
Expand All @@ -699,7 +699,7 @@ public int compareNotNull(int index1, int index2) {
}

@Override
public VectorValueComparator<BaseVariableWidthVector> createNew() {
public VectorValueComparator<VariableWidthVector> createNew() {
return new VariableWidthComparator();
}
}
Expand Down Expand Up @@ -743,7 +743,7 @@ public int compareNotNull(int index1, int index2) {
@Override
public VectorValueComparator<BaseRepeatedValueVector> createNew() {
VectorValueComparator<T> newInnerComparator = innerComparator.createNew();
return new RepeatedValueComparator(newInnerComparator);
return new RepeatedValueComparator<>(newInnerComparator);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
import org.apache.arrow.vector.Float8Vector;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.IntervalDayVector;
import org.apache.arrow.vector.LargeVarBinaryVector;
import org.apache.arrow.vector.LargeVarCharVector;
import org.apache.arrow.vector.SmallIntVector;
import org.apache.arrow.vector.TimeMicroVector;
import org.apache.arrow.vector.TimeMilliVector;
Expand All @@ -47,6 +49,9 @@
import org.apache.arrow.vector.UInt2Vector;
import org.apache.arrow.vector.UInt4Vector;
import org.apache.arrow.vector.UInt8Vector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.VarBinaryVector;
import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
import org.apache.arrow.vector.types.TimeUnit;
Expand Down Expand Up @@ -911,4 +916,25 @@ public void testCheckNullsOnCompareIsTrueWithEmptyVectors() {
assertTrue(comparator.checkNullsOnCompare());
}
}

/**
 * Confirms that the default comparator factory hands back a
 * {@code VariableWidthComparator} for each variable-width vector type exercised
 * here: the regular VarChar/VarBinary vectors and their Large counterparts.
 */
@Test
public void testVariableWidthDefaultComparators() {
  // Regular character vector.
  try (VarCharVector charVector = new VarCharVector("test", allocator)) {
    verifyVariableWidthComparatorReturned(charVector);
  }

  // Regular binary vector.
  try (VarBinaryVector binaryVector = new VarBinaryVector("test", allocator)) {
    verifyVariableWidthComparatorReturned(binaryVector);
  }

  // Large character vector.
  try (LargeVarCharVector largeCharVector = new LargeVarCharVector("test", allocator)) {
    verifyVariableWidthComparatorReturned(largeCharVector);
  }

  // Large binary vector.
  try (LargeVarBinaryVector largeBinaryVector = new LargeVarBinaryVector("test", allocator)) {
    verifyVariableWidthComparatorReturned(largeBinaryVector);
  }
}

/**
 * Asserts that the default comparator created for the given vector is exactly
 * a {@code DefaultVectorComparators.VariableWidthComparator}.
 *
 * @param vec the vector to request a default comparator for
 * @param <V> the concrete vector type
 */
private static <V extends ValueVector> void verifyVariableWidthComparatorReturned(V vec) {
  // Compare runtime classes so that a subclass of the comparator would not pass.
  final Class<?> actualComparatorType =
      DefaultVectorComparators.createDefaultComparator(vec).getClass();
  assertEquals(DefaultVectorComparators.VariableWidthComparator.class, actualComparatorType);
}
}

0 comments on commit e038498

Please sign in to comment.