Skip to content

Commit

Permalink
[Refactor] CollectionUtils.sortAndDedup to use java lists
Browse files Browse the repository at this point in the history
HPPC ObjectArrayLists provide no benefit in CollectionUtils.sortAndDedup and
adds an unnecessary library dependency. This commit removes that dependency on
hppc by switching the sortAndDeup method to use java.util.Lists.
BinaryFieldMapper logic is updated to use the generic java list.

Signed-off-by: Nicholas Walter Knize <nknize@apache.org>
  • Loading branch information
nknize committed Mar 30, 2023
1 parent 37b37c5 commit 3aa3164
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 65 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,10 @@

package org.opensearch.common.util;

import com.carrotsearch.hppc.ObjectArrayList;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefArray;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.IntroSorter;
import org.opensearch.common.Strings;
import org.opensearch.common.collect.Iterators;

Expand All @@ -50,7 +48,9 @@
import java.util.Comparator;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
Expand Down Expand Up @@ -95,59 +95,28 @@ public static <T> List<T> rotate(final List<T> list, int distance) {
return new RotatedList<>(list, d);
}

public static void sortAndDedup(final ObjectArrayList<byte[]> array) {
int len = array.size();
if (len > 1) {
sort(array);
int uniqueCount = 1;
for (int i = 1; i < len; ++i) {
if (!Arrays.equals(array.get(i), array.get(i - 1))) {
array.set(uniqueCount++, array.get(i));
}
}
array.elementsCount = uniqueCount;
/**
* in place de-duplicates items in a list
*/
public static <T> void sortAndDedup(final List<T> array, Comparator<T> comparator) {
// base case: one item
if (array.size() <= 1) {
return;
}
}

public static void sort(final ObjectArrayList<byte[]> array) {
new IntroSorter() {

byte[] pivot;

@Override
protected void swap(int i, int j) {
final byte[] tmp = array.get(i);
array.set(i, array.get(j));
array.set(j, tmp);
}

@Override
protected int compare(int i, int j) {
return compare(array.get(i), array.get(j));
}

@Override
protected void setPivot(int i) {
pivot = array.get(i);
}

@Override
protected int comparePivot(int j) {
return compare(pivot, array.get(j));
array.sort(comparator);
ListIterator<T> deduped = array.listIterator();
T cmp = deduped.next(); // return the first item and advance
Iterator<T> oldArray = array.iterator();
oldArray.next(); // advance to the old to the second item (advanced to third below)

do {
T old = oldArray.next(); // get the next item and advance iter
if (comparator.compare(cmp, old) != 0 && (cmp = deduped.next()) != old) {
deduped.set(old);
}

private int compare(byte[] left, byte[] right) {
for (int i = 0, j = 0; i < left.length && j < right.length; i++, j++) {
int a = left[i] & 0xFF;
int b = right[j] & 0xFF;
if (a != b) {
return a - b;
}
}
return left.length - right.length;
}

}.sort(0, array.size());
} while (oldArray.hasNext());
// in place update
array.subList(deduped.nextIndex(), array.size()).clear();
}

public static int[] toArray(Collection<Integer> ints) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@

package org.opensearch.index.mapper;

import com.carrotsearch.hppc.ObjectArrayList;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
Expand All @@ -52,6 +51,7 @@

import java.io.IOException;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Base64;
import java.util.Collections;
Expand Down Expand Up @@ -240,30 +240,28 @@ protected String contentType() {
*/
public static class CustomBinaryDocValuesField extends CustomDocValuesField {

private final ObjectArrayList<byte[]> bytesList;

private int totalSize = 0;
private final ArrayList<byte[]> bytesList;

public CustomBinaryDocValuesField(String name, byte[] bytes) {
super(name);
bytesList = new ObjectArrayList<>();
bytesList = new ArrayList<>();
add(bytes);
}

public void add(byte[] bytes) {
bytesList.add(bytes);
totalSize += bytes.length;
}

@Override
public BytesRef binaryValue() {
try {
CollectionUtils.sortAndDedup(bytesList);
int size = bytesList.size();
BytesStreamOutput out = new BytesStreamOutput(totalSize + (size + 1) * 5);
out.writeVInt(size); // write total number of values
for (int i = 0; i < size; i++) {
final byte[] value = bytesList.get(i);
// sort and dedup in place
CollectionUtils.sortAndDedup(bytesList, Arrays::compareUnsigned);
int size = bytesList.stream().map(b -> b.length).reduce(0, Integer::sum);
int length = bytesList.size();
BytesStreamOutput out = new BytesStreamOutput(size + (length + 1) * 5);
out.writeVInt(length); // write total number of values
for (byte[] value : bytesList) {
int valueLength = value.length;
out.writeVInt(valueLength);
out.writeBytes(value, 0, valueLength);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,11 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.SortedSet;
Expand Down Expand Up @@ -85,6 +87,32 @@ public void testRotate() {
}
}

private <T> void assertDeduped(List<T> array, Comparator<T> cmp, int expectedLength) {
// test the dedup w/ ArrayLists and LinkedLists
List<List<T>> types = List.of(new ArrayList<T>(array), new LinkedList<>(array));
for (List<T> clone : types) {
// dedup the list
CollectionUtils.sortAndDedup(clone, cmp);
// verify unique elements
for (int i = 0; i < clone.size() - 1; ++i) {
assertNotEquals(cmp.compare(clone.get(i), clone.get(i + 1)), 0);
}
assertEquals(expectedLength, clone.size());
}
}

public void testSortAndDedup() {
// test no elements in a string array
assertDeduped(List.<String>of(), Comparator.naturalOrder(), 0);
// test no elements in an integer array
assertDeduped(List.<Integer>of(), Comparator.naturalOrder(), 0);
// test unsorted array
assertDeduped(List.of(-1, 0, 2, 1, -1, 19, -1), Comparator.naturalOrder(), 5);
// test sorted array
assertDeduped(List.of(-1, 0, 1, 2, 19, 19), Comparator.naturalOrder(), 5);
// test sorted
}

public void testSortAndDedupByteRefArray() {
SortedSet<BytesRef> set = new TreeSet<>();
final int numValues = scaledRandomIntBetween(0, 10000);
Expand Down

0 comments on commit 3aa3164

Please sign in to comment.