Skip to content

Commit

Permalink
[Refactor] Remove CollectionUtils.sortAndDedup and use java TreeSet i…
Browse files Browse the repository at this point in the history
…nstead of hppc ObjectArrayList (opensearch-project#6884)

* [Refactor] CollectionUtils.sortAndDedup to use java lists

HPPC ObjectArrayLists provide no benefit in CollectionUtils.sortAndDedup and
adds an unnecessary library dependency. This commit removes that dependency on
hppc by switching the sortAndDeup method to use java.util.Lists.
BinaryFieldMapper logic is updated to use the generic java list.

Signed-off-by: Nicholas Walter Knize <nknize@apache.org>

* close BytesStreamOutput in CustomBinaryDocValuesField.bytes

Signed-off-by: Nicholas Walter Knize <nknize@apache.org>

---------

Signed-off-by: Nicholas Walter Knize <nknize@apache.org>
Signed-off-by: Valentin Mitrofanov <mitrofmep@gmail.com>
  • Loading branch information
nknize authored and mitrofmep committed Apr 5, 2023
1 parent b993a6a commit 237ff98
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 69 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,10 @@

package org.opensearch.common.util;

import com.carrotsearch.hppc.ObjectArrayList;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefArray;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.IntroSorter;
import org.opensearch.common.Strings;
import org.opensearch.common.collect.Iterators;

Expand All @@ -50,7 +48,9 @@
import java.util.Comparator;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
Expand Down Expand Up @@ -95,59 +95,28 @@ public static <T> List<T> rotate(final List<T> list, int distance) {
return new RotatedList<>(list, d);
}

public static void sortAndDedup(final ObjectArrayList<byte[]> array) {
int len = array.size();
if (len > 1) {
sort(array);
int uniqueCount = 1;
for (int i = 1; i < len; ++i) {
if (!Arrays.equals(array.get(i), array.get(i - 1))) {
array.set(uniqueCount++, array.get(i));
}
}
array.elementsCount = uniqueCount;
/**
* in place de-duplicates items in a list
*/
public static <T> void sortAndDedup(final List<T> array, Comparator<T> comparator) {
// base case: one item
if (array.size() <= 1) {
return;
}
}

public static void sort(final ObjectArrayList<byte[]> array) {
new IntroSorter() {

byte[] pivot;

@Override
protected void swap(int i, int j) {
final byte[] tmp = array.get(i);
array.set(i, array.get(j));
array.set(j, tmp);
}

@Override
protected int compare(int i, int j) {
return compare(array.get(i), array.get(j));
}

@Override
protected void setPivot(int i) {
pivot = array.get(i);
}

@Override
protected int comparePivot(int j) {
return compare(pivot, array.get(j));
array.sort(comparator);
ListIterator<T> deduped = array.listIterator();
T cmp = deduped.next(); // return the first item and advance
Iterator<T> oldArray = array.iterator();
oldArray.next(); // advance to the old to the second item (advanced to third below)

do {
T old = oldArray.next(); // get the next item and advance iter
if (comparator.compare(cmp, old) != 0 && (cmp = deduped.next()) != old) {
deduped.set(old);
}

private int compare(byte[] left, byte[] right) {
for (int i = 0, j = 0; i < left.length && j < right.length; i++, j++) {
int a = left[i] & 0xFF;
int b = right[j] & 0xFF;
if (a != b) {
return a - b;
}
}
return left.length - right.length;
}

}.sort(0, array.size());
} while (oldArray.hasNext());
// in place update
array.subList(deduped.nextIndex(), array.size()).clear();
}

public static int[] toArray(Collection<Integer> ints) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@

package org.opensearch.index.mapper;

import com.carrotsearch.hppc.ObjectArrayList;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
Expand All @@ -52,6 +51,7 @@

import java.io.IOException;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Base64;
import java.util.Collections;
Expand Down Expand Up @@ -240,35 +240,34 @@ protected String contentType() {
*/
public static class CustomBinaryDocValuesField extends CustomDocValuesField {

private final ObjectArrayList<byte[]> bytesList;

private int totalSize = 0;
private final ArrayList<byte[]> bytesList;

public CustomBinaryDocValuesField(String name, byte[] bytes) {
super(name);
bytesList = new ObjectArrayList<>();
bytesList = new ArrayList<>();
add(bytes);
}

public void add(byte[] bytes) {
bytesList.add(bytes);
totalSize += bytes.length;
}

@Override
public BytesRef binaryValue() {
try {
CollectionUtils.sortAndDedup(bytesList);
int size = bytesList.size();
BytesStreamOutput out = new BytesStreamOutput(totalSize + (size + 1) * 5);
out.writeVInt(size); // write total number of values
for (int i = 0; i < size; i++) {
final byte[] value = bytesList.get(i);
int valueLength = value.length;
out.writeVInt(valueLength);
out.writeBytes(value, 0, valueLength);
// sort and dedup in place
CollectionUtils.sortAndDedup(bytesList, Arrays::compareUnsigned);
int size = bytesList.stream().map(b -> b.length).reduce(0, Integer::sum);
int length = bytesList.size();
try (BytesStreamOutput out = new BytesStreamOutput(size + (length + 1) * 5)) {
out.writeVInt(length); // write total number of values
for (byte[] value : bytesList) {
int valueLength = value.length;
out.writeVInt(valueLength);
out.writeBytes(value, 0, valueLength);
}
return out.bytes().toBytesRef();
}
return out.bytes().toBytesRef();
} catch (IOException e) {
throw new OpenSearchException("Failed to get binary value", e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,11 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.SortedSet;
Expand Down Expand Up @@ -85,6 +87,32 @@ public void testRotate() {
}
}

private <T> void assertDeduped(List<T> array, Comparator<T> cmp, int expectedLength) {
// test the dedup w/ ArrayLists and LinkedLists
List<List<T>> types = List.of(new ArrayList<T>(array), new LinkedList<>(array));
for (List<T> clone : types) {
// dedup the list
CollectionUtils.sortAndDedup(clone, cmp);
// verify unique elements
for (int i = 0; i < clone.size() - 1; ++i) {
assertNotEquals(cmp.compare(clone.get(i), clone.get(i + 1)), 0);
}
assertEquals(expectedLength, clone.size());
}
}

public void testSortAndDedup() {
// test no elements in a string array
assertDeduped(List.<String>of(), Comparator.naturalOrder(), 0);
// test no elements in an integer array
assertDeduped(List.<Integer>of(), Comparator.naturalOrder(), 0);
// test unsorted array
assertDeduped(List.of(-1, 0, 2, 1, -1, 19, -1), Comparator.naturalOrder(), 5);
// test sorted array
assertDeduped(List.of(-1, 0, 1, 2, 19, 19), Comparator.naturalOrder(), 5);
// test sorted
}

public void testSortAndDedupByteRefArray() {
SortedSet<BytesRef> set = new TreeSet<>();
final int numValues = scaledRandomIntBetween(0, 10000);
Expand Down

0 comments on commit 237ff98

Please sign in to comment.