From d8daa424e551bcd483f27165bd62e72e3f313705 Mon Sep 17 00:00:00 2001 From: Nicholas Walter Knize Date: Wed, 29 Mar 2023 17:02:24 -0500 Subject: [PATCH] [Refactor] CollectionUtils.sortAndDedup to use java lists HPPC ObjectArrayLists provide no benefit in CollectionUtils.sortAndDedup and adds an unnecessary library dependency. This commit removes that dependency on hppc by switching the sortAndDeup method to use java.util.Lists. BinaryFieldMapper logic is updated to use the generic java list. Signed-off-by: Nicholas Walter Knize --- .../common/util/CollectionUtils.java | 75 ++++++------------- .../index/mapper/BinaryFieldMapper.java | 22 +++--- .../common/util/CollectionUtilsTests.java | 28 +++++++ 3 files changed, 60 insertions(+), 65 deletions(-) diff --git a/server/src/main/java/org/opensearch/common/util/CollectionUtils.java b/server/src/main/java/org/opensearch/common/util/CollectionUtils.java index d8a86f4878f58..6452d7061fdfa 100644 --- a/server/src/main/java/org/opensearch/common/util/CollectionUtils.java +++ b/server/src/main/java/org/opensearch/common/util/CollectionUtils.java @@ -32,12 +32,10 @@ package org.opensearch.common.util; -import com.carrotsearch.hppc.ObjectArrayList; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefArray; import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.InPlaceMergeSorter; -import org.apache.lucene.util.IntroSorter; import org.opensearch.common.Strings; import org.opensearch.common.collect.Iterators; @@ -50,7 +48,9 @@ import java.util.Comparator; import java.util.HashMap; import java.util.IdentityHashMap; +import java.util.Iterator; import java.util.List; +import java.util.ListIterator; import java.util.Locale; import java.util.Map; import java.util.Objects; @@ -95,59 +95,28 @@ public static List rotate(final List list, int distance) { return new RotatedList<>(list, d); } - public static void sortAndDedup(final ObjectArrayList array) { - int len = array.size(); - if (len > 1) { - sort(array); - int uniqueCount = 1; - for (int i = 1; i < len; ++i) { - if (!Arrays.equals(array.get(i), array.get(i - 1))) { - array.set(uniqueCount++, array.get(i)); - } - } - array.elementsCount = uniqueCount; + /** + * in place de-duplicates items in a list + */ + public static void sortAndDedup(final List array, Comparator comparator) { + // base case: one item + if (array.size() <= 1) { + return; } - } - - public static void sort(final ObjectArrayList array) { - new IntroSorter() { - - byte[] pivot; - - @Override - protected void swap(int i, int j) { - final byte[] tmp = array.get(i); - array.set(i, array.get(j)); - array.set(j, tmp); - } - - @Override - protected int compare(int i, int j) { - return compare(array.get(i), array.get(j)); - } - - @Override - protected void setPivot(int i) { - pivot = array.get(i); - } - - @Override - protected int comparePivot(int j) { - return compare(pivot, array.get(j)); + array.sort(comparator); + ListIterator deduped = array.listIterator(); + T cmp = deduped.next(); // return the first item and advance + Iterator oldArray = array.iterator(); + oldArray.next(); // advance to the old to the second item (advanced to third below) + + do { + T old = oldArray.next(); // get the next item and advance iter + if (comparator.compare(cmp, old) != 0 && (cmp = deduped.next()) != old) { + deduped.set(old); } - - private int compare(byte[] left, byte[] right) { - for (int i = 0, j = 0; i < left.length && j < right.length; i++, j++) { - int a = left[i] & 0xFF; - int b = right[j] & 0xFF; - if (a != b) { - return a - b; - } - } - return left.length - right.length; - } - - }.sort(0, array.size()); + } while (oldArray.hasNext()); + // in place update + array.subList(deduped.nextIndex(), array.size()).clear(); } public static int[] toArray(Collection ints) { diff --git a/server/src/main/java/org/opensearch/index/mapper/BinaryFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/BinaryFieldMapper.java index 6965f3bf664ed..9c60641e61258 100644 --- a/server/src/main/java/org/opensearch/index/mapper/BinaryFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/BinaryFieldMapper.java @@ -32,7 +32,6 @@ package org.opensearch.index.mapper; -import com.carrotsearch.hppc.ObjectArrayList; import org.apache.lucene.document.StoredField; import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; @@ -52,6 +51,7 @@ import java.io.IOException; import java.time.ZoneId; +import java.util.ArrayList; import java.util.Arrays; import java.util.Base64; import java.util.Collections; @@ -240,30 +240,28 @@ protected String contentType() { */ public static class CustomBinaryDocValuesField extends CustomDocValuesField { - private final ObjectArrayList bytesList; - - private int totalSize = 0; + private final ArrayList bytesList; public CustomBinaryDocValuesField(String name, byte[] bytes) { super(name); - bytesList = new ObjectArrayList<>(); + bytesList = new ArrayList<>(); add(bytes); } public void add(byte[] bytes) { bytesList.add(bytes); - totalSize += bytes.length; } @Override public BytesRef binaryValue() { try { - CollectionUtils.sortAndDedup(bytesList); - int size = bytesList.size(); - BytesStreamOutput out = new BytesStreamOutput(totalSize + (size + 1) * 5); - out.writeVInt(size); // write total number of values - for (int i = 0; i < size; i++) { - final byte[] value = bytesList.get(i); + // sort and dedup in place + CollectionUtils.sortAndDedup(bytesList, Arrays::compareUnsigned); + int size = bytesList.stream().map(b -> b.length).reduce(0, Integer::sum); + int length = bytesList.size(); + BytesStreamOutput out = new BytesStreamOutput(size + (length + 1) * 5); + out.writeVInt(length); // write total number of values + for (byte[] value : bytesList) { int valueLength = value.length; out.writeVInt(valueLength); out.writeBytes(value, 0, valueLength); diff --git a/server/src/test/java/org/opensearch/common/util/CollectionUtilsTests.java b/server/src/test/java/org/opensearch/common/util/CollectionUtilsTests.java index 40b2706d314ce..c237bdeb5c5cf 100644 --- a/server/src/test/java/org/opensearch/common/util/CollectionUtilsTests.java +++ b/server/src/test/java/org/opensearch/common/util/CollectionUtilsTests.java @@ -41,9 +41,11 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.SortedSet; @@ -85,6 +87,32 @@ public void testRotate() { } } + private void assertDeduped(List array, Comparator cmp, int expectedLength) { + // test the dedup w/ ArrayLists and LinkedLists + List> types = List.of(new ArrayList(array), new LinkedList<>(array)); + for (List clone : types) { + // dedup the list + CollectionUtils.sortAndDedup(clone, cmp); + // verify unique elements + for (int i = 0; i < clone.size() - 1; ++i) { + assertNotEquals(cmp.compare(clone.get(i), clone.get(i + 1)), 0); + } + assertEquals(expectedLength, clone.size()); + } + } + + public void testSortAndDedup() { + // test no elements in a string array + assertDeduped(List.of(), Comparator.naturalOrder(), 0); + // test no elements in an integer array + assertDeduped(List.of(), Comparator.naturalOrder(), 0); + // test unsorted array + assertDeduped(List.of(-1, 0, 2, 1, -1, 19, -1), Comparator.naturalOrder(), 5); + // test sorted array + assertDeduped(List.of(-1, 0, 1, 2, 19, 19), Comparator.naturalOrder(), 5); + // test sorted + } + public void testSortAndDedupByteRefArray() { SortedSet set = new TreeSet<>(); final int numValues = scaledRandomIntBetween(0, 10000);