Skip to content

Commit

Permalink
replace sort and dedup with TreeSet
Browse files Browse the repository at this point in the history
Signed-off-by: Nicholas Walter Knize <nknize@apache.org>
  • Loading branch information
nknize committed Mar 30, 2023
1 parent d8daa42 commit 33943f5
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 60 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,7 @@
import java.util.Comparator;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
Expand Down Expand Up @@ -95,30 +93,6 @@ public static <T> List<T> rotate(final List<T> list, int distance) {
return new RotatedList<>(list, d);
}

/**
 * Sorts the given list with {@code comparator} and then removes, in place, every element
 * that compares equal ({@code compare(a, b) == 0}) to the element kept before it.
 * <p>
 * After the call the list is sorted and contains exactly one element from each group of
 * comparator-equal elements (the first one of the group in sorted order). Lists with zero
 * or one element are returned untouched.
 *
 * @param array      the list to sort and de-duplicate; it must support {@code sort},
 *                   {@code ListIterator.set} and removal through {@code subList(..).clear()}
 * @param comparator the ordering used both for sorting and for deciding equality
 * @param <T>        the element type
 */
public static <T> void sortAndDedup(final List<T> array, Comparator<T> comparator) {
    // base case: nothing to sort or de-duplicate
    if (array.size() <= 1) {
        return;
    }
    array.sort(comparator);

    // write cursor: everything before deduped.nextIndex() is the unique prefix built so far
    ListIterator<T> deduped = array.listIterator();
    T cmp = deduped.next(); // the first item is always kept; cmp is the last kept item

    // read cursor: starts at the second item and scans the rest of the sorted list
    Iterator<T> oldArray = array.iterator();
    oldArray.next();

    while (oldArray.hasNext()) {
        T old = oldArray.next();
        if (comparator.compare(cmp, old) != 0) {
            // new distinct value: move it to the end of the unique prefix unless the slot
            // already holds this very element (no duplicates have been skipped yet)
            if (deduped.next() != old) {
                deduped.set(old);
            }
            // compare subsequent items against the value just kept, not against whatever
            // stale element previously occupied the write slot
            cmp = old;
        }
    }
    // drop the leftover tail behind the unique prefix
    array.subList(deduped.nextIndex(), array.size()).clear();
}

public static int[] toArray(Collection<Integer> ints) {
Objects.requireNonNull(ints);
return ints.stream().mapToInt(s -> s).toArray();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
import org.opensearch.common.bytes.BytesArray;
import org.opensearch.common.bytes.BytesReference;
import org.opensearch.common.io.stream.BytesStreamOutput;
import org.opensearch.common.util.CollectionUtils;
import org.opensearch.core.xcontent.XContentParser;
import org.opensearch.index.fielddata.IndexFieldData;
import org.opensearch.index.fielddata.plain.BytesBinaryIndexFieldData;
Expand All @@ -55,8 +54,10 @@
import java.util.Arrays;
import java.util.Base64;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.TreeSet;
import java.util.function.Supplier;

/**
Expand Down Expand Up @@ -255,13 +256,13 @@ public void add(byte[] bytes) {
@Override
public BytesRef binaryValue() {
try {
// sort and dedup in place
CollectionUtils.sortAndDedup(bytesList, Arrays::compareUnsigned);
int size = bytesList.stream().map(b -> b.length).reduce(0, Integer::sum);
int length = bytesList.size();
TreeSet<byte[]> bytesTree = new TreeSet((Comparator<byte[]>)((byte[] a, byte[] b) -> Arrays.compareUnsigned(a, b)));
bytesTree.addAll(bytesList);
int size = bytesTree.stream().mapToInt(b -> b.length).sum();
int length = bytesTree.size();
BytesStreamOutput out = new BytesStreamOutput(size + (length + 1) * 5);
out.writeVInt(length); // write total number of values
for (byte[] value : bytesList) {
for (byte[] value : bytesTree) {
int valueLength = value.length;
out.writeVInt(valueLength);
out.writeBytes(value, 0, valueLength);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,9 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.SortedSet;
Expand Down Expand Up @@ -87,32 +85,6 @@ public void testRotate() {
}
}

/**
 * Runs {@code CollectionUtils.sortAndDedup} on an ArrayList copy and a LinkedList copy of
 * {@code array}, then asserts that no two neighbouring elements compare equal under
 * {@code cmp} and that the resulting size is {@code expectedLength}.
 */
private <T> void assertDeduped(List<T> array, Comparator<T> cmp, int expectedLength) {
    // exercise both an array-backed and a linked list implementation
    List<T> arrayBacked = new ArrayList<T>(array);
    List<T> linked = new LinkedList<>(array);
    for (List<T> candidate : List.of(arrayBacked, linked)) {
        CollectionUtils.sortAndDedup(candidate, cmp);
        // every adjacent pair must differ according to the comparator
        int idx = 0;
        while (idx < candidate.size() - 1) {
            assertNotEquals(cmp.compare(candidate.get(idx), candidate.get(idx + 1)), 0);
            idx++;
        }
        assertEquals(expectedLength, candidate.size());
    }
}

/**
 * Checks sortAndDedup on empty lists, on an unsorted list with duplicates and on an
 * already sorted list with a trailing duplicate.
 */
public void testSortAndDedup() {
    final Comparator<String> stringOrder = Comparator.naturalOrder();
    final Comparator<Integer> intOrder = Comparator.naturalOrder();

    // empty string list stays empty
    final List<String> noStrings = List.of();
    assertDeduped(noStrings, stringOrder, 0);

    // empty integer list stays empty
    final List<Integer> noInts = List.of();
    assertDeduped(noInts, intOrder, 0);

    // unsorted input containing duplicates
    final List<Integer> unsorted = List.of(-1, 0, 2, 1, -1, 19, -1);
    assertDeduped(unsorted, intOrder, 5);

    // already sorted input containing a duplicate
    final List<Integer> presorted = List.of(-1, 0, 1, 2, 19, 19);
    assertDeduped(presorted, intOrder, 5);
}

public void testSortAndDedupByteRefArray() {
SortedSet<BytesRef> set = new TreeSet<>();
final int numValues = scaledRandomIntBetween(0, 10000);
Expand Down

0 comments on commit 33943f5

Please sign in to comment.