From c71654444159f643632fbd57588cc00c01b8bb7f Mon Sep 17 00:00:00 2001 From: Ketan Verma <9292653+ketanv3@users.noreply.github.com> Date: Mon, 3 Jul 2023 06:09:18 -0400 Subject: [PATCH] Self-organizing hash table to improve the performance of bucket aggregations (#7652) (#8337) * Add self-organizing hash table to improve the performance of bucket aggregations * Updated approach: PSL, fingerprint and recency information are embedded in the hash table itself * Updated tests and added microbenchmarks * Renamed FastLongHash to ReorganizingLongHash and updated the default initial capacity --------- Signed-off-by: Ketan Verma --- CHANGELOG.md | 1 + .../common/util/LongHashBenchmark.java | 425 ++++++++++++++++++ .../common/util/ReorganizingLongHash.java | 307 +++++++++++++ .../bucket/terms/LongKeyedBucketOrds.java | 6 +- .../util/ReorganizingLongHashTests.java | 146 ++++++ 5 files changed, 882 insertions(+), 3 deletions(-) create mode 100644 benchmarks/src/main/java/org/opensearch/common/util/LongHashBenchmark.java create mode 100644 server/src/main/java/org/opensearch/common/util/ReorganizingLongHash.java create mode 100644 server/src/test/java/org/opensearch/common/util/ReorganizingLongHashTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index f82fed6737ec8..e94f482bc5651 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -74,6 +74,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Refactor] Metadata members from ImmutableOpenMap to j.u.Map ([#7165](https://github.com/opensearch-project/OpenSearch/pull/7165)) - [Refactor] more ImmutableOpenMap to jdk Map in cluster package ([#7301](https://github.com/opensearch-project/OpenSearch/pull/7301)) - [Refactor] ImmutableOpenMap to j.u.Map in IndexMetadata ([#7306](https://github.com/opensearch-project/OpenSearch/pull/7306)) +- Add self-organizing hash table to improve the performance of bucket aggregations ([#7652](https://github.com/opensearch-project/OpenSearch/pull/7652)) - Check UTF16 string size before converting to String to avoid OOME ([#7963](https://github.com/opensearch-project/OpenSearch/pull/7963)) - [Refactor] remaining ImmutableOpenMap usage to j.u.Map and remove class ([#7309](https://github.com/opensearch-project/OpenSearch/pull/7309)) diff --git a/benchmarks/src/main/java/org/opensearch/common/util/LongHashBenchmark.java b/benchmarks/src/main/java/org/opensearch/common/util/LongHashBenchmark.java new file mode 100644 index 0000000000000..fa75dd2c91f5a --- /dev/null +++ b/benchmarks/src/main/java/org/opensearch/common/util/LongHashBenchmark.java @@ -0,0 +1,425 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.common.util; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; +import org.opensearch.common.lease.Releasable; + +import java.util.Random; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; + +@Fork(value = 3) +@Warmup(iterations = 1, time = 4) +@Measurement(iterations = 3, time = 2) +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +public class LongHashBenchmark { + + @Benchmark + public void add(Blackhole bh, HashTableOptions tableOpts, WorkloadOptions workloadOpts) { + try (HashTable table = tableOpts.get(); WorkloadIterator iter = workloadOpts.iter()) { + while (iter.hasNext()) { + bh.consume(table.add(iter.next())); + } + } + } + + /** + * Creates a hash table with varying parameters. + */ + @State(Scope.Benchmark) + public static class HashTableOptions { + + @Param({ "LongHash", "ReorganizingLongHash" }) + public String type; + + @Param({ "1" }) + public long initialCapacity; + + @Param({ "0.6" }) + public float loadFactor; + + private Supplier supplier; + + @Setup + public void setup() { + switch (type) { + case "LongHash": + supplier = this::newLongHash; + break; + case "ReorganizingLongHash": + supplier = this::newReorganizingLongHash; + break; + default: + throw new IllegalArgumentException("invalid hash table type: " + type); + } + } + + public HashTable get() { + return supplier.get(); + } + + private HashTable newLongHash() { + return new HashTable() { + private final LongHash table = new LongHash(initialCapacity, loadFactor, BigArrays.NON_RECYCLING_INSTANCE); + + @Override + public long add(long key) { + return table.add(key); + } + + @Override + public void close() { + table.close(); + } + }; + } + + private HashTable newReorganizingLongHash() { + return new HashTable() { + private final ReorganizingLongHash table = new ReorganizingLongHash( + initialCapacity, + loadFactor, + BigArrays.NON_RECYCLING_INSTANCE + ); + + @Override + public long add(long key) { + return table.add(key); + } + + @Override + public void close() { + table.close(); + } + }; + } + } + + /** + * Creates a workload with varying parameters. + */ + @State(Scope.Benchmark) + public static class WorkloadOptions { + public static final int NUM_HITS = 20_000_000; + + /** + * Repeat the experiment with growing number of keys. 
+ * These values are generated with an exponential growth pattern such that: + * value = ceil(previous_value * random_float_between(1.0, 1.14)) + */ + @Param({ + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "13", + "15", + "17", + "18", + "19", + "20", + "21", + "23", + "26", + "27", + "30", + "32", + "35", + "41", + "45", + "50", + "53", + "54", + "55", + "57", + "63", + "64", + "69", + "74", + "80", + "84", + "91", + "98", + "101", + "111", + "114", + "124", + "128", + "139", + "148", + "161", + "162", + "176", + "190", + "204", + "216", + "240", + "257", + "269", + "291", + "302", + "308", + "327", + "341", + "374", + "402", + "412", + "438", + "443", + "488", + "505", + "558", + "612", + "621", + "623", + "627", + "642", + "717", + "765", + "787", + "817", + "915", + "962", + "1011", + "1083", + "1163", + "1237", + "1301", + "1424", + "1541", + "1716", + "1805", + "1817", + "1934", + "2024", + "2238", + "2281", + "2319", + "2527", + "2583", + "2639", + "2662", + "2692", + "2991", + "3201", + "3215", + "3517", + "3681", + "3710", + "4038", + "4060", + "4199", + "4509", + "4855", + "5204", + "5624", + "6217", + "6891", + "7569", + "8169", + "8929", + "9153", + "10005", + "10624", + "10931", + "12070", + "12370", + "13694", + "14227", + "15925", + "17295", + "17376", + "18522", + "19200", + "20108", + "21496", + "23427", + "24224", + "26759", + "29199", + "29897", + "32353", + "33104", + "36523", + "38480", + "38958", + "40020", + "44745", + "45396", + "47916", + "49745", + "49968", + "52231", + "53606" }) + public int size; + + @Param({ "correlated", "uncorrelated", "distinct" }) + public String dataset; + + private WorkloadIterator iterator; + + @Setup + public void setup() { + switch (dataset) { + case "correlated": + iterator = newCorrelatedWorkload(); + break; + case "uncorrelated": + iterator = newUncorrelatedWorkload(); + break; + case "distinct": + iterator = newDistinctWorkload(); + break; + default: + throw new IllegalArgumentException("invalid dataset: " + dataset); + } + } + + public WorkloadIterator iter() { + return iterator; + } + + /** + * Simulates monotonically increasing timestamp data with multiple hits mapping to the same key. + */ + private WorkloadIterator newCorrelatedWorkload() { + assert NUM_HITS >= size : "ensure hits >= size so that each key is used at least once"; + + final long[] data = new long[size]; + for (int i = 0; i < data.length; i++) { + data[i] = 1420070400000L + 3600000L * i; + } + + return new WorkloadIterator() { + private int count = 0; + private int index = 0; + private int remaining = NUM_HITS / data.length; + + @Override + public boolean hasNext() { + return count < NUM_HITS; + } + + @Override + public long next() { + if (--remaining <= 0) { + index = (index + 1) % data.length; + remaining = NUM_HITS / data.length; + } + count++; + return data[index]; + } + + @Override + public void reset() { + count = 0; + index = 0; + remaining = NUM_HITS / data.length; + } + }; + } + + /** + * Simulates uncorrelated data (such as travel distance / fare amount). 
+ */ + private WorkloadIterator newUncorrelatedWorkload() { + assert NUM_HITS >= size : "ensure hits >= size so that each key is used at least once"; + + final Random random = new Random(0); // fixed seed for reproducible results + final long[] data = new long[size]; + for (int i = 0; i < data.length; i++) { + data[i] = Double.doubleToLongBits(20.0 + 80 * random.nextDouble()); + } + + return new WorkloadIterator() { + private int count = 0; + private int index = 0; + + @Override + public boolean hasNext() { + return count < NUM_HITS; + } + + @Override + public long next() { + count++; + index = (index + 1) % data.length; + return data[index]; + } + + @Override + public void reset() { + count = 0; + index = 0; + } + }; + } + + /** + * Simulates workload with high cardinality, i.e., each hit mapping to a different key. + */ + private WorkloadIterator newDistinctWorkload() { + return new WorkloadIterator() { + private int count = 0; + + @Override + public boolean hasNext() { + return count < size; + } + + @Override + public long next() { + return count++; + } + + @Override + public void reset() { + count = 0; + } + }; + } + } + + private interface HashTable extends Releasable { + long add(long key); + } + + private interface WorkloadIterator extends Releasable { + boolean hasNext(); + + long next(); + + void reset(); + + @Override + default void close() { + reset(); + } + } +} diff --git a/server/src/main/java/org/opensearch/common/util/ReorganizingLongHash.java b/server/src/main/java/org/opensearch/common/util/ReorganizingLongHash.java new file mode 100644 index 0000000000000..5789b47423c1d --- /dev/null +++ b/server/src/main/java/org/opensearch/common/util/ReorganizingLongHash.java @@ -0,0 +1,307 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.common.util; + +import org.apache.lucene.util.hppc.BitMixer; +import org.opensearch.common.lease.Releasable; + +/** + * Specialized hash table implementation that maps a (primitive) long to long. + * + *
+ * <p>
+ * It organizes itself by moving keys around dynamically in order to reduce the + * longest probe sequence length (PSL), which makes lookups faster as keys are likely to + * be found in the same CPU cache line. It also optimizes lookups for recently added keys, + * making it useful for aggregations where keys are correlated across consecutive hits. + * + *
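+ * <p>
+ * A minimal usage sketch (using the non-recycling {@code BigArrays} instance, as the unit
+ * tests in this change do):
+ * <pre>
+ * try (ReorganizingLongHash hash = new ReorganizingLongHash(BigArrays.NON_RECYCLING_INSTANCE)) {
+ *     long ordinal = hash.add(42);          // new key: returns its ordinal
+ *     assert hash.add(42) == -1 - ordinal;  // existing key: returns (-1 - ordinal)
+ *     assert hash.find(42) == ordinal;      // lookup by key
+ *     assert hash.get(ordinal) == 42;       // lookup by ordinal
+ * }
+ * </pre>
+ *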
+ * <p>
+ * This class is not thread-safe. + * + * @opensearch.internal + */ +public class ReorganizingLongHash implements Releasable { + private static final long MAX_CAPACITY = 1L << 32; + private static final long DEFAULT_INITIAL_CAPACITY = 32; + private static final float DEFAULT_LOAD_FACTOR = 0.6f; + + /** + * Maximum load factor after which the capacity is doubled. + */ + private final float loadFactor; + + /** + * Utility class to allocate recyclable arrays. + */ + private final BigArrays bigArrays; + + /** + * Current capacity of the hash table. This must be a power of two so that the hash table slot + * can be identified quickly using bitmasks, thus avoiding expensive modulo or integer division. + */ + private long capacity; + + /** + * Bitmask to identify the hash table slot from a key's hash. + */ + private long mask; + + /** + * Size threshold after which the hash table needs to be doubled in capacity. + */ + private long grow; + + /** + * Current size of the hash table. + */ + private long size; + + /** + * Underlying array to store the hash table values. + * + *
+     * <p>
+     * Each hash table value (64-bit) uses the following byte packing strategy:
+     * <pre>
+     * |=========|===============|================|================================|
+     * | Discard | PSL           | Fingerprint    | Ordinal                        |
+     * |    -    |---------------|----------------|--------------------------------|
+     * | 1 bit   | 15 bits       | 16 bits        | 32 bits                        |
+     * |=========|===============|================|================================|
+     * </pre>
+     *
+     * <p>
+ * This allows us to encode and manipulate additional information in the hash table + * itself without having to look elsewhere in the memory, which is much slower. + * + *
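+ * <p>
+ * For example, the value 0x00034A2B00000007 packs PSL = 3 (bits 48-62), fingerprint = 0x4A2B
+ * (bits 32-47), and ordinal = 7 (bits 0-31), with the discard bit (bit 63) clear.
+ *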
+ * <p>
+ * Terminology: table[index] = value = (discard | psl | fingerprint | ordinal) + */ + private LongArray table; + + /** + * Underlying array to store the keys. + * + *
+ * <p>
+ * Terminology: keys[ordinal] = key + */ + private LongArray keys; + + /** + * Bitmasks to manipulate the hash table values. + */ + private static final long MASK_ORDINAL = 0x00000000FFFFFFFFL; // extract ordinal + private static final long MASK_FINGERPRINT = 0x0000FFFF00000000L; // extract fingerprint + private static final long MASK_PSL = 0x7FFF000000000000L; // extract PSL + private static final long INCR_PSL = 0x0001000000000000L; // increment PSL by one + + public ReorganizingLongHash(final BigArrays bigArrays) { + this(DEFAULT_INITIAL_CAPACITY, DEFAULT_LOAD_FACTOR, bigArrays); + } + + public ReorganizingLongHash(final long initialCapacity, final float loadFactor, final BigArrays bigArrays) { + assert initialCapacity > 0 : "initial capacity must be greater than 0"; + assert loadFactor > 0 && loadFactor < 1 : "load factor must be between 0 and 1"; + + this.bigArrays = bigArrays; + this.loadFactor = loadFactor; + + capacity = nextPowerOfTwo((long) (initialCapacity / loadFactor)); + mask = capacity - 1; + grow = (long) (capacity * loadFactor); + size = 0; + + table = bigArrays.newLongArray(capacity, false); + table.fill(0, capacity, -1); // -1 represents an empty slot + keys = bigArrays.newLongArray(initialCapacity, false); + } + + /** + * Adds the given key to the hash table and returns its ordinal. + * If the key exists already, it returns (-1 - ordinal). + */ + public long add(final long key) { + final long ordinal = find(key); + if (ordinal != -1) { + return -1 - ordinal; + } + + if (size >= grow) { + grow(); + } + + return insert(key); + } + + /** + * Returns the key associated with the given ordinal. + * The result is undefined for an unused ordinal. + */ + public long get(final long ordinal) { + return keys.get(ordinal); + } + + /** + * Returns the ordinal associated with the given key, or -1 if the key doesn't exist. + * + *
+ * <p>
+ * Using the 64-bit hash value, up to 32 least significant bits (LSB) are used to identify the
+ * home slot in the hash table, and an additional 16 bits are used to identify the fingerprint.
+ * The fingerprint further increases the entropy and reduces the number of false lookups in the
+ * keys' table during equality checks, which are expensive due to uncorrelated memory lookups.
+ *
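+ * <p>
+ * For example, with a table of capacity 64 (mask 0x3F), a key whose hash is 0x0000ABCD12345678
+ * maps to home slot 0x38 (hash & mask) and carries the fingerprint 0xABCD (bits 32-47).
+ *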
+ * <p>
+ * Total entropy bits = 16 + log2(capacity) + * + *
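+ * <p>
+ * For example, a table sized at 2^20 slots provides 16 + 20 = 36 entropy bits per lookup.
+ *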
+ * <p>
+ * Linear probing starts from the home slot, until a match or an empty slot is found. + * Values are first checked using their fingerprint (to reduce false positives), then verified + * in the keys' table using an equality check. + */ + public long find(final long key) { + final long hash = hash(key); + final long fingerprint = hash & MASK_FINGERPRINT; + + for (long idx = hash & mask, value, ordinal;; idx = (idx + 1) & mask) { + if ((value = table.get(idx)) == -1) { + return -1; + } else if (((value & MASK_FINGERPRINT) == fingerprint) && (keys.get((ordinal = (value & MASK_ORDINAL))) == key)) { + return ordinal; + } + } + } + + /** + * Returns the number of mappings in this hash table. + */ + public long size() { + return size; + } + + /** + * Inserts the given key in the hash table and returns its ordinal. + * + *
+ * <p>
+ * Inspired by Robin Hood Hashing (RHH): if the PSL for the existing value is less than the PSL + * for the value being inserted, swap the two values and keep going. Values that were inserted + * early and thus "lucked out" on their PSLs will gradually be moved away from their preferred + * slot as new values come in that could make better use of that place in the table. It evens out + * the PSLs across the board and reduces the longest PSL dramatically. + * + *
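+ * <p>
+ * For example, if an incoming value has already probed 4 slots (PSL = 4) and reaches a slot
+ * whose occupant has PSL = 1, the occupant is the "richer" of the two: it gets displaced and
+ * continues probing, while the incoming value takes over that slot.
+ *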
+ * <p>
+ * A lower variance is better: with modern CPU architectures, a PSL of 1 isn't much faster
+ * than a PSL of 3, since the main cost is fetching the cache line. The ideal hash table
+ * layout is the one where all values have equal PSLs, and that PSL fits within one cache line.
+ *
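+ * <p>
+ * For example, with 8-byte table entries and typical 64-byte cache lines, eight consecutive
+ * slots land on roughly one cache line, so probe lengths within that range cost about the
+ * same single memory fetch.
+ *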
+ * <p>
+ * The expected longest PSL for a full table: log(N) + * + *
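+ * <p>
+ * For example, a full table holding a million keys is expected to keep even its unluckiest
+ * key within roughly 14 to 20 probes of its home slot, depending on the base of the logarithm.
+ *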
+ * <p>
+ * Our implementation has a slight variation on top of it: by loosening the guarantees provided + * by RHH, we can improve the performance on correlated lookups (such as aggregating on repeated + * timestamps) by moving the "recent" keys closer to their home slot, and eventually converging + * to the ideal hash table layout defined by RHH. + */ + private long insert(final long key) { + final long hash = hash(key); + final long fingerprint = hash & MASK_FINGERPRINT; + + // The ideal home slot for the given key. + long idx = hash & mask; + + // The value yet to find an empty slot (candidate). + long value = fingerprint | size; + + // The existing value at idx. + long existingValue; + + // Always set the newly inserted key at its ideal home slot, even if it doesn't conform + // to the RHH scheme (yet). This will ensure subsequent correlated lookups are fast due + // to no additional probing. When another insertion causes this value to be displaced, it + // will eventually be placed at an appropriate location defined by the RHH scheme. + if ((value = table.set(idx, value)) == -1) { + // The ideal home slot was already empty; append the key and return early. + return append(key); + } + + // Find an alternative slot for the displaced value such that the longest PSL is minimized. + do { + idx = (idx + 1) & mask; + value += INCR_PSL; + + if ((existingValue = table.get(idx)) == -1) { + // Empty slot; insert the candidate value here. + table.set(idx, value); + return append(key); + } else if ((existingValue & MASK_PSL) <= (value & MASK_PSL)) { + // Existing value is "richer" than the candidate value at this index; + // swap and find an alternative slot for the displaced value. + // In the case of a tie, the candidate value (i.e. the recent value) is chosen as + // the winner and kept closer to its ideal home slot in order to speed up + // correlated lookups. + value = table.set(idx, value); + } + } while (true); + } + + /** + * Appends the key in the keys' table. + */ + private long append(final long key) { + keys = bigArrays.grow(keys, size + 1); + keys.set(size, key); + return size++; + } + + /** + * Returns the hash for the given key. + * Visible for unit-tests. + */ + long hash(final long key) { + return BitMixer.mix64(key); + } + + /** + * Returns the underlying hash table. + * Visible for unit-tests. + */ + LongArray getTable() { + return table; + } + + /** + * Grows the hash table by doubling its capacity and reinserting the keys. + */ + private void grow() { + // Ensure that the hash table doesn't grow too large. + // This implicitly also ensures that the ordinals are no larger than 2^32, thus, + // preventing them from polluting other bits (PSL/fingerprint) in the hash table values. 
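+ // At that limit, the keys array alone would hold 2^32 longs (32 GiB), so the bound is generous in practice.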
+ assert capacity < MAX_CAPACITY : "hash table already at the max capacity"; + + final long oldSize = size; + capacity <<= 1; + mask = capacity - 1; + size = 0; + grow = (long) (capacity * loadFactor); + table = bigArrays.resize(table, capacity); + table.fill(0, capacity, -1); + + for (long ordinal = 0; ordinal < oldSize; ordinal++) { + insert(keys.get(ordinal)); + } + } + + @Override + public void close() { + table.close(); + keys.close(); + } + + private static long nextPowerOfTwo(final long value) { + return Math.max(1, Long.highestOneBit(value - 1) << 1); + } +} diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/LongKeyedBucketOrds.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/LongKeyedBucketOrds.java index bcf77ee194ea4..1e2bacc258fe3 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/LongKeyedBucketOrds.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/LongKeyedBucketOrds.java @@ -34,7 +34,7 @@ import org.opensearch.common.lease.Releasable; import org.opensearch.common.util.BigArrays; -import org.opensearch.common.util.LongHash; +import org.opensearch.common.util.ReorganizingLongHash; import org.opensearch.common.util.LongLongHash; import org.opensearch.search.aggregations.CardinalityUpperBound; @@ -148,10 +148,10 @@ public long value() { * @opensearch.internal */ public static class FromSingle extends LongKeyedBucketOrds { - private final LongHash ords; + private final ReorganizingLongHash ords; public FromSingle(BigArrays bigArrays) { - ords = new LongHash(1, bigArrays); + ords = new ReorganizingLongHash(bigArrays); } @Override diff --git a/server/src/test/java/org/opensearch/common/util/ReorganizingLongHashTests.java b/server/src/test/java/org/opensearch/common/util/ReorganizingLongHashTests.java new file mode 100644 index 0000000000000..259eab6c624bd --- /dev/null +++ b/server/src/test/java/org/opensearch/common/util/ReorganizingLongHashTests.java @@ -0,0 +1,146 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.common.util; + +import org.opensearch.test.OpenSearchTestCase; + +import java.util.HashMap; +import java.util.Map; + +public class ReorganizingLongHashTests extends OpenSearchTestCase { + + public void testFuzzy() { + Map reference = new HashMap<>(); + + try ( + ReorganizingLongHash h = new ReorganizingLongHash( + randomIntBetween(1, 100), // random capacity + 0.6f + randomFloat() * 0.39f, // random load factor to verify collision resolution + BigArrays.NON_RECYCLING_INSTANCE + ) + ) { + // Verify the behaviour of "add" and "find". + for (int i = 0; i < (1 << 20); i++) { + long key = randomLong() % (1 << 12); // roughly ~4% unique keys + if (reference.containsKey(key)) { + long expectedOrdinal = reference.get(key); + assertEquals(-1 - expectedOrdinal, h.add(key)); + assertEquals(expectedOrdinal, h.find(key)); + } else { + assertEquals(-1, h.find(key)); + reference.put(key, (long) reference.size()); + assertEquals((long) reference.get(key), h.add(key)); + } + } + + // Verify the behaviour of "get". + for (Map.Entry entry : reference.entrySet()) { + assertEquals((long) entry.getKey(), h.get(entry.getValue())); + } + + // Verify the behaviour of "size". + assertEquals(reference.size(), h.size()); + + // Verify the calculation of PSLs. 
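+ // For each occupied slot, the PSL stored in the value's upper bits (value >>> 48) must equal
+ // the wrap-around distance between the slot index and the key's ideal home slot.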
+ final long capacity = h.getTable().size(); + final long mask = capacity - 1; + for (long idx = 0; idx < h.getTable().size(); idx++) { + final long value = h.getTable().get(idx); + if (value != -1) { + final long homeIdx = h.hash(h.get((int) value)) & mask; + assertEquals((capacity + idx - homeIdx) & mask, value >>> 48); + } + } + } + } + + public void testRearrangement() { + try (ReorganizingLongHash h = new ReorganizingLongHash(4, 0.6f, BigArrays.NON_RECYCLING_INSTANCE) { + /** + * Overriding with an "identity" hash function to make it easier to reason about the placement + * of values in the hash table. The backing array of the hash table will have a size (8), + * i.e. nextPowerOfTwo(initialCapacity/loadFactor), so the bitmask will be (7). + * The ideal home slot of a key can then be defined as: (hash(key) & mask) = (key & 7). + */ + @Override + long hash(long key) { + return key; + } + }) { + /* + * Add key=0, hash=0, home_slot=0 + * + * Before: empty slot. + * ▼ + * [ _ _ _ _ _ _ _ _ ] + * + * After: inserted [ordinal=0, psl=0] at the empty slot. + * [ 0 _ _ _ _ _ _ _ ] + */ + h.add(0); + assertEquals(encodeValue(0, 0, 0), h.getTable().get(0)); + + /* + * Add key=8, hash=8, home_slot=0 + * + * Before: occupied slot. + * ▼ + * [ 0 _ _ _ _ _ _ _ ] + * + * After: inserted [ordinal=1, psl=0] at the existing slot, displaced [ordinal=0, psl=0], + * and re-inserted it at the next empty slot as [ordinal=0, psl=1]. + * [ 1 0 _ _ _ _ _ _ ] + */ + h.add(8); + assertEquals(encodeValue(0, 0, 1), h.getTable().get(0)); + assertEquals(encodeValue(1, 0, 0), h.getTable().get(1)); + + /* + * Add key=1, hash=1, home_slot=1 + * + * Before: occupied slot. + * ▼ + * [ 1 0 _ _ _ _ _ _ ] + * + * After: inserted [ordinal=2, psl=0] at the existing slot, displaced [ordinal=0, psl=1], + * and re-inserted it at the next empty slot as [ordinal=0, psl=2]. + * [ 1 2 0 _ _ _ _ _ ] + */ + h.add(1); + assertEquals(encodeValue(0, 0, 1), h.getTable().get(0)); + assertEquals(encodeValue(0, 0, 2), h.getTable().get(1)); + assertEquals(encodeValue(2, 0, 0), h.getTable().get(2)); + + /* + * Add key=16, hash=16, home_slot=0 + * + * Before: occupied slot. + * ▼ + * [ 1 2 0 _ _ _ _ _ ] + * + * After: inserted [ordinal=3, psl=0] at the existing slot, displaced [ordinal=1, psl=0] + * and re-inserted it at the next best slot. Repeated this for other displaced values + * until everything found an empty slot. + * [ 3 1 0 2 _ _ _ _ ] + */ + h.add(16); + assertEquals(encodeValue(0, 0, 3), h.getTable().get(0)); + assertEquals(encodeValue(1, 0, 1), h.getTable().get(1)); + assertEquals(encodeValue(2, 0, 0), h.getTable().get(2)); + assertEquals(encodeValue(2, 0, 2), h.getTable().get(3)); + } + } + + private static long encodeValue(long psl, long fingerprint, long ordinal) { + assert psl < (1L << 15); + assert fingerprint < (1L << 16); + assert ordinal < (1L << 32); + return (psl << 48) | (fingerprint << 32) | ordinal; + } +}