diff --git a/dora/core/client/fs/src/test/java/alluxio/client/file/dora/ConsistentHashProviderTest.java b/dora/core/client/fs/src/test/java/alluxio/client/file/dora/ConsistentHashProviderTest.java
index 6d88ca239a8f..b7be396d49a5 100644
--- a/dora/core/client/fs/src/test/java/alluxio/client/file/dora/ConsistentHashProviderTest.java
+++ b/dora/core/client/fs/src/test/java/alluxio/client/file/dora/ConsistentHashProviderTest.java
@@ -25,7 +25,10 @@
 import org.apache.commons.lang3.RandomStringUtils;
 import org.junit.Test;
 
+import java.util.Collection;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.NavigableMap;
 import java.util.Set;
 import java.util.concurrent.CountDownLatch;
@@ -48,6 +51,69 @@ public void uninitializedThrowsException() {
     assertThrows(IllegalStateException.class, () -> provider.get(OBJECT_KEY, 0));
   }
 
+  /**
+   * This test calculates the standard deviation over mean of the key range covered by
+   * each physical node, summed over the virtual nodes assigned to it. It arbitrarily
+   * bounds the ratio at 0.25, but ideally this number should get smaller over time as
+   * we improve the hashing algorithm and use better ways to assign virtual nodes to
+   * physical nodes.
+   *
+   * This uses 2000 virtual nodes and 50 physical nodes; if these parameters change,
+   * the bound is likely going to change.
+   */
+  @Test
+  public void virtualNodeDistribution() {
+    ConsistentHashProvider provider = new ConsistentHashProvider(1, WORKER_LIST_TTL_MS);
+    // set initial state
+    provider.refresh(generateRandomWorkerList(50), 2000);
+    Map<?, Long> rangePerNode = sumKeyRangePerNode(provider.getActiveNodesMap());
+    assertTrue(calcSDoverMean(rangePerNode.values()) < 0.25);
+  }
+
+  /**
+   * Sums, for every distinct value of the given map, the distance between each key mapped
+   * to that value and the key preceding it in ascending order. The first key is measured
+   * from {@link Integer#MIN_VALUE}.
+   *
+   * NOTE(review): the range above the largest key is not attributed to any node. If lookups
+   * wrap around to the first entry, that range belongs to the first node; confirm against
+   * the provider's lookup logic.
+   *
+   * @param <T> the node type
+   * @param ring the nodes keyed by their position, iterated in ascending key order
+   * @return the accumulated key distance per node
+   */
+  private static <T> Map<T, Long> sumKeyRangePerNode(NavigableMap<Integer, T> ring) {
+    Map<T, Long> rangePerNode = new HashMap<>();
+    // Track the previous key as a long so the subtraction below cannot overflow an int.
+    long previousKey = Integer.MIN_VALUE;
+    for (Map.Entry<Integer, T> entry : ring.entrySet()) {
+      rangePerNode.merge(entry.getValue(), entry.getKey() - previousKey, Long::sum);
+      previousKey = entry.getKey();
+    }
+    return rangePerNode;
+  }
+
+  /**
+   * Computes the coefficient of variation (population standard deviation divided by the
+   * mean) of the given values.
+   *
+   * @param list the values to evaluate
+   * @return the standard deviation over the mean; NaN if the collection is empty
+   */
+  private double calcSDoverMean(Collection<Long> list) {
+    long sum = 0L;
+    for (long num : list) {
+      sum += num;
+    }
+    double avg = sum * 1.0 / list.size();
+    double squaredDeviations = 0;
+    for (long num : list) {
+      squaredDeviations += (num - avg) * (num - avg);
+    }
+    return Math.sqrt(squaredDeviations / list.size()) / avg;
+  }
+
   @Test
   public void concurrentInitialization() {
     ConsistentHashProvider provider = new ConsistentHashProvider(1, WORKER_LIST_TTL_MS);