Re-structure document ID generation favoring _id inverted index com…

…pression (elastic#104683) This implementation restructures auto-generated document IDs to maximize compression within Lucene's terms dictionary. The key insight is placing stable or slowly-changing components at the start of the ID - the most significant bytes of the timestamp change very gradually (the first byte shifts only every 35 years, the second every 50 days). This careful ordering means that large sequences of IDs generated close in time will share common prefixes, allowing Lucene's Finite State Transducer (FST) to store terms more compactly. To maintain uniqueness while preserving these compression benefits, the ID combines three elements: a timestamp that ensures time-based ordering, the coordinator's MAC address for cluster-wide uniqueness, and a sequence number for handling high-throughput scenarios. The timestamp handling is particularly robust, using atomic operations to prevent backwards movement even if the system clock shifts. For high-volume indices generating millions of documents, this optimization can lead to substantial storage savings while maintaining strict guarantees about ID uniqueness and ordering. (cherry picked from commit 778ab8f) # Conflicts: # server/src/main/java/org/elasticsearch/index/IndexVersions.java
salvatore-campagna · Nov 14, 2024 · 553b567 · 553b567
1 parent 161b7ef
commit 553b567
Show file tree

Hide file tree

Showing 10 changed files with 225 additions and 55 deletions.
diff --git a/docs/changelog/104683.yaml b/docs/changelog/104683.yaml
@@ -0,0 +1,5 @@
+pr: 104683
+summary: "Feature: re-structure document ID generation favoring _id inverted index compression"
+area: Logs
+type: enhancement
+issues: []
diff --git a/server/src/internalClusterTest/java/org/elasticsearch/action/bulk/BulkIntegrationIT.java b/server/src/internalClusterTest/java/org/elasticsearch/action/bulk/BulkIntegrationIT.java
@@ -104,7 +104,11 @@ public void testBulkWithWriteIndexAndRouting() {
     // allowing the auto-generated timestamp to externally be set would allow making the index inconsistent with duplicate docs
     public void testExternallySetAutoGeneratedTimestamp() {
         IndexRequest indexRequest = new IndexRequest("index1").source(Collections.singletonMap("foo", "baz"));
-        indexRequest.autoGenerateId();
+        if (randomBoolean()) {
+            indexRequest.autoGenerateId();
+        } else {
+            indexRequest.autoGenerateTimeBasedId();
+        }
         if (randomBoolean()) {
             indexRequest.id("test");
         }

diff --git a/server/src/main/java/org/elasticsearch/action/index/IndexRequest.java b/server/src/main/java/org/elasticsearch/action/index/IndexRequest.java
@@ -51,6 +51,7 @@
 import java.util.Locale;
 import java.util.Map;
 import java.util.Objects;
+import java.util.function.Supplier;
 
 import static org.elasticsearch.action.ValidateActions.addValidationError;
 import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_PRIMARY_TERM;
@@ -76,6 +77,9 @@ public class IndexRequest extends ReplicatedWriteRequest<IndexRequest> implement
     private static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(IndexRequest.class);
     private static final TransportVersion PIPELINES_HAVE_RUN_FIELD_ADDED = TransportVersions.V_8_10_X;
 
+    private static final Supplier<String> ID_GENERATOR = UUIDs::base64UUID;
+    private static final Supplier<String> K_SORTED_TIME_BASED_ID_GENERATOR = UUIDs::base64TimeBasedKOrderedUUID;
+
     /**
      * Max length of the source document to include into string()
      *
@@ -692,10 +696,18 @@ public void process(IndexRouting indexRouting) {
      * request compatible with the append-only optimization.
      */
     public void autoGenerateId() {
-        assert id == null;
-        assert autoGeneratedTimestamp == UNSET_AUTO_GENERATED_TIMESTAMP : "timestamp has already been generated!";
-        assert ifSeqNo == UNASSIGNED_SEQ_NO;
-        assert ifPrimaryTerm == UNASSIGNED_PRIMARY_TERM;
+        assertBeforeGeneratingId();
+        autoGenerateTimestamp();
+        id(ID_GENERATOR.get());
+    }
+
+    public void autoGenerateTimeBasedId() {
+        assertBeforeGeneratingId();
+        autoGenerateTimestamp();
+        id(K_SORTED_TIME_BASED_ID_GENERATOR.get());
+    }
+
+    private void autoGenerateTimestamp() {
         /*
          * Set the auto generated timestamp so the append only optimization
          * can quickly test if this request *must* be unique without reaching
@@ -704,8 +716,13 @@ public void autoGenerateId() {
          * never work before 1970, but that's ok. It's after 1970.
          */
         autoGeneratedTimestamp = Math.max(0, System.currentTimeMillis());
-        String uid = UUIDs.base64UUID();
-        id(uid);
+    }
+
+    private void assertBeforeGeneratingId() {
+        assert id == null;
+        assert autoGeneratedTimestamp == UNSET_AUTO_GENERATED_TIMESTAMP : "timestamp has already been generated!";
+        assert ifSeqNo == UNASSIGNED_SEQ_NO;
+        assert ifPrimaryTerm == UNASSIGNED_PRIMARY_TERM;
     }
 
     /**

diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java b/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java
@@ -23,6 +23,8 @@
 import org.elasticsearch.common.xcontent.XContentHelper;
 import org.elasticsearch.core.Nullable;
 import org.elasticsearch.features.NodeFeature;
+import org.elasticsearch.index.IndexMode;
+import org.elasticsearch.index.IndexVersion;
 import org.elasticsearch.index.IndexVersions;
 import org.elasticsearch.index.mapper.TimeSeriesRoutingHashFieldMapper;
 import org.elasticsearch.transport.Transports;
@@ -146,11 +148,15 @@ public void checkIndexSplitAllowed() {}
 
     private abstract static class IdAndRoutingOnly extends IndexRouting {
         private final boolean routingRequired;
+        private final IndexVersion creationVersion;
+        private final IndexMode indexMode;
 
         IdAndRoutingOnly(IndexMetadata metadata) {
             super(metadata);
+            this.creationVersion = metadata.getCreationVersion();
             MappingMetadata mapping = metadata.mapping();
             this.routingRequired = mapping == null ? false : mapping.routingRequired();
+            this.indexMode = metadata.getIndexMode();
         }
 
         protected abstract int shardId(String id, @Nullable String routing);
@@ -160,7 +166,11 @@ public void process(IndexRequest indexRequest) {
             // generate id if not already provided
             final String id = indexRequest.id();
             if (id == null) {
-                indexRequest.autoGenerateId();
+                if (creationVersion.onOrAfter(IndexVersions.TIME_BASED_K_ORDERED_DOC_ID_BACKPORT) && indexMode == IndexMode.LOGSDB) {
+                    indexRequest.autoGenerateTimeBasedId();
+                } else {
+                    indexRequest.autoGenerateId();
+                }
             } else if (id.isEmpty()) {
                 throw new IllegalArgumentException("if _id is specified it must not be empty");
             }

diff --git a/server/src/main/java/org/elasticsearch/common/TimeBasedKOrderedUUIDGenerator.java b/server/src/main/java/org/elasticsearch/common/TimeBasedKOrderedUUIDGenerator.java
@@ -0,0 +1,73 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.common;
+
+import java.nio.ByteBuffer;
+import java.util.Base64;
+
+/**
+ * Generates a base64-encoded, k-ordered UUID string optimized for compression and efficient indexing.
+ * <p>
+ * This method produces a time-based UUID where slowly changing components like the timestamp appear first,
+ * improving prefix-sharing and compression during indexing. It ensures uniqueness across nodes by incorporating
+ * a timestamp, a MAC address, and a sequence ID.
+ * <p>
+ * <b>Timestamp:</b> Represents the current time in milliseconds, ensuring ordering and uniqueness.
+ * <br>
+ * <b>MAC Address:</b> Ensures uniqueness across different coordinators.
+ * <br>
+ * <b>Sequence ID:</b> Differentiates UUIDs generated within the same millisecond, ensuring uniqueness even at high throughput.
+ * <p>
+ * The result is a compact base64-encoded string, optimized for efficient compression of the _id field in an inverted index.
+ */
+public class TimeBasedKOrderedUUIDGenerator extends TimeBasedUUIDGenerator {
+    private static final Base64.Encoder BASE_64_NO_PADDING = Base64.getEncoder().withoutPadding();
+
+    @Override
+    public String getBase64UUID() {
+        final int sequenceId = this.sequenceNumber.incrementAndGet() & 0x00FF_FFFF;
+
+        // Calculate timestamp to ensure ordering and avoid backward movement in case of time shifts.
+        // Uses AtomicLong to guarantee that timestamp increases even if the system clock moves backward.
+        // If the sequenceId overflows (reaches 0 within the same millisecond), the timestamp is incremented
+        // to ensure strict ordering.
+        long timestamp = this.lastTimestamp.accumulateAndGet(
+            currentTimeMillis(),
+            sequenceId == 0 ? (lastTimestamp, currentTimeMillis) -> Math.max(lastTimestamp, currentTimeMillis) + 1 : Math::max
+        );
+
+        final byte[] uuidBytes = new byte[15];
+        final ByteBuffer buffer = ByteBuffer.wrap(uuidBytes);
+
+        buffer.put((byte) (timestamp >>> 40)); // changes every 35 years
+        buffer.put((byte) (timestamp >>> 32)); // changes every ~50 days
+        buffer.put((byte) (timestamp >>> 24)); // changes every ~4.5h
+        buffer.put((byte) (timestamp >>> 16)); // changes every ~65 secs
+
+        // MAC address of the coordinator might change if there are many coordinators in the cluster
+        // and the indexing api does not necessarily target the same coordinator.
+        byte[] macAddress = macAddress();
+        assert macAddress.length == 6;
+        buffer.put(macAddress, 0, macAddress.length);
+
+        buffer.put((byte) (sequenceId >>> 16));
+
+        // From hereinafter everything is almost like random and does not compress well
+        // due to unlikely prefix-sharing
+        buffer.put((byte) (timestamp >>> 8));
+        buffer.put((byte) (sequenceId >>> 8));
+        buffer.put((byte) timestamp);
+        buffer.put((byte) sequenceId);
+
+        assert buffer.position() == uuidBytes.length;
+
+        return BASE_64_NO_PADDING.encodeToString(uuidBytes);
+    }
+}
diff --git a/server/src/main/java/org/elasticsearch/common/TimeBasedUUIDGenerator.java b/server/src/main/java/org/elasticsearch/common/TimeBasedUUIDGenerator.java
@@ -25,10 +25,10 @@ class TimeBasedUUIDGenerator implements UUIDGenerator {
 
     // We only use bottom 3 bytes for the sequence number. Paranoia: init with random int so that if JVM/OS/machine goes down, clock slips
     // backwards, and JVM comes back up, we are less likely to be on the same sequenceNumber at the same time:
-    private final AtomicInteger sequenceNumber = new AtomicInteger(SecureRandomHolder.INSTANCE.nextInt());
+    protected final AtomicInteger sequenceNumber = new AtomicInteger(SecureRandomHolder.INSTANCE.nextInt());
 
     // Used to ensure clock moves forward:
-    private final AtomicLong lastTimestamp = new AtomicLong(0);
+    protected final AtomicLong lastTimestamp = new AtomicLong(0);
 
     private static final byte[] SECURE_MUNGED_ADDRESS = MacAddressProvider.getSecureMungedAddress();
 

diff --git a/server/src/main/java/org/elasticsearch/common/UUIDs.java b/server/src/main/java/org/elasticsearch/common/UUIDs.java
@@ -16,6 +16,8 @@
 public class UUIDs {
 
     private static final RandomBasedUUIDGenerator RANDOM_UUID_GENERATOR = new RandomBasedUUIDGenerator();
+
+    private static final UUIDGenerator TIME_BASED_K_ORDERED_GENERATOR = new TimeBasedKOrderedUUIDGenerator();
     private static final UUIDGenerator TIME_UUID_GENERATOR = new TimeBasedUUIDGenerator();
 
     /**
@@ -33,6 +35,14 @@ public static String base64UUID() {
         return TIME_UUID_GENERATOR.getBase64UUID();
     }
 
+    public static String base64TimeBasedKOrderedUUID() {
+        return TIME_BASED_K_ORDERED_GENERATOR.getBase64UUID();
+    }
+
+    public static String base64TimeBasedUUID() {
+        return TIME_UUID_GENERATOR.getBase64UUID();
+    }
+
     /**
      * The length of a UUID string generated by {@link #randomBase64UUID} and {@link #randomBase64UUIDSecureString}.
      */

diff --git a/server/src/main/java/org/elasticsearch/index/IndexVersions.java b/server/src/main/java/org/elasticsearch/index/IndexVersions.java
@@ -120,6 +120,7 @@ private static IndexVersion def(int id, Version luceneVersion) {
     public static final IndexVersion ENABLE_IGNORE_ABOVE_LOGSDB = def(8_517_00_0, Version.LUCENE_9_12_0);
     public static final IndexVersion ADD_ROLE_MAPPING_CLEANUP_MIGRATION = def(8_518_00_0, Version.LUCENE_9_12_0);
     public static final IndexVersion LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT_BACKPORT = def(8_519_00_0, Version.LUCENE_9_12_0);
+    public static final IndexVersion TIME_BASED_K_ORDERED_DOC_ID_BACKPORT = def(8_520_00_0, Version.LUCENE_9_12_0);
     /*
      * STOP! READ THIS FIRST! No, really,
      *        ____ _____ ___  ____  _        ____  _____    _    ____    _____ _   _ ___ ____    _____ ___ ____  ____ _____ _

diff --git a/server/src/test/java/org/elasticsearch/action/index/IndexRequestTests.java b/server/src/test/java/org/elasticsearch/action/index/IndexRequestTests.java
@@ -128,6 +128,12 @@ public void testAutoGenerateId() {
         assertTrue("expected > 0 but got: " + request.getAutoGeneratedTimestamp(), request.getAutoGeneratedTimestamp() > 0);
     }
 
+    public void testAutoGenerateTimeBasedId() {
+        IndexRequest request = new IndexRequest("index");
+        request.autoGenerateTimeBasedId();
+        assertTrue("expected > 0 but got: " + request.getAutoGeneratedTimestamp(), request.getAutoGeneratedTimestamp() > 0);
+    }
+
     public void testIndexResponse() {
         ShardId shardId = new ShardId(randomAlphaOfLengthBetween(3, 10), randomAlphaOfLengthBetween(3, 10), randomIntBetween(0, 1000));
         String id = randomAlphaOfLengthBetween(3, 10);