Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support multi-tenant RAM buffers for IndexWriter #13951

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexUpgrader;
Expand Down Expand Up @@ -131,7 +132,7 @@ public void testUpgradeOldSingleSegmentIndexWithAdditions() throws Exception {
// add dummy segments (which are all in current
// version) to single segment index
MergePolicy mp = random().nextBoolean() ? newLogMergePolicy() : newTieredMergePolicy();
IndexWriterConfig iwc = new IndexWriterConfig(null).setMergePolicy(mp);
IndexWriterConfig iwc = new IndexWriterConfig((Analyzer) null).setMergePolicy(mp);
IndexWriter w = new IndexWriter(directory, iwc);
w.addIndexes(ramDir);
try (w) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import java.nio.file.Path;
import java.util.Properties;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.benchmark.BenchmarkTestCase;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.utils.Config;
Expand Down Expand Up @@ -45,7 +46,7 @@ public static void beforeClassAddIndexesTaskTest() throws Exception {
inputDir = testDir.resolve("input");
Directory tmpDir = newFSDirectory(inputDir);
try {
IndexWriter writer = new IndexWriter(tmpDir, new IndexWriterConfig(null));
IndexWriter writer = new IndexWriter(tmpDir, new IndexWriterConfig((Analyzer) null));
for (int i = 0; i < 10; i++) {
writer.addDocument(new Document());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
*/
package org.apache.lucene.index;

import java.io.IOException;

/**
* Default {@link FlushPolicy} implementation that flushes new segments based on RAM used and
* document count depending on the IndexWriter's {@link IndexWriterConfig}. It also applies pending
Expand Down Expand Up @@ -52,6 +54,18 @@ && flushOnDocCount()
}
}

@Override
public void flushRamManager(IndexWriter writer) throws IOException {
  // Only consult the shared RAM manager when auto-flush is enabled and the
  // buffer is actually shared between more than one writer.
  IndexWriterRAMManager ramManager = writer.getConfig().indexWriterRAMManager;
  boolean flushingEnabled =
      ramManager.getRamBufferSizeMB() != IndexWriterConfig.DISABLE_AUTO_FLUSH;
  if (flushingEnabled == false || ramManager.getWriterCount() <= 1) {
    return;
  }
  // Refresh this writer's RAM accounting, then compare the shared total against the limit.
  long totalBytes = ramManager.updateAndGetCurrentBytesUsed(writer.ramManagerId);
  if (totalBytes > ramManager.getRamBufferSizeMB() * 1024 * 1024) {
    ramManager.flushRoundRobin();
  }
}

private void flushDeletes(DocumentsWriterFlushControl control) {
control.setApplyAllDeletes();
if (infoStream.isEnabled("FP")) {
Expand Down
8 changes: 8 additions & 0 deletions lucene/core/src/java/org/apache/lucene/index/FlushPolicy.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package org.apache.lucene.index;

import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.InfoStream;

Expand Down Expand Up @@ -57,6 +58,13 @@ abstract class FlushPolicy {
public abstract void onChange(
DocumentsWriterFlushControl control, DocumentsWriterPerThread perThread);

/**
 * Triggers a flush through the shared RAM manager if the policy decides one is needed. NOTE:
 * this does not necessarily flush the passed-in writer — with the default round-robin policy,
 * in most cases some other writer sharing the same RAM buffer is flushed instead.
 */
public abstract void flushRamManager(IndexWriter writer) throws IOException;

/** Called by DocumentsWriter to initialize the FlushPolicy */
protected synchronized void init(LiveIndexWriterConfig indexWriterConfig) {
this.indexWriterConfig = indexWriterConfig;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Collection;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.CommandLineUtil;
Expand Down Expand Up @@ -135,7 +136,7 @@ static IndexUpgrader parseArgs(String[] args) throws IOException {
* {@code matchVersion}. The tool refuses to upgrade indexes with multiple commit points.
*/
public IndexUpgrader(Directory dir) {
// The (Analyzer) cast disambiguates between the IndexWriterConfig(Analyzer) and
// IndexWriterConfig(IndexWriterRAMManager) overloads; no analyzer is needed for upgrading.
this(dir, new IndexWriterConfig((Analyzer) null), false);
}

/**
Expand All @@ -145,7 +146,7 @@ public IndexUpgrader(Directory dir) {
* be sent to this stream.
*/
public IndexUpgrader(Directory dir, InfoStream infoStream, boolean deletePriorCommits) {
// The (Analyzer) cast disambiguates between the Analyzer and IndexWriterRAMManager overloads.
this(dir, new IndexWriterConfig((Analyzer) null), deletePriorCommits);
if (null != infoStream) {
this.iwc.setInfoStream(infoStream);
}
}
Expand Down
7 changes: 7 additions & 0 deletions lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,9 @@ public void onTicketBacklog() {
}
};

/** The id that is associated with this writer for {@link IndexWriterRAMManager} */
public final int ramManagerId;

/**
* Expert: returns a readonly reader, covering all committed as well as un-committed changes to
* the index. This provides "near real-time" searching, in that changes made during an IndexWriter
Expand Down Expand Up @@ -1211,6 +1214,7 @@ public IndexWriter(Directory d, IndexWriterConfig conf) throws IOException {
writeLock = null;
}
}
this.ramManagerId = config.indexWriterRAMManager.registerWriter(this);
}

/** Confirms that the incoming index sort (if any) matches the existing index sort (if any). */
Expand Down Expand Up @@ -1365,6 +1369,7 @@ private void shutdown() throws IOException {
*/
@Override
public void close() throws IOException {
config.indexWriterRAMManager.removeWriter(ramManagerId);
if (config.getCommitOnClose()) {
shutdown();
} else {
Expand Down Expand Up @@ -2445,6 +2450,7 @@ public void rollback() throws IOException {
// Ensure that only one thread actually gets to do the
// closing, and make sure no commit is also in progress:
if (shouldClose(true)) {
config.indexWriterRAMManager.removeWriter(ramManagerId);
rollbackInternal();
}
}
Expand Down Expand Up @@ -6012,6 +6018,7 @@ private long maybeProcessEvents(long seqNo) throws IOException {
seqNo = -seqNo;
processEvents(true);
}
config.flushPolicy.flushRamManager(this);
return seqNo;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,21 @@ public IndexWriterConfig() {
* problem you should switch to {@link LogByteSizeMergePolicy} or {@link LogDocMergePolicy}.
*/
public IndexWriterConfig(Analyzer analyzer) {
super(analyzer);
this(analyzer, new IndexWriterRAMManager(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason for not making this change in the default constructor? We could avoid making changes to all the tests.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the reason I had to make all the changes in the tests was because I added this constructor:

public IndexWriterConfig(IndexWriterRAMManager indexWriterRAMManager) {

And then in the tests there were a bunch of new IndexWriterConfig(null) calls which became ambiguous. I think that this constructor is potentially useful which is why I took the hit and changed all those tests, but I can remove it to avoid all those test changes?

}

/**
 * Creates a new config with the provided {@link IndexWriterRAMManager} and a {@link
 * StandardAnalyzer}. If you want to share a RAM buffer between multiple {@link IndexWriter}
 * instances, you must pass the same manager to each config, as {@link IndexWriterConfig}
 * maintains a 1:1 relationship with its {@link IndexWriter}.
 */
public IndexWriterConfig(IndexWriterRAMManager indexWriterRAMManager) {
this(new StandardAnalyzer(), indexWriterRAMManager);
}

/**
 * Creates a new config with the provided {@link Analyzer} and {@link IndexWriterRAMManager}.
 * Sharing one manager across several configs lets the associated writers share a RAM buffer.
 */
public IndexWriterConfig(Analyzer analyzer, IndexWriterRAMManager indexWriterRAMManager) {
super(analyzer, indexWriterRAMManager);
}

/**
Expand Down Expand Up @@ -393,6 +407,11 @@ public double getRAMBufferSizeMB() {
return super.getRAMBufferSizeMB();
}

/** Returns the {@link IndexWriterRAMManager} shared by writers created from this config. */
@Override
public IndexWriterRAMManager getIndexWriterRAMManager() {
return super.getIndexWriterRAMManager();
}

/**
* Information about merges, deletes and a message when maxFieldLength is reached will be printed
* to this. Must not be null, but {@link InfoStream#NO_OUTPUT} may be used to suppress output.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.ReentrantLock;

/**
* For managing multiple instances of {@link IndexWriter} sharing the same buffer (configured by
* {@link IndexWriterConfig#setRAMBufferSizeMB})
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be the other way around in my opinion, the RAM buffer size should be on IndexWriterRAMManager, and setting a ramBufferSizeMB on IndexWriterConfig would internally create a new IndexWriterRAMManager under the hood that is shared with no other IndexWriter.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry I'm probably missing something here - so I get what you're saying about having the IndexWriterRAMManager in the IndexWriterConfig, but what would be the point of creating an IndexWriterRAMManager for a single IndexWriter? Wouldn't DocumentsWriterFlushControl be sufficient for this case?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

but what would be the point of creating an IndexWriterRAMManager for a single IndexWriter

I think the idea is to be able to create IndexWriters that don't share their RAM buffer limit with other writers. Maybe we could just set IndexWriterRAMManager to null, if ramBufferSizeMB is explicitly specified in the IW config (instead of creating a new ram manager).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm I took a slightly different approach, will publish a new PR soon. Maybe we can discuss it there but pretty much I just do what @jpountz suggested and move the ramBufferSizeMB to be a value held by the IndexWriterRAMManager. I think we can then discuss if we should just disable calling writer flushes in the manager if there is only a single writer.

*/
public class IndexWriterRAMManager {
private final LinkedIdToWriter idToWriter = new LinkedIdToWriter();
private final AtomicInteger idGenerator = new AtomicInteger();
private double ramBufferSizeMB;

/**
 * Creates a manager with the given shared RAM buffer size.
 *
 * @param ramBufferSizeMB the RAM buffer size to use between all registered {@link IndexWriter}
 *     instances, or {@link IndexWriterConfig#DISABLE_AUTO_FLUSH} to disable auto flushing
 * @throws IllegalArgumentException if the size is enabled but not positive
 */
public IndexWriterRAMManager(double ramBufferSizeMB) {
if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH && ramBufferSizeMB <= 0.0) {
throw new IllegalArgumentException("ramBufferSize should be > 0.0 MB when enabled");
}
this.ramBufferSizeMB = ramBufferSizeMB;
}

/**
 * Set the buffer size for this manager.
 *
 * @param ramBufferSizeMB the new shared buffer size in MB, or {@link
 *     IndexWriterConfig#DISABLE_AUTO_FLUSH} to disable auto flushing
 * @throws IllegalArgumentException if the size is enabled but not positive
 */
public void setRamBufferSizeMB(double ramBufferSizeMB) {
  // Mirror the constructor's validation so the setter cannot put the manager into a state the
  // constructor would have rejected.
  if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH && ramBufferSizeMB <= 0.0) {
    throw new IllegalArgumentException("ramBufferSize should be > 0.0 MB when enabled");
  }
  this.ramBufferSizeMB = ramBufferSizeMB;
}

/**
 * Get the buffer size assigned to this manager.
 *
 * <p>NOTE(review): {@code ramBufferSizeMB} is a plain non-volatile field mutated by {@link
 * #setRamBufferSizeMB}, so readers on other threads may observe a stale value — confirm this is
 * acceptable.
 */
public double getRamBufferSizeMB() {
return ramBufferSizeMB;
}

/**
 * Calls {@link IndexWriter#flushNextBuffer()} in a round-robin fashion, starting from the first
 * writer added that has not been removed yet. Subsequent calls flush the next writer in line,
 * eventually looping back to the beginning.
 *
 * @return the id of the writer that was flushed, or -1 if no writers are registered (exposed for
 *     testing)
 */
public int flushRoundRobin() throws IOException {
return idToWriter.flushRoundRobin();
}

/** Gets the number of writers currently registered with this RAM manager. */
public int getWriterCount() {
return idToWriter.size();
}

/**
 * Registers a writer and returns the associated id.
 *
 * @param writer the writer to track
 * @return a unique id for the writer, taken from a monotonically increasing counter
 */
protected int registerWriter(IndexWriter writer) {
int id = idGenerator.incrementAndGet();
idToWriter.addWriter(writer, id);
return id;
}

/** Removes a writer given the writer's id; protected for testing. */
protected void removeWriter(int id) {
idToWriter.removeWriter(id);
}

/**
 * Calls {@link IndexWriter#ramBytesUsed()} for the writer id passed in, folds that value into
 * the shared running total, and returns the total. If the id is unknown the total is returned
 * unchanged; if no writers are registered, returns 0.
 */
public long updateAndGetCurrentBytesUsed(int id) {
return idToWriter.getTotalRamTracker(id);
}

private static class LinkedIdToWriter {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about using a java Queue implementation instead of the custom linked-list logic? You could round-robin on elements by removing, processing and add them back to the queue.

I suppose this queue size would be small, so array deque and linked lists are both fine? We can also get some thread safe implementations out of the box.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, so I looked into using both Queue and LinkedHashMap to do this. The issue I found was that there was still complexity in maintaining the last id that was flushed and the total ram used, so the only function that really became less complex was the addWriter function. I think given that this probably won't simplify the overall function too much, I'm inclined the keep the implementation the same way it is right now, but if you disagree feel free to let me know and I can take a second look at it.

// Map from writer id to its node in the circular doubly-linked list below.
private final Map<Integer, IndexWriterNode> idToWriterNode = new HashMap<>();
// Head and tail of a circular doubly-linked list of registered writers (defines flush order).
private IndexWriterNode first;
private IndexWriterNode last;
// Running sum of the last-sampled ramBytesUsed() of every registered writer.
private long totalRamTracker;

// NOTE(review): every method below synchronizes on this ReentrantLock's monitor rather than
// calling lock()/unlock(); a plain Object lock would serve the same purpose — confirm intent.
private final ReentrantLock lock = new ReentrantLock();

// for round-robin flushing; -1 means no writer has been flushed yet
private int lastIdFlushed = -1;

// Appends the writer at the tail of the circular list and indexes it by id.
void addWriter(IndexWriter writer, int id) {
synchronized (lock) {
IndexWriterNode node = new IndexWriterNode(writer, id);
if (idToWriterNode.isEmpty()) {
// First node: seed head/tail so the generic splice below makes it self-referential.
first = node;
last = node;
}
// Splice the node in between the current tail and head, keeping the list circular.
node.next = first;
last.next = node;
node.prev = last;
last = node;
first.prev = node;
idToWriterNode.put(id, node);
}
}

// Unlinks the writer with the given id from the circular list; no-op for unknown ids.
void removeWriter(int id) {
synchronized (lock) {
if (idToWriterNode.containsKey(id)) {
IndexWriterNode nodeToRemove = idToWriterNode.remove(id);
// Drop this writer's last-sampled RAM from the shared total.
totalRamTracker -= nodeToRemove.ram;
if (idToWriterNode.isEmpty()) {
// Last writer removed: reset the list and the round-robin cursor.
first = null;
last = null;
lastIdFlushed = -1;
return;
}
if (id == lastIdFlushed) {
// Move the round-robin cursor to the predecessor so the next flush still targets the
// removed node's successor.
lastIdFlushed = nodeToRemove.prev.id;
}
// Unlink from the circular list, fixing up head/tail if the removed node held either role.
nodeToRemove.prev.next = nodeToRemove.next;
nodeToRemove.next.prev = nodeToRemove.prev;
if (nodeToRemove == first) {
first = nodeToRemove.next;
}
if (nodeToRemove == last) {
last = nodeToRemove.prev;
}
}
}
}

// Returns the writer id that we attempted to flush, or -1 if no writers are registered
// (for testing purposes)
int flushRoundRobin() throws IOException {
synchronized (lock) {
if (idToWriterNode.isEmpty()) {
return -1;
}
int idToFlush;
if (lastIdFlushed == -1) {
// Nothing flushed yet: start with the oldest registered writer.
idToFlush = first.id;
} else {
idToFlush = idToWriterNode.get(lastIdFlushed).next.id;
}
// NOTE(review): flushNextBuffer() performs I/O while the lock is held, blocking concurrent
// register/remove/RAM-update calls for the duration — confirm this is acceptable.
idToWriterNode.get(idToFlush).writer.flushNextBuffer();
lastIdFlushed = idToFlush;
return idToFlush;
}
}

// Re-samples ramBytesUsed() for the writer with the given id, applies the delta to the shared
// running total, and returns the total. Returns 0 when no writers are registered, and the
// unchanged total when the id is unknown.
long getTotalRamTracker(int id) {
  synchronized (lock) {
    if (idToWriterNode.isEmpty()) {
      return 0;
    }
    // Single map lookup instead of containsKey + three get(id) calls.
    IndexWriterNode node = idToWriterNode.get(id);
    if (node == null) {
      return totalRamTracker;
    }
    long oldRAMBytesUsed = node.ram;
    long newRAMBytesUsed = node.writer.ramBytesUsed();
    node.ram = newRAMBytesUsed;
    totalRamTracker += newRAMBytesUsed - oldRAMBytesUsed;
    return totalRamTracker;
  }
}

// Number of currently registered writers.
int size() {
synchronized (lock) {
return idToWriterNode.size();
}
}

/** Node in the circular doubly-linked list of registered writers. */
private static class IndexWriterNode {
IndexWriter writer;
// id assigned by IndexWriterRAMManager when the writer was registered
int id;
// last-sampled ramBytesUsed() for this writer
long ram;
IndexWriterNode next;
IndexWriterNode prev;

IndexWriterNode(IndexWriter writer, int id) {
this.writer = writer;
this.id = id;
// Sample the writer's current RAM usage at registration time.
this.ram = writer.ramBytesUsed();
}
}
}
}
Loading
Loading