apache · romseygeek · Nov 19, 2025 · Nov 19, 2025 · Nov 21, 2025 · martijnvg
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
@@ -222,7 +222,9 @@ Optimizations
 
 * GITHUB#15397: NumericComparator: immediately check whether a segment is competitive with the recorded bottom (Martijn van Groningen)
 
-# GITHUB#15303: Speed up NumericUtils#{add,subtract} by operating on integers instead of bytes. (Kaival Parikh)
+* GITHUB#15303: Speed up NumericUtils#{add,subtract} by operating on integers instead of bytes. (Kaival Parikh)
+
+* GITHUB#15436: Allow Comparators to re-order segments at query time to help result pruning. (Alan Woodward)
 
 Bug Fixes
 ---------------------

diff --git a/lucene/core/src/java/org/apache/lucene/search/Collector.java b/lucene/core/src/java/org/apache/lucene/search/Collector.java
@@ -17,6 +17,7 @@
 package org.apache.lucene.search;
 
 import java.io.IOException;
+import java.util.Comparator;
 import org.apache.lucene.index.LeafReaderContext;
 
 /**
@@ -61,4 +62,12 @@ public interface Collector {
    * Collector#getLeafCollector}.
    */
   default void setWeight(Weight weight) {}
+
+  /**
+   * Returns a Comparator that orders LeafReaderContexts so that segments more likely to be skipped
+   * in non-exhaustive ScoreModes are searched later, or {@code null} to leave the order unchanged.
+   */
+  default Comparator<LeafReaderContext> getLeafReaderComparator() {
+    return null;
+  }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
@@ -799,6 +799,14 @@ protected void search(LeafReaderContextPartition[] partitions, Weight weight, Co
 
     collector.setWeight(weight);
 
+    Comparator<LeafReaderContext> leafComparator = collector.getLeafReaderComparator();
+    if (leafComparator != null) {
+      // copy the partitions list so that the original doesn't get mutated by sorting
+      LeafReaderContextPartition[] sortedPartitions = partitions.clone();
+      Arrays.sort(sortedPartitions, (o1, o2) -> leafComparator.compare(o1.ctx, o2.ctx));
+      partitions = sortedPartitions;
+    }
+
     for (LeafReaderContextPartition partition : partitions) { // search each subreader partition
       searchLeaf(partition.ctx, partition.minDocId, partition.maxDocId, weight, collector);
     }

diff --git a/lucene/core/src/java/org/apache/lucene/search/NumericFieldReaderContextComparator.java b/lucene/core/src/java/org/apache/lucene/search/NumericFieldReaderContextComparator.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.function.ToLongFunction;
+import org.apache.lucene.index.DocValuesSkipper;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PointValues;
+
+/**
+ * Orders segments by the most competitive value they can hold for a numeric sort field, so that
+ * segments more likely to contain top hits compare first. The value is taken from the field's
+ * doc values skipper when present, otherwise from its point values, and is cached per leaf ord.
+ */
+class NumericFieldReaderContextComparator implements Comparator<LeafReaderContext> {
+
+  private final Map<Integer, Long> cachedSortValues = new HashMap<>();
+  private final String field;
+  private final boolean reverse;
+  private final Long missingValue;
+  private final ToLongFunction<byte[]> pointDecoder;
+
+  /**
+   * @param field name of the numeric field being sorted on
+   * @param missingValue sort value used for documents without the field, or {@code null} if unset
+   * @param reverse whether segments with larger values should compare first
+   * @param pointDecoder decodes a packed point value into a long comparable with skipper values
+   */
+  NumericFieldReaderContextComparator(
+      String field, Long missingValue, boolean reverse, ToLongFunction<byte[]> pointDecoder) {
+    this.field = field;
+    this.missingValue = missingValue;
+    this.reverse = reverse;
+    this.pointDecoder = pointDecoder;
+  }
+
+  @Override
+  public int compare(LeafReaderContext o1, LeafReaderContext o2) {
+    long v1 = getSortValue(o1);
+    long v2 = getSortValue(o2);
+    return reverse ? Long.compare(v2, v1) : Long.compare(v1, v2);
+  }
+
+  /** Returns the sort value for a segment, loading it at most once per leaf ordinal. */
+  private long getSortValue(LeafReaderContext ctx) {
+    return cachedSortValues.computeIfAbsent(ctx.ord, _ -> loadSortValue(ctx));
+  }
+
+  /** Reads the segment's bound for the field, or the most competitive long if none is readable. */
+  private long loadSortValue(LeafReaderContext ctx) {
+    LeafReader reader = ctx.reader();
+    try {
+      DocValuesSkipper skipper = reader.getDocValuesSkipper(field);
+      if (skipper != null) {
+        long best = reverse ? skipper.maxValue() : skipper.minValue();
+        return applyMissing(best, skipper.docCount() != reader.maxDoc());
+      }
+      PointValues points = reader.getPointValues(field);
+      if (points != null) {
+        byte[] packed = reverse ? points.getMaxPackedValue() : points.getMinPackedValue();
+        // packed bounds are documented to be null when the segment has no points for this field
+        if (packed != null) {
+          boolean hasMissingDocs = points.getDocCount() != reader.maxDoc();
+          return applyMissing(pointDecoder.applyAsLong(packed), hasMissingDocs);
+        }
+      }
+    } catch (IOException _) {
+      // A Comparator cannot rethrow checked exceptions; fall back as if nothing could be read.
+    }
+    return reverse ? Long.MAX_VALUE : Long.MIN_VALUE;
+  }
+
+  /** Widens {@code best} by the missing value when some documents in the segment lack the field. */
+  private long applyMissing(long best, boolean hasMissingDocs) {
+    if (hasMissingDocs == false || missingValue == null) {
+      return best;
+    }
+    return reverse ? Math.max(best, missingValue) : Math.min(best, missingValue);
+  }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/search/Sort.java b/lucene/core/src/java/org/apache/lucene/search/Sort.java
@@ -18,6 +18,8 @@
 
 import java.io.IOException;
 import java.util.Arrays;
+import java.util.Comparator;
+import org.apache.lucene.index.LeafReaderContext;
 
 /**
  * Encapsulates sort criteria for returned hits.
@@ -131,4 +133,11 @@ public boolean needsScores() {
     }
     return false;
   }
+
+  /**
+   * @return the first SortField's leaf reader comparator, which may be {@code null}
+   */
+  public Comparator<LeafReaderContext> getLeafReaderComparator() {
+    return fields[0].getLeafReaderComparator();
+  }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/search/SortField.java b/lucene/core/src/java/org/apache/lucene/search/SortField.java
@@ -19,12 +19,14 @@
 import java.io.IOException;
 import java.util.Comparator;
 import java.util.Objects;
+import java.util.function.ToIntFunction;
 import org.apache.lucene.document.DoublePoint;
 import org.apache.lucene.document.FloatPoint;
 import org.apache.lucene.document.IntPoint;
 import org.apache.lucene.document.LongPoint;
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.IndexSorter;
+import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.SortFieldProvider;
 import org.apache.lucene.search.comparators.DocComparator;
 import org.apache.lucene.search.comparators.DoubleComparator;
@@ -562,6 +564,52 @@ public FieldComparator<?> getComparator(final int numHits, Pruning pruning) {
     return fieldComparator;
   }
 
+  /**
+   * Builds a Comparator that ranks segments by how likely they are to contribute documents near
+   * the top of the result set. Searching the most promising segments first lets uncompetitive
+   * segments be skipped entirely once the top-k queue has been populated by earlier, competitive
+   * segments.
+   *
+   * @return the segment comparator, or {@code null} if this sort type does not support one
+   */
+  public Comparator<LeafReaderContext> getLeafReaderComparator() {
+    final boolean noMissing = missingValue == null;
+    return switch (type) {
+      // Document order: compare segments by doc base, flipped for reverse sorts.
+      case DOC -> {
+        ToIntFunction<LeafReaderContext> docBase = ctx -> ctx.docBase;
+        Comparator<LeafReaderContext> byDocBase = Comparator.comparingInt(docBase);
+        yield reverse ? byDocBase.reversed() : byDocBase;
+      }
+      // Numeric types delegate to NumericFieldReaderContextComparator with a matching decoder.
+      case INT -> {
+        Long missing = noMissing ? null : ((Integer) missingValue).longValue();
+        yield new NumericFieldReaderContextComparator(
+            field, missing, reverse, bytes -> NumericUtils.sortableBytesToInt(bytes, 0));
+      }
+      case LONG -> {
+        Long missing = (Long) missingValue; // casting null yields null, so no guard is needed
+        yield new NumericFieldReaderContextComparator(
+            field, missing, reverse, bytes -> NumericUtils.sortableBytesToLong(bytes, 0));
+      }
+      // Float and double missing values are first mapped to their sortable integer form.
+      case FLOAT -> {
+        Long missing =
+            noMissing ? null : (long) NumericUtils.floatToSortableInt((float) missingValue);
+        yield new NumericFieldReaderContextComparator(
+            field, missing, reverse, bytes -> NumericUtils.sortableBytesToInt(bytes, 0));
+      }
+      case DOUBLE -> {
+        Long missing =
+            noMissing ? null : NumericUtils.doubleToSortableLong((double) missingValue);
+        yield new NumericFieldReaderContextComparator(
+            field, missing, reverse, bytes -> NumericUtils.sortableBytesToLong(bytes, 0));
+      }
+      // $CASES-OMITTED$
+      default -> null;
+    };
+  }
+
   /**
    * Rewrites this SortField, returning a new SortField if a change is made. Subclasses should
    * override this define their rewriting behavior when this SortField is of type {@link

diff --git a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
@@ -217,6 +217,14 @@ public void collect(int doc) throws IOException {
 
       return collector;
     }
+
+    @Override
+    public Comparator<LeafReaderContext> getLeafReaderComparator() {
+      // Segment ordering is only offered for non-exhaustive score modes; exhaustive modes
+      // return null so that no comparator is supplied to the caller.
+      boolean exhaustive = scoreMode.isExhaustive();
+      return exhaustive ? null : this.sort.getLeafReaderComparator();
+    }
   }
 
   /*