Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,9 @@ Optimizations

* GITHUB#15397: NumericComparator: immediately check whether a segment is competitive with the recorded bottom (Martijn van Groningen)

# GITHUB#15303: Speed up NumericUtils#{add,subtract} by operating on integers instead of bytes. (Kaival Parikh)
* GITHUB#15303: Speed up NumericUtils#{add,subtract} by operating on integers instead of bytes. (Kaival Parikh)

* GITHUB#15436: Allow Comparators to re-order segments at query time to help result pruning. (Alan Woodward)

Bug Fixes
---------------------
Expand Down
9 changes: 9 additions & 0 deletions lucene/core/src/java/org/apache/lucene/search/Collector.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package org.apache.lucene.search;

import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.index.LeafReaderContext;

/**
Expand Down Expand Up @@ -61,4 +62,12 @@ public interface Collector {
* Collector#getLeafCollector}.
*/
default void setWeight(Weight weight) {}

/**
* Returns a Comparator that can be used to order LeafReaderContexts such that segments that are
* more likely to be skipped in non-exhaustive ScoreModes are searched later.
*
* <p>The default implementation returns {@code null}; implementations that can rank segments
* should override this method.
*
* @return a comparator over leaf contexts, or {@code null} when this collector does not request
*     any particular segment order
*/
default Comparator<LeafReaderContext> getLeafReaderComparator() {
return null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -799,6 +799,14 @@ protected void search(LeafReaderContextPartition[] partitions, Weight weight, Co

collector.setWeight(weight);

Comparator<LeafReaderContext> leafComparator = collector.getLeafReaderComparator();
if (leafComparator != null) {
// copy the partitions list so that the original doesn't get mutated by sorting
LeafReaderContextPartition[] sortedPartitions = partitions.clone();
Arrays.sort(sortedPartitions, (o1, o2) -> leafComparator.compare(o1.ctx, o2.ctx));
partitions = sortedPartitions;
}

for (LeafReaderContextPartition partition : partitions) { // search each subreader partition
searchLeaf(partition.ctx, partition.minDocId, partition.maxDocId, weight, collector);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.lucene.search;

import java.io.IOException;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.function.ToLongFunction;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues;

/**
 * Given a sort field, compares segments by the range of values in that field such that documents in
 * better comparing segments are more likely to appear higher in search results.
 *
 * <p>Each segment is reduced to a single representative {@code long}: the minimum value of the
 * field for an ascending sort, or the maximum value for a reversed sort. The value is read from the
 * segment's {@link DocValuesSkipper} when one exists, otherwise from its {@link PointValues}. When
 * some documents in the segment lack a value and a missing value is configured, the missing value
 * is folded into that bound.
 *
 * <p>Representative values are cached per segment ordinal in a plain {@link HashMap} with no
 * synchronization.
 */
class NumericFieldReaderContextComparator implements Comparator<LeafReaderContext> {

  /** Representative sort value of each segment seen so far, keyed by {@code LeafReaderContext.ord}. */
  private final Map<Integer, Long> cachedSortValues = new HashMap<>();

  private final String field;
  private final boolean reverse;

  /** Value substituted for documents without a value, or {@code null} if none is configured. */
  private final Long missingValue;

  /** Decodes a packed point value (min or max of a segment) into a comparable {@code long}. */
  private final ToLongFunction<byte[]> pointDecoder;

  /**
   * @param field name of the numeric field the segments are ranked by
   * @param missingValue comparable value used for documents without a value, may be {@code null}
   * @param reverse {@code true} to rank segments with larger maximum values first
   * @param pointDecoder converts a packed point value into the same comparable space as {@code
   *     missingValue}
   */
  NumericFieldReaderContextComparator(
      String field, Long missingValue, boolean reverse, ToLongFunction<byte[]> pointDecoder) {
    this.field = field;
    this.missingValue = missingValue;
    this.reverse = reverse;
    this.pointDecoder = pointDecoder;
  }

  /**
   * Orders two segments by their representative sort values: ascending when {@code reverse} is
   * {@code false}, descending otherwise.
   */
  @Override
  public int compare(LeafReaderContext o1, LeafReaderContext o2) {
    return reverse
        ? Long.compare(getSortValue(o2), getSortValue(o1))
        : Long.compare(getSortValue(o1), getSortValue(o2));
  }

  /** Returns the cached representative value for the segment, loading it on first use. */
  private long getSortValue(LeafReaderContext ctx) {
    // Single lookup instead of containsKey + put + get.
    return cachedSortValues.computeIfAbsent(ctx.ord, ord -> loadSortValue(ctx));
  }

  /**
   * Reads the representative value of a segment from its index structures.
   *
   * @return the (missing-value adjusted) min or max of the field in this segment, or the fallback
   *     value when neither a doc-values skipper nor point values are available or readable
   */
  private long loadSortValue(LeafReaderContext ctx) {
    LeafReader reader = ctx.reader();
    try {
      DocValuesSkipper skipper = reader.getDocValuesSkipper(field);
      if (skipper != null) {
        boolean everyDocHasValue = skipper.docCount() == reader.maxDoc();
        long bound = reverse ? skipper.maxValue() : skipper.minValue();
        return applyMissingValue(bound, everyDocHasValue);
      }
      PointValues pointValues = reader.getPointValues(field);
      if (pointValues != null) {
        boolean everyDocHasValue = pointValues.getDocCount() == reader.maxDoc();
        byte[] packedBound =
            reverse ? pointValues.getMaxPackedValue() : pointValues.getMinPackedValue();
        return applyMissingValue(pointDecoder.applyAsLong(packedBound), everyDocHasValue);
      }
    } catch (IOException _) {
      // We can't rethrow exceptions from inside a Comparator, so we instead
      // fall through and answer as if there are no index structures to read values from.
    }
    return fallbackSortValue();
  }

  /**
   * Folds the configured missing value into a segment bound.
   *
   * @param bound min (ascending) or max (reversed) value actually present in the segment
   * @param everyDocHasValue whether the doc count of the field equals the segment's maxDoc
   * @return {@code bound} unchanged if every document has a value or no missing value is
   *     configured; otherwise the more extreme of {@code bound} and the missing value
   */
  private long applyMissingValue(long bound, boolean everyDocHasValue) {
    if (everyDocHasValue || missingValue == null) {
      return bound;
    }
    return reverse ? Math.max(bound, missingValue) : Math.min(bound, missingValue);
  }

  /**
   * Value used when nothing can be read for a segment. It is the extreme that sorts first under the
   * ordering applied by {@link #compare}.
   */
  private long fallbackSortValue() {
    return reverse ? Long.MAX_VALUE : Long.MIN_VALUE;
  }
}
9 changes: 9 additions & 0 deletions lucene/core/src/java/org/apache/lucene/search/Sort.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.lucene.index.LeafReaderContext;

/**
* Encapsulates sort criteria for returned hits.
Expand Down Expand Up @@ -131,4 +133,11 @@ public boolean needsScores() {
}
return false;
}

/**
 * Delegates to the primary (first) sort field to obtain a comparator for ordering segments.
 *
 * @return the leaf reader comparator of the first SortField of this Sort
 */
public Comparator<LeafReaderContext> getLeafReaderComparator() {
  final SortField primarySortField = fields[0];
  return primarySortField.getLeafReaderComparator();
}
}
48 changes: 48 additions & 0 deletions lucene/core/src/java/org/apache/lucene/search/SortField.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,14 @@
import java.io.IOException;
import java.util.Comparator;
import java.util.Objects;
import java.util.function.ToIntFunction;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexSorter;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortFieldProvider;
import org.apache.lucene.search.comparators.DocComparator;
import org.apache.lucene.search.comparators.DoubleComparator;
Expand Down Expand Up @@ -562,6 +564,52 @@ public FieldComparator<?> getComparator(final int numHits, Pruning pruning) {
return fieldComparator;
}

/**
 * Builds a Comparator that ranks segments by how likely they are to contribute documents that
 * sort near the top of the result set. Searching such segments first allows uncompetitive
 * segments to be skipped entirely once the top-k queue has been filled by earlier, competitive
 * segments.
 *
 * <p>{@code DOC} sorts order segments by {@code docBase}; {@code INT}, {@code LONG}, {@code
 * FLOAT} and {@code DOUBLE} sorts use a {@link NumericFieldReaderContextComparator}, with the
 * missing value (if any) converted to a {@code long}.
 *
 * @return the segment comparator, or {@code null} if the SortType does not support ordering in
 *     this way
 */
public Comparator<LeafReaderContext> getLeafReaderComparator() {
  return switch (type) {
    case DOC -> {
      ToIntFunction<LeafReaderContext> docBaseOf = ctx -> ctx.docBase;
      Comparator<LeafReaderContext> byDocBase = Comparator.comparingInt(docBaseOf);
      yield reverse ? byDocBase.reversed() : byDocBase;
    }
    case INT -> {
      Long missingAsLong = missingValue == null ? null : ((Integer) missingValue).longValue();
      yield new NumericFieldReaderContextComparator(
          field, missingAsLong, reverse, packed -> NumericUtils.sortableBytesToInt(packed, 0));
    }
    case LONG -> {
      Long missingAsLong = missingValue == null ? null : (Long) missingValue;
      yield new NumericFieldReaderContextComparator(
          field, missingAsLong, reverse, packed -> NumericUtils.sortableBytesToLong(packed, 0));
    }
    case FLOAT -> {
      Long missingAsLong =
          missingValue == null
              ? null
              : (long) NumericUtils.floatToSortableInt((float) missingValue);
      yield new NumericFieldReaderContextComparator(
          field, missingAsLong, reverse, packed -> NumericUtils.sortableBytesToInt(packed, 0));
    }
    case DOUBLE -> {
      Long missingAsLong =
          missingValue == null
              ? null
              : NumericUtils.doubleToSortableLong((double) missingValue);
      yield new NumericFieldReaderContextComparator(
          field, missingAsLong, reverse, packed -> NumericUtils.sortableBytesToLong(packed, 0));
    }
    // $CASES-OMITTED$
    default -> null;
  };
}

/**
* Rewrites this SortField, returning a new SortField if a change is made. Subclasses should
* override this define their rewriting behavior when this SortField is of type {@link
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,14 @@ public void collect(int doc) throws IOException {

return collector;
}

@Override
public Comparator<LeafReaderContext> getLeafReaderComparator() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the case that the original ordering of leaves is already optimal (because a leaf sorter has been configured on IndexWriterConfig), would this be the place where subclasses override the method and return null?

In this case there shouldn't be a need to do the re-ordering of segments?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The case where there's a configured leaf sorter is tricky, as it might be optimal or it might be entirely adverse depending on whether the query sort is reversed or not. You could override here and turn off query-time segment sorting if you wanted. But maybe we need a better escape hatch?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But maybe we need a better escape hatch?

I think so. What do you think would be a good escape hatch here?

if (scoreMode.isExhaustive()) {
return null;
}
return this.sort.getLeafReaderComparator();
}
}

/*
Expand Down
Loading