Skip to content

Commit

Permalink
Skip docs with Docvalues in NumericLeafComparator (#12405)
Browse files Browse the repository at this point in the history
* Skip document by docValues

*When the queue is full with only one Comparator, we could better tune the maxValueAsBytes/minValueAsBytes. For instance, if the sort is ascending and bottom value is 5, we will use a range on [MIN_VALUE, 4].
---------

Co-authored-by: Adrien Grand <jpountz@gmail.com>
  • Loading branch information
LuXugang and jpountz authored Nov 9, 2023
1 parent 8665014 commit a71d64a
Show file tree
Hide file tree
Showing 35 changed files with 421 additions and 106 deletions.
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,8 @@ Optimizations

* GITHUB#11903: Faster sort on high-cardinality string fields. (Adrien Grand)

* GITHUB#12381: Skip docs with DocValues in NumericLeafComparator. (Lu Xugang, Adrien Grand)

Changes in runtime behavior
---------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.SimpleFieldComparator;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.BytesRef;
Expand All @@ -44,7 +45,7 @@ public FeatureSortField(String field, String featureName) {
}

@Override
public FieldComparator<?> getComparator(int numHits, boolean enableSkipping) {
public FieldComparator<?> getComparator(int numHits, Pruning pruning) {
return new FeatureComparator(numHits, getField(), featureName);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import org.apache.lucene.geo.GeoUtils;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.SortField;

/** Sorts by distance from an origin location. */
Expand All @@ -38,7 +39,7 @@ final class LatLonPointSortField extends SortField {
}

@Override
public FieldComparator<?> getComparator(int numHits, boolean enableSkipping) {
public FieldComparator<?> getComparator(int numHits, Pruning pruning) {
return new LatLonPointDistanceComparator(getField(), latitude, longitude, numHits);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package org.apache.lucene.document;

import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.SortField;

/** Sorts by distance from an origin location. */
Expand All @@ -35,7 +36,7 @@ final class XYPointSortField extends SortField {
}

@Override
public FieldComparator<?> getComparator(int numHits, boolean enableSkipping) {
public FieldComparator<?> getComparator(int numHits, Pruning pruning) {
return new XYPointDistanceComparator(getField(), x, y, numHits);
}

Expand Down
3 changes: 2 additions & 1 deletion lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
import org.apache.lucene.search.FieldExistsQuery;
import org.apache.lucene.search.KnnCollector;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
Expand Down Expand Up @@ -1119,7 +1120,7 @@ public static Status.IndexSortStatus testSort(

for (int i = 0; i < fields.length; i++) {
reverseMul[i] = fields[i].getReverse() ? -1 : 1;
comparators[i] = fields[i].getComparator(1, false).getLeafComparator(readerContext);
comparators[i] = fields[i].getComparator(1, Pruning.NONE).getLeafComparator(readerContext);
}

int maxDoc = reader.maxDoc();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -528,8 +528,8 @@ void setMissingValue(double missingValue) {

@Override
public FieldComparator<Double> newComparator(
String fieldname, int numHits, boolean enableSkipping, boolean reversed) {
return new DoubleComparator(numHits, fieldname, missingValue, reversed, false) {
String fieldname, int numHits, Pruning pruning, boolean reversed) {
return new DoubleComparator(numHits, fieldname, missingValue, reversed, Pruning.NONE) {
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
DoubleValuesHolder holder = new DoubleValuesHolder();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,5 @@ public abstract class FieldComparatorSource {
* @return FieldComparator.
*/
public abstract FieldComparator<?> newComparator(
String fieldname, int numHits, boolean enableSkipping, boolean reversed);
String fieldname, int numHits, Pruning pruning, boolean reversed);
}
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ private FieldValueHitQueue(SortField[] fields, int size) {
// need to check it again.

// All these are required by this class's API - need to return arrays.
// Therefore even in the case of a single comparator, create an array
// Therefore, even in the case of a single comparator, create an array
// anyway.
this.fields = fields;
int numComparators = fields.length;
Expand All @@ -134,7 +134,12 @@ private FieldValueHitQueue(SortField[] fields, int size) {
for (int i = 0; i < numComparators; ++i) {
SortField field = fields[i];
reverseMul[i] = field.reverse ? -1 : 1;
comparators[i] = field.getComparator(size, i == 0);
comparators[i] =
field.getComparator(
size,
i == 0
? (numComparators > 1 ? Pruning.GREATER_THAN : Pruning.GREATER_THAN_OR_EQUAL_TO)
: Pruning.NONE);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -608,7 +608,7 @@ private static ValueComparator loadComparator(
throws IOException {
@SuppressWarnings("unchecked")
FieldComparator<Number> fieldComparator =
(FieldComparator<Number>) sortField.getComparator(1, false);
(FieldComparator<Number>) sortField.getComparator(1, Pruning.NONE);
if (type == Type.INT) {
fieldComparator.setTopValue((int) topValue);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -337,8 +337,8 @@ void setMissingValue(long missingValue) {

@Override
public FieldComparator<Long> newComparator(
String fieldname, int numHits, boolean enableSkipping, boolean reversed) {
return new LongComparator(numHits, fieldname, missingValue, reversed, false) {
String fieldname, int numHits, Pruning pruning, boolean reversed) {
return new LongComparator(numHits, fieldname, missingValue, reversed, Pruning.NONE) {
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
LongValuesHolder holder = new LongValuesHolder();
Expand Down
33 changes: 33 additions & 0 deletions lucene/core/src/java/org/apache/lucene/search/Pruning.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;

/** Controls {@link LeafFieldComparator} how to skip documents */
public enum Pruning {
/** Not allowed to skip documents. */
NONE,
/**
* Allowed to skip documents that compare strictly better than the top value, or strictly worse
* than the bottom value.
*/
GREATER_THAN,
/**
* Allowed to skip documents that compare better than the top value, or worse than or equal to the
* bottom value.
*/
GREATER_THAN_OR_EQUAL_TO
}
20 changes: 9 additions & 11 deletions lucene/core/src/java/org/apache/lucene/search/SortField.java
Original file line number Diff line number Diff line change
Expand Up @@ -500,50 +500,48 @@ public Comparator<BytesRef> getBytesComparator() {
*
* @lucene.experimental
* @param numHits number of top hits the queue will store
* @param enableSkipping true if the comparator can skip documents via {@link
* @param pruning controls how can the comparator to skip documents via {@link
* LeafFieldComparator#competitiveIterator()}
* @return {@link FieldComparator} to use when sorting
*/
public FieldComparator<?> getComparator(final int numHits, boolean enableSkipping) {
public FieldComparator<?> getComparator(final int numHits, Pruning pruning) {
final FieldComparator<?> fieldComparator;
switch (type) {
case SCORE:
fieldComparator = new FieldComparator.RelevanceComparator(numHits);
break;

case DOC:
fieldComparator = new DocComparator(numHits, reverse, enableSkipping);
fieldComparator = new DocComparator(numHits, reverse, pruning);
break;

case INT:
fieldComparator =
new IntComparator(numHits, field, (Integer) missingValue, reverse, enableSkipping);
new IntComparator(numHits, field, (Integer) missingValue, reverse, pruning);
break;

case FLOAT:
fieldComparator =
new FloatComparator(numHits, field, (Float) missingValue, reverse, enableSkipping);
new FloatComparator(numHits, field, (Float) missingValue, reverse, pruning);
break;

case LONG:
fieldComparator =
new LongComparator(numHits, field, (Long) missingValue, reverse, enableSkipping);
fieldComparator = new LongComparator(numHits, field, (Long) missingValue, reverse, pruning);
break;

case DOUBLE:
fieldComparator =
new DoubleComparator(numHits, field, (Double) missingValue, reverse, enableSkipping);
new DoubleComparator(numHits, field, (Double) missingValue, reverse, pruning);
break;

case CUSTOM:
assert comparatorSource != null;
fieldComparator = comparatorSource.newComparator(field, numHits, enableSkipping, reverse);
fieldComparator = comparatorSource.newComparator(field, numHits, pruning, reverse);
break;

case STRING:
fieldComparator =
new TermOrdValComparator(
numHits, field, missingValue == STRING_LAST, reverse, enableSkipping);
new TermOrdValComparator(numHits, field, missingValue == STRING_LAST, reverse, pruning);
break;

case STRING_VAL:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ public void setMissingValue(Object missingValue) {
}

@Override
public FieldComparator<?> getComparator(int numHits, boolean enableSkipping) {
public FieldComparator<?> getComparator(int numHits, Pruning pruning) {
final FieldComparator<?> fieldComparator;
// we can use sort optimization with points if selector is MIN or MAX,
// because we can still build successful iterator over points in this case.
Expand All @@ -255,7 +255,7 @@ public FieldComparator<?> getComparator(int numHits, boolean enableSkipping) {
getField(),
(Integer) missingValue,
reverse,
enableSkipping && isMinOrMax) {
isMinOrMax ? pruning : Pruning.NONE) {
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context)
throws IOException {
Expand All @@ -273,7 +273,11 @@ protected NumericDocValues getNumericDocValues(
case FLOAT:
fieldComparator =
new FloatComparator(
numHits, getField(), (Float) missingValue, reverse, enableSkipping && isMinOrMax) {
numHits,
getField(),
(Float) missingValue,
reverse,
isMinOrMax ? pruning : Pruning.NONE) {
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context)
throws IOException {
Expand All @@ -291,7 +295,11 @@ protected NumericDocValues getNumericDocValues(
case LONG:
fieldComparator =
new LongComparator(
numHits, getField(), (Long) missingValue, reverse, enableSkipping && isMinOrMax) {
numHits,
getField(),
(Long) missingValue,
reverse,
isMinOrMax ? pruning : Pruning.NONE) {
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context)
throws IOException {
Expand All @@ -309,7 +317,11 @@ protected NumericDocValues getNumericDocValues(
case DOUBLE:
fieldComparator =
new DoubleComparator(
numHits, getField(), (Double) missingValue, reverse, enableSkipping && isMinOrMax) {
numHits,
getField(),
(Double) missingValue,
reverse,
isMinOrMax ? pruning : Pruning.NONE) {
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context)
throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -178,10 +178,10 @@ public void setMissingValue(Object missingValue) {
}

@Override
public FieldComparator<?> getComparator(int numHits, boolean enableSkipping) {
boolean finalEnableSkipping = enableSkipping && getOptimizeSortWithIndexedData();
public FieldComparator<?> getComparator(int numHits, Pruning pruning) {
Pruning finalPruning = getOptimizeSortWithIndexedData() ? pruning : Pruning.NONE;
return new TermOrdValComparator(
numHits, getField(), missingValue == STRING_LAST, reverse, finalEnableSkipping) {
numHits, getField(), missingValue == STRING_LAST, reverse, finalPruning) {
@Override
protected SortedDocValues getSortedDocValues(LeafReaderContext context, String field)
throws IOException {
Expand Down
2 changes: 1 addition & 1 deletion lucene/core/src/java/org/apache/lucene/search/TopDocs.java
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ public MergeSortQueue(Sort sort, TopDocs[] shardHits, Comparator<ScoreDoc> tieBr
reverseMul = new int[sortFields.length];
for (int compIDX = 0; compIDX < sortFields.length; compIDX++) {
final SortField sortField = sortFields[compIDX];
comparators[compIDX] = sortField.getComparator(1, compIDX == 0);
comparators[compIDX] = sortField.getComparator(1, Pruning.NONE);
reverseMul[compIDX] = sortField.getReverse() ? -1 : 1;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.Scorable;

/** Comparator that sorts by asc _doc */
Expand All @@ -35,10 +36,10 @@ public class DocComparator extends FieldComparator<Integer> {
private boolean hitsThresholdReached;

/** Creates a new comparator based on document ids for {@code numHits} */
public DocComparator(int numHits, boolean reverse, boolean enableSkipping) {
public DocComparator(int numHits, boolean reverse, Pruning pruning) {
this.docIDs = new int[numHits];
// skipping functionality is enabled if we are sorting by _doc in asc order as a primary sort
this.enableSkipping = (reverse == false && enableSkipping);
this.enableSkipping = (reverse == false && pruning != Pruning.NONE);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Pruning;

/**
* Comparator based on {@link Double#compare} for {@code numHits}. This comparator provides a
Expand All @@ -32,8 +33,8 @@ public class DoubleComparator extends NumericComparator<Double> {
protected double bottom;

public DoubleComparator(
int numHits, String field, Double missingValue, boolean reverse, boolean enableSkipping) {
super(field, missingValue != null ? missingValue : 0.0, reverse, enableSkipping, Double.BYTES);
int numHits, String field, Double missingValue, boolean reverse, Pruning pruning) {
super(field, missingValue != null ? missingValue : 0.0, reverse, pruning, Double.BYTES);
values = new double[numHits];
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Pruning;

/**
* Comparator based on {@link Float#compare} for {@code numHits}. This comparator provides a
Expand All @@ -32,8 +33,8 @@ public class FloatComparator extends NumericComparator<Float> {
protected float bottom;

public FloatComparator(
int numHits, String field, Float missingValue, boolean reverse, boolean enableSkipping) {
super(field, missingValue != null ? missingValue : 0.0f, reverse, enableSkipping, Float.BYTES);
int numHits, String field, Float missingValue, boolean reverse, Pruning pruning) {
super(field, missingValue != null ? missingValue : 0.0f, reverse, pruning, Float.BYTES);
values = new float[numHits];
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Pruning;

/**
* Comparator based on {@link Integer#compare} for {@code numHits}. This comparator provides a
Expand All @@ -32,8 +33,8 @@ public class IntComparator extends NumericComparator<Integer> {
protected int bottom;

public IntComparator(
int numHits, String field, Integer missingValue, boolean reverse, boolean enableSkipping) {
super(field, missingValue != null ? missingValue : 0, reverse, enableSkipping, Integer.BYTES);
int numHits, String field, Integer missingValue, boolean reverse, Pruning pruning) {
super(field, missingValue != null ? missingValue : 0, reverse, pruning, Integer.BYTES);
values = new int[numHits];
}

Expand Down
Loading

0 comments on commit a71d64a

Please sign in to comment.