-
Notifications
You must be signed in to change notification settings - Fork 25.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Optimize sort on numeric long and date fields #39770
Changes from all commits
f55f987
980fe7e
0f0e8a2
50a2427
da1aa9e
34a9c40
c1c6171
64353b8
105bc74
075a7bf
d370c3e
05ca0de
bb32154
6c1e506
63cf7d5
02d2584
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,26 +21,36 @@ | |
|
||
import org.apache.logging.log4j.LogManager; | ||
import org.apache.logging.log4j.Logger; | ||
import org.apache.lucene.document.LongPoint; | ||
import org.apache.lucene.index.IndexOptions; | ||
import org.apache.lucene.index.IndexReader; | ||
import org.apache.lucene.index.LeafReaderContext; | ||
import org.apache.lucene.index.PointValues; | ||
import org.apache.lucene.queries.MinDocQuery; | ||
import org.apache.lucene.queries.SearchAfterSortedDocQuery; | ||
import org.apache.lucene.search.BooleanClause; | ||
import org.apache.lucene.search.BooleanQuery; | ||
import org.apache.lucene.search.Collector; | ||
import org.apache.lucene.search.ConstantScoreQuery; | ||
import org.apache.lucene.search.DocValuesFieldExistsQuery; | ||
import org.apache.lucene.search.FieldDoc; | ||
import org.apache.lucene.search.IndexSearcher; | ||
import org.apache.lucene.search.MatchAllDocsQuery; | ||
import org.apache.lucene.search.Query; | ||
import org.apache.lucene.search.ScoreDoc; | ||
import org.apache.lucene.search.Sort; | ||
import org.apache.lucene.search.SortField; | ||
import org.apache.lucene.search.TopDocs; | ||
import org.apache.lucene.search.TopFieldDocs; | ||
import org.apache.lucene.search.TotalHits; | ||
import org.elasticsearch.action.search.SearchTask; | ||
import org.elasticsearch.common.Booleans; | ||
import org.elasticsearch.common.lucene.Lucene; | ||
import org.elasticsearch.common.lucene.search.TopDocsAndMaxScore; | ||
import org.elasticsearch.common.util.concurrent.QueueResizingEsThreadPoolExecutor; | ||
import org.elasticsearch.index.IndexSortConfig; | ||
import org.elasticsearch.index.mapper.MappedFieldType; | ||
import org.elasticsearch.index.mapper.DateFieldMapper.DateFieldType; | ||
import org.elasticsearch.search.DocValueFormat; | ||
import org.elasticsearch.search.SearchPhase; | ||
import org.elasticsearch.search.SearchService; | ||
|
@@ -57,6 +67,8 @@ | |
import org.elasticsearch.tasks.TaskCancelledException; | ||
import org.elasticsearch.threadpool.ThreadPool; | ||
|
||
import java.io.IOException; | ||
import java.util.Arrays; | ||
import java.util.LinkedList; | ||
import java.util.concurrent.ExecutorService; | ||
import java.util.function.Consumer; | ||
|
@@ -67,6 +79,7 @@ | |
import static org.elasticsearch.search.query.QueryCollectorContext.createMinScoreCollectorContext; | ||
import static org.elasticsearch.search.query.QueryCollectorContext.createMultiCollectorContext; | ||
import static org.elasticsearch.search.query.TopDocsCollectorContext.createTopDocsCollectorContext; | ||
import static org.elasticsearch.search.query.TopDocsCollectorContext.shortcutTotalHitCount; | ||
|
||
|
||
/** | ||
|
@@ -75,6 +88,8 @@ | |
*/ | ||
public class QueryPhase implements SearchPhase { | ||
private static final Logger LOGGER = LogManager.getLogger(QueryPhase.class); | ||
public static final boolean SYS_PROP_LONG_SORT_OPTIMIZED = | ||
Booleans.parseBoolean(System.getProperty("es.search.long_sort_optimized", "false")); | ||
|
||
private final AggregationPhase aggregationPhase; | ||
private final SuggestPhase suggestPhase; | ||
|
@@ -133,6 +148,7 @@ public void execute(SearchContext searchContext) throws QueryPhaseExecutionExcep | |
static boolean execute(SearchContext searchContext, | ||
final IndexSearcher searcher, | ||
Consumer<Runnable> checkCancellationSetter) throws QueryPhaseExecutionException { | ||
SortAndFormats sortAndFormatsForRewrittenNumericSort = null; | ||
final IndexReader reader = searcher.getIndexReader(); | ||
QuerySearchResult queryResult = searchContext.queryResult(); | ||
queryResult.searchTimedOut(false); | ||
|
@@ -204,6 +220,25 @@ static boolean execute(SearchContext searchContext, | |
hasFilterCollector = true; | ||
} | ||
|
||
// try to rewrite numeric or date sort to the optimized distanceFeatureQuery | ||
if ((searchContext.sort() != null) && SYS_PROP_LONG_SORT_OPTIMIZED) { | ||
Query rewrittenQuery = tryRewriteLongSort(searchContext, searcher.getIndexReader(), query, hasFilterCollector); | ||
if (rewrittenQuery != null) { | ||
query = rewrittenQuery; | ||
// modify sorts: add sort on _score as 1st sort, and move the sort on the original field as the 2nd sort | ||
SortField[] oldSortFields = searchContext.sort().sort.getSort(); | ||
DocValueFormat[] oldFormats = searchContext.sort().formats; | ||
SortField[] newSortFields = new SortField[oldSortFields.length + 1]; | ||
DocValueFormat[] newFormats = new DocValueFormat[oldSortFields.length + 1]; | ||
newSortFields[0] = SortField.FIELD_SCORE; | ||
newFormats[0] = DocValueFormat.RAW; | ||
System.arraycopy(oldSortFields, 0, newSortFields, 1, oldSortFields.length); | ||
System.arraycopy(oldFormats, 0, newFormats, 1, oldFormats.length); | ||
sortAndFormatsForRewrittenNumericSort = searchContext.sort(); // stash SortAndFormats to restore it later | ||
searchContext.sort(new SortAndFormats(new Sort(newSortFields), newFormats)); | ||
} | ||
} | ||
|
||
boolean timeoutSet = scrollContext == null && searchContext.timeout() != null && | ||
searchContext.timeout().equals(SearchService.NO_TIMEOUT) == false; | ||
|
||
|
@@ -290,6 +325,13 @@ static boolean execute(SearchContext searchContext, | |
for (QueryCollectorContext ctx : collectors) { | ||
ctx.postProcess(result); | ||
} | ||
|
||
// if we rewrote numeric long or date sort, restore fieldDocs based on the original sort | ||
if (sortAndFormatsForRewrittenNumericSort != null) { | ||
searchContext.sort(sortAndFormatsForRewrittenNumericSort); // restore SortAndFormats | ||
restoreTopFieldDocs(result, sortAndFormatsForRewrittenNumericSort); | ||
} | ||
|
||
ExecutorService executor = searchContext.indexShard().getThreadPool().executor(ThreadPool.Names.SEARCH); | ||
if (executor instanceof QueueResizingEsThreadPoolExecutor) { | ||
QueueResizingEsThreadPoolExecutor rExecutor = (QueueResizingEsThreadPoolExecutor) executor; | ||
|
@@ -306,6 +348,92 @@ static boolean execute(SearchContext searchContext, | |
} | ||
} | ||
|
||
private static Query tryRewriteLongSort(SearchContext searchContext, IndexReader reader, | ||
Query query, boolean hasFilterCollector) throws IOException { | ||
if (searchContext.searchAfter() != null) return null; | ||
if (searchContext.scrollContext() != null) return null; | ||
if (searchContext.collapse() != null) return null; | ||
if (searchContext.trackScores()) return null; | ||
if (searchContext.aggregations() != null) return null; | ||
Sort sort = searchContext.sort().sort; | ||
SortField sortField = sort.getSort()[0]; | ||
if (SortField.Type.LONG.equals(IndexSortConfig.getSortFieldType(sortField)) == false) return null; | ||
jimczi marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
// check if this is a field of type Long or Date, that is indexed and has doc values | ||
String fieldName = sortField.getField(); | ||
if (fieldName == null) return null; // happens when _score or _doc is the 1st sort field | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we should probably also return null when sortField.getType is not long. Not doing it would not cause bugs to my knowledge, but Lucene supports providing custom comparators, while we are assuming the natural number ordering here. |
||
if (searchContext.mapperService() == null) return null; // mapperService can be null in tests | ||
final MappedFieldType fieldType = searchContext.mapperService().fullName(fieldName); | ||
if (fieldType == null) return null; // for unmapped fields, default behaviour depending on "unmapped_type" flag | ||
if ((fieldType.typeName().equals("long") == false) && (fieldType instanceof DateFieldType == false)) return null; | ||
if (fieldType.indexOptions() == IndexOptions.NONE) return null; //TODO: change to pointDataDimensionCount() when implemented | ||
if (fieldType.hasDocValues() == false) return null; | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we need to check the missing value as well to make sure this optimization is applicable. |
||
// check that all sorts are actual document fields or _doc | ||
for (int i = 1; i < sort.getSort().length; i++) { | ||
SortField sField = sort.getSort()[i]; | ||
String sFieldName = sField.getField(); | ||
if (sFieldName == null) { | ||
if (SortField.FIELD_DOC.equals(sField) == false) return null; | ||
} else { | ||
if (searchContext.mapperService().fullName(sFieldName) == null) return null; // could be _script field that uses _score | ||
} | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe we need to disable the optimization when sorting by a script as well, since scripts may use the score. Or turn it into a whitelist rather than a blacklist and only enable this optimization when all sort fields are either actual fields or |
||
|
||
// check that setting of missing values allows optimization | ||
if (sortField.getMissingValue() == null) return null; | ||
Long missingValue = (Long) sortField.getMissingValue(); | ||
boolean missingValuesAccordingToSort = (sortField.getReverse() && (missingValue == Long.MIN_VALUE)) || | ||
((sortField.getReverse() == false) && (missingValue == Long.MAX_VALUE)); | ||
if (missingValuesAccordingToSort == false) return null; | ||
|
||
// check for multiple values | ||
if (PointValues.size(reader, fieldName) != PointValues.getDocCount(reader, fieldName)) return null; //TODO: handle multiple values | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should work if the multi-valued sort mode is set to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This change is already large, maybe we should keep making this optimization work for multi-valued fields as a follow-up? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure |
||
|
||
// check if the optimization makes sense with the track_total_hits setting | ||
if (searchContext.trackTotalHitsUpTo() == Integer.MAX_VALUE) { | ||
// with filter, we can't pre-calculate hitsCount, we need to explicitly calculate them => optimization does't make sense | ||
if (hasFilterCollector) return null; | ||
// if we can't pre-calculate hitsCount based on the query type, optimization does't make sense | ||
if (shortcutTotalHitCount(reader, query) == -1) return null; | ||
} | ||
|
||
byte[] minValueBytes = PointValues.getMinPackedValue(reader, fieldName); | ||
byte[] maxValueBytes = PointValues.getMaxPackedValue(reader, fieldName); | ||
if ((maxValueBytes == null) || (minValueBytes == null)) return null; | ||
long minValue = LongPoint.decodeDimension(minValueBytes, 0); | ||
long maxValue = LongPoint.decodeDimension(maxValueBytes, 0); | ||
|
||
Query rewrittenQuery; | ||
if (minValue == maxValue) { | ||
rewrittenQuery = new DocValuesFieldExistsQuery(fieldName); | ||
} else { | ||
long origin = (sortField.getReverse()) ? maxValue : minValue; | ||
long pivotDistance = (maxValue - minValue) >>> 1; // division by 2 on the unsigned representation to avoid overflow | ||
if (pivotDistance == 0) { // 0 if maxValue = (minValue + 1) | ||
pivotDistance = 1; | ||
} | ||
rewrittenQuery = LongPoint.newDistanceFeatureQuery(sortField.getField(), 1, origin, pivotDistance); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's compute |
||
rewrittenQuery = new BooleanQuery.Builder() | ||
.add(query, BooleanClause.Occur.FILTER) // filter for original query | ||
.add(rewrittenQuery, BooleanClause.Occur.SHOULD) //should for rewrittenQuery | ||
.build(); | ||
return rewrittenQuery; | ||
} | ||
|
||
// Restore fieldsDocs to remove the first _score sort | ||
// updating in place without creating new FieldDoc objects | ||
static void restoreTopFieldDocs(QuerySearchResult result, SortAndFormats originalSortAndFormats) { | ||
TopDocs topDocs = result.topDocs().topDocs; | ||
for (ScoreDoc scoreDoc : topDocs.scoreDocs) { | ||
FieldDoc fieldDoc = (FieldDoc) scoreDoc; | ||
fieldDoc.fields = Arrays.copyOfRange(fieldDoc.fields, 1, fieldDoc.fields.length); | ||
} | ||
TopFieldDocs newTopDocs = new TopFieldDocs(topDocs.totalHits, topDocs.scoreDocs, originalSortAndFormats.sort.getSort()); | ||
result.topDocs(new TopDocsAndMaxScore(newTopDocs, Float.NaN), originalSortAndFormats.formats); | ||
} | ||
|
||
/** | ||
* Returns true if the provided <code>query</code> returns docs in index order (internal doc ids). | ||
* @param query The query to execute | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wonder whether we need to check that
track_scores
is set tofalse
too. Maybe we don't, but in this case too I think it'd be worth to leave a comment about it.