elastic · mayya-sharipova · Jun 10, 2019 · Mar 6, 2019 · Apr 3, 2019 · Apr 8, 2019
diff --git a/buildSrc/src/main/groovy/org/elasticsearch/gradle/BuildPlugin.groovy b/buildSrc/src/main/groovy/org/elasticsearch/gradle/BuildPlugin.groovy
@@ -872,6 +872,9 @@ class BuildPlugin implements Plugin<Project> {
                 // TODO: remove this once ctx isn't added to update script params in 7.0
                 test.systemProperty 'es.scripting.update.ctx_in_params', 'false'
 
+                // TODO: remove when sort optimization is merged
+                test.systemProperty 'es.search.long_sort_optimized', 'true'
+
                 test.testLogging { TestLoggingContainer logging ->
                     logging.showExceptions = true
                     logging.showCauses = true

diff --git a/server/src/main/java/org/elasticsearch/search/query/QueryPhase.java b/server/src/main/java/org/elasticsearch/search/query/QueryPhase.java
@@ -21,26 +21,36 @@
 
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
+import org.apache.lucene.document.LongPoint;
+import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PointValues;
 import org.apache.lucene.queries.MinDocQuery;
 import org.apache.lucene.queries.SearchAfterSortedDocQuery;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.DocValuesFieldExistsQuery;
 import org.apache.lucene.search.FieldDoc;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.TopFieldDocs;
 import org.apache.lucene.search.TotalHits;
 import org.elasticsearch.action.search.SearchTask;
+import org.elasticsearch.common.Booleans;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.lucene.search.TopDocsAndMaxScore;
 import org.elasticsearch.common.util.concurrent.QueueResizingEsThreadPoolExecutor;
+import org.elasticsearch.index.IndexSortConfig;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.index.mapper.DateFieldMapper.DateFieldType;
 import org.elasticsearch.search.DocValueFormat;
 import org.elasticsearch.search.SearchPhase;
 import org.elasticsearch.search.SearchService;
@@ -57,6 +67,8 @@
 import org.elasticsearch.tasks.TaskCancelledException;
 import org.elasticsearch.threadpool.ThreadPool;
 
+import java.io.IOException;
+import java.util.Arrays;
 import java.util.LinkedList;
 import java.util.concurrent.ExecutorService;
 import java.util.function.Consumer;
@@ -67,6 +79,7 @@
 import static org.elasticsearch.search.query.QueryCollectorContext.createMinScoreCollectorContext;
 import static org.elasticsearch.search.query.QueryCollectorContext.createMultiCollectorContext;
 import static org.elasticsearch.search.query.TopDocsCollectorContext.createTopDocsCollectorContext;
+import static org.elasticsearch.search.query.TopDocsCollectorContext.shortcutTotalHitCount;
 
 
 /**
@@ -75,6 +88,8 @@
  */
 public class QueryPhase implements SearchPhase {
     private static final Logger LOGGER = LogManager.getLogger(QueryPhase.class);
+    public static final boolean SYS_PROP_LONG_SORT_OPTIMIZED =
+        Booleans.parseBoolean(System.getProperty("es.search.long_sort_optimized", "false"));
 
     private final AggregationPhase aggregationPhase;
     private final SuggestPhase suggestPhase;
@@ -133,6 +148,7 @@ public void execute(SearchContext searchContext) throws QueryPhaseExecutionExcep
     static boolean execute(SearchContext searchContext,
                            final IndexSearcher searcher,
                            Consumer<Runnable> checkCancellationSetter) throws QueryPhaseExecutionException {
+        SortAndFormats sortAndFormatsForRewrittenNumericSort = null;
         final IndexReader reader = searcher.getIndexReader();
         QuerySearchResult queryResult = searchContext.queryResult();
         queryResult.searchTimedOut(false);
@@ -204,6 +220,25 @@ static boolean execute(SearchContext searchContext,
                 hasFilterCollector = true;
             }
 
+            // try to rewrite numeric or date sort to the optimized distanceFeatureQuery
+            if ((searchContext.sort() != null) && SYS_PROP_LONG_SORT_OPTIMIZED) {
+                Query rewrittenQuery = tryRewriteLongSort(searchContext, searcher.getIndexReader(), query, hasFilterCollector);
+                if (rewrittenQuery != null) {
+                    query = rewrittenQuery;
+                    // modify sorts: add sort on _score as 1st sort, and move the sort on the original field as the 2nd sort
+                    SortField[] oldSortFields = searchContext.sort().sort.getSort();
+                    DocValueFormat[] oldFormats = searchContext.sort().formats;
+                    SortField[] newSortFields = new SortField[oldSortFields.length + 1];
+                    DocValueFormat[] newFormats = new DocValueFormat[oldSortFields.length + 1];
+                    newSortFields[0] = SortField.FIELD_SCORE;
+                    newFormats[0] = DocValueFormat.RAW;
+                    System.arraycopy(oldSortFields, 0, newSortFields, 1, oldSortFields.length);
+                    System.arraycopy(oldFormats, 0, newFormats, 1, oldFormats.length);
+                    sortAndFormatsForRewrittenNumericSort = searchContext.sort(); // stash SortAndFormats to restore it later
+                    searchContext.sort(new SortAndFormats(new Sort(newSortFields), newFormats));
+                }
+            }
+
             boolean timeoutSet = scrollContext == null && searchContext.timeout() != null &&
                 searchContext.timeout().equals(SearchService.NO_TIMEOUT) == false;
 
@@ -290,6 +325,13 @@ static boolean execute(SearchContext searchContext,
             for (QueryCollectorContext ctx : collectors) {
                 ctx.postProcess(result);
             }
+
+            // if we rewrote numeric long or date sort, restore fieldDocs based on the original sort
+            if (sortAndFormatsForRewrittenNumericSort != null) {
+                searchContext.sort(sortAndFormatsForRewrittenNumericSort); // restore SortAndFormats
+                restoreTopFieldDocs(result, sortAndFormatsForRewrittenNumericSort);
+            }
+
             ExecutorService executor = searchContext.indexShard().getThreadPool().executor(ThreadPool.Names.SEARCH);
             if (executor instanceof QueueResizingEsThreadPoolExecutor) {
                 QueueResizingEsThreadPoolExecutor rExecutor = (QueueResizingEsThreadPoolExecutor) executor;
@@ -306,6 +348,92 @@ static boolean execute(SearchContext searchContext,
         }
     }
 
+     private static Query tryRewriteLongSort(SearchContext searchContext, IndexReader reader,
+            Query query, boolean hasFilterCollector) throws IOException {
+        if (searchContext.searchAfter() != null) return null;
+        if (searchContext.scrollContext() != null) return null;
+        if (searchContext.collapse() != null) return null;
+        if (searchContext.trackScores()) return null;
+        if (searchContext.aggregations() != null) return null;
+        Sort sort = searchContext.sort().sort;
+        SortField sortField = sort.getSort()[0];
+        if (SortField.Type.LONG.equals(IndexSortConfig.getSortFieldType(sortField)) == false) return null;
+
+        // check if this is a field of type Long or Date, that is indexed and has doc values
+        String fieldName = sortField.getField();
+        if (fieldName == null) return null; // happens when _score or _doc is the 1st sort field
+        if (searchContext.mapperService() == null) return null; // mapperService can be null in tests
+        final MappedFieldType fieldType = searchContext.mapperService().fullName(fieldName);
+        if (fieldType == null) return null; // for unmapped fields, default behaviour depending on "unmapped_type" flag
+        if ((fieldType.typeName().equals("long") == false) && (fieldType instanceof DateFieldType == false)) return null;
+        if (fieldType.indexOptions() == IndexOptions.NONE) return null; //TODO: change to pointDataDimensionCount() when implemented
+        if (fieldType.hasDocValues() == false) return null;
+
+        // check that all sorts are actual document fields or _doc
+        for (int i = 1; i < sort.getSort().length; i++) {
+            SortField sField = sort.getSort()[i];
+            String sFieldName = sField.getField();
+            if (sFieldName == null) {
+                if (SortField.FIELD_DOC.equals(sField) == false) return null;
+            } else {
+                if (searchContext.mapperService().fullName(sFieldName) == null) return null; // could be _script field that uses _score
+            }
+        }
+
+        // check that setting of missing values allows optimization
+        if (sortField.getMissingValue() == null) return null;
+        Long missingValue = (Long) sortField.getMissingValue();
+        boolean missingValuesAccordingToSort = (sortField.getReverse() && (missingValue == Long.MIN_VALUE)) ||
+        ((sortField.getReverse() == false) && (missingValue == Long.MAX_VALUE));
+        if (missingValuesAccordingToSort == false) return null;
+
+        // check for multiple values
+        if (PointValues.size(reader, fieldName) != PointValues.getDocCount(reader, fieldName)) return null; //TODO: handle multiple values
+
+        // check if the optimization makes sense with the track_total_hits setting
+        if (searchContext.trackTotalHitsUpTo() == Integer.MAX_VALUE) {
+            // with filter, we can't pre-calculate hitsCount, we need to explicitly calculate them => optimization does't make sense
+            if (hasFilterCollector) return null;
+            // if we can't pre-calculate hitsCount based on the query type, optimization does't make sense
+            if (shortcutTotalHitCount(reader, query) == -1) return null;
+        }
+
+        byte[] minValueBytes = PointValues.getMinPackedValue(reader, fieldName);
+        byte[] maxValueBytes = PointValues.getMaxPackedValue(reader, fieldName);
+        if ((maxValueBytes == null) || (minValueBytes == null)) return null;
+        long minValue = LongPoint.decodeDimension(minValueBytes, 0);
+        long maxValue = LongPoint.decodeDimension(maxValueBytes, 0);
+
+        Query rewrittenQuery;
+        if (minValue == maxValue) {
+            rewrittenQuery = new DocValuesFieldExistsQuery(fieldName);
+        } else {
+            long origin = (sortField.getReverse()) ? maxValue : minValue;
+            long pivotDistance = (maxValue - minValue) >>> 1; // division by 2 on the unsigned representation to avoid overflow
+            if (pivotDistance == 0) { // 0 if maxValue = (minValue + 1)
+                pivotDistance = 1;
+            }
+            rewrittenQuery = LongPoint.newDistanceFeatureQuery(sortField.getField(), 1, origin, pivotDistance);
+        }
+        rewrittenQuery = new BooleanQuery.Builder()
+            .add(query, BooleanClause.Occur.FILTER) // filter for original query
+            .add(rewrittenQuery, BooleanClause.Occur.SHOULD) //should for rewrittenQuery
+            .build();
+        return rewrittenQuery;
+    }
+
+    // Restore fieldsDocs to remove the first _score sort
+    // updating in place without creating new FieldDoc objects
+    static void restoreTopFieldDocs(QuerySearchResult result, SortAndFormats originalSortAndFormats) {
+        TopDocs topDocs = result.topDocs().topDocs;
+        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
+            FieldDoc fieldDoc = (FieldDoc) scoreDoc;
+            fieldDoc.fields = Arrays.copyOfRange(fieldDoc.fields, 1, fieldDoc.fields.length);
+        }
+        TopFieldDocs newTopDocs = new TopFieldDocs(topDocs.totalHits, topDocs.scoreDocs, originalSortAndFormats.sort.getSort());
+        result.topDocs(new TopDocsAndMaxScore(newTopDocs, Float.NaN), originalSortAndFormats.formats);
+    }
+
     /**
      * Returns true if the provided <code>query</code> returns docs in index order (internal doc ids).
      * @param query The query to execute