From cc58c5194129e213877f11e002f7670d4f4bdf63 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Thu, 9 Sep 2021 10:22:42 -0400 Subject: [PATCH] LUCENE-10089 Disable numeric sort optim when needed (#286) Add a method to SortField that allows to enable/ disable numeric sort optimization with points, which is enabled by default from 9.0. --- lucene/CHANGES.txt | 4 +++ .../lucene/search/FieldValueHitQueue.java | 1 + .../org/apache/lucene/search/SortField.java | 32 ++++++++++++++++++ .../lucene/search/TestSortOptimization.java | 33 ++++++++++--------- 4 files changed, 55 insertions(+), 15 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 949d2b7f5ca7..e7f5c56bba3a 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -141,6 +141,10 @@ API Changes The Weight#count API represents a cleaner way for Query classes to optimize their counting method. (Gautam Worah, Adrien Grand) +* LUCENE-10089: Add a method to SortField that allows to enable or disable numeric sort + optimization to use the points index to skip over non-competitive documents, + which is enabled by default from 9.0 (Mayya Sharipova, Adrien Grand) + Improvements * LUCENE-9960: Avoid unnecessary top element replacement for equal elements in PriorityQueue. (Dawid Weiss) diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldValueHitQueue.java b/lucene/core/src/java/org/apache/lucene/search/FieldValueHitQueue.java index b4454034df59..7fa825df0dcb 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldValueHitQueue.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldValueHitQueue.java @@ -135,6 +135,7 @@ private FieldValueHitQueue(SortField[] fields, int size) { SortField field = fields[i]; reverseMul[i] = field.reverse ? 
-1 : 1; comparators[i] = field.getComparator(size, i); + if (field.getOptimizeSortWithPoints() == false) comparators[i].disableSkipping(); } if (numComparators == 1) { // inform a comparator that sort is based on this single field diff --git a/lucene/core/src/java/org/apache/lucene/search/SortField.java b/lucene/core/src/java/org/apache/lucene/search/SortField.java index cdb7f8793fec..adfd5ef8da89 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SortField.java +++ b/lucene/core/src/java/org/apache/lucene/search/SortField.java @@ -130,6 +130,9 @@ public enum Type { // Used for 'sortMissingFirst/Last' protected Object missingValue = null; + // Indicates if numeric sort should be optimized with Points index. Set to true by default. + @Deprecated private boolean optimizeSortWithPoints = true; + /** * Creates a sort by terms in the given field with the type of term values explicitly given. * @@ -606,4 +609,33 @@ public IndexSorter getIndexSorter() { return null; } } + + /** + * Enables/disables numeric sort optimization to use the Points index. + * + *

<p>Enabled by default. By default, sorting on a numeric field activates point sort optimization + * that can efficiently skip over non-competitive hits. Sort optimization has a number of + * requirements, one of which is that SortField.Type matches the Point type with which the field + * was indexed (e.g. sort on IntPoint field should use SortField.Type.INT). Another requirement is + * that the same data is indexed with points and doc values for the field. + * + * @param optimizeSortWithPoints providing {@code false} disables the optimization, in cases where + * these requirements can't be met. + * @deprecated should only be used for compatibility with 8.x indices that got created with + * inconsistent data across fields, or the wrong sort configuration in the index sort + */ + @Deprecated // Remove in Lucene 10 + public void setOptimizeSortWithPoints(boolean optimizeSortWithPoints) { + this.optimizeSortWithPoints = optimizeSortWithPoints; + } + + /** + * Returns whether sort optimization should be optimized with points index + * + * @return whether sort optimization should be optimized with points index + */ + @Deprecated // Remove in Lucene 10 + public boolean getOptimizeSortWithPoints() { + return optimizeSortWithPoints; + } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java index d40740f4522b..ec6ec66d322c 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java @@ -605,27 +605,30 @@ public void testPointValidation() throws IOException { writer.close(); IndexSearcher searcher = newSearcher(reader); + + SortField longSortOnIntField = new SortField("intField", SortField.Type.LONG); assertThrows( IllegalArgumentException.class, - () -> - searcher.search( - new MatchAllDocsQuery(), - 1, - new Sort(new SortField("intField", SortField.Type.LONG)))); + () -> 
searcher.search(new MatchAllDocsQuery(), 1, new Sort(longSortOnIntField))); + // assert that when sort optimization is disabled we can use LONG sort on int field + longSortOnIntField.setOptimizeSortWithPoints(false); + searcher.search(new MatchAllDocsQuery(), 1, new Sort(longSortOnIntField)); + + SortField intSortOnLongField = new SortField("longField", SortField.Type.INT); assertThrows( IllegalArgumentException.class, - () -> - searcher.search( - new MatchAllDocsQuery(), - 1, - new Sort(new SortField("longField", SortField.Type.INT)))); + () -> searcher.search(new MatchAllDocsQuery(), 1, new Sort(intSortOnLongField))); + // assert that when sort optimization is disabled we can use INT sort on long field + intSortOnLongField.setOptimizeSortWithPoints(false); + searcher.search(new MatchAllDocsQuery(), 1, new Sort(intSortOnLongField)); + + SortField intSortOnIntRangeField = new SortField("intRange", SortField.Type.INT); assertThrows( IllegalArgumentException.class, - () -> - searcher.search( - new MatchAllDocsQuery(), - 1, - new Sort(new SortField("intRange", SortField.Type.INT)))); + () -> searcher.search(new MatchAllDocsQuery(), 1, new Sort(intSortOnIntRangeField))); + // assert that when sort optimization is disabled we can use INT sort on intRange field + intSortOnIntRangeField.setOptimizeSortWithPoints(false); + searcher.search(new MatchAllDocsQuery(), 1, new Sort(intSortOnIntRangeField)); reader.close(); dir.close();