Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable numeric sort optimization support for all numeric types #6424

Merged
merged 17 commits into from
Mar 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- [Segment Replication] Apply backpressure when replicas fall behind ([#6563](https://github.com/opensearch-project/OpenSearch/pull/6563))
- [Remote Store] Integrate remote segment store in peer recovery flow ([#6664](https://github.com/opensearch-project/OpenSearch/pull/6664))
- [Segment Replication] Add new cluster setting to set replication strategy by default for all indices in cluster. ([#6791](https://github.com/opensearch-project/OpenSearch/pull/6791))
- Enable sort optimization for all NumericTypes ([#6464](https://github.com/opensearch-project/OpenSearch/pull/6464)

### Dependencies
- Bump `org.apache.logging.log4j:log4j-core` from 2.18.0 to 2.20.0 ([#6490](https://github.com/opensearch-project/OpenSearch/pull/6490))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ private static XContentBuilder createTestMapping() {
public void testIndexSort() {
SortField dateSort = new SortedNumericSortField("date", SortField.Type.LONG, false);
dateSort.setMissingValue(Long.MAX_VALUE);
SortField numericSort = new SortedNumericSortField("numeric_dv", SortField.Type.LONG, false);
numericSort.setMissingValue(Long.MAX_VALUE);
SortField numericSort = new SortedNumericSortField("numeric_dv", SortField.Type.INT, false);
numericSort.setMissingValue(Integer.MAX_VALUE);
SortField keywordSort = new SortedSetSortField("keyword_dv", false);
keywordSort.setMissingValue(SortField.STRING_LAST);
Sort indexSort = new Sort(dateSort, numericSort, keywordSort);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -382,24 +382,22 @@ private void createIndexMappingsFromObjectType(String indexName, List<Object> ty
ensureGreen();
}

// Convert Integer, Short, Byte and Boolean to Long in order to match the conversion done
// Convert Integer, Short, Byte and Boolean to Int in order to match the conversion done
// by the internal hits when populating the sort values.
private List<Object> convertSortValues(List<Object> sortValues) {
List<Object> converted = new ArrayList<>();
for (int i = 0; i < sortValues.size(); i++) {
Object from = sortValues.get(i);
if (from instanceof Integer) {
converted.add(((Integer) from).longValue());
} else if (from instanceof Short) {
converted.add(((Short) from).longValue());
if (from instanceof Short) {
converted.add(((Short) from).intValue());
} else if (from instanceof Byte) {
converted.add(((Byte) from).longValue());
converted.add(((Byte) from).intValue());
} else if (from instanceof Boolean) {
boolean b = (boolean) from;
if (b) {
converted.add(1L);
converted.add(1);
} else {
converted.add(0L);
converted.add(0);
}
} else {
converted.add(from);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldDocs;
Expand Down Expand Up @@ -68,6 +69,7 @@
import org.opensearch.search.profile.ProfileShardResult;
import org.opensearch.search.profile.SearchProfileShardResults;
import org.opensearch.search.query.QuerySearchResult;
import org.opensearch.search.sort.SortedWiderNumericSortField;
import org.opensearch.search.suggest.Suggest;
import org.opensearch.search.suggest.Suggest.Suggestion;
import org.opensearch.search.suggest.completion.CompletionSuggestion;
Expand Down Expand Up @@ -235,14 +237,12 @@ static TopDocs mergeTopDocs(Collection<TopDocs> results, int topN, int from) {
if (numShards == 1 && from == 0) { // only one shard and no pagination we can just return the topDocs as we got them.
return topDocs;
} else if (topDocs instanceof CollapseTopFieldDocs) {
CollapseTopFieldDocs firstTopDocs = (CollapseTopFieldDocs) topDocs;
final Sort sort = new Sort(firstTopDocs.fields);
final CollapseTopFieldDocs[] shardTopDocs = results.toArray(new CollapseTopFieldDocs[numShards]);
final Sort sort = createSort(shardTopDocs);
mergedTopDocs = CollapseTopFieldDocs.merge(sort, from, topN, shardTopDocs, false);
} else if (topDocs instanceof TopFieldDocs) {
TopFieldDocs firstTopDocs = (TopFieldDocs) topDocs;
final Sort sort = new Sort(firstTopDocs.fields);
final TopFieldDocs[] shardTopDocs = results.toArray(new TopFieldDocs[numShards]);
final Sort sort = createSort(shardTopDocs);
mergedTopDocs = TopDocs.merge(sort, from, topN, shardTopDocs);
} else {
final TopDocs[] shardTopDocs = results.toArray(new TopDocs[numShards]);
Expand Down Expand Up @@ -600,6 +600,49 @@ private static void validateMergeSortValueFormats(Collection<? extends SearchPha
}
}

/**
* Creates Sort object from topFieldsDocs fields.
* It is necessary to widen the SortField.Type to maximum byte size for merging sorted docs.
* Different indices might have different types. This will avoid user to do re-index of data
* in case of mapping field change for newly indexed data.
* This will support Int to Long and Float to Double.
* Earlier widening of type was taken care in IndexNumericFieldData, but since we now want to
* support sort optimization, we removed type widening there and taking care here during merging.
* More details here https://github.com/opensearch-project/OpenSearch/issues/6326
*/
private static Sort createSort(TopFieldDocs[] topFieldDocs) {
final SortField[] firstTopDocFields = topFieldDocs[0].fields;
final SortField[] newFields = new SortField[firstTopDocFields.length];

for (int i = 0; i < firstTopDocFields.length; i++) {
if (firstTopDocFields[i] instanceof SortedNumericSortField && isSortWideningRequired(topFieldDocs, i)) {
final SortedNumericSortField delegate = (SortedNumericSortField) firstTopDocFields[i];
newFields[i] = new SortedWiderNumericSortField(
delegate.getField(),
delegate.getNumericType(),
delegate.getReverse(),
delegate.getSelector()
);
} else {
newFields[i] = firstTopDocFields[i];
}
}
return new Sort(newFields);
}

/**
* It will compare respective SortField between shards to see if any shard results have different
* field mapping type, accordingly it will decide to widen the sort fields.
*/
private static boolean isSortWideningRequired(TopFieldDocs[] topFieldDocs, int sortFieldindex) {
for (int i = 0; i < topFieldDocs.length - 1; i++) {
if (topFieldDocs[i].fields[sortFieldindex] != topFieldDocs[i + 1].fields[sortFieldindex]) {
return true;
}
}
return false;
}

/*
* Returns the size of the requested top documents (from + size)
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@
import org.opensearch.index.analysis.AnalyzerScope;
import org.opensearch.index.analysis.NamedAnalyzer;
import org.opensearch.index.fielddata.IndexFieldData;
import org.opensearch.search.sort.SortedWiderNumericSortField;

import java.io.IOException;
import java.math.BigInteger;
Expand Down Expand Up @@ -599,8 +600,8 @@ public static void writeSortField(StreamOutput out, SortField sortField) throws
SortField newSortField = new SortField(sortField.getField(), SortField.Type.STRING, sortField.getReverse());
newSortField.setMissingValue(sortField.getMissingValue());
sortField = newSortField;
} else if (sortField.getClass() == SortedNumericSortField.class) {
// for multi-valued sort field, we replace the SortedSetSortField with a simple SortField.
} else if (sortField.getClass() == SortedNumericSortField.class || sortField.getClass() == SortedWiderNumericSortField.class) {
// for multi-valued sort field, we replace the SortedNumericSortField/SortedWiderNumericSortField with a simple SortField.
// It works because the sort field is only used to merge results from different shards.
SortField newSortField = new SortField(
sortField.getField(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import org.opensearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
import org.opensearch.index.fielddata.fieldcomparator.DoubleValuesComparatorSource;
import org.opensearch.index.fielddata.fieldcomparator.FloatValuesComparatorSource;
import org.opensearch.index.fielddata.fieldcomparator.IntValuesComparatorSource;
import org.opensearch.index.fielddata.fieldcomparator.LongValuesComparatorSource;
import org.opensearch.search.DocValueFormat;
import org.opensearch.search.MultiValueMode;
Expand All @@ -65,76 +66,16 @@ public abstract class IndexNumericFieldData implements IndexFieldData<LeafNumeri
* @opensearch.internal
*/
public enum NumericType {
BOOLEAN(false, SortField.Type.LONG, CoreValuesSourceType.BOOLEAN) {
@Deprecated
@Override
protected PointSortOptimization applyPointSortOptimization() {
return PointSortOptimization.DISABLED;
}
},
BYTE(false, SortField.Type.LONG, CoreValuesSourceType.NUMERIC) {
@Deprecated
@Override
protected PointSortOptimization applyPointSortOptimization() {
return PointSortOptimization.DISABLED;
}
},
SHORT(false, SortField.Type.LONG, CoreValuesSourceType.NUMERIC) {
@Deprecated
@Override
protected PointSortOptimization applyPointSortOptimization() {
return PointSortOptimization.DISABLED;
}
},
INT(false, SortField.Type.LONG, CoreValuesSourceType.NUMERIC) {
@Deprecated
@Override
protected PointSortOptimization applyPointSortOptimization() {
return PointSortOptimization.DISABLED;
}
},
LONG(false, SortField.Type.LONG, CoreValuesSourceType.NUMERIC) {
@Deprecated
@Override
protected PointSortOptimization applyPointSortOptimization() {
return PointSortOptimization.ENABLED;
}
},
DATE(false, SortField.Type.LONG, CoreValuesSourceType.DATE) {
@Deprecated
@Override
protected PointSortOptimization applyPointSortOptimization() {
return PointSortOptimization.ENABLED;
}
},
DATE_NANOSECONDS(false, SortField.Type.LONG, CoreValuesSourceType.DATE) {
@Deprecated
@Override
public PointSortOptimization applyPointSortOptimization() {
return PointSortOptimization.ENABLED;
}
},
HALF_FLOAT(true, SortField.Type.LONG, CoreValuesSourceType.NUMERIC) {
@Deprecated
@Override
protected PointSortOptimization applyPointSortOptimization() {
return PointSortOptimization.DISABLED;
}
},
FLOAT(true, SortField.Type.FLOAT, CoreValuesSourceType.NUMERIC) {
@Deprecated
@Override
protected PointSortOptimization applyPointSortOptimization() {
return PointSortOptimization.DISABLED;
}
},
DOUBLE(true, SortField.Type.DOUBLE, CoreValuesSourceType.NUMERIC) {
@Deprecated
@Override
protected PointSortOptimization applyPointSortOptimization() {
return PointSortOptimization.ENABLED;
}
};
BOOLEAN(false, SortField.Type.INT, CoreValuesSourceType.BOOLEAN),
BYTE(false, SortField.Type.INT, CoreValuesSourceType.NUMERIC),
SHORT(false, SortField.Type.INT, CoreValuesSourceType.NUMERIC),
INT(false, SortField.Type.INT, CoreValuesSourceType.NUMERIC),
LONG(false, SortField.Type.LONG, CoreValuesSourceType.NUMERIC),
DATE(false, SortField.Type.LONG, CoreValuesSourceType.DATE),
DATE_NANOSECONDS(false, SortField.Type.LONG, CoreValuesSourceType.DATE),
HALF_FLOAT(true, SortField.Type.LONG, CoreValuesSourceType.NUMERIC),
FLOAT(true, SortField.Type.FLOAT, CoreValuesSourceType.NUMERIC),
DOUBLE(true, SortField.Type.DOUBLE, CoreValuesSourceType.NUMERIC);

private final boolean floatingPoint;
private final ValuesSourceType valuesSourceType;
Expand All @@ -153,24 +94,6 @@ public final boolean isFloatingPoint() {
public final ValuesSourceType getValuesSourceType() {
return valuesSourceType;
}

@Deprecated
protected abstract PointSortOptimization applyPointSortOptimization();
}

/**
* Controls whether to apply sort optimization to skip non-competitive docs
* based on the BKD index.
*
* @deprecated this control will be removed in a future version of OpenSearch
*
* @opensearch.internal
* @opensearch.experimental
*/
@Deprecated
private enum PointSortOptimization {
ENABLED,
DISABLED
}

/**
Expand Down Expand Up @@ -211,21 +134,6 @@ public final SortField sortField(
: SortedNumericSelector.Type.MIN;
SortField sortField = new SortedNumericSortField(getFieldName(), getNumericType().sortFieldType, reverse, selectorType);
sortField.setMissingValue(source.missingObject(missingValue, reverse));

// LUCENE-9280 added the ability for collectors to skip non-competitive
// documents when top docs are sorted by other fields different from the _score.
// However, from Lucene 9 onwards, numeric sort optimisation requires the byte size
// for points (BKD index) and doc values (columnar) and SortField.Type to be matched.
// NumericType violates this requirement
// (see: https://github.com/opensearch-project/OpenSearch/issues/2063#issuecomment-1069358826 test failure)
// because it uses the largest byte size (LONG) for the SortField of most types. The section below disables
// the BKD based sort optimization for numeric types whose encoded BYTE size does not match the comparator (LONG)/
// So as of now, we can only enable for DATE, DATE_NANOSECONDS, LONG, DOUBLE.
// BOOLEAN, BYTE, SHORT, INT, HALF_FLOAT, FLOAT (use long for doc values, but fewer for BKD Points)
// todo : Enable other SortField.Type as well, that will require wider change
if (getNumericType().applyPointSortOptimization() == PointSortOptimization.DISABLED) {
sortField.setOptimizeSortWithPoints(false);
}
return sortField;
}

Expand Down Expand Up @@ -298,9 +206,11 @@ private XFieldComparatorSource comparatorSource(
return dateComparatorSource(missingValue, sortMode, nested);
case DATE_NANOSECONDS:
return dateNanosComparatorSource(missingValue, sortMode, nested);
case LONG:
return new LongValuesComparatorSource(this, missingValue, sortMode, nested);
default:
assert !targetNumericType.isFloatingPoint();
return new LongValuesComparatorSource(this, missingValue, sortMode, nested);
return new IntValuesComparatorSource(this, missingValue, sortMode, nested);
}
}

Expand Down
Loading