From dfde04a808454a567e51f0f259b52ea0e686da9c Mon Sep 17 00:00:00 2001 From: gashutos Date: Sun, 19 Feb 2023 15:38:25 +0530 Subject: [PATCH] Adding numeric optimization support for all numeric types Signed-off-by: gashutos --- CHANGELOG.md | 1 + .../action/search/SearchPhaseController.java | 96 ++++++++ .../fielddata/IndexNumericFieldData.java | 119 ++------- .../IntValuesComparatorSource.java | 60 +++++ .../search/SearchPhaseControllerTests.java | 226 ++++++++++++++++-- 5 files changed, 380 insertions(+), 122 deletions(-) create mode 100644 server/src/main/java/org/opensearch/index/fielddata/fieldcomparator/IntValuesComparatorSource.java diff --git a/CHANGELOG.md b/CHANGELOG.md index e5c60bdc99db9..eb2b97af048d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -101,6 +101,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Fix timeout error when adding a document to an index with extension running ([#6275](https://github.com/opensearch-project/OpenSearch/pull/6275)) - Handle translog upload during primary relocation for remote-backed indexes ([#5804](https://github.com/opensearch-project/OpenSearch/pull/5804)) - Batch translog sync/upload per x ms for remote-backed indexes ([#5854](https://github.com/opensearch-project/OpenSearch/pull/5854)) +- Enable sort optimization for all NumericTypes ([#6321](https://github.com/opensearch-project/OpenSearch/pull/6321) ### Dependencies - Update nebula-publishing-plugin to 19.2.0 ([#5704](https://github.com/opensearch-project/OpenSearch/pull/5704)) diff --git a/server/src/main/java/org/opensearch/action/search/SearchPhaseController.java b/server/src/main/java/org/opensearch/action/search/SearchPhaseController.java index d32e7753cd153..2ca9d2d0699d7 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchPhaseController.java +++ b/server/src/main/java/org/opensearch/action/search/SearchPhaseController.java @@ -35,17 +35,22 @@ import com.carrotsearch.hppc.IntArrayList; import com.carrotsearch.hppc.ObjectObjectHashMap; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.search.CollectionStatistics; +import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.FieldDoc; +import org.apache.lucene.search.LeafFieldComparator; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; +import org.apache.lucene.search.SortedNumericSortField; import org.apache.lucene.search.TermStatistics; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopFieldDocs; import org.apache.lucene.search.TotalHits; import org.apache.lucene.search.TotalHits.Relation; +import org.apache.lucene.search.comparators.NumericComparator; import org.apache.lucene.search.grouping.CollapseTopFieldDocs; import org.opensearch.common.breaker.CircuitBreaker; import org.opensearch.common.collect.HppcMaps; @@ -72,6 +77,7 @@ import org.opensearch.search.suggest.Suggest.Suggestion; import org.opensearch.search.suggest.completion.CompletionSuggestion; +import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -237,11 +243,13 @@ static TopDocs mergeTopDocs(Collection results, int topN, int from) { } else if (topDocs instanceof CollapseTopFieldDocs) { CollapseTopFieldDocs firstTopDocs = (CollapseTopFieldDocs) topDocs; final Sort sort = new Sort(firstTopDocs.fields); + applySortFieldWidening(sort); final CollapseTopFieldDocs[] shardTopDocs = results.toArray(new CollapseTopFieldDocs[numShards]); mergedTopDocs = CollapseTopFieldDocs.merge(sort, from, topN, shardTopDocs, false); } else if (topDocs instanceof TopFieldDocs) { TopFieldDocs firstTopDocs = (TopFieldDocs) topDocs; final Sort sort = new Sort(firstTopDocs.fields); + applySortFieldWidening(sort); final TopFieldDocs[] shardTopDocs = results.toArray(new TopFieldDocs[numShards]); mergedTopDocs = TopDocs.merge(sort, from, topN, shardTopDocs); } else { @@ -600,6 +608,94 @@ private static void validateMergeSortValueFormats(Collection getComparator(int numHits, boolean enableSkipping) { + return new NumericComparator( + delegate.getField(), + (Number) delegate.getMissingValue(), + delegate.getReverse(), + enableSkipping, + bytes + ) { + @Override + public int compare(int slot1, int slot2) { + throw new UnsupportedOperationException(); + } + + @Override + public Number value(int slot) { + throw new UnsupportedOperationException(); + } + + @Override + public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public int compareValues(Number first, Number second) { + if (first == null) { + if (second == null) { + return 0; + } else { + return -1; + } + } else if (second == null) { + return 1; + } else { + if (type == Type.LONG) { + return Long.compare(first.longValue(), second.longValue()); + } else { + return Double.compare(first.doubleValue(), second.doubleValue()); + } + } + } + }; + } + }; + } + /* * Returns the size of the requested top documents (from + size) */ diff --git a/server/src/main/java/org/opensearch/index/fielddata/IndexNumericFieldData.java b/server/src/main/java/org/opensearch/index/fielddata/IndexNumericFieldData.java index 6fe79565a51e1..89d64341595b4 100644 --- a/server/src/main/java/org/opensearch/index/fielddata/IndexNumericFieldData.java +++ b/server/src/main/java/org/opensearch/index/fielddata/IndexNumericFieldData.java @@ -42,6 +42,7 @@ import org.opensearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested; import org.opensearch.index.fielddata.fieldcomparator.DoubleValuesComparatorSource; import org.opensearch.index.fielddata.fieldcomparator.FloatValuesComparatorSource; +import org.opensearch.index.fielddata.fieldcomparator.IntValuesComparatorSource; import org.opensearch.index.fielddata.fieldcomparator.LongValuesComparatorSource; import org.opensearch.search.DocValueFormat; import org.opensearch.search.MultiValueMode; @@ -65,76 +66,16 @@ public abstract class IndexNumericFieldData implements IndexFieldData results = generateQueryResults(nShards, suggestions, queryResultSize, false); - Optional first = results.asList().stream().findFirst(); - int from = 0, size = 0; - if (first.isPresent()) { - from = first.get().queryResult().from(); - size = first.get().queryResult().size(); + performSortDocs(results, queryResultSize); + } + + /** + * Test to verify merge shard results with SortField.Type.Int, document type Integer + */ + public void testSortIntFieldDocsMerge() { + List suggestions = new ArrayList<>(); + for (int i = 0; i < randomIntBetween(1, 5); i++) { + suggestions.add(new CompletionSuggestion(randomAlphaOfLength(randomIntBetween(1, 5)), randomIntBetween(1, 20), false)); } - int accumulatedLength = Math.min(queryResultSize, getTotalQueryHits(results)); - List reducedCompletionSuggestions = reducedSuggest(results); - for (Suggest.Suggestion suggestion : reducedCompletionSuggestions) { - int suggestionSize = suggestion.getEntries().get(0).getOptions().size(); - accumulatedLength += suggestionSize; + int nShards = randomIntBetween(1, 20); + int queryResultSize = randomBoolean() ? 0 : randomIntBetween(1, nShards * 2); + AtomicArray results = generateQueryResultsWithIntSortedField(nShards, suggestions, queryResultSize, false); + performSortDocs(results, queryResultSize); + } + + /** + * Test to verify merge shard results with different SortField.Type. + * Few shards with Int and few shards with Long + */ + public void testSortIntLongFieldDocsMerge() { + List suggestions = new ArrayList<>(); + for (int i = 0; i < randomIntBetween(1, 5); i++) { + suggestions.add(new CompletionSuggestion(randomAlphaOfLength(randomIntBetween(1, 5)), randomIntBetween(1, 20), false)); } - List topDocsList = new ArrayList<>(); - for (SearchPhaseResult result : results.asList()) { - QuerySearchResult queryResult = result.queryResult(); - TopDocs topDocs = queryResult.consumeTopDocs().topDocs; - SearchPhaseController.setShardIndex(topDocs, result.getShardIndex()); - topDocsList.add(topDocs); + int nShards = randomIntBetween(1, 20); + int queryResultSize = randomBoolean() ? 0 : randomIntBetween(1, nShards * 2); + AtomicArray results = generateQueryResultsWithIntLongSortedField(nShards, suggestions, queryResultSize, false); + performSortDocs(results, queryResultSize); + } + + /** + * Test to verify merge shard results with SortField.Type.Float, document type Float + */ + public void testSortFloatFieldDocsMerge() { + List suggestions = new ArrayList<>(); + for (int i = 0; i < randomIntBetween(1, 5); i++) { + suggestions.add(new CompletionSuggestion(randomAlphaOfLength(randomIntBetween(1, 5)), randomIntBetween(1, 20), false)); } - ScoreDoc[] sortedDocs = SearchPhaseController.sortDocs(true, topDocsList, from, size, reducedCompletionSuggestions).scoreDocs; - assertThat(sortedDocs.length, equalTo(accumulatedLength)); + int nShards = randomIntBetween(1, 20); + int queryResultSize = randomBoolean() ? 0 : randomIntBetween(1, nShards * 2); + AtomicArray results = generateQueryResultsWithFloatSortedField(nShards, suggestions, queryResultSize, false); + performSortDocs(results, queryResultSize); + } + + /** + * Test to verify merge shard results with different SortField.Type. + * Few shards with Float and few shards with Double + */ + public void testSortIntFloatDoubleFieldDocsMerge() { + List suggestions = new ArrayList<>(); + for (int i = 0; i < randomIntBetween(1, 5); i++) { + suggestions.add(new CompletionSuggestion(randomAlphaOfLength(randomIntBetween(1, 5)), randomIntBetween(1, 20), false)); + } + int nShards = randomIntBetween(1, 20); + int queryResultSize = randomBoolean() ? 0 : randomIntBetween(1, nShards * 2); + AtomicArray results = generateQueryResultsWithFloatDoubleSortedField( + nShards, + suggestions, + queryResultSize, + false + ); + performSortDocs(results, queryResultSize); } public void testSortDocsIsIdempotent() throws Exception { @@ -241,6 +285,30 @@ public void testSortDocsIsIdempotent() throws Exception { } } + private static void performSortDocs(AtomicArray results, int queryResultSize) { + Optional first = results.asList().stream().findFirst(); + int from = 0, size = 0; + if (first.isPresent()) { + from = first.get().queryResult().from(); + size = first.get().queryResult().size(); + } + int accumulatedLength = Math.min(queryResultSize, getTotalQueryHits(results)); + List reducedCompletionSuggestions = reducedSuggest(results); + for (Suggest.Suggestion suggestion : reducedCompletionSuggestions) { + int suggestionSize = suggestion.getEntries().get(0).getOptions().size(); + accumulatedLength += suggestionSize; + } + List topDocsList = new ArrayList<>(); + for (SearchPhaseResult result : results.asList()) { + QuerySearchResult queryResult = result.queryResult(); + TopDocs topDocs = queryResult.consumeTopDocs().topDocs; + SearchPhaseController.setShardIndex(topDocs, result.getShardIndex()); + topDocsList.add(topDocs); + } + ScoreDoc[] sortedDocs = SearchPhaseController.sortDocs(true, topDocsList, from, size, reducedCompletionSuggestions).scoreDocs; + assertThat(sortedDocs.length, equalTo(accumulatedLength)); + } + private AtomicArray generateSeededQueryResults( long seed, int nShards, @@ -389,6 +457,128 @@ private static AtomicArray generateQueryResults( return queryResults; } + private static AtomicArray generateQueryResultsWithIntSortedField( + int nShards, + List suggestions, + int searchHitsSize, + boolean useConstantScore + ) { + AtomicArray results = generateQueryResults(nShards, suggestions, searchHitsSize, false); + for (int i = 0; i < results.length(); i++) { + int nDocs = randomIntBetween(0, searchHitsSize); + float maxScore = 0; + final TopDocs topDocs = getIntTopFieldDocs(nDocs, useConstantScore); + results.get(i).queryResult().topDocs(new TopDocsAndMaxScore(topDocs, maxScore), new DocValueFormat[1]); + } + return results; + } + + private static AtomicArray generateQueryResultsWithFloatSortedField( + int nShards, + List suggestions, + int searchHitsSize, + boolean useConstantScore + ) { + AtomicArray results = generateQueryResults(nShards, suggestions, searchHitsSize, false); + for (int i = 0; i < results.length(); i++) { + int nDocs = randomIntBetween(0, searchHitsSize); + float maxScore = 0; + final TopDocs topDocs = getFloatTopFieldDocs(nDocs, useConstantScore); + results.get(i).queryResult().topDocs(new TopDocsAndMaxScore(topDocs, maxScore), new DocValueFormat[1]); + } + return results; + } + + private static AtomicArray generateQueryResultsWithIntLongSortedField( + int nShards, + List suggestions, + int searchHitsSize, + boolean useConstantScore + ) { + AtomicArray results = generateQueryResults(nShards, suggestions, searchHitsSize, false); + for (int i = 0; i < results.length(); i++) { + int nDocs = randomIntBetween(0, searchHitsSize); + float maxScore = 0; + final TopDocs topDocs; + if (i % 2 == 0) { + topDocs = getLongTopFieldDocs(nDocs, useConstantScore); + } else { + topDocs = getIntTopFieldDocs(nDocs, useConstantScore); + } + results.get(i).queryResult().topDocs(new TopDocsAndMaxScore(topDocs, maxScore), new DocValueFormat[1]); + } + return results; + } + + private static AtomicArray generateQueryResultsWithFloatDoubleSortedField( + int nShards, + List suggestions, + int searchHitsSize, + boolean useConstantScore + ) { + AtomicArray results = generateQueryResults(nShards, suggestions, searchHitsSize, false); + for (int i = 0; i < results.length(); i++) { + int nDocs = randomIntBetween(0, searchHitsSize); + float maxScore = 0; + final TopDocs topDocs; + if (i % 2 == 0) { + topDocs = getFloatTopFieldDocs(nDocs, useConstantScore); + } else { + topDocs = getDoubleTopFieldDocs(nDocs, useConstantScore); + } + results.get(i).queryResult().topDocs(new TopDocsAndMaxScore(topDocs, maxScore), new DocValueFormat[1]); + } + return results; + } + + private static TopFieldDocs getLongTopFieldDocs(int nDocs, boolean useConstantScore) { + FieldDoc[] fieldDocs = new FieldDoc[nDocs]; + SortField[] sortFields = { new SortedNumericSortField("field", SortField.Type.LONG, true) }; + float maxScore = 0; + for (int i = 0; i < nDocs; i++) { + float score = useConstantScore ? 1.0F : Math.abs(randomFloat()); + fieldDocs[i] = new FieldDoc(i, score, new Long[] { randomLong() }); + maxScore = Math.max(score, maxScore); + } + return new TopFieldDocs(new TotalHits(fieldDocs.length, TotalHits.Relation.EQUAL_TO), fieldDocs, sortFields); + } + + private static TopFieldDocs getFloatTopFieldDocs(int nDocs, boolean useConstantScore) { + FieldDoc[] fieldDocs = new FieldDoc[nDocs]; + SortField[] sortFields = { new SortedNumericSortField("field", SortField.Type.FLOAT, true) }; + float maxScore = 0; + for (int i = 0; i < nDocs; i++) { + float score = useConstantScore ? 1.0F : Math.abs(randomFloat()); + fieldDocs[i] = new FieldDoc(i, score, new Float[] { randomFloat() }); + maxScore = Math.max(score, maxScore); + } + return new TopFieldDocs(new TotalHits(fieldDocs.length, TotalHits.Relation.EQUAL_TO), fieldDocs, sortFields); + } + + private static TopFieldDocs getDoubleTopFieldDocs(int nDocs, boolean useConstantScore) { + FieldDoc[] fieldDocs = new FieldDoc[nDocs]; + SortField[] sortFields = { new SortedNumericSortField("field", SortField.Type.DOUBLE, true) }; + float maxScore = 0; + for (int i = 0; i < nDocs; i++) { + float score = useConstantScore ? 1.0F : Math.abs(randomFloat()); + fieldDocs[i] = new FieldDoc(i, score, new Double[] { randomDouble() }); + maxScore = Math.max(score, maxScore); + } + return new TopFieldDocs(new TotalHits(fieldDocs.length, TotalHits.Relation.EQUAL_TO), fieldDocs, sortFields); + } + + private static TopFieldDocs getIntTopFieldDocs(int nDocs, boolean useConstantScore) { + FieldDoc[] fieldDocs = new FieldDoc[nDocs]; + SortField[] sortFields = { new SortedNumericSortField("field", SortField.Type.INT, true) }; + float maxScore = 0; + for (int i = 0; i < nDocs; i++) { + float score = useConstantScore ? 1.0F : Math.abs(randomFloat()); + fieldDocs[i] = new FieldDoc(i, score, new Integer[] { randomInt() }); + maxScore = Math.max(score, maxScore); + } + return new TopFieldDocs(new TotalHits(fieldDocs.length, TotalHits.Relation.EQUAL_TO), fieldDocs, sortFields); + } + private static int getTotalQueryHits(AtomicArray results) { int resultCount = 0; for (SearchPhaseResult shardResult : results.asList()) {