From 3a6bdc75efb3031bcf6d4f998444464eb78b5189 Mon Sep 17 00:00:00 2001 From: Martin Gaievski Date: Wed, 4 Sep 2024 12:26:36 -0700 Subject: [PATCH] Fixed merge logic for multiple shards case (#877) * Fixed merge logic for multiple shards case Signed-off-by: Martin Gaievski --- CHANGELOG.md | 1 + .../search/query/TopDocsMerger.java | 22 ++++++- .../search/query/TopDocsMergerTests.java | 59 +++++++++++++++++++ 3 files changed, 81 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a4a4fa9f8..6a3643abb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Features ### Enhancements ### Bug Fixes +- Fixed merge logic in hybrid query for multiple shards case ([#877](https://github.com/opensearch-project/neural-search/pull/877)) ### Infrastructure - Update batch related tests to use batch_size in processor & refactor BWC version check ([#852](https://github.com/opensearch-project/neural-search/pull/852)) ### Documentation diff --git a/src/main/java/org/opensearch/neuralsearch/search/query/TopDocsMerger.java b/src/main/java/org/opensearch/neuralsearch/search/query/TopDocsMerger.java index a77ff458e..4a1955740 100644 --- a/src/main/java/org/opensearch/neuralsearch/search/query/TopDocsMerger.java +++ b/src/main/java/org/opensearch/neuralsearch/search/query/TopDocsMerger.java @@ -55,9 +55,15 @@ class TopDocsMerger { * @return merged TopDocsAndMaxScore object */ public TopDocsAndMaxScore merge(final TopDocsAndMaxScore source, final TopDocsAndMaxScore newTopDocs) { - if (Objects.isNull(newTopDocs) || Objects.isNull(newTopDocs.topDocs) || newTopDocs.topDocs.totalHits.value == 0) { + // we need to check if any of source and destination top docs are empty. This is needed for case when concurrent segment search + // is enabled. In such case search is done by multiple workers, and results are saved in multiple doc collectors. Any on those + // results can be empty, in such case we can skip actual merge logic and just return result object. + if (isEmpty(newTopDocs)) { return source; } + if (isEmpty(source)) { + return newTopDocs; + } TotalHits mergedTotalHits = getMergedTotalHits(source, newTopDocs); TopDocsAndMaxScore result = new TopDocsAndMaxScore( getTopDocs(getMergedScoreDocs(source.topDocs.scoreDocs, newTopDocs.topDocs.scoreDocs), mergedTotalHits), @@ -66,6 +72,20 @@ public TopDocsAndMaxScore merge(final TopDocsAndMaxScore source, final TopDocsAn return result; } + /** + * Checks if TopDocsAndMaxScore is null, has no top docs or zero total hits + * @param topDocsAndMaxScore + * @return + */ + private static boolean isEmpty(final TopDocsAndMaxScore topDocsAndMaxScore) { + if (Objects.isNull(topDocsAndMaxScore) + || Objects.isNull(topDocsAndMaxScore.topDocs) + || topDocsAndMaxScore.topDocs.totalHits.value == 0) { + return true; + } + return false; + } + private TotalHits getMergedTotalHits(final TopDocsAndMaxScore source, final TopDocsAndMaxScore newTopDocs) { // merged value is a lower bound - if both are equal_to than merged will also be equal_to, // otherwise assign greater_than_or_equal diff --git a/src/test/java/org/opensearch/neuralsearch/search/query/TopDocsMergerTests.java b/src/test/java/org/opensearch/neuralsearch/search/query/TopDocsMergerTests.java index d10ca0668..9c2718687 100644 --- a/src/test/java/org/opensearch/neuralsearch/search/query/TopDocsMergerTests.java +++ b/src/test/java/org/opensearch/neuralsearch/search/query/TopDocsMergerTests.java @@ -176,6 +176,65 @@ public void testMergeScoreDocs_whenBothTopDocsHasNoHits_thenSuccessful() { assertEquals(MAGIC_NUMBER_START_STOP, scoreDocs[3].score, 0); } + @SneakyThrows + public void testMergeScoreDocs_whenSomeSegmentsHasNoHits_thenSuccessful() { + // Given + TopDocsMerger topDocsMerger = new TopDocsMerger(null); + + // When + // first segment has no results, and we merge with non-empty segment + TopDocs topDocsOriginal = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[] {}); + TopDocsAndMaxScore topDocsAndMaxScoreOriginal = new TopDocsAndMaxScore(topDocsOriginal, 0); + TopDocs topDocsNew = new TopDocs( + new TotalHits(2, TotalHits.Relation.EQUAL_TO), + new ScoreDoc[] { + createStartStopElementForHybridSearchResults(0), + createDelimiterElementForHybridSearchResults(0), + new ScoreDoc(0, 0.5f), + new ScoreDoc(2, 0.3f), + createStartStopElementForHybridSearchResults(0) } + ); + TopDocsAndMaxScore topDocsAndMaxScoreNew = new TopDocsAndMaxScore(topDocsNew, 0.5f); + TopDocsAndMaxScore mergedTopDocsAndMaxScore = topDocsMerger.merge(topDocsAndMaxScoreOriginal, topDocsAndMaxScoreNew); + + // Then + assertNotNull(mergedTopDocsAndMaxScore); + + assertEquals(0.5f, mergedTopDocsAndMaxScore.maxScore, DELTA_FOR_ASSERTION); + assertEquals(2, mergedTopDocsAndMaxScore.topDocs.totalHits.value); + assertEquals(TotalHits.Relation.EQUAL_TO, mergedTopDocsAndMaxScore.topDocs.totalHits.relation); + assertEquals(5, mergedTopDocsAndMaxScore.topDocs.scoreDocs.length); + // check format, all elements one by one + ScoreDoc[] scoreDocs = mergedTopDocsAndMaxScore.topDocs.scoreDocs; + assertEquals(MAGIC_NUMBER_START_STOP, scoreDocs[0].score, 0); + assertEquals(MAGIC_NUMBER_DELIMITER, scoreDocs[1].score, 0); + assertScoreDoc(scoreDocs[2], 0, 0.5f); + assertScoreDoc(scoreDocs[3], 2, 0.3f); + assertEquals(MAGIC_NUMBER_START_STOP, scoreDocs[4].score, 0); + + // When + // source object has results, and we merge with empty segment + TopDocs topDocsNewEmpty = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[] {}); + TopDocsAndMaxScore topDocsAndMaxScoreNewEmpty = new TopDocsAndMaxScore(topDocsNewEmpty, 0); + TopDocsAndMaxScore finalMergedTopDocsAndMaxScore = topDocsMerger.merge(mergedTopDocsAndMaxScore, topDocsAndMaxScoreNewEmpty); + + // Then + // merged object remains unchanged + assertNotNull(finalMergedTopDocsAndMaxScore); + + assertEquals(0.5f, finalMergedTopDocsAndMaxScore.maxScore, DELTA_FOR_ASSERTION); + assertEquals(2, finalMergedTopDocsAndMaxScore.topDocs.totalHits.value); + assertEquals(TotalHits.Relation.EQUAL_TO, finalMergedTopDocsAndMaxScore.topDocs.totalHits.relation); + assertEquals(5, finalMergedTopDocsAndMaxScore.topDocs.scoreDocs.length); + // check format, all elements one by one + ScoreDoc[] finalScoreDocs = finalMergedTopDocsAndMaxScore.topDocs.scoreDocs; + assertEquals(MAGIC_NUMBER_START_STOP, finalScoreDocs[0].score, 0); + assertEquals(MAGIC_NUMBER_DELIMITER, finalScoreDocs[1].score, 0); + assertScoreDoc(finalScoreDocs[2], 0, 0.5f); + assertScoreDoc(finalScoreDocs[3], 2, 0.3f); + assertEquals(MAGIC_NUMBER_START_STOP, finalScoreDocs[4].score, 0); + } + @SneakyThrows public void testThreeSequentialMerges_whenAllTopDocsHasHits_thenSuccessful() { TopDocsMerger topDocsMerger = new TopDocsMerger(null);