Skip to content

Commit

Permalink
Fixed merge logic for multiple shards case (#877)
Browse files Browse the repository at this point in the history
* Fixed merge logic for multiple shards case

Signed-off-by: Martin Gaievski <gaievski@amazon.com>
  • Loading branch information
martin-gaievski authored Sep 4, 2024
1 parent e1c3878 commit 3a6bdc7
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
### Features
### Enhancements
### Bug Fixes
- Fixed merge logic in hybrid query for multiple shards case ([#877](https://github.com/opensearch-project/neural-search/pull/877))
### Infrastructure
- Update batch related tests to use batch_size in processor & refactor BWC version check ([#852](https://github.com/opensearch-project/neural-search/pull/852))
### Documentation
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,15 @@ class TopDocsMerger {
* @return merged TopDocsAndMaxScore object
*/
public TopDocsAndMaxScore merge(final TopDocsAndMaxScore source, final TopDocsAndMaxScore newTopDocs) {
if (Objects.isNull(newTopDocs) || Objects.isNull(newTopDocs.topDocs) || newTopDocs.topDocs.totalHits.value == 0) {
// we need to check if any of source and destination top docs are empty. This is needed for case when concurrent segment search
// is enabled. In such case search is done by multiple workers, and results are saved in multiple doc collectors. Any on those
// results can be empty, in such case we can skip actual merge logic and just return result object.
if (isEmpty(newTopDocs)) {
return source;
}
if (isEmpty(source)) {
return newTopDocs;
}
TotalHits mergedTotalHits = getMergedTotalHits(source, newTopDocs);
TopDocsAndMaxScore result = new TopDocsAndMaxScore(
getTopDocs(getMergedScoreDocs(source.topDocs.scoreDocs, newTopDocs.topDocs.scoreDocs), mergedTotalHits),
Expand All @@ -66,6 +72,20 @@ public TopDocsAndMaxScore merge(final TopDocsAndMaxScore source, final TopDocsAn
return result;
}

/**
* Checks if TopDocsAndMaxScore is null, has no top docs or zero total hits
* @param topDocsAndMaxScore
* @return
*/
private static boolean isEmpty(final TopDocsAndMaxScore topDocsAndMaxScore) {
if (Objects.isNull(topDocsAndMaxScore)
|| Objects.isNull(topDocsAndMaxScore.topDocs)
|| topDocsAndMaxScore.topDocs.totalHits.value == 0) {
return true;
}
return false;
}

private TotalHits getMergedTotalHits(final TopDocsAndMaxScore source, final TopDocsAndMaxScore newTopDocs) {
// merged value is a lower bound - if both are equal_to than merged will also be equal_to,
// otherwise assign greater_than_or_equal
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,65 @@ public void testMergeScoreDocs_whenBothTopDocsHasNoHits_thenSuccessful() {
assertEquals(MAGIC_NUMBER_START_STOP, scoreDocs[3].score, 0);
}

@SneakyThrows
public void testMergeScoreDocs_whenSomeSegmentsHasNoHits_thenSuccessful() {
// Given
TopDocsMerger topDocsMerger = new TopDocsMerger(null);

// When
// first segment has no results, and we merge with non-empty segment
TopDocs topDocsOriginal = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[] {});
TopDocsAndMaxScore topDocsAndMaxScoreOriginal = new TopDocsAndMaxScore(topDocsOriginal, 0);
TopDocs topDocsNew = new TopDocs(
new TotalHits(2, TotalHits.Relation.EQUAL_TO),
new ScoreDoc[] {
createStartStopElementForHybridSearchResults(0),
createDelimiterElementForHybridSearchResults(0),
new ScoreDoc(0, 0.5f),
new ScoreDoc(2, 0.3f),
createStartStopElementForHybridSearchResults(0) }
);
TopDocsAndMaxScore topDocsAndMaxScoreNew = new TopDocsAndMaxScore(topDocsNew, 0.5f);
TopDocsAndMaxScore mergedTopDocsAndMaxScore = topDocsMerger.merge(topDocsAndMaxScoreOriginal, topDocsAndMaxScoreNew);

// Then
assertNotNull(mergedTopDocsAndMaxScore);

assertEquals(0.5f, mergedTopDocsAndMaxScore.maxScore, DELTA_FOR_ASSERTION);
assertEquals(2, mergedTopDocsAndMaxScore.topDocs.totalHits.value);
assertEquals(TotalHits.Relation.EQUAL_TO, mergedTopDocsAndMaxScore.topDocs.totalHits.relation);
assertEquals(5, mergedTopDocsAndMaxScore.topDocs.scoreDocs.length);
// check format, all elements one by one
ScoreDoc[] scoreDocs = mergedTopDocsAndMaxScore.topDocs.scoreDocs;
assertEquals(MAGIC_NUMBER_START_STOP, scoreDocs[0].score, 0);
assertEquals(MAGIC_NUMBER_DELIMITER, scoreDocs[1].score, 0);
assertScoreDoc(scoreDocs[2], 0, 0.5f);
assertScoreDoc(scoreDocs[3], 2, 0.3f);
assertEquals(MAGIC_NUMBER_START_STOP, scoreDocs[4].score, 0);

// When
// source object has results, and we merge with empty segment
TopDocs topDocsNewEmpty = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[] {});
TopDocsAndMaxScore topDocsAndMaxScoreNewEmpty = new TopDocsAndMaxScore(topDocsNewEmpty, 0);
TopDocsAndMaxScore finalMergedTopDocsAndMaxScore = topDocsMerger.merge(mergedTopDocsAndMaxScore, topDocsAndMaxScoreNewEmpty);

// Then
// merged object remains unchanged
assertNotNull(finalMergedTopDocsAndMaxScore);

assertEquals(0.5f, finalMergedTopDocsAndMaxScore.maxScore, DELTA_FOR_ASSERTION);
assertEquals(2, finalMergedTopDocsAndMaxScore.topDocs.totalHits.value);
assertEquals(TotalHits.Relation.EQUAL_TO, finalMergedTopDocsAndMaxScore.topDocs.totalHits.relation);
assertEquals(5, finalMergedTopDocsAndMaxScore.topDocs.scoreDocs.length);
// check format, all elements one by one
ScoreDoc[] finalScoreDocs = finalMergedTopDocsAndMaxScore.topDocs.scoreDocs;
assertEquals(MAGIC_NUMBER_START_STOP, finalScoreDocs[0].score, 0);
assertEquals(MAGIC_NUMBER_DELIMITER, finalScoreDocs[1].score, 0);
assertScoreDoc(finalScoreDocs[2], 0, 0.5f);
assertScoreDoc(finalScoreDocs[3], 2, 0.3f);
assertEquals(MAGIC_NUMBER_START_STOP, finalScoreDocs[4].score, 0);
}

@SneakyThrows
public void testThreeSequentialMerges_whenAllTopDocsHasHits_thenSuccessful() {
TopDocsMerger topDocsMerger = new TopDocsMerger(null);
Expand Down

0 comments on commit 3a6bdc7

Please sign in to comment.