diff --git a/.github/workflows/backwards_compatibility_tests_workflow.yml b/.github/workflows/backwards_compatibility_tests_workflow.yml index 4d552a892..893202d29 100644 --- a/.github/workflows/backwards_compatibility_tests_workflow.yml +++ b/.github/workflows/backwards_compatibility_tests_workflow.yml @@ -15,8 +15,8 @@ jobs: matrix: java: [ 11, 17, 21 ] os: [ubuntu-latest,windows-latest] - bwc_version : ["2.9.0","2.10.0","2.11.0","2.12.0","2.13.0","2.14.0","2.15.0","2.16.0"] - opensearch_version : [ "2.17.0-SNAPSHOT" ] + bwc_version : ["2.9.0","2.10.0","2.11.0","2.12.0","2.13.0","2.14.0","2.15.0","2.16.0","2.17.0"] + opensearch_version : [ "2.18.0-SNAPSHOT" ] name: NeuralSearch Restart-Upgrade BWC Tests runs-on: ${{ matrix.os }} @@ -42,8 +42,8 @@ jobs: matrix: java: [ 11, 17, 21 ] os: [ubuntu-latest,windows-latest] - bwc_version: [ "2.11.0","2.12.0","2.13.0","2.14.0","2.15.0", "2.16.0" ] - opensearch_version: [ "2.17.0-SNAPSHOT" ] + bwc_version: [ "2.11.0","2.12.0","2.13.0","2.14.0","2.15.0", "2.16.0", "2.17.0" ] + opensearch_version: [ "2.18.0-SNAPSHOT" ] name: NeuralSearch Rolling-Upgrade BWC Tests runs-on: ${{ matrix.os }} diff --git a/.github/workflows/changelog_verifier.yml b/.github/workflows/changelog_verifier.yml index 992a38b62..9c9ae9cbd 100644 --- a/.github/workflows/changelog_verifier.yml +++ b/.github/workflows/changelog_verifier.yml @@ -1,7 +1,7 @@ name: "Changelog Verifier" on: pull_request: - types: [opened, edited, review_requested, synchronize, reopened, ready_for_review, labeled, unlabeled] + types: [opened, synchronize, reopened, ready_for_review, labeled, unlabeled] jobs: # Enforces the update of a changelog file on every pull request diff --git a/.gitignore b/.gitignore index bf7d6f02a..b154373c7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,27 @@ -# Ignore Gradle project-specific cache directory -.gradle +# intellij files +.idea/ +*.iml +*.ipr +*.iws +*.log +build-idea/ +out/ -# Ignore Gradle build output directory -build -.idea -.DS_Store -.gitattributes +# eclipse files +.classpath +.project +.settings + +# gradle stuff +.gradle/ +build/ +bin/ +# vscode stuff +.vscode/ +# osx stuff +.DS_Store + +# git stuff +.gitattributes diff --git a/CHANGELOG.md b/CHANGELOG.md index 62e925bb0..fc234e379 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,14 +7,17 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Features ### Enhancements ### Bug Fixes +- Fix for nested field missing sub embedding field in text embedding processor ([#913](https://github.com/opensearch-project/neural-search/pull/913)) ### Infrastructure ### Documentation ### Maintenance ### Refactoring -## [Unreleased 2.x](https://github.com/opensearch-project/neural-search/compare/2.16...2.x) +## [Unreleased 2.x](https://github.com/opensearch-project/neural-search/compare/2.17...2.x) ### Features ### Enhancements +- Implement `ignore_missing` field in text chunking processors ([#907](https://github.com/opensearch-project/neural-search/pull/907)) +- Added rescorer in hybrid query ([#917](https://github.com/opensearch-project/neural-search/pull/917)) ### Bug Fixes ### Infrastructure - Update batch related tests to use batch_size in processor & refactor BWC version check ([#852](https://github.com/opensearch-project/neural-search/pull/852)) diff --git a/DEVELOPER_GUIDE.md b/DEVELOPER_GUIDE.md index 47ae31be6..dccc260df 100644 --- a/DEVELOPER_GUIDE.md +++ b/DEVELOPER_GUIDE.md @@ -11,10 +11,12 @@ - [Run Single-node Cluster Locally](#run-single-node-cluster-locally) - [Run 
Multi-node Cluster Locally](#run-multi-node-cluster-locally) - [Debugging](#debugging) + - [Major Dependencies](#major-dependencies) - [Backwards Compatibility Testing](#backwards-compatibility-testing) - [Adding new tests](#adding-new-tests) - [Supported configurations](#supported-configurations) - [Submitting Changes](#submitting-changes) + - [Building On Lucene Version Updates](#building-on-lucene-version-updates) # Developer Guide @@ -88,12 +90,22 @@ Please follow these formatting guidelines: OpenSearch neural-search uses a [Gradle](https://docs.gradle.org/6.6.1/userguide/userguide.html) wrapper for its build. Run `gradlew` on Unix systems. -Build OpenSearch neural-search using `gradlew build` +Build OpenSearch neural-search using `gradlew build`. This command +also runs the integration tests and unit tests. ``` ./gradlew build ``` +## Run Unit Tests +If you only want to verify that the unit tests pass, +you can run the following. + +``` +./gradlew test +``` + + ## Run OpenSearch neural-search ### Run Single-node Cluster Locally @@ -227,6 +239,12 @@ Additionally, it is possible to attach one debugger to the cluster JVM and anoth ./gradlew :integTest -Dtest.debug=1 -Dcluster.debug=1 ``` +#### Major Dependencies +Currently, the major dependencies of Neural Search are [ML-Commons](https://github.com/opensearch-project/ml-commons) and [K-NN](https://github.com/opensearch-project/k-NN). +Check them first when you observe a failure that affects Neural Search. +See [Building on Lucene Version updates](#building-on-lucene-version-updates) for an example where K-NN caused a build failure. +Also note that it may take some time for a fix for a dependency issue to land. ## Backwards Compatibility Testing The purpose of Backwards Compatibility Testing and different types of BWC tests are explained [here](https://github.com/opensearch-project/opensearch-plugins/blob/main/TESTING.md#backwards-compatibility-testing). The BWC tests (i.e. Restart-Upgrade, Mixed-Cluster and Rolling-Upgrade scenarios) should be added with any new feature being added to Neural Search. @@ -292,3 +310,31 @@ original PR with an appropriate label `backport ` is merge run successfully on the PR. For example, if a PR on main needs to be backported to `2.x` branch, add a label `backport 2.x` to the PR and make sure the backport workflow runs on the PR along with other checks. Once this PR is merged to main, the workflow will create a backport PR to the `2.x` branch. + +## Building On Lucene Version Updates +A Lucene version update can affect your workflow, causing errors like +`java.lang.NoClassDefFoundError: org/apache/lucene/codecs/lucene99/Lucene99Codec` or +`Provider org.opensearch.knn.index.codec.KNN910Codec.KNN910Codec could not be instantiated`. Errors like these +usually indicate a dependency issue with [K-NN](https://github.com/opensearch-project/k-NN), +and they can prevent `./gradlew run` and `./gradlew build` from completing. + +See this [K-NN PR](https://github.com/opensearch-project/k-NN/pull/2195) for an example of this happening, or this [Neural Search PR](https://github.com/opensearch-project/neural-search/pull/913#issuecomment-2400189329) where a developer works +through the same build issue. + +**Follow these steps to remedy the `./gradlew run` issue.** +1. From your cloned neural-search repo root directory, run `rm -rf build .gradle` +2. 
Clear the following directories from the `~/.gradle` folder in your home directory + 1. `cd ~/.gradle` + 2. `rm -rf caches workers wrapper daemon` + 3. `cd -` to switch back to the previous directory (i.e. the neural-search repo root directory) +3. Finally, run `./gradlew run` + +**Follow these steps to remedy the `./gradlew build` issue.** + +**PREREQ:** Make sure you have the OpenSearch repo cloned locally + +1. From your cloned neural-search repo root directory, run `rm -rf build .gradle` +2. Delete the `.gradle` and `.m2` folders: `rm -rf ~/.gradle ~/.m2` +3. Head over to your cloned OpenSearch repo root directory + 1. Run `./gradlew publishToMavenLocal` +4. Finally, run `./gradlew build` from the neural-search repo diff --git a/build.gradle b/build.gradle index 626862372..468ad0dfb 100644 --- a/build.gradle +++ b/build.gradle @@ -14,7 +14,7 @@ import java.util.concurrent.Callable buildscript { ext { - opensearch_version = System.getProperty("opensearch.version", "2.17.0-SNAPSHOT") + opensearch_version = System.getProperty("opensearch.version", "2.18.0-SNAPSHOT") buildVersionQualifier = System.getProperty("build.version_qualifier", "") isSnapshot = "true" == System.getProperty("build.snapshot", "true") version_tokens = opensearch_version.tokenize('-') diff --git a/gradle.properties b/gradle.properties index d275429d7..7817ccbfb 100644 --- a/gradle.properties +++ b/gradle.properties @@ -7,8 +7,8 @@ # https://github.com/opensearch-project/OpenSearch/blob/main/libs/core/src/main/java/org/opensearch/Version.java . # Wired compatibility of OpenSearch works like 3.x version is compatible with 2.(latest-major) version. # Therefore, to run rolling-upgrade BWC Test on local machine the BWC version here should be set 2.(latest-major). -systemProp.bwc.version=2.17.0-SNAPSHOT -systemProp.bwc.bundle.version=2.16.0 +systemProp.bwc.version=2.18.0-SNAPSHOT +systemProp.bwc.bundle.version=2.17.0 # For fixing Spotless check with Java 17 org.gradle.jvmargs=--add-exports jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED \ diff --git a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/HybridSearchIT.java b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/HybridSearchIT.java index 845396dd0..fe69c577e 100644 --- a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/HybridSearchIT.java +++ b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/HybridSearchIT.java @@ -18,6 +18,8 @@ import static org.opensearch.neuralsearch.util.TestUtils.TEXT_EMBEDDING_PROCESSOR; import static org.opensearch.neuralsearch.util.TestUtils.DEFAULT_NORMALIZATION_METHOD; import static org.opensearch.neuralsearch.util.TestUtils.DEFAULT_COMBINATION_METHOD; + +import org.opensearch.knn.index.query.rescore.RescoreContext; import org.opensearch.neuralsearch.query.HybridQueryBuilder; import org.opensearch.neuralsearch.query.NeuralQueryBuilder; @@ -69,8 +71,10 @@ private void validateNormalizationProcessor(final String fileName, final String modelId = getModelId(getIngestionPipeline(pipelineName), TEXT_EMBEDDING_PROCESSOR); loadModel(modelId); addDocuments(getIndexNameForTest(), false); - validateTestIndex(modelId, getIndexNameForTest(), searchPipelineName); - validateTestIndex(modelId, getIndexNameForTest(), searchPipelineName, Map.of("ef_search", 100)); + HybridQueryBuilder hybridQueryBuilder = getQueryBuilder(modelId, null, null); + validateTestIndex(getIndexNameForTest(), searchPipelineName, hybridQueryBuilder); + hybridQueryBuilder = getQueryBuilder(modelId, Map.of("ef_search", 100), 
RescoreContext.getDefault()); + validateTestIndex(getIndexNameForTest(), searchPipelineName, hybridQueryBuilder); } finally { wipeOfTestResources(getIndexNameForTest(), pipelineName, modelId, searchPipelineName); } @@ -98,15 +102,10 @@ private void createSearchPipeline(final String pipelineName) { ); } - private void validateTestIndex(final String modelId, final String index, final String searchPipeline) { - validateTestIndex(modelId, index, searchPipeline, null); - } - - private void validateTestIndex(final String modelId, final String index, final String searchPipeline, Map methodParameters) { + private void validateTestIndex(final String index, final String searchPipeline, HybridQueryBuilder queryBuilder) { int docCount = getDocCount(index); assertEquals(6, docCount); - HybridQueryBuilder hybridQueryBuilder = getQueryBuilder(modelId, methodParameters); - Map searchResponseAsMap = search(index, hybridQueryBuilder, null, 1, Map.of("search_pipeline", searchPipeline)); + Map searchResponseAsMap = search(index, queryBuilder, null, 1, Map.of("search_pipeline", searchPipeline)); assertNotNull(searchResponseAsMap); int hits = getHitCount(searchResponseAsMap); assertEquals(1, hits); @@ -116,7 +115,7 @@ private void validateTestIndex(final String modelId, final String index, final S } } - private HybridQueryBuilder getQueryBuilder(final String modelId, Map methodParameters) { + private HybridQueryBuilder getQueryBuilder(final String modelId, Map methodParameters, RescoreContext rescoreContext) { NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder(); neuralQueryBuilder.fieldName("passage_embedding"); neuralQueryBuilder.modelId(modelId); @@ -125,6 +124,9 @@ private HybridQueryBuilder getQueryBuilder(final String modelId, Map if (methodParameters != null) { neuralQueryBuilder.methodParameters(methodParameters); } + if (rescoreContext != null) { + neuralQueryBuilder.rescoreContext(rescoreContext); + } MatchQueryBuilder matchQueryBuilder = new MatchQueryBuilder("text", QUERY); diff --git a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/KnnRadialSearchIT.java b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/KnnRadialSearchIT.java index ece2bbb9e..d0994e711 100644 --- a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/KnnRadialSearchIT.java +++ b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/KnnRadialSearchIT.java @@ -61,6 +61,7 @@ private void validateIndexQuery(final String modelId) { 0.01f, null, null, + null, null ); Map responseWithMinScoreQuery = search(getIndexNameForTest(), neuralQueryBuilderWithMinScoreQuery, 1); @@ -76,6 +77,7 @@ private void validateIndexQuery(final String modelId) { null, null, null, + null, null ); Map responseWithMaxDistanceQuery = search(getIndexNameForTest(), neuralQueryBuilderWithMaxDistanceQuery, 1); diff --git a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/MultiModalSearchIT.java b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/MultiModalSearchIT.java index 54d993b35..f35227041 100644 --- a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/MultiModalSearchIT.java +++ b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/MultiModalSearchIT.java @@ -63,6 +63,7 @@ private void validateTestIndex(final String modelId) throws Exception { null, null, null, + null, null ); Map response = search(getIndexNameForTest(), neuralQueryBuilder, 1);
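For reviewers following the test refactor above: `validateTestIndex` no longer builds the query itself, so every call site now assembles a `HybridQueryBuilder` up front. Below is a minimal sketch of that assembly, using only builder calls that appear in this diff; the `passage_embedding` and `text` field names come from the tests, and the wrapper class is purely illustrative:

```java
import java.util.Map;

import org.opensearch.index.query.MatchQueryBuilder;
import org.opensearch.knn.index.query.rescore.RescoreContext;
import org.opensearch.neuralsearch.query.HybridQueryBuilder;
import org.opensearch.neuralsearch.query.NeuralQueryBuilder;

// Illustrative wrapper; the real logic lives in the BWC tests' getQueryBuilder.
final class HybridQuerySketch {
    static HybridQueryBuilder build(String modelId, String queryText) {
        NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder();
        neuralQueryBuilder.fieldName("passage_embedding");
        neuralQueryBuilder.modelId(modelId);
        neuralQueryBuilder.queryText(queryText);
        neuralQueryBuilder.k(5);
        // optional knn method parameters, e.g. ef_search
        neuralQueryBuilder.methodParameters(Map.of("ef_search", 100));
        // optional rescore context; the tests pass RescoreContext.getDefault()
        neuralQueryBuilder.rescoreContext(RescoreContext.getDefault());

        // the hybrid query combines the lexical and neural sub-queries
        HybridQueryBuilder hybridQueryBuilder = new HybridQueryBuilder();
        hybridQueryBuilder.add(new MatchQueryBuilder("text", queryText));
        hybridQueryBuilder.add(neuralQueryBuilder);
        return hybridQueryBuilder;
    }
}
```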
diff --git a/qa/rolling-upgrade/build.gradle b/qa/rolling-upgrade/build.gradle index 68e2c5566..8dfc7d30c 100644 --- a/qa/rolling-upgrade/build.gradle +++ b/qa/rolling-upgrade/build.gradle @@ -84,12 +84,19 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) { // Excluding the tests because we introduce these features in 2.13 if (versionsBelow2_13.any { ext.neural_search_bwc_version.startsWith(it) }){ - filter { + filter { excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow" excludeTestsMatching "org.opensearch.neuralsearch.bwc.TextChunkingProcessorIT.*" } } + // Excluding the test because hybrid query with rescore is not compatible with 2.14 and lower + if (versionsBelow2_15.any { ext.neural_search_bwc_version.startsWith(it) }){ + filter { + excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchWithRescoreIT.*" + } + } + // Excluding the k-NN radial search and batch ingestion tests because we introduce these features in 2.14 if (versionsBelow2_14.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { @@ -154,6 +161,13 @@ task testAgainstOneThirdUpgradedCluster(type: StandaloneRestIntegTestTask) { excludeTestsMatching "org.opensearch.neuralsearch.bwc.TextChunkingProcessorIT.*" } } + + // Excluding the test because hybrid query with rescore is not compatible with 2.14 and lower + if (versionsBelow2_15.any { ext.neural_search_bwc_version.startsWith(it) }){ + filter { + excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchWithRescoreIT.*" + } + } // Excluding the k-NN radial search and batch ingestion tests because we introduce these features in 2.14 if (versionsBelow2_14.any { ext.neural_search_bwc_version.startsWith(it) }){ @@ -219,6 +233,13 @@ task testAgainstTwoThirdsUpgradedCluster(type: StandaloneRestIntegTestTask) { excludeTestsMatching "org.opensearch.neuralsearch.bwc.TextChunkingProcessorIT.*" } } + + // Excluding the test because hybrid query with rescore is not compatible with 2.14 and lower + if (versionsBelow2_15.any { ext.neural_search_bwc_version.startsWith(it) }){ + filter { + excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchWithRescoreIT.*" + } + } // Excluding the k-NN radial search and batch ingestion tests because we introduce these features in 2.14 if (versionsBelow2_14.any { ext.neural_search_bwc_version.startsWith(it) }){ @@ -284,6 +305,13 @@ task testRollingUpgrade(type: StandaloneRestIntegTestTask) { excludeTestsMatching "org.opensearch.neuralsearch.bwc.TextChunkingProcessorIT.*" } } + + // Excluding the test because hybrid query with rescore is not compatible with 2.14 and lower + if (versionsBelow2_15.any { ext.neural_search_bwc_version.startsWith(it) }){ + filter { + excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchWithRescoreIT.*" + } + } // Excluding the k-NN radial search and batch ingestion tests because we introduce these features in 2.14 if (versionsBelow2_14.any { ext.neural_search_bwc_version.startsWith(it) }){ diff --git a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/HybridSearchIT.java b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/HybridSearchIT.java index ba2ff7979..eeae7f7dd 100644 --- a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/HybridSearchIT.java +++ b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/HybridSearchIT.java @@ -16,6 +16,8 @@ import static org.opensearch.neuralsearch.util.TestUtils.DEFAULT_NORMALIZATION_METHOD; import static 
org.opensearch.neuralsearch.util.TestUtils.DEFAULT_COMBINATION_METHOD; import static org.opensearch.neuralsearch.util.TestUtils.getModelId; + +import org.opensearch.knn.index.query.rescore.RescoreContext; import org.opensearch.neuralsearch.query.HybridQueryBuilder; import org.opensearch.neuralsearch.query.NeuralQueryBuilder; @@ -59,11 +61,13 @@ public void testNormalizationProcessor_whenIndexWithMultipleShards_E2EFlow() thr int totalDocsCountMixed; if (isFirstMixedRound()) { totalDocsCountMixed = NUM_DOCS_PER_ROUND; - validateTestIndexOnUpgrade(totalDocsCountMixed, modelId); + HybridQueryBuilder hybridQueryBuilder = getQueryBuilder(modelId, null, null); + validateTestIndexOnUpgrade(totalDocsCountMixed, modelId, hybridQueryBuilder); addDocument(getIndexNameForTest(), "1", TEST_FIELD, TEXT_MIXED, null, null); } else { totalDocsCountMixed = 2 * NUM_DOCS_PER_ROUND; - validateTestIndexOnUpgrade(totalDocsCountMixed, modelId); + HybridQueryBuilder hybridQueryBuilder = getQueryBuilder(modelId, null, null); + validateTestIndexOnUpgrade(totalDocsCountMixed, modelId, hybridQueryBuilder); } break; case UPGRADED: @@ -72,8 +76,10 @@ public void testNormalizationProcessor_whenIndexWithMultipleShards_E2EFlow() thr int totalDocsCountUpgraded = 3 * NUM_DOCS_PER_ROUND; loadModel(modelId); addDocument(getIndexNameForTest(), "2", TEST_FIELD, TEXT_UPGRADED, null, null); - validateTestIndexOnUpgrade(totalDocsCountUpgraded, modelId); - validateTestIndexOnUpgrade(totalDocsCountUpgraded, modelId, Map.of("ef_search", 100)); + HybridQueryBuilder hybridQueryBuilder = getQueryBuilder(modelId, null, null); + validateTestIndexOnUpgrade(totalDocsCountUpgraded, modelId, hybridQueryBuilder); + hybridQueryBuilder = getQueryBuilder(modelId, Map.of("ef_search", 100), RescoreContext.getDefault()); + validateTestIndexOnUpgrade(totalDocsCountUpgraded, modelId, hybridQueryBuilder); } finally { wipeOfTestResources(getIndexNameForTest(), PIPELINE_NAME, modelId, SEARCH_PIPELINE_NAME); } @@ -83,16 +89,11 @@ public void testNormalizationProcessor_whenIndexWithMultipleShards_E2EFlow() thr } } - private void validateTestIndexOnUpgrade(final int numberOfDocs, final String modelId) throws Exception { - validateTestIndexOnUpgrade(numberOfDocs, modelId, null); - } - - private void validateTestIndexOnUpgrade(final int numberOfDocs, final String modelId, Map methodParameters) + private void validateTestIndexOnUpgrade(final int numberOfDocs, final String modelId, HybridQueryBuilder hybridQueryBuilder) throws Exception { int docCount = getDocCount(getIndexNameForTest()); assertEquals(numberOfDocs, docCount); loadModel(modelId); - HybridQueryBuilder hybridQueryBuilder = getQueryBuilder(modelId, methodParameters); Map searchResponseAsMap = search( getIndexNameForTest(), hybridQueryBuilder, @@ -109,7 +110,11 @@ private void validateTestIndexOnUpgrade(final int numberOfDocs, final String mod } } - private HybridQueryBuilder getQueryBuilder(final String modelId, final Map methodParameters) { + private HybridQueryBuilder getQueryBuilder( + final String modelId, + final Map methodParameters, + final RescoreContext rescoreContext + ) { NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder(); neuralQueryBuilder.fieldName("passage_embedding"); neuralQueryBuilder.modelId(modelId); @@ -118,6 +123,9 @@ private HybridQueryBuilder getQueryBuilder(final String modelId, final Map searchResponseAsMap = search( + getIndexNameForTest(), + hybridQueryBuilder, + rescorer, + 1, + Map.of("search_pipeline", SEARCH_PIPELINE_NAME) + ); + 
assertNotNull(searchResponseAsMap); + int hits = getHitCount(searchResponseAsMap); + assertEquals(1, hits); + List scoresList = getNormalizationScoreList(searchResponseAsMap); + for (Double score : scoresList) { + assertTrue(0 <= score && score <= 2); + } + } + + private HybridQueryBuilder getQueryBuilder( + final String modelId, + final Map methodParameters, + final RescoreContext rescoreContextForNeuralQuery + ) { + NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder(); + neuralQueryBuilder.fieldName(VECTOR_EMBEDDING_FIELD); + neuralQueryBuilder.modelId(modelId); + neuralQueryBuilder.queryText(QUERY); + neuralQueryBuilder.k(5); + if (methodParameters != null) { + neuralQueryBuilder.methodParameters(methodParameters); + } + if (Objects.nonNull(rescoreContextForNeuralQuery)) { + neuralQueryBuilder.rescoreContext(rescoreContextForNeuralQuery); + } + + MatchQueryBuilder matchQueryBuilder = new MatchQueryBuilder("text", QUERY); + + HybridQueryBuilder hybridQueryBuilder = new HybridQueryBuilder(); + hybridQueryBuilder.add(matchQueryBuilder); + hybridQueryBuilder.add(neuralQueryBuilder); + + return hybridQueryBuilder; + } +} diff --git a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/KnnRadialSearchIT.java b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/KnnRadialSearchIT.java index 17d15898b..391e2135d 100644 --- a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/KnnRadialSearchIT.java +++ b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/KnnRadialSearchIT.java @@ -87,6 +87,7 @@ private void validateIndexQueryOnUpgrade(final int numberOfDocs, final String mo 0.01f, null, null, + null, null ); Map responseWithMinScore = search(getIndexNameForTest(), neuralQueryBuilderWithMinScoreQuery, 1); @@ -102,6 +103,7 @@ private void validateIndexQueryOnUpgrade(final int numberOfDocs, final String mo null, null, null, + null, null ); Map responseWithMaxScore = search(getIndexNameForTest(), neuralQueryBuilderWithMaxDistanceQuery, 1); diff --git a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/MultiModalSearchIT.java b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/MultiModalSearchIT.java index 8e0ff7568..72976770d 100644 --- a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/MultiModalSearchIT.java +++ b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/MultiModalSearchIT.java @@ -86,6 +86,7 @@ private void validateTestIndexOnUpgrade(final int numberOfDocs, final String mod null, null, null, + null, null ); Map responseWithKQuery = search(getIndexNameForTest(), neuralQueryBuilderWithKQuery, 1); diff --git a/release-notes/opensearch-neural-search.release-notes-2.17.0.0.md b/release-notes/opensearch-neural-search.release-notes-2.17.0.0.md new file mode 100644 index 000000000..6ee7c653a --- /dev/null +++ b/release-notes/opensearch-neural-search.release-notes-2.17.0.0.md @@ -0,0 +1,11 @@ +## Version 2.17.0.0 Release Notes + +Compatible with OpenSearch 2.17.0 + +### Enhancements +- Adds rescore parameter support ([#885](https://github.com/opensearch-project/neural-search/pull/885)) +### Bug Fixes +- Removing code to cut search results of hybrid search in the priority queue ([#867](https://github.com/opensearch-project/neural-search/pull/867)) +- Fixed merge logic in hybrid query for multiple shards case ([#877](https://github.com/opensearch-project/neural-search/pull/877)) +### Infrastructure +- Update batch related tests to use batch_size in processor & refactor BWC 
version check ([#852](https://github.com/opensearch-project/neural-search/pull/852)) \ No newline at end of file diff --git a/src/main/java/org/opensearch/neuralsearch/processor/InferenceProcessor.java b/src/main/java/org/opensearch/neuralsearch/processor/InferenceProcessor.java index 30780a3f5..ae996251d 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/InferenceProcessor.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/InferenceProcessor.java @@ -10,6 +10,7 @@ import java.util.Collections; import java.util.Comparator; import java.util.HashMap; +import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -285,7 +286,7 @@ private void createInferenceListForMapTypeInput(Object sourceValue, List if (sourceValue instanceof Map) { ((Map) sourceValue).forEach((k, v) -> createInferenceListForMapTypeInput(v, texts)); } else if (sourceValue instanceof List) { - texts.addAll(((List) sourceValue)); + ((List) sourceValue).stream().filter(Objects::nonNull).forEach(texts::add); } else { if (sourceValue == null) return; texts.add(sourceValue.toString()); @@ -419,8 +420,12 @@ private void putNLPResultToSourceMapForMapType( for (Map.Entry inputNestedMapEntry : ((Map) sourceValue).entrySet()) { if (sourceAndMetadataMap.get(processorKey) instanceof List) { // build nlp output for list of nested objects + Iterator inputNestedMapValueIt = ((List) inputNestedMapEntry.getValue()).iterator(); for (Map nestedElement : (List>) sourceAndMetadataMap.get(processorKey)) { - nestedElement.put(inputNestedMapEntry.getKey(), results.get(indexWrapper.index++)); + // Only fill in when value is not null + if (inputNestedMapValueIt.hasNext() && inputNestedMapValueIt.next() != null) { + nestedElement.put(inputNestedMapEntry.getKey(), results.get(indexWrapper.index++)); + } } } else { Pair processedNestedKey = processNestedKey(inputNestedMapEntry);
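The two `InferenceProcessor` hunks above are coupled: the input side now drops nulls before inference, and the output side must skip exactly the same positions so each remaining embedding lands on the right nested element. Here is a self-contained sketch of that invariant, using plain strings in place of the processor's real types (the `emb(...)` values stand in for inference results and are purely illustrative):

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;

// Sketch of the null-alignment invariant in InferenceProcessor.
public class NullAlignmentSketch {
    public static void main(String[] args) {
        List<Object> sourceValues = Arrays.asList("hello", null, "world");

        // input side (createInferenceListForMapTypeInput): drop nulls
        List<String> texts = new ArrayList<>();
        sourceValues.stream().filter(Objects::nonNull).forEach(v -> texts.add(v.toString()));

        // fake inference: one result per non-null input, in order
        List<String> results = List.of("emb(hello)", "emb(world)");

        // output side (putNLPResultToSourceMapForMapType): walk the source
        // values in lockstep and only consume a result for non-null ones
        int index = 0;
        Iterator<Object> sourceIt = sourceValues.iterator();
        while (sourceIt.hasNext()) {
            Object value = sourceIt.next();
            if (value != null) {
                System.out.println(value + " -> " + results.get(index++));
            } else {
                System.out.println("null -> skipped, no embedding consumed");
            }
        }
    }
}
```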
diff --git a/src/main/java/org/opensearch/neuralsearch/processor/TextChunkingProcessor.java b/src/main/java/org/opensearch/neuralsearch/processor/TextChunkingProcessor.java index 49435746c..bc1945bee 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/TextChunkingProcessor.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/TextChunkingProcessor.java @@ -46,10 +46,13 @@ public final class TextChunkingProcessor extends AbstractProcessor { public static final String FIELD_MAP_FIELD = "field_map"; public static final String ALGORITHM_FIELD = "algorithm"; private static final String DEFAULT_ALGORITHM = FixedTokenLengthChunker.ALGORITHM_NAME; + public static final String IGNORE_MISSING = "ignore_missing"; + public static final boolean DEFAULT_IGNORE_MISSING = false; private int maxChunkLimit; private Chunker chunker; private final Map fieldMap; + private final boolean ignoreMissing; private final ClusterService clusterService; private final AnalysisRegistry analysisRegistry; private final Environment environment; @@ -59,12 +62,14 @@ public TextChunkingProcessor( final String description, final Map fieldMap, final Map algorithmMap, + final boolean ignoreMissing, final Environment environment, final ClusterService clusterService, final AnalysisRegistry analysisRegistry ) { super(tag, description); this.fieldMap = fieldMap; + this.ignoreMissing = ignoreMissing; this.environment = environment; this.clusterService = clusterService; this.analysisRegistry = analysisRegistry; @@ -75,6 +80,11 @@ public String getType() { return TYPE; } + // When ignore_missing is true, null chunk fields are skipped; when it is false, they are chunked into an empty list + private boolean shouldProcessChunk(Object chunkObject) { + return !ignoreMissing || Objects.nonNull(chunkObject); + } + @SuppressWarnings("unchecked") private void parseAlgorithmMap(final Map algorithmMap) { if (algorithmMap.size() > 1) { @@ -250,8 +260,11 @@ private void chunkMapType( } else { // chunk the object when target key is of leaf type (null, string and list of string) Object chunkObject = sourceAndMetadataMap.get(originalKey); - List chunkedResult = chunkLeafType(chunkObject, runtimeParameters); - sourceAndMetadataMap.put(String.valueOf(targetKey), chunkedResult); + + if (shouldProcessChunk(chunkObject)) { + List chunkedResult = chunkLeafType(chunkObject, runtimeParameters); + sourceAndMetadataMap.put(String.valueOf(targetKey), chunkedResult); + } } } } diff --git a/src/main/java/org/opensearch/neuralsearch/processor/factory/TextChunkingProcessorFactory.java b/src/main/java/org/opensearch/neuralsearch/processor/factory/TextChunkingProcessorFactory.java index 91b9ac5c1..b9904f05d 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/factory/TextChunkingProcessorFactory.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/factory/TextChunkingProcessorFactory.java @@ -14,7 +14,10 @@ import static org.opensearch.neuralsearch.processor.TextChunkingProcessor.TYPE; import static org.opensearch.neuralsearch.processor.TextChunkingProcessor.FIELD_MAP_FIELD; import static org.opensearch.neuralsearch.processor.TextChunkingProcessor.ALGORITHM_FIELD; +import static org.opensearch.neuralsearch.processor.TextChunkingProcessor.IGNORE_MISSING; +import static org.opensearch.neuralsearch.processor.TextChunkingProcessor.DEFAULT_IGNORE_MISSING; import static org.opensearch.ingest.ConfigurationUtils.readMap; +import static org.opensearch.ingest.ConfigurationUtils.readBooleanProperty; /** * Factory for chunking ingest processor for ingestion pipeline. 
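Since the `ignore_missing` flag drives both the processor change above and the factory change in the next hunk, here is the decision logic in isolation as a runnable sketch; the predicate body matches `shouldProcessChunk`, while the class and `main` method are illustrative scaffolding:

```java
import java.util.Objects;

// Sketch of the ignore_missing decision in TextChunkingProcessor.
public class IgnoreMissingSketch {
    // same expression as TextChunkingProcessor#shouldProcessChunk
    static boolean shouldProcessChunk(boolean ignoreMissing, Object chunkObject) {
        return !ignoreMissing || Objects.nonNull(chunkObject);
    }

    public static void main(String[] args) {
        // a present field is always chunked, regardless of the flag
        System.out.println(shouldProcessChunk(false, "some text")); // true
        System.out.println(shouldProcessChunk(true, "some text"));  // true
        // a null/missing field is chunked into an empty list when the
        // flag is false, and skipped entirely when the flag is true
        System.out.println(shouldProcessChunk(false, null)); // true
        System.out.println(shouldProcessChunk(true, null));  // false
    }
}
```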
@@ -45,6 +48,16 @@ public TextChunkingProcessor create( ) throws Exception { Map fieldMap = readMap(TYPE, processorTag, config, FIELD_MAP_FIELD); Map algorithmMap = readMap(TYPE, processorTag, config, ALGORITHM_FIELD); - return new TextChunkingProcessor(processorTag, description, fieldMap, algorithmMap, environment, clusterService, analysisRegistry); + boolean ignoreMissing = readBooleanProperty(TYPE, processorTag, config, IGNORE_MISSING, DEFAULT_IGNORE_MISSING); + return new TextChunkingProcessor( + processorTag, + description, + fieldMap, + algorithmMap, + ignoreMissing, + environment, + clusterService, + analysisRegistry + ); } } diff --git a/src/main/java/org/opensearch/neuralsearch/query/NeuralQueryBuilder.java b/src/main/java/org/opensearch/neuralsearch/query/NeuralQueryBuilder.java index 8e1b6b36b..915a79117 100644 --- a/src/main/java/org/opensearch/neuralsearch/query/NeuralQueryBuilder.java +++ b/src/main/java/org/opensearch/neuralsearch/query/NeuralQueryBuilder.java @@ -8,6 +8,7 @@ import static org.opensearch.knn.index.query.KNNQueryBuilder.MAX_DISTANCE_FIELD; import static org.opensearch.knn.index.query.KNNQueryBuilder.METHOD_PARAMS_FIELD; import static org.opensearch.knn.index.query.KNNQueryBuilder.MIN_SCORE_FIELD; +import static org.opensearch.knn.index.query.KNNQueryBuilder.RESCORE_FIELD; import static org.opensearch.neuralsearch.common.MinClusterVersionUtil.isClusterOnOrAfterMinReqVersion; import static org.opensearch.neuralsearch.common.MinClusterVersionUtil.isClusterOnOrAfterMinReqVersionForDefaultModelIdSupport; import static org.opensearch.neuralsearch.common.MinClusterVersionUtil.isClusterOnOrAfterMinReqVersionForRadialSearch; @@ -40,6 +41,8 @@ import org.opensearch.index.query.QueryShardContext; import org.opensearch.knn.index.query.KNNQueryBuilder; import org.opensearch.knn.index.query.parser.MethodParametersParser; +import org.opensearch.knn.index.query.parser.RescoreParser; +import org.opensearch.knn.index.query.rescore.RescoreContext; import org.opensearch.neuralsearch.common.MinClusterVersionUtil; import org.opensearch.neuralsearch.ml.MLCommonsClientAccessor; @@ -101,6 +104,7 @@ public static void initialize(MLCommonsClientAccessor mlClient) { private Supplier vectorSupplier; private QueryBuilder filter; private Map methodParameters; + private RescoreContext rescoreContext; /** * Constructor from stream input @@ -131,6 +135,7 @@ public NeuralQueryBuilder(StreamInput in) throws IOException { if (isClusterOnOrAfterMinReqVersion(METHOD_PARAMS_FIELD.getPreferredName())) { this.methodParameters = MethodParametersParser.streamInput(in, MinClusterVersionUtil::isClusterOnOrAfterMinReqVersion); } + this.rescoreContext = RescoreParser.streamInput(in); } @Override @@ -156,6 +161,7 @@ protected void doWriteTo(StreamOutput out) throws IOException { if (isClusterOnOrAfterMinReqVersion(METHOD_PARAMS_FIELD.getPreferredName())) { MethodParametersParser.streamOutput(out, methodParameters, MinClusterVersionUtil::isClusterOnOrAfterMinReqVersion); } + RescoreParser.streamOutput(out, rescoreContext); } @Override @@ -181,6 +187,9 @@ protected void doXContent(XContentBuilder xContentBuilder, Params params) throws if (Objects.nonNull(methodParameters)) { MethodParametersParser.doXContent(xContentBuilder, methodParameters); } + if (Objects.nonNull(rescoreContext)) { + RescoreParser.doXContent(xContentBuilder, rescoreContext); + } printBoostAndQueryName(xContentBuilder); xContentBuilder.endObject(); xContentBuilder.endObject(); @@ -276,6 +285,8 @@ private static void 
parseQueryParams(XContentParser parser, NeuralQueryBuilder n neuralQueryBuilder.filter(parseInnerQueryBuilder(parser)); } else if (METHOD_PARAMS_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { neuralQueryBuilder.methodParameters(MethodParametersParser.fromXContent(parser)); + } else if (RESCORE_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { + neuralQueryBuilder.rescoreContext(RescoreParser.fromXContent(parser)); } } else { throw new ParsingException( @@ -308,6 +319,8 @@ protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) { .maxDistance(maxDistance) .minScore(minScore) .k(k) + .methodParameters(methodParameters) + .rescoreContext(rescoreContext) .build(); } @@ -335,7 +348,8 @@ protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) { minScore(), vectorSetOnce::get, filter(), - methodParameters() + methodParameters(), + rescoreContext() ); } diff --git a/src/main/java/org/opensearch/neuralsearch/search/collector/HybridTopScoreDocCollector.java b/src/main/java/org/opensearch/neuralsearch/search/collector/HybridTopScoreDocCollector.java index 01a4cdfff..4e72b55bf 100644 --- a/src/main/java/org/opensearch/neuralsearch/search/collector/HybridTopScoreDocCollector.java +++ b/src/main/java/org/opensearch/neuralsearch/search/collector/HybridTopScoreDocCollector.java @@ -172,11 +172,6 @@ private TopDocs topDocsPerQuery(int start, int howMany, PriorityQueue int size = howMany - start; ScoreDoc[] results = new ScoreDoc[size]; - // pq's pop() returns the 'least' element in the queue, therefore need - // to discard the first ones, until we reach the requested range. - for (int i = pq.size() - start - size; i > 0; i--) { - pq.pop(); - } // Get the requested results from pq. populateResults(results, size, pq); diff --git a/src/main/java/org/opensearch/neuralsearch/search/query/HybridCollectorManager.java b/src/main/java/org/opensearch/neuralsearch/search/query/HybridCollectorManager.java index 4eb49e845..f9457f6ca 100644 --- a/src/main/java/org/opensearch/neuralsearch/search/query/HybridCollectorManager.java +++ b/src/main/java/org/opensearch/neuralsearch/search/query/HybridCollectorManager.java @@ -6,6 +6,7 @@ import java.util.Locale; import lombok.RequiredArgsConstructor; +import lombok.extern.log4j.Log4j2; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Collector; import org.apache.lucene.search.CollectorManager; @@ -33,7 +34,9 @@ import org.opensearch.search.query.MultiCollectorWrapper; import org.opensearch.search.query.QuerySearchResult; import org.opensearch.search.query.ReduceableSearchResult; +import org.opensearch.search.rescore.RescoreContext; import org.opensearch.search.sort.SortAndFormats; +import org.opensearch.neuralsearch.search.query.exception.HybridSearchRescoreQueryException; import java.io.IOException; import java.util.ArrayList; @@ -55,6 +58,7 @@ * In most cases it will be wrapped in MultiCollectorManager. 
*/ @RequiredArgsConstructor +@Log4j2 public abstract class HybridCollectorManager implements CollectorManager { private final int numHits; @@ -67,6 +71,7 @@ public abstract class HybridCollectorManager implements CollectorManager getSearchResults(final List results = new ArrayList<>(); DocValueFormat[] docValueFormats = getSortValueFormats(sortAndFormats); for (HybridSearchCollector collector : hybridSearchCollectors) { - TopDocsAndMaxScore topDocsAndMaxScore = getTopDocsAndAndMaxScore(collector, docValueFormats); + boolean isSortEnabled = docValueFormats != null; + TopDocsAndMaxScore topDocsAndMaxScore = getTopDocsAndAndMaxScore(collector, isSortEnabled); results.add((QuerySearchResult result) -> reduceCollectorResults(result, topDocsAndMaxScore, docValueFormats)); } return results; } - private TopDocsAndMaxScore getTopDocsAndAndMaxScore( - final HybridSearchCollector hybridSearchCollector, - final DocValueFormat[] docValueFormats - ) { - TopDocs newTopDocs; + private TopDocsAndMaxScore getTopDocsAndAndMaxScore(final HybridSearchCollector hybridSearchCollector, final boolean isSortEnabled) { List topDocs = hybridSearchCollector.topDocs(); - if (docValueFormats != null) { - newTopDocs = getNewTopFieldDocs( - getTotalHits(this.trackTotalHitsUpTo, topDocs, hybridSearchCollector.getTotalHits()), - topDocs, - sortAndFormats.sort.getSort() - ); - } else { - newTopDocs = getNewTopDocs(getTotalHits(this.trackTotalHitsUpTo, topDocs, hybridSearchCollector.getTotalHits()), topDocs); + if (isSortEnabled) { + return getSortedTopDocsAndMaxScore(topDocs, hybridSearchCollector); + } + return getTopDocsAndMaxScore(topDocs, hybridSearchCollector); + } + + private TopDocsAndMaxScore getSortedTopDocsAndMaxScore(List topDocs, HybridSearchCollector hybridSearchCollector) { + TopDocs sortedTopDocs = getNewTopFieldDocs( + getTotalHits(this.trackTotalHitsUpTo, topDocs, hybridSearchCollector.getTotalHits()), + topDocs, + sortAndFormats.sort.getSort() + ); + return new TopDocsAndMaxScore(sortedTopDocs, hybridSearchCollector.getMaxScore()); + } + + private TopDocsAndMaxScore getTopDocsAndMaxScore(List topDocs, HybridSearchCollector hybridSearchCollector) { + if (shouldRescore()) { + topDocs = rescore(topDocs); + } + float maxScore = calculateMaxScore(topDocs, hybridSearchCollector.getMaxScore()); + TopDocs finalTopDocs = getNewTopDocs(getTotalHits(this.trackTotalHitsUpTo, topDocs, hybridSearchCollector.getTotalHits()), topDocs); + return new TopDocsAndMaxScore(finalTopDocs, maxScore); + } + + private boolean shouldRescore() { + List rescoreContexts = searchContext.rescore(); + return Objects.nonNull(rescoreContexts) && !rescoreContexts.isEmpty(); + } + + private List rescore(List topDocs) { + List rescoredTopDocs = topDocs; + for (RescoreContext ctx : searchContext.rescore()) { + rescoredTopDocs = rescoredTopDocs(ctx, rescoredTopDocs); + } + return rescoredTopDocs; + } + + /** + * Rescores the top documents using the provided context. The input topDocs may be modified during this process. 
+ */ + private List rescoredTopDocs(final RescoreContext ctx, final List topDocs) { + List result = new ArrayList<>(topDocs.size()); + for (TopDocs topDoc : topDocs) { + try { + result.add(ctx.rescorer().rescore(topDoc, searchContext.searcher(), ctx)); + } catch (IOException exception) { + log.error("rescore failed for hybrid query in collector_manager.reduce call", exception); + throw new HybridSearchRescoreQueryException(exception); + } } - return new TopDocsAndMaxScore(newTopDocs, hybridSearchCollector.getMaxScore()); + return result; + } + + /** + * Calculates the maximum score from the provided TopDocs, considering rescoring. + */ + private float calculateMaxScore(List topDocsList, float initialMaxScore) { + List rescoreContexts = searchContext.rescore(); + if (Objects.nonNull(rescoreContexts) && !rescoreContexts.isEmpty()) { + for (TopDocs topDocs : topDocsList) { + if (Objects.nonNull(topDocs.scoreDocs) && topDocs.scoreDocs.length > 0) { + // first top doc for each sub-query has the max score because top docs are sorted by score desc + initialMaxScore = Math.max(initialMaxScore, topDocs.scoreDocs[0].score); + } + } + } + return initialMaxScore; } private List getHybridSearchCollectors(final Collection collectors) { @@ -415,18 +472,18 @@ public HybridCollectorNonConcurrentManager( int numHits, HitsThresholdChecker hitsThresholdChecker, int trackTotalHitsUpTo, - SortAndFormats sortAndFormats, Weight filteringWeight, - ScoreDoc searchAfter + SearchContext searchContext ) { super( numHits, hitsThresholdChecker, trackTotalHitsUpTo, - sortAndFormats, + searchContext.sort(), filteringWeight, - new TopDocsMerger(sortAndFormats), - (FieldDoc) searchAfter + new TopDocsMerger(searchContext.sort()), + searchContext.searchAfter(), + searchContext ); scoreCollector = Objects.requireNonNull(super.newCollector(), "collector for hybrid query cannot be null"); } @@ -453,18 +510,18 @@ public HybridCollectorConcurrentSearchManager( int numHits, HitsThresholdChecker hitsThresholdChecker, int trackTotalHitsUpTo, - SortAndFormats sortAndFormats, Weight filteringWeight, - ScoreDoc searchAfter + SearchContext searchContext ) { super( numHits, hitsThresholdChecker, trackTotalHitsUpTo, - sortAndFormats, + searchContext.sort(), filteringWeight, - new TopDocsMerger(sortAndFormats), - (FieldDoc) searchAfter + new TopDocsMerger(searchContext.sort()), + searchContext.searchAfter(), + searchContext ); } }
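To summarize the rescore path added to `HybridCollectorManager` above: during `reduce`, every `RescoreContext` registered on the search context is applied, in order, to the `TopDocs` of each sub-query before merging. Below is a compact sketch of just that loop, using the same OpenSearch/Lucene types as the diff (exception handling reduced to a plain `IOException` for brevity; the wrapper class is illustrative):

```java
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopDocs;
import org.opensearch.search.rescore.RescoreContext;

// Sketch of the hybrid rescore loop from HybridCollectorManager#rescore.
final class HybridRescoreSketch {
    static List<TopDocs> rescoreAll(List<RescoreContext> contexts, List<TopDocs> perSubQueryTopDocs, IndexSearcher searcher)
        throws IOException {
        List<TopDocs> current = perSubQueryTopDocs;
        for (RescoreContext ctx : contexts) {
            List<TopDocs> next = new ArrayList<>(current.size());
            for (TopDocs topDocs : current) {
                // Rescorer.rescore returns top docs with updated scores
                next.add(ctx.rescorer().rescore(topDocs, searcher, ctx));
            }
            current = next;
        }
        return current;
    }
}
```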
diff --git a/src/main/java/org/opensearch/neuralsearch/search/query/HybridQueryPhaseSearcher.java b/src/main/java/org/opensearch/neuralsearch/search/query/HybridQueryPhaseSearcher.java index 53248f88c..411127507 100644 --- a/src/main/java/org/opensearch/neuralsearch/search/query/HybridQueryPhaseSearcher.java +++ b/src/main/java/org/opensearch/neuralsearch/search/query/HybridQueryPhaseSearcher.java @@ -60,9 +60,15 @@ public boolean searchWith( validateQuery(searchContext, query); return super.searchWith(searchContext, searcher, query, collectors, hasFilterCollector, hasTimeout); } else { + // TODO remove this check after the following issue https://github.com/opensearch-project/neural-search/issues/280 is resolved. + if (searchContext.from() != 0) { + throw new IllegalArgumentException("In the current OpenSearch version pagination is not supported with hybrid query"); + } Query hybridQuery = extractHybridQuery(searchContext, query); QueryPhaseSearcher queryPhaseSearcher = getQueryPhaseSearcher(searchContext); - return queryPhaseSearcher.searchWith(searchContext, searcher, hybridQuery, collectors, hasFilterCollector, hasTimeout); + queryPhaseSearcher.searchWith(searchContext, searcher, hybridQuery, collectors, hasFilterCollector, hasTimeout); + // we decide on rescoring later, in the collector manager + return false; } } diff --git a/src/main/java/org/opensearch/neuralsearch/search/query/TopDocsMerger.java b/src/main/java/org/opensearch/neuralsearch/search/query/TopDocsMerger.java index a77ff458e..4a1955740 100644 --- a/src/main/java/org/opensearch/neuralsearch/search/query/TopDocsMerger.java +++ b/src/main/java/org/opensearch/neuralsearch/search/query/TopDocsMerger.java @@ -55,9 +55,15 @@ class TopDocsMerger { * @return merged TopDocsAndMaxScore object */ public TopDocsAndMaxScore merge(final TopDocsAndMaxScore source, final TopDocsAndMaxScore newTopDocs) { - if (Objects.isNull(newTopDocs) || Objects.isNull(newTopDocs.topDocs) || newTopDocs.topDocs.totalHits.value == 0) { + // we need to check if either the source or the new top docs are empty. This is needed for the case when concurrent segment search + // is enabled: search is then done by multiple workers, and results are saved in multiple doc collectors. Any of those + // results can be empty, in which case we can skip the actual merge logic and just return the other result object. + if (isEmpty(newTopDocs)) { return source; } + if (isEmpty(source)) { + return newTopDocs; + } TotalHits mergedTotalHits = getMergedTotalHits(source, newTopDocs); TopDocsAndMaxScore result = new TopDocsAndMaxScore( getTopDocs(getMergedScoreDocs(source.topDocs.scoreDocs, newTopDocs.topDocs.scoreDocs), mergedTotalHits), @@ -66,6 +72,20 @@ public TopDocsAndMaxScore merge(final TopDocsAndMaxScore source, final TopDocsAn return result; } + /** + * Checks if a TopDocsAndMaxScore is null, has no top docs, or has zero total hits + * @param topDocsAndMaxScore the object to check + * @return true if there is nothing to merge + */ + private static boolean isEmpty(final TopDocsAndMaxScore topDocsAndMaxScore) { + if (Objects.isNull(topDocsAndMaxScore) + || Objects.isNull(topDocsAndMaxScore.topDocs) + || topDocsAndMaxScore.topDocs.totalHits.value == 0) { + return true; + } + return false; + } + private TotalHits getMergedTotalHits(final TopDocsAndMaxScore source, final TopDocsAndMaxScore newTopDocs) { // merged value is a lower bound - if both are equal_to than merged will also be equal_to, // otherwise assign greater_than_or_equal diff --git a/src/main/java/org/opensearch/neuralsearch/search/query/exception/HybridSearchRescoreQueryException.java b/src/main/java/org/opensearch/neuralsearch/search/query/exception/HybridSearchRescoreQueryException.java new file mode 100644 index 000000000..34933a8e9 --- /dev/null +++ b/src/main/java/org/opensearch/neuralsearch/search/query/exception/HybridSearchRescoreQueryException.java @@ -0,0 +1,17 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ +package org.opensearch.neuralsearch.search.query.exception; + +import org.opensearch.OpenSearchException; + +/** + * Exception thrown when there is an issue with the hybrid search rescore query. 
+ */ +public class HybridSearchRescoreQueryException extends OpenSearchException { + + public HybridSearchRescoreQueryException(Throwable cause) { + super("rescore failed for hybrid query", cause); + } +} diff --git a/src/test/java/org/opensearch/neuralsearch/plugin/NeuralSearchTests.java b/src/test/java/org/opensearch/neuralsearch/plugin/NeuralSearchTests.java index 58a42c439..9a969e71b 100644 --- a/src/test/java/org/opensearch/neuralsearch/plugin/NeuralSearchTests.java +++ b/src/test/java/org/opensearch/neuralsearch/plugin/NeuralSearchTests.java @@ -4,14 +4,22 @@ */ package org.opensearch.neuralsearch.plugin; +import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; +import java.util.Collection; import java.util.List; import java.util.Map; import java.util.Optional; +import org.junit.Before; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.concurrent.OpenSearchExecutors; import org.opensearch.env.Environment; import org.opensearch.indices.IndicesService; import org.opensearch.ingest.IngestService; @@ -21,22 +29,72 @@ import org.opensearch.neuralsearch.processor.NormalizationProcessor; import org.opensearch.neuralsearch.processor.TextEmbeddingProcessor; import org.opensearch.neuralsearch.processor.factory.NormalizationProcessorFactory; +import org.opensearch.neuralsearch.processor.rerank.RerankProcessor; import org.opensearch.neuralsearch.query.HybridQueryBuilder; import org.opensearch.neuralsearch.query.NeuralQueryBuilder; import org.opensearch.neuralsearch.query.OpenSearchQueryTestCase; import org.opensearch.neuralsearch.search.query.HybridQueryPhaseSearcher; import org.opensearch.plugins.SearchPipelinePlugin; import org.opensearch.plugins.SearchPlugin; +import org.opensearch.plugins.SearchPlugin.SearchExtSpec; +import org.opensearch.search.pipeline.Processor.Factory; import org.opensearch.search.pipeline.SearchPhaseResultsProcessor; +import org.opensearch.search.pipeline.SearchPipelineService; import org.opensearch.search.pipeline.SearchRequestProcessor; +import org.opensearch.search.pipeline.SearchResponseProcessor; import org.opensearch.search.query.QueryPhaseSearcher; import org.opensearch.threadpool.ExecutorBuilder; import org.opensearch.threadpool.FixedExecutorBuilder; +import org.opensearch.threadpool.ThreadPool; public class NeuralSearchTests extends OpenSearchQueryTestCase { + private NeuralSearch plugin; + + @Mock + private SearchPipelineService searchPipelineService; + private SearchPipelinePlugin.Parameters searchParameters; + @Mock + private IngestService ingestService; + private Processor.Parameters ingestParameters; + @Mock + private ClusterService clusterService; + @Mock + private ThreadPool threadPool; + + @Before + public void setup() { + MockitoAnnotations.openMocks(this); + + plugin = new NeuralSearch(); + + when(searchPipelineService.getClusterService()).thenReturn(clusterService); + searchParameters = new SearchPipelinePlugin.Parameters(null, null, null, null, null, null, searchPipelineService, null, null, null); + ingestParameters = new Processor.Parameters(null, null, null, null, null, null, ingestService, null, null, null); + when(threadPool.executor(anyString())).thenReturn(OpenSearchExecutors.newDirectExecutorService()); + } + + public void testCreateComponents() { + // clientAccessor 
can not be null, and this is the only way to access it from this test + plugin.getProcessors(ingestParameters); + Collection components = plugin.createComponents( + null, + clusterService, + threadPool, + null, + null, + null, + null, + null, + null, + null, + null + ); + + assertEquals(1, components.size()); + } + public void testQuerySpecs() { - NeuralSearch plugin = new NeuralSearch(); List> querySpecs = plugin.getQueries(); assertNotNull(querySpecs); @@ -46,7 +104,6 @@ public void testQuerySpecs() { } public void testQueryPhaseSearcher() { - NeuralSearch plugin = new NeuralSearch(); Optional queryPhaseSearcherWithFeatureFlagDisabled = plugin.getQueryPhaseSearcher(); assertNotNull(queryPhaseSearcherWithFeatureFlagDisabled); @@ -62,7 +119,6 @@ public void testQueryPhaseSearcher() { } public void testProcessors() { - NeuralSearch plugin = new NeuralSearch(); Settings settings = Settings.builder().build(); Environment environment = mock(Environment.class); when(environment.settings()).thenReturn(settings); @@ -84,10 +140,8 @@ public void testProcessors() { } public void testSearchPhaseResultsProcessors() { - NeuralSearch plugin = new NeuralSearch(); - SearchPipelinePlugin.Parameters parameters = mock(SearchPipelinePlugin.Parameters.class); Map> searchPhaseResultsProcessors = plugin - .getSearchPhaseResultsProcessors(parameters); + .getSearchPhaseResultsProcessors(searchParameters); assertNotNull(searchPhaseResultsProcessors); assertEquals(1, searchPhaseResultsProcessors.size()); assertTrue(searchPhaseResultsProcessors.containsKey("normalization-processor")); @@ -97,19 +151,34 @@ public void testSearchPhaseResultsProcessors() { assertTrue(scoringProcessor instanceof NormalizationProcessorFactory); } + public void testGetSettings() { + List> settings = plugin.getSettings(); + + assertEquals(2, settings.size()); + } + public void testRequestProcessors() { - NeuralSearch plugin = new NeuralSearch(); - SearchPipelinePlugin.Parameters parameters = mock(SearchPipelinePlugin.Parameters.class); Map> processors = plugin.getRequestProcessors( - parameters + searchParameters ); assertNotNull(processors); assertNotNull(processors.get(NeuralQueryEnricherProcessor.TYPE)); assertNotNull(processors.get(NeuralSparseTwoPhaseProcessor.TYPE)); } + public void testResponseProcessors() { + Map> processors = plugin.getResponseProcessors(searchParameters); + assertNotNull(processors); + assertNotNull(processors.get(RerankProcessor.TYPE)); + } + + public void testSearchExts() { + List> searchExts = plugin.getSearchExts(); + + assertEquals(1, searchExts.size()); + } + public void testExecutionBuilders() { - NeuralSearch plugin = new NeuralSearch(); Settings settings = Settings.builder().build(); Environment environment = mock(Environment.class); when(environment.settings()).thenReturn(settings); @@ -120,5 +189,4 @@ public void testExecutionBuilders() { assertEquals("Unexpected number of executor builders are registered", 1, executorBuilders.size()); assertTrue(executorBuilders.get(0) instanceof FixedExecutorBuilder); } - } diff --git a/src/test/java/org/opensearch/neuralsearch/processor/InferenceProcessorTests.java b/src/test/java/org/opensearch/neuralsearch/processor/InferenceProcessorTests.java index cd2d0816a..dc86975bd 100644 --- a/src/test/java/org/opensearch/neuralsearch/processor/InferenceProcessorTests.java +++ b/src/test/java/org/opensearch/neuralsearch/processor/InferenceProcessorTests.java @@ -66,7 +66,7 @@ public void test_batchExecute_emptyInput() { verify(clientAccessor, 
never()).inferenceSentences(anyString(), anyList(), any()); } - public void test_batchExecute_allFailedValidation() { + public void test_batchExecuteWithEmpty_allFailedValidation() { final int docCount = 2; TestInferenceProcessor processor = new TestInferenceProcessor(createMockVectorResult(), BATCH_SIZE, null); List wrapperList = createIngestDocumentWrappers(docCount); @@ -79,6 +79,29 @@ public void test_batchExecute_allFailedValidation() { assertEquals(docCount, captor.getValue().size()); for (int i = 0; i < docCount; ++i) { assertNotNull(captor.getValue().get(i).getException()); + assertEquals( + "list type field [key1] has empty string, cannot process it", + captor.getValue().get(i).getException().getMessage() + ); + assertEquals(wrapperList.get(i).getIngestDocument(), captor.getValue().get(i).getIngestDocument()); + } + verify(clientAccessor, never()).inferenceSentences(anyString(), anyList(), any()); + } + + public void test_batchExecuteWithNull_allFailedValidation() { + final int docCount = 2; + TestInferenceProcessor processor = new TestInferenceProcessor(createMockVectorResult(), BATCH_SIZE, null); + List wrapperList = createIngestDocumentWrappers(docCount); + wrapperList.get(0).getIngestDocument().setFieldValue("key1", Arrays.asList(null, "value1")); + wrapperList.get(1).getIngestDocument().setFieldValue("key1", Arrays.asList(null, "value1")); + Consumer resultHandler = mock(Consumer.class); + processor.batchExecute(wrapperList, resultHandler); + ArgumentCaptor> captor = ArgumentCaptor.forClass(List.class); + verify(resultHandler).accept(captor.capture()); + assertEquals(docCount, captor.getValue().size()); + for (int i = 0; i < docCount; ++i) { + assertNotNull(captor.getValue().get(i).getException()); + assertEquals("list type field [key1] has null, cannot process it", captor.getValue().get(i).getException().getMessage()); assertEquals(wrapperList.get(i).getIngestDocument(), captor.getValue().get(i).getIngestDocument()); } verify(clientAccessor, never()).inferenceSentences(anyString(), anyList(), any()); diff --git a/src/test/java/org/opensearch/neuralsearch/processor/NormalizationProcessorIT.java b/src/test/java/org/opensearch/neuralsearch/processor/NormalizationProcessorIT.java index 7477fe63b..0fe9a77d6 100644 --- a/src/test/java/org/opensearch/neuralsearch/processor/NormalizationProcessorIT.java +++ b/src/test/java/org/opensearch/neuralsearch/processor/NormalizationProcessorIT.java @@ -97,6 +97,7 @@ public void testResultProcessor_whenOneShardAndQueryMatches_thenSuccessful() { null, null, null, + null, null ); TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT3); @@ -148,6 +149,7 @@ public void testResultProcessor_whenDefaultProcessorConfigAndQueryMatches_thenSu null, null, null, + null, null ); TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT3); @@ -188,6 +190,7 @@ public void testQueryMatches_whenMultipleShards_thenSuccessful() { null, null, null, + null, null ); TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT3); diff --git a/src/test/java/org/opensearch/neuralsearch/processor/ScoreCombinationIT.java b/src/test/java/org/opensearch/neuralsearch/processor/ScoreCombinationIT.java index ad2460103..4ddf8d2c3 100644 --- a/src/test/java/org/opensearch/neuralsearch/processor/ScoreCombinationIT.java +++ b/src/test/java/org/opensearch/neuralsearch/processor/ScoreCombinationIT.java @@ -224,7 +224,7 @@ public void 
testHarmonicMeanCombination_whenOneShardAndQueryMatches_thenSuccessf HybridQueryBuilder hybridQueryBuilderDefaultNorm = new HybridQueryBuilder(); hybridQueryBuilderDefaultNorm.add( - new NeuralQueryBuilder(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DOC_TEXT1, "", modelId, 5, null, null, null, null, null) + new NeuralQueryBuilder(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DOC_TEXT1, "", modelId, 5, null, null, null, null, null, null) ); hybridQueryBuilderDefaultNorm.add(QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT3)); @@ -249,7 +249,7 @@ public void testHarmonicMeanCombination_whenOneShardAndQueryMatches_thenSuccessf HybridQueryBuilder hybridQueryBuilderL2Norm = new HybridQueryBuilder(); hybridQueryBuilderL2Norm.add( - new NeuralQueryBuilder(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DOC_TEXT1, "", modelId, 5, null, null, null, null, null) + new NeuralQueryBuilder(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DOC_TEXT1, "", modelId, 5, null, null, null, null, null, null) ); hybridQueryBuilderL2Norm.add(QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT3)); @@ -299,7 +299,7 @@ public void testGeometricMeanCombination_whenOneShardAndQueryMatches_thenSuccess HybridQueryBuilder hybridQueryBuilderDefaultNorm = new HybridQueryBuilder(); hybridQueryBuilderDefaultNorm.add( - new NeuralQueryBuilder(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DOC_TEXT1, "", modelId, 5, null, null, null, null, null) + new NeuralQueryBuilder(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DOC_TEXT1, "", modelId, 5, null, null, null, null, null, null) ); hybridQueryBuilderDefaultNorm.add(QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT3)); @@ -324,7 +324,7 @@ public void testGeometricMeanCombination_whenOneShardAndQueryMatches_thenSuccess HybridQueryBuilder hybridQueryBuilderL2Norm = new HybridQueryBuilder(); hybridQueryBuilderL2Norm.add( - new NeuralQueryBuilder(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DOC_TEXT1, "", modelId, 5, null, null, null, null, null) + new NeuralQueryBuilder(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DOC_TEXT1, "", modelId, 5, null, null, null, null, null, null) ); hybridQueryBuilderL2Norm.add(QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT3)); diff --git a/src/test/java/org/opensearch/neuralsearch/processor/ScoreNormalizationIT.java b/src/test/java/org/opensearch/neuralsearch/processor/ScoreNormalizationIT.java index 7700c9f6a..d851131f1 100644 --- a/src/test/java/org/opensearch/neuralsearch/processor/ScoreNormalizationIT.java +++ b/src/test/java/org/opensearch/neuralsearch/processor/ScoreNormalizationIT.java @@ -85,7 +85,7 @@ public void testL2Norm_whenOneShardAndQueryMatches_thenSuccessful() { HybridQueryBuilder hybridQueryBuilderArithmeticMean = new HybridQueryBuilder(); hybridQueryBuilderArithmeticMean.add( - new NeuralQueryBuilder(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DOC_TEXT1, "", modelId, 5, null, null, null, null, null) + new NeuralQueryBuilder(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DOC_TEXT1, "", modelId, 5, null, null, null, null, null, null) ); hybridQueryBuilderArithmeticMean.add(QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT3)); @@ -110,7 +110,7 @@ public void testL2Norm_whenOneShardAndQueryMatches_thenSuccessful() { HybridQueryBuilder hybridQueryBuilderHarmonicMean = new HybridQueryBuilder(); hybridQueryBuilderHarmonicMean.add( - new NeuralQueryBuilder(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DOC_TEXT1, "", modelId, 5, null, null, null, null, null) + new NeuralQueryBuilder(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DOC_TEXT1, "", modelId, 5, null, null, null, null, null, null) ); 
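// Note: the extra trailing null added to each of these NeuralQueryBuilder calls fills the newly
// introduced rescore-context parameter; per the NeuralQueryIT changes later in this diff, the last
// two slots are the method-parameters map and the rescore context. A minimal sketch of a call that
// sets both explicitly (argument values here are illustrative only, not part of this change):
// new NeuralQueryBuilder(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DOC_TEXT1, "", modelId, 5,
//     null, null, null, null, Map.of("ef_search", 10), RescoreContext.getDefault())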
hybridQueryBuilderHarmonicMean.add(QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT3)); @@ -135,7 +135,7 @@ public void testL2Norm_whenOneShardAndQueryMatches_thenSuccessful() { HybridQueryBuilder hybridQueryBuilderGeometricMean = new HybridQueryBuilder(); hybridQueryBuilderGeometricMean.add( - new NeuralQueryBuilder(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DOC_TEXT1, "", modelId, 5, null, null, null, null, null) + new NeuralQueryBuilder(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DOC_TEXT1, "", modelId, 5, null, null, null, null, null, null) ); hybridQueryBuilderGeometricMean.add(QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT3)); @@ -185,7 +185,7 @@ public void testMinMaxNorm_whenOneShardAndQueryMatches_thenSuccessful() { HybridQueryBuilder hybridQueryBuilderArithmeticMean = new HybridQueryBuilder(); hybridQueryBuilderArithmeticMean.add( - new NeuralQueryBuilder(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DOC_TEXT1, "", modelId, 5, null, null, null, null, null) + new NeuralQueryBuilder(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DOC_TEXT1, "", modelId, 5, null, null, null, null, null, null) ); hybridQueryBuilderArithmeticMean.add(QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT3)); @@ -210,7 +210,7 @@ public void testMinMaxNorm_whenOneShardAndQueryMatches_thenSuccessful() { HybridQueryBuilder hybridQueryBuilderHarmonicMean = new HybridQueryBuilder(); hybridQueryBuilderHarmonicMean.add( - new NeuralQueryBuilder(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DOC_TEXT1, "", modelId, 5, null, null, null, null, null) + new NeuralQueryBuilder(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DOC_TEXT1, "", modelId, 5, null, null, null, null, null, null) ); hybridQueryBuilderHarmonicMean.add(QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT3)); @@ -235,7 +235,7 @@ public void testMinMaxNorm_whenOneShardAndQueryMatches_thenSuccessful() { HybridQueryBuilder hybridQueryBuilderGeometricMean = new HybridQueryBuilder(); hybridQueryBuilderGeometricMean.add( - new NeuralQueryBuilder(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DOC_TEXT1, "", modelId, 5, null, null, null, null, null) + new NeuralQueryBuilder(TEST_KNN_VECTOR_FIELD_NAME_1, TEST_DOC_TEXT1, "", modelId, 5, null, null, null, null, null, null) ); hybridQueryBuilderGeometricMean.add(QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT3)); diff --git a/src/test/java/org/opensearch/neuralsearch/processor/TextChunkingProcessorTests.java b/src/test/java/org/opensearch/neuralsearch/processor/TextChunkingProcessorTests.java index 433e51ef5..9cdb9aad0 100644 --- a/src/test/java/org/opensearch/neuralsearch/processor/TextChunkingProcessorTests.java +++ b/src/test/java/org/opensearch/neuralsearch/processor/TextChunkingProcessorTests.java @@ -42,6 +42,7 @@ import static org.opensearch.neuralsearch.processor.TextChunkingProcessor.TYPE; import static org.opensearch.neuralsearch.processor.TextChunkingProcessor.FIELD_MAP_FIELD; import static org.opensearch.neuralsearch.processor.TextChunkingProcessor.ALGORITHM_FIELD; +import static org.opensearch.neuralsearch.processor.TextChunkingProcessor.IGNORE_MISSING; import static org.opensearch.neuralsearch.processor.chunker.Chunker.MAX_CHUNK_LIMIT_FIELD; public class TextChunkingProcessorTests extends OpenSearchTestCase { @@ -181,6 +182,20 @@ private TextChunkingProcessor createDelimiterInstance() { return textChunkingProcessorFactory.create(registry, PROCESSOR_TAG, DESCRIPTION, config); } + @SneakyThrows + private TextChunkingProcessor createIgnoreMissingInstance() { + Map config = new HashMap<>(); + Map fieldMap = 
new HashMap<>(); + Map algorithmMap = new HashMap<>(); + algorithmMap.put(DelimiterChunker.ALGORITHM_NAME, createDelimiterParameters()); + fieldMap.put(INPUT_FIELD, OUTPUT_FIELD); + config.put(FIELD_MAP_FIELD, fieldMap); + config.put(ALGORITHM_FIELD, algorithmMap); + config.put(IGNORE_MISSING, true); + Map registry = new HashMap<>(); + return textChunkingProcessorFactory.create(registry, PROCESSOR_TAG, DESCRIPTION, config); + } + public void testCreate_whenAlgorithmFieldMissing_thenFail() { Map config = new HashMap<>(); Map fieldMap = new HashMap<>(); @@ -945,4 +960,16 @@ public void testExecute_withDelimiter_andSourceDataString_thenSucceed() { expectedPassages.add(" The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch."); assertEquals(expectedPassages, passages); } + + @SneakyThrows + public void testExecute_withIgnoreMissing_thenSucceed() { + Map sourceAndMetadata = new HashMap<>(); + sourceAndMetadata.put("text_field", ""); + sourceAndMetadata.put(IndexFieldMapper.NAME, INDEX_NAME); + IngestDocument ingestDocument = new IngestDocument(sourceAndMetadata, new HashMap<>()); + + TextChunkingProcessor processor = createIgnoreMissingInstance(); + IngestDocument document = processor.execute(ingestDocument); + assertFalse(document.getSourceAndMetadata().containsKey(OUTPUT_FIELD)); + } } diff --git a/src/test/java/org/opensearch/neuralsearch/processor/TextEmbeddingProcessorIT.java b/src/test/java/org/opensearch/neuralsearch/processor/TextEmbeddingProcessorIT.java index d921ef8b8..c00423b1f 100644 --- a/src/test/java/org/opensearch/neuralsearch/processor/TextEmbeddingProcessorIT.java +++ b/src/test/java/org/opensearch/neuralsearch/processor/TextEmbeddingProcessorIT.java @@ -124,6 +124,7 @@ public void testNestedFieldMapping_whenDocumentsIngested_thenSuccessful() throws null, null, null, + null, null ); QueryBuilder queryNestedLowerLevel = QueryBuilders.nestedQuery( @@ -309,5 +310,14 @@ private void ingestBatchDocumentWithBulk(String idPrefix, int docCount, Set> itemMap = (Map>) item; + if (itemMap.get("index").get("error") != null) { + failedDocCount++; + } + } + assertEquals(failedIds.size(), failedDocCount); } } diff --git a/src/test/java/org/opensearch/neuralsearch/processor/TextEmbeddingProcessorTests.java b/src/test/java/org/opensearch/neuralsearch/processor/TextEmbeddingProcessorTests.java index 82b24324c..97e85e46e 100644 --- a/src/test/java/org/opensearch/neuralsearch/processor/TextEmbeddingProcessorTests.java +++ b/src/test/java/org/opensearch/neuralsearch/processor/TextEmbeddingProcessorTests.java @@ -730,7 +730,7 @@ public void testBuildVectorOutput_withNestedList_successful() { IngestDocument ingestDocument = createNestedListIngestDocument(); TextEmbeddingProcessor textEmbeddingProcessor = createInstanceWithNestedMapConfiguration(config); Map knnMap = textEmbeddingProcessor.buildMapWithTargetKeys(ingestDocument); - List> modelTensorList = createMockVectorResult(); + List> modelTensorList = createRandomOneDimensionalMockVector(2, 2, 0.0f, 1.0f); textEmbeddingProcessor.buildNLPResult(knnMap, modelTensorList, ingestDocument.getSourceAndMetadata()); List> nestedObj = (List>) ingestDocument.getSourceAndMetadata().get("nestedField"); assertTrue(nestedObj.get(0).containsKey("vectorField")); @@ -739,12 +739,27 @@ public void testBuildVectorOutput_withNestedList_successful() { assertNotNull(nestedObj.get(1).get("vectorField")); } + @SuppressWarnings("unchecked") + public void 
testBuildVectorOutput_withNestedListHasNotForEmbeddingField_successful() { + Map config = createNestedListConfiguration(); + IngestDocument ingestDocument = createNestedListWithNotEmbeddingFieldIngestDocument(); + TextEmbeddingProcessor textEmbeddingProcessor = createInstanceWithNestedMapConfiguration(config); + Map knnMap = textEmbeddingProcessor.buildMapWithTargetKeys(ingestDocument); + List> modelTensorList = createRandomOneDimensionalMockVector(1, 2, 0.0f, 1.0f); + textEmbeddingProcessor.buildNLPResult(knnMap, modelTensorList, ingestDocument.getSourceAndMetadata()); + List> nestedObj = (List>) ingestDocument.getSourceAndMetadata().get("nestedField"); + assertFalse(nestedObj.get(0).containsKey("vectorField")); + assertTrue(nestedObj.get(0).containsKey("textFieldNotForEmbedding")); + assertTrue(nestedObj.get(1).containsKey("vectorField")); + assertNotNull(nestedObj.get(1).get("vectorField")); + } + public void testBuildVectorOutput_withNestedList_Level2_successful() { Map config = createNestedList2LevelConfiguration(); IngestDocument ingestDocument = create2LevelNestedListIngestDocument(); TextEmbeddingProcessor textEmbeddingProcessor = createInstanceWithNestedMapConfiguration(config); Map knnMap = textEmbeddingProcessor.buildMapWithTargetKeys(ingestDocument); - List> modelTensorList = createMockVectorResult(); + List> modelTensorList = createRandomOneDimensionalMockVector(2, 2, 0.0f, 1.0f); textEmbeddingProcessor.buildNLPResult(knnMap, modelTensorList, ingestDocument.getSourceAndMetadata()); Map nestedLevel1 = (Map) ingestDocument.getSourceAndMetadata().get("nestedField"); List> nestedObj = (List>) nestedLevel1.get("nestedField"); @@ -754,6 +769,22 @@ public void testBuildVectorOutput_withNestedList_Level2_successful() { assertNotNull(nestedObj.get(1).get("vectorField")); } + @SuppressWarnings("unchecked") + public void testBuildVectorOutput_withNestedListHasNotForEmbeddingField_Level2_successful() { + Map config = createNestedList2LevelConfiguration(); + IngestDocument ingestDocument = create2LevelNestedListWithNotEmbeddingFieldIngestDocument(); + TextEmbeddingProcessor textEmbeddingProcessor = createInstanceWithNestedMapConfiguration(config); + Map knnMap = textEmbeddingProcessor.buildMapWithTargetKeys(ingestDocument); + List> modelTensorList = createRandomOneDimensionalMockVector(1, 2, 0.0f, 1.0f); + textEmbeddingProcessor.buildNLPResult(knnMap, modelTensorList, ingestDocument.getSourceAndMetadata()); + Map nestedLevel1 = (Map) ingestDocument.getSourceAndMetadata().get("nestedField"); + List> nestedObj = (List>) nestedLevel1.get("nestedField"); + assertFalse(nestedObj.get(0).containsKey("vectorField")); + assertTrue(nestedObj.get(0).containsKey("textFieldNotForEmbedding")); + assertTrue(nestedObj.get(1).containsKey("vectorField")); + assertNotNull(nestedObj.get(1).get("vectorField")); + } + public void test_updateDocument_appendVectorFieldsToDocument_successful() { Map config = createPlainStringConfiguration(); IngestDocument ingestDocument = createPlainIngestDocument(); @@ -1039,6 +1070,16 @@ private IngestDocument createNestedListIngestDocument() { return new IngestDocument(nestedList, new HashMap<>()); } + private IngestDocument createNestedListWithNotEmbeddingFieldIngestDocument() { + HashMap nestedObj1 = new HashMap<>(); + nestedObj1.put("textFieldNotForEmbedding", "This is a text field"); + HashMap nestedObj2 = new HashMap<>(); + nestedObj2.put("textField", "This is another text field"); + HashMap nestedList = new HashMap<>(); + nestedList.put("nestedField", 
Arrays.asList(nestedObj1, nestedObj2)); + return new IngestDocument(nestedList, new HashMap<>()); + } + private IngestDocument create2LevelNestedListIngestDocument() { HashMap nestedObj1 = new HashMap<>(); nestedObj1.put("textField", "This is a text field"); @@ -1050,4 +1091,16 @@ private IngestDocument create2LevelNestedListIngestDocument() { nestedList1.put("nestedField", nestedList); return new IngestDocument(nestedList1, new HashMap<>()); } + + private IngestDocument create2LevelNestedListWithNotEmbeddingFieldIngestDocument() { + HashMap nestedObj1 = new HashMap<>(); + nestedObj1.put("textFieldNotForEmbedding", "This is a text field"); + HashMap nestedObj2 = new HashMap<>(); + nestedObj2.put("textField", "This is another text field"); + HashMap nestedList = new HashMap<>(); + nestedList.put("nestedField", Arrays.asList(nestedObj1, nestedObj2)); + HashMap nestedList1 = new HashMap<>(); + nestedList1.put("nestedField", nestedList); + return new IngestDocument(nestedList1, new HashMap<>()); + } } diff --git a/src/test/java/org/opensearch/neuralsearch/query/HybridQueryAggregationsIT.java b/src/test/java/org/opensearch/neuralsearch/query/HybridQueryAggregationsIT.java index 9df106156..3bb566aef 100644 --- a/src/test/java/org/opensearch/neuralsearch/query/HybridQueryAggregationsIT.java +++ b/src/test/java/org/opensearch/neuralsearch/query/HybridQueryAggregationsIT.java @@ -215,7 +215,8 @@ private void testPostFilterWithSimpleHybridQuery(boolean isSingleShard, boolean rangeFilterQuery, null, false, - null + null, + 0 ); assertHitResultsFromQuery(1, searchResponseAsMap); @@ -230,7 +231,8 @@ private void testPostFilterWithSimpleHybridQuery(boolean isSingleShard, boolean null, null, false, - null + null, + 0 ); assertHitResultsFromQuery(2, searchResponseAsMap); } else if (!isSingleShard && hasPostFilterQuery) { @@ -244,7 +246,8 @@ private void testPostFilterWithSimpleHybridQuery(boolean isSingleShard, boolean rangeFilterQuery, null, false, - null + null, + 0 ); assertHitResultsFromQuery(2, searchResponseAsMap); } else { @@ -258,7 +261,8 @@ private void testPostFilterWithSimpleHybridQuery(boolean isSingleShard, boolean null, null, false, - null + null, + 0 ); assertHitResultsFromQuery(3, searchResponseAsMap); } @@ -319,7 +323,8 @@ private void testPostFilterWithComplexHybridQuery(boolean isSingleShard, boolean rangeFilterQuery, null, false, - null + null, + 0 ); assertHitResultsFromQuery(1, searchResponseAsMap); @@ -334,7 +339,8 @@ private void testPostFilterWithComplexHybridQuery(boolean isSingleShard, boolean null, null, false, - null + null, + 0 ); assertHitResultsFromQuery(2, searchResponseAsMap); } else if (!isSingleShard && hasPostFilterQuery) { @@ -348,7 +354,8 @@ private void testPostFilterWithComplexHybridQuery(boolean isSingleShard, boolean rangeFilterQuery, null, false, - null + null, + 0 ); assertHitResultsFromQuery(4, searchResponseAsMap); } else { @@ -362,7 +369,8 @@ private void testPostFilterWithComplexHybridQuery(boolean isSingleShard, boolean null, null, false, - null + null, + 0 ); assertHitResultsFromQuery(3, searchResponseAsMap); } diff --git a/src/test/java/org/opensearch/neuralsearch/query/HybridQueryIT.java b/src/test/java/org/opensearch/neuralsearch/query/HybridQueryIT.java index a650087b4..610e08dd0 100644 --- a/src/test/java/org/opensearch/neuralsearch/query/HybridQueryIT.java +++ b/src/test/java/org/opensearch/neuralsearch/query/HybridQueryIT.java @@ -793,6 +793,46 @@ public void testConcurrentSearchWithMultipleSlices_whenMultipleShardsIndex_thenS } } + // TODO remove 
this test after following issue https://github.com/opensearch-project/neural-search/issues/280 gets resolved. + @SneakyThrows + public void testHybridQuery_whenFromIsSetInSearchRequest_thenFail() { + try { + initializeIndexIfNotExist(TEST_MULTI_DOC_INDEX_NAME_ONE_SHARD); + createSearchPipelineWithResultsPostProcessor(SEARCH_PIPELINE); + MatchQueryBuilder matchQueryBuilder = QueryBuilders.matchQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT3); + HybridQueryBuilder hybridQueryBuilderOnlyTerm = new HybridQueryBuilder(); + hybridQueryBuilderOnlyTerm.add(matchQueryBuilder); + + ResponseException exceptionNoNestedTypes = expectThrows( + ResponseException.class, + () -> search( + TEST_MULTI_DOC_INDEX_NAME_ONE_SHARD, + hybridQueryBuilderOnlyTerm, + null, + 10, + Map.of("search_pipeline", SEARCH_PIPELINE), + null, + null, + null, + false, + null, + 10 + ) + + ); + + org.hamcrest.MatcherAssert.assertThat( + exceptionNoNestedTypes.getMessage(), + allOf( + containsString("In the current OpenSearch version pagination is not supported with hybrid query"), + containsString("illegal_argument_exception") + ) + ); + } finally { + wipeOfTestResources(TEST_MULTI_DOC_INDEX_NAME_ONE_SHARD, null, null, SEARCH_PIPELINE); + } + } + @SneakyThrows private void initializeIndexIfNotExist(String indexName) throws IOException { if (TEST_BASIC_INDEX_NAME.equals(indexName) && !indexExists(TEST_BASIC_INDEX_NAME)) { diff --git a/src/test/java/org/opensearch/neuralsearch/query/HybridQueryPostFilterIT.java b/src/test/java/org/opensearch/neuralsearch/query/HybridQueryPostFilterIT.java index ea951b65f..82b8ef6ec 100644 --- a/src/test/java/org/opensearch/neuralsearch/query/HybridQueryPostFilterIT.java +++ b/src/test/java/org/opensearch/neuralsearch/query/HybridQueryPostFilterIT.java @@ -177,7 +177,8 @@ private void testPostFilterRangeQuery(String indexName) { postFilterQuery, null, false, - null + null, + 0 ); assertHybridQueryResults(searchResponseAsMap, 1, 0, GTE_OF_RANGE_IN_POST_FILTER_QUERY, LTE_OF_RANGE_IN_POST_FILTER_QUERY); } @@ -262,7 +263,8 @@ private void testPostFilterBoolQuery(String indexName) { postFilterQuery, null, false, - null + null, + 0 ); assertHybridQueryResults(searchResponseAsMap, 2, 1, GTE_OF_RANGE_IN_POST_FILTER_QUERY, LTE_OF_RANGE_IN_POST_FILTER_QUERY); // Case 2 A Query with a combination of hybrid query (Match Query, Term Query, Range Query), aggregation (Average stock price @@ -278,7 +280,8 @@ private void testPostFilterBoolQuery(String indexName) { postFilterQuery, null, false, - null + null, + 0 ); assertHybridQueryResults(searchResponseAsMap, 2, 1, GTE_OF_RANGE_IN_POST_FILTER_QUERY, LTE_OF_RANGE_IN_POST_FILTER_QUERY); Map aggregations = getAggregations(searchResponseAsMap); @@ -303,7 +306,8 @@ private void testPostFilterBoolQuery(String indexName) { postFilterQuery, null, false, - null + null, + 0 ); assertHybridQueryResults(searchResponseAsMap, 0, 0, GTE_OF_RANGE_IN_POST_FILTER_QUERY, LTE_OF_RANGE_IN_POST_FILTER_QUERY); // Case 4 A Query with a combination of hybrid query (Match Query, Range Query) and a post filter query (Bool Query with a should @@ -324,7 +328,8 @@ private void testPostFilterBoolQuery(String indexName) { postFilterQuery, null, false, - null + null, + 0 ); assertHybridQueryResults(searchResponseAsMap, 0, 0, GTE_OF_RANGE_IN_POST_FILTER_QUERY, LTE_OF_RANGE_IN_POST_FILTER_QUERY); } @@ -382,7 +387,8 @@ private void testPostFilterMatchAllAndMatchNoneQueries(String indexName) { postFilterQuery, null, false, - null + null, + 0 ); assertHybridQueryResults(searchResponseAsMap, 4, 3, 
GTE_OF_RANGE_IN_POST_FILTER_QUERY, LTE_OF_RANGE_IN_POST_FILTER_QUERY); @@ -399,7 +405,8 @@ private void testPostFilterMatchAllAndMatchNoneQueries(String indexName) { postFilterQuery, null, false, - null + null, + 0 ); assertHybridQueryResults(searchResponseAsMap, 0, 0, GTE_OF_RANGE_IN_POST_FILTER_QUERY, LTE_OF_RANGE_IN_POST_FILTER_QUERY); } diff --git a/src/test/java/org/opensearch/neuralsearch/query/HybridQuerySortIT.java b/src/test/java/org/opensearch/neuralsearch/query/HybridQuerySortIT.java index f3615b991..b5e812780 100644 --- a/src/test/java/org/opensearch/neuralsearch/query/HybridQuerySortIT.java +++ b/src/test/java/org/opensearch/neuralsearch/query/HybridQuerySortIT.java @@ -13,6 +13,7 @@ import lombok.SneakyThrows; import org.junit.BeforeClass; import org.opensearch.client.ResponseException; +import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryBuilders; import org.opensearch.index.query.MatchQueryBuilder; import org.opensearch.index.query.TermQueryBuilder; @@ -139,7 +140,8 @@ private void testSingleFieldSort_whenMultipleSubQueries_thenSuccessful(String in null, createSortBuilders(fieldSortOrderMap, false), false, - null + null, + 0 ); List> nestedHits = validateHitsCountAndFetchNestedHits(searchResponseAsMap, 6, 6); assertStockValueWithSortOrderInHybridQueryResults(nestedHits, SortOrder.DESC, LARGEST_STOCK_VALUE_IN_QUERY_RESULT, true, true); @@ -168,7 +170,8 @@ private void testMultipleFieldSort_whenMultipleSubQueries_thenSuccessful(String null, createSortBuilders(fieldSortOrderMap, false), false, - null + null, + 0 ); List> nestedHits = validateHitsCountAndFetchNestedHits(searchResponseAsMap, 6, 6); assertStockValueWithSortOrderInHybridQueryResults(nestedHits, SortOrder.DESC, LARGEST_STOCK_VALUE_IN_QUERY_RESULT, true, false); @@ -200,7 +203,8 @@ public void testSingleFieldSort_whenTrackScoresIsEnabled_thenFail() { null, createSortBuilders(fieldSortOrderMap, false), true, - null + null, + 0 ) ); } finally { @@ -234,7 +238,8 @@ public void testSingleFieldSort_whenSortCriteriaIsByScoreAndField_thenFail() { null, createSortBuilders(fieldSortOrderMap, false), true, - null + null, + 0 ) ); } finally { @@ -312,7 +317,8 @@ private void testSearchAfter_whenSingleFieldSort_thenSuccessful(String indexName null, createSortBuilders(fieldSortOrderMap, false), false, - searchAfter + searchAfter, + 0 ); List> nestedHits = validateHitsCountAndFetchNestedHits(searchResponseAsMap, 3, 6); assertStockValueWithSortOrderInHybridQueryResults( @@ -348,7 +354,8 @@ private void testSearchAfter_whenMultipleFieldSort_thenSuccessful(String indexNa null, createSortBuilders(fieldSortOrderMap, false), false, - searchAfter + searchAfter, + 0 ); List> nestedHits = validateHitsCountAndFetchNestedHits(searchResponseAsMap, 5, 6); assertStockValueWithSortOrderInHybridQueryResults( @@ -381,7 +388,8 @@ private void testScoreSort_whenSingleFieldSort_thenSuccessful(String indexName) null, createSortBuilders(fieldSortOrderMap, false), false, - null + null, + 0 ); List> nestedHits = validateHitsCountAndFetchNestedHits(searchResponseAsMap, 6, 6); assertScoreWithSortOrderInHybridQueryResults(nestedHits, SortOrder.DESC, 1.0); @@ -415,7 +423,8 @@ public void testSort_whenSortFieldsSizeNotEqualToSearchAfterSize_thenFail() { null, createSortBuilders(fieldSortOrderMap, false), true, - searchAfter + searchAfter, + 0 ) ); } finally { @@ -450,11 +459,75 @@ public void testSearchAfter_whenAfterFieldIsNotPassed_thenFail() { null, createSortBuilders(fieldSortOrderMap, false), true, - searchAfter + 
searchAfter, + 0 + ) + ); + } finally { + wipeOfTestResources(TEST_MULTI_DOC_INDEX_WITH_TEXT_AND_INT_MULTIPLE_SHARDS, null, null, SEARCH_PIPELINE); + } + } + + @SneakyThrows + public void testSortingWithRescoreWhenConcurrentSegmentSearchEnabledAndDisabled_whenBothSortAndRescorePresent_thenFail() { + try { + prepareResourcesBeforeTestExecution(SHARDS_COUNT_IN_MULTI_NODE_CLUSTER); + updateClusterSettings(CONCURRENT_SEGMENT_SEARCH_ENABLED, false); + HybridQueryBuilder hybridQueryBuilder = createHybridQueryBuilderWithMatchTermAndRangeQuery( + "mission", + "part", + LTE_OF_RANGE_IN_HYBRID_QUERY, + GTE_OF_RANGE_IN_HYBRID_QUERY + ); + + Map fieldSortOrderMap = new HashMap<>(); + fieldSortOrderMap.put("stock", SortOrder.DESC); + + List searchAfter = new ArrayList<>(); + searchAfter.add(25); + + QueryBuilder rescoreQuery = QueryBuilders.matchQuery(TEXT_FIELD_1_NAME, TEXT_FIELD_VALUE_1_DUNES); + + assertThrows( + "Cannot use [sort] option in conjunction with [rescore].", + ResponseException.class, + () -> search( + TEST_MULTI_DOC_INDEX_WITH_TEXT_AND_INT_MULTIPLE_SHARDS, + hybridQueryBuilder, + rescoreQuery, + 10, + Map.of("search_pipeline", SEARCH_PIPELINE), + null, + null, + createSortBuilders(fieldSortOrderMap, false), + false, + searchAfter, + 0 + ) + ); + + updateClusterSettings(CONCURRENT_SEGMENT_SEARCH_ENABLED, true); + + assertThrows( + "Cannot use [sort] option in conjunction with [rescore].", + ResponseException.class, + () -> search( + TEST_MULTI_DOC_INDEX_WITH_TEXT_AND_INT_MULTIPLE_SHARDS, + hybridQueryBuilder, + rescoreQuery, + 10, + Map.of("search_pipeline", SEARCH_PIPELINE), + null, + null, + createSortBuilders(fieldSortOrderMap, false), + false, + searchAfter, + 0 ) ); } finally { wipeOfTestResources(TEST_MULTI_DOC_INDEX_WITH_TEXT_AND_INT_MULTIPLE_SHARDS, null, null, SEARCH_PIPELINE); + updateClusterSettings(CONCURRENT_SEGMENT_SEARCH_ENABLED, false); } } diff --git a/src/test/java/org/opensearch/neuralsearch/query/NeuralQueryBuilderTests.java b/src/test/java/org/opensearch/neuralsearch/query/NeuralQueryBuilderTests.java index 9ecb93b81..6d8e810f3 100644 --- a/src/test/java/org/opensearch/neuralsearch/query/NeuralQueryBuilderTests.java +++ b/src/test/java/org/opensearch/neuralsearch/query/NeuralQueryBuilderTests.java @@ -14,6 +14,9 @@ import static org.opensearch.knn.index.query.KNNQueryBuilder.FILTER_FIELD; import static org.opensearch.knn.index.query.KNNQueryBuilder.MAX_DISTANCE_FIELD; import static org.opensearch.knn.index.query.KNNQueryBuilder.MIN_SCORE_FIELD; +import static org.opensearch.knn.index.query.KNNQueryBuilder.RESCORE_FIELD; +import static org.opensearch.knn.index.query.KNNQueryBuilder.RESCORE_OVERSAMPLE_FIELD; +import static org.opensearch.neuralsearch.util.TestUtils.DELTA_FOR_FLOATS_ASSERTION; import static org.opensearch.neuralsearch.util.TestUtils.xContentBuilderToMap; import static org.opensearch.neuralsearch.query.NeuralQueryBuilder.K_FIELD; import static org.opensearch.neuralsearch.query.NeuralQueryBuilder.MODEL_ID_FIELD; @@ -52,6 +55,7 @@ import org.opensearch.index.query.QueryRewriteContext; import org.opensearch.index.query.QueryShardContext; import org.opensearch.knn.index.query.KNNQueryBuilder; +import org.opensearch.knn.index.query.rescore.RescoreContext; import org.opensearch.neuralsearch.common.VectorUtil; import org.opensearch.neuralsearch.ml.MLCommonsClientAccessor; import org.opensearch.neuralsearch.util.NeuralSearchClusterTestUtils; @@ -136,10 +140,56 @@ public void testFromXContent_withMethodParameters_thenBuildSuccessfully() { 
contentParser.nextToken(); NeuralQueryBuilder neuralQueryBuilder = NeuralQueryBuilder.fromXContent(contentParser); + assertEquals(Map.of("ef_search", 1000), neuralQueryBuilder.methodParameters()); assertEquals(FIELD_NAME, neuralQueryBuilder.fieldName()); assertEquals(QUERY_TEXT, neuralQueryBuilder.queryText()); assertEquals(MODEL_ID, neuralQueryBuilder.modelId()); assertEquals(K, neuralQueryBuilder.k()); + assertNull(neuralQueryBuilder.rescoreContext()); + } + + @SneakyThrows + public void testFromXContent_withRescoreContext_thenBuildSuccessfully() { + /* + { + "VECTOR_FIELD": { + "query_text": "string", + "query_image": "string", + "model_id": "string", + "k": int, + "rescore": { + "oversample_factor" : int + } + } + } + */ + setUpClusterService(Version.V_2_10_0); + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder() + .startObject() + .startObject(FIELD_NAME) + .field(QUERY_TEXT_FIELD.getPreferredName(), QUERY_TEXT) + .field(MODEL_ID_FIELD.getPreferredName(), MODEL_ID) + .field(K_FIELD.getPreferredName(), K) + .startObject(RESCORE_FIELD.getPreferredName()) + .field(RESCORE_OVERSAMPLE_FIELD.getPreferredName(), 1) + .endObject() + .endObject() + .endObject(); + + XContentParser contentParser = createParser(xContentBuilder); + contentParser.nextToken(); + NeuralQueryBuilder neuralQueryBuilder = NeuralQueryBuilder.fromXContent(contentParser); + + assertEquals(FIELD_NAME, neuralQueryBuilder.fieldName()); + assertEquals(QUERY_TEXT, neuralQueryBuilder.queryText()); + assertEquals(MODEL_ID, neuralQueryBuilder.modelId()); + assertEquals(K, neuralQueryBuilder.k()); + assertEquals( + RescoreContext.getDefault().getOversampleFactor(), + neuralQueryBuilder.rescoreContext().getOversampleFactor(), + DELTA_FOR_FLOATS_ASSERTION + ); + assertNull(neuralQueryBuilder.methodParameters()); } @SneakyThrows @@ -679,13 +729,20 @@ public void testRewrite_whenVectorSupplierAndVectorSet_thenReturnKNNQueryBuilder .queryImage(IMAGE_TEXT) .modelId(MODEL_ID) .k(K) + .methodParameters(Map.of("ef_search", 100)) + .rescoreContext(RescoreContext.getDefault()) .vectorSupplier(TEST_VECTOR_SUPPLIER); + + KNNQueryBuilder expected = KNNQueryBuilder.builder() + .k(K) + .fieldName(neuralQueryBuilder.fieldName()) + .methodParameters(neuralQueryBuilder.methodParameters()) + .rescoreContext(neuralQueryBuilder.rescoreContext()) + .vector(TEST_VECTOR_SUPPLIER.get()) + .build(); + QueryBuilder queryBuilder = neuralQueryBuilder.doRewrite(null); - assertTrue(queryBuilder instanceof KNNQueryBuilder); - KNNQueryBuilder knnQueryBuilder = (KNNQueryBuilder) queryBuilder; - assertEquals(neuralQueryBuilder.fieldName(), knnQueryBuilder.fieldName()); - assertEquals((int) neuralQueryBuilder.k(), knnQueryBuilder.getK()); - assertArrayEquals(TEST_VECTOR_SUPPLIER.get(), (float[]) knnQueryBuilder.vector(), 0.0f); + assertEquals(expected, queryBuilder); } public void testRewrite_whenFilterSet_thenKNNQueryBuilderFilterSet() { diff --git a/src/test/java/org/opensearch/neuralsearch/query/NeuralQueryIT.java b/src/test/java/org/opensearch/neuralsearch/query/NeuralQueryIT.java index 0e5d86e72..210abd7ca 100644 --- a/src/test/java/org/opensearch/neuralsearch/query/NeuralQueryIT.java +++ b/src/test/java/org/opensearch/neuralsearch/query/NeuralQueryIT.java @@ -18,6 +18,7 @@ import org.opensearch.index.query.BoolQueryBuilder; import org.opensearch.index.query.MatchAllQueryBuilder; import org.opensearch.index.query.MatchQueryBuilder; +import org.opensearch.knn.index.query.rescore.RescoreContext; import org.opensearch.neuralsearch.BaseNeuralSearchIT; 
import com.google.common.primitives.Floats; @@ -111,6 +112,7 @@ public void testQueryWithBoostAndImageQueryAndRadialQuery() { null, null, null, + null, null ); @@ -133,7 +135,8 @@ public void testQueryWithBoostAndImageQueryAndRadialQuery() { null, null, null, - Map.of("ef_search", 10) + Map.of("ef_search", 10), + RescoreContext.getDefault() ); Map searchResponseAsMapMultimodalQuery = search(TEST_BASIC_INDEX_NAME, neuralQueryBuilderMultimodalQuery, 1); Map firstInnerHitMultimodalQuery = getFirstInnerHit(searchResponseAsMapMultimodalQuery); @@ -160,6 +163,7 @@ public void testQueryWithBoostAndImageQueryAndRadialQuery() { null, null, null, + null, null ); @@ -189,6 +193,7 @@ public void testQueryWithBoostAndImageQueryAndRadialQuery() { 0.01f, null, null, + null, null ); @@ -244,6 +249,7 @@ public void testRescoreQuery() { null, null, null, + null, null ); @@ -322,6 +328,7 @@ public void testBooleanQuery_withMultipleNeuralQueries() { null, null, null, + null, null ); NeuralQueryBuilder neuralQueryBuilder2 = new NeuralQueryBuilder( @@ -334,6 +341,7 @@ public void testBooleanQuery_withMultipleNeuralQueries() { null, null, null, + null, null ); @@ -362,6 +370,7 @@ public void testBooleanQuery_withMultipleNeuralQueries() { null, null, null, + null, null ); @@ -418,6 +427,7 @@ public void testNestedQuery() { null, null, null, + null, null ); @@ -469,6 +479,7 @@ public void testFilterQuery() { null, null, new MatchQueryBuilder("_id", "3"), + null, null ); Map searchResponseAsMap = search(TEST_MULTI_DOC_INDEX_NAME, neuralQueryBuilder, 3); diff --git a/src/test/java/org/opensearch/neuralsearch/query/aggregation/MetricAggregationsWithHybridQueryIT.java b/src/test/java/org/opensearch/neuralsearch/query/aggregation/MetricAggregationsWithHybridQueryIT.java index 08efd0811..c969e6ee8 100644 --- a/src/test/java/org/opensearch/neuralsearch/query/aggregation/MetricAggregationsWithHybridQueryIT.java +++ b/src/test/java/org/opensearch/neuralsearch/query/aggregation/MetricAggregationsWithHybridQueryIT.java @@ -424,7 +424,8 @@ private void testSumAggsAndRangePostFilter() throws IOException { rangeFilterQuery, null, false, - null + null, + 0 ); Map aggregations = getAggregations(searchResponseAsMap); diff --git a/src/test/java/org/opensearch/neuralsearch/search/query/HybridCollectorManagerTests.java b/src/test/java/org/opensearch/neuralsearch/search/query/HybridCollectorManagerTests.java index de9c6006b..1d3bc29e9 100644 --- a/src/test/java/org/opensearch/neuralsearch/search/query/HybridCollectorManagerTests.java +++ b/src/test/java/org/opensearch/neuralsearch/search/query/HybridCollectorManagerTests.java @@ -5,6 +5,8 @@ package org.opensearch.neuralsearch.search.query; import com.carrotsearch.randomizedtesting.RandomizedTest; + +import java.io.IOException; import java.util.Arrays; import lombok.SneakyThrows; import org.apache.lucene.document.FieldType; @@ -12,16 +14,19 @@ import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.BulkScorer; import org.apache.lucene.search.Collector; import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.ScoreDoc; 
import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.search.TotalHits; @@ -44,6 +49,7 @@ import org.opensearch.neuralsearch.search.collector.HybridTopScoreDocCollector; import org.opensearch.neuralsearch.search.collector.PagingFieldCollector; import org.opensearch.neuralsearch.search.collector.SimpleFieldCollector; +import org.opensearch.neuralsearch.search.query.exception.HybridSearchRescoreQueryException; import org.opensearch.search.DocValueFormat; import org.opensearch.search.internal.ContextIndexSearcher; import org.opensearch.search.internal.SearchContext; @@ -54,11 +60,17 @@ import java.util.List; import java.util.Map; +import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import static org.opensearch.neuralsearch.search.util.HybridSearchResultFormatUtil.MAGIC_NUMBER_DELIMITER; import static org.opensearch.neuralsearch.search.util.HybridSearchResultFormatUtil.MAGIC_NUMBER_START_STOP; + +import org.opensearch.search.rescore.QueryRescorerBuilder; +import org.opensearch.search.rescore.RescoreContext; +import org.opensearch.search.rescore.Rescorer; +import org.opensearch.search.rescore.RescorerBuilder; import org.opensearch.search.sort.SortAndFormats; public class HybridCollectorManagerTests extends OpenSearchQueryTestCase { @@ -70,6 +82,7 @@ public class HybridCollectorManagerTests extends OpenSearchQueryTestCase { private static final String QUERY1 = "hello"; private static final String QUERY2 = "hi"; private static final float DELTA_FOR_ASSERTION = 0.001f; + protected static final String QUERY3 = "everyone"; @SneakyThrows public void testNewCollector_whenNotConcurrentSearch_thenSuccessful() { @@ -734,4 +747,335 @@ public void testReduceWithConcurrentSegmentSearch_whenMultipleCollectorsMatchedD reader2.close(); directory2.close(); } + + @SneakyThrows + public void testReduceAndRescore_whenMatchedDocsAndRescoreContextPresent_thenSuccessful() { + SearchContext searchContext = mock(SearchContext.class); + QueryShardContext mockQueryShardContext = mock(QueryShardContext.class); + TextFieldMapper.TextFieldType fieldType = (TextFieldMapper.TextFieldType) createMapperService().fieldType(TEXT_FIELD_NAME); + when(mockQueryShardContext.fieldMapper(eq(TEXT_FIELD_NAME))).thenReturn(fieldType); + + HybridQuery hybridQueryWithTerm = new HybridQuery( + List.of( + QueryBuilders.termQuery(TEXT_FIELD_NAME, QUERY1).toQuery(mockQueryShardContext), + QueryBuilders.termQuery(TEXT_FIELD_NAME, QUERY2).toQuery(mockQueryShardContext) + ) + ); + when(searchContext.query()).thenReturn(hybridQueryWithTerm); + ContextIndexSearcher indexSearcher = mock(ContextIndexSearcher.class); + IndexReader indexReader = mock(IndexReader.class); + when(indexReader.numDocs()).thenReturn(3); + when(indexSearcher.getIndexReader()).thenReturn(indexReader); + when(searchContext.searcher()).thenReturn(indexSearcher); + when(searchContext.size()).thenReturn(2); + IndexReaderContext indexReaderContext = mock(IndexReaderContext.class); + when(indexReader.getContext()).thenReturn(indexReaderContext); + + Map, CollectorManager> classCollectorManagerMap = new HashMap<>(); + when(searchContext.queryCollectorManagers()).thenReturn(classCollectorManagerMap); + when(searchContext.shouldUseConcurrentSearch()).thenReturn(false); + + Directory directory = newDirectory(); + final IndexWriter w 
= new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random()))); + FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); + ft.setIndexOptions(random().nextBoolean() ? IndexOptions.DOCS : IndexOptions.DOCS_AND_FREQS); + ft.setOmitNorms(random().nextBoolean()); + ft.freeze(); + + int docId1 = RandomizedTest.randomInt(); + int docId2 = RandomizedTest.randomInt(); + int docId3 = RandomizedTest.randomInt(); + w.addDocument(getDocument(TEXT_FIELD_NAME, docId1, TEST_DOC_TEXT1, ft)); + w.addDocument(getDocument(TEXT_FIELD_NAME, docId2, TEST_DOC_TEXT2, ft)); + w.addDocument(getDocument(TEXT_FIELD_NAME, docId3, TEST_DOC_TEXT3, ft)); + w.flush(); + w.commit(); + + IndexReader reader = DirectoryReader.open(w); + IndexSearcher searcher = newSearcher(reader); + + RescorerBuilder rescorerBuilder = new QueryRescorerBuilder(QueryBuilders.termQuery(TEXT_FIELD_NAME, QUERY2)); + RescoreContext rescoreContext = rescorerBuilder.buildContext(mockQueryShardContext); + List rescoreContexts = List.of(rescoreContext); + when(searchContext.rescore()).thenReturn(rescoreContexts); + when(indexReader.leaves()).thenReturn(reader.leaves()); + Weight rescoreWeight = mock(Weight.class); + Scorer rescoreScorer = mock(Scorer.class); + when(rescoreWeight.scorer(any())).thenReturn(rescoreScorer); + when(rescoreScorer.docID()).thenReturn(1); + DocIdSetIterator iterator = mock(DocIdSetIterator.class); + when(rescoreScorer.iterator()).thenReturn(iterator); + when(rescoreScorer.score()).thenReturn(0.9f); + when(indexSearcher.createWeight(any(), eq(ScoreMode.COMPLETE), eq(1f))).thenReturn(rescoreWeight); + + CollectorManager hybridCollectorManager1 = HybridCollectorManager.createHybridCollectorManager(searchContext); + HybridTopScoreDocCollector collector = (HybridTopScoreDocCollector) hybridCollectorManager1.newCollector(); + + QueryBuilder postFilterQuery = QueryBuilders.termQuery(TEXT_FIELD_NAME, QUERY1); + + Query pfQuery = postFilterQuery.toQuery(mockQueryShardContext); + ParsedQuery parsedQuery = new ParsedQuery(pfQuery); + searchContext.parsedQuery(parsedQuery); + when(searchContext.parsedPostFilter()).thenReturn(parsedQuery); + when(indexSearcher.rewrite(pfQuery)).thenReturn(pfQuery); + Weight postFilterWeight = mock(Weight.class); + when(indexSearcher.createWeight(pfQuery, ScoreMode.COMPLETE_NO_SCORES, 1f)).thenReturn(postFilterWeight); + + CollectorManager hybridCollectorManager2 = HybridCollectorManager.createHybridCollectorManager(searchContext); + FilteredCollector filteredCollector = (FilteredCollector) hybridCollectorManager2.newCollector(); + + Weight weight = new HybridQueryWeight(hybridQueryWithTerm, searcher, ScoreMode.TOP_SCORES, BoostingQueryBuilder.DEFAULT_BOOST); + collector.setWeight(weight); + filteredCollector.setWeight(weight); + LeafReaderContext leafReaderContext = searcher.getIndexReader().leaves().get(0); + LeafCollector leafCollector = collector.getLeafCollector(leafReaderContext); + LeafCollector filteredCollectorLeafCollector = filteredCollector.getLeafCollector(leafReaderContext); + BulkScorer scorer = weight.bulkScorer(leafReaderContext); + scorer.score(leafCollector, leafReaderContext.reader().getLiveDocs()); + leafCollector.finish(); + scorer.score(filteredCollectorLeafCollector, leafReaderContext.reader().getLiveDocs()); + filteredCollectorLeafCollector.finish(); + + Object results1 = hybridCollectorManager1.reduce(List.of()); + Object results2 = hybridCollectorManager2.reduce(List.of()); + + assertNotNull(results1); + assertNotNull(results2); + ReduceableSearchResult 
reduceableSearchResult = ((ReduceableSearchResult) results1); + QuerySearchResult querySearchResult = new QuerySearchResult(); + reduceableSearchResult.reduce(querySearchResult); + TopDocsAndMaxScore topDocsAndMaxScore = querySearchResult.topDocs(); + + assertNotNull(topDocsAndMaxScore); + assertEquals(2, topDocsAndMaxScore.topDocs.totalHits.value); + assertEquals(TotalHits.Relation.EQUAL_TO, topDocsAndMaxScore.topDocs.totalHits.relation); + float maxScore = topDocsAndMaxScore.maxScore; + assertTrue(maxScore > 0); + ScoreDoc[] scoreDocs = topDocsAndMaxScore.topDocs.scoreDocs; + assertEquals(6, scoreDocs.length); + assertEquals(MAGIC_NUMBER_START_STOP, scoreDocs[0].score, DELTA_FOR_ASSERTION); + assertEquals(MAGIC_NUMBER_DELIMITER, scoreDocs[1].score, DELTA_FOR_ASSERTION); + assertTrue(maxScore >= scoreDocs[2].score); + assertEquals(MAGIC_NUMBER_DELIMITER, scoreDocs[3].score, DELTA_FOR_ASSERTION); + assertEquals(maxScore, scoreDocs[4].score, DELTA_FOR_ASSERTION); + assertEquals(MAGIC_NUMBER_START_STOP, scoreDocs[5].score, DELTA_FOR_ASSERTION); + + w.close(); + reader.close(); + directory.close(); + } + + @SneakyThrows + public void testRescoreWithConcurrentSegmentSearch_whenMatchedDocsAndRescore_thenSuccessful() { + SearchContext searchContext = mock(SearchContext.class); + QueryShardContext mockQueryShardContext = mock(QueryShardContext.class); + TextFieldMapper.TextFieldType fieldType = (TextFieldMapper.TextFieldType) createMapperService().fieldType(TEXT_FIELD_NAME); + when(mockQueryShardContext.fieldMapper(eq(TEXT_FIELD_NAME))).thenReturn(fieldType); + + HybridQuery hybridQueryWithTerm = new HybridQuery( + List.of( + QueryBuilders.termQuery(TEXT_FIELD_NAME, QUERY1).toQuery(mockQueryShardContext), + QueryBuilders.termQuery(TEXT_FIELD_NAME, QUERY2).toQuery(mockQueryShardContext), + QueryBuilders.termQuery(TEXT_FIELD_NAME, QUERY3).toQuery(mockQueryShardContext) + ) + ); + when(searchContext.query()).thenReturn(hybridQueryWithTerm); + ContextIndexSearcher indexSearcher = mock(ContextIndexSearcher.class); + IndexReader indexReader = mock(IndexReader.class); + when(indexReader.numDocs()).thenReturn(2); + when(indexSearcher.getIndexReader()).thenReturn(indexReader); + when(searchContext.searcher()).thenReturn(indexSearcher); + when(searchContext.size()).thenReturn(1); + + Map, CollectorManager> classCollectorManagerMap = new HashMap<>(); + when(searchContext.queryCollectorManagers()).thenReturn(classCollectorManagerMap); + when(searchContext.shouldUseConcurrentSearch()).thenReturn(true); + // index segment 1 + Directory directory = newDirectory(); + final IndexWriter w = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random()))); + FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); + ft.setIndexOptions(random().nextBoolean() ? 
IndexOptions.DOCS : IndexOptions.DOCS_AND_FREQS); + ft.setOmitNorms(random().nextBoolean()); + ft.freeze(); + + int docId1 = RandomizedTest.randomInt(); + int docId2 = RandomizedTest.randomInt(); + int docId3 = RandomizedTest.randomInt(); + + w.addDocument(getDocument(TEXT_FIELD_NAME, docId1, TEST_DOC_TEXT1, ft)); + w.addDocument(getDocument(TEXT_FIELD_NAME, docId2, TEST_DOC_TEXT2, ft)); + w.flush(); + w.commit(); + + // index segment 2 + SearchContext searchContext2 = mock(SearchContext.class); + + ContextIndexSearcher indexSearcher2 = mock(ContextIndexSearcher.class); + IndexReader indexReader2 = mock(IndexReader.class); + when(indexReader2.numDocs()).thenReturn(1); + when(indexSearcher2.getIndexReader()).thenReturn(indexReader2); + when(searchContext2.searcher()).thenReturn(indexSearcher2); + when(searchContext2.size()).thenReturn(1); + + when(searchContext2.queryCollectorManagers()).thenReturn(new HashMap<>()); + when(searchContext2.shouldUseConcurrentSearch()).thenReturn(true); + + Directory directory2 = newDirectory(); + final IndexWriter w2 = new IndexWriter(directory2, newIndexWriterConfig(new MockAnalyzer(random()))); + FieldType ft2 = new FieldType(TextField.TYPE_NOT_STORED); + ft2.setIndexOptions(random().nextBoolean() ? IndexOptions.DOCS : IndexOptions.DOCS_AND_FREQS); + ft2.setOmitNorms(random().nextBoolean()); + ft2.freeze(); + + w2.addDocument(getDocument(TEXT_FIELD_NAME, docId3, TEST_DOC_TEXT3, ft2)); + w2.flush(); + w2.commit(); + + IndexReader reader1 = DirectoryReader.open(w); + IndexSearcher searcher1 = newSearcher(reader1); + IndexReader reader2 = DirectoryReader.open(w2); + IndexSearcher searcher2 = newSearcher(reader2); + + List leafReaderContexts = reader1.leaves(); + IndexReaderContext indexReaderContext = mock(IndexReaderContext.class); + when(indexReader.getContext()).thenReturn(indexReaderContext); + when(indexReader.leaves()).thenReturn(leafReaderContexts); + // set up the rescorer so that it boosts the second document from the first segment + RescorerBuilder rescorerBuilder = new QueryRescorerBuilder(QueryBuilders.termQuery(TEXT_FIELD_NAME, QUERY2)); + RescoreContext rescoreContext = rescorerBuilder.buildContext(mockQueryShardContext); + List rescoreContexts = List.of(rescoreContext); + when(searchContext.rescore()).thenReturn(rescoreContexts); + Weight rescoreWeight = mock(Weight.class); + Scorer rescoreScorer = mock(Scorer.class); + when(rescoreWeight.scorer(any())).thenReturn(rescoreScorer); + when(rescoreScorer.docID()).thenReturn(1); + DocIdSetIterator iterator = mock(DocIdSetIterator.class); + when(rescoreScorer.iterator()).thenReturn(iterator); + when(rescoreScorer.score()).thenReturn(0.9f); + when(indexSearcher.createWeight(any(), eq(ScoreMode.COMPLETE), eq(1f))).thenReturn(rescoreWeight); + + CollectorManager hybridCollectorManager = HybridCollectorManager.createHybridCollectorManager(searchContext); + HybridTopScoreDocCollector collector1 = (HybridTopScoreDocCollector) hybridCollectorManager.newCollector(); + HybridTopScoreDocCollector collector2 = (HybridTopScoreDocCollector) hybridCollectorManager.newCollector(); + + Weight weight1 = new HybridQueryWeight(hybridQueryWithTerm, searcher1, ScoreMode.TOP_SCORES, BoostingQueryBuilder.DEFAULT_BOOST); + Weight weight2 = new HybridQueryWeight(hybridQueryWithTerm, searcher2, ScoreMode.TOP_SCORES, BoostingQueryBuilder.DEFAULT_BOOST); + collector1.setWeight(weight1); + collector2.setWeight(weight2); + + LeafReaderContext leafReaderContext = searcher1.getIndexReader().leaves().get(0); + LeafCollector leafCollector1 = collector1.getLeafCollector(leafReaderContext); + BulkScorer scorer = weight1.bulkScorer(leafReaderContext); + scorer.score(leafCollector1, leafReaderContext.reader().getLiveDocs()); + leafCollector1.finish(); + + LeafReaderContext leafReaderContext2 = searcher2.getIndexReader().leaves().get(0); + LeafCollector leafCollector2 = collector2.getLeafCollector(leafReaderContext2); + BulkScorer scorer2 = weight2.bulkScorer(leafReaderContext2); + scorer2.score(leafCollector2, leafReaderContext2.reader().getLiveDocs()); + leafCollector2.finish(); + + Object results = hybridCollectorManager.reduce(List.of(collector1, collector2)); + + // assert that the second search hit in the result has the max score due to the boost from the rescorer + assertNotNull(results); + ReduceableSearchResult reduceableSearchResult = ((ReduceableSearchResult) results); + QuerySearchResult querySearchResult = new QuerySearchResult(); + reduceableSearchResult.reduce(querySearchResult); + TopDocsAndMaxScore topDocsAndMaxScore = querySearchResult.topDocs(); + + assertNotNull(topDocsAndMaxScore); + assertEquals(3, topDocsAndMaxScore.topDocs.totalHits.value); + assertEquals(TotalHits.Relation.EQUAL_TO, topDocsAndMaxScore.topDocs.totalHits.relation); + float maxScore = topDocsAndMaxScore.maxScore; + assertTrue(maxScore > 0); + ScoreDoc[] scoreDocs = topDocsAndMaxScore.topDocs.scoreDocs; + assertEquals(8, scoreDocs.length); + assertEquals(MAGIC_NUMBER_START_STOP, scoreDocs[0].score, DELTA_FOR_ASSERTION); + assertEquals(MAGIC_NUMBER_DELIMITER, scoreDocs[1].score, DELTA_FOR_ASSERTION); + assertTrue(maxScore > scoreDocs[2].score); + assertEquals(MAGIC_NUMBER_DELIMITER, scoreDocs[3].score, DELTA_FOR_ASSERTION); + assertEquals(maxScore, scoreDocs[4].score, DELTA_FOR_ASSERTION); + assertEquals(MAGIC_NUMBER_DELIMITER, scoreDocs[5].score, DELTA_FOR_ASSERTION); + assertTrue(maxScore > scoreDocs[6].score); + assertEquals(MAGIC_NUMBER_START_STOP, scoreDocs[7].score, DELTA_FOR_ASSERTION); + + // release resources + w.close(); + reader1.close(); + directory.close(); + w2.close(); + reader2.close(); + directory2.close(); + } + + @SneakyThrows + public void testReduceAndRescore_whenRescorerThrowsException_thenFail() { + SearchContext searchContext = mock(SearchContext.class); + QueryShardContext mockQueryShardContext = mock(QueryShardContext.class); + TextFieldMapper.TextFieldType fieldType = (TextFieldMapper.TextFieldType) createMapperService().fieldType(TEXT_FIELD_NAME); + when(mockQueryShardContext.fieldMapper(eq(TEXT_FIELD_NAME))).thenReturn(fieldType); + + HybridQuery hybridQueryWithTerm = new HybridQuery( + List.of( + QueryBuilders.termQuery(TEXT_FIELD_NAME, QUERY1).toQuery(mockQueryShardContext), + QueryBuilders.termQuery(TEXT_FIELD_NAME, QUERY2).toQuery(mockQueryShardContext) + ) + ); + when(searchContext.query()).thenReturn(hybridQueryWithTerm); + ContextIndexSearcher indexSearcher = mock(ContextIndexSearcher.class); + IndexReader indexReader = mock(IndexReader.class); + when(indexReader.numDocs()).thenReturn(3); + when(indexSearcher.getIndexReader()).thenReturn(indexReader); + when(searchContext.searcher()).thenReturn(indexSearcher); + when(searchContext.size()).thenReturn(2); + IndexReaderContext indexReaderContext = mock(IndexReaderContext.class); + when(indexReader.getContext()).thenReturn(indexReaderContext); + + Map, CollectorManager> classCollectorManagerMap = new HashMap<>(); + when(searchContext.queryCollectorManagers()).thenReturn(classCollectorManagerMap); +
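// The rescorer stubbed a few lines below throws an IOException from rescore(); the expectThrows
// at the end of this test verifies that reduce() surfaces it as a HybridSearchRescoreQueryException.
// Roughly the behavior under test, sketched here for illustration (not the actual implementation):
// try { topDocs = rescoreContext.rescorer().rescore(topDocs, searcher, rescoreContext); }
// catch (IOException exception) { throw new HybridSearchRescoreQueryException(exception); }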
when(searchContext.shouldUseConcurrentSearch()).thenReturn(false); + + Directory directory = newDirectory(); + final IndexWriter w = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random()))); + FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); + ft.setIndexOptions(random().nextBoolean() ? IndexOptions.DOCS : IndexOptions.DOCS_AND_FREQS); + ft.setOmitNorms(random().nextBoolean()); + ft.freeze(); + + int docId1 = RandomizedTest.randomInt(); + w.addDocument(getDocument(TEXT_FIELD_NAME, docId1, TEST_DOC_TEXT1, ft)); + w.flush(); + w.commit(); + + IndexReader reader = DirectoryReader.open(w); + IndexSearcher searcher = newSearcher(reader); + + RescoreContext rescoreContext = mock(RescoreContext.class); + Rescorer rescorer = mock(Rescorer.class); + when(rescoreContext.rescorer()).thenReturn(rescorer); + when(rescorer.rescore(any(), any(), any())).thenThrow(new IOException("something happened with rescorer")); + List rescoreContexts = List.of(rescoreContext); + when(searchContext.rescore()).thenReturn(rescoreContexts); + + CollectorManager hybridCollectorManager1 = HybridCollectorManager.createHybridCollectorManager(searchContext); + HybridTopScoreDocCollector collector = (HybridTopScoreDocCollector) hybridCollectorManager1.newCollector(); + + Weight weight = new HybridQueryWeight(hybridQueryWithTerm, searcher, ScoreMode.TOP_SCORES, BoostingQueryBuilder.DEFAULT_BOOST); + collector.setWeight(weight); + + LeafReaderContext leafReaderContext = searcher.getIndexReader().leaves().get(0); + LeafCollector leafCollector = collector.getLeafCollector(leafReaderContext); + + BulkScorer scorer = weight.bulkScorer(leafReaderContext); + scorer.score(leafCollector, leafReaderContext.reader().getLiveDocs()); + leafCollector.finish(); + + expectThrows(HybridSearchRescoreQueryException.class, () -> hybridCollectorManager1.reduce(List.of())); + + // release resources + w.close(); + reader.close(); + directory.close(); + } } diff --git a/src/test/java/org/opensearch/neuralsearch/search/query/TopDocsMergerTests.java b/src/test/java/org/opensearch/neuralsearch/search/query/TopDocsMergerTests.java index d10ca0668..9c2718687 100644 --- a/src/test/java/org/opensearch/neuralsearch/search/query/TopDocsMergerTests.java +++ b/src/test/java/org/opensearch/neuralsearch/search/query/TopDocsMergerTests.java @@ -176,6 +176,65 @@ public void testMergeScoreDocs_whenBothTopDocsHasNoHits_thenSuccessful() { assertEquals(MAGIC_NUMBER_START_STOP, scoreDocs[3].score, 0); } + @SneakyThrows + public void testMergeScoreDocs_whenSomeSegmentsHasNoHits_thenSuccessful() { + // Given + TopDocsMerger topDocsMerger = new TopDocsMerger(null); + + // When + // first segment has no results, and we merge with non-empty segment + TopDocs topDocsOriginal = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[] {}); + TopDocsAndMaxScore topDocsAndMaxScoreOriginal = new TopDocsAndMaxScore(topDocsOriginal, 0); + TopDocs topDocsNew = new TopDocs( + new TotalHits(2, TotalHits.Relation.EQUAL_TO), + new ScoreDoc[] { + createStartStopElementForHybridSearchResults(0), + createDelimiterElementForHybridSearchResults(0), + new ScoreDoc(0, 0.5f), + new ScoreDoc(2, 0.3f), + createStartStopElementForHybridSearchResults(0) } + ); + TopDocsAndMaxScore topDocsAndMaxScoreNew = new TopDocsAndMaxScore(topDocsNew, 0.5f); + TopDocsAndMaxScore mergedTopDocsAndMaxScore = topDocsMerger.merge(topDocsAndMaxScoreOriginal, topDocsAndMaxScoreNew); + + // Then + assertNotNull(mergedTopDocsAndMaxScore); + + assertEquals(0.5f, 
mergedTopDocsAndMaxScore.maxScore, DELTA_FOR_ASSERTION); + assertEquals(2, mergedTopDocsAndMaxScore.topDocs.totalHits.value); + assertEquals(TotalHits.Relation.EQUAL_TO, mergedTopDocsAndMaxScore.topDocs.totalHits.relation); + assertEquals(5, mergedTopDocsAndMaxScore.topDocs.scoreDocs.length); + // check format, all elements one by one + ScoreDoc[] scoreDocs = mergedTopDocsAndMaxScore.topDocs.scoreDocs; + assertEquals(MAGIC_NUMBER_START_STOP, scoreDocs[0].score, 0); + assertEquals(MAGIC_NUMBER_DELIMITER, scoreDocs[1].score, 0); + assertScoreDoc(scoreDocs[2], 0, 0.5f); + assertScoreDoc(scoreDocs[3], 2, 0.3f); + assertEquals(MAGIC_NUMBER_START_STOP, scoreDocs[4].score, 0); + + // When + // source object has results, and we merge with empty segment + TopDocs topDocsNewEmpty = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[] {}); + TopDocsAndMaxScore topDocsAndMaxScoreNewEmpty = new TopDocsAndMaxScore(topDocsNewEmpty, 0); + TopDocsAndMaxScore finalMergedTopDocsAndMaxScore = topDocsMerger.merge(mergedTopDocsAndMaxScore, topDocsAndMaxScoreNewEmpty); + + // Then + // merged object remains unchanged + assertNotNull(finalMergedTopDocsAndMaxScore); + + assertEquals(0.5f, finalMergedTopDocsAndMaxScore.maxScore, DELTA_FOR_ASSERTION); + assertEquals(2, finalMergedTopDocsAndMaxScore.topDocs.totalHits.value); + assertEquals(TotalHits.Relation.EQUAL_TO, finalMergedTopDocsAndMaxScore.topDocs.totalHits.relation); + assertEquals(5, finalMergedTopDocsAndMaxScore.topDocs.scoreDocs.length); + // check format, all elements one by one + ScoreDoc[] finalScoreDocs = finalMergedTopDocsAndMaxScore.topDocs.scoreDocs; + assertEquals(MAGIC_NUMBER_START_STOP, finalScoreDocs[0].score, 0); + assertEquals(MAGIC_NUMBER_DELIMITER, finalScoreDocs[1].score, 0); + assertScoreDoc(finalScoreDocs[2], 0, 0.5f); + assertScoreDoc(finalScoreDocs[3], 2, 0.3f); + assertEquals(MAGIC_NUMBER_START_STOP, finalScoreDocs[4].score, 0); + } + @SneakyThrows public void testThreeSequentialMerges_whenAllTopDocsHasHits_thenSuccessful() { TopDocsMerger topDocsMerger = new TopDocsMerger(null); diff --git a/src/test/resources/processor/IndexMappings.json b/src/test/resources/processor/IndexMappings.json index 7afbaa92e..afe613117 100644 --- a/src/test/resources/processor/IndexMappings.json +++ b/src/test/resources/processor/IndexMappings.json @@ -90,6 +90,9 @@ "text": { "type": "text" }, + "text_not_for_embedding": { + "type": "text" + }, "embedding": { "type": "knn_vector", "dimension": 768, diff --git a/src/test/resources/processor/ingest_doc1.json b/src/test/resources/processor/ingest_doc1.json index b1cc5392b..d952d07d8 100644 --- a/src/test/resources/processor/ingest_doc1.json +++ b/src/test/resources/processor/ingest_doc1.json @@ -12,6 +12,9 @@ "movie": null }, "nested_passages": [ + { + "text_not_for_embedding": "test" + }, { "text": "hello" }, diff --git a/src/test/resources/processor/ingest_doc2.json b/src/test/resources/processor/ingest_doc2.json index cce93d4a1..5ab1f7525 100644 --- a/src/test/resources/processor/ingest_doc2.json +++ b/src/test/resources/processor/ingest_doc2.json @@ -10,6 +10,9 @@ "movie": null }, "nested_passages": [ + { + "text_not_for_embedding": "test" + }, { "text": "apple" }, diff --git a/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java b/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java index 4ee9f71f8..1dbe1b832 100644 --- a/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java +++ 
b/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java @@ -203,6 +203,7 @@ protected void loadModel(final String modelId) throws Exception { isComplete = checkComplete(taskQueryResult); Thread.sleep(DEFAULT_TASK_RESULT_QUERY_INTERVAL_IN_MILLISECOND); } + assertTrue(isComplete); } /** @@ -528,7 +529,7 @@ protected Map search( Map requestParams, List aggs ) { - return search(index, queryBuilder, rescorer, resultSize, requestParams, aggs, null, null, false, null); + return search(index, queryBuilder, rescorer, resultSize, requestParams, aggs, null, null, false, null, 0); } @SneakyThrows @@ -542,10 +543,11 @@ protected Map search( QueryBuilder postFilterBuilder, List> sortBuilders, boolean trackScores, - List searchAfter + List searchAfter, + int from ) { XContentBuilder builder = XContentFactory.jsonBuilder().startObject(); - + builder.field("from", from); if (queryBuilder != null) { builder.field("query"); queryBuilder.toXContent(builder, ToXContent.EMPTY_PARAMS); @@ -1199,6 +1201,8 @@ protected void waitForClusterHealthGreen(final String numOfNodes) throws IOExcep Request waitForGreen = new Request("GET", "/_cluster/health"); waitForGreen.addParameter("wait_for_nodes", numOfNodes); waitForGreen.addParameter("wait_for_status", "green"); + waitForGreen.addParameter("cluster_manager_timeout", "60s"); + waitForGreen.addParameter("timeout", "60s"); client().performRequest(waitForGreen); } diff --git a/src/testFixtures/java/org/opensearch/neuralsearch/util/TestUtils.java b/src/testFixtures/java/org/opensearch/neuralsearch/util/TestUtils.java index bc016aae2..ab041c440 100644 --- a/src/testFixtures/java/org/opensearch/neuralsearch/util/TestUtils.java +++ b/src/testFixtures/java/org/opensearch/neuralsearch/util/TestUtils.java @@ -39,6 +39,7 @@ public class TestUtils { public static final String RELATION_EQUAL_TO = "eq"; public static final float DELTA_FOR_SCORE_ASSERTION = 0.001f; + public static final float DELTA_FOR_FLOATS_ASSERTION = 0.001f; public static final String RESTART_UPGRADE_OLD_CLUSTER = "tests.is_old_cluster"; public static final String BWC_VERSION = "tests.plugin_bwc_version"; public static final String NEURAL_SEARCH_BWC_PREFIX = "neuralsearch-bwc-";
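A usage sketch, not part of the diff: with the widened search() signature in BaseNeuralSearchIT, existing call sites pass from = 0 (the default offset, preserving prior behavior), while the new HybridQueryIT pagination test passes a non-zero value to assert the failure path. Assuming the fixture names used elsewhere in these tests:

    // illustrative call only; a non-zero `from` with a hybrid query is expected to be rejected
    Map<String, Object> searchResponseAsMap = search(
        TEST_MULTI_DOC_INDEX_NAME_ONE_SHARD, hybridQueryBuilder, null, 10,
        Map.of("search_pipeline", SEARCH_PIPELINE), null, null, null, false, null, 0);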