[Backport 2.x] Remove batch_size of bulk API from tests & refactor BWC version check #873

Merged: 4 commits, Jan 9, 2025
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -26,6 +26,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Fix bug where ingested document has list of nested objects ([#1040](https://github.com/opensearch-project/neural-search/pull/1040))
- Fixed document source and score field mismatch in sorted hybrid queries ([#1043](https://github.com/opensearch-project/neural-search/pull/1043))
### Infrastructure
- Update batch related tests to use batch_size in processor & refactor BWC version check ([#852](https://github.com/opensearch-project/neural-search/pull/852))
- Fix CI for JDK upgrade towards 21 ([#835](https://github.com/opensearch-project/neural-search/pull/835))
### Documentation
### Maintenance
59 changes: 33 additions & 26 deletions qa/restart-upgrade/build.gradle
@@ -54,6 +54,13 @@ testClusters {
}
}

def versionsBelow2_11 = ["2.9", "2.10"]
def versionsBelow2_12 = versionsBelow2_11 + "2.11"
def versionsBelow2_13 = versionsBelow2_12 + "2.12"
def versionsBelow2_14 = versionsBelow2_13 + "2.13"
def versionsBelow2_15 = versionsBelow2_14 + "2.14"
def versionsBelow2_16 = versionsBelow2_15 + "2.15"

// Task to run BWC tests against the old cluster
task testAgainstOldCluster(type: StandaloneRestIntegTestTask) {
if(!ext.bwcBundleTest){
@@ -67,7 +74,7 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) {

// Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10
// because these features were released in 2.11 version.
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")){
if (versionsBelow2_11.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*"
@@ -76,36 +83,36 @@ }
}
}

// Excluding the test because we introduce this feature in 2.13
if (ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){
// Excluding these tests because we introduce them in 2.13
if (versionsBelow2_13.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow"
}
}

// Excluding the text chunking processor test because we introduce this feature in 2.13
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.TextChunkingProcessorIT.*"
}
}

// Excluding the k-NN radial search tests and batch ingestion tests because we introduce these features in 2.14
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13")){
// Excluding the k-NN radial search tests because we introduce this feature in 2.14
if (versionsBelow2_14.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.KnnRadialSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*"
}
}

// Excluding the NeuralSparseQuery two-phase search pipeline tests because we introduce this feature in 2.15
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){
if (versionsBelow2_15.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchWithRescoreIT.*"
}
}

// Excluding the batch ingestion tests because we introduce this feature in 2.16
if (versionsBelow2_16.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*"
}
}

nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
systemProperty 'tests.security.manager', 'false'
@@ -132,7 +139,7 @@ task testAgainstNewCluster(type: StandaloneRestIntegTestTask) {

// Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10
// because these features were released in 2.11 version.
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")){
if (versionsBelow2_11.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*"
@@ -141,36 +148,36 @@ task testAgainstNewCluster(type: StandaloneRestIntegTestTask) {
}
}

// Excluding the test because we introduce this feature in 2.13
if (ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){
// Excluding these tests because we introduce them in 2.13
if (versionsBelow2_13.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow"
}
}

// Excluding the text chunking processor test because we introduce this feature in 2.13
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.TextChunkingProcessorIT.*"
}
}

// Excluding the k-NN radial search tests and batch ingestion tests because we introduce these features in 2.14
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13")){
// Excluding the k-NN radial search tests because we introduce this feature in 2.14
if (versionsBelow2_14.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.KnnRadialSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*"
}
}

// Excluding the NeuralSparseQuery two-phase search pipeline tests because we introduce this feature in 2.15
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){
if (versionsBelow2_15.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchWithRescoreIT.*"
}
}

// Excluding the batch ingestion tests because we introduce this feature in 2.16
if (versionsBelow2_16.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*"
}
}

nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
systemProperty 'tests.security.manager', 'false'
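For readers scanning the build script changes: the refactor replaces long chains of `startsWith` checks with cumulative version lists, so gating a feature introduced in version N only requires testing the `versionsBelow<N>` list. A minimal standalone Groovy sketch of the pattern (list values mirror the diff above; `bwcVersion` is a hypothetical stand-in for `ext.neural_search_bwc_version`):

```groovy
// Each list is the previous one plus the next minor version,
// so "below 2.14" is "below 2.13" with "2.13" appended.
def versionsBelow2_13 = ["2.9", "2.10", "2.11", "2.12"]
def versionsBelow2_14 = versionsBelow2_13 + "2.13"

def bwcVersion = "2.13.0"  // hypothetical value of ext.neural_search_bwc_version

// A gate asks: does the BWC version start with any entry in the list?
if (versionsBelow2_14.any { bwcVersion.startsWith(it) }) {
    println "Exclude tests for features introduced in 2.14"
}
```

With this shape, supporting a new release means adding one `def` line and one new exclusion block, instead of appending another `startsWith` clause to every existing condition.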
@@ -76,7 +76,7 @@ protected String registerModelGroupAndGetModelId(final String requestBody) throw

protected void createPipelineProcessor(final String modelId, final String pipelineName) throws Exception {
String requestBody = Files.readString(Path.of(classLoader.getResource("processor/PipelineConfiguration.json").toURI()));
createPipelineProcessor(requestBody, pipelineName, modelId);
createPipelineProcessor(requestBody, pipelineName, modelId, null);
}

protected String uploadSparseEncodingModel() throws Exception {
@@ -90,20 +90,25 @@ protected void createPipelineForTextImageProcessor(final String modelId, final S
String requestBody = Files.readString(
Path.of(classLoader.getResource("processor/PipelineForTextImageProcessorConfiguration.json").toURI())
);
createPipelineProcessor(requestBody, pipelineName, modelId);
createPipelineProcessor(requestBody, pipelineName, modelId, null);
}

protected void createPipelineForSparseEncodingProcessor(final String modelId, final String pipelineName) throws Exception {
protected void createPipelineForSparseEncodingProcessor(final String modelId, final String pipelineName, final Integer batchSize)
throws Exception {
String requestBody = Files.readString(
Path.of(classLoader.getResource("processor/PipelineForSparseEncodingProcessorConfiguration.json").toURI())
);
createPipelineProcessor(requestBody, pipelineName, modelId);
createPipelineProcessor(requestBody, pipelineName, modelId, batchSize);
}

protected void createPipelineForSparseEncodingProcessor(final String modelId, final String pipelineName) throws Exception {
createPipelineForSparseEncodingProcessor(modelId, pipelineName, null);
}

protected void createPipelineForTextChunkingProcessor(String pipelineName) throws Exception {
String requestBody = Files.readString(
Path.of(classLoader.getResource("processor/PipelineForTextChunkingProcessorConfiguration.json").toURI())
);
createPipelineProcessor(requestBody, pipelineName, "");
createPipelineProcessor(requestBody, pipelineName, "", null);
}
}
@@ -27,22 +27,22 @@ public void testBatchIngestionWithNeuralSparseProcessor_E2EFlow() throws Excepti
if (isRunningAgainstOldCluster()) {
String modelId = uploadSparseEncodingModel();
loadModel(modelId);
createPipelineForSparseEncodingProcessor(modelId, PIPELINE_NAME);
createPipelineForSparseEncodingProcessor(modelId, PIPELINE_NAME, batchSize);
createIndexWithConfiguration(
indexName,
Files.readString(Path.of(classLoader.getResource("processor/SparseIndexMappings.json").toURI())),
PIPELINE_NAME
);
List<Map<String, String>> docs = prepareDataForBulkIngestion(0, 5);
bulkAddDocuments(indexName, TEXT_FIELD_NAME, PIPELINE_NAME, docs, batchSize);
bulkAddDocuments(indexName, TEXT_FIELD_NAME, PIPELINE_NAME, docs);
validateDocCountAndInfo(indexName, 5, () -> getDocById(indexName, "4"), EMBEDDING_FIELD_NAME, Map.class);
} else {
String modelId = null;
modelId = TestUtils.getModelId(getIngestionPipeline(PIPELINE_NAME), SPARSE_ENCODING_PROCESSOR);
loadModel(modelId);
try {
List<Map<String, String>> docs = prepareDataForBulkIngestion(5, 5);
bulkAddDocuments(indexName, TEXT_FIELD_NAME, PIPELINE_NAME, docs, batchSize);
bulkAddDocuments(indexName, TEXT_FIELD_NAME, PIPELINE_NAME, docs);
validateDocCountAndInfo(indexName, 10, () -> getDocById(indexName, "9"), EMBEDDING_FIELD_NAME, Map.class);
} finally {
wipeOfTestResources(indexName, PIPELINE_NAME, modelId, null);
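The test change above mirrors what this backport is about: `batch_size` is no longer passed to `bulkAddDocuments` (and thus to the `_bulk` request), and is instead supplied when the pipeline is created, via the new `createPipelineForSparseEncodingProcessor(modelId, pipelineName, batchSize)` overload. As a rough illustration, a pipeline created this way carries the batching setting in the processor definition itself. The body below is a hypothetical sketch, not taken from this diff: the field names are modeled on the `PipelineForSparseEncodingProcessorConfiguration.json` resource referenced above, and the processor-level `batch_size` parameter is assumed to be the one introduced in 2.16.

```json
PUT /_ingest/pipeline/sparse-encoding-pipeline
{
  "description": "Hypothetical sparse encoding pipeline with processor-level batching",
  "processors": [
    {
      "sparse_encoding": {
        "model_id": "<model_id>",
        "batch_size": 2,
        "field_map": {
          "passage_text": "passage_embedding"
        }
      }
    }
  ]
}
```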