Skip to content

Commit

Permalink
Remove batch_size of bulk API from tests & refactor BWC version check (
Browse files Browse the repository at this point in the history
…#852)

* Remove batch_size of bulk API from tests & refactor BWC version check

Signed-off-by: Liyun Xiu <xiliyun@amazon.com>

* Update changelog

Signed-off-by: Liyun Xiu <xiliyun@amazon.com>

* Address some comments

Signed-off-by: Liyun Xiu <xiliyun@amazon.com>

* Update Changelog

Signed-off-by: Liyun Xiu <xiliyun@amazon.com>

---------

Signed-off-by: Liyun Xiu <xiliyun@amazon.com>
(cherry picked from commit e1c3878)
Signed-off-by: Martin Gaievski <gaievski@amazon.com>
  • Loading branch information
chishui authored and martin-gaievski committed Nov 27, 2024
1 parent d83dc5b commit 01fdea5
Show file tree
Hide file tree
Showing 13 changed files with 152 additions and 122 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
### Bug Fixes
- Address inconsistent scoring in hybrid query results ([#998](https://github.com/opensearch-project/neural-search/pull/998))
### Infrastructure
- Update batch related tests to use batch_size in processor & refactor BWC version check ([#852](https://github.com/opensearch-project/neural-search/pull/852))
### Documentation
### Maintenance
### Refactoring
59 changes: 33 additions & 26 deletions qa/restart-upgrade/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,13 @@ testClusters {
}
}

def versionsBelow2_11 = ["2.9", "2.10"]
def versionsBelow2_12 = versionsBelow2_11 + "2.11"
def versionsBelow2_13 = versionsBelow2_12 + "2.12"
def versionsBelow2_14 = versionsBelow2_13 + "2.13"
def versionsBelow2_15 = versionsBelow2_14 + "2.14"
def versionsBelow2_16 = versionsBelow2_15 + "2.15"

// Task to run BWC tests against the old cluster
task testAgainstOldCluster(type: StandaloneRestIntegTestTask) {
if(!ext.bwcBundleTest){
Expand All @@ -67,7 +74,7 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) {

// Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10
// because these features were released in 2.11 version.
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")){
if (versionsBelow2_11.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*"
Expand All @@ -76,35 +83,35 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) {
}
}

// Excluding the test because we introduce this feature in 2.13
if (ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){
// Excluding the these tests because we introduce them in 2.13
if (versionsBelow2_13.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow"
}
}

// Excluding the text chunking processor test because we introduce this feature in 2.13
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.TextChunkingProcessorIT.*"
}
}

// Excluding the k-NN radial search tests and batch ingestion tests because we introduce these features in 2.14
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13")){
// Excluding the k-NN radial search tests because we introduce this feature in 2.14
if (versionsBelow2_14.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.KnnRadialSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*"
}
}

// Excluding the NeuralSparseQuery two-phase search pipeline tests because we introduce this feature in 2.15
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){
if (versionsBelow2_15.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
}
}

// Excluding the batching processor tests because we introduce this feature in 2.16
if (versionsBelow2_16.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*"
}
}

nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
systemProperty 'tests.security.manager', 'false'
Expand All @@ -131,7 +138,7 @@ task testAgainstNewCluster(type: StandaloneRestIntegTestTask) {

// Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10
// because these features were released in 2.11 version.
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")){
if (versionsBelow2_11.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*"
Expand All @@ -140,35 +147,35 @@ task testAgainstNewCluster(type: StandaloneRestIntegTestTask) {
}
}

// Excluding the test because we introduce this feature in 2.13
if (ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){
// Excluding these tests because we introduce them in 2.13
if (versionsBelow2_13.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow"
}
}

// Excluding the text chunking processor test because we introduce this feature in 2.13
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.TextChunkingProcessorIT.*"
}
}

// Excluding the k-NN radial search tests and batch ingestion tests because we introduce these features in 2.14
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13")){
// Excluding the k-NN radial search tests because we introduce this feature in 2.14
if (versionsBelow2_14.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.KnnRadialSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*"
}
}

// Excluding the NeuralSparseQuery two-phase search pipeline tests because we introduce this feature in 2.15
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){
if (versionsBelow2_15.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
}
}

// Excluding the batch processor tests because we introduce this feature in 2.16
if (versionsBelow2_16.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*"
}
}

nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
systemProperty 'tests.security.manager', 'false'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ protected String registerModelGroupAndGetModelId(final String requestBody) throw

protected void createPipelineProcessor(final String modelId, final String pipelineName) throws Exception {
String requestBody = Files.readString(Path.of(classLoader.getResource("processor/PipelineConfiguration.json").toURI()));
createPipelineProcessor(requestBody, pipelineName, modelId);
createPipelineProcessor(requestBody, pipelineName, modelId, null);
}

protected String uploadSparseEncodingModel() throws Exception {
Expand All @@ -90,20 +90,25 @@ protected void createPipelineForTextImageProcessor(final String modelId, final S
String requestBody = Files.readString(
Path.of(classLoader.getResource("processor/PipelineForTextImageProcessorConfiguration.json").toURI())
);
createPipelineProcessor(requestBody, pipelineName, modelId);
createPipelineProcessor(requestBody, pipelineName, modelId, null);
}

protected void createPipelineForSparseEncodingProcessor(final String modelId, final String pipelineName) throws Exception {
protected void createPipelineForSparseEncodingProcessor(final String modelId, final String pipelineName, final Integer batchSize)
throws Exception {
String requestBody = Files.readString(
Path.of(classLoader.getResource("processor/PipelineForSparseEncodingProcessorConfiguration.json").toURI())
);
createPipelineProcessor(requestBody, pipelineName, modelId);
createPipelineProcessor(requestBody, pipelineName, modelId, batchSize);
}

protected void createPipelineForSparseEncodingProcessor(final String modelId, final String pipelineName) throws Exception {
createPipelineForSparseEncodingProcessor(modelId, pipelineName, null);
}

protected void createPipelineForTextChunkingProcessor(String pipelineName) throws Exception {
String requestBody = Files.readString(
Path.of(classLoader.getResource("processor/PipelineForTextChunkingProcessorConfiguration.json").toURI())
);
createPipelineProcessor(requestBody, pipelineName, "");
createPipelineProcessor(requestBody, pipelineName, "", null);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,22 +27,22 @@ public void testBatchIngestionWithNeuralSparseProcessor_E2EFlow() throws Excepti
if (isRunningAgainstOldCluster()) {
String modelId = uploadSparseEncodingModel();
loadModel(modelId);
createPipelineForSparseEncodingProcessor(modelId, PIPELINE_NAME);
createPipelineForSparseEncodingProcessor(modelId, PIPELINE_NAME, batchSize);
createIndexWithConfiguration(
indexName,
Files.readString(Path.of(classLoader.getResource("processor/SparseIndexMappings.json").toURI())),
PIPELINE_NAME
);
List<Map<String, String>> docs = prepareDataForBulkIngestion(0, 5);
bulkAddDocuments(indexName, TEXT_FIELD_NAME, PIPELINE_NAME, docs, batchSize);
bulkAddDocuments(indexName, TEXT_FIELD_NAME, PIPELINE_NAME, docs);
validateDocCountAndInfo(indexName, 5, () -> getDocById(indexName, "4"), EMBEDDING_FIELD_NAME, Map.class);
} else {
String modelId = null;
modelId = TestUtils.getModelId(getIngestionPipeline(PIPELINE_NAME), SPARSE_ENCODING_PROCESSOR);
loadModel(modelId);
try {
List<Map<String, String>> docs = prepareDataForBulkIngestion(5, 5);
bulkAddDocuments(indexName, TEXT_FIELD_NAME, PIPELINE_NAME, docs, batchSize);
bulkAddDocuments(indexName, TEXT_FIELD_NAME, PIPELINE_NAME, docs);
validateDocCountAndInfo(indexName, 10, () -> getDocById(indexName, "9"), EMBEDDING_FIELD_NAME, Map.class);
} finally {
wipeOfTestResources(indexName, PIPELINE_NAME, modelId, null);
Expand Down
Loading

0 comments on commit 01fdea5

Please sign in to comment.