[Backport 2.x] Remove batch_size of bulk API from tests & refactor BWC version check #873

Merged: 4 commits, Jan 9, 2025
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -26,6 +26,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Fix bug where ingested document has list of nested objects ([#1040](https://github.com/opensearch-project/neural-search/pull/1040))
- Fixed document source and score field mismatch in sorted hybrid queries ([#1043](https://github.com/opensearch-project/neural-search/pull/1043))
### Infrastructure
- Update batch related tests to use batch_size in processor & refactor BWC version check ([#852](https://github.com/opensearch-project/neural-search/pull/852))
- Fix CI for JDK upgrade towards 21 ([#835](https://github.com/opensearch-project/neural-search/pull/835))
### Documentation
### Maintenance
59 changes: 33 additions & 26 deletions qa/restart-upgrade/build.gradle
@@ -54,6 +54,13 @@ testClusters {
}
}

def versionsBelow2_11 = ["2.9", "2.10"]
def versionsBelow2_12 = versionsBelow2_11 + "2.11"
def versionsBelow2_13 = versionsBelow2_12 + "2.12"
def versionsBelow2_14 = versionsBelow2_13 + "2.13"
def versionsBelow2_15 = versionsBelow2_14 + "2.14"
def versionsBelow2_16 = versionsBelow2_15 + "2.15"

// Task to run BWC tests against the old cluster
task testAgainstOldCluster(type: StandaloneRestIntegTestTask) {
if(!ext.bwcBundleTest){
@@ -67,7 +74,7 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) {

// Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10
// because these features were released in 2.11 version.
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")){
if (versionsBelow2_11.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*"
@@ -76,36 +83,36 @@ }
}
}

// Excluding the test because we introduce this feature in 2.13
if (ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){
// Excluding these tests because we introduce them in 2.13
if (versionsBelow2_13.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow"
}
}

// Excluding the text chunking processor test because we introduce this feature in 2.13
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.TextChunkingProcessorIT.*"
}
}

// Excluding the k-NN radial search tests and batch ingestion tests because we introduce these features in 2.14
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13")){
// Excluding the k-NN radial search tests because we introduce this feature in 2.14
if (versionsBelow2_14.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.KnnRadialSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*"
}
}

// Excluding the NeuralSparseQuery two-phase search pipeline tests because we introduce this feature in 2.15
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){
if (versionsBelow2_15.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchWithRescoreIT.*"
}
}

// Excluding the batch ingestion tests because we introduce this feature in 2.16
if (versionsBelow2_16.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*"
}
}

nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
systemProperty 'tests.security.manager', 'false'
@@ -132,7 +139,7 @@ task testAgainstNewCluster(type: StandaloneRestIntegTestTask) {

// Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10
// because these features were released in 2.11 version.
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")){
if (versionsBelow2_11.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*"
@@ -141,36 +148,36 @@ task testAgainstNewCluster(type: StandaloneRestIntegTestTask) {
}
}

// Excluding the test because we introduce this feature in 2.13
if (ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){
// Excluding these tests because we introduce them in 2.13
if (versionsBelow2_13.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow"
}
}

// Excluding the text chunking processor test because we introduce this feature in 2.13
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.TextChunkingProcessorIT.*"
}
}

// Excluding the k-NN radial search tests and batch ingestion tests because we introduce these features in 2.14
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13")){
// Excluding the k-NN radial search tests because we introduce this feature in 2.14
if (versionsBelow2_14.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.KnnRadialSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*"
}
}

// Excluding the NeuralSparseQuery two-phase search pipeline tests because we introduce this feature in 2.15
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){
if (versionsBelow2_15.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchWithRescoreIT.*"
}
}

// Excluding the batch ingestion tests because we introduce this feature in 2.16
if (versionsBelow2_16.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*"
}
}

nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
systemProperty 'tests.security.manager', 'false'
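For readers scanning the build script changes: the refactor replaces long chains of `startsWith` checks with cumulative version lists, so gating a feature introduced in version N only requires testing the `versionsBelow<N>` list. A minimal standalone Groovy sketch of the pattern (list values mirror the diff above; `bwcVersion` is a hypothetical stand-in for `ext.neural_search_bwc_version`):

```groovy
// Each list is the previous one plus the next minor version,
// so "below 2.14" is "below 2.13" with "2.13" appended.
def versionsBelow2_13 = ["2.9", "2.10", "2.11", "2.12"]
def versionsBelow2_14 = versionsBelow2_13 + "2.13"

def bwcVersion = "2.13.0"  // hypothetical value of ext.neural_search_bwc_version

// A gate asks: does the BWC version start with any entry in the list?
if (versionsBelow2_14.any { bwcVersion.startsWith(it) }) {
    println "Exclude tests for features introduced in 2.14"
}
```

With this shape, supporting a new release means adding one `def` line and one new exclusion block, instead of appending another `startsWith` clause to every existing condition.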
@@ -76,7 +76,7 @@ protected String registerModelGroupAndGetModelId(final String requestBody) throw

protected void createPipelineProcessor(final String modelId, final String pipelineName) throws Exception {
String requestBody = Files.readString(Path.of(classLoader.getResource("processor/PipelineConfiguration.json").toURI()));
createPipelineProcessor(requestBody, pipelineName, modelId);
createPipelineProcessor(requestBody, pipelineName, modelId, null);
}

protected String uploadSparseEncodingModel() throws Exception {
@@ -90,20 +90,25 @@ protected void createPipelineForTextImageProcessor(final String modelId, final S
String requestBody = Files.readString(
Path.of(classLoader.getResource("processor/PipelineForTextImageProcessorConfiguration.json").toURI())
);
createPipelineProcessor(requestBody, pipelineName, modelId);
createPipelineProcessor(requestBody, pipelineName, modelId, null);
}

protected void createPipelineForSparseEncodingProcessor(final String modelId, final String pipelineName) throws Exception {
protected void createPipelineForSparseEncodingProcessor(final String modelId, final String pipelineName, final Integer batchSize)
throws Exception {
String requestBody = Files.readString(
Path.of(classLoader.getResource("processor/PipelineForSparseEncodingProcessorConfiguration.json").toURI())
);
createPipelineProcessor(requestBody, pipelineName, modelId);
createPipelineProcessor(requestBody, pipelineName, modelId, batchSize);
}

protected void createPipelineForSparseEncodingProcessor(final String modelId, final String pipelineName) throws Exception {
createPipelineForSparseEncodingProcessor(modelId, pipelineName, null);
}

protected void createPipelineForTextChunkingProcessor(String pipelineName) throws Exception {
String requestBody = Files.readString(
Path.of(classLoader.getResource("processor/PipelineForTextChunkingProcessorConfiguration.json").toURI())
);
createPipelineProcessor(requestBody, pipelineName, "");
createPipelineProcessor(requestBody, pipelineName, "", null);
}
}
@@ -27,22 +27,22 @@ public void testBatchIngestionWithNeuralSparseProcessor_E2EFlow() throws Excepti
if (isRunningAgainstOldCluster()) {
String modelId = uploadSparseEncodingModel();
loadModel(modelId);
createPipelineForSparseEncodingProcessor(modelId, PIPELINE_NAME);
createPipelineForSparseEncodingProcessor(modelId, PIPELINE_NAME, batchSize);
createIndexWithConfiguration(
indexName,
Files.readString(Path.of(classLoader.getResource("processor/SparseIndexMappings.json").toURI())),
PIPELINE_NAME
);
List<Map<String, String>> docs = prepareDataForBulkIngestion(0, 5);
bulkAddDocuments(indexName, TEXT_FIELD_NAME, PIPELINE_NAME, docs, batchSize);
bulkAddDocuments(indexName, TEXT_FIELD_NAME, PIPELINE_NAME, docs);
validateDocCountAndInfo(indexName, 5, () -> getDocById(indexName, "4"), EMBEDDING_FIELD_NAME, Map.class);
} else {
String modelId = null;
modelId = TestUtils.getModelId(getIngestionPipeline(PIPELINE_NAME), SPARSE_ENCODING_PROCESSOR);
loadModel(modelId);
try {
List<Map<String, String>> docs = prepareDataForBulkIngestion(5, 5);
bulkAddDocuments(indexName, TEXT_FIELD_NAME, PIPELINE_NAME, docs, batchSize);
bulkAddDocuments(indexName, TEXT_FIELD_NAME, PIPELINE_NAME, docs);
validateDocCountAndInfo(indexName, 10, () -> getDocById(indexName, "9"), EMBEDDING_FIELD_NAME, Map.class);
} finally {
wipeOfTestResources(indexName, PIPELINE_NAME, modelId, null);
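The test change above mirrors what this backport is about: `batch_size` is no longer passed to `bulkAddDocuments` (and thus to the `_bulk` request), and is instead supplied when the pipeline is created, via the new `createPipelineForSparseEncodingProcessor(modelId, pipelineName, batchSize)` overload. As a rough illustration, a pipeline created this way carries the batching setting in the processor definition itself. The body below is a hypothetical sketch, not taken from this diff: the field names are modeled on the `PipelineForSparseEncodingProcessorConfiguration.json` resource referenced above, and the processor-level `batch_size` parameter is assumed to be the one introduced in 2.16.

```json
PUT /_ingest/pipeline/sparse-encoding-pipeline
{
  "description": "Hypothetical sparse encoding pipeline with processor-level batching",
  "processors": [
    {
      "sparse_encoding": {
        "model_id": "<model_id>",
        "batch_size": 2,
        "field_map": {
          "passage_text": "passage_embedding"
        }
      }
    }
  ]
}
```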