From a4c250e54d62408daf00f49683266fe1f4344e31 Mon Sep 17 00:00:00 2001 From: Himshikha Gupta Date: Tue, 3 Sep 2024 22:36:57 +0530 Subject: [PATCH 1/2] changing checksum setting to support modes Signed-off-by: Himshikha Gupta --- .../remote/RemoteRoutingTableServiceIT.java | 5 +- .../remote/RemoteStatePublicationIT.java | 5 +- .../common/settings/ClusterSettings.java | 2 +- .../remote/RemoteClusterStateService.java | 97 +++-- .../RemoteClusterStateServiceTests.java | 333 +++++++++++++++++- 5 files changed, 399 insertions(+), 43 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteRoutingTableServiceIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteRoutingTableServiceIT.java index 3c0c9a0611439..0a8c13adb034f 100644 --- a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteRoutingTableServiceIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteRoutingTableServiceIT.java @@ -67,7 +67,10 @@ protected Settings nodeSettings(int nodeOrdinal) { ) .put("node.attr." 
+ REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, REMOTE_ROUTING_TABLE_REPO) .put(REMOTE_PUBLICATION_EXPERIMENTAL, true) - .put(RemoteClusterStateService.REMOTE_CLUSTER_STATE_CHECKSUM_VALIDATION_ENABLED_SETTING.getKey(), true) + .put( + RemoteClusterStateService.REMOTE_CLUSTER_STATE_CHECKSUM_VALIDATION_MODE_SETTING.getKey(), + RemoteClusterStateService.RemoteClusterStateValidationMode.FAILURE + ) .build(); } diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteStatePublicationIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteStatePublicationIT.java index 3a3e023db6446..0e6321867a33b 100644 --- a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteStatePublicationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteStatePublicationIT.java @@ -90,7 +90,10 @@ protected Settings nodeSettings(int nodeOrdinal) { .put("node.attr." + REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, routingTableRepoName) .put(routingTableRepoTypeAttributeKey, ReloadableFsRepository.TYPE) .put(routingTableRepoSettingsAttributeKeyPrefix + "location", segmentRepoPath) - .put(RemoteClusterStateService.REMOTE_CLUSTER_STATE_CHECKSUM_VALIDATION_ENABLED_SETTING.getKey(), true) + .put( + RemoteClusterStateService.REMOTE_CLUSTER_STATE_CHECKSUM_VALIDATION_MODE_SETTING.getKey(), + RemoteClusterStateService.RemoteClusterStateValidationMode.FAILURE + ) .build(); } diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 8e036209daff0..87c6140129e7d 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -740,7 +740,7 @@ public void apply(Settings value, Settings current, Settings previous) { 
IndicesService.CLUSTER_INDEX_RESTRICT_REPLICATION_TYPE_SETTING, RemoteRoutingTableBlobStore.REMOTE_ROUTING_TABLE_PATH_TYPE_SETTING, RemoteRoutingTableBlobStore.REMOTE_ROUTING_TABLE_PATH_HASH_ALGO_SETTING, - RemoteClusterStateService.REMOTE_CLUSTER_STATE_CHECKSUM_VALIDATION_ENABLED_SETTING, + RemoteClusterStateService.REMOTE_CLUSTER_STATE_CHECKSUM_VALIDATION_MODE_SETTING, // Admission Control Settings AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE, diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index 88dda3953a5f9..c742c0d8d2179 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -71,6 +71,7 @@ import java.io.Closeable; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -141,13 +142,42 @@ public class RemoteClusterStateService implements Closeable { Setting.Property.NodeScope ); - public static final Setting<Boolean> REMOTE_CLUSTER_STATE_CHECKSUM_VALIDATION_ENABLED_SETTING = Setting.boolSetting( - "cluster.remote_store.state.checksum_validation.enabled", - false, - Property.Dynamic, - Property.NodeScope + public static final Setting<RemoteClusterStateValidationMode> REMOTE_CLUSTER_STATE_CHECKSUM_VALIDATION_MODE_SETTING = new Setting<>( + "cluster.remote_store.state.checksum_validation.mode", + RemoteClusterStateValidationMode.NONE.name(), + RemoteClusterStateValidationMode::parseString, + Setting.Property.Dynamic, + Setting.Property.NodeScope ); + public enum RemoteClusterStateValidationMode { + DEBUG("debug"), + TRACE("trace"), + FAILURE("failure"), + NONE("none"); + + public final String mode; + + RemoteClusterStateValidationMode(String mode) { + this.mode = mode; + } + + public static RemoteClusterStateValidationMode 
parseString(String mode) { + try { + return RemoteClusterStateValidationMode.valueOf(mode.toUpperCase(Locale.ROOT)); + } catch (IllegalArgumentException e) { + throw new IllegalArgumentException( + "[" + + mode + + "] mode is not supported. " + + "supported modes are " + + Arrays.toString(RemoteClusterStateValidationMode.values()), + e + ); + } + } + } + private TimeValue remoteStateReadTimeout; private final String nodeId; private final Supplier repositoriesService; @@ -159,7 +189,7 @@ public class RemoteClusterStateService implements Closeable { private BlobStoreTransferService blobStoreTransferService; private RemoteRoutingTableService remoteRoutingTableService; private volatile TimeValue slowWriteLoggingThreshold; - private boolean checksumValidationEnabled; + private volatile RemoteClusterStateValidationMode remoteClusterStateValidationMode; /* updated by the dynamic-setting consumer */ private final RemotePersistenceStats remoteStateStats; private RemoteClusterStateCleanupManager remoteClusterStateCleanupManager; @@ -206,11 +236,8 @@ public RemoteClusterStateService( clusterSettings.addSettingsUpdateConsumer(SLOW_WRITE_LOGGING_THRESHOLD, this::setSlowWriteLoggingThreshold); this.remoteStateReadTimeout = clusterSettings.get(REMOTE_STATE_READ_TIMEOUT_SETTING); clusterSettings.addSettingsUpdateConsumer(REMOTE_STATE_READ_TIMEOUT_SETTING, this::setRemoteStateReadTimeout); - this.checksumValidationEnabled = clusterSettings.get(REMOTE_CLUSTER_STATE_CHECKSUM_VALIDATION_ENABLED_SETTING); - clusterSettings.addSettingsUpdateConsumer( - REMOTE_CLUSTER_STATE_CHECKSUM_VALIDATION_ENABLED_SETTING, - this::setChecksumValidationEnabled - ); + this.remoteClusterStateValidationMode = REMOTE_CLUSTER_STATE_CHECKSUM_VALIDATION_MODE_SETTING.get(settings); + clusterSettings.addSettingsUpdateConsumer(REMOTE_CLUSTER_STATE_CHECKSUM_VALIDATION_MODE_SETTING, this::setChecksumValidationMode); this.remoteStateStats = new RemotePersistenceStats(); this.namedWriteableRegistry = namedWriteableRegistry; @@ -272,7 +299,7 @@ public 
RemoteClusterStateManifestInfo writeFullMetadata(ClusterState clusterStat uploadedMetadataResults, previousClusterUUID, clusterStateDiffManifest, - checksumValidationEnabled ? new ClusterStateChecksum(clusterState) : null, + !remoteClusterStateValidationMode.equals(RemoteClusterStateValidationMode.NONE) ? new ClusterStateChecksum(clusterState) : null, false, codecVersion ); @@ -472,7 +499,7 @@ public RemoteClusterStateManifestInfo writeIncrementalMetadata( uploadedMetadataResults, previousManifest.getPreviousClusterUUID(), clusterStateDiffManifest, - checksumValidationEnabled ? new ClusterStateChecksum(clusterState) : null, + !remoteClusterStateValidationMode.equals(RemoteClusterStateValidationMode.NONE) ? new ClusterStateChecksum(clusterState) : null, false, previousManifest.getCodecVersion() ); @@ -917,7 +944,7 @@ public RemoteClusterStateManifestInfo markLastStateAsCommitted(ClusterState clus uploadedMetadataResults, previousManifest.getPreviousClusterUUID(), previousManifest.getDiffManifest(), - checksumValidationEnabled ? previousManifest.getClusterStateChecksum() : null, + !remoteClusterStateValidationMode.equals(RemoteClusterStateValidationMode.NONE) ? 
new ClusterStateChecksum(clusterState) : null, true, previousManifest.getCodecVersion() ); @@ -1003,8 +1030,8 @@ private void setSlowWriteLoggingThreshold(TimeValue slowWriteLoggingThreshold) { this.slowWriteLoggingThreshold = slowWriteLoggingThreshold; } - private void setChecksumValidationEnabled(Boolean checksumValidationEnabled) { - this.checksumValidationEnabled = checksumValidationEnabled; + private void setChecksumValidationMode(RemoteClusterStateValidationMode remoteClusterStateValidationMode) { + this.remoteClusterStateValidationMode = remoteClusterStateValidationMode; } // Package private for unit test @@ -1376,7 +1403,9 @@ public ClusterState getClusterStateForManifest( includeEphemeral ); - if (includeEphemeral && checksumValidationEnabled && manifest.getClusterStateChecksum() != null) { + if (includeEphemeral + && !remoteClusterStateValidationMode.equals(RemoteClusterStateValidationMode.NONE) + && manifest.getClusterStateChecksum() != null) { validateClusterStateFromChecksum(manifest, clusterState, clusterName, localNodeId, true); } } else { @@ -1498,7 +1527,7 @@ public ClusterState getClusterStateUsingDiff(ClusterMetadataManifest manifest, C .routingTable(new RoutingTable(manifest.getRoutingTableVersion(), indexRoutingTables)) .build(); - if (checksumValidationEnabled && manifest.getClusterStateChecksum() != null) { + if (!remoteClusterStateValidationMode.equals(RemoteClusterStateValidationMode.NONE) && manifest.getClusterStateChecksum() != null) { validateClusterStateFromChecksum(manifest, clusterState, previousState.getClusterName().value(), localNodeId, false); } final long durationMillis = TimeValue.nsecToMSec(relativeTimeNanosSupplier.getAsLong() - startTimeNanos); @@ -1517,20 +1546,24 @@ void validateClusterStateFromChecksum( ) { ClusterStateChecksum newClusterStateChecksum = new ClusterStateChecksum(clusterState); List failedValidation = newClusterStateChecksum.getMismatchEntities(manifest.getClusterStateChecksum()); - if 
(!failedValidation.isEmpty()) { - logger.error( - () -> new ParameterizedMessage( - "Cluster state checksums do not match. Checksum from manifest {}, checksum from created cluster state {}. Entities failing validation {}", - manifest.getClusterStateChecksum(), - newClusterStateChecksum, - failedValidation - ) + if (failedValidation.isEmpty()) { + return; + } + logger.error( + () -> new ParameterizedMessage( + "Cluster state checksums do not match. Checksum from manifest {}, checksum from created cluster state {}. Entities failing validation {}", + manifest.getClusterStateChecksum(), + newClusterStateChecksum, + failedValidation + ) + ); + if (isFullStateDownload && remoteClusterStateValidationMode.equals(RemoteClusterStateValidationMode.FAILURE)) { + throw new IllegalStateException( + "Cluster state checksums do not match during full state read. Validation failed for " + failedValidation ); - if (isFullStateDownload) { - throw new IllegalStateException( - "Cluster state checksums do not match during full state read. Validation failed for " + failedValidation - ); - } + } + if (remoteClusterStateValidationMode.equals(RemoteClusterStateValidationMode.FAILURE) + || remoteClusterStateValidationMode.equals(RemoteClusterStateValidationMode.TRACE)) { // download full cluster state and match against state created for the failing entities ClusterState fullClusterState = readClusterStateInParallel( ClusterState.builder(new ClusterName(clusterName)).build(), @@ -1663,6 +1696,8 @@ void validateClusterStateFromChecksum( break; } } + } + if (remoteClusterStateValidationMode.equals(RemoteClusterStateValidationMode.FAILURE)) { throw new IllegalStateException( "Cluster state checksums do not match during diff read. 
Validation failed for " + failedValidation ); diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index ccadbfb3fbebc..15bdaf03b5990 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -2959,12 +2959,12 @@ private void initializeRoutingTable() { ); } - private void initializeWithChecksumEnabled() { + private void initializeWithChecksumEnabled(RemoteClusterStateService.RemoteClusterStateValidationMode mode) { Settings newSettings = Settings.builder() .put("node.attr." + REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, "routing_repository") .put("node.attr." + REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, "remote_store_repository") .put(RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true) - .put(RemoteClusterStateService.REMOTE_CLUSTER_STATE_CHECKSUM_VALIDATION_ENABLED_SETTING.getKey(), true) + .put(RemoteClusterStateService.REMOTE_CLUSTER_STATE_CHECKSUM_VALIDATION_MODE_SETTING.getKey(), mode.name()) .build(); clusterSettings.applySettings(newSettings); @@ -2991,7 +2991,7 @@ private void initializeWithChecksumEnabled() { } public void testWriteFullMetadataSuccessWithChecksumValidationEnabled() throws IOException { - initializeWithChecksumEnabled(); + initializeWithChecksumEnabled(RemoteClusterStateService.RemoteClusterStateValidationMode.FAILURE); mockBlobStoreObjects(); when((blobStoreRepository.basePath())).thenReturn(BlobPath.cleanPath().add("base-path")); @@ -3034,8 +3034,51 @@ public void testWriteFullMetadataSuccessWithChecksumValidationEnabled() throws I assertThat(manifest.getClusterStateChecksum(), is(expectedManifest.getClusterStateChecksum())); } + public void testWriteFullMetadataSuccessWithChecksumValidationModeNone() throws 
IOException { + initializeWithChecksumEnabled(RemoteClusterStateService.RemoteClusterStateValidationMode.NONE); + mockBlobStoreObjects(); + when((blobStoreRepository.basePath())).thenReturn(BlobPath.cleanPath().add("base-path")); + + final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); + remoteClusterStateService.start(); + final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata( + clusterState, + "prev-cluster-uuid", + MANIFEST_CURRENT_CODEC_VERSION + ).getClusterMetadataManifest(); + final UploadedIndexMetadata uploadedIndexMetadata = new UploadedIndexMetadata("test-index", "index-uuid", "metadata-filename"); + final UploadedIndexMetadata uploadedIndiceRoutingMetadata = new UploadedIndexMetadata( + "test-index", + "index-uuid", + "routing-filename", + INDEX_ROUTING_METADATA_PREFIX + ); + final ClusterMetadataManifest expectedManifest = ClusterMetadataManifest.builder() + .indices(List.of(uploadedIndexMetadata)) + .clusterTerm(1L) + .stateVersion(1L) + .stateUUID("state-uuid") + .clusterUUID("cluster-uuid") + .previousClusterUUID("prev-cluster-uuid") + .routingTableVersion(1L) + .indicesRouting(List.of(uploadedIndiceRoutingMetadata)) + .build(); + + assertThat(manifest.getIndices().size(), is(1)); + assertThat(manifest.getClusterTerm(), is(expectedManifest.getClusterTerm())); + assertThat(manifest.getStateVersion(), is(expectedManifest.getStateVersion())); + assertThat(manifest.getClusterUUID(), is(expectedManifest.getClusterUUID())); + assertThat(manifest.getStateUUID(), is(expectedManifest.getStateUUID())); + assertThat(manifest.getPreviousClusterUUID(), is(expectedManifest.getPreviousClusterUUID())); + assertThat(manifest.getRoutingTableVersion(), is(expectedManifest.getRoutingTableVersion())); + assertThat(manifest.getIndicesRouting().get(0).getIndexName(), is(uploadedIndiceRoutingMetadata.getIndexName())); + 
assertThat(manifest.getIndicesRouting().get(0).getIndexUUID(), is(uploadedIndiceRoutingMetadata.getIndexUUID())); + assertThat(manifest.getIndicesRouting().get(0).getUploadedFilename(), notNullValue()); + assertNull(manifest.getClusterStateChecksum()); + } + public void testWriteIncrementalMetadataSuccessWithChecksumValidationEnabled() throws IOException { - initializeWithChecksumEnabled(); + initializeWithChecksumEnabled(RemoteClusterStateService.RemoteClusterStateValidationMode.FAILURE); final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); mockBlobStoreObjects(); final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder().term(1L).build(); @@ -3086,8 +3129,60 @@ public void testWriteIncrementalMetadataSuccessWithChecksumValidationEnabled() t assertThat(manifest.getClusterStateChecksum(), is(expectedManifest.getClusterStateChecksum())); } + public void testWriteIncrementalMetadataSuccessWithChecksumValidationModeNone() throws IOException { + initializeWithChecksumEnabled(RemoteClusterStateService.RemoteClusterStateValidationMode.NONE); + final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); + mockBlobStoreObjects(); + final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder().term(1L).build(); + final ClusterState previousClusterState = ClusterState.builder(ClusterName.DEFAULT) + .metadata(Metadata.builder().coordinationMetadata(coordinationMetadata)) + .build(); + + final ClusterMetadataManifest previousManifest = ClusterMetadataManifest.builder() + .indices(Collections.emptyList()) + .checksum(new ClusterStateChecksum(clusterState)) + .build(); + when((blobStoreRepository.basePath())).thenReturn(BlobPath.cleanPath().add("base-path")); + + remoteClusterStateService.start(); + final ClusterMetadataManifest manifest = remoteClusterStateService.writeIncrementalMetadata( + previousClusterState, 
+ clusterState, + previousManifest + ).getClusterMetadataManifest(); + final UploadedIndexMetadata uploadedIndexMetadata = new UploadedIndexMetadata("test-index", "index-uuid", "metadata-filename"); + final UploadedIndexMetadata uploadedIndiceRoutingMetadata = new UploadedIndexMetadata( + "test-index", + "index-uuid", + "routing-filename", + INDEX_ROUTING_METADATA_PREFIX + ); + final ClusterMetadataManifest expectedManifest = ClusterMetadataManifest.builder() + .indices(List.of(uploadedIndexMetadata)) + .clusterTerm(1L) + .stateVersion(1L) + .stateUUID("state-uuid") + .clusterUUID("cluster-uuid") + .previousClusterUUID("prev-cluster-uuid") + .routingTableVersion(1) + .indicesRouting(List.of(uploadedIndiceRoutingMetadata)) + .checksum(new ClusterStateChecksum(clusterState)) + .build(); + + assertThat(manifest.getIndices().size(), is(1)); + assertThat(manifest.getClusterTerm(), is(expectedManifest.getClusterTerm())); + assertThat(manifest.getStateVersion(), is(expectedManifest.getStateVersion())); + assertThat(manifest.getClusterUUID(), is(expectedManifest.getClusterUUID())); + assertThat(manifest.getStateUUID(), is(expectedManifest.getStateUUID())); + assertThat(manifest.getRoutingTableVersion(), is(expectedManifest.getRoutingTableVersion())); + assertThat(manifest.getIndicesRouting().get(0).getIndexName(), is(uploadedIndiceRoutingMetadata.getIndexName())); + assertThat(manifest.getIndicesRouting().get(0).getIndexUUID(), is(uploadedIndiceRoutingMetadata.getIndexUUID())); + assertThat(manifest.getIndicesRouting().get(0).getUploadedFilename(), notNullValue()); + assertNull(manifest.getClusterStateChecksum()); + } + public void testGetClusterStateForManifestWithChecksumValidationEnabledWithNullChecksum() throws IOException { - initializeWithChecksumEnabled(); + initializeWithChecksumEnabled(RemoteClusterStateService.RemoteClusterStateValidationMode.FAILURE); ClusterMetadataManifest manifest = generateClusterMetadataManifestWithAllAttributes().build(); 
mockBlobStoreObjects(); remoteClusterStateService.start(); @@ -3145,7 +3240,7 @@ public void testGetClusterStateForManifestWithChecksumValidationEnabledWithNullC } public void testGetClusterStateForManifestWithChecksumValidationEnabled() throws IOException { - initializeWithChecksumEnabled(); + initializeWithChecksumEnabled(RemoteClusterStateService.RemoteClusterStateValidationMode.FAILURE); ClusterState clusterState = generateClusterStateWithAllAttributes().build(); ClusterMetadataManifest manifest = generateClusterMetadataManifestWithAllAttributes().checksum( new ClusterStateChecksum(clusterState) @@ -3176,8 +3271,40 @@ public void testGetClusterStateForManifestWithChecksumValidationEnabled() throws verify(mockService, times(1)).validateClusterStateFromChecksum(manifest, clusterState, ClusterName.DEFAULT.value(), NODE_ID, true); } + public void testGetClusterStateForManifestWithChecksumValidationModeNone() throws IOException { + initializeWithChecksumEnabled(RemoteClusterStateService.RemoteClusterStateValidationMode.NONE); + ClusterState clusterState = generateClusterStateWithAllAttributes().build(); + ClusterMetadataManifest manifest = generateClusterMetadataManifestWithAllAttributes().checksum( + new ClusterStateChecksum(clusterState) + ).build(); + remoteClusterStateService.start(); + RemoteClusterStateService mockService = spy(remoteClusterStateService); + doReturn(clusterState).when(mockService) + .readClusterStateInParallel( + any(), + eq(manifest), + eq(manifest.getClusterUUID()), + eq(NODE_ID), + eq(manifest.getIndices()), + eq(manifest.getCustomMetadataMap()), + eq(true), + eq(true), + eq(true), + eq(true), + eq(true), + eq(true), + eq(manifest.getIndicesRouting()), + eq(true), + eq(manifest.getClusterStateCustomMap()), + eq(false), + eq(true) + ); + mockService.getClusterStateForManifest(ClusterName.DEFAULT.value(), manifest, NODE_ID, true); + verify(mockService, times(0)).validateClusterStateFromChecksum(any(), any(), anyString(), anyString(), 
anyBoolean()); + } + public void testGetClusterStateForManifestWithChecksumValidationEnabledWithMismatch() throws IOException { - initializeWithChecksumEnabled(); + initializeWithChecksumEnabled(RemoteClusterStateService.RemoteClusterStateValidationMode.FAILURE); ClusterState clusterState = generateClusterStateWithAllAttributes().build(); ClusterMetadataManifest manifest = generateClusterMetadataManifestWithAllAttributes().checksum( new ClusterStateChecksum(clusterState) @@ -3218,8 +3345,95 @@ public void testGetClusterStateForManifestWithChecksumValidationEnabledWithMisma ); } + public void testGetClusterStateForManifestWithChecksumValidationDebugWithMismatch() throws IOException { + initializeWithChecksumEnabled( + randomFrom( + Arrays.asList( + RemoteClusterStateService.RemoteClusterStateValidationMode.DEBUG, + RemoteClusterStateService.RemoteClusterStateValidationMode.TRACE + ) + ) + ); + ClusterState clusterState = generateClusterStateWithAllAttributes().build(); + ClusterMetadataManifest manifest = generateClusterMetadataManifestWithAllAttributes().checksum( + new ClusterStateChecksum(clusterState) + ).build(); + remoteClusterStateService.start(); + RemoteClusterStateService mockService = spy(remoteClusterStateService); + ClusterState clusterStateWrong = ClusterState.builder(clusterState).routingTable(RoutingTable.EMPTY_ROUTING_TABLE).build(); + doReturn(clusterStateWrong).when(mockService) + .readClusterStateInParallel( + any(), + eq(manifest), + eq(manifest.getClusterUUID()), + eq(NODE_ID), + eq(manifest.getIndices()), + eq(manifest.getCustomMetadataMap()), + eq(true), + eq(true), + eq(true), + eq(true), + eq(true), + eq(true), + eq(manifest.getIndicesRouting()), + eq(true), + eq(manifest.getClusterStateCustomMap()), + eq(false), + eq(true) + ); + mockService.getClusterStateForManifest(ClusterName.DEFAULT.value(), manifest, NODE_ID, true); + verify(mockService, times(1)).validateClusterStateFromChecksum( + manifest, + clusterStateWrong, + 
ClusterName.DEFAULT.value(), + NODE_ID, + true + ); + } + public void testGetClusterStateUsingDiffWithChecksum() throws IOException { - initializeWithChecksumEnabled(); + initializeWithChecksumEnabled(RemoteClusterStateService.RemoteClusterStateValidationMode.FAILURE); + ClusterState clusterState = generateClusterStateWithAllAttributes().build(); + ClusterMetadataManifest manifest = generateClusterMetadataManifestWithAllAttributes().checksum( + new ClusterStateChecksum(clusterState) + ).diffManifest(ClusterStateDiffManifest.builder().build()).build(); + + remoteClusterStateService.start(); + RemoteClusterStateService mockService = spy(remoteClusterStateService); + + doReturn(clusterState).when(mockService) + .readClusterStateInParallel( + any(), + eq(manifest), + eq(manifest.getClusterUUID()), + eq(NODE_ID), + eq(emptyList()), + eq(emptyMap()), + anyBoolean(), + anyBoolean(), + anyBoolean(), + anyBoolean(), + anyBoolean(), + anyBoolean(), + eq(emptyList()), + anyBoolean(), + eq(emptyMap()), + anyBoolean(), + anyBoolean() + ); + mockService.getClusterStateUsingDiff(manifest, clusterState, NODE_ID); + + verify(mockService, times(1)).validateClusterStateFromChecksum( + eq(manifest), + any(ClusterState.class), + eq(ClusterName.DEFAULT.value()), + eq(NODE_ID), + eq(false) + ); + } + + public void testGetClusterStateUsingDiffWithChecksumModeNone() throws IOException { + initializeWithChecksumEnabled(RemoteClusterStateService.RemoteClusterStateValidationMode.NONE); ClusterState clusterState = generateClusterStateWithAllAttributes().build(); ClusterMetadataManifest manifest = generateClusterMetadataManifestWithAllAttributes().checksum( new ClusterStateChecksum(clusterState) @@ -3250,6 +3464,107 @@ public void testGetClusterStateUsingDiffWithChecksum() throws IOException { ); mockService.getClusterStateUsingDiff(manifest, clusterState, NODE_ID); + verify(mockService, times(0)).validateClusterStateFromChecksum( + eq(manifest), + any(ClusterState.class), + 
eq(ClusterName.DEFAULT.value()), + eq(NODE_ID), + eq(false) + ); + } + + public void testGetClusterStateUsingDiffWithChecksumModeDebugMismatch() throws IOException { + initializeWithChecksumEnabled(RemoteClusterStateService.RemoteClusterStateValidationMode.DEBUG); + ClusterState clusterState = generateClusterStateWithAllAttributes().build(); + ClusterMetadataManifest manifest = generateClusterMetadataManifestWithAllAttributes().checksum( + new ClusterStateChecksum(clusterState) + ).diffManifest(ClusterStateDiffManifest.builder().build()).build(); + + remoteClusterStateService.start(); + RemoteClusterStateService mockService = spy(remoteClusterStateService); + ClusterState clusterStateWrong = ClusterState.builder(clusterState).routingTable(RoutingTable.EMPTY_ROUTING_TABLE).build(); + doReturn(clusterStateWrong).when(mockService) + .readClusterStateInParallel( + any(), + eq(manifest), + eq(manifest.getClusterUUID()), + eq(NODE_ID), + eq(emptyList()), + eq(emptyMap()), + anyBoolean(), + anyBoolean(), + anyBoolean(), + anyBoolean(), + anyBoolean(), + anyBoolean(), + eq(emptyList()), + anyBoolean(), + eq(emptyMap()), + anyBoolean(), + anyBoolean() + ); + mockService.getClusterStateUsingDiff(manifest, clusterState, NODE_ID); + verify(mockService, times(1)).validateClusterStateFromChecksum( + eq(manifest), + any(ClusterState.class), + eq(ClusterName.DEFAULT.value()), + eq(NODE_ID), + eq(false) + ); + } + + public void testGetClusterStateUsingDiffWithChecksumModeTraceMismatch() throws IOException { + initializeWithChecksumEnabled(RemoteClusterStateService.RemoteClusterStateValidationMode.TRACE); + ClusterState clusterState = generateClusterStateWithAllAttributes().build(); + ClusterMetadataManifest manifest = generateClusterMetadataManifestWithAllAttributes().checksum( + new ClusterStateChecksum(clusterState) + ).diffManifest(ClusterStateDiffManifest.builder().build()).build(); + + remoteClusterStateService.start(); + RemoteClusterStateService mockService = 
spy(remoteClusterStateService); + ClusterState clusterStateWrong = ClusterState.builder(clusterState).routingTable(RoutingTable.EMPTY_ROUTING_TABLE).build(); + doReturn(clusterStateWrong).when(mockService) + .readClusterStateInParallel( + any(), + eq(manifest), + eq(manifest.getClusterUUID()), + eq(NODE_ID), + eq(emptyList()), + eq(emptyMap()), + anyBoolean(), + anyBoolean(), + anyBoolean(), + anyBoolean(), + anyBoolean(), + anyBoolean(), + eq(emptyList()), + anyBoolean(), + eq(emptyMap()), + anyBoolean(), + anyBoolean() + ); + doReturn(clusterState).when(mockService) + .readClusterStateInParallel( + any(), + eq(manifest), + eq(manifest.getClusterUUID()), + eq(NODE_ID), + eq(manifest.getIndices()), + eq(manifest.getCustomMetadataMap()), + eq(true), + eq(true), + eq(true), + eq(true), + eq(true), + eq(true), + eq(manifest.getIndicesRouting()), + eq(true), + eq(manifest.getClusterStateCustomMap()), + eq(false), + eq(true) + ); + + mockService.getClusterStateUsingDiff(manifest, clusterState, NODE_ID); verify(mockService, times(1)).validateClusterStateFromChecksum( eq(manifest), any(ClusterState.class), @@ -3260,7 +3575,7 @@ public void testGetClusterStateUsingDiffWithChecksum() throws IOException { } public void testGetClusterStateUsingDiffWithChecksumMismatch() throws IOException { - initializeWithChecksumEnabled(); + initializeWithChecksumEnabled(RemoteClusterStateService.RemoteClusterStateValidationMode.FAILURE); ClusterState clusterState = generateClusterStateWithAllAttributes().build(); ClusterMetadataManifest manifest = generateClusterMetadataManifestWithAllAttributes().checksum( new ClusterStateChecksum(clusterState) From 2c664910c58506f8ab2c14ab0b4edfd2123b3743 Mon Sep 17 00:00:00 2001 From: Himshikha Gupta Date: Tue, 3 Sep 2024 22:49:57 +0530 Subject: [PATCH 2/2] javadoc Signed-off-by: Himshikha Gupta --- .../gateway/remote/RemoteClusterStateService.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git 
a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index c742c0d8d2179..a223bfbe736c3 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -150,6 +150,13 @@ public class RemoteClusterStateService implements Closeable { Setting.Property.NodeScope ); + /** + * Validation mode for the cluster state checksum. + * NONE: Checksum validation is disabled. + * DEBUG: Compares checksums and only logs the entities that fail validation. + * TRACE: Compares checksums and, on mismatch, downloads the full cluster state to find the diff in failing entities. Only logs failures. + * FAILURE: Throws an exception when validation fails. + */ public enum RemoteClusterStateValidationMode { DEBUG("debug"), TRACE("trace"),