From 944356eb7265422a30d09300e94661682889c6e2 Mon Sep 17 00:00:00 2001 From: Jai Balani Date: Fri, 29 Nov 2024 12:04:54 +0530 Subject: [PATCH] Bootstrap FC optimization: Interfaces for state build and state transition flows (#2946) * Changes for bootstrap improvement with statebuild * Added initial changes for state build * Changes for moving state build to FCM * Changes for state build including FCM invocation * reformatting changes --------- Co-authored-by: Jai Balani --- .../java/com/github/ambry/server/StoreManager.java | 6 ++++++ .../com/github/ambry/cloud/CloudStorageManager.java | 4 ++++ .../github/ambry/clustermap/HelixParticipant.java | 13 +++++++++++++ .../main/java/com/github/ambry/FileCopyManager.java | 3 ++- .../java/com/github/ambry/store/StorageManager.java | 8 ++++++++ 5 files changed, 33 insertions(+), 1 deletion(-) diff --git a/ambry-api/src/main/java/com/github/ambry/server/StoreManager.java b/ambry-api/src/main/java/com/github/ambry/server/StoreManager.java index c25fb82092..6fa212b3e5 100644 --- a/ambry-api/src/main/java/com/github/ambry/server/StoreManager.java +++ b/ambry-api/src/main/java/com/github/ambry/server/StoreManager.java @@ -34,6 +34,12 @@ public interface StoreManager { */ boolean addBlobStore(ReplicaId replica); + /** + * Build state after filecopy is completed + * @param partitionName the name of the partition for which state is to be built. + */ + void buildStateForFileCopy(String partitionName); + /** * Remove store from storage manager. 
* @param id the {@link PartitionId} associated with store diff --git a/ambry-cloud/src/main/java/com/github/ambry/cloud/CloudStorageManager.java b/ambry-cloud/src/main/java/com/github/ambry/cloud/CloudStorageManager.java index a33cf80c16..bcc9fa87af 100644 --- a/ambry-cloud/src/main/java/com/github/ambry/cloud/CloudStorageManager.java +++ b/ambry-cloud/src/main/java/com/github/ambry/cloud/CloudStorageManager.java @@ -56,6 +56,10 @@ public CloudStorageManager(VerifiableProperties properties, VcrMetrics vcrMetric public boolean addBlobStore(ReplicaId replica) { return createAndStartBlobStoreIfAbsent(replica.getPartitionId()) != null; } + @Override + public void buildStateForFileCopy(String partitionName){ + // no-op + } @Override public boolean shutdownBlobStore(PartitionId id) { diff --git a/ambry-clustermap/src/main/java/com/github/ambry/clustermap/HelixParticipant.java b/ambry-clustermap/src/main/java/com/github/ambry/clustermap/HelixParticipant.java index c698df4fc0..707d557f84 100644 --- a/ambry-clustermap/src/main/java/com/github/ambry/clustermap/HelixParticipant.java +++ b/ambry-clustermap/src/main/java/com/github/ambry/clustermap/HelixParticipant.java @@ -863,6 +863,8 @@ private DataNodeConfig getDataNodeConfig() { @Override public void onPartitionBecomeBootstrapFromOffline(String partitionName) { + // TODO: Prefilecopy steps: Handle scenarios for Filecopy -> filecopy, Replication->Filecopy, + // Filecopy -> replication and replication->replication rollout/rollback. try { // 1. take actions in storage manager (add new replica if necessary) PartitionStateChangeListener storageManagerListener = @@ -871,6 +873,17 @@ public void onPartitionBecomeBootstrapFromOffline(String partitionName) { storageManagerListener.onPartitionBecomeBootstrapFromOffline(partitionName); } + /** + * Should be invoked after storage manager listener to ensure that the replica is added to the store. + * Conditional execution based on requirement for File Copy. 
+ */ + PartitionStateChangeListener fileCopyManagerListener = + partitionStateChangeListeners.get(StateModelListenerType.FileCopyManagerListener); + if(fileCopyManagerListener != null){ + fileCopyManagerListener.onPartitionBecomeBootstrapFromOffline(partitionName); + replicaSyncUpManager.waitForFileCopyCompleted(partitionName); + } + // 2. take actions in replication manager (add new replica if necessary) PartitionStateChangeListener replicationManagerListener = partitionStateChangeListeners.get(StateModelListenerType.ReplicationManagerListener); diff --git a/ambry-file-transfer/src/main/java/com/github/ambry/FileCopyManager.java b/ambry-file-transfer/src/main/java/com/github/ambry/FileCopyManager.java index dab5ad17d0..d4cd37c1bf 100644 --- a/ambry-file-transfer/src/main/java/com/github/ambry/FileCopyManager.java +++ b/ambry-file-transfer/src/main/java/com/github/ambry/FileCopyManager.java @@ -41,7 +41,8 @@ class PartitionStateChangeListenerImpl implements PartitionStateChangeListener { @Override public void onPartitionBecomeBootstrapFromOffline(String partitionName) { - + // StateBuilding at the end of FCM's Offline->Bootstrap transition + storeManager.buildStateForFileCopy(partitionName); } @Override diff --git a/ambry-store/src/main/java/com/github/ambry/store/StorageManager.java b/ambry-store/src/main/java/com/github/ambry/store/StorageManager.java index d3b279e3a1..3d5d76309a 100644 --- a/ambry-store/src/main/java/com/github/ambry/store/StorageManager.java +++ b/ambry-store/src/main/java/com/github/ambry/store/StorageManager.java @@ -530,6 +530,10 @@ public boolean addBlobStore(ReplicaId replica) { logger.info("New store is successfully added into StorageManager"); return true; } + @Override + public void buildStateForFileCopy(String partitionName){ + // no-op + } /** * If a bootstrap replica fails, try to remove all the files and directories associated with it. 
@@ -716,6 +720,8 @@ private class PartitionStateChangeListenerImpl implements PartitionStateChangeLi @Override public void onPartitionBecomeBootstrapFromOffline(String partitionName) { + // For Filecopy, 4 steps to be taken up here: Prefilecopy, Filecopy, State Build, Exception handling and cleanup + // check if partition exists on current node ReplicaId replica = partitionNameToReplicaId.get(partitionName); Store store; @@ -737,6 +743,8 @@ public void onPartitionBecomeBootstrapFromOffline(String partitionName) { // Attempt to add store into storage manager. If store already exists on disk (but not in clustermap), make // sure old store of this replica is deleted (this store may be created in previous replica addition but failed // at some point). Then a brand new store associated with this replica should be created and started. + // TODO: For Filecopy, we do not init the BlobStore since new log/index isn't required, the files will be directly + // copied from remote node. if (!addBlobStore(replicaToAdd)) { // We have decreased the available disk space in HelixClusterManager#getDiskForBootstrapReplica. Increase it // back since addition of store failed.