-
Notifications
You must be signed in to change notification settings - Fork 1.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Segment Replication] Fix OngoingSegmentReplications to key by allocation ID instead of DiscoveryNode. #4182
Merged
Merged
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,7 +24,10 @@ | |
import java.io.IOException; | ||
import java.util.Collections; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.function.Predicate; | ||
import java.util.stream.Collectors; | ||
|
||
/** | ||
* Manages references to ongoing segrep events on a node. | ||
|
@@ -38,7 +41,7 @@ class OngoingSegmentReplications { | |
private final RecoverySettings recoverySettings; | ||
private final IndicesService indicesService; | ||
private final Map<ReplicationCheckpoint, CopyState> copyStateMap; | ||
private final Map<DiscoveryNode, SegmentReplicationSourceHandler> nodesToHandlers; | ||
private final Map<String, SegmentReplicationSourceHandler> allocationIdToHandlers; | ||
|
||
/** | ||
* Constructor. | ||
|
@@ -50,7 +53,7 @@ class OngoingSegmentReplications { | |
this.indicesService = indicesService; | ||
this.recoverySettings = recoverySettings; | ||
this.copyStateMap = Collections.synchronizedMap(new HashMap<>()); | ||
this.nodesToHandlers = ConcurrentCollections.newConcurrentMap(); | ||
this.allocationIdToHandlers = ConcurrentCollections.newConcurrentMap(); | ||
} | ||
|
||
/** | ||
|
@@ -96,8 +99,7 @@ synchronized CopyState getCachedCopyState(ReplicationCheckpoint checkpoint) thro | |
* @param listener {@link ActionListener} that resolves when sending files is complete. | ||
*/ | ||
void startSegmentCopy(GetSegmentFilesRequest request, ActionListener<GetSegmentFilesResponse> listener) { | ||
final DiscoveryNode node = request.getTargetNode(); | ||
final SegmentReplicationSourceHandler handler = nodesToHandlers.get(node); | ||
final SegmentReplicationSourceHandler handler = allocationIdToHandlers.get(request.getTargetAllocationId()); | ||
if (handler != null) { | ||
if (handler.isReplicating()) { | ||
throw new OpenSearchException( | ||
|
@@ -108,7 +110,7 @@ void startSegmentCopy(GetSegmentFilesRequest request, ActionListener<GetSegmentF | |
} | ||
// update the given listener to release the CopyState before it resolves. | ||
final ActionListener<GetSegmentFilesResponse> wrappedListener = ActionListener.runBefore(listener, () -> { | ||
final SegmentReplicationSourceHandler sourceHandler = nodesToHandlers.remove(node); | ||
final SegmentReplicationSourceHandler sourceHandler = allocationIdToHandlers.remove(request.getTargetAllocationId()); | ||
if (sourceHandler != null) { | ||
removeCopyState(sourceHandler.getCopyState()); | ||
} | ||
|
@@ -123,19 +125,6 @@ void startSegmentCopy(GetSegmentFilesRequest request, ActionListener<GetSegmentF | |
} | ||
} | ||
|
||
/** | ||
* Cancel any ongoing replications for a given {@link DiscoveryNode} | ||
* | ||
* @param node {@link DiscoveryNode} node for which to cancel replication events. | ||
*/ | ||
void cancelReplication(DiscoveryNode node) { | ||
final SegmentReplicationSourceHandler handler = nodesToHandlers.remove(node); | ||
if (handler != null) { | ||
handler.cancel("Cancel on node left"); | ||
removeCopyState(handler.getCopyState()); | ||
} | ||
} | ||
|
||
/** | ||
* Prepare for a Replication event. This method constructs a {@link CopyState} holding files to be sent off of the current | ||
* node's store. This state is intended to be sent back to Replicas before copy is initiated so the replica can perform a diff against its | ||
|
@@ -149,9 +138,9 @@ void cancelReplication(DiscoveryNode node) { | |
*/ | ||
CopyState prepareForReplication(CheckpointInfoRequest request, FileChunkWriter fileChunkWriter) throws IOException { | ||
final CopyState copyState = getCachedCopyState(request.getCheckpoint()); | ||
if (nodesToHandlers.putIfAbsent( | ||
request.getTargetNode(), | ||
createTargetHandler(request.getTargetNode(), copyState, fileChunkWriter) | ||
if (allocationIdToHandlers.putIfAbsent( | ||
request.getTargetAllocationId(), | ||
createTargetHandler(request.getTargetNode(), copyState, request.getTargetAllocationId(), fileChunkWriter) | ||
) != null) { | ||
throw new OpenSearchException( | ||
"Shard copy {} on node {} already replicating", | ||
|
@@ -163,18 +152,23 @@ CopyState prepareForReplication(CheckpointInfoRequest request, FileChunkWriter f | |
} | ||
|
||
/** | ||
* Cancel all Replication events for the given shard, intended to be called when the current primary is shutting down. | ||
* Cancel all Replication events for the given shard, intended to be called when a primary is shutting down. | ||
* | ||
* @param shard {@link IndexShard} | ||
* @param reason {@link String} - Reason for the cancel | ||
*/ | ||
synchronized void cancel(IndexShard shard, String reason) { | ||
for (SegmentReplicationSourceHandler entry : nodesToHandlers.values()) { | ||
if (entry.getCopyState().getShard().equals(shard)) { | ||
entry.cancel(reason); | ||
} | ||
} | ||
copyStateMap.clear(); | ||
cancelHandlers(handler -> handler.getCopyState().getShard().shardId().equals(shard.shardId()), reason); | ||
} | ||
|
||
/** | ||
* Cancel any ongoing replications for a given {@link DiscoveryNode} | ||
* | ||
* @param node {@link DiscoveryNode} node for which to cancel replication events. | ||
*/ | ||
void cancelReplication(DiscoveryNode node) { | ||
cancelHandlers(handler -> handler.getTargetNode().equals(node), "Node left"); | ||
|
||
} | ||
|
||
/** | ||
|
@@ -186,19 +180,25 @@ boolean isInCopyStateMap(ReplicationCheckpoint replicationCheckpoint) { | |
} | ||
|
||
int size() { | ||
return nodesToHandlers.size(); | ||
return allocationIdToHandlers.size(); | ||
} | ||
|
||
int cachedCopyStateSize() { | ||
return copyStateMap.size(); | ||
} | ||
|
||
private SegmentReplicationSourceHandler createTargetHandler(DiscoveryNode node, CopyState copyState, FileChunkWriter fileChunkWriter) { | ||
private SegmentReplicationSourceHandler createTargetHandler( | ||
DiscoveryNode node, | ||
CopyState copyState, | ||
String allocationId, | ||
FileChunkWriter fileChunkWriter | ||
) { | ||
return new SegmentReplicationSourceHandler( | ||
node, | ||
fileChunkWriter, | ||
copyState.getShard().getThreadPool(), | ||
copyState, | ||
allocationId, | ||
Math.toIntExact(recoverySettings.getChunkSize().getBytes()), | ||
recoverySettings.getMaxConcurrentFileChunks() | ||
); | ||
|
@@ -231,4 +231,23 @@ private synchronized void removeCopyState(CopyState copyState) { | |
copyStateMap.remove(copyState.getRequestedReplicationCheckpoint()); | ||
} | ||
} | ||
|
||
/** | ||
* Remove handlers from allocationIdToHandlers map based on a filter predicate. | ||
* This will also decref the handler's CopyState reference. | ||
*/ | ||
private void cancelHandlers(Predicate<? super SegmentReplicationSourceHandler> predicate, String reason) { | ||
final List<String> allocationIds = allocationIdToHandlers.values() | ||
.stream() | ||
.filter(predicate) | ||
.map(SegmentReplicationSourceHandler::getAllocationId) | ||
.collect(Collectors.toList()); | ||
for (String allocationId : allocationIds) { | ||
final SegmentReplicationSourceHandler handler = allocationIdToHandlers.remove(allocationId); | ||
if (handler != null) { | ||
handler.cancel(reason); | ||
removeCopyState(handler.getCopyState()); | ||
} | ||
} | ||
} | ||
Comment on lines
+239
to
+252
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 👍 This is better than the previous iteration :) |
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we also remove the handler from `allocationIdToHandlers` after the segments have been copied?
LGTM