-
Notifications
You must be signed in to change notification settings - Fork 509
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
HDDS-10983. EC Key read corruption when the replica index of container in DN mismatches #6779
Changes from 3 commits
a1d5c4a
8815940
4287270
2bb1b9d
5886cf4
cfc7415
8edc907
1476d59
437594d
65994f3
731107c
5857567
dd11beb
4c4e2c7
0d769da
45279a3
58b089d
e4c0d67
da98011
5db065f
b0764e4
3ae6768
040d6d8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -26,6 +26,8 @@ | |||
import org.apache.hadoop.fs.CanUnbuffer; | ||||
import org.apache.hadoop.fs.Seekable; | ||||
import org.apache.hadoop.hdds.client.BlockID; | ||||
import org.apache.hadoop.hdds.protocol.DatanodeDetails; | ||||
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; | ||||
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ChunkInfo; | ||||
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandRequestProto; | ||||
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto; | ||||
|
@@ -60,6 +62,7 @@ public class ChunkInputStream extends InputStream | |||
private final ChunkInfo chunkInfo; | ||||
private final long length; | ||||
private final BlockID blockID; | ||||
private ContainerProtos.DatanodeBlockID datanodeBlockID; | ||||
private final XceiverClientFactory xceiverClientFactory; | ||||
private XceiverClientSpi xceiverClient; | ||||
private final Supplier<Pipeline> pipelineSupplier; | ||||
|
@@ -290,13 +293,27 @@ protected synchronized void releaseClient() { | |||
} | ||||
} | ||||
|
||||
/** | ||||
* Updates DatanodeBlockId which based on blockId. | ||||
*/ | ||||
private void updateDatanodeBlockId() throws IOException { | ||||
DatanodeDetails closestNode = pipelineSupplier.get().getClosestNode(); | ||||
int replicaIdx = pipelineSupplier.get().getReplicaIndex(closestNode); | ||||
ContainerProtos.DatanodeBlockID.Builder builder = blockID.getDatanodeBlockIDProtobufBuilder(); | ||||
if (replicaIdx > 0) { | ||||
builder.setReplicaIndex(replicaIdx); | ||||
} | ||||
datanodeBlockID = builder.build(); | ||||
} | ||||
|
||||
/** | ||||
* Acquire new client if previous one was released. | ||||
*/ | ||||
protected synchronized void acquireClient() throws IOException { | ||||
if (xceiverClientFactory != null && xceiverClient == null) { | ||||
xceiverClient = xceiverClientFactory.acquireClientForReadData( | ||||
pipelineSupplier.get()); | ||||
updateDatanodeBlockId(); | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please store There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is one more ozone/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ChunkInputStream.java Line 301 in 1476d59
|
||||
} | ||||
} | ||||
|
||||
|
@@ -423,7 +440,7 @@ protected ByteBuffer[] readChunk(ChunkInfo readChunkInfo) | |||
|
||||
ReadChunkResponseProto readChunkResponse = | ||||
ContainerProtocolCalls.readChunk(xceiverClient, | ||||
readChunkInfo, blockID, validators, tokenSupplier.get()); | ||||
readChunkInfo, datanodeBlockID, validators, tokenSupplier.get()); | ||||
|
||||
if (readChunkResponse.hasData()) { | ||||
return readChunkResponse.getData().asReadOnlyByteBufferList() | ||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,33 +20,49 @@ | |
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; | ||
import org.apache.hadoop.hdds.protocol.proto.HddsProtos; | ||
|
||
import java.util.Arrays; | ||
import java.util.Objects; | ||
import java.util.stream.Collectors; | ||
|
||
/** | ||
* BlockID of Ozone (containerID + localID + blockCommitSequenceId). | ||
* BlockID of Ozone (containerID + localID + blockCommitSequenceId + replicaIndex). | ||
*/ | ||
|
||
public class BlockID { | ||
|
||
private final ContainerBlockID containerBlockID; | ||
private long blockCommitSequenceId; | ||
// null value when not set with private constructor.(This is to avoid confusion of replica index 0 & null value). | ||
// This value would be only set when deserializing from ContainerProtos.DatanodeBlockID or copying from another | ||
// BlockID object. | ||
private Integer replicaIndex; | ||
private StackTraceElement[] stackTraceElements; | ||
|
||
public BlockID(long containerID, long localID) { | ||
this(containerID, localID, 0); | ||
this(containerID, localID, 0, null); | ||
} | ||
|
||
private BlockID(long containerID, long localID, long bcsID) { | ||
private BlockID(long containerID, long localID, long bcsID, Integer repIndex) { | ||
containerBlockID = new ContainerBlockID(containerID, localID); | ||
blockCommitSequenceId = bcsID; | ||
this.replicaIndex = repIndex; | ||
this.stackTraceElements = Thread.currentThread().getStackTrace(); | ||
swamirishi marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
public BlockID(BlockID blockID) { | ||
this(blockID.getContainerID(), blockID.getLocalID(), blockID.getBlockCommitSequenceId(), | ||
blockID.getReplicaIndex()); | ||
} | ||
|
||
public BlockID(ContainerBlockID containerBlockID) { | ||
this(containerBlockID, 0); | ||
this(containerBlockID, 0, null); | ||
} | ||
|
||
private BlockID(ContainerBlockID containerBlockID, long bcsId) { | ||
private BlockID(ContainerBlockID containerBlockID, long bcsId, Integer repIndex) { | ||
this.containerBlockID = containerBlockID; | ||
blockCommitSequenceId = bcsId; | ||
this.replicaIndex = repIndex; | ||
this.stackTraceElements = Thread.currentThread().getStackTrace(); | ||
} | ||
|
||
public long getContainerID() { | ||
|
@@ -65,6 +81,15 @@ public void setBlockCommitSequenceId(long blockCommitSequenceId) { | |
this.blockCommitSequenceId = blockCommitSequenceId; | ||
} | ||
|
||
// Can return a null value in case it is not set. | ||
public Integer getReplicaIndex() { | ||
return replicaIndex; | ||
} | ||
|
||
public void setReplicaIndex(Integer replicaIndex) { | ||
this.replicaIndex = replicaIndex; | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is unused. If removed, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
|
||
public ContainerBlockID getContainerBlockID() { | ||
return containerBlockID; | ||
} | ||
|
@@ -79,21 +104,33 @@ public String toString() { | |
public void appendTo(StringBuilder sb) { | ||
containerBlockID.appendTo(sb); | ||
sb.append(" bcsId: ").append(blockCommitSequenceId); | ||
sb.append(" replicaIndex: ").append(replicaIndex); | ||
sb.append(Arrays.stream(stackTraceElements).map(StackTraceElement::toString).collect(Collectors.joining("\n"))); | ||
swamirishi marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
@JsonIgnore | ||
public ContainerProtos.DatanodeBlockID getDatanodeBlockIDProtobuf() { | ||
ContainerProtos.DatanodeBlockID.Builder blockID = getDatanodeBlockIDProtobufBuilder(); | ||
if (replicaIndex != null) { | ||
blockID.setReplicaIndex(replicaIndex); | ||
} | ||
return blockID.build(); | ||
} | ||
|
||
@JsonIgnore | ||
public ContainerProtos.DatanodeBlockID.Builder getDatanodeBlockIDProtobufBuilder() { | ||
return ContainerProtos.DatanodeBlockID.newBuilder(). | ||
setContainerID(containerBlockID.getContainerID()) | ||
.setLocalID(containerBlockID.getLocalID()) | ||
.setBlockCommitSequenceId(blockCommitSequenceId).build(); | ||
.setBlockCommitSequenceId(blockCommitSequenceId); | ||
} | ||
|
||
@JsonIgnore | ||
public static BlockID getFromProtobuf( | ||
ContainerProtos.DatanodeBlockID blockID) { | ||
public static BlockID getFromProtobuf(ContainerProtos.DatanodeBlockID blockID) { | ||
return new BlockID(blockID.getContainerID(), | ||
blockID.getLocalID(), blockID.getBlockCommitSequenceId()); | ||
blockID.getLocalID(), | ||
blockID.getBlockCommitSequenceId(), | ||
blockID.hasReplicaIndex() ? blockID.getReplicaIndex() : null); | ||
} | ||
|
||
@JsonIgnore | ||
|
@@ -107,7 +144,7 @@ public HddsProtos.BlockID getProtobuf() { | |
public static BlockID getFromProtobuf(HddsProtos.BlockID blockID) { | ||
return new BlockID( | ||
ContainerBlockID.getFromProtobuf(blockID.getContainerBlockID()), | ||
blockID.getBlockCommitSequenceId()); | ||
blockID.getBlockCommitSequenceId(), null); | ||
} | ||
|
||
@Override | ||
|
@@ -120,13 +157,13 @@ public boolean equals(Object o) { | |
} | ||
BlockID blockID = (BlockID) o; | ||
return containerBlockID.equals(blockID.getContainerBlockID()) | ||
&& blockCommitSequenceId == blockID.getBlockCommitSequenceId(); | ||
&& blockCommitSequenceId == blockID.getBlockCommitSequenceId() | ||
&& Objects.equals(replicaIndex, blockID.getReplicaIndex()); | ||
swamirishi marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
@Override | ||
public int hashCode() { | ||
return Objects | ||
.hash(containerBlockID.getContainerID(), containerBlockID.getLocalID(), | ||
blockCommitSequenceId); | ||
return Objects.hash(containerBlockID.getContainerID(), containerBlockID.getLocalID(), | ||
blockCommitSequenceId, replicaIndex); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The patch is huge (122K), hard to review. Size could be reduced by prefactoring:
newBuilder()
in a separate patch, without any functional changes, before the fix.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok will raise a patch for this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@adoroszlai @sodonnel I have raised another patch since this patch is becomning too big to review #6812
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The other PR ,merged now.