-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-20994] Remove redundant characters in OpenBlocks to save memory for shuffle service. #18231
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
96d07aa
dcf156a
1e53262
8170c8a
5dd0e77
1e72eab
a2af617
6677bc9
2592ef4
5b0ce67
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -44,7 +44,6 @@ | |
| import static org.apache.spark.network.util.NettyUtils.getRemoteAddress; | ||
| import org.apache.spark.network.util.TransportConf; | ||
|
|
||
|
|
||
| /** | ||
| * RPC Handler for a server which can serve shuffle blocks from outside of an Executor process. | ||
| * | ||
|
|
@@ -91,26 +90,8 @@ protected void handleMessage( | |
| try { | ||
| OpenBlocks msg = (OpenBlocks) msgObj; | ||
| checkAuth(client, msg.appId); | ||
|
|
||
| Iterator<ManagedBuffer> iter = new Iterator<ManagedBuffer>() { | ||
| private int index = 0; | ||
|
|
||
| @Override | ||
| public boolean hasNext() { | ||
| return index < msg.blockIds.length; | ||
| } | ||
|
|
||
| @Override | ||
| public ManagedBuffer next() { | ||
| final ManagedBuffer block = blockManager.getBlockData(msg.appId, msg.execId, | ||
| msg.blockIds[index]); | ||
| index++; | ||
| metrics.blockTransferRateBytes.mark(block != null ? block.size() : 0); | ||
| return block; | ||
| } | ||
| }; | ||
|
|
||
| long streamId = streamManager.registerStream(client.getClientId(), iter); | ||
| long streamId = streamManager.registerStream(client.getClientId(), | ||
| new ManagedBufferIterator(msg.appId, msg.execId, msg.blockIds)); | ||
| if (logger.isTraceEnabled()) { | ||
| logger.trace("Registered streamId {} with {} buffers for client {} from host {}", | ||
| streamId, | ||
|
|
@@ -209,4 +190,51 @@ public Map<String, Metric> getMetrics() { | |
| } | ||
| } | ||
|
|
||
| private class ManagedBufferIterator implements Iterator<ManagedBuffer> { | ||
|
|
||
| private int index = 0; | ||
| private final String appId; | ||
| private final String execId; | ||
| private final int shuffleId; | ||
| // An array containing mapId and reduceId pairs. | ||
| private final int[] mapIdAndReduceIds; | ||
|
|
||
| ManagedBufferIterator(String appId, String execId, String[] blockIds) { | ||
| this.appId = appId; | ||
| this.execId = execId; | ||
| String[] blockId0Parts = blockIds[0].split("_"); | ||
| if (blockId0Parts.length != 4 || !blockId0Parts[0].equals("shuffle")) { | ||
| throw new IllegalArgumentException("Unexpected shuffle block id format: " + blockIds[0]); | ||
| } | ||
| this.shuffleId = Integer.parseInt(blockId0Parts[1]); | ||
| mapIdAndReduceIds = new int[2 * blockIds.length]; | ||
| for (int i = 0; i < blockIds.length; i++) { | ||
| String[] blockIdParts = blockIds[i].split("_"); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. shall we check |
||
| if (blockIdParts.length != 4 || !blockIdParts[0].equals("shuffle")) { | ||
| throw new IllegalArgumentException("Unexpected shuffle block id format: " + blockIds[i]); | ||
| } | ||
| if (Integer.parseInt(blockIdParts[1]) != shuffleId) { | ||
| throw new IllegalArgumentException("Expected shuffleId=" + shuffleId + | ||
| ", got:" + blockIds[i]); | ||
| } | ||
| mapIdAndReduceIds[2 * i] = Integer.parseInt(blockIdParts[2]); | ||
| mapIdAndReduceIds[2 * i + 1] = Integer.parseInt(blockIdParts[3]); | ||
| } | ||
| } | ||
|
|
||
| /** Reports whether any (mapId, reduceId) pair in the flattened array is still unread. */ @Override | ||
| public boolean hasNext() { | ||
| return index < mapIdAndReduceIds.length; /* the array holds two ints per block and index advances by 2 in next() */ | ||
| } | ||
|
|
||
| /** Looks up the buffer for the current (mapId, reduceId) pair, records its size in the transfer metric, and advances to the next pair. */ @Override | ||
| public ManagedBuffer next() { | ||
| final ManagedBuffer block = blockManager.getBlockData(appId, execId, shuffleId, | ||
| mapIdAndReduceIds[index], mapIdAndReduceIds[index + 1]); /* mapId is at index, its reduceId at index + 1 */ | ||
| index += 2; /* step over the pair just consumed */ | ||
| metrics.blockTransferRateBytes.mark(block != null ? block.size() : 0); /* a null block is counted as 0 bytes */ | ||
| return block; /* NOTE(review): no hasNext() guard here, so reading past the end throws ArrayIndexOutOfBoundsException, not NoSuchElementException */ | ||
| } | ||
| } | ||
|
|
||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -150,27 +150,20 @@ public void registerExecutor( | |
| } | ||
|
|
||
| /** | ||
| * Obtains a FileSegmentManagedBuffer from a shuffle block id. We expect the blockId has the | ||
| * format "shuffle_ShuffleId_MapId_ReduceId" (from ShuffleBlockId), and additionally make | ||
| * assumptions about how the hash and sort based shuffles store their data. | ||
| * Obtains a FileSegmentManagedBuffer from (shuffleId, mapId, reduceId). We make assumptions | ||
| * about how the hash and sort based shuffles store their data. | ||
| */ | ||
| public ManagedBuffer getBlockData(String appId, String execId, String blockId) { | ||
| String[] blockIdParts = blockId.split("_"); | ||
| if (blockIdParts.length < 4) { | ||
| throw new IllegalArgumentException("Unexpected block id format: " + blockId); | ||
| } else if (!blockIdParts[0].equals("shuffle")) { | ||
| throw new IllegalArgumentException("Expected shuffle block id, got: " + blockId); | ||
| } | ||
| int shuffleId = Integer.parseInt(blockIdParts[1]); | ||
| int mapId = Integer.parseInt(blockIdParts[2]); | ||
| int reduceId = Integer.parseInt(blockIdParts[3]); | ||
|
|
||
| public ManagedBuffer getBlockData( | ||
| String appId, | ||
| String execId, | ||
| int shuffleId, | ||
| int mapId, | ||
| int reduceId) { | ||
| ExecutorShuffleInfo executor = executors.get(new AppExecId(appId, execId)); | ||
| if (executor == null) { | ||
| throw new RuntimeException( | ||
| String.format("Executor is not registered (appId=%s, execId=%s)", appId, execId)); | ||
| } | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: we should keep the original format.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There are two blank lines originally. I guess it's appropriate to remove one? |
||
| return getSortBasedShuffleBlockData(executor, shuffleId, mapId, reduceId); | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -202,7 +202,7 @@ public void onBlockFetchFailure(String blockId, Throwable t) { | |
| } | ||
| }; | ||
|
|
||
| String[] blockIds = { "shuffle_2_3_4", "shuffle_6_7_8" }; | ||
| String[] blockIds = { "shuffle_0_1_2", "shuffle_0_3_4" }; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's the purpose of this change?
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. With this change, we cannot shuffle blocks with multiple |
||
| OneForOneBlockFetcher fetcher = | ||
| new OneForOneBlockFetcher(client1, "app-2", "0", blockIds, listener, conf, null); | ||
| fetcher.start(); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why break this out? It's not necessary for the change, right? Is it just for clarity?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the iterator was becoming a little complicated, so I broke it out into its own class and gave it a constructor.