diff --git a/README.md b/README.md index 4ab674229e..8ec523c45e 100644 --- a/README.md +++ b/README.md @@ -235,33 +235,11 @@ and job recovery (i.e., `yarn.app.mapreduce.am.job.recovery.enable=false`) The important configuration is listed as following. -### Coordinator - -For more details of advanced configuration, please see [Uniffle Coordinator Guide](https://github.com/apache/incubator-uniffle/blob/master/docs/coordinator_guide.md). - -### Shuffle Server - -|Property Name|Default|Description| -|---|---|---| -|rss.coordinator.quorum|-|Coordinator quorum| -|rss.rpc.server.port|-|RPC port for Shuffle server| -|rss.jetty.http.port|-|Http port for Shuffle server| -|rss.server.buffer.capacity|-|Max memory of buffer manager for shuffle server| -|rss.server.memory.shuffle.highWaterMark.percentage|75.0|Threshold of spill data to storage, percentage of rss.server.buffer.capacity| -|rss.server.memory.shuffle.lowWaterMark.percentage|25.0|Threshold of keep data in memory, percentage of rss.server.buffer.capacity| -|rss.server.read.buffer.capacity|-|Max size of buffer for reading data| -|rss.server.heartbeat.interval|10000|Heartbeat interval to Coordinator (ms)| -|rss.server.flush.threadPool.size|10|Thread pool for flush data to file| -|rss.server.commit.timeout|600000|Timeout when commit shuffle data (ms)| -|rss.storage.type|-|Supports MEMORY_LOCALFILE, MEMORY_HDFS, MEMORY_LOCALFILE_HDFS| -|rss.server.flush.cold.storage.threshold.size|64M| The threshold of data size for LOACALFILE and HDFS if MEMORY_LOCALFILE_HDFS is used| -|rss.server.tags|-|The comma-separated list of tags to indicate the shuffle server's attributes. 
It will be used as the assignment basis for the coordinator| -|rss.server.single.buffer.flush.enabled|false|Whether single buffer flush when size exceeded rss.server.single.buffer.flush.threshold| -|rss.server.single.buffer.flush.threshold|64M|The threshold of single shuffle buffer flush| - -### Shuffle Client - -For more details of advanced configuration, please see [Uniffle Shuffle Client Guide](https://github.com/apache/incubator-uniffle/blob/master/docs/client_guide.md). +|Role|Link| +|---|---| +|coordinator|[Uniffle Coordinator Guide](https://github.com/apache/incubator-uniffle/blob/master/docs/coordinator_guide.md)| +|shuffle server|[Uniffle Shuffle Server Guide](https://github.com/apache/incubator-uniffle/blob/master/docs/server_guide.md)| +|client|[Uniffle Shuffle Client Guide](https://github.com/apache/incubator-uniffle/blob/master/docs/client_guide.md)| ## Security:Hadoop kerberos authentication The primary goals of the Uniffle Kerberos security are: diff --git a/docs/server_guide.md b/docs/server_guide.md index e959839f04..5e2a97ec23 100644 --- a/docs/server_guide.md +++ b/docs/server_guide.md @@ -20,3 +20,65 @@ license: | limitations under the License. --- # Uniffle Shuffle Server Guide + +## Deploy +This document will introduce how to deploy Uniffle shuffle servers. + +### Steps +1. unzip package to RSS_HOME +2. update RSS_HOME/bin/rss-env.sh, eg, + ``` + JAVA_HOME= + HADOOP_HOME= + XMX_SIZE="80g" + ``` +3. update RSS_HOME/conf/server.conf, eg, + ``` + rss.rpc.server.port 19999 + rss.jetty.http.port 19998 + rss.rpc.executor.size 2000 + # it should be configured the same as in coordinator + rss.storage.type MEMORY_LOCALFILE_HDFS + rss.coordinator.quorum :19999,:19999 + # local storage path for shuffle server + rss.storage.basePath /data1/rssdata,/data2/rssdata.... 
+ # it's better to configure thread num according to local disk num + rss.server.flush.thread.alive 5 + rss.server.flush.threadPool.size 10 + rss.server.buffer.capacity 40g + rss.server.read.buffer.capacity 20g + rss.server.heartbeat.timeout 60000 + rss.server.heartbeat.interval 10000 + rss.rpc.message.max.size 1073741824 + rss.server.preAllocation.expired 120000 + rss.server.commit.timeout 600000 + rss.server.app.expired.withoutHeartbeat 120000 + # note: the default value of rss.server.flush.cold.storage.threshold.size is 64m + # there will be no data written to DFS if set it as 100g even rss.storage.type=MEMORY_LOCALFILE_HDFS + # please set proper value if DFS is used, eg, 64m, 128m. + rss.server.flush.cold.storage.threshold.size 100g + ``` +4. start Shuffle Server + ``` + bash RSS_HOME/bin/start-shuffle-server.sh + ``` + +## Configuration +|Property Name|Default|Description| +|---|---|---| +|rss.coordinator.quorum|-|Coordinator quorum| +|rss.rpc.server.port|-|RPC port for Shuffle server| +|rss.jetty.http.port|-|Http port for Shuffle server| +|rss.server.buffer.capacity|-|Max memory of buffer manager for shuffle server| +|rss.server.memory.shuffle.highWaterMark.percentage|75.0|Threshold of spill data to storage, percentage of rss.server.buffer.capacity| +|rss.server.memory.shuffle.lowWaterMark.percentage|25.0|Threshold of keep data in memory, percentage of rss.server.buffer.capacity| +|rss.server.read.buffer.capacity|-|Max size of buffer for reading data| +|rss.server.heartbeat.interval|10000|Heartbeat interval to Coordinator (ms)| +|rss.server.flush.threadPool.size|10|Thread pool for flush data to file| +|rss.server.commit.timeout|600000|Timeout when commit shuffle data (ms)| +|rss.storage.type|-|Supports MEMORY_LOCALFILE, MEMORY_HDFS, MEMORY_LOCALFILE_HDFS| +|rss.server.flush.cold.storage.threshold.size|64M| The threshold of data size for LOCALFILE and HDFS if MEMORY_LOCALFILE_HDFS is used| +|rss.server.tags|-|The comma-separated list of tags to indicate the 
shuffle server's attributes. It will be used as the assignment basis for the coordinator| +|rss.server.single.buffer.flush.enabled|false|Whether single buffer flush when size exceeded rss.server.single.buffer.flush.threshold| +|rss.server.single.buffer.flush.threshold|64M|The threshold of single shuffle buffer flush| +|rss.server.disk.capacity|-1|Disk capacity that shuffle server can use. If it's negative, it will use the default disk whole space| diff --git a/server/src/main/java/org/apache/uniffle/server/ShuffleServerConf.java b/server/src/main/java/org/apache/uniffle/server/ShuffleServerConf.java index 7336f6d5be..9e58084264 100644 --- a/server/src/main/java/org/apache/uniffle/server/ShuffleServerConf.java +++ b/server/src/main/java/org/apache/uniffle/server/ShuffleServerConf.java @@ -177,9 +177,9 @@ public class ShuffleServerConf extends RssBaseConf { public static final ConfigOption DISK_CAPACITY = ConfigOptions .key("rss.server.disk.capacity") .longType() - .checkValue(ConfigUtils.POSITIVE_LONG_VALIDATOR, "disk capacity must be positive") - .defaultValue(1024L * 1024L * 1024L * 1024L) - .withDescription("Disk capacity that shuffle server can use"); + .defaultValue(-1L) + .withDescription("Disk capacity that shuffle server can use. 
" + + "If it's negative, it will use the default whole space"); public static final ConfigOption SHUFFLE_EXPIRED_TIMEOUT_MS = ConfigOptions .key("rss.server.shuffle.expired.timeout.ms") diff --git a/storage/src/main/java/org/apache/uniffle/storage/common/LocalStorage.java b/storage/src/main/java/org/apache/uniffle/storage/common/LocalStorage.java index a2fa084754..19c886ee12 100644 --- a/storage/src/main/java/org/apache/uniffle/storage/common/LocalStorage.java +++ b/storage/src/main/java/org/apache/uniffle/storage/common/LocalStorage.java @@ -44,7 +44,7 @@ public class LocalStorage extends AbstractStorage { private static final Logger LOG = LoggerFactory.getLogger(LocalStorage.class); public static final String STORAGE_HOST = "local"; - private final long capacity; + private long capacity; private final String basePath; private final double cleanupThreshold; private final long cleanIntervalMs; @@ -77,10 +77,16 @@ private LocalStorage(Builder builder) { LOG.warn("Init base directory " + basePath + " fail, the disk should be corrupted", ioe); throw new RuntimeException(ioe); } - long freeSpace = baseFolder.getFreeSpace(); - if (freeSpace < capacity) { - throw new IllegalArgumentException("The Disk of " + basePath + " Available Capacity " + freeSpace - + " is smaller than configuration"); + if (capacity < 0L) { + this.capacity = baseFolder.getTotalSpace(); + LOG.info("Make the disk capacity the total space when \"rss.server.disk.capacity\" is not specified " + + "or less than 0"); + } else { + long freeSpace = baseFolder.getFreeSpace(); + if (freeSpace < capacity) { + throw new IllegalArgumentException("The Disk of " + basePath + " Available Capacity " + freeSpace + + " is smaller than configuration"); + } } }