org.apache.hadoop.io.compress.zlib.ZlibCompressor
org.apache.hadoop.io.compress.zlib.ZlibDecompressor
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/IndexRecord.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/IndexRecord.java
index 3996534bd54ec..bbba64e748afa 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/IndexRecord.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/IndexRecord.java
@@ -22,9 +22,27 @@
@InterfaceAudience.LimitedPrivate({"MapReduce"})
@InterfaceStability.Unstable
+
+/**
+ * Spill文件索引: 在Reducer请求数据时, 用于快速定位到相应的partition。
+ * 一个Spill文件对应一个索引,索引存储在专门分配的缓冲中(对应map输出的
+ * 环形Buffer)
+ */
public class IndexRecord {
+
+ /**
+ * 起始偏移量(字节数)
+ */
public long startOffset;
+
+ /**
+ * Partition数据原始长度(字节数)
+ */
public long rawLength;
+
+ /**
+ * partition数据长度,如果压缩则算压缩后的长度(字节数)
+ */
public long partLength;
public IndexRecord() { }
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Partitioner.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Partitioner.java
index 7fdb83dc3e726..c7c3d13be2826 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Partitioner.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Partitioner.java
@@ -37,6 +37,17 @@
*
* Note: If you require your Partitioner class to obtain the Job's
* configuration object, implement the {@link Configurable} interface.
+ *
+ * Partitioner对map输出的key进行划分,决定key及其记录应该发往哪一个reducer.
+ *
+ * 即指定每一个key应该由哪个reducer来处理.
+ *
+ * 发往同一个reducer的所有key组成一个Partition.
+ *
+ * 如果作业只有一个reducer,则框架不会为该作业创建Partitioner.
+ *
+ * 作业使用哪一个Partitioner由用户配置决定. 如果Partitioner的逻辑中需要使用作业的配置信息,
+ * 可以通过实现Configurable接口访问配置信息.
*
* @see Reducer
*/
@@ -50,6 +61,9 @@ public abstract class Partitioner {
*
* Typically a hash function on a all or a subset of the key.
*
+ * 每个Partition(Reducer)对应一个整数编号,该方法返回key所属的Partition
+ * 的编号. 传入的Partition总数即为作业的reducer总数.
+ *
* @param key the key to be partioned.
* @param value the entry value.
* @param numPartitions the total number of partitions.
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Reducer.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Reducer.java
index ab67ab05734e6..15edd9e5d42be 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Reducer.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Reducer.java
@@ -30,7 +30,7 @@
/**
* Reduces a set of intermediate values which share a key to a smaller set of
* values.
- *
+ *
* Reducer
implementations
* can access the {@link Configuration} for the job via the
* {@link JobContext#getConfiguration()} method.
@@ -114,7 +114,12 @@
* }
* }
*
- *
+ *
+ *
+ * Reducer的逻辑采用模板方法模式实现. 执行入口为run(). setup, reduce, cleanup 是3个可重写的方法,可以重写其中任意一个方法来改变逻辑.
+ *
+ * 作业的配置信息可以通过Context的getConfiguration方法获取.
+ *
* @see Mapper
* @see Partitioner
*/
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/TaskType.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/TaskType.java
index 9708b6ef1a435..3c1643b0f870e 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/TaskType.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/TaskType.java
@@ -23,6 +23,8 @@
/**
* Enum for map, reduce, job-setup, job-cleanup, task-cleanup task types.
+ *
+ * 任务类型
*/
@InterfaceAudience.Public
@InterfaceStability.Stable