diff --git a/README.txt b/README.txt index 148cd31c86b72..ca182dbd5d77f 100644 --- a/README.txt +++ b/README.txt @@ -1,3 +1,7 @@ + +本项目为本人近期阅读Hadoop源码时fork出来的,主要用于注释源码。 + + For the latest information about Hadoop, please visit our website at: http://hadoop.apache.org/core/ diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index 398bb840cf750..e8d607dfa44ee 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -616,7 +616,7 @@ javah - ${env.JAVA_HOME}/bin/javah + ${java.home}/bin/javah org.apache.hadoop.io.compress.zlib.ZlibCompressor org.apache.hadoop.io.compress.zlib.ZlibDecompressor diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/IndexRecord.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/IndexRecord.java index 3996534bd54ec..bbba64e748afa 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/IndexRecord.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/IndexRecord.java @@ -22,9 +22,27 @@ @InterfaceAudience.LimitedPrivate({"MapReduce"}) @InterfaceStability.Unstable + +/** + * Spill文件索引: 用于在Reducer请求数据时快速定位到相应的partition。 + * 一个Spill文件对应一个索引,索引存储在专门分配的缓冲中(对应map输出的 + * 环形Buffer) + */ public class IndexRecord { + + /** + * 起始偏移量(字节数) + */ public long startOffset; + + /** + * Partition数据原始长度(字节数) + */ public long rawLength; + + /** + * partition数据长度,如果压缩则算压缩后的长度(字节数) + */ public long partLength; public IndexRecord() { } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Partitioner.java 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Partitioner.java index 7fdb83dc3e726..c7c3d13be2826 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Partitioner.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Partitioner.java @@ -37,6 +37,17 @@ * *

Note: If you require your Partitioner class to obtain the Job's * configuration object, implement the {@link Configurable} interface.

+ * + * Partitioner对map输出的key进行划分,决定key及其记录应该发往哪一个reducer. + * + * 即指定每一个key应该由哪个reducer来处理. + * + * 发往同一个reducer的所有key组成一个Partition. + * + * 如果作业只有一个reducer,则框架不会为该作业创建Partitioner. + * + * 作业使用哪一个Partitioner由用户配置决定;如果Partitioner的逻辑中需要使用作业的配置信息, + * 可以通过实现Configurable接口访问配置信息. * * @see Reducer */ @@ -50,6 +61,9 @@ public abstract class Partitioner { * *

Typically a hash function on a all or a subset of the key.

* + * 每个Partition(Reducer)对应一个整数编号,该方法返回key所属的Partition + * 的编号. 传入的Partition总数即为作业的reducer总数. + * * @param key the key to be partioned. * @param value the entry value. * @param numPartitions the total number of partitions. diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Reducer.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Reducer.java index ab67ab05734e6..15edd9e5d42be 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Reducer.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Reducer.java @@ -30,7 +30,7 @@ /** * Reduces a set of intermediate values which share a key to a smaller set of * values. - * + * *

Reducer implementations * can access the {@link Configuration} for the job via the * {@link JobContext#getConfiguration()} method.

@@ -114,7 +114,12 @@ * } * } * - * + * + * + * Reducer逻辑实现的模板方法. 执行入口为run(). setup, reduce, cleanup 是3个模板方法,可以重写其中任意一个方法来改变逻辑. + * + * 作业的配置信息可以通过Context的getConfiguration方法获取. + * * @see Mapper * @see Partitioner */ diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/TaskType.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/TaskType.java index 9708b6ef1a435..3c1643b0f870e 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/TaskType.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/TaskType.java @@ -23,6 +23,8 @@ /** * Enum for map, reduce, job-setup, job-cleanup, task-cleanup task types. + * + * 任务类型 */ @InterfaceAudience.Public @InterfaceStability.Stable