Update Hadoop to 3.0.0-beta1 #12

Open · wants to merge 13 commits into master
2 changes: 1 addition & 1 deletion .travis.yml
@@ -25,7 +25,7 @@ install:
- mvn -version
- cd hadoop
- mvn clean package -DskipTests -Pdist -Dtar | grep "Building Apache Hadoop"
- cp hadoop-dist/target/hadoop-3.0.0-beta1-SNAPSHOT.tar.gz ../hadoop-base
- cp hadoop-dist/target/hadoop-3.0.0-beta1.tar.gz ../hadoop-base

script:
- cd ..
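
The CI step above builds the full Hadoop distribution from source and stages the resulting tarball for the `hadoop-base` image. A minimal local equivalent of that step, assuming the `./hadoop` source checkout and directory layout this repository expects:

```bash
#!/usr/bin/env bash
# Sketch: build the Hadoop 3.0.0-beta1 distribution tarball and stage it
# where the hadoop-base Docker build looks for it (assumes ./hadoop is a
# Hadoop source checkout, as in .travis.yml above).
set -euo pipefail

cd hadoop
mvn clean package -DskipTests -Pdist -Dtar
cp hadoop-dist/target/hadoop-3.0.0-beta1.tar.gz ../hadoop-base
```
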
6 changes: 3 additions & 3 deletions README.md
@@ -10,7 +10,7 @@ In order to bind, tar.gz package assumed be put under `hadoop-base` directory.

```bash
$ cd docker-hadoop-cluster
$ cp /path/to/hadoop-3.0.0-alpha3-SNAPSHOT.tar.gz hadoop-base
$ cp /path/to/hadoop-3.0.0-beta1.tar.gz hadoop-base
$ make
```

@@ -84,12 +84,12 @@ services:
$ docker exec -it master bash
bash-4.1# cd /usr/local/hadoop
bash-4.1# bin/hadoop version
Hadoop 3.0.0-SNAPSHOT
Hadoop 3.0.0-beta1
Source code repository git://git.apache.org/hadoop.git -r 0c7d3f480548745e9e9ccad1d318371c020c3003
Compiled by lewuathe on 2015-09-13T01:12Z
Compiled with protoc 2.5.0
From source with checksum 9174a352ac823cdfa576f525665e99
This command was run using /usr/local/hadoop-3.0.0-SNAPSHOT/share/hadoop/common/hadoop-common-3.0.0-SNAPSHOT.jar
This command was run using /usr/local/hadoop-3.0.0-beta1/share/hadoop/common/hadoop-common-3.0.0-beta1.jar
```

# Deploy on EC2
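
Once the containers are running, a quick way to confirm that the five slaves registered with the NameNode — a usage sketch, assuming the `master` container name and install path shown above:

```bash
# Sketch: report live DataNodes from inside the master container
# (container name and /usr/local/hadoop path are from this repository).
docker exec master /usr/local/hadoop/bin/hdfs dfsadmin -report | grep -A1 "Live datanodes"
```
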
8 changes: 7 additions & 1 deletion docker-compose.yml
@@ -1,6 +1,7 @@
version: '2'
version: '3'

services:

master:
build: ./hadoop-master
ports:
@@ -9,30 +10,35 @@ services:
- "19888:19888"
- "8188:8188"
container_name: "master"

slave1:
build: ./hadoop-slave
container_name: "slave1"
ports:
- "9901:9864"
- "8041:8042"

slave2:
build: ./hadoop-slave
container_name: "slave2"
ports:
- "9902:9864"
- "8042:8042"

slave3:
build: ./hadoop-slave
container_name: "slave3"
ports:
- "9903:9864"
- "8043:8042"

slave4:
build: ./hadoop-slave
container_name: "slave4"
ports:
- "9904:9864"
- "8044:8042"

slave5:
build: ./hadoop-slave
container_name: "slave5"
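
For reference, a typical way to bring up the cluster defined above and check its state — standard Compose usage, not part of this diff:

```bash
# Sketch: build the images and start the master plus five slaves,
# then list the containers and follow the master's logs.
docker-compose up -d --build
docker-compose ps
docker logs -f master
```
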
107 changes: 51 additions & 56 deletions hadoop-base/Dockerfile
@@ -12,85 +12,80 @@
#
# Creates multi node hadoop cluster on Docker

FROM sequenceiq/pam:ubuntu-14.04
FROM ubuntu:17.10
MAINTAINER lewuathe

USER root

# install dev tools
RUN apt-get update
RUN apt-get install -y curl tar sudo openssh-server openssh-client rsync
RUN apt-get update && \
apt-get install -y tar sudo openssh-server openssh-client rsync

# passwordless ssh
RUN rm -f /etc/ssh/ssh_host_dsa_key /etc/ssh/ssh_host_rsa_key /root/.ssh/id_rsa
RUN ssh-keygen -q -N "" -t dsa -f /etc/ssh/ssh_host_dsa_key
RUN ssh-keygen -q -N "" -t rsa -f /etc/ssh/ssh_host_rsa_key
RUN ssh-keygen -q -N "" -t rsa -f /root/.ssh/id_rsa
RUN cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys
COPY conf/.ssh/* /root/.ssh/
RUN sed -i "/^[^#]*UsePAM/ s/.*/#&/" /etc/ssh/sshd_config && \
echo "UsePAM no" >> /etc/ssh/sshd_config && \
echo "Port 2122" >> /etc/ssh/sshd_config && \
rm -f /etc/ssh/ssh_host_dsa_key && ssh-keygen -q -N "" -t dsa -f /etc/ssh/ssh_host_dsa_key && \
rm -f /etc/ssh/ssh_host_rsa_key && ssh-keygen -q -N "" -t rsa -f /etc/ssh/ssh_host_rsa_key && \
rm -f /root/.ssh/id_rsa && ssh-keygen -q -N "" -t rsa -f /root/.ssh/id_rsa && \
cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys && \
chmod 600 /root/.ssh/config && \
chown -R root:root /root/.ssh

# java
RUN mkdir -p /usr/java/default && \
curl -Ls 'http://download.oracle.com/otn-pub/java/jdk/8u131-b11/d54c1d3a095b4ff2b6607d096fa80163/jdk-8u131-linux-x64.tar.gz' -H 'Cookie: oraclelicense=accept-securebackup-cookie' | \
tar --strip-components=1 -xz -C /usr/java/default/

# ADD jdk-8u112-linux-x64.tar.gz /usr/java
# RUN sudo ln -s /usr/java/jdk1.8.0_112/ /usr/java/default
ARG JAVA_HOME=/usr/java/default

ENV JAVA_HOME /usr/java/default
ENV PATH $PATH:$JAVA_HOME/bin
ENV JAVA_HOME ${JAVA_HOME}
ENV PATH $PATH:$JAVA_HOME/bin

# download native support
RUN mkdir -p /tmp/native
RUN curl -Ls http://dl.bintray.com/sequenceiq/sequenceiq-bin/hadoop-native-64-2.7.0.tar | tar -x -C /tmp/native
RUN mkdir -p ${JAVA_HOME} && \
wget 'http://download.oracle.com/otn-pub/java/jdk/8u144-b01/090f390dda5b47b9b721c7dfaa008135/jdk-8u144-linux-x64.tar.gz' --header='Cookie: oraclelicense=accept-securebackup-cookie' -O - | \
tar --strip-components=1 -xz -C ${JAVA_HOME}

ENV HADOOP_VERSION=3.0.0-beta1-SNAPSHOT
ADD hadoop-${HADOOP_VERSION}.tar.gz /usr/local/
WORKDIR /usr/local
RUN ln -s /usr/local/hadoop-${HADOOP_VERSION} /usr/local/hadoop
# hadoop

ENV HADOOP_HOME /usr/local/hadoop
ENV HADOOP_COMMON_HOME /usr/local/hadoop
ENV HADOOP_HDFS_HOME /usr/local/hadoop
ENV HADOOP_MAPRED_HOME /usr/local/hadoop
ENV HADOOP_YARN_HOME /usr/local/hadoop
ENV HADOOP_CONF_DIR /usr/local/hadoop/etc/hadoop
ENV YARN_CONF_DIR /usr/local/hadoop/etc/hadoop
ENV HADOOP_LOG_DIR /var/log/hadoop
ARG HADOOP_VERSION=3.0.0-beta1
ARG HADOOP_HOME_PARENT_DIR=/usr/local
ARG HADOOP_HOME=${HADOOP_HOME_PARENT_DIR}/hadoop

RUN mkdir /var/log/hadoop
ENV HADOOP_VERSION ${HADOOP_VERSION}
ENV HADOOP_HOME ${HADOOP_HOME}
ENV HADOOP_COMMON_HOME ${HADOOP_HOME}
ENV HADOOP_HDFS_HOME ${HADOOP_HOME}
ENV HADOOP_MAPRED_HOME ${HADOOP_HOME}
ENV HADOOP_YARN_HOME ${HADOOP_HOME}
ENV HADOOP_CONF_DIR ${HADOOP_HOME}/etc/hadoop
ENV YARN_CONF_DIR ${HADOOP_HOME}/etc/hadoop
ENV HADOOP_LOG_DIR /var/log/hadoop
ENV PATH $PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

RUN sed -i '/^export JAVA_HOME/ s:.*:export JAVA_HOME=/usr/java/default\nexport H=/usr/local/hadoop\nexport HADOOP_HOME=/usr/local/hadoop\n:' $HADOOP_HOME/etc/hadoop/hadoop-env.sh
RUN sed -i '/^export HADOOP_CONF_DIR/ s:.*:export HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop/:' $HADOOP_HOME/etc/hadoop/hadoop-env.sh
#RUN . $HADOOP_HOME/etc/hadoop/hadoop-env.sh
ADD hadoop-${HADOOP_VERSION}.tar.gz ${HADOOP_HOME_PARENT_DIR}
RUN ln -s ${HADOOP_HOME_PARENT_DIR}/hadoop-${HADOOP_VERSION} ${HADOOP_HOME}
# RUN mkdir -p ${HADOOP_HOME} && \
# wget http://apache.javapipe.com/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz -O - | \
# tar --strip-components=1 -xz -C ${HADOOP_HOME}

RUN mkdir $HADOOP_HOME/input
RUN cp $HADOOP_HOME/etc/hadoop/*.xml $HADOOP_HOME/input
COPY conf/hadoop/* $HADOOP_HOME/etc/hadoop/

ADD core-site.xml $HADOOP_HOME/etc/hadoop/core-site.xml
ADD hdfs-site.xml $HADOOP_HOME/etc/hadoop/hdfs-site.xml
ADD mapred-site.xml $HADOOP_HOME/etc/hadoop/mapred-site.xml
ADD yarn-site.xml $HADOOP_HOME/etc/hadoop/yarn-site.xml
ADD log4j.properties $HADOOP_HOME/etc/hadoop/log4j.properties
RUN mkdir $HADOOP_LOG_DIR && \
mkdir $HADOOP_HOME/input && \
cp $HADOOP_HOME/etc/hadoop/*.xml $HADOOP_HOME/input && \
sed -i "s!^# export JAVA_HOME.*!export JAVA_HOME=${JAVA_HOME}\nexport H=${HADOOP_HOME}\nexport HADOOP_HOME=${HADOOP_HOME}\n!" $HADOOP_HOME/etc/hadoop/hadoop-env.sh && \
sed -i "s!^# export HADOOP_CONF_DIR.*!export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop/!" $HADOOP_HOME/etc/hadoop/hadoop-env.sh && \
sed -i "s!\$HADOOP_HOME!${HADOOP_HOME}!g" $HADOOP_HOME/etc/hadoop/yarn-site.xml

RUN $HADOOP_HOME/bin/hdfs namenode -format

# fixing the libhadoop.so like a boss
RUN rm -rf /usr/local/hadoop/lib/native
RUN mv /tmp/native /usr/local/hadoop/lib

ADD ssh_config /root/.ssh/config
RUN chmod 600 /root/.ssh/config
RUN chown root:root /root/.ssh/config
RUN mkdir -p /tmp/native && \
wget http://dl.bintray.com/sequenceiq/sequenceiq-bin/hadoop-native-64-2.7.0.tar -O - | tar -x -C /tmp/native && \
rm -rf ${HADOOP_HOME}/lib/native && \
mv /tmp/native ${HADOOP_HOME}/lib

# working around docker.io build error
RUN ls -la /usr/local/hadoop/etc/hadoop/*-env.sh
RUN chmod +x /usr/local/hadoop/etc/hadoop/*-env.sh
RUN ls -la /usr/local/hadoop/etc/hadoop/*-env.sh

# fix the 254 error code
RUN sed -i "/^[^#]*UsePAM/ s/.*/#&/" /etc/ssh/sshd_config
RUN echo "UsePAM no" >> /etc/ssh/sshd_config
RUN echo "Port 2122" >> /etc/ssh/sshd_config
RUN chmod +x ${HADOOP_HOME}/etc/hadoop/*-env.sh

RUN service ssh start

@@ -103,4 +98,4 @@ EXPOSE 19888
#Yarn ports
EXPOSE 8030 8031 8032 8033 8040 8042 8088 8188
#Other ports
EXPOSE 49707 2122
EXPOSE 49707 2122
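
Since the base image now takes its version and install paths as build arguments, it can also be built standalone with overrides — a hedged sketch using the defaults declared in the Dockerfile (the image tag is illustrative):

```bash
# Sketch: build hadoop-base by itself, overriding the ARGs declared above
# (the values shown are the Dockerfile defaults).
docker build \
  --build-arg HADOOP_VERSION=3.0.0-beta1 \
  --build-arg HADOOP_HOME_PARENT_DIR=/usr/local \
  -t hadoop-base ./hadoop-base
```

The build still expects `hadoop-${HADOOP_VERSION}.tar.gz` to be present in the `hadoop-base` directory, as described in the README.
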
File renamed without changes.
8 changes: 8 additions & 0 deletions hadoop-base/conf/hadoop/core-site.xml
@@ -0,0 +1,8 @@
<configuration>

<property>
<name>fs.defaultFS</name>
<value>hdfs://master:9000</value>
</property>

</configuration>
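
With `fs.defaultFS` pointing at the master's NameNode, unqualified HDFS paths resolve against `hdfs://master:9000`. A small usage sketch (the paths are illustrative, not created by this PR):

```bash
# Sketch: relative paths on the hdfs CLI now resolve against master:9000.
docker exec master /usr/local/hadoop/bin/hdfs dfs -mkdir -p /user/root
docker exec master /usr/local/hadoop/bin/hdfs dfs -ls /
```
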
13 changes: 13 additions & 0 deletions hadoop-base/conf/hadoop/hdfs-site.xml
@@ -0,0 +1,13 @@
<configuration>

<property>
<name>dfs.replication</name>
<value>3</value>
</property>

<property>
<name>dfs.namenode.ec.policies.enabled</name>
<value>RS-3-2-64k</value>
</property>

</configuration>
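
The `dfs.namenode.ec.policies.enabled` entry whitelists the `RS-3-2-64k` Reed-Solomon erasure-coding policy introduced in Hadoop 3. A hedged sketch of applying it to a directory with the `hdfs ec` subcommand (the `/ec-data` path is illustrative):

```bash
# Sketch: list the enabled erasure-coding policies and apply RS-3-2-64k
# to a directory (/ec-data is illustrative, not part of this PR).
docker exec master /usr/local/hadoop/bin/hdfs ec -listPolicies
docker exec master /usr/local/hadoop/bin/hdfs dfs -mkdir -p /ec-data
docker exec master /usr/local/hadoop/bin/hdfs ec -setPolicy -path /ec-data -policy RS-3-2-64k
```
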
File renamed without changes.
8 changes: 8 additions & 0 deletions hadoop-base/conf/hadoop/mapred-site.xml
@@ -0,0 +1,8 @@
<configuration>

<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>

</configuration>
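
With `mapreduce.framework.name` set to `yarn`, jobs submit to the cluster's ResourceManager instead of the local runner. A quick smoke test, assuming the examples jar sits at its usual place in the distribution layout (not verified by this PR):

```bash
# Sketch: run the bundled pi example on YARN (the examples jar path
# follows the standard Hadoop distribution layout and is assumed here).
docker exec master /usr/local/hadoop/bin/yarn jar \
  /usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.0.0-beta1.jar \
  pi 2 10
```
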
89 changes: 89 additions & 0 deletions hadoop-base/conf/hadoop/yarn-site.xml
@@ -0,0 +1,89 @@
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>

<property>
<name>yarn.application.classpath</name>
<value>$HADOOP_HOME/etc/hadoop, $HADOOP_HOME/share/hadoop/common/*, $HADOOP_HOME/share/hadoop/common/lib/*, $HADOOP_HOME/share/hadoop/hdfs/*, $HADOOP_HOME/share/hadoop/hdfs/lib/*, $HADOOP_HOME/share/hadoop/mapreduce/*, $HADOOP_HOME/share/hadoop/mapreduce/lib/*, $HADOOP_HOME/share/hadoop/yarn/*, $HADOOP_HOME/share/hadoop/yarn/lib/*</value>
</property>

<property>
<description>
Number of seconds after an application finishes before the nodemanager's
DeletionService will delete the application's localized file directory
and log directory.

To diagnose YARN application problems, set this property's value large
enough (for example, to 600 = 10 minutes) to permit examination of these
directories. After changing the property's value, you must restart the
NodeManager for the change to take effect.

The roots of YARN applications' work directories are configurable with
the yarn.nodemanager.local-dirs property, and the roots of their log
directories are configurable with the yarn.nodemanager.log-dirs property.
</description>
<name>yarn.nodemanager.delete.debug-delay-sec</name>
<value>600</value>
</property>

<property>
<name>yarn.resourcemanager.address</name>
<value>master:8032</value>
</property>

<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>master:8030</value>
</property>

<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>master:8031</value>
</property>

<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>

<property>
<name>yarn.timeline-service.enabled</name>
<value>true</value>
</property>

<property>
<name>yarn.timeline-service.hostname</name>
<value>master</value>
</property>

<property>
<name>yarn.timeline-service.generic-application-history.enabled</name>
<value>true</value>
</property>

<property>
<name>yarn.resourcemanager.system-metrics-publisher.enabled</name>
<value>true</value>
</property>

<property>
<name>yarn.webapp.ui2.enable</name>
<value>true</value>
</property>

<!-- The NameNode must run on the same host as the ResourceManager in this configuration -->
<!-- Disable due to HDFS initialization timing issue
<property>
<name>yarn.node-labels.fs-store.root-dir</name>
<value>hdfs://master:9000/node-labels/</value>
</property>

<property>
<name>yarn.node-labels.enabled</name>
<value>true</value>
</property>
-->
</configuration>
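
The timeline service and metrics publisher enabled above back the application history view and the new YARN UI2. Assuming the master's web ports are published to the host as in `docker-compose.yml`, a couple of reachability checks (the endpoint paths follow the stock Hadoop 3 web layout and are assumptions, not taken from this PR):

```bash
# Sketch: probe the timeline service (8188) and the new YARN UI2 (8088)
# from the host.
curl -s http://localhost:8188/ws/v1/timeline/ | head -c 200
curl -s -o /dev/null -w "%{http_code}\n" http://localhost:8088/ui2/
```
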
6 changes: 0 additions & 6 deletions hadoop-base/core-site.xml

This file was deleted.

10 changes: 0 additions & 10 deletions hadoop-base/hdfs-site.xml

This file was deleted.

6 changes: 0 additions & 6 deletions hadoop-base/mapred-site.xml

This file was deleted.
