This repository has been archived by the owner on Nov 16, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 357
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #208 from arundasan91/patch-3
Adding Docker support for CaffeOnSpark
- Loading branch information
Showing
7 changed files
with
422 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# CaffeOnSpark Standalone Docker | ||
|
||
Dockerfiles for both CPU and GPU builds are available in the `standalone` folder. To use the CPU-only version, use the commands given below. The GPU version can be run with [`nvidia-docker`](https://github.com/NVIDIA/nvidia-docker) instead of `docker`, using the `standalone/gpu` folder.
|
||
A Dockerfile for the CPU build is provided in the `standalone/cpu` folder. The image can be built by running:
``` | ||
docker build -t caffeonspark:cpu standalone/cpu | ||
``` | ||
After the image is built, use `docker images` to validate. | ||
|
||
## Launching CaffeOnSpark container | ||
Hadoop and Spark are essential requirements for CaffeOnSpark. To ensure that both processes run flawlessly, we have included the `standalone/cpu/config/bootstrap.sh` script, which must be run every time the container is started.
|
||
To launch a container running CaffeOnSpark please use: | ||
``` | ||
docker run -it caffeonspark:cpu /etc/bootstrap.sh -bash | ||
``` | ||
|
||
Now you have a working environment with CaffeOnSpark. | ||
|
||
To verify the installation, please follow the [GetStarted_yarn](https://github.com/yahoo/CaffeOnSpark/wiki/GetStarted_yarn) guide from `Step 7`.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
# Copyright 2016 Yahoo Inc.
# Licensed under the terms of the Apache 2.0 license.
# Please see LICENSE file in the project root for terms.
#
# Dockerfile for the CaffeOnSpark CPU standalone version.

FROM ubuntu:14.04

# Build tools, Caffe dependencies, SSH, and Java 8 (from the openjdk-r PPA;
# 14.04 only ships Java 7). Install and clean the apt lists in one layer so
# the package cache never ends up baked into the image.
RUN apt-get update && apt-get install -y software-properties-common \
    && add-apt-repository ppa:openjdk-r/ppa \
    && apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        git \
        libatlas-base-dev \
        libboost-all-dev \
        libgflags-dev \
        libgoogle-glog-dev \
        libhdf5-serial-dev \
        libleveldb-dev \
        liblmdb-dev \
        libopenblas-dev \
        libopencv-dev \
        libprotobuf-dev \
        libsnappy-dev \
        maven \
        openjdk-8-jdk \
        openssh-client \
        openssh-server \
        protobuf-compiler \
        python-dev \
        python-numpy \
        python-pip \
        python-scipy \
        unzip \
        vim \
        wget \
        zip \
    && rm -rf /var/lib/apt/lists/*

# Passwordless SSH to localhost (Hadoop's start scripts ssh into the node).
# Note: the original used `ssh-keygen -y`, which prints a public key from an
# EXISTING private key; plain generation (no -y) is what is intended here.
RUN ssh-keygen -q -N "" -t dsa -f /etc/ssh/ssh_host_dsa_key \
    && ssh-keygen -q -N "" -t rsa -f /etc/ssh/ssh_host_rsa_key \
    && ssh-keygen -q -N "" -t rsa -f /root/.ssh/id_rsa \
    && cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys

# Apache Hadoop and Spark. Download, unpack, install, and clean up in a
# single layer; archive.apache.org is used for both downloads because
# third-party mirrors (e.g. apache.mirrors.tds.net) drop old releases.
# `sudo` is unnecessary: Docker builds run as root by default.
RUN wget http://archive.apache.org/dist/hadoop/common/hadoop-2.6.4/hadoop-2.6.4.tar.gz \
    && wget http://archive.apache.org/dist/spark/spark-1.6.0/spark-1.6.0-bin-hadoop2.6.tgz \
    && tar -xzf hadoop-2.6.4.tar.gz \
    && tar -xzf spark-1.6.0-bin-hadoop2.6.tgz \
    && cp -r hadoop-2.6.4 /usr/local/hadoop \
    && cp -r spark-1.6.0-bin-hadoop2.6 /usr/local/spark \
    && rm -rf hadoop-2.6.4.tar.gz spark-1.6.0-bin-hadoop2.6.tgz \
              hadoop-2.6.4/ spark-1.6.0-bin-hadoop2.6/ \
    && mkdir -p /usr/local/hadoop/hadoop_data/hdfs/namenode \
    && mkdir -p /usr/local/hadoop/hadoop_data/hdfs/datanode

# Environment variables (key=value form; the space-separated ENV form is
# deprecated).
ENV JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-amd64
ENV HADOOP_HOME=/usr/local/hadoop
ENV SPARK_HOME=/usr/local/spark
ENV PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$SPARK_HOME/bin:$SPARK_HOME/sbin
ENV HADOOP_MAPRED_HOME=/usr/local/hadoop \
    HADOOP_COMMON_HOME=/usr/local/hadoop \
    HADOOP_HDFS_HOME=/usr/local/hadoop \
    HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop \
    YARN_HOME=/usr/local/hadoop \
    HADOOP_COMMON_LIB_NATIVE_DIR=/usr/local/hadoop/lib/native \
    HADOOP_OPTS="-Djava.library.path=/usr/local/hadoop/lib"

# Clone CaffeOnSpark (recursively, for the caffe-public/caffe-distri
# submodules).
ENV CAFFE_ON_SPARK=/opt/CaffeOnSpark
WORKDIR $CAFFE_ON_SPARK
RUN git clone https://github.com/yahoo/CaffeOnSpark.git . --recursive

# Some of the Hadoop part extracted from
# "https://hub.docker.com/r/sequenceiq/hadoop-docker/~/dockerfile/".
# (The original `RUN cd /usr/local/hadoop/input` was a no-op: each RUN
# starts a fresh shell, so a bare `cd` has no lasting effect.)
RUN mkdir $HADOOP_HOME/input \
    && cp $HADOOP_HOME/etc/hadoop/*.xml $HADOOP_HOME/input

# CaffeOnSpark ships Hadoop site configs; copy the .xml files over.
RUN cp ${CAFFE_ON_SPARK}/scripts/*.xml ${HADOOP_HOME}/etc/hadoop

# Format the HDFS namenode.
RUN $HADOOP_HOME/bin/hdfs namenode -format

# SSH client config: no host-key prompts, sshd's custom port 2122.
ADD config/ssh_config /root/.ssh/config
RUN chown root:root /root/.ssh/config && chmod 600 /root/.ssh/config

# Startup script; must run every time the container starts.
ADD config/bootstrap.sh /etc/bootstrap.sh
RUN chown root:root /etc/bootstrap.sh && chmod 700 /etc/bootstrap.sh
ENV BOOTSTRAP=/etc/bootstrap.sh

# Point hadoop-env.sh at our JAVA_HOME / HADOOP_HOME / HADOOP_CONF_DIR.
# chmod +x works around a docker.io build error seen with these files.
RUN sed -i '/^export JAVA_HOME/ s:.*:export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-amd64\nexport HADOOP_HOME=/usr/local/hadoop\n:' $HADOOP_HOME/etc/hadoop/hadoop-env.sh \
    && sed -i '/^export HADOOP_CONF_DIR/ s:.*:export HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop/:' $HADOOP_HOME/etc/hadoop/hadoop-env.sh \
    && chmod +x /usr/local/hadoop/etc/hadoop/*-env.sh

# Fix the 254 error code: disable PAM and move sshd to port 2122.
RUN sed -i "/^[^#]*UsePAM/ s/.*/#&/" /etc/ssh/sshd_config \
    && echo "UsePAM no" >> /etc/ssh/sshd_config \
    && echo "Port 2122" >> /etc/ssh/sshd_config

# Prime HDFS: create /user/root and upload the Hadoop config dir as `input`.
# hadoop-env.sh is sourced (`.`) rather than executed, so its exports
# actually reach the subsequent commands in the chain.
RUN service ssh start \
    && . $HADOOP_HOME/etc/hadoop/hadoop-env.sh \
    && $HADOOP_HOME/sbin/start-dfs.sh \
    && $HADOOP_HOME/bin/hdfs dfs -mkdir -p /user/root \
    && $HADOOP_HOME/bin/hdfs dfs -put $HADOOP_HOME/etc/hadoop/ input

CMD ["/etc/bootstrap.sh", "-bash"]

# EXPOSE is documentation only; it does not publish ports.
# HDFS ports
EXPOSE 50010 50020 50070 50075 50090 8020 9000
# MapReduce ports
EXPOSE 10020 19888
# YARN ports
EXPOSE 8030 8031 8032 8033 8040 8042 8088
# Other ports (sshd listens on 2122)
EXPOSE 49707 2122

# CaffeOnSpark CPU-only build: enable CPU_ONLY, disable CUDA settings,
# use OpenBLAS, and add the JNI headers to the include path.
WORKDIR $CAFFE_ON_SPARK
RUN cp caffe-public/Makefile.config.example caffe-public/Makefile.config \
    && echo "INCLUDE_DIRS += ${JAVA_HOME}/include" >> caffe-public/Makefile.config \
    && sed -i "s/# CPU_ONLY := 1/CPU_ONLY := 1/g" caffe-public/Makefile.config \
    && sed -i "s|CUDA_DIR := /usr/local/cuda|# CUDA_DIR := /usr/local/cuda|g" caffe-public/Makefile.config \
    && sed -i "s|CUDA_ARCH :=|# CUDA_ARCH :=|g" caffe-public/Makefile.config \
    && sed -i "s|BLAS := atlas|BLAS := open|g" caffe-public/Makefile.config \
    && sed -i "s|TEST_GPUID := 0|# TEST_GPUID := 0|g" caffe-public/Makefile.config

RUN make build

ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CAFFE_ON_SPARK/caffe-public/distribute/lib:$CAFFE_ON_SPARK/caffe-distri/distribute/lib

WORKDIR /root
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
#!/bin/bash
# Copyright 2016 Yahoo Inc.
# Licensed under the terms of the Apache 2.0 license.
# Please see LICENSE file in the project root for terms.
#
# Starts sshd, HDFS, and YARN when the container boots.
# Usage: bootstrap.sh [-d | -bash]
#   -d     keep the container alive (daemon mode)
#   -bash  drop into an interactive shell after the services start

: ${HADOOP_PREFIX:=/usr/local/hadoop}

# Source (not execute) hadoop-env.sh so its exports affect THIS shell;
# executing it would only set them in a throwaway subshell.
. "$HADOOP_PREFIX/etc/hadoop/hadoop-env.sh"

# Remove stale pid files from a previous run; -f keeps this from failing
# when there are none.
rm -f /tmp/*.pid

# Installing libraries if any - (resource urls added comma separated to the
# ACP system variable).
cd "$HADOOP_PREFIX/share/hadoop/common" ; for cp in ${ACP//,/ }; do echo == "$cp"; curl -LO "$cp" ; done; cd -

# Adding necessary paths to environment variables (FIXME: these are already
# in the Dockerfile, but ENV values can be lost when the container is
# started with a stripped environment, so re-export them explicitly).
export PATH=$PATH:$SPARK_HOME/bin
export PATH=$PATH:$HADOOP_HOME/bin
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CAFFE_ON_SPARK/caffe-public/distribute/lib:$CAFFE_ON_SPARK/caffe-distri/distribute/lib

service ssh start
$HADOOP_PREFIX/sbin/start-dfs.sh
$HADOOP_PREFIX/sbin/start-yarn.sh

# Daemon mode: block forever so the container stays up.
if [[ $1 == "-d" ]]; then
  while true; do sleep 1000; done
fi

# Interactive mode: hand the terminal to a shell.
if [[ $1 == "-bash" ]]; then
  /bin/bash
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Copyright 2016 Yahoo Inc.
# Licensed under the terms of the Apache 2.0 license.
# Please see LICENSE file in the project root for terms.
#
# This file creates user specific ssh configuration.
# Hadoop's start scripts ssh into localhost, so host-key checks are
# disabled and the container's custom sshd port is used.
#
Host *
# Never record or verify host keys (single-container use only).
UserKnownHostsFile /dev/null
StrictHostKeyChecking no
LogLevel quiet
# sshd listens on 2122 (configured in the Dockerfile).
Port 2122
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
# Copyright 2016 Yahoo Inc.
# Licensed under the terms of the Apache 2.0 license.
# Please see LICENSE file in the project root for terms.
#
# Dockerfile for the CaffeOnSpark GPU standalone version
# (run with `nvidia-docker`).

FROM nvidia/cuda:7.5-cudnn5-devel-ubuntu14.04

# Build tools, Caffe dependencies, SSH, and Java 8 (from the openjdk-r PPA;
# 14.04 only ships Java 7). Install and clean the apt lists in one layer so
# the package cache never ends up baked into the image.
RUN apt-get update && apt-get install -y software-properties-common \
    && add-apt-repository ppa:openjdk-r/ppa \
    && apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        git \
        libatlas-base-dev \
        libboost-all-dev \
        libgflags-dev \
        libgoogle-glog-dev \
        libhdf5-serial-dev \
        libleveldb-dev \
        liblmdb-dev \
        libopenblas-dev \
        libopencv-dev \
        libprotobuf-dev \
        libsnappy-dev \
        maven \
        openjdk-8-jdk \
        openssh-client \
        openssh-server \
        protobuf-compiler \
        python-dev \
        python-numpy \
        python-pip \
        python-scipy \
        unzip \
        vim \
        wget \
        zip \
    && rm -rf /var/lib/apt/lists/*

# Passwordless SSH to localhost (Hadoop's start scripts ssh into the node).
# Note: the original used `ssh-keygen -y`, which prints a public key from an
# EXISTING private key; plain generation (no -y) is what is intended here.
RUN ssh-keygen -q -N "" -t dsa -f /etc/ssh/ssh_host_dsa_key \
    && ssh-keygen -q -N "" -t rsa -f /etc/ssh/ssh_host_rsa_key \
    && ssh-keygen -q -N "" -t rsa -f /root/.ssh/id_rsa \
    && cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys

# Apache Hadoop and Spark. Download, unpack, install, and clean up in a
# single layer; archive.apache.org is used for both downloads because
# third-party mirrors (e.g. apache.mirrors.tds.net) drop old releases.
# `sudo` is unnecessary: Docker builds run as root by default.
RUN wget http://archive.apache.org/dist/hadoop/common/hadoop-2.6.4/hadoop-2.6.4.tar.gz \
    && wget http://archive.apache.org/dist/spark/spark-1.6.0/spark-1.6.0-bin-hadoop2.6.tgz \
    && tar -xzf hadoop-2.6.4.tar.gz \
    && tar -xzf spark-1.6.0-bin-hadoop2.6.tgz \
    && cp -r hadoop-2.6.4 /usr/local/hadoop \
    && cp -r spark-1.6.0-bin-hadoop2.6 /usr/local/spark \
    && rm -rf hadoop-2.6.4.tar.gz spark-1.6.0-bin-hadoop2.6.tgz \
              hadoop-2.6.4/ spark-1.6.0-bin-hadoop2.6/ \
    && mkdir -p /usr/local/hadoop/hadoop_data/hdfs/namenode \
    && mkdir -p /usr/local/hadoop/hadoop_data/hdfs/datanode

# Environment variables (key=value form; the space-separated ENV form is
# deprecated).
ENV JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-amd64
ENV HADOOP_HOME=/usr/local/hadoop
ENV SPARK_HOME=/usr/local/spark
ENV PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$SPARK_HOME/bin:$SPARK_HOME/sbin
ENV HADOOP_MAPRED_HOME=/usr/local/hadoop \
    HADOOP_COMMON_HOME=/usr/local/hadoop \
    HADOOP_HDFS_HOME=/usr/local/hadoop \
    HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop \
    YARN_CONF_DIR=/usr/local/hadoop/etc/hadoop \
    YARN_HOME=/usr/local/hadoop \
    HADOOP_COMMON_LIB_NATIVE_DIR=/usr/local/hadoop/lib/native \
    HADOOP_OPTS="-Djava.library.path=/usr/local/hadoop/lib"

# Clone CaffeOnSpark (recursively, for the caffe-public/caffe-distri
# submodules).
ENV CAFFE_ON_SPARK=/opt/CaffeOnSpark
WORKDIR $CAFFE_ON_SPARK
RUN git clone https://github.com/yahoo/CaffeOnSpark.git . --recursive

# Some of the Hadoop part extracted from
# "https://hub.docker.com/r/sequenceiq/hadoop-docker/~/dockerfile/".
# (The original `RUN cd /usr/local/hadoop/input` was a no-op: each RUN
# starts a fresh shell, so a bare `cd` has no lasting effect.)
RUN mkdir $HADOOP_HOME/input \
    && cp $HADOOP_HOME/etc/hadoop/*.xml $HADOOP_HOME/input

# CaffeOnSpark ships Hadoop site configs; copy the .xml files over.
RUN cp ${CAFFE_ON_SPARK}/scripts/*.xml ${HADOOP_HOME}/etc/hadoop

# Format the HDFS namenode.
RUN $HADOOP_HOME/bin/hdfs namenode -format

# SSH client config: no host-key prompts, sshd's custom port 2122.
ADD config/ssh_config /root/.ssh/config
RUN chown root:root /root/.ssh/config && chmod 600 /root/.ssh/config

# Startup script; must run every time the container starts.
ADD config/bootstrap.sh /etc/bootstrap.sh
RUN chown root:root /etc/bootstrap.sh && chmod 700 /etc/bootstrap.sh
ENV BOOTSTRAP=/etc/bootstrap.sh

# Point hadoop-env.sh at our JAVA_HOME / HADOOP_HOME / HADOOP_CONF_DIR.
# chmod +x works around a docker.io build error seen with these files.
RUN sed -i '/^export JAVA_HOME/ s:.*:export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-amd64\nexport HADOOP_HOME=/usr/local/hadoop\n:' $HADOOP_HOME/etc/hadoop/hadoop-env.sh \
    && sed -i '/^export HADOOP_CONF_DIR/ s:.*:export HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop/:' $HADOOP_HOME/etc/hadoop/hadoop-env.sh \
    && chmod +x /usr/local/hadoop/etc/hadoop/*-env.sh

# Fix the 254 error code: disable PAM and move sshd to port 2122.
RUN sed -i "/^[^#]*UsePAM/ s/.*/#&/" /etc/ssh/sshd_config \
    && echo "UsePAM no" >> /etc/ssh/sshd_config \
    && echo "Port 2122" >> /etc/ssh/sshd_config

# Prime HDFS: create /user/root and upload the Hadoop config dir as `input`.
# hadoop-env.sh is sourced (`.`) rather than executed, so its exports
# actually reach the subsequent commands in the chain.
RUN service ssh start \
    && . $HADOOP_HOME/etc/hadoop/hadoop-env.sh \
    && $HADOOP_HOME/sbin/start-dfs.sh \
    && $HADOOP_HOME/bin/hdfs dfs -mkdir -p /user/root \
    && $HADOOP_HOME/bin/hdfs dfs -put $HADOOP_HOME/etc/hadoop/ input

CMD ["/etc/bootstrap.sh", "-bash"]

# EXPOSE is documentation only; it does not publish ports.
# HDFS ports
EXPOSE 50010 50020 50070 50075 50090 8020 9000
# MapReduce ports
EXPOSE 10020 19888
# YARN ports
EXPOSE 8030 8031 8032 8033 8040 8042 8088
# Other ports (sshd listens on 2122)
EXPOSE 49707 2122

# CaffeOnSpark GPU build: use OpenBLAS and add the JNI headers to the
# include path; CUDA settings come from the nvidia/cuda base image.
WORKDIR $CAFFE_ON_SPARK
RUN cp caffe-public/Makefile.config.example caffe-public/Makefile.config \
    && echo "INCLUDE_DIRS += ${JAVA_HOME}/include" >> caffe-public/Makefile.config \
    && sed -i "s|BLAS := atlas|BLAS := open|g" caffe-public/Makefile.config
# Uncomment to build with cuDNN support:
#RUN sed -i "s/# USE_CUDNN := 1/USE_CUDNN := 1/g" caffe-public/Makefile.config

RUN make build

ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CAFFE_ON_SPARK/caffe-public/distribute/lib:$CAFFE_ON_SPARK/caffe-distri/distribute/lib

WORKDIR /root
Oops, something went wrong.