Skip to content

Commit

Permalink
Add hadoop scripts (#260)
Browse files Browse the repository at this point in the history
* add entrypoint to all containers

* adding hadoop scripts to docker dir
  • Loading branch information
jperez999 authored May 3, 2022
1 parent 723b419 commit 6153712
Show file tree
Hide file tree
Showing 2 changed files with 110 additions and 0 deletions.
39 changes: 39 additions & 0 deletions docker/build-hadoop.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/usr/bin/env bash
# Build a native Hadoop distribution tarball from source.
#
# Usage:   build-hadoop.sh <hadoop-version>
# Example: build-hadoop.sh "3.3.2"
#
# Produces hadoop-<version>.tar.gz next to this script, then removes the
# build tree, the local Maven cache, and (last) this script itself.

# Abort on any failed step so we never mv/rm after a broken clone or build.
set -euo pipefail

if [[ "$#" != 1 ]]; then
  echo "ERROR: Must provide Hadoop version number!" >&2
  echo "  Example: ${BASH_SOURCE[0]} \"3.3.2\"" >&2
  exit 1
fi
HADOOP_VER="$1"

SCRIPT_DIR=$(dirname "${BASH_SOURCE[0]}")
cd "${SCRIPT_DIR}"

# Download only the desired release tag (shallow clone keeps it small).
git clone --branch "rel/release-${HADOOP_VER}" --depth 1 \
  https://github.com/apache/hadoop.git hadoop
cd hadoop

# Temporarily disable name resolution for the jboss repository when it is
# unreachable, so Maven fails fast instead of stalling on each artifact.
# NVIDIA IT ticket number: "INC0866408"
# (curl's stderr is silenced inside the substitution; an empty response
# body means the repository could not be reached.)
if [[ -z "$(curl --connect-timeout 5 https://repository.jboss.org 2>/dev/null)" ]]; then
  echo 'Unable to connect to repository.jboss.org. Disabling...'
  echo '127.0.0.1 repository.jboss.org' >> /etc/hosts
fi

# Build the native Hadoop distribution (snappy/zstd/openssl/pmdk required).
mvn clean package \
  -Pdist,native \
  -DskipTests \
  -Dtar -Dmaven.javadoc.skip=true \
  -Drequire.snappy \
  -Drequire.zstd \
  -Drequire.openssl \
  -Drequire.pmdk

# Move the compiled distribution to $SCRIPT_DIR and delete temporary files
# (the checkout and the Maven cache, which only bloat the image).
mv "hadoop-dist/target/hadoop-${HADOOP_VER}.tar.gz" ..
cd ..
rm -rf hadoop /root/.m2

# Self-delete: this script is only needed once during the image build.
rm -f -- "${BASH_SOURCE[0]}"
71 changes: 71 additions & 0 deletions docker/install-hadoop.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#!/usr/bin/env bash
# Install a pre-built Hadoop distribution into ${HADOOP_HOME} and write a
# minimalist single-node configuration (development use only).
#
# Usage:   install-hadoop.sh <hadoop-version>
# Example: install-hadoop.sh "3.3.2"
#
# Requires: HADOOP_HOME and JAVA_HOME set in the environment, and
# hadoop-<version>.tar.gz next to this script (see build-hadoop.sh).

# Abort on any failed step; treat unset variables as errors.
set -euo pipefail

if [[ "$#" != 1 ]]; then
  echo "ERROR: Must provide Hadoop version number!" >&2
  echo "  Example: ${BASH_SOURCE[0]} \"3.3.2\"" >&2
  exit 1
fi
HADOOP_VER="$1"

# Fail early with a clear message instead of scattering files under "/".
: "${HADOOP_HOME:?HADOOP_HOME must be set}"
: "${JAVA_HOME:?JAVA_HOME must be set}"

SCRIPT_DIR=$(dirname "${BASH_SOURCE[0]}")
cd "${SCRIPT_DIR}"

# Extract files and delete archive.
mkdir -p "${HADOOP_HOME}/logs"
tar xf "hadoop-${HADOOP_VER}.tar.gz" --strip-components 1 --directory "${HADOOP_HOME}"
rm -f "hadoop-${HADOOP_VER}.tar.gz"

# Cleanup redundant Windows batch files. The pattern is quoted so the shell
# cannot expand it, and -delete avoids word-splitting paths in a for-loop.
find "${HADOOP_HOME}" -name '*.cmd' -delete

# Pretend that the package has been installed like any other.
ln -s "${HADOOP_HOME}/include/hdfs.h" /usr/local/include/hdfs.h
ln -s "${HADOOP_HOME}/lib/native/libhdfs.so" /usr/local/lib/libhdfs.so
ln -s "${HADOOP_HOME}/lib/native/libhdfs.so.0.0.0" /usr/local/lib/libhdfs.so.0.0.0
ln -s "${HADOOP_HOME}/lib/native/libhadoop.so" /usr/local/lib/libhadoop.so
ln -s "${HADOOP_HOME}/lib/native/libhadoop.so.1.0.0" /usr/local/lib/libhadoop.so.1.0.0

# Create minimalist single-node "default" configuration.
# Uncomment JAVA_HOME in hadoop-env.sh, escaping the slashes in its value
# so they survive the sed replacement.
sed -i "s/^# export JAVA_HOME=$/export JAVA_HOME=${JAVA_HOME//\//\\\/}/g" \
  "${HADOOP_HOME}/etc/hadoop/hadoop-env.sh"

echo '<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Single-node dummy configuration -->
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:9000</value>
</property>
</configuration>' > "${HADOOP_HOME}/etc/hadoop/core-site.xml"

echo '<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Single-node dummy configuration -->
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
</configuration>' > "${HADOOP_HOME}/etc/hadoop/hdfs-site.xml"

# Generate a passwordless key and authorize it so start-dfs.sh can ssh to
# localhost without prompting (the empty here-string accepts the default
# key file location).
ssh-keygen -q -t ecdsa -b 521 -N "" <<< ""
cat "$HOME/.ssh/id_ecdsa.pub" >> "$HOME/.ssh/authorized_keys"

# Register the freshly symlinked native libraries with the loader.
ldconfig
echo "
Hadoop version: $(hadoop version)
To run a single-node hadoop instance (for development only):
    hadoop namenode -format
    service ssh start
    start-dfs.sh
"

# Self-delete: this script is only needed once during the image build.
rm -f -- "${BASH_SOURCE[0]}"

0 comments on commit 6153712

Please sign in to comment.