Skip to content

Commit

Permalink
initial commit.
Browse files Browse the repository at this point in the history
  • Loading branch information
stmcpherson committed Sep 25, 2013
1 parent 71f3f8a commit 3cf8897
Show file tree
Hide file tree
Showing 26 changed files with 2,790 additions and 3 deletions.
12 changes: 12 additions & 0 deletions LICENSE.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
Copyright 2011-2013 Amazon.com, Inc. or its affiliates. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License"). You
may not use this file except in compliance with the License. A copy of
the License is located at

http://aws.amazon.com/apache2.0/

or in the "license" file accompanying this file. This file is
distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
ANY KIND, either express or implied. See the License for the specific
language governing permissions and limitations under the License.
19 changes: 16 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,17 @@
bootstrap.actions
=================
# Copyright 2011-2013 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
A Bootstrap Action is a shell script stored in Amazon S3 that Amazon EMR executes on every node of your cluster. Bootstrap actions execute as the Hadoop user by default; they execute with root privileges if you use sudo. From the EMR Command Line Interface you can reference a Bootstrap Action as follows:

This repository holds the Amazon Elastic MapReduce sample bootstrap actions.
--bootstrap-action "s3://myawsbucket/FileName" --args "arg1,arg2"

For more information about EMR Bootstrap actions, see http://docs.aws.amazon.com/ElasticMapReduce/latest/DeveloperGuide/emr-plan-bootstrap.html
Binary file added accumulo/.DS_Store
Binary file not shown.
28 changes: 28 additions & 0 deletions accumulo/Launch Accumulo using EMR CLI.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Copyright 2011-2013 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.

The following code sample is part of an AWS article demonstrating the installation of Accumulo on Amazon EMR: http://aws.amazon.com/articles/Elastic-MapReduce/2065170233315712

Run the following command using the Amazon EMR Command Line Interface and replace the text in capital letters as follows. Check the Amazon EMR Bootstrap Actions repository for the latest Accumulo installation script.

elastic-mapreduce --create --alive --name "Accumulo" --bootstrap-action \
s3://elasticmapreduce/samples/accumulo/accumulo-install.sh \
--args "IP,DBNAME,PASSWORD" --bootstrap-name "install Accumulo" \
--enable-debugging --log-uri s3://BUCKETNAME/accumulo-logs/ \
--instance-type m1.large --instance-count 4 --key-pair KEY

IP: IP address of a Zookeeper node
DBNAME: Name of the database that you would like to create in Accumulo
PASSWORD: Accumulo DB password
KEY: Your Amazon EC2 SSH key-pair name
BUCKETNAME: Your Amazon S3 bucket name where the Amazon EMR logs will be uploaded
60 changes: 60 additions & 0 deletions accumulo/accumulo-install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Copyright 2011-2013 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.

# accumulo-install.sh -- EMR bootstrap action that schedules the Accumulo
# 1.4.2 installation on this node.
#
# Arguments:
#   $1  IP address of a ZooKeeper node (substituted into accumulo-site.xml)
#   $2  Accumulo instance name, answered at the "Instance name" prompt
#   $3  Accumulo password, answered at the password prompts
#
# The script does not install anything itself. It writes
# /home/hadoop/accumulo.sh and registers it in /etc/crontab to run every
# minute; the generated script waits until a Hadoop datanode/namenode process
# is running, performs the install once, and then removes its crontab entry.

# Fail fast: with fewer than three arguments the cron job would be registered
# with empty values and "accumulo init" would be driven with empty answers.
if [ "$#" -lt 3 ]; then
  echo "usage: accumulo-install.sh ZOOKEEPER_IP DBNAME PASSWORD" >&2
  exit 1
fi

cd /home/hadoop || exit 1

# The delimiter is quoted so that $1/$2/$3 below are NOT expanded now; they
# refer to the arguments cron passes to accumulo.sh when it runs.
cat > /home/hadoop/accumulo.sh << 'EOF2'
# Generated by accumulo-install.sh. Invoked from /etc/crontab as:
#   bash /home/hadoop/accumulo.sh ZOOKEEPER_IP DBNAME PASSWORD
# Does nothing until a Hadoop datanode or namenode process shows up in ps.
if ps ax | grep -v grep | grep -E "datanode|namenode" > /dev/null
then
  # Only install when the Accumulo directory does not exist yet.
  if [ ! -d "/home/hadoop/accumulo-1.4.2" ]; then
    # All paths below are relative; never continue from an unexpected directory.
    cd /home/hadoop/ || exit 1
    sudo apt-get -y install zookeeper expect
    wget http://mirrors.sonic.net/apache/accumulo/1.4.2/accumulo-1.4.2-dist.tar.gz
    tar -xvzf accumulo-1.4.2-dist.tar.gz
    ##cp -a accumulo-1.4.2 /home/hadoop/
    cp accumulo-1.4.2/conf/examples/1GB/standalone/* accumulo-1.4.2/conf/
    # Point Accumulo at the ZooKeeper node given as the first argument.
    sed -i "s/<value>localhost:2181<\/value>/<value>$1:2181<\/value>/" accumulo-1.4.2/conf/accumulo-site.xml
    cat >> accumulo-1.4.2/conf/accumulo-env.sh << EOF
export ACCUMULO_HOME=/home/hadoop/accumulo-1.4.2
export HADOOP_HOME=/home/hadoop
export ACCUMULO_LOG_DIR=/mnt/var/log/hadoop
export ZOOKEEPER_HOME=/usr/share/java
export JAVA_HOME=/usr/lib/jvm/java-6-sun
EOF
    # instance.json carries "isMaster":true on the master node.
    if grep -Fq '"isMaster":true' /mnt/var/lib/info/instance.json
    then
      # Master: answer the interactive "accumulo init" prompts.
      # NOTE(review): $2/$3 are spliced into the expect (Tcl) script unescaped;
      # values containing quotes, brackets or dollar signs will be
      # misinterpreted -- confirm the accepted character set.
      expect -c "
spawn accumulo-1.4.2/bin/accumulo init
expect -nocase \"Instance name\" {send \"$2\r\"}
expect -nocase \"Enter initial password for*\" {send \"$3\r\"}
expect -nocase \"*password*\" {send \"$3\r\r\";expect eof}"
      hostname > accumulo-1.4.2/conf/masters
      echo 'x' > accumulo-1.4.2/conf/slaves
    else
      # Worker: list this host as a slave and take the master address from
      # the job tracker entry in mapred-site.xml.
      hostname > accumulo-1.4.2/conf/slaves
      MASTER=$(grep -i "job.tracker<" /home/hadoop/conf/mapred-site.xml | grep -o '[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}')
      printf '%s\n' "$MASTER" > accumulo-1.4.2/conf/masters
    fi
    accumulo-1.4.2/bin/start-here.sh
    # Blank out the cron entry that triggered this run.
    # NOTE(review): this runs even if the download or init above failed, so a
    # failed install is not retried -- confirm that is intended.
    sudo sed -i 's/.*accumulo.*//' /etc/crontab
  fi
fi
EOF2

# Register the every-minute cron job. The line is piped to "sudo tee -a"
# instead of being embedded in a root "sh -c" command string, so the arguments
# are appended literally and a single quote in one of them cannot break out of
# the quoting and run as root.
# NOTE(review): the password still ends up in /etc/crontab and on the command
# line of accumulo.sh, and cron treats an unescaped '%' in the command
# specially -- confirm these are acceptable for the values used.
printf '%s\n' "*/1 * * * * hadoop bash /home/hadoop/accumulo.sh $1 $2 $3 > /home/hadoop/cron.log 2>&1 " \
  | sudo tee -a /etc/crontab > /dev/null
Binary file added configurations/.DS_Store
Binary file not shown.
91 changes: 91 additions & 0 deletions configurations/configure-daemons
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#!/bin/bash
# Copyright 2011-2013 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.

# configure-daemons -- append per-daemon JVM settings to
# /home/hadoop/conf/hadoop-user-env.sh.
#
# Options:
#   --<daemon>-heap-size=<megabytes>  appends HADOOP_<DAEMON>_HEAPSIZE=<megabytes>
#   --<daemon>-opts=<jvm options>     appends HADOOP_<DAEMON>_OPTS="<jvm options>"
#   --replace                         delete the existing env file first
#   --help                            print usage and exit with status 1
#
# All arguments are validated before the env file is touched; any malformed or
# unknown argument aborts with status 1. The interpreter line is kept on the
# very first line of the file, where it has to be to take effect.

set -e

# first validate the arguments
REPLACE_FILE=false
for i in "$@" ; do
  case "$i" in
    --*-heap-size*)
      # Anchored on both sides so the whole argument must be well formed;
      # "$i" is quoted so its value is not word-split or glob-expanded.
      if ! printf '%s\n' "$i" | grep -E -- '^--[a-zA-Z]+-heap-size=[0-9]+$' > /dev/null 2>&1 ; then
        echo "Couldn't parse option $i expected --cmd-heap-size=1023 where cmd is jobtracker or some such and 1023 is the number of megabytes to allocate to the Java process for that command" 1>&2
        exit 1
      fi
      ;;
    --*-opts*)
      if ! printf '%s\n' "$i" | grep -E -- '^--[a-zA-Z]+-opts=.+' > /dev/null 2>&1 ; then
        echo "Couldn't parse option $i expected --cmd-opts=-XX:+UseG1GC where cmd is jobtracker or some such and -XX:+UseG1GC is the option to pass to the JVM" 1>&2
        exit 1
      fi
      ;;
    --help)
      set +x
      echo "Usage: "
      echo "--<daemon>-heap-size"
      echo " Set the heap size in megabytes for the specified daemon."
      echo " "
      echo "--<daemon>-opts"
      echo " Set additional Java options for the specified daemon."
      echo " "
      echo "--replace"
      echo " Replace the existing hadoop-user-env.sh file if it exists."
      echo " "
      echo "<daemon> is one of:"
      echo " namenode, datanode, jobtracker, tasktracker, client"
      echo " "
      echo " "
      echo "Example Usage:"
      echo " --namenode-heap-size=2048 --namenode-opts=\"-XX:GCTimeRatio=19\""
      exit 1
      ;;
    --replace)
      REPLACE_FILE=true
      ;;
    *)
      echo "Unknown option $i" 1>&2
      exit 1
      ;;
  esac
done

# Trace the commands that actually modify the env file.
set -x

HADOOP_ENV_FILE=/home/hadoop/conf/hadoop-user-env.sh

# POSIX "=" (not "==") so the test also works when run by a plain sh.
if [ "$REPLACE_FILE" = "true" ] ; then
  rm -rf -- "$HADOOP_ENV_FILE"
fi

echo "#!/bin/bash" >> "$HADOOP_ENV_FILE"

# Second pass: every argument is known to be valid here, so just translate
# each setting into a shell assignment appended to the env file.
for i in "$@" ; do
  case "$i" in
    --*-heap-size=*)
      # "--namenode-heap-size=2048" -> HADOOP_NAMENODE_HEAPSIZE=2048
      daemon=${i#--}
      daemon=${daemon%%-heap-size=*}
      HEAP_SIZE_CMD=HADOOP_$(printf '%s' "$daemon" | tr 'a-z' 'A-Z')_HEAPSIZE
      HEAP_SIZE_VALUE=${i#*-heap-size=}
      printf '%s=%s\n' "$HEAP_SIZE_CMD" "$HEAP_SIZE_VALUE" >> "$HADOOP_ENV_FILE"
      ;;
    --*-opts*)
      # "--namenode-opts=-XX:GCTimeRatio=19" -> HADOOP_NAMENODE_OPTS="-XX:GCTimeRatio=19"
      # The value is written between double quotes without further escaping,
      # exactly as supplied.
      daemon=${i#--}
      daemon=${daemon%%-opts=*}
      OPTS_CMD=HADOOP_$(printf '%s' "$daemon" | tr 'a-z' 'A-Z')_OPTS
      OPTS_VALUE=${i#*-opts=}
      printf '%s="%s"\n' "$OPTS_CMD" "$OPTS_VALUE" >> "$HADOOP_ENV_FILE"
      ;;
  esac
done
Loading

0 comments on commit 3cf8897

Please sign in to comment.