@@ -0,0 +1,459 @@
+#!/bin/bash -x
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+################################################################################
+# Script that is run on each instance on boot.
+################################################################################
+
+################################################################################
+# Initialize variables
+################################################################################
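+# Private IP address of this instance (assumes the classic "inet addr:"
+# ifconfig output format of the distributions this script targets).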
+SELF_HOST=`/sbin/ifconfig eth0 | grep 'inet addr:' | cut -d: -f2 | awk '{ print $1}'`
+HADOOP_VERSION=${HADOOP_VERSION:-0.20.1}
+HADOOP_HOME=/usr/local/hadoop-$HADOOP_VERSION
+HADOOP_CONF_DIR=$HADOOP_HOME/conf
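+# ROLES is a comma-separated list of the roles this instance runs (for example
+# "nn,jt"); this first pass only records the namenode and jobtracker hosts.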
+for role in $(echo "$ROLES" | tr "," "\n"); do
+  case $role in
+  nn)
+    NN_HOST=$SELF_HOST
+    ;;
+  jt)
+    JT_HOST=$SELF_HOST
+    ;;
+  esac
+done
+
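+# Schedule the instance to power off after $AUTO_SHUTDOWN minutes, if set.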
+function register_auto_shutdown() {
+  if [ ! -z "$AUTO_SHUTDOWN" ]; then
+    shutdown -h +$AUTO_SHUTDOWN >/dev/null &
+  fi
+}
+
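+# Refresh the package manager: update the apt package index on Debian-style
+# systems, or update yum itself on Red Hat-style systems.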
+function update_repo() {
+  if which dpkg &> /dev/null; then
+    sudo apt-get update
+  elif which rpm &> /dev/null; then
+    yum update -y yum
+  fi
+}
+
+# Install a list of packages on debian or redhat as appropriate
+function install_packages() {
+  if which dpkg &> /dev/null; then
+    apt-get update
+    apt-get -y install $@
+  elif which rpm &> /dev/null; then
+    yum install -y $@
+  else
+    echo "No package manager found."
+  fi
+}
+
+# Install any user packages specified in the USER_PACKAGES environment variable
+function install_user_packages() {
+  if [ ! -z "$USER_PACKAGES" ]; then
+    install_packages $USER_PACKAGES
+  fi
+}
+
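+# Create the hadoop user, download the Hadoop release tarball (retrying up to
+# three times and verifying its MD5 checksum), and unpack it under /usr/local.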
+function install_hadoop() {
+  useradd hadoop
+
+  hadoop_tar_url=http://s3.amazonaws.com/hadoop-releases/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz
+  hadoop_tar_file=`basename $hadoop_tar_url`
+  hadoop_tar_md5_file=`basename $hadoop_tar_url.md5`
+
+  curl="curl --retry 3 --silent --show-error --fail"
+  for i in `seq 1 3`;
+  do
+    $curl -O $hadoop_tar_url
+    $curl -O $hadoop_tar_url.md5
+    if md5sum -c $hadoop_tar_md5_file; then
+      break;
+    else
+      rm -f $hadoop_tar_file $hadoop_tar_md5_file
+    fi
+  done
+
+  if [ ! -e $hadoop_tar_file ]; then
+    echo "Failed to download $hadoop_tar_url. Aborting."
+    exit 1
+  fi
+
+  tar zxf $hadoop_tar_file -C /usr/local
+  rm -f $hadoop_tar_file $hadoop_tar_md5_file
+
+  echo "export HADOOP_HOME=$HADOOP_HOME" >> ~root/.bashrc
+  echo 'export PATH=$JAVA_HOME/bin:$HADOOP_HOME/bin:$PATH' >> ~root/.bashrc
+}
+
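+# Format a device as XFS and mount it at the given mount point, optionally
+# adding an /etc/fstab entry so it is remounted on reboot. Destroys any
+# existing data on the device.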
+function prep_disk() {
+  mount=$1
+  device=$2
+  automount=${3:-false}
+
+  echo "warning: ERASING CONTENTS OF $device"
+  mkfs.xfs -f $device
+  if [ ! -e $mount ]; then
+    mkdir $mount
+  fi
+  mount -o defaults,noatime $device $mount
+  if $automount ; then
+    echo "$device $mount xfs defaults,noatime 0 0" >> /etc/fstab
+  fi
+}
+
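+# Wait for a device to become mountable, retrying every ten seconds until the
+# mount succeeds.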
+function wait_for_mount {
+  mount=$1
+  device=$2
+
+  mkdir $mount
+
+  i=1
+  echo "Attempting to mount $device"
+  while true ; do
+    sleep 10
+    echo -n "$i "
+    i=$((i+1))
+    mount -o defaults,noatime $device $mount || continue
+    echo " Mounted."
+    break;
+  done
+}
+
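+# Create a hadoop-owned "hadoop" directory on each of the given mount points.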
+function make_hadoop_dirs {
+  for mount in "$@"; do
+    if [ ! -e $mount/hadoop ]; then
+      mkdir -p $mount/hadoop
+      chown hadoop:hadoop $mount/hadoop
+    fi
+  done
+}
+
+# Configure Hadoop by setting up disks and site file
+function configure_hadoop() {
+
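+  # Storage layout and per-node tuning values substituted into hadoop-site.xml
+  # below; adjust them here if your instance size differs.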
+  MOUNT=/data
+  FIRST_MOUNT=$MOUNT
+  DFS_NAME_DIR=$MOUNT/hadoop/hdfs/name
+  FS_CHECKPOINT_DIR=$MOUNT/hadoop/hdfs/secondary
+  DFS_DATA_DIR=$MOUNT/hadoop/hdfs/data
+  MAPRED_LOCAL_DIR=$MOUNT/hadoop/mapred/local
+  MAX_MAP_TASKS=2
+  MAX_REDUCE_TASKS=1
+  CHILD_OPTS=-Xmx550m
+  CHILD_ULIMIT=1126400
+  TMP_DIR=$MOUNT/tmp/hadoop-\${user.name}
+
+  mkdir -p $MOUNT/hadoop
+  chown hadoop:hadoop $MOUNT/hadoop
+  mkdir $MOUNT/tmp
+  chmod a+rwxt $MOUNT/tmp
+
+  mkdir /etc/hadoop
+  ln -s $HADOOP_CONF_DIR /etc/hadoop/conf
+
+  ##############################################################################
+  # Modify this section to customize your Hadoop cluster.
+  ##############################################################################
+  cat > $HADOOP_CONF_DIR/hadoop-site.xml <<EOF
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+<property>
+  <name>dfs.block.size</name>
+  <value>134217728</value>
+  <final>true</final>
+</property>
+<property>
+  <name>dfs.data.dir</name>
+  <value>$DFS_DATA_DIR</value>
+  <final>true</final>
+</property>
+<property>
+  <name>dfs.datanode.du.reserved</name>
+  <value>1073741824</value>
+  <final>true</final>
+</property>
+<property>
+  <name>dfs.datanode.handler.count</name>
+  <value>3</value>
+  <final>true</final>
+</property>
+<!--property>
+  <name>dfs.hosts</name>
+  <value>$HADOOP_CONF_DIR/dfs.hosts</value>
+  <final>true</final>
+</property-->
+<!--property>
+  <name>dfs.hosts.exclude</name>
+  <value>$HADOOP_CONF_DIR/dfs.hosts.exclude</value>
+  <final>true</final>
+</property-->
+<property>
+  <name>dfs.name.dir</name>
+  <value>$DFS_NAME_DIR</value>
+  <final>true</final>
+</property>
+<property>
+  <name>dfs.namenode.handler.count</name>
+  <value>5</value>
+  <final>true</final>
+</property>
+<property>
+  <name>dfs.permissions</name>
+  <value>true</value>
+  <final>true</final>
+</property>
+<property>
+  <name>dfs.replication</name>
+  <value>$DFS_REPLICATION</value>
+</property>
+<property>
+  <name>fs.checkpoint.dir</name>
+  <value>$FS_CHECKPOINT_DIR</value>
+  <final>true</final>
+</property>
+<property>
+  <name>fs.default.name</name>
+  <value>hdfs://$NN_HOST:8020/</value>
+</property>
+<property>
+  <name>fs.trash.interval</name>
+  <value>1440</value>
+  <final>true</final>
+</property>
+<property>
+  <name>hadoop.tmp.dir</name>
+  <value>/data/tmp/hadoop-\${user.name}</value>
+  <final>true</final>
+</property>
+<property>
+  <name>io.file.buffer.size</name>
+  <value>65536</value>
+</property>
+<property>
+  <name>mapred.child.java.opts</name>
+  <value>$CHILD_OPTS</value>
+</property>
+<property>
+  <name>mapred.child.ulimit</name>
+  <value>$CHILD_ULIMIT</value>
+  <final>true</final>
+</property>
+<property>
+  <name>mapred.job.tracker</name>
+  <value>$JT_HOST:8021</value>
+</property>
+<property>
+  <name>mapred.job.tracker.handler.count</name>
+  <value>5</value>
+  <final>true</final>
+</property>
+<property>
+  <name>mapred.local.dir</name>
+  <value>$MAPRED_LOCAL_DIR</value>
+  <final>true</final>
+</property>
+<property>
+  <name>mapred.map.tasks.speculative.execution</name>
+  <value>true</value>
+</property>
+<property>
+  <name>mapred.reduce.parallel.copies</name>
+  <value>10</value>
+</property>
+<property>
+  <name>mapred.reduce.tasks</name>
+  <value>10</value>
+</property>
+<property>
+  <name>mapred.reduce.tasks.speculative.execution</name>
+  <value>false</value>
+</property>
+<property>
+  <name>mapred.submit.replication</name>
+  <value>10</value>
+</property>
+<property>
+  <name>mapred.system.dir</name>
+  <value>/hadoop/system/mapred</value>
+</property>
+<property>
+  <name>mapred.tasktracker.map.tasks.maximum</name>
+  <value>$MAX_MAP_TASKS</value>
+  <final>true</final>
+</property>
+<property>
+  <name>mapred.tasktracker.reduce.tasks.maximum</name>
+  <value>$MAX_REDUCE_TASKS</value>
+  <final>true</final>
+</property>
+<property>
+  <name>tasktracker.http.threads</name>
+  <value>46</value>
+  <final>true</final>
+</property>
+<property>
+  <name>mapred.compress.map.output</name>
+  <value>true</value>
+</property>
+<property>
+  <name>mapred.output.compression.type</name>
+  <value>BLOCK</value>
+</property>
+<property>
+  <name>hadoop.rpc.socket.factory.class.default</name>
+  <value>org.apache.hadoop.net.StandardSocketFactory</value>
+  <final>true</final>
+</property>
+<property>
+  <name>hadoop.rpc.socket.factory.class.ClientProtocol</name>
+  <value></value>
+  <final>true</final>
+</property>
+<property>
+  <name>hadoop.rpc.socket.factory.class.JobSubmissionProtocol</name>
+  <value></value>
+  <final>true</final>
+</property>
+<property>
+  <name>io.compression.codecs</name>
+  <value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec</value>
+</property>
+</configuration>
+EOF
+
+  # Keep PID files in a non-temporary directory
+  sed -i -e "s|# export HADOOP_PID_DIR=.*|export HADOOP_PID_DIR=/var/run/hadoop|" \
+    $HADOOP_CONF_DIR/hadoop-env.sh
+  mkdir -p /var/run/hadoop
+  chown -R hadoop:hadoop /var/run/hadoop
+
+  # Set SSH options within the cluster
+  sed -i -e 's|# export HADOOP_SSH_OPTS=.*|export HADOOP_SSH_OPTS="-o StrictHostKeyChecking=no"|' \
+    $HADOOP_CONF_DIR/hadoop-env.sh
+
+  # Disable IPv6
+  sed -i -e 's|# export HADOOP_OPTS=.*|export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true"|' \
+    $HADOOP_CONF_DIR/hadoop-env.sh
+
+  # Hadoop logs should be on the /data partition
+  sed -i -e 's|# export HADOOP_LOG_DIR=.*|export HADOOP_LOG_DIR=/var/log/hadoop/logs|' \
+    $HADOOP_CONF_DIR/hadoop-env.sh
+  rm -rf /var/log/hadoop
+  mkdir /data/hadoop/logs
+  chown hadoop:hadoop /data/hadoop/logs
+  ln -s /data/hadoop/logs /var/log/hadoop
+  chown -R hadoop:hadoop /var/log/hadoop
+
+}
+
+# Sets up a small status page, served by thttpd, with links to the cluster web UIs.
+function setup_web() {
+
+  if which dpkg &> /dev/null; then
+    apt-get -y install thttpd
+    WWW_BASE=/var/www
+  elif which rpm &> /dev/null; then
+    yum install -y thttpd
+    chkconfig --add thttpd
+    WWW_BASE=/var/www/thttpd/html
+  fi
+
+  cat > $WWW_BASE/index.html << END
+<html>
+<head>
+<title>Hadoop Cloud Cluster</title>
+</head>
+<body>
+<h1>Hadoop Cloud Cluster</h1>
+To browse the cluster you need to have a proxy configured.
+Start the proxy with <tt>hadoop-cloud proxy &lt;cluster_name&gt;</tt>,
+and point your browser to
+<a href="http://apache-hadoop-ec2.s3.amazonaws.com/proxy.pac">this Proxy
+Auto-Configuration (PAC)</a> file. To manage multiple proxy configurations,
+you may wish to use
+<a href="https://addons.mozilla.org/en-US/firefox/addon/2464">FoxyProxy</a>.
+<ul>
+<li><a href="http://$NN_HOST:50070/">NameNode</a>
+<li><a href="http://$JT_HOST:50030/">JobTracker</a>
+</ul>
+</body>
+</html>
+END
+
+  service thttpd start
+
+}
+
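+# Format HDFS on first boot, start the namenode, and once it leaves safe mode
+# create a /user directory writable by all users.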
+function start_namenode() {
+  if which dpkg &> /dev/null; then
+    AS_HADOOP="su -s /bin/bash - hadoop -c"
+  elif which rpm &> /dev/null; then
+    AS_HADOOP="/sbin/runuser -s /bin/bash - hadoop -c"
+  fi
+
+  # Format HDFS
+  [ ! -e $FIRST_MOUNT/hadoop/hdfs ] && $AS_HADOOP "$HADOOP_HOME/bin/hadoop namenode -format"
+
+  $AS_HADOOP "$HADOOP_HOME/bin/hadoop-daemon.sh start namenode"
+
+  $AS_HADOOP "$HADOOP_HOME/bin/hadoop dfsadmin -safemode wait"
+  $AS_HADOOP "$HADOOP_HOME/bin/hadoop fs -mkdir /user"
+  # The following is questionable, as it allows a user to delete another user's
+  # directory; it's needed to allow users to create their own user directories.
+  $AS_HADOOP "$HADOOP_HOME/bin/hadoop fs -chmod +w /user"
+
+}
+
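+# Start the named Hadoop daemon ($1) as the hadoop user.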
+function start_daemon() {
+  if which dpkg &> /dev/null; then
+    AS_HADOOP="su -s /bin/bash - hadoop -c"
+  elif which rpm &> /dev/null; then
+    AS_HADOOP="/sbin/runuser -s /bin/bash - hadoop -c"
+  fi
+  $AS_HADOOP "$HADOOP_HOME/bin/hadoop-daemon.sh start $1"
+}
+
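+################################################################################
+# Main: prepare the instance, then start the daemons for this node's roles.
+################################################################################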
+update_repo
+register_auto_shutdown
+install_user_packages
+install_hadoop
+configure_hadoop
+
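+# Role codes: nn = namenode, snn = secondary namenode, jt = jobtracker,
+# dn = datanode, tt = tasktracker.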
+for role in $(echo "$ROLES" | tr "," "\n"); do
+  case $role in
+  nn)
+    setup_web
+    start_namenode
+    ;;
+  snn)
+    start_daemon secondarynamenode
+    ;;
+  jt)
+    start_daemon jobtracker
+    ;;
+  dn)
+    start_daemon datanode
+    ;;
+  tt)
+    start_daemon tasktracker
+    ;;
+  esac
+done
+