HADOOP-6255. Create RPM and Debian packages for common. Changes deployment
layout to be consistent across the binary tgz, rpm, and deb. Adds setup
scripts for easy one node cluster configuration and user creation.
(Eric Yang via omalley)


git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.20-security-204@1128390 13f79535-47bb-0310-9956-ffa450edef68

Owen O'Malley 14 years ago
parent
commit
4ac0f5aa28
47 changed files with 2590 additions and 69 deletions
  1. 7 0
      CHANGES.txt
  2. 25 9
      bin/hadoop
  3. 12 12
      bin/hadoop-config.sh
  4. 4 4
      bin/hadoop-daemon.sh
  5. 1 1
      bin/hadoop-daemons.sh
  6. 1 1
      bin/rcc
  7. 1 1
      bin/slaves.sh
  8. 1 1
      bin/start-all.sh
  9. 1 1
      bin/start-balancer.sh
  10. 1 1
      bin/start-dfs.sh
  11. 1 1
      bin/start-jobhistoryserver.sh
  12. 1 1
      bin/start-mapred.sh
  13. 1 1
      bin/stop-all.sh
  14. 1 1
      bin/stop-balancer.sh
  15. 1 1
      bin/stop-dfs.sh
  16. 1 1
      bin/stop-jobhistoryserver.sh
  17. 1 1
      bin/stop-mapred.sh
  18. 206 28
      build.xml
  19. 6 0
      conf/log4j.properties
  20. 6 1
      ivy.xml
  21. 2 1
      ivy/libraries.properties
  22. 147 0
      src/docs/src/documentation/content/xdocs/deployment_layout.xml
  23. 2 1
      src/docs/src/documentation/content/xdocs/site.xml
  24. 1 0
      src/packages/deb/hadoop.control/conffile
  25. 9 0
      src/packages/deb/hadoop.control/control
  26. 24 0
      src/packages/deb/hadoop.control/postinst
  27. 21 0
      src/packages/deb/hadoop.control/postrm
  28. 22 0
      src/packages/deb/hadoop.control/preinst
  29. 29 0
      src/packages/deb/hadoop.control/prerm
  30. 142 0
      src/packages/deb/init.d/hadoop-datanode
  31. 142 0
      src/packages/deb/init.d/hadoop-jobtracker
  32. 154 0
      src/packages/deb/init.d/hadoop-namenode
  33. 142 0
      src/packages/deb/init.d/hadoop-tasktracker
  34. 76 0
      src/packages/hadoop-create-user.sh
  35. 279 0
      src/packages/hadoop-setup-conf.sh
  36. 53 0
      src/packages/hadoop-setup-hdfs.sh
  37. 215 0
      src/packages/hadoop-setup-single-node.sh
  38. 84 0
      src/packages/rpm/init.d/hadoop-datanode
  39. 84 0
      src/packages/rpm/init.d/hadoop-jobtracker
  40. 98 0
      src/packages/rpm/init.d/hadoop-namenode
  41. 84 0
      src/packages/rpm/init.d/hadoop-tasktracker
  42. 194 0
      src/packages/rpm/spec/hadoop.spec
  43. 11 0
      src/packages/templates/conf/core-site.xml
  44. 64 0
      src/packages/templates/conf/hadoop-env.sh
  45. 23 0
      src/packages/templates/conf/hdfs-site.xml
  46. 31 0
      src/packages/templates/conf/mapred-site.xml
  47. 178 0
      src/packages/update-hadoop-env.sh
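
The packaging work is driven by two new Ant targets, rpm and deb, added to build.xml below, plus setup scripts installed under sbin. A minimal sketch of building and using the packages, assuming ant, a JDK, rpmbuild (needed by the rpm target) and the jdeb library (pulled in via ivy) are available; paths and sudo use are illustrative:

    # Build the relocated binary layout, then the native packages.
    ant binary   # assembles the new layout and the -bin tarball under build/
    ant rpm      # writes hadoop-*.rpm under build/
    ant deb      # writes hadoop_*-1_*.deb under build/

    # After installing a package, configure a one-node cluster with the
    # new setup script (installed into ${package.prefix}/sbin):
    sudo /usr/sbin/hadoop-setup-single-node.sh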

+ 7 - 0
CHANGES.txt

@@ -6,6 +6,13 @@ Release 0.20.205.0 - unreleased
 
 Release 0.20.204.0 - unreleased
 
+  NEW FEATURES
+
+    HADOOP-6255. Create RPM and Debian packages for common. Changes deployment
+    layout to be consistent across the binary tgz, rpm, and deb. Adds setup
+    scripts for easy one node cluster configuration and user creation.
+    (Eric Yang via omalley)
+
   BUG FIXES
 
     MAPREDUCE-2447. Fix Child.java to set Task.jvmContext sooner to avoid

+ 25 - 9
bin/hadoop

@@ -50,7 +50,12 @@
 bin=`dirname "$0"`
 bin=`cd "$bin"; pwd`
 
-. "$bin"/hadoop-config.sh
+if [ "$HADOOP_HOME" != "" ]; then
+  echo "Warning: \$HADOOP_HOME is deprecated."
+  echo
+fi
+
+. "$bin"/../libexec/hadoop-config.sh
 
 cygwin=false
 case "`uname`" in
@@ -308,6 +313,10 @@ if [ -d "${HADOOP_HOME}/build/native" -o -d "${HADOOP_HOME}/lib/native" ]; then
     fi
   fi
 fi
+if [ -e "${HADOOP_PREFIX}/lib/libhadoop.a" ]; then
+  JAVA_LIBRARY_PATH=${HADOOP_PREFIX}/lib
+fi
+
 
 # cygwin path translation
 if $cygwin; then
@@ -319,6 +328,13 @@ HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.file=$HADOOP_LOGFILE"
 HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.home.dir=$HADOOP_HOME"
 HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_IDENT_STRING"
 HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.root.logger=${HADOOP_ROOT_LOGGER:-INFO,console}"
+
+if [ $COMMAND = "namenode" ]; then
+  HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,DRFAS}"
+else
+  HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender}"
+fi
+
 if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
   HADOOP_OPTS="$HADOOP_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
 fi  
@@ -332,14 +348,14 @@ if [ "$starting_secure_dn" = "true" ]; then
    HADOOP_SECURE_DN_PID="$HADOOP_PID_DIR/hadoop_secure_dn.pid"
   fi
 
-  exec "$HADOOP_HOME/bin/jsvc" -Dproc_$COMMAND -outfile "$HADOOP_LOG_DIR/jsvc.out" \
-                                               -errfile "$HADOOP_LOG_DIR/jsvc.err" \
-                                               -pidfile "$HADOOP_SECURE_DN_PID" \
-                                               -nodetach \
-                                               -user "$HADOOP_SECURE_DN_USER" \
-                                               -cp "$CLASSPATH" \
-                                               $JAVA_HEAP_MAX $HADOOP_OPTS \
-                                               org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter "$@"
+  exec "$HADOOP_HOME/libexec/jsvc" -Dproc_$COMMAND -outfile "$HADOOP_LOG_DIR/jsvc.out" \
+                                                   -errfile "$HADOOP_LOG_DIR/jsvc.err" \
+                                                   -pidfile "$HADOOP_SECURE_DN_PID" \
+                                                   -nodetach \
+                                                   -user "$HADOOP_SECURE_DN_USER" \
+                                                   -cp "$CLASSPATH" \
+                                                   $JAVA_HEAP_MAX $HADOOP_OPTS \
+                                                   org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter "$@"
 else
   # run it
   exec "$JAVA" -Dproc_$COMMAND $JAVA_HEAP_MAX $HADOOP_OPTS -classpath "$CLASSPATH" $CLASS "$@"

+ 12 - 12
bin/hadoop-config.sh

@@ -19,16 +19,10 @@
 
 # resolve links - $0 may be a softlink
 
-this="$0"
-while [ -h "$this" ]; do
-  ls=`ls -ld "$this"`
-  link=`expr "$ls" : '.*-> \(.*\)$'`
-  if expr "$link" : '.*/.*' > /dev/null; then
-    this="$link"
-  else
-    this=`dirname "$this"`/"$link"
-  fi
-done
+this="${BASH_SOURCE-$0}"
+common_bin=$(cd -P -- "$(dirname -- "$this")" && pwd -P)
+script="$(basename -- "$this")"
+this="$common_bin/$script"
 
 # convert relative path to absolute path
 bin=`dirname "$this"`
@@ -37,7 +31,8 @@ bin=`cd "$bin"; pwd`
 this="$bin/$script"
 
 # the root of the Hadoop installation
-export HADOOP_HOME=`dirname "$this"`/..
+export HADOOP_PREFIX=`dirname "$this"`/..
+export HADOOP_HOME=${HADOOP_PREFIX}/share/hadoop
 
 #check to see if the conf dir is given as an optional argument
 if [ $# -gt 1 ]
@@ -52,7 +47,12 @@ then
 fi
  
 # Allow alternate conf dir location.
-HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-$HADOOP_HOME/conf}"
+if [ -e "${HADOOP_PREFIX}/conf/hadoop-env.sh" ]; then
+  DEFAULT_CONF_DIR="conf"
+else
+  DEFAULT_CONF_DIR="etc/hadoop"
+fi
+HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-$HADOOP_PREFIX/$DEFAULT_CONF_DIR}"
 
 #check to see it is specified whether to use the slaves or the
 # masters file
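
With this change hadoop-config.sh resolves the configuration directory in a fixed order: an explicit --config argument, then $HADOOP_CONF_DIR, then ${HADOOP_PREFIX}/conf when conf/hadoop-env.sh exists (the old tarball layout), and finally ${HADOOP_PREFIX}/etc/hadoop (the packaged layout). A short sketch of the precedence; the paths are illustrative:

    # Highest precedence: the explicit flag consumed by hadoop-config.sh
    bin/hadoop --config /srv/hadoop/conf fs -ls /

    # Next: the environment variable
    HADOOP_CONF_DIR=/srv/hadoop/conf bin/hadoop fs -ls /

    # Otherwise the layout-dependent default applies; the deb/rpm scripts
    # point installs at /etc/hadoop via update-hadoop-env.sh (see postinst).
    bin/hadoop fs -ls /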

+ 4 - 4
bin/hadoop-daemon.sh

@@ -20,7 +20,7 @@
 #
 # Environment Variables
 #
-#   HADOOP_CONF_DIR  Alternate conf dir. Default is ${HADOOP_HOME}/conf.
+#   HADOOP_CONF_DIR  Alternate conf dir. Default is ${HADOOP_PREFIX}/conf.
 #   HADOOP_LOG_DIR   Where log files are stored.  PWD by default.
 #   HADOOP_MASTER    host:path where hadoop code should be rsync'd from
 #   HADOOP_PID_DIR   The pid files are stored. /tmp by default.
@@ -39,7 +39,7 @@ fi
 bin=`dirname "$0"`
 bin=`cd "$bin"; pwd`
 
-. "$bin"/hadoop-config.sh
+. "$bin"/../libexec/hadoop-config.sh
 
 # get arguments
 startStop=$1
@@ -121,8 +121,8 @@ case $startStop in
 
     hadoop_rotate_log $log
     echo starting $command, logging to $log
-    cd "$HADOOP_HOME"
-    nohup nice -n $HADOOP_NICENESS "$HADOOP_HOME"/bin/hadoop --config $HADOOP_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null &
+    cd "$HADOOP_PREFIX"
+    nohup nice -n $HADOOP_NICENESS "$HADOOP_PREFIX"/bin/hadoop --config $HADOOP_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null &
     echo $! > $pid
     sleep 1; head "$log"
     ;;

+ 1 - 1
bin/hadoop-daemons.sh

@@ -29,6 +29,6 @@ fi
 bin=`dirname "$0"`
 bin=`cd "$bin"; pwd`
 
-. $bin/hadoop-config.sh
+. $bin/../libexec/hadoop-config.sh
 
 exec "$bin/slaves.sh" --config $HADOOP_CONF_DIR cd "$HADOOP_HOME" \; "$bin/hadoop-daemon.sh" --config $HADOOP_CONF_DIR "$@"

+ 1 - 1
bin/rcc

@@ -30,7 +30,7 @@
 bin=`dirname "$0"`
 bin=`cd "$bin"; pwd`
 
-. "$bin"/hadoop-config.sh
+. "$bin"/../libexec/hadoop-config.sh
 
 if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
   . "${HADOOP_CONF_DIR}/hadoop-env.sh"

+ 1 - 1
bin/slaves.sh

@@ -38,7 +38,7 @@ fi
 bin=`dirname "$0"`
 bin=`cd "$bin"; pwd`
 
-. "$bin"/hadoop-config.sh
+. "$bin"/../libexec/hadoop-config.sh
 
 # If the slaves file is specified in the command line,
 # then it takes precedence over the definition in 

+ 1 - 1
bin/start-all.sh

@@ -21,7 +21,7 @@
 bin=`dirname "$0"`
 bin=`cd "$bin"; pwd`
 
-. "$bin"/hadoop-config.sh
+. "$bin"/../libexec/hadoop-config.sh
 
 # start dfs daemons
 "$bin"/start-dfs.sh --config $HADOOP_CONF_DIR

+ 1 - 1
bin/start-balancer.sh

@@ -18,7 +18,7 @@
 bin=`dirname "$0"`
 bin=`cd "$bin"; pwd`
 
-. "$bin"/hadoop-config.sh
+. "$bin"/../libexec/hadoop-config.sh
 
 # Start balancer daemon.
 

+ 1 - 1
bin/start-dfs.sh

@@ -25,7 +25,7 @@ usage="Usage: start-dfs.sh [-upgrade|-rollback]"
 bin=`dirname "$0"`
 bin=`cd "$bin"; pwd`
 
-. "$bin"/hadoop-config.sh
+. "$bin"/../libexec/hadoop-config.sh
 
 # get arguments
 if [ $# -ge 1 ]; then

+ 1 - 1
bin/start-jobhistoryserver.sh

@@ -21,7 +21,7 @@
 bin=`dirname "$0"`
 bin=`cd "$bin"; pwd`
 
-. "$bin"/hadoop-config.sh
+. "$bin"/../libexec/hadoop-config.sh
 
 # start daemon
 "$bin"/hadoop-daemon.sh --config $HADOOP_CONF_DIR start historyserver

+ 1 - 1
bin/start-mapred.sh

@@ -21,7 +21,7 @@
 bin=`dirname "$0"`
 bin=`cd "$bin"; pwd`
 
-. "$bin"/hadoop-config.sh
+. "$bin"/../libexec/hadoop-config.sh
 
 # start mapred daemons
 # start jobtracker first to minimize connection errors at startup

+ 1 - 1
bin/stop-all.sh

@@ -21,7 +21,7 @@
 bin=`dirname "$0"`
 bin=`cd "$bin"; pwd`
 
-. "$bin"/hadoop-config.sh
+. "$bin"/../libexec/hadoop-config.sh
 
 "$bin"/stop-mapred.sh --config $HADOOP_CONF_DIR
 "$bin"/stop-dfs.sh --config $HADOOP_CONF_DIR

+ 1 - 1
bin/stop-balancer.sh

@@ -18,7 +18,7 @@
 bin=`dirname "$0"`
 bin=`cd "$bin"; pwd`
 
-. "$bin"/hadoop-config.sh
+. "$bin"/../libexec/hadoop-config.sh
 
 # Stop balancer daemon.
 # Run this on the machine where the balancer is running

+ 1 - 1
bin/stop-dfs.sh

@@ -21,7 +21,7 @@
 bin=`dirname "$0"`
 bin=`cd "$bin"; pwd`
 
-. "$bin"/hadoop-config.sh
+. "$bin"/../libexec/hadoop-config.sh
 
 "$bin"/hadoop-daemon.sh --config $HADOOP_CONF_DIR stop namenode
 "$bin"/hadoop-daemons.sh --config $HADOOP_CONF_DIR stop datanode

+ 1 - 1
bin/stop-jobhistoryserver.sh

@@ -21,7 +21,7 @@
 bin=`dirname "$0"`
 bin=`cd "$bin"; pwd`
 
-. "$bin"/hadoop-config.sh
+. "$bin"/../libexec/hadoop-config.sh
 
 "$bin"/hadoop-daemon.sh --config $HADOOP_CONF_DIR stop historyserver
 

+ 1 - 1
bin/stop-mapred.sh

@@ -21,7 +21,7 @@
 bin=`dirname "$0"`
 bin=`cd "$bin"; pwd`
 
-. "$bin"/hadoop-config.sh
+. "$bin"/../libexec/hadoop-config.sh
 
 "$bin"/hadoop-daemon.sh --config $HADOOP_CONF_DIR stop jobtracker
 "$bin"/hadoop-daemons.sh --config $HADOOP_CONF_DIR stop tasktracker

+ 206 - 28
build.xml

@@ -32,6 +32,7 @@
   <property name="final.name" value="${name}-${version}"/>
   <property name="test.final.name" value="${name}-test-${version}"/>
   <property name="year" value="2009"/>
+  <property name="package.release" value="1"/>
   
   <property name="core.final.name" value="${name}-core-${version}"/>
   <property name="test.final.name" value="${name}-test-${version}"/>
@@ -61,6 +62,11 @@
   <property name="librecordio.src" value="${c++.src}/librecordio"/>
   <property name="tools.src" value="${basedir}/src/tools"/>
 
+  <property name="package.prefix" value="/usr"/>
+  <property name="package.conf.dir" value="/etc/hadoop"/>
+  <property name="package.log.dir" value="/var/log/hadoop"/>
+  <property name="package.pid.dir" value="/var/run/hadoop"/>
+
   <property name="xercescroot" value=""/> 
   <property name="build.dir" value="${basedir}/build"/>
   <property name="build.classes" value="${build.dir}/classes"/>
@@ -163,7 +169,7 @@
   <property name="make.cmd" value="make"/>
 
   <property name="jsvc.build.dir" value="${build.dir}/jsvc" />
-  <property name="jsvc.install.dir" value="${dist.dir}/bin" /> 
+  <property name="jsvc.install.dir" value="${dist.dir}/libexec" /> 
   <property name="jsvc.location" value="http://archive.apache.org/dist/commons/daemon/binaries/1.0.2/linux/commons-daemon-1.0.2-bin-linux-i386.tar.gz" />
   <property name="jsvc.dest.name" value="jsvc.tar.gz" />
 
@@ -183,6 +189,9 @@
 
   <!-- end of task-controller properties -->
 
+  <property name="package.buildroot" value="/tmp/hadoop_package_build_${user.name}"/>
+  <property name="package.build.dir" value="/tmp/hadoop_package_build_${user.name}/BUILD"/>
+
   <!-- IVY properteis set here -->
   <property name="ivy.dir" location="ivy" />
   <loadproperties srcfile="${ivy.dir}/libraries.properties"/>
@@ -1375,11 +1384,13 @@
 	  description="Build distribution">
     <mkdir dir="${dist.dir}"/>
     <mkdir dir="${dist.dir}/lib"/>
+    <mkdir dir="${dist.dir}/libexec"/>
     <mkdir dir="${dist.dir}/contrib"/>
     <mkdir dir="${dist.dir}/bin"/>
     <mkdir dir="${dist.dir}/docs"/>
     <mkdir dir="${dist.dir}/docs/api"/>
     <mkdir dir="${dist.dir}/docs/jdiff"/>
+    <mkdir dir="${dist.dir}/sbin"/>
 
     <copy todir="${dist.dir}/lib" includeEmptyDirs="false" flatten="true">
       <fileset dir="${common.ivy.lib.dir}"/>
@@ -1418,6 +1429,12 @@
       <fileset dir="bin"/>
     </copy>
 
+    <copy todir="${dist.dir}/libexec">
+      <fileset dir="bin">
+        <include name="hadoop-config.sh"/>
+      </fileset>
+    </copy>
+
     <copy todir="${dist.dir}/conf">
       <fileset dir="${conf.dir}" excludes="**/*.template"/>
     </copy>
@@ -1438,6 +1455,12 @@
       </fileset>
     </copy>
 
+    <copy todir="${dist.dir}/sbin">
+      <fileset dir="${basedir}/src/packages">
+        <include name="*.sh" />
+      </fileset>
+    </copy>
+
     <copy todir="${dist.dir}/src" includeEmptyDirs="true">
       <fileset dir="src" excludes="**/*.template **/docs/build/**/*"/>
     </copy>
@@ -1450,6 +1473,7 @@
 
     <chmod perm="ugo+x" type="file" parallel="false">
         <fileset dir="${dist.dir}/bin"/>
+        <fileset dir="${dist.dir}/sbin"/>
         <fileset dir="${dist.dir}/src/contrib/">
           <include name="*/bin/*" />
         </fileset>
@@ -1487,71 +1511,109 @@
   <target name="bin-package" depends="compile, jar, examples, tools-jar, jar-test, ant-tasks, package-librecordio, jsvc" 
 		description="assembles artifacts for binary target">
     <mkdir dir="${dist.dir}"/>
-    <mkdir dir="${dist.dir}/lib"/>
-    <mkdir dir="${dist.dir}/contrib"/>
     <mkdir dir="${dist.dir}/bin"/>
+    <mkdir dir="${dist.dir}/etc/hadoop"/>
+    <mkdir dir="${dist.dir}/lib"/>
+    <mkdir dir="${dist.dir}/libexec"/>
+    <mkdir dir="${dist.dir}/sbin"/>
+    <mkdir dir="${dist.dir}/share/${name}/contrib"/>
+    <mkdir dir="${dist.dir}/share/${name}/webapps"/>
+    <mkdir dir="${dist.dir}/share/${name}/templates/conf"/>
+
+    <copy todir="${dist.dir}/share/${name}/templates/conf" includeEmptyDirs="false">
+      <fileset dir="${basedir}/src/packages/templates/conf">
+        <include name="*"/>
+      </fileset>
+    </copy>
 
-    <copy todir="${dist.dir}/lib" includeEmptyDirs="false" flatten="true">
+    <copy todir="${dist.dir}/share/${name}/lib" includeEmptyDirs="false" flatten="true">
       <fileset dir="${common.ivy.lib.dir}"/>
     </copy>
 
-    <copy todir="${dist.dir}/lib" includeEmptyDirs="false">
+    <copy todir="${dist.dir}/share/${name}/lib" includeEmptyDirs="false">
       <fileset dir="lib">
         <exclude name="**/native/**"/>
       </fileset>
     </copy>
 
-  	<exec dir="${dist.dir}" executable="sh" failonerror="true">
-	  <env key="BASE_NATIVE_LIB_DIR" value="${lib.dir}/native"/>
-	  <env key="BUILD_NATIVE_DIR" value="${build.dir}/native"/>
-	  <env key="DIST_LIB_DIR" value="${dist.dir}/lib/native"/>
-	  <arg line="${native.src.dir}/packageNativeHadoop.sh"/>
+    <exec dir="${dist.dir}" executable="sh" failonerror="true">
+      <env key="BASE_NATIVE_LIB_DIR" value="${lib.dir}/native"/>
+      <env key="BUILD_NATIVE_DIR" value="${build.dir}/native"/>
+      <env key="DIST_LIB_DIR" value="${dist.dir}/native"/>
+      <arg line="${native.src.dir}/packageNativeHadoop.sh"/>
     </exec>
 
+    <move todir="${dist.dir}/lib" flatten="true">
+      <fileset dir="${dist.dir}/native">
+        <include name="*/*"/>
+      </fileset>
+    </move>
+    <delete dir="${dist.dir}/native"/>
+
     <subant target="package">
       <!--Pass down the version in case its needed again and the target
       distribution directory so contribs know where to install to.-->
       <property name="version" value="${version}"/>
-      <property name="dist.dir" value="${dist.dir}"/>
+      <property name="dist.dir" value="${dist.dir}/share/${name}"/>
       <fileset file="${contrib.dir}/build.xml"/>
     </subant>  	
 
-    <copy todir="${dist.dir}/webapps">
+    <copy todir="${dist.dir}/share/${name}/webapps">
       <fileset dir="${build.webapps}"/>
     </copy>
 
-    <copy todir="${dist.dir}"> 
+    <copy todir="${dist.dir}/share/${name}"> 
       <fileset file="${build.dir}/${name}-*-${version}.jar"/>
     </copy>
     
-    <copy todir="${dist.dir}/bin">
-      <fileset dir="bin"/>
-    </copy>
+    <copy file="bin/hadoop" todir="${dist.dir}/bin"/>
 
-    <copy todir="${dist.dir}/conf">
-      <fileset dir="${conf.dir}" excludes="**/*.template"/>
+    <copy todir="${dist.dir}/sbin">
+      <fileset dir="bin">
+        <include name="*"/>
+        <exclude name="hadoop"/>
+        <exclude name="hadoop-config.sh"/>
+      </fileset>
     </copy>
 
-    <copy file="ivy.xml" tofile="${dist.dir}/ivy.xml"/>
+    <copy todir="${dist.dir}/libexec">
+      <fileset dir="bin">
+        <include name="hadoop-config.sh"/>
+      </fileset>
+    </copy>
 
-    <copy todir="${dist.dir}/ivy">
-      <fileset dir="ivy"/>
+    <copy todir="${dist.dir}/etc/hadoop">
+      <fileset dir="${conf.dir}" excludes="**/*.template"/>
     </copy>
 
-    <copy todir="${dist.dir}">
+    <copy todir="${dist.dir}/share/doc/${name}">
       <fileset dir=".">
         <include name="*.txt" />
       </fileset>
     </copy>
   	
-    <copy todir="${dist.dir}/c++" includeEmptyDirs="false">
-      <fileset dir="${build.dir}/c++"/>
+    <copy todir="${dist.dir}/sbin">
+      <fileset dir="${basedir}/src/packages">
+        <include name="*.sh" />
+      </fileset>
     </copy>
 
-    <copy todir="${dist.dir}/" file="build.xml"/>
+    <copy todir="${dist.dir}/include/${name}" includeEmptyDirs="false" flatten="true">
+      <fileset dir="${build.dir}/c++">
+        <include name="**/include/${name}/*"/>
+      </fileset>
+    </copy>
+
+    <copy todir="${dist.dir}/lib" includeEmptyDirs="false" flatten="true">
+      <fileset dir="${build.dir}/c++">
+        <include name="**/lib/*"/>
+      </fileset>
+    </copy>
 
     <chmod perm="ugo+x" type="file" parallel="false">
         <fileset dir="${dist.dir}/bin"/>
+        <fileset dir="${dist.dir}/libexec"/>
+        <fileset dir="${dist.dir}/sbin"/>
     </chmod>
   </target>
 
@@ -1597,6 +1659,121 @@
     </macro_tar>
   </target>
 
+  <target name="rpm" depends="binary" description="Make rpm package">
+    <mkdir dir="${package.buildroot}/BUILD" />
+    <mkdir dir="${package.buildroot}/RPMS" />
+    <mkdir dir="${package.buildroot}/SRPMS" />
+    <mkdir dir="${package.buildroot}/SOURCES" />
+    <mkdir dir="${package.buildroot}/SPECS" />
+    <macro_tar param.destfile="${package.buildroot}/SOURCES/${final.name}-script.tar.gz">
+      <param.listofitems>
+        <tarfileset dir="${basedir}/src/packages/rpm/init.d" mode="755">
+          <include name="*" />
+        </tarfileset>
+      </param.listofitems>
+    </macro_tar>
+    <copy todir="${package.buildroot}/SOURCES">
+      <fileset dir="${build.dir}">
+        <include name="${final.name}-bin.tar.gz" />
+      </fileset>
+    </copy>
+    <copy file="${src.dir}/packages/rpm/spec/hadoop.spec" todir="${package.buildroot}/SPECS">
+      <filterchain>
+        <replacetokens>
+          <token key="final.name" value="${final.name}" />
+          <token key="version" value="${hadoop.version}" />
+          <token key="package.release" value="${package.release}" />
+          <token key="package.build.dir" value="${package.build.dir}" />
+          <token key="package.prefix" value="${package.prefix}" />
+          <token key="package.conf.dir" value="${package.conf.dir}" />
+          <token key="package.log.dir" value="${package.log.dir}" />
+          <token key="package.pid.dir" value="${package.pid.dir}" />
+        </replacetokens>
+      </filterchain>
+    </copy>
+    <rpm specFile="hadoop.spec" command="-bb --target ${os.arch}" topDir="${package.buildroot}" cleanBuildDir="true" failOnError="true"/>
+    <copy todir="${build.dir}/" flatten="true">
+      <fileset dir="${package.buildroot}/RPMS">
+        <include name="**/${name}*.rpm" />
+      </fileset>
+      <fileset dir="${package.buildroot}/SRPMS">
+        <include name="**/${name}*.rpm" />
+      </fileset>
+    </copy>
+    <delete dir="${package.buildroot}" quiet="true" verbose="false"/>
+  </target>
+
+  <target name="deb" depends="ant-tasks, binary" description="Make deb package">
+    <taskdef name="deb"
+           classname="org.vafer.jdeb.ant.DebAntTask">
+      <classpath refid="classpath" />
+    </taskdef>
+
+    <mkdir dir="${package.build.dir}/hadoop.control" />
+    <mkdir dir="${package.buildroot}/${package.prefix}/share/hadoop" />
+    <copy todir="${package.buildroot}/${package.prefix}">
+      <fileset dir="${build.dir}/${final.name}">
+        <include name="**" />
+      </fileset>
+    </copy>
+    <copy todir="${package.build.dir}/hadoop.control">
+      <fileset dir="${src.dir}/packages/deb/hadoop.control">
+        <exclude name="control" />
+      </fileset>
+    </copy>
+    <copy file="${src.dir}/packages/deb/hadoop.control/control" todir="${package.build.dir}/hadoop.control">
+      <filterchain>
+        <replacetokens>
+          <token key="final.name" value="${final.name}" />
+          <token key="version" value="${hadoop.version}" />
+          <token key="package.release" value="${package.release}" />
+          <token key="package.build.dir" value="${package.build.dir}" />
+          <token key="package.prefix" value="${package.prefix}" />
+          <token key="package.conf.dir" value="${package.conf.dir}" />
+          <token key="package.log.dir" value="${package.log.dir}" />
+          <token key="package.pid.dir" value="${package.pid.dir}" />
+        </replacetokens>
+      </filterchain>
+    </copy>
+    <deb destfile="${package.buildroot}/${name}_${hadoop.version}-${package.release}_${os.arch}.deb" control="${package.build.dir}/hadoop.control">
+      <tarfileset dir="${build.dir}/${final.name}" filemode="644" prefix="${package.prefix}">
+        <exclude name="bin/*" />
+        <exclude name="sbin/*" />
+        <exclude name="libexec/*" />
+        <exclude name="contrib/*/bin/*" />
+        <exclude name="src/contrib/ec2/bin/*" />
+        <exclude name="src/contrib/ec2/bin/image/*" />
+        <exclude name="etc" />
+        <exclude name="etc/**" />
+        <include name="**" />
+      </tarfileset>
+      <tarfileset dir="${build.dir}/${final.name}" filemode="755" prefix="${package.prefix}">
+        <include name="bin/*" />
+        <include name="sbin/*" />
+        <include name="libexec/*" />
+        <include name="contrib/*/bin/*" />
+        <include name="src/contrib/ec2/bin/*" />
+        <include name="src/contrib/ec2/bin/image/*" />
+      </tarfileset>
+      <tarfileset dir="${build.dir}/${final.name}/etc/hadoop" filemode="644" prefix="${package.conf.dir}">
+        <exclude name="core-site.xml" />
+        <exclude name="hdfs-site.xml" />
+        <exclude name="mapred-site.xml" />
+        <include name="**" />
+      </tarfileset>
+      <tarfileset dir="${basedir}/src/packages/deb/init.d" filemode="755" prefix="/etc/init.d">
+        <include name="**" />
+      </tarfileset>
+    </deb>
+
+    <copy todir="${build.dir}/" flatten="true">
+      <fileset dir="${package.buildroot}">
+        <include name="**/${name}*.deb" />
+      </fileset>
+    </copy>
+    <delete dir="${package.buildroot}" quiet="true" verbose="false"/>
+  </target>
+
   <!-- ================================================================== -->
   <!-- Perform audit activities for the release                           -->
   <!-- ================================================================== -->
@@ -1615,6 +1792,7 @@
     <delete dir="${build.dir}"/>
     <delete dir="${docs.src}/build"/>
     <delete dir="${src.docs.cn}/build"/>
+    <delete dir="${package.buildroot}"/>
     <delete file="${basedir}/ivy/hadoop-core-pom.xml"/>
     <delete file="${basedir}/ivy/hadoop-test-pom.xml"/>
     <delete file="${basedir}/ivy/hadoop-examples-pom.xml"/>
@@ -1690,8 +1868,8 @@
   </target>
 
   <target name="package-librecordio" depends="compile-librecordio" if="librecordio">
-    <mkdir dir="${dist.dir}/librecordio"/> 
-    <copy todir="${dist.dir}/librecordio">
+    <mkdir dir="${dist.dir}/lib/librecordio"/> 
+    <copy todir="${dist.dir}/lib/librecordio">
        <fileset dir="${build.librecordio}" casesensitive="yes" followsymlinks="false">
           <exclude name="**/tests/**"/>
           <exclude name="*.so"/> 
@@ -1699,7 +1877,7 @@
        </fileset>
     </copy>
     <chmod perm="ugo+x" type="file">
-       <fileset dir="${dist.dir}/librecordio"/>
+       <fileset dir="${dist.dir}/lib/librecordio"/>
     </chmod>
   </target>
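
The package.prefix, package.conf.dir, package.log.dir and package.pid.dir properties declared at the top of build.xml are compile-phase knobs: the @token@ filterchains above substitute them into hadoop.spec and the Debian control files. Since they are ordinary Ant properties, non-default paths can be baked in from the command line; a sketch with example values:

    # Defaults are /usr, /etc/hadoop, /var/log/hadoop and /var/run/hadoop.
    ant deb -Dpackage.prefix=/opt/hadoop \
            -Dpackage.conf.dir=/etc/opt/hadoop \
            -Dpackage.log.dir=/var/opt/log/hadoop \
            -Dpackage.pid.dir=/var/opt/run/hadoop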
  

+ 6 - 0
conf/log4j.properties

@@ -114,6 +114,12 @@ log4j.logger.org.apache.hadoop.metrics2=${hadoop.metrics.log.level}
 # Jets3t library
 log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
 
+#
+# Null Appender
+# Trap security logger on the hadoop client side
+#
+log4j.appender.NullAppender=org.apache.log4j.varia.NullAppender
+
 #
 # Event Counter Appender
 # Sends counts of logging messages at different severity levels to Hadoop Metrics.

+ 6 - 1
ivy.xml

@@ -70,7 +70,7 @@
     <conf name="lucene" visibility="private"/>
     <conf name="jdiff" visibility="private" extends="log4j,s3-client,jetty,server"/>
     <conf name="checkstyle" visibility="private"/>
-
+    <conf name="packaging" visibility="private"/>
   </configurations>
 
   <publications>
@@ -291,5 +291,10 @@
       rev="${aspectj.version}"
       conf="common->default">
     </dependency>
+    <dependency org="org.vafer" 
+      name="jdeb" 
+      rev="${jdeb.version}"
+      conf="common->master">
+    </dependency>
  </dependencies>
 </ivy-module>

+ 2 - 1
ivy/libraries.properties

@@ -14,7 +14,7 @@
 #It drives ivy and the generation of a maven POM
 
 # This is the version of hadoop we are generating
-hadoop.version=0.20.203.0
+hadoop.version=0.20.204.0
 hadoop-gpl-compression.version=0.1.0
 
 #These are the versions of our dependencies (in alphabetical order)
@@ -57,6 +57,7 @@ jets3t.version=0.6.1
 jetty.version=6.1.26
 jetty-util.version=6.1.26
 junit.version=4.5
+jdeb.version=0.8
 jdiff.version=1.0.9
 json.version=1.0
 

+ 147 - 0
src/docs/src/documentation/content/xdocs/deployment_layout.xml

@@ -0,0 +1,147 @@
+<?xml version="1.0"?>
+<!--
+  Copyright 2002-2004 The Apache Software Foundation
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
+          "http://forrest.apache.org/dtd/document-v20.dtd">
+
+
+<document>
+
+  <header>
+    <title> 
+      Hadoop Deployment Layout
+    </title>
+  </header>
+
+  <body>
+    <section>
+      <title> Introduction </title>
+      <p>
+        This document describes the standard deployment layout for Hadoop.  As the Hadoop ecosystem grows more complex, a standard deployment layout ensures better integration between Hadoop sub-projects.  By making the installation process easier, we can lower the barrier to entry and increase Hadoop adoption.
+      </p>
+    </section>
+
+    <section> 
+      <title> Packages </title>
+        <p>
+          We need to divide Hadoop up into packages that can be independently upgraded.  The list of packages should include:
+        </p>
+        <ul>
+          <li>Hadoop Common - the common libraries, including the native code and required jar files.</li>
+          <li>HDFS Client - HDFS jars, scripts, and shared libraries.</li>
+          <li>HDFS Server - jsvc executable</li>
+          <li>Yarn Client - Yarn client jars and scripts</li>
+          <li>Yarn Server - Yarn server jars and scripts</li>
+          <li>MapReduce - MapReduce jars, scripts, and shared libraries</li>
+          <li>LZO - LZO codec from github.com/omally/hadoop-gpl-compression</li>
+          <li>Metrics - Plugins for Chukwa and Ganglia</li>
+        </ul>
+        <p>Packages from other teams will include:</p>
+        <ul>
+          <li>Pig</li>
+          <li>Hive</li>
+          <li>Oozie client</li>
+          <li>Oozie server</li>
+          <li>Howl client</li>
+          <li>Howl server</li>
+        </ul>
+        <p>These packages should be deployable with RPM on RedHat.  We also need a package that depends on a version of each of these packages.  In general, we can generate tarballs in the new deployment layout.</p>
+        <p>Note that some packages, like Pig, which are user facing, will have 2 versions installed in a given deployment.  This will be accomplished by modifying the package name and the associated binaries to include the version number.</p>
+        <p>All of the following paths are based on a prefix directory that is the root of the installation.  Our packages must support having multiple Hadoop stack installations on a computer at the same time.  For RPMs, this means that the packages must be relocatable and honor the --prefix option.</p>
+     </section>
+
+ 
+      <section> 
+        <title> Deployment </title>
+        <p>It is important to have a standard deployment that results from installing the packages regardless of the package manager.  Here are the top level directories and a sample of what would be under each.  Note that all of the packages are installed "flattened" into the prefix directory.  For compatibility reasons, we should create "share/hadoop" that matches the old HADOOP_HOME and set the HADOOP_HOME variable to that.</p>
+        <source>
+        $PREFIX/ bin / hadoop
+               |     | mapred
+               |     | pig -> pig7
+               |     | pig6
+               |     + pig7
+               |
+               + etc / hadoop / core-site.xml
+               |              | hdfs-site.xml
+               |              + mapred-site.xml
+               |
+               + include / hadoop / Pipes.hh
+               |         |        + TemplateFactory.hh
+               |         + hdfs.h
+               |
+               + lib / jni / hadoop-common / libhadoop.so.0.20.0
+               |     |
+               |     | libhdfs.so -> libhdfs.so.0.20.0
+               |     + libhdfs.so.0.20.0
+               |
+               + libexec / task-controller
+               |
+               + man / man1 / hadoop.1
+               |            | mapred.1
+               |            | pig6.1
+               |            + pig7.1
+               |
+               + share / hadoop-common 
+               |       | hadoop-hdfs
+               |       | hadoop-mapreduce
+               |       | pig6
+               |       + pig7
+               |
+               + sbin / hdfs-admin
+               |      | mapred-admin
+               |
+               + src / hadoop-common
+               |     | hadoop-hdfs
+               |     + hadoop-mapreduce
+               |
+               + var / lib / data-node
+                     |     + task-tracker
+                     |
+                     | log / hadoop-datanode
+                     |     + hadoop-tasktracker
+                     |
+                     + run / hadoop-datanode.pid
+                           + hadoop-tasktracker.pid
+        </source>
+        <p>Note that we must continue to honor HADOOP_CONF_DIR to override the configuration location, but it should default to $prefix/etc.  User-facing binaries and scripts go into bin.  Configuration files go into etc; tools with multiple configuration files get their own directory there.  JNI shared libraries go into lib/jni/$tool since Java does not allow specifying the version of the library to load.  Libraries that aren't loaded via System.loadLibrary are placed directly under lib.  64-bit versions of the libraries, for platforms that support them, should be placed in lib64.  All of the architecture-independent pieces, including the jars for each tool, will be placed in share/$tool.  The default location for all run-time information is var: storage in var/lib, logs in var/log, and pid files in var/run.</p>
+      </section>
+
+      <section> 
+        <title> Path Configurations </title>
+        <p>Paths can be configured at compile time or at installation time.  For RPM, the --relocate directive allows path reconfiguration at install time.  For the Debian package, paths are fixed at compile time.
+        </p>
+          <p>Build phase parameter:</p>
+          <ul>
+            <li>package.prefix - Location of package prefix (Default /usr)</li>
+            <li>package.conf.dir - Location of configuration directory (Default /etc/hadoop)</li>
+            <li>package.log.dir - Location of log directory (Default /var/log/hadoop)</li>
+            <li>package.pid.dir - Location of pid directory (Default /var/run/hadoop)</li>
+          </ul>
+
+          <p>Install phase parameter:</p>
+          <source>
+          rpm -i hadoop-[version]-[rev].[arch].rpm \
+              --relocate /usr=/usr/local/hadoop \
+              --relocate /etc/hadoop=/usr/local/etc/hadoop \
+              --relocate /var/log/hadoop=/opt/logs/hadoop \
+              --relocate /var/run/hadoop=/opt/run/hadoop
+          </source>
+      </section>
+
+  </body>
+</document>
+
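
Since the RPM is relocatable, the documented install-phase relocation can be checked with stock rpm queries. A hedged example; the version and architecture in the file name are placeholders:

    # Inspect the compiled-in paths without installing:
    rpm -qpl hadoop-0.20.204.0-1.i386.rpm | head

    # Install with every top-level path relocated, as documented above:
    rpm -i hadoop-0.20.204.0-1.i386.rpm \
        --relocate /usr=/usr/local/hadoop \
        --relocate /etc/hadoop=/usr/local/etc/hadoop \
        --relocate /var/log/hadoop=/opt/logs/hadoop \
        --relocate /var/run/hadoop=/opt/run/hadoop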

+ 2 - 1
src/docs/src/documentation/content/xdocs/site.xml

@@ -59,7 +59,8 @@ See http://forrest.apache.org/docs/linking.html for more info.
     <hdfs_libhdfs       label="C API libhdfs" href="libhdfs.html" />
   </docs>
   
-  <docs label="Common"> 
+  <docs label="Common">
+    <deployment label="Deployment Layout" href="deployment_layout.html" />
     <fsshell       label="File System Shell" href="file_system_shell.html" />
     <SLA      label="Service Level Authorization" href="service_level_auth.html"/>
     <native_lib   label="Native Libraries" href="native_libraries.html" />

+ 1 - 0
src/packages/deb/hadoop.control/conffile

@@ -0,0 +1 @@
+/usr/share/hadoop/conf

+ 9 - 0
src/packages/deb/hadoop.control/control

@@ -0,0 +1,9 @@
+Package: hadoop
+Version: @version@
+Section: misc
+Priority: optional
+Provides: hadoop
+Architecture: all
+Maintainer: Apache Software Foundation <general@hadoop.apache.org>
+Description: The Apache Hadoop project develops open-source software for reliable, scalable, distributed computing.
+Distribution: development

+ 24 - 0
src/packages/deb/hadoop.control/postinst

@@ -0,0 +1,24 @@
+#!/bin/sh
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+bash /usr/sbin/update-hadoop-env.sh \
+  --prefix=/usr \
+  --bin-dir=/usr/bin \
+  --sbin-dir=/usr/sbin \
+  --conf-dir=/etc/hadoop \
+  --log-dir=/var/log/hadoop \
+  --pid-dir=/var/run/hadoop

+ 21 - 0
src/packages/deb/hadoop.control/postrm

@@ -0,0 +1,21 @@
+#!/bin/sh
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+/usr/sbin/userdel hdfs 2> /dev/null >/dev/null
+/usr/sbin/userdel mapred 2> /dev/null >/dev/null
+/usr/sbin/groupdel hadoop 2> /dev/null >/dev/null
+exit 0

+ 22 - 0
src/packages/deb/hadoop.control/preinst

@@ -0,0 +1,22 @@
+#!/bin/sh
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+getent group hadoop 2>/dev/null >/dev/null || /usr/sbin/groupadd -r hadoop
+
+/usr/sbin/useradd --comment "Hadoop MapReduce" --shell /bin/bash -M -r --groups hadoop --home /var/lib/hadoop/mapred mapred 2> /dev/null || :
+/usr/sbin/useradd --comment "Hadoop HDFS" --shell /bin/bash -M -r --groups hadoop --home /var/lib/hadoop/hdfs hdfs 2> /dev/null || :
+
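
The preinst above creates the system accounts the init scripts later drop privileges to: hdfs and mapred, both supplementary members of the hadoop group. A quick post-install check with standard tools:

    # Verify the packaging accounts exist:
    getent group hadoop
    id hdfs     # account the HDFS daemons run as
    id mapred   # account the MapReduce daemons run as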

+ 29 - 0
src/packages/deb/hadoop.control/prerm

@@ -0,0 +1,29 @@
+#!/bin/sh
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+/etc/init.d/hadoop-namenode stop 2>/dev/null >/dev/null
+/etc/init.d/hadoop-datanode stop 2>/dev/null >/dev/null
+/etc/init.d/hadoop-jobtracker stop 2>/dev/null >/dev/null
+/etc/init.d/hadoop-tasktracker stop 2>/dev/null >/dev/null
+bash /usr/sbin/update-hadoop-env.sh \
+  --prefix=/usr \
+  --bin-dir=/usr/bin \
+  --sbin-dir=/usr/sbin \
+  --conf-dir=/etc/hadoop \
+  --log-dir=/var/log/hadoop \
+  --pid-dir=/var/run/hadoop \
+  --uninstall

+ 142 - 0
src/packages/deb/init.d/hadoop-datanode

@@ -0,0 +1,142 @@
+#! /bin/sh
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+### BEGIN INIT INFO
+# Provides:		hadoop-datanode	
+# Required-Start:	$remote_fs $syslog
+# Required-Stop:	$remote_fs $syslog
+# Default-Start:	2 3 4 5
+# Default-Stop:		
+# Short-Description:	Apache Hadoop Data Node server
+### END INIT INFO
+
+set -e
+
+# /etc/init.d/hadoop-datanode: start and stop the Apache Hadoop Data Node daemon
+
+test -x /usr/bin/hadoop || exit 0
+( /usr/bin/hadoop 2>&1 | grep -q hadoop ) 2>/dev/null || exit 0
+
+umask 022
+
+if test -f /etc/default/hadoop-env.sh; then
+    . /etc/default/hadoop-env.sh
+fi
+
+. /lib/lsb/init-functions
+
+# Are we running from init?
+run_by_init() {
+    ([ "$previous" ] && [ "$runlevel" ]) || [ "$runlevel" = S ]
+}
+
+check_for_no_start() {
+    # forget it if we're trying to start, and /etc/hadoop/hadoop-datanode_not_to_be_run exists
+    if [ -e /etc/hadoop/hadoop-datanode_not_to_be_run ]; then 
+	if [ "$1" = log_end_msg ]; then
+	    log_end_msg 0
+	fi
+	if ! run_by_init; then
+	    log_action_msg "Apache Hadoop Data Node server not in use (/etc/hadoop/hadoop-datanode_not_to_be_run)"
+	fi
+	exit 0
+    fi
+}
+
+check_privsep_dir() {
+    # Create the PrivSep empty dir if necessary
+    if [ ! -d ${HADOOP_PID_DIR} ]; then
+	mkdir -p ${HADOOP_PID_DIR}
+        chown root:hadoop ${HADOOP_PID_DIR}
+	chmod 0775 ${HADOOP_PID_DIR} 
+    fi
+}
+
+export PATH="${PATH:+$PATH:}/usr/sbin:/usr/bin"
+
+case "$1" in
+  start)
+	check_privsep_dir
+	check_for_no_start
+	log_daemon_msg "Starting Apache Hadoop Data Node server" "hadoop-datanode"
+	if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-datanode.pid -c hdfs -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start datanode; then
+	    log_end_msg 0
+	else
+	    log_end_msg 1
+	fi
+	;;
+  stop)
+	log_daemon_msg "Stopping Apache Hadoop Data Node server" "hadoop-datanode"
+	if start-stop-daemon --stop --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-datanode.pid; then
+	    log_end_msg 0
+	else
+	    log_end_msg 1
+	fi
+	;;
+
+  restart)
+	check_privsep_dir
+	log_daemon_msg "Restarting Apache Hadoop Data Node server" "hadoop-datanode"
+	start-stop-daemon --stop --quiet --oknodo --retry 30 --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-datanode.pid
+	check_for_no_start log_end_msg
+	if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-datanode.pid -c hdfs -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start datanode; then
+	    log_end_msg 0
+	else
+	    log_end_msg 1
+	fi
+	;;
+
+  try-restart)
+	check_privsep_dir
+	log_daemon_msg "Restarting Apache Hadoop Data Node server" "hadoop-datanode"
+	set +e
+	start-stop-daemon --stop --quiet --retry 30 --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-datanode.pid
+	RET="$?"
+	set -e
+	case $RET in
+	    0)
+		# old daemon stopped
+		check_for_no_start log_end_msg
+		if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-datanode.pid -c hdfs -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start datanode; then
+		    log_end_msg 0
+		else
+		    log_end_msg 1
+		fi
+		;;
+	    1)
+		# daemon not running
+		log_progress_msg "(not running)"
+		log_end_msg 0
+		;;
+	    *)
+		# failed to stop
+		log_progress_msg "(failed to stop)"
+		log_end_msg 1
+		;;
+	esac
+	;;
+
+  status)
+	status_of_proc -p ${HADOOP_PID_DIR}/hadoop-hdfs-datanode.pid ${JAVA_HOME}/bin/java hadoop-datanode && exit 0 || exit $?
+	;;
+
+  *)
+	log_action_msg "Usage: /etc/init.d/hadoop-datanode {start|stop|restart|try-restart|status}"
+	exit 1
+esac
+
+exit 0

+ 142 - 0
src/packages/deb/init.d/hadoop-jobtracker

@@ -0,0 +1,142 @@
+#! /bin/sh
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+### BEGIN INIT INFO
+# Provides:		hadoop-jobtracker	
+# Required-Start:	$remote_fs $syslog
+# Required-Stop:	$remote_fs $syslog
+# Default-Start:	2 3 4 5
+# Default-Stop:		
+# Short-Description:	Apache Hadoop Job Tracker server
+### END INIT INFO
+
+set -e
+
+# /etc/init.d/hadoop-jobtracker: start and stop the Apache Hadoop Job Tracker daemon
+
+test -x /usr/bin/hadoop || exit 0
+( /usr/bin/hadoop 2>&1 | grep -q hadoop ) 2>/dev/null || exit 0
+
+umask 022
+
+if test -f /etc/default/hadoop-env.sh; then
+    . /etc/default/hadoop-env.sh
+fi
+
+. /lib/lsb/init-functions
+
+# Are we running from init?
+run_by_init() {
+    ([ "$previous" ] && [ "$runlevel" ]) || [ "$runlevel" = S ]
+}
+
+check_for_no_start() {
+    # forget it if we're trying to start, and /etc/hadoop/hadoop-jobtracker_not_to_be_run exists
+    if [ -e /etc/hadoop/hadoop-jobtracker_not_to_be_run ]; then 
+	if [ "$1" = log_end_msg ]; then
+	    log_end_msg 0
+	fi
+	if ! run_by_init; then
+	    log_action_msg "Apache Hadoop Job Tracker server not in use (/etc/hadoop/hadoop-jobtracker_not_to_be_run)"
+	fi
+	exit 0
+    fi
+}
+
+check_privsep_dir() {
+    # Create the PrivSep empty dir if necessary
+    if [ ! -d ${HADOOP_PID_DIR} ]; then
+	mkdir -p ${HADOOP_PID_DIR}
+        chown root:hadoop ${HADOOP_PID_DIR}
+	chmod 0775 ${HADOOP_PID_DIR} 
+    fi
+}
+
+export PATH="${PATH:+$PATH:}/usr/sbin:/usr/bin"
+
+case "$1" in
+  start)
+	check_privsep_dir
+	check_for_no_start
+	log_daemon_msg "Starting Apache Hadoop Job Tracker server" "hadoop-jobtracker"
+	if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-mapred-jobtracker.pid -c mapred -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start jobtracker; then
+	    log_end_msg 0
+	else
+	    log_end_msg 1
+	fi
+	;;
+  stop)
+	log_daemon_msg "Stopping Apache Hadoop Job Tracker server" "hadoop-jobtracker"
+	if start-stop-daemon --stop --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-mapred-jobtracker.pid; then
+	    log_end_msg 0
+	else
+	    log_end_msg 1
+	fi
+	;;
+
+  restart)
+	check_privsep_dir
+	log_daemon_msg "Restarting Apache Hadoop Job Tracker server" "hadoop-jobtracker"
+	start-stop-daemon --stop --quiet --oknodo --retry 30 --pidfile ${HADOOP_PID_DIR}/hadoop-mapred-jobtracker.pid
+	check_for_no_start log_end_msg
+	if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-mapred-jobtracker.pid -c mapred -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start jobtracker; then
+	    log_end_msg 0
+	else
+	    log_end_msg 1
+	fi
+	;;
+
+  try-restart)
+	check_privsep_dir
+	log_daemon_msg "Restarting Apache Hadoop Job Tracker server" "hadoop-jobtracker"
+	set +e
+	start-stop-daemon --stop --quiet --retry 30 --pidfile ${HADOOP_PID_DIR}/hadoop-mapred-jobtracker.pid
+	RET="$?"
+	set -e
+	case $RET in
+	    0)
+		# old daemon stopped
+		check_for_no_start log_end_msg
+		if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-mapred-jobtracker.pid -c mapred -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start jobtracker; then
+		    log_end_msg 0
+		else
+		    log_end_msg 1
+		fi
+		;;
+	    1)
+		# daemon not running
+		log_progress_msg "(not running)"
+		log_end_msg 0
+		;;
+	    *)
+		# failed to stop
+		log_progress_msg "(failed to stop)"
+		log_end_msg 1
+		;;
+	esac
+	;;
+
+  status)
+	status_of_proc -p ${HADOOP_PID_DIR}/hadoop-mapred-jobtracker.pid ${JAVA_HOME}/bin/java hadoop-jobtracker && exit 0 || exit $?
+	;;
+
+  *)
+	log_action_msg "Usage: /etc/init.d/hadoop-jobtracker {start|stop|restart|try-restart|status}"
+	exit 1
+esac
+
+exit 0

+ 154 - 0
src/packages/deb/init.d/hadoop-namenode

@@ -0,0 +1,154 @@
+#! /bin/sh
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+### BEGIN INIT INFO
+# Provides:		hadoop-namenode	
+# Required-Start:	$remote_fs $syslog
+# Required-Stop:	$remote_fs $syslog
+# Default-Start:	2 3 4 5
+# Default-Stop:		
+# Short-Description:	Apache Hadoop Name Node server
+### END INIT INFO
+
+set -e
+
+# /etc/init.d/hadoop-namenode: start and stop the Apache Hadoop Name Node daemon
+
+test -x /usr/bin/hadoop || exit 0
+( /usr/bin/hadoop 2>&1 | grep -q hadoop ) 2>/dev/null || exit 0
+
+umask 022
+
+if test -f /etc/default/hadoop-env.sh; then
+    . /etc/default/hadoop-env.sh
+fi
+
+. /lib/lsb/init-functions
+
+# Are we running from init?
+run_by_init() {
+    ([ "$previous" ] && [ "$runlevel" ]) || [ "$runlevel" = S ]
+}
+
+check_for_no_start() {
+    # forget it if we're trying to start, and /etc/hadoop/hadoop-namenode_not_to_be_run exists
+    if [ -e /etc/hadoop/hadoop-namenode_not_to_be_run ]; then 
+	if [ "$1" = log_end_msg ]; then
+	    log_end_msg 0
+	fi
+	if ! run_by_init; then
+	    log_action_msg "Apache Hadoop Name Node server not in use (/etc/hadoop/hadoop-namenode_not_to_be_run)"
+	fi
+	exit 0
+    fi
+}
+
+check_privsep_dir() {
+    # Create the PrivSep empty dir if necessary
+    if [ ! -d ${HADOOP_PID_DIR} ]; then
+	mkdir -p ${HADOOP_PID_DIR}
+        chown root:hadoop ${HADOOP_PID_DIR}
+	chmod 0775 ${HADOOP_PID_DIR} 
+    fi
+}
+
+format() {
+    sudo -u hdfs ${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} namenode -format
+}
+
+export PATH="${PATH:+$PATH:}/usr/sbin:/usr/bin"
+
+case "$1" in
+  start)
+	check_privsep_dir
+	check_for_no_start
+	log_daemon_msg "Starting Apache Hadoop Name Node server" "hadoop-namenode"
+	if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-namenode.pid -c hdfs -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start namenode; then
+	    log_end_msg 0
+	else
+	    log_end_msg 1
+	fi
+	;;
+  stop)
+	log_daemon_msg "Stopping Apache Hadoop Name Node server" "hadoop-namenode"
+	if start-stop-daemon --stop --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-namenode.pid; then
+	    log_end_msg 0
+	else
+	    log_end_msg 1
+	fi
+	;;
+  format)
+	log_daemon_msg "Formatting Apache Hadoop Name Node" "hadoop-namenode"
+	format
+	if [ $? -eq 0 ]; then
+	    log_end_msg 0
+	else
+	    log_end_msg 1
+	fi
+	;;
+  restart)
+	check_privsep_dir
+	log_daemon_msg "Restarting Apache Hadoop Name Node server" "hadoop-namenode"
+	start-stop-daemon --stop --quiet --oknodo --retry 30 --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-namenode.pid
+	check_for_no_start log_end_msg
+	if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-namenode.pid -c hdfs -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start namenode; then
+	    log_end_msg 0
+	else
+	    log_end_msg 1
+	fi
+	;;
+
+  try-restart)
+	check_privsep_dir
+	log_daemon_msg "Restarting Apache Hadoop Name Node server" "hadoop-namenode"
+	set +e
+	start-stop-daemon --stop --quiet --retry 30 --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-namenode.pid
+	RET="$?"
+	set -e
+	case $RET in
+	    0)
+		# old daemon stopped
+		check_for_no_start log_end_msg
+		if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-hdfs-namenode.pid -c hdfs -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start namenode; then
+		    log_end_msg 0
+		else
+		    log_end_msg 1
+		fi
+		;;
+	    1)
+		# daemon not running
+		log_progress_msg "(not running)"
+		log_end_msg 0
+		;;
+	    *)
+		# failed to stop
+		log_progress_msg "(failed to stop)"
+		log_end_msg 1
+		;;
+	esac
+	;;
+
+  status)
+	status_of_proc -p ${HADOOP_PID_DIR}/hadoop-hdfs-namenode.pid ${JAVA_HOME}/bin/java hadoop-namenode && exit 0 || exit $?
+	;;
+
+  *)
+	log_action_msg "Usage: /etc/init.d/hadoop-namenode {start|stop|restart|try-restart|status}"
+	exit 1
+esac
+
+exit 0
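
Unlike the other init scripts, hadoop-namenode adds a format action that runs "hadoop namenode -format" as the hdfs user, so a fresh install can be brought up entirely through the init interface. A sketch of first start on a Debian system:

    # One-time HDFS initialization, then start the daemons:
    sudo /etc/init.d/hadoop-namenode format
    sudo /etc/init.d/hadoop-namenode start
    sudo /etc/init.d/hadoop-datanode start
    sudo /etc/init.d/hadoop-namenode status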

+ 142 - 0
src/packages/deb/init.d/hadoop-tasktracker

@@ -0,0 +1,142 @@
+#! /bin/sh
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+### BEGIN INIT INFO
+# Provides:		hadoop-tasktracker	
+# Required-Start:	$remote_fs $syslog
+# Required-Stop:	$remote_fs $syslog
+# Default-Start:	2 3 4 5
+# Default-Stop:		
+# Short-Description:	Apache Hadoop Task Tracker server
+### END INIT INFO
+
+set -e
+
+# /etc/init.d/hadoop-tasktracker: start and stop the Apache Hadoop Task Tracker daemon
+
+test -x /usr/bin/hadoop || exit 0
+( /usr/bin/hadoop 2>&1 | grep -q hadoop ) 2>/dev/null || exit 0
+
+umask 022
+
+if test -f /etc/default/hadoop-env.sh; then
+    . /etc/default/hadoop-env.sh
+fi
+
+. /lib/lsb/init-functions
+
+# Are we running from init?
+run_by_init() {
+    ([ "$previous" ] && [ "$runlevel" ]) || [ "$runlevel" = S ]
+}
+
+check_for_no_start() {
+    # forget it if we're trying to start, and /etc/hadoop/hadoop-tasktracker_not_to_be_run exists
+    if [ -e /etc/hadoop/hadoop-tasktracker_not_to_be_run ]; then 
+	if [ "$1" = log_end_msg ]; then
+	    log_end_msg 0
+	fi
+	if ! run_by_init; then
+	    log_action_msg "Apache Hadoop Task Tracker server not in use (/etc/hadoop/hadoop-tasktracker_not_to_be_run)"
+	fi
+	exit 0
+    fi
+}
+
+check_privsep_dir() {
+    # Create the PrivSep empty dir if necessary
+    if [ ! -d ${HADOOP_PID_DIR} ]; then
+	mkdir -p ${HADOOP_PID_DIR}
+        chown root:hadoop ${HADOOP_PID_DIR}
+	chmod 0775 ${HADOOP_PID_DIR} 
+    fi
+}
+
+export PATH="${PATH:+$PATH:}/usr/sbin:/usr/bin"
+
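+# Notes on the start-stop-daemon flags used below: --oknodo exits 0 when there
+# is nothing to do (already running or already stopped), and -c mapred runs
+# the daemon as the mapred user.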
+case "$1" in
+  start)
+	check_privsep_dir
+	check_for_no_start
+	log_daemon_msg "Starting Apache Hadoop Task Tracker server" "hadoop-tasktracker"
+	if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-mapred-tasktracker.pid -c mapred -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start tasktracker; then
+	    log_end_msg 0
+	else
+	    log_end_msg 1
+	fi
+	;;
+  stop)
+	log_daemon_msg "Stopping Apache Hadoop Task Tracker server" "hadoop-tasktracker"
+	if start-stop-daemon --stop --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-mapred-tasktracker.pid; then
+	    log_end_msg 0
+	else
+	    log_end_msg 1
+	fi
+	;;
+
+  restart)
+	check_privsep_dir
+	log_daemon_msg "Restarting Apache Hadoop Task Tracker server" "hadoop-tasktracker"
+	start-stop-daemon --stop --quiet --oknodo --retry 30 --pidfile ${HADOOP_PID_DIR}/hadoop-mapred-tasktracker.pid
+	check_for_no_start log_end_msg
+	if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-mapred-tasktracker.pid -c mapred -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start tasktracker; then
+	    log_end_msg 0
+	else
+	    log_end_msg 1
+	fi
+	;;
+
+  try-restart)
+	check_privsep_dir
+	log_daemon_msg "Restarting Apache Hadoop Task Tracker server" "hadoop-tasktracker"
+	set +e
+	start-stop-daemon --stop --quiet --retry 30 --pidfile ${HADOOP_PID_DIR}/hadoop-mapred-tasktracker.pid
+	RET="$?"
+	set -e
+	case $RET in
+	    0)
+		# old daemon stopped
+		check_for_no_start log_end_msg
+		if start-stop-daemon --start --quiet --oknodo --pidfile ${HADOOP_PID_DIR}/hadoop-mapred-tasktracker.pid -c mapred -x ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh -- --config ${HADOOP_CONF_DIR} start tasktracker; then
+		    log_end_msg 0
+		else
+		    log_end_msg 1
+		fi
+		;;
+	    1)
+		# daemon not running
+		log_progress_msg "(not running)"
+		log_end_msg 0
+		;;
+	    *)
+		# failed to stop
+		log_progress_msg "(failed to stop)"
+		log_end_msg 1
+		;;
+	esac
+	;;
+
+  status)
+	status_of_proc -p ${HADOOP_PID_DIR}/hadoop-mapred-tasktracker.pid ${JAVA_HOME}/bin/java hadoop-tasktracker && exit 0 || exit $?
+	;;
+
+  *)
+	log_action_msg "Usage: /etc/init.d/hadoop-tasktracker {start|stop|restart|try-restart|status}"
+	exit 1
+esac
+
+exit 0

+ 76 - 0
src/packages/hadoop-create-user.sh

@@ -0,0 +1,76 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+this="${BASH_SOURCE-$0}"
+bin=$(cd -P -- "$(dirname -- "$this")" && pwd -P)
+script="$(basename -- "$this")"
+this="$bin/$script"
+
+if [ "$HADOOP_HOME" != "" ]; then
+  echo "Warning: \$HADOOP_HOME is deprecated."
+  echo
+fi
+
+. "$bin"/../libexec/hadoop-config.sh
+
+usage() {
+  echo "
+usage: $0 <parameters>
+  Required parameter:
+     -u <username>                                 Create user on HDFS
+  Optional parameters:
+     -h                                            Display this message
+  "
+  exit 1
+}
+
+if [ $# != 2 ] ; then
+    usage
+    exit 1
+fi
+
+while getopts "hu:" OPTION
+do
+  case $OPTION in
+    u)
+      SETUP_USER=$OPTARG
+      ;; 
+    h)
+      usage
+      ;; 
+    --)
+      shift ; break
+      ;;
+    *)
+      echo "Unknown option: $1"
+      usage
+      exit 1 
+      ;;
+  esac
+done 
+
+export HADOOP_PREFIX
+export HADOOP_CONF_DIR
+export JAVA_HOME
+export SETUP_USER=${SETUP_USER}
+export SETUP_PATH=/user/${SETUP_USER}
+
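+# The single quotes defer expansion to the shell su spawns for the hdfs user,
+# which inherits the exported variables above.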
+su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -mkdir ${SETUP_PATH}' hdfs
+su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} fs -chown ${SETUP_USER}:${SETUP_USER} ${SETUP_PATH}' hdfs
+
+if [ "$?" == "0" ]; then
+  echo "User directory has been setup: ${SETUP_PATH}"
+fi

+ 279 - 0
src/packages/hadoop-setup-conf.sh

@@ -0,0 +1,279 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+
+if [ "$HADOOP_HOME" != "" ]; then
+  echo "Warning: \$HADOOP_HOME is deprecated."
+  echo
+fi
+
+. "$bin"/../libexec/hadoop-config.sh
+
+usage() {
+  echo "
+usage: $0 <parameters>
+
+  Optional parameters:
+     --auto                                                          Setup path and configuration automatically
+     --default                                                       Setup configuration as default
+     --conf-dir=/etc/hadoop                                          Set configuration directory
+     --datanode-dir=/var/lib/hadoop/hdfs/datanode                    Set datanode directory
+     -h                                                              Display this message
+     --jobtracker-url=hostname:9001                                  Set jobtracker url
+     --log-dir=/var/log/hadoop                                       Set log directory
+     --pid-dir=/var/run/hadoop                                       Set pid directory
+     --hdfs-dir=/var/lib/hadoop/hdfs                                 Set hdfs directory
+     --mapred-dir=/var/lib/hadoop/mapred                             Set mapreduce directory
+     --namenode-dir=/var/lib/hadoop/hdfs/namenode                    Set namenode directory
+     --namenode-url=hdfs://hostname:9000/                            Set namenode url
+     --replication=3                                                 Set replication factor
+     --taskscheduler=org.apache.hadoop.mapred.JobQueueTaskScheduler  Set task scheduler
+  "
+  exit 1
+}
+
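+# Example invocation (hostnames here are placeholders):
+#   hadoop-setup-conf.sh --auto --namenode-url=hdfs://nn.example.com:9000/ \
+#       --jobtracker-url=jt.example.com:9001
+
+# template_generator expands every ${VAR} placeholder in template file $1 with
+# the value of the matching shell variable (via eval) and appends the result
+# to $2; callers are expected to remove $2 first.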
+template_generator() {
+  REGEX='(\$\{[a-zA-Z_][a-zA-Z_0-9]*\})'
+  cat $1 |
+  while read line ; do
+    while [[ "$line" =~ $REGEX ]] ; do
+      LHS=${BASH_REMATCH[1]}
+      RHS="$(eval echo "\"$LHS\"")"
+      line=${line//$LHS/$RHS}
+    done
+    echo "$line" >> $2
+  done
+}
+
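+# GNU getopt validates and normalizes the long options; "eval set --" below
+# reloads its output as positional parameters for the option loop.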
+OPTS=$(getopt \
+  -n $0 \
+  -o '' \
+  -l 'auto' \
+  -l 'conf-dir:' \
+  -l 'default' \
+  -l 'hdfs-dir:' \
+  -l 'namenode-dir:' \
+  -l 'datanode-dir:' \
+  -l 'mapred-dir:' \
+  -l 'namenode-url:' \
+  -l 'jobtracker-url:' \
+  -l 'log-dir:' \
+  -l 'pid-dir:' \
+  -l 'replication:' \
+  -l 'taskscheduler:' \
+  -o 'h' \
+  -- "$@") 
+  
+if [ $? != 0 ] ; then
+    usage
+fi
+
+# Make sure the HADOOP_LOG_DIR is not picked up from user environment.
+unset HADOOP_LOG_DIR
+  
+eval set -- "${OPTS}"
+while true ; do
+  case "$1" in
+    --auto)
+      AUTOSETUP=1
+      AUTOMATED=1
+      shift
+      ;; 
+    --conf-dir)
+      HADOOP_CONF_DIR=$2; shift 2
+      AUTOMATED=1
+      ;; 
+    --default)
+      AUTOMATED=1; shift
+      ;;
+    -h)
+      usage
+      ;; 
+    --hdfs-dir)
+      HADOOP_HDFS_DIR=$2; shift 2
+      AUTOMATED=1
+      ;; 
+    --namenode-dir)
+      HADOOP_NN_DIR=$2; shift 2
+      AUTOMATED=1
+      ;; 
+    --datanode-dir)
+      HADOOP_DN_DIR=$2; shift 2
+      AUTOMATED=1
+      ;; 
+    --mapred-dir)
+      HADOOP_MAPRED_DIR=$2; shift 2
+      AUTOMATED=1
+      ;; 
+    --namenode-url)
+      HADOOP_NN_HOST=$2; shift 2
+      AUTOMATED=1
+      ;; 
+    --jobtracker-url)
+      HADOOP_JT_HOST=$2; shift 2
+      AUTOMATED=1
+      ;; 
+    --log-dir)
+      HADOOP_LOG_DIR=$2; shift 2
+      AUTOMATED=1
+      ;; 
+    --pid-dir)
+      HADOOP_PID_DIR=$2; shift 2
+      AUTOMATED=1
+      ;; 
+    --replication)
+      HADOOP_REPLICATION=$2; shift 2
+      AUTOMATED=1
+      ;; 
+    --taskscheduler)
+      HADOOP_TASK_SCHEDULER=$2; shift 2
+      AUTOMATED=1
+      ;;
+    --)
+      shift ; break
+      ;;
+    *)
+      echo "Unknown option: $1"
+      usage
+      exit 1 
+      ;;
+  esac
+done 
+
+AUTOSETUP=${AUTOSETUP:-1}
+JAVA_HOME=${JAVA_HOME:-/usr/java/default}
+HADOOP_NN_HOST=${HADOOP_NN_HOST:-hdfs://`hostname`:9000/}
+HADOOP_NN_DIR=${HADOOP_NN_DIR:-/var/lib/hadoop/hdfs/namenode}
+HADOOP_DN_DIR=${HADOOP_DN_DIR:-/var/lib/hadoop/hdfs/datanode}
+HADOOP_JT_HOST=${HADOOP_JT_HOST:-`hostname`:9001}
+HADOOP_HDFS_DIR=${HADOOP_HDFS_DIR:-/var/lib/hadoop/hdfs}
+HADOOP_MAPRED_DIR=${HADOOP_MAPRED_DIR:-/var/lib/hadoop/mapred}
+HADOOP_LOG_DIR=${HADOOP_LOG_DIR:-/var/log/hadoop}
+HADOOP_PID_DIR=${HADOOP_PID_DIR:-/var/run/hadoop}
+HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/etc/hadoop}
+HADOOP_REPLICATION=${HADOOP_REPLICATION:-3}
+HADOOP_TASK_SCHEDULER=${HADOOP_TASK_SCHEDULER:-org.apache.hadoop.mapred.JobQueueTaskScheduler}
+
+if [ "${AUTOMATED}" != "1" ]; then
+  echo "Setup Hadoop Configuration"
+  echo
+  echo -n "Where would you like to put config directory? (${HADOOP_CONF_DIR}) "
+  read USER_HADOOP_CONF_DIR
+  echo -n "Where would you like to put log directory? (${HADOOP_LOG_DIR}) "
+  read USER_HADOOP_LOG_DIR
+  echo -n "Where would you like to put pid directory? (${HADOOP_PID_DIR}) "
+  read USER_HADOOP_PID_DIR
+  echo -n "What is the url of the namenode? (${HADOOP_NN_HOST}) "
+  read USER_HADOOP_NN_HOST
+  echo -n "Where would you like to put namenode data directory? (${HADOOP_NN_DIR}) "
+  read USER_HADOOP_NN_DIR
+  echo -n "Where would you like to put datanode data directory? (${HADOOP_DN_DIR}) "
+  read USER_HADOOP_DN_DIR
+  echo -n "What is the url of the jobtracker? (${HADOOP_JT_HOST}) "
+  read USER_HADOOP_JT_HOST
+  echo -n "Where would you like to put jobtracker/tasktracker data directory? (${HADOOP_MAPRED_DIR}) "
+  read USER_HADOOP_MAPRED_DIR
+  echo -n "Where is JAVA_HOME directory? (${JAVA_HOME}) "
+  read USER_JAVA_HOME
+  echo -n "Would you like to create directories/copy conf files to localhost? (Y/n) "
+  read USER_AUTOSETUP
+  echo
+  JAVA_HOME=${USER_JAVA_HOME:-$JAVA_HOME}
+  HADOOP_NN_HOST=${USER_HADOOP_NN_HOST:-$HADOOP_NN_HOST}
+  HADOOP_NN_DIR=${USER_HADOOP_NN_DIR:-$HADOOP_NN_DIR}
+  HADOOP_DN_DIR=${USER_HADOOP_DN_DIR:-$HADOOP_DN_DIR}
+  HADOOP_JT_HOST=${USER_HADOOP_JT_HOST:-$HADOOP_JT_HOST}
+  HADOOP_HDFS_DIR=${USER_HADOOP_HDFS_DIR:-$HADOOP_HDFS_DIR}
+  HADOOP_MAPRED_DIR=${USER_HADOOP_MAPRED_DIR:-$HADOOP_MAPRED_DIR}
+  HADOOP_TASK_SCHEDULER=${HADOOP_TASK_SCHEDULER:-org.apache.hadoop.mapred.JobQueueTaskScheduler}
+  HADOOP_LOG_DIR=${USER_HADOOP_LOG_DIR:-$HADOOP_LOG_DIR}
+  HADOOP_PID_DIR=${USER_HADOOP_PID_DIR:-$HADOOP_PID_DIR}
+  HADOOP_CONF_DIR=${USER_HADOOP_CONF_DIR:-$HADOOP_CONF_DIR}
+  AUTOSETUP=${USER_AUTOSETUP:-y}
+  echo "Review your choices:"
+  echo
+  echo "Config directory            : ${HADOOP_CONF_DIR}"
+  echo "Log directory               : ${HADOOP_LOG_DIR}"
+  echo "PID directory               : ${HADOOP_PID_DIR}"
+  echo "Namenode url                : ${HADOOP_NN_HOST}"
+  echo "Namenode directory          : ${HADOOP_NN_DIR}"
+  echo "Datanode directory          : ${HADOOP_DN_DIR}"
+  echo "Jobtracker url              : ${HADOOP_JT_HOST}"
+  echo "Mapreduce directory         : ${HADOOP_MAPRED_DIR}"
+  echo "Task scheduler              : ${HADOOP_TASK_SCHEDULER}"
+  echo "JAVA_HOME directory         : ${JAVA_HOME}"
+  echo "Create dirs/copy conf files : ${AUTOSETUP}"
+  echo
+  echo -n "Proceed with generate configuration? (y/N) "
+  read CONFIRM
+  if [ "${CONFIRM}" != "y" ]; then
+    echo "User aborted setup, exiting..."
+    exit 1
+  fi
+fi
+
+rm -f core-site.xml >/dev/null
+rm -f hdfs-site.xml >/dev/null
+rm -f mapred-site.xml >/dev/null
+rm -f hadoop-env.sh >/dev/null
+
+template_generator ${HADOOP_HOME}/templates/conf/core-site.xml core-site.xml
+template_generator ${HADOOP_HOME}/templates/conf/hdfs-site.xml hdfs-site.xml
+template_generator ${HADOOP_HOME}/templates/conf/mapred-site.xml mapred-site.xml
+template_generator ${HADOOP_HOME}/templates/conf/hadoop-env.sh hadoop-env.sh
+
+chown root:hadoop hadoop-env.sh
+chmod 755 hadoop-env.sh
+
+if [ "${AUTOSETUP}" == "1" -o "${AUTOSETUP}" == "y" ]; then
+  mkdir -p ${HADOOP_HDFS_DIR}
+  mkdir -p ${HADOOP_NN_DIR}
+  mkdir -p ${HADOOP_DN_DIR}
+  mkdir -p ${HADOOP_MAPRED_DIR}
+  mkdir -p ${HADOOP_CONF_DIR}
+  mkdir -p ${HADOOP_LOG_DIR}
+  mkdir -p ${HADOOP_LOG_DIR}/hdfs
+  mkdir -p ${HADOOP_LOG_DIR}/mapred
+  mkdir -p ${HADOOP_PID_DIR}
+  chown hdfs:hadoop ${HADOOP_HDFS_DIR}
+  chown hdfs:hadoop ${HADOOP_NN_DIR}
+  chown hdfs:hadoop ${HADOOP_DN_DIR}
+  chown mapred:hadoop ${HADOOP_MAPRED_DIR}
+  chown root:hadoop ${HADOOP_LOG_DIR}
+  chmod 775 ${HADOOP_LOG_DIR}
+  chmod 775 ${HADOOP_PID_DIR}
+  chown hdfs:hadoop ${HADOOP_LOG_DIR}/hdfs
+  chown mapred:hadoop ${HADOOP_LOG_DIR}/mapred
+  cp -f *.xml ${HADOOP_CONF_DIR}
+  cp -f hadoop-env.sh ${HADOOP_CONF_DIR}
+  echo "Configuration setup is completed."
+  if [[ "$HADOOP_NN_HOST" =~ "`hostname`" ]]; then
+    echo "Proceed to run hadoop-setup-hdfs.sh on namenode."
+  fi
+else
+  echo
+  echo "Configuration file has been generated, please copy:"
+  echo
+  echo "core-site.xml"
+  echo "hdfs-site.xml"
+  echo "mapred-site.xml"
+  echo "hadoop-env.sh"
+  echo
+  echo " to ${HADOOP_CONF_DIR} on all nodes, and proceed to run hadoop-setup-hdfs.sh on namenode."
+fi

+ 53 - 0
src/packages/hadoop-setup-hdfs.sh

@@ -0,0 +1,53 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+
+if [ "$HADOOP_HOME" != "" ]; then
+  echo "Warning: \$HADOOP_HOME is deprecated."
+  echo
+fi
+
+. "$bin"/../libexec/hadoop-config.sh
+
+echo "Setup Hadoop Distributed File System"
+echo
+echo "Formatting namenode"
+echo
+su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} namenode -format' hdfs
+echo
+echo "Starting namenode process"
+echo
+/etc/init.d/hadoop-namenode start
+echo
+echo "Initialize HDFS file system: "
+echo
+
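+# As the hdfs superuser, create the mapred user directory and a world-writable
+# /tmp; the jobtracker keeps its system files under /user/mapred (see
+# mapred.system.dir in the mapred-site.xml template).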
+su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} dfs -mkdir /user/mapred' hdfs
+su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} dfs -chown mapred:mapred /user/mapred' hdfs
+su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} dfs -mkdir /tmp' hdfs
+su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} dfs -chmod 777 /tmp' hdfs
+
+if [ $? -eq 0 ]; then
+  echo "Completed."
+else
+  echo "Unknown error occurred, check hadoop logs for details."
+fi
+
+echo
+echo "Please startup datanode processes: /etc/init.d/hadoop-datanode start"

+ 215 - 0
src/packages/hadoop-setup-single-node.sh

@@ -0,0 +1,215 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Script to set up the HDFS file system for single node deployment
+
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+
+if [ "$HADOOP_HOME" != "" ]; then
+  echo "Warning: \$HADOOP_HOME is deprecated."
+  echo
+fi
+
+. "$bin"/../libexec/hadoop-config.sh
+
+usage() {
+  echo "
+usage: $0 <parameters>
+
+  Optional parameters:
+     --default                   Setup system as default
+     -h                          Display this message
+  "
+  exit 1
+}
+
+template_generator() {
+  REGEX='(\$\{[a-zA-Z_][a-zA-Z_0-9]*\})'
+  cat $1 |
+  while read line ; do
+    while [[ "$line" =~ $REGEX ]] ; do
+      LHS=${BASH_REMATCH[1]}
+      RHS="$(eval echo "\"$LHS\"")"
+      line=${line//$LHS/$RHS}
+    done
+    echo "$line" >> $2
+  done
+}
+
+OPTS=$(getopt \
+  -n $0 \
+  -o '' \
+  -l 'default' \
+  -- "$@")
+
+if [ $? != 0 ] ; then
+    usage
+fi
+
+if [ -e /etc/hadoop/hadoop-env.sh ]; then
+  . /etc/hadoop/hadoop-env.sh
+fi
+
+eval set -- "${OPTS}"
+while true ; do
+  case "$1" in
+    --default)
+      AUTOMATED=1; shift
+      ;;
+    -h)
+      usage
+      ;;
+    --)
+      shift ; break
+      ;;
+    *)
+      echo "Unknown option: $1"
+      usage
+      exit 1
+      ;;
+  esac
+done
+
+if [ "${AUTOMATED}" != "1" ]; then
+  echo "Welcome to Hadoop single node setup wizard"
+  echo
+  echo -n "Would you like to use default single node configuration? (y/n) "
+  read SET_CONFIG
+  echo -n "Would you like to format name node? (y/n) "
+  read SET_FORMAT
+  echo -n "Would you like to setup default directory structure? (y/n) "
+  read SET_MKDIR
+  echo -n "Would you like to start up Hadoop? (y/n) "
+  read STARTUP
+  echo -n "Would you like to start up Hadoop on reboot? (y/n) "
+  read SET_REBOOT
+  echo
+  echo "Review your choices:"
+  echo
+  echo "Setup single node configuration    : ${SET_CONFIG}"
+  echo "Format namenode                    : ${SET_FORMAT}"
+  echo "Setup default file system structure: ${SET_MKDIR}"
+  echo "Start up Hadoop                    : ${STARTUP}"
+  echo "Start up Hadoop on reboot          : ${SET_REBOOT}"
+  echo
+  echo -n "Proceed with setup? (y/n) "
+  read CONFIRM
+  if [ "${CONFIRM}" != "y" ]; then
+    echo "User aborted setup, exiting..."
+    exit 1
+  fi
+else
+  SET_CONFIG="y"
+  SET_FORMAT="y"
+  SET_MKDIR="y"
+  STARTUP="y"
+  SET_REBOOT="y"
+fi
+
+AUTOMATED=${AUTOMATED:-0}
+SET_CONFIG=${SET_CONFIG:-y}
+SET_FORMAT=${SET_FORMAT:-n}
+SET_MKDIR=${SET_MKDIR:-y}
+STARTUP=${STARTUP:-y}
+SET_REBOOT=${SET_REBOOT:-y}
+
+# Make sure system is not already started
+/etc/init.d/hadoop-namenode stop 2>/dev/null >/dev/null
+/etc/init.d/hadoop-datanode stop 2>/dev/null >/dev/null
+/etc/init.d/hadoop-jobtracker stop 2>/dev/null >/dev/null
+/etc/init.d/hadoop-tasktracker stop 2>/dev/null >/dev/null
+
+if [ "${SET_CONFIG}" == "y" ]; then
+  JAVA_HOME=${JAVA_HOME:-/usr/java/default}
+  HADOOP_NN_HOST=${HADOOP_NN_HOST:-hdfs://localhost:9000/}
+  HADOOP_NN_DIR=${HADOOP_NN_DIR:-/var/lib/hadoop/hdfs/namenode}
+  HADOOP_DN_DIR=${HADOOP_DN_DIR:-/var/lib/hadoop/hdfs/datanode}
+  HADOOP_JT_HOST=${HADOOP_JT_HOST:-localhost:9001}
+  HADOOP_HDFS_DIR=${HADOOP_HDFS_DIR:-/var/lib/hadoop/hdfs}
+  HADOOP_MAPRED_DIR=${HADOOP_MAPRED_DIR:-/var/lib/hadoop/mapred}
+  HADOOP_PID_DIR=${HADOOP_PID_DIR:-/var/run/hadoop}
+  HADOOP_LOG_DIR="/var/log/hadoop"
+  HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/etc/hadoop}
+  HADOOP_REPLICATION=${HADOOP_REPLICATION:-1}
+  ${HADOOP_PREFIX}/sbin/hadoop-setup-conf.sh --auto \
+    --conf-dir=${HADOOP_CONF_DIR} \
+    --datanode-dir=${HADOOP_DN_DIR} \
+    --hdfs-dir=${HADOOP_HDFS_DIR} \
+    --jobtracker-url=${HADOOP_JT_HOST} \
+    --log-dir=${HADOOP_LOG_DIR} \
+    --pid-dir=${HADOOP_PID_DIR} \
+    --mapred-dir=${HADOOP_MAPRED_DIR} \
+    --namenode-dir=${HADOOP_NN_DIR} \
+    --namenode-url=${HADOOP_NN_HOST} \
+    --replication=${HADOOP_REPLICATION}
+fi
+
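+# Format only on first run (no namenode dir yet) or when the user explicitly
+# asked for a reformat; both paths recreate an empty HDFS data directory.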
+if [ ! -e ${HADOOP_NN_DIR} ]; then
+  rm -rf ${HADOOP_HDFS_DIR} 2>/dev/null >/dev/null
+  mkdir -p ${HADOOP_HDFS_DIR}
+  chmod 755 ${HADOOP_HDFS_DIR}
+  chown hdfs:hadoop ${HADOOP_HDFS_DIR}
+  /etc/init.d/hadoop-namenode format
+elif [ "${SET_FORMAT}" == "y" ]; then
+  rm -rf ${HADOOP_HDFS_DIR} 2>/dev/null >/dev/null
+  mkdir -p ${HADOOP_HDFS_DIR}
+  chmod 755 ${HADOOP_HDFS_DIR}
+  chown hdfs:hadoop ${HADOOP_HDFS_DIR}
+  rm -rf ${HADOOP_NN_DIR}
+  /etc/init.d/hadoop-namenode format
+fi
+
+/etc/init.d/hadoop-namenode start
+/etc/init.d/hadoop-datanode start
+
+su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} dfs -mkdir /user/mapred' hdfs
+su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} dfs -chown mapred:mapred /user/mapred' hdfs
+su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} dfs -mkdir /tmp' hdfs
+su -c '${HADOOP_PREFIX}/bin/hadoop --config ${HADOOP_CONF_DIR} dfs -chmod 777 /tmp' hdfs
+
+/etc/init.d/hadoop-jobtracker start
+/etc/init.d/hadoop-tasktracker start
+
+if [ "${SET_REBOOT}" == "y" ]; then
+  if [ -e /etc/debian_version ]; then
+    ln -sf ../init.d/hadoop-namenode /etc/rc2.d/S90hadoop-namenode
+    ln -sf ../init.d/hadoop-datanode /etc/rc2.d/S91hadoop-datanode
+    ln -sf ../init.d/hadoop-jobtracker /etc/rc2.d/S92hadoop-jobtracker
+    ln -sf ../init.d/hadoop-tasktracker /etc/rc2.d/S93hadoop-tasktracker
+    ln -sf ../init.d/hadoop-namenode /etc/rc6.d/S10hadoop-namenode
+    ln -sf ../init.d/hadoop-datanode /etc/rc6.d/S11hadoop-datanode
+    ln -sf ../init.d/hadoop-jobtracker /etc/rc6.d/S12hadoop-jobtracker
+    ln -sf ../init.d/hadoop-tasktracker /etc/rc6.d/S13hadoop-tasktracker
+  elif [ -e /etc/redhat-release ]; then
+    /sbin/chkconfig hadoop-namenode --add
+    /sbin/chkconfig hadoop-datanode --add
+    /sbin/chkconfig hadoop-jobtracker --add
+    /sbin/chkconfig hadoop-tasktracker --add
+    /sbin/chkconfig hadoop-namenode on
+    /sbin/chkconfig hadoop-datanode on
+    /sbin/chkconfig hadoop-jobtracker on
+    /sbin/chkconfig hadoop-tasktracker on
+  fi
+fi
+
+if [ "${STARTUP}" != "y" ]; then
+  /etc/init.d/hadoop-namenode stop
+  /etc/init.d/hadoop-datanode stop
+  /etc/init.d/hadoop-jobtracker stop
+  /etc/init.d/hadoop-tasktracker stop
+fi

+ 84 - 0
src/packages/rpm/init.d/hadoop-datanode

@@ -0,0 +1,84 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# 
+# Starts a Hadoop datanode
+# 
+# chkconfig: 2345 90 10
+# description: Hadoop datanode
+
+source /etc/rc.d/init.d/functions
+source /etc/default/hadoop-env.sh
+
+RETVAL=0
+PIDFILE="${HADOOP_PID_DIR}/hadoop-hdfs-datanode.pid"
+desc="Hadoop datanode daemon"
+
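+# daemon() and status() come from /etc/rc.d/init.d/functions; the
+# /var/lock/subsys file lets "service" and condrestart track whether the
+# datanode was started by this script.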
+start() {
+  echo -n $"Starting $desc (hadoop-datanode): "
+  daemon --user hdfs ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh --config "${HADOOP_CONF_DIR}" start datanode
+  RETVAL=$?
+  echo
+  [ $RETVAL -eq 0 ] && touch /var/lock/subsys/hadoop-datanode
+  return $RETVAL
+}
+
+stop() {
+  echo -n $"Stopping $desc (hadoop-datanode): "
+  daemon --user hdfs ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh --config "${HADOOP_CONF_DIR}" stop datanode
+  RETVAL=$?
+  sleep 5
+  echo
+  [ $RETVAL -eq 0 ] && rm -f /var/lock/subsys/hadoop-datanode $PIDFILE
+}
+
+restart() {
+  stop
+  start
+}
+
+checkstatus(){
+  status -p $PIDFILE ${JAVA_HOME}/bin/java
+  RETVAL=$?
+}
+
+condrestart(){
+  [ -e /var/lock/subsys/hadoop-datanode ] && restart || :
+}
+
+case "$1" in
+  start)
+    start
+    ;;
+  stop)
+    stop
+    ;;
+  status)
+    checkstatus
+    ;;
+  restart)
+    restart
+    ;;
+  condrestart)
+    condrestart
+    ;;
+  *)
+    echo $"Usage: $0 {start|stop|status|restart|condrestart}"
+    exit 1
+esac
+
+exit $RETVAL

+ 84 - 0
src/packages/rpm/init.d/hadoop-jobtracker

@@ -0,0 +1,84 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# 
+# Starts a Hadoop jobtracker
+# 
+# chkconfig: 2345 90 10
+# description: Hadoop jobtracker
+
+source /etc/rc.d/init.d/functions
+source /etc/default/hadoop-env.sh
+
+RETVAL=0
+PIDFILE="${HADOOP_PID_DIR}/hadoop-mapred-jobtracker.pid"
+desc="Hadoop jobtracker daemon"
+
+start() {
+  echo -n $"Starting $desc (hadoop-jobtracker): "
+  daemon --user mapred ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh --config "${HADOOP_CONF_DIR}" start jobtracker
+  RETVAL=$?
+  echo
+  [ $RETVAL -eq 0 ] && touch /var/lock/subsys/hadoop-jobtracker
+  return $RETVAL
+}
+
+stop() {
+  echo -n $"Stopping $desc (hadoop-jobtracker): "
+  daemon --user mapred ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh --config "${HADOOP_CONF_DIR}" stop jobtracker
+  RETVAL=$?
+  sleep 5
+  echo
+  [ $RETVAL -eq 0 ] && rm -f /var/lock/subsys/hadoop-jobtracker $PIDFILE
+}
+
+restart() {
+  stop
+  start
+}
+
+checkstatus(){
+  status -p $PIDFILE ${JAVA_HOME}/bin/java
+  RETVAL=$?
+}
+
+condrestart(){
+  [ -e /var/lock/subsys/hadoop-jobtracker ] && restart || :
+}
+
+case "$1" in
+  start)
+    start
+    ;;
+  stop)
+    stop
+    ;;
+  status)
+    checkstatus
+    ;;
+  restart)
+    restart
+    ;;
+  condrestart)
+    condrestart
+    ;;
+  *)
+    echo $"Usage: $0 {start|stop|status|restart|condrestart}"
+    exit 1
+esac
+
+exit $RETVAL

+ 98 - 0
src/packages/rpm/init.d/hadoop-namenode

@@ -0,0 +1,98 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# 
+# Starts a Hadoop namenode
+# 
+# chkconfig: 2345 90 10
+# description: Hadoop namenode
+
+source /etc/rc.d/init.d/functions
+source /etc/default/hadoop-env.sh
+
+RETVAL=0
+PIDFILE="${HADOOP_PID_DIR}/hadoop-hdfs-namenode.pid"
+desc="Hadoop namenode daemon"
+
+start() {
+  echo -n $"Starting $desc (hadoop-namenode): "
+  daemon --user hdfs ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh --config "${HADOOP_CONF_DIR}" start namenode $1
+  RETVAL=$?
+  echo
+  [ $RETVAL -eq 0 ] && touch /var/lock/subsys/hadoop-namenode
+  return $RETVAL
+}
+
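+# Forward -upgrade to start(), which appends it to the namenode start command
+# so HDFS can convert an older storage layout.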
+upgrade() {
+  start -upgrade
+}
+
+stop() {
+  echo -n $"Stopping $desc (hadoop-namenode): "
+  daemon --user hdfs ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh --config "${HADOOP_CONF_DIR}" stop namenode
+  RETVAL=$?
+  sleep 5
+  echo
+  [ $RETVAL -eq 0 ] && rm -f /var/lock/subsys/hadoop-namenode $PIDFILE
+}
+
+checkstatus(){
+  status -p $PIDFILE ${JAVA_HOME}/bin/java
+  RETVAL=$?
+}
+
+restart() {
+  stop
+  start
+}
+
+condrestart(){
+  [ -e /var/lock/subsys/hadoop-namenode ] && restart || :
+}
+
+format() {
+  daemon --user hdfs ${HADOOP_PREFIX}/bin/hadoop namenode -format
+}
+
+case "$1" in
+  start)
+    start
+    ;;
+  upgrade)
+    upgrade
+    ;;
+  format)
+    format
+    ;;
+  stop)
+    stop
+    ;;
+  status)
+    checkstatus
+    ;;
+  restart)
+    restart
+    ;;
+  condrestart|try-restart)
+    condrestart
+    ;;
+  *)
+    echo $"Usage: $0 {start|stop|status|restart|try-restart|upgrade}"
+    exit 1
+esac
+
+exit $RETVAL

+ 84 - 0
src/packages/rpm/init.d/hadoop-tasktracker

@@ -0,0 +1,84 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# 
+# Starts a Hadoop tasktracker
+# 
+# chkconfig: 2345 90 10
+# description: Hadoop tasktracker
+
+source /etc/rc.d/init.d/functions
+source /etc/default/hadoop-env.sh
+
+RETVAL=0
+PIDFILE="${HADOOP_PID_DIR}/hadoop-mapred-tasktracker.pid"
+desc="Hadoop tasktracker daemon"
+
+start() {
+  echo -n $"Starting $desc (hadoop-tasktracker): "
+  daemon --user mapred ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh --config "${HADOOP_CONF_DIR}" start tasktracker
+  RETVAL=$?
+  echo
+  [ $RETVAL -eq 0 ] && touch /var/lock/subsys/hadoop-tasktracker
+  return $RETVAL
+}
+
+stop() {
+  echo -n $"Stopping $desc (hadoop-tasktracker): "
+  daemon --user mapred ${HADOOP_PREFIX}/sbin/hadoop-daemon.sh --config "${HADOOP_CONF_DIR}" stop tasktracker
+  RETVAL=$?
+  sleep 5
+  echo
+  [ $RETVAL -eq 0 ] && rm -f /var/lock/subsys/hadoop-tasktracker $PIDFILE
+}
+
+restart() {
+  stop
+  start
+}
+
+checkstatus(){
+  status -p $PIDFILE ${JAVA_HOME}/bin/java
+  RETVAL=$?
+}
+
+condrestart(){
+  [ -e /var/lock/subsys/hadoop-tasktracker ] && restart || :
+}
+
+case "$1" in
+  start)
+    start
+    ;;
+  stop)
+    stop
+    ;;
+  status)
+    checkstatus
+    ;;
+  restart)
+    restart
+    ;;
+  condrestart)
+    condrestart
+    ;;
+  *)
+    echo $"Usage: $0 {start|stop|status|restart|condrestart}"
+    exit 1
+esac
+
+exit $RETVAL

+ 194 - 0
src/packages/rpm/spec/hadoop.spec

@@ -0,0 +1,194 @@
+#   Licensed to the Apache Software Foundation (ASF) under one or more
+#   contributor license agreements.  See the NOTICE file distributed with
+#   this work for additional information regarding copyright ownership.
+#   The ASF licenses this file to You under the Apache License, Version 2.0
+#   (the "License"); you may not use this file except in compliance with
+#   the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
+#
+# RPM Spec file for Hadoop version @version@
+#
+
+%define name         hadoop
+%define version      @version@
+%define release      @package.release@
+
+# Installation Locations
+%define _prefix      @package.prefix@
+%define _bin_dir     %{_prefix}/bin
+%define _conf_dir    @package.conf.dir@
+%define _include_dir %{_prefix}/include
+%define _lib_dir     %{_prefix}/lib
+%define _lib64_dir   %{_prefix}/lib64
+%define _libexec_dir %{_prefix}/libexec
+%define _log_dir     @package.log.dir@
+%define _man_dir     %{_prefix}/man
+%define _pid_dir     @package.pid.dir@
+%define _sbin_dir    %{_prefix}/sbin
+%define _share_dir   %{_prefix}/share
+%define _var_dir     /var/lib/hadoop
+
+# Build time settings
+%define _build_dir  @package.build.dir@
+%define _final_name @final.name@
+%define debug_package %{nil}
+
+# Disable brp-java-repack-jars for AspectJ
+%define __os_install_post    \
+    /usr/lib/rpm/redhat/brp-compress \
+    %{!?__debug_package:/usr/lib/rpm/redhat/brp-strip %{__strip}} \
+    /usr/lib/rpm/redhat/brp-strip-static-archive %{__strip} \
+    /usr/lib/rpm/redhat/brp-strip-comment-note %{__strip} %{__objdump} \
+    /usr/lib/rpm/brp-python-bytecompile %{nil}
+
+# RPM searches perl files for dependencies and this breaks for non-packaged
+# perl libs like thrift, so disable this
+%define _use_internal_dependency_generator 0
+
+%ifarch i386
+%global hadoop_arch Linux-i386-32
+%endif
+%ifarch amd64 x86_64
+%global hadoop_arch Linux-amd64-64
+%endif
+%ifarch noarch
+%global hadoop_arch ""
+%endif
+
+Summary: The Apache Hadoop project develops open-source software for reliable, scalable, distributed computing
+License: Apache License, Version 2.0
+URL: http://hadoop.apache.org/core/
+Vendor: Apache Software Foundation
+Group: Development/Libraries
+Name: %{name}
+Version: %{version}
+Release: %{release} 
+Source0: %{_final_name}-bin.tar.gz
+Source1: %{_final_name}-script.tar.gz
+Prefix: %{_prefix}
+Prefix: %{_conf_dir}
+Prefix: %{_log_dir}
+Prefix: %{_pid_dir}
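+# The Prefix tags above make this package relocatable; at install time they
+# map, in order, to $RPM_INSTALL_PREFIX0..3 used by the %post and %preun
+# scriptlets below.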
+Buildroot: %{_build_dir}
+Requires: sh-utils, textutils, /usr/sbin/useradd, /usr/sbin/usermod, /sbin/chkconfig, /sbin/service, jdk >= 1.6
+AutoReqProv: no
+Provides: hadoop
+
+%description
+The Apache Hadoop project develops open-source software for reliable, scalable, 
+distributed computing.  Hadoop includes these subprojects:
+
+Hadoop Common: The common utilities that support the other Hadoop subprojects.
+HDFS: A distributed file system that provides high throughput access to application data.
+MapReduce: A software framework for distributed processing of large data sets on compute clusters.
+
+%prep
+%setup -n %{_final_name} -a 0
+%setup -n %{_final_name} -a 1
+
+%build
+if [ -d ${RPM_BUILD_DIR}%{_prefix} ]; then
+  rm -rf ${RPM_BUILD_DIR}%{_prefix}
+fi
+
+if [ -d ${RPM_BUILD_DIR}%{_log_dir} ]; then
+  rm -rf ${RPM_BUILD_DIR}%{_log_dir}
+fi
+
+if [ -d ${RPM_BUILD_DIR}%{_conf_dir} ]; then
+  rm -rf ${RPM_BUILD_DIR}%{_conf_dir}
+fi
+
+if [ -d ${RPM_BUILD_DIR}%{_pid_dir} ]; then
+  rm -rf ${RPM_BUILD_DIR}%{_pid_dir}
+fi
+
+mkdir -p ${RPM_BUILD_DIR}%{_prefix}
+mkdir -p ${RPM_BUILD_DIR}%{_bin_dir}
+mkdir -p ${RPM_BUILD_DIR}%{_include_dir}
+mkdir -p ${RPM_BUILD_DIR}%{_lib_dir}
+%ifarch amd64 x86_64
+mkdir -p ${RPM_BUILD_DIR}%{_lib64_dir}
+%endif
+mkdir -p ${RPM_BUILD_DIR}%{_libexec_dir}
+mkdir -p ${RPM_BUILD_DIR}%{_log_dir}
+mkdir -p ${RPM_BUILD_DIR}%{_conf_dir}
+mkdir -p ${RPM_BUILD_DIR}%{_man_dir}
+mkdir -p ${RPM_BUILD_DIR}%{_pid_dir}
+mkdir -p ${RPM_BUILD_DIR}%{_sbin_dir}
+mkdir -p ${RPM_BUILD_DIR}%{_share_dir}
+mkdir -p ${RPM_BUILD_DIR}%{_var_dir}
+mkdir -p ${RPM_BUILD_DIR}/etc/rc.d/init.d
+
+mv ${RPM_BUILD_DIR}/%{_final_name}/hadoop-namenode ${RPM_BUILD_DIR}/etc/rc.d/init.d/hadoop-namenode
+mv ${RPM_BUILD_DIR}/%{_final_name}/hadoop-datanode ${RPM_BUILD_DIR}/etc/rc.d/init.d/hadoop-datanode
+mv ${RPM_BUILD_DIR}/%{_final_name}/hadoop-jobtracker ${RPM_BUILD_DIR}/etc/rc.d/init.d/hadoop-jobtracker
+mv ${RPM_BUILD_DIR}/%{_final_name}/hadoop-tasktracker ${RPM_BUILD_DIR}/etc/rc.d/init.d/hadoop-tasktracker
+chmod 0755 ${RPM_BUILD_DIR}/etc/rc.d/init.d/*
+chmod 0755 ${RPM_BUILD_DIR}/%{_final_name}/sbin/hadoop-*
+
+#########################
+#### INSTALL SECTION ####
+#########################
+%install
+mv ${RPM_BUILD_DIR}/%{_final_name}/etc/hadoop/* ${RPM_BUILD_DIR}%{_conf_dir}
+mv ${RPM_BUILD_DIR}/%{_final_name}/* ${RPM_BUILD_DIR}%{_prefix}
+
+if [ "${RPM_BUILD_DIR}%{_conf_dir}" != "${RPM_BUILD_DIR}/%{_prefix}/conf" ]; then
+  rm -rf ${RPM_BUILD_DIR}/%{_prefix}/etc
+fi
+
+%pre
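+# Create the hadoop group and the hdfs/mapred system users idempotently;
+# errors are ignored so upgrades and re-installs succeed.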
+getent group hadoop 2>/dev/null >/dev/null || /usr/sbin/groupadd -r hadoop
+
+/usr/sbin/useradd --comment "Hadoop MapReduce" --shell /bin/bash -M -r --groups hadoop --home /tmp mapred 2> /dev/null || :
+/usr/sbin/useradd --comment "Hadoop HDFS" --shell /bin/bash -M -r --groups hadoop --home /tmp hdfs 2> /dev/null || :
+
+%post
+bash ${RPM_INSTALL_PREFIX0}/sbin/update-hadoop-env.sh \
+       --prefix=${RPM_INSTALL_PREFIX0} \
+       --bin-dir=${RPM_INSTALL_PREFIX0}/bin \
+       --sbin-dir=${RPM_INSTALL_PREFIX0}/sbin \
+       --conf-dir=${RPM_INSTALL_PREFIX1} \
+       --log-dir=${RPM_INSTALL_PREFIX2} \
+       --pid-dir=${RPM_INSTALL_PREFIX3}
+
+%preun
+bash ${RPM_INSTALL_PREFIX0}/sbin/update-hadoop-env.sh \
+       --prefix=${RPM_INSTALL_PREFIX0} \
+       --bin-dir=${RPM_INSTALL_PREFIX0}/bin \
+       --sbin-dir=${RPM_INSTALL_PREFIX0}/sbin \
+       --conf-dir=${RPM_INSTALL_PREFIX1} \
+       --log-dir=${RPM_INSTALL_PREFIX2} \
+       --pid-dir=${RPM_INSTALL_PREFIX3} \
+       --uninstall
+
+%files 
+%defattr(-,root,root)
+%attr(0755,root,hadoop) %{_log_dir}
+%attr(0775,root,hadoop) %{_pid_dir}
+%config(noreplace) %{_conf_dir}/capacity-scheduler.xml
+%config(noreplace) %{_conf_dir}/configuration.xsl
+%config(noreplace) %{_conf_dir}/core-site.xml
+%config(noreplace) %{_conf_dir}/hadoop-env.sh
+%config(noreplace) %{_conf_dir}/hadoop-metrics2.properties
+%config(noreplace) %{_conf_dir}/hadoop-policy.xml
+%config(noreplace) %{_conf_dir}/hdfs-site.xml
+%config(noreplace) %{_conf_dir}/log4j.properties
+%config(noreplace) %{_conf_dir}/mapred-queue-acls.xml
+%config(noreplace) %{_conf_dir}/mapred-site.xml
+%config(noreplace) %{_conf_dir}/masters
+%config(noreplace) %{_conf_dir}/slaves
+%config(noreplace) %{_conf_dir}/ssl-client.xml.example
+%config(noreplace) %{_conf_dir}/ssl-server.xml.example
+%config(noreplace) %{_conf_dir}/taskcontroller.cfg
+%{_prefix}
+%attr(0755,root,root) /etc/rc.d/init.d

+ 11 - 0
src/packages/templates/conf/core-site.xml

@@ -0,0 +1,11 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
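+<!-- The fs.default.name value below is a placeholder that the setup scripts
+     fill in via template_generator. -->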
+<configuration>
+  <property>
+    <name>fs.default.name</name>
+    <value>${HADOOP_NN_HOST}</value>
+  </property>
+</configuration>

+ 64 - 0
src/packages/templates/conf/hadoop-env.sh

@@ -0,0 +1,64 @@
+# Set Hadoop-specific environment variables here.
+
+# The only required environment variable is JAVA_HOME.  All others are
+# optional.  When running a distributed configuration it is best to
+# set JAVA_HOME in this file, so that it is correctly defined on
+# remote nodes.
+
+# The java implementation to use.  Required.
+export JAVA_HOME=${JAVA_HOME}
+
+# Location where Hadoop is installed
+export HADOOP_PREFIX=${HADOOP_PREFIX}
+export HADOOP_HOME=${HADOOP_PREFIX}/share/hadoop
+
+# Extra Java CLASSPATH elements.  Optional.
+# export HADOOP_CLASSPATH=
+
+# The maximum amount of heap to use, in MB. Default is 1000.
+# export HADOOP_HEAPSIZE=2000
+
+# Extra Java runtime options.  Empty by default.
+# export HADOOP_OPTS=-server
+
+# Command specific options appended to HADOOP_OPTS when specified
+export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_NAMENODE_OPTS"
+export HADOOP_SECONDARYNAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_SECONDARYNAMENODE_OPTS"
+export HADOOP_DATANODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_DATANODE_OPTS"
+export HADOOP_BALANCER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_BALANCER_OPTS"
+export HADOOP_JOBTRACKER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_JOBTRACKER_OPTS"
+# export HADOOP_TASKTRACKER_OPTS=
+# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
+# export HADOOP_CLIENT_OPTS
+
+# Extra ssh options.  Empty by default.
+# export HADOOP_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HADOOP_CONF_DIR"
+
+# File naming remote slave hosts.  $HADOOP_HOME/conf/slaves by default.
+# export HADOOP_SLAVES=${HADOOP_HOME}/conf/slaves
+
+# host:path where hadoop code should be rsync'd from.  Unset by default.
+# export HADOOP_MASTER=master:/home/$USER/src/hadoop
+
+# Seconds to sleep between slave commands.  Unset by default.  This
+# can be useful in large clusters, where, e.g., slave rsyncs can
+# otherwise arrive faster than the master can service them.
+# export HADOOP_SLAVE_SLEEP=0.1
+
+# The directory where pid files are stored. /tmp by default.
+HADOOP_PID_DIR=${HADOOP_PID_DIR}
+export HADOOP_PID_DIR=${HADOOP_PID_DIR:-$HADOOP_PREFIX/var/run}
+
+# A string representing this instance of hadoop. $USER by default.
+export HADOOP_IDENT_STRING=`whoami`
+
+# The scheduling priority for daemon processes.  See 'man nice'.
+# export HADOOP_NICENESS=10
+
+# Where log files are stored.  $HADOOP_HOME/logs by default.
+HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$HADOOP_IDENT_STRING
+export HADOOP_LOG_DIR=${HADOOP_LOG_DIR:-$HADOOP_HOME/var/log}
+
+# Hadoop configuration directory
+HADOOP_CONF_DIR=${HADOOP_CONF_DIR}
+export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-$HADOOP_PREFIX/conf}

+ 23 - 0
src/packages/templates/conf/hdfs-site.xml

@@ -0,0 +1,23 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+  <property>
+    <name>dfs.replication</name>
+    <value>${HADOOP_REPLICATION}</value>
+  </property>
+  <property>
+    <name>dfs.name.dir</name>
+    <value>${HADOOP_NN_DIR}</value>
+  </property>
+  <property>
+    <name>dfs.data.dir</name>
+    <value>${HADOOP_DN_DIR}</value>
+  </property>
+  <property>
+    <name>hadoop.tmp.dir</name>
+    <value>/tmp</value>
+  </property>
+</configuration>

+ 31 - 0
src/packages/templates/conf/mapred-site.xml

@@ -0,0 +1,31 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+  <property>
+    <name>mapred.job.tracker</name>
+    <value>${HADOOP_JT_HOST}</value>
+  </property>
+
+  <property>
+    <name>mapred.system.dir</name>
+    <value>/user/mapred/system</value>
+  </property>
+
+  <property>
+    <name>mapred.local.dir</name>
+    <value>${HADOOP_MAPRED_DIR}</value>
+  </property>
+
+  <property>
+    <name>hadoop.tmp.dir</name>
+    <value>/tmp</value>
+  </property>
+
+  <property>
+    <name>mapred.jobtracker.taskScheduler</name>
+    <value>${HADOOP_TASK_SCHEDULER}</value>
+  </property>
+</configuration>

+ 178 - 0
src/packages/update-hadoop-env.sh

@@ -0,0 +1,178 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script configures hadoop-env.sh and symlinks directories for
+# relocating RPM locations.
+
+usage() {
+  echo "
+usage: $0 <parameters>
+  Required parameters:
+     --prefix=PREFIX             path to install into
+
+  Optional parameters:
+     --arch=i386                 OS Architecture
+     --bin-dir=PREFIX/bin        Executable directory
+     --conf-dir=/etc/hadoop      Configuration directory
+     --log-dir=/var/log/hadoop   Log directory
+     --pid-dir=/var/run          PID file location
+     --sbin-dir=PREFIX/sbin      System executable directory
+  "
+  exit 1
+}
+
+template_generator() {
+  REGEX='(\$\{[a-zA-Z_][a-zA-Z_0-9]*\})'
+  cat $1 |
+  while read line ; do
+    while [[ "$line" =~ $REGEX ]] ; do
+      LHS=${BASH_REMATCH[1]}
+      RHS="$(eval echo "\"$LHS\"")"
+      line=${line//$LHS/$RHS}
+    done
+    echo "$line" >> $2
+  done
+}
+
+OPTS=$(getopt \
+  -n $0 \
+  -o '' \
+  -l 'arch:' \
+  -l 'prefix:' \
+  -l 'bin-dir:' \
+  -l 'conf-dir:' \
+  -l 'lib-dir:' \
+  -l 'log-dir:' \
+  -l 'pid-dir:' \
+  -l 'sbin-dir:' \
+  -l 'uninstall' \
+  -- "$@")
+
+if [ $? != 0 ] ; then
+    usage
+fi
+
+eval set -- "${OPTS}"
+while true ; do
+  case "$1" in
+    --arch)
+      ARCH=$2 ; shift 2
+      ;;
+    --prefix)
+      PREFIX=$2 ; shift 2
+      ;;
+    --bin-dir)
+      BIN_DIR=$2 ; shift 2
+      ;;
+    --log-dir)
+      LOG_DIR=$2 ; shift 2
+      ;;
+    --lib-dir)
+      LIB_DIR=$2 ; shift 2
+      ;;
+    --conf-dir)
+      CONF_DIR=$2 ; shift 2
+      ;;
+    --pid-dir)
+      PID_DIR=$2 ; shift 2
+      ;;
+    --sbin-dir)
+      SBIN_DIR=$2 ; shift 2
+      ;;
+    --uninstall)
+      UNINSTALL=1; shift
+      ;;
+    --)
+      shift ; break
+      ;;
+    *)
+      echo "Unknown option: $1"
+      usage
+      exit 1
+      ;;
+  esac
+done
+
+for var in PREFIX; do
+  if [ -z "$(eval "echo \$$var")" ]; then
+    echo Missing param: $var
+    usage
+  fi
+done
+
+ARCH=${ARCH:-i386}
+BIN_DIR=${BIN_DIR:-$PREFIX/share/hadoop/bin}
+CONF_DIR=${CONF_DIR:-$PREFIX/etc/hadoop}
+LIB_DIR=${LIB_DIR:-$PREFIX/lib}
+LOG_DIR=${LOG_DIR:-$PREFIX/var/log}
+PID_DIR=${PID_DIR:-$PREFIX/var/run}
+SBIN_DIR=${SBIN_DIR:-$PREFIX/share/hadoop/sbin}
+UNINSTALL=${UNINSTALL:-0}
+
+if [ "${ARCH}" != "i386" ]; then
+  LIB_DIR=${LIB_DIR}64
+fi
+
+if [ "${UNINSTALL}" -eq "1" ]; then
+  # Remove symlinks
+  if [ "${CONF_DIR}" != "${PREFIX}/etc/hadoop" ]; then
+    rm -rf ${PREFIX}/etc/hadoop
+  fi
+  rm -f /etc/default/hadoop-env.sh
+  rm -f /etc/profile.d/hadoop-env.sh
+else
+  # Create symlinks
+  if [ "${CONF_DIR}" != "${PREFIX}/etc/hadoop" ]; then
+    mkdir -p ${PREFIX}/etc
+    ln -sf ${CONF_DIR} ${PREFIX}/etc/hadoop
+  fi
+  ln -sf ${CONF_DIR}/hadoop-env.sh /etc/default/hadoop-env.sh
+  ln -sf ${CONF_DIR}/hadoop-env.sh /etc/profile.d/hadoop-env.sh
+
+  mkdir -p ${LOG_DIR}
+  mkdir -p ${LOG_DIR}/hdfs
+  mkdir -p ${LOG_DIR}/mapred
+  chown root:hadoop ${LOG_DIR}
+  chown hdfs ${LOG_DIR}/hdfs
+  chown mapred ${LOG_DIR}/mapred
+  chmod 755 ${LOG_DIR}
+  chmod 755 ${LOG_DIR}/hdfs
+  chmod 755 ${LOG_DIR}/mapred
+
+  if [ ! -d ${PID_DIR} ]; then
+    mkdir -p ${PID_DIR}
+    chown root:hadoop ${PID_DIR}
+    chmod 775 ${PID_DIR}
+  fi
+
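+  # Regenerate hadoop-env.sh from the bundled template into a temp file and
+  # install it into the configuration directory; on Debian, JAVA_HOME is
+  # derived from the configured java alternative when not already set.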
+  TFILE="/tmp/$(basename $0).$$.tmp"
+  if [ -z "${JAVA_HOME}" ]; then
+    if [ -e /etc/debian_version ]; then
+      JAVA_HOME=`update-alternatives --config java | grep java | cut -f2 -d':' | cut -f2 -d' ' | sed -e 's/\/bin\/java//'`
+    else
+      JAVA_HOME=/usr/java/default
+    fi
+  fi
+  HADOOP_CONF_DIR=${CONF_DIR}
+  HADOOP_LOG_DIR=${LOG_DIR}
+  HADOOP_PID_DIR=${PID_DIR}
+  HADOOP_PREFIX=${PREFIX}
+  HADOOP_HOME=${PREFIX}/share/hadoop
+  template_generator ${PREFIX}/share/hadoop/templates/conf/hadoop-env.sh $TFILE
+  cp ${TFILE} ${CONF_DIR}/hadoop-env.sh
+  rm -f ${TFILE}
+fi