
HADOOP-9902. Shell script rewrite (aw)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1618847 13f79535-47bb-0310-9956-ffa450edef68
Allen Wittenauer 10 years ago
parent
commit
31467453ec
35 changed files with 3090 additions and 1914 deletions
  1. +7 -0     hadoop-assemblies/src/main/resources/assemblies/hadoop-dist.xml
  2. +2 -0     hadoop-common-project/hadoop-common/CHANGES.txt
  3. +132 -98  hadoop-common-project/hadoop-common/src/main/bin/hadoop
  4. +147 -249 hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh
  5. +28 -186  hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemon.sh
  6. +26 -11   hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemons.sh
  7. +1036 -0  hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh
  8. +93 -0    hadoop-common-project/hadoop-common/src/main/bin/hadoop-layout.sh.example
  9. +16 -35   hadoop-common-project/hadoop-common/src/main/bin/rcc
 10. +23 -28   hadoop-common-project/hadoop-common/src/main/bin/slaves.sh
 11. +26 -12   hadoop-common-project/hadoop-common/src/main/bin/start-all.sh
 12. +25 -11   hadoop-common-project/hadoop-common/src/main/bin/stop-all.sh
 13. +372 -49  hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh
 14. +3 -3     hadoop-hdfs-project/hadoop-hdfs/src/main/bin/distribute-exclude.sh
 15. +200 -213 hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs
 16. +58 -10   hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs-config.sh
 17. +28 -12   hadoop-hdfs-project/hadoop-hdfs/src/main/bin/refresh-namenodes.sh
 18. +24 -6    hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-balancer.sh
 19. +86 -56   hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-dfs.sh
 20. +25 -9    hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-secure-dns.sh
 21. +24 -6    hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-balancer.sh
 22. +65 -40   hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-dfs.sh
 23. +25 -9    hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-secure-dns.sh
 24. +104 -113 hadoop-mapreduce-project/bin/mapred
 25. +46 -26   hadoop-mapreduce-project/bin/mapred-config.sh
 26. +21 -122  hadoop-mapreduce-project/bin/mr-jobhistory-daemon.sh
 27. +52 -8    hadoop-mapreduce-project/conf/mapred-env.sh
 28. +0 -70    hadoop-yarn-project/hadoop-yarn/bin/slaves.sh
 29. +24 -10   hadoop-yarn-project/hadoop-yarn/bin/start-yarn.sh
 30. +28 -12   hadoop-yarn-project/hadoop-yarn/bin/stop-yarn.sh
 31. +148 -232 hadoop-yarn-project/hadoop-yarn/bin/yarn
 32. +72 -44   hadoop-yarn-project/hadoop-yarn/bin/yarn-config.sh
 33. +21 -136  hadoop-yarn-project/hadoop-yarn/bin/yarn-daemon.sh
 34. +23 -15   hadoop-yarn-project/hadoop-yarn/bin/yarn-daemons.sh
 35. +80 -83   hadoop-yarn-project/hadoop-yarn/conf/yarn-env.sh

+ 7 - 0
hadoop-assemblies/src/main/resources/assemblies/hadoop-dist.xml

@@ -29,6 +29,7 @@
         <exclude>*-config.cmd</exclude>
         <exclude>start-*.cmd</exclude>
         <exclude>stop-*.cmd</exclude>
+        <exclude>hadoop-layout.sh.example</exclude>
       </excludes>
       <fileMode>0755</fileMode>
     </fileSet>
@@ -42,6 +43,8 @@
       <includes>
         <include>*-config.sh</include>
         <include>*-config.cmd</include>
+        <include>*-functions.sh</include>
+        <include>hadoop-layout.sh.example</include>
       </includes>
       <fileMode>0755</fileMode>
     </fileSet>
@@ -57,6 +60,10 @@
         <exclude>hadoop.cmd</exclude>
         <exclude>hdfs.cmd</exclude>
         <exclude>hadoop-config.cmd</exclude>
+        <exclude>hadoop-functions.sh</exclude>
+        <exclude>hadoop-layout.sh.example</exclude>
+        <exclude>hdfs-config.cmd</exclude>
+        <exclude>hdfs-config.sh</exclude>
       </excludes>
       <fileMode>0755</fileMode>
     </fileSet>

+ 2 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -9,6 +9,8 @@ Trunk (Unreleased)
 
     HADOOP-10474 Move o.a.h.record to hadoop-streaming. (wheat9)
 
+    HADOOP-9902. Shell script rewrite (aw)
+
   NEW FEATURES
 
     HADOOP-10433. Key Management Server based on KeyProvider API. (tucu)

+ 132 - 98
hadoop-common-project/hadoop-common/src/main/bin/hadoop

@@ -15,130 +15,164 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# This script runs the hadoop core commands. 
-
-bin=`which $0`
-bin=`dirname ${bin}`
-bin=`cd "$bin" > /dev/null; pwd`
- 
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
-
-function print_usage(){
+function hadoop_usage()
+{
   echo "Usage: hadoop [--config confdir] COMMAND"
   echo "       where COMMAND is one of:"
-  echo "  fs                   run a generic filesystem user client"
-  echo "  version              print the version"
-  echo "  jar <jar>            run a jar file"
-  echo "  checknative [-a|-h]  check native hadoop and compression libraries availability"
-  echo "  distcp <srcurl> <desturl> copy file or directories recursively"
-  echo "  archive -archiveName NAME -p <parent path> <src>* <dest> create a hadoop archive"
+  echo "  archive -archiveName NAME -p <parent path> <src>* <dest>"
+  echo "                       create a Hadoop archive"
+  echo "  checknative [-a|-h]  check native Hadoop and compression "
+  echo "                         libraries availability"
   echo "  classpath            prints the class path needed to get the"
+  echo "                         Hadoop jar and the required libraries"
   echo "  credential           interact with credential providers"
-  echo "                       Hadoop jar and the required libraries"
   echo "  daemonlog            get/set the log level for each daemon"
+  echo "  distch path:owner:group:permisson"
+  echo "                       distributed metadata changer"
+  echo "  distcp <srcurl> <desturl> "
+  echo "                       copy file or directories recursively"
+  echo "  fs                   run a generic filesystem user client"
+  echo "  jar <jar>            run a jar file"
+  echo "  jnipath              prints the java.library.path"
+  echo "  key                  manage keys via the KeyProvider"
+  echo "  version              print the version"
   echo " or"
   echo "  CLASSNAME            run the class named CLASSNAME"
   echo ""
   echo "Most commands print help when invoked w/o parameters."
 }
 
+
+# This script runs the hadoop core commands.
+
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+  DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+  this="${BASH_SOURCE-$0}"
+  bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+  DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
+
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
+  . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
+else
+  echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh." 2>&1
+  exit 1
+fi
+
 if [ $# = 0 ]; then
-  print_usage
-  exit
+  hadoop_exit_with_usage 1
 fi
 
 COMMAND=$1
-case $COMMAND in
-  # usage flags
-  --help|-help|-h)
-    print_usage
-    exit
-    ;;
+shift
 
-  #hdfs commands
-  namenode|secondarynamenode|datanode|dfs|dfsadmin|fsck|balancer|fetchdt|oiv|dfsgroups|portmap|nfs3)
-    echo "DEPRECATED: Use of this script to execute hdfs command is deprecated." 1>&2
-    echo "Instead use the hdfs command for it." 1>&2
-    echo "" 1>&2
-    #try to locate hdfs and if present, delegate to it.  
-    shift
-    if [ -f "${HADOOP_HDFS_HOME}"/bin/hdfs ]; then
-      exec "${HADOOP_HDFS_HOME}"/bin/hdfs ${COMMAND/dfsgroups/groups}  "$@"
-    elif [ -f "${HADOOP_PREFIX}"/bin/hdfs ]; then
-      exec "${HADOOP_PREFIX}"/bin/hdfs ${COMMAND/dfsgroups/groups} "$@"
+case ${COMMAND} in
+  balancer|datanode|dfs|dfsadmin|dfsgroups|  \
+  namenode|secondarynamenode|fsck|fetchdt|oiv| \
+  portmap|nfs3)
+    hadoop_error "WARNING: Use of this script to execute ${COMMAND} is deprecated."
+    COMMAND=${COMMAND/dfsgroups/groups}
+    hadoop_error "WARNING: Attempting to execute replacement \"hdfs ${COMMAND}\" instead."
+    hadoop_error ""
+    #try to locate hdfs and if present, delegate to it.
+    if [[ -f "${HADOOP_HDFS_HOME}/bin/hdfs" ]]; then
+      # shellcheck disable=SC2086
+      exec "${HADOOP_HDFS_HOME}/bin/hdfs" \
+      --config "${HADOOP_CONF_DIR}" "${COMMAND}"  "$@"
+    elif [[ -f "${HADOOP_PREFIX}/bin/hdfs" ]]; then
+      # shellcheck disable=SC2086
+      exec "${HADOOP_PREFIX}/bin/hdfs" \
+      --config "${HADOOP_CONF_DIR}" "${COMMAND}" "$@"
     else
-      echo "HADOOP_HDFS_HOME not found!"
+      hadoop_error "HADOOP_HDFS_HOME not found!"
       exit 1
     fi
-    ;;
-
+  ;;
+  
   #mapred commands for backwards compatibility
   pipes|job|queue|mrgroups|mradmin|jobtracker|tasktracker)
-    echo "DEPRECATED: Use of this script to execute mapred command is deprecated." 1>&2
-    echo "Instead use the mapred command for it." 1>&2
-    echo "" 1>&2
+    hadoop_error "WARNING: Use of this script to execute ${COMMAND} is deprecated."
+    COMMAND=${COMMAND/mrgroups/groups}
+    hadoop_error "WARNING: Attempting to execute replacement \"mapred ${COMMAND}\" instead."
+    hadoop_error ""
     #try to locate mapred and if present, delegate to it.
-    shift
-    if [ -f "${HADOOP_MAPRED_HOME}"/bin/mapred ]; then
-      exec "${HADOOP_MAPRED_HOME}"/bin/mapred ${COMMAND/mrgroups/groups} "$@"
-    elif [ -f "${HADOOP_PREFIX}"/bin/mapred ]; then
-      exec "${HADOOP_PREFIX}"/bin/mapred ${COMMAND/mrgroups/groups} "$@"
+    if [[ -f "${HADOOP_MAPRED_HOME}/bin/mapred" ]]; then
+      exec "${HADOOP_MAPRED_HOME}/bin/mapred" \
+      --config "${HADOOP_CONF_DIR}" "${COMMAND}" "$@"
+    elif [[ -f "${HADOOP_PREFIX}/bin/mapred" ]]; then
+      exec "${HADOOP_PREFIX}/bin/mapred" \
+      --config "${HADOOP_CONF_DIR}" "${COMMAND}" "$@"
     else
-      echo "HADOOP_MAPRED_HOME not found!"
+      hadoop_error "HADOOP_MAPRED_HOME not found!"
       exit 1
     fi
-    ;;
-
-  #core commands  
-  *)
-    # the core commands
-    if [ "$COMMAND" = "fs" ] ; then
-      CLASS=org.apache.hadoop.fs.FsShell
-    elif [ "$COMMAND" = "version" ] ; then
-      CLASS=org.apache.hadoop.util.VersionInfo
-    elif [ "$COMMAND" = "jar" ] ; then
-      CLASS=org.apache.hadoop.util.RunJar
-    elif [ "$COMMAND" = "key" ] ; then
-      CLASS=org.apache.hadoop.crypto.key.KeyShell
-    elif [ "$COMMAND" = "checknative" ] ; then
-      CLASS=org.apache.hadoop.util.NativeLibraryChecker
-    elif [ "$COMMAND" = "distcp" ] ; then
-      CLASS=org.apache.hadoop.tools.DistCp
-      CLASSPATH=${CLASSPATH}:${TOOL_PATH}
-    elif [ "$COMMAND" = "daemonlog" ] ; then
-      CLASS=org.apache.hadoop.log.LogLevel
-    elif [ "$COMMAND" = "archive" ] ; then
-      CLASS=org.apache.hadoop.tools.HadoopArchives
-      CLASSPATH=${CLASSPATH}:${TOOL_PATH}
-    elif [ "$COMMAND" = "credential" ] ; then
-      CLASS=org.apache.hadoop.security.alias.CredentialShell
-    elif [ "$COMMAND" = "classpath" ] ; then
-      if [ "$#" -eq 1 ]; then
-        # No need to bother starting up a JVM for this simple case.
-        echo $CLASSPATH
-        exit
-      else
-        CLASS=org.apache.hadoop.util.Classpath
-      fi
-    elif [[ "$COMMAND" = -*  ]] ; then
-        # class and package names cannot begin with a -
-        echo "Error: No command named \`$COMMAND' was found. Perhaps you meant \`hadoop ${COMMAND#-}'"
-        exit 1
-    else
-      CLASS=$COMMAND
+  ;;
+  archive)
+    CLASS=org.apache.hadoop.tools.HadoopArchives
+    hadoop_add_classpath "${TOOL_PATH}"
+  ;;
+  checknative)
+    CLASS=org.apache.hadoop.util.NativeLibraryChecker
+  ;;
+  classpath)
+    if [[ "$#" -eq 1 ]]; then
+      CLASS=org.apache.hadoop.util.Classpath
+    else   
+      hadoop_finalize
+      echo "${CLASSPATH}"
+      exit 0
     fi
-    shift
-    
-    # Always respect HADOOP_OPTS and HADOOP_CLIENT_OPTS
-    HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+  ;;
+  credential)
+    CLASS=org.apache.hadoop.security.alias.CredentialShell
+  ;;
+  daemonlog)
+    CLASS=org.apache.hadoop.log.LogLevel
+  ;;
+  distch)
+    CLASS=org.apache.hadoop.tools.DistCh
+    hadoop_add_classpath "${TOOL_PATH}"
+  ;;
+  distcp)
+    CLASS=org.apache.hadoop.tools.DistCp
+    hadoop_add_classpath "${TOOL_PATH}"
+  ;;
+  fs)
+    CLASS=org.apache.hadoop.fs.FsShell
+  ;;
+  jar)
+    CLASS=org.apache.hadoop.util.RunJar
+  ;;
+  jnipath)
+    hadoop_finalize
+    echo "${JAVA_LIBRARY_PATH}"
+    exit 0
+  ;;
+  key)
+    CLASS=org.apache.hadoop.crypto.key.KeyShell
+  ;;
+  version)
+    CLASS=org.apache.hadoop.util.VersionInfo
+  ;;
+  -*|hdfs)
+    hadoop_exit_with_usage 1
+  ;;
+  *)
+    CLASS="${COMMAND}"
+  ;;
+esac
 
-    #make sure security appender is turned off
-    HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender}"
+# Always respect HADOOP_OPTS and HADOOP_CLIENT_OPTS
+HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
 
-    export CLASSPATH=$CLASSPATH
-    exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@"
-    ;;
+hadoop_add_param HADOOP_OPTS Xmx "${JAVA_HEAP_MAX}"
+
+hadoop_finalize
+export CLASSPATH
+hadoop_java_exec "${COMMAND}" "${CLASS}" "$@"
 
-esac
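
For illustration, two behaviors that fall out of the case statement above (the
invocations are examples; the warning text is what the script itself emits):

    # "hadoop classpath" with no further arguments now short-circuits:
    # hadoop_finalize runs and ${CLASSPATH} is echoed with no JVM started
    $ hadoop classpath

    # deprecated HDFS commands warn on stderr, then re-exec the hdfs script
    $ hadoop dfsadmin -report
    WARNING: Use of this script to execute dfsadmin is deprecated.
    WARNING: Attempting to execute replacement "hdfs dfsadmin" instead.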

+ 147 - 249
hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh

@@ -1,3 +1,5 @@
+#
+#
 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
@@ -13,280 +15,176 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+####
+# IMPORTANT
+####
+
+## The hadoop-config.sh tends to get executed by non-Hadoop scripts.
+## Those parts expect this script to parse/manipulate $@. In order
+## to maintain backward compatibility, this means a surprising
+## lack of functions for bits that would be much better off in
+## a function.
+##
+## In other words, yes, there are some bad things happening here and,
+## unless we want to break the rest of the ecosystem, we can't change them. :(
+
+
 # included in all the hadoop scripts with source command
 # should not be executable directly
 # also should not be passed any arguments, since we need original $*
-
-# Resolve links ($0 may be a softlink) and convert a relative path
-# to an absolute path.  NB: The -P option requires bash built-ins
-# or POSIX:2001 compliant cd and pwd.
-
-#   HADOOP_CLASSPATH Extra Java CLASSPATH entries.
 #
-#   HADOOP_USER_CLASSPATH_FIRST      When defined, the HADOOP_CLASSPATH is 
-#                                    added in the beginning of the global
-#                                    classpath. Can be defined, for example,
-#                                    by doing 
-#                                    export HADOOP_USER_CLASSPATH_FIRST=true
-#
-
-this="${BASH_SOURCE-$0}"
-common_bin=$(cd -P -- "$(dirname -- "$this")" && pwd -P)
-script="$(basename -- "$this")"
-this="$common_bin/$script"
-
-[ -f "$common_bin/hadoop-layout.sh" ] && . "$common_bin/hadoop-layout.sh"
-
-HADOOP_COMMON_DIR=${HADOOP_COMMON_DIR:-"share/hadoop/common"}
-HADOOP_COMMON_LIB_JARS_DIR=${HADOOP_COMMON_LIB_JARS_DIR:-"share/hadoop/common/lib"}
-HADOOP_COMMON_LIB_NATIVE_DIR=${HADOOP_COMMON_LIB_NATIVE_DIR:-"lib/native"}
-HDFS_DIR=${HDFS_DIR:-"share/hadoop/hdfs"}
-HDFS_LIB_JARS_DIR=${HDFS_LIB_JARS_DIR:-"share/hadoop/hdfs/lib"}
-YARN_DIR=${YARN_DIR:-"share/hadoop/yarn"}
-YARN_LIB_JARS_DIR=${YARN_LIB_JARS_DIR:-"share/hadoop/yarn/lib"}
-MAPRED_DIR=${MAPRED_DIR:-"share/hadoop/mapreduce"}
-MAPRED_LIB_JARS_DIR=${MAPRED_LIB_JARS_DIR:-"share/hadoop/mapreduce/lib"}
-
-# the root of the Hadoop installation
-# See HADOOP-6255 for directory structure layout
-HADOOP_DEFAULT_PREFIX=$(cd -P -- "$common_bin"/.. && pwd -P)
-HADOOP_PREFIX=${HADOOP_PREFIX:-$HADOOP_DEFAULT_PREFIX}
-export HADOOP_PREFIX
-
-#check to see if the conf dir is given as an optional argument
-if [ $# -gt 1 ]
-then
-    if [ "--config" = "$1" ]
-	  then
-	      shift
-	      confdir=$1
-	      if [ ! -d "$confdir" ]; then
-                echo "Error: Cannot find configuration directory: $confdir"
-                exit 1
-             fi
-	      shift
-	      HADOOP_CONF_DIR=$confdir
-    fi
-fi
- 
-# Allow alternate conf dir location.
-if [ -e "${HADOOP_PREFIX}/conf/hadoop-env.sh" ]; then
-  DEFAULT_CONF_DIR="conf"
-else
-  DEFAULT_CONF_DIR="etc/hadoop"
-fi
-
-export HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-$HADOOP_PREFIX/$DEFAULT_CONF_DIR}"
+# after doing more config, caller should also exec finalize
+# function to finish last minute/default configs for
+# settings that might be different between daemons & interactive
 
-if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
-  . "${HADOOP_CONF_DIR}/hadoop-env.sh"
-fi
-
-# User can specify hostnames or a file where the hostnames are (not both)
-if [[ ( "$HADOOP_SLAVES" != '' ) && ( "$HADOOP_SLAVE_NAMES" != '' ) ]] ; then
-  echo \
-    "Error: Please specify one variable HADOOP_SLAVES or " \
-    "HADOOP_SLAVE_NAME and not both."
+# you must be this high to ride the ride
+if [[ -z "${BASH_VERSINFO}" ]] || [[ "${BASH_VERSINFO}" -lt 3 ]]; then
+  echo "Hadoop requires bash v3 or better. Sorry."
   exit 1
 fi
 
-# Process command line options that specify hosts or file with host
-# list
-if [ $# -gt 1 ]
-then
-    if [ "--hosts" = "$1" ]
-    then
-        shift
-        export HADOOP_SLAVES="${HADOOP_CONF_DIR}/$1"
-        shift
-    elif [ "--hostnames" = "$1" ]
-    then
-        shift
-        export HADOOP_SLAVE_NAMES=$1
-        shift
-    fi
-fi
+# In order to get partially bootstrapped, we need to figure out where
+# we are located. Chances are good that our caller has already done
+# this work for us, but just in case...
 
-# User can specify hostnames or a file where the hostnames are (not both)
-# (same check as above but now we know it's command line options that cause
-# the problem)
-if [[ ( "$HADOOP_SLAVES" != '' ) && ( "$HADOOP_SLAVE_NAMES" != '' ) ]] ; then
-  echo \
-    "Error: Please specify one of --hosts or --hostnames options and not both."
-  exit 1
+if [[ -z "${HADOOP_LIBEXEC_DIR}" ]]; then
+  _hadoop_common_this="${BASH_SOURCE-$0}"
+  HADOOP_LIBEXEC_DIR=$(cd -P -- "$(dirname -- "${_hadoop_common_this}")" >/dev/null && pwd -P)
 fi
 
-# check if net.ipv6.bindv6only is set to 1
-bindv6only=$(/sbin/sysctl -n net.ipv6.bindv6only 2> /dev/null)
-if [ -n "$bindv6only" ] && [ "$bindv6only" -eq "1" ] && [ "$HADOOP_ALLOW_IPV6" != "yes" ]
-then
-  echo "Error: \"net.ipv6.bindv6only\" is set to 1 - Java networking could be broken"
-  echo "For more info: http://wiki.apache.org/hadoop/HadoopIPv6"
+# get our functions defined for usage later
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-functions.sh" ]]; then
+  . "${HADOOP_LIBEXEC_DIR}/hadoop-functions.sh"
+else
+  echo "ERROR: Unable to exec ${HADOOP_LIBEXEC_DIR}/hadoop-functions.sh." 1>&2
   exit 1
 fi
 
-# Newer versions of glibc use an arena memory allocator that causes virtual
-# memory usage to explode. This interacts badly with the many threads that
-# we use in Hadoop. Tune the variable down to prevent vmem explosion.
-export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-4}
-
-# Attempt to set JAVA_HOME if it is not set
-if [[ -z $JAVA_HOME ]]; then
-  # On OSX use java_home (or /Library for older versions)
-  if [ "Darwin" == "$(uname -s)" ]; then
-    if [ -x /usr/libexec/java_home ]; then
-      export JAVA_HOME=($(/usr/libexec/java_home))
-    else
-      export JAVA_HOME=(/Library/Java/Home)
-    fi
-  fi
-
-  # Bail if we did not detect it
-  if [[ -z $JAVA_HOME ]]; then
-    echo "Error: JAVA_HOME is not set and could not be found." 1>&2
-    exit 1
-  fi
-fi
-
-JAVA=$JAVA_HOME/bin/java
-
-# check envvars which might override default args
-if [ "$HADOOP_HEAPSIZE" != "" ]; then
-  #echo "run with heapsize $HADOOP_HEAPSIZE"
-  JAVA_HEAP_MAX="-Xmx""$HADOOP_HEAPSIZE""m"
-  #echo $JAVA_HEAP_MAX
-fi
-
-# CLASSPATH initially contains $HADOOP_CONF_DIR
-CLASSPATH="${HADOOP_CONF_DIR}"
-
-# so that filenames w/ spaces are handled correctly in loops below
-IFS=
-
-if [ "$HADOOP_COMMON_HOME" = "" ]; then
-  if [ -d "${HADOOP_PREFIX}/$HADOOP_COMMON_DIR" ]; then
-    export HADOOP_COMMON_HOME=$HADOOP_PREFIX
-  fi
-fi
-
-# for releases, add core hadoop jar & webapps to CLASSPATH
-if [ -d "$HADOOP_COMMON_HOME/$HADOOP_COMMON_DIR/webapps" ]; then
-  CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/$HADOOP_COMMON_DIR
+# allow overrides of the above and pre-defines of the below
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-layout.sh" ]]; then
+  . "${HADOOP_LIBEXEC_DIR}/hadoop-layout.sh"
 fi
 
-if [ -d "$HADOOP_COMMON_HOME/$HADOOP_COMMON_LIB_JARS_DIR" ]; then
-  CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/$HADOOP_COMMON_LIB_JARS_DIR'/*'
-fi
-
-CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/$HADOOP_COMMON_DIR'/*'
-
-# default log directory & file
-if [ "$HADOOP_LOG_DIR" = "" ]; then
-  HADOOP_LOG_DIR="$HADOOP_PREFIX/logs"
-fi
-if [ "$HADOOP_LOGFILE" = "" ]; then
-  HADOOP_LOGFILE='hadoop.log'
-fi
-
-# default policy file for service-level authorization
-if [ "$HADOOP_POLICYFILE" = "" ]; then
-  HADOOP_POLICYFILE="hadoop-policy.xml"
-fi
-
-# restore ordinary behaviour
-unset IFS
-
-# setup 'java.library.path' for native-hadoop code if necessary
-
-if [ -d "${HADOOP_PREFIX}/build/native" -o -d "${HADOOP_PREFIX}/$HADOOP_COMMON_LIB_NATIVE_DIR" ]; then
-    
-  if [ -d "${HADOOP_PREFIX}/$HADOOP_COMMON_LIB_NATIVE_DIR" ]; then
-    if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
-      JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:${HADOOP_PREFIX}/$HADOOP_COMMON_LIB_NATIVE_DIR
-    else
-      JAVA_LIBRARY_PATH=${HADOOP_PREFIX}/$HADOOP_COMMON_LIB_NATIVE_DIR
-    fi
-  fi
-fi
-
-# setup a default TOOL_PATH
-TOOL_PATH="${TOOL_PATH:-$HADOOP_PREFIX/share/hadoop/tools/lib/*}"
-
-HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_LOG_DIR"
-HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.file=$HADOOP_LOGFILE"
-HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.home.dir=$HADOOP_PREFIX"
-HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_IDENT_STRING"
-HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.root.logger=${HADOOP_ROOT_LOGGER:-INFO,console}"
-if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
-  HADOOP_OPTS="$HADOOP_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
-  export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$JAVA_LIBRARY_PATH
-fi  
-HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.policy.file=$HADOOP_POLICYFILE"
-
-# Disable ipv6 as it can cause issues
-HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
-
-# put hdfs in classpath if present
-if [ "$HADOOP_HDFS_HOME" = "" ]; then
-  if [ -d "${HADOOP_PREFIX}/$HDFS_DIR" ]; then
-    export HADOOP_HDFS_HOME=$HADOOP_PREFIX
-  fi
-fi
-
-if [ -d "$HADOOP_HDFS_HOME/$HDFS_DIR/webapps" ]; then
-  CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/$HDFS_DIR
-fi
+#
+# IMPORTANT! We are not executing user provided code yet!
+#
 
-if [ -d "$HADOOP_HDFS_HOME/$HDFS_LIB_JARS_DIR" ]; then
-  CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/$HDFS_LIB_JARS_DIR'/*'
-fi
+# Let's go!  Base definitions so we can move forward
+hadoop_bootstrap_init
 
-CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/$HDFS_DIR'/*'
+# let's find our conf.
+#
+# first, check and process params passed to us
+# we process this in-line so that we can directly modify $@
+# if something downstream is processing that directly,
+# we need to make sure our params have been ripped out
+# note that we do many of them here for various utilities.
+# this provides consistency and forces a more consistent
+# user experience
+
+
+# save these off in case our caller needs them
+# shellcheck disable=SC2034
+HADOOP_USER_PARAMS="$@"
+
+HADOOP_DAEMON_MODE="default"
+
+while [[ -z "${_hadoop_common_done}" ]]; do
+  case $1 in
+    --buildpaths)
+      # shellcheck disable=SC2034
+      HADOOP_ENABLE_BUILD_PATHS=true
+      shift
+    ;;
+    --config)
+      shift
+      confdir=$1
+      shift
+      if [[ -d "${confdir}" ]]; then
+        # shellcheck disable=SC2034
+        YARN_CONF_DIR="${confdir}"
+        # shellcheck disable=SC2034
+        HADOOP_CONF_DIR="${confdir}"
+      elif [[ -z "${confdir}" ]]; then
+        hadoop_error "ERROR: No parameter provided for --config "
+        hadoop_exit_with_usage 1
+      else
+        hadoop_error "ERROR: Cannot find configuration directory \"${confdir}\""
+        hadoop_exit_with_usage 1
+      fi
+    ;;
+    --daemon)
+      shift
+      HADOOP_DAEMON_MODE=$1
+      shift
+      if [[ -z "${HADOOP_DAEMON_MODE}" || \
+        ! "${HADOOP_DAEMON_MODE}" =~ ^st(art|op|atus)$ ]]; then
+        hadoop_error "ERROR: --daemon must be followed by either \"start\", \"stop\", or \"status\"."
+        hadoop_exit_with_usage 1
+      fi
+    ;;
+    --help|-help|-h|help|--h|--\?|-\?|\?)
+      hadoop_exit_with_usage 0
+    ;;
+    --hostnames)
+      shift
+      # shellcheck disable=SC2034
+      HADOOP_SLAVE_NAMES="$1"
+      shift
+    ;;
+    --hosts)
+      shift
+      hadoop_populate_slaves_file "$1"
+      shift
+    ;;
+    *)
+      _hadoop_common_done=true
+    ;;
+  esac
+done
+
+hadoop_find_confdir
+hadoop_exec_hadoopenv
 
-# put yarn in classpath if present
-if [ "$HADOOP_YARN_HOME" = "" ]; then
-  if [ -d "${HADOOP_PREFIX}/$YARN_DIR" ]; then
-    export HADOOP_YARN_HOME=$HADOOP_PREFIX
-  fi
-fi
+#
+# IMPORTANT! User provided code is now available!
+#
 
-if [ -d "$HADOOP_YARN_HOME/$YARN_DIR/webapps" ]; then
-  CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/$YARN_DIR
-fi
+# do all the OS-specific startup bits here
+# this allows us to get a decent JAVA_HOME,
+# call crle for LD_LIBRARY_PATH, etc.
+hadoop_os_tricks
 
-if [ -d "$HADOOP_YARN_HOME/$YARN_LIB_JARS_DIR" ]; then
-  CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/$YARN_LIB_JARS_DIR'/*'
-fi
+hadoop_java_setup
 
-CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/$YARN_DIR'/*'
+hadoop_basic_init
 
-# put mapred in classpath if present AND different from YARN
-if [ "$HADOOP_MAPRED_HOME" = "" ]; then
-  if [ -d "${HADOOP_PREFIX}/$MAPRED_DIR" ]; then
-    export HADOOP_MAPRED_HOME=$HADOOP_PREFIX
-  fi
+# inject any sub-project overrides, defaults, etc.
+if declare -F hadoop_subproject_init >/dev/null ; then
+  hadoop_subproject_init
 fi
 
-if [ "$HADOOP_MAPRED_HOME/$MAPRED_DIR" != "$HADOOP_YARN_HOME/$YARN_DIR" ] ; then
-  if [ -d "$HADOOP_MAPRED_HOME/$MAPRED_DIR/webapps" ]; then
-    CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/$MAPRED_DIR
-  fi
+# get the native libs in there pretty quick
+hadoop_add_javalibpath "${HADOOP_PREFIX}/build/native"
+hadoop_add_javalibpath "${HADOOP_PREFIX}/${HADOOP_COMMON_LIB_NATIVE_DIR}"
 
-  if [ -d "$HADOOP_MAPRED_HOME/$MAPRED_LIB_JARS_DIR" ]; then
-    CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/$MAPRED_LIB_JARS_DIR'/*'
-  fi
-
-  CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/$MAPRED_DIR'/*'
-fi
+# get the basic java class path for these subprojects
+# in as quickly as possible since other stuff
+# will definitely depend upon it.
+#
+# at some point, this will get replaced with something pluggable
+# so that these functions can sit in their projects rather than
+# common
+#
+for i in common hdfs yarn mapred
+do
+  hadoop_add_to_classpath_$i
+done
 
-# Add the user-specified CLASSPATH via HADOOP_CLASSPATH
-# Add it first or last depending on if user has
-# set env-var HADOOP_USER_CLASSPATH_FIRST
-if [ "$HADOOP_CLASSPATH" != "" ]; then
-  # Prefix it if its to be preceded
-  if [ "$HADOOP_USER_CLASSPATH_FIRST" != "" ]; then
-    CLASSPATH=${HADOOP_CLASSPATH}:${CLASSPATH}
-  else
-    CLASSPATH=${CLASSPATH}:${HADOOP_CLASSPATH}
-  fi
+#
+# backwards compatibility. new stuff should
+# call this when they are ready
+#
+if [[ -z "${HADOOP_NEW_CONFIG}" ]]; then
+  hadoop_finalize
 fi
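
The net effect is a new contract for consumers: set HADOOP_NEW_CONFIG=true before
sourcing, then call hadoop_finalize (and one of the exec helpers) when ready. A
minimal sketch of a downstream script, assuming a standard layout (the libexec
path, extra jar directory, and tool class below are hypothetical):

    HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-/opt/hadoop/libexec}"
    HADOOP_NEW_CONFIG=true                      # suppress the auto-finalize above
    . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
    hadoop_add_classpath "/opt/mytool/lib/*"    # hypothetical extra jars
    hadoop_finalize                             # classpath, libpath, HADOOP_OPTS fixups
    export CLASSPATH
    hadoop_java_exec mytool org.example.MyTool "$@"   # hypothetical class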

+ 28 - 186
hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemon.sh

@@ -15,200 +15,42 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
-# Runs a Hadoop command as a daemon.
-#
-# Environment Variables
-#
-#   HADOOP_CONF_DIR  Alternate conf dir. Default is ${HADOOP_PREFIX}/conf.
-#   HADOOP_LOG_DIR   Where log files are stored.  PWD by default.
-#   HADOOP_MASTER    host:path where hadoop code should be rsync'd from
-#   HADOOP_PID_DIR   The pid files are stored. /tmp by default.
-#   HADOOP_IDENT_STRING   A string representing this instance of hadoop. $USER by default
-#   HADOOP_NICENESS The scheduling priority for daemons. Defaults to 0.
-##
-
-usage="Usage: hadoop-daemon.sh [--config <conf-dir>] [--hosts hostlistfile] [--script script] (start|stop) <hadoop-command> <args...>"
-
-# if no args specified, show usage
-if [ $# -le 1 ]; then
-  echo $usage
-  exit 1
-fi
-
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
-
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
-
-# get arguments
-
-#default value
-hadoopScript="$HADOOP_PREFIX"/bin/hadoop
-if [ "--script" = "$1" ]
-  then
-    shift
-    hadoopScript=$1
-    shift
-fi
-startStop=$1
-shift
-command=$1
-shift
-
-hadoop_rotate_log ()
+function hadoop_usage
 {
-    log=$1;
-    num=5;
-    if [ -n "$2" ]; then
-	num=$2
-    fi
-    if [ -f "$log" ]; then # rotate logs
-	while [ $num -gt 1 ]; do
-	    prev=`expr $num - 1`
-	    [ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num"
-	    num=$prev
-	done
-	mv "$log" "$log.$num";
-    fi
+  echo "Usage: hadoop-daemon.sh [--config confdir] (start|stop|status) <hadoop-command> <args...>"
 }
 
-if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
-  . "${HADOOP_CONF_DIR}/hadoop-env.sh"
-fi
-
-# Determine if we're starting a secure datanode, and if so, redefine appropriate variables
-if [ "$command" == "datanode" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_USER" ]; then
-  export HADOOP_PID_DIR=$HADOOP_SECURE_DN_PID_DIR
-  export HADOOP_LOG_DIR=$HADOOP_SECURE_DN_LOG_DIR
-  export HADOOP_IDENT_STRING=$HADOOP_SECURE_DN_USER
-  starting_secure_dn="true"
-fi
-
-#Determine if we're starting a privileged NFS, if so, redefine the appropriate variables
-if [ "$command" == "nfs3" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_PRIVILEGED_NFS_USER" ]; then
-    export HADOOP_PID_DIR=$HADOOP_PRIVILEGED_NFS_PID_DIR
-    export HADOOP_LOG_DIR=$HADOOP_PRIVILEGED_NFS_LOG_DIR
-    export HADOOP_IDENT_STRING=$HADOOP_PRIVILEGED_NFS_USER
-    starting_privileged_nfs="true"
-fi
-
-if [ "$HADOOP_IDENT_STRING" = "" ]; then
-  export HADOOP_IDENT_STRING="$USER"
-fi
-
-
-# get log directory
-if [ "$HADOOP_LOG_DIR" = "" ]; then
-  export HADOOP_LOG_DIR="$HADOOP_PREFIX/logs"
-fi
-
-if [ ! -w "$HADOOP_LOG_DIR" ] ; then
-  mkdir -p "$HADOOP_LOG_DIR"
-  chown $HADOOP_IDENT_STRING $HADOOP_LOG_DIR
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+  DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+  this="${BASH_SOURCE-$0}"
+  bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+  DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
+
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
+  . "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
+else
+  echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
+  exit 1
 fi
 
-if [ "$HADOOP_PID_DIR" = "" ]; then
-  HADOOP_PID_DIR=/tmp
+if [[ $# = 0 ]]; then
+  hadoop_exit_with_usage 1
 fi
 
-# some variables
-export HADOOP_LOGFILE=hadoop-$HADOOP_IDENT_STRING-$command-$HOSTNAME.log
-export HADOOP_ROOT_LOGGER=${HADOOP_ROOT_LOGGER:-"INFO,RFA"}
-export HADOOP_SECURITY_LOGGER=${HADOOP_SECURITY_LOGGER:-"INFO,RFAS"}
-export HDFS_AUDIT_LOGGER=${HDFS_AUDIT_LOGGER:-"INFO,NullAppender"}
-log=$HADOOP_LOG_DIR/hadoop-$HADOOP_IDENT_STRING-$command-$HOSTNAME.out
-pid=$HADOOP_PID_DIR/hadoop-$HADOOP_IDENT_STRING-$command.pid
-HADOOP_STOP_TIMEOUT=${HADOOP_STOP_TIMEOUT:-5}
+daemonmode=$1
+shift
 
-# Set default scheduling priority
-if [ "$HADOOP_NICENESS" = "" ]; then
-    export HADOOP_NICENESS=0
+if [[ -z "${HADOOP_HDFS_HOME}" ]]; then
+  hdfsscript="${HADOOP_PREFIX}/bin/hdfs"
+else
+  hdfsscript="${HADOOP_HDFS_HOME}/bin/hdfs"
 fi
 
-case $startStop in
-
-  (start)
-
-    [ -w "$HADOOP_PID_DIR" ] ||  mkdir -p "$HADOOP_PID_DIR"
-
-    if [ -f $pid ]; then
-      if kill -0 `cat $pid` > /dev/null 2>&1; then
-        echo $command running as process `cat $pid`.  Stop it first.
-        exit 1
-      fi
-    fi
-
-    if [ "$HADOOP_MASTER" != "" ]; then
-      echo rsync from $HADOOP_MASTER
-      rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' $HADOOP_MASTER/ "$HADOOP_PREFIX"
-    fi
-
-    hadoop_rotate_log $log
-    echo starting $command, logging to $log
-    cd "$HADOOP_PREFIX"
-    case $command in
-      namenode|secondarynamenode|datanode|journalnode|dfs|dfsadmin|fsck|balancer|zkfc)
-        if [ -z "$HADOOP_HDFS_HOME" ]; then
-          hdfsScript="$HADOOP_PREFIX"/bin/hdfs
-        else
-          hdfsScript="$HADOOP_HDFS_HOME"/bin/hdfs
-        fi
-        nohup nice -n $HADOOP_NICENESS $hdfsScript --config $HADOOP_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null &
-      ;;
-      (*)
-        nohup nice -n $HADOOP_NICENESS $hadoopScript --config $HADOOP_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null &
-      ;;
-    esac
-    echo $! > $pid
-    sleep 1
-    head "$log"
-    # capture the ulimit output
-    if [ "true" = "$starting_secure_dn" ]; then
-      echo "ulimit -a for secure datanode user $HADOOP_SECURE_DN_USER" >> $log
-      # capture the ulimit info for the appropriate user
-      su --shell=/bin/bash $HADOOP_SECURE_DN_USER -c 'ulimit -a' >> $log 2>&1
-    elif [ "true" = "$starting_privileged_nfs" ]; then
-        echo "ulimit -a for privileged nfs user $HADOOP_PRIVILEGED_NFS_USER" >> $log
-        su --shell=/bin/bash $HADOOP_PRIVILEGED_NFS_USER -c 'ulimit -a' >> $log 2>&1
-    else
-      echo "ulimit -a for user $USER" >> $log
-      ulimit -a >> $log 2>&1
-    fi
-    sleep 3;
-    if ! ps -p $! > /dev/null ; then
-      exit 1
-    fi
-    ;;
-          
-  (stop)
-
-    if [ -f $pid ]; then
-      TARGET_PID=`cat $pid`
-      if kill -0 $TARGET_PID > /dev/null 2>&1; then
-        echo stopping $command
-        kill $TARGET_PID
-        sleep $HADOOP_STOP_TIMEOUT
-        if kill -0 $TARGET_PID > /dev/null 2>&1; then
-          echo "$command did not stop gracefully after $HADOOP_STOP_TIMEOUT seconds: killing with kill -9"
-          kill -9 $TARGET_PID
-        fi
-      else
-        echo no $command to stop
-      fi
-      rm -f $pid
-    else
-      echo no $command to stop
-    fi
-    ;;
-
-  (*)
-    echo $usage
-    exit 1
-    ;;
-
-esac
-
+exec "$hdfsscript" --config "${HADOOP_CONF_DIR}" --daemon "${daemonmode}" "$@"
 

+ 26 - 11
hadoop-common-project/hadoop-common/src/main/bin/hadoop-daemons.sh

@@ -18,19 +18,34 @@
 
 # Run a Hadoop command on all slave hosts.
 
-usage="Usage: hadoop-daemons.sh [--config confdir] [--hosts hostlistfile] [start|stop] command args..."
+function hadoop_usage
+{
+  echo "Usage: hadoop-daemons.sh [--config confdir] [--hosts hostlistfile] (start|stop|status) <hadoop-command> <args...>"
+}
+
+this="${BASH_SOURCE-$0}"
+bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+  DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+  DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
 
-# if no args specified, show usage
-if [ $# -le 1 ]; then
-  echo $usage
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
+  . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
+else
+  echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh." 2>&1
   exit 1
 fi
 
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
-
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
+if [[ $# = 0 ]]; then
+  hadoop_exit_with_usage 1
+fi
 
-exec "$bin/slaves.sh" --config $HADOOP_CONF_DIR cd "$HADOOP_PREFIX" \; "$bin/hadoop-daemon.sh" --config $HADOOP_CONF_DIR "$@"
+hadoop_connect_to_hosts "${bin}/hadoop-daemon.sh" \
+--config "${HADOOP_CONF_DIR}" "$@"

+ 1036 - 0
hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh

@@ -0,0 +1,1036 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+function hadoop_error
+{
+  # NOTE: This function is not user replaceable.
+
+  echo "$*" 1>&2
+}
+
+function hadoop_bootstrap_init
+{
+  # NOTE: This function is not user replaceable.
+
+  # the root of the Hadoop installation
+  # See HADOOP-6255 for the expected directory structure layout
+  
+  # By now, HADOOP_LIBEXEC_DIR should have been defined upstream
+  # We can piggyback off of that to figure out where the default
+  # HADOOP_PREFIX should be.  This allows us to run without
+  # HADOOP_PREFIX ever being defined by a human! As a consequence
+  # HADOOP_LIBEXEC_DIR now becomes perhaps the single most powerful
+  # env var within Hadoop.
+  if [[ -z "${HADOOP_LIBEXEC_DIR}" ]]; then
+    hadoop_error "HADOOP_LIBEXEC_DIR is not defined.  Exiting."
+    exit 1
+  fi
+  HADOOP_DEFAULT_PREFIX=$(cd -P -- "${HADOOP_LIBEXEC_DIR}/.." >/dev/null && pwd -P)
+  HADOOP_PREFIX=${HADOOP_PREFIX:-$HADOOP_DEFAULT_PREFIX}
+  export HADOOP_PREFIX
+  
+  #
+  # short-cuts. vendors may redefine these as well, preferably
+  # in hadoop-layouts.sh
+  #
+  HADOOP_COMMON_DIR=${HADOOP_COMMON_DIR:-"share/hadoop/common"}
+  HADOOP_COMMON_LIB_JARS_DIR=${HADOOP_COMMON_LIB_JARS_DIR:-"share/hadoop/common/lib"}
+  HADOOP_COMMON_LIB_NATIVE_DIR=${HADOOP_COMMON_LIB_NATIVE_DIR:-"lib/native"}
+  HDFS_DIR=${HDFS_DIR:-"share/hadoop/hdfs"}
+  HDFS_LIB_JARS_DIR=${HDFS_LIB_JARS_DIR:-"share/hadoop/hdfs/lib"}
+  YARN_DIR=${YARN_DIR:-"share/hadoop/yarn"}
+  YARN_LIB_JARS_DIR=${YARN_LIB_JARS_DIR:-"share/hadoop/yarn/lib"}
+  MAPRED_DIR=${MAPRED_DIR:-"share/hadoop/mapreduce"}
+  MAPRED_LIB_JARS_DIR=${MAPRED_LIB_JARS_DIR:-"share/hadoop/mapreduce/lib"}
+  # setup a default TOOL_PATH
+  TOOL_PATH=${TOOL_PATH:-${HADOOP_PREFIX}/share/hadoop/tools/lib/*}
+
+  export HADOOP_OS_TYPE=${HADOOP_OS_TYPE:-$(uname -s)}
+
+  
+  # defaults
+  export HADOOP_OPTS=${HADOOP_OPTS:-"-Djava.net.preferIPv4Stack=true"}
+}
+
+function hadoop_find_confdir
+{
+  # NOTE: This function is not user replaceable.
+
+  # Look for the basic hadoop configuration area.
+  #
+  #
+  # An attempt at compatibility with some Hadoop 1.x
+  # installs.
+  if [[ -e "${HADOOP_PREFIX}/conf/hadoop-env.sh" ]]; then
+    DEFAULT_CONF_DIR="conf"
+  else
+    DEFAULT_CONF_DIR="etc/hadoop"
+  fi
+  export HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-${HADOOP_PREFIX}/${DEFAULT_CONF_DIR}}"
+}
+
+function hadoop_exec_hadoopenv
+{
+  # NOTE: This function is not user replaceable.
+
+  if [[ -z "${HADOOP_ENV_PROCESSED}" ]]; then
+    if [[ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]]; then
+      export HADOOP_ENV_PROCESSED=true
+      . "${HADOOP_CONF_DIR}/hadoop-env.sh"
+    fi
+  fi
+}
+
+
+function hadoop_basic_init
+{
+  # Some of these are also set in hadoop-env.sh.
+  # we still set them here just in case hadoop-env.sh is
+  # broken in some way, set up defaults, etc.
+  #
+  # but it is important to note that if you update these
+  # you also need to update hadoop-env.sh as well!!!
+  
+  # CLASSPATH initially contains $HADOOP_CONF_DIR
+  CLASSPATH="${HADOOP_CONF_DIR}"
+  
+  if [[ -z "${HADOOP_COMMON_HOME}" ]] &&
+  [[ -d "${HADOOP_PREFIX}/${HADOOP_COMMON_DIR}" ]]; then
+    export HADOOP_COMMON_HOME="${HADOOP_PREFIX}"
+  fi
+  
+  # default policy file for service-level authorization
+  HADOOP_POLICYFILE=${HADOOP_POLICYFILE:-"hadoop-policy.xml"}
+  
+  # define HADOOP_HDFS_HOME
+  if [[ -z "${HADOOP_HDFS_HOME}" ]] &&
+  [[ -d "${HADOOP_PREFIX}/${HDFS_DIR}" ]]; then
+    export HADOOP_HDFS_HOME="${HADOOP_PREFIX}"
+  fi
+  
+  # define HADOOP_YARN_HOME
+  if [[ -z "${HADOOP_YARN_HOME}" ]] &&
+  [[ -d "${HADOOP_PREFIX}/${YARN_DIR}" ]]; then
+    export HADOOP_YARN_HOME="${HADOOP_PREFIX}"
+  fi
+  
+  # define HADOOP_MAPRED_HOME
+  if [[ -z "${HADOOP_MAPRED_HOME}" ]] &&
+  [[ -d "${HADOOP_PREFIX}/${MAPRED_DIR}" ]]; then
+    export HADOOP_MAPRED_HOME="${HADOOP_PREFIX}"
+  fi
+  
+  HADOOP_IDENT_STRING=${HADOOP_IDENT_STRING:-$USER}
+  HADOOP_LOG_DIR=${HADOOP_LOG_DIR:-"${HADOOP_PREFIX}/logs"}
+  HADOOP_LOGFILE=${HADOOP_LOGFILE:-hadoop.log}
+  HADOOP_NICENESS=${HADOOP_NICENESS:-0}
+  HADOOP_STOP_TIMEOUT=${HADOOP_STOP_TIMEOUT:-5}
+  HADOOP_PID_DIR=${HADOOP_PID_DIR:-/tmp}
+  HADOOP_ROOT_LOGGER=${HADOOP_ROOT_LOGGER:-INFO,console}
+  HADOOP_DAEMON_ROOT_LOGGER=${HADOOP_DAEMON_ROOT_LOGGER:-INFO,RFA}
+  HADOOP_SECURITY_LOGGER=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender}
+  HADOOP_HEAPSIZE=${HADOOP_HEAPSIZE:-1024}
+  HADOOP_SSH_OPTS=${HADOOP_SSH_OPTS:-"-o BatchMode=yes -o StrictHostKeyChecking=no -o ConnectTimeout=10s"}
+  HADOOP_SECURE_LOG_DIR=${HADOOP_SECURE_LOG_DIR:-${HADOOP_LOG_DIR}}
+  HADOOP_SECURE_PID_DIR=${HADOOP_SECURE_PID_DIR:-${HADOOP_PID_DIR}}
+  HADOOP_SSH_PARALLEL=${HADOOP_SSH_PARALLEL:-10}
+}
+
+function hadoop_populate_slaves_file()
+{
+  # NOTE: This function is not user replaceable.
+
+  local slavesfile=$1
+  shift
+  if [[ -f "${slavesfile}" ]]; then
+    # shellcheck disable=2034
+    HADOOP_SLAVES="${slavesfile}"
+  elif [[ -f "${HADOOP_CONF_DIR}/${slavesfile}" ]]; then
+    # shellcheck disable=2034
+    HADOOP_SLAVES="${HADOOP_CONF_DIR}/${slavesfile}"
+    # shellcheck disable=2034
+    YARN_SLAVES="${HADOOP_CONF_DIR}/${slavesfile}"
+  else
+    hadoop_error "ERROR: Cannot find hosts file \"${slavesfile}\""
+    hadoop_exit_with_usage 1
+  fi
+}
+
+function hadoop_rotate_log
+{
+  #
+  # log rotation (mainly used for .out files)
+  # Users are likely to replace this one for something
+  # that gzips or uses dates or who knows what.
+  #
+  # be aware that &1 and &2 might go through here
+  # so don't do anything too crazy...
+  #
+  local log=$1;
+  local num=${2:-5};
+  
+  if [[ -f "${log}" ]]; then # rotate logs
+    while [[ ${num} -gt 1 ]]; do
+      #shellcheck disable=SC2086
+      let prev=${num}-1
+      if [[ -f "${log}.${prev}" ]]; then
+        mv "${log}.${prev}" "${log}.${num}"
+      fi
+      num=${prev}
+    done
+    mv "${log}" "${log}.${num}"
+  fi
+}
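
    # sketch with the default keep of 5 (the path is hypothetical):
    # .out -> .out.1, .out.1 -> .out.2, ..., and any old .out.5 is overwritten
    hadoop_rotate_log "${HADOOP_LOG_DIR}/hadoop-${USER}-namenode.out"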
+
+function hadoop_actual_ssh
+{
+  # we are passing this function to xargs
+  # should get hostname followed by rest of command line
+  local slave=$1
+  shift
+  
+  # shellcheck disable=SC2086
+  ssh ${HADOOP_SSH_OPTS} ${slave} $"${@// /\\ }" 2>&1 | sed "s/^/$slave: /"
+}
+
+function hadoop_connect_to_hosts
+{
+  # shellcheck disable=SC2124
+  local params="$@"
+  
+  #
+  # ssh (or whatever) to a host
+  #
+  # User can specify hostnames or a file where the hostnames are (not both)
+  if [[ -n "${HADOOP_SLAVES}" && -n "${HADOOP_SLAVE_NAMES}" ]] ; then
+    hadoop_error "ERROR: Both HADOOP_SLAVES and HADOOP_SLAVE_NAME were defined. Aborting."
+    exit 1
+  fi
+  
+  if [[ -n "${HADOOP_SLAVE_NAMES}" ]] ; then
+    SLAVE_NAMES=${HADOOP_SLAVE_NAMES}
+  else
+    SLAVE_FILE=${HADOOP_SLAVES:-${HADOOP_CONF_DIR}/slaves}
+  fi
+  
+  # if pdsh is available, let's use it.  otherwise default
+  # to a loop around ssh.  (ugh)
+  if [[ -e '/usr/bin/pdsh' ]]; then
+    if [[ -z "${HADOOP_SLAVE_NAMES}" ]] ; then
+      # if we were given a file, just let pdsh deal with it.
+      # shellcheck disable=SC2086
+      PDSH_SSH_ARGS_APPEND="${HADOOP_SSH_OPTS}" pdsh \
+      -f "${HADOOP_SSH_PARALLEL}" -w ^"${SLAVE_FILE}" $"${@// /\\ }" 2>&1
+    else
+      # no spaces allowed in the pdsh arg host list
+      # shellcheck disable=SC2086
+      SLAVE_NAMES=$(echo ${SLAVE_NAMES} | tr -s ' ' ,)
+      PDSH_SSH_ARGS_APPEND="${HADOOP_SSH_OPTS}" pdsh \
+      -f "${HADOOP_SSH_PARALLEL}" -w "${SLAVE_NAMES}" $"${@// /\\ }" 2>&1
+    fi
+  else
+    if [[ -z "${SLAVE_NAMES}" ]]; then
+      SLAVE_NAMES=$(sed 's/#.*$//;/^$/d' "${SLAVE_FILE}")
+    fi
+    
+    # quoting here gets tricky. it's easier to push it into a function
+    # so that we don't have to deal with it. However...
+    # xargs can't use a function so instead we'll export it out
+    # and force it into a subshell
+    # moral of the story: just use pdsh.
+    export -f hadoop_actual_ssh
+    export HADOOP_SSH_OPTS
+    echo "${SLAVE_NAMES}" | \
+    xargs -n 1 -P"${HADOOP_SSH_PARALLEL}" \
+    -I {} bash -c --  "hadoop_actual_ssh {} ${params}"
+    wait
+  fi
+}
+
+function hadoop_add_param
+{
+  #
+  # general param dedupe..
+  # $1 is what we are adding to
+  # $2 is the name of what we want to add (key)
+  # $3 is the key+value of what we're adding
+  #
+  # doing it this way allows us to support all sorts of
+  # different syntaxes, just so long as they are space
+  # delimited
+  #
+  if [[ ! ${!1} =~ $2 ]] ; then
+    # shellcheck disable=SC2086
+    eval $1="'${!1} $3'"
+  fi
+}
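
    # sketch of the dedupe semantics (values are hypothetical):
    HADOOP_OPTS="-Xmx1g"
    hadoop_add_param HADOOP_OPTS Xmx "-Xmx2g"     # "Xmx" already present: no change
    hadoop_add_param HADOOP_OPTS foo "-Dfoo=bar"  # appended: "-Xmx1g -Dfoo=bar"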
+
+function hadoop_add_classpath
+{
+  # two params:
+  # $1 = directory, file, wildcard, whatever to add
+  # $2 = before or after, which determines where in the
+  #      classpath this object should go. default is after
+  # return 0 = success
+  # return 1 = failure (duplicate, doesn't exist, whatever)
+  
+  # However, with classpath (& JLP), we can do dedupe
+  # along with some sanity checking (e.g., missing directories)
+  # since we have a better idea of what is legal
+  #
+  # for wildcard at end, we can
+  # at least check the dir exists
+  if [[ $1 =~ ^.*\*$ ]]; then
+    local mp=$(dirname "$1")
+    if [[ ! -d "${mp}" ]]; then
+      return 1
+    fi
+    
+    # no wildcard in the middle, so check existence
+    # (doesn't matter *what* it is)
+  elif [[ ! $1 =~ ^.*\*.*$ ]] && [[ ! -e "$1" ]]; then
+    return 1
+  fi
+  
+  if [[ -z "${CLASSPATH}" ]]; then
+    CLASSPATH=$1
+  elif [[ ":${CLASSPATH}:" != *":$1:"* ]]; then
+    if [[ "$2" = "before" ]]; then
+      CLASSPATH="$1:${CLASSPATH}"
+    else
+      CLASSPATH+=:$1
+    fi
+  fi
+  return 0
+}
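
    # sketch of typical calls (directories are hypothetical); paths that do
    # not exist are rejected and exact duplicates are skipped:
    hadoop_add_classpath "/opt/hadoop/share/hadoop/common/lib/*"   # dir must exist
    hadoop_add_classpath "${HADOOP_CONF_DIR}" before               # prepend
    hadoop_add_classpath "/no/such/dir" || echo "rejected"         # returns 1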
+
+function hadoop_add_colonpath
+{
+  # two params:
+  # $1 = directory, file, wildcard, whatever to add
+  # $2 = before or after, which determines where in the
+  #      classpath this object should go
+  # return 0 = success
+  # return 1 = failure (duplicate)
+  
+  # this is CLASSPATH, JLP, etc but with dedupe but no
+  # other checking
+  if [[ -d "${2}" ]] && [[ ":${!1}:" != *":$2:"* ]]; then
+    if [[ -z "${!1}" ]]; then
+      # shellcheck disable=SC2086
+      eval $1="'$2'"
+    elif [[ "$3" = "before" ]]; then
+      # shellcheck disable=SC2086
+      eval $1="'$2:${!1}'"
+    else
+      # shellcheck disable=SC2086
+      eval $1+="'$2'"
+    fi
+  fi
+}
+
+function hadoop_add_javalibpath
+{
+  # specialized function for a common use case
+  hadoop_add_colonpath JAVA_LIBRARY_PATH "$1" "$2"
+}
+
+function hadoop_add_ldlibpath
+{
+  # specialized function for a common use case
+  hadoop_add_colonpath LD_LIBRARY_PATH "$1" "$2"
+  
+  # note that we export this
+  export LD_LIBRARY_PATH
+}
+
+function hadoop_add_to_classpath_common
+{
+  
+  #
+  # get all of the common jars+config in the path
+  #
+  
+  # developers
+  if [[ -n "${HADOOP_ENABLE_BUILD_PATHS}" ]]; then
+    hadoop_add_classpath "${HADOOP_COMMON_HOME}/hadoop-common/target/classes"
+  fi
+  
+  if [[ -d "${HADOOP_COMMON_HOME}/${HADOOP_COMMON_DIR}/webapps" ]]; then
+    hadoop_add_classpath "${HADOOP_COMMON_HOME}/${HADOOP_COMMON_DIR}"
+  fi
+  
+  hadoop_add_classpath "${HADOOP_COMMON_HOME}/${HADOOP_COMMON_LIB_JARS_DIR}"'/*'
+  hadoop_add_classpath "${HADOOP_COMMON_HOME}/${HADOOP_COMMON_DIR}"'/*'
+}
+
+function hadoop_add_to_classpath_hdfs
+{
+  #
+  # get all of the hdfs jars+config in the path
+  #
+  # developers
+  if [[ -n "${HADOOP_ENABLE_BUILD_PATHS}" ]]; then
+    hadoop_add_classpath "${HADOOP_HDFS_HOME}/hadoop-hdfs/target/classes"
+  fi
+  
+  # put hdfs in classpath if present
+  if [[ -d "${HADOOP_HDFS_HOME}/${HDFS_DIR}/webapps" ]]; then
+    hadoop_add_classpath "${HADOOP_HDFS_HOME}/${HDFS_DIR}"
+  fi
+  
+  hadoop_add_classpath "${HADOOP_HDFS_HOME}/${HDFS_LIB_JARS_DIR}"'/*'
+  hadoop_add_classpath "${HADOOP_HDFS_HOME}/${HDFS_DIR}"'/*'
+}
+
+function hadoop_add_to_classpath_yarn
+{
+  #
+  # get all of the yarn jars+config in the path
+  #
+  # developers
+  if [[ -n "${HADOOP_ENABLE_BUILD_PATHS}" ]]; then
+    for i in yarn-api yarn-common yarn-mapreduce yarn-master-worker \
+    yarn-server/yarn-server-nodemanager \
+    yarn-server/yarn-server-common \
+    yarn-server/yarn-server-resourcemanager; do
+      hadoop_add_classpath "${HADOOP_YARN_HOME}/$i/target/classes"
+    done
+    
+    hadoop_add_classpath "${HADOOP_YARN_HOME}/build/test/classes"
+    hadoop_add_classpath "${HADOOP_YARN_HOME}/build/tools"
+  fi
+  
+  if [[ -d "${HADOOP_YARN_HOME}/${YARN_DIR}/webapps" ]]; then
+    hadoop_add_classpath "${HADOOP_YARN_HOME}/${YARN_DIR}"
+  fi
+  
+  hadoop_add_classpath "${HADOOP_YARN_HOME}/${YARN_LIB_JARS_DIR}"'/*'
+  hadoop_add_classpath  "${HADOOP_YARN_HOME}/${YARN_DIR}"'/*'
+}
+
+function hadoop_add_to_classpath_mapred
+{
+  #
+  # get all of the mapreduce jars+config in the path
+  #
+  # developers
+  if [[ -n "${HADOOP_ENABLE_BUILD_PATHS}" ]]; then
+    hadoop_add_classpath "${HADOOP_MAPRED_HOME}/hadoop-mapreduce-client-shuffle/target/classes"
+    hadoop_add_classpath "${HADOOP_MAPRED_HOME}/hadoop-mapreduce-client-common/target/classes"
+    hadoop_add_classpath "${HADOOP_MAPRED_HOME}/hadoop-mapreduce-client-hs/target/classes"
+    hadoop_add_classpath "${HADOOP_MAPRED_HOME}/hadoop-mapreduce-client-hs-plugins/target/classes"
+    hadoop_add_classpath "${HADOOP_MAPRED_HOME}/hadoop-mapreduce-client-app/target/classes"
+    hadoop_add_classpath "${HADOOP_MAPRED_HOME}/hadoop-mapreduce-client-jobclient/target/classes"
+    hadoop_add_classpath "${HADOOP_MAPRED_HOME}/hadoop-mapreduce-client-core/target/classes"
+  fi
+  
+  if [[ -d "${HADOOP_MAPRED_HOME}/${MAPRED_DIR}/webapps" ]]; then
+    hadoop_add_classpath "${HADOOP_MAPRED_HOME}/${MAPRED_DIR}"
+  fi
+  
+  hadoop_add_classpath "${HADOOP_MAPRED_HOME}/${MAPRED_LIB_JARS_DIR}"'/*'
+  hadoop_add_classpath "${HADOOP_MAPRED_HOME}/${MAPRED_DIR}"'/*'
+}
+
+
+function hadoop_add_to_classpath_userpath
+{
+  # Add the user-specified HADOOP_CLASSPATH to the
+  # official CLASSPATH env var.
+  # Add it first or last depending on if user has
+  # set env-var HADOOP_USER_CLASSPATH_FIRST
+  # we'll also dedupe it, because we're cool like that.
+  #
+  local c
+  local array
+  local i
+  local j
+  let c=0
+  
+  if [[ -n "${HADOOP_CLASSPATH}" ]]; then
+    # I wonder if Java runs on VMS.
+    for i in $(echo "${HADOOP_CLASSPATH}" | tr : '\n'); do
+      array[$c]=$i
+      let c+=1
+    done
+    let j=c-1
+    
+    if [[ -z "${HADOOP_USER_CLASSPATH_FIRST}" ]]; then
+      for ((i=j; i>=0; i--)); do
+        hadoop_add_classpath "${array[$i]}" before
+      done
+    else
+      for ((i=0; i<=j; i++)); do
+        hadoop_add_classpath "${array[$i]}" after
+      done
+    fi
+  fi
+}
+
+function hadoop_os_tricks
+{
+  local bindv6only
+
+  # some OSes have special needs. here's some out of the box
+  # examples for OS X and Linux. Vendors, replace this with your special sauce.
+  case ${HADOOP_OS_TYPE} in
+    Darwin)
+      if [[ -x /usr/libexec/java_home ]]; then
+        export JAVA_HOME="$(/usr/libexec/java_home)"
+      else
+        export JAVA_HOME=/Library/Java/Home
+      fi
+    ;;
+    Linux)
+      bindv6only=$(/sbin/sysctl -n net.ipv6.bindv6only 2> /dev/null)
+
+      # NOTE! HADOOP_ALLOW_IPV6 is a developer hook.  We leave it
+      # undocumented in hadoop-env.sh because we don't want users to
+      # shoot themselves in the foot while devs make IPv6 work.
+      if [[ -n "${bindv6only}" ]] && 
+         [[ "${bindv6only}" -eq "1" ]] && 
+         [[ "${HADOOP_ALLOW_IPV6}" != "yes" ]]; then
+        hadoop_error "ERROR: \"net.ipv6.bindv6only\" is set to 1 "
+        hadoop_error "ERROR: Hadoop networking could be broken. Aborting."
+        hadoop_error "ERROR: For more info: http://wiki.apache.org/hadoop/HadoopIPv6"
+        exit 1
+      fi
+      # Newer versions of glibc use an arena memory allocator that
+      # causes virtual memory usage to explode. This interacts badly
+      # with the many threads that we use in Hadoop. Tune the variable
+      # down to prevent vmem explosion.
+      export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-4}
+    ;;
+  esac
+}
+
+function hadoop_java_setup
+{
+  # Bail if we did not detect it
+  if [[ -z "${JAVA_HOME}" ]]; then
+    hadoop_error "ERROR: JAVA_HOME is not set and could not be found."
+    exit 1
+  fi
+  
+  if [[ ! -d "${JAVA_HOME}" ]]; then
+    hadoop_error "ERROR: JAVA_HOME ${JAVA_HOME} does not exist."
+    exit 1
+  fi
+  
+  JAVA="${JAVA_HOME}/bin/java"
+  
+  if [[ ! -x "$JAVA" ]]; then
+    hadoop_error "ERROR: $JAVA is not executable."
+    exit 1
+  fi
+  # shellcheck disable=SC2034
+  JAVA_HEAP_MAX=-Xmx1g
+  HADOOP_HEAPSIZE=${HADOOP_HEAPSIZE:-1024}
+  
+  # check envvars which might override default args
+  if [[ -n "$HADOOP_HEAPSIZE" ]]; then
+    # shellcheck disable=SC2034
+    JAVA_HEAP_MAX="-Xmx${HADOOP_HEAPSIZE}m"
+  fi
+}
+
+
+function hadoop_finalize_libpaths
+{
+  if [[ -n "${JAVA_LIBRARY_PATH}" ]]; then
+    hadoop_add_param HADOOP_OPTS java.library.path \
+    "-Djava.library.path=${JAVA_LIBRARY_PATH}"
+    export LD_LIBRARY_PATH
+  fi
+}
+
+#
+# fill in any last minute options that might not have been defined yet
+#
+# Note that we are replacing ' ' with '\ ' so that directories with
+# spaces work correctly when we exec the command
+#
+function hadoop_finalize_hadoop_opts
+{
+  hadoop_add_param HADOOP_OPTS hadoop.log.dir "-Dhadoop.log.dir=${HADOOP_LOG_DIR/ /\ }"
+  hadoop_add_param HADOOP_OPTS hadoop.log.file "-Dhadoop.log.file=${HADOOP_LOGFILE/ /\ }"
+  hadoop_add_param HADOOP_OPTS hadoop.home.dir "-Dhadoop.home.dir=${HADOOP_PREFIX/ /\ }"
+  hadoop_add_param HADOOP_OPTS hadoop.id.str "-Dhadoop.id.str=${HADOOP_IDENT_STRING/ /\ }"
+  hadoop_add_param HADOOP_OPTS hadoop.root.logger "-Dhadoop.root.logger=${HADOOP_ROOT_LOGGER}"
+  hadoop_add_param HADOOP_OPTS hadoop.policy.file "-Dhadoop.policy.file=${HADOOP_POLICYFILE/ /\ }"
+  hadoop_add_param HADOOP_OPTS hadoop.security.logger "-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER}"
+}
+
+function hadoop_finalize_classpath
+{
+  
+  # we want the HADOOP_CONF_DIR at the end
+  # according to oom, it gives a 2% perf boost
+  hadoop_add_classpath "${HADOOP_CONF_DIR}" after
+  
+  # user classpath gets added at the last minute. this allows
+  # override of CONF dirs and more
+  hadoop_add_to_classpath_userpath
+}
+
+function hadoop_finalize
+{
+  # user classpath gets added at the last minute. this allows
+  # override of CONF dirs and more
+  hadoop_finalize_classpath
+  hadoop_finalize_libpaths
+  hadoop_finalize_hadoop_opts
+}
+
+function hadoop_exit_with_usage
+{
+  # NOTE: This function is not user replaceable.
+
+  local exitcode=$1
+  if [[ -z $exitcode ]]; then
+    exitcode=1
+  fi
+  if declare -F hadoop_usage >/dev/null ; then
+    hadoop_usage
+  elif [[ -x /usr/bin/cowsay ]]; then
+    /usr/bin/cowsay -f elephant "Sorry, no help available."
+  else
+    hadoop_error "Sorry, no help available."
+  fi
+  exit $exitcode
+}
+
+function hadoop_verify_secure_prereq
+{
+  # if you are on an OS like Illumos that has functional roles
+  # and you are using pfexec, you'll probably want to change
+  # this.
+  
+  # ${EUID} comes from the shell itself!
+  if [[ "${EUID}" -ne 0 ]] || [[ -n "${HADOOP_SECURE_COMMAND}" ]]; then
+    hadoop_error "ERROR: You must be a privileged in order to run a secure serice."
+    return 1
+  else
+    return 0
+  fi
+}
+
+function hadoop_setup_secure_service
+{
+  # need a more complicated setup? replace me!
+  
+  HADOOP_PID_DIR=${HADOOP_SECURE_PID_DIR}
+  HADOOP_LOG_DIR=${HADOOP_SECURE_LOG_DIR}
+}
+
+function hadoop_verify_piddir
+{
+  if [[ -z "${HADOOP_PID_DIR}" ]]; then
+    hadoop_error "No pid directory defined."
+    exit 1
+  fi
+  if [[ ! -w "${HADOOP_PID_DIR}" ]] && [[ ! -d "${HADOOP_PID_DIR}" ]]; then
+    hadoop_error "WARNING: ${HADOOP_PID_DIR} does not exist. Creating."
+    mkdir -p "${HADOOP_PID_DIR}" > /dev/null 2>&1
+    if [[ $? -gt 0 ]]; then
+      hadoop_error "ERROR: Unable to create ${HADOOP_PID_DIR}. Aborting."
+      exit 1
+    fi
+  fi
+  touch "${HADOOP_PID_DIR}/$$" >/dev/null 2>&1
+  if [[ $? -gt 0 ]]; then
+    hadoop_error "ERROR: Unable to write in ${HADOOP_PID_DIR}. Aborting."
+    exit 1
+  fi
+  rm "${HADOOP_PID_DIR}/$$" >/dev/null 2>&1
+}
+
+function hadoop_verify_logdir
+{
+  if [[ -z "${HADOOP_LOG_DIR}" ]]; then
+    hadoop_error "No log directory defined."
+    exit 1
+  fi
+  if [[ ! -w "${HADOOP_LOG_DIR}" ]] && [[ ! -d "${HADOOP_LOG_DIR}" ]]; then
+    hadoop_error "WARNING: ${HADOOP_LOG_DIR} does not exist. Creating."
+    mkdir -p "${HADOOP_LOG_DIR}" > /dev/null 2>&1
+    if [[ $? -gt 0 ]]; then
+      hadoop_error "ERROR: Unable to create ${HADOOP_LOG_DIR}. Aborting."
+      exit 1
+    fi
+  fi
+  touch "${HADOOP_LOG_DIR}/$$" >/dev/null 2>&1
+  if [[ $? -gt 0 ]]; then
+    hadoop_error "ERROR: Unable to write in ${HADOOP_LOG_DIR}. Aborting."
+    exit 1
+  fi
+  rm "${HADOOP_LOG_DIR}/$$" >/dev/null 2>&1
+}
+
+function hadoop_status_daemon
+{
+  #
+  # LSB 4.1.0 compatible status command (1)
+  #
+  # 0 = program is running
+  # 1 = dead, but still a pid (2)
+  # 2 = (not used by us)
+  # 3 = not running
+  #
+  # 1 - this is not an endorsement of the LSB
+  #
+  # 2 - technically, the specification says /var/run/pid, so
+  #     we should never return this value, but we're giving
+  #     them the benefit of a doubt and returning 1 even if
+#     our pid is not in /var/run .
+  #
+  
+  local pidfile=$1
+  shift
+  
+  local pid
+  
+  if [[ -f "${pidfile}" ]]; then
+    pid=$(cat "${pidfile}")
+    if ps -p "${pid}" > /dev/null 2>&1; then
+      return 0
+    fi
+    return 1
+  fi
+  return 3
+}
+
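
Since the return codes follow the LSB convention, callers can branch on them directly; a sketch (the pid file path is illustrative):

    hadoop_status_daemon "${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-namenode.pid"
    case $? in
      0) echo "running" ;;
      1) echo "dead, but a pid file remains" ;;
      3) echo "not running" ;;
    esac
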
+function hadoop_java_exec
+{
+  # run a java command.  this is used for
+  # non-daemons
+
+  local command=$1
+  local class=$2
+  shift 2
+  # we eval this so that paths with spaces work
+  #shellcheck disable=SC2086
+  eval exec "$JAVA" "-Dproc_${command}" ${HADOOP_OPTS} "${class}" "$@"
+
+}
+
+function hadoop_start_daemon
+{
+  # this is our non-privileged daemon starter
+  # that fires up a daemon in the *foreground*
+  # so complex! so wow! much java!
+  local command=$1
+  local class=$2
+  shift 2
+  #shellcheck disable=SC2086
+  eval exec "$JAVA" "-Dproc_${command}" ${HADOOP_OPTS} "${class}" "$@"
+}
+
+function hadoop_start_daemon_wrapper
+{
+  # this is our non-privileged daemon start
+  # that fires up a daemon in the *background*
+  local daemonname=$1
+  local class=$2
+  local pidfile=$3
+  local outfile=$4
+  shift 4
+  
+  hadoop_rotate_log "${outfile}"
+  
+  hadoop_start_daemon "${daemonname}" \
+  "$class" "$@" >> "${outfile}" 2>&1 < /dev/null &
+  #shellcheck disable=SC2086
+  echo $! > "${pidfile}" 2>/dev/null
+  if [[ $? -gt 0 ]]; then
+    hadoop_error "ERROR:  Cannot write pid ${pidfile}."
+  fi
+  
+  # shellcheck disable=SC2086
+  renice "${HADOOP_NICENESS}" $! >/dev/null 2>&1
+  if [[ $? -gt 0 ]]; then
+    hadoop_error "ERROR: Cannot set priority of process $!"
+  fi
+  
+  # shellcheck disable=SC2086
+  disown $! 2>&1
+  if [[ $? -gt 0 ]]; then
+    hadoop_error "ERROR: Cannot disconnect process $!"
+  fi
+  sleep 1
+  
+  # capture the ulimit output
+  ulimit -a >> "${outfile}" 2>&1
+  
+  # shellcheck disable=SC2086
+  if ! ps -p $! >/dev/null 2>&1; then
+    return 1
+  fi
+  return 0
+}
+
+function hadoop_start_secure_daemon
+{
+  # this is used to launch a secure daemon in the *foreground*
+  #
+  local daemonname=$1
+  local class=$2
+  
+  # pid file to create for our deamon
+  local daemonpidfile=$3
+  
+  # where to send stdout. jsvc has bad habits so this *may* be &1
+  # which means you send it to stdout!
+  local daemonoutfile=$4
+  
+  # where to send stderr.  same thing, except &2 = stderr
+  local daemonerrfile=$5
+  shift 5
+  
+  hadoop_rotate_log "${daemonoutfile}"
+  hadoop_rotate_log "${daemonerrfile}"
+  
+  jsvc="${JSVC_HOME}/jsvc"
+  if [[ ! -f "${jsvc}" ]]; then
+    hadoop_error "JSVC_HOME is not set or set incorrectly. jsvc is required to run secure"
+    hadoop_error "or privileged daemons. Please download and install jsvc from "
+    hadoop_error "http://archive.apache.org/dist/commons/daemon/binaries/ "
+    hadoop_error "and set JSVC_HOME to the directory containing the jsvc binary."
+    exit 1
+  fi
+  
+  # note that shellcheck will throw a
+  # bogus for-our-use-case 2086 here.
+  # it doesn't properly support multi-line situations
+  
+  exec "${jsvc}" \
+  "-Dproc_${daemonname}" \
+  -outfile "${daemonoutfile}" \
+  -errfile "${daemonerrfile}" \
+  -pidfile "${daemonpidfile}" \
+  -nodetach \
+  -user "${HADOOP_SECURE_USER}" \
+  -cp "${CLASSPATH}" \
+  ${HADOOP_OPTS} \
+  "${class}" "$@"
+}
+
+function hadoop_start_secure_daemon_wrapper
+{
+  # this wraps hadoop_start_secure_daemon to take care
+  # of the dirty work to launch a daemon in the background!
+  local daemonname=$1
+  local class=$2
+  
+  # same rules as hadoop_start_secure_daemon except we
+  # have some additional parameters
+  
+  local daemonpidfile=$3
+  
+  local daemonoutfile=$4
+  
+  # the pid file of the subprocess that spawned our
+  # secure launcher
+  local jsvcpidfile=$5
+  
+  # the output of the subprocess that spawned our secure
+  # launcher
+  local jsvcoutfile=$6
+  
+  local daemonerrfile=$7
+  shift 7
+  
+  hadoop_rotate_log "${jsvcoutfile}"
+  
+  hadoop_start_secure_daemon \
+  "${daemonname}" \
+  "${class}" \
+  "${daemonpidfile}" \
+  "${daemonoutfile}" \
+  "${daemonerrfile}" "$@" >> "${jsvcoutfile}" 2>&1 < /dev/null &
+  
+  # This wrapper should only have one child.  Unlike Shawty Lo.
+  #shellcheck disable=SC2086
+  echo $! > "${jsvcpidfile}" 2>/dev/null
+  if [[ $? -gt 0 ]]; then
+    hadoop_error "ERROR:  Cannot write pid ${pidfile}."
+  fi
+  sleep 1
+  #shellcheck disable=SC2086
+  renice "${HADOOP_NICENESS}" $! >/dev/null 2>&1
+  if [[ $? -gt 0 ]]; then
+    hadoop_error "ERROR: Cannot set priority of process $!"
+  fi
+  if [[ -f "${daemonpidfile}" ]]; then
+    #shellcheck disable=SC2046
+    renice "${HADOOP_NICENESS}" $(cat "${daemonpidfile}") >/dev/null 2>&1
+    if [[ $? -gt 0 ]]; then
+      hadoop_error "ERROR: Cannot set priority of process $(cat "${daemonpidfile}")"
+    fi
+  fi
+  #shellcheck disable=SC2086
+  disown $! 2>&1
+  if [[ $? -gt 0 ]]; then
+    hadoop_error "ERROR: Cannot disconnect process $!"
+  fi
+  # capture the ulimit output
+  su "${HADOOP_SECURE_USER}" -c 'bash -c "ulimit -a"' >> "${jsvcoutfile}" 2>&1
+  #shellcheck disable=SC2086
+  if ! ps -p $! >/dev/null 2>&1; then
+    return 1
+  fi
+  return 0
+}
+
+function hadoop_stop_daemon
+{
+  local cmd=$1
+  local pidfile=$2
+  shift 2
+  
+  local pid
+  
+  if [[ -f "${pidfile}" ]]; then
+    pid=$(cat "$pidfile")
+    
+    kill "${pid}" >/dev/null 2>&1
+    sleep "${HADOOP_STOP_TIMEOUT}"
+    if kill -0 "${pid}" > /dev/null 2>&1; then
+      hadoop_error "WARNING: ${cmd} did not stop gracefully after ${HADOOP_STOP_TIMEOUT} seconds: Trying to kill with kill -9"
+      kill -9 "${pid}" >/dev/null 2>&1
+    fi
+    if ps -p "${pid}" > /dev/null 2>&1; then
+      hadoop_error "ERROR: Unable to kill ${pid}"
+    else
+      rm -f "${pidfile}" >/dev/null 2>&1
+    fi
+  fi
+}
+
+
+function hadoop_stop_secure_daemon
+{
+  local command=$1
+  local daemonpidfile=$2
+  local privpidfile=$3
+  shift 3
+  local ret
+  
+  hadoop_stop_daemon "${command}" "${daemonpidfile}"
+  ret=$?
+  rm -f "${daemonpidfile}" "${privpidfile}" 2>/dev/null
+  return ${ret}
+}
+
+function hadoop_daemon_handler
+{
+  local daemonmode=$1
+  local daemonname=$2
+  local class=$3
+  local daemon_pidfile=$4
+  local daemon_outfile=$5
+  shift 5
+  
+  case ${daemonmode} in
+    status)
+      hadoop_status_daemon "${daemon_pidfile}"
+      exit $?
+    ;;
+    
+    stop)
+      hadoop_stop_daemon "${daemonname}" "${daemon_pidfile}"
+      exit $?
+    ;;
+    
+    ##COMPAT  -- older hadoops would also start daemons by default
+    start|default)
+      hadoop_verify_piddir
+      hadoop_verify_logdir
+      hadoop_status_daemon "${daemon_pidfile}"
+      if [[ $? == 0  ]]; then
+        hadoop_error "${daemonname} running as process $(cat "${daemon_pidfile}").  Stop it first."
+        exit 1
+      else
+        # stale pid file, so just remove it and continue on
+        rm -f "${daemon_pidfile}" >/dev/null 2>&1
+      fi
+      ##COMPAT  - differentiate between --daemon start and nothing
+      # "nothing" shouldn't detach
+      if [[ "$daemonmode" = "default" ]]; then
+        hadoop_start_daemon "${daemonname}" "${class}" "$@"
+      else
+        hadoop_start_daemon_wrapper "${daemonname}" \
+        "${class}" "${daemon_pidfile}" "${daemon_outfile}" "$@"
+      fi
+    ;;
+  esac
+}
+
+
+function hadoop_secure_daemon_handler
+{
+  local daemonmode=$1
+  local daemonname=$2
+  local classname=$3
+  local daemon_pidfile=$4
+  local daemon_outfile=$5
+  local priv_pidfile=$6
+  local priv_outfile=$7
+  local priv_errfile=$8
+  shift 8
+  
+  case ${daemonmode} in
+    status)
+      hadoop_status_daemon "${daemon_pidfile}"
+      exit $?
+    ;;
+    
+    stop)
+      hadoop_stop_secure_daemon "${daemonname}" \
+      "${daemon_pidfile}" "${priv_pidfile}"
+      exit $?
+    ;;
+    
+    ##COMPAT  -- older hadoops would also start daemons by default
+    start|default)
+      hadoop_verify_piddir
+      hadoop_verify_logdir
+      hadoop_status_daemon "${daemon_pidfile}"
+      if [[ $? == 0  ]]; then
+        hadoop_error "${daemonname} running as process $(cat "${daemon_pidfile}").  Stop it first."
+        exit 1
+      else
+        # stale pid file, so just remove it and continue on
+        rm -f "${daemon_pidfile}" >/dev/null 2>&1
+      fi
+      
+      ##COMPAT  - differentiate between --daemon start and nothing
+      # "nothing" shouldn't detach
+      if [[ "${daemonmode}" = "default" ]]; then
+        hadoop_start_secure_daemon "${daemonname}" "${classname}" \
+        "${daemon_pidfile}" "${daemon_outfile}" \
+        "${priv_errfile}"  "$@"
+      else
+        hadoop_start_secure_daemon_wrapper "${daemonname}" "${classname}" \
+        "${daemon_pidfile}" "${daemon_outfile}" \
+        "${priv_pidfile}" "${priv_outfile}" "${priv_errfile}"  "$@"
+      fi
+    ;;
+  esac
+}
+
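
These two handlers are the single dispatch point that the front-end scripts funnel into; compare the rewritten hdfs script later in this commit. A sketch of a non-secure call (file names are illustrative):

    hadoop_daemon_handler "${HADOOP_DAEMON_MODE}" datanode \
      org.apache.hadoop.hdfs.server.datanode.DataNode \
      "${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-datanode.pid" \
      "${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-datanode-${HOSTNAME}.out" "$@"
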

+ 93 - 0
hadoop-common-project/hadoop-common/src/main/bin/hadoop-layout.sh.example

@@ -0,0 +1,93 @@
+# Copyright 2014 The Apache Software Foundation
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+##
+## VENDORS!
+##
+## This is where you can redefine the layout of Hadoop directories
+## and expect to be reasonably compatible.  Needless to say, this
+## is expert level stuff and one needs to tread carefully.
+##
+## If you move HADOOP_LIBEXEC_DIR from some location that
+## isn't bin/../libexec, you MUST define either HADOOP_LIBEXEC_DIR
+## or have HADOOP_PREFIX/libexec/hadoop-config.sh and
+## HADOOP_PREFIX/libexec/hadoop-layout.sh (this file) exist.
+
+## NOTE:
+##
+## hadoop-functions.sh gets executed BEFORE this file.  So you can
+##   redefine all of those functions here.
+##
+## *-env.sh get executed AFTER this file but generally too late to
+## override the settings (but not the functions!) here.  However, this
+## also means you cannot use things like HADOOP_CONF_DIR for these
+## definitions.
+
+####
+# Common disk layout
+####
+
+# Default location for the common/core Hadoop project
+# export HADOOP_COMMON_HOME=$HADOOP_PREFIX
+
+# Relative locations where components under HADOOP_COMMON_HOME are located
+# export HADOOP_COMMON_DIR="share/hadoop/common"
+# export HADOOP_COMMON_LIB_JARS_DIR="share/hadoop/common/lib"
+# export HADOOP_COMMON_LIB_NATIVE_DIR="lib/native"
+
+####
+# HDFS disk layout
+####
+
+# Default location for the HDFS subproject
+# export HADOOP_HDFS_HOME=$HADOOP_PREFIX
+
+# Relative locations where components under HADOOP_HDFS_HOME are located
+# export HDFS_DIR="share/hadoop/hdfs"
+# export HDFS_LIB_JARS_DIR="share/hadoop/hdfs/lib"
+
+####
+# YARN disk layout
+####
+
+# Default location for the YARN subproject
+# export HADOOP_YARN_HOME=$HADOOP_PREFIX
+
+# Relative locations where components under HADOOP_YARN_HOME are located
+# export YARN_DIR="share/hadoop/yarn"
+# export YARN_LIB_JARS_DIR="share/hadoop/yarn/lib"
+
+# Default location for the MapReduce subproject
+# export HADOOP_MAPRED_HOME=$HADOOP_PREFIX
+
+####
+# MapReduce disk layout
+####
+
+# Relative locations where components under HADOOP_MAPRED_HOME are located
+# export MAPRED_DIR="share/hadoop/mapreduce"
+# export MAPRED_LIB_JARS_DIR="share/hadoop/mapreduce/lib"
+
+####
+# Misc paths
+####
+
+# setup a default TOOL_PATH, where things like distcp lives
+# note that this path only gets added for certain commands and not
+# part of the general classpath
+# export TOOL_PATH="$HADOOP_PREFIX/share/hadoop/tools/lib/*"
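
As an example of what a vendor override might look like, an FHS-style layout could set something like the following (all paths illustrative, not defaults):

    export HADOOP_COMMON_HOME=/usr
    export HADOOP_COMMON_DIR="share/hadoop/common"
    export HADOOP_HDFS_HOME=/usr
    export HDFS_DIR="share/hadoop/hdfs"
    export TOOL_PATH="/usr/share/hadoop/tools/lib/*"
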

+ 16 - 35
hadoop-common-project/hadoop-common/src/main/bin/rcc

@@ -15,47 +15,28 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
-# The Hadoop record compiler
-#
-# Environment Variables
-#
-#   JAVA_HOME        The java implementation to use.  Overrides JAVA_HOME.
-#
-#   HADOOP_OPTS      Extra Java runtime options.
-#
-#   HADOOP_CONF_DIR  Alternate conf dir. Default is ${HADOOP_PREFIX}/conf.
-#
-
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
+# This script runs the Hadoop record compiler.
+this="${BASH_SOURCE-$0}"
+bin=$(cd -P -- "$(dirname -- "$this")" >/dev/null && pwd -P)
+script="$(basename -- "$this")"
+this="$bin/$script"
 
 DEFAULT_LIBEXEC_DIR="$bin"/../libexec
 HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
-
-if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
-  . "${HADOOP_CONF_DIR}/hadoop-env.sh"
-fi
+HADOOP_NEW_CONFIG=true
+. "$HADOOP_LIBEXEC_DIR/hadoop-config.sh"
 
-# some Java parameters
-if [ "$JAVA_HOME" != "" ]; then
-  #echo "run java in $JAVA_HOME"
-  JAVA_HOME=$JAVA_HOME
-fi
-  
-if [ "$JAVA_HOME" = "" ]; then
-  echo "Error: JAVA_HOME is not set."
-  exit 1
+if [ $# = 0 ]; then
+  hadoop_exit_with_usage 1
 fi
 
-JAVA=$JAVA_HOME/bin/java
-JAVA_HEAP_MAX=-Xmx1000m 
+CLASS='org.apache.hadoop.record.compiler.generated.Rcc'
 
-# restore ordinary behaviour
-unset IFS
+# Always respect HADOOP_OPTS and HADOOP_CLIENT_OPTS
+HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
 
-CLASS='org.apache.hadoop.record.compiler.generated.Rcc'
+hadoop_add_param HADOOP_OPTS Xmx "$JAVA_HEAP_MAX"
 
-# run it
-exec "$JAVA" $HADOOP_OPTS -classpath "$CLASSPATH" $CLASS "$@"
+hadoop_finalize
+export CLASSPATH
+hadoop_java_exec rcc "${CLASS}" "$@"
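
Note the switch from hard-coding -Xmx to hadoop_add_param: assuming that helper's skip-if-already-present semantics, a heap size the user puts in HADOOP_OPTS survives, e.g. (file name illustrative):

    HADOOP_OPTS="-Xmx256m" rcc myrecord.jr   # the user's -Xmx256m wins; JAVA_HEAP_MAX is not appended
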

+ 23 - 28
hadoop-common-project/hadoop-common/src/main/bin/slaves.sh

@@ -27,38 +27,33 @@
 #   HADOOP_SSH_OPTS Options passed to ssh when running remote commands.
 ##
 
-usage="Usage: slaves.sh [--config confdir] command..."
+function hadoop_usage {
+  echo "Usage: slaves.sh [--config confdir] command..."
+}
 
-# if no args specified, show usage
-if [ $# -le 0 ]; then
-  echo $usage
-  exit 1
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+  DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+  this="${BASH_SOURCE-$0}"
+  bin=$(cd -P -- "$(dirname -- "${this}")" >dev/null && pwd -P)
+  DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
 fi
 
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
-
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
-
-
-# Where to start the script, see hadoop-config.sh
-# (it set up the variables based on command line options)
-if [ "$HADOOP_SLAVE_NAMES" != '' ] ; then
-  SLAVE_NAMES=$HADOOP_SLAVE_NAMES
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
+  . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
 else
-  SLAVE_FILE=${HADOOP_SLAVES:-${HADOOP_CONF_DIR}/slaves}
-  SLAVE_NAMES=$(cat "$SLAVE_FILE" | sed  's/#.*$//;/^$/d')
+  echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh." 2>&1
+  exit 1
+fi
+
+# if no args specified, show usage
+if [[ $# -le 0 ]]; then
+  hadoop_exit_with_usage 1
 fi
 
-# start the daemons
-for slave in $SLAVE_NAMES ; do
- ssh $HADOOP_SSH_OPTS $slave $"${@// /\\ }" \
-   2>&1 | sed "s/^/$slave: /" &
- if [ "$HADOOP_SLAVE_SLEEP" != "" ]; then
-   sleep $HADOOP_SLAVE_SLEEP
- fi
-done
+hadoop_connect_to_hosts "$@"
 
-wait
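
The calling convention is unchanged even though the ssh loop now lives in hadoop_connect_to_hosts; for example (confdir and command illustrative):

    slaves.sh --config /etc/hadoop uptime   # run uptime on every host in the slaves file
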

+ 26 - 12
hadoop-common-project/hadoop-common/src/main/bin/start-all.sh

@@ -15,24 +15,38 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+echo "This script is deprecated. Use start-dfs.sh and start-yarn.sh instead."
+exit 1
 
-# Start all hadoop daemons.  Run this on master node.
 
-echo "This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh"
 
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
-
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+  DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+  this="${BASH_SOURCE-$0}"
+  bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+  DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
 
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
+  . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
+else
+  echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh." 2>&1
+  exit 1
+fi
 # start hdfs daemons if hdfs is present
-if [ -f "${HADOOP_HDFS_HOME}"/sbin/start-dfs.sh ]; then
-  "${HADOOP_HDFS_HOME}"/sbin/start-dfs.sh --config $HADOOP_CONF_DIR
+if [[ -f "${HADOOP_HDFS_HOME}/sbin/start-dfs.sh" ]]; then
+  "${HADOOP_HDFS_HOME}/sbin/start-dfs.sh" --config "${HADOOP_CONF_DIR}"
 fi
 
 # start yarn daemons if yarn is present
-if [ -f "${HADOOP_YARN_HOME}"/sbin/start-yarn.sh ]; then
-  "${HADOOP_YARN_HOME}"/sbin/start-yarn.sh --config $HADOOP_CONF_DIR
+if [[ -f "${HADOOP_YARN_HOME}/sbin/start-yarn.sh" ]]; then
+  "${HADOOP_YARN_HOME}/sbin/start-yarn.sh" --config "${HADOOP_CONF_DIR}"
 fi
+
+
+

+ 25 - 11
hadoop-common-project/hadoop-common/src/main/bin/stop-all.sh

@@ -18,21 +18,35 @@
 
 # Stop all hadoop daemons.  Run this on master node.
 
-echo "This script is Deprecated. Instead use stop-dfs.sh and stop-yarn.sh"
-
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
+echo "This script is deprecated. Use stop-dfs.sh and stop-yarn.sh instead."
+exit 1
+
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+  DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+  this="${BASH_SOURCE-$0}"
+  bin=$(cd -P -- "$(dirname -- "${this}")" >dev/null && pwd -P)
+  DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
 
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
+  . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
+else
+  echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh." 2>&1
+  exit 1
+fi
 
 # stop hdfs daemons if hdfs is present
-if [ -f "${HADOOP_HDFS_HOME}"/sbin/stop-dfs.sh ]; then
-  "${HADOOP_HDFS_HOME}"/sbin/stop-dfs.sh --config $HADOOP_CONF_DIR
+if [[ -f "${HADOOP_HDFS_HOME}/sbin/stop-dfs.sh" ]]; then
+  "${HADOOP_HDFS_HOME}/sbin/stop-dfs.sh" --config "${HADOOP_CONF_DIR}"
 fi
 
 # stop yarn daemons if yarn is present
-if [ -f "${HADOOP_HDFS_HOME}"/sbin/stop-yarn.sh ]; then
-  "${HADOOP_HDFS_HOME}"/sbin/stop-yarn.sh --config $HADOOP_CONF_DIR
+if [[ -f "${HADOOP_HDFS_HOME}/sbin/stop-yarn.sh" ]]; then
+  "${HADOOP_HDFS_HOME}/sbin/stop-yarn.sh" --config "${HADOOP_CONF_DIR}"
 fi
+

+ 372 - 49
hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh

@@ -1,3 +1,4 @@
+#
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -16,71 +17,393 @@
 
 # Set Hadoop-specific environment variables here.
 
-# The only required environment variable is JAVA_HOME.  All others are
-# optional.  When running a distributed configuration it is best to
-# set JAVA_HOME in this file, so that it is correctly defined on
-# remote nodes.
+##
+## THIS FILE ACTS AS THE MASTER FILE FOR ALL HADOOP PROJECTS.
+## SETTINGS HERE WILL BE READ BY ALL HADOOP COMMANDS.  THEREFORE,
+## ONE CAN USE THIS FILE TO SET YARN, HDFS, AND MAPREDUCE
+## CONFIGURATION OPTIONS INSTEAD OF xxx-env.sh.
+##
+## Precedence rules:
+##
+## {yarn-env.sh|hdfs-env.sh} > hadoop-env.sh > hard-coded defaults
+##
+## {YARN_xyz|HDFS_xyz} > HADOOP_xyz > hard-coded defaults
+##
+
+# Many of the options here are built from the perspective that users
+# may want to provide OVERRIDING values on the command line.
+# For example:
+#
+#  JAVA_HOME=/usr/java/testing hdfs dfs -ls
+#
+# Therefore, the vast majority (BUT NOT ALL!) of these defaults
+# are configured for substitution and not append.  If you would
+# like append, you'll need to modify this file accordingly.
+
+###
+# Generic settings for HADOOP
+###
+
+# Technically, the only required environment variable is JAVA_HOME.
+# All others are optional.  However, our defaults are probably not
+# your defaults.  Many sites configure these options outside of Hadoop,
+# such as in /etc/profile.d
 
 # The java implementation to use.
-export JAVA_HOME=${JAVA_HOME}
+export JAVA_HOME=${JAVA_HOME:-"hadoop-env.sh is not configured"}
 
-# The jsvc implementation to use. Jsvc is required to run secure datanodes.
-#export JSVC_HOME=${JSVC_HOME}
+# Location of Hadoop's configuration information.  i.e., where this
+# file is probably living.  You will almost certainly want to set
+# this in /etc/profile.d or equivalent.
+# export HADOOP_CONF_DIR=$HADOOP_PREFIX/etc/hadoop
 
-export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}
+# The maximum amount of heap to use, in MB. Default is 1024.
+# export HADOOP_HEAPSIZE=1024
 
-# Extra Java CLASSPATH elements.  Automatically insert capacity-scheduler.
-for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do
-  if [ "$HADOOP_CLASSPATH" ]; then
-    export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
-  else
-    export HADOOP_CLASSPATH=$f
-  fi
-done
+# Extra Java runtime options for all Hadoop commands. We don't support
+# IPv6 yet/still, so by default we set preference to IPv4.
+# export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true"
 
-# The maximum amount of heap to use, in MB. Default is 1000.
-#export HADOOP_HEAPSIZE=
-#export HADOOP_NAMENODE_INIT_HEAPSIZE=""
+# Some parts of the shell code may do special things dependent upon
+# the operating system.  We have to set this here. See the next
+# section as to why....
+export HADOOP_OS_TYPE=${HADOOP_OS_TYPE:-$(uname -s)}
 
-# Extra Java runtime options.  Empty by default.
-export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
 
-MAC_OSX=false
-case "`uname`" in
-Darwin*) MAC_OSX=true;;
+# Under certain conditions, Java on OS X will throw SCDynamicStore errors
+# in the system logs.
+# See HADOOP-8719 for more information.  If you need Kerberos
+# support on OS X, you'll want to change/remove this extra bit.
+case ${HADOOP_OS_TYPE} in
+  Darwin*)
+    export HADOOP_OPTS="${HADOOP_OPTS} -Djava.security.krb5.realm= "
+    export HADOOP_OPTS="${HADOOP_OPTS} -Djava.security.krb5.kdc= "
+    export HADOOP_OPTS="${HADOOP_OPTS} -Djava.security.krb5.conf= "
+  ;;
 esac
-if $MAC_OSX; then
-    export HADOOP_OPTS="$HADOOP_OPTS -Djava.security.krb5.realm= -Djava.security.krb5.kdc="
-fi
 
-# Command specific options appended to HADOOP_OPTS when specified
-export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS"
-export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS"
+# Extra Java runtime options for Hadoop clients (i.e., hdfs dfs -blah)
+# These get added to HADOOP_OPTS for such commands.  In most cases,
+# this should be left empty and let users supply it on the
+# command line.
+# extra HADOOP_CLIENT_OPTS=""
 
-export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"
+#
+# A note about classpaths.
+#
+# The classpath is configured such that entries are stripped prior
+# to handing to Java based either upon duplication or non-existence.
+# Wildcards and/or directories are *NOT* expanded as the
+# de-duplication is fairly simple.  So if two directories are in
+# the classpath that both contain awesome-methods-1.0.jar,
+# awesome-methods-1.0.jar will still be seen by java.  But if
+# the classpath specifically has awesome-methods-1.0.jar from the
+# same directory listed twice, the last one will be removed.
+#
 
-export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS"
-export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS"
+# An additional, custom CLASSPATH.  This is really meant for
+# end users, but as an administrator, one might want to push
+# something extra in here too, such as the jar to the topology
+# method.  Just be sure to append to the existing HADOOP_USER_CLASSPATH
+# so end users have a way to add stuff.
+# export HADOOP_USER_CLASSPATH="/some/cool/path/on/your/machine"
 
-# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
-export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS"
-#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"
+# Should HADOOP_USER_CLASSPATH be first in the official CLASSPATH?
+# export HADOOP_USER_CLASSPATH_FIRST="yes"
 
-# On secure datanodes, user to run the datanode as after dropping privileges
-export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER}
+###
+# Options for remote shell connectivity
+###
 
-# Where log files are stored.  $HADOOP_HOME/logs by default.
-#export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER
+# There are some optional components of hadoop that allow for
+# command and control of remote hosts.  For example,
+# start-dfs.sh will attempt to bring up all NNs, DNs, etc.
 
-# Where log files are stored in the secure data environment.
-export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}
+# Options to pass to SSH when one of the "log into a host and
+# start/stop daemons" scripts is executed
+# export HADOOP_SSH_OPTS="-o BatchMode=yes -o StrictHostKeyChecking=no -o ConnectTimeout=10s"
 
-# The directory where pid files are stored. /tmp by default.
-# NOTE: this should be set to a directory that can only be written to by 
-#       the user that will run the hadoop daemons.  Otherwise there is the
-#       potential for a symlink attack.
-export HADOOP_PID_DIR=${HADOOP_PID_DIR}
-export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}
+# The built-in ssh handler will limit itself to 10 simultaneous connections.
+# For pdsh users, this sets the fanout size ( -f )
+# Change this to increase/decrease as necessary.
+# export HADOOP_SSH_PARALLEL=10
+
+# Filename which contains all of the hosts for any remote execution
+# helper scripts such as slaves.sh, start-dfs.sh, etc.
+# export HADOOP_SLAVES="${HADOOP_CONF_DIR}/slaves"
+
+###
+# Options for all daemons
+###
+#
+
+#
+# You can define variables right here and then re-use them later on.
+# For example, it is common to use the same garbage collection settings
+# for all the daemons.  So we could define:
+#
+# export HADOOP_GC_SETTINGS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps"
+#
+# ... and then use it as in option (b) under the NameNode section below.
+
+# Where (primarily) daemon log files are stored.
+# $HADOOP_PREFIX/logs by default.
+# export HADOOP_LOG_DIR=${HADOOP_PREFIX}/logs
 
 # A string representing this instance of hadoop. $USER by default.
-export HADOOP_IDENT_STRING=$USER
+# This is used in writing log and pid files, so keep that in mind!
+# export HADOOP_IDENT_STRING=$USER
+
+# How many seconds to pause after stopping a daemon
+# export HADOOP_STOP_TIMEOUT=5
+
+# Where pid files are stored.  /tmp by default.
+# export HADOOP_PID_DIR=/tmp
+
+# Default log level and output location
+# This sets the hadoop.root.logger property
+# export HADOOP_ROOT_LOGGER=INFO,console
+
+# Default log level for daemons spawned explicitly by hadoop-daemon.sh
+# This sets the hadoop.root.logger property
+# export HADOOP_DAEMON_ROOT_LOGGER=INFO,RFA
+
+# Default log level and output location for security-related messages.
+# It sets -Dhadoop.security.logger on the command line.
+# You will almost certainly want to change this on a per-daemon basis!
+# export HADOOP_SECURITY_LOGGER=INFO,NullAppender
+
+# Default log level for file system audit messages.
+# It sets -Dhdfs.audit.logger on the command line.
+# You will almost certainly want to change this on a per-daemon basis!
+# export HADOOP_AUDIT_LOGGER=INFO,NullAppender
+
+# Default process priority level
+# Note that sub-processes will also run at this level!
+# export HADOOP_NICENESS=0
+
+# Default name for the service level authorization file
+# export HADOOP_POLICYFILE="hadoop-policy.xml"
+
+###
+# Secure/privileged execution
+###
+
+#
+# Out of the box, Hadoop uses jsvc from Apache Commons to launch daemons
+# on privileged ports.  This functionality can be replaced by providing
+# custom functions.  See hadoop-functions.sh for more information.
+#
+
+# The jsvc implementation to use. Jsvc is required to run secure datanodes.
+# export JSVC_HOME=/usr/bin
+
+#
+# This directory contains pids for secure and privileged processes.
+# export HADOOP_SECURE_PID_DIR=${HADOOP_PID_DIR}
+
+#
+# This directory contains the logs for secure and privileged processes.
+# export HADOOP_SECURE_LOG_DIR=${HADOOP_LOG_DIR}
+
+#
+# When running a secure daemon, the default value of HADOOP_IDENT_STRING
+# ends up being a bit bogus.  Therefore, by default, the code will
+# replace HADOOP_IDENT_STRING with HADOOP_SECURE_xx_USER.  If you want
+# to keep HADOOP_IDENT_STRING untouched, then uncomment this line.
+# export HADOOP_SECURE_IDENT_PRESERVE="true"
+
+###
+# NameNode specific parameters
+###
+# Specify the JVM options to be used when starting the NameNode.
+# These options will be appended to the options specified as HADOOP_OPTS
+# and therefore may override any similar flags set in HADOOP_OPTS
+#
+# a) Set JMX options
+# export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote=true -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.port=1026"
+#
+# b) Set garbage collection logs
+# export HADOOP_NAMENODE_OPTS="${HADOOP_GC_SETTINGS} -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')"
+#
+# c) ... or set them directly
+# export HADOOP_NAMENODE_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')"
+
+# This is the default:
+# export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=INFO,RFAS -Dhdfs.audit.logger=INFO,NullAppender"
+
+###
+# SecondaryNameNode specific parameters
+###
+# Specify the JVM options to be used when starting the SecondaryNameNode.
+# These options will be appended to the options specified as HADOOP_OPTS
+# and therefore may override any similar flags set in HADOOP_OPTS
+#
+# This is the default:
+# export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=INFO,RFAS -Dhdfs.audit.logger=INFO,NullAppender"
+
+###
+# DataNode specific parameters
+###
+# Specify the JVM options to be used when starting the DataNode.
+# These options will be appended to the options specified as HADOOP_OPTS
+# and therefore may override any similar flags set in HADOOP_OPTS
+#
+# This is the default:
+# export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS"
+
+# On secure datanodes, user to run the datanode as after dropping privileges
+# This **MUST** be uncommented to enable secure HDFS!
+# export HADOOP_SECURE_DN_USER=hdfs
+
+# Supplemental options for secure datanodes
+# By default, we use jsvc which needs to know to launch a
+# server jvm.
+# export HADOOP_DN_SECURE_EXTRA_OPTS="-jvm server"
+
+# Where datanode log files are stored in the secure data environment.
+# export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_SECURE_LOG_DIR}
+
+# Where datanode pid files are stored in the secure data environment.
+# export HADOOP_SECURE_DN_PID_DIR=${HADOOP_SECURE_PID_DIR}
+
+###
+# NFS3 Gateway specific parameters
+###
+# Specify the JVM options to be used when starting the NFS3 Gateway.
+# These options will be appended to the options specified as HADOOP_OPTS
+# and therefore may override any similar flags set in HADOOP_OPTS
+#
+# export HADOOP_NFS3_OPTS=""
+
+# Specify the JVM options to be used when starting the Hadoop portmapper.
+# These options will be appended to the options specified as HADOOP_OPTS
+# and therefore may override any similar flags set in HADOOP_OPTS
+#
+# export HADOOP_PORTMAP_OPTS="-Xmx512m"
+
+# Supplemental options for privileged gateways
+# By default, we use jsvc which needs to know to launch a
+# server jvm.
+# export HADOOP_NFS3_SECURE_EXTRA_OPTS="-jvm server"
+
+# On privileged gateways, user to run the gateway as after dropping privileges
+# export HADOOP_PRIVILEGED_NFS_USER=nfsserver
+
+###
+# ZKFailoverController specific parameters
+###
+# Specify the JVM options to be used when starting the ZKFailoverController.
+# These options will be appended to the options specified as HADOOP_OPTS
+# and therefore may override any similar flags set in HADOOP_OPTS
+#
+# export HADOOP_ZKFC_OPTS=""
+
+###
+# QuorumJournalNode specific parameters
+###
+# Specify the JVM options to be used when starting the QuorumJournalNode.
+# These options will be appended to the options specified as HADOOP_OPTS
+# and therefore may override any similar flags set in HADOOP_OPTS
+#
+# export HADOOP_JOURNALNODE_OPTS=""
+
+###
+# HDFS Balancer specific parameters
+###
+# Specify the JVM options to be used when starting the HDFS Balancer.
+# These options will be appended to the options specified as HADOOP_OPTS
+# and therefore may override any similar flags set in HADOOP_OPTS
+#
+# export HADOOP_BALANCER_OPTS=""
+
+###
+# Advanced Users Only!
+###
+
+#
+# When building Hadoop, you can add the class paths to your commands
+# via this special env var:
+# HADOOP_ENABLE_BUILD_PATHS="true"
+
+# You can do things like replace parts of the shell underbelly.
+# Most of this code is in hadoop-functions.sh.
+#
+#
+# For example, if you want to add compression to the rotation
+# method for the .out files that daemons generate, you can do
+# that by redefining the hadoop_rotate_log function by
+# uncommenting this code block:
+
+#function hadoop_rotate_log
+#{
+#  #
+#  # log rotation (mainly used for .out files)
+#  # Users are likely to replace this one for something
+#  # that gzips or uses dates or who knows what.
+#  #
+#  # be aware that &1 and &2 might go through here
+#  # so don't do anything too crazy...
+#  #
+#  local log=$1;
+#  local num=${2:-5};
+#
+#  if [[ -f "${log}" ]]; then # rotate logs
+#    while [[ ${num} -gt 1 ]]; do
+#      #shellcheck disable=SC2086
+#      let prev=${num}-1
+#      if [[ -f "${log}.${prev}" ]]; then
+#        mv "${log}.${prev}" "${log}.${num}"
+#      fi
+#      num=${prev}
+#    done
+#    mv "${log}" "${log}.${num}"
+#    gzip -9 "${log}.${num}"
+#  fi
+#}
+#
+#
+# Another example:  finding java
+#
+# By default, Hadoop assumes that $JAVA_HOME is always defined
+# outside of its configuration. Eons ago, Apple standardized
+# on a helper program called java_home to find it for you.
+#
+#function hadoop_java_setup
+#{
+#
+#  if [[ -z "${JAVA_HOME}" ]]; then
+#     case $HADOOP_OS_TYPE in
+#       Darwin*)
+#          JAVA_HOME=$(/usr/libexec/java_home)
+#          ;;
+#     esac
+#  fi
+#
+#  # Bail if we did not detect it
+#  if [[ -z "${JAVA_HOME}" ]]; then
+#    echo "ERROR: JAVA_HOME is not set and could not be found." 1>&2
+#    exit 1
+#  fi
+#
+#  if [[ ! -d "${JAVA_HOME}" ]]; then
+#     echo "ERROR: JAVA_HOME (${JAVA_HOME}) does not exist." 1>&2
+#     exit 1
+#  fi
+#
+#  JAVA="${JAVA_HOME}/bin/java"
+#
+#  if [[ ! -x ${JAVA} ]]; then
+#    echo "ERROR: ${JAVA} is not executable." 1>&2
+#    exit 1
+#  fi
+#  JAVA_HEAP_MAX=-Xmx1g
+#  HADOOP_HEAPSIZE=${HADOOP_HEAPSIZE:-128}
+#
+#  # check envvars which might override default args
+#  if [[ -n "$HADOOP_HEAPSIZE" ]]; then
+#    JAVA_HEAP_MAX="-Xmx${HADOOP_HEAPSIZE}m"
+#  fi
+#}
+
+
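
Read with the precedence rules at the top of this file, the interplay between the master file and a subproject env file looks like this (values illustrative):

    # hadoop-env.sh:  export HADOOP_LOG_DIR=/var/log/hadoop      # default for all commands
    # hdfs-env.sh:    export HADOOP_HDFS_LOG_DIR=/var/log/hdfs   # wins for hdfs commands
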

+ 3 - 3
hadoop-hdfs-project/hadoop-hdfs/src/main/bin/distribute-exclude.sh

@@ -57,9 +57,9 @@ excludeFilenameRemote=$("$HADOOP_PREFIX/bin/hdfs" getconf -excludeFile)
 
 if [ "$excludeFilenameRemote" = '' ] ; then
   echo \
-    "Error: hdfs getconf -excludeFile returned empty string, " \
-    "please setup dfs.hosts.exclude in hdfs-site.xml in local cluster " \
-    "configuration and on all namenodes"
+  "Error: hdfs getconf -excludeFile returned empty string, " \
+  "please setup dfs.hosts.exclude in hdfs-site.xml in local cluster " \
+  "configuration and on all namenodes"
   exit 1
 fi
 

+ 200 - 213
hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs

@@ -15,250 +15,237 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Environment Variables
-#
-#   JSVC_HOME  home directory of jsvc binary.  Required for starting secure
-#              datanode.
-#
-#   JSVC_OUTFILE  path to jsvc output file.  Defaults to
-#                 $HADOOP_LOG_DIR/jsvc.out.
-#
-#   JSVC_ERRFILE  path to jsvc error file.  Defaults to $HADOOP_LOG_DIR/jsvc.err.
-
-bin=`which $0`
-bin=`dirname ${bin}`
-bin=`cd "$bin" > /dev/null; pwd`
-
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
-
-function print_usage(){
-  echo "Usage: hdfs [--config confdir] COMMAND"
+function hadoop_usage
+{
+  echo "Usage: hdfs [--config confdir] [--daemon (start|stop|status)] COMMAND"
   echo "       where COMMAND is one of:"
-  echo "  dfs                  run a filesystem command on the file systems supported in Hadoop."
-  echo "  namenode -format     format the DFS filesystem"
-  echo "  secondarynamenode    run the DFS secondary namenode"
-  echo "  namenode             run the DFS namenode"
-  echo "  journalnode          run the DFS journalnode"
-  echo "  zkfc                 run the ZK Failover Controller daemon"
+  echo "  balancer             run a cluster balancing utility"
+  echo "  cacheadmin           configure the HDFS cache"
+  echo "  classpath            prints the class path needed to get the"
+  echo "                       Hadoop jar and the required libraries"
   echo "  datanode             run a DFS datanode"
+  echo "  dfs                  run a filesystem command on the file system"
   echo "  dfsadmin             run a DFS admin client"
-  echo "  haadmin              run a DFS HA admin client"
-  echo "  fsck                 run a DFS filesystem checking utility"
-  echo "  balancer             run a cluster balancing utility"
-  echo "  jmxget               get JMX exported values from NameNode or DataNode."
-  echo "  oiv                  apply the offline fsimage viewer to an fsimage"
-  echo "  oiv_legacy           apply the offline fsimage viewer to an legacy fsimage"
-  echo "  oev                  apply the offline edits viewer to an edits file"
   echo "  fetchdt              fetch a delegation token from the NameNode"
+  echo "  fsck                 run a DFS filesystem checking utility"
   echo "  getconf              get config values from configuration"
   echo "  groups               get the groups which users belong to"
-  echo "  snapshotDiff         diff two snapshots of a directory or diff the"
-  echo "                       current directory contents with a snapshot"
+  echo "  haadmin              run a DFS HA admin client"
+  echo "  jmxget               get JMX exported values from NameNode or DataNode."
+  echo "  journalnode          run the DFS journalnode"
   echo "  lsSnapshottableDir   list all snapshottable dirs owned by the current user"
-  echo "						Use -help to see options"
-  echo "  portmap              run a portmap service"
+  echo "                               Use -help to see options"
+  echo "  namenode             run the DFS namenode"
+  echo "                               Use -format to initialize the DFS filesystem"
   echo "  nfs3                 run an NFS version 3 gateway"
-  echo "  cacheadmin           configure the HDFS cache"
+  echo "  oev                  apply the offline edits viewer to an edits file"
+  echo "  oiv                  apply the offline fsimage viewer to an fsimage"
+  echo "  oiv_legacy           apply the offline fsimage viewer to a legacy fsimage"
+  echo "  portmap              run a portmap service"
+  echo "  secondarynamenode    run the DFS secondary namenode"
+  echo "  snapshotDiff         diff two snapshots of a directory or diff the"
+  echo "                       current directory contents with a snapshot"
+  echo "  zkfc                 run the ZK Failover Controller daemon"
   echo ""
   echo "Most commands print help when invoked w/o parameters."
 }
 
-if [ $# = 0 ]; then
-  print_usage
-  exit
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+  DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+  this="${BASH_SOURCE-$0}"
+  bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+  DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
+
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
+  . "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
+else
+  echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
+  exit 1
+fi
+
+if [[ $# = 0 ]]; then
+  hadoop_exit_with_usage 1
 fi
 
 COMMAND=$1
 shift
 
-case $COMMAND in
-  # usage flags
-  --help|-help|-h)
-    print_usage
+case ${COMMAND} in
+  balancer)
+    CLASS=org.apache.hadoop.hdfs.server.balancer.Balancer
+    HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_BALANCER_OPTS}"
+  ;;
+  cacheadmin)
+    CLASS=org.apache.hadoop.hdfs.tools.CacheAdmin
+  ;;
+  classpath)
+    hadoop_finalize
+    echo "${CLASSPATH}"
     exit
-    ;;
-esac
-
-# Determine if we're starting a secure datanode, and if so, redefine appropriate variables
-if [ "$COMMAND" == "datanode" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_USER" ]; then
-  if [ -n "$JSVC_HOME" ]; then
-    if [ -n "$HADOOP_SECURE_DN_PID_DIR" ]; then
-      HADOOP_PID_DIR=$HADOOP_SECURE_DN_PID_DIR
-    fi
-  
-    if [ -n "$HADOOP_SECURE_DN_LOG_DIR" ]; then
-      HADOOP_LOG_DIR=$HADOOP_SECURE_DN_LOG_DIR
-      HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_LOG_DIR"
-    fi
-   
-    HADOOP_IDENT_STRING=$HADOOP_SECURE_DN_USER
-    HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_IDENT_STRING"
-    starting_secure_dn="true"
-  else
-    echo "It looks like you're trying to start a secure DN, but \$JSVC_HOME"\
-      "isn't set. Falling back to starting insecure DN."
-  fi
-fi
-
-# Determine if we're starting a privileged NFS daemon, and if so, redefine appropriate variables
-if [ "$COMMAND" == "nfs3" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_PRIVILEGED_NFS_USER" ]; then
-  if [ -n "$JSVC_HOME" ]; then
-    if [ -n "$HADOOP_PRIVILEGED_NFS_PID_DIR" ]; then
-      HADOOP_PID_DIR=$HADOOP_PRIVILEGED_NFS_PID_DIR
+  ;;
+  datanode)
+    daemon="true"
+    # Determine if we're starting a secure datanode, and
+    # if so, redefine appropriate variables
+    if [[ -n "${HADOOP_SECURE_DN_USER}" ]]; then
+      secure_service="true"
+      secure_user="${HADOOP_SECURE_DN_USER}"
+      
+      # backward compatibility
+      HADOOP_SECURE_PID_DIR="${HADOOP_SECURE_PID_DIR:-$HADOOP_SECURE_DN_PID_DIR}"
+      HADOOP_SECURE_LOG_DIR="${HADOOP_SECURE_LOG_DIR:-$HADOOP_SECURE_DN_LOG_DIR}"
+      
+      HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_DN_SECURE_EXTRA_OPTS} ${HADOOP_DATANODE_OPTS}"
+      CLASS="org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter"
+    else
+      HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_DATANODE_OPTS}"
+      CLASS='org.apache.hadoop.hdfs.server.datanode.DataNode'
     fi
-  
-    if [ -n "$HADOOP_PRIVILEGED_NFS_LOG_DIR" ]; then
-      HADOOP_LOG_DIR=$HADOOP_PRIVILEGED_NFS_LOG_DIR
-      HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_LOG_DIR"
+  ;;
+  dfs)
+    CLASS=org.apache.hadoop.fs.FsShell
+    HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
+  ;;
+  dfsadmin)
+    CLASS=org.apache.hadoop.hdfs.tools.DFSAdmin
+    HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
+  ;;
+  fetchdt)
+    CLASS=org.apache.hadoop.hdfs.tools.DelegationTokenFetcher
+  ;;
+  fsck)
+    CLASS=org.apache.hadoop.hdfs.tools.DFSck
+    HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
+  ;;
+  getconf)
+    CLASS=org.apache.hadoop.hdfs.tools.GetConf
+  ;;
+  groups)
+    CLASS=org.apache.hadoop.hdfs.tools.GetGroups
+  ;;
+  haadmin)
+    CLASS=org.apache.hadoop.hdfs.tools.DFSHAAdmin
+    CLASSPATH="${CLASSPATH}:${TOOL_PATH}"
+    HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
+  ;;
+  journalnode)
+    daemon="true"
+    CLASS='org.apache.hadoop.hdfs.qjournal.server.JournalNode'
+    HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_JOURNALNODE_OPTS}"
+  ;;
+  jmxget)
+    CLASS=org.apache.hadoop.hdfs.tools.JMXGet
+  ;;
+  lsSnapshottableDir)
+    CLASS=org.apache.hadoop.hdfs.tools.snapshot.LsSnapshottableDir
+  ;;
+  namenode)
+    daemon="true"
+    CLASS='org.apache.hadoop.hdfs.server.namenode.NameNode'
+    HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_NAMENODE_OPTS}"
+  ;;
+  nfs3)
+    daemon="true"
+    if [[ -n "${HADOOP_PRIVILEGED_NFS_USER}" ]]; then
+      secure_service="true"
+      secure_user="${HADOOP_PRIVILEGED_NFS_USER}"
+      
+      # backward compatibility
+      HADOOP_SECURE_PID_DIR="${HADOOP_SECURE_PID_DIR:-$HADOOP_SECURE_NFS3_PID_DIR}"
+      HADOOP_SECURE_LOG_DIR="${HADOOP_SECURE_LOG_DIR:-$HADOOP_SECURE_NFS3_LOG_DIR}"
+      
+      HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_NFS3_SECURE_EXTRA_OPTS} ${HADOOP_NFS3_OPTS}"
+      CLASS=org.apache.hadoop.hdfs.nfs.nfs3.PrivilegedNfsGatewayStarter
+    else
+      HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_NFS3_OPTS}"
+      CLASS=org.apache.hadoop.hdfs.nfs.nfs3.Nfs3
     fi
-   
-    HADOOP_IDENT_STRING=$HADOOP_PRIVILEGED_NFS_USER
-    HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_IDENT_STRING"
-    starting_privileged_nfs="true"
-  else
-    echo "It looks like you're trying to start a privileged NFS server, but"\
-      "\$JSVC_HOME isn't set. Falling back to starting unprivileged NFS server."
-  fi
-fi
+  ;;
+  oev)
+    CLASS=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer
+  ;;
+  oiv)
+    CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewerPB
+  ;;
+  oiv_legacy)
+    CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer
+  ;;
+  portmap)
+    daemon="true"
+    CLASS=org.apache.hadoop.portmap.Portmap
+    HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_PORTMAP_OPTS}"
+  ;;
+  secondarynamenode)
+    daemon="true"
+    CLASS='org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode'
+    HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_SECONDARYNAMENODE_OPTS}"
+  ;;
+  snapshotDiff)
+    CLASS=org.apache.hadoop.hdfs.tools.snapshot.SnapshotDiff
+  ;;
+  zkfc)
+    daemon="true"
+    CLASS='org.apache.hadoop.hdfs.tools.DFSZKFailoverController'
+    HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_ZKFC_OPTS}"
+  ;;
+  -*)
+    hadoop_exit_with_usage 1
+  ;;
+  *)
+    CLASS="${COMMAND}"
+  ;;
+esac
 
-if [ "$COMMAND" = "namenode" ] ; then
-  CLASS='org.apache.hadoop.hdfs.server.namenode.NameNode'
-  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_NAMENODE_OPTS"
-elif [ "$COMMAND" = "zkfc" ] ; then
-  CLASS='org.apache.hadoop.hdfs.tools.DFSZKFailoverController'
-  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_ZKFC_OPTS"
-elif [ "$COMMAND" = "secondarynamenode" ] ; then
-  CLASS='org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode'
-  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_SECONDARYNAMENODE_OPTS"
-elif [ "$COMMAND" = "datanode" ] ; then
-  CLASS='org.apache.hadoop.hdfs.server.datanode.DataNode'
-  if [ "$starting_secure_dn" = "true" ]; then
-    HADOOP_OPTS="$HADOOP_OPTS -jvm server $HADOOP_DATANODE_OPTS"
-  else
-    HADOOP_OPTS="$HADOOP_OPTS -server $HADOOP_DATANODE_OPTS"
+if [[ -n "${secure_service}" ]]; then
+  HADOOP_SECURE_USER="${secure_user}"
+  if hadoop_verify_secure_prereq; then
+    hadoop_setup_secure_service
+    priv_outfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.out"
+    priv_errfile="${HADOOP_LOG_DIR}/privileged-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.err"
+    priv_pidfile="${HADOOP_PID_DIR}/privileged-${HADOOP_IDENT_STRING}-${COMMAND}.pid"
+    daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.out"
+    daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${COMMAND}.pid"
   fi
-elif [ "$COMMAND" = "journalnode" ] ; then
-  CLASS='org.apache.hadoop.hdfs.qjournal.server.JournalNode'
-  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_JOURNALNODE_OPTS"
-elif [ "$COMMAND" = "dfs" ] ; then
-  CLASS=org.apache.hadoop.fs.FsShell
-  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
-elif [ "$COMMAND" = "dfsadmin" ] ; then
-  CLASS=org.apache.hadoop.hdfs.tools.DFSAdmin
-  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
-elif [ "$COMMAND" = "haadmin" ] ; then
-  CLASS=org.apache.hadoop.hdfs.tools.DFSHAAdmin
-  CLASSPATH=${CLASSPATH}:${TOOL_PATH}
-  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
-elif [ "$COMMAND" = "fsck" ] ; then
-  CLASS=org.apache.hadoop.hdfs.tools.DFSck
-  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
-elif [ "$COMMAND" = "balancer" ] ; then
-  CLASS=org.apache.hadoop.hdfs.server.balancer.Balancer
-  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_BALANCER_OPTS"
-elif [ "$COMMAND" = "jmxget" ] ; then
-  CLASS=org.apache.hadoop.hdfs.tools.JMXGet
-elif [ "$COMMAND" = "oiv" ] ; then
-  CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewerPB
-elif [ "$COMMAND" = "oiv_legacy" ] ; then
-  CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer
-elif [ "$COMMAND" = "oev" ] ; then
-  CLASS=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer
-elif [ "$COMMAND" = "fetchdt" ] ; then
-  CLASS=org.apache.hadoop.hdfs.tools.DelegationTokenFetcher
-elif [ "$COMMAND" = "getconf" ] ; then
-  CLASS=org.apache.hadoop.hdfs.tools.GetConf
-elif [ "$COMMAND" = "groups" ] ; then
-  CLASS=org.apache.hadoop.hdfs.tools.GetGroups
-elif [ "$COMMAND" = "snapshotDiff" ] ; then
-  CLASS=org.apache.hadoop.hdfs.tools.snapshot.SnapshotDiff
-elif [ "$COMMAND" = "lsSnapshottableDir" ] ; then
-  CLASS=org.apache.hadoop.hdfs.tools.snapshot.LsSnapshottableDir
-elif [ "$COMMAND" = "portmap" ] ; then
-  CLASS=org.apache.hadoop.portmap.Portmap
-  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_PORTMAP_OPTS"
-elif [ "$COMMAND" = "nfs3" ] ; then
-  CLASS=org.apache.hadoop.hdfs.nfs.nfs3.Nfs3
-  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_NFS3_OPTS"
-elif [ "$COMMAND" = "cacheadmin" ] ; then
-  CLASS=org.apache.hadoop.hdfs.tools.CacheAdmin
 else
-  CLASS="$COMMAND"
+  daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.out"
+  daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}.pid"
 fi
 
-export CLASSPATH=$CLASSPATH
-
-HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender}"
-
-# Check to see if we should start a secure datanode
-if [ "$starting_secure_dn" = "true" ]; then
-  if [ "$HADOOP_PID_DIR" = "" ]; then
-    HADOOP_SECURE_DN_PID="/tmp/hadoop_secure_dn.pid"
+if [[ "${HADOOP_DAEMON_MODE}" != "default" ]]; then
+  # shellcheck disable=SC2034
+  HADOOP_ROOT_LOGGER="${HADOOP_DAEMON_ROOT_LOGGER}"
+  if [[ -n "${secure_service}" ]]; then
+    # shellcheck disable=SC2034
+    HADOOP_LOGFILE="hadoop-${HADOOP_SECURE_USER}-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.log"
   else
-    HADOOP_SECURE_DN_PID="$HADOOP_PID_DIR/hadoop_secure_dn.pid"
-  fi
-
-  JSVC=$JSVC_HOME/jsvc
-  if [ ! -f $JSVC ]; then
-    echo "JSVC_HOME is not set correctly so jsvc cannot be found. jsvc is required to run secure datanodes. "
-    echo "Please download and install jsvc from http://archive.apache.org/dist/commons/daemon/binaries/ "\
-      "and set JSVC_HOME to the directory containing the jsvc binary."
-    exit
+    # shellcheck disable=SC2034
+    HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.log"
   fi
+fi
 
-  if [[ ! $JSVC_OUTFILE ]]; then
-    JSVC_OUTFILE="$HADOOP_LOG_DIR/jsvc.out"
-  fi
+hadoop_add_param HADOOP_OPTS Xmx "${JAVA_HEAP_MAX}"
+hadoop_finalize
 
-  if [[ ! $JSVC_ERRFILE ]]; then
-    JSVC_ERRFILE="$HADOOP_LOG_DIR/jsvc.err"
-  fi
+export CLASSPATH
 
-  exec "$JSVC" \
-           -Dproc_$COMMAND -outfile "$JSVC_OUTFILE" \
-           -errfile "$JSVC_ERRFILE" \
-           -pidfile "$HADOOP_SECURE_DN_PID" \
-           -nodetach \
-           -user "$HADOOP_SECURE_DN_USER" \
-            -cp "$CLASSPATH" \
-           $JAVA_HEAP_MAX $HADOOP_OPTS \
-           org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter "$@"
-elif [ "$starting_privileged_nfs" = "true" ] ; then
-  if [ "$HADOOP_PID_DIR" = "" ]; then
-    HADOOP_PRIVILEGED_NFS_PID="/tmp/hadoop_privileged_nfs3.pid"
+if [[ -n "${daemon}" ]]; then
+  if [[ -n "${secure_service}" ]]; then
+    hadoop_secure_daemon_handler \
+    "${HADOOP_DAEMON_MODE}" "${COMMAND}" "${CLASS}"\
+    "${daemon_pidfile}" "${daemon_outfile}" \
+    "${priv_pidfile}" "${priv_outfile}" "${priv_errfile}" "$@"
   else
-    HADOOP_PRIVILEGED_NFS_PID="$HADOOP_PID_DIR/hadoop_privileged_nfs3.pid"
+    hadoop_daemon_handler "${HADOOP_DAEMON_MODE}" "${COMMAND}" "${CLASS}"\
+    "${daemon_pidfile}" "${daemon_outfile}" "$@"
   fi
-
-  JSVC=$JSVC_HOME/jsvc
-  if [ ! -f $JSVC ]; then
-    echo "JSVC_HOME is not set correctly so jsvc cannot be found. jsvc is required to run privileged NFS gateways. "
-    echo "Please download and install jsvc from http://archive.apache.org/dist/commons/daemon/binaries/ "\
-      "and set JSVC_HOME to the directory containing the jsvc binary."
-    exit
-  fi
-
-  if [[ ! $JSVC_OUTFILE ]]; then
-    JSVC_OUTFILE="$HADOOP_LOG_DIR/nfs3_jsvc.out"
-  fi
-
-  if [[ ! $JSVC_ERRFILE ]]; then
-    JSVC_ERRFILE="$HADOOP_LOG_DIR/nfs3_jsvc.err"
-  fi
-
-  exec "$JSVC" \
-           -Dproc_$COMMAND -outfile "$JSVC_OUTFILE" \
-           -errfile "$JSVC_ERRFILE" \
-           -pidfile "$HADOOP_PRIVILEGED_NFS_PID" \
-           -nodetach \
-           -user "$HADOOP_PRIVILEGED_NFS_USER" \
-           -cp "$CLASSPATH" \
-           $JAVA_HEAP_MAX $HADOOP_OPTS \
-           org.apache.hadoop.hdfs.nfs.nfs3.PrivilegedNfsGatewayStarter "$@"
+  exit $?
 else
-  # run it
-  exec "$JAVA" -Dproc_$COMMAND $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@"
+  # shellcheck disable=SC2086
+  hadoop_java_exec "${COMMAND}" "${CLASS}" "$@"
 fi
-
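
Putting the new dispatch together, per the usage string above (daemon and path names are examples):

    hdfs dfs -ls /                  # client command: foreground, via hadoop_java_exec
    hdfs --daemon start namenode    # daemon: detaches, writes pid and .out files
    hdfs --daemon status namenode   # exits with the LSB-style status code
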

+ 58 - 10
hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs-config.sh

@@ -18,19 +18,67 @@
 # included in all the hdfs scripts with source command
 # should not be executed directly
 
-bin=`which "$0"`
-bin=`dirname "${bin}"`
-bin=`cd "$bin"; pwd`
+function hadoop_subproject_init
+{
+  if [ -e "${HADOOP_CONF_DIR}/hdfs-env.sh" ]; then
+    . "${HADOOP_CONF_DIR}/hdfs-env.sh"
+  fi
+  
+  # at some point in time, someone thought it would be a good idea to
+  # create separate vars for every subproject.  *sigh*
+  # let's perform some overrides and set up some defaults for backward
+  # compat; this way the common hadoop vars == the subproject vars and
+  # can be used interchangeably from here on out
+  # ...
+  # this should get deprecated at some point.
+  HADOOP_LOG_DIR="${HADOOP_HDFS_LOG_DIR:-$HADOOP_LOG_DIR}"
+  HADOOP_HDFS_LOG_DIR="${HADOOP_LOG_DIR}"
+  
+  HADOOP_LOGFILE="${HADOOP_HDFS_LOGFILE:-$HADOOP_LOGFILE}"
+  HADOOP_HDFS_LOGFILE="${HADOOP_LOGFILE}"
+  
+  HADOOP_NICENESS=${HADOOP_HDFS_NICENESS:-$HADOOP_NICENESS}
+  HADOOP_HDFS_NICENESS="${HADOOP_NICENESS}"
+  
+  HADOOP_STOP_TIMEOUT=${HADOOP_HDFS_STOP_TIMEOUT:-$HADOOP_STOP_TIMEOUT}
+  HADOOP_HDFS_STOP_TIMEOUT="${HADOOP_STOP_TIMEOUT}"
+  
+  HADOOP_PID_DIR="${HADOOP_HDFS_PID_DIR:-$HADOOP_PID_DIR}"
+  HADOOP_HDFS_PID_DIR="${HADOOP_PID_DIR}"
+  
+  HADOOP_ROOT_LOGGER=${HADOOP_HDFS_ROOT_LOGGER:-$HADOOP_ROOT_LOGGER}
+  HADOOP_HDFS_ROOT_LOGGER="${HADOOP_ROOT_LOGGER}"
+  
+  HADOOP_HDFS_HOME="${HADOOP_HDFS_HOME:-$HADOOP_HOME_DIR}"
+  
+  HADOOP_IDENT_STRING="${HADOOP_HDFS_IDENT_STRING:-$HADOOP_IDENT_STRING}"
+  HADOOP_HDFS_IDENT_STRING="${HADOOP_IDENT_STRING}"
+  
+  # turn on the defaults
+  
+  export HADOOP_NAMENODE_OPTS=${HADOOP_NAMENODE_OPTS:-"-Dhadoop.security.logger=INFO,RFAS -Dhdfs.audit.logger=INFO,NullAppender"}
+  export HADOOP_SECONDARYNAMENODE_OPTS=${HADOOP_SECONDARYNAMENODE_OPTS:-"-Dhadoop.security.logger=INFO,RFAS -Dhdfs.audit.logger=INFO,NullAppender"}
+  export HADOOP_DATANODE_OPTS=${HADOOP_DATANODE_OPTS:-"-Dhadoop.security.logger=ERROR,RFAS"}
+  export HADOOP_DN_SECURE_EXTRA_OPTS=${HADOOP_DN_SECURE_EXTRA_OPTS:-"-jvm server"}
+  export HADOOP_NFS3_SECURE_EXTRA_OPTS=${HADOOP_NFS3_SECURE_EXTRA_OPTS:-"-jvm server"}
+  export HADOOP_PORTMAP_OPTS=${HADOOP_PORTMAP_OPTS:-"-Xmx512m"}
+}
+
+if [[ -z "${HADOOP_LIBEXEC_DIR}" ]]; then
+  _hd_this="${BASH_SOURCE-$0}"
+  HADOOP_LIBEXEC_DIR=$(cd -P -- "$(dirname -- "${_hd_this}")" >/dev/null && pwd -P)
+fi
 
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
 if [ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]; then
-  . ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh
+  . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
 elif [ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]; then
-  . "$HADOOP_COMMON_HOME"/libexec/hadoop-config.sh
+  . "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh"
 elif [ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]; then
-  . "$HADOOP_HOME"/libexec/hadoop-config.sh
+  . "${HADOOP_HOME}/libexec/hadoop-config.sh"
 else
-  echo "Hadoop common not found."
-  exit
+  echo "ERROR: Hadoop common not found." 2>&1
+  exit 1
 fi
+
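
The mirroring block above is easiest to read as a two-step idiom: the common variable takes the subproject value as its default, then the subproject name is pointed back at the result, so the two stay interchangeable afterwards. Condensed, with the same names as the hunk:

# user sets only the HDFS-specific variable...
export HADOOP_HDFS_LOG_DIR=/var/log/hdfs

# ...the common var defaults to it, then is mirrored back
HADOOP_LOG_DIR="${HADOOP_HDFS_LOG_DIR:-$HADOOP_LOG_DIR}"
HADOOP_HDFS_LOG_DIR="${HADOOP_LOG_DIR}"

echo "${HADOOP_LOG_DIR} == ${HADOOP_HDFS_LOG_DIR}"   # both: /var/log/hdfs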

+ 28 - 12
hadoop-hdfs-project/hadoop-hdfs/src/main/bin/refresh-namenodes.sh

@@ -20,24 +20,40 @@
 # This script refreshes all namenodes, it's a simple wrapper
 # for dfsadmin to support multiple namenodes.
 
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+  DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+  this="${BASH_SOURCE-$0}"
+  bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+  DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
 
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
+  . "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
+else
+  echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
+  exit 1
+fi
 
-namenodes=$("$HADOOP_PREFIX/bin/hdfs" getconf -nnRpcAddresses)
-if [ "$?" != '0' ] ; then errorFlag='1' ; 
+namenodes=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -nnRpcAddresses)
+if [[ "$?" != '0' ]] ; then
+  errorFlag='1' ;
 else
-  for namenode in $namenodes ; do
-    echo "Refreshing namenode [$namenode]"
-    "$HADOOP_PREFIX/bin/hdfs" dfsadmin -fs hdfs://$namenode -refreshNodes
-    if [ "$?" != '0' ] ; then errorFlag='1' ; fi
+  for namenode in ${namenodes} ; do
+    echo "Refreshing namenode [${namenode}]"
+    "${HADOOP_HDFS_HOME}/bin/hdfs" dfsadmin \
+    -fs "hdfs://${namenode}" -refreshNodes
+    if [[ "$?" != '0' ]]; then
+      errorFlag='1'
+    fi
   done
 fi
 
-if [ "$errorFlag" = '1' ] ; then
+if [[ "${errorFlag}" = '1' ]] ; then
   echo "Error: refresh of namenodes failed, see error messages above."
   exit 1
 else
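
For reference, the rewritten loop fans one dfsadmin call out per RPC address returned by getconf. A sketch of the same flow with hypothetical hosts, testing the command directly instead of round-tripping through $?:

# hypothetical output of: hdfs getconf -nnRpcAddresses
namenodes="nn1.example.com:8020 nn2.example.com:8020"

for namenode in ${namenodes}; do
  echo "Refreshing namenode [${namenode}]"
  # one failed refresh flips the flag; the script exits 1 at the end
  if ! "${HADOOP_HDFS_HOME}/bin/hdfs" dfsadmin -fs "hdfs://${namenode}" -refreshNodes; then
    errorFlag='1'
  fi
done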

+ 24 - 6
hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-balancer.sh

@@ -15,13 +15,31 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
+function hadoop_usage
+{
+  echo "Usage: start-balancer.sh [--config confdir] [-policy <policy>] [-threshold <threshold>]"
+}
 
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
+this="${BASH_SOURCE-$0}"
+bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+  DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+  DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
+
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
+  . "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
+else
+  echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
+  exit 1
+fi
 
 # Start balancer daemon.
 
-"$HADOOP_PREFIX"/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script "$bin"/hdfs start balancer $@
+exec "${bin}/hadoop-daemon.sh" --config "${HADOOP_CONF_DIR}" start balancer "$@"

+ 86 - 56
hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-dfs.sh

@@ -20,98 +20,128 @@
 # Optionally upgrade or rollback dfs state.
 # Run this on master node.
 
-usage="Usage: start-dfs.sh [-upgrade|-rollback] [other options such as -clusterId]"
+function hadoop_usage
+{
+  echo "Usage: start-dfs.sh [-upgrade|-rollback] [-clusterId]"
+}
 
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
+this="${BASH_SOURCE-$0}"
+bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+  DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+  DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
+
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
+  . "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
+else
+  echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
+  exit 1
+fi
 
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
 
 # get arguments
-if [ $# -ge 1 ]; then
-	nameStartOpt="$1"
-	shift
-	case "$nameStartOpt" in
-	  (-upgrade)
-	  	;;
-	  (-rollback) 
-	  	dataStartOpt="$nameStartOpt"
-	  	;;
-	  (*)
-		  echo $usage
-		  exit 1
-	    ;;
-	esac
+if [[ $# -ge 1 ]]; then
+  nameStartOpt="$1"
+  shift
+  case "$nameStartOpt" in
+    -upgrade)
+    ;;
+    -rollback)
+      dataStartOpt="$nameStartOpt"
+    ;;
+    *)
+      hadoop_exit_with_usage 1
+    ;;
+  esac
 fi
 
+
 #Add other possible options
 nameStartOpt="$nameStartOpt $@"
 
 #---------------------------------------------------------
 # namenodes
 
-NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -namenodes)
+NAMENODES=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -namenodes 2>/dev/null)
+
+if [[ -z "${NAMENODES}" ]]; then
+  NAMENODES=$(hostname)
+fi
 
 echo "Starting namenodes on [$NAMENODES]"
 
-"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
-  --config "$HADOOP_CONF_DIR" \
-  --hostnames "$NAMENODES" \
-  --script "$bin/hdfs" start namenode $nameStartOpt
+"${bin}/hadoop-daemons.sh" \
+--config "${HADOOP_CONF_DIR}" \
+--hostnames "${NAMENODES}" \
+start namenode ${nameStartOpt}
 
 #---------------------------------------------------------
 # datanodes (using default slaves file)
 
-if [ -n "$HADOOP_SECURE_DN_USER" ]; then
-  echo \
-    "Attempting to start secure cluster, skipping datanodes. " \
-    "Run start-secure-dns.sh as root to complete startup."
+if [[ -n "${HADOOP_SECURE_DN_USER}" ]] &&
+[[ -z "${HADOOP_SECURE_COMMAND}" ]]; then
+  echo "ERROR: Attempting to start secure cluster, skipping datanodes. "
+  echo "Run start-secure-dns.sh as root or configure "
+  echo "\${HADOOP_SECURE_COMMAND} to complete startup."
 else
-  "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
-    --config "$HADOOP_CONF_DIR" \
-    --script "$bin/hdfs" start datanode $dataStartOpt
+  
+  echo "Starting datanodes"
+  
+  "${bin}/hadoop-daemons.sh" \
+  --config "${HADOOP_CONF_DIR}" \
+  start datanode ${dataStartOpt}
 fi
 
 #---------------------------------------------------------
 # secondary namenodes (if any)
 
-SECONDARY_NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -secondarynamenodes 2>/dev/null)
+SECONDARY_NAMENODES=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -secondarynamenodes 2>/dev/null)
 
-if [ -n "$SECONDARY_NAMENODES" ]; then
-  echo "Starting secondary namenodes [$SECONDARY_NAMENODES]"
+if [[ "${SECONDARY_NAMENODES}" == "0.0.0.0" ]]; then
+  SECONDARY_NAMENODES=$(hostname)
+fi
 
-  "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
-      --config "$HADOOP_CONF_DIR" \
-      --hostnames "$SECONDARY_NAMENODES" \
-      --script "$bin/hdfs" start secondarynamenode
+if [[ -n "${SECONDARY_NAMENODES}" ]]; then
+  echo "Starting secondary namenodes [${SECONDARY_NAMENODES}]"
+  
+  "${bin}/hadoop-daemons.sh" \
+  --config "${HADOOP_CONF_DIR}" \
+  --hostnames "${SECONDARY_NAMENODES}" \
+  start secondarynamenode
 fi
 
 #---------------------------------------------------------
 # quorumjournal nodes (if any)
 
-SHARED_EDITS_DIR=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.namenode.shared.edits.dir 2>&-)
-
-case "$SHARED_EDITS_DIR" in
-qjournal://*)
-  JOURNAL_NODES=$(echo "$SHARED_EDITS_DIR" | sed 's,qjournal://\([^/]*\)/.*,\1,g; s/;/ /g; s/:[0-9]*//g')
-  echo "Starting journal nodes [$JOURNAL_NODES]"
-  "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
-      --config "$HADOOP_CONF_DIR" \
-      --hostnames "$JOURNAL_NODES" \
-      --script "$bin/hdfs" start journalnode ;;
+SHARED_EDITS_DIR=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -confKey dfs.namenode.shared.edits.dir 2>&-)
+
+case "${SHARED_EDITS_DIR}" in
+  qjournal://*)
+    JOURNAL_NODES=$(echo "${SHARED_EDITS_DIR}" | sed 's,qjournal://\([^/]*\)/.*,\1,g; s/;/ /g; s/:[0-9]*//g')
+    echo "Starting journal nodes [${JOURNAL_NODES}]"
+    "${bin}/hadoop-daemons.sh" \
+    --config "${HADOOP_CONF_DIR}" \
+    --hostnames "${JOURNAL_NODES}" \
+    start journalnode
+  ;;
 esac
 
 #---------------------------------------------------------
 # ZK Failover controllers, if auto-HA is enabled
-AUTOHA_ENABLED=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.ha.automatic-failover.enabled)
-if [ "$(echo "$AUTOHA_ENABLED" | tr A-Z a-z)" = "true" ]; then
-  echo "Starting ZK Failover Controllers on NN hosts [$NAMENODES]"
-  "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
-    --config "$HADOOP_CONF_DIR" \
-    --hostnames "$NAMENODES" \
-    --script "$bin/hdfs" start zkfc
+AUTOHA_ENABLED=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -confKey dfs.ha.automatic-failover.enabled | tr '[:upper:]' '[:lower:]')
+if [[ "${AUTOHA_ENABLED}" = "true" ]]; then
+  echo "Starting ZK Failover Controllers on NN hosts [${NAMENODES}]"
+  "${bin}/hadoop-daemons.sh" \
+  --config "${HADOOP_CONF_DIR}" \
+  --hostnames "${NAMENODES}" \
+  start zkfc
 fi
 
 # eof
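
The sed pipeline that derives the journal node list is dense; with a hypothetical dfs.namenode.shared.edits.dir value it behaves like this:

SHARED_EDITS_DIR="qjournal://jn1.example.com:8485;jn2.example.com:8485;jn3.example.com:8485/mycluster"

# strip the scheme and journal id, split on ';', then drop the ports
JOURNAL_NODES=$(echo "${SHARED_EDITS_DIR}" | sed 's,qjournal://\([^/]*\)/.*,\1,g; s/;/ /g; s/:[0-9]*//g')

echo "${JOURNAL_NODES}"   # -> jn1.example.com jn2.example.com jn3.example.com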

+ 25 - 9
hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-secure-dns.sh

@@ -17,17 +17,33 @@
 
 # Run as root to start secure datanodes in a security-enabled cluster.
 
-usage="Usage (run as root in order to start secure datanodes): start-secure-dns.sh"
 
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
+function hadoop_usage {
+  echo "Usage: start-secure-dns.sh"
+}
 
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
+this="${BASH_SOURCE-$0}"
+bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
 
-if [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_USER" ]; then
-  "$HADOOP_PREFIX"/sbin/hadoop-daemons.sh --config $HADOOP_CONF_DIR --script "$bin"/hdfs start datanode $dataStartOpt
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+  DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
 else
-  echo $usage
+  DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
+
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
+  . "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
+else
+  echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
+  exit 1
+fi
+
+if [[ "${EUID}" -eq 0 ]] && [[ -n "${HADOOP_SECURE_DN_USER}" ]]; then
+  exec "${bin}/hadoop-daemons.sh" --config "${HADOOP_CONF_DIR}" start datanode "${dataStartOpt}"
+else
+  hadoop_exit_with_usage 1
 fi
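
The guard above only proceeds when the script runs as root with a secure datanode user configured. A typical invocation, assuming "hdfs" as the secure user (an example value, not a default):

# in hadoop-env.sh
export HADOOP_SECURE_DN_USER=hdfs

# root is required so jsvc can bind privileged ports before dropping privileges
sudo -E sbin/start-secure-dns.sh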

+ 24 - 6
hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-balancer.sh

@@ -15,14 +15,32 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
+function hadoop_usage
+{
+  echo "Usage: stop-balancer.sh [--config confdir]"
+}
 
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
+this="${BASH_SOURCE-$0}"
+bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+  DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+  DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
+
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
+  . "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
+else
+  echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
+  exit 1
+fi
 
 # Stop balancer daemon.
 # Run this on the machine where the balancer is running
 
-"$HADOOP_PREFIX"/sbin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script "$bin"/hdfs stop balancer
+"${bin}/hadoop-daemon.sh" --config "${HADOOP_CONF_DIR}" stop balancer

+ 65 - 40
hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-dfs.sh

@@ -15,75 +15,100 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
+function hadoop_usage
+{
+  echo "Usage: start-balancer.sh [--config confdir]  [-policy <policy>] [-threshold <threshold>]"
+}
 
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
+this="${BASH_SOURCE-$0}"
+bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+  DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+  DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
+
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
+  . "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
+else
+  echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
+  exit 1
+fi
 
 #---------------------------------------------------------
 # namenodes
 
-NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -namenodes)
+NAMENODES=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -namenodes)
 
 echo "Stopping namenodes on [$NAMENODES]"
 
-"$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
-  --config "$HADOOP_CONF_DIR" \
-  --hostnames "$NAMENODES" \
-  --script "$bin/hdfs" stop namenode
+"${bin}/hadoop-daemons.sh" \
+--config "${HADOOP_CONF_DIR}" \
+--hostnames "${NAMENODES}" \
+stop namenode
 
 #---------------------------------------------------------
 # datanodes (using default slaves file)
 
-if [ -n "$HADOOP_SECURE_DN_USER" ]; then
+if [[ -n "${HADOOP_SECURE_DN_USER}" ]] &&
+[[ -z "${HADOOP_SECURE_COMMAND}" ]]; then
   echo \
-    "Attempting to stop secure cluster, skipping datanodes. " \
-    "Run stop-secure-dns.sh as root to complete shutdown."
+  "ERROR: Attempting to stop secure cluster, skipping datanodes. " \
+  "Run stop-secure-dns.sh as root to complete shutdown."
 else
-  "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
-    --config "$HADOOP_CONF_DIR" \
-    --script "$bin/hdfs" stop datanode
+  
+  echo "Stopping datanodes"
+  
+  "${bin}/hadoop-daemons.sh" --config "${HADOOP_CONF_DIR}" stop datanode
 fi
 
 #---------------------------------------------------------
 # secondary namenodes (if any)
 
-SECONDARY_NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -secondarynamenodes 2>/dev/null)
+SECONDARY_NAMENODES=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -secondarynamenodes 2>/dev/null)
 
-if [ -n "$SECONDARY_NAMENODES" ]; then
-  echo "Stopping secondary namenodes [$SECONDARY_NAMENODES]"
+if [[ "${SECONDARY_NAMENODES}" == "0.0.0.0" ]]; then
+  SECONDARY_NAMENODES=$(hostname)
+fi
 
-  "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
-      --config "$HADOOP_CONF_DIR" \
-      --hostnames "$SECONDARY_NAMENODES" \
-      --script "$bin/hdfs" stop secondarynamenode
+if [[ -n "${SECONDARY_NAMENODES}" ]]; then
+  echo "Stopping secondary namenodes [${SECONDARY_NAMENODES}]"
+  
+  "${bin}/hadoop-daemons.sh" \
+  --config "${HADOOP_CONF_DIR}" \
+  --hostnames "${SECONDARY_NAMENODES}" \
+  stop secondarynamenode
 fi
 
 #---------------------------------------------------------
 # quorumjournal nodes (if any)
 
-SHARED_EDITS_DIR=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.namenode.shared.edits.dir 2>&-)
-
-case "$SHARED_EDITS_DIR" in
-qjournal://*)
-  JOURNAL_NODES=$(echo "$SHARED_EDITS_DIR" | sed 's,qjournal://\([^/]*\)/.*,\1,g; s/;/ /g; s/:[0-9]*//g')
-  echo "Stopping journal nodes [$JOURNAL_NODES]"
-  "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
-      --config "$HADOOP_CONF_DIR" \
-      --hostnames "$JOURNAL_NODES" \
-      --script "$bin/hdfs" stop journalnode ;;
+SHARED_EDITS_DIR=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -confKey dfs.namenode.shared.edits.dir 2>&-)
+
+case "${SHARED_EDITS_DIR}" in
+  qjournal://*)
+    JOURNAL_NODES=$(echo "${SHARED_EDITS_DIR}" | sed 's,qjournal://\([^/]*\)/.*,\1,g; s/;/ /g; s/:[0-9]*//g')
+    echo "Stopping journal nodes [${JOURNAL_NODES}]"
+    "${bin}/hadoop-daemons.sh" \
+    --config "${HADOOP_CONF_DIR}" \
+    --hostnames "${JOURNAL_NODES}" \
+    stop journalnode
+  ;;
 esac
 
 #---------------------------------------------------------
 # ZK Failover controllers, if auto-HA is enabled
-AUTOHA_ENABLED=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.ha.automatic-failover.enabled)
-if [ "$(echo "$AUTOHA_ENABLED" | tr A-Z a-z)" = "true" ]; then
-  echo "Stopping ZK Failover Controllers on NN hosts [$NAMENODES]"
-  "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
-    --config "$HADOOP_CONF_DIR" \
-    --hostnames "$NAMENODES" \
-    --script "$bin/hdfs" stop zkfc
+AUTOHA_ENABLED=$("${HADOOP_HDFS_HOME}/bin/hdfs" getconf -confKey dfs.ha.automatic-failover.enabled | tr '[:upper:]' '[:lower:]')
+if [[ "${AUTOHA_ENABLED}" = "true" ]]; then
+  echo "Stopping ZK Failover Controllers on NN hosts [${NAMENODES}]"
+  "${bin}/hadoop-daemons.sh" \
+  --config "${HADOOP_CONF_DIR}" \
+  --hostnames "${NAMENODES}" \
+  stop zkfc
 fi
 # eof
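
Lowercasing the getconf output before the comparison makes the auto-HA check case-insensitive, since the XML value may be spelled true, True, or TRUE:

for raw in true True TRUE; do
  normalized=$(echo "${raw}" | tr '[:upper:]' '[:lower:]')
  [[ "${normalized}" = "true" ]] && echo "auto-HA enabled (raw was: ${raw})"
done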

+ 25 - 9
hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-secure-dns.sh

@@ -17,17 +17,33 @@
 
 # Run as root to start secure datanodes in a security-enabled cluster.
 
-usage="Usage (run as root in order to stop secure datanodes): stop-secure-dns.sh"
 
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
+function hadoop_usage {
+  echo "Usage (run as root in order to stop secure datanodes): stop-secure-dns.sh"
+}
 
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/hdfs-config.sh
+this="${BASH_SOURCE-$0}"
+bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
 
-if [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_USER" ]; then
-  "$HADOOP_PREFIX"/sbin/hadoop-daemons.sh --config $HADOOP_CONF_DIR --script "$bin"/hdfs stop datanode
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+  DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
 else
-  echo $usage
+  DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
+
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh" ]]; then
+  . "${HADOOP_LIBEXEC_DIR}/hdfs-config.sh"
+else
+  echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/hdfs-config.sh." 2>&1
+  exit 1
+fi
+
+if [[ "${EUID}" -eq 0 ]] && [[ -n "${HADOOP_SECURE_DN_USER}" ]]; then
+  "${bin}/hadoop-daemons.sh" --config "${HADOOP_CONF_DIR}" stop datanode
+else
+  hadoop_exit_with_usage 1
 fi

+ 104 - 113
hadoop-mapreduce-project/bin/mapred

@@ -15,138 +15,129 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-bin=`which $0`
-bin=`dirname ${bin}`
-bin=`cd "$bin" > /dev/null; pwd`
-
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-if [ -e ${HADOOP_LIBEXEC_DIR}/mapred-config.sh ]; then
-  . ${HADOOP_LIBEXEC_DIR}/mapred-config.sh
-else
-  . "$bin/mapred-config.sh"
-fi
-
-function print_usage(){
-  echo "Usage: mapred [--config confdir] COMMAND"
+function hadoop_usage
+{
+  echo "Usage: mapred [--config confdir] [--daemon (start|stop|status)] COMMAND"
   echo "       where COMMAND is one of:"
-  echo "  pipes                run a Pipes job"
-  echo "  job                  manipulate MapReduce jobs"
-  echo "  queue                get information regarding JobQueues"
+  
+  echo "  archive -archiveName NAME -p <parent path> <src>* <dest> create a hadoop archive"
   echo "  classpath            prints the class path needed for running"
   echo "                       mapreduce subcommands"
-  echo "  historyserver        run job history servers as a standalone daemon"
   echo "  distcp <srcurl> <desturl> copy file or directories recursively"
-  echo "  archive -archiveName NAME -p <parent path> <src>* <dest> create a hadoop archive"
-  echo "  hsadmin              job history server admin interface"
+  echo "  job                  manipulate MapReduce jobs"
+  echo "  historyserver        run job history servers as a standalone daemon"
+  echo "  pipes                run a Pipes job"
+  echo "  queue                get information regarding JobQueues"
+  echo "  sampler              sampler"
   echo ""
   echo "Most commands print help when invoked w/o parameters."
 }
 
+this="${BASH_SOURCE-$0}"
+bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+  DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+  DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
+
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/mapred-config.sh" ]]; then
+  . "${HADOOP_LIBEXEC_DIR}/mapred-config.sh"
+else
+  echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/mapred-config.sh." 2>&1
+  exit 1
+fi
+
+
 if [ $# = 0 ]; then
-  print_usage
-  exit
+  hadoop_exit_with_usage 1
 fi
 
 COMMAND=$1
 shift
 
-case $COMMAND in
-  # usage flags
-  --help|-help|-h)
-    print_usage
-    exit
-    ;;
+case ${COMMAND} in
+  mradmin|jobtracker|tasktracker|groups)
+    echo "Sorry, the ${COMMAND} command is no longer supported."
+    echo "You may find similar functionality with the \"yarn\" shell command."
+    hadoop_exit_with_usage 1
+  ;;
+  archive)
+    CLASS=org.apache.hadoop.tools.HadoopArchives
+    hadoop_add_classpath "${TOOL_PATH}"
+    HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
+  ;;
+  classpath)
+    hadoop_finalize
+    echo "${CLASSPATH}"
+    exit 0
+  ;;
+  distcp)
+    CLASS=org.apache.hadoop.tools.DistCp
+    hadoop_add_classpath "${TOOL_PATH}"
+    HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
+  ;;
+  historyserver)
+    daemon="true"
+    CLASS=org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer
+    HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_JOB_HISTORYSERVER_OPTS}"
+    if [ -n "${HADOOP_JOB_HISTORYSERVER_HEAPSIZE}" ]; then
+      JAVA_HEAP_MAX="-Xmx${HADOOP_JOB_HISTORYSERVER_HEAPSIZE}m"
+    fi
+    HADOOP_DAEMON_ROOT_LOGGER=${HADOOP_JHS_LOGGER:-$HADOOP_DAEMON_ROOT_LOGGER}
+  ;;
+  job)
+    CLASS=org.apache.hadoop.mapred.JobClient
+  ;;
+  pipes)
+    CLASS=org.apache.hadoop.mapred.pipes.Submitter
+    HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
+  ;;
+  queue)
+    CLASS=org.apache.hadoop.mapred.JobQueueClient
+  ;;
+  sampler)
+    CLASS=org.apache.hadoop.mapred.lib.InputSampler
+    HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_CLIENT_OPTS}"
+  ;;
+  -*|*)
+    hadoop_exit_with_usage 1
+  ;;
 esac
 
-if [ "$COMMAND" = "job" ] ; then
-  CLASS=org.apache.hadoop.mapred.JobClient
-  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
-elif [ "$COMMAND" = "queue" ] ; then
-  CLASS=org.apache.hadoop.mapred.JobQueueClient
-  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
-elif [ "$COMMAND" = "pipes" ] ; then
-  CLASS=org.apache.hadoop.mapred.pipes.Submitter
-  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
-elif [ "$COMMAND" = "sampler" ] ; then
-  CLASS=org.apache.hadoop.mapred.lib.InputSampler
-  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
-elif [ "$COMMAND" = "classpath" ] ; then
-  echo -n 
-elif [ "$COMMAND" = "historyserver" ] ; then
-  CLASS=org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer
-  HADOOP_OPTS="$HADOOP_OPTS -Dmapred.jobsummary.logger=${HADOOP_JHS_LOGGER:-INFO,console} $HADOOP_JOB_HISTORYSERVER_OPTS"
-  if [ "$HADOOP_JOB_HISTORYSERVER_HEAPSIZE" != "" ]; then
-    JAVA_HEAP_MAX="-Xmx""$HADOOP_JOB_HISTORYSERVER_HEAPSIZE""m"
-  fi
-elif [ "$COMMAND" = "mradmin" ] \
-    || [ "$COMMAND" = "jobtracker" ] \
-    || [ "$COMMAND" = "tasktracker" ] \
-    || [ "$COMMAND" = "groups" ] ; then
-  echo "Sorry, the $COMMAND command is no longer supported."
-  echo "You may find similar functionality with the \"yarn\" shell command."
-  print_usage
-  exit 1
-elif [ "$COMMAND" = "distcp" ] ; then
-  CLASS=org.apache.hadoop.tools.DistCp
-  CLASSPATH=${CLASSPATH}:${TOOL_PATH}
-  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
-elif [ "$COMMAND" = "archive" ] ; then
-  CLASS=org.apache.hadoop.tools.HadoopArchives
-  CLASSPATH=${CLASSPATH}:${TOOL_PATH}
-  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
-elif [ "$COMMAND" = "hsadmin" ] ; then
-  CLASS=org.apache.hadoop.mapreduce.v2.hs.client.HSAdmin
-  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
-else
-  echo $COMMAND - invalid command
-  print_usage
-  exit 1
-fi
+daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.out"
+daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}.pid"
 
-# for developers, add mapred classes to CLASSPATH
-if [ -d "$HADOOP_MAPRED_HOME/build/classes" ]; then
-  CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/build/classes
-fi
-if [ -d "$HADOOP_MAPRED_HOME/build/webapps" ]; then
-  CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/build
-fi
-if [ -d "$HADOOP_MAPRED_HOME/build/test/classes" ]; then
-  CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/build/test/classes
-fi
-if [ -d "$HADOOP_MAPRED_HOME/build/tools" ]; then
-  CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/build/tools
-fi
 
-# for releases, add core mapred jar & webapps to CLASSPATH
-if [ -d "$HADOOP_PREFIX/${MAPRED_DIR}/webapps" ]; then
-  CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX/${MAPRED_DIR}
-fi
-for f in $HADOOP_MAPRED_HOME/${MAPRED_DIR}/*.jar; do
-  CLASSPATH=${CLASSPATH}:$f;
-done
-
-# Need YARN jars also
-for f in $HADOOP_YARN_HOME/${YARN_DIR}/*.jar; do
-  CLASSPATH=${CLASSPATH}:$f;
-done
-
-# add libs to CLASSPATH
-for f in $HADOOP_MAPRED_HOME/${MAPRED_LIB_JARS_DIR}/*.jar; do
-  CLASSPATH=${CLASSPATH}:$f;
-done
-
-# add modules to CLASSPATH
-for f in $HADOOP_MAPRED_HOME/modules/*.jar; do
-  CLASSPATH=${CLASSPATH}:$f;
-done
-
-if [ "$COMMAND" = "classpath" ] ; then
-  echo $CLASSPATH
-  exit
+if [[  "${HADOOP_DAEMON_MODE}" != "default" ]]; then
+  # shellcheck disable=SC2034
+  HADOOP_ROOT_LOGGER="${HADOOP_DAEMON_ROOT_LOGGER}"
+  hadoop_add_param HADOOP_OPTS mapred.jobsummary.logger "-Dmapred.jobsummary.logger=${HADOOP_ROOT_LOGGER}"
+  # shellcheck disable=SC2034
+  HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.log"
 fi
 
-HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender}"
+hadoop_add_param HADOOP_OPTS Xmx "${JAVA_HEAP_MAX}"
+hadoop_finalize
 
 export CLASSPATH
-exec "$JAVA" -Dproc_$COMMAND $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@"
+
+if [[ -n "${daemon}" ]]; then
+  if [[ -n "${secure_service}" ]]; then
+    hadoop_secure_daemon_handler "${HADOOP_DAEMON_MODE}" "${COMMAND}"\
+    "${CLASS}" "${daemon_pidfile}" "${daemon_outfile}" \
+    "${priv_pidfile}" "${priv_outfile}" "${priv_errfile}" "$@"
+  else
+    hadoop_daemon_handler "${HADOOP_DAEMON_MODE}" "${COMMAND}" "${CLASS}" \
+    "${daemon_pidfile}" "${daemon_outfile}" "$@"
+  fi
+  exit $?
+else
+  hadoop_java_exec "${COMMAND}" "${CLASS}" "$@"
+fi
+
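
The removed exec line survives, in spirit, inside hadoop_java_exec: the non-daemon path still ends in a single exec of the JVM, with JAVA_HEAP_MAX folded into HADOOP_OPTS by hadoop_add_param beforehand. A plausible minimal reading — the shipped function in hadoop-functions.sh adds logging and more, so this is a sketch, not the actual body:

# sketch of the non-daemon dispatch target
function hadoop_java_exec
{
  local command=$1
  local class=$2
  shift 2
  # shellcheck disable=SC2086
  exec "${JAVA}" "-Dproc_${command}" ${HADOOP_OPTS} "${class}" "$@"
}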

+ 46 - 26
hadoop-mapreduce-project/bin/mapred-config.sh

@@ -18,35 +18,55 @@
 # included in all the mapred scripts with source command
 # should not be executed directly
 
-bin=`which "$0"`
-bin=`dirname "${bin}"`
-bin=`cd "$bin"; pwd`
+function hadoop_subproject_init
+{
+  if [ -e "${HADOOP_CONF_DIR}/mapred-env.sh" ]; then
+    . "${HADOOP_CONF_DIR}/mapred-env.sh"
+  fi
+  
+  # at some point in time, someone thought it would be a good idea to
+  # create separate vars for every subproject.  *sigh*
+  # let's perform some overrides and set up some defaults for backward
+  # compat; this way the common hadoop vars == the subproject vars and
+  # can be used interchangeably from here on out
+  # ...
+  # this should get deprecated at some point.
+  HADOOP_LOG_DIR="${HADOOP_MAPRED_LOG_DIR:-$HADOOP_LOG_DIR}"
+  HADOOP_MAPRED_LOG_DIR="${HADOOP_LOG_DIR}"
+  
+  HADOOP_LOGFILE="${HADOOP_MAPRED_LOGFILE:-$HADOOP_LOGFILE}"
+  HADOOP_MAPRED_LOGFILE="${HADOOP_LOGFILE}"
+  
+  HADOOP_NICENESS="${HADOOP_MAPRED_NICENESS:-$HADOOP_NICENESS}"
+  HADOOP_MAPRED_NICENESS="${HADOOP_NICENESS}"
+  
+  HADOOP_STOP_TIMEOUT="${HADOOP_MAPRED_STOP_TIMEOUT:-$HADOOP_STOP_TIMEOUT}"
+  HADOOP_MAPRED_STOP_TIMEOUT="${HADOOP_STOP_TIMEOUT}"
+  
+  HADOOP_PID_DIR="${HADOOP_MAPRED_PID_DIR:-$HADOOP_PID_DIR}"
+  HADOOP_MAPRED_PID_DIR="${HADOOP_PID_DIR}"
+  
+  HADOOP_ROOT_LOGGER="${HADOOP_MAPRED_ROOT_LOGGER:-INFO,console}"
+  HADOOP_MAPRED_ROOT_LOGGER="${HADOOP_ROOT_LOGGER}"
+  
+  HADOOP_MAPRED_HOME="${HADOOP_MAPRED_HOME:-$HADOOP_HOME_DIR}"
+  
+  HADOOP_IDENT_STRING="${HADOOP_MAPRED_IDENT_STRING:-$HADOOP_IDENT_STRING}"
+  HADOOP_MAPRED_IDENT_STRING="${HADOOP_IDENT_STRING}"
+}
 
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-if [ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]; then
+if [[ -z "${HADOOP_LIBEXEC_DIR}" ]]; then
+  _mc_this="${BASH_SOURCE-$0}"
+  HADOOP_LIBEXEC_DIR=$(cd -P -- "$(dirname -- "${_mc_this}")" >/dev/null && pwd -P)
+fi
+
+if [[ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
   . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
-elif [ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]; then
-  . "$HADOOP_COMMON_HOME"/libexec/hadoop-config.sh
-elif [ -e "${HADOOP_COMMON_HOME}/bin/hadoop-config.sh" ]; then
-  . "$HADOOP_COMMON_HOME"/bin/hadoop-config.sh
-elif [ -e "${HADOOP_HOME}/bin/hadoop-config.sh" ]; then
-  . "$HADOOP_HOME"/bin/hadoop-config.sh
-elif [ -e "${HADOOP_MAPRED_HOME}/bin/hadoop-config.sh" ]; then
-  . "$HADOOP_MAPRED_HOME"/bin/hadoop-config.sh
+elif [[ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]]; then
+  . "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh"
+elif [[ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]]; then
+  . "${HADOOP_HOME}/libexec/hadoop-config.sh"
 else
   echo "Hadoop common not found."
   exit
 fi
-
-# Only set locally to use in HADOOP_OPTS. No need to export.
-# The following defaults are useful when somebody directly invokes bin/mapred.
-HADOOP_MAPRED_LOG_DIR=${HADOOP_MAPRED_LOG_DIR:-${HADOOP_MAPRED_HOME}/logs}
-HADOOP_MAPRED_LOGFILE=${HADOOP_MAPRED_LOGFILE:-hadoop.log}
-HADOOP_MAPRED_ROOT_LOGGER=${HADOOP_MAPRED_ROOT_LOGGER:-INFO,console}
-
-HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_MAPRED_LOG_DIR"
-HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.file=$HADOOP_MAPRED_LOGFILE"
-export HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.root.logger=${HADOOP_MAPRED_ROOT_LOGGER}"
-
-

+ 21 - 122
hadoop-mapreduce-project/bin/mr-jobhistory-daemon.sh

@@ -15,133 +15,32 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
-#
-# Environment Variables
-#
-#   HADOOP_JHS_LOGGER  Hadoop JobSummary logger.
-#   HADOOP_CONF_DIR  Alternate conf dir. Default is ${HADOOP_MAPRED_HOME}/conf.
-#   HADOOP_MAPRED_PID_DIR   The pid files are stored. /tmp by default.
-#   HADOOP_MAPRED_NICENESS The scheduling priority for daemons. Defaults to 0.
-##
-
-usage="Usage: mr-jobhistory-daemon.sh [--config <conf-dir>] (start|stop) <mapred-command> "
-
-# if no args specified, show usage
-if [ $# -le 1 ]; then
-  echo $usage
-  exit 1
-fi
-
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
-
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-if [ -e ${HADOOP_LIBEXEC_DIR}/mapred-config.sh ]; then
-  . $HADOOP_LIBEXEC_DIR/mapred-config.sh
-fi
-
-# get arguments
-startStop=$1
-shift
-command=$1
-shift
-
-hadoop_rotate_log ()
+function hadoop_usage
 {
-  log=$1;
-  num=5;
-  if [ -n "$2" ]; then
-    num=$2
-  fi
-  if [ -f "$log" ]; then # rotate logs
-    while [ $num -gt 1 ]; do
-      prev=`expr $num - 1`
-      [ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num"
-      num=$prev
-    done
-    mv "$log" "$log.$num";
-  fi
+  echo "Usage: mr-jobhistory-daemon.sh [--config confdir] (start|stop|status) <hadoop-command> <args...>"
 }
 
-if [ "$HADOOP_MAPRED_IDENT_STRING" = "" ]; then
-  export HADOOP_MAPRED_IDENT_STRING="$USER"
-fi
-
-export HADOOP_MAPRED_HOME=${HADOOP_MAPRED_HOME:-${HADOOP_PREFIX}}
-export HADOOP_MAPRED_LOGFILE=mapred-$HADOOP_MAPRED_IDENT_STRING-$command-$HOSTNAME.log
-export HADOOP_MAPRED_ROOT_LOGGER=${HADOOP_MAPRED_ROOT_LOGGER:-INFO,RFA}
-export HADOOP_JHS_LOGGER=${HADOOP_JHS_LOGGER:-INFO,JSA}
-
-if [ -f "${HADOOP_CONF_DIR}/mapred-env.sh" ]; then
-  . "${HADOOP_CONF_DIR}/mapred-env.sh"
-fi
-
-mkdir -p "$HADOOP_MAPRED_LOG_DIR"
-chown $HADOOP_MAPRED_IDENT_STRING $HADOOP_MAPRED_LOG_DIR
-
-if [ "$HADOOP_MAPRED_PID_DIR" = "" ]; then
-  HADOOP_MAPRED_PID_DIR=/tmp
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+  DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+  this="${BASH_SOURCE-$0}"
+  bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+  DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
 fi
 
-HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_MAPRED_IDENT_STRING"
-
-log=$HADOOP_MAPRED_LOG_DIR/mapred-$HADOOP_MAPRED_IDENT_STRING-$command-$HOSTNAME.out
-pid=$HADOOP_MAPRED_PID_DIR/mapred-$HADOOP_MAPRED_IDENT_STRING-$command.pid
-
-HADOOP_MAPRED_STOP_TIMEOUT=${HADOOP_MAPRED_STOP_TIMEOUT:-5}
-
-# Set default scheduling priority
-if [ "$HADOOP_MAPRED_NICENESS" = "" ]; then
-  export HADOOP_MAPRED_NICENESS=0
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/yarn-config.sh" ]]; then
+  . "${HADOOP_LIBEXEC_DIR}/yarn-config.sh"
+else
+  echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/yarn-config.sh." 2>&1
+  exit 1
 fi
 
-case $startStop in
-
-  (start)
-
-    mkdir -p "$HADOOP_MAPRED_PID_DIR"
-
-    if [ -f $pid ]; then
-      if kill -0 `cat $pid` > /dev/null 2>&1; then
-        echo $command running as process `cat $pid`.  Stop it first.
-        exit 1
-      fi
-    fi
-
-    hadoop_rotate_log $log
-    echo starting $command, logging to $log
-    cd "$HADOOP_MAPRED_HOME"
-    nohup nice -n $HADOOP_MAPRED_NICENESS "$HADOOP_MAPRED_HOME"/bin/mapred --config $HADOOP_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null &
-    echo $! > $pid
-    sleep 1; head "$log"
-    ;;
-
-  (stop)
-
-    if [ -f $pid ]; then
-      TARGET_PID=`cat $pid`
-      if kill -0 $TARGET_PID > /dev/null 2>&1; then
-        echo stopping $command
-        kill $TARGET_PID
-        sleep $HADOOP_MAPRED_STOP_TIMEOUT
-        if kill -0 $TARGET_PID > /dev/null 2>&1; then
-          echo "$command did not stop gracefully after $HADOOP_MAPRED_STOP_TIMEOUT seconds: killing with kill -9"
-          kill -9 $TARGET_PID
-        fi
-      else
-        echo no $command to stop
-      fi
-      rm -f $pid
-    else
-      echo no $command to stop
-    fi
-    ;;
-
-  (*)
-    echo $usage
-    exit 1
-    ;;
+daemonmode=$1
+shift
 
-esac
+exec "${HADOOP_MAPRED_HOME}/bin/mapred" \
+--config "${HADOOP_CONF_DIR}" --daemon "${daemonmode}" "$@"
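
The daemon script is now a thin shim over the unified entry point; both of the following start the same JVM (paths relative to the install root):

# old style, still supported through the wrapper
bin/mr-jobhistory-daemon.sh --config "${HADOOP_CONF_DIR}" start historyserver

# what the wrapper execs underneath
bin/mapred --config "${HADOOP_CONF_DIR}" --daemon start historyserver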

+ 52 - 8
hadoop-mapreduce-project/conf/mapred-env.sh

@@ -13,15 +13,59 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# export JAVA_HOME=/home/y/libexec/jdk1.6.0/
+##
+## THIS FILE ACTS AS AN OVERRIDE FOR hadoop-env.sh FOR ALL
+## WORK DONE BY THE mapred AND RELATED COMMANDS.
+##
+## Precedence rules:
+##
+## mapred-env.sh > hadoop-env.sh > hard-coded defaults
+##
+## MAPRED_xyz > HADOOP_xyz > hard-coded defaults
+##
 
-export HADOOP_JOB_HISTORYSERVER_HEAPSIZE=1000
+###
+# Generic settings for MapReduce
+###
 
-export HADOOP_MAPRED_ROOT_LOGGER=INFO,RFA
+# Override the log4j settings for all MR apps
+# export MAPRED_ROOT_LOGGER="INFO,console"
 
+# Override Hadoop's log directory & file
+# export HADOOP_MAPRED_LOG_DIR=""
+
+# Override Hadoop's pid directory
+# export HADOOP_MAPRED_PID_DIR=
+
+# Override Hadoop's identity string. $USER by default.
+# This is used in writing log and pid files, so keep that in mind!
+# export HADOOP_MAPRED_IDENT_STRING=$USER
+
+# Override Hadoop's process priority
+# Note that sub-processes will also run at this level!
+# export HADOOP_MAPRED_NICENESS=0
+
+###
+# Job History Server specific parameters
+###
+
+# Specify the max heapsize for the Job History Server using a numerical value
+# in MB. For example, to specify a JVM option of -Xmx1000m, set
+# the value to 1000.
+# This value will be overridden by an Xmx setting specified in
+# MAPRED_OPTS, HADOOP_OPTS, and/or HADOOP_JOB_HISTORYSERVER_OPTS.
+# If not specified, the default value will be picked from either YARN_HEAPMAX
+# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
+#
+#export HADOOP_JOB_HISTORYSERVER_HEAPSIZE=1000
+
+# Specify the JVM options to be used when starting the Job History Server.
+# These options will be appended to the options specified as HADOOP_OPTS
+# and therefore may override any similar flags set in HADOOP_OPTS
 #export HADOOP_JOB_HISTORYSERVER_OPTS=
-#export HADOOP_MAPRED_LOG_DIR="" # Where log files are stored.  $HADOOP_MAPRED_HOME/logs by default.
-#export HADOOP_JHS_LOGGER=INFO,RFA # Hadoop JobSummary logger.
-#export HADOOP_MAPRED_PID_DIR= # The pid files are stored. /tmp by default.
-#export HADOOP_MAPRED_IDENT_STRING= #A string representing this instance of hadoop. $USER by default
-#export HADOOP_MAPRED_NICENESS= #The scheduling priority for daemons. Defaults to 0.
+
+# Specify the log4j settings for the JobHistoryServer
+#export HADOOP_JHS_LOGGER=INFO,RFA
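
Tying the heapsize knob back to bin/mapred above: the plain number set here is turned into an -Xmx flag by the historyserver case and then merged into HADOOP_OPTS, e.g.:

# in mapred-env.sh
export HADOOP_JOB_HISTORYSERVER_HEAPSIZE=1000

# in bin/mapred this becomes JAVA_HEAP_MAX="-Xmx1000m", which
# hadoop_add_param later folds into HADOOP_OPTS (unless an Xmx is already set)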

+ 0 - 70
hadoop-yarn-project/hadoop-yarn/bin/slaves.sh

@@ -1,70 +0,0 @@
-#!/usr/bin/env bash
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-# Run a shell command on all slave hosts.
-#
-# Environment Variables
-#
-#   YARN_SLAVES    File naming remote hosts.
-#     Default is ${YARN_CONF_DIR}/slaves.
-#   YARN_CONF_DIR  Alternate conf dir. Default is ${HADOOP_YARN_HOME}/conf.
-#   YARN_SLAVE_SLEEP Seconds to sleep between spawning remote commands.
-#   YARN_SSH_OPTS Options passed to ssh when running remote commands.
-##
-
-usage="Usage: slaves.sh [--config confdir] command..."
-
-# if no args specified, show usage
-if [ $# -le 0 ]; then
-  echo $usage
-  exit 1
-fi
-
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
-
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/yarn-config.sh
-
-# If the slaves file is specified in the command line,
-# then it takes precedence over the definition in 
-# yarn-env.sh. Save it here.
-HOSTLIST=$YARN_SLAVES
-
-if [ -f "${YARN_CONF_DIR}/yarn-env.sh" ]; then
-  . "${YARN_CONF_DIR}/yarn-env.sh"
-fi
-
-if [ "$HOSTLIST" = "" ]; then
-  if [ "$YARN_SLAVES" = "" ]; then
-    export HOSTLIST="${YARN_CONF_DIR}/slaves"
-  else
-    export HOSTLIST="${YARN_SLAVES}"
-  fi
-fi
-
-for slave in `cat "$HOSTLIST"|sed  "s/#.*$//;/^$/d"`; do
- ssh $YARN_SSH_OPTS $slave $"${@// /\\ }" \
-   2>&1 | sed "s/^/$slave: /" &
- if [ "$YARN_SLAVE_SLEEP" != "" ]; then
-   sleep $YARN_SLAVE_SLEEP
- fi
-done
-
-wait

+ 24 - 10
hadoop-yarn-project/hadoop-yarn/bin/start-yarn.sh

@@ -16,20 +16,34 @@
 # limitations under the License.
 
 
-# Start all yarn daemons.  Run this on master node.
+function hadoop_usage
+{
+  echo "Usage: start-yarn.sh [--config confdir]"
+}
 
-echo "starting yarn daemons"
+this="${BASH_SOURCE-$0}"
+bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
 
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+  DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+  DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
 
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/yarn-config.sh
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/yarn-config.sh" ]]; then
+  . "${HADOOP_LIBEXEC_DIR}/yarn-config.sh"
+else
+  echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/yarn-config.sh." 2>&1
+  exit 1
+fi
 
 # start resourceManager
-"$bin"/yarn-daemon.sh --config $YARN_CONF_DIR  start resourcemanager
+"${bin}/yarn-daemon.sh" --config "${YARN_CONF_DIR}"  start resourcemanager
 # start nodeManager
-"$bin"/yarn-daemons.sh --config $YARN_CONF_DIR  start nodemanager
+"${bin}/yarn-daemons.sh" --config "${YARN_CONF_DIR}"  start nodemanager
 # start proxyserver
-#"$bin"/yarn-daemon.sh --config $YARN_CONF_DIR  start proxyserver
+#"${bin}/yarn-daemon.sh" --config "${YARN_CONF_DIR}"  start proxyserver

+ 28 - 12
hadoop-yarn-project/hadoop-yarn/bin/stop-yarn.sh

@@ -18,18 +18,34 @@
 
 # Stop all yarn daemons.  Run this on master node.
 
-echo "stopping yarn daemons"
+function hadoop_usage
+{
+  echo "Usage: stop-yarn.sh [--config confdir]"
+}
 
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
+this="${BASH_SOURCE-$0}"
+bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
 
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/yarn-config.sh
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+  DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+  DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
 
-# stop resourceManager
-"$bin"/yarn-daemon.sh --config $YARN_CONF_DIR  stop resourcemanager
-# stop nodeManager
-"$bin"/yarn-daemons.sh --config $YARN_CONF_DIR  stop nodemanager
-# stop proxy server
-"$bin"/yarn-daemon.sh --config $YARN_CONF_DIR  stop proxyserver
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/yarn-config.sh" ]]; then
+  . "${HADOOP_LIBEXEC_DIR}/yarn-config.sh"
+else
+  echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/yarn-config.sh." 2>&1
+  exit 1
+fi
+
+# stop resourceManager
+"${bin}/yarn-daemon.sh" --config "${YARN_CONF_DIR}"  stop resourcemanager
+# stop nodeManager
+"${bin}/yarn-daemons.sh" --config "${YARN_CONF_DIR}"  stop nodemanager
+# stop proxyserver
+#"${bin}/yarn-daemon.sh" --config "${YARN_CONF_DIR}"  stop proxyserver

+ 148 - 232
hadoop-yarn-project/hadoop-yarn/bin/yarn

@@ -15,266 +15,182 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
-# The Hadoop command script
-#
-# Environment Variables
-#
-#   JAVA_HOME        The java implementation to use.  Overrides JAVA_HOME.
-#
-#   YARN_USER_CLASSPATH Additional user CLASSPATH entries.
-#
-#   YARN_USER_CLASSPATH_FIRST  If set to non empty value then the user classpath
-#                              specified in YARN_USER_CLASSPATH will be
-#                              appended at the beginning of YARN's final
-#                              classpath instead of at the end.
-#
-#   YARN_HEAPSIZE  The maximum amount of heap to use, in MB. 
-#                    Default is 1000.
-#
-#   YARN_{COMMAND}_HEAPSIZE overrides YARN_HEAPSIZE for a given command
-#                           eg YARN_NODEMANAGER_HEAPSIZE sets the heap
-#                           size for the NodeManager.  If you set the
-#                           heap size in YARN_{COMMAND}_OPTS or YARN_OPTS
-#                           they take precedence.
-#
-#   YARN_OPTS      Extra Java runtime options.
-#   
-#   YARN_CLIENT_OPTS         when the respective command is run.
-#   YARN_{COMMAND}_OPTS etc  YARN_NODEMANAGER_OPTS applies to NodeManager 
-#                              for e.g.  YARN_CLIENT_OPTS applies to 
-#                              more than one command (fs, dfs, fsck, 
-#                              dfsadmin etc)  
-#
-#   YARN_CONF_DIR  Alternate conf dir. Default is ${HADOOP_YARN_HOME}/conf.
-#
-#   YARN_ROOT_LOGGER The root appender. Default is INFO,console
-#
-
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin" > /dev/null; pwd`
-
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/yarn-config.sh
-
-function print_usage(){
-  echo "Usage: yarn [--config confdir] COMMAND"
+function hadoop_usage
+{
+  echo "Usage: yarn [--config confdir] [--daemon (start|stop|status)] COMMAND"
   echo "where COMMAND is one of:"
-  echo "  resourcemanager -format-state-store   deletes the RMStateStore"
-  echo "  resourcemanager                       run the ResourceManager"
+  echo "  application                           prints application(s) report/kill application"
+  echo "  applicationattempt                    prints applicationattempt(s) report"
+  echo "  classpath                             prints the class path needed to get the"
+  echo "                                        Hadoop jar and the required libraries"
+  echo "  container                             prints container(s) report"
+  echo "  daemonlog                             get/set the log level for each daemon"
+  echo "  jar <jar>                             run a jar file"
+  echo "  logs                                  dump container logs"
+  echo "  node                                  prints node report(s)"
   echo "  nodemanager                           run a nodemanager on each slave"
-  echo "  timelineserver                        run the timeline server"
+  echo "  proxyserver                           run the web app proxy server"
+  echo "  resourcemanager                       run the ResourceManager"
+  echo "  resourcemanager -format-state-store   deletes the RMStateStore"
   echo "  rmadmin                               admin tools"
+  echo "  timelineserver                        run the timeline server"
   echo "  version                               print the version"
-  echo "  jar <jar>                             run a jar file"
-  echo "  application                           prints application(s)"
-  echo "                                        report/kill application"
-  echo "  applicationattempt                    prints applicationattempt(s)"
-  echo "                                        report"
-  echo "  container                             prints container(s) report"
-  echo "  node                                  prints node report(s)"
-  echo "  logs                                  dump container logs"
-  echo "  classpath                             prints the class path needed to"
-  echo "                                        get the Hadoop jar and the"
-  echo "                                        required libraries"
-  echo "  daemonlog                             get/set the log level for each"
-  echo "                                        daemon"
   echo " or"
   echo "  CLASSNAME                             run the class named CLASSNAME"
   echo "Most commands print help when invoked w/o parameters."
 }
 
-# if no args specified, show usage
-if [ $# = 0 ]; then
-  print_usage
+
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+  DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+  this="${BASH_SOURCE-$0}"
+  bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+  DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
+
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/yarn-config.sh" ]]; then
+  . "${HADOOP_LIBEXEC_DIR}/yarn-config.sh"
+else
+  echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/yarn-config.sh." 2>&1
   exit 1
 fi
 
+# if no args specified, show usage
+if [[ $# = 0 ]]; then
+  hadoop_exit_with_usage 1
+fi
+
 # get arguments
 COMMAND=$1
 shift
 
-case $COMMAND in
-  # usage flags
-  --help|-help|-h)
-    print_usage
+case "${COMMAND}" in
+  application|applicationattempt|container)
+    CLASS=org.apache.hadoop.yarn.client.cli.ApplicationCLI
+    YARN_OPTS="${YARN_OPTS} ${YARN_CLIENT_OPTS}"
+  ;;
+  classpath)
+    hadoop_finalize
+    echo "${CLASSPATH}"
     exit
-    ;;
+  ;;
+  daemonlog)
+    CLASS=org.apache.hadoop.log.LogLevel
+    YARN_OPTS="${YARN_OPTS} ${YARN_CLIENT_OPTS}"
+  ;;
+  jar)
+    CLASS=org.apache.hadoop.util.RunJar
+    YARN_OPTS="${YARN_OPTS} ${YARN_CLIENT_OPTS}"
+  ;;
+  historyserver)
+    daemon="true"
+    echo "DEPRECATED: Use of this command to start the timeline server is deprecated." 1>&2
+    echo "Instead use the timelineserver command for it." 1>&2
+    echo "Starting the History Server anyway..." 1>&2
+    CLASS='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer'
+  ;;
+  logs)
+    CLASS=org.apache.hadoop.yarn.logaggregation.LogDumper
+    YARN_OPTS="${YARN_OPTS} ${YARN_CLIENT_OPTS}"
+  ;;
+  node)
+    CLASS=org.apache.hadoop.yarn.client.cli.NodeCLI
+    YARN_OPTS="${YARN_OPTS} ${YARN_CLIENT_OPTS}"
+  ;;
+  nodemanager)
+    daemon="true"
+    CLASS='org.apache.hadoop.yarn.server.nodemanager.NodeManager'
+    YARN_OPTS="${YARN_OPTS} ${YARN_NODEMANAGER_OPTS}"
+    if [[ -n "${YARN_NODEMANAGER_HEAPSIZE}" ]]; then
+      JAVA_HEAP_MAX="-Xmx${YARN_NODEMANAGER_HEAPSIZE}m"
+    fi
+  ;;
+  proxyserver)
+    daemon="true"
+    CLASS='org.apache.hadoop.yarn.server.webproxy.WebAppProxyServer'
+    YARN_OPTS="${YARN_OPTS} ${YARN_PROXYSERVER_OPTS}"
+    if [[ -n "${YARN_PROXYSERVER_HEAPSIZE}" ]]; then
+      JAVA_HEAP_MAX="-Xmx${YARN_PROXYSERVER_HEAPSIZE}m"
+    fi
+  ;;
+  resourcemanager)
+    daemon="true"
+    CLASS='org.apache.hadoop.yarn.server.resourcemanager.ResourceManager'
+    YARN_OPTS="${YARN_OPTS} ${YARN_RESOURCEMANAGER_OPTS}"
+    if [[ -n "${YARN_RESOURCEMANAGER_HEAPSIZE}" ]]; then
+      JAVA_HEAP_MAX="-Xmx${YARN_RESOURCEMANAGER_HEAPSIZE}m"
+    fi
+  ;;
+  rmadmin)
+    CLASS='org.apache.hadoop.yarn.client.cli.RMAdminCLI'
+    YARN_OPTS="${YARN_OPTS} ${YARN_CLIENT_OPTS}"
+  ;;
+  timelineserver)
+    daemon="true"
+    CLASS='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer'
+    YARN_OPTS="${YARN_OPTS} ${YARN_TIMELINESERVER_OPTS}"
+    if [[ -n "${YARN_TIMELINESERVER_HEAPSIZE}" ]]; then
+      JAVA_HEAP_MAX="-Xmx${YARN_TIMELINESERVER_HEAPSIZE}m"
+    fi
+  ;;
+  version)
+    CLASS=org.apache.hadoop.util.VersionInfo
+    YARN_OPTS="${YARN_OPTS} ${YARN_CLIENT_OPTS}"
+  ;;
+  -*)
+    hadoop_exit_with_usage 1
+  ;;
+  *)
+    CLASS="${COMMAND}"
+  ;;
 esac
 
-if [ -f "${YARN_CONF_DIR}/yarn-env.sh" ]; then
-  . "${YARN_CONF_DIR}/yarn-env.sh"
-fi
+# set HADOOP_OPTS to YARN_OPTS so that we can use
+# finalize, etc, without doing anything funky
+HADOOP_OPTS="${YARN_OPTS}"
 
-# some Java parameters
-if [ "$JAVA_HOME" != "" ]; then
-  #echo "run java in $JAVA_HOME"
-  JAVA_HOME=$JAVA_HOME
-fi
-  
-if [ "$JAVA_HOME" = "" ]; then
-  echo "Error: JAVA_HOME is not set."
-  exit 1
-fi
-
-JAVA=$JAVA_HOME/bin/java
-JAVA_HEAP_MAX=-Xmx1000m 
+daemon_outfile="${HADOOP_LOG_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.out"
+daemon_pidfile="${HADOOP_PID_DIR}/hadoop-${HADOOP_IDENT_STRING}-${COMMAND}.pid"
 
-# check envvars which might override default args
-if [ "$YARN_HEAPSIZE" != "" ]; then
-  #echo "run with heapsize $YARN_HEAPSIZE"
-  JAVA_HEAP_MAX="-Xmx""$YARN_HEAPSIZE""m"
-  #echo $JAVA_HEAP_MAX
+if [[  "${HADOOP_DAEMON_MODE}" != "default" ]]; then
+  # shellcheck disable=SC2034
+  HADOOP_ROOT_LOGGER="${HADOOP_DAEMON_ROOT_LOGGER}"
+  YARN_ROOT_LOGGER="${HADOOP_DAEMON_ROOT_LOGGER}"
+  HADOOP_LOGFILE="hadoop-${HADOOP_IDENT_STRING}-${COMMAND}-${HOSTNAME}.log"
 fi
 
-# CLASSPATH initially contains $HADOOP_CONF_DIR & $YARN_CONF_DIR
-if [ ! -d "$HADOOP_CONF_DIR" ]; then
-  echo No HADOOP_CONF_DIR set. 
-  echo Please specify it either in yarn-env.sh or in the environment.
-  exit 1
-fi
+hadoop_add_param HADOOP_OPTS Xmx "${JAVA_HEAP_MAX}"
 
-CLASSPATH="${HADOOP_CONF_DIR}:${YARN_CONF_DIR}:${CLASSPATH}"
+# Add YARN custom options to the command line in case someone actually
+# used these.
+#
+# Note that we are replacing ' ' with '\ ' so that spaces in these
+# values survive the later exec
+#
+hadoop_add_param HADOOP_OPTS yarn.log.dir "-Dyarn.log.dir=${HADOOP_LOG_DIR/ /\ }"
+hadoop_add_param HADOOP_OPTS yarn.log.file "-Dyarn.log.file=${HADOOP_LOGFILE/ /\ }"
+hadoop_add_param HADOOP_OPTS yarn.home.dir "-Dyarn.home.dir=${HADOOP_YARN_HOME/ /\ }"
+hadoop_add_param HADOOP_OPTS yarn.root.logger "-Dyarn.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
 
-# for developers, add Hadoop classes to CLASSPATH
-if [ -d "$HADOOP_YARN_HOME/yarn-api/target/classes" ]; then
-  CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-api/target/classes
-fi
-if [ -d "$HADOOP_YARN_HOME/yarn-common/target/classes" ]; then
-  CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-common/target/classes
-fi
-if [ -d "$HADOOP_YARN_HOME/yarn-mapreduce/target/classes" ]; then
-  CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-mapreduce/target/classes
-fi
-if [ -d "$HADOOP_YARN_HOME/yarn-master-worker/target/classes" ]; then
-  CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-master-worker/target/classes
-fi
-if [ -d "$HADOOP_YARN_HOME/yarn-server/yarn-server-nodemanager/target/classes" ]; then
-  CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-server/yarn-server-nodemanager/target/classes
-fi
-if [ -d "$HADOOP_YARN_HOME/yarn-server/yarn-server-common/target/classes" ]; then
-  CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-server/yarn-server-common/target/classes
-fi
-if [ -d "$HADOOP_YARN_HOME/yarn-server/yarn-server-resourcemanager/target/classes" ]; then
-  CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-server/yarn-server-resourcemanager/target/classes
-fi
-if [ -d "$HADOOP_YARN_HOME/yarn-server/yarn-server-applicationhistoryservice/target/classes" ]; then
-  CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-server/yarn-server-applicationhistoryservice/target/classes
-fi
-if [ -d "$HADOOP_YARN_HOME/build/test/classes" ]; then
-  CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/target/test/classes
-fi
-if [ -d "$HADOOP_YARN_HOME/build/tools" ]; then
-  CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/build/tools
-fi
+hadoop_finalize
 
-CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/${YARN_DIR}/*
-CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/${YARN_LIB_JARS_DIR}/*
+export CLASSPATH
 
-# Add user defined YARN_USER_CLASSPATH to the class path (if defined)
-if [ -n "$YARN_USER_CLASSPATH" ]; then
-  if [ -n "$YARN_USER_CLASSPATH_FIRST" ]; then
-    # User requested to add the custom entries at the beginning
-    CLASSPATH=${YARN_USER_CLASSPATH}:${CLASSPATH}
+if [[ -n "${daemon}" ]]; then
+  if [[ -n "${secure_service}" ]]; then
+    hadoop_secure_daemon_handler "${HADOOP_DAEMON_MODE}" "${COMMAND}" \
+    "${CLASS}" "${daemon_pidfile}" "${daemon_outfile}" \
+    "${priv_pidfile}" "${priv_outfile}" "${priv_errfile}" "$@"
   else
-    # By default we will just append the extra entries at the end
-    CLASSPATH=${CLASSPATH}:${YARN_USER_CLASSPATH}
+    hadoop_daemon_handler "${HADOOP_DAEMON_MODE}" "${COMMAND}" "${CLASS}" \
+    "${daemon_pidfile}" "${daemon_outfile}" "$@"
   fi
-fi
-
-# so that filenames w/ spaces are handled correctly in loops below
-IFS=
-
-# default log directory & file
-if [ "$YARN_LOG_DIR" = "" ]; then
-  YARN_LOG_DIR="$HADOOP_YARN_HOME/logs"
-fi
-if [ "$YARN_LOGFILE" = "" ]; then
-  YARN_LOGFILE='yarn.log'
-fi
-
-# restore ordinary behaviour
-unset IFS
-
-# figure out which class to run
-if [ "$COMMAND" = "classpath" ] ; then
-  echo $CLASSPATH
-  exit
-elif [ "$COMMAND" = "rmadmin" ] ; then
-  CLASS='org.apache.hadoop.yarn.client.cli.RMAdminCLI'
-  YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
-elif [ "$COMMAND" = "application" ] || 
-     [ "$COMMAND" = "applicationattempt" ] || 
-     [ "$COMMAND" = "container" ]; then
-  CLASS=org.apache.hadoop.yarn.client.cli.ApplicationCLI
-  YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
-  set -- $COMMAND $@
-elif [ "$COMMAND" = "node" ] ; then
-  CLASS=org.apache.hadoop.yarn.client.cli.NodeCLI
-  YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
-elif [ "$COMMAND" = "resourcemanager" ] ; then
-  CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/rm-config/log4j.properties
-  CLASS='org.apache.hadoop.yarn.server.resourcemanager.ResourceManager'
-  YARN_OPTS="$YARN_OPTS $YARN_RESOURCEMANAGER_OPTS"
-  if [ "$YARN_RESOURCEMANAGER_HEAPSIZE" != "" ]; then
-    JAVA_HEAP_MAX="-Xmx""$YARN_RESOURCEMANAGER_HEAPSIZE""m"
-  fi
-elif [ "$COMMAND" = "historyserver" ] ; then
-  echo "DEPRECATED: Use of this command to start the timeline server is deprecated." 1>&2
-  echo "Instead use the timelineserver command for it." 1>&2
-  CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/ahs-config/log4j.properties
-  CLASS='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer'
-  YARN_OPTS="$YARN_OPTS $YARN_HISTORYSERVER_OPTS"
-  if [ "$YARN_HISTORYSERVER_HEAPSIZE" != "" ]; then
-    JAVA_HEAP_MAX="-Xmx""$YARN_HISTORYSERVER_HEAPSIZE""m"
-  fi
-elif [ "$COMMAND" = "timelineserver" ] ; then
-  CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/timelineserver-config/log4j.properties
-  CLASS='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer'
-  YARN_OPTS="$YARN_OPTS $YARN_TIMELINESERVER_OPTS"
-  if [ "$YARN_TIMELINESERVER_HEAPSIZE" != "" ]; then
-    JAVA_HEAP_MAX="-Xmx""$YARN_TIMELINESERVER_HEAPSIZE""m"
-  fi
-elif [ "$COMMAND" = "nodemanager" ] ; then
-  CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/nm-config/log4j.properties
-  CLASS='org.apache.hadoop.yarn.server.nodemanager.NodeManager'
-  YARN_OPTS="$YARN_OPTS -server $YARN_NODEMANAGER_OPTS"
-  if [ "$YARN_NODEMANAGER_HEAPSIZE" != "" ]; then
-    JAVA_HEAP_MAX="-Xmx""$YARN_NODEMANAGER_HEAPSIZE""m"
-  fi
-elif [ "$COMMAND" = "proxyserver" ] ; then
-  CLASS='org.apache.hadoop.yarn.server.webproxy.WebAppProxyServer'
-  YARN_OPTS="$YARN_OPTS $YARN_PROXYSERVER_OPTS"
-  if [ "$YARN_PROXYSERVER_HEAPSIZE" != "" ]; then
-    JAVA_HEAP_MAX="-Xmx""$YARN_PROXYSERVER_HEAPSIZE""m"
-  fi
-elif [ "$COMMAND" = "version" ] ; then
-  CLASS=org.apache.hadoop.util.VersionInfo
-  YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
-elif [ "$COMMAND" = "jar" ] ; then
-  CLASS=org.apache.hadoop.util.RunJar
-  YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
-elif [ "$COMMAND" = "logs" ] ; then
-  CLASS=org.apache.hadoop.yarn.client.cli.LogsCLI
-  YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
-elif [ "$COMMAND" = "daemonlog" ] ; then
-  CLASS=org.apache.hadoop.log.LogLevel
-  YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
+  exit $?
 else
-  CLASS=$COMMAND
+  hadoop_java_exec "${COMMAND}" "${CLASS}" "$@"
 fi
-
-YARN_OPTS="$YARN_OPTS -Dhadoop.log.dir=$YARN_LOG_DIR"
-YARN_OPTS="$YARN_OPTS -Dyarn.log.dir=$YARN_LOG_DIR"
-YARN_OPTS="$YARN_OPTS -Dhadoop.log.file=$YARN_LOGFILE"
-YARN_OPTS="$YARN_OPTS -Dyarn.log.file=$YARN_LOGFILE"
-YARN_OPTS="$YARN_OPTS -Dyarn.home.dir=$HADOOP_YARN_HOME"
-YARN_OPTS="$YARN_OPTS -Dhadoop.home.dir=$HADOOP_YARN_HOME"
-YARN_OPTS="$YARN_OPTS -Dhadoop.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
-YARN_OPTS="$YARN_OPTS -Dyarn.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
-if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
-  YARN_OPTS="$YARN_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
-fi  
-
-exec "$JAVA" -Dproc_$COMMAND $JAVA_HEAP_MAX $YARN_OPTS -classpath "$CLASSPATH" $CLASS "$@"

+ 72 - 44
hadoop-yarn-project/hadoop-yarn/bin/yarn-config.sh

@@ -13,53 +13,81 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# included in all the hadoop scripts with source command
-# should not be executable directly
-bin=`which "$0"`
-bin=`dirname "${bin}"`
-bin=`cd "$bin"; pwd`
+function hadoop_subproject_init
+{
+  
+  # at some point in time, someone thought it would be a good idea to
+  # create separate vars for every subproject.  *sigh*
+  # let's perform some overrides and setup some defaults for bw compat
+  # this way the common hadoop var's == subproject vars and can be
+  # used interchangeable from here on out
+  # ...
+  # this should get deprecated at some point.
+  
+  if [[ -e "${YARN_CONF_DIR}/yarn-env.sh" ]]; then
+    . "${YARN_CONF_DIR}/yarn-env.sh"
+  elif [[ -e "${HADOOP_CONF_DIR}/yarn-env.sh" ]]; then
+    . "${HADOOP_CONF_DIR}/yarn-env.sh"
+  fi
+  
+  if [[ -n "${YARN_CONF_DIR}" ]]; then
+    HADOOP_CONF_DIR="${YARN_CONF_DIR}"
+  fi
+  
+  YARN_CONF_DIR="${HADOOP_CONF_DIR}"
+  
+  # YARN_CONF_DIR needs precedence over HADOOP_CONF_DIR
+  # and the various jar dirs
+  hadoop_add_classpath "${YARN_CONF_DIR}" before
+  
+  HADOOP_LOG_DIR="${YARN_LOG_DIR:-$HADOOP_LOG_DIR}"
+  YARN_LOG_DIR="${HADOOP_LOG_DIR}"
+  
+  HADOOP_LOGFILE="${YARN_LOGFILE:-$HADOOP_LOGFILE}"
+  YARN_LOGFILE="${HADOOP_LOGFILE}"
+  
+  HADOOP_NICENESS="${YARN_NICENESS:-$HADOOP_NICENESS}"
+  YARN_NICENESS="${HADOOP_NICENESS}"
+  
+  HADOOP_STOP_TIMEOUT="${YARN_STOP_TIMEOUT:-$HADOOP_STOP_TIMEOUT}"
+  YARN_STOP_TIMEOUT="${HADOOP_STOP_TIMEOUT}"
+  
+  HADOOP_PID_DIR="${YARN_PID_DIR:-$HADOOP_PID_DIR}"
+  YARN_PID_DIR="${HADOOP_PID_DIR}"
+  
+  HADOOP_ROOT_LOGGER="${YARN_ROOT_LOGGER:-INFO,console}"
+  YARN_ROOT_LOGGER="${HADOOP_ROOT_LOGGER}"
+  
+  HADOOP_YARN_HOME="${HADOOP_YARN_HOME:-$HADOOP_PREFIX}"
+  
+  HADOOP_IDENT_STRING="${YARN_IDENT_STRING:-$HADOOP_IDENT_STRING}"
+  YARN_IDENT_STRING="${HADOOP_IDENT_STRING}"
+  
+  YARN_OPTS="${YARN_OPTS:-$HADOOP_OPTS}"
+  
+  # YARN-1429 added the completely superfluous YARN_USER_CLASSPATH
+  # env var.  We're going to override HADOOP_USER_CLASSPATH to keep
+  # consistency with the rest of the duplicate/useless env vars
+  HADOOP_USER_CLASSPATH="${YARN_USER_CLASSPATH:-$HADOOP_USER_CLASSPATH}"
+  YARN_USER_CLASSPATH="${HADOOP_USER_CLASSPATH}"
+  
+  HADOOP_USER_CLASSPATH_FIRST="${YARN_USER_CLASSPATH_FIRST:-$HADOOP_USER_CLASSPATH_FIRST}"
+  YARN_USER_CLASSPATH_FIRST="${HADOOP_USER_CLASSPATH_FIRST}"
+}
 
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-if [ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]; then
-  . ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh
-elif [ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]; then
-  . "$HADOOP_COMMON_HOME"/libexec/hadoop-config.sh
-elif [ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]; then
-  . "$HADOOP_HOME"/libexec/hadoop-config.sh
+if [[ -z "${HADOOP_LIBEXEC_DIR}" ]]; then
+  _yc_this="${BASH_SOURCE-$0}"
+  HADOOP_LIBEXEC_DIR=$(cd -P -- "$(dirname -- "${_yc_this}")" >/dev/null && pwd -P)
+fi
+
+if [[ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then
+  . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh"
+elif [[ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]]; then
+  . "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh"
+elif [[ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]]; then
+  . "${HADOOP_HOME}/libexec/hadoop-config.sh"
 else
   echo "Hadoop common not found."
   exit
 fi
 
-# Same glibc bug that discovered in Hadoop.
-# Without this you can see very large vmem settings on containers.
-export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-4}
-
-#check to see if the conf dir is given as an optional argument
-if [ $# -gt 1 ]
-then
-    if [ "--config" = "$1" ]
-	  then
-	      shift
-	      confdir=$1
-	      shift
-	      YARN_CONF_DIR=$confdir
-    fi
-fi
- 
-# Allow alternate conf dir location.
-export YARN_CONF_DIR="${HADOOP_CONF_DIR:-$HADOOP_YARN_HOME/conf}"
-
-#check to see it is specified whether to use the slaves or the
-# masters file
-if [ $# -gt 1 ]
-then
-    if [ "--hosts" = "$1" ]
-    then
-        shift
-        slavesfile=$1
-        shift
-        export YARN_SLAVES="${YARN_CONF_DIR}/$slavesfile"
-    fi
-fi
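The net effect of hadoop_subproject_init is that each YARN_xyz variable and its HADOOP_xyz twin end up with the same value, with the YARN_ setting winning when both are defined. A quick illustration, using hypothetical paths:

# hypothetical values, purely for illustration
export HADOOP_LOG_DIR=/var/log/hadoop
export YARN_LOG_DIR=/var/log/yarn

. "${HADOOP_LIBEXEC_DIR}/yarn-config.sh"

echo "${HADOOP_LOG_DIR}"   # /var/log/yarn -- the YARN_ value wins
echo "${YARN_LOG_DIR}"     # /var/log/yarn -- kept in sync for bw compat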

+ 21 - 136
hadoop-yarn-project/hadoop-yarn/bin/yarn-daemon.sh

@@ -15,147 +15,32 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
-# Runs a yarn command as a daemon.
-#
-# Environment Variables
-#
-#   YARN_CONF_DIR  Alternate conf dir. Default is ${HADOOP_YARN_HOME}/conf.
-#   YARN_LOG_DIR   Where log files are stored.  PWD by default.
-#   YARN_MASTER    host:path where hadoop code should be rsync'd from
-#   YARN_PID_DIR   The pid files are stored. /tmp by default.
-#   YARN_IDENT_STRING   A string representing this instance of hadoop. $USER by default
-#   YARN_NICENESS The scheduling priority for daemons. Defaults to 0.
-##
-
-usage="Usage: yarn-daemon.sh [--config <conf-dir>] [--hosts hostlistfile] (start|stop) <yarn-command> "
-
-# if no args specified, show usage
-if [ $# -le 1 ]; then
-  echo $usage
-  exit 1
-fi
-
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
-
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/yarn-config.sh
-
-# get arguments
-startStop=$1
-shift
-command=$1
-shift
-
-hadoop_rotate_log ()
+function hadoop_usage
 {
-    log=$1;
-    num=5;
-    if [ -n "$2" ]; then
-	num=$2
-    fi
-    if [ -f "$log" ]; then # rotate logs
-	while [ $num -gt 1 ]; do
-	    prev=`expr $num - 1`
-	    [ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num"
-	    num=$prev
-	done
-	mv "$log" "$log.$num";
-    fi
+  echo "Usage: yarn-daemon.sh [--config confdir] (start|stop|status) <hadoop-command> <args...>"
 }
 
-if [ -f "${YARN_CONF_DIR}/yarn-env.sh" ]; then
-  . "${YARN_CONF_DIR}/yarn-env.sh"
-fi
-
-if [ "$YARN_IDENT_STRING" = "" ]; then
-  export YARN_IDENT_STRING="$USER"
-fi
-
-# get log directory
-if [ "$YARN_LOG_DIR" = "" ]; then
-  export YARN_LOG_DIR="$HADOOP_YARN_HOME/logs"
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+  DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+  this="${BASH_SOURCE-$0}"
+  bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+  DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
 fi
 
-if [ ! -w "$YARN_LOG_DIR" ] ; then
-  mkdir -p "$YARN_LOG_DIR"
-  chown $YARN_IDENT_STRING $YARN_LOG_DIR 
-fi
-
-if [ "$YARN_PID_DIR" = "" ]; then
-  YARN_PID_DIR=/tmp
-fi
-
-# some variables
-export YARN_LOGFILE=yarn-$YARN_IDENT_STRING-$command-$HOSTNAME.log
-export YARN_ROOT_LOGGER=${YARN_ROOT_LOGGER:-INFO,RFA}
-log=$YARN_LOG_DIR/yarn-$YARN_IDENT_STRING-$command-$HOSTNAME.out
-pid=$YARN_PID_DIR/yarn-$YARN_IDENT_STRING-$command.pid
-YARN_STOP_TIMEOUT=${YARN_STOP_TIMEOUT:-5}
-
-# Set default scheduling priority
-if [ "$YARN_NICENESS" = "" ]; then
-    export YARN_NICENESS=0
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/yarn-config.sh" ]]; then
+  . "${HADOOP_LIBEXEC_DIR}/yarn-config.sh"
+else
+  echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/yarn-config.sh." 2>&1
+  exit 1
 fi
 
-case $startStop in
-
-  (start)
-
-    [ -w "$YARN_PID_DIR" ] || mkdir -p "$YARN_PID_DIR"
-
-    if [ -f $pid ]; then
-      if kill -0 `cat $pid` > /dev/null 2>&1; then
-        echo $command running as process `cat $pid`.  Stop it first.
-        exit 1
-      fi
-    fi
-
-    if [ "$YARN_MASTER" != "" ]; then
-      echo rsync from $YARN_MASTER
-      rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' $YARN_MASTER/ "$HADOOP_YARN_HOME"
-    fi
-
-    hadoop_rotate_log $log
-    echo starting $command, logging to $log
-    cd "$HADOOP_YARN_HOME"
-    nohup nice -n $YARN_NICENESS "$HADOOP_YARN_HOME"/bin/yarn --config $YARN_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null &
-    echo $! > $pid
-    sleep 1
-    head "$log"
-    # capture the ulimit output
-    echo "ulimit -a" >> $log
-    ulimit -a >> $log 2>&1
-    ;;
-          
-  (stop)
-
-    if [ -f $pid ]; then
-      TARGET_PID=`cat $pid`
-      if kill -0 $TARGET_PID > /dev/null 2>&1; then
-        echo stopping $command
-        kill $TARGET_PID
-        sleep $YARN_STOP_TIMEOUT
-        if kill -0 $TARGET_PID > /dev/null 2>&1; then
-          echo "$command did not stop gracefully after $YARN_STOP_TIMEOUT seconds: killing with kill -9"
-          kill -9 $TARGET_PID
-        fi
-      else
-        echo no $command to stop
-      fi
-      rm -f $pid
-    else
-      echo no $command to stop
-    fi
-    ;;
-
-  (*)
-    echo $usage
-    exit 1
-    ;;
-
-esac
-
+daemonmode=$1
+shift
 
+exec "${HADOOP_YARN_HOME}/bin/yarn" \
+--config "${HADOOP_CONF_DIR}" --daemon "${daemonmode}" "$@"

+ 23 - 15
hadoop-yarn-project/hadoop-yarn/bin/yarn-daemons.sh

@@ -16,23 +16,31 @@
 # limitations under the License.
 
 
-# Run a Yarn command on all slave hosts.
-
-usage="Usage: yarn-daemons.sh [--config confdir] [--hosts hostlistfile] [start
-|stop] command args..."
+function hadoop_usage
+{
+  echo "Usage: yarn-daemons.sh [--config confdir] [--hosts hostlistfile] (start|stop|status) <yarn-command> <args...>"
+}
+
+this="${BASH_SOURCE-$0}"
+bin=$(cd -P -- "$(dirname -- "${this}")" >/dev/null && pwd -P)
+
+# let's locate libexec...
+if [[ -n "${HADOOP_PREFIX}" ]]; then
+  DEFAULT_LIBEXEC_DIR="${HADOOP_PREFIX}/libexec"
+else
+  DEFAULT_LIBEXEC_DIR="${bin}/../libexec"
+fi
 
-# if no args specified, show usage
-if [ $# -le 1 ]; then
-  echo $usage
+HADOOP_LIBEXEC_DIR="${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}"
+# shellcheck disable=SC2034
+HADOOP_NEW_CONFIG=true
+if [[ -f "${HADOOP_LIBEXEC_DIR}/yarn-config.sh" ]]; then
+  . "${HADOOP_LIBEXEC_DIR}/yarn-config.sh"
+else
+  echo "ERROR: Cannot execute ${HADOOP_LIBEXEC_DIR}/yarn-config.sh." 2>&1
   exit 1
 fi
 
-bin=`dirname "${BASH_SOURCE-$0}"`
-bin=`cd "$bin"; pwd`
-
-DEFAULT_LIBEXEC_DIR="$bin"/../libexec
-HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
-. $HADOOP_LIBEXEC_DIR/yarn-config.sh
-
-exec "$bin/slaves.sh" --config $YARN_CONF_DIR cd "$HADOOP_YARN_HOME" \; "$bin/yarn-daemon.sh" --config $YARN_CONF_DIR "$@"
+hadoop_connect_to_hosts "${bin}/yarn-daemon.sh" \
+--config "${HADOOP_CONF_DIR}" "$@"
 

+ 80 - 83
hadoop-yarn-project/hadoop-yarn/conf/yarn-env.sh

@@ -13,118 +13,115 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+##
+## THIS FILE ACTS AS AN OVERRIDE FOR hadoop-env.sh FOR ALL
+## WORK DONE BY THE yarn AND RELATED COMMANDS.
+##
+## Precedence rules:
+##
+## yarn-env.sh > hadoop-env.sh > hard-coded defaults
+##
+## YARN_xyz > HADOOP_xyz > hard-coded defaults
+##
+
+###
+# Generic settings for YARN
+###
+
 # User for YARN daemons
 export HADOOP_YARN_USER=${HADOOP_YARN_USER:-yarn}
 
-# resolve links - $0 may be a softlink
-export YARN_CONF_DIR="${YARN_CONF_DIR:-$HADOOP_YARN_HOME/conf}"
-
-# some Java parameters
-# export JAVA_HOME=/home/y/libexec/jdk1.6.0/
-if [ "$JAVA_HOME" != "" ]; then
-  #echo "run java in $JAVA_HOME"
-  JAVA_HOME=$JAVA_HOME
-fi
-  
-if [ "$JAVA_HOME" = "" ]; then
-  echo "Error: JAVA_HOME is not set."
-  exit 1
-fi
-
-JAVA=$JAVA_HOME/bin/java
-JAVA_HEAP_MAX=-Xmx1000m 
-
-# For setting YARN specific HEAP sizes please use this
-# Parameter and set appropriately
-# YARN_HEAPSIZE=1000
-
-# check envvars which might override default args
-if [ "$YARN_HEAPSIZE" != "" ]; then
-  JAVA_HEAP_MAX="-Xmx""$YARN_HEAPSIZE""m"
-fi
+#
+# By default, YARN will use HADOOP_CONF_DIR. Specify a custom
+# YARN_CONF_DIR here
+# export YARN_CONF_DIR="${YARN_CONF_DIR:-$HADOOP_YARN_HOME/conf}"
+#
+
+# Override Hadoop's log directory & file
+# export YARN_LOG_DIR="$HADOOP_YARN_HOME/logs"
+# export YARN_LOGFILE='yarn.log'
+
+# Need a custom-to-YARN service-level authorization policy file?
+# export YARN_POLICYFILE="yarn-policy.xml"
 
+# Override the log4j settings for all YARN apps
+# export YARN_ROOT_LOGGER="INFO,console"
+
+###
 # Resource Manager specific parameters
+###
 
-# Specify the max Heapsize for the ResourceManager using a numerical value
+# Specify the max heapsize for the ResourceManager using a numerical value
 # in the scale of MB. For example, to specify a JVM option of -Xmx1000m, set
 # the value to 1000.
-# This value will be overridden by an Xmx setting specified in either YARN_OPTS
-# and/or YARN_RESOURCEMANAGER_OPTS.
+# This value will be overridden by an Xmx setting specified in YARN_OPTS,
+# HADOOP_OPTS, and/or YARN_RESOURCEMANAGER_OPTS.
 # If not specified, the default value will be picked from either YARN_HEAPMAX
 # or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
+#
 #export YARN_RESOURCEMANAGER_HEAPSIZE=1000
 
-# Specify the max Heapsize for the timeline server using a numerical value
-# in the scale of MB. For example, to specify an jvm option of -Xmx1000m, set
-# the value to 1000.
-# This value will be overridden by an Xmx setting specified in either YARN_OPTS
-# and/or YARN_TIMELINESERVER_OPTS.
-# If not specified, the default value will be picked from either YARN_HEAPMAX
-# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
-#export YARN_TIMELINESERVER_HEAPSIZE=1000
-
 # Specify the JVM options to be used when starting the ResourceManager.
 # These options will be appended to the options specified as YARN_OPTS
 # and therefore may override any similar flags set in YARN_OPTS
-#export YARN_RESOURCEMANAGER_OPTS=
+#
+# Examples for a Sun/Oracle JDK:
+# a) override the appsummary log file:
+# export YARN_RESOURCEMANAGER_OPTS="-Dyarn.server.resourcemanager.appsummary.log.file=rm-appsummary.log -Dyarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY"
+#
+# b) Set JMX options
+# export YARN_RESOURCEMANAGER_OPTS="-Dcom.sun.management.jmxremote=true -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.port=1026"
+#
+# c) Set garbage collection logs from hadoop-env.sh
+# export YARN_RESOURCEMANAGER_OPTS="${HADOOP_GC_SETTINGS} -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')"
+#
+# d) ... or set them directly
+# export YARN_RESOURCEMANAGER_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xloggc:${HADOOP_LOG_DIR}/gc-rm.log-$(date +'%Y%m%d%H%M')"
+#
+#
+# export YARN_RESOURCEMANAGER_OPTS=
 
+###
 # Node Manager specific parameters
+###
 
 # Specify the max Heapsize for the NodeManager using a numerical value
 # in the scale of MB. For example, to specify a JVM option of -Xmx1000m, set
 # the value to 1000.
-# This value will be overridden by an Xmx setting specified in either YARN_OPTS
-# and/or YARN_NODEMANAGER_OPTS.
+# This value will be overridden by an Xmx setting specified in YARN_OPTS,
+# HADOOP_OPTS, and/or YARN_NODEMANAGER_OPTS.
 # If not specified, the default value will be picked from either YARN_HEAPMAX
 # or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
+#
 #export YARN_NODEMANAGER_HEAPSIZE=1000
 
 # Specify the JVM options to be used when starting the NodeManager.
 # These options will be appended to the options specified as YARN_OPTS
 # and therefore may override any similar flags set in YARN_OPTS
+#
+# See ResourceManager for some examples
+#
 #export YARN_NODEMANAGER_OPTS=
 
-# so that filenames w/ spaces are handled correctly in loops below
-IFS=
-
-
-# default log directory & file
-if [ "$YARN_LOG_DIR" = "" ]; then
-  YARN_LOG_DIR="$HADOOP_YARN_HOME/logs"
-fi
-if [ "$YARN_LOGFILE" = "" ]; then
-  YARN_LOGFILE='yarn.log'
-fi
-
-# default policy file for service-level authorization
-if [ "$YARN_POLICYFILE" = "" ]; then
-  YARN_POLICYFILE="hadoop-policy.xml"
-fi
-
-# restore ordinary behaviour
-unset IFS
-
-MAC_OSX=false
-case "`uname`" in
-Darwin*) MAC_OSX=true;;
-esac
-
-if $MAC_OSX; then
-  YARN_OPTS="$YARN_OPTS -Djava.security.krb5.realm= -Djava.security.krb5.kdc="
-fi
-
-
-YARN_OPTS="$YARN_OPTS -Dhadoop.log.dir=$YARN_LOG_DIR"
-YARN_OPTS="$YARN_OPTS -Dyarn.log.dir=$YARN_LOG_DIR"
-YARN_OPTS="$YARN_OPTS -Dhadoop.log.file=$YARN_LOGFILE"
-YARN_OPTS="$YARN_OPTS -Dyarn.log.file=$YARN_LOGFILE"
-YARN_OPTS="$YARN_OPTS -Dyarn.home.dir=$YARN_COMMON_HOME"
-YARN_OPTS="$YARN_OPTS -Dyarn.id.str=$YARN_IDENT_STRING"
-YARN_OPTS="$YARN_OPTS -Dhadoop.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
-YARN_OPTS="$YARN_OPTS -Dyarn.root.logger=${YARN_ROOT_LOGGER:-INFO,console}"
-if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
-  YARN_OPTS="$YARN_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
-fi  
-YARN_OPTS="$YARN_OPTS -Dyarn.policy.file=$YARN_POLICYFILE"
+###
+# TimelineServer specific parameters
+###
 
+# Specify the max heapsize for the timeline server using a numerical value
+# in the scale of MB. For example, to specify a JVM option of -Xmx1000m, set
+# the value to 1000.
+# This value will be overridden by an Xmx setting specified in YARN_OPTS,
+# HADOOP_OPTS, and/or YARN_TIMELINESERVER_OPTS.
+# If not specified, the default value will be picked from either YARN_HEAPMAX
+# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
+#
+#export YARN_TIMELINESERVER_HEAPSIZE=1000
+
+# Specify the JVM options to be used when starting the TimelineServer.
+# These options will be appended to the options specified as YARN_OPTS
+# and therefore may override any similar flags set in YARN_OPTS
+#
+# See ResourceManager for some examples
+#
+#export YARN_TIMELINESERVER_OPTS=
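Putting the precedence rules at the top of this file to work, a typical yarn-env.sh override ends up looking like this (values are illustrative only):

# hadoop-env.sh may set a cluster-wide default log dir;
# yarn-env.sh wins for anything launched via the yarn command
export YARN_LOG_DIR=/var/log/yarn

# heapsize knob -> -Xmx2000m, unless an explicit Xmx in
# YARN_OPTS/HADOOP_OPTS/YARN_RESOURCEMANAGER_OPTS overrides it
export YARN_RESOURCEMANAGER_HEAPSIZE=2000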