
Merge r1236386 through r1237583 from 0.23.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.23-PB@1237590 13f79535-47bb-0310-9956-ffa450edef68
Tsz-wo Sze, 13 years ago (commit ab824d9e9b)
69 changed files with 1514 additions and 394 deletions
  1. +1 -1  hadoop-common-project/dev-support/test-patch.properties
  2. +21 -1  hadoop-common-project/hadoop-common/CHANGES.txt
  3. +1 -1  hadoop-common-project/hadoop-common/src/main/bin/hadoop
  4. +59 -22  hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh
  5. +12 -2  hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
  6. +3 -2  hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java
  7. +1 -1  hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java
  8. +4 -2  hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SecurityUtil.java
  9. +20 -0  hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestDeprecatedKeys.java
  10. +70 -23  hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestChecksumFileSystem.java
  11. +17 -0  hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestSequenceFile.java
  12. +10 -0  hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestSecurityUtil.java
  13. +2 -2  hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/sbin/httpfs.sh
  14. +1 -1  hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/lib/servlet/TestHostnameFilter.java
  15. +5 -0  hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
  16. +0 -2  hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs-config.sh
  17. +18 -1  hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
  18. +9 -0  hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java
  19. +1 -1  hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/AppendTestUtil.java
  20. +39 -0  hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/DataNodeAdapter.java
  21. +86 -0  hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockReport.java
  22. +23 -0  hadoop-mapreduce-project/CHANGES.txt
  23. +4 -4  hadoop-mapreduce-project/bin/mapred
  24. +6 -0  hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java
  25. +1 -1  hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java
  26. +19 -13  hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/tools/CLI.java
  27. +24 -0  hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml
  28. +24 -13  hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/JobClientUnitTest.java
  29. +3 -3  hadoop-mapreduce-project/hadoop-yarn/bin/yarn
  30. +0 -2  hadoop-mapreduce-project/hadoop-yarn/bin/yarn-config.sh
  31. +4 -4  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
  32. +4 -4  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/resources/yarn-default.xml
  33. +17 -6  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClusterMetrics.java
  34. +2 -0  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java
  35. +8 -0  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java
  36. +0 -4  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java
  37. +24 -2  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java
  38. +109 -0  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ActiveUsersManager.java
  39. +45 -20  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java
  40. +40 -0  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java
  41. +3 -2  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApp.java
  42. +7 -0  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueue.java
  43. +2 -1  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
  44. +26 -8  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
  45. +7 -0  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java
  46. +16 -3  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java
  47. +2 -2  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java
  48. +25 -5  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java
  49. +30 -5  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java
  50. +8 -2  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterMetricsInfo.java
  51. +4 -0  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeInfo.java
  52. +8 -2  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java
  53. +113 -74  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java
  54. +6 -0  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java
  55. +3 -2  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java
  56. +2 -2  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java
  57. +6 -3  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java
  58. +148 -35  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java
  59. +45 -18  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java
  60. +11 -0  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebApp.java
  61. +12 -6  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServices.java
  62. +63 -0  hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesNodes.java
  63. +9 -4  hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/ExecutionSummarizer.java
  64. +7 -3  hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Gridmix.java
  65. +9 -2  hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Statistics.java
  66. +165 -60  hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/StressJobFactory.java
  67. +14 -6  hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/ResourceUsageMatcher.java
  68. +16 -7  hadoop-mapreduce-project/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestGridmixSubmission.java
  69. +10 -4  hadoop-mapreduce-project/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestGridmixSummary.java

+ 1 - 1
hadoop-common-project/dev-support/test-patch.properties

@@ -18,4 +18,4 @@
 
 OK_RELEASEAUDIT_WARNINGS=0
 OK_FINDBUGS_WARNINGS=0
-OK_JAVADOC_WARNINGS=6
+OK_JAVADOC_WARNINGS=13

+ 21 - 1
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -142,10 +142,21 @@ Release 0.23.1 - Unreleased
     HADOOP-7919. Remove the unused hadoop.logfile.* properties from the 
     core-default.xml file. (harsh)
 
+    HADOOP-7939. Improve Hadoop subcomponent integration in Hadoop 0.23. (rvs via tucu)
+
+    HADOOP-7988. Upper case in hostname part of the principals doesn't work with
+    kerberos. (jitendra)
+
+    HADOOP-8002. SecurityUtil acquired token message should be a debug rather than info.
+    (Arpit Gupta via mahadev)
+
   OPTIMIZATIONS
 
   BUG FIXES
-   
+ 
+   HADOOP-7998. CheckFileSystem does not correctly honor setVerifyChecksum
+   (Daryn Sharp via bobby)
+  
    HADOOP-7811. TestUserGroupInformation#testGetServerSideGroups test fails in chroot.
    (Jonathan Eagles via mahadev)
 
@@ -227,6 +238,15 @@ Release 0.23.1 - Unreleased
    HADOOP-7981. Improve documentation for org.apache.hadoop.io.compress.
    Decompressor.getRemaining (Jonathan Eagles via mahadev)
 
+   HADOOP-7997. SequenceFile.createWriter(...createParent...) no
+   longer works on existing file. (Gregory Chanan via eli)
+
+   HADOOP-7993. Hadoop ignores old-style config options for enabling compressed 
+   output. (Anupam Seth via mahadev)
+
+   HADOOP-8000. fetchdt command not available in bin/hadoop.
+   (Arpit Gupta via mahadev)
+
 Release 0.23.0 - 2011-11-01 
 
   INCOMPATIBLE CHANGES

+ 1 - 1
hadoop-common-project/hadoop-common/src/main/bin/hadoop

@@ -50,7 +50,7 @@ fi
 COMMAND=$1
 case $COMMAND in
   #hdfs commands
-  namenode|secondarynamenode|datanode|dfs|dfsadmin|fsck|balancer)
+  namenode|secondarynamenode|datanode|dfs|dfsadmin|fsck|balancer|fetchdt)
     echo "DEPRECATED: Use of this script to execute hdfs command is deprecated." 1>&2
     echo "Instead use the hdfs command for it." 1>&2
     echo "" 1>&2

+ 59 - 22
hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh

@@ -25,9 +25,21 @@ common_bin=$(cd -P -- "$(dirname -- "$this")" && pwd -P)
 script="$(basename -- "$this")"
 this="$common_bin/$script"
 
+[ -f "$common_bin/hadoop-layout.sh" ] && . "$common_bin/hadoop-layout.sh"
+
+HADOOP_COMMON_DIR=${HADOOP_COMMON_DIR:-"share/hadoop/common"}
+HADOOP_COMMON_LIB_JARS_DIR=${HADOOP_COMMON_LIB_JARS_DIR:-"share/hadoop/common/lib"}
+HADOOP_COMMON_LIB_NATIVE_DIR=${HADOOP_COMMON_LIB_NATIVE_DIR:-"lib/native"}
+HDFS_DIR=${HDFS_DIR:-"share/hadoop/hdfs"}
+HDFS_LIB_JARS_DIR=${HDFS_LIB_JARS_DIR:-"share/hadoop/hdfs/lib"}
+YARN_DIR=${YARN_DIR:-"share/hadoop/mapreduce"}
+YARN_LIB_JARS_DIR=${YARN_LIB_JARS_DIR:-"share/hadoop/mapreduce/lib"}
+MAPRED_DIR=${MAPRED_DIR:-"share/hadoop/mapreduce"}
+MAPRED_LIB_JARS_DIR=${MAPRED_LIB_JARS_DIR:-"share/hadoop/mapreduce/lib"}
+
 # the root of the Hadoop installation
 # See HADOOP-6255 for directory structure layout
-HADOOP_DEFAULT_PREFIX=`dirname "$this"`/..
+HADOOP_DEFAULT_PREFIX=$(cd -P -- "$common_bin"/.. && pwd -P)
 HADOOP_PREFIX=${HADOOP_PREFIX:-$HADOOP_DEFAULT_PREFIX}
 export HADOOP_PREFIX
 
@@ -144,16 +156,22 @@ CLASSPATH="${HADOOP_CONF_DIR}"
 # so that filenames w/ spaces are handled correctly in loops below
 IFS=
 
+if [ "$HADOOP_COMMON_HOME" = "" ]; then
+  if [ -d "${HADOOP_PREFIX}/$HADOOP_COMMON_DIR" ]; then
+    HADOOP_COMMON_HOME=$HADOOP_PREFIX
+  fi
+fi
+
 # for releases, add core hadoop jar & webapps to CLASSPATH
-if [ -d "$HADOOP_PREFIX/share/hadoop/common/webapps" ]; then
-  CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX/share/hadoop/common/webapps
+if [ -d "$HADOOP_COMMON_HOME/$HADOOP_COMMON_DIR/webapps" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/$HADOOP_COMMON_DIR
 fi
 
-if [ -d "$HADOOP_PREFIX/share/hadoop/common/lib" ]; then
-  CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX/share/hadoop/common/lib'/*'
+if [ -d "$HADOOP_COMMON_HOME/$HADOOP_COMMON_LIB_JARS_DIR" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/$HADOOP_COMMON_LIB_JARS_DIR'/*'
 fi
 
-CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX/share/hadoop/common'/*'
+CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/$HADOOP_COMMON_DIR'/*'
 
 # add user-specified CLASSPATH last
 if [ "$HADOOP_CLASSPATH" != "" ]; then
@@ -185,13 +203,13 @@ fi
 
 # setup 'java.library.path' for native-hadoop code if necessary
 
-if [ -d "${HADOOP_PREFIX}/build/native" -o -d "${HADOOP_PREFIX}/lib/native" ]; then
+if [ -d "${HADOOP_PREFIX}/build/native" -o -d "${HADOOP_PREFIX}/$HADOOP_COMMON_LIB_NATIVE_DIR" ]; then
     
-  if [ -d "${HADOOP_PREFIX}/lib/native" ]; then
+  if [ -d "${HADOOP_PREFIX}/$HADOOP_COMMON_LIB_NATIVE_DIR" ]; then
     if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
-      JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:${HADOOP_PREFIX}/lib/native
+      JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:${HADOOP_PREFIX}/$HADOOP_COMMON_LIB_NATIVE_DIR
     else
-      JAVA_LIBRARY_PATH=${HADOOP_PREFIX}/lib/native
+      JAVA_LIBRARY_PATH=${HADOOP_PREFIX}/$HADOOP_COMMON_LIB_NATIVE_DIR
     fi
   fi
 fi
@@ -216,37 +234,56 @@ HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
 
 # put hdfs in classpath if present
 if [ "$HADOOP_HDFS_HOME" = "" ]; then
-  if [ -d "${HADOOP_PREFIX}/share/hadoop/hdfs" ]; then
+  if [ -d "${HADOOP_PREFIX}/$HDFS_DIR" ]; then
     HADOOP_HDFS_HOME=$HADOOP_PREFIX
   fi
 fi
 
-if [ -d "$HADOOP_HDFS_HOME/share/hadoop/hdfs/webapps" ]; then
-  CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/share/hadoop/hdfs
+if [ -d "$HADOOP_HDFS_HOME/$HDFS_DIR/webapps" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/$HDFS_DIR
 fi
 
-if [ -d "$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib" ]; then
-  CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib'/*'
+if [ -d "$HADOOP_HDFS_HOME/$HDFS_LIB_JARS_DIR" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/$HDFS_LIB_JARS_DIR'/*'
 fi
 
-CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/share/hadoop/hdfs'/*'
+CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/$HDFS_DIR'/*'
 
 # put yarn in classpath if present
 if [ "$YARN_HOME" = "" ]; then
-  if [ -d "${HADOOP_PREFIX}/share/hadoop/mapreduce" ]; then
+  if [ -d "${HADOOP_PREFIX}/$YARN_DIR" ]; then
     YARN_HOME=$HADOOP_PREFIX
   fi
 fi
 
-if [ -d "$YARN_HOME/share/hadoop/mapreduce/webapps" ]; then
-  CLASSPATH=${CLASSPATH}:$YARN_HOME/share/hadoop/mapreduce
+if [ -d "$YARN_HOME/$YARN_DIR/webapps" ]; then
+  CLASSPATH=${CLASSPATH}:$YARN_HOME/$YARN_DIR
 fi
 
-if [ -d "$YARN_HOME/share/hadoop/mapreduce/lib" ]; then
-  CLASSPATH=${CLASSPATH}:$YARN_HOME/share/hadoop/mapreduce/lib'/*'
+if [ -d "$YARN_HOME/$YARN_LIB_JARS_DIR" ]; then
+  CLASSPATH=${CLASSPATH}:$YARN_HOME/$YARN_LIB_JARS_DIR'/*'
 fi
 
-CLASSPATH=${CLASSPATH}:$YARN_HOME/share/hadoop/mapreduce'/*'
+CLASSPATH=${CLASSPATH}:$YARN_HOME/$YARN_DIR'/*'
+
+# put mapred in classpath if present AND different from YARN
+if [ "$HADOOP_MAPRED_HOME" = "" ]; then
+  if [ -d "${HADOOP_PREFIX}/$MAPRED_DIR" ]; then
+    HADOOP_MAPRED_HOME=$HADOOP_PREFIX
+  fi
+fi
+
+if [ "$HADOOP_MAPRED_HOME/$MAPRED_DIR" != "$YARN_HOME/$YARN_DIR" ] ; then
+  if [ -d "$HADOOP_MAPRED_HOME/$MAPRED_DIR/webapps" ]; then
+    CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/$MAPRED_DIR
+  fi
+
+  if [ -d "$HADOOP_MAPRED_HOME/$MAPRED_LIB_JARS_DIR" ]; then
+    CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/$MAPRED_LIB_JARS_DIR'/*'
+  fi
+
+  CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/$MAPRED_DIR'/*'
+fi
 
 # cygwin path translation
 if $cygwin; then

+ 12 - 2
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java

@@ -345,7 +345,17 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
     }
     return name;
   }
-  
+ 
+  private void handleDeprecation() {
+    LOG.debug("Handling deprecation for all properties in config...");
+    Set<Object> keys = new HashSet<Object>();
+    keys.addAll(getProps().keySet());
+    for (Object item: keys) {
+      LOG.debug("Handling deprecation for " + (String)item);
+      handleDeprecation((String)item);
+    }
+  }
+ 
   static{
     //print deprecation warning if hadoop-site.xml is found in classpath
     ClassLoader cL = Thread.currentThread().getContextClassLoader();
@@ -1665,7 +1675,7 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
     Element conf = doc.createElement("configuration");
     doc.appendChild(conf);
     conf.appendChild(doc.createTextNode("\n"));
-    getProps(); // ensure properties is set
+    handleDeprecation(); //ensure properties is set and deprecation is handled
     for (Enumeration e = properties.keys(); e.hasMoreElements();) {
       String name = (String)e.nextElement();
       Object object = properties.get(name);
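
The practical effect of the new handleDeprecation() pass is that deprecated keys set on a Configuration are resolved before the object is serialized, so both the old and the new key names show up in the written XML. A minimal sketch of that behavior, using made-up key names (not real Hadoop properties) and mirroring the new TestDeprecatedKeys case further down:

    import java.io.ByteArrayOutputStream;
    import org.apache.hadoop.conf.Configuration;

    public class DeprecationWriteXmlSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration(false);
        conf.set("old.key", "value");                       // hypothetical deprecated key
        Configuration.addDeprecation("old.key", new String[] {"new.key"});

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        conf.writeXml(out);                                  // now runs handleDeprecation() first
        String xml = out.toString();

        // Both the deprecated key and its replacement appear in the output.
        System.out.println(xml.contains("old.key") && xml.contains("new.key"));
      }
    }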

+ 3 - 2
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java

@@ -304,8 +304,9 @@ public abstract class ChecksumFileSystem extends FilterFileSystem {
    */
   @Override
   public FSDataInputStream open(Path f, int bufferSize) throws IOException {
-    return new FSDataInputStream(
-        new ChecksumFSInputChecker(this, f, bufferSize));
+    return verifyChecksum
+      ? new FSDataInputStream(new ChecksumFSInputChecker(this, f, bufferSize))
+      : getRawFileSystem().open(f, bufferSize);
   }
 
   /** {@inheritDoc} */
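
In practice this means setVerifyChecksum(false) now changes what open() returns: a checksum-verifying stream when verification is on, the raw stream when it is off. The new testStreamType case further down asserts exactly that; a small usage sketch against the local filesystem (the path is illustrative):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.LocalFileSystem;
    import org.apache.hadoop.fs.Path;

    public class VerifyChecksumSketch {
      public static void main(String[] args) throws Exception {
        LocalFileSystem fs = FileSystem.getLocal(new Configuration());
        Path p = new Path("/tmp/checksum-demo");   // illustrative path
        fs.create(p).close();

        fs.setVerifyChecksum(true);
        FSDataInputStream checked = fs.open(p);    // checksum-verifying stream
        checked.close();

        fs.setVerifyChecksum(false);
        FSDataInputStream raw = fs.open(p);        // raw stream, no verification
        raw.close();
      }
    }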

+ 1 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java

@@ -467,7 +467,7 @@ public class SequenceFile {
                Metadata metadata) throws IOException {
     return createWriter(FileContext.getFileContext(fs.getUri(), conf),
         conf, name, keyClass, valClass, compressionType, codec,
-        metadata, EnumSet.of(CreateFlag.CREATE),
+        metadata, EnumSet.of(CreateFlag.CREATE,CreateFlag.OVERWRITE),
         CreateOpts.bufferSize(bufferSize),
         createParent ? CreateOpts.createParent()
                      : CreateOpts.donotCreateParent(),
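
With CreateFlag.OVERWRITE added, this createWriter overload now truncates and rewrites an existing target file instead of failing; the new testCreateWriterOnExistingFile case further down exercises that. A hedged sketch of the repaired call (local filesystem, illustrative path, Text key/value types):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.SequenceFile;
    import org.apache.hadoop.io.SequenceFile.CompressionType;
    import org.apache.hadoop.io.SequenceFile.Metadata;
    import org.apache.hadoop.io.Text;

    public class SequenceFileOverwriteSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.getLocal(conf);
        Path file = new Path("/tmp/seqfile-demo");   // illustrative path

        fs.create(file).close();                     // the file already exists

        // Before HADOOP-7997 this threw because only CREATE was requested.
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file,
            Text.class, Text.class, 512, (short) 1, 4096, false,
            CompressionType.NONE, null, new Metadata());
        writer.append(new Text("key"), new Text("value"));
        writer.close();
      }
    }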

+ 4 - 2
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SecurityUtil.java

@@ -236,7 +236,7 @@ public class SecurityUtil {
     if (fqdn == null || fqdn.equals("") || fqdn.equals("0.0.0.0")) {
       fqdn = getLocalHostName();
     }
-    return components[0] + "/" + fqdn + "@" + components[2];
+    return components[0] + "/" + fqdn.toLowerCase() + "@" + components[2];
   }
   
   static String getLocalHostName() throws UnknownHostException {
@@ -409,7 +409,9 @@ public class SecurityUtil {
     Text service = buildTokenService(addr);
     if (token != null) {
       token.setService(service);
-      LOG.info("Acquired token "+token);  // Token#toString() prints service
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Acquired token "+token);  // Token#toString() prints service
+      }
     } else {
       LOG.warn("Failed to get token for service "+service);
     }
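
The lower-casing applies when the _HOST placeholder in a configured principal is replaced with the actual hostname, so a mixed-case DNS name no longer yields a principal that is absent from the KDC. A hypothetical illustration (the principal and hostname are made-up values):

    import org.apache.hadoop.security.SecurityUtil;

    public class PrincipalCaseSketch {
      public static void main(String[] args) throws Exception {
        String resolved = SecurityUtil.getServerPrincipal(
            "nn/_HOST@EXAMPLE.COM", "NameNode01.Example.COM");
        // After HADOOP-7988 the host component is lower-cased:
        // nn/namenode01.example.com@EXAMPLE.COM
        System.out.println(resolved);
      }
    }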

+ 20 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestDeprecatedKeys.java

@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.conf;
 
+import java.io.ByteArrayOutputStream;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeys;
 
@@ -32,4 +34,22 @@ public class TestDeprecatedKeys extends TestCase {
     String scriptFile = conf.get(CommonConfigurationKeys.NET_TOPOLOGY_SCRIPT_FILE_NAME_KEY);
     assertTrue(scriptFile.equals("xyz")) ;
   }
+  
+  //Tests reading / writing a conf file with deprecation after setting
+  public void testReadWriteWithDeprecatedKeys() throws Exception {
+    Configuration conf = new Configuration();
+    conf.setBoolean("old.config.yet.to.be.deprecated", true);
+    Configuration.addDeprecation("old.config.yet.to.be.deprecated", 
+	new String[]{"new.conf.to.replace.deprecated.conf"});
+    ByteArrayOutputStream out=new ByteArrayOutputStream();
+    String fileContents;
+    try {
+      conf.writeXml(out);
+      fileContents = out.toString();
+    } finally {
+      out.close();
+    }
+    assertTrue(fileContents.contains("old.config.yet.to.be.deprecated"));
+    assertTrue(fileContents.contains("new.conf.to.replace.deprecated.conf"));
+  }
 }

+ 70 - 23
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestChecksumFileSystem.java

@@ -22,12 +22,22 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import static org.apache.hadoop.fs.FileSystemTestHelper.*;
 import org.apache.hadoop.conf.Configuration;
-import junit.framework.TestCase;
+import org.junit.*;
+import static org.junit.Assert.*;
 
-public class TestChecksumFileSystem extends TestCase {
+public class TestChecksumFileSystem {
   static final String TEST_ROOT_DIR
     = System.getProperty("test.build.data","build/test/data/work-dir/localfs");
 
+  static LocalFileSystem localFs;
+
+  @Before
+  public void resetLocalFs() throws Exception {
+    localFs = FileSystem.getLocal(new Configuration());
+    localFs.setVerifyChecksum(true);
+  }
+
+  @Test
   public void testgetChecksumLength() throws Exception {
     assertEquals(8, ChecksumFileSystem.getChecksumLength(0L, 512));
     assertEquals(12, ChecksumFileSystem.getChecksumLength(1L, 512));
@@ -40,9 +50,8 @@ public class TestChecksumFileSystem extends TestCase {
                  ChecksumFileSystem.getChecksumLength(10000000000000L, 10));    
   } 
   
+  @Test
   public void testVerifyChecksum() throws Exception {    
-    Configuration conf = new Configuration();
-    LocalFileSystem localFs = FileSystem.getLocal(conf);
     Path testPath = new Path(TEST_ROOT_DIR, "testPath");
     Path testPath11 = new Path(TEST_ROOT_DIR, "testPath11");
     FSDataOutputStream fout = localFs.create(testPath);
@@ -68,7 +77,7 @@ public class TestChecksumFileSystem extends TestCase {
     
     //copying the wrong checksum file
     FileUtil.copy(localFs, localFs.getChecksumFile(testPath11), localFs, 
-        localFs.getChecksumFile(testPath),false,true,conf);
+        localFs.getChecksumFile(testPath),false,true,localFs.getConf());
     assertTrue("checksum exists", localFs.exists(localFs.getChecksumFile(testPath)));
     
     boolean errorRead = false;
@@ -80,20 +89,13 @@ public class TestChecksumFileSystem extends TestCase {
     assertTrue("error reading", errorRead);
     
     //now setting verify false, the read should succeed
-    try {
-      localFs.setVerifyChecksum(false);
-      String str = readFile(localFs, testPath, 1024).toString();
-      assertTrue("read", "testing".equals(str));
-    } finally {
-      // reset for other tests
-      localFs.setVerifyChecksum(true);
-    }
-    
+    localFs.setVerifyChecksum(false);
+    String str = readFile(localFs, testPath, 1024).toString();
+    assertTrue("read", "testing".equals(str));
   }
 
+  @Test
   public void testMultiChunkFile() throws Exception {
-    Configuration conf = new Configuration();
-    LocalFileSystem localFs = FileSystem.getLocal(conf);
     Path testPath = new Path(TEST_ROOT_DIR, "testMultiChunk");
     FSDataOutputStream fout = localFs.create(testPath);
     for (int i = 0; i < 1000; i++) {
@@ -116,9 +118,8 @@ public class TestChecksumFileSystem extends TestCase {
    * Test to ensure that if the checksum file is truncated, a
    * ChecksumException is thrown
    */
+  @Test
   public void testTruncatedChecksum() throws Exception { 
-    Configuration conf = new Configuration();
-    LocalFileSystem localFs = FileSystem.getLocal(conf);
     Path testPath = new Path(TEST_ROOT_DIR, "testtruncatedcrc");
     FSDataOutputStream fout = localFs.create(testPath);
     fout.write("testing truncation".getBytes());
@@ -146,14 +147,60 @@ public class TestChecksumFileSystem extends TestCase {
     }
 
     // telling it not to verify checksums, should avoid issue.
+    localFs.setVerifyChecksum(false);
+    String str = readFile(localFs, testPath, 1024).toString();
+    assertTrue("read", "testing truncation".equals(str));
+  }
+  
+  @Test
+  public void testStreamType() throws Exception {
+    Path testPath = new Path(TEST_ROOT_DIR, "testStreamType");
+    localFs.create(testPath).close();    
+    FSDataInputStream in = null;
+    
+    localFs.setVerifyChecksum(true);
+    in = localFs.open(testPath);
+    assertTrue("stream is input checker",
+        in.getWrappedStream() instanceof FSInputChecker);
+    
+    localFs.setVerifyChecksum(false);
+    in = localFs.open(testPath);
+    assertFalse("stream is not input checker",
+        in.getWrappedStream() instanceof FSInputChecker);
+  }
+  
+  @Test
+  public void testCorruptedChecksum() throws Exception {
+    Path testPath = new Path(TEST_ROOT_DIR, "testCorruptChecksum");
+    Path checksumPath = localFs.getChecksumFile(testPath);
+
+    // write a file to generate checksum
+    FSDataOutputStream out = localFs.create(testPath, true);
+    out.write("testing 1 2 3".getBytes());
+    out.close();
+    assertTrue(localFs.exists(checksumPath));
+    FileStatus stat = localFs.getFileStatus(checksumPath);
+    
+    // alter file directly so checksum is invalid
+    out = localFs.getRawFileSystem().create(testPath, true);
+    out.write("testing stale checksum".getBytes());
+    out.close();
+    assertTrue(localFs.exists(checksumPath));
+    // checksum didn't change on disk
+    assertEquals(stat, localFs.getFileStatus(checksumPath));
+
+    Exception e = null;
     try {
-      localFs.setVerifyChecksum(false);
-      String str = readFile(localFs, testPath, 1024).toString();
-      assertTrue("read", "testing truncation".equals(str));
-    } finally {
-      // reset for other tests
       localFs.setVerifyChecksum(true);
+      readFile(localFs, testPath, 1024);
+    } catch (ChecksumException ce) {
+      e = ce;
+    } finally {
+      assertNotNull("got checksum error", e);
     }
 
+    localFs.setVerifyChecksum(false);
+    String str = readFile(localFs, testPath, 1024);
+    assertEquals("testing stale checksum", str);
   }
 }

+ 17 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestSequenceFile.java

@@ -517,6 +517,23 @@ public class TestSequenceFile extends TestCase {
     assertTrue("InputStream for " + path + " should have been closed.", openedFile[0].isClosed());
   }
 
+   /**
+   * Test that makes sure createWriter succeeds on a file that was 
+   * already created
+   * @throws IOException
+   */
+  public void testCreateWriterOnExistingFile() throws IOException {
+    Configuration conf = new Configuration();
+    FileSystem fs = FileSystem.getLocal(conf);
+    Path name = new Path(new Path(System.getProperty("test.build.data","."),
+        "createWriterOnExistingFile") , "file");
+
+    fs.create(name);
+    SequenceFile.createWriter(fs, conf, name, RandomDatum.class,
+        RandomDatum.class, 512, (short) 1, 4096, false,
+        CompressionType.NONE, null, new Metadata());
+  }
+
   public void testRecursiveSeqFileCreate() throws IOException {
     Configuration conf = new Configuration();
     FileSystem fs = FileSystem.getLocal(conf);

+ 10 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestSecurityUtil.java

@@ -89,6 +89,16 @@ public class TestSecurityUtil {
     Mockito.verify(notUsed, Mockito.never()).getCanonicalHostName();
   }
 
+  @Test
+  public void testPrincipalsWithLowerCaseHosts() throws IOException {
+    String service = "xyz/";
+    String realm = "@REALM";
+    String principalInConf = service + SecurityUtil.HOSTNAME_PATTERN + realm;
+    String hostname = "FooHost";
+    String principal = service + hostname.toLowerCase() + realm;
+    verify(principalInConf, hostname, principal);
+  }
+
   @Test
   public void testLocalHostNameForNullOrWild() throws Exception {
     String local = SecurityUtil.getLocalHostName();

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/sbin/httpfs.sh

@@ -55,8 +55,8 @@ if [ "${1}" = "stop" ]; then
 fi
 
 if [ "${HTTPFS_SILENT}" != "true" ]; then
-  ${BASEDIR}/share/hadoop/httpfs/tomcat/bin/catalina.sh "$@"
+  ${CATALINA_BASE:-"${BASEDIR}/share/hadoop/httpfs/tomcat"}/bin/catalina.sh "$@"
 else
-  ${BASEDIR}/share/hadoop/httpfs/tomcat/bin/catalina.sh "$@" > /dev/null
+  ${CATALINA_BASE:-"${BASEDIR}/share/hadoop/httpfs/tomcat"}/bin/catalina.sh "$@" > /dev/null
 fi
 

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/lib/servlet/TestHostnameFilter.java

@@ -47,7 +47,7 @@ public class TestHostnameFilter extends HTestCase {
       @Override
       public void doFilter(ServletRequest servletRequest, ServletResponse servletResponse)
         throws IOException, ServletException {
-        Assert.assertEquals(HostnameFilter.get(), "localhost");
+        Assert.assertTrue(HostnameFilter.get().contains("localhost"));
         invoked.set(true);
       }
     };

+ 5 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -295,6 +295,11 @@ Release 0.23.1 - UNRELEASED
 
     HDFS-2837. mvn javadoc:javadoc not seeing LimitedPrivate class (revans2 via tucu)
 
+    HDFS-2840. TestHostnameFilter should work with localhost or localhost.localdomain (tucu)
+
+    HDFS-2791. If block report races with closing of file, replica is
+    incorrectly marked corrupt. (todd)
+
 Release 0.23.0 - 2011-11-01 
 
   INCOMPATIBLE CHANGES

+ 0 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs-config.sh

@@ -22,8 +22,6 @@ bin=`which "$0"`
 bin=`dirname "${bin}"`
 bin=`cd "$bin"; pwd`
 
-export HADOOP_PREFIX="${HADOOP_PREFIX:-$bin/..}"
-
 DEFAULT_LIBEXEC_DIR="$bin"/../libexec
 HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
 if [ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]; then

+ 18 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java

@@ -1571,7 +1571,24 @@ public class BlockManager {
       }
     case RBW:
     case RWR:
-      return storedBlock.isComplete();
+      if (!storedBlock.isComplete()) {
+        return false;
+      } else if (storedBlock.getGenerationStamp() != iblk.getGenerationStamp()) {
+        return true;
+      } else { // COMPLETE block, same genstamp
+        if (reportedState == ReplicaState.RBW) {
+          // If it's a RBW report for a COMPLETE block, it may just be that
+          // the block report got a little bit delayed after the pipeline
+          // closed. So, ignore this report, assuming we will get a
+          // FINALIZED replica later. See HDFS-2791
+          LOG.info("Received an RBW replica for block " + storedBlock +
+              " on " + dn.getName() + ": ignoring it, since the block is " +
+              "complete with the same generation stamp.");
+          return false;
+        } else {
+          return true;
+        }
+      }
     case RUR:       // should not be reported
     case TEMPORARY: // should not be reported
     default:
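
Stated as a decision table, the new RBW/RWR branch marks a replica corrupt only when the stored block is COMPLETE and either the generation stamps differ or the replica was reported in a state other than RBW. The following standalone method is an illustrative distillation of that logic, not the actual BlockManager code:

    public class ReplicaStateSketch {
      /** Hypothetical distillation of the RBW/RWR handling added for HDFS-2791. */
      static boolean isReplicaCorrupt(boolean storedComplete, long storedGenStamp,
          long reportedGenStamp, boolean reportedRbw) {
        if (!storedComplete) {
          return false;                  // block still under construction: not corrupt here
        }
        if (storedGenStamp != reportedGenStamp) {
          return true;                   // COMPLETE block with a stale generation stamp
        }
        // COMPLETE block, same genstamp: a late RBW report merely raced with the
        // pipeline close, so ignore it and wait for the FINALIZED replica.
        return !reportedRbw;
      }

      public static void main(String[] args) {
        System.out.println(isReplicaCorrupt(true, 1001L, 1001L, true));   // false: ignored
        System.out.println(isReplicaCorrupt(true, 1001L, 1000L, true));   // true: stale genstamp
      }
    }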

+ 9 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java

@@ -781,4 +781,13 @@ class BPOfferService implements Runnable {
     return;
   }
 
+  @VisibleForTesting
+  DatanodeProtocol getBpNamenode() {
+    return bpNamenode;
+  }
+
+  @VisibleForTesting
+  void setBpNamenode(DatanodeProtocol bpNamenode) {
+    this.bpNamenode = bpNamenode;
+  }
 }

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/AppendTestUtil.java

@@ -101,7 +101,7 @@ public class AppendTestUtil {
     return DFSTestUtil.getFileSystemAs(ugi, conf);
   }
 
-  static void write(OutputStream out, int offset, int length) throws IOException {
+  public static void write(OutputStream out, int offset, int length) throws IOException {
     final byte[] bytes = new byte[length];
     for(int i = 0; i < length; i++) {
       bytes[i] = (byte)(offset + i);

+ 39 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/DataNodeAdapter.java

@@ -17,6 +17,15 @@
  */
 package org.apache.hadoop.hdfs.server.datanode;
 
+import java.io.IOException;
+
+import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.mockito.Mockito;
+
+import com.google.common.base.Preconditions;
+
 /**
  * WARNING!! This is TEST ONLY class: it never has to be used
  * for ANY development purposes.
@@ -42,4 +51,34 @@ public class DataNodeAdapter {
       boolean heartbeatsDisabledForTests) {
     dn.setHeartbeatsDisabledForTests(heartbeatsDisabledForTests);
   }
+  
+  /**
+   * Insert a Mockito spy object between the given DataNode and
+   * the given NameNode. This can be used to delay or wait for
+   * RPC calls on the datanode->NN path.
+   */
+  public static DatanodeProtocol spyOnBposToNN(
+      DataNode dn, NameNode nn) {
+    String bpid = nn.getNamesystem().getBlockPoolId();
+    
+    BPOfferService bpos = null;
+    for (BPOfferService thisBpos : dn.getAllBpOs()) {
+      if (thisBpos.getBlockPoolId().equals(bpid)) {
+        bpos = thisBpos;
+        break;
+      }
+    }
+    Preconditions.checkArgument(bpos != null,
+        "No such bpid: %s", bpid);
+
+    // When protobufs are merged, the following can be converted
+    // to a simple spy. Because you can't spy on proxy objects,
+    // we have to use the DelegateAnswer trick.
+    DatanodeProtocol origNN = bpos.getBpNamenode();
+    DatanodeProtocol spy = Mockito.mock(DatanodeProtocol.class,
+        new GenericTestUtils.DelegateAnswer(origNN));
+
+    bpos.setBpNamenode(spy);
+    return spy;
+  }
 }

+ 86 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockReport.java

@@ -21,7 +21,9 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.commons.logging.impl.Log4JLogger;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.AppendTestUtil;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
@@ -35,14 +37,19 @@ import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
+import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
+import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.test.GenericTestUtils;
+import org.apache.hadoop.test.GenericTestUtils.DelayAnswer;
 import org.apache.log4j.Level;
 import org.junit.After;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 import org.junit.Before;
 import org.junit.Test;
+import org.mockito.Mockito;
+import org.mockito.invocation.InvocationOnMock;
 
 import java.io.File;
 import java.io.FilenameFilter;
@@ -50,6 +57,7 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Random;
+import java.util.concurrent.CountDownLatch;
 
 /**
  * This test simulates a variety of situations when blocks are being
@@ -491,6 +499,84 @@ public class TestBlockReport {
       resetConfiguration(); // return the initial state of the configuration
     }
   }
+  
+  /**
+   * Test for the case where one of the DNs in the pipeline is in the
+   * process of doing a block report exactly when the block is closed.
+   * In this case, the block report becomes delayed until after the
+   * block is marked completed on the NN, and hence it reports an RBW
+   * replica for a COMPLETE block. Such a report should not be marked
+   * corrupt.
+   * This is a regression test for HDFS-2791.
+   */
+  @Test
+  public void testOneReplicaRbwReportArrivesAfterBlockCompleted() throws Exception {
+    final CountDownLatch brFinished = new CountDownLatch(1);
+    DelayAnswer delayer = new GenericTestUtils.DelayAnswer(LOG) {
+      @Override
+      protected Object passThrough(InvocationOnMock invocation)
+          throws Throwable {
+        try {
+          return super.passThrough(invocation);
+        } finally {
+          // inform the test that our block report went through.
+          brFinished.countDown();
+        }
+      }
+    };
+    
+    final String METHOD_NAME = GenericTestUtils.getMethodName();
+    Path filePath = new Path("/" + METHOD_NAME + ".dat");
+
+    // Start a second DN for this test -- we're checking
+    // what happens when one of the DNs is slowed for some reason.
+    REPL_FACTOR = 2;
+    startDNandWait(null, false);
+    
+    NameNode nn = cluster.getNameNode();
+    
+    FSDataOutputStream out = fs.create(filePath, REPL_FACTOR);
+    try {
+      AppendTestUtil.write(out, 0, 10);
+      out.hflush();
+
+      // Set up a spy so that we can delay the block report coming
+      // from this node.
+      DataNode dn = cluster.getDataNodes().get(0);
+      DatanodeProtocol spy =
+        DataNodeAdapter.spyOnBposToNN(dn, nn);
+      
+      Mockito.doAnswer(delayer)
+        .when(spy).blockReport(
+          Mockito.<DatanodeRegistration>anyObject(),
+          Mockito.anyString(),
+          Mockito.<long[]>anyObject());
+      
+      // Force a block report to be generated. The block report will have
+      // an RBW replica in it. Wait for the RPC to be sent, but block
+      // it before it gets to the NN.
+      dn.scheduleAllBlockReport(0);
+      delayer.waitForCall();
+      
+    } finally {
+      IOUtils.closeStream(out);
+    }
+
+    // Now that the stream is closed, the NN will have the block in COMPLETE
+    // state.
+    delayer.proceed();
+    brFinished.await();
+    
+    // Verify that no replicas are marked corrupt, and that the
+    // file is still readable.
+    BlockManagerTestUtil.updateState(nn.getNamesystem().getBlockManager());
+    assertEquals(0, nn.getNamesystem().getCorruptReplicaBlocks());
+    DFSTestUtil.readFile(fs, filePath);
+    
+    // Ensure that the file is readable even from the DN that we futzed with.
+    cluster.stopDataNode(1);
+    DFSTestUtil.readFile(fs, filePath);    
+  }
 
   private void waitForTempReplica(Block bl, int DN_N1) throws IOException {
     final boolean tooLongWait = false;

+ 23 - 0
hadoop-mapreduce-project/CHANGES.txt

@@ -36,6 +36,8 @@ Release 0.23.1 - Unreleased
     MAPREDUCE-778. Rumen Anonymizer. (Amar Kamat and Chris Douglas via amarrk)
 
   IMPROVEMENTS
+    MAPREDUCE-3481. [Gridmix] Improve Gridmix STRESS mode. (amarrk)
+
     MAPREDUCE-3597. [Rumen] Rumen should provide APIs to access all the 
                     job-history related information.
 
@@ -153,6 +155,15 @@ Release 0.23.1 - Unreleased
     MAPREDUCE-3737. The Web Application Proxy's is not documented very well.
     (Robert Evans via mahadev)
 
+    MAPREDUCE-3699. Increased RPC handlers for all YARN servers to reasonable
+    values for working at scale. (Hitesh Shah via vinodkv)
+
+    MAPREDUCE-3693. Added mapreduce.admin.user.env to mapred-default.xml.
+    (Roman Shapshonik via acmurthy) 
+
+    MAPREDUCE-3732. Modified CapacityScheduler to use only users with pending
+    requests for computing user-limits. (Arun C Murthy via vinodkv)
+
   OPTIMIZATIONS
 
     MAPREDUCE-3567. Extraneous JobConf objects in AM heap. (Vinod Kumar
@@ -179,7 +190,13 @@ Release 0.23.1 - Unreleased
     MAPREDUCE-3718. Change default AM heartbeat interval to 1 second. (Hitesh
     Shah via sseth)
 
+    MAPREDUCE-3360. Added information about lost/rebooted/decommissioned nodes
+    on the webapps. (Bhallamudi Venkata Siva Kamesh and Jason Lowe via vinodkv)
+
   BUG FIXES
+    MAPREDUCE-2784. [Gridmix] Bug fixes in ExecutionSummarizer and 
+                    ResourceUsageMatcher. (amarrk)
+
     MAPREDUCE-3194. "mapred mradmin" command is broken in mrv2
                      (Jason Lowe via bobby)
 
@@ -558,6 +575,12 @@ Release 0.23.1 - Unreleased
     MAPREDUCE-3735. Add distcp jar to the distribution (tar).
     (mahadev)
 
+    MAPREDUCE-3720. Changed bin/mapred job -list to not print job-specific
+    information not available at RM. (vinodkv via acmurthy) 
+
+    MAPREDUCE-3742. "yarn logs" command fails with ClassNotFoundException.
+    (Jason Lowe via mahadev)
+
 Release 0.23.0 - 2011-11-01 
 
   INCOMPATIBLE CHANGES

+ 4 - 4
hadoop-mapreduce-project/bin/mapred

@@ -91,15 +91,15 @@ if [ -d "$HADOOP_MAPRED_HOME/build/tools" ]; then
 fi
 
 # for releases, add core mapred jar & webapps to CLASSPATH
-if [ -d "$HADOOP_PREFIX/share/hadoop/mapreduce/webapps" ]; then
-  CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX/share/hadoop/mapreduce
+if [ -d "$HADOOP_PREFIX/${MAPRED_DIR}/webapps" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX/${MAPRED_DIR}
 fi
-for f in $HADOOP_MAPRED_HOME/share/hadoop-mapreduce/*.jar; do
+for f in $HADOOP_MAPRED_HOME/${MAPRED_DIR}/*.jar; do
   CLASSPATH=${CLASSPATH}:$f;
 done
 
 # add libs to CLASSPATH
-for f in $HADOOP_MAPRED_HOME/lib/*.jar; do
+for f in $HADOOP_MAPRED_HOME/${MAPRED_LIB_JARS_DIR}/*.jar; do
   CLASSPATH=${CLASSPATH}:$f;
 done
 

+ 6 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java

@@ -455,10 +455,14 @@ public class Job extends JobContextImpl implements JobContext {
   public String toString() {
     ensureState(JobState.RUNNING);
     String reasonforFailure = " ";
+    int numMaps = 0;
+    int numReduces = 0;
     try {
       updateStatus();
       if (status.getState().equals(JobStatus.State.FAILED))
         reasonforFailure = getTaskFailureEventString();
+      numMaps = getTaskReports(TaskType.MAP).length;
+      numReduces = getTaskReports(TaskType.REDUCE).length;
     } catch (IOException e) {
     } catch (InterruptedException ie) {
     }
@@ -468,6 +472,8 @@ public class Job extends JobContextImpl implements JobContext {
     sb.append("Job Tracking URL : ").append(status.getTrackingUrl());
     sb.append("\n");
     sb.append("Uber job : ").append(status.isUber()).append("\n");
+    sb.append("Number of maps: ").append(numMaps);
+    sb.append("Number of reduces: ").append(numReduces);
     sb.append("map() completion: ");
     sb.append(status.getMapProgress()).append("\n");
     sb.append("reduce() completion: ");

+ 1 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java

@@ -412,7 +412,7 @@ public interface MRJobConfig {
   /** The number of threads used to handle task RPC calls.*/
   public static final String MR_AM_TASK_LISTENER_THREAD_COUNT =
     MR_AM_PREFIX + "job.task.listener.thread-count";
-  public static final int DEFAULT_MR_AM_TASK_LISTENER_THREAD_COUNT = 10;
+  public static final int DEFAULT_MR_AM_TASK_LISTENER_THREAD_COUNT = 30;
 
   /** How often the AM should send heartbeats to the RM.*/
   public static final String MR_AM_TO_RM_HEARTBEAT_INTERVAL_MS =

+ 19 - 13
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/tools/CLI.java

@@ -18,6 +18,7 @@
 package org.apache.hadoop.mapreduce.tools;
 
 import java.io.IOException;
+import java.io.PrintWriter;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -25,6 +26,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.ipc.RemoteException;
@@ -560,25 +562,28 @@ public class CLI extends Configured implements Tool {
       }
     }
   }
-  
+
   public void displayJobList(JobStatus[] jobs) 
       throws IOException, InterruptedException {
-    System.out.println("Total jobs:" + jobs.length);
-    System.out.println("JobId\tState\tStartTime\t" +
-        "UserName\tQueue\tPriority\tMaps\tReduces\tUsedContainers\t" +
-        "RsvdContainers\tUsedMem\tRsvdMem\tNeededMem\tAM info");
-    for (JobStatus job : jobs) {
-      TaskReport[] mapReports =
-                 cluster.getJob(job.getJobID()).getTaskReports(TaskType.MAP);
-      TaskReport[] reduceReports =
-                 cluster.getJob(job.getJobID()).getTaskReports(TaskType.REDUCE);
+    displayJobList(jobs, new PrintWriter(System.out));
+  }
+
+  @Private
+  public static String headerPattern = "%23s\t%10s\t%14s\t%12s\t%12s\t%10s\t%15s\t%15s\t%8s\t%8s\t%10s\t%10s\n";
+  @Private
+  public static String dataPattern   = "%23s\t%10s\t%14d\t%12s\t%12s\t%10s\t%14d\t%14d\t%7dM\t%7sM\t%9dM\t%10s\n";
 
-      System.out.printf("%s\t%s\t%d\t%s\t%s\t%s\t%d\t%d\t%d\t%d\t%dM\t%dM\t%dM\t%s\n",
+  @Private
+  public void displayJobList(JobStatus[] jobs, PrintWriter writer) {
+    writer.println("Total jobs:" + jobs.length);
+    writer.printf(headerPattern, "JobId", "State", "StartTime", "UserName",
+      "Queue", "Priority", "UsedContainers",
+      "RsvdContainers", "UsedMem", "RsvdMem", "NeededMem", "AM info");
+    for (JobStatus job : jobs) {
+      writer.printf(dataPattern,
           job.getJobID().toString(), job.getState(), job.getStartTime(),
           job.getUsername(), job.getQueue(), 
           job.getPriority().name(),
-          mapReports.length,
-          reduceReports.length,
           job.getNumUsedSlots(),
           job.getNumReservedSlots(),
           job.getUsedMem(),
@@ -586,6 +591,7 @@ public class CLI extends Configured implements Tool {
           job.getNeededMem(),
           job.getSchedulingInfo());
     }
+    writer.flush();
   }
   
   public static void main(String[] argv) throws Exception {
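
Because displayJobList() now writes to a caller-supplied PrintWriter, the listing can be captured instead of going straight to System.out; the reworked JobClientUnitTest below relies on this. A hedged sketch of the new entry point (it assumes the no-argument CLI constructor is usable in this context):

    import java.io.ByteArrayOutputStream;
    import java.io.PrintWriter;
    import org.apache.hadoop.mapreduce.JobStatus;
    import org.apache.hadoop.mapreduce.tools.CLI;

    public class DisplayJobListSketch {
      public static void main(String[] args) throws Exception {
        CLI cli = new CLI();
        ByteArrayOutputStream out = new ByteArrayOutputStream();

        // An empty array keeps the sketch self-contained; real callers pass the
        // job statuses obtained from the cluster.
        cli.displayJobList(new JobStatus[0], new PrintWriter(out));

        System.out.print(out.toString());   // begins with "Total jobs:0"
      }
    }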

+ 24 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml

@@ -440,6 +440,16 @@
   </description>
 </property>
 
+<property>
+  <name>mapreduce.admin.user.env</name>
+  <value>LD_LIBRARY_PATH=$HADOOP_COMMON_HOME/lib/native</value>
+  <description>Expert: Additional execution environment entries for 
+  map and reduce task processes. This is not an additive property.
+  You must preserve the original value if you want your map and
+  reduce tasks to have access to native libraries (compression, etc). 
+  </description>
+</property>
+
 <property>
   <name>mapreduce.task.tmp.dir</name>
   <value>./tmp</value>
@@ -1224,4 +1234,18 @@
     mapreduce.job.end-notification.max.retry.interval</description>
 </property>
 
+<property>
+  <name>yarn.app.mapreduce.am.job.task.listener.thread-count</name>
+  <value>30</value>
+  <description>The number of threads used to handle RPC calls in the 
+    MR AppMaster from remote tasks</description>
+</property>
+
+<property>
+  <name>yarn.app.mapreduce.am.scheduler.heartbeat.interval-ms</name>
+  <value>1000</value>
+  <description>The interval in ms at which the MR AppMaster should send
+    heartbeats to the ResourceManager</description>
+</property>
+
 </configuration>

+ 24 - 13
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/JobClientUnitTest.java

@@ -22,19 +22,24 @@ import static org.junit.Assert.assertEquals;
 import static org.mockito.Matchers.isA;
 import static org.mockito.Mockito.atLeastOnce;
 import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.never;
 import static org.mockito.Mockito.verify;
 import static org.mockito.Mockito.when;
 
+import java.io.ByteArrayOutputStream;
 import java.io.IOException;
-import org.apache.hadoop.mapred.JobConf;
+import java.io.PrintWriter;
+
 import org.apache.hadoop.mapreduce.Cluster;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.JobPriority;
 import org.apache.hadoop.mapreduce.JobStatus;
-import org.apache.hadoop.mapreduce.TaskType;
 import org.apache.hadoop.mapreduce.TaskReport;
+import org.apache.hadoop.mapreduce.TaskType;
+import org.junit.Assert;
 import org.junit.Test;
 
+@SuppressWarnings("deprecation")
 public class JobClientUnitTest {
   
   public class TestJobClient extends JobClient {
@@ -48,7 +53,6 @@ public class JobClientUnitTest {
     }
   }
 
-  @SuppressWarnings("deprecation")
   @Test
   public void testMapTaskReportsWithNullJob() throws Exception {
     TestJobClient client = new TestJobClient(new JobConf());
@@ -64,7 +68,6 @@ public class JobClientUnitTest {
     verify(mockCluster).getJob(id);
   }
   
-  @SuppressWarnings("deprecation")
   @Test
   public void testReduceTaskReportsWithNullJob() throws Exception {
     TestJobClient client = new TestJobClient(new JobConf());
@@ -80,7 +83,6 @@ public class JobClientUnitTest {
     verify(mockCluster).getJob(id);
   }
   
-  @SuppressWarnings("deprecation")
   @Test
   public void testSetupTaskReportsWithNullJob() throws Exception {
     TestJobClient client = new TestJobClient(new JobConf());
@@ -96,7 +98,6 @@ public class JobClientUnitTest {
     verify(mockCluster).getJob(id);
   }
   
-  @SuppressWarnings("deprecation")
   @Test
   public void testCleanupTaskReportsWithNullJob() throws Exception {
     TestJobClient client = new TestJobClient(new JobConf());
@@ -115,12 +116,15 @@ public class JobClientUnitTest {
   @Test
   public void testShowJob() throws Exception {
     TestJobClient client = new TestJobClient(new JobConf());
-    JobID jobID = new JobID("test", 0);
+
+    long startTime = System.currentTimeMillis();
+
+    JobID jobID = new JobID(String.valueOf(startTime), 12345);
 
     JobStatus mockJobStatus = mock(JobStatus.class);
     when(mockJobStatus.getJobID()).thenReturn(jobID);
     when(mockJobStatus.getState()).thenReturn(JobStatus.State.RUNNING);
-    when(mockJobStatus.getStartTime()).thenReturn(0L);
+    when(mockJobStatus.getStartTime()).thenReturn(startTime);
     when(mockJobStatus.getUsername()).thenReturn("mockuser");
     when(mockJobStatus.getQueue()).thenReturn("mockqueue");
     when(mockJobStatus.getPriority()).thenReturn(JobPriority.NORMAL);
@@ -132,18 +136,21 @@ public class JobClientUnitTest {
     when(mockJobStatus.getSchedulingInfo()).thenReturn("NA");
 
     Job mockJob = mock(Job.class);
-    when(mockJob.getTaskReports(isA(TaskType.class))).thenReturn(new TaskReport[0]);
+    when(mockJob.getTaskReports(isA(TaskType.class))).thenReturn(
+      new TaskReport[5]);
 
     Cluster mockCluster = mock(Cluster.class);
     when(mockCluster.getJob(jobID)).thenReturn(mockJob);
 
     client.setCluster(mockCluster);
     
-    
-    client.displayJobList(new JobStatus[] {mockJobStatus});
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+    client.displayJobList(new JobStatus[] {mockJobStatus}, new PrintWriter(out));
+    String commandLineOutput = out.toString();
+    System.out.println(commandLineOutput);
+    Assert.assertTrue(commandLineOutput.contains("Total jobs:1"));
+
     verify(mockJobStatus, atLeastOnce()).getJobID();
-    verify(mockJob, atLeastOnce()).getTaskReports(isA(TaskType.class));
-    verify(mockCluster, atLeastOnce()).getJob(jobID);
     verify(mockJobStatus).getState();
     verify(mockJobStatus).getStartTime();
     verify(mockJobStatus).getUsername();
@@ -155,5 +162,9 @@ public class JobClientUnitTest {
     verify(mockJobStatus).getReservedMem();
     verify(mockJobStatus).getNeededMem();
     verify(mockJobStatus).getSchedulingInfo();
+
+    // This call should not go to each AM.
+    verify(mockCluster, never()).getJob(jobID);
+    verify(mockJob, never()).getTaskReports(isA(TaskType.class));
   }
 }

+ 3 - 3
hadoop-mapreduce-project/hadoop-yarn/bin/yarn

@@ -140,8 +140,8 @@ if [ -d "$YARN_HOME/build/tools" ]; then
   CLASSPATH=${CLASSPATH}:$YARN_HOME/build/tools
 fi
 
-CLASSPATH=${CLASSPATH}:$YARN_HOME/share/hadoop/mapreduce/*
-CLASSPATH=${CLASSPATH}:$YARN_HOME/share/hadoop/mapreduce/lib/*
+CLASSPATH=${CLASSPATH}:$YARN_HOME/${YARN_DIR}/*
+CLASSPATH=${CLASSPATH}:$YARN_HOME/${YARN_LIB_JARS_DIR}/*
 
 # so that filenames w/ spaces are handled correctly in loops below
 IFS=
@@ -194,7 +194,7 @@ elif [ "$COMMAND" = "jar" ] ; then
   CLASS=org.apache.hadoop.util.RunJar
   YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
 elif [ "$COMMAND" = "logs" ] ; then
-  CLASS=org.apache.hadoop.yarn.server.nodemanager.containermanager.logaggregation.LogDumper
+  CLASS=org.apache.hadoop.yarn.logaggregation.LogDumper
   YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
 elif [ "$COMMAND" = "daemonlog" ] ; then
   CLASS=org.apache.hadoop.log.LogLevel

+ 0 - 2
hadoop-mapreduce-project/hadoop-yarn/bin/yarn-config.sh

@@ -19,8 +19,6 @@ bin=`which "$0"`
 bin=`dirname "${bin}"`
 bin=`cd "$bin"; pwd`
 
-export HADOOP_PREFIX="${HADOOP_PREFIX:-$bin/..}"
-
 DEFAULT_LIBEXEC_DIR="$bin"/../libexec
 HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
 if [ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]; then

+ 4 - 4
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java

@@ -90,7 +90,7 @@ public class YarnConfiguration extends Configuration {
   /** The number of threads used to handle applications manager requests.*/
   public static final String RM_CLIENT_THREAD_COUNT =
     RM_PREFIX + "client.thread-count";
-  public static final int DEFAULT_RM_CLIENT_THREAD_COUNT = 10;
+  public static final int DEFAULT_RM_CLIENT_THREAD_COUNT = 50;
 
   /** The Kerberos principal for the resource manager.*/
   public static final String RM_PRINCIPAL =
@@ -106,7 +106,7 @@ public class YarnConfiguration extends Configuration {
   /** Number of threads to handle scheduler interface.*/
   public static final String RM_SCHEDULER_CLIENT_THREAD_COUNT =
     RM_PREFIX + "scheduler.client.thread-count";
-  public static final int DEFAULT_RM_SCHEDULER_CLIENT_THREAD_COUNT = 10;
+  public static final int DEFAULT_RM_SCHEDULER_CLIENT_THREAD_COUNT = 50;
   
   /** The address of the RM web application.*/
   public static final String RM_WEBAPP_ADDRESS = 
@@ -184,7 +184,7 @@ public class YarnConfiguration extends Configuration {
   /** Number of threads to handle resource tracker calls.*/
   public static final String RM_RESOURCE_TRACKER_CLIENT_THREAD_COUNT =
     RM_PREFIX + "resource-tracker.client.thread-count";
-  public static final int DEFAULT_RM_RESOURCE_TRACKER_CLIENT_THREAD_COUNT = 10;
+  public static final int DEFAULT_RM_RESOURCE_TRACKER_CLIENT_THREAD_COUNT = 50;
   
   /** The class to use as the resource scheduler.*/
   public static final String RM_SCHEDULER = 
@@ -257,7 +257,7 @@ public class YarnConfiguration extends Configuration {
   /** Number of threads container manager uses.*/
   public static final String NM_CONTAINER_MGR_THREAD_COUNT =
     NM_PREFIX + "container-manager.thread-count";
-  public static final int DEFAULT_NM_CONTAINER_MGR_THREAD_COUNT = 5;
+  public static final int DEFAULT_NM_CONTAINER_MGR_THREAD_COUNT = 20;
   
   /** Number of threads used in cleanup.*/
   public static final String NM_DELETE_THREAD_COUNT = 
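
Note: the hunks above only raise the default handler thread counts (RM client/scheduler/resource-tracker from 10 to 50, NM container manager from 5 to 20); the configuration keys are unchanged, so clusters can still override them. A hedged sketch of a programmatic override using the constants shown above (the value 100 is purely illustrative, not a recommendation):

    import org.apache.hadoop.yarn.conf.YarnConfiguration;

    public class ThreadCountOverrideSketch {
      public static void main(String[] args) {
        YarnConfiguration conf = new YarnConfiguration();
        // Illustrative override; a real deployment would normally set
        // yarn.resourcemanager.client.thread-count in yarn-site.xml instead.
        conf.setInt(YarnConfiguration.RM_CLIENT_THREAD_COUNT, 100);
        System.out.println(conf.getInt(
            YarnConfiguration.RM_CLIENT_THREAD_COUNT,
            YarnConfiguration.DEFAULT_RM_CLIENT_THREAD_COUNT));
      }
    }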

+ 4 - 4
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/resources/yarn-default.xml

@@ -67,7 +67,7 @@
   <property>
     <description>The number of threads used to handle applications manager requests.</description>
     <name>yarn.resourcemanager.client.thread-count</name>
-    <value>10</value>
+    <value>50</value>
   </property>
 
   <property>
@@ -90,7 +90,7 @@
   <property>
     <description>Number of threads to handle scheduler interface.</description>
     <name>yarn.resourcemanager.scheduler.client.thread-count</name>
-    <value>10</value>
+    <value>50</value>
   </property>
 
   <property>
@@ -179,7 +179,7 @@
   <property>
     <description>Number of threads to handle resource tracker calls.</description>
     <name>yarn.resourcemanager.resource-tracker.client.thread-count</name>
-    <value>10</value>
+    <value>50</value>
   </property>
 
   <property>
@@ -244,7 +244,7 @@
   <property>
     <description>Number of threads container manager uses.</description>
     <name>yarn.nodemanager.container-manager.thread-count</name>
-    <value>5</value>
+    <value>20</value>
   </property>
 
   <property>

+ 17 - 6
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClusterMetrics.java

@@ -29,7 +29,6 @@ import org.apache.hadoop.metrics2.annotation.Metric;
 import org.apache.hadoop.metrics2.annotation.Metrics;
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.metrics2.lib.MetricsRegistry;
-import org.apache.hadoop.metrics2.lib.MutableCounterInt;
 import org.apache.hadoop.metrics2.lib.MutableGaugeInt;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEventType;
 
@@ -39,9 +38,9 @@ public class ClusterMetrics {
   
   private static AtomicBoolean isInitialized = new AtomicBoolean(false);
   
-  @Metric("# of NMs") MutableGaugeInt numNMs;
-  @Metric("# of decommissioned NMs") MutableCounterInt numDecommissionedNMs;
-  @Metric("# of lost NMs") MutableCounterInt numLostNMs;
+  @Metric("# of active NMs") MutableGaugeInt numNMs;
+  @Metric("# of decommissioned NMs") MutableGaugeInt numDecommissionedNMs;
+  @Metric("# of lost NMs") MutableGaugeInt numLostNMs;
   @Metric("# of unhealthy NMs") MutableGaugeInt numUnhealthyNMs;
   @Metric("# of Rebooted NMs") MutableGaugeInt numRebootedNMs;
   
@@ -73,8 +72,8 @@ public class ClusterMetrics {
     }
   }
   
-  //Total Nodemanagers
-  public int getNumNMs() {
+  //Active Nodemanagers
+  public int getNumActiveNMs() {
     return numNMs.value();
   }
   
@@ -87,6 +86,10 @@ public class ClusterMetrics {
     numDecommissionedNMs.incr();
   }
   
+  public void decrDecommisionedNMs() {
+    numDecommissionedNMs.decr();
+  }
+  
   //Lost NMs
   public int getNumLostNMs() {
     return numLostNMs.value();
@@ -96,6 +99,10 @@ public class ClusterMetrics {
     numLostNMs.incr();
   }
   
+  public void decrNumLostNMs() {
+    numLostNMs.decr();
+  }
+  
   //Unhealthy NMs
   public int getUnhealthyNMs() {
     return numUnhealthyNMs.value();
@@ -118,6 +125,10 @@ public class ClusterMetrics {
     numRebootedNMs.incr();
   }
   
+  public void decrNumRebootedNMs() {
+    numRebootedNMs.decr();
+  }
+  
   public void removeNode(RMNodeEventType nodeEventType) {
     numNMs.decr();
     switch(nodeEventType){
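
Note on the metric-type change above: numDecommissionedNMs and numLostNMs move from MutableCounterInt to MutableGaugeInt because a metrics2 counter is monotonic, while these counts must now be decremented when a previously lost/rebooted/decommissioned node re-registers (see the new decr* methods and the RMNodeImpl change below). A simplified, self-contained sketch of that bookkeeping, using plain ints as stand-ins for the gauges (hypothetical class, not the Hadoop one):

    public class NodeStateGaugesSketch {
      private int active, lost, rebooted, decommissioned;

      public void nodeLost()     { active--; lost++; }
      public void nodeRebooted() { active--; rebooted++; }

      // A node that was previously marked inactive registers again: the
      // matching "inactive" figure has to go back down, which only works
      // with gauge semantics, not with a monotonic counter.
      public void nodeReRegistered(String previousState) {
        if ("LOST".equals(previousState))                { lost--; }
        else if ("REBOOTED".equals(previousState))       { rebooted--; }
        else if ("DECOMMISSIONED".equals(previousState)) { decommissioned--; }
        active++;
      }
    }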

+ 2 - 0
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java

@@ -43,6 +43,8 @@ public interface RMContext {
   ApplicationsStore getApplicationsStore();
 
   ConcurrentMap<ApplicationId, RMApp> getRMApps();
+  
+  ConcurrentMap<String, RMNode> getInactiveRMNodes();
 
   ConcurrentMap<NodeId, RMNode> getRMNodes();
 

+ 8 - 0
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java

@@ -43,6 +43,9 @@ public class RMContextImpl implements RMContext {
 
   private final ConcurrentMap<NodeId, RMNode> nodes
     = new ConcurrentHashMap<NodeId, RMNode>();
+  
+  private final ConcurrentMap<String, RMNode> inactiveNodes
+    = new ConcurrentHashMap<String, RMNode>();
 
   private AMLivelinessMonitor amLivelinessMonitor;
   private ContainerAllocationExpirer containerAllocationExpirer;
@@ -83,6 +86,11 @@ public class RMContextImpl implements RMContext {
   public ConcurrentMap<NodeId, RMNode> getRMNodes() {
     return this.nodes;
   }
+  
+  @Override
+  public ConcurrentMap<String, RMNode> getInactiveRMNodes() {
+    return this.inactiveNodes;
+  }
 
   @Override
   public ContainerAllocationExpirer getContainerAllocationExpirer() {

+ 0 - 4
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java

@@ -220,10 +220,6 @@ public class ResourceTrackerService extends AbstractService implements
     if (rmNode == null) {
       /* node does not exist */
       LOG.info("Node not found rebooting " + remoteNodeStatus.getNodeId());
-      
-      // Updating the metrics directly as reboot event cannot be 
-      // triggered on a null rmNode
-      ClusterMetrics.getMetrics().incrNumRebootedNMs();
       return reboot;
     }
 

+ 24 - 2
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java

@@ -119,7 +119,7 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
          RMNodeEventType.DECOMMISSION, new RemoveNodeTransition())
      .addTransition(RMNodeState.RUNNING, RMNodeState.LOST,
          RMNodeEventType.EXPIRE, new RemoveNodeTransition())
-     .addTransition(RMNodeState.RUNNING, RMNodeState.LOST,
+     .addTransition(RMNodeState.RUNNING, RMNodeState.REBOOTED,
          RMNodeEventType.REBOOTING, new RemoveNodeTransition())
      .addTransition(RMNodeState.RUNNING, RMNodeState.RUNNING,
          RMNodeEventType.CLEANUP_APP, new CleanUpAppTransition())
@@ -307,6 +307,21 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
   
   public static class AddNodeTransition implements
       SingleArcTransition<RMNodeImpl, RMNodeEvent> {
+	  
+    private void updateMetrics(RMNodeState nodeState) {
+      ClusterMetrics metrics = ClusterMetrics.getMetrics();
+      switch (nodeState) {
+      case LOST:
+        metrics.decrNumLostNMs();
+        break;
+      case REBOOTED:
+        metrics.decrNumRebootedNMs();
+        break;
+      case DECOMMISSIONED:
+        metrics.decrDecommisionedNMs();
+        break;
+      }
+    }
 
     @SuppressWarnings("unchecked")
     @Override
@@ -315,6 +330,13 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
 
       rmNode.context.getDispatcher().getEventHandler().handle(
           new NodeAddedSchedulerEvent(rmNode));
+      
+      String host = rmNode.nodeId.getHost();
+      if (rmNode.context.getInactiveRMNodes().containsKey(host)) {
+        RMNode node = rmNode.context.getInactiveRMNodes().get(host);
+        rmNode.context.getInactiveRMNodes().remove(host);
+        updateMetrics(node.getState());
+      }
 
       ClusterMetrics.getMetrics().addNode();
     }
@@ -353,7 +375,7 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
       // Remove the node from the system.
       rmNode.context.getRMNodes().remove(rmNode.nodeId);
       LOG.info("Removed Node " + rmNode.nodeId);
-      
+      rmNode.context.getInactiveRMNodes().put(rmNode.nodeId.getHost(), rmNode);
       //Update the metrics 
       ClusterMetrics.getMetrics().removeNode(event.getType());
     }
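
Note on the two transitions above: RemoveNodeTransition now parks the departing node in the context's inactive-nodes map (keyed by host), and AddNodeTransition evicts the entry and rolls back the corresponding LOST/REBOOTED/DECOMMISSIONED gauge when the same host registers again. A minimal stand-alone sketch of that lifecycle (hypothetical names; the real code stores RMNode objects rather than enum values):

    import java.util.HashMap;
    import java.util.Map;

    public class InactiveNodeBookSketch {
      public enum State { LOST, REBOOTED, DECOMMISSIONED }

      private final Map<String, State> inactiveByHost = new HashMap<String, State>();

      // RemoveNodeTransition analogue: remember why the host went away.
      public void onRemove(String host, State why) {
        inactiveByHost.put(host, why);
      }

      // AddNodeTransition analogue: a non-null result tells the caller which
      // gauge to decrement before counting the node as active again.
      public State onAdd(String host) {
        return inactiveByHost.remove(host);
      }
    }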

+ 109 - 0
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ActiveUsersManager.java

@@ -0,0 +1,109 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.resourcemanager.scheduler;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience.Private;
+import org.apache.hadoop.yarn.Lock;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+
+/**
+ * {@link ActiveUsersManager} tracks active users in the system.
+ * A user is deemed to be active if any of their running applications
+ * has outstanding resource requests.
+ * 
+ * The count of active users is used when computing per-user limits.
+ */
+@Private
+public class ActiveUsersManager {
+  
+  private static final Log LOG = LogFactory.getLog(ActiveUsersManager.class);
+  
+  private final QueueMetrics metrics;
+  
+  private int activeUsers = 0;
+  private Map<String, Set<ApplicationId>> usersApplications = 
+      new HashMap<String, Set<ApplicationId>>();
+  
+  public ActiveUsersManager(QueueMetrics metrics) {
+    this.metrics = metrics;
+  }
+  
+  /**
+   * An application has new outstanding requests.
+   * 
+   * @param user application user 
+   * @param applicationId activated application
+   */
+  @Lock({Queue.class, SchedulerApp.class})
+  synchronized public void activateApplication(
+      String user, ApplicationId applicationId) {
+    Set<ApplicationId> userApps = usersApplications.get(user);
+    if (userApps == null) {
+      userApps = new HashSet<ApplicationId>();
+      usersApplications.put(user, userApps);
+      ++activeUsers;
+      metrics.incrActiveUsers();
+      LOG.debug("User " + user + " added to activeUsers, currently: " + 
+          activeUsers);
+    }
+    if (userApps.add(applicationId)) {
+      metrics.activateApp(user);
+    }
+  }
+  
+  /**
+   * An application has no more outstanding requests.
+   * 
+   * @param user application user 
+   * @param applicationId deactivated application
+   */
+  @Lock({Queue.class, SchedulerApp.class})
+  synchronized public void deactivateApplication(
+      String user, ApplicationId applicationId) {
+    Set<ApplicationId> userApps = usersApplications.get(user);
+    if (userApps != null) {
+      if (userApps.remove(applicationId)) {
+        metrics.deactivateApp(user);
+      }
+      if (userApps.isEmpty()) {
+        usersApplications.remove(user);
+        --activeUsers;
+        metrics.decrActiveUsers();
+        LOG.debug("User " + user + " removed from activeUsers, currently: " + 
+            activeUsers);
+      }
+    }
+  }
+  
+  /**
+   * Get number of active users i.e. users with applications which have pending
+   * resource requests.
+   * @return number of active users
+   */
+  @Lock({Queue.class, SchedulerApp.class})
+  synchronized public int getNumActiveUsers() {
+    return activeUsers;
+  }
+}
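
Since ActiveUsersManager is new in this change, a short usage sketch may help. It is written as a JUnit/Mockito-style snippet against the API shown above; the mocked QueueMetrics and the default-valued ApplicationId are test conveniences, not the real scheduler wiring:

    import static org.junit.Assert.assertEquals;
    import static org.mockito.Mockito.mock;

    import org.apache.hadoop.yarn.api.records.ApplicationId;
    import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager;
    import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
    import org.apache.hadoop.yarn.util.Records;
    import org.junit.Test;

    public class ActiveUsersManagerSketch {
      @Test
      public void userIsActiveOnlyWhileRequestsAreOutstanding() {
        ActiveUsersManager users = new ActiveUsersManager(mock(QueueMetrics.class));
        ApplicationId app = Records.newRecord(ApplicationId.class);

        users.activateApplication("alice", app);     // first outstanding ask
        assertEquals(1, users.getNumActiveUsers());

        users.deactivateApplication("alice", app);   // last ask satisfied
        assertEquals(0, users.getNumActiveUsers());
      }
    }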

+ 45 - 20
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java

@@ -36,12 +36,11 @@ import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.api.records.ResourceRequest;
-import org.apache.hadoop.yarn.factories.RecordFactory;
-import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore;
 import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
+import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl;
 
 /**
  * This class keeps track of all the consumption of an application. This also
@@ -59,27 +58,27 @@ public class AppSchedulingInfo {
   final String user;
   private final AtomicInteger containerIdCounter = new AtomicInteger(0);
 
-  private final RecordFactory recordFactory = RecordFactoryProvider
-      .getRecordFactory(null);
-
   final Set<Priority> priorities = new TreeSet<Priority>(
       new org.apache.hadoop.yarn.server.resourcemanager.resource.Priority.Comparator());
   final Map<Priority, Map<String, ResourceRequest>> requests = 
     new HashMap<Priority, Map<String, ResourceRequest>>();
 
-  private final ApplicationStore store;
-
+  //private final ApplicationStore store;
+  private final ActiveUsersManager activeUsersManager;
+  
   /* Allocated by scheduler */
   boolean pending = true; // for app metrics
 
   public AppSchedulingInfo(ApplicationAttemptId appAttemptId,
-      String user, Queue queue, ApplicationStore store) {
+      String user, Queue queue, ActiveUsersManager activeUsersManager,
+      ApplicationStore store) {
     this.applicationAttemptId = appAttemptId;
     this.applicationId = appAttemptId.getApplicationId();
     this.queue = queue;
     this.queueName = queue.getQueueName();
     this.user = user;
-    this.store = store;
+    //this.store = store;
+    this.activeUsersManager = activeUsersManager;
   }
 
   public ApplicationId getApplicationId() {
@@ -123,7 +122,8 @@ public class AppSchedulingInfo {
    * @param requests
    *          resources to be acquired
    */
-  synchronized public void updateResourceRequests(List<ResourceRequest> requests) {
+  synchronized public void updateResourceRequests(
+      List<ResourceRequest> requests) {
     QueueMetrics metrics = queue.getMetrics();
     // Update resource requests
     for (ResourceRequest request : requests) {
@@ -138,6 +138,16 @@ public class AppSchedulingInfo {
               + request);
         }
         updatePendingResources = true;
+        
+        // Premature optimization?
+        // Assumes that we won't see more than one priority request updated
+        // in one call, reasonable assumption... however, it's totally safe
+        // to activate same application more than once.
+        // Thus we don't need another loop ala the one in decrementOutstanding()  
+        // which is needed during deactivate.
+        if (request.getNumContainers() > 0) {
+          activeUsersManager.activateApplication(user, applicationId);
+        }
       }
 
       Map<String, ResourceRequest> asks = this.requests.get(priority);
@@ -246,10 +256,7 @@ public class AppSchedulingInfo {
       this.requests.get(priority).remove(node.getRackName());
     }
 
-    // Do not remove ANY
-    ResourceRequest offSwitchRequest = requests.get(priority).get(
-        RMNode.ANY);
-    offSwitchRequest.setNumContainers(offSwitchRequest.getNumContainers() - 1);
+    decrementOutstanding(requests.get(priority).get(RMNode.ANY));
   }
 
   /**
@@ -271,10 +278,7 @@ public class AppSchedulingInfo {
       this.requests.get(priority).remove(node.getRackName());
     }
 
-    // Do not remove ANY
-    ResourceRequest offSwitchRequest = requests.get(priority).get(
-        RMNode.ANY);
-    offSwitchRequest.setNumContainers(offSwitchRequest.getNumContainers() - 1);
+    decrementOutstanding(requests.get(priority).get(RMNode.ANY));
   }
 
   /**
@@ -291,11 +295,32 @@ public class AppSchedulingInfo {
     allocate(container);
 
     // Update future requirements
+    decrementOutstanding(offSwitchRequest);
+  }
+
+  synchronized private void decrementOutstanding(
+      ResourceRequest offSwitchRequest) {
+    int numOffSwitchContainers = offSwitchRequest.getNumContainers() - 1;
 
     // Do not remove ANY
-    offSwitchRequest.setNumContainers(offSwitchRequest.getNumContainers() - 1);
+    offSwitchRequest.setNumContainers(numOffSwitchContainers);
+    
+    // Do we have any outstanding requests?
+    // If there is nothing, we need to deactivate this application
+    if (numOffSwitchContainers == 0) {
+      boolean deactivate = true;
+      for (Priority priority : getPriorities()) {
+        ResourceRequest request = getResourceRequest(priority, RMNodeImpl.ANY);
+        if (request.getNumContainers() > 0) {
+          deactivate = false;
+          break;
+        }
+      }
+      if (deactivate) {
+        activeUsersManager.deactivateApplication(user, applicationId);
+      }
+    }
   }
-
   synchronized private void allocate(Container container) {
     // Update consumption and track allocations
     //TODO: fixme sharad
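
Note on decrementOutstanding() above: an application is deactivated in the ActiveUsersManager only once the off-switch (ANY) ask at every priority has dropped to zero. A deliberately simplified stand-in for that check (hypothetical class; integer priorities and a plain map instead of the Priority/ResourceRequest records):

    import java.util.HashMap;
    import java.util.Map;

    public class OutstandingAsksSketch {
      private final Map<Integer, Integer> anyAskByPriority = new HashMap<Integer, Integer>();

      public void updateAsk(int priority, int numContainers) {
        anyAskByPriority.put(priority, numContainers);
      }

      // Called after one container at 'priority' is allocated; returns true
      // when nothing is outstanding anywhere, i.e. the app should deactivate.
      public boolean allocateOneAndCheckIdle(int priority) {
        int remaining = anyAskByPriority.get(priority) - 1;
        anyAskByPriority.put(priority, remaining);
        if (remaining > 0) {
          return false;                  // still outstanding at this priority
        }
        for (int ask : anyAskByPriority.values()) {
          if (ask > 0) {
            return false;                // some other priority still has asks
          }
        }
        return true;
      }
    }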

+ 40 - 0
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java

@@ -60,6 +60,8 @@ public class QueueMetrics {
   @Metric("# of pending containers") MutableGaugeInt pendingContainers;
   @Metric("# of reserved memory in GiB") MutableGaugeInt reservedGB;
   @Metric("# of reserved containers") MutableGaugeInt reservedContainers;
+  @Metric("# of active users") MutableGaugeInt activeUsers;
+  @Metric("# of active users") MutableGaugeInt activeApplications;
 
   static final Logger LOG = LoggerFactory.getLogger(QueueMetrics.class);
   static final int GB = 1024; // resource.memory is in MB
@@ -287,6 +289,36 @@ public class QueueMetrics {
     }
   }
 
+  public void incrActiveUsers() {
+    activeUsers.incr();
+  }
+  
+  public void decrActiveUsers() {
+    activeUsers.decr();
+  }
+  
+  public void activateApp(String user) {
+    activeApplications.incr();
+    QueueMetrics userMetrics = getUserMetrics(user);
+    if (userMetrics != null) {
+      userMetrics.activateApp(user);
+    }
+    if (parent != null) {
+      parent.activateApp(user);
+    }
+  }
+  
+  public void deactivateApp(String user) {
+    activeApplications.decr();
+    QueueMetrics userMetrics = getUserMetrics(user);
+    if (userMetrics != null) {
+      userMetrics.deactivateApp(user);
+    }
+    if (parent != null) {
+      parent.deactivateApp(user);
+    }
+  }
+  
   public int getAppsSubmitted() {
     return appsSubmitted.value();
   }
@@ -338,4 +370,12 @@ public class QueueMetrics {
   public int getReservedContainers() {
     return reservedContainers.value();
   }
+  
+  public int getActiveUsers() {
+    return activeUsers.value();
+  }
+  
+  public int getActiveApps() {
+    return activeApplications.value();
+  }
 }
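
Note on activateApp()/deactivateApp() above: the new gauges are rolled up recursively, so an application activated against a leaf queue is also reflected in the per-user metrics and in every ancestor queue's metrics. A tiny self-contained model of that roll-up (hypothetical class; parent chain only, ignoring the per-user branch):

    public class RollupGaugeSketch {
      private final RollupGaugeSketch parent;   // null at the root
      private int activeApplications;

      public RollupGaugeSketch(RollupGaugeSketch parent) { this.parent = parent; }

      public void activateApp() {
        activeApplications++;
        if (parent != null) {
          parent.activateApp();                 // leaf activity bubbles up to every ancestor
        }
      }

      public void deactivateApp() {
        activeApplications--;
        if (parent != null) {
          parent.deactivateApp();
        }
      }

      public int getActiveApps() { return activeApplications; }

      public static void main(String[] args) {
        RollupGaugeSketch root = new RollupGaugeSketch(null);
        RollupGaugeSketch leaf = new RollupGaugeSketch(root);
        leaf.activateApp();
        System.out.println(root.getActiveApps());   // 1: visible at the root too
      }
    }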

+ 3 - 2
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApp.java

@@ -102,11 +102,12 @@ public class SchedulerApp {
 
   private final RMContext rmContext;
   public SchedulerApp(ApplicationAttemptId applicationAttemptId, 
-      String user, Queue queue, 
+      String user, Queue queue, ActiveUsersManager activeUsersManager,
       RMContext rmContext, ApplicationStore store) {
     this.rmContext = rmContext;
     this.appSchedulingInfo = 
-        new AppSchedulingInfo(applicationAttemptId, user, queue, store);
+        new AppSchedulingInfo(applicationAttemptId, user, queue,  
+            activeUsersManager, store);
     this.queue = queue;
   }
 

+ 7 - 0
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueue.java

@@ -32,6 +32,7 @@ import org.apache.hadoop.yarn.api.records.QueueState;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApp;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
 
@@ -197,6 +198,12 @@ extends org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue {
    */
   public void updateClusterResource(Resource clusterResource);
   
+  /**
+   * Get the {@link ActiveUsersManager} for the queue.
+   * @return the <code>ActiveUsersManager</code> for the queue
+   */
+  public ActiveUsersManager getActiveUsersManager();
+  
   /**
    * Recover the state of the queue
    * @param clusterResource the resource of the cluster

+ 2 - 1
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java

@@ -355,7 +355,8 @@ implements ResourceScheduler, CapacitySchedulerContext {
 
     // TODO: Fix store
     SchedulerApp SchedulerApp = 
-        new SchedulerApp(applicationAttemptId, user, queue, rmContext, null);
+        new SchedulerApp(applicationAttemptId, user, queue, 
+            queue.getActiveUsersManager(), rmContext, null);
 
     // Submit to the queue
     try {

+ 26 - 8
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java

@@ -37,6 +37,8 @@ import org.apache.hadoop.classification.InterfaceStability.Unstable;
 import org.apache.hadoop.security.AccessControlException;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.authorize.AccessControlList;
+import org.apache.hadoop.yarn.Lock;
+import org.apache.hadoop.yarn.Lock.NoLock;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.ContainerId;
@@ -58,6 +60,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApp;
@@ -120,6 +123,8 @@ public class LeafQueue implements CSQueue {
 
   private CapacitySchedulerContext scheduler;
   
+  private final ActiveUsersManager activeUsersManager;
+  
   final static int DEFAULT_AM_RESOURCE = 2 * 1024;
   
   public LeafQueue(CapacitySchedulerContext cs, 
@@ -132,7 +137,7 @@ public class LeafQueue implements CSQueue {
     this.metrics = old != null ? old.getMetrics() :
         QueueMetrics.forQueue(getQueuePath(), parent,
         cs.getConfiguration().getEnableUserMetrics());
-    
+    this.activeUsersManager = new ActiveUsersManager(metrics);
     this.minimumAllocation = cs.getMinimumResourceCapability();
     this.maximumAllocation = cs.getMaximumResourceCapability();
     this.minimumAllocationFactor = 
@@ -348,6 +353,11 @@ public class LeafQueue implements CSQueue {
     return maxActiveApplicationsPerUser;
   }
 
+  @Override
+  public ActiveUsersManager getActiveUsersManager() {
+    return activeUsersManager;
+  }
+
   @Override
   public synchronized float getUsedCapacity() {
     return usedCapacity;
@@ -674,6 +684,12 @@ public class LeafQueue implements CSQueue {
     // Check if we can activate more applications
     activateApplications();
     
+    // Inform the activeUsersManager
+    synchronized (application) {
+      activeUsersManager.deactivateApplication(
+          application.getUser(), application.getApplicationId());
+    }
+    
     LOG.info("Application removed -" +
         " appId: " + application.getApplicationId() + 
         " user: " + application.getUser() + 
@@ -837,6 +853,7 @@ public class LeafQueue implements CSQueue {
     return true;
   }
 
+  @Lock({LeafQueue.class, SchedulerApp.class})
   private Resource computeAndSetUserResourceLimit(SchedulerApp application, 
       Resource clusterResource, Resource required) {
     String user = application.getUser();
@@ -853,6 +870,7 @@ public class LeafQueue implements CSQueue {
         minimumAllocation.getMemory();
   }
   
+  @Lock(NoLock.class)
   private Resource computeUserLimit(SchedulerApp application, 
       Resource clusterResource, Resource required) {
     // What is our current capacity? 
@@ -877,11 +895,8 @@ public class LeafQueue implements CSQueue {
     // queue's configured capacity * user-limit-factor.
     // Also, the queue's configured capacity should be higher than 
     // queue-hard-limit * ulMin
-
-    String userName = application.getUser();
     
-    final int activeUsers = users.size();  
-    User user = getUser(userName);
+    final int activeUsers = activeUsersManager.getNumActiveUsers();  
 
     int limit = 
       roundUp(
@@ -893,12 +908,13 @@ public class LeafQueue implements CSQueue {
           );
 
     if (LOG.isDebugEnabled()) {
+      String userName = application.getUser();
       LOG.debug("User limit computation for " + userName + 
           " in queue " + getQueueName() +
           " userLimit=" + userLimit +
           " userLimitFactor=" + userLimitFactor +
           " required: " + required + 
-          " consumed: " + user.getConsumedResources() + 
+          " consumed: " + getUser(userName).getConsumedResources() + 
           " limit: " + limit +
           " queueCapacity: " + queueCapacity + 
           " qconsumed: " + consumed +
@@ -1308,8 +1324,10 @@ public class LeafQueue implements CSQueue {
     
     // Update application properties
     for (SchedulerApp application : activeApplications) {
-      computeAndSetUserResourceLimit(
-          application, clusterResource, Resources.none());
+      synchronized (application) {
+        computeAndSetUserResourceLimit(
+            application, clusterResource, Resources.none());
+      }
     }
   }
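
Note on computeUserLimit() above: the divisor changes from the total number of users the queue has ever seen (users.size()) to the number of currently active users, so an idle user no longer shrinks everyone else's share. A deliberately simplified illustration of why the divisor matters (hypothetical helper; the real formula also involves the configured user-limit percent and user-limit-factor, which are omitted here):

    public class UserShareSketch {
      // capacityMB: the queue's current capacity; activeUsers: users with
      // outstanding asks; minAllocMB: the scheduler's minimum allocation.
      public static int perUserLimitMB(int capacityMB, int activeUsers, int minAllocMB) {
        int share = (capacityMB + activeUsers - 1) / activeUsers;      // ceil division
        return ((share + minAllocMB - 1) / minAllocMB) * minAllocMB;   // round up to a multiple
      }

      public static void main(String[] args) {
        System.out.println(perUserLimitMB(8 * 1024, 1, 1024));   // 8192: one active user
        System.out.println(perUserLimitMB(8 * 1024, 2, 1024));   // 4096: split between two
      }
    }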
   

+ 7 - 0
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java

@@ -48,6 +48,7 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApp;
@@ -240,6 +241,12 @@ public class ParentQueue implements CSQueue {
     return maximumCapacity;
   }
 
+  @Override
+  public ActiveUsersManager getActiveUsersManager() {
+    // Should never be called since all applications are submitted to LeafQueues
+    return null;
+  }
+
   @Override
   public synchronized float getUsedCapacity() {
     return usedCapacity;

+ 16 - 3
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java

@@ -66,6 +66,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue;
@@ -124,10 +125,11 @@ public class FifoScheduler implements ResourceScheduler {
 
   private Map<ApplicationAttemptId, SchedulerApp> applications
       = new TreeMap<ApplicationAttemptId, SchedulerApp>();
+  
+  private final ActiveUsersManager activeUsersManager;
 
   private static final String DEFAULT_QUEUE_NAME = "default";
-  private final QueueMetrics metrics =
-    QueueMetrics.forQueue(DEFAULT_QUEUE_NAME, null, false);
+  private final QueueMetrics metrics;
 
   private final Queue DEFAULT_QUEUE = new Queue() {
     @Override
@@ -174,6 +176,11 @@ public class FifoScheduler implements ResourceScheduler {
     }
   };
 
+  public FifoScheduler() {
+    metrics = QueueMetrics.forQueue(DEFAULT_QUEUE_NAME, null, false);
+    activeUsersManager = new ActiveUsersManager(metrics);
+  }
+  
   @Override
   public Resource getMinimumResourceCapability() {
     return minimumAllocation;
@@ -288,7 +295,7 @@ public class FifoScheduler implements ResourceScheduler {
       String user) {
     // TODO: Fix store
     SchedulerApp schedulerApp = 
-        new SchedulerApp(appAttemptId, user, DEFAULT_QUEUE, 
+        new SchedulerApp(appAttemptId, user, DEFAULT_QUEUE, activeUsersManager,
             this.rmContext, null);
     applications.put(appAttemptId, schedulerApp);
     metrics.submitApp(user);
@@ -318,6 +325,12 @@ public class FifoScheduler implements ResourceScheduler {
           RMContainerEventType.KILL);
     }
 
+    // Inform the activeUsersManager
+    synchronized (application) {
+      activeUsersManager.deactivateApplication(
+          application.getUser(), application.getApplicationId());
+    }
+
     // Clean up pending requests, metrics etc.
     application.stop(rmAppAttemptFinalState);
 

+ 2 - 2
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java

@@ -68,7 +68,7 @@ public class MetricsOverviewTable extends HtmlBlock {
         th().$class("ui-state-default")._("Memory Used")._().
         th().$class("ui-state-default")._("Memory Total")._().
         th().$class("ui-state-default")._("Memory Reserved")._().
-        th().$class("ui-state-default")._("Total Nodes")._().
+        th().$class("ui-state-default")._("Active Nodes")._().
         th().$class("ui-state-default")._("Decommissioned Nodes")._().
         th().$class("ui-state-default")._("Lost Nodes")._().
         th().$class("ui-state-default")._("Unhealthy Nodes")._().
@@ -82,7 +82,7 @@ public class MetricsOverviewTable extends HtmlBlock {
         td(StringUtils.byteDesc(clusterMetrics.getAllocatedMB() * BYTES_IN_MB)).
         td(StringUtils.byteDesc(clusterMetrics.getTotalMB() * BYTES_IN_MB)).
         td(StringUtils.byteDesc(clusterMetrics.getReservedMB() * BYTES_IN_MB)).
-        td().a(url("nodes"),String.valueOf(clusterMetrics.getTotalNodes()))._().
+        td().a(url("nodes"),String.valueOf(clusterMetrics.getActiveNodes()))._().
         td().a(url("nodes/decommissioned"),String.valueOf(clusterMetrics.getDecommissionedNodes()))._().
         td().a(url("nodes/lost"),String.valueOf(clusterMetrics.getLostNodes()))._().
         td().a(url("nodes/unhealthy"),String.valueOf(clusterMetrics.getUnhealthyNodes()))._().

+ 25 - 5
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java

@@ -24,6 +24,8 @@ import static org.apache.hadoop.yarn.webapp.view.JQueryUI.DATATABLES_ID;
 import static org.apache.hadoop.yarn.webapp.view.JQueryUI.initID;
 import static org.apache.hadoop.yarn.webapp.view.JQueryUI.tableInit;
 
+import java.util.Collection;
+
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
@@ -36,6 +38,7 @@ import org.apache.hadoop.yarn.webapp.SubView;
 import org.apache.hadoop.yarn.webapp.hamlet.Hamlet;
 import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE;
 import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TBODY;
+import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TR;
 import org.apache.hadoop.yarn.webapp.view.HtmlBlock;
 
 import com.google.inject.Inject;
@@ -79,7 +82,19 @@ class NodesPage extends RmView {
       if(type != null && !type.isEmpty()) {
         stateFilter = RMNodeState.valueOf(type.toUpperCase());
       }
-      for (RMNode ni : this.rmContext.getRMNodes().values()) {
+      Collection<RMNode> rmNodes = this.rmContext.getRMNodes().values();
+      boolean isInactive = false;
+      if (stateFilter != null) {
+        switch (stateFilter) {
+        case DECOMMISSIONED:
+        case LOST:
+        case REBOOTED:
+          rmNodes = this.rmContext.getInactiveRMNodes().values();
+          isInactive = true;
+          break;
+        }
+      }
+      for (RMNode ni : rmNodes) {
         if(stateFilter != null) {
           RMNodeState state = ni.getState();
           if(!stateFilter.equals(state)) {
@@ -89,12 +104,17 @@ class NodesPage extends RmView {
         NodeInfo info = new NodeInfo(ni, sched);
         int usedMemory = (int)info.getUsedMemory();
         int availableMemory = (int)info.getAvailableMemory();
-        tbody.tr().
+        TR<TBODY<TABLE<Hamlet>>> row = tbody.tr().
             td(info.getRack()).
             td(info.getState()).
-            td(info.getNodeId()).
-            td().a("http://" + info.getNodeHTTPAddress(), info.getNodeHTTPAddress())._().
-            td(info.getHealthStatus()).
+            td(info.getNodeId());
+        if (isInactive) {
+          row.td()._("N/A")._();
+        } else {
+          String httpAddress = info.getNodeHTTPAddress();
+          row.td().a("http://" + httpAddress, httpAddress)._();
+        }
+        row.td(info.getHealthStatus()).
             td(Times.format(info.getLastHealthUpdate())).
             td(info.getHealthReport()).
             td(String.valueOf(info.getNumContainers())).

+ 30 - 5
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java

@@ -19,6 +19,7 @@
 package org.apache.hadoop.yarn.server.resourcemanager.webapp;
 
 import java.io.IOException;
+import java.util.Collection;
 import java.util.concurrent.ConcurrentMap;
 
 import javax.servlet.http.HttpServletRequest;
@@ -68,6 +69,7 @@ import com.google.inject.Singleton;
 @Singleton
 @Path("/ws/v1/cluster")
 public class RMWebServices {
+  private static final String EMPTY = "";
   private static final Log LOG = LogFactory.getLog(RMWebServices.class);
   private final ResourceManager rm;
   private static RecordFactory recordFactory = RecordFactoryProvider
@@ -144,12 +146,23 @@ public class RMWebServices {
     if (sched == null) {
       throw new NotFoundException("Null ResourceScheduler instance");
     }
-
+    Collection<RMNode> rmNodes = this.rm.getRMContext().getRMNodes().values();
+    boolean isInactive = false;
+    if (filterState != null && !filterState.isEmpty()) {
+      RMNodeState nodeState = RMNodeState.valueOf(filterState.toUpperCase());
+      switch (nodeState) {
+      case DECOMMISSIONED:
+      case LOST:
+      case REBOOTED:
+        rmNodes = this.rm.getRMContext().getInactiveRMNodes().values();
+        isInactive = true;
+        break;
+      }
+    }
     NodesInfo allNodes = new NodesInfo();
-    for (RMNode ni : this.rm.getRMContext().getRMNodes().values()) {
+    for (RMNode ni : rmNodes) {
       NodeInfo nodeInfo = new NodeInfo(ni, sched);
       if (filterState != null) {
-        RMNodeState.valueOf(filterState);
         if (!(nodeInfo.getState().equalsIgnoreCase(filterState))) {
           continue;
         }
@@ -165,6 +178,9 @@ public class RMWebServices {
           continue;
         }
       }
+      if (isInactive) {
+        nodeInfo.setNodeHTTPAddress(EMPTY);
+      }
       allNodes.add(nodeInfo);
     }
     return allNodes;
@@ -183,10 +199,19 @@ public class RMWebServices {
     }
     NodeId nid = ConverterUtils.toNodeId(nodeId);
     RMNode ni = this.rm.getRMContext().getRMNodes().get(nid);
+    boolean isInactive = false;
     if (ni == null) {
-      throw new NotFoundException("nodeId, " + nodeId + ", is not found");
+      ni = this.rm.getRMContext().getInactiveRMNodes().get(nid.getHost());
+      if (ni == null) {
+        throw new NotFoundException("nodeId, " + nodeId + ", is not found");
+      }
+      isInactive = true;
+    }
+    NodeInfo nodeInfo = new NodeInfo(ni, sched);
+    if (isInactive) {
+      nodeInfo.setNodeHTTPAddress(EMPTY);
     }
-    return new NodeInfo(ni, sched);
+    return nodeInfo;
   }
 
   @GET
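
Note on the two endpoints above: a state filter of DECOMMISSIONED, LOST or REBOOTED is now answered from the inactive-nodes map, and such nodes are reported with an empty HTTP address since there is no live NodeManager web UI to link to; the single-node lookup likewise falls back to the inactive map by host. A simplified sketch of that routing (hypothetical types; the real code works on RMNode/NodeInfo records):

    import java.util.Collection;
    import java.util.Map;

    public class NodeQueryRoutingSketch {
      public enum NodeState { NEW, RUNNING, UNHEALTHY, DECOMMISSIONED, LOST, REBOOTED }

      public static Collection<String> select(NodeState filter,
          Map<String, String> activeByHost, Map<String, String> inactiveByHost) {
        if (filter == NodeState.DECOMMISSIONED || filter == NodeState.LOST
            || filter == NodeState.REBOOTED) {
          // Inactive nodes: the caller should blank the HTTP address before reporting.
          return inactiveByHost.values();
        }
        return activeByHost.values();
      }
    }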

+ 8 - 2
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterMetricsInfo.java

@@ -44,6 +44,7 @@ public class ClusterMetricsInfo {
   protected int unhealthyNodes;
   protected int decommissionedNodes;
   protected int rebootedNodes;
+  protected int activeNodes;
 
   public ClusterMetricsInfo() {
   } // JAXB needs this
@@ -59,12 +60,13 @@ public class ClusterMetricsInfo {
     this.allocatedMB = metrics.getAllocatedGB() * MB_IN_GB;
     this.containersAllocated = metrics.getAllocatedContainers();
     this.totalMB = availableMB + reservedMB + allocatedMB;
-    this.totalNodes = clusterMetrics.getNumNMs();
+    this.activeNodes = clusterMetrics.getNumActiveNMs();
     this.lostNodes = clusterMetrics.getNumLostNMs();
     this.unhealthyNodes = clusterMetrics.getUnhealthyNMs();
     this.decommissionedNodes = clusterMetrics.getNumDecommisionedNMs();
     this.rebootedNodes = clusterMetrics.getNumRebootedNMs();
-
+    this.totalNodes = activeNodes + lostNodes + decommissionedNodes
+        + rebootedNodes;
   }
 
   public int getAppsSubmitted() {
@@ -94,6 +96,10 @@ public class ClusterMetricsInfo {
   public int getTotalNodes() {
     return this.totalNodes;
   }
+  
+  public int getActiveNodes() {
+    return this.activeNodes;
+  }
 
   public int getLostNodes() {
     return this.lostNodes;
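
Note on the ClusterMetricsInfo change above: totalNodes is no longer read from a single gauge but derived as activeNodes + lostNodes + decommissionedNodes + rebootedNodes. A worked example with purely illustrative numbers: with 40 active, 2 lost, 3 decommissioned and 1 rebooted node, the REST response would report activeNodes=40 and totalNodes = 40 + 2 + 3 + 1 = 46.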

+ 4 - 0
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeInfo.java

@@ -94,6 +94,10 @@ public class NodeInfo {
   public String getNodeHTTPAddress() {
     return this.nodeHTTPAddress;
   }
+  
+  public void setNodeHTTPAddress(String nodeHTTPAddress) {
+    this.nodeHTTPAddress = nodeHTTPAddress;
+  }
 
   public String getHealthStatus() {
     return this.healthStatus;

+ 8 - 2
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java

@@ -81,13 +81,20 @@ public class MockNM {
   }
 
   public HeartbeatResponse nodeHeartbeat(boolean b) throws Exception {
-    return nodeHeartbeat(new HashMap<ApplicationId, List<ContainerStatus>>(), b);
+    return nodeHeartbeat(new HashMap<ApplicationId, List<ContainerStatus>>(),
+        b, ++responseId);
   }
 
   public HeartbeatResponse nodeHeartbeat(Map<ApplicationId, 
       List<ContainerStatus>> conts, boolean isHealthy) throws Exception {
+    return nodeHeartbeat(conts, isHealthy, ++responseId);
+  }
+
+  public HeartbeatResponse nodeHeartbeat(Map<ApplicationId, 
+      List<ContainerStatus>> conts, boolean isHealthy, int resId) throws Exception {
     NodeHeartbeatRequest req = Records.newRecord(NodeHeartbeatRequest.class);
     NodeStatus status = Records.newRecord(NodeStatus.class);
+    status.setResponseId(resId);
     status.setNodeId(nodeId);
     for (Map.Entry<ApplicationId, List<ContainerStatus>> entry : conts.entrySet()) {
       status.setContainersStatuses(entry.getValue());
@@ -97,7 +104,6 @@ public class MockNM {
     healthStatus.setIsNodeHealthy(isHealthy);
     healthStatus.setLastHealthReportTime(1);
     status.setNodeHealthStatus(healthStatus);
-    status.setResponseId(++responseId);
     req.setNodeStatus(status);
     return resourceTracker.nodeHeartbeat(req).getHeartbeatResponse();
   }

+ 113 - 74
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java

@@ -56,6 +56,17 @@ public class MockNodes {
     }
     return list;
   }
+  
+  public static List<RMNode> lostNodes(int racks, int nodesPerRack,
+      Resource perNode) {
+    List<RMNode> list = Lists.newArrayList();
+    for (int i = 0; i < racks; ++i) {
+      for (int j = 0; j < nodesPerRack; ++j) {
+        list.add(lostNodeInfo(i, perNode, RMNodeState.LOST));
+      }
+    }
+    return list;
+  }
 
   public static NodeId newNodeID(String host, int port) {
     NodeId nid = recordFactory.newRecordInstance(NodeId.class);
@@ -82,92 +93,120 @@ public class MockNodes {
     return rs;
   }
 
-  public static RMNode newNodeInfo(int rack, final Resource perNode) {
-    final String rackName = "rack"+ rack;
-    final int nid = NODE_ID++;
-    final String hostName = "host"+ nid;
-    final int port = 123;
-    final NodeId nodeID = newNodeID(hostName, port);
-    final String httpAddress = "localhost:0";
-    final NodeHealthStatus nodeHealthStatus =
-        recordFactory.newRecordInstance(NodeHealthStatus.class);
-    final Resource used = newUsedResource(perNode);
-    final Resource avail = newAvailResource(perNode, used);
-    return new RMNode() {
-      @Override
-      public NodeId getNodeID() {
-        return nodeID;
-      }
+  private static class MockRMNodeImpl implements RMNode {
+    private NodeId nodeId;
+    private String hostName;
+    private String nodeAddr;
+    private String httpAddress;
+    private int cmdPort;
+    private Resource perNode;
+    private String rackName;
+    private NodeHealthStatus nodeHealthStatus;
+    private RMNodeState state;
+
+    public MockRMNodeImpl(NodeId nodeId, String nodeAddr, String httpAddress,
+        Resource perNode, String rackName, NodeHealthStatus nodeHealthStatus,
+        int cmdPort, String hostName, RMNodeState state) {
+      this.nodeId = nodeId;
+      this.nodeAddr = nodeAddr;
+      this.httpAddress = httpAddress;
+      this.perNode = perNode;
+      this.rackName = rackName;
+      this.nodeHealthStatus = nodeHealthStatus;
+      this.cmdPort = cmdPort;
+      this.hostName = hostName;
+      this.state = state;
+    }
 
-      @Override
-      public String getNodeAddress() {
-        return hostName;
-      }
+    @Override
+    public NodeId getNodeID() {
+      return this.nodeId;
+    }
 
-      @Override
-      public String getHttpAddress() {
-        return httpAddress;
-      }
+    @Override
+    public String getHostName() {
+      return this.hostName;
+    }
 
-      @Override
-      public Resource getTotalCapability() {
-        return perNode;
-      }
+    @Override
+    public int getCommandPort() {
+      return this.cmdPort;
+    }
 
-      @Override
-      public String getRackName() {
-        return rackName;
-      }
+    @Override
+    public int getHttpPort() {
+      return 0;
+    }
 
-      @Override
-      public Node getNode() {
-        throw new UnsupportedOperationException("Not supported yet.");
-      }
+    @Override
+    public String getNodeAddress() {
+      return this.nodeAddr;
+    }
 
-      @Override
-      public NodeHealthStatus getNodeHealthStatus() {
-        return nodeHealthStatus;
-      }
+    @Override
+    public String getHttpAddress() {
+      return this.httpAddress;
+    }
 
-      @Override
-      public int getCommandPort() {
-        return nid;
-      }
+    @Override
+    public NodeHealthStatus getNodeHealthStatus() {
+      return this.nodeHealthStatus;
+    }
 
-      @Override
-      public int getHttpPort() {
-        // TODO Auto-generated method stub
-        return 0;
-      }
+    @Override
+    public Resource getTotalCapability() {
+      return this.perNode;
+    }
 
-      @Override
-      public String getHostName() {
-        return hostName;
-      }
+    @Override
+    public String getRackName() {
+      return this.rackName;
+    }
 
-      @Override
-      public RMNodeState getState() {
-        // TODO Auto-generated method stub
-        return null;
-      }
+    @Override
+    public Node getNode() {
+      throw new UnsupportedOperationException("Not supported yet.");
+    }
 
-      @Override
-      public List<ApplicationId> getAppsToCleanup() {
-        // TODO Auto-generated method stub
-        return null;
-      }
+    @Override
+    public RMNodeState getState() {
+      return this.state;
+    }
 
-      @Override
-      public List<ContainerId> getContainersToCleanUp() {
-        // TODO Auto-generated method stub
-        return null;
-      }
+    @Override
+    public List<ContainerId> getContainersToCleanUp() {
+      return null;
+    }
 
-      @Override
-      public HeartbeatResponse getLastHeartBeatResponse() {
-        // TODO Auto-generated method stub
-        return null;
-      }
-    };
+    @Override
+    public List<ApplicationId> getAppsToCleanup() {
+      return null;
+    }
+
+    @Override
+    public HeartbeatResponse getLastHeartBeatResponse() {
+      return null;
+    }
+  };
+
+  private static RMNode buildRMNode(int rack, final Resource perNode, RMNodeState state, String httpAddr) {
+    final String rackName = "rack"+ rack;
+    final int nid = NODE_ID++;
+    final String hostName = "host"+ nid;
+    final int port = 123;
+    final NodeId nodeID = newNodeID(hostName, port);
+    final String httpAddress = httpAddr;
+    final NodeHealthStatus nodeHealthStatus =
+        recordFactory.newRecordInstance(NodeHealthStatus.class);
+    return new MockRMNodeImpl(nodeID, hostName, httpAddress, perNode, rackName,
+        nodeHealthStatus, nid, hostName, state); 
+  }
+
+  public static RMNode lostNodeInfo(int rack, final Resource perNode, RMNodeState state) {
+    return buildRMNode(rack, perNode, state, "N/A");
+  }
+
+  public static RMNode newNodeInfo(int rack, final Resource perNode) {
+    return buildRMNode(rack, perNode, null, "localhost:0");
   }
 }

+ 6 - 0
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java

@@ -130,6 +130,12 @@ public class MockRM extends ResourceManager {
         nm.getNodeId());
     node.handle(new RMNodeEvent(nm.getNodeId(), RMNodeEventType.STARTED));
   }
+  
+  public void sendNodeLost(MockNM nm) throws Exception {
+    RMNodeImpl node = (RMNodeImpl) getRMContext().getRMNodes().get(
+        nm.getNodeId());
+    node.handle(new RMNodeEvent(nm.getNodeId(), RMNodeEventType.EXPIRE));
+  }
 
   public void NMwaitForState(NodeId nodeid, RMNodeState finalState)
       throws Exception {

+ 3 - 2
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java

@@ -31,6 +31,7 @@ import junit.framework.Assert;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerStatus;
 import org.apache.hadoop.yarn.api.records.NodeHealthStatus;
+import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.server.api.records.HeartbeatResponse;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore;
@@ -100,8 +101,8 @@ public class TestRMNodeTransitions {
     rmDispatcher.register(SchedulerEventType.class, 
         new TestSchedulerEventDispatcher());
     
-    
-    node = new RMNodeImpl(null, rmContext, null, 0, 0, null, null);
+    NodeId nodeId = BuilderUtils.newNodeId("localhost", 0);
+    node = new RMNodeImpl(nodeId, rmContext, null, 0, 0, null, null);
 
   }
   

+ 2 - 2
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java

@@ -157,14 +157,14 @@ public class TestResourceTrackerService {
     rm.start();
 
     MockNM nm1 = rm.registerNode("host1:1234", 5120);
-    MockNM nm2 = new MockNM("host2:1234", 2048, rm.getResourceTrackerService());
+    MockNM nm2 = rm.registerNode("host2:1234", 2048);
 
     int initialMetricCount = ClusterMetrics.getMetrics().getNumRebootedNMs();
     HeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
     Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));
 
     nodeHeartbeat = nm2.nodeHeartbeat(
-      new HashMap<ApplicationId, List<ContainerStatus>>(), true);
+      new HashMap<ApplicationId, List<ContainerStatus>>(), true, -100);
     Assert.assertTrue(NodeAction.REBOOT.equals(nodeHeartbeat.getNodeAction()));
     checkRebootedNMCount(rm, ++initialMetricCount);
   }

+ 6 - 3
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java

@@ -302,7 +302,8 @@ public class TestApplicationLimits {
     final ApplicationAttemptId appAttemptId_0_0 = 
         TestUtils.getMockApplicationAttemptId(0, 0); 
     SchedulerApp app_0_0 = 
-        spy(new SchedulerApp(appAttemptId_0_0, user_0, queue, rmContext, null));
+        spy(new SchedulerApp(appAttemptId_0_0, user_0, queue, 
+            queue.getActiveUsersManager(), rmContext, null));
     queue.submitApplication(app_0_0, user_0, A);
 
     List<ResourceRequest> app_0_0_requests = new ArrayList<ResourceRequest>();
@@ -320,7 +321,8 @@ public class TestApplicationLimits {
     final ApplicationAttemptId appAttemptId_0_1 = 
         TestUtils.getMockApplicationAttemptId(1, 0); 
     SchedulerApp app_0_1 = 
-        spy(new SchedulerApp(appAttemptId_0_1, user_0, queue, rmContext, null));
+        spy(new SchedulerApp(appAttemptId_0_1, user_0, queue, 
+            queue.getActiveUsersManager(), rmContext, null));
     queue.submitApplication(app_0_1, user_0, A);
     
     List<ResourceRequest> app_0_1_requests = new ArrayList<ResourceRequest>();
@@ -338,7 +340,8 @@ public class TestApplicationLimits {
     final ApplicationAttemptId appAttemptId_1_0 = 
         TestUtils.getMockApplicationAttemptId(2, 0); 
     SchedulerApp app_1_0 = 
-        spy(new SchedulerApp(appAttemptId_1_0, user_1, queue, rmContext, null));
+        spy(new SchedulerApp(appAttemptId_1_0, user_1, queue, 
+            queue.getActiveUsersManager(), rmContext, null));
     queue.submitApplication(app_1_0, user_1, A);
 
     List<ResourceRequest> app_1_0_requests = new ArrayList<ResourceRequest>();

+ 148 - 35
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java

@@ -18,8 +18,18 @@
 
 package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;
 
-import static org.junit.Assert.*;
-import static org.mockito.Mockito.*;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.eq;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.doNothing;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.spy;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -28,9 +38,6 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.Container;
@@ -48,19 +55,17 @@ import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApp;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
-
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
-
 import org.mockito.invocation.InvocationOnMock;
 import org.mockito.stubbing.Answer;
 
 public class TestLeafQueue {
-  private static final Log LOG = LogFactory.getLog(TestLeafQueue.class);
   
   private final RecordFactory recordFactory = 
       RecordFactoryProvider.getRecordFactory(null);
@@ -136,7 +141,6 @@ public class TestLeafQueue {
     final String Q_C1 = Q_C + "." + C1;
     conf.setCapacity(Q_C1, 100);
     
-    LOG.info("Setup top-level queues a and b");
   }
 
   static LeafQueue stubLeafQueue(LeafQueue queue) {
@@ -217,13 +221,15 @@ public class TestLeafQueue {
     final ApplicationAttemptId appAttemptId_0 = 
         TestUtils.getMockApplicationAttemptId(0, 0); 
     SchedulerApp app_0 = 
-        new SchedulerApp(appAttemptId_0, user_0, a, rmContext, null);
+        new SchedulerApp(appAttemptId_0, user_0, a, 
+            mock(ActiveUsersManager.class), rmContext, null);
     a.submitApplication(app_0, user_0, B);
 
     final ApplicationAttemptId appAttemptId_1 = 
         TestUtils.getMockApplicationAttemptId(1, 0); 
     SchedulerApp app_1 = 
-        new SchedulerApp(appAttemptId_1, user_0, a, rmContext, null);
+        new SchedulerApp(appAttemptId_1, user_0, a, 
+            mock(ActiveUsersManager.class), rmContext, null);
     a.submitApplication(app_1, user_0, B);  // same user
 
     
@@ -264,13 +270,15 @@ public class TestLeafQueue {
     final ApplicationAttemptId appAttemptId_0 = 
         TestUtils.getMockApplicationAttemptId(0, 0); 
     SchedulerApp app_0 = 
-        new SchedulerApp(appAttemptId_0, user_0, a, rmContext, null);
+        new SchedulerApp(appAttemptId_0, user_0, a, 
+            mock(ActiveUsersManager.class), rmContext, null);
     a.submitApplication(app_0, user_0, A);
 
     final ApplicationAttemptId appAttemptId_1 = 
         TestUtils.getMockApplicationAttemptId(1, 0); 
     SchedulerApp app_1 = 
-        new SchedulerApp(appAttemptId_1, user_0, a, rmContext, null);
+        new SchedulerApp(appAttemptId_1, user_0, a, 
+            mock(ActiveUsersManager.class), rmContext, null);
     a.submitApplication(app_1, user_0, A);  // same user
 
     
@@ -371,6 +379,99 @@ public class TestLeafQueue {
     assertEquals(1, a.getMetrics().getAvailableGB());
   }
   
+  @Test
+  public void testUserLimits() throws Exception {
+    // Mock the queue
+    LeafQueue a = stubLeafQueue((LeafQueue)queues.get(A));
+    //unset maxCapacity
+    a.setMaxCapacity(1.0f);
+    
+    // Users
+    final String user_0 = "user_0";
+    final String user_1 = "user_1";
+
+    // Submit applications
+    final ApplicationAttemptId appAttemptId_0 = 
+        TestUtils.getMockApplicationAttemptId(0, 0); 
+    SchedulerApp app_0 = 
+        new SchedulerApp(appAttemptId_0, user_0, a, 
+            a.getActiveUsersManager(), rmContext, null);
+    a.submitApplication(app_0, user_0, A);
+
+    final ApplicationAttemptId appAttemptId_1 = 
+        TestUtils.getMockApplicationAttemptId(1, 0); 
+    SchedulerApp app_1 = 
+        new SchedulerApp(appAttemptId_1, user_0, a, 
+            a.getActiveUsersManager(), rmContext, null);
+    a.submitApplication(app_1, user_0, A);  // same user
+
+    final ApplicationAttemptId appAttemptId_2 = 
+        TestUtils.getMockApplicationAttemptId(2, 0); 
+    SchedulerApp app_2 = 
+        new SchedulerApp(appAttemptId_2, user_1, a, 
+            a.getActiveUsersManager(), rmContext, null);
+    a.submitApplication(app_2, user_1, A);
+
+    // Setup some nodes
+    String host_0 = "host_0";
+    SchedulerNode node_0 = TestUtils.getMockNode(host_0, DEFAULT_RACK, 0, 8*GB);
+    String host_1 = "host_1";
+    SchedulerNode node_1 = TestUtils.getMockNode(host_1, DEFAULT_RACK, 0, 8*GB);
+    
+    final int numNodes = 2;
+    Resource clusterResource = Resources.createResource(numNodes * (8*GB));
+    when(csContext.getNumClusterNodes()).thenReturn(numNodes);
+ 
+    // Setup resource-requests
+    Priority priority = TestUtils.createMockPriority(1);
+    app_0.updateResourceRequests(Collections.singletonList(
+            TestUtils.createResourceRequest(RMNodeImpl.ANY, 2*GB, 1, priority,
+                recordFactory))); 
+
+    app_1.updateResourceRequests(Collections.singletonList(
+        TestUtils.createResourceRequest(RMNodeImpl.ANY, 1*GB, 2, priority,
+            recordFactory))); 
+
+    /**
+     * Start testing...
+     */
+    
+    // Set user-limit
+    a.setUserLimit(50);
+    a.setUserLimitFactor(2);
+    
+    // Now, only user_0 should be active since he is the only one with
+    // outstanding requests
+    assertEquals("There should only be 1 active user!", 
+        1, a.getActiveUsersManager().getNumActiveUsers());
+
+    // This commented-out block is key to testing 'activeUsers'.
+    // If uncommented, it would raise 'activeUsers' to 2 and the
+    // assertions below would fail; prior to MAPREDUCE-3732 the test
+    // would fail even without this block.
+//    app_2.updateResourceRequests(Collections.singletonList(
+//        TestUtils.createResourceRequest(RMNodeImpl.ANY, 1*GB, 1, priority,
+//            recordFactory))); 
+
+    // 1 container to user_0
+    a.assignContainers(clusterResource, node_0);
+    assertEquals(2*GB, a.getUsedResources().getMemory());
+    assertEquals(2*GB, app_0.getCurrentConsumption().getMemory());
+    assertEquals(0*GB, app_1.getCurrentConsumption().getMemory());
+
+    // Again one to user_0 since he hasn't exceeded user limit yet
+    a.assignContainers(clusterResource, node_0);
+    assertEquals(3*GB, a.getUsedResources().getMemory());
+    assertEquals(2*GB, app_0.getCurrentConsumption().getMemory());
+    assertEquals(1*GB, app_1.getCurrentConsumption().getMemory());
+
+    // One more to user_0 since he is the only active user
+    a.assignContainers(clusterResource, node_1);
+    assertEquals(4*GB, a.getUsedResources().getMemory());
+    assertEquals(2*GB, app_0.getCurrentConsumption().getMemory());
+    assertEquals(2*GB, app_1.getCurrentConsumption().getMemory());
+  }
+  
   @Test
   public void testSingleQueueWithMultipleUsers() throws Exception {
     
@@ -388,15 +489,31 @@ public class TestLeafQueue {
     final ApplicationAttemptId appAttemptId_0 = 
         TestUtils.getMockApplicationAttemptId(0, 0); 
     SchedulerApp app_0 = 
-        new SchedulerApp(appAttemptId_0, user_0, a, rmContext, null);
+        new SchedulerApp(appAttemptId_0, user_0, a, 
+            a.getActiveUsersManager(), rmContext, null);
     a.submitApplication(app_0, user_0, A);
 
     final ApplicationAttemptId appAttemptId_1 = 
         TestUtils.getMockApplicationAttemptId(1, 0); 
     SchedulerApp app_1 = 
-        new SchedulerApp(appAttemptId_1, user_0, a, rmContext, null);
+        new SchedulerApp(appAttemptId_1, user_0, a, 
+            a.getActiveUsersManager(), rmContext, null);
     a.submitApplication(app_1, user_0, A);  // same user
 
+    final ApplicationAttemptId appAttemptId_2 = 
+        TestUtils.getMockApplicationAttemptId(2, 0); 
+    SchedulerApp app_2 = 
+        new SchedulerApp(appAttemptId_2, user_1, a, 
+            a.getActiveUsersManager(), rmContext, null);
+    a.submitApplication(app_2, user_1, A);
+
+    final ApplicationAttemptId appAttemptId_3 = 
+        TestUtils.getMockApplicationAttemptId(3, 0); 
+    SchedulerApp app_3 = 
+        new SchedulerApp(appAttemptId_3, user_2, a, 
+            a.getActiveUsersManager(), rmContext, null);
+    a.submitApplication(app_3, user_2, A);
+    
     // Setup some nodes
     String host_0 = "host_0";
     SchedulerNode node_0 = TestUtils.getMockNode(host_0, DEFAULT_RACK, 0, 8*GB);
@@ -438,19 +555,8 @@ public class TestLeafQueue {
     assertEquals(2*GB, a.getUsedResources().getMemory());
     assertEquals(2*GB, app_0.getCurrentConsumption().getMemory());
     assertEquals(0*GB, app_1.getCurrentConsumption().getMemory());
-
-    // Submit more apps
-    final ApplicationAttemptId appAttemptId_2 = 
-        TestUtils.getMockApplicationAttemptId(2, 0); 
-    SchedulerApp app_2 = 
-        new SchedulerApp(appAttemptId_2, user_1, a, rmContext, null);
-    a.submitApplication(app_2, user_1, A);
-
-    final ApplicationAttemptId appAttemptId_3 = 
-        TestUtils.getMockApplicationAttemptId(3, 0); 
-    SchedulerApp app_3 = 
-        new SchedulerApp(appAttemptId_3, user_2, a, rmContext, null);
-    a.submitApplication(app_3, user_2, A);
+    
+    // Submit resource requests for other apps now to 'activate' them
     
     app_2.updateResourceRequests(Collections.singletonList(
         TestUtils.createResourceRequest(RMNodeImpl.ANY, 3*GB, 1, priority,
@@ -558,13 +664,15 @@ public class TestLeafQueue {
     final ApplicationAttemptId appAttemptId_0 = 
         TestUtils.getMockApplicationAttemptId(0, 0); 
     SchedulerApp app_0 = 
-        new SchedulerApp(appAttemptId_0, user_0, a, rmContext, null);
+        new SchedulerApp(appAttemptId_0, user_0, a, 
+            mock(ActiveUsersManager.class), rmContext, null);
     a.submitApplication(app_0, user_0, A);
 
     final ApplicationAttemptId appAttemptId_1 = 
         TestUtils.getMockApplicationAttemptId(1, 0); 
     SchedulerApp app_1 = 
-        new SchedulerApp(appAttemptId_1, user_1, a, rmContext, null);
+        new SchedulerApp(appAttemptId_1, user_1, a, 
+            mock(ActiveUsersManager.class), rmContext, null);
     a.submitApplication(app_1, user_1, A);  
 
     // Setup some nodes
@@ -657,13 +765,15 @@ public class TestLeafQueue {
     final ApplicationAttemptId appAttemptId_0 = 
         TestUtils.getMockApplicationAttemptId(0, 0); 
     SchedulerApp app_0 = 
-        new SchedulerApp(appAttemptId_0, user_0, a, rmContext, null);
+        new SchedulerApp(appAttemptId_0, user_0, a, 
+            mock(ActiveUsersManager.class), rmContext, null);
     a.submitApplication(app_0, user_0, A);
 
     final ApplicationAttemptId appAttemptId_1 = 
         TestUtils.getMockApplicationAttemptId(1, 0); 
     SchedulerApp app_1 = 
-        new SchedulerApp(appAttemptId_1, user_1, a, rmContext, null);
+        new SchedulerApp(appAttemptId_1, user_1, a, 
+            mock(ActiveUsersManager.class), rmContext, null);
     a.submitApplication(app_1, user_1, A);  
 
     // Setup some nodes
@@ -770,7 +880,8 @@ public class TestLeafQueue {
     final ApplicationAttemptId appAttemptId_0 = 
         TestUtils.getMockApplicationAttemptId(0, 0); 
     SchedulerApp app_0 = 
-        spy(new SchedulerApp(appAttemptId_0, user_0, a, rmContext, null));
+        spy(new SchedulerApp(appAttemptId_0, user_0, a, 
+            mock(ActiveUsersManager.class), rmContext, null));
     a.submitApplication(app_0, user_0, A);
     
     // Setup some nodes and racks
@@ -899,7 +1010,8 @@ public class TestLeafQueue {
     final ApplicationAttemptId appAttemptId_0 = 
         TestUtils.getMockApplicationAttemptId(0, 0); 
     SchedulerApp app_0 = 
-        spy(new SchedulerApp(appAttemptId_0, user_0, a, rmContext, null));
+        spy(new SchedulerApp(appAttemptId_0, user_0, a, 
+            mock(ActiveUsersManager.class), rmContext, null));
     a.submitApplication(app_0, user_0, A);
     
     // Setup some nodes and racks
@@ -1028,7 +1140,8 @@ public class TestLeafQueue {
     final ApplicationAttemptId appAttemptId_0 = 
         TestUtils.getMockApplicationAttemptId(0, 0); 
     SchedulerApp app_0 = 
-        spy(new SchedulerApp(appAttemptId_0, user_0, a, rmContext, null));
+        spy(new SchedulerApp(appAttemptId_0, user_0, a, 
+            mock(ActiveUsersManager.class), rmContext, null));
     a.submitApplication(app_0, user_0, A);
     
     // Setup some nodes and racks

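The new SchedulerApp argument threaded through these tests is an ActiveUsersManager, which lets a queue's user limit be divided only among users that currently have outstanding resource requests (testUserLimits asserts exactly one active user before any containers are assigned). A minimal sketch of that bookkeeping, using hypothetical names rather than the real YARN class, might look like this:

import java.util.HashMap;
import java.util.Map;

// Illustrative only: tracks how many outstanding requests each user has,
// so only users with pending work count toward the user limit.
class ActiveUsersTracker {
  private final Map<String, Integer> pendingByUser = new HashMap<String, Integer>();

  // called when a user adds an outstanding resource request
  public synchronized void activateUser(String user) {
    Integer count = pendingByUser.get(user);
    pendingByUser.put(user, count == null ? 1 : count + 1);
  }

  // called when one of the user's outstanding requests is satisfied
  public synchronized void deactivateUser(String user) {
    Integer count = pendingByUser.get(user);
    if (count == null) {
      return;
    }
    if (count <= 1) {
      pendingByUser.remove(user);
    } else {
      pendingByUser.put(user, count - 1);
    }
  }

  // the denominator used when dividing a queue's capacity among its users
  public synchronized int getNumActiveUsers() {
    return pendingByUser.size();
  }
}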
+ 45 - 18
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java

@@ -24,6 +24,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
 import org.apache.hadoop.yarn.server.resourcemanager.webapp.NodesPage.NodesBlock;
 import org.apache.hadoop.yarn.webapp.test.WebAppTests;
+import org.junit.Before;
 import org.junit.Test;
 import org.mockito.Mockito;
 
@@ -36,39 +37,65 @@ import com.google.inject.Module;
  * data for all the columns in the table as specified in the header.
  */
 public class TestNodesPage {
+  
+  final int numberOfRacks = 2;
+  final int numberOfNodesPerRack = 2;
+  // The number of actual table headers for NodesPage.NodesBlock might change
+  // in the future; in that case this value should be adjusted accordingly.
+  final int numberOfThInMetricsTable = 10;
+  final int numberOfActualTableHeaders = 10;
 
-  @Test
-  public void testNodesBlockRender() throws Exception {
-    final int numberOfRacks = 2;
-    final int numberOfNodesPerRack = 2;
-    // Number of Actual Table Headers for NodesPage.NodesBlock might change in
-    // future. In that case this value should be adjusted to the new value.
-    final int numberOfThInMetricsTable = 10;
-    final int numberOfActualTableHeaders = 10;
-
-    Injector injector = WebAppTests.createMockInjector(RMContext.class,
-        TestRMWebApp.mockRMContext(3, numberOfRacks, numberOfNodesPerRack, 8*TestRMWebApp.GiB),
-        new Module() {
+  private Injector injector;
+  
+  @Before
+  public void setUp() throws Exception {
+    injector = WebAppTests.createMockInjector(RMContext.class, TestRMWebApp
+        .mockRMContext(3, numberOfRacks, numberOfNodesPerRack,
+            8 * TestRMWebApp.GiB), new Module() {
       @Override
       public void configure(Binder binder) {
         try {
-          binder.bind(ResourceManager.class).toInstance(TestRMWebApp.mockRm(3,
-              numberOfRacks, numberOfNodesPerRack, 8*TestRMWebApp.GiB));
+          binder.bind(ResourceManager.class).toInstance(
+              TestRMWebApp.mockRm(3, numberOfRacks, numberOfNodesPerRack,
+                  8 * TestRMWebApp.GiB));
         } catch (IOException e) {
           throw new IllegalStateException(e);
         }
       }
     });
+  }
+
+  @Test
+  public void testNodesBlockRender() throws Exception {
     injector.getInstance(NodesBlock.class).render();
     PrintWriter writer = injector.getInstance(PrintWriter.class);
     WebAppTests.flushOutput(injector);
 
-    Mockito.verify(writer, Mockito.times(numberOfActualTableHeaders + 
-        numberOfThInMetricsTable)).print(
-        "<th");
+    Mockito.verify(writer,
+        Mockito.times(numberOfActualTableHeaders + numberOfThInMetricsTable))
+        .print("<th");
+    Mockito.verify(
+        writer,
+        Mockito.times(numberOfRacks * numberOfNodesPerRack
+            * numberOfActualTableHeaders + numberOfThInMetricsTable)).print(
+        "<td");
+  }
+  
+  @Test
+  public void testNodesBlockRenderForLostNodes() {
+    NodesBlock nodesBlock = injector.getInstance(NodesBlock.class);
+    nodesBlock.set("node.state", "lost");
+    nodesBlock.render();
+    PrintWriter writer = injector.getInstance(PrintWriter.class);
+    WebAppTests.flushOutput(injector);
+
+    Mockito.verify(writer,
+        Mockito.times(numberOfActualTableHeaders + numberOfThInMetricsTable))
+        .print("<th");
     Mockito.verify(
         writer,
         Mockito.times(numberOfRacks * numberOfNodesPerRack
-            * numberOfActualTableHeaders + numberOfThInMetricsTable)).print("<td");
+            * numberOfActualTableHeaders + numberOfThInMetricsTable)).print(
+        "<td");
   }
 }

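With the shared defaults above, both render tests expect 10 + 10 = 20 "<th" prints (the nodes-table headers plus the metrics-table headers) and 2 * 2 * 10 + 10 = 50 "<td" prints (one cell per column for each of the 2 racks x 2 nodes per rack, plus the metrics-table entries).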
+ 11 - 0
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebApp.java

@@ -120,12 +120,23 @@ public class TestRMWebApp {
     for (RMNode node : nodes) {
       nodesMap.put(node.getNodeID(), node);
     }
+    
+    final List<RMNode> lostNodes = MockNodes.lostNodes(racks, numNodes,
+        newResource(mbsPerNode));
+    final ConcurrentMap<String, RMNode> lostNodesMap = Maps.newConcurrentMap();
+    for (RMNode node : lostNodes) {
+      lostNodesMap.put(node.getHostName(), node);
+    }
    return new RMContextImpl(new MemStore(), null, null, null, null) {
       @Override
       public ConcurrentMap<ApplicationId, RMApp> getRMApps() {
         return applicationsMaps;
       }
       @Override
+      public ConcurrentMap<String, RMNode> getInactiveRMNodes() {
+        return lostNodesMap;
+      }
+      @Override
       public ConcurrentMap<NodeId, RMNode> getRMNodes() {
         return nodesMap;
       }

+ 12 - 6
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServices.java

@@ -370,7 +370,8 @@ public class TestRMWebServices extends JerseyTest {
           WebServicesTestUtils.getXmlInt(element, "lostNodes"),
           WebServicesTestUtils.getXmlInt(element, "unhealthyNodes"),
           WebServicesTestUtils.getXmlInt(element, "decommissionedNodes"),
-          WebServicesTestUtils.getXmlInt(element, "rebootedNodes"));
+          WebServicesTestUtils.getXmlInt(element, "rebootedNodes"),
+          WebServicesTestUtils.getXmlInt(element, "activeNodes"));
     }
   }
 
@@ -378,7 +379,7 @@ public class TestRMWebServices extends JerseyTest {
       Exception {
     assertEquals("incorrect number of elements", 1, json.length());
     JSONObject clusterinfo = json.getJSONObject("clusterMetrics");
-    assertEquals("incorrect number of elements", 11, clusterinfo.length());
+    assertEquals("incorrect number of elements", 12, clusterinfo.length());
     verifyClusterMetrics(clusterinfo.getInt("appsSubmitted"),
         clusterinfo.getInt("reservedMB"), clusterinfo.getInt("availableMB"),
         clusterinfo.getInt("allocatedMB"),
@@ -386,13 +387,13 @@ public class TestRMWebServices extends JerseyTest {
         clusterinfo.getInt("totalMB"), clusterinfo.getInt("totalNodes"),
         clusterinfo.getInt("lostNodes"), clusterinfo.getInt("unhealthyNodes"),
         clusterinfo.getInt("decommissionedNodes"),
-        clusterinfo.getInt("rebootedNodes"));
+        clusterinfo.getInt("rebootedNodes"),clusterinfo.getInt("activeNodes"));
   }
 
   public void verifyClusterMetrics(int sub, int reservedMB, int availableMB,
       int allocMB, int containersAlloc, int totalMB, int totalNodes,
       int lostNodes, int unhealthyNodes, int decommissionedNodes,
-      int rebootedNodes) throws JSONException, Exception {
+      int rebootedNodes, int activeNodes) throws JSONException, Exception {
 
     ResourceScheduler rs = rm.getResourceScheduler();
     QueueMetrics metrics = rs.getRootQueueMetrics();
@@ -412,8 +413,11 @@ public class TestRMWebServices extends JerseyTest {
         * MB_IN_GB, allocMB);
     assertEquals("containersAllocated doesn't match", 0, containersAlloc);
     assertEquals("totalMB doesn't match", totalMBExpect, totalMB);
-    assertEquals("totalNodes doesn't match", clusterMetrics.getNumNMs(),
-        totalNodes);
+    assertEquals(
+        "totalNodes doesn't match",
+        clusterMetrics.getNumActiveNMs() + clusterMetrics.getNumLostNMs()
+            + clusterMetrics.getNumDecommisionedNMs()
+            + clusterMetrics.getNumRebootedNMs(), totalNodes);
     assertEquals("lostNodes doesn't match", clusterMetrics.getNumLostNMs(),
         lostNodes);
     assertEquals("unhealthyNodes doesn't match",
@@ -422,6 +426,8 @@ public class TestRMWebServices extends JerseyTest {
         clusterMetrics.getNumDecommisionedNMs(), decommissionedNodes);
     assertEquals("rebootedNodes doesn't match",
         clusterMetrics.getNumRebootedNMs(), rebootedNodes);
+    assertEquals("activeNodes doesn't match", clusterMetrics.getNumActiveNMs(),
+        activeNodes);
   }
 
   @Test

+ 63 - 0
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesNodes.java

@@ -202,6 +202,69 @@ public class TestRMWebServicesNodes extends JerseyTest {
       rm.stop();
     }
   }
+  
+  @Test
+  public void testNodesQueryStateLost() throws JSONException, Exception {
+    WebResource r = resource();
+    MockNM nm1 = rm.registerNode("h1:1234", 5120);
+    MockNM nm2 = rm.registerNode("h2:1234", 5120);
+    rm.sendNodeStarted(nm1);
+    rm.sendNodeStarted(nm2);
+    rm.NMwaitForState(nm1.getNodeId(), RMNodeState.RUNNING);
+    rm.NMwaitForState(nm2.getNodeId(), RMNodeState.RUNNING);
+    rm.sendNodeLost(nm1);
+    rm.sendNodeLost(nm2);
+
+    ClientResponse response = r.path("ws").path("v1").path("cluster")
+        .path("nodes").queryParam("state", RMNodeState.LOST.toString())
+        .accept(MediaType.APPLICATION_JSON).get(ClientResponse.class);
+
+    assertEquals(MediaType.APPLICATION_JSON_TYPE, response.getType());
+    JSONObject json = response.getEntity(JSONObject.class);
+    JSONObject nodes = json.getJSONObject("nodes");
+    assertEquals("incorrect number of elements", 1, nodes.length());
+    JSONArray nodeArray = nodes.getJSONArray("node");
+    assertEquals("incorrect number of elements", 2, nodeArray.length());
+    for (int i = 0; i < nodeArray.length(); ++i) {
+      JSONObject info = nodeArray.getJSONObject(i);
+      String host = info.get("id").toString().split(":")[0];
+      RMNode rmNode = rm.getRMContext().getInactiveRMNodes().get(host);
+      WebServicesTestUtils.checkStringMatch("nodeHTTPAddress", "",
+          info.getString("nodeHTTPAddress"));
+      WebServicesTestUtils.checkStringMatch("state", rmNode.getState()
+          .toString(), info.getString("state"));
+    }
+  }
+  
+  @Test
+  public void testSingleNodeQueryStateLost() throws JSONException, Exception {
+    WebResource r = resource();
+    MockNM nm1 = rm.registerNode("h1:1234", 5120);
+    MockNM nm2 = rm.registerNode("h2:1234", 5120);
+    rm.sendNodeStarted(nm1);
+    rm.sendNodeStarted(nm2);
+    rm.NMwaitForState(nm1.getNodeId(), RMNodeState.RUNNING);
+    rm.NMwaitForState(nm2.getNodeId(), RMNodeState.RUNNING);
+    rm.sendNodeLost(nm1);
+    rm.sendNodeLost(nm2);
+
+    ClientResponse response = r.path("ws").path("v1").path("cluster")
+        .path("nodes").path("h2:1234").accept(MediaType.APPLICATION_JSON)
+        .get(ClientResponse.class);
+
+    assertEquals(MediaType.APPLICATION_JSON_TYPE, response.getType());
+    JSONObject json = response.getEntity(JSONObject.class);
+    JSONObject info = json.getJSONObject("node");
+    String id = info.get("id").toString();
+
+    assertEquals("Incorrect Node Information.", "h2:1234", id);
+
+    RMNode rmNode = rm.getRMContext().getInactiveRMNodes().get("h2");
+    WebServicesTestUtils.checkStringMatch("nodeHTTPAddress", "",
+        info.getString("nodeHTTPAddress"));
+    WebServicesTestUtils.checkStringMatch("state",
+        rmNode.getState().toString(), info.getString("state"));
+  }
 
   @Test
   public void testNodesQueryHealthy() throws JSONException, Exception {

+ 9 - 4
hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/ExecutionSummarizer.java

@@ -149,10 +149,15 @@ class ExecutionSummarizer implements StatListener<JobStats> {
   throws IOException {
     numJobsInInputTrace = factory.numJobsInTrace;
     endTime = System.currentTimeMillis();
-    Path inputTracePath = new Path(inputPath);
-    FileSystem fs = inputTracePath.getFileSystem(conf);
-    inputTraceLocation = fs.makeQualified(inputTracePath).toString();
-    inputTraceSignature = getTraceSignature(inputTraceLocation);
+     if ("-".equals(inputPath)) {
+      inputTraceLocation = Summarizer.NA;
+      inputTraceSignature = Summarizer.NA;
+    } else {
+      Path inputTracePath = new Path(inputPath);
+      FileSystem fs = inputTracePath.getFileSystem(conf);
+      inputTraceLocation = fs.makeQualified(inputTracePath).toString();
+      inputTraceSignature = getTraceSignature(inputPath);
+    }
     jobSubmissionPolicy = Gridmix.getJobSubmissionPolicy(conf).name();
     resolver = userResolver.getClass().getName();
     if (dataSize > 0) {

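The hunk above makes the summarizer tolerate "-" as the trace path, Gridmix's convention for reading the job trace from standard input, in which case there is no file to qualify or fingerprint. A rough, self-contained sketch of that guard (the class, field, and method names are placeholders, not the Gridmix API; a local constant stands in for Summarizer.NA):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

class TraceInfo {
  static final String NA = "N/A";   // stand-in for Summarizer.NA
  String location;
  String signature;

  void record(String inputPath, Configuration conf) throws IOException {
    if ("-".equals(inputPath)) {
      // the trace came from standard input, so there is no file to locate or hash
      location = NA;
      signature = NA;
    } else {
      Path tracePath = new Path(inputPath);
      FileSystem fs = tracePath.getFileSystem(conf);
      location = fs.makeQualified(tracePath).toString();
      // stand-in for the real trace-signature computation
      signature = Integer.toHexString(location.hashCode());
    }
  }
}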
+ 7 - 3
hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Gridmix.java

@@ -314,9 +314,13 @@ public class Gridmix extends Configured implements Tool {
       }
     });
     
-    // print the run summary
-    System.out.print("\n\n");
-    System.out.println(summarizer.toString());
+    // print the gridmix summary if the run was successful
+    if (val == 0) {
+      // print the run summary
+      System.out.print("\n\n");
+      System.out.println(summarizer.toString());
+    }
+    
     return val; 
   }
 

+ 9 - 2
hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Statistics.java

@@ -101,13 +101,15 @@ public class Statistics implements Component<Job> {
     }
     
     int maps = 0;
+    int reds = 0;
     if (jobdesc == null) {
       throw new IllegalArgumentException(
         " JobStory not available for job " + job.getJobName());
     } else {
       maps = jobdesc.getNumberMaps();
+      reds = jobdesc.getNumberReduces();
     }
-    JobStats stats = new JobStats(maps,job);
+    JobStats stats = new JobStats(maps, reds, job);
     jobMaps.put(seq,stats);
   }
 
@@ -258,15 +260,20 @@ public class Statistics implements Component<Job> {
    */
   static class JobStats {
     private int noOfMaps;
+    private int noOfReds;
     private Job job;
 
-    public JobStats(int noOfMaps,Job job){
+    public JobStats(int noOfMaps, int numOfReds, Job job) {
       this.job = job;
       this.noOfMaps = noOfMaps;
+      this.noOfReds = numOfReds;
     }
     public int getNoOfMaps() {
       return noOfMaps;
     }
+    public int getNoOfReds() {
+      return noOfReds;
+    }
 
     /**
      * Returns the job ,

+ 165 - 60
hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/StressJobFactory.java

@@ -31,13 +31,12 @@ import org.apache.hadoop.tools.rumen.JobStoryProducer;
 
 import java.io.IOException;
 import java.util.concurrent.CountDownLatch;
-import java.util.concurrent.locks.Condition;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 public class StressJobFactory extends JobFactory<Statistics.ClusterStats> {
   public static final Log LOG = LogFactory.getLog(StressJobFactory.class);
 
   private final LoadStatus loadStatus = new LoadStatus();
-  private final Condition condUnderloaded = this.lock.newCondition();
   /**
    * The minimum ratio between pending+running map tasks (aka. incomplete map
    * tasks) and cluster map slot capacity for us to consider the cluster is
@@ -150,23 +149,32 @@ public class StressJobFactory extends JobFactory<Statistics.ClusterStats> {
         }
         LOG.info("START STRESS @ " + System.currentTimeMillis());
         while (!Thread.currentThread().isInterrupted()) {
-          lock.lock();
           try {
             while (loadStatus.overloaded()) {
-              //Wait while JT is overloaded.
+              if (LOG.isDebugEnabled()) {
+                LOG.debug("Cluster overloaded in run! Sleeping...");
+              }
+              // sleep 
               try {
-                condUnderloaded.await();
+                Thread.sleep(1000);
               } catch (InterruptedException ie) {
                 return;
               }
             }
 
             while (!loadStatus.overloaded()) {
+              if (LOG.isDebugEnabled()) {
+                LOG.debug("Cluster underloaded in run! Stressing...");
+              }
               try {
+                //TODO This in-line read can block submission for large jobs.
                 final JobStory job = getNextJobFiltered();
                 if (null == job) {
                   return;
                 }
+                if (LOG.isDebugEnabled()) {
+                  LOG.debug("Job Selected: " + job.getJobID());
+                }
                 submitter.add(
                   jobCreator.createGridmixJob(
                     conf, 0L, job, scratch, 
@@ -175,14 +183,20 @@ public class StressJobFactory extends JobFactory<Statistics.ClusterStats> {
                     sequence.getAndIncrement()));
                 // TODO: We need to take care of scenario when one map/reduce
                 // takes more than 1 slot.
-                loadStatus.mapSlotsBackfill -= 
-                  calcEffectiveIncompleteMapTasks(
-                    loadStatus.mapSlotCapacity, job.getNumberMaps(), 0.0f);
-                loadStatus.reduceSlotsBackfill -= 
-                  calcEffectiveIncompleteReduceTasks(
-                    loadStatus.reduceSlotCapacity, job.getNumberReduces(), 
-                    0.0f);
-                --loadStatus.numJobsBackfill;
+                
+                // Update the load estimates now that the job has been submitted
+                int incompleteMapTasks = (int) calcEffectiveIncompleteMapTasks(
+                                                 loadStatus.getMapCapacity(), 
+                                                 job.getNumberMaps(), 0.0f);
+                loadStatus.decrementMapLoad(incompleteMapTasks);
+                
+                int incompleteReduceTasks = 
+                  (int) calcEffectiveIncompleteReduceTasks(
+                          loadStatus.getReduceCapacity(), 
+                          job.getNumberReduces(), 0.0f);
+                loadStatus.decrementReduceLoad(incompleteReduceTasks);
+                  
+                loadStatus.decrementJobLoad(1);
               } catch (IOException e) {
                 LOG.error("Error while submitting the job ", e);
                 error = e;
@@ -191,7 +205,7 @@ public class StressJobFactory extends JobFactory<Statistics.ClusterStats> {
 
             }
           } finally {
-            lock.unlock();
+            // do nothing
           }
         }
       } catch (InterruptedException e) {
@@ -210,19 +224,11 @@ public class StressJobFactory extends JobFactory<Statistics.ClusterStats> {
    */
   @Override
   public void update(Statistics.ClusterStats item) {
-    lock.lock();
+    ClusterStatus clusterMetrics = item.getStatus();
     try {
-      ClusterStatus clusterMetrics = item.getStatus();
-      try {
-        checkLoadAndGetSlotsToBackfill(item,clusterMetrics);
-      } catch (Exception e) {
-        LOG.error("Couldn't get the new Status",e);
-      }
-      if (!loadStatus.overloaded()) {
-        condUnderloaded.signalAll();
-      }
-    } finally {
-      lock.unlock();
+      checkLoadAndGetSlotsToBackfill(item, clusterMetrics);
+    } catch (Exception e) {
+      LOG.error("Couldn't get the new Status",e);
     }
   }
 
@@ -254,18 +260,25 @@ public class StressJobFactory extends JobFactory<Statistics.ClusterStats> {
    */
   private void checkLoadAndGetSlotsToBackfill(
     ClusterStats stats, ClusterStatus clusterStatus) throws IOException, InterruptedException {
-    loadStatus.mapSlotCapacity = clusterStatus.getMaxMapTasks();
-    loadStatus.reduceSlotCapacity = clusterStatus.getMaxReduceTasks();
     
+    // update the max cluster capacity in case it has changed
+    int mapCapacity = clusterStatus.getMaxMapTasks();
+    loadStatus.updateMapCapacity(mapCapacity);
+    
+    int reduceCapacity = clusterStatus.getMaxReduceTasks();
     
-    loadStatus.numJobsBackfill = 
-      (int) (maxJobTrackerRatio * clusterStatus.getTaskTrackers())
-        - stats.getNumRunningJob();
-    if (loadStatus.numJobsBackfill <= 0) {
+    loadStatus.updateReduceCapacity(reduceCapacity);
+    
+    int numTrackers = clusterStatus.getTaskTrackers();
+    
+    int jobLoad = 
+      (int) (maxJobTrackerRatio * numTrackers) - stats.getNumRunningJob();
+    loadStatus.updateJobLoad(jobLoad);
+    if (loadStatus.getJobLoad() <= 0) {
       if (LOG.isDebugEnabled()) {
-        LOG.debug(System.currentTimeMillis() + " Overloaded is "
+        LOG.debug(System.currentTimeMillis() + " [JobLoad] Overloaded is "
                   + Boolean.TRUE.toString() + " NumJobsBackfill is "
-                  + loadStatus.numJobsBackfill);
+                  + loadStatus.getJobLoad());
       }
       return; // stop calculation because we know it is overloaded.
     }
@@ -275,56 +288,84 @@ public class StressJobFactory extends JobFactory<Statistics.ClusterStats> {
       float mapProgress = job.getJob().mapProgress();
       int noOfMaps = job.getNoOfMaps();
       incompleteMapTasks += 
-        calcEffectiveIncompleteMapTasks(
-          clusterStatus.getMaxMapTasks(), noOfMaps, mapProgress);
+        calcEffectiveIncompleteMapTasks(mapCapacity, noOfMaps, mapProgress);
     }
-    loadStatus.mapSlotsBackfill = 
-    (int) ((overloadMapTaskMapSlotRatio * clusterStatus.getMaxMapTasks()) 
-           - incompleteMapTasks);
-    if (loadStatus.mapSlotsBackfill <= 0) {
+    
+    int mapSlotsBackFill = 
+      (int) ((overloadMapTaskMapSlotRatio * mapCapacity) - incompleteMapTasks);
+    loadStatus.updateMapLoad(mapSlotsBackFill);
+    
+    if (loadStatus.getMapLoad() <= 0) {
       if (LOG.isDebugEnabled()) {
-        LOG.debug(System.currentTimeMillis() + " Overloaded is "
+        LOG.debug(System.currentTimeMillis() + " [MAP-LOAD] Overloaded is "
                   + Boolean.TRUE.toString() + " MapSlotsBackfill is "
-                  + loadStatus.mapSlotsBackfill);
+                  + loadStatus.getMapLoad());
       }
       return; // stop calculation because we know it is overloaded.
     }
 
     float incompleteReduceTasks = 0; // include pending & running reduce tasks.
     for (JobStats job : ClusterStats.getRunningJobStats()) {
-      int noOfReduces = job.getJob().getNumReduceTasks();
+      // Use the reduce count cached in JobStats
+      int noOfReduces = job.getNoOfReds();
       if (noOfReduces > 0) {
         float reduceProgress = job.getJob().reduceProgress();
         incompleteReduceTasks += 
-          calcEffectiveIncompleteReduceTasks(
-            clusterStatus.getMaxReduceTasks(), noOfReduces, reduceProgress);
+          calcEffectiveIncompleteReduceTasks(reduceCapacity, noOfReduces, 
+                                             reduceProgress);
       }
     }
-    loadStatus.reduceSlotsBackfill = 
-      (int) ((overloadReduceTaskReduceSlotRatio * clusterStatus.getMaxReduceTasks()) 
+    
+    int reduceSlotsBackFill = 
+      (int)((overloadReduceTaskReduceSlotRatio * reduceCapacity) 
              - incompleteReduceTasks);
-    if (loadStatus.reduceSlotsBackfill <= 0) {
+    loadStatus.updateReduceLoad(reduceSlotsBackFill);
+    if (loadStatus.getReduceLoad() <= 0) {
       if (LOG.isDebugEnabled()) {
-        LOG.debug(System.currentTimeMillis() + " Overloaded is "
+        LOG.debug(System.currentTimeMillis() + " [REDUCE-LOAD] Overloaded is "
                   + Boolean.TRUE.toString() + " ReduceSlotsBackfill is "
-                  + loadStatus.reduceSlotsBackfill);
+                  + loadStatus.getReduceLoad());
       }
       return; // stop calculation because we know it is overloaded.
     }
 
     if (LOG.isDebugEnabled()) {
-      LOG.debug(System.currentTimeMillis() + " Overloaded is "
+      LOG.debug(System.currentTimeMillis() + " [OVERALL] Overloaded is "
                 + Boolean.FALSE.toString() + "Current load Status is " 
                 + loadStatus);
     }
   }
 
   static class LoadStatus {
-    int mapSlotsBackfill;
-    int mapSlotCapacity;
-    int reduceSlotsBackfill;
-    int reduceSlotCapacity;
-    int numJobsBackfill;
+    /**
+     * Additional number of map slots that can be requested before
+     * declaring (by Gridmix STRESS mode) the cluster as overloaded. 
+     */
+    private volatile int mapSlotsBackfill;
+    
+    /**
+     * Determines the total map slot capacity of the cluster.
+     */
+    private volatile int mapSlotCapacity;
+    
+    /**
+     * Additional number of reduce slots that can be requested before
+     * declaring (by Gridmix STRESS mode) the cluster as overloaded.
+     */
+    private volatile int reduceSlotsBackfill;
+    
+    /**
+     * Determines the total reduce slot capacity of the cluster.
+     */
+    private volatile int reduceSlotCapacity;
+
+    /**
+     * Determines the max count of running jobs in the cluster.
+     */
+    private volatile int numJobsBackfill;
+    
+    // default to overloaded until the first cluster status arrives
+    private AtomicBoolean overloaded = new AtomicBoolean(true);
 
     /**
      * Construct the LoadStatus in an unknown state - assuming the cluster is
@@ -339,12 +380,76 @@ public class StressJobFactory extends JobFactory<Statistics.ClusterStats> {
       reduceSlotCapacity = -1;
     }
     
-    public boolean overloaded() {
-      return (mapSlotsBackfill <= 0) || (reduceSlotsBackfill <= 0)
-             || (numJobsBackfill <= 0);
+    public synchronized int getMapLoad() {
+      return mapSlotsBackfill;
+    }
+    
+    public synchronized int getMapCapacity() {
+      return mapSlotCapacity;
+    }
+    
+    public synchronized int getReduceLoad() {
+      return reduceSlotsBackfill;
+    }
+    
+    public synchronized int getReduceCapacity() {
+      return reduceSlotCapacity;
+    }
+    
+    public synchronized int getJobLoad() {
+      return numJobsBackfill;
+    }
+    
+    public synchronized void decrementMapLoad(int mapSlotsConsumed) {
+      this.mapSlotsBackfill -= mapSlotsConsumed;
+      updateOverloadStatus();
+    }
+    
+    public synchronized void decrementReduceLoad(int reduceSlotsConsumed) {
+      this.reduceSlotsBackfill -= reduceSlotsConsumed;
+      updateOverloadStatus();
+    }
+
+    public synchronized void decrementJobLoad(int numJobsConsumed) {
+      this.numJobsBackfill -= numJobsConsumed;
+      updateOverloadStatus();
+    }
+    
+    public synchronized void updateMapCapacity(int mapSlotsCapacity) {
+      this.mapSlotCapacity = mapSlotsCapacity;
+      updateOverloadStatus();
+    }
+    
+    public synchronized void updateReduceCapacity(int reduceSlotsCapacity) {
+      this.reduceSlotCapacity = reduceSlotsCapacity;
+      updateOverloadStatus();
+    }
+    
+    public synchronized void updateMapLoad(int mapSlotsBackfill) {
+      this.mapSlotsBackfill = mapSlotsBackfill;
+      updateOverloadStatus();
+    }
+    
+    public synchronized void updateReduceLoad(int reduceSlotsBackfill) {
+      this.reduceSlotsBackfill = reduceSlotsBackfill;
+      updateOverloadStatus();
+    }
+    
+    public synchronized void updateJobLoad(int numJobsBackfill) {
+      this.numJobsBackfill = numJobsBackfill;
+      updateOverloadStatus();
+    }
+    
+    private synchronized void updateOverloadStatus() {
+      overloaded.set((mapSlotsBackfill <= 0) || (reduceSlotsBackfill <= 0)
+                     || (numJobsBackfill <= 0));
+    }
+    
+    public synchronized boolean overloaded() {
+      return overloaded.get();
     }
     
-    public String toString() {
+    public synchronized String toString() {
     // TODO Use StringBuilder instead
       return " Overloaded = " + overloaded()
              + ", MapSlotBackfill = " + mapSlotsBackfill 

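The StressJobFactory rewrite above drops the lock/Condition pair: the submitting thread now polls with a fixed sleep, while LoadStatus publishes its overload decision through synchronized mutators and an AtomicBoolean. A stripped-down sketch of that pattern, using illustrative names rather than the Gridmix API:

import java.util.concurrent.atomic.AtomicBoolean;

// Illustrative only: one side recomputes and publishes the overload flag,
// the other side backs off with a fixed sleep instead of awaiting a Condition.
class OverloadGate {
  private final AtomicBoolean overloaded = new AtomicBoolean(true); // pessimistic default

  private int mapSlotsBackfill = -1;
  private int reduceSlotsBackfill = -1;
  private int jobsBackfill = -1;

  // monitoring side: update a load estimate, then refresh the flag
  public synchronized void updateMapLoad(int backfill) {
    mapSlotsBackfill = backfill;
    refresh();
  }

  public synchronized void updateReduceLoad(int backfill) {
    reduceSlotsBackfill = backfill;
    refresh();
  }

  public synchronized void updateJobLoad(int backfill) {
    jobsBackfill = backfill;
    refresh();
  }

  private void refresh() {
    overloaded.set(mapSlotsBackfill <= 0 || reduceSlotsBackfill <= 0 || jobsBackfill <= 0);
  }

  // submitting side: sleep-poll until the cluster is no longer overloaded
  public void awaitUnderloaded() throws InterruptedException {
    while (overloaded.get()) {
      Thread.sleep(1000);
    }
  }
}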
+ 14 - 6
hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/ResourceUsageMatcher.java

@@ -52,15 +52,23 @@ public class ResourceUsageMatcher {
   @SuppressWarnings("unchecked")
   public void configure(Configuration conf, ResourceCalculatorPlugin monitor, 
                         ResourceUsageMetrics metrics, Progressive progress) {
-    Class[] plugins = 
-      conf.getClasses(RESOURCE_USAGE_EMULATION_PLUGINS, 
-                      ResourceUsageEmulatorPlugin.class);
+    Class[] plugins = conf.getClasses(RESOURCE_USAGE_EMULATION_PLUGINS);
     if (plugins == null) {
       System.out.println("No resource usage emulator plugins configured.");
     } else {
-      for (Class<? extends ResourceUsageEmulatorPlugin> plugin : plugins) {
-        if (plugin != null) {
-          emulationPlugins.add(ReflectionUtils.newInstance(plugin, conf));
+      for (Class clazz : plugins) {
+        if (clazz != null) {
+          if (ResourceUsageEmulatorPlugin.class.isAssignableFrom(clazz)) {
+            ResourceUsageEmulatorPlugin plugin = 
+              (ResourceUsageEmulatorPlugin) ReflectionUtils.newInstance(clazz, 
+                                                                        conf);
+            emulationPlugins.add(plugin);
+          } else {
+            throw new RuntimeException("Misconfigured resource usage plugins. " 
+                + "Class " + clazz.getClass().getName() + " is not a resource "
+                + "usage plugin as it does not extend "
+                + ResourceUsageEmulatorPlugin.class.getName());
+          }
         }
       }
     }

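The matcher now resolves the configured class names without a default and checks each one with isAssignableFrom before instantiating it, so a misconfigured entry fails fast with a clear message instead of surfacing later as a ClassCastException. A hedged, self-contained sketch of that idiom (PluginIface and the configuration key are placeholders, not the Gridmix names):

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ReflectionUtils;

public class PluginLoader {
  // placeholder for the real plugin interface
  public interface PluginIface {
    void run();
  }

  public static List<PluginIface> loadPlugins(Configuration conf, String key) {
    List<PluginIface> result = new ArrayList<PluginIface>();
    Class<?>[] classes = conf.getClasses(key);
    if (classes == null || classes.length == 0) {
      return result;   // key unset or empty: nothing to load
    }
    for (Class<?> clazz : classes) {
      if (clazz == null) {
        continue;
      }
      if (!PluginIface.class.isAssignableFrom(clazz)) {
        // fail fast on a misconfigured entry instead of a later ClassCastException
        throw new RuntimeException("Misconfigured plugin: " + clazz.getName()
            + " does not implement " + PluginIface.class.getName());
      }
      result.add(ReflectionUtils.newInstance(clazz.asSubclass(PluginIface.class), conf));
    }
    return result;
  }
}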
+ 16 - 7
hadoop-mapreduce-project/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestGridmixSubmission.java

@@ -101,10 +101,17 @@ public class TestGridmixSubmission {
       retiredJobs = new LinkedBlockingQueue<Job>();
     }
 
-    public void verify(ArrayList<JobStory> submitted) throws Exception {
+    public void verify(ArrayList<JobStory> submitted, Configuration clientConf) 
+    throws Exception {
       final ArrayList<Job> succeeded = new ArrayList<Job>();
       assertEquals("Bad job count", expected, retiredJobs.drainTo(succeeded));
       final HashMap<String,JobStory> sub = new HashMap<String,JobStory>();
+      
+      // define the input and output path for the run
+      final Path in = new Path("foo").makeQualified(GridmixTestUtils.dfs);
+      final Path out = 
+        new Path(in, clientConf.get(Gridmix.GRIDMIX_OUT_DIR, "gridmix"));
+      
       for (JobStory spec : submitted) {
         sub.put(spec.getJobID().toString(), spec);
       }
@@ -115,8 +122,7 @@ public class TestGridmixSubmission {
         Configuration conf = job.getConfiguration();
         if (GenerateData.JOB_NAME.equals(jobName)) {
           verifyQueue(conf, jobName);
-          final Path in = new Path("foo").makeQualified(GridmixTestUtils.dfs);
-          final Path out = new Path("/gridmix").makeQualified(GridmixTestUtils.dfs);
+          
           final ContentSummary generated = GridmixTestUtils.dfs.getContentSummary(in);
           assertTrue("Mismatched data gen", // +/- 100k for logs
               (GENDATA << 20) < generated.getLength() + GENSLOP ||
@@ -164,7 +170,7 @@ public class TestGridmixSubmission {
 
         final FileStatus stat = 
           GridmixTestUtils.dfs.getFileStatus(
-            new Path(GridmixTestUtils.DEST, "" + Integer.valueOf(jobSeqNum)));
+            new Path(out, "" + Integer.valueOf(jobSeqNum)));
         assertEquals("Wrong owner for " + jobName, spec.getUser(),
                      stat.getOwner());
 
@@ -337,8 +343,9 @@ public class TestGridmixSubmission {
     private JobFactory factory;
     private TestMonitor monitor;
 
-    public void checkMonitor() throws Exception {
-      monitor.verify(((DebugJobFactory.Debuggable)factory).getSubmitted());
+    public void checkMonitor(Configuration conf) throws Exception {
+      monitor.verify(((DebugJobFactory.Debuggable)factory).getSubmitted(), 
+                     conf);
     }
 
     @Override
@@ -534,9 +541,11 @@ public class TestGridmixSubmission {
       GridmixTestUtils.dfs.setPermission(root, new FsPermission((short)0777));
       int res = ToolRunner.run(conf, client, argv);
       assertEquals("Client exited with nonzero status", 0, res);
-      client.checkMonitor();
+      client.checkMonitor(conf);
     } catch (Exception e) {
       e.printStackTrace();
+      // fail the test if there is an exception
+      throw new RuntimeException(e);
     } finally {
       in.getFileSystem(conf).delete(in, true);
       out.getFileSystem(conf).delete(out, true);

+ 10 - 4
hadoop-mapreduce-project/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestGridmixSummary.java

@@ -159,7 +159,7 @@ public class TestGridmixSummary {
     
     @Override
     protected Thread createReaderThread() {
-      return null;
+      return new Thread();
     }
   }
   
@@ -243,7 +243,7 @@ public class TestGridmixSummary {
                  tid, es.getInputTraceSignature());
     // test trace location
     Path qPath = fs.makeQualified(testTraceFile);
-    assertEquals("Mismatch in trace signature", 
+    assertEquals("Mismatch in trace filename", 
                  qPath.toString(), es.getInputTraceLocation());
     // test expected data size
     assertEquals("Mismatch in expected data size", 
@@ -275,7 +275,7 @@ public class TestGridmixSummary {
     es.finalize(factory, testTraceFile.toString(), 0L, resolver, dataStats, 
                 conf);
     // test missing expected data size
-    assertEquals("Mismatch in trace signature", 
+    assertEquals("Mismatch in trace data size", 
                  Summarizer.NA, es.getExpectedDataSize());
     assertFalse("Mismatch in trace signature", 
                 tid.equals(es.getInputTraceSignature()));
@@ -295,6 +295,12 @@ public class TestGridmixSummary {
     assertEquals("Mismatch in trace signature", 
                  tid, es.getInputTraceSignature());
     
+    // finalize with '-' (stdin) as the trace input
+    es.finalize(factory, "-", 0L, resolver, dataStats, conf);
+    assertEquals("Mismatch in trace signature",
+                 Summarizer.NA, es.getInputTraceSignature());
+    assertEquals("Mismatch in trace file location", 
+                 Summarizer.NA, es.getInputTraceLocation());
   }
   
   // test the ExecutionSummarizer
@@ -332,7 +338,7 @@ public class TestGridmixSummary {
         return isSuccessful;
       };
     };
-    return new JobStats(numMaps, fakeJob);
+    return new JobStats(numMaps, numReds, fakeJob);
   }
   
   /**