Bladeren bron

Merging r1557221 through r1557907 from trunk.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-5698@1557908 13f79535-47bb-0310-9956-ffa450edef68
Jing Zhao 11 jaren geleden
bovenliggende
commit
9233552e65
62 gewijzigde bestanden met toevoegingen van 3891 en 493 verwijderingen
  1. 6 0
      hadoop-common-project/hadoop-common/CHANGES.txt
  2. 81 53
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
  3. 1 1
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/CompositeService.java
  4. 27 0
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfigurationDeprecation.java
  5. 9 1
      hadoop-common-project/hadoop-minikdc/src/main/java/org/apache/hadoop/minikdc/MiniKdc.java
  6. 11 0
      hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
  7. 1 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockCollection.java
  8. 5 3
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoUnderConstruction.java
  9. 13 6
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
  10. 2 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
  11. 1 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java
  12. 1 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
  13. 12 10
      hadoop-hdfs-project/hadoop-hdfs/src/main/native/libhdfs/hdfs.c
  14. 6 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/native/libhdfs/test/test_libhdfs_zerocopy.c
  15. 50 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
  16. 22 1
      hadoop-project/pom.xml
  17. 17 0
      hadoop-yarn-project/CHANGES.txt
  18. 50 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml
  19. 32 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/RegisterApplicationMasterResponse.java
  20. 1 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto
  21. 73 5
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/pom.xml
  22. 28 16
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java
  23. 16 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java
  24. 1 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/ContainerLaunchFailAppMaster.java
  25. 78 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDSFailedAppMaster.java
  26. 29 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java
  27. 34 16
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/pom.xml
  28. 2 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/pom.xml
  29. 93 9
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/pom.xml
  30. 131 9
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/pom.xml
  31. 52 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/RegisterApplicationMasterResponsePBImpl.java
  32. 28 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestCompositeService.java
  33. 62 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/pom.xml
  34. 135 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml
  35. 154 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml
  36. 7 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java
  37. 24 4
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java
  38. 64 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java
  39. 2 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java
  40. 41 25
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java
  41. 6 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplication.java
  42. 7 8
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
  43. 3 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
  44. 14 9
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
  45. 1361 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java.orig
  46. 14 9
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java
  47. 80 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMHA.java
  48. 124 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java
  49. 24 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java
  50. 58 24
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java
  51. 1 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java
  52. 6 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java
  53. 2 3
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java
  54. 2 8
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java
  55. 615 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java.orig
  56. 14 18
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java
  57. 72 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/pom.xml
  58. 80 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/pom.xml
  59. 1 8
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/pom.xml
  60. 3 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/pom.xml
  61. 1 106
      hadoop-yarn-project/hadoop-yarn/pom.xml
  62. 1 126
      hadoop-yarn-project/pom.xml

+ 6 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -511,6 +511,9 @@ Release 2.4.0 - UNRELEASED
     HADOOP-10214. Fix multithreaded correctness warnings in ActiveStandbyElector
     (Liang Xie via kasha)
 
+    HADOOP-10223. MiniKdc#main() should close the FileReader it creates. 
+    (Ted Yu via tucu)
+
 Release 2.3.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES
@@ -596,6 +599,9 @@ Release 2.3.0 - UNRELEASED
     HADOOP-10193. hadoop-auth's PseudoAuthenticationHandler can consume getInputStream. 
     (gchanan via tucu)
 
+    HADOOP-10178. Configuration deprecation always emit "deprecated" warnings
+    when a new key is used. (Shanyu Zhao via cnauroth)
+
 Release 2.2.0 - 2013-10-13
 
   INCOMPATIBLE CHANGES

+ 81 - 53
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java

@@ -552,36 +552,6 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
     return deprecationContext.get().getDeprecatedKeyMap().containsKey(key);
   }
 
-  /**
-   * Returns the alternate name for a key if the property name is deprecated
-   * or if deprecates a property name.
-   *
-   * @param name property name.
-   * @return alternate name.
-   */
-  private String[] getAlternateNames(String name) {
-    String altNames[] = null;
-    DeprecationContext cur = deprecationContext.get();
-    DeprecatedKeyInfo keyInfo = cur.getDeprecatedKeyMap().get(name);
-    if (keyInfo == null) {
-      altNames = (cur.getReverseDeprecatedKeyMap().get(name) != null ) ? 
-        new String [] {cur.getReverseDeprecatedKeyMap().get(name)} : null;
-      if(altNames != null && altNames.length > 0) {
-    	//To help look for other new configs for this deprecated config
-    	keyInfo = cur.getDeprecatedKeyMap().get(altNames[0]);
-      }      
-    } 
-    if(keyInfo != null && keyInfo.newKeys.length > 0) {
-      List<String> list = new ArrayList<String>(); 
-      if(altNames != null) {
-    	  list.addAll(Arrays.asList(altNames));
-      }
-      list.addAll(Arrays.asList(keyInfo.newKeys));
-      altNames = list.toArray(new String[list.size()]);
-    }
-    return altNames;
-  }
-
   /**
    * Checks for the presence of the property <code>name</code> in the
    * deprecation map. Returns the first of the list of new keys if present
@@ -933,6 +903,37 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
     return result;
   }
 
+  /**
+   * Returns alternative names (non-deprecated keys or previously-set deprecated keys)
+   * for a given non-deprecated key.
+   * If the given key is deprecated, return null.
+   *
+   * @param name property name.
+   * @return alternative names.
+   */
+  private String[] getAlternativeNames(String name) {
+    String altNames[] = null;
+    DeprecatedKeyInfo keyInfo = null;
+    DeprecationContext cur = deprecationContext.get();
+    String depKey = cur.getReverseDeprecatedKeyMap().get(name);
+    if(depKey != null) {
+      keyInfo = cur.getDeprecatedKeyMap().get(depKey);
+      if(keyInfo.newKeys.length > 0) {
+        if(getProps().containsKey(depKey)) {
+          //if deprecated key is previously set explicitly
+          List<String> list = new ArrayList<String>();
+          list.addAll(Arrays.asList(keyInfo.newKeys));
+          list.add(depKey);
+          altNames = list.toArray(new String[list.size()]);
+        }
+        else {
+          altNames = keyInfo.newKeys;
+        }
+      }
+    }
+    return altNames;
+  }
+
   /** 
    * Set the <code>value</code> of the <code>name</code> property. If 
    * <code>name</code> is deprecated or there is a deprecated name associated to it,
@@ -947,9 +948,9 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
   
   /** 
    * Set the <code>value</code> of the <code>name</code> property. If 
-   * <code>name</code> is deprecated or there is a deprecated name associated to it,
-   * it sets the value to both names.
-   * 
+   * <code>name</code> is deprecated, it also sets the <code>value</code> to
+   * the keys that replace the deprecated key.
+   *
    * @param name property name.
    * @param value property value.
    * @param source the place that this configuration value came from 
@@ -969,23 +970,30 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
     }
     getOverlay().setProperty(name, value);
     getProps().setProperty(name, value);
-    if(source == null) {
-      updatingResource.put(name, new String[] {"programatically"});
-    } else {
-      updatingResource.put(name, new String[] {source});
+    String newSource = (source == null ? "programatically" : source);
+
+    if (!isDeprecated(name)) {
+      updatingResource.put(name, new String[] {newSource});
+      String[] altNames = getAlternativeNames(name);
+      if(altNames != null) {
+        for(String n: altNames) {
+          if(!n.equals(name)) {
+            getOverlay().setProperty(n, value);
+            getProps().setProperty(n, value);
+            updatingResource.put(n, new String[] {newSource});
+          }
+        }
+      }
     }
-    String[] altNames = getAlternateNames(name);
-    if (altNames != null && altNames.length > 0) {
+    else {
+      String[] names = handleDeprecation(deprecationContext.get(), name);
       String altSource = "because " + name + " is deprecated";
-      for(String altName : altNames) {
-        if(!altName.equals(name)) {
-          getOverlay().setProperty(altName, value);
-          getProps().setProperty(altName, value);
-          updatingResource.put(altName, new String[] {altSource});
-        }
+      for(String n : names) {
+        getOverlay().setProperty(n, value);
+        getProps().setProperty(n, value);
+        updatingResource.put(n, new String[] {altSource});
       }
     }
-    warnOnceIfDeprecated(deprecations, name);
   }
 
   private void warnOnceIfDeprecated(DeprecationContext deprecations, String name) {
@@ -999,15 +1007,21 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
    * Unset a previously set property.
    */
   public synchronized void unset(String name) {
-    String[] altNames = getAlternateNames(name);
-    getOverlay().remove(name);
-    getProps().remove(name);
-    if (altNames !=null && altNames.length > 0) {
-      for(String altName : altNames) {
-    	getOverlay().remove(altName);
-    	getProps().remove(altName);
+    String[] names = null;
+    if (!isDeprecated(name)) {
+      names = getAlternativeNames(name);
+      if(names == null) {
+    	  names = new String[]{name};
       }
     }
+    else {
+      names = handleDeprecation(deprecationContext.get(), name);
+    }
+
+    for(String n: names) {
+      getOverlay().remove(n);
+      getProps().remove(n);
+    }
   }
 
   /**
@@ -2600,4 +2614,18 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
       System.out.println(entry.getKey() + "\t" + newKeys.toString());
     }
   }
+
+  /**
+   * Returns whether or not a deprecated name has been warned. If the name is not
+   * deprecated then always return false
+   */
+  public static boolean hasWarnedDeprecation(String name) {
+    DeprecationContext deprecations = deprecationContext.get();
+    if(deprecations.getDeprecatedKeyMap().containsKey(name)) {
+      if(deprecations.getDeprecatedKeyMap().get(name).accessed.get()) {
+        return true;
+      }
+    }
+    return false;
+  }
 }

+ 1 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/service/CompositeService.java

@@ -95,7 +95,7 @@ public class CompositeService extends AbstractService {
 
   protected synchronized boolean removeService(Service service) {
     synchronized (serviceList) {
-      return serviceList.add(service);
+      return serviceList.remove(service);
     }
   }
 

+ 27 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfigurationDeprecation.java

@@ -26,6 +26,7 @@ import java.io.BufferedWriter;
 import java.io.File;
 import java.io.FileWriter;
 import java.io.IOException;
+import java.io.ByteArrayOutputStream;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
@@ -399,4 +400,30 @@ public class TestConfigurationDeprecation {
       Uninterruptibles.getUninterruptibly(future);
     }
   }
+
+  @Test
+  public void testNoFalseDeprecationWarning() throws IOException {
+    Configuration conf = new Configuration();
+    Configuration.addDeprecation("AA", "BB");
+    conf.set("BB", "bb");
+    conf.get("BB");
+    conf.writeXml(new ByteArrayOutputStream());
+    assertEquals(false, Configuration.hasWarnedDeprecation("AA"));
+    conf.set("AA", "aa");
+    assertEquals(true, Configuration.hasWarnedDeprecation("AA"));
+  }
+  
+  @Test
+  public void testDeprecationSetUnset() throws IOException {
+    addDeprecationToConfiguration();
+    Configuration conf = new Configuration();
+    //"X" is deprecated by "Y" and "Z"
+    conf.set("Y", "y");
+    assertEquals("y", conf.get("Z"));
+    conf.set("X", "x");
+    assertEquals("x", conf.get("Z"));
+    conf.unset("Y");
+    assertEquals(null, conf.get("Z"));
+    assertEquals(null, conf.get("X"));
+  }
 }

+ 9 - 1
hadoop-common-project/hadoop-minikdc/src/main/java/org/apache/hadoop/minikdc/MiniKdc.java

@@ -125,7 +125,15 @@ public class MiniKdc {
               + file.getAbsolutePath());
     }
     Properties userConf = new Properties();
-    userConf.load(new FileReader(file));
+    FileReader r = null;
+    try {
+      r = new FileReader(file);
+      userConf.load(r);
+    } finally {
+      if (r != null) {
+        r.close();
+      }
+    }
     for (Map.Entry entry : userConf.entrySet()) {
       conf.put(entry.getKey(), entry.getValue());
     }

+ 11 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -741,6 +741,17 @@ Release 2.4.0 - UNRELEASED
 
     HDFS-5449. WebHdfs compatibility broken between 2.2 and 1.x / 23.x (kihwal)
 
+    HDFS-5756. hadoopRzOptionsSetByteBufferPool does not accept NULL argument,
+    contrary to docs. (cmccabe via wang)
+
+    HDFS-5747. Fix NPEs in BlockManager. (Arpit Agarwal)
+
+    HDFS-5710. FSDirectory#getFullPathName should check inodes against null.
+    (Uma Maheswara Rao G via jing9)
+
+    HDFS-5579. Under construction files make DataNode decommission take very long
+    hours. (zhaoyunjiong via jing9)
+
   BREAKDOWN OF HDFS-2832 SUBTASKS AND RELATED JIRAS
 
     HDFS-4985. Add storage type to the protocol and expose it in block report

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockCollection.java

@@ -31,7 +31,7 @@ public interface BlockCollection {
   /**
    * Get the last block of the collection.
    */
-  public BlockInfo getLastBlock() throws IOException;
+  public BlockInfo getLastBlock();
 
   /** 
    * Get content summary.

+ 5 - 3
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoUnderConstruction.java

@@ -324,12 +324,14 @@ public class BlockInfoUnderConstruction extends BlockInfo {
     Iterator<ReplicaUnderConstruction> it = replicas.iterator();
     while (it.hasNext()) {
       ReplicaUnderConstruction r = it.next();
-      if(r.getExpectedStorageLocation() == storage) {
+      DatanodeStorageInfo expectedLocation = r.getExpectedStorageLocation();
+      if(expectedLocation == storage) {
         // Record the gen stamp from the report
         r.setGenerationStamp(block.getGenerationStamp());
         return;
-      } else if (r.getExpectedStorageLocation().getDatanodeDescriptor() ==
-          storage.getDatanodeDescriptor()) {
+      } else if (expectedLocation != null &&
+                 expectedLocation.getDatanodeDescriptor() ==
+                     storage.getDatanodeDescriptor()) {
 
         // The Datanode reported that the block is on a different storage
         // than the one chosen by BlockPlacementPolicy. This can occur as

+ 13 - 6
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java

@@ -1214,8 +1214,10 @@ public class BlockManager {
             // block should belong to a file
             bc = blocksMap.getBlockCollection(block);
             // abandoned block or block reopened for append
-            if(bc == null || bc.isUnderConstruction()) {
-              neededReplications.remove(block, priority); // remove from neededReplications
+            if (bc == null
+                || (bc.isUnderConstruction() && block.equals(bc.getLastBlock()))) {
+              // remove from neededReplications
+              neededReplications.remove(block, priority);
               continue;
             }
 
@@ -1295,7 +1297,7 @@ public class BlockManager {
           // block should belong to a file
           bc = blocksMap.getBlockCollection(block);
           // abandoned block or block reopened for append
-          if(bc == null || bc.isUnderConstruction()) {
+          if(bc == null || (bc.isUnderConstruction() && block.equals(bc.getLastBlock()))) {
             neededReplications.remove(block, priority); // remove from neededReplications
             rw.targets = null;
             continue;
@@ -2906,8 +2908,16 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
         NumberReplicas num = countNodes(block);
         int curReplicas = num.liveReplicas();
         int curExpectedReplicas = getReplication(block);
+                
         if (isNeededReplication(block, curExpectedReplicas, curReplicas)) {
           if (curExpectedReplicas > curReplicas) {
+            if (bc.isUnderConstruction()) {
+              if (block.equals(bc.getLastBlock()) && curReplicas > minReplication) {
+                continue;
+              }
+              underReplicatedInOpenFiles++;
+            }
+            
             // Log info about one block for this node which needs replication
             if (!status) {
               status = true;
@@ -2924,9 +2934,6 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
             if ((curReplicas == 0) && (num.decommissionedReplicas() > 0)) {
               decommissionOnlyReplicas++;
             }
-            if (bc.isUnderConstruction()) {
-              underReplicatedInOpenFiles++;
-            }
           }
           if (!neededReplications.contains(block) &&
             pendingReplications.getNumReplicas(block) == 0) {

+ 2 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java

@@ -1842,7 +1842,8 @@ public class FSDirectory implements Closeable {
   /** Return the full path name of the specified inode */
   static String getFullPathName(INode inode) {
     INode[] inodes = getFullPathINodes(inode);
-    return getFullPathName(inodes, inodes.length - 1);
+    // inodes can be null only when its called without holding lock
+    return inodes == null ? "" : getFullPathName(inodes, inodes.length - 1);
   }
   
   /**

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java

@@ -640,7 +640,7 @@ public class INodeFile extends INodeWithAdditionalFields
   }
 
   @Override
-  public BlockInfo getLastBlock() throws IOException {
+  public BlockInfo getLastBlock() {
     return blocks == null || blocks.length == 0? null: blocks[blocks.length-1];
   }
 

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java

@@ -547,8 +547,8 @@ public class NameNode implements NameNodeStatusMXBean {
   }
   
   private void stopCommonServices() {
-    if(namesystem != null) namesystem.close();
     if(rpcServer != null) rpcServer.stop();
+    if(namesystem != null) namesystem.close();
     if (pauseMonitor != null) pauseMonitor.stop();
     if (plugins != null) {
       for (ServicePlugin p : plugins) {

+ 12 - 10
hadoop-hdfs-project/hadoop-hdfs/src/main/native/libhdfs/hdfs.c

@@ -2174,16 +2174,18 @@ int hadoopRzOptionsSetByteBufferPool(
         return -1;
     }
 
-    // Note: we don't have to call hadoopRzOptionsClearCached in this
-    // function, since the ByteBufferPool is passed separately from the
-    // EnumSet of ReadOptions.
-
-    jthr = constructNewObjectOfClass(env, &byteBufferPool, className, "()V");
-    if (jthr) {
-        printExceptionAndFree(env, jthr, PRINT_EXC_ALL,
-            "hadoopRzOptionsSetByteBufferPool(className=%s): ", className);
-        errno = EINVAL;
-        return -1;
+    if (className) {
+      // Note: we don't have to call hadoopRzOptionsClearCached in this
+      // function, since the ByteBufferPool is passed separately from the
+      // EnumSet of ReadOptions.
+
+      jthr = constructNewObjectOfClass(env, &byteBufferPool, className, "()V");
+      if (jthr) {
+          printExceptionAndFree(env, jthr, PRINT_EXC_ALL,
+              "hadoopRzOptionsSetByteBufferPool(className=%s): ", className);
+          errno = EINVAL;
+          return -1;
+      }
     }
     if (opts->byteBufferPool) {
         // Delete any previous ByteBufferPool we had.

+ 6 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/native/libhdfs/test/test_libhdfs_zerocopy.c

@@ -140,6 +140,12 @@ static int doTestZeroCopyReads(hdfsFS fs, const char *fileName)
     EXPECT_NULL(hadoopReadZero(file, opts, TEST_ZEROCOPY_FULL_BLOCK_SIZE));
     EXPECT_INT_EQ(EPROTONOSUPPORT, errno);
 
+    /* Verify that setting a NULL ByteBufferPool class works. */
+    EXPECT_ZERO(hadoopRzOptionsSetByteBufferPool(opts, NULL));
+    EXPECT_ZERO(hadoopRzOptionsSetSkipChecksum(opts, 0));
+    EXPECT_NULL(hadoopReadZero(file, opts, TEST_ZEROCOPY_FULL_BLOCK_SIZE));
+    EXPECT_INT_EQ(EPROTONOSUPPORT, errno);
+
     /* Now set a ByteBufferPool and try again.  It should succeed this time. */
     EXPECT_ZERO(hadoopRzOptionsSetByteBufferPool(opts,
           ELASTIC_BYTE_BUFFER_POOL_CLASS));

+ 50 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java

@@ -42,6 +42,7 @@ import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo.AdminStates;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
+import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
@@ -779,4 +780,53 @@ public class TestDecommission {
       Thread.sleep(HEARTBEAT_INTERVAL * 1000);
     }
   }
+  
+  @Test(timeout=120000)
+  public void testDecommissionWithOpenfile() throws IOException, InterruptedException {
+    LOG.info("Starting test testDecommissionWithOpenfile");
+    
+    //At most 4 nodes will be decommissioned
+    startCluster(1, 7, conf);
+        
+    FileSystem fileSys = cluster.getFileSystem(0);
+    FSNamesystem ns = cluster.getNamesystem(0);
+    
+    String openFile = "/testDecommissionWithOpenfile.dat";
+           
+    writeFile(fileSys, new Path(openFile), (short)3);   
+    // make sure the file was open for write
+    FSDataOutputStream fdos =  fileSys.append(new Path(openFile)); 
+    
+    LocatedBlocks lbs = NameNodeAdapter.getBlockLocations(cluster.getNameNode(0), openFile, 0, fileSize);
+              
+    DatanodeInfo[] dnInfos4LastBlock = lbs.getLastLocatedBlock().getLocations();
+    DatanodeInfo[] dnInfos4FirstBlock = lbs.get(0).getLocations();
+    
+    ArrayList<String> nodes = new ArrayList<String>();
+    ArrayList<DatanodeInfo> dnInfos = new ArrayList<DatanodeInfo>();
+   
+    for (DatanodeInfo datanodeInfo : dnInfos4FirstBlock) {
+      DatanodeInfo found = datanodeInfo;
+      for (DatanodeInfo dif: dnInfos4LastBlock) {
+        if (datanodeInfo.equals(dif)) {
+         found = null;         
+        }
+      }
+      if (found != null) {
+        nodes.add(found.getXferAddr());
+        dnInfos.add(found);
+      }
+    }
+    //decommission one of the 3 nodes which have last block
+    nodes.add(dnInfos4LastBlock[0].getXferAddr());
+    dnInfos.add(dnInfos4LastBlock[0]);
+    
+    writeConfigFile(excludeFile, nodes);
+    refreshNodes(ns, conf);  
+    for (DatanodeInfo dn : dnInfos) {
+      waitNodeState(dn, AdminStates.DECOMMISSIONED);
+    }           
+
+    fdos.close();
+  }
 }

+ 22 - 1
hadoop-project/pom.xml

@@ -760,7 +760,28 @@
         <groupId>com.google.code.findbugs</groupId>
         <artifactId>jsr305</artifactId>
         <version>1.3.9</version>
-      </dependency>    
+      </dependency>
+      <dependency>
+        <groupId>javax.xml.bind</groupId>
+        <artifactId>jaxb-api</artifactId>
+        <version>2.2.2</version>
+      </dependency>
+      <dependency>
+        <groupId>org.codehaus.jettison</groupId>
+        <artifactId>jettison</artifactId>
+        <version>1.1</version>
+      </dependency>
+      <dependency>
+        <groupId>com.sun.jersey</groupId>
+        <artifactId>jersey-client</artifactId>
+        <version>${jersey.version}</version>
+      </dependency>
+
+      <dependency>
+        <groupId>org.glassfish.grizzly</groupId>
+        <artifactId>grizzly-http-servlet</artifactId>
+        <version>2.1.2</version>
+      </dependency>
     </dependencies>
   </dependencyManagement>
 

+ 17 - 0
hadoop-yarn-project/CHANGES.txt

@@ -63,6 +63,13 @@ Release 2.4.0 - UNRELEASED
 
     YARN-1033. Expose RM active/standby state to Web UI and REST API (kasha)
 
+    YARN-1041. Added the ApplicationMasterProtocol API for applications to use the
+    ability in ResourceManager to optionally not kill containers when the
+    ApplicationMaster exits. (Jian He via vinodkv)
+
+    YARN-1566. Changed Distributed Shell to retain containers across application
+    attempts. (Jian He via vinodkv)
+
   IMPROVEMENTS
 
     YARN-7. Support CPU resource for DistributedShell. (Junping Du via llu)
@@ -212,6 +219,10 @@ Release 2.4.0 - UNRELEASED
 
     YARN-1579. ActiveRMInfoProto fields should be optional (kasha)
 
+    YARN-888. Cleaned up POM files so that non-leaf modules don't include any
+    dependencies and thus compact the dependency list for leaf modules.
+    (Alejandro Abdelnur via vinodkv)
+
   OPTIMIZATIONS
 
   BUG FIXES
@@ -313,6 +324,12 @@ Release 2.4.0 - UNRELEASED
     YARN-1293. Fixed TestContainerLaunch#testInvalidEnvSyntaxDiagnostics failure
     caused by non-English system locale. (Tsuyoshi OZAWA via jianhe)
 
+    YARN-1574. RMDispatcher should be reset on transition to standby. (Xuan Gong
+    via kasha)
+
+    YARN-1166. Fixed app-specific and attempt-specific QueueMetrics to be
+    triggered by accordingly app event and attempt event. 
+
 Release 2.3.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 50 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml

@@ -32,6 +32,56 @@
     <yarn.basedir>${project.parent.basedir}</yarn.basedir>
   </properties>
 
+  <dependencies>
+    <dependency>
+      <groupId>commons-lang</groupId>
+      <artifactId>commons-lang</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-logging</groupId>
+      <artifactId>commons-logging</artifactId>
+    </dependency>
+
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <scope>provided</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>commons-el</groupId>
+          <artifactId>commons-el</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-runtime</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-compiler</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>jsp-2.1-jetty</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-annotations</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.google.protobuf</groupId>
+      <artifactId>protobuf-java</artifactId>
+    </dependency>
+  </dependencies>
+
   <build>
     <plugins>
       <plugin>

+ 32 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/RegisterApplicationMasterResponse.java

@@ -19,6 +19,7 @@
 package org.apache.hadoop.yarn.api.protocolrecords;
 
 import java.nio.ByteBuffer;
+import java.util.List;
 import java.util.Map;
 
 import org.apache.hadoop.classification.InterfaceAudience.Private;
@@ -27,6 +28,7 @@ import org.apache.hadoop.classification.InterfaceStability.Stable;
 import org.apache.hadoop.classification.InterfaceStability.Unstable;
 import org.apache.hadoop.yarn.api.ApplicationMasterProtocol;
 import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
+import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.util.Records;
 
@@ -47,16 +49,19 @@ import org.apache.hadoop.yarn.util.Records;
 @Public
 @Stable
 public abstract class RegisterApplicationMasterResponse {
+
   @Private
   @Unstable
   public static RegisterApplicationMasterResponse newInstance(
       Resource minCapability, Resource maxCapability,
-      Map<ApplicationAccessType, String> acls, ByteBuffer key) {
+      Map<ApplicationAccessType, String> acls, ByteBuffer key,
+      List<Container> containersFromPreviousAttempt) {
     RegisterApplicationMasterResponse response =
         Records.newRecord(RegisterApplicationMasterResponse.class);
     response.setMaximumResourceCapability(maxCapability);
     response.setApplicationACLs(acls);
     response.setClientToAMTokenMasterKey(key);
+    response.setContainersFromPreviousAttempt(containersFromPreviousAttempt);
     return response;
   }
 
@@ -105,4 +110,30 @@ public abstract class RegisterApplicationMasterResponse {
   @Public
   @Stable
   public abstract void setClientToAMTokenMasterKey(ByteBuffer key);
+
+  /**
+   * <p>
+   * Get the list of running containers as viewed by
+   * <code>ResourceManager</code> from previous application attempt.
+   * </p>
+   * 
+   * @return the list of running containers as viewed by
+   *         <code>ResourceManager</code> from previous application attempt
+   */
+  @Public
+  @Unstable
+  public abstract List<Container> getContainersFromPreviousAttempt();
+
+  /**
+   * Set the list of running containers as viewed by
+   * <code>ResourceManager</code> from previous application attempt.
+   * 
+   * @param containersFromPreviousAttempt
+   *          the list of running containers as viewed by
+   *          <code>ResourceManager</code> from previous application attempt.
+   */
+  @Private
+  @Unstable
+  public abstract void setContainersFromPreviousAttempt(
+      List<Container> containersFromPreviousAttempt);
 }

+ 1 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto

@@ -44,6 +44,7 @@ message RegisterApplicationMasterResponseProto {
   optional ResourceProto maximumCapability = 1;
   optional bytes client_to_am_token_master_key = 2;
   repeated ApplicationACLMapProto application_ACLs = 3;
+  repeated ContainerProto containers_from_previous_attempt = 4;
 }
 
 message FinishApplicationMasterRequestProto {

+ 73 - 5
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/pom.xml

@@ -33,33 +33,101 @@
   </properties>
 
   <dependencies>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <scope>provided</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>commons-el</groupId>
+          <artifactId>commons-el</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-runtime</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-compiler</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>jsp-2.1-jetty</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>log4j</groupId>
+      <artifactId>log4j</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-lang</groupId>
+      <artifactId>commons-lang</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-logging</groupId>
+      <artifactId>commons-logging</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-cli</groupId>
+      <artifactId>commons-cli</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+    </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-annotations</artifactId>
+    </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-yarn-api</artifactId>
     </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-yarn-common</artifactId>
     </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-yarn-client</artifactId>
     </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-yarn-server-nodemanager</artifactId>
       <scope>test</scope>
     </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-yarn-server-resourcemanager</artifactId>
       <scope>test</scope>
     </dependency>
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-yarn-server-common</artifactId>
-      <scope>test</scope>
-    </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-yarn-server-tests</artifactId>

+ 28 - 16
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java

@@ -37,7 +37,6 @@ import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.atomic.AtomicInteger;
 
-import com.google.common.annotations.VisibleForTesting;
 import org.apache.commons.cli.CommandLine;
 import org.apache.commons.cli.GnuParser;
 import org.apache.commons.cli.HelpFormatter;
@@ -89,6 +88,8 @@ import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
 import org.apache.hadoop.yarn.util.ConverterUtils;
 import org.apache.hadoop.yarn.util.Records;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * An ApplicationMaster for executing shell commands on a set of launched
  * containers using the YARN framework.
@@ -169,7 +170,8 @@ public class ApplicationMaster {
   private NMCallbackHandler containerListener;
   
   // Application Attempt Id ( combination of attemptId and fail count )
-  private ApplicationAttemptId appAttemptID;
+  @VisibleForTesting
+  protected ApplicationAttemptId appAttemptID;
 
   // TODO
   // For status update for clients - yet to be implemented
@@ -194,13 +196,15 @@ public class ApplicationMaster {
   private AtomicInteger numCompletedContainers = new AtomicInteger();
   // Allocated container count so that we know how many containers has the RM
   // allocated to us
-  private AtomicInteger numAllocatedContainers = new AtomicInteger();
+  @VisibleForTesting
+  protected AtomicInteger numAllocatedContainers = new AtomicInteger();
   // Count of failed containers
   private AtomicInteger numFailedContainers = new AtomicInteger();
   // Count of containers already requested from the RM
   // Needed as once requested, we should not request for containers again.
   // Only request for more if the original requirement changes.
-  private AtomicInteger numRequestedContainers = new AtomicInteger();
+  @VisibleForTesting
+  protected AtomicInteger numRequestedContainers = new AtomicInteger();
 
   // Shell command to be executed
   private String shellCommand = "";
@@ -251,6 +255,7 @@ public class ApplicationMaster {
         System.exit(0);
       }
       result = appMaster.run();
+      appMaster.finish();
     } catch (Throwable t) {
       LOG.fatal("Error running ApplicationMaster", t);
       System.exit(1);
@@ -537,26 +542,25 @@ public class ApplicationMaster {
       containerVirtualCores = maxVCores;
     }
 
+    List<Container> previousAMRunningContainers =
+        response.getContainersFromPreviousAttempt();
+    LOG.info("Received " + previousAMRunningContainers.size()
+        + " previous AM's running containers on AM registration.");
+    numAllocatedContainers.addAndGet(previousAMRunningContainers.size());
+
+    int numTotalContainersToRequest =
+        numTotalContainers - previousAMRunningContainers.size();
     // Setup ask for containers from RM
     // Send request for containers to RM
     // Until we get our fully allocated quota, we keep on polling RM for
     // containers
     // Keep looping until all the containers are launched and shell script
     // executed on them ( regardless of success/failure).
-    for (int i = 0; i < numTotalContainers; ++i) {
+    for (int i = 0; i < numTotalContainersToRequest; ++i) {
       ContainerRequest containerAsk = setupContainerAskForRM();
       amRMClient.addContainerRequest(containerAsk);
     }
-    numRequestedContainers.set(numTotalContainers);
-
-    while (!done
-        && (numCompletedContainers.get() != numTotalContainers)) {
-      try {
-        Thread.sleep(200);
-      } catch (InterruptedException ex) {}
-    }
-    finish();
-    
+    numRequestedContainers.set(numTotalContainersToRequest);
     return success;
   }
 
@@ -565,7 +569,15 @@ public class ApplicationMaster {
     return new NMCallbackHandler(this);
   }
 
-  private void finish() {
+  protected void finish() {
+    // wait for completion.
+    while (!done
+        && (numCompletedContainers.get() != numTotalContainers)) {
+      try {
+        Thread.sleep(200);
+      } catch (InterruptedException ex) {}
+    }
+
     // Join all launched threads
     // needed for when we time out
     // and we need to release containers

+ 16 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java

@@ -162,6 +162,9 @@ public class Client {
   // Timeout threshold for client. Kill app after time interval expires.
   private long clientTimeout = 600000;
 
+  // flag to indicate whether to keep containers across application attempts.
+  private boolean keepContainers = false;
+
   // Debug flag
   boolean debugFlag = false;	
 
@@ -243,6 +246,11 @@ public class Client {
     opts.addOption("container_vcores", true, "Amount of virtual cores to be requested to run the shell command");
     opts.addOption("num_containers", true, "No. of containers on which the shell command needs to be executed");
     opts.addOption("log_properties", true, "log4j.properties file");
+    opts.addOption("keep_containers_across_application_attempts", false,
+      "Flag to indicate whether to keep containers across application attempts." +
+      " If the flag is true, running containers will not be killed when" +
+      " application attempt fails and these containers will be retrieved by" +
+      " the new application attempt ");
     opts.addOption("debug", false, "Dump out debug information");
     opts.addOption("help", false, "Print usage");
 
@@ -294,12 +302,17 @@ public class Client {
 
     }
 
+    if (cliParser.hasOption("keep_containers_across_application_attempts")) {
+      LOG.info("keep_containers_across_application_attempts");
+      keepContainers = true;
+    }
+
     appName = cliParser.getOptionValue("appname", "DistributedShell");
     amPriority = Integer.parseInt(cliParser.getOptionValue("priority", "0"));
     amQueue = cliParser.getOptionValue("queue", "default");
     amMemory = Integer.parseInt(cliParser.getOptionValue("master_memory", "10"));		
     amVCores = Integer.parseInt(cliParser.getOptionValue("master_vcores", "1"));
-    
+
     if (amMemory < 0) {
       throw new IllegalArgumentException("Invalid memory specified for application master, exiting."
           + " Specified memory=" + amMemory);
@@ -442,6 +455,8 @@ public class Client {
     // set the application name
     ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
     ApplicationId appId = appContext.getApplicationId();
+
+    appContext.setKeepContainersAcrossApplicationAttempts(keepContainers);
     appContext.setApplicationName(appName);
 
     // Set up the container launch context for the application master

+ 1 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/ContainerLaunchFailAppMaster.java

@@ -67,6 +67,7 @@ public class ContainerLaunchFailAppMaster extends ApplicationMaster {
         System.exit(0);
       }
       result = appMaster.run();
+      appMaster.finish();
     } catch (Throwable t) {
       LOG.fatal("Error running ApplicationMaster", t);
       System.exit(1);

+ 78 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDSFailedAppMaster.java

@@ -0,0 +1,78 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.applications.distributedshell;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+
+public class TestDSFailedAppMaster extends ApplicationMaster {
+
+  private static final Log LOG = LogFactory.getLog(TestDSFailedAppMaster.class);
+
+  @Override
+  public boolean run() throws YarnException, IOException {
+    boolean res = super.run();
+
+    // for the 2nd attempt.
+    if (appAttemptID.getAttemptId() == 2) {
+      // should reuse the earlier running container, so numAllocatedContainers
+      // should be set to 1. And should ask no more containers, so
+      // numRequestedContainers should be set to 0.
+      if (numAllocatedContainers.get() != 1
+          || numRequestedContainers.get() != 0) {
+        LOG.info("Application Master failed. exiting");
+        System.exit(200);
+      }
+    }
+    return res;
+  }
+
+  public static void main(String[] args) {
+    boolean result = false;
+    try {
+      TestDSFailedAppMaster appMaster = new TestDSFailedAppMaster();
+      boolean doRun = appMaster.init(args);
+      if (!doRun) {
+        System.exit(0);
+      }
+      result = appMaster.run();
+      if (appMaster.appAttemptID.getAttemptId() == 1) {
+        try {
+          // sleep some time, wait for the AM to launch a container.
+          Thread.sleep(3000);
+        } catch (InterruptedException e) {}
+        // fail the first am.
+        System.exit(100);
+      }
+      appMaster.finish();
+    } catch (Throwable t) {
+      System.exit(1);
+    }
+    if (result) {
+      LOG.info("Application Master completed successfully. exiting");
+      System.exit(0);
+    } else {
+      LOG.info("Application Master failed. exiting");
+      System.exit(2);
+    }
+  }
+}

+ 29 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java

@@ -174,6 +174,35 @@ public class TestDistributedShell {
 
   }
 
+  @Test(timeout=90000)
+  public void testDSRestartWithPreviousRunningContainers() throws Exception {
+    String[] args = {
+        "--jar",
+        APPMASTER_JAR,
+        "--num_containers",
+        "1",
+        "--shell_command",
+        Shell.WINDOWS ? "timeout 8" : "sleep 8",
+        "--master_memory",
+        "512",
+        "--container_memory",
+        "128",
+        "--keep_containers_across_application_attempts"
+      };
+
+      LOG.info("Initializing DS Client");
+      Client client = new Client(TestDSFailedAppMaster.class.getName(),
+        new Configuration(yarnCluster.getConfig()));
+
+      client.init(args);
+      LOG.info("Running DS Client");
+      boolean result = client.run();
+
+      LOG.info("Client run completed. Result=" + result);
+      // application should succeed
+      Assert.assertTrue(result);
+    }
+
   @Test(timeout=90000)
   public void testDSShellWithCustomLogPropertyFile() throws Exception {
     final File basedir =

+ 34 - 16
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/pom.xml

@@ -34,42 +34,60 @@
 
   <dependencies>
     <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-yarn-api</artifactId>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
     </dependency>
     <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-yarn-common</artifactId>
+      <groupId>commons-logging</groupId>
+      <artifactId>commons-logging</artifactId>
     </dependency>
     <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-yarn-server-nodemanager</artifactId>
-      <scope>test</scope>
+      <groupId>commons-cli</groupId>
+      <artifactId>commons-cli</artifactId>
     </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-yarn-server-resourcemanager</artifactId>
-      <scope>test</scope>
+      <artifactId>hadoop-common</artifactId>
+      <scope>provided</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>commons-el</groupId>
+          <artifactId>commons-el</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-runtime</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-compiler</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>jsp-2.1-jetty</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
     <dependency>
       <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-yarn-server-resourcemanager</artifactId>
-      <scope>test</scope>
+      <artifactId>hadoop-common</artifactId>
       <type>test-jar</type>
+      <scope>test</scope>
     </dependency>
+
     <dependency>
       <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-yarn-server-common</artifactId>
-      <scope>test</scope>
+      <artifactId>hadoop-yarn-client</artifactId>
     </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-yarn-client</artifactId>
+      <artifactId>hadoop-yarn-api</artifactId>
     </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-mapreduce-client-core</artifactId>
-      <scope>test</scope>
+      <artifactId>hadoop-yarn-common</artifactId>
     </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>

+ 2 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/pom.xml

@@ -32,6 +32,8 @@
     <hadoop.common.build.dir>${basedir}/../../../../hadoop-common-project/hadoop-common/target</hadoop.common.build.dir>
   </properties>
 
+  <!-- Do not add dependencies here, add them to the POM of the leaf module -->
+
   <modules>
     <module>hadoop-yarn-applications-distributedshell</module>
     <module>hadoop-yarn-applications-unmanaged-am-launcher</module>

+ 93 - 9
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/pom.xml

@@ -30,37 +30,121 @@
   </properties>
 
   <dependencies>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <scope>provided</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>commons-el</groupId>
+          <artifactId>commons-el</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-runtime</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-compiler</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>jsp-2.1-jetty</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-logging</groupId>
+      <artifactId>commons-logging</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-lang</groupId>
+      <artifactId>commons-lang</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-cli</groupId>
+      <artifactId>commons-cli</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>log4j</groupId>
+      <artifactId>log4j</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>jetty-util</artifactId>
+    </dependency>
+
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-annotations</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-all</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
     <dependency>
       <groupId>org.apache.zookeeper</groupId>
       <artifactId>zookeeper</artifactId>
       <type>test-jar</type>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
     <dependency>
   		<groupId>org.apache.hadoop</groupId>
   		<artifactId>hadoop-yarn-api</artifactId>
   	</dependency>
-  	<dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
   		<groupId>org.apache.hadoop</groupId>
   		<artifactId>hadoop-yarn-common</artifactId>
   	</dependency>
-  	<dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-yarn-server-common</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
   		<groupId>org.apache.hadoop</groupId>
   		<artifactId>hadoop-yarn-server-resourcemanager</artifactId>
       <scope>test</scope>
   	</dependency>
-  	<dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
   		<groupId>org.apache.hadoop</groupId>
   		<artifactId>hadoop-yarn-server-tests</artifactId>
       <scope>test</scope>
       <type>test-jar</type>
   	</dependency>
-      <dependency>
-  		<groupId>org.apache.hadoop</groupId>
-  		<artifactId>hadoop-yarn-server-resourcemanager</artifactId>
-		<scope>test</scope>
-		<type>test-jar</type>
-      </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+    <groupId>org.apache.hadoop</groupId>
+    <artifactId>hadoop-yarn-server-resourcemanager</artifactId>
+      <scope>test</scope>
+      <type>test-jar</type>
+    </dependency>
 
   </dependencies>
 </project>

+ 131 - 9
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/pom.xml

@@ -33,15 +33,137 @@
   </properties>
 
   <dependencies>
-  <dependency>
-    <groupId>log4j</groupId>
-    <artifactId>log4j</artifactId>
-   </dependency>
-   
-  <dependency>
-    <groupId>org.apache.hadoop</groupId>
-    <artifactId>hadoop-yarn-api</artifactId>
-  </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <scope>provided</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>commons-el</groupId>
+          <artifactId>commons-el</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-runtime</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-compiler</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>jsp-2.1-jetty</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-yarn-api</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>javax.xml.bind</groupId>
+      <artifactId>jaxb-api</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-compress</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-lang</groupId>
+      <artifactId>commons-lang</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>javax.servlet</groupId>
+      <artifactId>servlet-api</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-codec</groupId>
+      <artifactId>commons-codec</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.sun.jersey</groupId>
+      <artifactId>jersey-core</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.codehaus.jackson</groupId>
+      <artifactId>jackson-mapper-asl</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-logging</groupId>
+      <artifactId>commons-logging</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-cli</groupId>
+      <artifactId>commons-cli</artifactId>
+    </dependency>
+
+
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+    </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-annotations</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-all</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.google.inject.extensions</groupId>
+      <artifactId>guice-servlet</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.google.protobuf</groupId>
+      <artifactId>protobuf-java</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.google.inject</groupId>
+      <artifactId>guice</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.sun.jersey</groupId>
+      <artifactId>jersey-server</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.sun.jersey</groupId>
+      <artifactId>jersey-json</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.sun.jersey.contribs</groupId>
+      <artifactId>jersey-guice</artifactId>
+    </dependency>
+    <dependency>
+     <groupId>log4j</groupId>
+     <artifactId>log4j</artifactId>
+    </dependency>
   </dependencies>
 
   <build>

+ 52 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/RegisterApplicationMasterResponsePBImpl.java

@@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.api.protocolrecords.impl.pb;
 
 
 import java.nio.ByteBuffer;
+import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
@@ -29,10 +30,13 @@ import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.classification.InterfaceStability.Unstable;
 import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
 import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
+import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl;
 import org.apache.hadoop.yarn.api.records.impl.pb.ProtoUtils;
 import org.apache.hadoop.yarn.api.records.impl.pb.ResourcePBImpl;
 import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationACLMapProto;
+import org.apache.hadoop.yarn.proto.YarnProtos.ContainerProto;
 import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto;
 import org.apache.hadoop.yarn.proto.YarnServiceProtos.RegisterApplicationMasterResponseProto;
 import org.apache.hadoop.yarn.proto.YarnServiceProtos.RegisterApplicationMasterResponseProtoOrBuilder;
@@ -52,6 +56,7 @@ public class RegisterApplicationMasterResponsePBImpl extends
 
   private Resource maximumResourceCapability;
   private Map<ApplicationAccessType, String> applicationACLS = null;
+  private List<Container> containersFromPreviousAttempt = null;
 
   public RegisterApplicationMasterResponsePBImpl() {
     builder = RegisterApplicationMasterResponseProto.newBuilder();
@@ -105,6 +110,9 @@ public class RegisterApplicationMasterResponsePBImpl extends
     if (this.applicationACLS != null) {
       addApplicationACLs();
     }
+    if (this.containersFromPreviousAttempt != null) {
+      addRunningContainersToProto();
+    }
   }
 
 
@@ -226,6 +234,43 @@ public class RegisterApplicationMasterResponsePBImpl extends
         ByteBuffer.wrap(builder.getClientToAmTokenMasterKey().toByteArray());
     return key;
   }
+
+  @Override
+  public List<Container> getContainersFromPreviousAttempt() {
+    if (this.containersFromPreviousAttempt != null) {
+      return this.containersFromPreviousAttempt;
+    }
+    initRunningContainersList();
+    return this.containersFromPreviousAttempt;
+  }
+
+  @Override
+  public void setContainersFromPreviousAttempt(final List<Container> containers) {
+    if (containers == null) {
+      return;
+    }
+    this.containersFromPreviousAttempt = new ArrayList<Container>();
+    this.containersFromPreviousAttempt.addAll(containers);
+  }
+
+  private void initRunningContainersList() {
+    RegisterApplicationMasterResponseProtoOrBuilder p = viaProto ? proto : builder;
+    List<ContainerProto> list = p.getContainersFromPreviousAttemptList();
+    containersFromPreviousAttempt = new ArrayList<Container>();
+    for (ContainerProto c : list) {
+      containersFromPreviousAttempt.add(convertFromProtoFormat(c));
+    }
+  }
+
+  private void addRunningContainersToProto() {
+    maybeInitBuilder();
+    builder.clearContainersFromPreviousAttempt();
+    List<ContainerProto> list = new ArrayList<ContainerProto>();
+    for (Container c : containersFromPreviousAttempt) {
+      list.add(convertToProtoFormat(c));
+    }
+    builder.addAllContainersFromPreviousAttempt(list);
+  }
   
   private Resource convertFromProtoFormat(ResourceProto resource) {
     return new ResourcePBImpl(resource);
@@ -235,4 +280,11 @@ public class RegisterApplicationMasterResponsePBImpl extends
     return ((ResourcePBImpl)resource).getProto();
   }
 
+  private ContainerPBImpl convertFromProtoFormat(ContainerProto p) {
+    return new ContainerPBImpl(p);
+  }
+
+  private ContainerProto convertToProtoFormat(Container t) {
+    return ((ContainerPBImpl) t).getProto();
+  }
 }

+ 28 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestCompositeService.java

@@ -337,7 +337,34 @@ public class TestCompositeService {
     assertEquals("Incorrect number of services",
         1, testService.getServices().size());
   }
-  
+
+  @Test
+  public void testRemoveService() {
+    CompositeService testService = new CompositeService("TestService") {
+      @Override
+      public void serviceInit(Configuration conf) {
+        Integer notAService = new Integer(0);
+        assertFalse("Added an integer as a service",
+            addIfService(notAService));
+
+        Service service1 = new AbstractService("Service1") {};
+        addIfService(service1);
+
+        Service service2 = new AbstractService("Service2") {};
+        addIfService(service2);
+
+        Service service3 = new AbstractService("Service3") {};
+        addIfService(service3);
+
+        removeService(service1);
+      }
+    };
+
+    testService.init(new Configuration());
+    assertEquals("Incorrect number of services",
+        2, testService.getServices().size());
+  }
+
   public static class CompositeServiceAddingAChild extends CompositeService{
     Service child;
 

+ 62 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/pom.xml

@@ -33,10 +33,72 @@
   </properties>
 
   <dependencies>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <scope>provided</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>commons-el</groupId>
+          <artifactId>commons-el</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-runtime</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-compiler</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>jsp-2.1-jetty</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-yarn-api</artifactId>
+    </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-yarn-common</artifactId>
     </dependency>
+
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-logging</groupId>
+      <artifactId>commons-logging</artifactId>
+    </dependency>
+
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-annotations</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.google.protobuf</groupId>
+      <artifactId>protobuf-java</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
     <dependency>
       <groupId>org.apache.zookeeper</groupId>
       <artifactId>zookeeper</artifactId>

+ 135 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml

@@ -35,6 +35,141 @@
   </properties>
 
   <dependencies>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <scope>provided</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>commons-el</groupId>
+          <artifactId>commons-el</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-runtime</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-compiler</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>jsp-2.1-jetty</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-yarn-common</artifactId>
+    </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-yarn-api</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>javax.xml.bind</groupId>
+      <artifactId>jaxb-api</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.codehaus.jettison</groupId>
+      <artifactId>jettison</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-lang</groupId>
+      <artifactId>commons-lang</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>javax.servlet</groupId>
+      <artifactId>servlet-api</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-codec</groupId>
+      <artifactId>commons-codec</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.sun.jersey</groupId>
+      <artifactId>jersey-core</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.sun.jersey</groupId>
+      <artifactId>jersey-client</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>jetty-util</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-logging</groupId>
+      <artifactId>commons-logging</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+    </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-annotations</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-all</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.google.inject.extensions</groupId>
+      <artifactId>guice-servlet</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.google.protobuf</groupId>
+      <artifactId>protobuf-java</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.google.inject</groupId>
+      <artifactId>guice</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.sun.jersey.jersey-test-framework</groupId>
+      <artifactId>jersey-test-framework-grizzly2</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.sun.jersey</groupId>
+      <artifactId>jersey-json</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.sun.jersey.contribs</groupId>
+      <artifactId>jersey-guice</artifactId>
+    </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-yarn-common</artifactId>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-yarn-server-common</artifactId>

+ 154 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml

@@ -33,10 +33,156 @@
   </properties>
 
   <dependencies>
+    <dependency>
+      <groupId>javax.servlet</groupId>
+      <artifactId>servlet-api</artifactId>
+    </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <scope>provided</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>commons-el</groupId>
+          <artifactId>commons-el</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-runtime</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-compiler</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>jsp-2.1-jetty</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+    </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-annotations</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-all</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.google.inject.extensions</groupId>
+      <artifactId>guice-servlet</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.google.protobuf</groupId>
+      <artifactId>protobuf-java</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+    </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-hdfs</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.google.inject</groupId>
+      <artifactId>guice</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.sun.jersey.jersey-test-framework</groupId>
+      <artifactId>jersey-test-framework-core</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.sun.jersey</groupId>
+      <artifactId>jersey-json</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.sun.jersey.contribs</groupId>
+      <artifactId>jersey-guice</artifactId>
+    </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-yarn-common</artifactId>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-yarn-common</artifactId>
+    </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-yarn-api</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>javax.xml.bind</groupId>
+      <artifactId>jaxb-api</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.codehaus.jettison</groupId>
+      <artifactId>jettison</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-lang</groupId>
+      <artifactId>commons-lang</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.sun.jersey</groupId>
+      <artifactId>jersey-core</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.sun.jersey</groupId>
+      <artifactId>jersey-client</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>jetty-util</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-logging</groupId>
+      <artifactId>commons-logging</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>log4j</groupId>
+      <artifactId>log4j</artifactId>
+    </dependency>
+
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-yarn-server-common</artifactId>
     </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-yarn-server-web-proxy</artifactId>
@@ -51,12 +197,20 @@
       <type>test-jar</type>
       <scope>test</scope>
     </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-hdfs</artifactId>
       <type>test-jar</type>
       <scope>test</scope>
     </dependency>
+
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>com.sun.jersey.jersey-test-framework</groupId>
+      <artifactId>jersey-test-framework-grizzly2</artifactId>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
 
   <build>

+ 7 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java

@@ -49,6 +49,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRespo
 import org.apache.hadoop.yarn.api.records.AMCommand;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.NodeReport;
 import org.apache.hadoop.yarn.api.records.PreemptionContainer;
@@ -78,6 +79,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAt
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptStatusupdateEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUnregistrationEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
@@ -271,6 +273,11 @@ public class ApplicationMasterService extends AbstractService implements
             .getClientToAMTokenSecretManager()
             .getMasterKey(applicationAttemptId).getEncoded()));        
       }
+
+      List<Container> containerList =
+          ((AbstractYarnScheduler) rScheduler)
+            .getTransferredContainers(applicationAttemptId);
+      response.setContainersFromPreviousAttempt(containerList);
       return response;
     }
   }

+ 24 - 4
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java

@@ -37,6 +37,7 @@ import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.service.AbstractService;
 import org.apache.hadoop.service.CompositeService;
+import org.apache.hadoop.service.Service;
 import org.apache.hadoop.util.ExitUtil;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.util.ShutdownHookManager;
@@ -180,13 +181,11 @@ public class ResourceManager extends CompositeService implements Recoverable {
     this.conf = conf;
     this.rmContext = new RMContextImpl();
 
-    rmDispatcher = createDispatcher();
+    // register the handlers for all AlwaysOn services using setupDispatcher().
+    rmDispatcher = setupDispatcher();
     addIfService(rmDispatcher);
     rmContext.setDispatcher(rmDispatcher);
 
-    rmDispatcher.register(RMFatalEventType.class,
-        new ResourceManager.RMFatalEventDispatcher(this.rmContext, this));
-
     adminService = createAdminService();
     addService(adminService);
     rmContext.setRMAdminService(adminService);
@@ -832,6 +831,7 @@ public class ResourceManager extends CompositeService implements Recoverable {
         HAServiceProtocol.HAServiceState.ACTIVE) {
       stopActiveServices();
       if (initialize) {
+        resetDispatcher();
         createAndInitActiveServices();
       }
     }
@@ -994,4 +994,24 @@ public class ResourceManager extends CompositeService implements Recoverable {
       YarnConfiguration.YARN_HTTP_POLICY_KEY,
       YarnConfiguration.YARN_HTTP_POLICY_DEFAULT)));
   }
+
+  /**
+   * Register the handlers for alwaysOn services
+   */
+  private Dispatcher setupDispatcher() {
+    Dispatcher dispatcher = createDispatcher();
+    dispatcher.register(RMFatalEventType.class,
+        new ResourceManager.RMFatalEventDispatcher(this.rmContext, this));
+    return dispatcher;
+  }
+
+  private void resetDispatcher() {
+    Dispatcher dispatcher = setupDispatcher();
+    ((Service)dispatcher).init(this.conf);
+    ((Service)dispatcher).start();
+    removeService((Service)rmDispatcher);
+    rmDispatcher = dispatcher;
+    addIfService(rmDispatcher);
+    rmContext.setDispatcher(rmDispatcher);
+  }
 }

+ 64 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java

@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager.scheduler;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
+import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
+
+public class AbstractYarnScheduler {
+
+  protected RMContext rmContext;
+  protected Map<ApplicationId, SchedulerApplication> applications;
+
+  public synchronized List<Container> getTransferredContainers(
+      ApplicationAttemptId currentAttempt) {
+    ApplicationId appId = currentAttempt.getApplicationId();
+    SchedulerApplication app = applications.get(appId);
+    List<Container> containerList = new ArrayList<Container>();
+    RMApp appImpl = this.rmContext.getRMApps().get(appId);
+    if (appImpl.getApplicationSubmissionContext().getUnmanagedAM()) {
+      return containerList;
+    }
+    Collection<RMContainer> liveContainers =
+        app.getCurrentAppAttempt().getLiveContainers();
+    ContainerId amContainerId =
+        rmContext.getRMApps().get(appId).getCurrentAppAttempt()
+          .getMasterContainer().getId();
+    for (RMContainer rmContainer : liveContainers) {
+      if (!rmContainer.getContainerId().equals(amContainerId)) {
+        containerList.add(rmContainer.getContainer());
+      }
+    }
+    return containerList;
+  }
+
+  public Map<ApplicationId, SchedulerApplication> getSchedulerApplications() {
+    return applications;
+  }
+}

+ 2 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java

@@ -260,7 +260,7 @@ public class AppSchedulingInfo {
       // once an allocation is done we assume the application is
       // running from scheduler's POV.
       pending = false;
-      metrics.incrAppsRunning(this, user);
+      metrics.runAppAttempt(applicationId, user);
     }
     LOG.debug("allocate: user: " + user + ", memory: "
         + request.getCapability());
@@ -390,7 +390,7 @@ public class AppSchedulingInfo {
                 .getNumContainers()));
       }
     }
-    metrics.finishApp(this, rmAppAttemptFinalState);
+    metrics.finishAppAttempt(applicationId, pending, user);
     
     // Clear requests themselves
     clearRequests();

+ 41 - 25
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java

@@ -41,7 +41,7 @@ import org.apache.hadoop.metrics2.lib.MutableGaugeInt;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
-import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
 import org.apache.hadoop.yarn.server.utils.BuilderUtils;
 import org.apache.hadoop.yarn.util.resource.Resources;
 import org.slf4j.Logger;
@@ -57,7 +57,7 @@ public class QueueMetrics implements MetricsSource {
   @Metric("# of pending apps") MutableGaugeInt appsPending;
   @Metric("# of apps completed") MutableCounterInt appsCompleted;
   @Metric("# of apps killed") MutableCounterInt appsKilled;
-  @Metric("# of apps failed") MutableGaugeInt appsFailed;
+  @Metric("# of apps failed") MutableCounterInt appsFailed;
 
   @Metric("Allocated memory in MB") MutableGaugeInt allocatedMB;
   @Metric("Allocated CPU in virtual cores") MutableGaugeInt allocatedVCores;
@@ -214,54 +214,70 @@ public class QueueMetrics implements MetricsSource {
     registry.snapshot(collector.addRecord(registry.info()), all);
   }
 
-  public void submitApp(String user, int attemptId) {
-    if (attemptId == 1) {
-      appsSubmitted.incr();
-    } else {
-      appsFailed.decr();
+  public void submitApp(String user) {
+    appsSubmitted.incr();
+    QueueMetrics userMetrics = getUserMetrics(user);
+    if (userMetrics != null) {
+      userMetrics.submitApp(user);
     }
+    if (parent != null) {
+      parent.submitApp(user);
+    }
+  }
+
+  public void submitAppAttempt(String user) {
     appsPending.incr();
     QueueMetrics userMetrics = getUserMetrics(user);
     if (userMetrics != null) {
-      userMetrics.submitApp(user, attemptId);
+      userMetrics.submitAppAttempt(user);
     }
     if (parent != null) {
-      parent.submitApp(user, attemptId);
+      parent.submitAppAttempt(user);
     }
   }
 
-  public void incrAppsRunning(AppSchedulingInfo app, String user) {
-    runBuckets.add(app.getApplicationId(), System.currentTimeMillis());
+  public void runAppAttempt(ApplicationId appId, String user) {
+    runBuckets.add(appId, System.currentTimeMillis());
     appsRunning.incr();
     appsPending.decr();
     QueueMetrics userMetrics = getUserMetrics(user);
     if (userMetrics != null) {
-      userMetrics.incrAppsRunning(app, user);
+      userMetrics.runAppAttempt(appId, user);
     }
     if (parent != null) {
-      parent.incrAppsRunning(app, user);
+      parent.runAppAttempt(appId, user);
     }
   }
 
-  public void finishApp(AppSchedulingInfo app,
-      RMAppAttemptState rmAppAttemptFinalState) {
-    runBuckets.remove(app.getApplicationId());
-    switch (rmAppAttemptFinalState) {
-      case KILLED: appsKilled.incr(); break;
-      case FAILED: appsFailed.incr(); break;
-      default: appsCompleted.incr();  break;
-    }
-    if (app.isPending()) {
+  public void finishAppAttempt(
+      ApplicationId appId, boolean isPending, String user) {
+    runBuckets.remove(appId);
+    if (isPending) {
       appsPending.decr();
     } else {
       appsRunning.decr();
     }
-    QueueMetrics userMetrics = getUserMetrics(app.getUser());
+    QueueMetrics userMetrics = getUserMetrics(user);
+    if (userMetrics != null) {
+      userMetrics.finishAppAttempt(appId, isPending, user);
+    }
+    if (parent != null) {
+      parent.finishAppAttempt(appId, isPending, user);
+    }
+  }
+
+  public void finishApp(String user, RMAppState rmAppFinalState) {
+    switch (rmAppFinalState) {
+      case KILLED: appsKilled.incr(); break;
+      case FAILED: appsFailed.incr(); break;
+      default: appsCompleted.incr();  break;
+    }
+    QueueMetrics userMetrics = getUserMetrics(user);
     if (userMetrics != null) {
-      userMetrics.finishApp(app, rmAppAttemptFinalState);
+      userMetrics.finishApp(user, rmAppFinalState);
     }
     if (parent != null) {
-      parent.finishApp(app, rmAppAttemptFinalState);
+      parent.finishApp(user, rmAppFinalState);
     }
   }
 

+ 6 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplication.java

@@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler;
 
 import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
 
 @Private
 @Unstable
@@ -48,4 +49,9 @@ public class SchedulerApplication {
   public void setCurrentAppAttempt(SchedulerApplicationAttempt currentAttempt) {
     this.currentAttempt = currentAttempt;
   }
+
+  public void stop(RMAppState rmAppFinalState) {
+    queue.getMetrics().finishApp(user, rmAppFinalState);
+  }
+
 }

+ 7 - 8
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java

@@ -67,6 +67,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerStat
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.PreemptableResourceScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
@@ -95,7 +96,7 @@ import com.google.common.annotations.VisibleForTesting;
 @LimitedPrivate("yarn")
 @Evolving
 @SuppressWarnings("unchecked")
-public class CapacityScheduler
+public class CapacityScheduler extends AbstractYarnScheduler
   implements PreemptableResourceScheduler, CapacitySchedulerContext,
              Configurable {
 
@@ -177,7 +178,6 @@ public class CapacityScheduler
 
   private CapacitySchedulerConfiguration conf;
   private Configuration yarnConf;
-  private RMContext rmContext;
 
   private Map<String, CSQueue> queues = new ConcurrentHashMap<String, CSQueue>();
 
@@ -191,10 +191,6 @@ public class CapacityScheduler
   private Resource minimumAllocation;
   private Resource maximumAllocation;
 
-  @VisibleForTesting
-  protected Map<ApplicationId, SchedulerApplication> applications =
-      new ConcurrentHashMap<ApplicationId, SchedulerApplication>();
-
   private boolean initialized = false;
 
   private ResourceCalculator calculator;
@@ -271,9 +267,10 @@ public class CapacityScheduler
       this.maximumAllocation = this.conf.getMaximumAllocation();
       this.calculator = this.conf.getResourceCalculator();
       this.usePortForNodeName = this.conf.getUsePortForNodeName();
-
+      this.applications =
+          new ConcurrentHashMap<ApplicationId, SchedulerApplication>();
       this.rmContext = rmContext;
-      
+
       initializeQueues(this.conf);
       
       initialized = true;
@@ -491,6 +488,7 @@ public class CapacityScheduler
     if (application == null){
       // The AppRemovedSchedulerEvent maybe sent on recovery for completed apps,
       // ignore it.
+      LOG.warn("Couldn't find application " + applicationId);
       return;
     }
     CSQueue queue = (CSQueue) application.getQueue();
@@ -500,6 +498,7 @@ public class CapacityScheduler
     } else {
       queue.finishApplication(applicationId, application.getUser());
     }
+    application.stop(finalState);
     applications.remove(applicationId);
   }
 

+ 3 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java

@@ -644,8 +644,7 @@ public class LeafQueue implements CSQueue {
       addApplicationAttempt(application, user);
     }
 
-    int attemptId = application.getApplicationAttemptId().getAttemptId();
-    metrics.submitApp(userName, attemptId);
+    metrics.submitAppAttempt(userName);
     getParent().submitApplicationAttempt(application, userName);
   }
 
@@ -702,6 +701,8 @@ public class LeafQueue implements CSQueue {
           getParent().getQueuePath(), ace);
       throw ace;
     }
+
+    metrics.submitApp(userName);
   }
 
   private synchronized void activateApplications() {

+ 14 - 9
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java

@@ -71,6 +71,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEven
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
@@ -120,10 +121,10 @@ import com.google.common.annotations.VisibleForTesting;
 @LimitedPrivate("yarn")
 @Unstable
 @SuppressWarnings("unchecked")
-public class FairScheduler implements ResourceScheduler {
+public class FairScheduler extends AbstractYarnScheduler implements
+    ResourceScheduler {
   private boolean initialized;
   private FairSchedulerConfiguration conf;
-  private RMContext rmContext;
   private Resource minimumAllocation;
   private Resource maximumAllocation;
   private Resource incrAllocation;
@@ -157,11 +158,6 @@ public class FairScheduler implements ResourceScheduler {
   // Time we last ran preemptTasksIfNecessary
   private long lastPreemptCheckTime;
 
-  // This stores per-application scheduling information,
-  @VisibleForTesting
-  protected Map<ApplicationId, SchedulerApplication> applications =
-      new ConcurrentHashMap<ApplicationId, SchedulerApplication>();
-
   // Nodes in the cluster, indexed by NodeId
   private Map<NodeId, FSSchedulerNode> nodes = 
       new ConcurrentHashMap<NodeId, FSSchedulerNode>();
@@ -643,6 +639,7 @@ public class FairScheduler implements ResourceScheduler {
     SchedulerApplication application =
         new SchedulerApplication(queue, user);
     applications.put(applicationId, application);
+    queue.getMetrics().submitApp(user);
 
     LOG.info("Accepted application " + applicationId + " from user: " + user
         + ", in queue: " + queueName + ", currently num of applications: "
@@ -680,7 +677,7 @@ public class FairScheduler implements ResourceScheduler {
       maxRunningEnforcer.trackNonRunnableApp(attempt);
     }
     
-    queue.getMetrics().submitApp(user, applicationAttemptId.getAttemptId());
+    queue.getMetrics().submitAppAttempt(user);
 
     LOG.info("Added Application Attempt " + applicationAttemptId
         + " to scheduler from user: " + user);
@@ -714,6 +711,12 @@ public class FairScheduler implements ResourceScheduler {
 
   private synchronized void removeApplication(ApplicationId applicationId,
       RMAppState finalState) {
+    SchedulerApplication application = applications.get(applicationId);
+    if (application == null){
+      LOG.warn("Couldn't find application " + applicationId);
+      return;
+    }
+    application.stop(finalState);
     applications.remove(applicationId);
   }
 
@@ -1228,6 +1231,9 @@ public class FairScheduler implements ResourceScheduler {
       
       rootMetrics = FSQueueMetrics.forQueue("root", null, true, conf);
       this.rmContext = rmContext;
+      // This stores per-application scheduling information
+      this.applications =
+          new ConcurrentHashMap<ApplicationId, SchedulerApplication>();
       this.eventLog = new FairSchedulerEventLog();
       eventLog.init(this.conf);
 
@@ -1350,5 +1356,4 @@ public class FairScheduler implements ResourceScheduler {
     queue.collectSchedulerApplications(apps);
     return apps;
   }
-
 }

+ 1361 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java.orig

@@ -0,0 +1,1361 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience.LimitedPrivate;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport;
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
+import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.hadoop.yarn.api.records.Priority;
+import org.apache.hadoop.yarn.api.records.QueueACL;
+import org.apache.hadoop.yarn.api.records.QueueInfo;
+import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceRequest;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
+import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
+import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger;
+import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
+import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
+import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceWeights;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRejectedEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
+import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
+import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType;
+import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState;
+import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
+import org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.ContainerExpiredSchedulerEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager;
+import org.apache.hadoop.yarn.util.Clock;
+import org.apache.hadoop.yarn.util.SystemClock;
+import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;
+import org.apache.hadoop.yarn.util.resource.DominantResourceCalculator;
+import org.apache.hadoop.yarn.util.resource.ResourceCalculator;
+import org.apache.hadoop.yarn.util.resource.Resources;
+
+import com.google.common.annotations.VisibleForTesting;
+
+/**
+ * A scheduler that schedules resources between a set of queues. The scheduler
+ * keeps track of the resources used by each queue, and attempts to maintain
+ * fairness by scheduling tasks at queues whose allocations are farthest below
+ * an ideal fair distribution.
+ * 
+ * The fair scheduler supports hierarchical queues. All queues descend from a
+ * queue named "root". Available resources are distributed among the children
+ * of the root queue in the typical fair scheduling fashion. Then, the children
+ * distribute the resources assigned to them to their children in the same
+ * fashion.  Applications may only be scheduled on leaf queues. Queues can be
+ * specified as children of other queues by placing them as sub-elements of their
+ * parents in the fair scheduler configuration file.
+ * 
+ * A queue's name starts with the names of its parents, with periods as
+ * separators.  So a queue named "queue1" under the root named, would be 
+ * referred to as "root.queue1", and a queue named "queue2" under a queue
+ * named "parent1" would be referred to as "root.parent1.queue2".
+ */
+@LimitedPrivate("yarn")
+@Unstable
+@SuppressWarnings("unchecked")
+public class FairScheduler implements ResourceScheduler {
+  private boolean initialized;
+  private FairSchedulerConfiguration conf;
+  private RMContext rmContext;
+  private Resource minimumAllocation;
+  private Resource maximumAllocation;
+  private Resource incrAllocation;
+  private QueueManager queueMgr;
+  private Clock clock;
+  private boolean usePortForNodeName;
+
+  private static final Log LOG = LogFactory.getLog(FairScheduler.class);
+  
+  private static final ResourceCalculator RESOURCE_CALCULATOR =
+      new DefaultResourceCalculator();
+  
+  // Value that container assignment methods return when a container is
+  // reserved
+  public static final Resource CONTAINER_RESERVED = Resources.createResource(-1);
+
+  // How often fair shares are re-calculated (ms)
+  protected long UPDATE_INTERVAL = 500;
+
+  private final static List<Container> EMPTY_CONTAINER_LIST =
+      new ArrayList<Container>();
+
+  private static final Allocation EMPTY_ALLOCATION =
+      new Allocation(EMPTY_CONTAINER_LIST, Resources.createResource(0));
+
+  // Aggregate metrics
+  FSQueueMetrics rootMetrics;
+
+  // Time when we last updated preemption vars
+  protected long lastPreemptionUpdateTime;
+  // Time we last ran preemptTasksIfNecessary
+  private long lastPreemptCheckTime;
+
+  // This stores per-application scheduling information,
+  @VisibleForTesting
+  protected Map<ApplicationId, SchedulerApplication> applications =
+      new ConcurrentHashMap<ApplicationId, SchedulerApplication>();
+
+  // Nodes in the cluster, indexed by NodeId
+  private Map<NodeId, FSSchedulerNode> nodes = 
+      new ConcurrentHashMap<NodeId, FSSchedulerNode>();
+
+  // Aggregate capacity of the cluster
+  private Resource clusterCapacity = 
+      RecordFactoryProvider.getRecordFactory(null).newRecordInstance(Resource.class);
+
+  // How often tasks are preempted 
+  protected long preemptionInterval; 
+  
+  // ms to wait before force killing stuff (must be longer than a couple
+  // of heartbeats to give task-kill commands a chance to act).
+  protected long waitTimeBeforeKill; 
+  
+  // Containers whose AMs have been warned that they will be preempted soon.
+  private List<RMContainer> warnedContainers = new ArrayList<RMContainer>();
+  
+  protected boolean preemptionEnabled;
+  protected boolean sizeBasedWeight; // Give larger weights to larger jobs
+  protected WeightAdjuster weightAdjuster; // Can be null for no weight adjuster
+  protected boolean continuousSchedulingEnabled; // Continuous Scheduling enabled or not
+  protected int continuousSchedulingSleepMs; // Sleep time for each pass in continuous scheduling
+  private Comparator nodeAvailableResourceComparator =
+          new NodeAvailableResourceComparator(); // Node available resource comparator
+  protected double nodeLocalityThreshold; // Cluster threshold for node locality
+  protected double rackLocalityThreshold; // Cluster threshold for rack locality
+  protected long nodeLocalityDelayMs; // Delay for node locality
+  protected long rackLocalityDelayMs; // Delay for rack locality
+  private FairSchedulerEventLog eventLog; // Machine-readable event log
+  protected boolean assignMultiple; // Allocate multiple containers per
+                                    // heartbeat
+  protected int maxAssign; // Max containers to assign per heartbeat
+
+  @VisibleForTesting
+  final MaxRunningAppsEnforcer maxRunningEnforcer;
+
+  private AllocationFileLoaderService allocsLoader;
+  @VisibleForTesting
+  AllocationConfiguration allocConf;
+  
+  public FairScheduler() {
+    clock = new SystemClock();
+    allocsLoader = new AllocationFileLoaderService();
+    queueMgr = new QueueManager(this);
+    maxRunningEnforcer = new MaxRunningAppsEnforcer(this);
+  }
+
+  private void validateConf(Configuration conf) {
+    // validate scheduler memory allocation setting
+    int minMem = conf.getInt(
+      YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
+      YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB);
+    int maxMem = conf.getInt(
+      YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB,
+      YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB);
+
+    if (minMem < 0 || minMem > maxMem) {
+      throw new YarnRuntimeException("Invalid resource scheduler memory"
+        + " allocation configuration"
+        + ", " + YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB
+        + "=" + minMem
+        + ", " + YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB
+        + "=" + maxMem + ", min should equal greater than 0"
+        + ", max should be no smaller than min.");
+    }
+
+    // validate scheduler vcores allocation setting
+    int minVcores = conf.getInt(
+      YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES,
+      YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES);
+    int maxVcores = conf.getInt(
+      YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES,
+      YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES);
+
+    if (minVcores < 0 || minVcores > maxVcores) {
+      throw new YarnRuntimeException("Invalid resource scheduler vcores"
+        + " allocation configuration"
+        + ", " + YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES
+        + "=" + minVcores
+        + ", " + YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES
+        + "=" + maxVcores + ", min should equal greater than 0"
+        + ", max should be no smaller than min.");
+    }
+  }
+
+  public FairSchedulerConfiguration getConf() {
+    return conf;
+  }
+
+  public QueueManager getQueueManager() {
+    return queueMgr;
+  }
+
+  @Override
+  public RMContainer getRMContainer(ContainerId containerId) {
+    FSSchedulerApp attempt = getCurrentAttemptForContainer(containerId);
+    return (attempt == null) ? null : attempt.getRMContainer(containerId);
+  }
+
+  private FSSchedulerApp getCurrentAttemptForContainer(
+      ContainerId containerId) {
+    SchedulerApplication app =
+        applications.get(containerId.getApplicationAttemptId()
+          .getApplicationId());
+    if (app != null) {
+      return (FSSchedulerApp) app.getCurrentAppAttempt();
+    }
+    return null;
+  }
+
+  /**
+   * A runnable which calls {@link FairScheduler#update()} every
+   * <code>UPDATE_INTERVAL</code> milliseconds.
+   */
+  private class UpdateThread implements Runnable {
+    public void run() {
+      while (true) {
+        try {
+          Thread.sleep(UPDATE_INTERVAL);
+          update();
+          preemptTasksIfNecessary();
+        } catch (Exception e) {
+          LOG.error("Exception in fair scheduler UpdateThread", e);
+        }
+      }
+    }
+  }
+
+  /**
+   * Recompute the internal variables used by the scheduler - per-job weights,
+   * fair shares, deficits, minimum slot allocations, and amount of used and
+   * required resources per job.
+   */
+  protected synchronized void update() {
+    updatePreemptionVariables(); // Determine if any queues merit preemption
+
+    FSQueue rootQueue = queueMgr.getRootQueue();
+
+    // Recursively update demands for all queues
+    rootQueue.updateDemand();
+
+    rootQueue.setFairShare(clusterCapacity);
+    // Recursively compute fair shares for all queues
+    // and update metrics
+    rootQueue.recomputeShares();
+  }
+
+  /**
+   * Update the preemption fields for all QueueScheduables, i.e. the times since
+   * each queue last was at its guaranteed share and at > 1/2 of its fair share
+   * for each type of task.
+   */
+  private void updatePreemptionVariables() {
+    long now = clock.getTime();
+    lastPreemptionUpdateTime = now;
+    for (FSLeafQueue sched : queueMgr.getLeafQueues()) {
+      if (!isStarvedForMinShare(sched)) {
+        sched.setLastTimeAtMinShare(now);
+      }
+      if (!isStarvedForFairShare(sched)) {
+        sched.setLastTimeAtHalfFairShare(now);
+      }
+    }
+  }
+
+  /**
+   * Is a queue below its min share for the given task type?
+   */
+  boolean isStarvedForMinShare(FSLeafQueue sched) {
+    Resource desiredShare = Resources.min(RESOURCE_CALCULATOR, clusterCapacity,
+      sched.getMinShare(), sched.getDemand());
+    return Resources.lessThan(RESOURCE_CALCULATOR, clusterCapacity,
+        sched.getResourceUsage(), desiredShare);
+  }
+
+  /**
+   * Is a queue being starved for fair share for the given task type? This is
+   * defined as being below half its fair share.
+   */
+  boolean isStarvedForFairShare(FSLeafQueue sched) {
+    Resource desiredFairShare = Resources.min(RESOURCE_CALCULATOR, clusterCapacity,
+        Resources.multiply(sched.getFairShare(), .5), sched.getDemand());
+    return Resources.lessThan(RESOURCE_CALCULATOR, clusterCapacity,
+        sched.getResourceUsage(), desiredFairShare);
+  }
+
+  /**
+   * Check for queues that need tasks preempted, either because they have been
+   * below their guaranteed share for minSharePreemptionTimeout or they have
+   * been below half their fair share for the fairSharePreemptionTimeout. If
+   * such queues exist, compute how many tasks of each type need to be preempted
+   * and then select the right ones using preemptTasks.
+   */
+  protected synchronized void preemptTasksIfNecessary() {
+    if (!preemptionEnabled) {
+      return;
+    }
+
+    long curTime = clock.getTime();
+    if (curTime - lastPreemptCheckTime < preemptionInterval) {
+      return;
+    }
+    lastPreemptCheckTime = curTime;
+
+    Resource resToPreempt = Resources.none();
+
+    for (FSLeafQueue sched : queueMgr.getLeafQueues()) {
+      resToPreempt = Resources.add(resToPreempt, resToPreempt(sched, curTime));
+    }
+    if (Resources.greaterThan(RESOURCE_CALCULATOR, clusterCapacity, resToPreempt,
+        Resources.none())) {
+      preemptResources(queueMgr.getLeafQueues(), resToPreempt);
+    }
+  }
+
+  /**
+   * Preempt a quantity of resources from a list of QueueSchedulables. The
+   * policy for this is to pick apps from queues that are over their fair share,
+   * but make sure that no queue is placed below its fair share in the process.
+   * We further prioritize preemption by choosing containers with lowest
+   * priority to preempt.
+   */
+  protected void preemptResources(Collection<FSLeafQueue> scheds,
+      Resource toPreempt) {
+    if (scheds.isEmpty() || Resources.equals(toPreempt, Resources.none())) {
+      return;
+    }
+
+    Map<RMContainer, FSSchedulerApp> apps = 
+        new HashMap<RMContainer, FSSchedulerApp>();
+    Map<RMContainer, FSLeafQueue> queues = 
+        new HashMap<RMContainer, FSLeafQueue>();
+
+    // Collect running containers from over-scheduled queues
+    List<RMContainer> runningContainers = new ArrayList<RMContainer>();
+    for (FSLeafQueue sched : scheds) {
+      if (Resources.greaterThan(RESOURCE_CALCULATOR, clusterCapacity,
+          sched.getResourceUsage(), sched.getFairShare())) {
+        for (AppSchedulable as : sched.getRunnableAppSchedulables()) {
+          for (RMContainer c : as.getApp().getLiveContainers()) {
+            runningContainers.add(c);
+            apps.put(c, as.getApp());
+            queues.put(c, sched);
+          }
+        }
+      }
+    }
+
+    // Sort containers into reverse order of priority
+    Collections.sort(runningContainers, new Comparator<RMContainer>() {
+      public int compare(RMContainer c1, RMContainer c2) {
+        int ret = c1.getContainer().getPriority().compareTo(
+            c2.getContainer().getPriority());
+        if (ret == 0) {
+          return c2.getContainerId().compareTo(c1.getContainerId());
+        }
+        return ret;
+      }
+    });
+    
+    // Scan down the list of containers we've already warned and kill them
+    // if we need to.  Remove any containers from the list that we don't need
+    // or that are no longer running.
+    Iterator<RMContainer> warnedIter = warnedContainers.iterator();
+    Set<RMContainer> preemptedThisRound = new HashSet<RMContainer>();
+    while (warnedIter.hasNext()) {
+      RMContainer container = warnedIter.next();
+      if (container.getState() == RMContainerState.RUNNING &&
+          Resources.greaterThan(RESOURCE_CALCULATOR, clusterCapacity,
+              toPreempt, Resources.none())) {
+        warnOrKillContainer(container, apps.get(container), queues.get(container));
+        preemptedThisRound.add(container);
+        Resources.subtractFrom(toPreempt, container.getContainer().getResource());
+      } else {
+        warnedIter.remove();
+      }
+    }
+
+    // Scan down the rest of the containers until we've preempted enough, making
+    // sure we don't preempt too many from any queue
+    Iterator<RMContainer> runningIter = runningContainers.iterator();
+    while (runningIter.hasNext() &&
+        Resources.greaterThan(RESOURCE_CALCULATOR, clusterCapacity,
+            toPreempt, Resources.none())) {
+      RMContainer container = runningIter.next();
+      FSLeafQueue sched = queues.get(container);
+      if (!preemptedThisRound.contains(container) &&
+          Resources.greaterThan(RESOURCE_CALCULATOR, clusterCapacity,
+              sched.getResourceUsage(), sched.getFairShare())) {
+        warnOrKillContainer(container, apps.get(container), sched);
+        
+        warnedContainers.add(container);
+        Resources.subtractFrom(toPreempt, container.getContainer().getResource());
+      }
+    }
+  }
+  
+  private void warnOrKillContainer(RMContainer container, FSSchedulerApp app,
+      FSLeafQueue queue) {
+    LOG.info("Preempting container (prio=" + container.getContainer().getPriority() +
+        "res=" + container.getContainer().getResource() +
+        ") from queue " + queue.getName());
+    
+    Long time = app.getContainerPreemptionTime(container);
+
+    if (time != null) {
+      // if we asked for preemption more than maxWaitTimeBeforeKill ms ago,
+      // proceed with kill
+      if (time + waitTimeBeforeKill < clock.getTime()) {
+        ContainerStatus status =
+          SchedulerUtils.createPreemptedContainerStatus(
+            container.getContainerId(), SchedulerUtils.PREEMPTED_CONTAINER);
+
+        // TODO: Not sure if this ever actually adds this to the list of cleanup
+        // containers on the RMNode (see SchedulerNode.releaseContainer()).
+        completedContainer(container, status, RMContainerEventType.KILL);
+        LOG.info("Killing container" + container +
+            " (after waiting for premption for " +
+            (clock.getTime() - time) + "ms)");
+      }
+    } else {
+      // track the request in the FSSchedulerApp itself
+      app.addPreemption(container, clock.getTime());
+    }
+  }
+
+  /**
+   * Return the resource amount that this queue is allowed to preempt, if any.
+   * If the queue has been below its min share for at least its preemption
+   * timeout, it should preempt the difference between its current share and
+   * this min share. If it has been below half its fair share for at least the
+   * fairSharePreemptionTimeout, it should preempt enough tasks to get up to its
+   * full fair share. If both conditions hold, we preempt the max of the two
+   * amounts (this shouldn't happen unless someone sets the timeouts to be
+   * identical for some reason).
+   */
+  protected Resource resToPreempt(FSLeafQueue sched, long curTime) {
+    String queue = sched.getName();
+    long minShareTimeout = allocConf.getMinSharePreemptionTimeout(queue);
+    long fairShareTimeout = allocConf.getFairSharePreemptionTimeout();
+    Resource resDueToMinShare = Resources.none();
+    Resource resDueToFairShare = Resources.none();
+    if (curTime - sched.getLastTimeAtMinShare() > minShareTimeout) {
+      Resource target = Resources.min(RESOURCE_CALCULATOR, clusterCapacity,
+          sched.getMinShare(), sched.getDemand());
+      resDueToMinShare = Resources.max(RESOURCE_CALCULATOR, clusterCapacity,
+          Resources.none(), Resources.subtract(target, sched.getResourceUsage()));
+    }
+    if (curTime - sched.getLastTimeAtHalfFairShare() > fairShareTimeout) {
+      Resource target = Resources.min(RESOURCE_CALCULATOR, clusterCapacity,
+          sched.getFairShare(), sched.getDemand());
+      resDueToFairShare = Resources.max(RESOURCE_CALCULATOR, clusterCapacity,
+          Resources.none(), Resources.subtract(target, sched.getResourceUsage()));
+    }
+    Resource resToPreempt = Resources.max(RESOURCE_CALCULATOR, clusterCapacity,
+        resDueToMinShare, resDueToFairShare);
+    if (Resources.greaterThan(RESOURCE_CALCULATOR, clusterCapacity,
+        resToPreempt, Resources.none())) {
+      String message = "Should preempt " + resToPreempt + " res for queue "
+          + sched.getName() + ": resDueToMinShare = " + resDueToMinShare
+          + ", resDueToFairShare = " + resDueToFairShare;
+      LOG.info(message);
+    }
+    return resToPreempt;
+  }
+
+  public RMContainerTokenSecretManager getContainerTokenSecretManager() {
+    return rmContext.getContainerTokenSecretManager();
+  }
+
+  // synchronized for sizeBasedWeight
+  public synchronized ResourceWeights getAppWeight(AppSchedulable app) {
+    double weight = 1.0;
+    if (sizeBasedWeight) {
+      // Set weight based on current memory demand
+      weight = Math.log1p(app.getDemand().getMemory()) / Math.log(2);
+    }
+    weight *= app.getPriority().getPriority();
+    if (weightAdjuster != null) {
+      // Run weight through the user-supplied weightAdjuster
+      weight = weightAdjuster.adjustWeight(app, weight);
+    }
+    return new ResourceWeights((float)weight);
+  }
+
+  @Override
+  public Resource getMinimumResourceCapability() {
+    return minimumAllocation;
+  }
+
+  public Resource getIncrementResourceCapability() {
+    return incrAllocation;
+  }
+
+  @Override
+  public Resource getMaximumResourceCapability() {
+    return maximumAllocation;
+  }
+
+  public double getNodeLocalityThreshold() {
+    return nodeLocalityThreshold;
+  }
+
+  public double getRackLocalityThreshold() {
+    return rackLocalityThreshold;
+  }
+
+  public long getNodeLocalityDelayMs() {
+    return nodeLocalityDelayMs;
+  }
+
+  public long getRackLocalityDelayMs() {
+    return rackLocalityDelayMs;
+  }
+
+  public boolean isContinuousSchedulingEnabled() {
+    return continuousSchedulingEnabled;
+  }
+
+  public synchronized int getContinuousSchedulingSleepMs() {
+    return continuousSchedulingSleepMs;
+  }
+
+  public Resource getClusterCapacity() {
+    return clusterCapacity;
+  }
+
+  public synchronized Clock getClock() {
+    return clock;
+  }
+
+  protected synchronized void setClock(Clock clock) {
+    this.clock = clock;
+  }
+
+  public FairSchedulerEventLog getEventLog() {
+    return eventLog;
+  }
+
+  /**
+   * Add a new application to the scheduler, with a given id, queue name, and
+   * user. This will accept a new app even if the user or queue is above
+   * configured limits, but the app will not be marked as runnable.
+   */
+  protected synchronized void addApplication(ApplicationId applicationId,
+      String queueName, String user) {
+    if (queueName == null || queueName.isEmpty()) {
+      String message = "Reject application " + applicationId +
+              " submitted by user " + user + " with an empty queue name.";
+      LOG.info(message);
+      rmContext.getDispatcher().getEventHandler()
+          .handle(new RMAppRejectedEvent(applicationId, message));
+      return;
+    }
+
+    RMApp rmApp = rmContext.getRMApps().get(applicationId);
+    FSLeafQueue queue = assignToQueue(rmApp, queueName, user);
+    if (queue == null) {
+      rmContext.getDispatcher().getEventHandler().handle(
+          new RMAppRejectedEvent(applicationId,
+              "Application rejected by queue placement policy"));
+      return;
+    }
+
+    // Enforce ACLs
+    UserGroupInformation userUgi = UserGroupInformation.createRemoteUser(user);
+
+    if (!queue.hasAccess(QueueACL.SUBMIT_APPLICATIONS, userUgi)
+        && !queue.hasAccess(QueueACL.ADMINISTER_QUEUE, userUgi)) {
+      String msg = "User " + userUgi.getUserName() +
+              " cannot submit applications to queue " + queue.getName();
+      LOG.info(msg);
+      rmContext.getDispatcher().getEventHandler()
+          .handle(new RMAppRejectedEvent(applicationId, msg));
+      return;
+    }
+  
+    SchedulerApplication application =
+        new SchedulerApplication(queue, user);
+    applications.put(applicationId, application);
+    queue.getMetrics().submitApp(user);
+
+    LOG.info("Accepted application " + applicationId + " from user: " + user
+        + ", in queue: " + queueName + ", currently num of applications: "
+        + applications.size());
+    rmContext.getDispatcher().getEventHandler()
+        .handle(new RMAppEvent(applicationId, RMAppEventType.APP_ACCEPTED));
+  }
+
+  /**
+   * Add a new application attempt to the scheduler.
+   */
+  protected synchronized void addApplicationAttempt(
+      ApplicationAttemptId applicationAttemptId,
+      boolean transferStateFromPreviousAttempt) {
+    SchedulerApplication application =
+        applications.get(applicationAttemptId.getApplicationId());
+    String user = application.getUser();
+    FSLeafQueue queue = (FSLeafQueue) application.getQueue();
+
+    FSSchedulerApp attempt =
+        new FSSchedulerApp(applicationAttemptId, user,
+            queue, new ActiveUsersManager(getRootQueueMetrics()),
+            rmContext);
+    if (transferStateFromPreviousAttempt) {
+      attempt.transferStateFromPreviousAttempt(application
+        .getCurrentAppAttempt());
+    }
+    application.setCurrentAppAttempt(attempt);
+
+    boolean runnable = maxRunningEnforcer.canAppBeRunnable(queue, user);
+    queue.addApp(attempt, runnable);
+    if (runnable) {
+      maxRunningEnforcer.trackRunnableApp(attempt);
+    } else {
+      maxRunningEnforcer.trackNonRunnableApp(attempt);
+    }
+    
+    queue.getMetrics().submitAppAttempt(user);
+
+    LOG.info("Added Application Attempt " + applicationAttemptId
+        + " to scheduler from user: " + user);
+    rmContext.getDispatcher().getEventHandler().handle(
+        new RMAppAttemptEvent(applicationAttemptId,
+            RMAppAttemptEventType.ATTEMPT_ADDED));
+  }
+  
+  @VisibleForTesting
+  FSLeafQueue assignToQueue(RMApp rmApp, String queueName, String user) {
+    FSLeafQueue queue = null;
+    try {
+      QueuePlacementPolicy placementPolicy = allocConf.getPlacementPolicy();
+      queueName = placementPolicy.assignAppToQueue(queueName, user);
+      if (queueName == null) {
+        return null;
+      }
+      queue = queueMgr.getLeafQueue(queueName, true);
+    } catch (IOException ex) {
+      LOG.error("Error assigning app to queue, rejecting", ex);
+    }
+    
+    if (rmApp != null) {
+      rmApp.setQueue(queue.getName());
+    } else {
+      LOG.warn("Couldn't find RM app to set queue name on");
+    }
+    
+    return queue;
+  }
+
+  private synchronized void removeApplication(ApplicationId applicationId,
+      RMAppState finalState) {
+    SchedulerApplication application = applications.get(applicationId);
+    if (application == null){
+      LOG.warn("Couldn't find application " + applicationId);
+      return;
+    }
+    application.stop(finalState);
+    applications.remove(applicationId);
+  }
+
+  private synchronized void removeApplicationAttempt(
+      ApplicationAttemptId applicationAttemptId,
+      RMAppAttemptState rmAppAttemptFinalState, boolean keepContainers) {
+    LOG.info("Application " + applicationAttemptId + " is done." +
+        " finalState=" + rmAppAttemptFinalState);
+    SchedulerApplication application =
+        applications.get(applicationAttemptId.getApplicationId());
+    FSSchedulerApp attempt = getSchedulerApp(applicationAttemptId);
+
+    if (attempt == null || application == null) {
+      LOG.info("Unknown application " + applicationAttemptId + " has completed!");
+      return;
+    }
+
+    // Release all the running containers
+    for (RMContainer rmContainer : attempt.getLiveContainers()) {
+      if (keepContainers
+          && rmContainer.getState().equals(RMContainerState.RUNNING)) {
+        // do not kill the running container in the case of work-preserving AM
+        // restart.
+        LOG.info("Skip killing " + rmContainer.getContainerId());
+        continue;
+      }
+      completedContainer(rmContainer,
+          SchedulerUtils.createAbnormalContainerStatus(
+              rmContainer.getContainerId(),
+              SchedulerUtils.COMPLETED_APPLICATION),
+              RMContainerEventType.KILL);
+    }
+
+    // Release all reserved containers
+    for (RMContainer rmContainer : attempt.getReservedContainers()) {
+      completedContainer(rmContainer,
+          SchedulerUtils.createAbnormalContainerStatus(
+              rmContainer.getContainerId(),
+              "Application Complete"),
+              RMContainerEventType.KILL);
+    }
+    // Clean up pending requests, metrics etc.
+    attempt.stop(rmAppAttemptFinalState);
+
+    // Inform the queue
+    FSLeafQueue queue = queueMgr.getLeafQueue(attempt.getQueue()
+        .getQueueName(), false);
+    boolean wasRunnable = queue.removeApp(attempt);
+
+    if (wasRunnable) {
+      maxRunningEnforcer.updateRunnabilityOnAppRemoval(attempt);
+    } else {
+      maxRunningEnforcer.untrackNonRunnableApp(attempt);
+    }
+  }
+
+  /**
+   * Clean up a completed container.
+   */
+  private synchronized void completedContainer(RMContainer rmContainer,
+      ContainerStatus containerStatus, RMContainerEventType event) {
+    if (rmContainer == null) {
+      LOG.info("Null container completed...");
+      return;
+    }
+
+    Container container = rmContainer.getContainer();
+
+    // Get the application for the finished container
+    FSSchedulerApp application =
+        getCurrentAttemptForContainer(container.getId());
+    ApplicationId appId =
+        container.getId().getApplicationAttemptId().getApplicationId();
+    if (application == null) {
+      LOG.info("Container " + container + " of" +
+          " unknown application attempt " + appId +
+          " completed with event " + event);
+      return;
+    }
+
+    // Get the node on which the container was allocated
+    FSSchedulerNode node = nodes.get(container.getNodeId());
+
+    if (rmContainer.getState() == RMContainerState.RESERVED) {
+      application.unreserve(node, rmContainer.getReservedPriority());
+      node.unreserveResource(application);
+    } else {
+      application.containerCompleted(rmContainer, containerStatus, event);
+      node.releaseContainer(container);
+      updateRootQueueMetrics();
+    }
+
+    LOG.info("Application attempt " + application.getApplicationAttemptId()
+        + " released container " + container.getId() + " on node: " + node
+        + " with event: " + event);
+  }
+
+  private synchronized void addNode(RMNode node) {
+    nodes.put(node.getNodeID(), new FSSchedulerNode(node, usePortForNodeName));
+    Resources.addTo(clusterCapacity, node.getTotalCapability());
+    updateRootQueueMetrics();
+
+    LOG.info("Added node " + node.getNodeAddress() +
+        " cluster capacity: " + clusterCapacity);
+  }
+
+  private synchronized void removeNode(RMNode rmNode) {
+    FSSchedulerNode node = nodes.get(rmNode.getNodeID());
+    // This can occur when an UNHEALTHY node reconnects
+    if (node == null) {
+      return;
+    }
+    Resources.subtractFrom(clusterCapacity, rmNode.getTotalCapability());
+    updateRootQueueMetrics();
+
+    // Remove running containers
+    List<RMContainer> runningContainers = node.getRunningContainers();
+    for (RMContainer container : runningContainers) {
+      completedContainer(container,
+          SchedulerUtils.createAbnormalContainerStatus(
+              container.getContainerId(),
+              SchedulerUtils.LOST_CONTAINER),
+          RMContainerEventType.KILL);
+    }
+
+    // Remove reservations, if any
+    RMContainer reservedContainer = node.getReservedContainer();
+    if (reservedContainer != null) {
+      completedContainer(reservedContainer,
+          SchedulerUtils.createAbnormalContainerStatus(
+              reservedContainer.getContainerId(),
+              SchedulerUtils.LOST_CONTAINER),
+          RMContainerEventType.KILL);
+    }
+
+    nodes.remove(rmNode.getNodeID());
+    LOG.info("Removed node " + rmNode.getNodeAddress() +
+        " cluster capacity: " + clusterCapacity);
+  }
+
+  @Override
+  public Allocation allocate(ApplicationAttemptId appAttemptId,
+      List<ResourceRequest> ask, List<ContainerId> release, List<String> blacklistAdditions, List<String> blacklistRemovals) {
+
+    // Make sure this application exists
+    FSSchedulerApp application = getSchedulerApp(appAttemptId);
+    if (application == null) {
+      LOG.info("Calling allocate on removed " +
+          "or non existant application " + appAttemptId);
+      return EMPTY_ALLOCATION;
+    }
+
+    // Sanity check
+    SchedulerUtils.normalizeRequests(ask, new DominantResourceCalculator(),
+        clusterCapacity, minimumAllocation, maximumAllocation, incrAllocation);
+
+    // Release containers
+    for (ContainerId releasedContainerId : release) {
+      RMContainer rmContainer = getRMContainer(releasedContainerId);
+      if (rmContainer == null) {
+        RMAuditLogger.logFailure(application.getUser(),
+            AuditConstants.RELEASE_CONTAINER,
+            "Unauthorized access or invalid container", "FairScheduler",
+            "Trying to release container not owned by app or with invalid id",
+            application.getApplicationId(), releasedContainerId);
+      }
+      completedContainer(rmContainer,
+          SchedulerUtils.createAbnormalContainerStatus(
+              releasedContainerId,
+              SchedulerUtils.RELEASED_CONTAINER),
+          RMContainerEventType.RELEASED);
+    }
+
+    synchronized (application) {
+      if (!ask.isEmpty()) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("allocate: pre-update" +
+              " applicationAttemptId=" + appAttemptId +
+              " application=" + application.getApplicationId());
+        }
+        application.showRequests();
+
+        // Update application requests
+        application.updateResourceRequests(ask);
+
+        LOG.debug("allocate: post-update");
+        application.showRequests();
+      }
+
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("allocate:" +
+            " applicationAttemptId=" + appAttemptId +
+            " #ask=" + ask.size());
+
+        LOG.debug("Preempting " + application.getPreemptionContainers().size()
+            + " container(s)");
+      }
+      
+      Set<ContainerId> preemptionContainerIds = new HashSet<ContainerId>();
+      for (RMContainer container : application.getPreemptionContainers()) {
+        preemptionContainerIds.add(container.getContainerId());
+      }
+
+      application.updateBlacklist(blacklistAdditions, blacklistRemovals);
+      
+      return new Allocation(application.pullNewlyAllocatedContainers(),
+          application.getHeadroom(), preemptionContainerIds);
+    }
+  }
+
+  /**
+   * Process a container which has launched on a node, as reported by the node.
+   */
+  private void containerLaunchedOnNode(ContainerId containerId, FSSchedulerNode node) {
+    // Get the application for the finished container
+    FSSchedulerApp application = getCurrentAttemptForContainer(containerId);
+    if (application == null) {
+      LOG.info("Unknown application "
+          + containerId.getApplicationAttemptId().getApplicationId()
+          + " launched container " + containerId + " on node: " + node);
+      return;
+    }
+
+    application.containerLaunchedOnNode(containerId, node.getNodeID());
+  }
+
+  /**
+   * Process a heartbeat update from a node.
+   */
+  private synchronized void nodeUpdate(RMNode nm) {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("nodeUpdate: " + nm + " cluster capacity: " + clusterCapacity);
+    }
+    eventLog.log("HEARTBEAT", nm.getHostName());
+    FSSchedulerNode node = nodes.get(nm.getNodeID());
+
+    // Update resource if any change
+    SchedulerUtils.updateResourceIfChanged(node, nm, clusterCapacity, LOG);
+    
+    List<UpdatedContainerInfo> containerInfoList = nm.pullContainerUpdates();
+    List<ContainerStatus> newlyLaunchedContainers = new ArrayList<ContainerStatus>();
+    List<ContainerStatus> completedContainers = new ArrayList<ContainerStatus>();
+    for(UpdatedContainerInfo containerInfo : containerInfoList) {
+      newlyLaunchedContainers.addAll(containerInfo.getNewlyLaunchedContainers());
+      completedContainers.addAll(containerInfo.getCompletedContainers());
+    } 
+    // Processing the newly launched containers
+    for (ContainerStatus launchedContainer : newlyLaunchedContainers) {
+      containerLaunchedOnNode(launchedContainer.getContainerId(), node);
+    }
+
+    // Process completed containers
+    for (ContainerStatus completedContainer : completedContainers) {
+      ContainerId containerId = completedContainer.getContainerId();
+      LOG.debug("Container FINISHED: " + containerId);
+      completedContainer(getRMContainer(containerId),
+          completedContainer, RMContainerEventType.FINISHED);
+    }
+
+    if (continuousSchedulingEnabled) {
+      if (!completedContainers.isEmpty()) {
+        attemptScheduling(node);
+      }
+    } else {
+      attemptScheduling(node);
+    }
+  }
+
+  private void continuousScheduling() {
+    while (true) {
+      List<NodeId> nodeIdList = new ArrayList<NodeId>(nodes.keySet());
+      Collections.sort(nodeIdList, nodeAvailableResourceComparator);
+
+      // iterate all nodes
+      for (NodeId nodeId : nodeIdList) {
+        if (nodes.containsKey(nodeId)) {
+          FSSchedulerNode node = nodes.get(nodeId);
+          try {
+            if (Resources.fitsIn(minimumAllocation,
+                    node.getAvailableResource())) {
+              attemptScheduling(node);
+            }
+          } catch (Throwable ex) {
+            LOG.warn("Error while attempting scheduling for node " + node +
+                    ": " + ex.toString(), ex);
+          }
+        }
+      }
+      try {
+        Thread.sleep(getContinuousSchedulingSleepMs());
+      } catch (InterruptedException e) {
+        LOG.warn("Error while doing sleep in continuous scheduling: " +
+                e.toString(), e);
+      }
+    }
+  }
+
+  /** Sort nodes by available resource */
+  private class NodeAvailableResourceComparator implements Comparator<NodeId> {
+
+    @Override
+    public int compare(NodeId n1, NodeId n2) {
+      return RESOURCE_CALCULATOR.compare(clusterCapacity,
+              nodes.get(n2).getAvailableResource(),
+              nodes.get(n1).getAvailableResource());
+    }
+  }
+  
+  private synchronized void attemptScheduling(FSSchedulerNode node) {
+    // Assign new containers...
+    // 1. Check for reserved applications
+    // 2. Schedule if there are no reservations
+
+    AppSchedulable reservedAppSchedulable = node.getReservedAppSchedulable();
+    if (reservedAppSchedulable != null) {
+      Priority reservedPriority = node.getReservedContainer().getReservedPriority();
+      if (!reservedAppSchedulable.hasContainerForNode(reservedPriority, node)) {
+        // Don't hold the reservation if app can no longer use it
+        LOG.info("Releasing reservation that cannot be satisfied for application "
+            + reservedAppSchedulable.getApp().getApplicationAttemptId()
+            + " on node " + node);
+        reservedAppSchedulable.unreserve(reservedPriority, node);
+        reservedAppSchedulable = null;
+      } else {
+        // Reservation exists; try to fulfill the reservation
+        LOG.info("Trying to fulfill reservation for application "
+            + reservedAppSchedulable.getApp().getApplicationAttemptId()
+            + " on node: " + node);
+
+        node.getReservedAppSchedulable().assignReservedContainer(node);
+      }
+    }
+    if (reservedAppSchedulable == null) {
+      // No reservation, schedule at queue which is farthest below fair share
+      int assignedContainers = 0;
+      while (node.getReservedContainer() == null) {
+        boolean assignedContainer = false;
+        if (Resources.greaterThan(RESOURCE_CALCULATOR, clusterCapacity,
+              queueMgr.getRootQueue().assignContainer(node),
+              Resources.none())) {
+          assignedContainers++;
+          assignedContainer = true;
+        }
+        if (!assignedContainer) { break; }
+        if (!assignMultiple) { break; }
+        if ((assignedContainers >= maxAssign) && (maxAssign > 0)) { break; }
+      }
+    }
+    updateRootQueueMetrics();
+  }
+
+  @Override
+  public SchedulerNodeReport getNodeReport(NodeId nodeId) {
+    FSSchedulerNode node = nodes.get(nodeId);
+    return node == null ? null : new SchedulerNodeReport(node);
+  }
+  
+  public FSSchedulerApp getSchedulerApp(ApplicationAttemptId appAttemptId) {
+    SchedulerApplication app =
+        applications.get(appAttemptId.getApplicationId());
+    if (app != null) {
+      return (FSSchedulerApp) app.getCurrentAppAttempt();
+    }
+    return null;
+  }
+  
+  @Override
+  public SchedulerAppReport getSchedulerAppInfo(
+      ApplicationAttemptId appAttemptId) {
+    FSSchedulerApp attempt = getSchedulerApp(appAttemptId);
+    if (attempt == null) {
+      LOG.error("Request for appInfo of unknown attempt" + appAttemptId);
+      return null;
+    }
+    return new SchedulerAppReport(attempt);
+  }
+  
+  @Override
+  public ApplicationResourceUsageReport getAppResourceUsageReport(
+      ApplicationAttemptId appAttemptId) {
+    FSSchedulerApp attempt = getSchedulerApp(appAttemptId);
+    if (attempt == null) {
+      LOG.error("Request for appInfo of unknown attempt" + appAttemptId);
+      return null;
+    }
+    return attempt.getResourceUsageReport();
+  }
+  
+  /**
+   * Subqueue metrics might be a little out of date because fair shares are
+   * recalculated at the update interval, but the root queue metrics needs to
+   * be updated synchronously with allocations and completions so that cluster
+   * metrics will be consistent.
+   */
+  private void updateRootQueueMetrics() {
+    rootMetrics.setAvailableResourcesToQueue(
+        Resources.subtract(
+            clusterCapacity, rootMetrics.getAllocatedResources()));
+  }
+
+  @Override
+  public QueueMetrics getRootQueueMetrics() {
+    return rootMetrics;
+  }
+
+  @Override
+  public void handle(SchedulerEvent event) {
+    switch (event.getType()) {
+    case NODE_ADDED:
+      if (!(event instanceof NodeAddedSchedulerEvent)) {
+        throw new RuntimeException("Unexpected event type: " + event);
+      }
+      NodeAddedSchedulerEvent nodeAddedEvent = (NodeAddedSchedulerEvent)event;
+      addNode(nodeAddedEvent.getAddedRMNode());
+      break;
+    case NODE_REMOVED:
+      if (!(event instanceof NodeRemovedSchedulerEvent)) {
+        throw new RuntimeException("Unexpected event type: " + event);
+      }
+      NodeRemovedSchedulerEvent nodeRemovedEvent = (NodeRemovedSchedulerEvent)event;
+      removeNode(nodeRemovedEvent.getRemovedRMNode());
+      break;
+    case NODE_UPDATE:
+      if (!(event instanceof NodeUpdateSchedulerEvent)) {
+        throw new RuntimeException("Unexpected event type: " + event);
+      }
+      NodeUpdateSchedulerEvent nodeUpdatedEvent = (NodeUpdateSchedulerEvent)event;
+      nodeUpdate(nodeUpdatedEvent.getRMNode());
+      break;
+    case APP_ADDED:
+      if (!(event instanceof AppAddedSchedulerEvent)) {
+        throw new RuntimeException("Unexpected event type: " + event);
+      }
+      AppAddedSchedulerEvent appAddedEvent = (AppAddedSchedulerEvent) event;
+      addApplication(appAddedEvent.getApplicationId(),
+        appAddedEvent.getQueue(), appAddedEvent.getUser());
+      break;
+    case APP_REMOVED:
+      if (!(event instanceof AppRemovedSchedulerEvent)) {
+        throw new RuntimeException("Unexpected event type: " + event);
+      }
+      AppRemovedSchedulerEvent appRemovedEvent = (AppRemovedSchedulerEvent)event;
+      removeApplication(appRemovedEvent.getApplicationID(),
+        appRemovedEvent.getFinalState());
+      break;
+    case APP_ATTEMPT_ADDED:
+      if (!(event instanceof AppAttemptAddedSchedulerEvent)) {
+        throw new RuntimeException("Unexpected event type: " + event);
+      }
+      AppAttemptAddedSchedulerEvent appAttemptAddedEvent =
+          (AppAttemptAddedSchedulerEvent) event;
+      addApplicationAttempt(appAttemptAddedEvent.getApplicationAttemptId(),
+        appAttemptAddedEvent.getTransferStateFromPreviousAttempt());
+      break;
+    case APP_ATTEMPT_REMOVED:
+      if (!(event instanceof AppAttemptRemovedSchedulerEvent)) {
+        throw new RuntimeException("Unexpected event type: " + event);
+      }
+      AppAttemptRemovedSchedulerEvent appAttemptRemovedEvent =
+          (AppAttemptRemovedSchedulerEvent) event;
+      removeApplicationAttempt(
+          appAttemptRemovedEvent.getApplicationAttemptID(),
+          appAttemptRemovedEvent.getFinalAttemptState(),
+          appAttemptRemovedEvent.getKeepContainersAcrossAppAttempts());
+      break;
+    case CONTAINER_EXPIRED:
+      if (!(event instanceof ContainerExpiredSchedulerEvent)) {
+        throw new RuntimeException("Unexpected event type: " + event);
+      }
+      ContainerExpiredSchedulerEvent containerExpiredEvent =
+          (ContainerExpiredSchedulerEvent)event;
+      ContainerId containerId = containerExpiredEvent.getContainerId();
+      completedContainer(getRMContainer(containerId),
+          SchedulerUtils.createAbnormalContainerStatus(
+              containerId,
+              SchedulerUtils.EXPIRED_CONTAINER),
+          RMContainerEventType.EXPIRE);
+      break;
+    default:
+      LOG.error("Unknown event arrived at FairScheduler: " + event.toString());
+    }
+  }
+
+  @Override
+  public void recover(RMState state) throws Exception {
+    // NOT IMPLEMENTED
+  }
+
+  @Override
+  public synchronized void reinitialize(Configuration conf, RMContext rmContext)
+      throws IOException {
+    if (!initialized) {
+      this.conf = new FairSchedulerConfiguration(conf);
+      validateConf(this.conf);
+      minimumAllocation = this.conf.getMinimumAllocation();
+      maximumAllocation = this.conf.getMaximumAllocation();
+      incrAllocation = this.conf.getIncrementAllocation();
+      continuousSchedulingEnabled = this.conf.isContinuousSchedulingEnabled();
+      continuousSchedulingSleepMs =
+              this.conf.getContinuousSchedulingSleepMs();
+      nodeLocalityThreshold = this.conf.getLocalityThresholdNode();
+      rackLocalityThreshold = this.conf.getLocalityThresholdRack();
+      nodeLocalityDelayMs = this.conf.getLocalityDelayNodeMs();
+      rackLocalityDelayMs = this.conf.getLocalityDelayRackMs();
+      preemptionEnabled = this.conf.getPreemptionEnabled();
+      assignMultiple = this.conf.getAssignMultiple();
+      maxAssign = this.conf.getMaxAssign();
+      sizeBasedWeight = this.conf.getSizeBasedWeight();
+      preemptionInterval = this.conf.getPreemptionInterval();
+      waitTimeBeforeKill = this.conf.getWaitTimeBeforeKill();
+      usePortForNodeName = this.conf.getUsePortForNodeName();
+      
+      rootMetrics = FSQueueMetrics.forQueue("root", null, true, conf);
+      this.rmContext = rmContext;
+      this.eventLog = new FairSchedulerEventLog();
+      eventLog.init(this.conf);
+
+      initialized = true;
+
+      allocConf = new AllocationConfiguration(conf);
+      try {
+        queueMgr.initialize(conf);
+      } catch (Exception e) {
+        throw new IOException("Failed to start FairScheduler", e);
+      }
+
+      Thread updateThread = new Thread(new UpdateThread());
+      updateThread.setName("FairSchedulerUpdateThread");
+      updateThread.setDaemon(true);
+      updateThread.start();
+
+      if (continuousSchedulingEnabled) {
+        // start continuous scheduling thread
+        Thread schedulingThread = new Thread(
+          new Runnable() {
+            @Override
+            public void run() {
+              continuousScheduling();
+            }
+          }
+        );
+        schedulingThread.setName("ContinuousScheduling");
+        schedulingThread.setDaemon(true);
+        schedulingThread.start();
+      }
+      
+      allocsLoader.init(conf);
+      allocsLoader.setReloadListener(new AllocationReloadListener());
+      // If we fail to load allocations file on initialize, we want to fail
+      // immediately.  After a successful load, exceptions on future reloads
+      // will just result in leaving things as they are.
+      try {
+        allocsLoader.reloadAllocations();
+      } catch (Exception e) {
+        throw new IOException("Failed to initialize FairScheduler", e);
+      }
+      allocsLoader.start();
+    } else {
+      try {
+        allocsLoader.reloadAllocations();
+      } catch (Exception e) {
+        LOG.error("Failed to reload allocations file", e);
+      }
+    }
+  }
+
+  @Override
+  public QueueInfo getQueueInfo(String queueName, boolean includeChildQueues,
+      boolean recursive) throws IOException {
+    if (!queueMgr.exists(queueName)) {
+      throw new IOException("queue " + queueName + " does not exist");
+    }
+    return queueMgr.getQueue(queueName).getQueueInfo(includeChildQueues,
+        recursive);
+  }
+
+  @Override
+  public List<QueueUserACLInfo> getQueueUserAclInfo() {
+    UserGroupInformation user = null;
+    try {
+      user = UserGroupInformation.getCurrentUser();
+    } catch (IOException ioe) {
+      return new ArrayList<QueueUserACLInfo>();
+    }
+
+    return queueMgr.getRootQueue().getQueueUserAclInfo(user);
+  }
+
+  @Override
+  public int getNumClusterNodes() {
+    return nodes.size();
+  }
+
+  @Override
+  public synchronized boolean checkAccess(UserGroupInformation callerUGI,
+      QueueACL acl, String queueName) {
+    FSQueue queue = getQueueManager().getQueue(queueName);
+    if (queue == null) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("ACL not found for queue access-type " + acl
+            + " for queue " + queueName);
+      }
+      return false;
+    }
+    return queue.hasAccess(acl, callerUGI);
+  }
+  
+  public AllocationConfiguration getAllocationConfiguration() {
+    return allocConf;
+  }
+  
+  private class AllocationReloadListener implements
+      AllocationFileLoaderService.Listener {
+
+    @Override
+    public void onReload(AllocationConfiguration queueInfo) {
+      // Commit the reload; also create any queue defined in the alloc file
+      // if it does not already exist, so it can be displayed on the web UI.
+      synchronized (FairScheduler.this) {
+        allocConf = queueInfo;
+        allocConf.getDefaultSchedulingPolicy().initialize(clusterCapacity);
+        queueMgr.updateAllocationConfiguration(allocConf);
+      }
+    }
+  }
+
+  @Override
+  public List<ApplicationAttemptId> getAppsInQueue(String queueName) {
+    FSQueue queue = queueMgr.getQueue(queueName);
+    if (queue == null) {
+      return null;
+    }
+    List<ApplicationAttemptId> apps = new ArrayList<ApplicationAttemptId>();
+    queue.collectSchedulerApplications(apps);
+    return apps;
+  }
+
+}

+ 14 - 9
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java

@@ -71,6 +71,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerStat
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType;
@@ -104,7 +105,8 @@ import com.google.common.annotations.VisibleForTesting;
 @LimitedPrivate("yarn")
 @Evolving
 @SuppressWarnings("unchecked")
-public class FifoScheduler implements ResourceScheduler, Configurable {
+public class FifoScheduler extends AbstractYarnScheduler implements
+    ResourceScheduler, Configurable {
 
   private static final Log LOG = LogFactory.getLog(FifoScheduler.class);
 
@@ -115,7 +117,6 @@ public class FifoScheduler implements ResourceScheduler, Configurable {
 
   private final static Container[] EMPTY_CONTAINER_ARRAY = new Container[] {};
   private final static List<Container> EMPTY_CONTAINER_LIST = Arrays.asList(EMPTY_CONTAINER_ARRAY);
-  private RMContext rmContext;
 
   protected Map<NodeId, FiCaSchedulerNode> nodes = new ConcurrentHashMap<NodeId, FiCaSchedulerNode>();
 
@@ -124,11 +125,6 @@ public class FifoScheduler implements ResourceScheduler, Configurable {
   private Resource maximumAllocation;
   private boolean usePortForNodeName;
 
-  // Use ConcurrentSkipListMap because applications need to be ordered
-  @VisibleForTesting
-  protected Map<ApplicationId, SchedulerApplication> applications =
-      new ConcurrentSkipListMap<ApplicationId, SchedulerApplication>();
-
   private ActiveUsersManager activeUsersManager;
 
   private static final String DEFAULT_QUEUE_NAME = "default";
@@ -243,6 +239,9 @@ public class FifoScheduler implements ResourceScheduler, Configurable {
     if (!this.initialized) {
       validateConf(conf);
       this.rmContext = rmContext;
+      //Use ConcurrentSkipListMap because applications need to be ordered
+      this.applications =
+          new ConcurrentSkipListMap<ApplicationId, SchedulerApplication>();
       this.minimumAllocation = 
         Resources.createResource(conf.getInt(
             YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
@@ -363,8 +362,9 @@ public class FifoScheduler implements ResourceScheduler, Configurable {
   private synchronized void addApplication(ApplicationId applicationId,
       String queue, String user) {
     SchedulerApplication application =
-        new SchedulerApplication(null, user);
+        new SchedulerApplication(DEFAULT_QUEUE, user);
     applications.put(applicationId, application);
+    metrics.submitApp(user);
     LOG.info("Accepted application " + applicationId + " from user: " + user
         + ", currently num of applications: " + applications.size());
     rmContext.getDispatcher().getEventHandler()
@@ -388,7 +388,7 @@ public class FifoScheduler implements ResourceScheduler, Configurable {
     }
     application.setCurrentAppAttempt(schedulerApp);
 
-    metrics.submitApp(user, appAttemptId.getAttemptId());
+    metrics.submitAppAttempt(user);
     LOG.info("Added Application Attempt " + appAttemptId
         + " to scheduler from user " + application.getUser());
     rmContext.getDispatcher().getEventHandler().handle(
@@ -399,10 +399,15 @@ public class FifoScheduler implements ResourceScheduler, Configurable {
   private synchronized void doneApplication(ApplicationId applicationId,
       RMAppState finalState) {
     SchedulerApplication application = applications.get(applicationId);
+    if (application == null){
+      LOG.warn("Couldn't find application " + applicationId);
+      return;
+    }
 
     // Inform the activeUsersManager
     activeUsersManager.deactivateApplication(application.getUser(),
       applicationId);
+    application.stop(finalState);
     applications.remove(applicationId);
   }
 

+ 80 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMHA.java

@@ -26,8 +26,11 @@ import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
 import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
 import org.apache.hadoop.ha.HealthCheckFailedException;
 import org.apache.hadoop.security.AccessControlException;
+import org.apache.hadoop.service.AbstractService;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.conf.HAUtil;
+import org.apache.hadoop.yarn.event.Dispatcher;
+import org.apache.hadoop.yarn.event.EventHandler;
 import org.junit.Before;
 import org.junit.Test;
 
@@ -222,4 +225,81 @@ public class TestRMHA {
     checkMonitorHealth();
     checkActiveRMFunctionality();
   }
+
+  @Test
+  public void testRMDispatcherForHA() throws IOException {
+    String errorMessageForEventHandler =
+        "Expect to get the same number of handlers";
+    String errorMessageForService = "Expect to get the same number of services";
+    Configuration conf = new YarnConfiguration(configuration);
+    rm = new MockRM(conf) {
+      @Override
+      protected Dispatcher createDispatcher() {
+        return new MyCountingDispatcher();
+      }
+    };
+    rm.init(conf);
+    int expectedEventHandlerCount =
+        ((MyCountingDispatcher) rm.getRMContext().getDispatcher())
+            .getEventHandlerCount();
+    int expectedServiceCount = rm.getServices().size();
+    assertTrue(expectedEventHandlerCount != 0);
+
+    StateChangeRequestInfo requestInfo = new StateChangeRequestInfo(
+        HAServiceProtocol.RequestSource.REQUEST_BY_USER);
+
+    assertEquals(STATE_ERR, HAServiceState.INITIALIZING,
+        rm.adminService.getServiceStatus().getState());
+    assertFalse("RM is ready to become active before being started",
+        rm.adminService.getServiceStatus().isReadyToBecomeActive());
+    rm.start();
+
+    //call transitions to standby and active a couple of times
+    rm.adminService.transitionToStandby(requestInfo);
+    rm.adminService.transitionToActive(requestInfo);
+    rm.adminService.transitionToStandby(requestInfo);
+    rm.adminService.transitionToActive(requestInfo);
+    rm.adminService.transitionToStandby(requestInfo);
+
+    rm.adminService.transitionToActive(requestInfo);
+    assertEquals(errorMessageForEventHandler, expectedEventHandlerCount,
+        ((MyCountingDispatcher) rm.getRMContext().getDispatcher())
+        .getEventHandlerCount());
+    assertEquals(errorMessageForService, expectedServiceCount,
+        rm.getServices().size());
+
+    rm.adminService.transitionToStandby(requestInfo);
+    assertEquals(errorMessageForEventHandler, expectedEventHandlerCount,
+        ((MyCountingDispatcher) rm.getRMContext().getDispatcher())
+        .getEventHandlerCount());
+    assertEquals(errorMessageForService, expectedServiceCount,
+        rm.getServices().size());
+
+    rm.stop();
+  }
+
+  @SuppressWarnings("rawtypes")
+  class MyCountingDispatcher extends AbstractService implements Dispatcher {
+
+    private int eventHandlerCount;
+
+    public MyCountingDispatcher() {
+      super("MyCountingDispatcher");
+      this.eventHandlerCount = 0;
+    }
+
+    @Override
+    public EventHandler getEventHandler() {
+      return null;
+    }
+
+    @Override
+    public void register(Class<? extends Enum> eventType, EventHandler handler) {
+      this.eventHandlerCount ++;
+    }
+
+    public int getEventHandlerCount() {
+      return this.eventHandlerCount;
+    }
+  }
 }

+ 124 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java

@@ -84,6 +84,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
 import org.apache.hadoop.yarn.server.utils.BuilderUtils;
 import org.apache.hadoop.yarn.util.ConverterUtils;
 import org.apache.log4j.Level;
@@ -179,7 +180,7 @@ public class TestRMRestart {
     am1.registerAppAttempt();
 
     // AM request for containers
-    am1.allocate("127.0.0.1" , 1000, 1, new ArrayList<ContainerId>());    
+    am1.allocate("127.0.0.1" , 1000, 1, new ArrayList<ContainerId>());   
     // kick the scheduler
     nm1.nodeHeartbeat(true);
     List<Container> conts = am1.allocate(new ArrayList<ResourceRequest>(),
@@ -1543,6 +1544,128 @@ public class TestRMRestart {
     Assert.assertEquals(2, ((TestMemoryRMStateStore) memStore).updateApp);
   }
 
+  @SuppressWarnings("resource")
+  @Test
+  public void testQueueMetricsOnRMRestart() throws Exception {
+    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
+        YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
+    MemoryRMStateStore memStore = new MemoryRMStateStore();
+    memStore.init(conf);
+
+    // PHASE 1: create state in an RM
+    // start RM
+    MockRM rm1 = new MockRM(conf, memStore);
+    rm1.start();
+    MockNM nm1 =
+        new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
+    nm1.registerNode();
+    QueueMetrics qm1 = rm1.getResourceScheduler().getRootQueueMetrics();
+    resetQueueMetrics(qm1);
+    assertQueueMetrics(qm1, 0, 0, 0, 0);
+
+    // create app that gets launched and does allocate before RM restart
+    RMApp app1 = rm1.submitApp(200);
+    assertQueueMetrics(qm1, 1, 1, 0, 0);
+    nm1.nodeHeartbeat(true);
+    RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
+    ApplicationAttemptId attemptId1 = attempt1.getAppAttemptId();
+    rm1.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
+    MockAM am1 = rm1.sendAMLaunched(attempt1.getAppAttemptId());
+    am1.registerAppAttempt();
+    am1.allocate("127.0.0.1" , 1000, 1, new ArrayList<ContainerId>()); 
+    nm1.nodeHeartbeat(true);
+    List<Container> conts = am1.allocate(new ArrayList<ResourceRequest>(),
+        new ArrayList<ContainerId>()).getAllocatedContainers();
+    while (conts.size() == 0) {
+      nm1.nodeHeartbeat(true);
+      conts.addAll(am1.allocate(new ArrayList<ResourceRequest>(),
+          new ArrayList<ContainerId>()).getAllocatedContainers());
+      Thread.sleep(500);
+    }
+    assertQueueMetrics(qm1, 1, 0, 1, 0);
+
+    // PHASE 2: create new RM and start from old state
+    // create new RM to represent restart and recover state
+    MockRM rm2 = new MockRM(conf, memStore);
+    rm2.start();
+    nm1.setResourceTrackerService(rm2.getResourceTrackerService());
+    QueueMetrics qm2 = rm2.getResourceScheduler().getRootQueueMetrics();
+    resetQueueMetrics(qm2);
+    assertQueueMetrics(qm2, 0, 0, 0, 0);
+    // recover app
+    RMApp loadedApp1 = rm2.getRMContext().getRMApps().get(app1.getApplicationId());
+    am1.setAMRMProtocol(rm2.getApplicationMasterService());
+    am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>());
+    nm1.nodeHeartbeat(true);
+    nm1 = new MockNM("127.0.0.1:1234", 15120, rm2.getResourceTrackerService());
+    List<ContainerStatus> containerStatuses = new ArrayList<ContainerStatus>();
+    ContainerStatus containerStatus =
+        BuilderUtils.newContainerStatus(BuilderUtils.newContainerId(loadedApp1
+            .getCurrentAppAttempt().getAppAttemptId(), 1),
+            ContainerState.COMPLETE, "Killed AM container", 143);
+    containerStatuses.add(containerStatus);
+    nm1.registerNode(containerStatuses);
+    int timeoutSecs = 0;
+    while (loadedApp1.getAppAttempts().size() != 2 && timeoutSecs++ < 40) {;
+      Thread.sleep(200);
+    }
+
+    assertQueueMetrics(qm2, 1, 1, 0, 0);
+    nm1.nodeHeartbeat(true);
+    attempt1 = loadedApp1.getCurrentAppAttempt();
+    attemptId1 = attempt1.getAppAttemptId();
+    rm2.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
+    assertQueueMetrics(qm2, 1, 0, 1, 0);
+    am1 = rm2.sendAMLaunched(attempt1.getAppAttemptId());
+    am1.registerAppAttempt();
+    am1.allocate("127.0.0.1" , 1000, 3, new ArrayList<ContainerId>());
+    nm1.nodeHeartbeat(true);
+    conts = am1.allocate(new ArrayList<ResourceRequest>(),
+        new ArrayList<ContainerId>()).getAllocatedContainers();
+    while (conts.size() == 0) {
+      nm1.nodeHeartbeat(true);
+      conts.addAll(am1.allocate(new ArrayList<ResourceRequest>(),
+          new ArrayList<ContainerId>()).getAllocatedContainers());
+      Thread.sleep(500);
+    }
+
+    // finish the AMs
+    finishApplicationMaster(loadedApp1, rm2, nm1, am1);
+    assertQueueMetrics(qm2, 1, 0, 0, 1);
+
+    // stop RM's
+    rm2.stop();
+    rm1.stop();
+  }
+
+
+  // The metrics has some carry-on value from the previous RM, because the
+  // test case is in-memory, for the same queue name (e.g. root), there's
+  // always a singleton QueueMetrics object.
+  private int appsSubmittedCarryOn = 0;
+  private int appsPendingCarryOn = 0;
+  private int appsRunningCarryOn = 0;
+  private int appsCompletedCarryOn = 0;
+
+  private void resetQueueMetrics(QueueMetrics qm) {
+    appsSubmittedCarryOn = qm.getAppsSubmitted();
+    appsPendingCarryOn = qm.getAppsPending();
+    appsRunningCarryOn = qm.getAppsRunning();
+    appsCompletedCarryOn = qm.getAppsCompleted();
+  }
+
+  private void assertQueueMetrics(QueueMetrics qm, int appsSubmitted,
+      int appsPending, int appsRunning, int appsCompleted) {
+    Assert.assertEquals(qm.getAppsSubmitted(),
+        appsSubmitted + appsSubmittedCarryOn);
+    Assert.assertEquals(qm.getAppsPending(),
+        appsPending + appsPendingCarryOn);
+    Assert.assertEquals(qm.getAppsRunning(),
+        appsRunning + appsRunningCarryOn);
+    Assert.assertEquals(qm.getAppsCompleted(),
+        appsCompleted + appsCompletedCarryOn);
+  }
+
   public class TestMemoryRMStateStore extends MemoryRMStateStore {
     int count = 0;
     public int updateApp = 0;

+ 24 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java

@@ -24,6 +24,7 @@ import java.util.List;
 
 import junit.framework.Assert;
 
+import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
 import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.Container;
@@ -150,7 +151,29 @@ public class TestAMRestart {
     ApplicationAttemptId newAttemptId =
         app1.getCurrentAppAttempt().getAppAttemptId();
     Assert.assertFalse(newAttemptId.equals(am1.getApplicationAttemptId()));
-    MockAM am2 = MockRM.launchAM(app1, rm1, nm1);
+
+    // launch the new AM
+    RMAppAttempt attempt2 = app1.getCurrentAppAttempt();
+    nm1.nodeHeartbeat(true);
+    MockAM am2 = rm1.sendAMLaunched(attempt2.getAppAttemptId());
+    RegisterApplicationMasterResponse registerResponse =
+        am2.registerAppAttempt();
+
+    // Assert two containers are running: container2 and container3;
+    Assert.assertEquals(2, registerResponse.getContainersFromPreviousAttempt()
+      .size());
+    boolean containerId2Exists = false, containerId3Exists = false;
+    for (Container container : registerResponse
+      .getContainersFromPreviousAttempt()) {
+      if (container.getId().equals(containerId2)) {
+        containerId2Exists = true;
+      }
+      if (container.getId().equals(containerId3)) {
+        containerId3Exists = true;
+      }
+    }
+    Assert.assertTrue(containerId2Exists && containerId3Exists);
+    rm1.waitForState(app1.getApplicationId(), RMAppState.RUNNING);
 
     // complete container by sending the container complete event which has earlier
     // attempt's attemptId

+ 58 - 24
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestQueueMetrics.java

@@ -37,7 +37,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
-import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler;
 import org.apache.hadoop.yarn.server.utils.BuilderUtils;
 import org.apache.hadoop.yarn.util.resource.Resources;
@@ -66,8 +66,10 @@ public class TestQueueMetrics {
     MetricsSource queueSource= queueSource(ms, queueName);
     AppSchedulingInfo app = mockApp(user);
 
-    metrics.submitApp(user, 1);
+    metrics.submitApp(user);
     MetricsSource userSource = userSource(ms, queueName, user);
+    checkApps(queueSource, 1, 0, 0, 0, 0, 0, true);
+    metrics.submitAppAttempt(user);
     checkApps(queueSource, 1, 1, 0, 0, 0, 0, true);
 
     metrics.setAvailableResourcesToQueue(Resources.createResource(100*GB, 100));
@@ -76,7 +78,7 @@ public class TestQueueMetrics {
     // configurable cluster/queue resources
     checkResources(queueSource, 0, 0, 0, 0, 0, 100*GB, 100, 15*GB, 15, 5, 0, 0, 0);
 
-    metrics.incrAppsRunning(app, user);
+    metrics.runAppAttempt(app.getApplicationId(), user);
     checkApps(queueSource, 1, 0, 1, 0, 0, 0, true);
 
     metrics.allocateResources(user, 3, Resources.createResource(2*GB, 2));
@@ -85,7 +87,10 @@ public class TestQueueMetrics {
     metrics.releaseResources(user, 1, Resources.createResource(2*GB, 2));
     checkResources(queueSource, 4*GB, 4, 2, 3, 1, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0);
 
-    metrics.finishApp(app, RMAppAttemptState.FINISHED);
+    metrics.finishAppAttempt(
+        app.getApplicationId(), app.isPending(), app.getUser());
+    checkApps(queueSource, 1, 0, 0, 0, 0, 0, true);
+    metrics.finishApp(user, RMAppState.FINISHED);
     checkApps(queueSource, 1, 0, 0, 1, 0, 0, true);
     assertNull(userSource);
   }
@@ -100,39 +105,47 @@ public class TestQueueMetrics {
     MetricsSource queueSource = queueSource(ms, queueName);
     AppSchedulingInfo app = mockApp(user);
 
-    metrics.submitApp(user, 1);
+    metrics.submitApp(user);
     MetricsSource userSource = userSource(ms, queueName, user);
+    checkApps(queueSource, 1, 0, 0, 0, 0, 0, true);
+    metrics.submitAppAttempt(user);
     checkApps(queueSource, 1, 1, 0, 0, 0, 0, true);
 
-    metrics.incrAppsRunning(app, user);
+    metrics.runAppAttempt(app.getApplicationId(), user);
     checkApps(queueSource, 1, 0, 1, 0, 0, 0, true);
 
-    metrics.finishApp(app, RMAppAttemptState.FAILED);
-    checkApps(queueSource, 1, 0, 0, 0, 1, 0, true);
+    metrics.finishAppAttempt(
+        app.getApplicationId(), app.isPending(), app.getUser());
+    checkApps(queueSource, 1, 0, 0, 0, 0, 0, true);
 
     // As the application has failed, framework retries the same application
     // based on configuration
-    metrics.submitApp(user, 2);
+    metrics.submitAppAttempt(user);
     checkApps(queueSource, 1, 1, 0, 0, 0, 0, true);
 
-    metrics.incrAppsRunning(app, user);
+    metrics.runAppAttempt(app.getApplicationId(), user);
     checkApps(queueSource, 1, 0, 1, 0, 0, 0, true);
 
     // Suppose say application has failed this time as well.
-    metrics.finishApp(app, RMAppAttemptState.FAILED);
-    checkApps(queueSource, 1, 0, 0, 0, 1, 0, true);
+    metrics.finishAppAttempt(
+        app.getApplicationId(), app.isPending(), app.getUser());
+    checkApps(queueSource, 1, 0, 0, 0, 0, 0, true);
 
     // As the application has failed, framework retries the same application
     // based on configuration
-    metrics.submitApp(user, 3);
+    metrics.submitAppAttempt(user);
     checkApps(queueSource, 1, 1, 0, 0, 0, 0, true);
 
-    metrics.incrAppsRunning(app, user);
+    metrics.runAppAttempt(app.getApplicationId(), user);
     checkApps(queueSource, 1, 0, 1, 0, 0, 0, true);
 
-    // Suppose say application has finished.
-    metrics.finishApp(app, RMAppAttemptState.FINISHED);
-    checkApps(queueSource, 1, 0, 0, 1, 0, 0, true);
+    // Suppose say application has failed, and there's no more retries.
+    metrics.finishAppAttempt(
+        app.getApplicationId(), app.isPending(), app.getUser());
+    checkApps(queueSource, 1, 0, 0, 0, 0, 0, true);
+
+    metrics.finishApp(user, RMAppState.FAILED);
+    checkApps(queueSource, 1, 0, 0, 0, 1, 0, true);
 
     assertNull(userSource);
   }
@@ -146,9 +159,13 @@ public class TestQueueMetrics {
     MetricsSource queueSource = queueSource(ms, queueName);
     AppSchedulingInfo app = mockApp(user);
 
-    metrics.submitApp(user, 1);
+    metrics.submitApp(user);
     MetricsSource userSource = userSource(ms, queueName, user);
 
+    checkApps(queueSource, 1, 0, 0, 0, 0, 0, true);
+    checkApps(userSource, 1, 0, 0, 0, 0, 0, true);
+
+    metrics.submitAppAttempt(user);
     checkApps(queueSource, 1, 1, 0, 0, 0, 0, true);
     checkApps(userSource, 1, 1, 0, 0, 0, 0, true);
 
@@ -160,7 +177,7 @@ public class TestQueueMetrics {
     checkResources(queueSource, 0, 0, 0, 0, 0,  100*GB, 100, 15*GB, 15, 5, 0, 0, 0);
     checkResources(userSource, 0, 0, 0, 0, 0, 10*GB, 10, 15*GB, 15, 5, 0, 0, 0);
 
-    metrics.incrAppsRunning(app, user);
+    metrics.runAppAttempt(app.getApplicationId(), user);
     checkApps(queueSource, 1, 0, 1, 0, 0, 0, true);
     checkApps(userSource, 1, 0, 1, 0, 0, 0, true);
 
@@ -172,7 +189,11 @@ public class TestQueueMetrics {
     checkResources(queueSource, 4*GB, 4, 2, 3, 1, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0);
     checkResources(userSource, 4*GB, 4, 2, 3, 1, 10*GB, 10, 9*GB, 9, 2, 0, 0, 0);
 
-    metrics.finishApp(app, RMAppAttemptState.FINISHED);
+    metrics.finishAppAttempt(
+        app.getApplicationId(), app.isPending(), app.getUser());
+    checkApps(queueSource, 1, 0, 0, 0, 0, 0, true);
+    checkApps(userSource, 1, 0, 0, 0, 0, 0, true);
+    metrics.finishApp(user, RMAppState.FINISHED);
     checkApps(queueSource, 1, 0, 0, 1, 0, 0, true);
     checkApps(userSource, 1, 0, 0, 1, 0, 0, true);
   }
@@ -192,10 +213,16 @@ public class TestQueueMetrics {
     MetricsSource queueSource = queueSource(ms, leafQueueName);
     AppSchedulingInfo app = mockApp(user);
 
-    metrics.submitApp(user, 1);
+    metrics.submitApp(user);
     MetricsSource userSource = userSource(ms, leafQueueName, user);
     MetricsSource parentUserSource = userSource(ms, parentQueueName, user);
 
+    checkApps(queueSource, 1, 0, 0, 0, 0, 0, true);
+    checkApps(parentQueueSource, 1, 0, 0, 0, 0, 0, true);
+    checkApps(userSource, 1, 0, 0, 0, 0, 0, true);
+    checkApps(parentUserSource, 1, 0, 0, 0, 0, 0, true);
+
+    metrics.submitAppAttempt(user);
     checkApps(queueSource, 1, 1, 0, 0, 0, 0, true);
     checkApps(parentQueueSource, 1, 1, 0, 0, 0, 0, true);
     checkApps(userSource, 1, 1, 0, 0, 0, 0, true);
@@ -211,7 +238,7 @@ public class TestQueueMetrics {
     checkResources(userSource, 0, 0, 0, 0, 0, 10*GB, 10, 15*GB, 15, 5, 0, 0, 0);
     checkResources(parentUserSource, 0, 0, 0, 0, 0, 10*GB, 10, 15*GB, 15, 5, 0, 0, 0);
 
-    metrics.incrAppsRunning(app, user);
+    metrics.runAppAttempt(app.getApplicationId(), user);
     checkApps(queueSource, 1, 0, 1, 0, 0, 0, true);
     checkApps(userSource, 1, 0, 1, 0, 0, 0, true);
 
@@ -231,7 +258,14 @@ public class TestQueueMetrics {
     checkResources(userSource, 4*GB, 4, 2, 3, 1, 10*GB, 10, 9*GB, 9, 2, 0, 0, 0);
     checkResources(parentUserSource, 4*GB, 4, 2, 3, 1, 10*GB, 10, 9*GB, 9, 2, 0, 0, 0);
 
-    metrics.finishApp(app, RMAppAttemptState.FINISHED);
+    metrics.finishAppAttempt(
+        app.getApplicationId(), app.isPending(), app.getUser());
+    checkApps(queueSource, 1, 0, 0, 0, 0, 0, true);
+    checkApps(parentQueueSource, 1, 0, 0, 0, 0, 0, true);
+    checkApps(userSource, 1, 0, 0, 0, 0, 0, true);
+    checkApps(parentUserSource, 1, 0, 0, 0, 0, 0, true);
+
+    metrics.finishApp(user, RMAppState.FINISHED);
     checkApps(queueSource, 1, 0, 0, 1, 0, 0, true);
     checkApps(parentQueueSource, 1, 0, 0, 1, 0, 0, true);
     checkApps(userSource, 1, 0, 0, 1, 0, 0, true);
@@ -308,7 +342,7 @@ public class TestQueueMetrics {
     assertGauge("AppsPending", pending, rb);
     assertGauge("AppsRunning", running, rb);
     assertCounter("AppsCompleted", completed, rb);
-    assertGauge("AppsFailed", failed, rb);
+    assertCounter("AppsFailed", failed, rb);
     assertCounter("AppsKilled", killed, rb);
   }
 

+ 1 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java

@@ -642,7 +642,7 @@ public class TestCapacityScheduler {
 
     SchedulerApplication app =
         TestSchedulerUtils.verifyAppAddedAndRemovedFromScheduler(
-          cs.applications, cs, "a1");
+          cs.getSchedulerApplications(), cs, "a1");
     Assert.assertEquals("a1", app.getQueue().getQueueName());
   }
  }

+ 6 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java

@@ -55,6 +55,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType;
@@ -66,6 +67,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaS
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager;
 import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;
 import org.apache.hadoop.yarn.util.resource.ResourceCalculator;
@@ -360,7 +362,7 @@ public class TestLeafQueue {
     cs.handle(event);
     
     assertEquals(0, a.getMetrics().getAppsPending());
-    assertEquals(1, a.getMetrics().getAppsFailed());
+    assertEquals(0, a.getMetrics().getAppsFailed());
 
     // Attempt the same application again
     final ApplicationAttemptId appAttemptId_1 = TestUtils
@@ -375,6 +377,9 @@ public class TestLeafQueue {
     event = new AppAttemptRemovedSchedulerEvent(appAttemptId_0,
         RMAppAttemptState.FINISHED, false);
     cs.handle(event);
+    AppRemovedSchedulerEvent rEvent = new AppRemovedSchedulerEvent(
+        appAttemptId_0.getApplicationId(), RMAppState.FINISHED);
+    cs.handle(rEvent);
     
     assertEquals(1, a.getMetrics().getAppsSubmitted());
     assertEquals(0, a.getMetrics().getAppsPending());

+ 2 - 3
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java

@@ -79,7 +79,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.TestSchedulerUtils;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.TestCapacityScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent;
@@ -260,7 +259,7 @@ public class TestFairScheduler {
     scheduler.addApplication(id.getApplicationId(), queueId, userId);
     // This conditional is for testAclSubmitApplication where app is rejected
     // and no app is added.
-    if (scheduler.applications.containsKey(id.getApplicationId())) {
+    if (scheduler.getSchedulerApplications().containsKey(id.getApplicationId())) {
       scheduler.addApplicationAttempt(id, false);
     }
     List<ResourceRequest> ask = new ArrayList<ResourceRequest>();
@@ -2546,6 +2545,6 @@ public class TestFairScheduler {
     FairScheduler scheduler =
         (FairScheduler) resourceManager.getResourceScheduler();
     TestSchedulerUtils.verifyAppAddedAndRemovedFromScheduler(
-      scheduler.applications, scheduler, "default");
+      scheduler.getSchedulerApplications(), scheduler, "default");
   }
 }

+ 2 - 8
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java

@@ -70,7 +70,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptA
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
-import org.apache.hadoop.yarn.server.resourcemanager.security.ClientToAMTokenSecretManagerInRM;
 import org.apache.hadoop.yarn.server.resourcemanager.security.NMTokenSecretManagerInRM;
 import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager;
 import org.apache.hadoop.yarn.server.utils.BuilderUtils;
@@ -160,11 +159,6 @@ public class TestFifoScheduler {
     schedular.handle(attemptEvent);
 
     appAttemptId = BuilderUtils.newApplicationAttemptId(appId, 2);
-
-    SchedulerEvent appEvent2 =
-        new AppAddedSchedulerEvent(appAttemptId.getApplicationId(), "queue",
-          "user");
-    schedular.handle(appEvent2);
     SchedulerEvent attemptEvent2 =
         new AppAttemptAddedSchedulerEvent(appAttemptId, false);
     schedular.handle(attemptEvent2);
@@ -597,8 +591,8 @@ public class TestFifoScheduler {
         ResourceScheduler.class);
     MockRM rm = new MockRM(conf);
     FifoScheduler fs = (FifoScheduler)rm.getResourceScheduler();
-    TestSchedulerUtils.verifyAppAddedAndRemovedFromScheduler(fs.applications,
-      fs, "queue");
+    TestSchedulerUtils.verifyAppAddedAndRemovedFromScheduler(
+      fs.getSchedulerApplications(), fs, "queue");
   }
 
   private void checkApplicationResourceUsage(int expected, 

+ 615 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java.orig

@@ -0,0 +1,615 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.lang.reflect.Method;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+import junit.framework.Assert;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.net.NetworkTopology;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.hadoop.yarn.api.records.Priority;
+import org.apache.hadoop.yarn.api.records.QueueInfo;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceOption;
+import org.apache.hadoop.yarn.api.records.ResourceRequest;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.event.AsyncDispatcher;
+import org.apache.hadoop.yarn.event.InlineDispatcher;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.factories.RecordFactory;
+import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
+import org.apache.hadoop.yarn.server.resourcemanager.Application;
+import org.apache.hadoop.yarn.server.resourcemanager.MockNodes;
+import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
+import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
+import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
+import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
+import org.apache.hadoop.yarn.server.resourcemanager.Task;
+import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.TestSchedulerUtils;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.TestCapacityScheduler;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.security.NMTokenSecretManagerInRM;
+import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager;
+import org.apache.hadoop.yarn.server.utils.BuilderUtils;
+import org.apache.hadoop.yarn.util.resource.Resources;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestFifoScheduler {
+  private static final Log LOG = LogFactory.getLog(TestFifoScheduler.class);
+  private final int GB = 1024;
+
+  private ResourceManager resourceManager = null;
+  
+  private static final RecordFactory recordFactory = 
+      RecordFactoryProvider.getRecordFactory(null);
+  
+  @Before
+  public void setUp() throws Exception {
+    resourceManager = new ResourceManager();
+    Configuration conf = new Configuration();
+    conf.setClass(YarnConfiguration.RM_SCHEDULER, 
+        FifoScheduler.class, ResourceScheduler.class);
+    resourceManager.init(conf);
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    resourceManager.stop();
+  }
+  
+  private org.apache.hadoop.yarn.server.resourcemanager.NodeManager
+      registerNode(String hostName, int containerManagerPort, int nmHttpPort,
+          String rackName, Resource capability) throws IOException,
+          YarnException {
+    return new org.apache.hadoop.yarn.server.resourcemanager.NodeManager(
+        hostName, containerManagerPort, nmHttpPort, rackName, capability,
+        resourceManager);
+  }
+  
+  private ApplicationAttemptId createAppAttemptId(int appId, int attemptId) {
+    ApplicationId appIdImpl = ApplicationId.newInstance(0, appId);
+    ApplicationAttemptId attId =
+        ApplicationAttemptId.newInstance(appIdImpl, attemptId);
+    return attId;
+  }
+
+  private ResourceRequest createResourceRequest(int memory, String host,
+      int priority, int numContainers) {
+    ResourceRequest request = recordFactory
+        .newRecordInstance(ResourceRequest.class);
+    request.setCapability(Resources.createResource(memory));
+    request.setResourceName(host);
+    request.setNumContainers(numContainers);
+    Priority prio = recordFactory.newRecordInstance(Priority.class);
+    prio.setPriority(priority);
+    request.setPriority(prio);
+    return request;
+  }
+
+  @Test(timeout=5000)
+  public void testFifoSchedulerCapacityWhenNoNMs() {
+    FifoScheduler scheduler = new FifoScheduler();
+    QueueInfo queueInfo = scheduler.getQueueInfo(null, false, false);
+    Assert.assertEquals(0.0f, queueInfo.getCurrentCapacity());
+  }
+  
+  @Test(timeout=5000)
+  public void testAppAttemptMetrics() throws Exception {
+    AsyncDispatcher dispatcher = new InlineDispatcher();
+    RMContext rmContext = new RMContextImpl(dispatcher, null,
+        null, null, null, null, null, null, null);
+
+    FifoScheduler schedular = new FifoScheduler();
+    schedular.reinitialize(new Configuration(), rmContext);
+    QueueMetrics metrics = schedular.getRootQueueMetrics();
+    int beforeAppsSubmitted = metrics.getAppsSubmitted();
+
+    ApplicationId appId = BuilderUtils.newApplicationId(200, 1);
+    ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId(
+        appId, 1);
+
+    SchedulerEvent appEvent = new AppAddedSchedulerEvent(appId, "queue", "user");
+    schedular.handle(appEvent);
+    SchedulerEvent attemptEvent =
+        new AppAttemptAddedSchedulerEvent(appAttemptId, false);
+    schedular.handle(attemptEvent);
+
+    appAttemptId = BuilderUtils.newApplicationAttemptId(appId, 2);
+    SchedulerEvent attemptEvent2 =
+        new AppAttemptAddedSchedulerEvent(appAttemptId, false);
+    schedular.handle(attemptEvent2);
+
+    int afterAppsSubmitted = metrics.getAppsSubmitted();
+    Assert.assertEquals(1, afterAppsSubmitted - beforeAppsSubmitted);
+  }
+
+  @Test(timeout=2000)
+  public void testNodeLocalAssignment() throws Exception {
+    AsyncDispatcher dispatcher = new InlineDispatcher();
+    Configuration conf = new Configuration();
+    RMContainerTokenSecretManager containerTokenSecretManager =
+        new RMContainerTokenSecretManager(conf);
+    containerTokenSecretManager.rollMasterKey();
+    NMTokenSecretManagerInRM nmTokenSecretManager =
+        new NMTokenSecretManagerInRM(conf);
+    nmTokenSecretManager.rollMasterKey();
+    RMContext rmContext = new RMContextImpl(dispatcher, null, null, null, null,
+        null, containerTokenSecretManager, nmTokenSecretManager, null);
+
+    FifoScheduler scheduler = new FifoScheduler();
+    scheduler.reinitialize(new Configuration(), rmContext);
+
+    RMNode node0 = MockNodes.newNodeInfo(1,
+        Resources.createResource(1024 * 64), 1, "127.0.0.1");
+    NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node0);
+    scheduler.handle(nodeEvent1);
+
+    int _appId = 1;
+    int _appAttemptId = 1;
+    ApplicationAttemptId appAttemptId = createAppAttemptId(_appId,
+        _appAttemptId);
+    AppAddedSchedulerEvent appEvent =
+        new AppAddedSchedulerEvent(appAttemptId.getApplicationId(), "queue1",
+          "user1");
+    scheduler.handle(appEvent);
+    AppAttemptAddedSchedulerEvent attemptEvent =
+        new AppAttemptAddedSchedulerEvent(appAttemptId, false);
+    scheduler.handle(attemptEvent);
+
+    int memory = 64;
+    int nConts = 3;
+    int priority = 20;
+
+    List<ResourceRequest> ask = new ArrayList<ResourceRequest>();
+    ResourceRequest nodeLocal = createResourceRequest(memory,
+        node0.getHostName(), priority, nConts);
+    ResourceRequest rackLocal = createResourceRequest(memory,
+        node0.getRackName(), priority, nConts);
+    ResourceRequest any = createResourceRequest(memory, ResourceRequest.ANY, priority,
+        nConts);
+    ask.add(nodeLocal);
+    ask.add(rackLocal);
+    ask.add(any);
+    scheduler.allocate(appAttemptId, ask, new ArrayList<ContainerId>(), null, null);
+
+    NodeUpdateSchedulerEvent node0Update = new NodeUpdateSchedulerEvent(node0);
+
+    // Before the node update event, there are 3 local requests outstanding
+    Assert.assertEquals(3, nodeLocal.getNumContainers());
+
+    scheduler.handle(node0Update);
+
+    // After the node update event, check that there are no more local requests
+    // outstanding
+    Assert.assertEquals(0, nodeLocal.getNumContainers());
+    //Also check that the containers were scheduled
+    SchedulerAppReport info = scheduler.getSchedulerAppInfo(appAttemptId);
+    Assert.assertEquals(3, info.getLiveContainers().size());
+  }
+  
+  @Test(timeout=2000)
+  public void testUpdateResourceOnNode() throws Exception {
+    AsyncDispatcher dispatcher = new InlineDispatcher();
+    Configuration conf = new Configuration();
+    RMContainerTokenSecretManager containerTokenSecretManager =
+        new RMContainerTokenSecretManager(conf);
+    containerTokenSecretManager.rollMasterKey();
+    NMTokenSecretManagerInRM nmTokenSecretManager =
+        new NMTokenSecretManagerInRM(conf);
+    nmTokenSecretManager.rollMasterKey();
+    RMContext rmContext = new RMContextImpl(dispatcher, null, null, null, null,
+        null, containerTokenSecretManager, nmTokenSecretManager, null);
+
+    FifoScheduler scheduler = new FifoScheduler(){
+      @SuppressWarnings("unused")
+      public Map<NodeId, FiCaSchedulerNode> getNodes(){
+        return nodes;
+      }
+    };
+    scheduler.reinitialize(new Configuration(), rmContext);
+    RMNode node0 = MockNodes.newNodeInfo(1,
+        Resources.createResource(2048, 4), 1, "127.0.0.1");
+    NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node0);
+    scheduler.handle(nodeEvent1);
+    
+    Method method = scheduler.getClass().getDeclaredMethod("getNodes");
+    @SuppressWarnings("unchecked")
+    Map<NodeId, FiCaSchedulerNode> schedulerNodes = 
+        (Map<NodeId, FiCaSchedulerNode>) method.invoke(scheduler);
+    assertEquals(schedulerNodes.values().size(), 1);
+    
+    // set resource of RMNode to 1024 and verify it works.
+    node0.setResourceOption(ResourceOption.newInstance(
+        Resources.createResource(1024, 4), RMNode.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT));
+    assertEquals(node0.getTotalCapability().getMemory(), 1024);
+    // verify that SchedulerNode's resource hasn't been changed.
+    assertEquals(schedulerNodes.get(node0.getNodeID()).
+        getAvailableResource().getMemory(), 2048);
+    // now, NM heartbeat comes.
+    NodeUpdateSchedulerEvent node0Update = new NodeUpdateSchedulerEvent(node0);
+    scheduler.handle(node0Update);
+    // SchedulerNode's available resource is changed.
+    assertEquals(schedulerNodes.get(node0.getNodeID()).
+        getAvailableResource().getMemory(), 1024);
+    QueueInfo queueInfo = scheduler.getQueueInfo(null, false, false);
+    Assert.assertEquals(0.0f, queueInfo.getCurrentCapacity());
+    
+    int _appId = 1;
+    int _appAttemptId = 1;
+    ApplicationAttemptId appAttemptId = createAppAttemptId(_appId,
+        _appAttemptId);
+    AppAddedSchedulerEvent appEvent =
+        new AppAddedSchedulerEvent(appAttemptId.getApplicationId(), "queue1",
+          "user1");
+    scheduler.handle(appEvent);
+    AppAttemptAddedSchedulerEvent attemptEvent =
+        new AppAttemptAddedSchedulerEvent(appAttemptId, false);
+    scheduler.handle(attemptEvent);
+
+    int memory = 1024;
+    int priority = 1;
+
+    List<ResourceRequest> ask = new ArrayList<ResourceRequest>();
+    ResourceRequest nodeLocal = createResourceRequest(memory,
+        node0.getHostName(), priority, 1);
+    ResourceRequest rackLocal = createResourceRequest(memory,
+        node0.getRackName(), priority, 1);
+    ResourceRequest any = createResourceRequest(memory, ResourceRequest.ANY, priority,
+        1);
+    ask.add(nodeLocal);
+    ask.add(rackLocal);
+    ask.add(any);
+    scheduler.allocate(appAttemptId, ask, new ArrayList<ContainerId>(), null, null);
+
+    // Before the node update event, there are one local request
+    Assert.assertEquals(1, nodeLocal.getNumContainers());
+
+    // Now schedule.
+    scheduler.handle(node0Update);
+
+    // After the node update event, check no local request
+    Assert.assertEquals(0, nodeLocal.getNumContainers());
+    // Also check that one container was scheduled
+    SchedulerAppReport info = scheduler.getSchedulerAppInfo(appAttemptId);
+    Assert.assertEquals(1, info.getLiveContainers().size());
+    // And check the default Queue now is full.
+    queueInfo = scheduler.getQueueInfo(null, false, false);
+    Assert.assertEquals(1.0f, queueInfo.getCurrentCapacity());
+  }
+  
+//  @Test
+  public void testFifoScheduler() throws Exception {
+
+    LOG.info("--- START: testFifoScheduler ---");
+        
+    final int GB = 1024;
+    
+    // Register node1
+    String host_0 = "host_0";
+    org.apache.hadoop.yarn.server.resourcemanager.NodeManager nm_0 = 
+      registerNode(host_0, 1234, 2345, NetworkTopology.DEFAULT_RACK, 
+          Resources.createResource(4 * GB, 1));
+    nm_0.heartbeat();
+    
+    // Register node2
+    String host_1 = "host_1";
+    org.apache.hadoop.yarn.server.resourcemanager.NodeManager nm_1 = 
+      registerNode(host_1, 1234, 2345, NetworkTopology.DEFAULT_RACK, 
+          Resources.createResource(2 * GB, 1));
+    nm_1.heartbeat();
+
+    // ResourceRequest priorities
+    Priority priority_0 = 
+      org.apache.hadoop.yarn.server.resourcemanager.resource.Priority.create(0); 
+    Priority priority_1 = 
+      org.apache.hadoop.yarn.server.resourcemanager.resource.Priority.create(1);
+    
+    // Submit an application
+    Application application_0 = new Application("user_0", resourceManager);
+    application_0.submit();
+    
+    application_0.addNodeManager(host_0, 1234, nm_0);
+    application_0.addNodeManager(host_1, 1234, nm_1);
+
+    Resource capability_0_0 = Resources.createResource(GB);
+    application_0.addResourceRequestSpec(priority_1, capability_0_0);
+    
+    Resource capability_0_1 = Resources.createResource(2 * GB);
+    application_0.addResourceRequestSpec(priority_0, capability_0_1);
+
+    Task task_0_0 = new Task(application_0, priority_1, 
+        new String[] {host_0, host_1});
+    application_0.addTask(task_0_0);
+       
+    // Submit another application
+    Application application_1 = new Application("user_1", resourceManager);
+    application_1.submit();
+    
+    application_1.addNodeManager(host_0, 1234, nm_0);
+    application_1.addNodeManager(host_1, 1234, nm_1);
+    
+    Resource capability_1_0 = Resources.createResource(3 * GB);
+    application_1.addResourceRequestSpec(priority_1, capability_1_0);
+    
+    Resource capability_1_1 = Resources.createResource(4 * GB);
+    application_1.addResourceRequestSpec(priority_0, capability_1_1);
+
+    Task task_1_0 = new Task(application_1, priority_1, 
+        new String[] {host_0, host_1});
+    application_1.addTask(task_1_0);
+        
+    // Send resource requests to the scheduler
+    LOG.info("Send resource requests to the scheduler");
+    application_0.schedule();
+    application_1.schedule();
+    
+    // Send a heartbeat to kick the tires on the Scheduler
+    LOG.info("Send a heartbeat to kick the tires on the Scheduler... " +
+    		"nm0 -> task_0_0 and task_1_0 allocated, used=4G " +
+    		"nm1 -> nothing allocated");
+    nm_0.heartbeat();             // task_0_0 and task_1_0 allocated, used=4G
+    nm_1.heartbeat();             // nothing allocated
+    
+    // Get allocations from the scheduler
+    application_0.schedule();     // task_0_0 
+    checkApplicationResourceUsage(GB, application_0);
+
+    application_1.schedule();     // task_1_0
+    checkApplicationResourceUsage(3 * GB, application_1);
+    
+    nm_0.heartbeat();
+    nm_1.heartbeat();
+    
+    checkNodeResourceUsage(4*GB, nm_0);  // task_0_0 (1G) and task_1_0 (3G)
+    checkNodeResourceUsage(0*GB, nm_1);  // no tasks, 2G available
+    
+    LOG.info("Adding new tasks...");
+    
+    Task task_1_1 = new Task(application_1, priority_1, 
+        new String[] {ResourceRequest.ANY});
+    application_1.addTask(task_1_1);
+
+    Task task_1_2 = new Task(application_1, priority_1, 
+        new String[] {ResourceRequest.ANY});
+    application_1.addTask(task_1_2);
+
+    Task task_1_3 = new Task(application_1, priority_0, 
+        new String[] {ResourceRequest.ANY});
+    application_1.addTask(task_1_3);
+    
+    application_1.schedule();
+    
+    Task task_0_1 = new Task(application_0, priority_1, 
+        new String[] {host_0, host_1});
+    application_0.addTask(task_0_1);
+
+    Task task_0_2 = new Task(application_0, priority_1, 
+        new String[] {host_0, host_1});
+    application_0.addTask(task_0_2);
+    
+    Task task_0_3 = new Task(application_0, priority_0, 
+        new String[] {ResourceRequest.ANY});
+    application_0.addTask(task_0_3);
+
+    application_0.schedule();
+
+    // Send a heartbeat to kick the tires on the Scheduler
+    LOG.info("Sending hb from " + nm_0.getHostName());
+    nm_0.heartbeat();                   // nothing new, used=4G
+    
+    LOG.info("Sending hb from " + nm_1.getHostName());
+    nm_1.heartbeat();                   // task_0_3, used=2G
+    
+    // Get allocations from the scheduler
+    LOG.info("Trying to allocate...");
+    application_0.schedule();
+    checkApplicationResourceUsage(3 * GB, application_0);
+    application_1.schedule();
+    checkApplicationResourceUsage(3 * GB, application_1);
+    nm_0.heartbeat();
+    nm_1.heartbeat();
+    checkNodeResourceUsage(4*GB, nm_0);
+    checkNodeResourceUsage(2*GB, nm_1);
+    
+    // Complete tasks
+    LOG.info("Finishing up task_0_0");
+    application_0.finishTask(task_0_0); // Now task_0_1
+    application_0.schedule();
+    application_1.schedule();
+    nm_0.heartbeat();
+    nm_1.heartbeat();
+    checkApplicationResourceUsage(3 * GB, application_0);
+    checkApplicationResourceUsage(3 * GB, application_1);
+    checkNodeResourceUsage(4*GB, nm_0);
+    checkNodeResourceUsage(2*GB, nm_1);
+
+    LOG.info("Finishing up task_1_0");
+    application_1.finishTask(task_1_0);  // Now task_0_2
+    application_0.schedule(); // final overcommit for app0 caused here
+    application_1.schedule();
+    nm_0.heartbeat(); // final overcommit for app0 occurs here
+    nm_1.heartbeat();
+    checkApplicationResourceUsage(4 * GB, application_0);
+    checkApplicationResourceUsage(0 * GB, application_1);
+    //checkNodeResourceUsage(1*GB, nm_0);  // final over-commit -> rm.node->1G, test.node=2G
+    checkNodeResourceUsage(2*GB, nm_1);
+
+    LOG.info("Finishing up task_0_3");
+    application_0.finishTask(task_0_3); // No more
+    application_0.schedule();
+    application_1.schedule();
+    nm_0.heartbeat();
+    nm_1.heartbeat();
+    checkApplicationResourceUsage(2 * GB, application_0);
+    checkApplicationResourceUsage(0 * GB, application_1);
+    //checkNodeResourceUsage(2*GB, nm_0);  // final over-commit, rm.node->1G, test.node->2G
+    checkNodeResourceUsage(0*GB, nm_1);
+    
+    LOG.info("Finishing up task_0_1");
+    application_0.finishTask(task_0_1);
+    application_0.schedule();
+    application_1.schedule();
+    nm_0.heartbeat();
+    nm_1.heartbeat();
+    checkApplicationResourceUsage(1 * GB, application_0);
+    checkApplicationResourceUsage(0 * GB, application_1);
+    
+    LOG.info("Finishing up task_0_2");
+    application_0.finishTask(task_0_2); // now task_1_3 can go!
+    application_0.schedule();
+    application_1.schedule();
+    nm_0.heartbeat();
+    nm_1.heartbeat();
+    checkApplicationResourceUsage(0 * GB, application_0);
+    checkApplicationResourceUsage(4 * GB, application_1);
+    
+    LOG.info("Finishing up task_1_3");
+    application_1.finishTask(task_1_3); // now task_1_1
+    application_0.schedule();
+    application_1.schedule();
+    nm_0.heartbeat();
+    nm_1.heartbeat();
+    checkApplicationResourceUsage(0 * GB, application_0);
+    checkApplicationResourceUsage(3 * GB, application_1);
+    
+    LOG.info("Finishing up task_1_1");
+    application_1.finishTask(task_1_1);
+    application_0.schedule();
+    application_1.schedule();
+    nm_0.heartbeat();
+    nm_1.heartbeat();
+    checkApplicationResourceUsage(0 * GB, application_0);
+    checkApplicationResourceUsage(3 * GB, application_1);
+    
+    LOG.info("--- END: testFifoScheduler ---");
+  }
+
+  @SuppressWarnings("resource")
+  @Test
+  public void testBlackListNodes() throws Exception {
+    Configuration conf = new Configuration();
+    conf.setClass(YarnConfiguration.RM_SCHEDULER, FifoScheduler.class,
+        ResourceScheduler.class);
+    MockRM rm = new MockRM(conf);
+    rm.start();
+    FifoScheduler fs = (FifoScheduler) rm.getResourceScheduler();
+
+    String host = "127.0.0.1";
+    RMNode node =
+        MockNodes.newNodeInfo(0, MockNodes.newResource(4 * GB), 1, host);
+    fs.handle(new NodeAddedSchedulerEvent(node));
+
+    ApplicationId appId = BuilderUtils.newApplicationId(100, 1);
+    ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId(
+        appId, 1);
+    SchedulerEvent appEvent =
+        new AppAddedSchedulerEvent(appId, "default",
+          "user");
+    fs.handle(appEvent);
+    SchedulerEvent attemptEvent =
+        new AppAttemptAddedSchedulerEvent(appAttemptId, false);
+    fs.handle(attemptEvent);
+
+    // Verify the blacklist can be updated independent of requesting containers
+    fs.allocate(appAttemptId, Collections.<ResourceRequest>emptyList(),
+        Collections.<ContainerId>emptyList(),
+        Collections.singletonList(host), null);
+    Assert.assertTrue(fs.getApplicationAttempt(appAttemptId).isBlacklisted(host));
+    fs.allocate(appAttemptId, Collections.<ResourceRequest>emptyList(),
+        Collections.<ContainerId>emptyList(), null,
+        Collections.singletonList(host));
+    Assert.assertFalse(fs.getApplicationAttempt(appAttemptId).isBlacklisted(host));
+    rm.stop();
+  }
+  
+  @Test
+  public void testGetAppsInQueue() throws Exception {
+    Application application_0 = new Application("user_0", resourceManager);
+    application_0.submit();
+    
+    Application application_1 = new Application("user_0", resourceManager);
+    application_1.submit();
+    
+    ResourceScheduler scheduler = resourceManager.getResourceScheduler();
+    
+    List<ApplicationAttemptId> appsInDefault = scheduler.getAppsInQueue("default");
+    assertTrue(appsInDefault.contains(application_0.getApplicationAttemptId()));
+    assertTrue(appsInDefault.contains(application_1.getApplicationAttemptId()));
+    assertEquals(2, appsInDefault.size());
+    
+    Assert.assertNull(scheduler.getAppsInQueue("someotherqueue"));
+  }
+
+  @Test
+  public void testAddAndRemoveAppFromFiFoScheduler() throws Exception {
+    Configuration conf = new Configuration();
+    conf.setClass(YarnConfiguration.RM_SCHEDULER, FifoScheduler.class,
+        ResourceScheduler.class);
+    MockRM rm = new MockRM(conf);
+    FifoScheduler fs = (FifoScheduler)rm.getResourceScheduler();
+    TestSchedulerUtils.verifyAppAddedAndRemovedFromScheduler(fs.applications,
+      fs, "queue");
+  }
+
+  private void checkApplicationResourceUsage(int expected, 
+      Application application) {
+    Assert.assertEquals(expected, application.getUsedResources().getMemory());
+  }
+  
+  private void checkNodeResourceUsage(int expected,
+      org.apache.hadoop.yarn.server.resourcemanager.NodeManager node) {
+    Assert.assertEquals(expected, node.getUsed().getMemory());
+    node.checkResourceUsage();
+  }
+
+  public static void main(String[] arg) throws Exception {
+    TestFifoScheduler t = new TestFifoScheduler();
+    t.setUp();
+    t.testFifoScheduler();
+    t.tearDown();
+  }
+}

+ 14 - 18
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java

@@ -41,9 +41,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
-import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
-import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
-import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppFailedAttemptEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
@@ -1387,31 +1384,30 @@ public class TestRMWebServicesApps extends JerseyTest {
     rm.stop();
   }
 
-  @Test
+  @Test (timeout = 20000)
   public void testMultipleAppAttempts() throws JSONException, Exception {
     rm.start();
     MockNM amNodeManager = rm.registerNode("127.0.0.1:1234", 8192);
     RMApp app1 = rm.submitApp(CONTAINER_MB, "testwordcount", "user1");
-    amNodeManager.nodeHeartbeat(true);
-    rm.waitForState(app1.getCurrentAppAttempt().getAppAttemptId(),
-      RMAppAttemptState.ALLOCATED);
+    MockAM am = MockRM.launchAM(app1, rm, amNodeManager);
     int maxAppAttempts = rm.getConfig().getInt(
         YarnConfiguration.RM_AM_MAX_ATTEMPTS,
         YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
     assertTrue(maxAppAttempts > 1);
-    int retriesLeft = maxAppAttempts;
-    while (--retriesLeft > 0) {
-      RMAppEvent event =
-          new RMAppFailedAttemptEvent(app1.getApplicationId(),
-              RMAppEventType.ATTEMPT_FAILED, "", false);
-      app1.handle(event);
+    int numAttempt = 1;
+    while (true) {
+      // fail the AM by sending CONTAINER_FINISHED event without registering.
+      amNodeManager.nodeHeartbeat(am.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
+      am.waitForState(RMAppAttemptState.FAILED);
+      if (numAttempt == maxAppAttempts) {
+        rm.waitForState(app1.getApplicationId(), RMAppState.FAILED);
+        break;
+      }
+      // wait for app to start a new attempt.
       rm.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
-      amNodeManager.nodeHeartbeat(true);
+      am = MockRM.launchAM(app1, rm, amNodeManager);
+      numAttempt++;
     }
-    // kick the scheduler to allocate the am container.
-    amNodeManager.nodeHeartbeat(true);
-    rm.waitForState(app1.getCurrentAppAttempt().getAppAttemptId(),
-      RMAppAttemptState.ALLOCATED);
     assertEquals("incorrect number of attempts", maxAppAttempts,
         app1.getAppAttempts().values().size());
     testAppAttemptsHelper(app1.getApplicationId().toString(), app1,

+ 72 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/pom.xml

@@ -32,29 +32,101 @@
   </properties>
 
   <dependencies>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <scope>provided</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>commons-el</groupId>
+          <artifactId>commons-el</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-runtime</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-compiler</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>jsp-2.1-jetty</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-annotations</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-yarn-common</artifactId>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-yarn-server-common</artifactId>
     </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-yarn-server-nodemanager</artifactId>
     </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-yarn-server-resourcemanager</artifactId>
     </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-yarn-server-resourcemanager</artifactId>
       <type>test-jar</type>
       <scope>test</scope>
     </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-minikdc</artifactId>
       <scope>test</scope>
     </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-yarn-common</artifactId>
+    </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-yarn-api</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-logging</groupId>
+      <artifactId>commons-logging</artifactId>
+    </dependency>
   </dependencies>
 
   <build>

+ 80 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/pom.xml

@@ -38,14 +38,94 @@
       <artifactId>servlet-api</artifactId>
       <scope>compile</scope>
     </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <scope>provided</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>commons-el</groupId>
+          <artifactId>commons-el</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-runtime</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-compiler</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>jsp-2.1-jetty</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-all</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-yarn-server-common</artifactId>
     </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-yarn-common</artifactId>
     </dependency>
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-yarn-api</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>commons-httpclient</groupId>
+      <artifactId>commons-httpclient</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-logging</groupId>
+      <artifactId>commons-logging</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>jetty</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>org.glassfish.grizzly</groupId>
+      <artifactId>grizzly-http-servlet</artifactId>
+      <scope>test</scope>
+    </dependency>
+
+    <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
+    <dependency>
+      <groupId>com.sun.jersey.jersey-test-framework</groupId>
+      <artifactId>jersey-test-framework-grizzly2</artifactId>
+      <scope>test</scope>
+    </dependency>
+
   </dependencies>
 
   <build>

+ 1 - 8
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/pom.xml

@@ -32,14 +32,7 @@
     <hadoop.common.build.dir>${basedir}/../../../../hadoop-common-project/hadoop-common/target</hadoop.common.build.dir>
   </properties>
 
-  <dependencies>
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-yarn-common</artifactId>
-      <type>test-jar</type>
-      <scope>test</scope>
-    </dependency>
-  </dependencies>
+  <!-- Do not add dependencies here, add them to the POM of the leaf module -->
 
   <modules>
     <module>hadoop-yarn-server-common</module>

+ 3 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/pom.xml

@@ -26,10 +26,13 @@
   <artifactId>hadoop-yarn-site</artifactId>
   <version>3.0.0-SNAPSHOT</version>
   <name>hadoop-yarn-site</name>
+  <packaging>pom</packaging>
 
   <properties>
     <!-- Needed for generating FindBugs warnings using parent pom -->
     <yarn.basedir>${project.parent.parent.basedir}</yarn.basedir>
   </properties>
 
+  <!-- Do not add dependencies here, this is a documentation only module -->
+
 </project>

+ 1 - 106
hadoop-yarn-project/hadoop-yarn/pom.xml

@@ -33,112 +33,7 @@
     <hadoop.common.build.dir>${basedir}/../../../hadoop-common-project/hadoop-common/target</hadoop.common.build.dir>
   </properties>
 
-  <dependencies>
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-common</artifactId>
-      <scope>provided</scope>
-      <exclusions>
-        <exclusion>
-          <groupId>commons-el</groupId>
-          <artifactId>commons-el</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>tomcat</groupId>
-          <artifactId>jasper-runtime</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>tomcat</groupId>
-          <artifactId>jasper-compiler</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>org.mortbay.jetty</groupId>
-          <artifactId>jsp-2.1-jetty</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-     <groupId>org.slf4j</groupId>
-       <artifactId>slf4j-api</artifactId>
-    </dependency>
-    <dependency>
-     <groupId>org.slf4j</groupId>
-       <artifactId>slf4j-log4j12</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-annotations</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.mockito</groupId>
-      <artifactId>mockito-all</artifactId>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-common</artifactId>
-      <type>test-jar</type>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>com.google.inject.extensions</groupId>
-      <artifactId>guice-servlet</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>io.netty</groupId>
-      <artifactId>netty</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>com.google.protobuf</groupId>
-      <artifactId>protobuf-java</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>junit</groupId>
-      <artifactId>junit</artifactId>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>commons-io</groupId>
-      <artifactId>commons-io</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-hdfs</artifactId>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>com.google.inject</groupId>
-      <artifactId>guice</artifactId>
-    </dependency>
-    <dependency>
-        <groupId>cglib</groupId>
-        <artifactId>cglib</artifactId>
-        <scope>provided</scope>
-    </dependency>
-    <dependency>
-      <groupId>com.sun.jersey.jersey-test-framework</groupId>
-      <artifactId>jersey-test-framework-core</artifactId>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>com.sun.jersey.jersey-test-framework</groupId>
-      <artifactId>jersey-test-framework-grizzly2</artifactId>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>com.sun.jersey</groupId>
-      <artifactId>jersey-server</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>com.sun.jersey</groupId>
-      <artifactId>jersey-json</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>com.sun.jersey.contribs</groupId>
-      <artifactId>jersey-guice</artifactId>
-    </dependency>
-  </dependencies>
+  <!-- Do not add dependencies here, add them to the POM of the leaf module -->
 
   <build>
     <plugins>

+ 1 - 126
hadoop-yarn-project/pom.xml

@@ -41,132 +41,7 @@
     <module>hadoop-yarn</module>
   </modules>
 
-  <dependencies>
-    <dependency>
-      <groupId>com.google.protobuf</groupId>
-      <artifactId>protobuf-java</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.avro</groupId>
-      <artifactId>avro</artifactId>
-      <exclusions>
-        <exclusion>
-          <groupId>org.mortbay.jetty</groupId>
-          <artifactId>jetty</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>org.apache.ant</groupId>
-          <artifactId>ant</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>io.netty</groupId>
-          <artifactId>netty</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>org.apache.velocity</groupId>
-          <artifactId>velocity</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>org.slf4j</groupId>
-          <artifactId>slf4j-api</artifactId>
-        </exclusion>
-        <exclusion>
-          <artifactId>paranamer-ant</artifactId>
-          <groupId>com.thoughtworks.paranamer</groupId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-common</artifactId>
-      <scope>provided</scope>
-      <exclusions>
-        <exclusion>
-          <groupId>commons-el</groupId>
-          <artifactId>commons-el</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>tomcat</groupId>
-          <artifactId>jasper-runtime</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>tomcat</groupId>
-          <artifactId>jasper-compiler</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>org.mortbay.jetty</groupId>
-          <artifactId>jsp-2.1-jetty</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <dependency>
-     <groupId>org.slf4j</groupId>
-       <artifactId>slf4j-api</artifactId>
-    </dependency>
-    <dependency>
-     <groupId>org.slf4j</groupId>
-       <artifactId>slf4j-log4j12</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-annotations</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.mockito</groupId>
-      <artifactId>mockito-all</artifactId>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-common</artifactId>
-      <type>test-jar</type>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-hdfs</artifactId>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>com.google.inject</groupId>
-      <artifactId>guice</artifactId>
-    </dependency>
-    <dependency>
-        <groupId>cglib</groupId>
-        <artifactId>cglib</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>com.sun.jersey</groupId>
-      <artifactId>jersey-server</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>com.sun.jersey.contribs</groupId>
-      <artifactId>jersey-guice</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>com.google.inject.extensions</groupId>
-      <artifactId>guice-servlet</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>junit</groupId>
-      <artifactId>junit</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>io.netty</groupId>
-      <artifactId>netty</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>commons-io</groupId>
-      <artifactId>commons-io</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.hsqldb</groupId>
-      <artifactId>hsqldb</artifactId>
-      <scope>compile</scope>
-    </dependency>
-
-  </dependencies>
+  <!-- Do not add dependencies here, add them to the POM of the leaf module -->
 
   <build>
     <plugins>