瀏覽代碼

Merge r1414455 through r1415803 from trunk.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-2802@1415809 13f79535-47bb-0310-9956-ffa450edef68
Tsz-wo Sze 12 年之前
父節點
當前提交
6ad4752853
共有 94 個文件被更改,包括 2237 次插入1747 次删除
  1. 3 0
      hadoop-common-project/hadoop-common/CHANGES.txt
  2. 16 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java
  3. 6 2
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFileSystemTestSetup.java
  4. 6 2
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFsTestSetup.java
  5. 13 0
      hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
  6. 42 11
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java
  7. 21 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/client/HdfsDataOutputStream.java
  8. 3 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java
  9. 1 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java
  10. 5 6
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java
  11. 185 57
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java
  12. 1 5
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
  13. 6 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
  14. 18 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFileUnderConstruction.java
  15. 10 7
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java
  16. 3 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java
  17. 2 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsBinaryLoader.java
  18. 1 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto
  19. 4 4
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java
  20. 228 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSClusterWithNodeGroup.java
  21. 128 23
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHFlush.java
  22. 290 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithNodeGroup.java
  23. 6 0
      hadoop-mapreduce-project/CHANGES.txt
  24. 0 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java
  25. 3 24
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/TaskHeartbeatHandler.java
  26. 1 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java
  27. 16 0
      hadoop-yarn-project/CHANGES.txt
  28. 2 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestGetGroups.java
  29. 1 10
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
  30. 0 11
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
  31. 1 7
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java
  32. 0 6
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java
  33. 1 16
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java
  34. 6 9
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java
  35. 1 23
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java
  36. 0 39
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ApplicationsStore.java
  37. 2 12
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileRMStateStore.java
  38. 0 128
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/MemStore.java
  39. 24 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java
  40. 1 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/Recoverable.java
  41. 0 46
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/Store.java
  42. 3 39
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/StoreFactory.java
  43. 0 509
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKStore.java
  44. 23 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/package-info.java
  45. 0 8
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java
  46. 0 9
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
  47. 2 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
  48. 1 4
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java
  49. 3 14
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java
  50. 2 3
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java
  51. 3 6
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AppSchedulable.java
  52. 88 166
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java
  53. 158 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSParentQueue.java
  54. 96 49
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueue.java
  55. 2 3
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerApp.java
  56. 133 134
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
  57. 1 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerConfiguration.java
  58. 8 5
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerEventLog.java
  59. 185 62
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java
  60. 0 6
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/Schedulable.java
  61. 3 9
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java
  62. 3 3
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/FairSchedulerInfo.java
  63. 9 11
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/FairSchedulerQueueInfo.java
  64. 1 4
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java
  65. 2 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationACLs.java
  66. 1 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java
  67. 1 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java
  68. 2 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceManager.java
  69. 0 5
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java
  70. 0 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMLaunchFailure.java
  71. 1 3
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java
  72. 0 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestASMStateMachine.java
  73. 0 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestApplicationMasterExpiry.java
  74. 1 3
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestSchedulerNegotiator.java
  75. 1 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestNMExpiry.java
  76. 1 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestRMNMRPCResponseId.java
  77. 0 6
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java
  78. 2 5
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java
  79. 9 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java
  80. 3 3
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java
  81. 6 6
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java
  82. 27 27
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java
  83. 1 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestQueueParsing.java
  84. 1 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java
  85. 0 3
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FakeSchedulable.java
  86. 83 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSLeafQueue.java
  87. 0 42
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSQueueSchedulable.java
  88. 2 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSSchedulerApp.java
  89. 206 108
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java
  90. 78 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerEventLog.java
  91. 3 3
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java
  92. 2 3
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebApp.java
  93. 2 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java
  94. 21 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm

+ 3 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -453,6 +453,9 @@ Release 2.0.3-alpha - Unreleased
     HADOOP-9064. Augment DelegationTokenRenewer API to cancel the tokens on 
     calls to removeRenewAction. (kkambatl via tucu)
 
+    HADOOP-8958. ViewFs:Non absolute mount name failures when running 
+    multiple tests on Windows. (Chris Nauroth via suresh)
+
 Release 2.0.2-alpha - 2012-09-07 
 
   INCOMPATIBLE CHANGES

+ 16 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java

@@ -28,6 +28,9 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
+import org.apache.hadoop.util.ReflectionUtils;
 
 /** The class represents a cluster of computer with a tree hierarchical
  * network topology.
@@ -52,6 +55,19 @@ public class NetworkTopology {
       super(msg);
     }
   }
+  
+  /**
+   * Get an instance of NetworkTopology based on the value of the configuration
+   * parameter net.topology.impl.
+   * 
+   * @param conf the configuration to be used
+   * @return an instance of NetworkTopology
+   */
+  public static NetworkTopology getInstance(Configuration conf){
+    return ReflectionUtils.newInstance(
+        conf.getClass(CommonConfigurationKeysPublic.NET_TOPOLOGY_IMPL_KEY,
+        NetworkTopology.class, NetworkTopology.class), conf);
+  }
 
   /** InnerNode represents a switch/router of a data center or rack.
    * Different from a leaf node, it has non-null children.

+ 6 - 2
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFileSystemTestSetup.java

@@ -26,6 +26,7 @@ import org.apache.hadoop.fs.FileSystemTestHelper;
 import org.apache.hadoop.fs.FsConstants;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.viewfs.ConfigUtil;
+import org.apache.hadoop.util.Shell;
 import org.mortbay.log.Log;
 
 
@@ -123,8 +124,11 @@ public class ViewFileSystemTestSetup {
    * in the target file system.
    */
   static void linkUpFirstComponents(Configuration conf, String path, FileSystem fsTarget, String info) {
-    int indexOf2ndSlash = path.indexOf('/', 1);
-    String firstComponent = path.substring(0, indexOf2ndSlash);
+    int indexOfEnd = path.indexOf('/', 1);
+    if (Shell.WINDOWS) {
+      indexOfEnd = path.indexOf('/', indexOfEnd + 1);
+    }
+    String firstComponent = path.substring(0, indexOfEnd);
     URI linkTarget = fsTarget.makeQualified(new Path(firstComponent)).toUri();
     ConfigUtil.addLink(conf, firstComponent, linkTarget);
     Log.info("Added link for " + info + " " 

+ 6 - 2
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFsTestSetup.java

@@ -25,6 +25,7 @@ import org.apache.hadoop.fs.FileContextTestHelper;
 import org.apache.hadoop.fs.FsConstants;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.viewfs.ConfigUtil;
+import org.apache.hadoop.util.Shell;
 import org.mortbay.log.Log;
 
 
@@ -120,8 +121,11 @@ public class ViewFsTestSetup {
    */
   static void linkUpFirstComponents(Configuration conf, String path,
       FileContext fsTarget, String info) {
-    int indexOf2ndSlash = path.indexOf('/', 1);
-    String firstComponent = path.substring(0, indexOf2ndSlash);
+    int indexOfEnd = path.indexOf('/', 1);
+    if (Shell.WINDOWS) {
+      indexOfEnd = path.indexOf('/', indexOfEnd + 1);
+    }
+    String firstComponent = path.substring(0, indexOfEnd);
     URI linkTarget = fsTarget.makeQualified(new Path(firstComponent)).toUri();
     ConfigUtil.addLink(conf, firstComponent, linkTarget);
     Log.info("Added link for " + info + " " 

+ 13 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -17,6 +17,9 @@ Trunk (Unreleased)
     reliably storing HDFS edit logs. See dedicated section below for breakdown
     of subtasks.
 
+    HDFS-3495. Update Balancer to support new NetworkTopology with NodeGroup.
+    (Junping Du via szetszwo)
+
   IMPROVEMENTS
 
     HDFS-1620. Rename HdfsConstants -> HdfsServerConstants, FSConstants ->
@@ -386,6 +389,9 @@ Release 2.0.3-alpha - Unreleased
 
     HDFS-4155. libhdfs implementation of hsync API (Liang Xie via todd)
 
+    HDFS-4213. Add an API to hsync for updating the last block length at the
+    namenode. (Jing Zhao via szetszwo)
+
   IMPROVEMENTS
   
     HDFS-3925. Prettify PipelineAck#toString() for printing to a log
@@ -484,6 +490,9 @@ Release 2.0.3-alpha - Unreleased
     HDFS-4038. Override toString() for BookKeeperEditLogInputStream.
     (Vinay via umamahesh)
 
+    HDFS-4214. OfflineEditsViewer should print out the offset at which it
+    encountered an error. (Colin Patrick McCabe via atm)
+
   OPTIMIZATIONS
 
   BUG FIXES
@@ -633,6 +642,10 @@ Release 2.0.3-alpha - Unreleased
     HDFS-4216. Do not ignore QuotaExceededException when adding symlinks.
     (szetszwo)
 
+    HDFS-4242. Map.Entry is incorrectly used in LeaseManager since the behavior
+    of it is undefined after the iteration or modifications of the map.
+    (szetszwo)
+
 Release 2.0.2-alpha - 2012-09-07 
 
   INCOMPATIBLE CHANGES

+ 42 - 11
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java

@@ -46,6 +46,7 @@ import org.apache.hadoop.fs.Syncable;
 import org.apache.hadoop.fs.UnresolvedLinkException;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.client.HdfsDataOutputStream;
+import org.apache.hadoop.hdfs.client.HdfsDataOutputStream.SyncFlag;
 import org.apache.hadoop.hdfs.protocol.DSQuotaExceededException;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
@@ -1489,9 +1490,14 @@ public class DFSOutputStream extends FSOutputSummer implements Syncable {
    */
   @Override
   public void hflush() throws IOException {
-    flushOrSync(false);
+    flushOrSync(false, EnumSet.noneOf(SyncFlag.class));
   }
 
+  @Override
+  public void hsync() throws IOException {
+    hsync(EnumSet.noneOf(SyncFlag.class));
+  }
+  
   /**
    * The expected semantics is all data have flushed out to all replicas 
    * and all replicas have done posix fsync equivalent - ie the OS has 
@@ -1500,17 +1506,35 @@ public class DFSOutputStream extends FSOutputSummer implements Syncable {
    * Note that only the current block is flushed to the disk device.
    * To guarantee durable sync across block boundaries the stream should
    * be created with {@link CreateFlag#SYNC_BLOCK}.
+   * 
+   * @param syncFlags
+   *          Indicate the semantic of the sync. Currently used to specify
+   *          whether or not to update the block length in NameNode.
    */
-  @Override
-  public void hsync() throws IOException {
-    flushOrSync(true);
+  public void hsync(EnumSet<SyncFlag> syncFlags) throws IOException {
+    flushOrSync(true, syncFlags);
   }
 
-  private void flushOrSync(boolean isSync) throws IOException {
+  /**
+   * Flush/Sync buffered data to DataNodes.
+   * 
+   * @param isSync
+   *          Whether or not to require all replicas to flush data to the disk
+   *          device
+   * @param syncFlags
+   *          Indicate extra detailed semantic of the flush/sync. Currently
+   *          mainly used to specify whether or not to update the file length in
+   *          the NameNode
+   * @throws IOException
+   */
+  private void flushOrSync(boolean isSync, EnumSet<SyncFlag> syncFlags)
+      throws IOException {
     dfsClient.checkOpen();
     isClosed();
     try {
       long toWaitFor;
+      long lastBlockLength = -1L;
+      boolean updateLength = syncFlags.contains(SyncFlag.UPDATE_LENGTH);
       synchronized (this) {
         /* Record current blockOffset. This might be changed inside
          * flushBuffer() where a partial checksum chunk might be flushed.
@@ -1574,13 +1598,20 @@ public class DFSOutputStream extends FSOutputSummer implements Syncable {
       } // end synchronized
 
       waitForAckedSeqno(toWaitFor);
-
-      // If any new blocks were allocated since the last flush, 
-      // then persist block locations on namenode. 
-      //
-      if (persistBlocks.getAndSet(false)) {
+      
+      if (updateLength) {
+        synchronized (this) {
+          if (streamer != null && streamer.block != null) {
+            lastBlockLength = streamer.block.getNumBytes();
+          }
+        }
+      }
+      // If 1) any new blocks were allocated since the last flush, or 2) an
+      // update of the length in the NN is required, then persist block
+      // locations on the namenode.
+      if (persistBlocks.getAndSet(false) || updateLength) {
         try {
-          dfsClient.namenode.fsync(src, dfsClient.clientName);
+          dfsClient.namenode.fsync(src, dfsClient.clientName, lastBlockLength);
         } catch (IOException ioe) {
           DFSClient.LOG.warn("Unable to persist blocks in hflush for " + src, ioe);
           // If we got an error here, it might be because some other thread called

+ 21 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/client/HdfsDataOutputStream.java

@@ -18,6 +18,7 @@
 package org.apache.hadoop.hdfs.client;
 
 import java.io.IOException;
+import java.util.EnumSet;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
@@ -56,4 +57,24 @@ public class HdfsDataOutputStream extends FSDataOutputStream {
   public synchronized int getCurrentBlockReplication() throws IOException {
     return ((DFSOutputStream)getWrappedStream()).getCurrentBlockReplication();
   }
+  
+  /**
+   * Sync buffered data to DataNodes (flush to disk devices).
+   * 
+   * @param syncFlags
+   *          Indicate the detailed semantic and actions of the hsync.
+   * @throws IOException
+   * @see FSDataOutputStream#hsync()
+   */
+  public void hsync(EnumSet<SyncFlag> syncFlags) throws IOException {
+    ((DFSOutputStream) getWrappedStream()).hsync(syncFlags);
+  }
+  
+  public static enum SyncFlag {
+    /**
+     * When doing sync to DataNodes, also update the metadata (block
+     * length) in the NameNode
+     */
+    UPDATE_LENGTH;
+  }
 }

+ 3 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java

@@ -827,14 +827,15 @@ public interface ClientProtocol {
    * The file must be currently open for writing.
    * @param src The string representation of the path
    * @param client The string representation of the client
-   * 
+   * @param lastBlockLength The length of the last block (under construction) 
+   *                        to be reported to NameNode 
    * @throws AccessControlException permission denied
    * @throws FileNotFoundException file <code>src</code> is not found
    * @throws UnresolvedLinkException if <code>src</code> contains a symlink. 
    * @throws IOException If an I/O error occurred
    */
   @Idempotent
-  public void fsync(String src, String client) 
+  public void fsync(String src, String client, long lastBlockLength) 
       throws AccessControlException, FileNotFoundException, 
       UnresolvedLinkException, IOException;
 

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java

@@ -705,7 +705,7 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
   public FsyncResponseProto fsync(RpcController controller,
       FsyncRequestProto req) throws ServiceException {
     try {
-      server.fsync(req.getSrc(), req.getClient());
+      server.fsync(req.getSrc(), req.getClient(), req.getLastBlockLength());
       return VOID_FSYNC_RESPONSE;
     } catch (IOException e) {
       throw new ServiceException(e);

+ 5 - 6
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java

@@ -663,12 +663,11 @@ public class ClientNamenodeProtocolTranslatorPB implements
   }
 
   @Override
-  public void fsync(String src, String client) throws AccessControlException,
-      FileNotFoundException, UnresolvedLinkException, IOException {
-    FsyncRequestProto req = FsyncRequestProto.newBuilder()
-        .setSrc(src)
-        .setClient(client)
-        .build();
+  public void fsync(String src, String client, long lastBlockLength)
+      throws AccessControlException, FileNotFoundException,
+      UnresolvedLinkException, IOException {
+    FsyncRequestProto req = FsyncRequestProto.newBuilder().setSrc(src)
+        .setClient(client).setLastBlockLength(lastBlockLength).build();
     try {
       rpcProxy.fsync(null, req);
     } catch (ServiceException e) {

+ 185 - 57
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java

@@ -168,7 +168,7 @@ import org.apache.hadoop.util.ToolRunner;
  * <ol>
  * <li>The cluster is balanced. Exiting
  * <li>No block can be moved. Exiting...
- * <li>No block has been moved for 3 iterations. Exiting...
+ * <li>No block has been moved for 5 iterations. Exiting...
  * <li>Received an IO exception: failure reason. Exiting...
  * <li>Another balancer is running. Exiting...
  * </ol>
@@ -222,7 +222,7 @@ public class Balancer {
   private Map<String, BalancerDatanode> datanodes
                  = new HashMap<String, BalancerDatanode>();
   
-  private NetworkTopology cluster = new NetworkTopology();
+  private NetworkTopology cluster;
   
   final static private int MOVER_THREAD_POOL_SIZE = 1000;
   final private ExecutorService moverExecutor = 
@@ -249,7 +249,7 @@ public class Balancer {
      * Return true if a block and its proxy are chosen; false otherwise
      */
     private boolean chooseBlockAndProxy() {
-      // iterate all source's blocks until find a good one    
+      // iterate all source's blocks until find a good one
       for (Iterator<BalancerBlock> blocks=
         source.getBlockIterator(); blocks.hasNext();) {
         if (markMovedIfGoodBlock(blocks.next())) {
@@ -293,22 +293,35 @@ public class Balancer {
      * @return true if a proxy is found; otherwise false
      */
     private boolean chooseProxySource() {
-      // check if there is replica which is on the same rack with the target
+      final DatanodeInfo targetDN = target.getDatanode();
+      boolean find = false;
       for (BalancerDatanode loc : block.getLocations()) {
-        if (cluster.isOnSameRack(loc.getDatanode(), target.getDatanode())) {
-          if (loc.addPendingBlock(this)) {
-            proxySource = loc;
+        // check if there is replica which is on the same rack with the target
+        if (cluster.isOnSameRack(loc.getDatanode(), targetDN) && addTo(loc)) {
+          find = true;
+          // if the cluster is not nodegroup aware, or the proxy is in the same
+          // nodegroup as the target, then we have already found the nearest proxy
+          if (!cluster.isNodeGroupAware() 
+              || cluster.isOnSameNodeGroup(loc.getDatanode(), targetDN)) {
             return true;
           }
         }
-      }
-      // find out a non-busy replica
-      for (BalancerDatanode loc : block.getLocations()) {
-        if (loc.addPendingBlock(this)) {
-          proxySource = loc;
-          return true;
+        
+        if (!find) {
+          // find out a non-busy replica out of rack of target
+          find = addTo(loc);
         }
       }
+      
+      return find;
+    }
+    
+    // add a BalancerDatanode as proxy source for specific block movement
+    private boolean addTo(BalancerDatanode bdn) {
+      if (bdn.addPendingBlock(this)) {
+        proxySource = bdn;
+        return true;
+      }
       return false;
     }
     
@@ -686,7 +699,7 @@ public class Balancer {
         NodeTask task = tasks.next();
         BalancerDatanode target = task.getDatanode();
         PendingBlockMove pendingBlock = new PendingBlockMove();
-        if ( target.addPendingBlock(pendingBlock) ) { 
+        if (target.addPendingBlock(pendingBlock)) { 
           // target is not busy, so do a tentative block allocation
           pendingBlock.source = this;
           pendingBlock.target = target;
@@ -787,9 +800,10 @@ public class Balancer {
    */
   private static void checkReplicationPolicyCompatibility(Configuration conf
       ) throws UnsupportedActionException {
-    if (BlockPlacementPolicy.getInstance(conf, null, null).getClass() != 
-        BlockPlacementPolicyDefault.class) {
-      throw new UnsupportedActionException("Balancer without BlockPlacementPolicyDefault");
+    // Reject only policies that are NOT BlockPlacementPolicyDefault (or a
+    // subclass of it); the default policy itself must be accepted.
+    if (!(BlockPlacementPolicy.getInstance(conf, null, null) instanceof 
+        BlockPlacementPolicyDefault)) {
+      throw new UnsupportedActionException(
+          "Balancer without BlockPlacementPolicyDefault");
     }
   }
 
@@ -804,6 +818,7 @@ public class Balancer {
     this.threshold = p.threshold;
     this.policy = p.policy;
     this.nnc = theblockpool;
+    cluster = NetworkTopology.getInstance(conf);
   }
   
   /* Shuffle datanode array */
@@ -914,9 +929,15 @@ public class Balancer {
    * Return total number of bytes to move in this iteration
    */
   private long chooseNodes() {
-    // Match nodes on the same rack first
+    // First, match nodes on the same node group if cluster has nodegroup
+    // awareness
+    if (cluster.isNodeGroupAware()) {
+      chooseNodesOnSameNodeGroup();
+    }
+    
+    // Then, match nodes on the same rack
     chooseNodes(true);
-    // Then match nodes on different racks
+    // At last, match nodes on different racks
     chooseNodes(false);
     
     assert (datanodes.size() >= sources.size()+targets.size())
@@ -931,6 +952,102 @@ public class Balancer {
     }
     return bytesToMove;
   }
+  
+  /**
+   * Decide all <source, target> pairs where source and target are 
+   * on the same NodeGroup
+   */
+  private void chooseNodesOnSameNodeGroup() {
+
+    /* first step: match each overUtilized datanode (source) to
+     * one or more underUtilized datanodes within same NodeGroup(targets).
+     */
+    chooseOnSameNodeGroup(overUtilizedDatanodes, underUtilizedDatanodes);
+
+    /* match each remaining overutilized datanode (source) to below average 
+     * utilized datanodes within the same NodeGroup(targets).
+     * Note only overutilized datanodes that haven't had that max bytes to move
+     * satisfied in step 1 are selected
+     */
+    chooseOnSameNodeGroup(overUtilizedDatanodes, belowAvgUtilizedDatanodes);
+
+    /* match each remaining underutilized datanode to above average utilized 
+     * datanodes within the same NodeGroup.
+     * Note only underutilized datanodes that have not had that max bytes to
+     * move satisfied in step 1 are selected.
+     */
+    chooseOnSameNodeGroup(underUtilizedDatanodes, aboveAvgUtilizedDatanodes);
+  }
+  
+  /**
+   * Match two sets of nodes within the same NodeGroup, one should be source
+   * nodes (utilization > Avg), and the other should be destination nodes 
+   * (utilization < Avg).
+   * @param datanodes
+   * @param candidates
+   */
+  private <D extends BalancerDatanode, C extends BalancerDatanode> void 
+      chooseOnSameNodeGroup(Collection<D> datanodes, Collection<C> candidates) {
+    for (Iterator<D> i = datanodes.iterator(); i.hasNext();) {
+      final D datanode = i.next();
+      for(; chooseOnSameNodeGroup(datanode, candidates.iterator()); );
+      if (!datanode.isMoveQuotaFull()) {
+        i.remove();
+      }
+    }
+  }
+  
+  /**
+   * Match one datanode with a set of candidates nodes within the same NodeGroup.
+   */
+  private <T extends BalancerDatanode> boolean chooseOnSameNodeGroup(
+      BalancerDatanode dn, Iterator<T> candidates) {
+    final T chosen = chooseCandidateOnSameNodeGroup(dn, candidates);
+    if (chosen == null) {
+      return false;
+    }
+    if (dn instanceof Source) {
+      matchSourceWithTargetToMove((Source)dn, chosen);
+    } else {
+      matchSourceWithTargetToMove((Source)chosen, dn);
+    }
+    if (!chosen.isMoveQuotaFull()) {
+      candidates.remove();
+    }
+    return true;
+  }
+  
+  /**
+   * Pair <code>source</code> with <code>target</code>: create a NodeTask for
+   * the smaller of the two available sizes, attach it to the source, add its
+   * size to the target's scheduled size, record both nodes in the
+   * sources/targets collections and log the decision.
+   */
+  private void matchSourceWithTargetToMove(
+      Source source, BalancerDatanode target) {
+    final long bytesToSchedule = Math.min(
+        source.availableSizeToMove(), target.availableSizeToMove());
+    final NodeTask task = new NodeTask(target, bytesToSchedule);
+    source.addNodeTask(task);
+    target.incScheduledSize(task.getSize());
+    sources.add(source);
+    targets.add(target);
+    LOG.info("Decided to move "+StringUtils.byteDesc(bytesToSchedule)+" bytes from "
+        +source.datanode.getName() + " to " + target.datanode.getName());
+  }
+  
+  /** choose a datanode from <code>candidates</code> within the same NodeGroup 
+   * of <code>dn</code>.
+   */
+  private <T extends BalancerDatanode> T chooseCandidateOnSameNodeGroup(
+      BalancerDatanode dn, Iterator<T> candidates) {
+    if (dn.isMoveQuotaFull()) {
+      for(; candidates.hasNext(); ) {
+        final T c = candidates.next();
+        if (!c.isMoveQuotaFull()) {
+          candidates.remove();
+          continue;
+        }
+        if (cluster.isOnSameNodeGroup(dn.getDatanode(), c.getDatanode())) {
+          return c;
+        }
+      }
+    }
+    return null;
+  }
 
   /* if onRack is true, decide all <source, target> pairs
    * where source and target are on the same rack; Otherwise
@@ -941,33 +1058,33 @@ public class Balancer {
     /* first step: match each overUtilized datanode (source) to
      * one or more underUtilized datanodes (targets).
      */
-    chooseTargets(underUtilizedDatanodes.iterator(), onRack);
+    chooseTargets(underUtilizedDatanodes, onRack);
     
     /* match each remaining overutilized datanode (source) to 
      * below average utilized datanodes (targets).
      * Note only overutilized datanodes that haven't had that max bytes to move
      * satisfied in step 1 are selected
      */
-    chooseTargets(belowAvgUtilizedDatanodes.iterator(), onRack);
+    chooseTargets(belowAvgUtilizedDatanodes, onRack);
 
-    /* match each remaining underutilized datanode to 
-     * above average utilized datanodes.
+    /* match each remaining underutilized datanode (target) to 
+     * above average utilized datanodes (source).
      * Note only underutilized datanodes that have not had that max bytes to
      * move satisfied in step 1 are selected.
      */
-    chooseSources(aboveAvgUtilizedDatanodes.iterator(), onRack);
+    chooseSources(aboveAvgUtilizedDatanodes, onRack);
   }
    
   /* choose targets from the target candidate list for each over utilized
    * source datanode. OnRackTarget determines if the chosen target 
    * should be on the same rack as the source
    */
-  private void chooseTargets(  
-      Iterator<BalancerDatanode> targetCandidates, boolean onRackTarget ) {
+  private void chooseTargets(
+      Collection<BalancerDatanode> targetCandidates, boolean onRackTarget ) {
     for (Iterator<Source> srcIterator = overUtilizedDatanodes.iterator();
         srcIterator.hasNext();) {
       Source source = srcIterator.next();
-      while (chooseTarget(source, targetCandidates, onRackTarget)) {
+      while (chooseTarget(source, targetCandidates.iterator(), onRackTarget)) {
       }
       if (!source.isMoveQuotaFull()) {
         srcIterator.remove();
@@ -981,11 +1098,11 @@ public class Balancer {
    * should be on the same rack as the target
    */
   private void chooseSources(
-      Iterator<Source> sourceCandidates, boolean onRackSource) {
+      Collection<Source> sourceCandidates, boolean onRackSource) {
     for (Iterator<BalancerDatanode> targetIterator = 
       underUtilizedDatanodes.iterator(); targetIterator.hasNext();) {
       BalancerDatanode target = targetIterator.next();
-      while (chooseSource(target, sourceCandidates, onRackSource)) {
+      while (chooseSource(target, sourceCandidates.iterator(), onRackSource)) {
       }
       if (!target.isMoveQuotaFull()) {
         targetIterator.remove();
@@ -1025,23 +1142,15 @@ public class Balancer {
     }
     if (foundTarget) {
       assert(target != null):"Choose a null target";
-      long size = Math.min(source.availableSizeToMove(),
-          target.availableSizeToMove());
-      NodeTask nodeTask = new NodeTask(target, size);
-      source.addNodeTask(nodeTask);
-      target.incScheduledSize(nodeTask.getSize());
-      sources.add(source);
-      targets.add(target);
+      matchSourceWithTargetToMove(source, target);
       if (!target.isMoveQuotaFull()) {
         targetCandidates.remove();
       }
-      LOG.info("Decided to move "+StringUtils.byteDesc(size)+" bytes from "
-          +source.datanode + " to " + target.datanode);
       return true;
     }
     return false;
   }
-  
+
   /* For the given target, choose sources from the source candidate list.
    * OnRackSource determines if the chosen source 
    * should be on the same rack as the target
@@ -1073,18 +1182,10 @@ public class Balancer {
     }
     if (foundSource) {
       assert(source != null):"Choose a null source";
-      long size = Math.min(source.availableSizeToMove(),
-          target.availableSizeToMove());
-      NodeTask nodeTask = new NodeTask(target, size);
-      source.addNodeTask(nodeTask);
-      target.incScheduledSize(nodeTask.getSize());
-      sources.add(source);
-      targets.add(target);
+      matchSourceWithTargetToMove(source, target);
       if ( !source.isMoveQuotaFull()) {
-        sourceCandidates.remove();
-      }
-      LOG.info("Decided to move "+StringUtils.byteDesc(size)+" bytes from "
-          +source.datanode + " to " + target.datanode);
+          sourceCandidates.remove();
+        }
       return true;
     }
     return false;
@@ -1226,6 +1327,10 @@ public class Balancer {
     if (block.isLocatedOnDatanode(target)) {
       return false;
     }
+    if (cluster.isNodeGroupAware() && 
+        isOnSameNodeGroupWithReplicas(target, block, source)) {
+      return false;
+    }
 
     boolean goodBlock = false;
     if (cluster.isOnSameRack(source.getDatanode(), target.getDatanode())) {
@@ -1257,10 +1362,32 @@ public class Balancer {
     }
     return goodBlock;
   }
-  
+
+  /**
+   * Tell whether moving the block to <code>target</code> would put it in a
+   * node group that already holds one of its replicas. The replica on
+   * <code>source</code> is ignored, since that is the one being moved away.
+   * Such a target is rejected because we don't want 2 replicas under the same
+   * nodegroup after balance.
+   * @param target targetDataNode
+   * @param block dataBlock
+   * @param source sourceDataNode
+   * @return true if any replica location other than source shares a node
+   * group with target
+   */
+  private boolean isOnSameNodeGroupWithReplicas(BalancerDatanode target,
+      BalancerBlock block, Source source) {
+    for (BalancerDatanode replica : block.locations) {
+      if (replica == source) {
+        // the replica being moved does not count
+        continue;
+      }
+      if (cluster.isOnSameNodeGroup(
+          replica.getDatanode(), target.getDatanode())) {
+        return true;
+      }
+    }
+    return false;
+  }
+
   /* reset all fields in a balancer preparing for the next iteration */
-  private void resetData() {
-    this.cluster = new NetworkTopology();
+  private void resetData(Configuration conf) {
+    this.cluster = NetworkTopology.getInstance(conf);
     this.overUtilizedDatanodes.clear();
     this.aboveAvgUtilizedDatanodes.clear();
     this.belowAvgUtilizedDatanodes.clear();
@@ -1331,7 +1458,8 @@ public class Balancer {
   }
 
   /** Run an iteration for all datanodes. */
-  private ReturnStatus run(int iteration, Formatter formatter) {
+  private ReturnStatus run(int iteration, Formatter formatter,
+      Configuration conf) {
     try {
       /* get all live datanodes of a cluster and their disk usage
        * decide the number of bytes need to be moved
@@ -1385,7 +1513,7 @@ public class Balancer {
       }
 
       // clean all lists
-      resetData();
+      resetData(conf);
       return ReturnStatus.IN_PROGRESS;
     } catch (IllegalArgumentException e) {
       System.out.println(e + ".  Exiting ...");
@@ -1433,7 +1561,7 @@ public class Balancer {
         Collections.shuffle(connectors);
         for(NameNodeConnector nnc : connectors) {
           final Balancer b = new Balancer(nnc, p, conf);
-          final ReturnStatus r = b.run(iteration, formatter);
+          final ReturnStatus r = b.run(iteration, formatter, conf);
           if (r == ReturnStatus.IN_PROGRESS) {
             done = false;
           } else if (r != ReturnStatus.SUCCESS) {
@@ -1527,7 +1655,7 @@ public class Balancer {
       if (args != null) {
         try {
           for(int i = 0; i < args.length; i++) {
-            checkArgument(args.length >= 2, "args = " + Arrays.toString(args));           
+            checkArgument(args.length >= 2, "args = " + Arrays.toString(args));
             if ("-threshold".equalsIgnoreCase(args[i])) {
               i++;
               try {

+ 1 - 5
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java

@@ -39,7 +39,6 @@ import org.apache.hadoop.HadoopIllegalArgumentException;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
@@ -151,10 +150,7 @@ public class DatanodeManager {
     this.namesystem = namesystem;
     this.blockManager = blockManager;
     
-    Class<? extends NetworkTopology> networkTopologyClass =
-        conf.getClass(CommonConfigurationKeysPublic.NET_TOPOLOGY_IMPL_KEY,
-            NetworkTopology.class, NetworkTopology.class);
-    networktopology = ReflectionUtils.newInstance(networkTopologyClass, conf);
+    networktopology = NetworkTopology.getInstance(conf);
 
     this.heartbeatManager = new HeartbeatManager(namesystem, blockManager, conf);
 

+ 6 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -3023,9 +3023,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
   /** Persist all metadata about this file.
    * @param src The string representation of the path
    * @param clientName The string representation of the client
+   * @param lastBlockLength The length of the last block 
+   *                        under construction reported from client.
    * @throws IOException if path does not exist
    */
-  void fsync(String src, String clientName) 
+  void fsync(String src, String clientName, long lastBlockLength) 
       throws IOException, UnresolvedLinkException {
     NameNode.stateChangeLog.info("BLOCK* fsync: " + src + " for " + clientName);
     writeLock();
@@ -3035,6 +3037,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
         throw new SafeModeException("Cannot fsync file " + src, safeMode);
       }
       INodeFileUnderConstruction pendingFile  = checkLease(src, clientName);
+      if (lastBlockLength > 0) {
+        pendingFile.updateLengthOfLastBlock(lastBlockLength);
+      }
       dir.persistBlocks(src, pendingFile);
     } finally {
       writeUnlock();

+ 18 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFileUnderConstruction.java

@@ -172,4 +172,22 @@ public class INodeFileUnderConstruction extends INodeFile implements MutableBloc
     setBlock(numBlocks()-1, ucBlock);
     return ucBlock;
   }
+
+  /**
+   * Update the length for the last block
+   * 
+   * @param lastBlockLength
+   *          The length of the last block reported from client
+   * @throws IOException
+   */
+  void updateLengthOfLastBlock(long lastBlockLength) throws IOException {
+    BlockInfo lastBlock = this.getLastBlock();
+    assert (lastBlock != null) : "The last block for path "
+        + this.getFullPathName() + " is null when updating its length";
+    assert (lastBlock instanceof BlockInfoUnderConstruction) : "The last block for path "
+        + this.getFullPathName()
+        + " is not a BlockInfoUnderConstruction when updating its length";
+    lastBlock.setNumBytes(lastBlockLength);
+  }
+  
 }

+ 10 - 7
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java

@@ -17,9 +17,12 @@
  */
 package org.apache.hadoop.hdfs.server.namenode;
 
+import static org.apache.hadoop.util.Time.now;
+
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.SortedMap;
@@ -39,8 +42,6 @@ import org.apache.hadoop.util.Daemon;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 
-import static org.apache.hadoop.util.Time.now;
-
 /**
  * LeaseManager does the lease housekeeping for writing on files.   
  * This class also provides useful static methods for lease recovery.
@@ -340,7 +341,8 @@ public class LeaseManager {
     }
 
     final int len = overwrite.length();
-    for(Map.Entry<String, Lease> entry : findLeaseWithPrefixPath(src, sortedLeasesByPath)) {
+    for(Map.Entry<String, Lease> entry
+        : findLeaseWithPrefixPath(src, sortedLeasesByPath).entrySet()) {
       final String oldpath = entry.getKey();
       final Lease lease = entry.getValue();
       //overwrite must be a prefix of oldpath
@@ -355,7 +357,8 @@ public class LeaseManager {
   }
 
   synchronized void removeLeaseWithPrefixPath(String prefix) {
-    for(Map.Entry<String, Lease> entry : findLeaseWithPrefixPath(prefix, sortedLeasesByPath)) {
+    for(Map.Entry<String, Lease> entry
+        : findLeaseWithPrefixPath(prefix, sortedLeasesByPath).entrySet()) {
       if (LOG.isDebugEnabled()) {
         LOG.debug(LeaseManager.class.getSimpleName()
             + ".removeLeaseWithPrefixPath: entry=" + entry);
@@ -364,13 +367,13 @@ public class LeaseManager {
     }
   }
 
-  static private List<Map.Entry<String, Lease>> findLeaseWithPrefixPath(
+  static private Map<String, Lease> findLeaseWithPrefixPath(
       String prefix, SortedMap<String, Lease> path2lease) {
     if (LOG.isDebugEnabled()) {
       LOG.debug(LeaseManager.class.getSimpleName() + ".findLease: prefix=" + prefix);
     }
 
-    List<Map.Entry<String, Lease>> entries = new ArrayList<Map.Entry<String, Lease>>();
+    final Map<String, Lease> entries = new HashMap<String, Lease>();
     final int srclen = prefix.length();
 
     for(Map.Entry<String, Lease> entry : path2lease.tailMap(prefix).entrySet()) {
@@ -379,7 +382,7 @@ public class LeaseManager {
         return entries;
       }
       if (p.length() == srclen || p.charAt(srclen) == Path.SEPARATOR_CHAR) {
-        entries.add(entry);
+        entries.put(entry.getKey(), entry.getValue());
       }
     }
     return entries;

+ 3 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java

@@ -825,8 +825,9 @@ class NameNodeRpcServer implements NamenodeProtocols {
   }
   
   @Override // ClientProtocol
-  public void fsync(String src, String clientName) throws IOException {
-    namesystem.fsync(src, clientName);
+  public void fsync(String src, String clientName, long lastBlockLength)
+      throws IOException {
+    namesystem.fsync(src, clientName, lastBlockLength);
   }
 
   @Override // ClientProtocol

+ 2 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsBinaryLoader.java

@@ -79,6 +79,7 @@ class OfflineEditsBinaryLoader implements OfflineEditsLoader {
       } catch (IOException e) {
         if (!recoveryMode) {
           // Tell the visitor to clean up, then re-throw the exception
+          LOG.error("Got IOException at position " + inputStream.getPosition());
           visitor.close(e);
           throw e;
         }
@@ -87,6 +88,7 @@ class OfflineEditsBinaryLoader implements OfflineEditsLoader {
       } catch (RuntimeException e) {
         if (!recoveryMode) {
           // Tell the visitor to clean up, then re-throw the exception
+          LOG.error("Got RuntimeException at position " + inputStream.getPosition());
           visitor.close(e);
           throw e;
         }

+ 1 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto

@@ -357,6 +357,7 @@ message SetQuotaResponseProto { // void response
 message FsyncRequestProto {
   required string src = 1;
   required string client = 2;
+  optional sint64 lastBlockLength = 3 [default = -1];
 }
 
 message FsyncResponseProto { // void response

+ 4 - 4
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java

@@ -321,7 +321,7 @@ public class MiniDFSCluster {
   /**
    * Used by builder to create and return an instance of MiniDFSCluster
    */
-  private MiniDFSCluster(Builder builder) throws IOException {
+  protected MiniDFSCluster(Builder builder) throws IOException {
     if (builder.nnTopology == null) {
       // If no topology is specified, build a single NN. 
       builder.nnTopology = MiniDFSNNTopology.simpleSingleNN(
@@ -369,8 +369,8 @@ public class MiniDFSCluster {
 
   private Configuration conf;
   private NameNodeInfo[] nameNodes;
-  private int numDataNodes;
-  private ArrayList<DataNodeProperties> dataNodes = 
+  protected int numDataNodes;
+  protected List<DataNodeProperties> dataNodes = 
                          new ArrayList<DataNodeProperties>();
   private File base_dir;
   private File data_dir;
@@ -2303,7 +2303,7 @@ public class MiniDFSCluster {
     return port;
   }
   
-  private void setupDatanodeAddress(Configuration conf, boolean setupHostsFile,
+  protected void setupDatanodeAddress(Configuration conf, boolean setupHostsFile,
                            boolean checkDataNodeAddrConfig) throws IOException {
     if (setupHostsFile) {
       String hostsFile = conf.get(DFS_HOSTS, "").trim();

+ 228 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSClusterWithNodeGroup.java

@@ -0,0 +1,228 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs;
+
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY;
+import static org.apache.hadoop.hdfs.server.common.Util.fileAsURI;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
+import org.apache.hadoop.hdfs.server.datanode.DataNode;
+import org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter;
+import org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset;
+import org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter.SecureResources;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.net.StaticMapping;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.security.ssl.SSLFactory;
+
+public class MiniDFSClusterWithNodeGroup extends MiniDFSCluster {
+
+  private static String[] NODE_GROUPS = null;
+  private static final Log LOG = LogFactory.getLog(MiniDFSClusterWithNodeGroup.class);
+  
+  public MiniDFSClusterWithNodeGroup(Builder builder) throws IOException {
+    super(builder);
+  }
+
+  public static void setNodeGroups (String[] nodeGroups) {
+    NODE_GROUPS = nodeGroups;
+  }
+
+  /**
+   * Start <code>numDataNodes</code> additional datanodes and register each of
+   * them in {@link StaticMapping}, both by host name and by IP:port. When
+   * <code>nodeGroups</code> is given, the registered location is the rack
+   * string concatenated with the node group string; otherwise it is the rack
+   * alone. Does nothing when <code>operation</code> is RECOVER. Calls
+   * waitActive() before returning.
+   *
+   * @param checkDataNodeHostConfig if true, an already configured datanode
+   *          host name is kept and "127.0.0.1" is only used as a fallback;
+   *          if false, the host name is always set to "127.0.0.1"
+   * @throws IllegalArgumentException if racks, nodeGroups, hosts or
+   *           simulatedCapacities is non-null but shorter than numDataNodes
+   * @throws IOException if a storage directory cannot be created or a
+   *           datanode cannot be instantiated
+   */
+  public synchronized void startDataNodes(Configuration conf, int numDataNodes,
+      boolean manageDfsDirs, StartupOption operation, 
+      String[] racks, String[] nodeGroups, String[] hosts,
+      long[] simulatedCapacities,
+      boolean setupHostsFile,
+      boolean checkDataNodeAddrConfig,
+      boolean checkDataNodeHostConfig) throws IOException {
+    if (operation == StartupOption.RECOVER) {
+      return;
+    }
+    // Honour checkDataNodeHostConfig: do not overwrite a configured host name
+    // afterwards, otherwise the setIfUnset branch would have no effect.
+    if (checkDataNodeHostConfig) {
+      conf.setIfUnset(DFS_DATANODE_HOST_NAME_KEY, "127.0.0.1");
+    } else {
+      conf.set(DFS_DATANODE_HOST_NAME_KEY, "127.0.0.1");
+    }
+
+    int curDatanodesNum = dataNodes.size();
+    // for miniclusters the default initialDelay for BRs is 0
+    if (conf.get(DFSConfigKeys.DFS_BLOCKREPORT_INITIAL_DELAY_KEY) == null) {
+      conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INITIAL_DELAY_KEY, 0);
+    }
+    // If minicluster's name node is null assume that the conf has been
+    // set with the right address:port of the name node.
+    //
+    if (racks != null && numDataNodes > racks.length ) {
+      throw new IllegalArgumentException( "The length of racks [" + racks.length
+          + "] is less than the number of datanodes [" + numDataNodes + "].");
+    }
+
+    if (nodeGroups != null && numDataNodes > nodeGroups.length ) {
+      throw new IllegalArgumentException( "The length of nodeGroups [" + nodeGroups.length
+          + "] is less than the number of datanodes [" + numDataNodes + "].");
+    }
+
+    if (hosts != null && numDataNodes > hosts.length ) {
+      throw new IllegalArgumentException( "The length of hosts [" + hosts.length
+          + "] is less than the number of datanodes [" + numDataNodes + "].");
+    }
+    //Generate some hostnames if required
+    if (racks != null && hosts == null) {
+      hosts = new String[numDataNodes];
+      for (int i = curDatanodesNum; i < curDatanodesNum + numDataNodes; i++) {
+        hosts[i - curDatanodesNum] = "host" + i + ".foo.com";
+      }
+    }
+
+    if (simulatedCapacities != null 
+        && numDataNodes > simulatedCapacities.length) {
+      throw new IllegalArgumentException( "The length of simulatedCapacities [" 
+          + simulatedCapacities.length
+          + "] is less than the number of datanodes [" + numDataNodes + "].");
+    }
+
+    String [] dnArgs = (operation == null ||
+    operation != StartupOption.ROLLBACK) ?
+        null : new String[] {operation.getName()};
+
+    for (int i = curDatanodesNum; i < curDatanodesNum+numDataNodes; i++) {
+      Configuration dnConf = new HdfsConfiguration(conf);
+      // Set up datanode address
+      setupDatanodeAddress(dnConf, setupHostsFile, checkDataNodeAddrConfig);
+      if (manageDfsDirs) {
+        File dir1 = getInstanceStorageDir(i, 0);
+        File dir2 = getInstanceStorageDir(i, 1);
+        dir1.mkdirs();
+        dir2.mkdirs();
+        if (!dir1.isDirectory() || !dir2.isDirectory()) { 
+          throw new IOException("Mkdirs failed to create directory for DataNode "
+              + i + ": " + dir1 + " or " + dir2);
+        }
+        String dirs = fileAsURI(dir1) + "," + fileAsURI(dir2);
+        dnConf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, dirs);
+        conf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, dirs);
+      }
+      if (simulatedCapacities != null) {
+        SimulatedFSDataset.setFactory(dnConf);
+        dnConf.setLong(SimulatedFSDataset.CONFIG_PROPERTY_CAPACITY,
+        simulatedCapacities[i-curDatanodesNum]);
+      }
+      LOG.info("Starting DataNode " + i + " with "
+          + DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY + ": "
+          + dnConf.get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY));
+      if (hosts != null) {
+        dnConf.set(DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY, hosts[i - curDatanodesNum]);
+        LOG.info("Starting DataNode " + i + " with hostname set to: "
+            + dnConf.get(DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY));
+      }
+      if (racks != null) {
+        // hosts is never null here: it was generated above when racks != null
+        String name = hosts[i - curDatanodesNum];
+        if (nodeGroups == null) {
+          LOG.info("Adding node with hostname : " + name + " to rack " +
+             racks[i-curDatanodesNum]);
+          StaticMapping.addNodeToRack(name,racks[i-curDatanodesNum]);
+        } else {
+          LOG.info("Adding node with hostname : " + name + " to serverGroup " +
+              nodeGroups[i-curDatanodesNum] + " and rack " +
+              racks[i-curDatanodesNum]);
+          StaticMapping.addNodeToRack(name,racks[i-curDatanodesNum] + 
+              nodeGroups[i-curDatanodesNum]);
+        }
+      }
+      Configuration newconf = new HdfsConfiguration(dnConf); // save config
+      if (hosts != null) {
+        NetUtils.addStaticResolution(hosts[i - curDatanodesNum], "localhost");
+      }
+      
+      SecureResources secureResources = null;
+      if (UserGroupInformation.isSecurityEnabled()) {
+        SSLFactory sslFactory = new SSLFactory(SSLFactory.Mode.SERVER, dnConf);
+        try {
+          secureResources = SecureDataNodeStarter.getSecureResources(sslFactory, dnConf);
+        } catch (Exception ex) {
+          ex.printStackTrace();
+        }
+      }
+      DataNode dn = DataNode.instantiateDataNode(dnArgs, dnConf, secureResources);
+      if(dn == null)
+        throw new IOException("Cannot start DataNode in "
+          + dnConf.get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY));
+      //since the HDFS does things based on IP:port, we need to add the mapping
+      //for IP:port to rackId
+      String ipAddr = dn.getXferAddress().getAddress().getHostAddress();
+      if (racks != null) {
+        int port = dn.getXferAddress().getPort();
+        if (nodeGroups == null) {
+          LOG.info("Adding node with IP:port : " + ipAddr + ":" + port +
+              " to rack " + racks[i-curDatanodesNum]);
+          StaticMapping.addNodeToRack(ipAddr + ":" + port,
+              racks[i-curDatanodesNum]);
+        } else {
+          LOG.info("Adding node with IP:port : " + ipAddr + ":" + port + " to nodeGroup " +
+          nodeGroups[i-curDatanodesNum] + " and rack " + racks[i-curDatanodesNum]);
+          StaticMapping.addNodeToRack(ipAddr + ":" + port, racks[i-curDatanodesNum] + 
+              nodeGroups[i-curDatanodesNum]);
+        }
+      }
+      dn.runDatanodeDaemon();
+      dataNodes.add(new DataNodeProperties(dn, newconf, dnArgs, secureResources));
+    }
+    this.numDataNodes += numDataNodes;
+    waitActive();
+  }
+
+  public synchronized void startDataNodes(Configuration conf, int numDataNodes, 
+      boolean manageDfsDirs, StartupOption operation, 
+      String[] racks, String[] nodeGroups, String[] hosts,
+      long[] simulatedCapacities,
+      boolean setupHostsFile) throws IOException {
+    startDataNodes(conf, numDataNodes, manageDfsDirs, operation, racks, nodeGroups, 
+        hosts, simulatedCapacities, setupHostsFile, false, false);
+  }
+
+  public void startDataNodes(Configuration conf, int numDataNodes, 
+      boolean manageDfsDirs, StartupOption operation, 
+      String[] racks, long[] simulatedCapacities,
+      String[] nodeGroups) throws IOException {
+    startDataNodes(conf, numDataNodes, manageDfsDirs, operation, racks, nodeGroups,
+        null, simulatedCapacities, false);
+  }
+
+  // This is for initialize from parent class.
+  @Override
+  public synchronized void startDataNodes(Configuration conf, int numDataNodes, 
+      boolean manageDfsDirs, StartupOption operation, 
+      String[] racks, String[] hosts,
+      long[] simulatedCapacities,
+      boolean setupHostsFile,
+      boolean checkDataNodeAddrConfig,
+      boolean checkDataNodeHostConfig) throws IOException {
+    startDataNodes(conf, numDataNodes, manageDfsDirs, operation, racks, 
+        NODE_GROUPS, hosts, simulatedCapacities, setupHostsFile, 
+        checkDataNodeAddrConfig, checkDataNodeHostConfig);
+  }
+
+}

+ 128 - 23
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHFlush.java

@@ -23,12 +23,14 @@ import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
 import java.io.InterruptedIOException;
+import java.util.EnumSet;
 
 import org.apache.commons.logging.impl.Log4JLogger;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.client.HdfsDataOutputStream.SyncFlag;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.log4j.Level;
 import org.junit.Test;
@@ -43,16 +45,21 @@ public class TestHFlush {
   
   private final String fName = "hflushtest.dat";
   
-  /** The test uses {@link #doTheJob(Configuration, String, long, short)
+  /**
+   * The test uses
+   * {@link #doTheJob(Configuration, String, long, short, boolean, EnumSet)} 
    * to write a file with a standard block size
    */
   @Test
   public void hFlush_01() throws IOException {
-    doTheJob(new HdfsConfiguration(), fName, AppendTestUtil.BLOCK_SIZE, (short)2);
+    doTheJob(new HdfsConfiguration(), fName, AppendTestUtil.BLOCK_SIZE,
+        (short) 2, false, EnumSet.noneOf(SyncFlag.class));
   }
 
-  /** The test uses {@link #doTheJob(Configuration, String, long, short)
-   * to write a file with a custom block size so the writes will be
+  /**
+   * The test uses
+   * {@link #doTheJob(Configuration, String, long, short, boolean, EnumSet)} 
+   * to write a file with a custom block size so the writes will be 
    * happening across block' boundaries
    */
   @Test
@@ -64,14 +71,17 @@ public class TestHFlush {
     conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, customPerChecksumSize);
     conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, customBlockSize);
 
-    doTheJob(conf, fName, customBlockSize, (short)2);
+    doTheJob(conf, fName, customBlockSize, (short) 2, false,
+        EnumSet.noneOf(SyncFlag.class));
   }
 
-  /** The test uses {@link #doTheJob(Configuration, String, long, short)
-   * to write a file with a custom block size so the writes will be
+  /**
+   * The test uses
+   * {@link #doTheJob(Configuration, String, long, short, boolean, EnumSet)} 
+   * to write a file with a custom block size so the writes will be 
    * happening across block's and checksum' boundaries
    */
- @Test
+  @Test
   public void hFlush_03() throws IOException {
     Configuration conf = new HdfsConfiguration();
     int customPerChecksumSize = 400;
@@ -80,22 +90,106 @@ public class TestHFlush {
     conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, customPerChecksumSize);
     conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, customBlockSize);
 
-    doTheJob(conf, fName, customBlockSize, (short)2);
+    doTheJob(conf, fName, customBlockSize, (short) 2, false,
+        EnumSet.noneOf(SyncFlag.class));
+  }
+
+  /**
+   * Test hsync (with updating block length in NameNode) while no data is
+   * actually written yet: the reported file length must stay 0.
+   * <p>
+   * Resources are released innermost-first in nested finally blocks so that
+   * the stream, the file system and the cluster are each cleaned up even if
+   * an earlier step (including obtaining the file system) fails.
+   *
+   * @throws IOException if the cluster or any file system call fails
+   */
+  @Test
+  public void hSyncUpdateLength_00() throws IOException {
+    Configuration conf = new HdfsConfiguration();
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(
+        2).build();
+    try {
+      DistributedFileSystem fileSystem =
+          (DistributedFileSystem)cluster.getFileSystem();
+      try {
+        Path path = new Path(fName);
+        FSDataOutputStream stm = fileSystem.create(path, true, 4096, (short) 2,
+            AppendTestUtil.BLOCK_SIZE);
+        try {
+          System.out.println("Created file " + path.toString());
+          ((DFSOutputStream) stm.getWrappedStream()).hsync(EnumSet
+              .of(SyncFlag.UPDATE_LENGTH));
+          long currentFileLength = fileSystem.getFileStatus(path).getLen();
+          assertEquals(0L, currentFileLength);
+        } finally {
+          // close the stream even when hsync or the assertion fails
+          stm.close();
+        }
+      } finally {
+        fileSystem.close();
+      }
+    } finally {
+      // always tear the cluster down, even if getFileSystem() threw
+      cluster.shutdown();
+    }
+  }
+  
+  /**
+   * Runs
+   * {@link #doTheJob(Configuration, String, long, short, boolean, EnumSet)}
+   * with a default configuration and the standard block size, using hsync
+   * with the semantics of {@link SyncFlag#UPDATE_LENGTH}.
+   */
+  @Test
+  public void hSyncUpdateLength_01() throws IOException {
+    final Configuration conf = new HdfsConfiguration();
+    final EnumSet<SyncFlag> flags = EnumSet.of(SyncFlag.UPDATE_LENGTH);
+    doTheJob(conf, fName, AppendTestUtil.BLOCK_SIZE, (short) 2, true, flags);
   }
 
   /**
-    The method starts new cluster with defined Configuration;
-    creates a file with specified block_size and writes 10 equal sections in it;
-    it also calls hflush() after each write and throws an IOException in case of 
-    an error.
-    @param conf cluster configuration
-    @param fileName of the file to be created and processed as required
-    @param block_size value to be used for the file's creation
-    @param replicas is the number of replicas
-    @throws IOException in case of any errors 
+   * The test calls
+   * {@link #doTheJob(Configuration, String, long, short, boolean, EnumSet)}
+   * while requiring the semantic of {@link SyncFlag#UPDATE_LENGTH}.
+   * Similar with {@link #hFlush_02()} , it writes a file with a custom block
+   * size so the writes will be happening across block' boundaries
+   */
+  @Test
+  public void hSyncUpdateLength_02() throws IOException {
+    // A block holds exactly three checksum chunks of 512 bytes each
+    final int bytesPerChecksum = 512;
+    final int blockSize = 3 * bytesPerChecksum;
+
+    // Override the default filesystem settings
+    Configuration customConf = new HdfsConfiguration();
+    customConf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY,
+        bytesPerChecksum);
+    customConf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
+
+    final EnumSet<SyncFlag> flags = EnumSet.of(SyncFlag.UPDATE_LENGTH);
+    doTheJob(customConf, fName, blockSize, (short) 2, true, flags);
+  }
+  
+  /**
+   * Runs
+   * {@link #doTheJob(Configuration, String, long, short, boolean, EnumSet)}
+   * using hsync with the semantics of {@link SyncFlag#UPDATE_LENGTH}.
+   * Like {@link #hFlush_03()}, it writes a file with a custom block size so
+   * that the writes happen across block and checksum boundaries.
+   */
+  @Test
+  public void hSyncUpdateLength_03() throws IOException {
+    // A block holds exactly three checksum chunks of 400 bytes each
+    final int bytesPerChecksum = 400;
+    final int blockSize = 3 * bytesPerChecksum;
+
+    // Override the default filesystem settings
+    Configuration customConf = new HdfsConfiguration();
+    customConf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY,
+        bytesPerChecksum);
+    customConf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
+
+    final EnumSet<SyncFlag> flags = EnumSet.of(SyncFlag.UPDATE_LENGTH);
+    doTheJob(customConf, fName, blockSize, (short) 2, true, flags);
+  }
+  
+  /**
+   * The method starts new cluster with defined Configuration; creates a file
+   * with specified block_size and writes 10 equal sections in it; it also calls
+   * hflush/hsync after each write and throws an IOException in case of an error.
+   * 
+   * @param conf cluster configuration
+   * @param fileName of the file to be created and processed as required
+   * @param block_size value to be used for the file's creation
+   * @param replicas is the number of replicas
+   * @param isSync hsync or hflush         
+   * @param syncFlags specify the semantic of the sync/flush
+   * @throws IOException in case of any errors
    */
   public static void doTheJob(Configuration conf, final String fileName,
-                              long block_size, short replicas) throws IOException {
+      long block_size, short replicas, boolean isSync,
+      EnumSet<SyncFlag> syncFlags) throws IOException {
     byte[] fileContent;
     final int SECTIONS = 10;
 
@@ -119,8 +213,21 @@ public class TestHFlush {
         System.out.println("Writing " + (tenth * i) + " to " + (tenth * (i+1)) + " section to file " + fileName);
         // write to the file
         stm.write(fileContent, tenth * i, tenth);
-        // Wait while hflush() pushes all packets through built pipeline
-        ((DFSOutputStream)stm.getWrappedStream()).hflush();
+        
+        // Wait while hflush/hsync pushes all packets through built pipeline
+        if (isSync) {
+          ((DFSOutputStream)stm.getWrappedStream()).hsync(syncFlags);
+        } else {
+          ((DFSOutputStream)stm.getWrappedStream()).hflush();
+        }
+        
+        // Check file length if updatelength is required
+        if (isSync && syncFlags.contains(SyncFlag.UPDATE_LENGTH)) {
+          long currentFileLength = fileSystem.getFileStatus(path).getLen();
+          assertEquals(
+            "File size doesn't match for hsync/hflush with updating the length",
+            tenth * (i + 1), currentFileLength);
+        }
         byte [] toRead = new byte[tenth];
         byte [] expected = new byte[tenth];
         System.arraycopy(fileContent, tenth * i, expected, 0, tenth);
@@ -139,8 +246,6 @@ public class TestHFlush {
 
       assertEquals("File size doesn't match ", AppendTestUtil.FILE_SIZE, fileSystem.getFileStatus(path).getLen());
       AppendTestUtil.checkFullFile(fileSystem, path, fileContent.length, fileContent, "hflush()");
-    } catch (Exception e) {
-      e.printStackTrace();
     } finally {
       fileSystem.close();
       cluster.shutdown();

+ 290 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithNodeGroup.java

@@ -0,0 +1,290 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.balancer;
+
+import java.io.IOException;
+import java.net.URI;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.TimeoutException;
+import junit.framework.TestCase;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSUtil;
+import org.apache.hadoop.hdfs.HdfsConfiguration;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.MiniDFSClusterWithNodeGroup;
+import org.apache.hadoop.hdfs.NameNodeProxies;
+import org.apache.hadoop.hdfs.protocol.ClientProtocol;
+import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
+import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
+import org.apache.hadoop.net.NetworkTopology;
+import org.junit.Test;
+
+/**
+ * This class tests if a balancer schedules tasks correctly.
+ */
+public class TestBalancerWithNodeGroup extends TestCase {
+  private static final Log LOG = LogFactory.getLog(
+  "org.apache.hadoop.hdfs.TestBalancerWithNodeGroup");
+  
+  final private static long CAPACITY = 500L;
+  final private static String RACK0 = "/rack0";
+  final private static String RACK1 = "/rack1";
+  final private static String NODEGROUP0 = "/nodegroup0";
+  final private static String NODEGROUP1 = "/nodegroup1";
+  final private static String NODEGROUP2 = "/nodegroup2";
+  final static private String fileName = "/tmp.txt";
+  final static private Path filePath = new Path(fileName);
+  MiniDFSClusterWithNodeGroup cluster;
+
+  ClientProtocol client;
+
+  static final long TIMEOUT = 20000L; //msec
+  static final double CAPACITY_ALLOWED_VARIANCE = 0.005;  // 0.5%
+  static final double BALANCE_ALLOWED_VARIANCE = 0.11;    // 10%+delta
+  static final int DEFAULT_BLOCK_SIZE = 10;
+
+  static {
+    Balancer.setBlockMoveWaitTime(1000L) ;
+  }
+
+  /**
+   * Builds the configuration shared by the tests in this class: the
+   * {@link TestBalancer#initConf(Configuration)} settings plus the
+   * node-group aware network topology and block placement policy classes.
+   *
+   * @return a freshly created configuration
+   */
+  static Configuration createConf() {
+    final String topologyImpl =
+        "org.apache.hadoop.net.NetworkTopologyWithNodeGroup";
+    final String placementPolicy =
+        "org.apache.hadoop.hdfs.server.blockmanagement."
+        + "BlockPlacementPolicyWithNodeGroup";
+
+    Configuration nodeGroupConf = new HdfsConfiguration();
+    TestBalancer.initConf(nodeGroupConf);
+    nodeGroupConf.set(CommonConfigurationKeysPublic.NET_TOPOLOGY_IMPL_KEY,
+        topologyImpl);
+    nodeGroupConf.set("dfs.block.replicator.classname", placementPolicy);
+    return nodeGroupConf;
+  }
+
+  /**
+   * Wait until the stats returned by {@code client.getStats()} match the
+   * expected values, each within CAPACITY_ALLOWED_VARIANCE (relative).
+   * Polls every 100 msec and times out after TIMEOUT msec.
+   *
+   * @param expectedUsedSpace expected value of {@code getStats()[1]}
+   * @param expectedTotalSpace expected value of {@code getStats()[0]}
+   * @throws IOException if getStats() fails, or (as an
+   *         {@link java.io.InterruptedIOException}) if the waiting thread is
+   *         interrupted
+   * @throws TimeoutException if the values are not reached in time
+   */
+  private void waitForHeartBeat(long expectedUsedSpace, long expectedTotalSpace)
+  throws IOException, TimeoutException {
+    long timeout = TIMEOUT;
+    long failtime = (timeout <= 0L) ? Long.MAX_VALUE
+             : System.currentTimeMillis() + timeout;
+
+    while (true) {
+      long[] status = client.getStats();
+      double totalSpaceVariance =
+          relativeVariance(status[0], expectedTotalSpace);
+      double usedSpaceVariance =
+          relativeVariance(status[1], expectedUsedSpace);
+      if (totalSpaceVariance < CAPACITY_ALLOWED_VARIANCE
+          && usedSpaceVariance < CAPACITY_ALLOWED_VARIANCE) {
+        break; //done
+      }
+
+      if (System.currentTimeMillis() > failtime) {
+        throw new TimeoutException("Cluster failed to reached expected values of "
+            + "totalSpace (current: " + status[0] 
+            + ", expected: " + expectedTotalSpace 
+            + "), or usedSpace (current: " + status[1] 
+            + ", expected: " + expectedUsedSpace
+            + "), in more than " + timeout + " msec.");
+      }
+      try {
+        Thread.sleep(100L);
+      } catch (InterruptedException e) {
+        // Do not keep polling after an interrupt: restore the flag and
+        // surface the interruption to the caller.
+        Thread.currentThread().interrupt();
+        throw new java.io.InterruptedIOException(
+            "Interrupted while waiting for heartbeat stats");
+      }
+    }
+  }
+
+  /**
+   * Relative deviation of {@code actual} from {@code expected}. An expected
+   * value of zero is matched only by an actual value of zero, which avoids
+   * the 0/0 = NaN result that could never satisfy the variance check.
+   */
+  private static double relativeVariance(long actual, long expected) {
+    if (expected == 0L) {
+      return (actual == 0L) ? 0.0 : Double.POSITIVE_INFINITY;
+    }
+    return Math.abs((double) actual - expected) / expected;
+  }
+
+  /**
+   * Wait until balanced: each datanode gives utilization within 
+   * BALANCE_ALLOWED_VARIANCE of average. The datanode report is re-read
+   * every 100 msec until no node is out of range.
+   *
+   * @param totalUsedSpace used space summed over the cluster
+   * @param totalCapacity capacity summed over the cluster
+   * @throws IOException if the datanode report cannot be fetched, or (as an
+   *         {@link java.io.InterruptedIOException}) if the waiting thread is
+   *         interrupted
+   * @throws TimeoutException if a node is still unbalanced after TIMEOUT msec
+   */
+  private void waitForBalancer(long totalUsedSpace, long totalCapacity) 
+  throws IOException, TimeoutException {
+    long timeout = TIMEOUT;
+    long failtime = (timeout <= 0L) ? Long.MAX_VALUE
+        : System.currentTimeMillis() + timeout;
+    final double avgUtilization = ((double)totalUsedSpace) / totalCapacity;
+    boolean balanced;
+    do {
+      DatanodeInfo[] datanodeReport = 
+          client.getDatanodeReport(DatanodeReportType.ALL);
+      // expected value first, so a failure message reads correctly
+      assertEquals(cluster.getDataNodes().size(), datanodeReport.length);
+      balanced = true;
+      for (DatanodeInfo datanode : datanodeReport) {
+        double nodeUtilization = ((double)datanode.getDfsUsed())
+            / datanode.getCapacity();
+        if (Math.abs(avgUtilization - nodeUtilization) >
+            BALANCE_ALLOWED_VARIANCE) {
+          balanced = false;
+          if (System.currentTimeMillis() > failtime) {
+            throw new TimeoutException(
+                "Rebalancing expected avg utilization to become "
+                + avgUtilization + ", but on datanode " + datanode
+                + " it remains at " + nodeUtilization
+                + " after more than " + TIMEOUT + " msec.");
+          }
+          try {
+            Thread.sleep(100);
+          } catch (InterruptedException e) {
+            // Do not keep polling after an interrupt: restore the flag and
+            // surface the interruption to the caller.
+            Thread.currentThread().interrupt();
+            throw new java.io.InterruptedIOException(
+                "Interrupted while waiting for the cluster to balance");
+          }
+          // one unbalanced node is enough; fetch a fresh report
+          break;
+        }
+      }
+    } while (!balanced);
+  }
+
+  /**
+   * Waits for the heartbeat stats to settle, runs the balancer with its
+   * default parameters, asserts that it reports success, and then waits
+   * until the stats settle again and every datanode is balanced.
+   *
+   * @param conf configuration used to look up the namenode URIs
+   * @param totalUsedSpace expected used space summed over the cluster
+   * @param totalCapacity expected capacity summed over the cluster
+   * @throws Exception if the balancer or any of the waits fails
+   */
+  private void runBalancer(Configuration conf,
+      long totalUsedSpace, long totalCapacity) throws Exception {
+    waitForHeartBeat(totalUsedSpace, totalCapacity);
+
+    // kick off the rebalancing run and check its exit status
+    final Collection<URI> nameNodeUris = DFSUtil.getNsServiceRpcUris(conf);
+    final int exitCode =
+        Balancer.run(nameNodeUris, Balancer.Parameters.DEFALUT, conf);
+    assertEquals(Balancer.ReturnStatus.SUCCESS.code, exitCode);
+
+    waitForHeartBeat(totalUsedSpace, totalCapacity);
+    LOG.info("Rebalancing with default factor.");
+    waitForBalancer(totalUsedSpace, totalCapacity);
+  }
+
+  /**
+   * Create a cluster with even distribution, and a new empty node is added to
+   * the cluster, then test rack locality for balancer policy: after the
+   * balancer has run, both racks must hold the same amount of used space.
+   *
+   * @throws Exception if the cluster cannot be started or balancing fails
+   */
+  @Test
+  public void testBalancerWithRackLocality() throws Exception {
+    Configuration conf = createConf();
+    long[] capacities = new long[]{CAPACITY, CAPACITY};
+    String[] racks = new String[]{RACK0, RACK1};
+    String[] nodeGroups = new String[]{NODEGROUP0, NODEGROUP1};
+    
+    int numOfDatanodes = capacities.length;
+    assertEquals(numOfDatanodes, racks.length);
+    MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf)
+                                .numDataNodes(capacities.length)
+                                .racks(racks)
+                                .simulatedCapacities(capacities);
+    MiniDFSClusterWithNodeGroup.setNodeGroups(nodeGroups);
+    cluster = new MiniDFSClusterWithNodeGroup(builder);
+    try {
+      cluster.waitActive();
+      client = NameNodeProxies.createProxy(conf, 
+          cluster.getFileSystem(0).getUri(),
+          ClientProtocol.class).getProxy();
+
+      long totalCapacity = TestBalancer.sum(capacities);
+
+      // fill up the cluster to be 30% full
+      long totalUsedSpace = totalCapacity * 3 / 10;
+      TestBalancer.createFile(cluster, filePath, totalUsedSpace / numOfDatanodes,
+          (short) numOfDatanodes, 0);
+      
+      long newCapacity = CAPACITY;
+      String newRack = RACK1;
+      String newNodeGroup = NODEGROUP2;
+      // start up an empty node with the same capacity and on the same rack
+      cluster.startDataNodes(conf, 1, true, null, new String[]{newRack},
+          new long[] {newCapacity}, new String[]{newNodeGroup});
+
+      totalCapacity += newCapacity;
+
+      // run balancer and validate results
+      runBalancer(conf, totalUsedSpace, totalCapacity);
+      
+      DatanodeInfo[] datanodeReport = 
+              client.getDatanodeReport(DatanodeReportType.ALL);
+      
+      // Sum the used space per rack. Kept as long so that the value returned
+      // by getDfsUsed() is never narrowed to int.
+      Map<String, Long> rackToUsedCapacity = new HashMap<String, Long>();
+      for (DatanodeInfo datanode: datanodeReport) {
+        String rack = NetworkTopology.getFirstHalf(datanode.getNetworkLocation());
+        long usedCapacity = datanode.getDfsUsed();
+        Long usedSoFar = rackToUsedCapacity.get(rack);
+        rackToUsedCapacity.put(rack,
+            usedSoFar == null ? usedCapacity : usedSoFar + usedCapacity);
+      }
+      // expected value first, so a failure message reads correctly
+      assertEquals(2, rackToUsedCapacity.size());
+      assertEquals(rackToUsedCapacity.get(RACK0), rackToUsedCapacity.get(RACK1));
+      
+    } finally {
+      cluster.shutdown();
+    }
+  }
+  
+  /**
+   * Starts four datanodes spread over two racks and three node groups,
+   * fills the cluster to 20%, adds one empty datanode on RACK1/NODEGROUP2
+   * and checks that the balancer completes successfully and the cluster
+   * ends up balanced (node-group locality for balancer policy).
+   */
+  @Test
+  public void testBalancerWithNodeGroup() throws Exception {
+    final Configuration conf = createConf();
+    final long[] capacities = {CAPACITY, CAPACITY, CAPACITY, CAPACITY};
+    final String[] racks = {RACK0, RACK0, RACK1, RACK1};
+    final String[] nodeGroups =
+        {NODEGROUP0, NODEGROUP0, NODEGROUP1, NODEGROUP2};
+
+    final int numOfDatanodes = capacities.length;
+    assertEquals(numOfDatanodes, racks.length);
+    assertEquals(numOfDatanodes, nodeGroups.length);
+
+    MiniDFSCluster.Builder clusterBuilder = new MiniDFSCluster.Builder(conf)
+        .numDataNodes(capacities.length)
+        .racks(racks)
+        .simulatedCapacities(capacities);
+    MiniDFSClusterWithNodeGroup.setNodeGroups(nodeGroups);
+    cluster = new MiniDFSClusterWithNodeGroup(clusterBuilder);
+    try {
+      cluster.waitActive();
+      client = NameNodeProxies.createProxy(conf,
+          cluster.getFileSystem(0).getUri(),
+          ClientProtocol.class).getProxy();
+
+      // fill up the cluster to be 20% full, replicating the file on half
+      // of the datanodes
+      long clusterCapacity = TestBalancer.sum(capacities);
+      final long totalUsedSpace = clusterCapacity * 2 / 10;
+      final int replication = numOfDatanodes / 2;
+      TestBalancer.createFile(cluster, filePath, totalUsedSpace / replication,
+          (short) replication, 0);
+
+      // start up an empty node with the same capacity and on NODEGROUP2
+      final long addedCapacity = CAPACITY;
+      final String[] addedRacks = {RACK1};
+      final long[] addedCapacities = {addedCapacity};
+      final String[] addedNodeGroups = {NODEGROUP2};
+      cluster.startDataNodes(conf, 1, true, null, addedRacks,
+          addedCapacities, addedNodeGroups);
+      clusterCapacity += addedCapacity;
+
+      // run balancer and validate results
+      runBalancer(conf, totalUsedSpace, clusterCapacity);
+    } finally {
+      cluster.shutdown();
+    }
+  }
+}

+ 6 - 0
hadoop-mapreduce-project/CHANGES.txt

@@ -598,6 +598,12 @@ Release 0.23.6 - UNRELEASED
 
     MAPREDUCE-4764. repair TestBinaryTokenFile (Ivan A. Veselovsky via bobby)
 
+    MAPREDUCE-4825. JobImpl.finished doesn't expect ERROR as a final job state
+    (jlowe via bobby)
+
+    MAPREDUCE-4817. Hardcoded task ping timeout kills tasks localizing large 
+    amounts of data (tgraves)
+
 Release 0.23.5 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 0 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java

@@ -274,7 +274,6 @@ public class TaskAttemptListenerImpl extends CompositeService
   @Override
   public boolean ping(TaskAttemptID taskAttemptID) throws IOException {
     LOG.info("Ping from " + taskAttemptID.toString());
-    taskHeartbeatHandler.pinged(TypeConverter.toYarn(taskAttemptID));
     return true;
   }
 

+ 3 - 24
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/TaskHeartbeatHandler.java

@@ -46,33 +46,22 @@ import org.apache.hadoop.yarn.service.AbstractService;
 public class TaskHeartbeatHandler extends AbstractService {
   
   private static class ReportTime {
-    private long lastPing;
     private long lastProgress;
     
     public ReportTime(long time) {
       setLastProgress(time);
     }
     
-    public synchronized void setLastPing(long time) {
-      lastPing = time;
-    }
-    
     public synchronized void setLastProgress(long time) {
       lastProgress = time;
-      lastPing = time;
-    }
-    
-    public synchronized long getLastPing() {
-      return lastPing;
     }
-    
+
     public synchronized long getLastProgress() {
       return lastProgress;
     }
   }
   
   private static final Log LOG = LogFactory.getLog(TaskHeartbeatHandler.class);
-  private static final int PING_TIMEOUT = 5 * 60 * 1000;
   
   //thread which runs periodically to see the last time since a heartbeat is
   //received from a task.
@@ -127,14 +116,6 @@ public class TaskHeartbeatHandler extends AbstractService {
     }
   }
 
-  public void pinged(TaskAttemptId attemptID) {
-    //only put for the registered attempts
-      //TODO throw an exception if the task isn't registered.
-      ReportTime time = runningAttempts.get(attemptID);
-      if(time != null) {
-        time.setLastPing(clock.getTime());
-      }
-    }
   
   public void register(TaskAttemptId attemptID) {
     runningAttempts.put(attemptID, new ReportTime(clock.getTime()));
@@ -159,10 +140,8 @@ public class TaskHeartbeatHandler extends AbstractService {
           Map.Entry<TaskAttemptId, ReportTime> entry = iterator.next();
           boolean taskTimedOut = (taskTimeOut > 0) && 
               (currentTime > (entry.getValue().getLastProgress() + taskTimeOut));
-          boolean pingTimedOut =
-              (currentTime > (entry.getValue().getLastPing() + PING_TIMEOUT));
-              
-          if(taskTimedOut || pingTimedOut) {
+           
+          if(taskTimedOut) {
             // task is lost, remove from the list and raise lost event
             iterator.remove();
             eventHandler.handle(new TaskAttemptDiagnosticsUpdateEvent(entry

+ 1 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java

@@ -828,6 +828,7 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job,
       case KILLED:
         metrics.killedJob(this);
         break;
+      case ERROR:
       case FAILED:
         metrics.failedJob(this);
         break;

+ 16 - 0
hadoop-yarn-project/CHANGES.txt

@@ -106,6 +106,19 @@ Release 2.0.3-alpha - Unreleased
     YARN-184. Remove unnecessary locking in fair scheduler, and address 
     findbugs excludes. (sandyr via tucu)
 
+    YARN-224. Fair scheduler logs too many nodeUpdate INFO messages.
+    (Sandy Ryza via tomwhite)
+
+    YARN-222. Fair scheduler should create queue for each user by default.
+    (Sandy Ryza via tomwhite)
+
+    MAPREDUCE-4778. Fair scheduler event log is only written if directory
+    exists on HDFS. (Sandy Ryza via tomwhite)
+
+    YARN-229. Remove old unused RM recovery code. (Bikas Saha via acmurthy) 
+
+    YARN-187. Add hierarchical queues to the fair scheduler. (Sandy Ryza via tomwhite)
+
 Release 2.0.2-alpha - 2012-09-07 
 
   INCOMPATIBLE CHANGES
@@ -183,6 +196,9 @@ Release 0.23.6 - UNRELEASED
     YARN-204. test coverage for org.apache.hadoop.tools (Aleksey Gorshkov via
     bobby)
 
+    YARN-251. Proxy URI generation fails for blank tracking URIs (Tom White
+    via jlowe)
+
 Release 0.23.5 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 2 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestGetGroups.java

@@ -28,7 +28,7 @@ import org.apache.hadoop.tools.GetGroupsTestBase;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory;
 import org.apache.hadoop.yarn.service.Service.STATE;
 import org.junit.AfterClass;
@@ -46,7 +46,7 @@ public class TestGetGroups extends GetGroupsTestBase {
   @BeforeClass
   public static void setUpResourceManager() throws IOException, InterruptedException {
     conf = new YarnConfiguration();
-    Store store = StoreFactory.getStore(conf);
+    RMStateStore store = StoreFactory.getStore(conf);
     resourceManager = new ResourceManager(store) {
       @Override
       protected void doSecureLogin() throws IOException {

+ 1 - 10
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java

@@ -228,16 +228,7 @@ public class YarnConfiguration extends Configuration {
   
   /** The class to use as the persistent store.*/
   public static final String RM_STORE = RM_PREFIX + "store.class";
- 
-  /** The address of the zookeeper instance to use with ZK store.*/
-  public static final String RM_ZK_STORE_ADDRESS = 
-    RM_PREFIX + "zookeeper-store.address";
-  
-  /** The zookeeper session timeout for the zookeeper store.*/
-  public static final String RM_ZK_STORE_TIMEOUT_MS = 
-    RM_PREFIX + "zookeeper-store.session.timeout-ms";
-  public static final int DEFAULT_RM_ZK_STORE_TIMEOUT_MS = 60000;
-  
+   
   /** The maximum number of completed applications RM keeps. */ 
   public static final String RM_MAX_COMPLETED_APPLICATIONS =
     RM_PREFIX + "max-completed-applications";

+ 0 - 11
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml

@@ -209,17 +209,6 @@
     <name>yarn.resourcemanager.store.class</name>
   </property>
 
-  <property>
-    <description>The address of the zookeeper instance to use with ZK store.</description>
-    <name>yarn.resourcemanager.zookeeper-store.address</name>
-  </property>
-
-  <property>
-    <description>The zookeeper session timeout for the zookeeper store.</description>
-    <name>yarn.resourcemanager.zookeeper-store.session.timeout-ms</name>
-    <value>60000</value>
-  </property>
-
   <property>
     <description>The maximum number of completed applications RM keeps. </description>
     <name>yarn.resourcemanager.max-completed-applications</name>

+ 1 - 7
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java

@@ -36,7 +36,6 @@ import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.ipc.RPCUtil;
 import org.apache.hadoop.yarn.security.client.ClientTokenIdentifier;
 import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
@@ -251,17 +250,12 @@ public class RMAppManager implements EventHandler<RMAppManagerEvent> {
             YarnConfiguration.DEFAULT_APPLICATION_NAME);
       }
 
-      // Store application for recovery
-      ApplicationStore appStore = rmContext.getApplicationsStore()
-          .createApplicationStore(submissionContext.getApplicationId(),
-          submissionContext);
-
       // Create RMApp
       application =
           new RMAppImpl(applicationId, rmContext, this.conf,
             submissionContext.getApplicationName(),
             submissionContext.getUser(), submissionContext.getQueue(),
-            submissionContext, clientTokenStr, appStore, this.scheduler,
+            submissionContext, clientTokenStr, this.scheduler,
             this.masterService, submitTime);
 
       // Sanity check - duplicate?

+ 0 - 6
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java

@@ -23,8 +23,6 @@ import java.util.concurrent.ConcurrentMap;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.event.Dispatcher;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.NodeStore;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer;
@@ -41,10 +39,6 @@ public interface RMContext {
 
   Dispatcher getDispatcher();
 
-  NodeStore getNodeStore();
-
-  ApplicationsStore getApplicationsStore();
-
   ConcurrentMap<ApplicationId, RMApp> getRMApps();
   
   ConcurrentMap<String, RMNode> getInactiveRMNodes();

+ 1 - 16
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java

@@ -24,9 +24,6 @@ import java.util.concurrent.ConcurrentMap;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.event.Dispatcher;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.NodeStore;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer;
@@ -39,7 +36,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSe
 public class RMContextImpl implements RMContext {
 
   private final Dispatcher rmDispatcher;
-  private final Store store;
 
   private final ConcurrentMap<ApplicationId, RMApp> applications
     = new ConcurrentHashMap<ApplicationId, RMApp>();
@@ -58,7 +54,7 @@ public class RMContextImpl implements RMContext {
   private final RMContainerTokenSecretManager containerTokenSecretManager;
   private final ClientToAMTokenSecretManagerInRM clientToAMTokenSecretManager;
 
-  public RMContextImpl(Store store, Dispatcher rmDispatcher,
+  public RMContextImpl(Dispatcher rmDispatcher,
       ContainerAllocationExpirer containerAllocationExpirer,
       AMLivelinessMonitor amLivelinessMonitor,
       AMLivelinessMonitor amFinishingMonitor,
@@ -66,7 +62,6 @@ public class RMContextImpl implements RMContext {
       ApplicationTokenSecretManager appTokenSecretManager,
       RMContainerTokenSecretManager containerTokenSecretManager,
       ClientToAMTokenSecretManagerInRM clientTokenSecretManager) {
-    this.store = store;
     this.rmDispatcher = rmDispatcher;
     this.containerAllocationExpirer = containerAllocationExpirer;
     this.amLivelinessMonitor = amLivelinessMonitor;
@@ -82,16 +77,6 @@ public class RMContextImpl implements RMContext {
     return this.rmDispatcher;
   }
 
-  @Override
-  public NodeStore getNodeStore() {
-   return store;
-  }
-
-  @Override
-  public ApplicationsStore getApplicationsStore() {
-    return store;
-  }
-
   @Override
   public ConcurrentMap<ApplicationId, RMApp> getRMApps() {
     return this.applications;

+ 6 - 9
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java

@@ -46,8 +46,8 @@ import org.apache.hadoop.yarn.server.RMDelegationTokenSecretManager;
 import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType;
 import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.Recoverable;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store.RMState;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
@@ -119,12 +119,12 @@ public class ResourceManager extends CompositeService implements Recoverable {
   protected RMDelegationTokenSecretManager rmDTSecretManager;
   private WebApp webApp;
   protected RMContext rmContext;
-  private final Store store;
+  private final RMStateStore store;
   protected ResourceTrackerService resourceTracker;
 
   private Configuration conf;
 
-  public ResourceManager(Store store) {
+  public ResourceManager(RMStateStore store) {
     super("ResourceManager");
     this.store = store;
   }
@@ -161,7 +161,7 @@ public class ResourceManager extends CompositeService implements Recoverable {
     this.containerTokenSecretManager = createContainerTokenSecretManager(conf);
     
     this.rmContext =
-        new RMContextImpl(this.store, this.rmDispatcher,
+        new RMContextImpl(this.rmDispatcher,
           this.containerAllocationExpirer, amLivelinessMonitor,
           amFinishingMonitor, tokenRenewer, this.appTokenSecretManager,
           this.containerTokenSecretManager, this.clientToAMSecretManager);
@@ -643,8 +643,6 @@ public class ResourceManager extends CompositeService implements Recoverable {
 
   @Override
   public void recover(RMState state) throws Exception {
-    resourceTracker.recover(state);
-    scheduler.recover(state);
   }
   
   public static void main(String argv[]) {
@@ -652,14 +650,13 @@ public class ResourceManager extends CompositeService implements Recoverable {
     StringUtils.startupShutdownMessage(ResourceManager.class, argv, LOG);
     try {
       Configuration conf = new YarnConfiguration();
-      Store store =  StoreFactory.getStore(conf);
+      RMStateStore store =  StoreFactory.getStore(conf);
       ResourceManager resourceManager = new ResourceManager(store);
       ShutdownHookManager.get().addShutdownHook(
         new CompositeServiceShutdownHook(resourceManager),
         SHUTDOWN_HOOK_PRIORITY);
       resourceManager.init(conf);
       //resourceManager.recover(store.restore());
-      //store.doneWithRecovery();
       resourceManager.start();
     } catch (Throwable t) {
       LOG.fatal("Error starting ResourceManager", t);

+ 1 - 23
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java

@@ -44,7 +44,7 @@ import org.apache.hadoop.yarn.server.api.records.MasterKey;
 import org.apache.hadoop.yarn.server.api.records.NodeAction;
 import org.apache.hadoop.yarn.server.api.records.NodeStatus;
 import org.apache.hadoop.yarn.server.api.records.RegistrationResponse;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store.RMState;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEventType;
@@ -297,28 +297,6 @@ public class ResourceTrackerService extends AbstractService implements
     return nodeHeartBeatResponse;
   }
 
-  public void recover(RMState state) {
-//
-//    List<RMNode> nodeManagers = state.getStoredNodeManagers();
-//    for (RMNode nm : nodeManagers) {
-//      createNewNode(nm.getNodeID(), nm.getNodeHostName(), nm
-//          .getCommandPort(), nm.getHttpPort(), nm.getNode(), nm
-//          .getTotalCapability());
-//    }
-//    for (Map.Entry<ApplicationId, ApplicationInfo> entry : state
-//        .getStoredApplications().entrySet()) {
-//      List<Container> containers = entry.getValue().getContainers();
-//      List<Container> containersToAdd = new ArrayList<Container>();
-//      for (Container c : containers) {
-//        RMNode containerNode = this.rmContext.getNodesCollection()
-//            .getNodeInfo(c.getNodeId());
-//        containersToAdd.add(c);
-//        containerNode.allocateContainer(entry.getKey(), containersToAdd);
-//        containersToAdd.clear();
-//      }
-//    }
-  }
-
   /**
    * resolving the network topology.
    * @param hostName the hostname of this node.

+ 0 - 39
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ApplicationsStore.java

@@ -1,39 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.yarn.server.resourcemanager.recovery;
-
-import java.io.IOException;
-
-import org.apache.hadoop.yarn.api.records.ApplicationId;
-import org.apache.hadoop.yarn.api.records.ApplicationMaster;
-import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
-import org.apache.hadoop.yarn.api.records.Container;
-
-public interface ApplicationsStore {
-  public ApplicationStore createApplicationStore(ApplicationId applicationId,
-      ApplicationSubmissionContext context) throws IOException;
-  public void removeApplication(ApplicationId application) throws IOException;
- 
-  public interface ApplicationStore {
-    public void storeContainer(Container container) throws IOException;
-    public void removeContainer(Container container) throws IOException;
-    public void storeMasterContainer(Container container) throws IOException;
-    public void updateApplicationState(ApplicationMaster master) throws IOException;
-    public boolean isLoggable();
-  }
-}

+ 2 - 12
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/NodeStore.java → hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileRMStateStore.java

@@ -15,18 +15,8 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-
 package org.apache.hadoop.yarn.server.resourcemanager.recovery;
 
-import java.io.IOException;
-
-import org.apache.hadoop.yarn.api.records.NodeId;
-import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
-
+public class FileRMStateStore implements RMStateStore {
 
-public interface NodeStore {
-  public void storeNode(RMNode node) throws IOException;
-  public void removeNode(RMNode node) throws IOException;
-  public NodeId getNextNodeId() throws IOException;
-  public boolean isLoggable();
-}
+}

+ 0 - 128
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/MemStore.java

@@ -1,128 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.yarn.server.resourcemanager.recovery;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.hadoop.yarn.api.records.ApplicationId;
-import org.apache.hadoop.yarn.api.records.ApplicationMaster;
-import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
-import org.apache.hadoop.yarn.api.records.Container;
-import org.apache.hadoop.yarn.api.records.NodeId;
-import org.apache.hadoop.yarn.factories.RecordFactory;
-import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
-import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
-
-public class MemStore implements Store {
-  RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
-  private NodeId nodeId;
-  private boolean doneWithRecovery = false;
-
-  public MemStore() {
-    nodeId = recordFactory.newRecordInstance(NodeId.class);
-    nodeId.setHost("TODO");
-    nodeId.setPort(-1);
-  }
-
-  @Override
-  public void storeNode(RMNode node) throws IOException {}
-
-  @Override
-  public void removeNode(RMNode node) throws IOException {}
-
-  private class ApplicationStoreImpl implements ApplicationStore {
-    @Override
-    public void storeContainer(Container container) throws IOException {}
-
-    @Override
-    public void removeContainer(Container container) throws IOException {}
-
-    @Override
-    public void storeMasterContainer(Container container) throws IOException {}
-
-    @Override
-    public void updateApplicationState(
-        ApplicationMaster master) throws IOException {}
-
-    @Override
-    public boolean isLoggable() {
-      return doneWithRecovery;
-    }
-
-  }
-
-  @Override
-  public ApplicationStore createApplicationStore(ApplicationId application,
-      ApplicationSubmissionContext context) throws IOException {
-    return new ApplicationStoreImpl();
-  }
-
-
-  @Override
-  public void removeApplication(ApplicationId application) throws IOException {}
-
-  @Override
-  public RMState restore() throws IOException {
-    MemRMState state = new MemRMState();
-    return state;
-  }
-
-  @Override
-  public synchronized NodeId getNextNodeId() throws IOException {
-    // TODO: FIXMEVinodkv
-//    int num = nodeId.getId();
-//    num++;
-//    nodeId.setId(num);
-    return nodeId;
-  }
-
-  private class MemRMState implements RMState {
-
-    public MemRMState() {
-      nodeId = recordFactory.newRecordInstance(NodeId.class);
-    }
-
-    @Override
-    public List<RMNode> getStoredNodeManagers()  {
-      return new ArrayList<RMNode>();
-    }
-
-    @Override
-    public NodeId getLastLoggedNodeId() {
-      return nodeId;
-    }
-
-    @Override
-    public Map<ApplicationId, ApplicationInfo> getStoredApplications() {
-      return new HashMap<ApplicationId, Store.ApplicationInfo>();
-    }
-  }
-
-  @Override
-  public boolean isLoggable() {
-    return doneWithRecovery;
-  }
-
-  @Override
-  public void doneWithRecovery() {
-    doneWithRecovery = true;
-  }
-}

+ 24 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java

@@ -0,0 +1,24 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.hadoop.yarn.server.resourcemanager.recovery;
+
+public interface RMStateStore {
+  public interface RMState {
+    
+  }
+}

+ 1 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/Recoverable.java

@@ -17,7 +17,7 @@
 */
 package org.apache.hadoop.yarn.server.resourcemanager.recovery;
 
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store.RMState;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
 
 public interface Recoverable {
   public void recover(RMState state) throws Exception;

+ 0 - 46
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/Store.java

@@ -1,46 +0,0 @@
-/**
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-package org.apache.hadoop.yarn.server.resourcemanager.recovery;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.hadoop.yarn.api.records.ApplicationId;
-import org.apache.hadoop.yarn.api.records.ApplicationMaster;
-import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
-import org.apache.hadoop.yarn.api.records.Container;
-import org.apache.hadoop.yarn.api.records.NodeId;
-import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
-
-
-public interface Store extends NodeStore, ApplicationsStore {
-  public interface ApplicationInfo {
-    public ApplicationMaster getApplicationMaster();
-    public Container getMasterContainer();
-    public ApplicationSubmissionContext getApplicationSubmissionContext();
-    public List<Container> getContainers();
-  }
-  public interface RMState {
-    public List<RMNode> getStoredNodeManagers() ;
-    public Map<ApplicationId, ApplicationInfo> getStoredApplications();
-    public NodeId getLastLoggedNodeId();
-  }
-  public RMState restore() throws IOException;
-  public void doneWithRecovery();
-}

+ 3 - 39
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/StoreFactory.java

@@ -17,53 +17,17 @@
 */
 package org.apache.hadoop.yarn.server.resourcemanager.recovery;
 
-import java.io.IOException;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.util.ReflectionUtils;
-import org.apache.hadoop.yarn.api.records.ApplicationMaster;
-import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore;
 
 public class StoreFactory {
   
-  public static Store getStore(Configuration conf) {
-    Store store = ReflectionUtils.newInstance(
+  public static RMStateStore getStore(Configuration conf) {
+    RMStateStore store = ReflectionUtils.newInstance(
         conf.getClass(YarnConfiguration.RM_STORE, 
-            MemStore.class, Store.class), 
+            FileRMStateStore.class, RMStateStore.class), 
             conf);
     return store;
   }
-  
-  public static ApplicationStore createVoidAppStore() {
-    return new VoidApplicationStore();
-  }
-  
-  private static class VoidApplicationStore implements ApplicationStore {
-
-    public VoidApplicationStore() {}
-    
-    @Override
-    public void storeContainer(Container container) throws IOException {
-    }
-
-    @Override
-    public void removeContainer(Container container) throws IOException {
-    }
-
-    @Override
-    public void storeMasterContainer(Container container) throws IOException {
-    }
-
-    @Override
-    public void updateApplicationState(ApplicationMaster master)
-        throws IOException {
-    }
-
-    @Override
-    public boolean isLoggable() {
-      return false;
-    }
-  }
 }

+ 0 - 509
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKStore.java

@@ -1,509 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.yarn.server.resourcemanager.recovery;
-
-import java.io.IOException;
-import java.io.InterruptedIOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.yarn.api.records.ApplicationId;
-import org.apache.hadoop.yarn.api.records.ApplicationMaster;
-import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
-import org.apache.hadoop.yarn.api.records.Container;
-import org.apache.hadoop.yarn.api.records.ContainerId;
-import org.apache.hadoop.yarn.api.records.NodeId;
-import org.apache.hadoop.yarn.api.records.NodeReport;
-import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationMasterPBImpl;
-import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl;
-import org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl;
-import org.apache.hadoop.yarn.api.records.impl.pb.NodeIdPBImpl;
-import org.apache.hadoop.yarn.api.records.impl.pb.NodeReportPBImpl;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
-import org.apache.hadoop.yarn.factories.RecordFactory;
-import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
-import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationMasterProto;
-import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationSubmissionContextProto;
-import org.apache.hadoop.yarn.proto.YarnProtos.ContainerProto;
-import org.apache.hadoop.yarn.proto.YarnProtos.NodeIdProto;
-import org.apache.hadoop.yarn.proto.YarnProtos.NodeReportProto;
-import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
-import org.apache.hadoop.yarn.util.ConverterUtils;
-import org.apache.zookeeper.CreateMode;
-import org.apache.zookeeper.KeeperException;
-import org.apache.zookeeper.WatchedEvent;
-import org.apache.zookeeper.Watcher;
-import org.apache.zookeeper.ZooKeeper;
-import org.apache.zookeeper.data.Stat;
-
-public class ZKStore implements Store {
-  private final Configuration conf;
-  private final ZooKeeper zkClient;
-  private static final Log LOG = LogFactory.getLog(ZKStore.class);
-  private final RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
-  private static final String NODES = "nodes/";
-  private static final String APPS = "apps/";
-  private static final String ZK_PATH_SEPARATOR = "/";
-  private static final String NODE_ID = "nodeid";
-  private static final String APP_MASTER = "master";
-  private static final String APP_MASTER_CONTAINER = "mastercontainer";
-  private final String ZK_ADDRESS;
-  private final int ZK_TIMEOUT;
-  private boolean doneWithRecovery = false;
-  
-  /** TODO make this generic **/
-  private NodeIdPBImpl nodeId = new NodeIdPBImpl();
-
-  /**
-   * TODO fix this for later to handle all kinds of events 
-   * of connection and session events.
-   *
-   */
-  private static class ZKWatcher implements Watcher {
-    @Override
-    public void process(WatchedEvent arg0) {
-    }
-  }
-
-  public ZKStore(Configuration conf) throws IOException {
-    this.conf = conf;
-    this.ZK_ADDRESS = conf.get(YarnConfiguration.RM_ZK_STORE_ADDRESS);
-    this.ZK_TIMEOUT = conf.getInt(YarnConfiguration.RM_ZK_STORE_TIMEOUT_MS,
-        YarnConfiguration.DEFAULT_RM_ZK_STORE_TIMEOUT_MS);
-    zkClient = new ZooKeeper(this.ZK_ADDRESS, 
-        this.ZK_TIMEOUT,
-        createZKWatcher() 
-    );
-    // TODO: FIXMEVinodkv
-//    this.nodeId.setId(0);
-  }
-
-  protected Watcher createZKWatcher() {
-    return new ZKWatcher();   
-  }
-
-  private NodeReportPBImpl createNodeManagerInfo(RMNode rmNode) {
-    NodeReport node = 
-      recordFactory.newRecordInstance(NodeReport.class);
-    node.setNodeId(rmNode.getNodeID());
-    node.setRackName(rmNode.getRackName());
-    node.setCapability(rmNode.getTotalCapability());
-    // TODO: FIXME
-//    node.setUsed(nodeInfo.getUsedResource());
-    // TODO: acm: refactor2 FIXME
-//  node.setNumContainers(rmNode.getNumContainers());
-    return (NodeReportPBImpl)node;
-  }
-
-  @Override
-  public synchronized void storeNode(RMNode node) throws IOException {
-    /** create a storage node and store it in zk **/
-    if (!doneWithRecovery) return;
-    // TODO: FIXMEVinodkv
-//    NodeReportPBImpl nodeManagerInfo = createNodeManagerInfo(node);
-//    byte[] bytes = nodeManagerInfo.getProto().toByteArray();
-//    try {
-//      zkClient.create(NODES + Integer.toString(node.getNodeID().getId()), bytes, null,
-//          CreateMode.PERSISTENT);
-//    } catch(InterruptedException ie) {
-//      LOG.info("Interrupted", ie);
-//      throw new InterruptedIOException("Interrupted");
-//    } catch(KeeperException ke) {
-//      LOG.info("Keeper exception", ke);
-//      throw convertToIOException(ke);
-//    }
-  }
-
-  @Override
-  public synchronized void removeNode(RMNode node) throws IOException {
-    if (!doneWithRecovery) return;
-    
-//    TODO: FIXME VINODKV
-//    /** remove a storage node **/
-//    try {
-//      zkClient.delete(NODES + Integer.toString(node.getNodeID().getId()), -1);
-//    } catch(InterruptedException ie) {
-//      LOG.info("Interrupted", ie);
-//      throw new InterruptedIOException("Interrupted");
-//    } catch(KeeperException ke) {
-//      LOG.info("Keeper exception", ke);
-//      throw convertToIOException(ke);
-//    }
-
-  }
-
-  private static IOException convertToIOException(KeeperException ke) {
-    IOException io = new IOException();
-    io.setStackTrace(ke.getStackTrace());
-    return io;
-  }
-
-  @Override
-  public synchronized NodeId getNextNodeId() throws IOException {
-//    TODO: FIXME VINODKV
-//    int num = nodeId.getId();
-//    num++;
-//    nodeId.setId(num);
-//    try {
-//      zkClient.setData(NODES + NODE_ID, nodeId.getProto().toByteArray() , -1);
-//    } catch(InterruptedException ie) {
-//      LOG.info("Interrupted", ie);
-//      throw new InterruptedIOException(ie.getMessage());
-//    } catch(KeeperException ke) {
-//      throw convertToIOException(ke);
-//    }
-    return nodeId;
-  }
-
-  private String containerPathFromContainerId(ContainerId containerId) {
-    String appString = ConverterUtils.toString(
-        containerId.getApplicationAttemptId().getApplicationId());
-    return appString + "/" + containerId.getId();
-  }
-
-  private class ZKApplicationStore implements ApplicationStore {
-    private final ApplicationId applicationId;
-
-    public ZKApplicationStore(ApplicationId applicationId) {
-      this.applicationId = applicationId;
-    }
-
-    @Override
-    public void storeMasterContainer(Container container) throws IOException {
-      if (!doneWithRecovery) return;
-      
-      ContainerPBImpl containerPBImpl = (ContainerPBImpl) container;
-      try {
-        zkClient.setData(APPS + 
-            ConverterUtils.toString(
-                container.getId().getApplicationAttemptId().getApplicationId()) 
-                +
-            ZK_PATH_SEPARATOR + APP_MASTER_CONTAINER
-            , containerPBImpl.getProto().toByteArray(), -1);
-      } catch(InterruptedException ie) {
-        LOG.info("Interrupted", ie);
-        throw new InterruptedIOException(ie.getMessage());
-      } catch(KeeperException ke) {
-        LOG.info("Keeper exception", ke);
-        throw convertToIOException(ke);
-      }
-    }
-    @Override
-    public synchronized void storeContainer(Container container) throws IOException {
-      if (!doneWithRecovery) return;
-      
-      ContainerPBImpl containerPBImpl = (ContainerPBImpl) container;
-      try {
-        zkClient.create(APPS + containerPathFromContainerId(container.getId())
-            , containerPBImpl.getProto().toByteArray(), null, CreateMode.PERSISTENT);
-      } catch(InterruptedException ie) {
-        LOG.info("Interrupted", ie);
-        throw new InterruptedIOException(ie.getMessage());
-      } catch(KeeperException ke) {
-        LOG.info("Keeper exception", ke);
-        throw convertToIOException(ke);
-      }
-    }
-
-    @Override
-    public synchronized void removeContainer(Container container) throws IOException {
-      if (!doneWithRecovery) return;
-      try { 
-        zkClient.delete(APPS + containerPathFromContainerId(container.getId()),
-            -1);
-      } catch(InterruptedException ie) {
-        throw new InterruptedIOException(ie.getMessage());
-      } catch(KeeperException ke) {
-        LOG.info("Keeper exception", ke);
-        throw convertToIOException(ke);
-      }
-    }
-
-    @Override
-    public void updateApplicationState(
-        ApplicationMaster master) throws IOException {
-      if (!doneWithRecovery) return;
-      
-      String appString = APPS + ConverterUtils.toString(applicationId);
-      ApplicationMasterPBImpl masterPBImpl = (ApplicationMasterPBImpl) master;
-      try {
-        zkClient.setData(appString, masterPBImpl.getProto().toByteArray(), -1);
-      } catch(InterruptedException ie) {
-        LOG.info("Interrupted", ie);
-        throw new InterruptedIOException(ie.getMessage());
-      } catch(KeeperException ke) {
-        LOG.info("Keeper exception", ke);
-        throw convertToIOException(ke);
-      }
-    }
-
-    @Override
-    public boolean isLoggable() {
-      return doneWithRecovery;
-    }
-  }
-
-  @Override
-  public synchronized ApplicationStore createApplicationStore(ApplicationId application, 
-      ApplicationSubmissionContext context) throws IOException {
-    if (!doneWithRecovery) return new ZKApplicationStore(application);
-    
-    ApplicationSubmissionContextPBImpl contextPBImpl = (ApplicationSubmissionContextPBImpl) context;
-    String appString = APPS + ConverterUtils.toString(application);
-   
-    ApplicationMasterPBImpl masterPBImpl = new ApplicationMasterPBImpl();
-    ContainerPBImpl container = new ContainerPBImpl();
-    try {
-      zkClient.create(appString, contextPBImpl.getProto()
-          .toByteArray(), null, CreateMode.PERSISTENT);
-      zkClient.create(appString + ZK_PATH_SEPARATOR + APP_MASTER, 
-          masterPBImpl.getProto().toByteArray(), null, CreateMode.PERSISTENT);
-      zkClient.create(appString + ZK_PATH_SEPARATOR + APP_MASTER_CONTAINER, 
-          container.getProto().toByteArray(), null, CreateMode.PERSISTENT);
-    } catch(InterruptedException ie) {
-      LOG.info("Interrupted", ie);
-      throw new InterruptedIOException(ie.getMessage());
-    } catch(KeeperException ke) {
-      LOG.info("Keeper exception", ke);
-      throw convertToIOException(ke);
-    }
-    return new ZKApplicationStore(application);
-  }
-
-  @Override
-  public synchronized void removeApplication(ApplicationId application) throws IOException {
-    if (!doneWithRecovery) return;
-    
-    try {
-      zkClient.delete(APPS + ConverterUtils.toString(application), -1);
-    } catch(InterruptedException ie) {
-      LOG.info("Interrupted", ie);
-      throw new InterruptedIOException(ie.getMessage());
-    } catch(KeeperException ke) {
-      LOG.info("Keeper Exception", ke);
-      throw convertToIOException(ke);
-    }
-  }
-
-  @Override
-  public boolean isLoggable() {
-    return doneWithRecovery;
-  }
-
-  @Override
-  public void doneWithRecovery() {
-    this.doneWithRecovery = true;
-  }
-
-  
-  @Override
-  public synchronized RMState restore() throws IOException {
-    ZKRMState rmState = new ZKRMState();
-    rmState.load();
-    return rmState;
-  }  
-
-  private static class ApplicationInfoImpl implements ApplicationInfo {
-    private ApplicationMaster master;
-    private Container masterContainer;
-
-    private final ApplicationSubmissionContext context;
-    private final List<Container> containers = new ArrayList<Container>();
-
-    public ApplicationInfoImpl(ApplicationSubmissionContext context) {
-      this.context = context;
-    }
-
-    public void setApplicationMaster(ApplicationMaster master) {
-      this.master = master;
-    }
-
-    public void setMasterContainer(Container container) {
-      this.masterContainer = container;
-    }
-
-    @Override
-    public ApplicationMaster getApplicationMaster() {
-      return this.master;
-    }
-
-    @Override
-    public ApplicationSubmissionContext getApplicationSubmissionContext() {
-      return this.context;
-    }
-
-    @Override
-    public Container getMasterContainer() {
-      return this.masterContainer;
-    }
-
-    @Override
-    public List<Container> getContainers() {
-      return this.containers;
-    }
-
-    public void addContainer(Container container) {
-      containers.add(container);
-    }
-  }
-
-  private class ZKRMState implements RMState {
-    private List<RMNode> nodeManagers = new ArrayList<RMNode>();
-    private Map<ApplicationId, ApplicationInfo> applications = new 
-    HashMap<ApplicationId, ApplicationInfo>();
-
-    public ZKRMState() {
-      LOG.info("Restoring RM state from ZK");
-    }
-
-    private synchronized List<NodeReport> listStoredNodes() throws IOException {
-      /** get the list of nodes stored in zk **/
-      //TODO PB
-      List<NodeReport> nodes = new ArrayList<NodeReport>();
-      Stat stat = new Stat();
-      try {
-        List<String> children = zkClient.getChildren(NODES, false);
-        for (String child: children) {
-          byte[] data = zkClient.getData(NODES + child, false, stat);
-          NodeReportPBImpl nmImpl = new NodeReportPBImpl(
-              NodeReportProto.parseFrom(data));
-          nodes.add(nmImpl);
-        }
-      } catch (InterruptedException ie) {
-        LOG.info("Interrupted" , ie);
-        throw new InterruptedIOException("Interrupted");
-      } catch(KeeperException ke) {
-        LOG.error("Failed to list nodes", ke);
-        throw convertToIOException(ke);
-      }
-      return nodes;
-    }
-
-    @Override
-    public List<RMNode> getStoredNodeManagers()  {
-      return nodeManagers;
-    }
-
-    @Override
-    public NodeId getLastLoggedNodeId() {
-      return nodeId;
-    }
-
-    private void readLastNodeId() throws IOException {
-      Stat stat = new Stat();
-      try {
-        byte[] data = zkClient.getData(NODES + NODE_ID, false, stat);
-        nodeId = new NodeIdPBImpl(NodeIdProto.parseFrom(data));
-      } catch(InterruptedException ie) {
-        LOG.info("Interrupted", ie);
-        throw new InterruptedIOException(ie.getMessage());
-      } catch(KeeperException ke) {
-        LOG.info("Keeper Exception", ke);
-        throw convertToIOException(ke);
-      }
-    }
-
-    private ApplicationInfo getAppInfo(String app) throws IOException { // Rebuilds one application's stored state: the submission context from the node APPS + app, then one entry per child node.
-      ApplicationInfoImpl info = null;
-      Stat stat = new Stat();
-      try {
-        ApplicationSubmissionContext context = null;
-        byte[] data = zkClient.getData(APPS + app, false, stat); // payload of the application node itself
-        context = new ApplicationSubmissionContextPBImpl(
-            ApplicationSubmissionContextProto.parseFrom(data));
-        info = new ApplicationInfoImpl(context);
-        List<String> children = zkClient.getChildren(APPS + app, false, stat);
-        ApplicationMaster master = null;
-        for (String child: children) {
-          byte[] childdata = zkClient.getData(APPS + app + ZK_PATH_SEPARATOR + child, false, stat); // payload of this child node
-          if (APP_MASTER.equals(child)) {
-            master = new ApplicationMasterPBImpl(ApplicationMasterProto.parseFrom(childdata));
-            info.setApplicationMaster(master);
-          } else if (APP_MASTER_CONTAINER.equals(child)) {
-            Container masterContainer = new ContainerPBImpl(ContainerProto.parseFrom(data)); // NOTE(review): parses data (the application node's bytes), not childdata — looks unintended, confirm
-            info.setMasterContainer(masterContainer);
-          } else { // every other child is treated as a container record
-            Container container = new ContainerPBImpl(ContainerProto.parseFrom(data)); // NOTE(review): same as above, data rather than childdata — confirm
-            info.addContainer(container);
-          }
-        }
-      } catch(InterruptedException ie) {
-        LOG.info("Interrupted", ie);
-        throw new InterruptedIOException(ie.getMessage());
-      } catch(KeeperException ke) { // unlike the interrupted case, this one is not logged here
-        throw convertToIOException(ke);
-      }
-      return info;
-    }
-
-    private void load() throws IOException { // Fills this state from zk: walks the stored node reports, reads the last node id, then loads every child of APPS.
-      List<NodeReport> nodeInfos = listStoredNodes();
-      final Pattern trackerPattern = Pattern.compile(".*:.*"); // NOTE(review): the pattern declares no capturing groups, so the m.group(1) calls below would throw IndexOutOfBoundsException (see java.util.regex.Matcher#group(int)) — confirm intent
-      final Matcher m = trackerPattern.matcher("");
-      for (NodeReport node: nodeInfos) {
-        m.reset(node.getNodeId().getHost());
-        if (!m.find()) {
-          LOG.info("Skipping node, bad node-address "
-              + node.getNodeId().getHost());
-          continue;
-        }
-        String hostName = m.group(0); // group(0) is the entire match, not a host part
-        int cmPort = Integer.valueOf(m.group(1));
-        m.reset(node.getHttpAddress());
-        if (!m.find()) {
-          LOG.info("Skipping node, bad http-address " + node.getHttpAddress());
-          continue;
-        }
-        int httpPort = Integer.valueOf(m.group(1)); // hostName, cmPort and httpPort are only consumed by the commented-out code below
-        // TODO: FindBugs warns passing null below. Commenting this for later.
-//        RMNode nm = new RMNodeImpl(node.getNodeId(), null,
-//            hostName, cmPort, httpPort,
-//            ResourceTrackerService.resolve(node.getNodeId().getHost()), 
-//            node.getCapability());
-//        nodeManagers.add(nm);
-      }
-      readLastNodeId();
-      /* make sure we get all the applications */
-      List<String> apps = null;
-      try {
-        apps = zkClient.getChildren(APPS, false);
-      } catch(InterruptedException ie) {
-        LOG.info("Interrupted", ie);
-        throw new InterruptedIOException(ie.getMessage());
-      } catch(KeeperException ke) {
-        throw convertToIOException(ke);
-      }
-      for (String app: apps) {
-        ApplicationInfo info = getAppInfo(app);
-        applications.put(info.getApplicationMaster().getApplicationId(), info); // NOTE(review): dereferences getApplicationMaster() unchecked; presumably every stored app has an APP_MASTER child — verify
-      }
-    }
-
-    @Override
-    public Map<ApplicationId, ApplicationInfo> getStoredApplications() {
-      return applications;
-    }
-  }
-}

+ 23 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/package-info.java

@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+package org.apache.hadoop.yarn.server.resourcemanager.recovery;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;

+ 0 - 8
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java

@@ -29,7 +29,6 @@ import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ApplicationReport;
 import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
 import org.apache.hadoop.yarn.event.EventHandler;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
 
@@ -131,13 +130,6 @@ public interface RMApp extends EventHandler<RMAppEvent> {
    */
   int pullRMNodeUpdates(Collection<RMNode> updatedNodes);
 
-  /**
-   * Application level metadata is stored in {@link ApplicationStore} which
-   * can persist the information.
-   * @return the {@link ApplicationStore}  for this {@link RMApp}.
-   */
-  ApplicationStore getApplicationStore();
-
   /**
    * The finish time of the {@link RMApp}
    * @return the finish time of the application.

+ 0 - 9
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java

@@ -49,7 +49,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.ApplicationMasterService;
 import org.apache.hadoop.yarn.server.resourcemanager.RMAppManagerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.RMAppManagerEventType;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppNodeUpdateEvent.RMAppNodeUpdateType;
 import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
@@ -81,7 +80,6 @@ public class RMAppImpl implements RMApp {
   private final String name;
   private final ApplicationSubmissionContext submissionContext;
   private final String clientTokenStr;
-  private final ApplicationStore appStore;
   private final Dispatcher dispatcher;
   private final YarnScheduler scheduler;
   private final ApplicationMasterService masterService;
@@ -213,7 +211,6 @@ public class RMAppImpl implements RMApp {
   public RMAppImpl(ApplicationId applicationId, RMContext rmContext,
       Configuration config, String name, String user, String queue,
       ApplicationSubmissionContext submissionContext, String clientTokenStr,
-      ApplicationStore appStore,
       YarnScheduler scheduler, ApplicationMasterService masterService, 
       long submitTime) {
 
@@ -227,7 +224,6 @@ public class RMAppImpl implements RMApp {
     this.queue = queue;
     this.submissionContext = submissionContext;
     this.clientTokenStr = clientTokenStr;
-    this.appStore = appStore;
     this.scheduler = scheduler;
     this.masterService = masterService;
     this.submitTime = submitTime;
@@ -340,11 +336,6 @@ public class RMAppImpl implements RMApp {
     }
   }
 
-  @Override
-  public ApplicationStore getApplicationStore() {
-    return this.appStore;
-  }
-
   private YarnApplicationState createApplicationState(RMAppState rmAppState) {
     switch(rmAppState) {
     case NEW:

+ 2 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java

@@ -33,6 +33,7 @@ import java.util.concurrent.locks.ReentrantReadWriteLock;
 import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock;
 import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock;
 
+import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -399,7 +400,7 @@ public class RMAppAttemptImpl implements RMAppAttempt {
       final String trackingUriWithoutScheme) {
     this.readLock.lock();
     try {
-      URI trackingUri = trackingUriWithoutScheme == null ? null :
+      URI trackingUri = StringUtils.isEmpty(trackingUriWithoutScheme) ? null :
         ProxyUriUtils.getUriFromAMUrl(trackingUriWithoutScheme);
       String proxy = YarnConfiguration.getProxyHostAndPort(conf);
       URI proxyUri = ProxyUriUtils.getUriFromAMUrl(proxy);

+ 1 - 4
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java

@@ -36,7 +36,6 @@ import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.api.records.ResourceRequest;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore;
 import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
@@ -70,14 +69,12 @@ public class AppSchedulingInfo {
   boolean pending = true; // for app metrics
 
   public AppSchedulingInfo(ApplicationAttemptId appAttemptId,
-      String user, Queue queue, ActiveUsersManager activeUsersManager,
-      ApplicationStore store) {
+      String user, Queue queue, ActiveUsersManager activeUsersManager) {
     this.applicationAttemptId = appAttemptId;
     this.applicationId = appAttemptId.getApplicationId();
     this.queue = queue;
     this.queueName = queue.getQueueName();
     this.user = user;
-    //this.store = store;
     this.activeUsersManager = activeUsersManager;
   }
 

+ 3 - 14
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java

@@ -49,7 +49,7 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger;
 import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store.RMState;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
 import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType;
@@ -365,7 +365,7 @@ implements ResourceScheduler, CapacitySchedulerContext, Configurable {
     // TODO: Fix store
     FiCaSchedulerApp SchedulerApp = 
         new FiCaSchedulerApp(applicationAttemptId, user, queue, 
-            queue.getActiveUsersManager(), rmContext, null);
+            queue.getActiveUsersManager(), rmContext);
 
     // Submit to the queue
     try {
@@ -767,18 +767,7 @@ implements ResourceScheduler, CapacitySchedulerContext, Configurable {
   @Override
   @Lock(Lock.NoLock.class)
   public void recover(RMState state) throws Exception {
-    // TODO: VINDOKVFIXME recovery
-//    applications.clear();
-//    for (Map.Entry<ApplicationId, ApplicationInfo> entry : state.getStoredApplications().entrySet()) {
-//      ApplicationId appId = entry.getKey();
-//      ApplicationInfo appInfo = entry.getValue();
-//      SchedulerApp app = applications.get(appId);
-//      app.allocate(appInfo.getContainers());
-//      for (Container c: entry.getValue().getContainers()) {
-//        Queue queue = queues.get(appInfo.getApplicationSubmissionContext().getQueue());
-//        queue.recoverContainer(clusterResource, applications.get(appId), c);
-//      }
-//    }
+    // NOT IMPLEMENTED
   }
 
   @Override

+ 2 - 3
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java

@@ -43,7 +43,6 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger;
 import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore;
 import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
@@ -111,11 +110,11 @@ public class FiCaSchedulerApp extends SchedulerApplication {
   private final RMContext rmContext;
   public FiCaSchedulerApp(ApplicationAttemptId applicationAttemptId, 
       String user, Queue queue, ActiveUsersManager activeUsersManager,
-      RMContext rmContext, ApplicationStore store) {
+      RMContext rmContext) {
     this.rmContext = rmContext;
     this.appSchedulingInfo = 
         new AppSchedulingInfo(applicationAttemptId, user, queue,  
-            activeUsersManager, store);
+            activeUsersManager);
     this.queue = queue;
   }
 

+ 3 - 6
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AppSchedulable.java

@@ -50,10 +50,10 @@ public class AppSchedulable extends Schedulable {
   private long startTime;
   private static RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
   private static final Log LOG = LogFactory.getLog(AppSchedulable.class);
-  private FSQueue queue;
+  private FSLeafQueue queue;
   private RMContainerTokenSecretManager containerTokenSecretManager;
 
-  public AppSchedulable(FairScheduler scheduler, FSSchedulerApp app, FSQueue queue) {
+  public AppSchedulable(FairScheduler scheduler, FSSchedulerApp app, FSLeafQueue queue) {
     this.scheduler = scheduler;
     this.app = app;
     this.startTime = System.currentTimeMillis();
@@ -96,9 +96,6 @@ public class AppSchedulable extends Schedulable {
     return startTime;
   }
 
-  @Override
-  public void redistributeShare() {}
-
   @Override
   public Resource getResourceUsage() {
     return app.getCurrentConsumption();
@@ -114,7 +111,7 @@ public class AppSchedulable extends Schedulable {
    * Get metrics reference from containing queue.
    */
   public QueueMetrics getMetrics() {
-    return queue.getQueueSchedulable().getMetrics();
+    return queue.getMetrics();
   }
 
   @Override

+ 88 - 166
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueueSchedulable.java → hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java

@@ -22,73 +22,57 @@ import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
-import java.util.HashMap;
 import java.util.Iterator;
-import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.classification.InterfaceAudience.Private;
-import org.apache.hadoop.classification.InterfaceStability.Unstable;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.authorize.AccessControlList;
-import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.QueueACL;
-import org.apache.hadoop.yarn.api.records.QueueInfo;
-import org.apache.hadoop.yarn.api.records.QueueState;
 import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
 import org.apache.hadoop.yarn.api.records.Resource;
-import org.apache.hadoop.yarn.factories.RecordFactory;
-import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
 
-@Private
-@Unstable
-public class FSQueueSchedulable extends Schedulable implements Queue {
-  public static final Log LOG = LogFactory.getLog(
-      FSQueueSchedulable.class.getName());
-
-  private FairScheduler scheduler;
-  private FSQueue queue;
-  private QueueManager queueMgr;
-  private List<AppSchedulable> appScheds = new LinkedList<AppSchedulable>();
+public class FSLeafQueue extends FSQueue {
+  private static final Log LOG = LogFactory.getLog(
+      FSLeafQueue.class.getName());
+    
+  private final List<AppSchedulable> appScheds = 
+      new ArrayList<AppSchedulable>();
+
+  /** Scheduling mode for jobs inside the queue (fair or FIFO) */
+  private SchedulingMode schedulingMode;
+  
+  private final FairScheduler scheduler;
+  private final QueueManager queueMgr;
   private Resource demand = Resources.createResource(0);
-  private QueueMetrics metrics;
-  private RecordFactory recordFactory =
-      RecordFactoryProvider.getRecordFactory(null);
-
+  
   // Variables used for preemption
-  long lastTimeAtMinShare;
-  long lastTimeAtHalfFairShare;
-
-  // Constructor for tests
-  protected FSQueueSchedulable(FairScheduler scheduler, FSQueue fsQueue,
-      QueueManager qMgr, QueueMetrics metrics, long minShare, long fairShare) {
-    this.scheduler = scheduler;
-    this.queueMgr = qMgr;
-    this.queue = fsQueue;
-    this.metrics = metrics;
-    this.lastTimeAtMinShare = minShare;
-    this.lastTimeAtHalfFairShare = fairShare;
-  }
-
-  public FSQueueSchedulable(FairScheduler scheduler, FSQueue queue) {
+  private long lastTimeAtMinShare;
+  private long lastTimeAtHalfFairShare;
+  
+  public FSLeafQueue(String name, QueueManager queueMgr, FairScheduler scheduler,
+      FSParentQueue parent) {
+    super(name, queueMgr, scheduler, parent);
     this.scheduler = scheduler;
-    this.queue = queue;
-    this.queueMgr = scheduler.getQueueManager();
-    this.metrics = QueueMetrics.forQueue(getName(), null, true, scheduler.getConf());
+    this.queueMgr = queueMgr;
     this.lastTimeAtMinShare = scheduler.getClock().getTime();
     this.lastTimeAtHalfFairShare = scheduler.getClock().getTime();
   }
-
-  public void addApp(AppSchedulable app) {
-    appScheds.add(app);
+  
+  public void addApp(FSSchedulerApp app) { // Wraps the app in a new AppSchedulable bound to this queue and registers it here.
+    AppSchedulable appSchedulable = new AppSchedulable(scheduler, app, this);
+    app.setAppSchedulable(appSchedulable); // the app is handed its own schedulable as well
+    appScheds.add(appSchedulable);
   }
-
+  
+  // for testing
+  void addAppSchedulable(AppSchedulable appSched) { // Appends an already-built schedulable as-is; unlike addApp, nothing is created and setAppSchedulable is not called.
+    appScheds.add(appSched);
+  }
+  
   public void removeApp(FSSchedulerApp app) {
     for (Iterator<AppSchedulable> it = appScheds.iterator(); it.hasNext();) {
       AppSchedulable appSched = it.next();
@@ -98,17 +82,47 @@ public class FSQueueSchedulable extends Schedulable implements Queue {
       }
     }
   }
+  
+  public Collection<AppSchedulable> getAppSchedulables() { // Returns the schedulables of the apps in this queue.
+    return appScheds; // the internal list itself, not a copy
+  }
+
+  public void setSchedulingMode(SchedulingMode mode) {
+    this.schedulingMode = mode;
+  }
+  
+  @Override
+  public void recomputeFairShares() { // Sets the fair share of each app in this queue according to schedulingMode.
+    if (schedulingMode == SchedulingMode.FAIR) {
+      SchedulingAlgorithms.computeFairShares(appScheds, getFairShare()); // divide this queue's own fair share among its apps
+    } else { // any mode other than FAIR, including a mode that was never set (null)
+      for (AppSchedulable sched: appScheds) {
+        sched.setFairShare(Resources.createResource(0)); // each app gets its own zero-sized Resource
+      }
+    }
+  }
+
+  @Override
+  public Resource getDemand() {
+    return demand;
+  }
+
+  @Override
+  public Resource getResourceUsage() { // Adds up the usage reported by every app in this queue.
+    Resource usage = Resources.createResource(0); // fresh accumulator on every call
+    for (AppSchedulable app : appScheds) {
+      Resources.addTo(usage, app.getResourceUsage());
+    }
+    return usage;
+  }
 
-  /**
-   * Update demand by asking apps in the queue to update
-   */
   @Override
   public void updateDemand() {
     // Compute demand by iterating through apps in the queue
     // Limit demand to maxResources
-    Resource maxRes = queueMgr.getMaxResources(queue.getName());
+    Resource maxRes = queueMgr.getMaxResources(getName());
     demand = Resources.createResource(0);
-    for (AppSchedulable sched: appScheds) {
+    for (AppSchedulable sched : appScheds) {
       sched.updateDemand();
       Resource toAdd = sched.getDemand();
       if (LOG.isDebugEnabled()) {
@@ -128,46 +142,12 @@ public class FSQueueSchedulable extends Schedulable implements Queue {
     }
   }
 
-  /**
-   * Distribute the queue's fair share among its jobs
-   */
-  @Override
-  public void redistributeShare() {
-    if (queue.getSchedulingMode() == SchedulingMode.FAIR) {
-      SchedulingAlgorithms.computeFairShares(appScheds, getFairShare());
-    } else {
-      for (AppSchedulable sched: appScheds) {
-        sched.setFairShare(Resources.createResource(0));
-      }
-    }
-  }
-
-  @Override
-  public Resource getDemand() {
-    return demand;
-  }
-
-  @Override
-  public Resource getMinShare() {
-    return queueMgr.getMinResources(queue.getName());
-  }
-
-  @Override
-  public double getWeight() {
-    return queueMgr.getQueueWeight(queue.getName());
-  }
-
-  @Override
-  public long getStartTime() {
-    return 0;
-  }
-
   @Override
   public Resource assignContainer(FSSchedulerNode node, boolean reserved) {
     LOG.debug("Node offered to queue: " + getName() + " reserved: " + reserved);
     // If this queue is over its limit, reject
     if (Resources.greaterThan(getResourceUsage(),
-        queueMgr.getMaxResources(queue.getName()))) {
+        queueMgr.getMaxResources(getName()))) {
       return Resources.none();
     }
 
@@ -185,15 +165,14 @@ public class FSQueueSchedulable extends Schedulable implements Queue {
 
     // Otherwise, chose app to schedule based on given policy (fair vs fifo).
     else {
-      SchedulingMode mode = queue.getSchedulingMode();
-
       Comparator<Schedulable> comparator;
-      if (mode == SchedulingMode.FIFO) {
+      if (schedulingMode == SchedulingMode.FIFO) {
         comparator = new SchedulingAlgorithms.FifoComparator();
-      } else if (mode == SchedulingMode.FAIR) {
+      } else if (schedulingMode == SchedulingMode.FAIR) {
         comparator = new SchedulingAlgorithms.FairShareComparator();
       } else {
-        throw new RuntimeException("Unsupported queue scheduling mode " + mode);
+        throw new RuntimeException("Unsupported queue scheduling mode " + 
+            schedulingMode);
       }
 
       Collections.sort(appScheds, comparator);
@@ -203,81 +182,13 @@ public class FSQueueSchedulable extends Schedulable implements Queue {
 
       return Resources.none();
     }
-
-  }
-
-  @Override
-  public String getName() {
-    return queue.getName();
-  }
-
-  FSQueue getQueue() {
-    return queue;
-  }
-
-  public Collection<AppSchedulable> getAppSchedulables() {
-    return appScheds;
-  }
-
-  public long getLastTimeAtMinShare() {
-    return lastTimeAtMinShare;
-  }
-
-  public void setLastTimeAtMinShare(long lastTimeAtMinShare) {
-    this.lastTimeAtMinShare = lastTimeAtMinShare;
-  }
-
-  public long getLastTimeAtHalfFairShare() {
-    return lastTimeAtHalfFairShare;
-  }
-
-  public void setLastTimeAtHalfFairShare(long lastTimeAtHalfFairShare) {
-    this.lastTimeAtHalfFairShare = lastTimeAtHalfFairShare;
-  }
-
-  @Override
-  public QueueMetrics getMetrics() {
-    return metrics;
-  }
-
-  @Override
-  public Resource getResourceUsage() {
-    Resource usage = Resources.createResource(0);
-    for (AppSchedulable app : appScheds) {
-      Resources.addTo(usage, app.getResourceUsage());
-    }
-    return usage;
   }
 
   @Override
-  public Priority getPriority() {
-    Priority p = recordFactory.newRecordInstance(Priority.class);
-    p.setPriority(1);
-    return p;
-  }
-
-  @Override
-  public Map<QueueACL, AccessControlList> getQueueAcls() {
-    Map<QueueACL, AccessControlList> acls = queueMgr.getQueueAcls(getName());
-    return new HashMap<QueueACL, AccessControlList>(acls);
-  }
-
-  @Override
-  public QueueInfo getQueueInfo(boolean includeChildQueues, boolean recursive) {
-    QueueInfo queueInfo = recordFactory.newRecordInstance(QueueInfo.class);
-    queueInfo.setQueueName(getQueueName());
-    // TODO: we might change these queue metrics around a little bit
-    // to match the semantics of the fair scheduler.
-    queueInfo.setCapacity((float) getFairShare().getMemory() /
-        scheduler.getClusterCapacity().getMemory());
-    queueInfo.setCapacity((float) getResourceUsage().getMemory() /
-        scheduler.getClusterCapacity().getMemory());
-
-    queueInfo.setChildQueues(new ArrayList<QueueInfo>());
-    queueInfo.setQueueState(QueueState.RUNNING);
-    return queueInfo;
+  public Collection<FSQueue> getChildQueues() { // Always yields a new, empty collection.
+    return new ArrayList<FSQueue>(1); // 1 is only the initial capacity; the list holds no elements
   }
-
+  
   @Override
   public List<QueueUserACLInfo> getQueueUserAclInfo(UserGroupInformation user) {
     QueueUserACLInfo userAclInfo =
@@ -294,9 +205,20 @@ public class FSQueueSchedulable extends Schedulable implements Queue {
     userAclInfo.setUserAcls(operations);
     return Collections.singletonList(userAclInfo);
   }
+  
+  public long getLastTimeAtMinShare() {
+    return lastTimeAtMinShare;
+  }
 
-  @Override
-  public String getQueueName() {
-    return getName();
+  public void setLastTimeAtMinShare(long lastTimeAtMinShare) {
+    this.lastTimeAtMinShare = lastTimeAtMinShare;
+  }
+
+  public long getLastTimeAtHalfFairShare() {
+    return lastTimeAtHalfFairShare;
+  }
+
+  public void setLastTimeAtHalfFairShare(long lastTimeAtHalfFairShare) {
+    this.lastTimeAtHalfFairShare = lastTimeAtHalfFairShare;
   }
 }

+ 158 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSParentQueue.java

@@ -0,0 +1,158 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.yarn.api.records.QueueACL;
+import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
+
+public class FSParentQueue extends FSQueue {
+  private static final Log LOG = LogFactory.getLog(
+      FSParentQueue.class.getName());
+
+
+  private final List<FSQueue> childQueues = 
+      new ArrayList<FSQueue>();
+  private final QueueManager queueMgr;
+  private Resource demand = Resources.createResource(0);
+  
+  public FSParentQueue(String name, QueueManager queueMgr, FairScheduler scheduler,
+      FSParentQueue parent) {
+    super(name, queueMgr, scheduler, parent);
+    this.queueMgr = queueMgr;
+  }
+  
+  public void addChildQueue(FSQueue child) {
+    childQueues.add(child);
+  }
+
+  @Override
+  public void recomputeFairShares() {
+    SchedulingAlgorithms.computeFairShares(childQueues, getFairShare());
+    for (FSQueue childQueue : childQueues) {
+      childQueue.getMetrics().setAvailableResourcesToQueue(childQueue.getFairShare());
+      childQueue.recomputeFairShares();
+    }
+  }
+
+  @Override
+  public Resource getDemand() {
+    return demand;
+  }
+
+  @Override
+  public Resource getResourceUsage() {
+    Resource usage = Resources.createResource(0);
+    for (FSQueue child : childQueues) {
+      Resources.addTo(usage, child.getResourceUsage());
+    }
+    return usage;
+  }
+
+  @Override
+  public void updateDemand() {
+    // Refresh each child queue's demand and sum it into this queue's demand,
+    // capping the total at maxResources and stopping once the cap is reached
+    Resource maxRes = queueMgr.getMaxResources(getName());
+    demand = Resources.createResource(0);
+    for (FSQueue childQueue : childQueues) {
+      childQueue.updateDemand();
+      Resource toAdd = childQueue.getDemand();
+      demand = Resources.add(demand, toAdd);
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Counting resource from " + childQueue.getName() + " " +
+            toAdd + "; Total resource consumption for " + getName() +
+            " now " + demand);
+      }
+      if (Resources.greaterThanOrEqual(demand, maxRes)) {
+        demand = maxRes;
+        break;
+      }
+    }
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("The updated demand for " + getName() + " is " + demand +
+          "; the max is " + maxRes);
+    }
+  }
+  
+  public boolean hasAccess(QueueACL acl, UserGroupInformation user) {
+    synchronized (this) {
+      if (getQueueAcls().get(acl).isUserAllowed(user)) {
+        return true;
+      }
+    }
+    
+    if (parent != null) {
+      return parent.hasAccess(acl, user);
+    }
+    
+    return false;
+  }
+  
+  private synchronized QueueUserACLInfo getUserAclInfo(
+      UserGroupInformation user) {
+    QueueUserACLInfo userAclInfo = 
+      recordFactory.newRecordInstance(QueueUserACLInfo.class);
+    List<QueueACL> operations = new ArrayList<QueueACL>();
+    for (QueueACL operation : QueueACL.values()) {
+      if (hasAccess(operation, user)) {
+        operations.add(operation);
+      } 
+    }
+
+    userAclInfo.setQueueName(getQueueName());
+    userAclInfo.setUserAcls(operations);
+    return userAclInfo;
+  }
+  
+  @Override
+  public synchronized List<QueueUserACLInfo> getQueueUserAclInfo(
+      UserGroupInformation user) {
+    List<QueueUserACLInfo> userAcls = new ArrayList<QueueUserACLInfo>();
+    
+    // Add queue acls
+    userAcls.add(getUserAclInfo(user));
+    
+    // Add children queue acls
+    for (FSQueue child : childQueues) {
+      userAcls.addAll(child.getQueueUserAclInfo(user));
+    }
+ 
+    return userAcls;
+  }
+
+  @Override
+  public Resource assignContainer(FSSchedulerNode node, boolean reserved) {
+    throw new IllegalStateException(
+        "Parent queue should not be assigned container");
+  }
+
+  @Override
+  public Collection<FSQueue> getChildQueues() {
+    return childQueues;
+  }
+}

+ 96 - 49
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueue.java

@@ -20,65 +20,112 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair;
 
 import java.util.ArrayList;
 import java.util.Collection;
-
-import org.apache.hadoop.classification.InterfaceAudience.Private;
-import org.apache.hadoop.classification.InterfaceStability.Unstable;
-
-/**
- * A queue containing several applications.
- */
-@Private
-@Unstable
-public class FSQueue {
-  /** Queue name. */
-  private String name;
-
-  /** Applications in this specific queue; does not include children queues' jobs. */
-  private Collection<FSSchedulerApp> applications = 
-      new ArrayList<FSSchedulerApp>();
-
-  /** Scheduling mode for jobs inside the queue (fair or FIFO) */
-  private SchedulingMode schedulingMode;
-
-  private FairScheduler scheduler;
-
-  private FSQueueSchedulable queueSchedulable;
-
-  public FSQueue(FairScheduler scheduler, String name) {
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.security.authorize.AccessControlList;
+import org.apache.hadoop.yarn.api.records.Priority;
+import org.apache.hadoop.yarn.api.records.QueueACL;
+import org.apache.hadoop.yarn.api.records.QueueInfo;
+import org.apache.hadoop.yarn.api.records.QueueState;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.factories.RecordFactory;
+import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
+
+public abstract class FSQueue extends Schedulable implements Queue {
+  private final String name;
+  private final QueueManager queueMgr;
+  private final FairScheduler scheduler;
+  private final QueueMetrics metrics;
+  
+  protected final FSParentQueue parent;
+  protected final RecordFactory recordFactory =
+      RecordFactoryProvider.getRecordFactory(null);
+  
+  public FSQueue(String name, QueueManager queueMgr, 
+      FairScheduler scheduler, FSParentQueue parent) {
     this.name = name;
-    this.queueSchedulable = new FSQueueSchedulable(scheduler, this);
+    this.queueMgr = queueMgr;
     this.scheduler = scheduler;
+    this.metrics = QueueMetrics.forQueue(getName(), parent, true, scheduler.getConf());
+    this.parent = parent;
   }
-
-  public Collection<FSSchedulerApp> getApplications() {
-    return applications;
+  
+  public String getName() {
+    return name;
   }
-
-  public void addApp(FSSchedulerApp app) {
-    applications.add(app);
-    AppSchedulable appSchedulable = new AppSchedulable(scheduler, app, this);
-    app.setAppSchedulable(appSchedulable);
-    queueSchedulable.addApp(appSchedulable);
+  
+  @Override
+  public String getQueueName() {
+    return name;
   }
-
-  public void removeApp(FSSchedulerApp app) {
-    applications.remove(app);
-    queueSchedulable.removeApp(app);
+  
+  @Override
+  public double getWeight() {
+    return queueMgr.getQueueWeight(getName());
   }
-
-  public String getName() {
-    return name;
+  
+  @Override
+  public Resource getMinShare() {
+    return queueMgr.getMinResources(getName());
   }
 
-  public SchedulingMode getSchedulingMode() {
-    return schedulingMode;
+  @Override
+  public long getStartTime() {
+    return 0;
   }
 
-  public void setSchedulingMode(SchedulingMode schedulingMode) {
-    this.schedulingMode = schedulingMode;
+  @Override
+  public Priority getPriority() {
+    Priority p = recordFactory.newRecordInstance(Priority.class);
+    p.setPriority(1);
+    return p;
   }
-
-  public FSQueueSchedulable getQueueSchedulable() {
-    return queueSchedulable;
+  
+  @Override
+  public QueueInfo getQueueInfo(boolean includeChildQueues, boolean recursive) {
+    QueueInfo queueInfo = recordFactory.newRecordInstance(QueueInfo.class);
+    queueInfo.setQueueName(getQueueName());
+    // TODO: we might change these queue metrics around a little bit
+    // to match the semantics of the fair scheduler.
+    queueInfo.setCapacity((float) getFairShare().getMemory() /
+        scheduler.getClusterCapacity().getMemory());
+    queueInfo.setCurrentCapacity((float) getResourceUsage().getMemory() /
+        scheduler.getClusterCapacity().getMemory());
+    
+    ArrayList<QueueInfo> childQueueInfos = new ArrayList<QueueInfo>();
+    if (includeChildQueues) {
+      Collection<FSQueue> childQueues = getChildQueues();
+      for (FSQueue child : childQueues) {
+        childQueueInfos.add(child.getQueueInfo(recursive, recursive));
+      }
+    }
+    queueInfo.setChildQueues(childQueueInfos);
+    queueInfo.setQueueState(QueueState.RUNNING);
+    return queueInfo;
+  }
+  
+  @Override
+  public Map<QueueACL, AccessControlList> getQueueAcls() {
+    Map<QueueACL, AccessControlList> acls = queueMgr.getQueueAcls(getName());
+    return new HashMap<QueueACL, AccessControlList>(acls);
+  }
+  
+  @Override
+  public QueueMetrics getMetrics() {
+    return metrics;
   }
+  
+  /**
+   * Recomputes the fair shares for all queues and applications
+   * under this queue.
+   */
+  public abstract void recomputeFairShares();
+  
+  /**
+   * Gets the children of this queue, if any.
+   */
+  public abstract Collection<FSQueue> getChildQueues();
 }

+ 2 - 3
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerApp.java

@@ -42,7 +42,6 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore;
 import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
@@ -103,11 +102,11 @@ public class FSSchedulerApp extends SchedulerApplication {
   private final RMContext rmContext;
   public FSSchedulerApp(ApplicationAttemptId applicationAttemptId, 
       String user, Queue queue, ActiveUsersManager activeUsersManager,
-      RMContext rmContext, ApplicationStore store) {
+      RMContext rmContext) {
     this.rmContext = rmContext;
     this.appSchedulingInfo = 
         new AppSchedulingInfo(applicationAttemptId, user, queue,  
-            activeUsersManager, store);
+            activeUsersManager);
     this.queue = queue;
   }
 

+ 133 - 134
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java

@@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
@@ -50,7 +51,7 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger;
 import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store.RMState;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
 import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType;
@@ -75,6 +76,25 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateS
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager;
 
+/**
+ * A scheduler that schedules resources between a set of queues. The scheduler
+ * keeps track of the resources used by each queue, and attempts to maintain
+ * fairness by scheduling tasks at queues whose allocations are farthest below
+ * an ideal fair distribution.
+ * 
+ * The fair scheduler supports hierarchical queues. All queues descend from a
+ * queue named "root". Available resources are distributed among the children
+ * of the root queue in the typical fair scheduling fashion. Then, the children
+ * distribute the resources assigned to them to their children in the same
+ * fashion.  Applications may only be scheduled on leaf queues. Queues can be
+ * specified as children of other queues by placing them as sub-elements of their
+ * parents in the fair scheduler configuration file.
+ * 
+ * A queue's name starts with the names of its parents, with periods as
+ * separators.  So a queue named "queue1" directly under the root queue would
+ * be referred to as "root.queue1", and a queue named "queue2" under a queue
+ * named "parent1" would be referred to as "root.parent1.queue2".
+ */
 @LimitedPrivate("yarn")
 @Unstable
 @SuppressWarnings("unchecked")
@@ -105,23 +125,22 @@ public class FairScheduler implements ResourceScheduler {
   // Aggregate metrics
   QueueMetrics rootMetrics;
 
-  //Time when we last updated preemption vars
+  // Time when we last updated preemption vars
   protected long lastPreemptionUpdateTime;
-  //Time we last ran preemptTasksIfNecessary
+  // Time we last ran preemptTasksIfNecessary
   private long lastPreemptCheckTime;
 
-
   // This stores per-application scheduling information, indexed by
   // attempt ID's for fast lookup.
-  protected Map<ApplicationAttemptId, FSSchedulerApp> applications
-  = new HashMap<ApplicationAttemptId, FSSchedulerApp>();
+  protected Map<ApplicationAttemptId, FSSchedulerApp> applications = 
+      new HashMap<ApplicationAttemptId, FSSchedulerApp>();
 
   // Nodes in the cluster, indexed by NodeId
-  private Map<NodeId, FSSchedulerNode> nodes =
+  private Map<NodeId, FSSchedulerNode> nodes = 
       new ConcurrentHashMap<NodeId, FSSchedulerNode>();
 
   // Aggregate capacity of the cluster
-  private Resource clusterCapacity =
+  private Resource clusterCapacity = 
       RecordFactoryProvider.getRecordFactory(null).newRecordInstance(Resource.class);
 
   // How often tasks are preempted (must be longer than a couple
@@ -131,10 +150,11 @@ public class FairScheduler implements ResourceScheduler {
   protected boolean preemptionEnabled;
   protected boolean sizeBasedWeight; // Give larger weights to larger jobs
   protected WeightAdjuster weightAdjuster; // Can be null for no weight adjuster
-  protected double nodeLocalityThreshold;   // Cluster threshold for node locality
-  protected double rackLocalityThreshold;   // Cluster threshold for rack locality
-  private FairSchedulerEventLog eventLog;   // Machine-readable event log
-  protected boolean assignMultiple; // Allocate multiple containers per heartbeat
+  protected double nodeLocalityThreshold; // Cluster threshold for node locality
+  protected double rackLocalityThreshold; // Cluster threshold for rack locality
+  private FairSchedulerEventLog eventLog; // Machine-readable event log
+  protected boolean assignMultiple; // Allocate multiple containers per
+                                    // heartbeat
   protected int maxAssign; // Max containers to assign per heartbeat
   
   public FairScheduler() {
@@ -150,16 +170,8 @@ public class FairScheduler implements ResourceScheduler {
     return queueMgr;
   }
 
-  public List<FSQueueSchedulable> getQueueSchedulables() {
-    List<FSQueueSchedulable> scheds = new ArrayList<FSQueueSchedulable>();
-    for (FSQueue queue: queueMgr.getQueues()) {
-      scheds.add(queue.getQueueSchedulable());
-    }
-    return scheds;
-  }
-
   private RMContainer getRMContainer(ContainerId containerId) {
-    FSSchedulerApp application =
+    FSSchedulerApp application = 
         applications.get(containerId.getApplicationAttemptId());
     return (application == null) ? null : application.getRMContainer(containerId);
   }
@@ -183,34 +195,24 @@ public class FairScheduler implements ResourceScheduler {
   }
 
   /**
-  * Recompute the internal variables used by the scheduler - per-job weights,
-  * fair shares, deficits, minimum slot allocations, and amount of used and
-  * required resources per job.
-  */
+   * Recompute the internal variables used by the scheduler - per-job weights,
+   * fair shares, deficits, minimum slot allocations, and amount of used and
+   * required resources per job.
+   */
   protected synchronized void update() {
     queueMgr.reloadAllocsIfNecessary(); // Relaod alloc file
     updateRunnability(); // Set job runnability based on user/queue limits
     updatePreemptionVariables(); // Determine if any queues merit preemption
 
-    // Update demands of apps and queues
-    for (FSQueue queue: queueMgr.getQueues()) {
-      queue.getQueueSchedulable().updateDemand();
-    }
-
-    // Compute fair shares based on updated demands
-    List<FSQueueSchedulable> queueScheds = getQueueSchedulables();
-    SchedulingAlgorithms.computeFairShares(
-        queueScheds, clusterCapacity);
+    FSQueue rootQueue = queueMgr.getRootQueue();
 
-    // Update queue metrics for this queue
-    for (FSQueueSchedulable sched : queueScheds) {
-      sched.getMetrics().setAvailableResourcesToQueue(sched.getFairShare());
-    }
+    // Recursively update demands for all queues
+    rootQueue.updateDemand();
 
-    // Use the computed shares to assign shares within each queue
-    for (FSQueue queue: queueMgr.getQueues()) {
-      queue.getQueueSchedulable().redistributeShare();
-    }
+    rootQueue.setFairShare(clusterCapacity);
+    // Recursively compute fair shares for all queues
+    // and update metrics
+    rootQueue.recomputeFairShares();
 
     // Update recorded capacity of root queue (child queues are updated
     // when fair share is calculated).
@@ -225,7 +227,7 @@ public class FairScheduler implements ResourceScheduler {
   private void updatePreemptionVariables() {
     long now = clock.getTime();
     lastPreemptionUpdateTime = now;
-    for (FSQueueSchedulable sched: getQueueSchedulables()) {
+    for (FSLeafQueue sched : queueMgr.getLeafQueues()) {
       if (!isStarvedForMinShare(sched)) {
         sched.setLastTimeAtMinShare(now);
       }
@@ -238,16 +240,16 @@ public class FairScheduler implements ResourceScheduler {
   /**
    * Is a queue below its min share for the given task type?
    */
-  boolean isStarvedForMinShare(FSQueueSchedulable sched) {
+  boolean isStarvedForMinShare(FSLeafQueue sched) {
     Resource desiredShare = Resources.min(sched.getMinShare(), sched.getDemand());
     return Resources.lessThan(sched.getResourceUsage(), desiredShare);
   }
 
   /**
-   * Is a queue being starved for fair share for the given task type?
-   * This is defined as being below half its fair share.
+   * Is a queue being starved for fair share for the given task type? This is
+   * defined as being below half its fair share.
    */
-  boolean isStarvedForFairShare(FSQueueSchedulable sched) {
+  boolean isStarvedForFairShare(FSLeafQueue sched) {
     Resource desiredFairShare = Resources.max(
         Resources.multiply(sched.getFairShare(), .5), sched.getDemand());
     return Resources.lessThan(sched.getResourceUsage(), desiredFairShare);
@@ -255,10 +257,10 @@ public class FairScheduler implements ResourceScheduler {
 
   /**
    * Check for queues that need tasks preempted, either because they have been
-   * below their guaranteed share for minSharePreemptionTimeout or they
-   * have been below half their fair share for the fairSharePreemptionTimeout.
-   * If such queues exist, compute how many tasks of each type need to be
-   * preempted and then select the right ones using preemptTasks.
+   * below their guaranteed share for minSharePreemptionTimeout or they have
+   * been below half their fair share for the fairSharePreemptionTimeout. If
+   * such queues exist, compute how many tasks of each type need to be preempted
+   * and then select the right ones using preemptTasks.
    */
   protected synchronized void preemptTasksIfNecessary() {
     if (!preemptionEnabled) {
@@ -273,35 +275,37 @@ public class FairScheduler implements ResourceScheduler {
 
     Resource resToPreempt = Resources.none();
 
-    for (FSQueueSchedulable sched: getQueueSchedulables()) {
+    for (FSLeafQueue sched : queueMgr.getLeafQueues()) {
       resToPreempt = Resources.add(resToPreempt, resToPreempt(sched, curTime));
     }
     if (Resources.greaterThan(resToPreempt, Resources.none())) {
-      preemptResources(getQueueSchedulables(), resToPreempt);
+      preemptResources(queueMgr.getLeafQueues(), resToPreempt);
     }
   }
 
   /**
-   * Preempt a quantity of resources from a list of QueueSchedulables.
-   * The policy for this is to pick apps from queues that are over their fair
-   * share, but make sure that no queue is placed below its fair share in the
-   * process. We further prioritize preemption by choosing containers with
-   * lowest priority to preempt.
+   * Preempt a quantity of resources from a collection of leaf queues. The
+   * policy for this is to pick apps from queues that are over their fair share,
+   * but make sure that no queue is placed below its fair share in the process.
+   * We further prioritize preemption by choosing containers with lowest
+   * priority to preempt.
    */
-  protected void preemptResources(List<FSQueueSchedulable> scheds, Resource toPreempt) {
+  protected void preemptResources(Collection<FSLeafQueue> scheds,
+      Resource toPreempt) {
     if (scheds.isEmpty() || Resources.equals(toPreempt, Resources.none())) {
       return;
     }
 
     Map<RMContainer, FSSchedulerApp> apps = 
         new HashMap<RMContainer, FSSchedulerApp>();
-    Map<RMContainer, FSQueueSchedulable> queues = new HashMap<RMContainer, FSQueueSchedulable>();
+    Map<RMContainer, FSLeafQueue> queues = 
+        new HashMap<RMContainer, FSLeafQueue>();
 
     // Collect running containers from over-scheduled queues
     List<RMContainer> runningContainers = new ArrayList<RMContainer>();
-    for (FSQueueSchedulable sched: scheds) {
+    for (FSLeafQueue sched : scheds) {
       if (Resources.greaterThan(sched.getResourceUsage(), sched.getFairShare())) {
-        for (AppSchedulable as: sched.getAppSchedulables()) {
+        for (AppSchedulable as : sched.getAppSchedulables()) {
           for (RMContainer c : as.getApp().getLiveContainers()) {
             runningContainers.add(c);
             apps.put(c, as.getApp());
@@ -321,12 +325,12 @@ public class FairScheduler implements ResourceScheduler {
 
     // Scan down the sorted list of task statuses until we've killed enough
     // tasks, making sure we don't kill too many from any queue
-    for (RMContainer container: runningContainers) {
-     FSQueueSchedulable sched = queues.get(container);
+    for (RMContainer container : runningContainers) {
+      FSLeafQueue sched = queues.get(container);
       if (Resources.greaterThan(sched.getResourceUsage(), sched.getFairShare())) {
         LOG.info("Preempting container (prio=" + container.getContainer().getPriority() +
             "res=" + container.getContainer().getResource() +
-            ") from queue " + sched.getQueue().getName());
+            ") from queue " + sched.getName());
         ContainerStatus status = SchedulerUtils.createAbnormalContainerStatus(
             container.getContainerId(), SchedulerUtils.PREEMPTED_CONTAINER);
 
@@ -348,12 +352,12 @@ public class FairScheduler implements ResourceScheduler {
    * If the queue has been below its min share for at least its preemption
    * timeout, it should preempt the difference between its current share and
    * this min share. If it has been below half its fair share for at least the
-   * fairSharePreemptionTimeout, it should preempt enough tasks to get up to
-   * its full fair share. If both conditions hold, we preempt the max of the
-   * two amounts (this shouldn't happen unless someone sets the timeouts to
-   * be identical for some reason).
+   * fairSharePreemptionTimeout, it should preempt enough tasks to get up to its
+   * full fair share. If both conditions hold, we preempt the max of the two
+   * amounts (this shouldn't happen unless someone sets the timeouts to be
+   * identical for some reason).
    */
-  protected Resource resToPreempt(FSQueueSchedulable sched, long curTime) {
+  protected Resource resToPreempt(FSLeafQueue sched, long curTime) {
     String queue = sched.getName();
     long minShareTimeout = queueMgr.getMinSharePreemptionTimeout(queue);
     long fairShareTimeout = queueMgr.getFairSharePreemptionTimeout();
@@ -362,7 +366,7 @@ public class FairScheduler implements ResourceScheduler {
     if (curTime - sched.getLastTimeAtMinShare() > minShareTimeout) {
       Resource target = Resources.min(sched.getMinShare(), sched.getDemand());
       resDueToMinShare = Resources.max(Resources.none(),
-                            Resources.subtract(target, sched.getResourceUsage()));
+          Resources.subtract(target, sched.getResourceUsage()));
     }
     if (curTime - sched.getLastTimeAtHalfFairShare() > fairShareTimeout) {
       Resource target = Resources.min(sched.getFairShare(), sched.getDemand());
@@ -380,15 +384,15 @@ public class FairScheduler implements ResourceScheduler {
   }
 
   /**
-   * This updates the runnability of all apps based on whether or not
-   * any users/queues have exceeded their capacity.
+   * This updates the runnability of all apps based on whether or not any
+   * users/queues have exceeded their capacity.
    */
   private void updateRunnability() {
     List<AppSchedulable> apps = new ArrayList<AppSchedulable>();
 
     // Start by marking everything as not runnable
-    for (FSQueue p: queueMgr.getQueues()) {
-      for (AppSchedulable a: p.getQueueSchedulable().getAppSchedulables()) {
+    for (FSLeafQueue leafQueue : queueMgr.getLeafQueues()) {
+      for (AppSchedulable a : leafQueue.getAppSchedulables()) {
         a.setRunnable(false);
         apps.add(a);
       }
@@ -400,7 +404,7 @@ public class FairScheduler implements ResourceScheduler {
     Map<String, Integer> userApps = new HashMap<String, Integer>();
     Map<String, Integer> queueApps = new HashMap<String, Integer>();
 
-    for (AppSchedulable app: apps) {
+    for (AppSchedulable app : apps) {
       String user = app.getApp().getUser();
       String queue = app.getApp().getQueueName();
       int userCount = userApps.containsKey(user) ? userApps.get(user) : 0;
@@ -473,22 +477,25 @@ public class FairScheduler implements ResourceScheduler {
   }
 
   /**
-   * Add a new application to the scheduler, with a given id, queue name,
-   * and user. This will accept a new app even if the user or queue is above
+   * Add a new application to the scheduler, with a given id, queue name, and
+   * user. This will accept a new app even if the user or queue is above
    * configured limits, but the app will not be marked as runnable.
    */
-  protected synchronized void
-  addApplication(ApplicationAttemptId applicationAttemptId,
-      String queueName, String user) {
+  protected synchronized void addApplication(
+      ApplicationAttemptId applicationAttemptId, String queueName, String user) {
 
-    FSQueue queue = queueMgr.getQueue(queueName);
+    FSLeafQueue queue = queueMgr.getLeafQueue(queueName);
+    if (queue == null) {
+      // queue is not an existing or creatable leaf queue
+      queue = queueMgr.getLeafQueue(YarnConfiguration.DEFAULT_QUEUE_NAME);
+    }
 
     FSSchedulerApp schedulerApp =
         new FSSchedulerApp(applicationAttemptId, user,
-            queue.getQueueSchedulable(), new ActiveUsersManager(getRootQueueMetrics()),
-            rmContext, null);
-
-    // Inforce ACLs
+            queue, new ActiveUsersManager(getRootQueueMetrics()),
+            rmContext);
+    
+    // Enforce ACLs
     UserGroupInformation userUgi;
     try {
       userUgi = UserGroupInformation.getCurrentUser();
@@ -497,8 +504,8 @@ public class FairScheduler implements ResourceScheduler {
       return;
     }
 
-    List<QueueUserACLInfo> info = queue.getQueueSchedulable().getQueueUserAclInfo(
-        userUgi); // Always a signleton list
+    // Always a singleton list
+    List<QueueUserACLInfo> info = queue.getQueueUserAclInfo(userUgi);
     if (!info.get(0).getUserAcls().contains(QueueACL.SUBMIT_APPLICATIONS)) {
       LOG.info("User " + userUgi.getUserName() +
           " cannot submit" + " applications to queue " + queue.getName());
@@ -506,14 +513,13 @@ public class FairScheduler implements ResourceScheduler {
     }
 
     queue.addApp(schedulerApp);
-    queue.getQueueSchedulable().getMetrics().submitApp(user,
-    		applicationAttemptId.getAttemptId());
+    queue.getMetrics().submitApp(user, applicationAttemptId.getAttemptId());
     rootMetrics.submitApp(user, applicationAttemptId.getAttemptId());
 
     applications.put(applicationAttemptId, schedulerApp);
 
     LOG.info("Application Submission: " + applicationAttemptId +
-        ", user: " + user +
+        ", user: "+ user +
         ", currently active: " + applications.size());
 
     rmContext.getDispatcher().getEventHandler().handle(
@@ -540,10 +546,10 @@ public class FairScheduler implements ResourceScheduler {
           SchedulerUtils.createAbnormalContainerStatus(
               rmContainer.getContainerId(),
               SchedulerUtils.COMPLETED_APPLICATION),
-          RMContainerEventType.KILL);
+              RMContainerEventType.KILL);
     }
 
-     // Release all reserved containers
+    // Release all reserved containers
     for (RMContainer rmContainer : application.getReservedContainers()) {
       completedContainer(rmContainer,
           SchedulerUtils.createAbnormalContainerStatus(
@@ -556,7 +562,8 @@ public class FairScheduler implements ResourceScheduler {
     application.stop(rmAppAttemptFinalState);
 
     // Inform the queue
-    FSQueue queue = queueMgr.getQueue(application.getQueue().getQueueName());
+    FSLeafQueue queue = queueMgr.getLeafQueue(application.getQueue()
+        .getQueueName());
     queue.removeApp(application);
 
     // Remove from our data-structure
@@ -658,11 +665,11 @@ public class FairScheduler implements ResourceScheduler {
     for (ContainerId releasedContainerId : release) {
       RMContainer rmContainer = getRMContainer(releasedContainerId);
       if (rmContainer == null) {
-         RMAuditLogger.logFailure(application.getUser(),
-             AuditConstants.RELEASE_CONTAINER,
-             "Unauthorized access or invalid container", "FairScheduler",
-             "Trying to release container not owned by app or with invalid id",
-             application.getApplicationId(), releasedContainerId);
+        RMAuditLogger.logFailure(application.getUser(),
+            AuditConstants.RELEASE_CONTAINER,
+            "Unauthorized access or invalid container", "FairScheduler",
+            "Trying to release container not owned by app or with invalid id",
+            application.getApplicationId(), releasedContainerId);
       }
       completedContainer(rmContainer,
           SchedulerUtils.createAbnormalContainerStatus(
@@ -675,8 +682,8 @@ public class FairScheduler implements ResourceScheduler {
       if (!ask.isEmpty()) {
         if (LOG.isDebugEnabled()) {
           LOG.debug("allocate: pre-update" +
-            " applicationAttemptId=" + appAttemptId +
-            " application=" + application.getApplicationId());
+              " applicationAttemptId=" + appAttemptId +
+              " application=" + application.getApplicationId());
         }
         application.showRequests();
 
@@ -689,19 +696,17 @@ public class FairScheduler implements ResourceScheduler {
 
       if (LOG.isDebugEnabled()) {
         LOG.debug("allocate:" +
-          " applicationAttemptId=" + appAttemptId +
-          " #ask=" + ask.size());
+            " applicationAttemptId=" + appAttemptId +
+            " #ask=" + ask.size());
       }
 
-      return new Allocation(
-          application.pullNewlyAllocatedContainers(),
+      return new Allocation(application.pullNewlyAllocatedContainers(),
           application.getHeadroom());
     }
   }
 
   /**
-   * Process a container which has launched on a node, as reported by the
-   * node.
+   * Process a container which has launched on a node, as reported by the node.
    */
   private void containerLaunchedOnNode(ContainerId containerId, FSSchedulerNode node) {
     // Get the application for the finished container
@@ -723,7 +728,9 @@ public class FairScheduler implements ResourceScheduler {
   private synchronized void nodeUpdate(RMNode nm,
       List<ContainerStatus> newlyLaunchedContainers,
       List<ContainerStatus> completedContainers) {
-    LOG.info("nodeUpdate: " + nm + " cluster capacity: " + clusterCapacity);
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("nodeUpdate: " + nm + " cluster capacity: " + clusterCapacity);
+    }
     eventLog.log("HEARTBEAT", nm.getHostName());
     FSSchedulerNode node = nodes.get(nm.getNodeID());
 
@@ -755,20 +762,20 @@ public class FairScheduler implements ResourceScheduler {
       LOG.info("Trying to fulfill reservation for application " +
           reservedApplication.getApplicationId() + " on node: " + nm);
 
-      FSQueue queue = queueMgr.getQueue(reservedApplication.getQueueName());
-      queue.getQueueSchedulable().assignContainer(node, true);
+      FSLeafQueue queue = queueMgr.getLeafQueue(reservedApplication.getQueueName());
+      queue.assignContainer(node, true);
     }
 
-
     // Otherwise, schedule at queue which is furthest below fair share
     else {
       int assignedContainers = 0;
       while (true) {
         // At most one task is scheduled each iteration of this loop
-        List<FSQueueSchedulable> scheds = getQueueSchedulables();
+        List<FSLeafQueue> scheds = new ArrayList<FSLeafQueue>(
+            queueMgr.getLeafQueues());
         Collections.sort(scheds, new SchedulingAlgorithms.FairShareComparator());
         boolean assignedContainer = false;
-        for (FSQueueSchedulable sched : scheds) {
+        for (FSLeafQueue sched : scheds) {
           Resource assigned = sched.assignContainer(node, false);
           if (Resources.greaterThan(assigned, Resources.none())) {
             eventLog.log("ASSIGN", nm.getHostName(), assigned);
@@ -811,7 +818,7 @@ public class FairScheduler implements ResourceScheduler {
 
   @Override
   public void handle(SchedulerEvent event) {
-    switch(event.getType()) {
+    switch (event.getType()) {
     case NODE_ADDED:
       if (!(event instanceof NodeAddedSchedulerEvent)) {
         throw new RuntimeException("Unexpected event type: " + event);
@@ -830,8 +837,7 @@ public class FairScheduler implements ResourceScheduler {
       if (!(event instanceof NodeUpdateSchedulerEvent)) {
         throw new RuntimeException("Unexpected event type: " + event);
       }
-      NodeUpdateSchedulerEvent nodeUpdatedEvent =
-      (NodeUpdateSchedulerEvent)event;
+      NodeUpdateSchedulerEvent nodeUpdatedEvent = (NodeUpdateSchedulerEvent)event;
       nodeUpdate(nodeUpdatedEvent.getRMNode(),
           nodeUpdatedEvent.getNewlyLaunchedContainers(),
           nodeUpdatedEvent.getCompletedContainers());
@@ -840,7 +846,7 @@ public class FairScheduler implements ResourceScheduler {
       if (!(event instanceof AppAddedSchedulerEvent)) {
         throw new RuntimeException("Unexpected event type: " + event);
       }
-      AppAddedSchedulerEvent appAddedEvent = (AppAddedSchedulerEvent) event;
+      AppAddedSchedulerEvent appAddedEvent = (AppAddedSchedulerEvent)event;
       String queue = appAddedEvent.getQueue();
 
       // Potentially set queue to username if configured to do so
@@ -865,7 +871,7 @@ public class FairScheduler implements ResourceScheduler {
         throw new RuntimeException("Unexpected event type: " + event);
       }
       ContainerExpiredSchedulerEvent containerExpiredEvent =
-          (ContainerExpiredSchedulerEvent) event;
+          (ContainerExpiredSchedulerEvent)event;
       ContainerId containerId = containerExpiredEvent.getContainerId();
       completedContainer(getRMContainer(containerId),
           SchedulerUtils.createAbnormalContainerStatus(
@@ -884,8 +890,8 @@ public class FairScheduler implements ResourceScheduler {
   }
 
   @Override
-  public synchronized void
-      reinitialize(Configuration conf, RMContext rmContext) throws IOException {
+  public synchronized void reinitialize(Configuration conf, RMContext rmContext)
+      throws IOException {
     if (!initialized) {
       this.conf = new FairSchedulerConfiguration(conf);
       rootMetrics = QueueMetrics.forQueue("root", null, true, conf);
@@ -907,11 +913,10 @@ public class FairScheduler implements ResourceScheduler {
 
       try {
         queueMgr.initialize();
-      }
-      catch (Exception e) {
+      } catch (Exception e) {
         throw new IOException("Failed to start FairScheduler", e);
       }
-      
+
       Thread updateThread = new Thread(new UpdateThread());
       updateThread.setName("FairSchedulerUpdateThread");
       updateThread.setDaemon(true);
@@ -923,10 +928,9 @@ public class FairScheduler implements ResourceScheduler {
       rackLocalityThreshold = this.conf.getLocalityThresholdRack();
       preemptionEnabled = this.conf.getPreemptionEnabled();
       try {
-       queueMgr.reloadAllocs();
+        queueMgr.reloadAllocs();
 
-      }
-      catch (Exception e) {
+      } catch (Exception e) {
         throw new IOException("Failed to initialize FairScheduler", e);
       }
     }
@@ -938,8 +942,8 @@ public class FairScheduler implements ResourceScheduler {
     if (!queueMgr.exists(queueName)) {
       return null;
     }
-    return queueMgr.getQueue(queueName).getQueueSchedulable().getQueueInfo(
-        includeChildQueues, recursive);
+    return queueMgr.getQueue(queueName).getQueueInfo(includeChildQueues,
+        recursive);
   }
 
   @Override
@@ -951,12 +955,7 @@ public class FairScheduler implements ResourceScheduler {
       return new ArrayList<QueueUserACLInfo>();
     }
 
-    List<QueueUserACLInfo> userAcls = new ArrayList<QueueUserACLInfo>();
-
-    for (FSQueue queue : queueMgr.getQueues()) {
-      userAcls.addAll(queue.getQueueSchedulable().getQueueUserAclInfo(user));
-    }
-    return userAcls;
+    return queueMgr.getRootQueue().getQueueUserAclInfo(user);
   }
 
   @Override

+ 1 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerConfiguration.java

@@ -18,7 +18,7 @@ public class FairSchedulerConfiguration extends Configuration {
   /** Whether to use the user name as the queue name (instead of "default") if
    * the request does not specify a queue. */
   protected static final String  USER_AS_DEFAULT_QUEUE = CONF_PREFIX + "user-as-default-queue";
-  protected static final boolean DEFAULT_USER_AS_DEFAULT_QUEUE = false;
+  protected static final boolean DEFAULT_USER_AS_DEFAULT_QUEUE = true;
 
   protected static final String LOCALITY_THRESHOLD = CONF_PREFIX + "locality.threshold";
   protected static final float  DEFAULT_LOCALITY_THRESHOLD = -1.0f;

+ 8 - 5
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerEventLog.java

@@ -77,12 +77,11 @@ class FairSchedulerEventLog {
   boolean init(FairSchedulerConfiguration conf) {
     try {
       logDir = conf.getEventlogDir();
-      Path logDirPath = new Path(logDir);
-      FileSystem fs = logDirPath.getFileSystem(conf);
-      if (!fs.exists(logDirPath)) {
-        if (!fs.mkdirs(logDirPath)) {
+      File logDirFile = new File(logDir);
+      if (!logDirFile.exists()) {
+        if (!logDirFile.mkdirs()) {
           throw new IOException(
-              "Mkdirs failed to create " + logDirPath.toString());
+              "Mkdirs failed to create " + logDirFile.toString());
         }
       }
       String username = System.getProperty("user.name");
@@ -142,4 +141,8 @@ class FairSchedulerEventLog {
   synchronized boolean isEnabled() {
     return !logDisabled;
   }
+  
+  /** Returns the current value of this event log's {@code logFile} field. */
+  public String getLogFile() {
+    return logFile;
+  }
 }

+ 185 - 62
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java

@@ -27,6 +27,7 @@ import java.util.Collection;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.concurrent.CopyOnWriteArrayList;
 
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
@@ -52,6 +53,7 @@ import org.xml.sax.SAXException;
 /**
  * Maintains a list of queues as well as scheduling parameters for each queue,
  * such as guaranteed share allocations, from the fair scheduler config file.
+ * 
  */
 @Private
 @Unstable
@@ -59,6 +61,8 @@ public class QueueManager {
   public static final Log LOG = LogFactory.getLog(
     QueueManager.class.getName());
 
+  public static final String ROOT_QUEUE = "root";
+  
   /** Time to wait between checks of the allocation file */
   public static final long ALLOC_RELOAD_INTERVAL = 10 * 1000;
 
@@ -76,7 +80,10 @@ public class QueueManager {
                             // used) or a String to specify an absolute path (if
                             // mapred.fairscheduler.allocation.file is used).
 
-  private Map<String, FSQueue> queues = new HashMap<String, FSQueue>();
+  private final Collection<FSLeafQueue> leafQueues = 
+      new CopyOnWriteArrayList<FSLeafQueue>();
+  private final Map<String, FSQueue> queues = new HashMap<String, FSQueue>();
+  private FSParentQueue rootQueue;
 
   private volatile QueueManagerInfo info = new QueueManagerInfo();
   
@@ -87,10 +94,17 @@ public class QueueManager {
   public QueueManager(FairScheduler scheduler) {
     this.scheduler = scheduler;
   }
+  
+  /**
+   * Returns the root parent queue. The field is assigned in initialize(),
+   * which creates it under the name "root" with no parent and registers it in
+   * the queues map; it has no initializer, so it is null before that call.
+   */
+  public FSParentQueue getRootQueue() {
+    return rootQueue;
+  }
 
   public void initialize() throws IOException, SAXException,
       AllocationConfigurationException, ParserConfigurationException {
     FairSchedulerConfiguration conf = scheduler.getConf();
+    rootQueue = new FSParentQueue("root", this, scheduler, null);
+    queues.put(rootQueue.getName(), rootQueue);
+    
     this.allocFile = conf.getAllocationFile();
     if (allocFile == null) {
       // No allocation file specified in jobconf. Use the default allocation
@@ -106,21 +120,106 @@ public class QueueManager {
     lastSuccessfulReload = scheduler.getClock().getTime();
     lastReloadAttempt = scheduler.getClock().getTime();
     // Create the default queue
-    getQueue(YarnConfiguration.DEFAULT_QUEUE_NAME);
+    getLeafQueue(YarnConfiguration.DEFAULT_QUEUE_NAME);
   }
-
+  
   /**
-   * Get a queue by name, creating it if necessary
+   * Get a queue by name, creating it if necessary.  If the queue
+   * is not or can not be a leaf queue, i.e. it already exists as a parent queue,
+   * or one of the parents in its name is already a leaf queue, null is returned.
+   * 
+   * The root part of the name is optional, so a queue underneath the root 
+   * named "queue1" could be referred to as just "queue1", and a queue named
+   * "queue2" underneath a parent named "parent1" that is underneath the root 
+   * could be referred to as just "parent1.queue2".
    */
-  public FSQueue getQueue(String name) {
+  public FSLeafQueue getLeafQueue(String name) {
+    if (!name.startsWith(ROOT_QUEUE + ".")) {
+      name = ROOT_QUEUE + "." + name;
+    }
     synchronized (queues) {
       FSQueue queue = queues.get(name);
       if (queue == null) {
-        queue = new FSQueue(scheduler, name);
-        queue.setSchedulingMode(info.defaultSchedulingMode);
-        queues.put(name, queue);
+        FSLeafQueue leafQueue = createLeafQueue(name);
+        if (leafQueue == null) {
+          return null;
+        }
+        leafQueue.setSchedulingMode(info.defaultSchedulingMode);
+        queue = leafQueue;
+      } else if (queue instanceof FSParentQueue) {
+        return null;
+      }
+      return (FSLeafQueue)queue;
+    }
+  }
+  
+  /**
+   * Creates a leaf queue and places it in the tree. Creates any
+   * parents that don't already exist.
+   * 
+   * @return
+   *    the created queue, if successful. null if not allowed (one of the parent
+   *    queues in the queue name is already a leaf queue)
+   */
+  private FSLeafQueue createLeafQueue(String name) {
+    List<String> newQueueNames = new ArrayList<String>();
+    newQueueNames.add(name);
+    int sepIndex = name.length();
+    FSParentQueue parent = null;
+
+    // Move up the queue tree until we reach one that exists.
+    while (sepIndex != -1) {
+      sepIndex = name.lastIndexOf('.', sepIndex-1);
+      FSQueue queue;
+      String curName = null;
+      curName = name.substring(0, sepIndex);
+      queue = queues.get(curName);
+
+      if (queue == null) {
+        newQueueNames.add(curName);
+      } else {
+        if (queue instanceof FSParentQueue) {
+          parent = (FSParentQueue)queue;
+          break;
+        } else {
+          return null;
+        }
       }
-      return queue;
+    }
+    
+    // At this point, parent refers to the deepest existing parent of the
+    // queue to create.
+    // Now that we know everything worked out, make all the queues
+    // and add them to the map.
+    FSLeafQueue leafQueue = null;
+    for (int i = newQueueNames.size()-1; i >= 0; i--) {
+      String queueName = newQueueNames.get(i);
+      if (i == 0) {
+        // First name added was the leaf queue
+        leafQueue = new FSLeafQueue(name, this, scheduler, parent);
+        parent.addChildQueue(leafQueue);
+        queues.put(leafQueue.getName(), leafQueue);
+        leafQueues.add(leafQueue);
+      } else {
+        FSParentQueue newParent = new FSParentQueue(queueName, this, scheduler, parent);
+        parent.addChildQueue(newParent);
+        queues.put(newParent.getName(), newParent);
+        parent = newParent;
+      }
+    }
+    
+    return leafQueue;
+  }
+
+  /**
+   * Gets a queue by name.
+   */
+  public FSQueue getQueue(String name) {
+    if (!name.startsWith(ROOT_QUEUE + ".") && !name.equals(ROOT_QUEUE)) {
+      name = ROOT_QUEUE + "." + name;
+    }
+    synchronized (queues) {
+      return queues.get(name);
     }
   }
 
@@ -136,8 +235,8 @@ public class QueueManager {
   /**
    * Get the queue for a given AppSchedulable.
    */
-  public FSQueue getQueueForApp(AppSchedulable app) {
-    return getQueue(app.getApp().getQueueName());
+  public FSLeafQueue getQueueForApp(AppSchedulable app) {
+    return getLeafQueue(app.getApp().getQueueName());
   }
 
   /**
@@ -237,54 +336,9 @@ public class QueueManager {
       Element element = (Element)node;
       if ("queue".equals(element.getTagName()) ||
     	  "pool".equals(element.getTagName())) {
-        String queueName = element.getAttribute("name");
-        Map<QueueACL, AccessControlList> acls =
-            new HashMap<QueueACL, AccessControlList>();
-        queueNamesInAllocFile.add(queueName);
-        NodeList fields = element.getChildNodes();
-        for (int j = 0; j < fields.getLength(); j++) {
-          Node fieldNode = fields.item(j);
-          if (!(fieldNode instanceof Element))
-            continue;
-          Element field = (Element) fieldNode;
-          if ("minResources".equals(field.getTagName())) {
-            String text = ((Text)field.getFirstChild()).getData().trim();
-            int val = Integer.parseInt(text);
-            minQueueResources.put(queueName, Resources.createResource(val));
-          } else if ("maxResources".equals(field.getTagName())) {
-            String text = ((Text)field.getFirstChild()).getData().trim();
-            int val = Integer.parseInt(text);
-            maxQueueResources.put(queueName, Resources.createResource(val));
-          } else if ("maxRunningApps".equals(field.getTagName())) {
-            String text = ((Text)field.getFirstChild()).getData().trim();
-            int val = Integer.parseInt(text);
-            queueMaxApps.put(queueName, val);
-          } else if ("weight".equals(field.getTagName())) {
-            String text = ((Text)field.getFirstChild()).getData().trim();
-            double val = Double.parseDouble(text);
-            queueWeights.put(queueName, val);
-          } else if ("minSharePreemptionTimeout".equals(field.getTagName())) {
-            String text = ((Text)field.getFirstChild()).getData().trim();
-            long val = Long.parseLong(text) * 1000L;
-            minSharePreemptionTimeouts.put(queueName, val);
-          } else if ("schedulingMode".equals(field.getTagName())) {
-            String text = ((Text)field.getFirstChild()).getData().trim();
-            queueModes.put(queueName, parseSchedulingMode(text));
-          } else if ("aclSubmitApps".equals(field.getTagName())) {
-            String text = ((Text)field.getFirstChild()).getData().trim();
-            acls.put(QueueACL.SUBMIT_APPLICATIONS, new AccessControlList(text));
-          } else if ("aclAdministerApps".equals(field.getTagName())) {
-            String text = ((Text)field.getFirstChild()).getData().trim();
-            acls.put(QueueACL.ADMINISTER_QUEUE, new AccessControlList(text));
-          }
-        }
-        queueAcls.put(queueName, acls);
-        if (maxQueueResources.containsKey(queueName) && minQueueResources.containsKey(queueName)
-            && Resources.lessThan(maxQueueResources.get(queueName),
-                minQueueResources.get(queueName))) {
-          LOG.warn(String.format("Queue %s has max resources %d less than min resources %d",
-              queueName, maxQueueResources.get(queueName), minQueueResources.get(queueName)));
-        }
+        loadQueue("root", element, minQueueResources, maxQueueResources, queueMaxApps,
+            userMaxApps, queueWeights, queueModes, minSharePreemptionTimeouts,
+            queueAcls, queueNamesInAllocFile);
       } else if ("user".equals(element.getTagName())) {
         String userName = element.getAttribute("name");
         NodeList fields = element.getChildNodes();
@@ -331,7 +385,7 @@ public class QueueManager {
           queueMaxAppsDefault, defaultSchedulingMode, minSharePreemptionTimeouts,
           queueAcls, fairSharePreemptionTimeout, defaultMinSharePreemptionTimeout);
       for (String name: queueNamesInAllocFile) {
-        FSQueue queue = getQueue(name);
+        FSLeafQueue queue = getLeafQueue(name);
         if (queueModes.containsKey(name)) {
           queue.setSchedulingMode(queueModes.get(name));
         } else {
@@ -340,6 +394,75 @@ public class QueueManager {
       }
     }
   }
+  
+  /**
+   * Loads a queue from a queue element in the configuration file.
+   *
+   * The queue's full name is {@code parentName + "." + <name attribute>}. Each
+   * recognized child element is parsed and stored under that full name in the
+   * corresponding output map. Nested "queue"/"pool" elements are loaded
+   * recursively with this queue as their parent; a queue that contains at
+   * least one nested queue is treated as a parent and is NOT added to
+   * {@code queueNamesInAllocFile}.
+   *
+   * @param parentName full name of the enclosing queue (e.g. "root")
+   * @param element the "queue" or "pool" element to load
+   * @param minQueueResources receives the "minResources" value, if present
+   * @param maxQueueResources receives the "maxResources" value, if present
+   * @param queueMaxApps receives the "maxRunningApps" value, if present
+   * @param userMaxApps not modified here; only passed on to nested queues
+   * @param queueWeights receives the "weight" value, if present
+   * @param queueModes receives the parsed "schedulingMode", if present
+   * @param minSharePreemptionTimeouts receives the
+   *    "minSharePreemptionTimeout" value multiplied by 1000, if present
+   * @param queueAcls receives this queue's ACL map (possibly empty)
+   * @param queueNamesInAllocFile receives the full name of every leaf queue
+   * @throws AllocationConfigurationException if parsing the scheduling mode
+   *    fails
+   */
+  private void loadQueue(String parentName, Element element, Map<String, Resource> minQueueResources,
+      Map<String, Resource> maxQueueResources, Map<String, Integer> queueMaxApps,
+      Map<String, Integer> userMaxApps, Map<String, Double> queueWeights,
+      Map<String, SchedulingMode> queueModes, Map<String, Long> minSharePreemptionTimeouts,
+      Map<String, Map<QueueACL, AccessControlList>> queueAcls, List<String> queueNamesInAllocFile) 
+      throws AllocationConfigurationException {
+    String queueName = parentName + "." + element.getAttribute("name");
+    Map<QueueACL, AccessControlList> acls =
+        new HashMap<QueueACL, AccessControlList>();
+    NodeList fields = element.getChildNodes();
+    boolean isLeaf = true;
+
+    for (int j = 0; j < fields.getLength(); j++) {
+      Node fieldNode = fields.item(j);
+      // Skip whitespace text, comments and other non-element children.
+      if (!(fieldNode instanceof Element)) {
+        continue;
+      }
+      Element field = (Element) fieldNode;
+      if ("minResources".equals(field.getTagName())) {
+        String text = ((Text)field.getFirstChild()).getData().trim();
+        int val = Integer.parseInt(text);
+        minQueueResources.put(queueName, Resources.createResource(val));
+      } else if ("maxResources".equals(field.getTagName())) {
+        String text = ((Text)field.getFirstChild()).getData().trim();
+        int val = Integer.parseInt(text);
+        maxQueueResources.put(queueName, Resources.createResource(val));
+      } else if ("maxRunningApps".equals(field.getTagName())) {
+        String text = ((Text)field.getFirstChild()).getData().trim();
+        int val = Integer.parseInt(text);
+        queueMaxApps.put(queueName, val);
+      } else if ("weight".equals(field.getTagName())) {
+        String text = ((Text)field.getFirstChild()).getData().trim();
+        double val = Double.parseDouble(text);
+        queueWeights.put(queueName, val);
+      } else if ("minSharePreemptionTimeout".equals(field.getTagName())) {
+        String text = ((Text)field.getFirstChild()).getData().trim();
+        // Stored value is the configured number times 1000.
+        long val = Long.parseLong(text) * 1000L;
+        minSharePreemptionTimeouts.put(queueName, val);
+      } else if ("schedulingMode".equals(field.getTagName())) {
+        String text = ((Text)field.getFirstChild()).getData().trim();
+        queueModes.put(queueName, parseSchedulingMode(text));
+      } else if ("aclSubmitApps".equals(field.getTagName())) {
+        String text = ((Text)field.getFirstChild()).getData().trim();
+        acls.put(QueueACL.SUBMIT_APPLICATIONS, new AccessControlList(text));
+      } else if ("aclAdministerApps".equals(field.getTagName())) {
+        String text = ((Text)field.getFirstChild()).getData().trim();
+        acls.put(QueueACL.ADMINISTER_QUEUE, new AccessControlList(text));
+      } else if ("queue".equals(field.getTagName()) ||
+          "pool".equals(field.getTagName())) {
+        // Exact tag match: endsWith() would also accept any suffix of "queue"
+        // (e.g. "ue") as a nested queue element.
+        loadQueue(queueName, field, minQueueResources, maxQueueResources, queueMaxApps,
+            userMaxApps, queueWeights, queueModes, minSharePreemptionTimeouts,
+            queueAcls, queueNamesInAllocFile);
+        isLeaf = false;
+      }
+    }
+    // Only queues without nested queues are recorded as leaf queue names.
+    if (isLeaf) {
+      queueNamesInAllocFile.add(queueName);
+    }
+    queueAcls.put(queueName, acls);
+    if (maxQueueResources.containsKey(queueName) && minQueueResources.containsKey(queueName)
+        && Resources.lessThan(maxQueueResources.get(queueName),
+            minQueueResources.get(queueName))) {
+      // The arguments are Resource objects, so they must be formatted with %s;
+      // %d would throw IllegalFormatConversionException instead of logging.
+      LOG.warn(String.format("Queue %s has max resources %s less than min resources %s",
+          queueName, maxQueueResources.get(queueName), minQueueResources.get(queueName)));
+    }
+  }
 
   private SchedulingMode parseSchedulingMode(String text)
       throws AllocationConfigurationException {
@@ -384,9 +507,9 @@ public class QueueManager {
   /**
    * Get a collection of all queues
    */
-  public Collection<FSQueue> getQueues() {
+  public Collection<FSLeafQueue> getLeafQueues() {
     synchronized (queues) {
-      return new ArrayList<FSQueue>(queues.values());
+      return leafQueues;
     }
   }
 

+ 0 - 6
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/Schedulable.java

@@ -91,12 +91,6 @@ abstract class Schedulable {
   /** Refresh the Schedulable's demand and those of its children if any. */
   public abstract void updateDemand();
 
-  /**
-   * Distribute the fair share assigned to this Schedulable among its
-   * children (used in queues where the internal scheduler is fair sharing).
-   */
-  public abstract void redistributeShare();
-
   /**
    * Assign a container on this node if possible, and return the amount of
    * resources assigned. If {@code reserved} is true, it means a reservation

+ 3 - 9
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java

@@ -56,7 +56,7 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger;
 import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store.RMState;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
 import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType;
@@ -292,7 +292,7 @@ public class FifoScheduler implements ResourceScheduler, Configurable {
     // TODO: Fix store
     FiCaSchedulerApp schedulerApp = 
         new FiCaSchedulerApp(appAttemptId, user, DEFAULT_QUEUE, activeUsersManager,
-            this.rmContext, null);
+            this.rmContext);
     applications.put(appAttemptId, schedulerApp);
     metrics.submitApp(user, appAttemptId.getAttemptId());
     LOG.info("Application Submission: " + appAttemptId.getApplicationId() + 
@@ -763,13 +763,7 @@ public class FifoScheduler implements ResourceScheduler, Configurable {
 
   @Override
   public void recover(RMState state) {
-    // TODO fix recovery
-//    for (Map.Entry<ApplicationId, ApplicationInfo> entry: state.getStoredApplications().entrySet()) {
-//      ApplicationId appId = entry.getKey();
-//      ApplicationInfo appInfo = entry.getValue();
-//      SchedulerApp app = applications.get(appId);
-//      app.allocate(appInfo.getContainers());
-//    }
+    // NOT IMPLEMENTED
   }
 
   @Override

+ 3 - 3
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/FairSchedulerInfo.java

@@ -23,7 +23,7 @@ import java.util.Collection;
 import java.util.List;
 
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSQueue;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSLeafQueue;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler;
 
 public class FairSchedulerInfo {
@@ -32,9 +32,9 @@ public class FairSchedulerInfo {
   
   public FairSchedulerInfo(FairScheduler fs) {
     scheduler = fs;
-    Collection<FSQueue> queues = fs.getQueueManager().getQueues();
+    Collection<FSLeafQueue> queues = fs.getQueueManager().getLeafQueues();
     queueInfos = new ArrayList<FairSchedulerQueueInfo>();
-    for (FSQueue queue : queues) {
+    for (FSLeafQueue queue : queues) {
       queueInfos.add(new FairSchedulerQueueInfo(queue, fs));
     }
   }

+ 9 - 11
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/FairSchedulerQueueInfo.java

@@ -22,9 +22,8 @@ import java.util.Collection;
 
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSQueue;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSQueueSchedulable;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSSchedulerApp;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.AppSchedulable;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSLeafQueue;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.QueueManager;
 
@@ -49,17 +48,16 @@ public class FairSchedulerQueueInfo {
   
   private String queueName;
   
-  public FairSchedulerQueueInfo(FSQueue queue, FairScheduler scheduler) {
-    Collection<FSSchedulerApp> apps = queue.getApplications();
-    for (FSSchedulerApp app : apps) {
-      if (app.isPending()) {
+  public FairSchedulerQueueInfo(FSLeafQueue queue, FairScheduler scheduler) {
+    Collection<AppSchedulable> apps = queue.getAppSchedulables();
+    for (AppSchedulable app : apps) {
+      if (app.getApp().isPending()) {
         numPendingApps++;
       } else {
         numActiveApps++;
       }
     }
     
-    FSQueueSchedulable schedulable = queue.getQueueSchedulable();
     QueueManager manager = scheduler.getQueueManager();
     
     queueName = queue.getName();
@@ -67,11 +65,11 @@ public class FairSchedulerQueueInfo {
     Resource clusterMax = scheduler.getClusterCapacity();
     clusterMaxMem = clusterMax.getMemory();
     
-    usedResources = schedulable.getResourceUsage();
+    usedResources = queue.getResourceUsage();
     fractionUsed = (float)usedResources.getMemory() / clusterMaxMem;
     
-    fairShare = schedulable.getFairShare().getMemory();
-    minResources = schedulable.getMinShare();
+    fairShare = queue.getFairShare().getMemory();
+    minResources = queue.getMinShare();
     minShare = minResources.getMemory();
     maxResources = scheduler.getQueueManager().getMaxResources(queueName);
     if (maxResources.getMemory() > clusterMaxMem) {

+ 1 - 4
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java

@@ -37,7 +37,6 @@ import org.apache.hadoop.yarn.event.Dispatcher;
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.MockRMApp;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
@@ -93,7 +92,7 @@ public class TestAppManager{
         rmDispatcher);
     AMLivelinessMonitor amFinishingMonitor = new AMLivelinessMonitor(
         rmDispatcher);
-    return new RMContextImpl(new MemStore(), rmDispatcher,
+    return new RMContextImpl(rmDispatcher,
         containerAllocationExpirer, amLivelinessMonitor, amFinishingMonitor,
         null, null, null, null) {
       @Override
@@ -366,7 +365,6 @@ public class TestAppManager{
         YarnConfiguration.DEFAULT_QUEUE_NAME, 
         app.getQueue());
     Assert.assertEquals("app state doesn't match", RMAppState.NEW, app.getState());
-    Assert.assertNotNull("app store is null", app.getApplicationStore());
 
     // wait for event to be processed
     int timeoutSecs = 0;
@@ -413,7 +411,6 @@ public class TestAppManager{
     Assert.assertEquals("app name doesn't match", "testApp1", app.getName());
     Assert.assertEquals("app queue doesn't match", "testQueue", app.getQueue());
     Assert.assertEquals("app state doesn't match", RMAppState.NEW, app.getState());
-    Assert.assertNotNull("app store is null", app.getApplicationStore());
 
     // wait for event to be processed
     int timeoutSecs = 0;

+ 2 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationACLs.java

@@ -50,7 +50,7 @@ import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
 import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.ipc.YarnRPC;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
 import org.apache.hadoop.yarn.service.Service.STATE;
@@ -85,7 +85,7 @@ public class TestApplicationACLs {
 
   @BeforeClass
   public static void setup() throws InterruptedException, IOException {
-    Store store = StoreFactory.getStore(conf);
+    RMStateStore store = StoreFactory.getStore(conf);
     conf.setBoolean(YarnConfiguration.YARN_ACL_ENABLE, true);
     AccessControlList adminACL = new AccessControlList("");
     adminACL.addGroup(SUPER_GROUP);

+ 1 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java

@@ -182,7 +182,7 @@ public class TestClientRMService {
   private RMAppImpl getRMApp(RMContext rmContext, YarnScheduler yarnScheduler,
       ApplicationId applicationId3, YarnConfiguration config, String queueName) {
     return new RMAppImpl(applicationId3, rmContext, config, null, null,
-        queueName, null, null, null, yarnScheduler, null, System
+        queueName, null, null, yarnScheduler, null, System
             .currentTimeMillis());
   }
 }

+ 1 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java

@@ -36,7 +36,6 @@ import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.api.records.NodeState;
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.server.api.records.HeartbeatResponse;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore;
 import org.apache.hadoop.yarn.server.resourcemanager.resourcetracker.InlineDispatcher;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent;
@@ -79,7 +78,7 @@ public class TestRMNodeTransitions {
     InlineDispatcher rmDispatcher = new InlineDispatcher();
     
     rmContext =
-        new RMContextImpl(new MemStore(), rmDispatcher, null, null, null,
+        new RMContextImpl(rmDispatcher, null, null, null,
             mock(DelegationTokenRenewer.class), null, null, null);
     scheduler = mock(YarnScheduler.class);
     doAnswer(

+ 2 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceManager.java

@@ -31,7 +31,7 @@ import org.apache.hadoop.yarn.api.records.NodeHealthStatus;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory;
 import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
@@ -47,7 +47,7 @@ public class TestResourceManager {
   @Before
   public void setUp() throws Exception {
     Configuration conf = new YarnConfiguration();
-    Store store = StoreFactory.getStore(conf);
+    RMStateStore store = StoreFactory.getStore(conf);
     resourceManager = new ResourceManager(store);
     resourceManager.init(conf);
   }

+ 0 - 5
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java

@@ -32,7 +32,6 @@ import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
 import org.apache.hadoop.yarn.api.records.YarnApplicationState;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
@@ -194,10 +193,6 @@ public abstract class MockAsm extends MockApps {
       throw new UnsupportedOperationException("Not supported yet.");
     }
     @Override
-    public ApplicationStore getApplicationStore() {
-      throw new UnsupportedOperationException("Not supported yet.");
-    }
-    @Override
     public float getProgress() {
       throw new UnsupportedOperationException("Not supported yet.");
     }

+ 0 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMLaunchFailure.java

@@ -49,8 +49,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
 import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore;
 import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;

+ 1 - 3
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java

@@ -53,9 +53,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
 import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store.RMState;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
 import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;

+ 0 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestASMStateMachine.java

@@ -35,7 +35,6 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor;
 import org.junit.After;

+ 0 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestApplicationMasterExpiry.java

@@ -36,8 +36,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
 import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor;
 import org.junit.Before;
 import org.junit.Test;

+ 1 - 3
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestSchedulerNegotiator.java

@@ -43,9 +43,7 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store.RMState;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory;
 import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor;

+ 1 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestNMExpiry.java

@@ -39,7 +39,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.NodeEventDispatcher;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceTrackerService;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEventType;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType;
 import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager;
@@ -70,7 +69,7 @@ public class TestNMExpiry {
     Configuration conf = new Configuration();
     // Dispatcher that processes events inline
     Dispatcher dispatcher = new InlineDispatcher();
-    RMContext context = new RMContextImpl(new MemStore(), dispatcher, null,
+    RMContext context = new RMContextImpl(dispatcher, null,
         null, null, null, null, null, null);
     dispatcher.register(SchedulerEventType.class,
         new InlineDispatcher.EmptyEventHandler());

+ 1 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestRMNMRPCResponseId.java

@@ -39,7 +39,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceTrackerService;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEventType;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType;
 import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager;
@@ -66,7 +65,7 @@ public class TestRMNMRPCResponseId {
       }
     });
     RMContext context =
-        new RMContextImpl(new MemStore(), dispatcher, null, null, null, null,
+        new RMContextImpl(dispatcher, null, null, null, null,
           null, null, null);
     dispatcher.register(RMNodeEventType.class,
         new ResourceManager.NodeEventDispatcher(context));

+ 0 - 6
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java

@@ -28,7 +28,6 @@ import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ApplicationReport;
 import org.apache.hadoop.yarn.MockApps;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
 
@@ -137,11 +136,6 @@ public class MockRMApp implements RMApp {
     throw new UnsupportedOperationException("Not supported yet.");
   }
 
-  @Override
-  public ApplicationStore getApplicationStore() {
-    throw new UnsupportedOperationException("Not supported yet.");
-  }
-
   @Override
   public long getFinishTime() {
     return finish;

+ 2 - 5
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java

@@ -40,8 +40,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMAppManagerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.RMAppManagerEventType;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent;
@@ -140,7 +138,7 @@ public class TestRMAppTransitions {
     AMLivelinessMonitor amLivelinessMonitor = mock(AMLivelinessMonitor.class);
     AMLivelinessMonitor amFinishingMonitor = mock(AMLivelinessMonitor.class);
     this.rmContext =
-        new RMContextImpl(new MemStore(), rmDispatcher,
+        new RMContextImpl(rmDispatcher,
           containerAllocationExpirer, amLivelinessMonitor, amFinishingMonitor,
           null, new ApplicationTokenSecretManager(conf),
           new RMContainerTokenSecretManager(conf),
@@ -171,7 +169,6 @@ public class TestRMAppTransitions {
     // ensure max retries set to known value
     conf.setInt(YarnConfiguration.RM_AM_MAX_RETRIES, maxRetries);
     String clientTokenStr = "bogusstring";
-    ApplicationStore appStore = mock(ApplicationStore.class);
     YarnScheduler scheduler = mock(YarnScheduler.class);
     ApplicationMasterService masterService =
         new ApplicationMasterService(rmContext, scheduler);
@@ -183,7 +180,7 @@ public class TestRMAppTransitions {
     RMApp application = new RMAppImpl(applicationId, rmContext,
         conf, name, user,
         queue, submissionContext, clientTokenStr,
-        appStore, scheduler,
+        scheduler,
         masterService, System.currentTimeMillis());
 
     testAppStartState(applicationId, user, name, queue, application);

+ 9 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java

@@ -53,7 +53,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType;
 import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore;
 import org.apache.hadoop.yarn.server.resourcemanager.resourcetracker.InlineDispatcher;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
@@ -162,7 +161,7 @@ public class TestRMAppAttemptTransitions {
     amFinishingMonitor = mock(AMLivelinessMonitor.class);
     Configuration conf = new Configuration();
     rmContext =
-        new RMContextImpl(new MemStore(), rmDispatcher,
+        new RMContextImpl(rmDispatcher,
           containerAllocationExpirer, amLivelinessMonitor, amFinishingMonitor,
           null, new ApplicationTokenSecretManager(conf),
           new RMContainerTokenSecretManager(conf),
@@ -639,6 +638,14 @@ public class TestRMAppAttemptTransitions {
         "Killed by user");
   }
 
+
+  @Test
+  public void testNoTrackingUrl() {
+    Container amContainer = allocateApplicationAttempt();
+    launchApplicationAttempt(amContainer);
+    runApplicationAttempt(amContainer, "host", 8042, "");
+  }
+
   @Test
   public void testUnregisterToSuccessfulFinishing() {
     Container amContainer = allocateApplicationAttempt();

+ 3 - 3
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java

@@ -479,7 +479,7 @@ public class TestApplicationLimits {
         TestUtils.getMockApplicationAttemptId(0, 0); 
     FiCaSchedulerApp app_0_0 = 
         spy(new FiCaSchedulerApp(appAttemptId_0_0, user_0, queue, 
-            queue.getActiveUsersManager(), rmContext, null));
+            queue.getActiveUsersManager(), rmContext));
     queue.submitApplication(app_0_0, user_0, A);
 
     List<ResourceRequest> app_0_0_requests = new ArrayList<ResourceRequest>();
@@ -498,7 +498,7 @@ public class TestApplicationLimits {
         TestUtils.getMockApplicationAttemptId(1, 0); 
     FiCaSchedulerApp app_0_1 = 
         spy(new FiCaSchedulerApp(appAttemptId_0_1, user_0, queue, 
-            queue.getActiveUsersManager(), rmContext, null));
+            queue.getActiveUsersManager(), rmContext));
     queue.submitApplication(app_0_1, user_0, A);
     
     List<ResourceRequest> app_0_1_requests = new ArrayList<ResourceRequest>();
@@ -517,7 +517,7 @@ public class TestApplicationLimits {
         TestUtils.getMockApplicationAttemptId(2, 0); 
     FiCaSchedulerApp app_1_0 = 
         spy(new FiCaSchedulerApp(appAttemptId_1_0, user_1, queue, 
-            queue.getActiveUsersManager(), rmContext, null));
+            queue.getActiveUsersManager(), rmContext));
     queue.submitApplication(app_1_0, user_1, A);
 
     List<ResourceRequest> app_1_0_requests = new ArrayList<ResourceRequest>();

+ 6 - 6
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java

@@ -40,7 +40,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.MockNodes;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
 import org.apache.hadoop.yarn.server.resourcemanager.Task;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory;
 import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
@@ -76,7 +76,7 @@ public class TestCapacityScheduler {
   
   @Before
   public void setUp() throws Exception {
-    Store store = StoreFactory.getStore(new Configuration());
+    RMStateStore store = StoreFactory.getStore(new Configuration());
     resourceManager = new ResourceManager(store);
     CapacitySchedulerConfiguration csConf 
        = new CapacitySchedulerConfiguration();
@@ -251,7 +251,7 @@ public class TestCapacityScheduler {
     CapacitySchedulerConfiguration conf = new CapacitySchedulerConfiguration();
     setupQueueConfiguration(conf);
     cs.setConf(new YarnConfiguration());
-    cs.reinitialize(conf, new RMContextImpl(null, null, null, null, null, null,
+    cs.reinitialize(conf, new RMContextImpl(null, null, null, null, null,
       null, new RMContainerTokenSecretManager(conf),
       new ClientToAMTokenSecretManagerInRM()));
     checkQueueCapacities(cs, A_CAPACITY, B_CAPACITY);
@@ -349,7 +349,7 @@ public class TestCapacityScheduler {
     conf.setCapacity(CapacitySchedulerConfiguration.ROOT + ".a.a1.b1", 100.0f);
     conf.setUserLimitFactor(CapacitySchedulerConfiguration.ROOT + ".a.a1.b1", 100.0f);
 
-    cs.reinitialize(conf, new RMContextImpl(null, null, null, null, null, null,
+    cs.reinitialize(conf, new RMContextImpl(null, null, null, null, null,
       null, new RMContainerTokenSecretManager(conf),
       new ClientToAMTokenSecretManagerInRM()));
   }
@@ -361,7 +361,7 @@ public class TestCapacityScheduler {
     setupQueueConfiguration(csConf);
     CapacityScheduler cs = new CapacityScheduler();
     cs.setConf(new YarnConfiguration());
-    cs.reinitialize(csConf, new RMContextImpl(null, null, null, null, null,
+    cs.reinitialize(csConf, new RMContextImpl(null, null, null, null,
       null, null, new RMContainerTokenSecretManager(csConf),
       new ClientToAMTokenSecretManagerInRM()));
 
@@ -387,7 +387,7 @@ public class TestCapacityScheduler {
     CapacitySchedulerConfiguration conf = new CapacitySchedulerConfiguration();
     setupQueueConfiguration(conf);
     cs.setConf(new YarnConfiguration());
-    cs.reinitialize(conf, new RMContextImpl(null, null, null, null, null, null,
+    cs.reinitialize(conf, new RMContextImpl(null, null, null, null, null,
       null, new RMContainerTokenSecretManager(conf),
       new ClientToAMTokenSecretManagerInRM()));
     checkQueueCapacities(cs, A_CAPACITY, B_CAPACITY);

+ 27 - 27
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java

@@ -250,14 +250,14 @@ public class TestLeafQueue {
         TestUtils.getMockApplicationAttemptId(0, 0); 
     FiCaSchedulerApp app_0 = 
         new FiCaSchedulerApp(appAttemptId_0, user_0, a, 
-            mock(ActiveUsersManager.class), rmContext, null);
+            mock(ActiveUsersManager.class), rmContext);
     a.submitApplication(app_0, user_0, B);
 
     final ApplicationAttemptId appAttemptId_1 = 
         TestUtils.getMockApplicationAttemptId(1, 0); 
     FiCaSchedulerApp app_1 = 
         new FiCaSchedulerApp(appAttemptId_1, user_0, a, 
-            mock(ActiveUsersManager.class), rmContext, null);
+            mock(ActiveUsersManager.class), rmContext);
     a.submitApplication(app_1, user_0, B);  // same user
 
     
@@ -295,14 +295,14 @@ public class TestLeafQueue {
     final ApplicationAttemptId appAttemptId_0 = TestUtils
         .getMockApplicationAttemptId(0, 1);
     FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_d, d, null,
-        rmContext, null);
+        rmContext);
     d.submitApplication(app_0, user_d, D);
 
     // Attempt the same application again
     final ApplicationAttemptId appAttemptId_1 = TestUtils
         .getMockApplicationAttemptId(0, 2);
     FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_d, d, null,
-        rmContext, null);
+        rmContext);
     d.submitApplication(app_1, user_d, D); // same user
   }
 
@@ -320,7 +320,7 @@ public class TestLeafQueue {
     final ApplicationAttemptId appAttemptId_0 = TestUtils
         .getMockApplicationAttemptId(0, 1);
     FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, null,
-        rmContext, null);
+        rmContext);
     a.submitApplication(app_0, user_0, B);
     
     when(cs.getApplication(appAttemptId_0)).thenReturn(app_0);
@@ -335,7 +335,7 @@ public class TestLeafQueue {
     final ApplicationAttemptId appAttemptId_1 = TestUtils
         .getMockApplicationAttemptId(0, 2);
     FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a, null,
-        rmContext, null);
+        rmContext);
     a.submitApplication(app_1, user_0, B); // same user
 
     assertEquals(1, a.getMetrics().getAppsSubmitted());
@@ -371,14 +371,14 @@ public class TestLeafQueue {
         TestUtils.getMockApplicationAttemptId(0, 0); 
     FiCaSchedulerApp app_0 = 
         new FiCaSchedulerApp(appAttemptId_0, user_0, a, 
-            mock(ActiveUsersManager.class), rmContext, null);
+            mock(ActiveUsersManager.class), rmContext);
     a.submitApplication(app_0, user_0, A);
 
     final ApplicationAttemptId appAttemptId_1 = 
         TestUtils.getMockApplicationAttemptId(1, 0); 
     FiCaSchedulerApp app_1 = 
         new FiCaSchedulerApp(appAttemptId_1, user_0, a, 
-            mock(ActiveUsersManager.class), rmContext, null);
+            mock(ActiveUsersManager.class), rmContext);
     a.submitApplication(app_1, user_0, A);  // same user
 
     
@@ -495,21 +495,21 @@ public class TestLeafQueue {
         TestUtils.getMockApplicationAttemptId(0, 0); 
     FiCaSchedulerApp app_0 = 
         new FiCaSchedulerApp(appAttemptId_0, user_0, a, 
-            a.getActiveUsersManager(), rmContext, null);
+            a.getActiveUsersManager(), rmContext);
     a.submitApplication(app_0, user_0, A);
 
     final ApplicationAttemptId appAttemptId_1 = 
         TestUtils.getMockApplicationAttemptId(1, 0); 
     FiCaSchedulerApp app_1 = 
         new FiCaSchedulerApp(appAttemptId_1, user_0, a, 
-            a.getActiveUsersManager(), rmContext, null);
+            a.getActiveUsersManager(), rmContext);
     a.submitApplication(app_1, user_0, A);  // same user
 
     final ApplicationAttemptId appAttemptId_2 = 
         TestUtils.getMockApplicationAttemptId(2, 0); 
     FiCaSchedulerApp app_2 = 
         new FiCaSchedulerApp(appAttemptId_2, user_1, a, 
-            a.getActiveUsersManager(), rmContext, null);
+            a.getActiveUsersManager(), rmContext);
     a.submitApplication(app_2, user_1, A);
 
     // Setup some nodes
@@ -588,21 +588,21 @@ public class TestLeafQueue {
         TestUtils.getMockApplicationAttemptId(0, 0); 
     FiCaSchedulerApp app_0 = 
         new FiCaSchedulerApp(appAttemptId_0, user_0, a, 
-            a.getActiveUsersManager(), rmContext, null);
+            a.getActiveUsersManager(), rmContext);
     a.submitApplication(app_0, user_0, A);
 
     final ApplicationAttemptId appAttemptId_1 = 
         TestUtils.getMockApplicationAttemptId(1, 0); 
     FiCaSchedulerApp app_1 = 
         new FiCaSchedulerApp(appAttemptId_1, user_0, a, 
-            a.getActiveUsersManager(), rmContext, null);
+            a.getActiveUsersManager(), rmContext);
     a.submitApplication(app_1, user_0, A);  // same user
 
     final ApplicationAttemptId appAttemptId_2 = 
         TestUtils.getMockApplicationAttemptId(2, 0); 
     FiCaSchedulerApp app_2 = 
         new FiCaSchedulerApp(appAttemptId_2, user_1, a, 
-            a.getActiveUsersManager(), rmContext, null);
+            a.getActiveUsersManager(), rmContext);
     a.submitApplication(app_2, user_1, A);
 
     // Setup some nodes
@@ -699,28 +699,28 @@ public class TestLeafQueue {
         TestUtils.getMockApplicationAttemptId(0, 0); 
     FiCaSchedulerApp app_0 = 
         new FiCaSchedulerApp(appAttemptId_0, user_0, a, 
-            a.getActiveUsersManager(), rmContext, null);
+            a.getActiveUsersManager(), rmContext);
     a.submitApplication(app_0, user_0, A);
 
     final ApplicationAttemptId appAttemptId_1 = 
         TestUtils.getMockApplicationAttemptId(1, 0); 
     FiCaSchedulerApp app_1 = 
         new FiCaSchedulerApp(appAttemptId_1, user_0, a, 
-            a.getActiveUsersManager(), rmContext, null);
+            a.getActiveUsersManager(), rmContext);
     a.submitApplication(app_1, user_0, A);  // same user
 
     final ApplicationAttemptId appAttemptId_2 = 
         TestUtils.getMockApplicationAttemptId(2, 0); 
     FiCaSchedulerApp app_2 = 
         new FiCaSchedulerApp(appAttemptId_2, user_1, a, 
-            a.getActiveUsersManager(), rmContext, null);
+            a.getActiveUsersManager(), rmContext);
     a.submitApplication(app_2, user_1, A);
 
     final ApplicationAttemptId appAttemptId_3 = 
         TestUtils.getMockApplicationAttemptId(3, 0); 
     FiCaSchedulerApp app_3 = 
         new FiCaSchedulerApp(appAttemptId_3, user_2, a, 
-            a.getActiveUsersManager(), rmContext, null);
+            a.getActiveUsersManager(), rmContext);
     a.submitApplication(app_3, user_2, A);
     
     // Setup some nodes
@@ -874,14 +874,14 @@ public class TestLeafQueue {
         TestUtils.getMockApplicationAttemptId(0, 0); 
     FiCaSchedulerApp app_0 = 
         new FiCaSchedulerApp(appAttemptId_0, user_0, a, 
-            mock(ActiveUsersManager.class), rmContext, null);
+            mock(ActiveUsersManager.class), rmContext);
     a.submitApplication(app_0, user_0, A);
 
     final ApplicationAttemptId appAttemptId_1 = 
         TestUtils.getMockApplicationAttemptId(1, 0); 
     FiCaSchedulerApp app_1 = 
         new FiCaSchedulerApp(appAttemptId_1, user_1, a, 
-            mock(ActiveUsersManager.class), rmContext, null);
+            mock(ActiveUsersManager.class), rmContext);
     a.submitApplication(app_1, user_1, A);  
 
     // Setup some nodes
@@ -973,14 +973,14 @@ public class TestLeafQueue {
         TestUtils.getMockApplicationAttemptId(0, 0);
     FiCaSchedulerApp app_0 =
         new FiCaSchedulerApp(appAttemptId_0, user_0, a,
-            mock(ActiveUsersManager.class), rmContext, null);
+            mock(ActiveUsersManager.class), rmContext);
     a.submitApplication(app_0, user_0, A);
 
     final ApplicationAttemptId appAttemptId_1 =
         TestUtils.getMockApplicationAttemptId(1, 0);
     FiCaSchedulerApp app_1 =
         new FiCaSchedulerApp(appAttemptId_1, user_1, a,
-            mock(ActiveUsersManager.class), rmContext, null);
+            mock(ActiveUsersManager.class), rmContext);
     a.submitApplication(app_1, user_1, A);
 
     // Setup some nodes
@@ -1072,14 +1072,14 @@ public class TestLeafQueue {
         TestUtils.getMockApplicationAttemptId(0, 0); 
     FiCaSchedulerApp app_0 = 
         new FiCaSchedulerApp(appAttemptId_0, user_0, a, 
-            mock(ActiveUsersManager.class), rmContext, null);
+            mock(ActiveUsersManager.class), rmContext);
     a.submitApplication(app_0, user_0, A);
 
     final ApplicationAttemptId appAttemptId_1 = 
         TestUtils.getMockApplicationAttemptId(1, 0); 
     FiCaSchedulerApp app_1 = 
         new FiCaSchedulerApp(appAttemptId_1, user_1, a, 
-            mock(ActiveUsersManager.class), rmContext, null);
+            mock(ActiveUsersManager.class), rmContext);
     a.submitApplication(app_1, user_1, A);  
 
     // Setup some nodes
@@ -1187,7 +1187,7 @@ public class TestLeafQueue {
         TestUtils.getMockApplicationAttemptId(0, 0); 
     FiCaSchedulerApp app_0 = 
         spy(new FiCaSchedulerApp(appAttemptId_0, user_0, a, 
-            mock(ActiveUsersManager.class), rmContext, null));
+            mock(ActiveUsersManager.class), rmContext));
     a.submitApplication(app_0, user_0, A);
     
     // Setup some nodes and racks
@@ -1327,7 +1327,7 @@ public class TestLeafQueue {
         TestUtils.getMockApplicationAttemptId(0, 0); 
     FiCaSchedulerApp app_0 = 
         spy(new FiCaSchedulerApp(appAttemptId_0, user_0, a, 
-            mock(ActiveUsersManager.class), rmContext, null));
+            mock(ActiveUsersManager.class), rmContext));
     a.submitApplication(app_0, user_0, A);
     
     // Setup some nodes and racks
@@ -1457,7 +1457,7 @@ public class TestLeafQueue {
         TestUtils.getMockApplicationAttemptId(0, 0); 
     FiCaSchedulerApp app_0 = 
         spy(new FiCaSchedulerApp(appAttemptId_0, user_0, a, 
-            mock(ActiveUsersManager.class), rmContext, null));
+            mock(ActiveUsersManager.class), rmContext));
     a.submitApplication(app_0, user_0, A);
     
     // Setup some nodes and racks

+ 1 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestQueueParsing.java

@@ -43,7 +43,7 @@ public class TestQueueParsing {
 
     CapacityScheduler capacityScheduler = new CapacityScheduler();
     capacityScheduler.setConf(conf);
-    capacityScheduler.reinitialize(conf, new RMContextImpl(null, null, null,
+    capacityScheduler.reinitialize(conf, new RMContextImpl(null, null,
       null, null, null, null, new RMContainerTokenSecretManager(conf),
       new ClientToAMTokenSecretManagerInRM()));
     

+ 1 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java

@@ -83,7 +83,7 @@ public class TestUtils {
     
     Configuration conf = new Configuration();
     RMContext rmContext =
-        new RMContextImpl(null, nullDispatcher, cae, null, null, null,
+        new RMContextImpl(nullDispatcher, cae, null, null, null,
           new ApplicationTokenSecretManager(conf),
           new RMContainerTokenSecretManager(conf),
           new ClientToAMTokenSecretManagerInRM());

+ 0 - 3
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FakeSchedulable.java

@@ -107,9 +107,6 @@ public class FakeSchedulable extends Schedulable {
     return minShare;
   }
 
-  @Override
-  public void redistributeShare() {}
-
   @Override
   public void updateDemand() {}
 }

+ 83 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSLeafQueue.java

@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair;
+
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.event.AsyncDispatcher;
+import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory;
+import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mockito;
+
+public class TestFSLeafQueue { // checks FSLeafQueue.updateDemand() using a mocked QueueManager
+  private FSLeafQueue schedulable = null;
+  private Resource maxResource = Resources.createResource(10); // returned by the mock as the queue max; also each mock app's demand
+
+  @Before
+  public void setup() throws IOException {
+    FairScheduler scheduler = new FairScheduler();
+    Configuration conf = createConfiguration();
+    // All tests assume only one assignment per node update
+    conf.set(FairSchedulerConfiguration.ASSIGN_MULTIPLE, "false");
+    RMStateStore store = StoreFactory.getStore(conf);
+    ResourceManager resourceManager = new ResourceManager(store);
+    resourceManager.init(conf);
+    ((AsyncDispatcher)resourceManager.getRMContext().getDispatcher()).start(); // NOTE(review): started here, but this class has no @After that stops it
+    scheduler.reinitialize(conf, resourceManager.getRMContext());
+    
+    String queueName = "root.queue1";
+    QueueManager mockMgr = mock(QueueManager.class);
+    when(mockMgr.getMaxResources(queueName)).thenReturn(maxResource); // only the max for this queue name is stubbed
+
+    schedulable = new FSLeafQueue(queueName, mockMgr, scheduler, null);
+  }
+
+  @Test
+  public void testUpdateDemand() {
+    AppSchedulable app = mock(AppSchedulable.class);
+    Mockito.when(app.getDemand()).thenReturn(maxResource);
+
+    schedulable.addAppSchedulable(app); // the same mock is added twice; each entry
+    schedulable.addAppSchedulable(app); // reports maxResource as its demand
+
+    schedulable.updateDemand();
+
+    assertTrue("Demand is greater than max allowed ",
+        Resources.equals(schedulable.getDemand(), maxResource)); // queue demand is expected to equal the stubbed max
+  }
+  
+  private Configuration createConfiguration() {
+    Configuration conf = new YarnConfiguration();
+    conf.setClass(YarnConfiguration.RM_SCHEDULER, FairScheduler.class, // register FairScheduler under the RM_SCHEDULER key
+        ResourceScheduler.class);
+    return conf;
+  }
+}

+ 0 - 42
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSQueueSchedulable.java

@@ -1,42 +0,0 @@
-package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair;
-
-import static org.junit.Assert.assertTrue;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.when;
-
-import org.apache.hadoop.yarn.api.records.Resource;
-import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
-import org.junit.Before;
-import org.junit.Test;
-import org.mockito.Mockito;
-
-public class TestFSQueueSchedulable {
-  private FSQueueSchedulable schedulable = null;
-  private Resource maxResource = Resources.createResource(10);
-
-  @Before
-  public void setup() {
-    String queueName = "testFSQueue";
-    FSQueue mockQueue = mock(FSQueue.class);
-    when(mockQueue.getName()).thenReturn(queueName);
-
-    QueueManager mockMgr = mock(QueueManager.class);
-    when(mockMgr.getMaxResources(queueName)).thenReturn(maxResource);
-
-    schedulable = new FSQueueSchedulable(null, mockQueue, mockMgr, null, 0, 0);
-  }
-
-  @Test
-  public void testUpdateDemand() {
-    AppSchedulable app = mock(AppSchedulable.class);
-    Mockito.when(app.getDemand()).thenReturn(maxResource);
-
-    schedulable.addApp(app);
-    schedulable.addApp(app);
-
-    schedulable.updateDemand();
-
-    assertTrue("Demand is greater than max allowed ",
-        Resources.equals(schedulable.getDemand(), maxResource));
-  }
-}

+ 2 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSSchedulerApp.java

@@ -53,7 +53,7 @@ public class TestFSSchedulerApp {
 
     ApplicationAttemptId applicationAttemptId = createAppAttemptId(1, 1);
     FSSchedulerApp schedulerApp =
-        new FSSchedulerApp(applicationAttemptId, "user1", queue , null, null, null);
+        new FSSchedulerApp(applicationAttemptId, "user1", queue , null, null);
 
     // Default level should be node-local
     assertEquals(NodeType.NODE_LOCAL, schedulerApp.getAllowedLocalityLevel(
@@ -111,7 +111,7 @@ public class TestFSSchedulerApp {
 
     ApplicationAttemptId applicationAttemptId = createAppAttemptId(1, 1);
     FSSchedulerApp schedulerApp =
-        new FSSchedulerApp(applicationAttemptId, "user1", queue , null, null, null);
+        new FSSchedulerApp(applicationAttemptId, "user1", queue , null, null);
     assertEquals(NodeType.OFF_SWITCH, schedulerApp.getAllowedLocalityLevel(
         prio, 10, -1.0, -1.0));
   }

+ 206 - 108
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java

@@ -31,6 +31,10 @@ import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 
+import javax.xml.parsers.ParserConfigurationException;
+
+import junit.framework.Assert;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.security.authorize.AccessControlList;
 import org.apache.hadoop.yarn.Clock;
@@ -47,7 +51,7 @@ import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.server.resourcemanager.MockNodes;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory;
 import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
@@ -61,6 +65,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateS
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
+import org.xml.sax.SAXException;
 
 public class TestFairScheduler {
 
@@ -97,7 +102,7 @@ public class TestFairScheduler {
     Configuration conf = createConfiguration();
     // All tests assume only one assignment per node update
     conf.set(FairSchedulerConfiguration.ASSIGN_MULTIPLE, "false");
-    Store store = StoreFactory.getStore(conf);
+    RMStateStore store = StoreFactory.getStore(conf);
     resourceManager = new ResourceManager(store);
     resourceManager.init(conf);
     ((AsyncDispatcher)resourceManager.getRMContext().getDispatcher()).start();
@@ -195,15 +200,64 @@ public class TestFairScheduler {
 
     scheduler.update();
 
-    Collection<FSQueue> queues = scheduler.getQueueManager().getQueues();
+    Collection<FSLeafQueue> queues = scheduler.getQueueManager().getLeafQueues();
     assertEquals(3, queues.size());
 
-    for (FSQueue p : queues) {
-      if (p.getName() != "default") {
-        assertEquals(5120, p.getQueueSchedulable().getFairShare().getMemory());
+    for (FSLeafQueue p : queues) {
+      if (!p.getName().equals("root.default")) {
+        assertEquals(5120, p.getFairShare().getMemory());
       }
     }
   }
+  
+  @Test
+  public void testSimpleHierarchicalFairShareCalculation() {
+    // Add one big node (only care about aggregate capacity)
+    int capacity = 10 * 24;
+    RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(capacity));
+    NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
+    scheduler.handle(nodeEvent1);
+
+    // Three leaf queues (queue1, plus queue2 and queue3 under "parent"), each asking for more than the node offers
+    createSchedulingRequest(10 * 1024, "queue1", "user1");
+    createSchedulingRequest(10 * 1024, "parent.queue2", "user1");
+    createSchedulingRequest(10 * 1024, "parent.queue3", "user1");
+
+    scheduler.update();
+
+    QueueManager queueManager = scheduler.getQueueManager();
+    Collection<FSLeafQueue> queues = queueManager.getLeafQueues();
+    assertEquals(4, queues.size()); // NOTE(review): presumably the three above plus the default queue - confirm
+    
+    FSLeafQueue queue1 = queueManager.getLeafQueue("queue1");
+    FSLeafQueue queue2 = queueManager.getLeafQueue("parent.queue2");
+    FSLeafQueue queue3 = queueManager.getLeafQueue("parent.queue3");
+    assertEquals(capacity / 2, queue1.getFairShare().getMemory()); // top-level queue1 is expected to get half
+    assertEquals(capacity / 4, queue2.getFairShare().getMemory()); // the two queues under "parent" are
+    assertEquals(capacity / 4, queue3.getFairShare().getMemory()); // expected to get a quarter each
+  }
+  
+  @Test
+  public void testHierarchicalQueuesSimilarParents() {
+    QueueManager queueManager = scheduler.getQueueManager();
+    FSLeafQueue leafQueue = queueManager.getLeafQueue("parent.child"); // looked up without the "root." prefix
+    Assert.assertEquals(2, queueManager.getLeafQueues().size()); // NOTE(review): presumably default + parent.child - confirm
+    Assert.assertNotNull(leafQueue);
+    Assert.assertEquals("root.parent.child", leafQueue.getName()); // returned name carries the "root." prefix
+
+    FSLeafQueue leafQueue2 = queueManager.getLeafQueue("parent"); // "parent" is expected not to resolve to a leaf
+    Assert.assertNull(leafQueue2);
+    Assert.assertEquals(2, queueManager.getLeafQueues().size()); // and the leaf count must not change
+    
+    FSLeafQueue leafQueue3 = queueManager.getLeafQueue("parent.child.grandchild"); // a name below the leaf "parent.child" is expected to fail too
+    Assert.assertNull(leafQueue3);
+    Assert.assertEquals(2, queueManager.getLeafQueues().size());
+    
+    FSLeafQueue leafQueue4 = queueManager.getLeafQueue("parent.sister"); // a new sibling under "parent" is expected to resolve
+    Assert.assertNotNull(leafQueue4);
+    Assert.assertEquals("root.parent.sister", leafQueue4.getName());
+    Assert.assertEquals(3, queueManager.getLeafQueues().size()); // leaf count is expected to grow by one
+  }
 
   @Test
   public void testSimpleContainerAllocation() {
@@ -228,14 +282,14 @@ public class TestFairScheduler {
     // Asked for less than min_allocation.
     assertEquals(YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
         scheduler.getQueueManager().getQueue("queue1").
-        getQueueSchedulable().getResourceUsage().getMemory());
+        getResourceUsage().getMemory());
 
     NodeUpdateSchedulerEvent updateEvent2 = new NodeUpdateSchedulerEvent(node2,
         new ArrayList<ContainerStatus>(), new ArrayList<ContainerStatus>());
     scheduler.handle(updateEvent2);
 
     assertEquals(1024, scheduler.getQueueManager().getQueue("queue1").
-      getQueueSchedulable().getResourceUsage().getMemory());
+      getResourceUsage().getMemory());
   }
 
   @Test
@@ -254,7 +308,7 @@ public class TestFairScheduler {
 
     // Make sure queue 1 is allocated app capacity
     assertEquals(1024, scheduler.getQueueManager().getQueue("queue1").
-        getQueueSchedulable().getResourceUsage().getMemory());
+        getResourceUsage().getMemory());
 
     // Now queue 2 requests likewise
     ApplicationAttemptId attId = createSchedulingRequest(1024, "queue2", "user1", 1);
@@ -263,7 +317,7 @@ public class TestFairScheduler {
 
     // Make sure queue 2 is waiting with a reservation
     assertEquals(0, scheduler.getQueueManager().getQueue("queue2").
-      getQueueSchedulable().getResourceUsage().getMemory());
+      getResourceUsage().getMemory());
     assertEquals(1024, scheduler.applications.get(attId).getCurrentReservation().getMemory());
 
     // Now another node checks in with capacity
@@ -276,7 +330,7 @@ public class TestFairScheduler {
 
     // Make sure this goes to queue 2
     assertEquals(1024, scheduler.getQueueManager().getQueue("queue2").
-        getQueueSchedulable().getResourceUsage().getMemory());
+        getResourceUsage().getMemory());
 
     // The old reservation should still be there...
     assertEquals(1024, scheduler.applications.get(attId).getCurrentReservation().getMemory());
@@ -294,17 +348,22 @@ public class TestFairScheduler {
     AppAddedSchedulerEvent appAddedEvent = new AppAddedSchedulerEvent(
         createAppAttemptId(1, 1), "default", "user1");
     scheduler.handle(appAddedEvent);
-    assertEquals(1, scheduler.getQueueManager().getQueue("user1").getApplications().size());
-    assertEquals(0, scheduler.getQueueManager().getQueue("default").getApplications().size());
+    assertEquals(1, scheduler.getQueueManager().getLeafQueue("user1")
+        .getAppSchedulables().size());
+    assertEquals(0, scheduler.getQueueManager().getLeafQueue("default")
+        .getAppSchedulables().size());
 
     conf.set(FairSchedulerConfiguration.USER_AS_DEFAULT_QUEUE, "false");
     scheduler.reinitialize(conf, resourceManager.getRMContext());
     AppAddedSchedulerEvent appAddedEvent2 = new AppAddedSchedulerEvent(
         createAppAttemptId(2, 1), "default", "user2");
     scheduler.handle(appAddedEvent2);
-    assertEquals(1, scheduler.getQueueManager().getQueue("user1").getApplications().size());
-    assertEquals(1, scheduler.getQueueManager().getQueue("default").getApplications().size());
-    assertEquals(0, scheduler.getQueueManager().getQueue("user2").getApplications().size());
+    assertEquals(1, scheduler.getQueueManager().getLeafQueue("user1")
+        .getAppSchedulables().size());
+    assertEquals(1, scheduler.getQueueManager().getLeafQueue("default")
+        .getAppSchedulables().size());
+    assertEquals(0, scheduler.getQueueManager().getLeafQueue("user2")
+        .getAppSchedulables().size());
   }
 
   @Test
@@ -338,18 +397,17 @@ public class TestFairScheduler {
 
     scheduler.update();
 
-    Collection<FSQueue> queues = scheduler.getQueueManager().getQueues();
+    Collection<FSLeafQueue> queues = scheduler.getQueueManager().getLeafQueues();
     assertEquals(3, queues.size());
 
-    for (FSQueue p : queues) {
-      if (p.getName().equals("queueA")) {
-        assertEquals(1024, p.getQueueSchedulable().getFairShare().getMemory());
+    for (FSLeafQueue p : queues) {
+      if (p.getName().equals("root.queueA")) {
+        assertEquals(1024, p.getFairShare().getMemory());
       }
-      else if (p.getName().equals("queueB")) {
-        assertEquals(2048, p.getQueueSchedulable().getFairShare().getMemory());
+      else if (p.getName().equals("root.queueB")) {
+        assertEquals(2048, p.getFairShare().getMemory());
       }
     }
-
   }
 
   /**
@@ -358,11 +416,11 @@ public class TestFairScheduler {
   @Test
   public void testQueueDemandCalculation() throws Exception {
     ApplicationAttemptId id11 = createAppAttemptId(1, 1);
-    scheduler.addApplication(id11, "queue1", "user1");
+    scheduler.addApplication(id11, "root.queue1", "user1");
     ApplicationAttemptId id21 = createAppAttemptId(2, 1);
-    scheduler.addApplication(id21, "queue2", "user1");
+    scheduler.addApplication(id21, "root.queue2", "user1");
     ApplicationAttemptId id22 = createAppAttemptId(2, 2);
-    scheduler.addApplication(id22, "queue2", "user1");
+    scheduler.addApplication(id22, "root.queue2", "user1");
 
     int minReqSize = YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB;
     
@@ -388,10 +446,10 @@ public class TestFairScheduler {
 
     scheduler.update();
 
-    assertEquals(2 * minReqSize, scheduler.getQueueManager().getQueue("queue1")
-        .getQueueSchedulable().getDemand().getMemory());
+    assertEquals(2 * minReqSize, scheduler.getQueueManager().getQueue("root.queue1")
+        .getDemand().getMemory());
     assertEquals(2 * minReqSize + 2 * minReqSize + (2 * minReqSize), scheduler
-        .getQueueManager().getQueue("queue2").getQueueSchedulable().getDemand()
+        .getQueueManager().getQueue("root.queue2").getDemand()
         .getMemory());
   }
 
@@ -401,11 +459,12 @@ public class TestFairScheduler {
         createAppAttemptId(1, 1), "default", "user1");
     scheduler.handle(appAddedEvent1);
 
-    // Scheduler should have one queue (the default)
-    assertEquals(1, scheduler.getQueueManager().getQueues().size());
+    // Scheduler should have two queues (the default and the one created for user1)
+    assertEquals(2, scheduler.getQueueManager().getLeafQueues().size());
 
     // That queue should have one app
-    assertEquals(1, scheduler.getQueueManager().getQueue("default").getApplications().size());
+    assertEquals(1, scheduler.getQueueManager().getLeafQueue("user1")
+        .getAppSchedulables().size());
 
     AppRemovedSchedulerEvent appRemovedEvent1 = new AppRemovedSchedulerEvent(
         createAppAttemptId(1, 1), RMAppAttemptState.FINISHED);
@@ -413,8 +472,9 @@ public class TestFairScheduler {
     // Now remove app
     scheduler.handle(appRemovedEvent1);
 
-    // Default queue should have no apps
-    assertEquals(0, scheduler.getQueueManager().getQueue("default").getApplications().size());
+    // Queue should have no apps
+    assertEquals(0, scheduler.getQueueManager().getLeafQueue("user1")
+        .getAppSchedulables().size());
   }
 
   @Test
@@ -466,60 +526,98 @@ public class TestFairScheduler {
     QueueManager queueManager = scheduler.getQueueManager();
     queueManager.initialize();
 
-    assertEquals(6, queueManager.getQueues().size()); // 5 in file + default queue
+    assertEquals(6, queueManager.getLeafQueues().size()); // 5 in file + default queue
     assertEquals(Resources.createResource(0),
-        queueManager.getMinResources(YarnConfiguration.DEFAULT_QUEUE_NAME));
+        queueManager.getMinResources("root." + YarnConfiguration.DEFAULT_QUEUE_NAME));
     assertEquals(Resources.createResource(0),
-        queueManager.getMinResources(YarnConfiguration.DEFAULT_QUEUE_NAME));
+        queueManager.getMinResources("root." + YarnConfiguration.DEFAULT_QUEUE_NAME));
 
     assertEquals(Resources.createResource(1024),
-        queueManager.getMinResources("queueA"));
+        queueManager.getMinResources("root.queueA"));
     assertEquals(Resources.createResource(2048),
-        queueManager.getMinResources("queueB"));
+        queueManager.getMinResources("root.queueB"));
     assertEquals(Resources.createResource(0),
-        queueManager.getMinResources("queueC"));
+        queueManager.getMinResources("root.queueC"));
     assertEquals(Resources.createResource(0),
-        queueManager.getMinResources("queueD"));
+        queueManager.getMinResources("root.queueD"));
     assertEquals(Resources.createResource(0),
-        queueManager.getMinResources("queueE"));
-
-    assertEquals(15, queueManager.getQueueMaxApps(YarnConfiguration.DEFAULT_QUEUE_NAME));
-    assertEquals(15, queueManager.getQueueMaxApps("queueA"));
-    assertEquals(15, queueManager.getQueueMaxApps("queueB"));
-    assertEquals(15, queueManager.getQueueMaxApps("queueC"));
-    assertEquals(3, queueManager.getQueueMaxApps("queueD"));
-    assertEquals(15, queueManager.getQueueMaxApps("queueE"));
+        queueManager.getMinResources("root.queueE"));
+
+    assertEquals(15, queueManager.getQueueMaxApps("root." + YarnConfiguration.DEFAULT_QUEUE_NAME));
+    assertEquals(15, queueManager.getQueueMaxApps("root.queueA"));
+    assertEquals(15, queueManager.getQueueMaxApps("root.queueB"));
+    assertEquals(15, queueManager.getQueueMaxApps("root.queueC"));
+    assertEquals(3, queueManager.getQueueMaxApps("root.queueD"));
+    assertEquals(15, queueManager.getQueueMaxApps("root.queueE"));
     assertEquals(10, queueManager.getUserMaxApps("user1"));
     assertEquals(5, queueManager.getUserMaxApps("user2"));
 
     // Unspecified queues should get default ACL
-    Map<QueueACL, AccessControlList> aclsA = queueManager.getQueueAcls("queueA");
+    Map<QueueACL, AccessControlList> aclsA = queueManager.getQueueAcls("root.queueA");
     assertTrue(aclsA.containsKey(QueueACL.ADMINISTER_QUEUE));
     assertEquals("*", aclsA.get(QueueACL.ADMINISTER_QUEUE).getAclString());
     assertTrue(aclsA.containsKey(QueueACL.SUBMIT_APPLICATIONS));
     assertEquals("*", aclsA.get(QueueACL.SUBMIT_APPLICATIONS).getAclString());
 
     // Queue B ACL
-    Map<QueueACL, AccessControlList> aclsB = queueManager.getQueueAcls("queueB");
+    Map<QueueACL, AccessControlList> aclsB = queueManager.getQueueAcls("root.queueB");
     assertTrue(aclsB.containsKey(QueueACL.ADMINISTER_QUEUE));
     assertEquals("alice,bob admins", aclsB.get(QueueACL.ADMINISTER_QUEUE).getAclString());
 
     // Queue c ACL
-    Map<QueueACL, AccessControlList> aclsC = queueManager.getQueueAcls("queueC");
+    Map<QueueACL, AccessControlList> aclsC = queueManager.getQueueAcls("root.queueC");
     assertTrue(aclsC.containsKey(QueueACL.SUBMIT_APPLICATIONS));
     assertEquals("alice,bob admins", aclsC.get(QueueACL.SUBMIT_APPLICATIONS).getAclString());
 
-    assertEquals(120000, queueManager.getMinSharePreemptionTimeout(
+    assertEquals(120000, queueManager.getMinSharePreemptionTimeout("root." + 
         YarnConfiguration.DEFAULT_QUEUE_NAME));
-    assertEquals(120000, queueManager.getMinSharePreemptionTimeout("queueA"));
-    assertEquals(120000, queueManager.getMinSharePreemptionTimeout("queueB"));
-    assertEquals(120000, queueManager.getMinSharePreemptionTimeout("queueC"));
-    assertEquals(120000, queueManager.getMinSharePreemptionTimeout("queueD"));
-    assertEquals(120000, queueManager.getMinSharePreemptionTimeout("queueA"));
-    assertEquals(60000, queueManager.getMinSharePreemptionTimeout("queueE"));
+    assertEquals(120000, queueManager.getMinSharePreemptionTimeout("root.queueA"));
+    assertEquals(120000, queueManager.getMinSharePreemptionTimeout("root.queueB"));
+    assertEquals(120000, queueManager.getMinSharePreemptionTimeout("root.queueC"));
+    assertEquals(120000, queueManager.getMinSharePreemptionTimeout("root.queueD"));
+    assertEquals(120000, queueManager.getMinSharePreemptionTimeout("root.queueA"));
+    assertEquals(60000, queueManager.getMinSharePreemptionTimeout("root.queueE"));
     assertEquals(300000, queueManager.getFairSharePreemptionTimeout());
   }
 
+  @Test
+  public void testHierarchicalQueueAllocationFileParsing() throws IOException, SAXException, 
+      AllocationConfigurationException, ParserConfigurationException {
+    Configuration conf = createConfiguration();
+    conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
+    scheduler.reinitialize(conf, resourceManager.getRMContext());
+
+    PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE)); // allocation file with one nested <queue> element
+    out.println("<?xml version=\"1.0\"?>");
+    out.println("<allocations>");
+    out.println("<queue name=\"queueA\">");
+    out.println("<minResources>2048</minResources>");
+    out.println("</queue>");
+    out.println("<queue name=\"queueB\">"); // parent element: encloses queueC and queueD below
+    out.println("<minResources>2048</minResources>");
+    out.println("<queue name=\"queueC\">");
+    out.println("<minResources>2048</minResources>");
+    out.println("</queue>");
+    out.println("<queue name=\"queueD\">");
+    out.println("<minResources>2048</minResources>");
+    out.println("</queue>");
+    out.println("</queue>");
+    out.println("</allocations>");
+    out.close();
+
+    QueueManager queueManager = scheduler.getQueueManager();
+    queueManager.initialize();
+    
+    Collection<FSLeafQueue> leafQueues = queueManager.getLeafQueues();
+    Assert.assertEquals(4, leafQueues.size()); // expected leaves are the four looked up below
+    Assert.assertNotNull(queueManager.getLeafQueue("queueA"));
+    Assert.assertNotNull(queueManager.getLeafQueue("queueB.queueC"));
+    Assert.assertNotNull(queueManager.getLeafQueue("queueB.queueD"));
+    Assert.assertNotNull(queueManager.getLeafQueue("default"));
+    // Make sure querying for queues didn't create any new ones (re-query; the earlier collection may be a snapshot):
+    Assert.assertEquals(4, queueManager.getLeafQueues().size());
+  }
+  
   @Test
   public void testBackwardsCompatibleAllocationFileParsing() throws Exception {
     Configuration conf = createConfiguration();
@@ -569,29 +667,29 @@ public class TestFairScheduler {
     QueueManager queueManager = scheduler.getQueueManager();
     queueManager.initialize();
 
-    assertEquals(6, queueManager.getQueues().size()); // 5 in file + default queue
+    assertEquals(6, queueManager.getLeafQueues().size()); // 5 in file + default queue
     assertEquals(Resources.createResource(0),
-        queueManager.getMinResources(YarnConfiguration.DEFAULT_QUEUE_NAME));
+        queueManager.getMinResources("root." + YarnConfiguration.DEFAULT_QUEUE_NAME));
     assertEquals(Resources.createResource(0),
-        queueManager.getMinResources(YarnConfiguration.DEFAULT_QUEUE_NAME));
+        queueManager.getMinResources("root." + YarnConfiguration.DEFAULT_QUEUE_NAME));
 
     assertEquals(Resources.createResource(1024),
-        queueManager.getMinResources("queueA"));
+        queueManager.getMinResources("root.queueA"));
     assertEquals(Resources.createResource(2048),
-        queueManager.getMinResources("queueB"));
+        queueManager.getMinResources("root.queueB"));
     assertEquals(Resources.createResource(0),
-        queueManager.getMinResources("queueC"));
+        queueManager.getMinResources("root.queueC"));
     assertEquals(Resources.createResource(0),
-        queueManager.getMinResources("queueD"));
+        queueManager.getMinResources("root.queueD"));
     assertEquals(Resources.createResource(0),
-        queueManager.getMinResources("queueE"));
-
-    assertEquals(15, queueManager.getQueueMaxApps(YarnConfiguration.DEFAULT_QUEUE_NAME));
-    assertEquals(15, queueManager.getQueueMaxApps("queueA"));
-    assertEquals(15, queueManager.getQueueMaxApps("queueB"));
-    assertEquals(15, queueManager.getQueueMaxApps("queueC"));
-    assertEquals(3, queueManager.getQueueMaxApps("queueD"));
-    assertEquals(15, queueManager.getQueueMaxApps("queueE"));
+        queueManager.getMinResources("root.queueE"));
+
+    assertEquals(15, queueManager.getQueueMaxApps("root." + YarnConfiguration.DEFAULT_QUEUE_NAME));
+    assertEquals(15, queueManager.getQueueMaxApps("root.queueA"));
+    assertEquals(15, queueManager.getQueueMaxApps("root.queueB"));
+    assertEquals(15, queueManager.getQueueMaxApps("root.queueC"));
+    assertEquals(3, queueManager.getQueueMaxApps("root.queueD"));
+    assertEquals(15, queueManager.getQueueMaxApps("root.queueE"));
     assertEquals(10, queueManager.getUserMaxApps("user1"));
     assertEquals(5, queueManager.getUserMaxApps("user2"));
 
@@ -603,23 +701,23 @@ public class TestFairScheduler {
     assertEquals("*", aclsA.get(QueueACL.SUBMIT_APPLICATIONS).getAclString());
 
     // Queue B ACL
-    Map<QueueACL, AccessControlList> aclsB = queueManager.getQueueAcls("queueB");
+    Map<QueueACL, AccessControlList> aclsB = queueManager.getQueueAcls("root.queueB");
     assertTrue(aclsB.containsKey(QueueACL.ADMINISTER_QUEUE));
     assertEquals("alice,bob admins", aclsB.get(QueueACL.ADMINISTER_QUEUE).getAclString());
 
     // Queue c ACL
-    Map<QueueACL, AccessControlList> aclsC = queueManager.getQueueAcls("queueC");
+    Map<QueueACL, AccessControlList> aclsC = queueManager.getQueueAcls("root.queueC");
     assertTrue(aclsC.containsKey(QueueACL.SUBMIT_APPLICATIONS));
     assertEquals("alice,bob admins", aclsC.get(QueueACL.SUBMIT_APPLICATIONS).getAclString());
 
-    assertEquals(120000, queueManager.getMinSharePreemptionTimeout(
+    assertEquals(120000, queueManager.getMinSharePreemptionTimeout("root." +
         YarnConfiguration.DEFAULT_QUEUE_NAME));
-    assertEquals(120000, queueManager.getMinSharePreemptionTimeout("queueA"));
-    assertEquals(120000, queueManager.getMinSharePreemptionTimeout("queueB"));
-    assertEquals(120000, queueManager.getMinSharePreemptionTimeout("queueC"));
-    assertEquals(120000, queueManager.getMinSharePreemptionTimeout("queueD"));
-    assertEquals(120000, queueManager.getMinSharePreemptionTimeout("queueA"));
-    assertEquals(60000, queueManager.getMinSharePreemptionTimeout("queueE"));
+    assertEquals(120000, queueManager.getMinSharePreemptionTimeout("root.queueA"));
+    assertEquals(120000, queueManager.getMinSharePreemptionTimeout("root.queueB"));
+    assertEquals(120000, queueManager.getMinSharePreemptionTimeout("root.queueC"));
+    assertEquals(120000, queueManager.getMinSharePreemptionTimeout("root.queueD"));
+    assertEquals(120000, queueManager.getMinSharePreemptionTimeout("root.queueA"));
+    assertEquals(60000, queueManager.getMinSharePreemptionTimeout("root.queueE"));
     assertEquals(300000, queueManager.getFairSharePreemptionTimeout());
   }
 
@@ -659,25 +757,25 @@ public class TestFairScheduler {
     // Queue B arrives and wants 1 * 1024
     createSchedulingRequest(1 * 1024, "queueB", "user1");
     scheduler.update();
-    Collection<FSQueue> queues = scheduler.getQueueManager().getQueues();
+    Collection<FSLeafQueue> queues = scheduler.getQueueManager().getLeafQueues();
     assertEquals(3, queues.size());
 
     // Queue A should be above min share, B below.
-    for (FSQueue p : queues) {
-      if (p.getName().equals("queueA")) {
-        assertEquals(false, scheduler.isStarvedForMinShare(p.getQueueSchedulable()));
+    for (FSLeafQueue p : queues) {
+      if (p.getName().equals("root.queueA")) {
+        assertEquals(false, scheduler.isStarvedForMinShare(p));
       }
-      else if (p.getName().equals("queueB")) {
-        assertEquals(true, scheduler.isStarvedForMinShare(p.getQueueSchedulable()));
+      else if (p.getName().equals("root.queueB")) {
+        assertEquals(true, scheduler.isStarvedForMinShare(p));
       }
     }
 
     // Node checks in again, should allocate for B
     scheduler.handle(nodeEvent2);
     // Now B should have min share ( = demand here)
-    for (FSQueue p : queues) {
-      if (p.getName().equals("queueB")) {
-        assertEquals(false, scheduler.isStarvedForMinShare(p.getQueueSchedulable()));
+    for (FSLeafQueue p : queues) {
+      if (p.getName().equals("root.queueB")) {
+        assertEquals(false, scheduler.isStarvedForMinShare(p));
       }
     }
   }
@@ -718,16 +816,16 @@ public class TestFairScheduler {
     // Queue B arrives and wants 1 * 1024
     createSchedulingRequest(1 * 1024, "queueB", "user1");
     scheduler.update();
-    Collection<FSQueue> queues = scheduler.getQueueManager().getQueues();
+    Collection<FSLeafQueue> queues = scheduler.getQueueManager().getLeafQueues();
     assertEquals(3, queues.size());
 
     // Queue A should be above fair share, B below.
-    for (FSQueue p : queues) {
-      if (p.getName().equals("queueA")) {
-        assertEquals(false, scheduler.isStarvedForFairShare(p.getQueueSchedulable()));
+    for (FSLeafQueue p : queues) {
+      if (p.getName().equals("root.queueA")) {
+        assertEquals(false, scheduler.isStarvedForFairShare(p));
       }
-      else if (p.getName().equals("queueB")) {
-        assertEquals(true, scheduler.isStarvedForFairShare(p.getQueueSchedulable()));
+      else if (p.getName().equals("root.queueB")) {
+        assertEquals(true, scheduler.isStarvedForFairShare(p));
       }
     }
 
@@ -735,9 +833,9 @@ public class TestFairScheduler {
     scheduler.handle(nodeEvent2);
     // B should not be starved for fair share, since entire demand is
     // satisfied.
-    for (FSQueue p : queues) {
-      if (p.getName().equals("queueB")) {
-        assertEquals(false, scheduler.isStarvedForFairShare(p.getQueueSchedulable()));
+    for (FSLeafQueue p : queues) {
+      if (p.getName().equals("root.queueB")) {
+        assertEquals(false, scheduler.isStarvedForFairShare(p));
       }
     }
   }
@@ -845,7 +943,7 @@ public class TestFairScheduler {
 
     // We should be able to claw back one container from A and B each.
     // Make sure it is lowest priority container.
-    scheduler.preemptResources(scheduler.getQueueSchedulables(),
+    scheduler.preemptResources(scheduler.getQueueManager().getLeafQueues(),
         Resources.createResource(2 * 1024));
     assertEquals(1, scheduler.applications.get(app1).getLiveContainers().size());
     assertEquals(1, scheduler.applications.get(app2).getLiveContainers().size());
@@ -856,7 +954,7 @@ public class TestFairScheduler {
 
     // We should be able to claw back another container from A and B each.
     // Make sure it is lowest priority container.
-    scheduler.preemptResources(scheduler.getQueueSchedulables(),
+    scheduler.preemptResources(scheduler.getQueueManager().getLeafQueues(),
         Resources.createResource(2 * 1024));
     assertEquals(1, scheduler.applications.get(app1).getLiveContainers().size());
     assertEquals(0, scheduler.applications.get(app2).getLiveContainers().size());
@@ -866,7 +964,7 @@ public class TestFairScheduler {
     assertEquals(0, scheduler.applications.get(app6).getLiveContainers().size());
 
     // Now A and B are below fair share, so preemption shouldn't do anything
-    scheduler.preemptResources(scheduler.getQueueSchedulables(),
+    scheduler.preemptResources(scheduler.getQueueManager().getLeafQueues(),
         Resources.createResource(2 * 1024));
     assertEquals(1, scheduler.applications.get(app1).getLiveContainers().size());
     assertEquals(0, scheduler.applications.get(app2).getLiveContainers().size());
@@ -977,10 +1075,10 @@ public class TestFairScheduler {
 
     scheduler.update();
 
-    FSQueueSchedulable schedC =
-        scheduler.getQueueManager().getQueue("queueC").getQueueSchedulable();
-    FSQueueSchedulable schedD =
-        scheduler.getQueueManager().getQueue("queueD").getQueueSchedulable();
+    FSLeafQueue schedC =
+        scheduler.getQueueManager().getLeafQueue("queueC");
+    FSLeafQueue schedD =
+        scheduler.getQueueManager().getLeafQueue("queueD");
 
     assertTrue(Resources.equals(
         Resources.none(), scheduler.resToPreempt(schedC, clock.getTime())));

+ 78 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerEventLog.java

@@ -0,0 +1,78 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair;
+
+import java.io.File;
+import java.io.IOException;
+
+import junit.framework.Assert;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.event.AsyncDispatcher;
+import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/** Verifies that {@link FairScheduler} creates its event log when enabled. */
+public class TestFairSchedulerEventLog {
+  private File logFile; // assigned by the test; null if it failed earlier
+  private FairScheduler scheduler;
+  private ResourceManager resourceManager;
+
+  /** Builds a FairScheduler with event logging enabled on top of a fresh RM. */
+  @Before
+  public void setUp() throws IOException {
+    scheduler = new FairScheduler();
+    Configuration conf = new YarnConfiguration();
+    conf.setClass(YarnConfiguration.RM_SCHEDULER, FairScheduler.class,
+        ResourceScheduler.class);
+    conf.set("mapred.fairscheduler.eventlog.enabled", "true");
+    // All tests assume only one assignment per node update
+    conf.set(FairSchedulerConfiguration.ASSIGN_MULTIPLE, "false");
+    RMStateStore store = StoreFactory.getStore(conf);
+    resourceManager = new ResourceManager(store);
+    resourceManager.init(conf);
+    ((AsyncDispatcher)resourceManager.getRMContext().getDispatcher()).start();
+    scheduler.reinitialize(conf, resourceManager.getRMContext());
+  }
+
+  /** Make sure the scheduler creates the event log. */
+  @Test
+  public void testCreateEventLog() throws IOException {
+    FairSchedulerEventLog eventLog = scheduler.getEventLog();
+    logFile = new File(eventLog.getLogFile());
+    Assert.assertTrue(logFile.exists());
+  }
+
+  /** Deletes the log file and its directory, if the test got that far. */
+  @After
+  public void tearDown() {
+    if (logFile != null) { // avoid an NPE masking an earlier failure
+      logFile.delete();
+      logFile.getParentFile().delete(); // fairscheduler/
+    }
+    scheduler = null;
+    resourceManager = null;
+  }
+}

+ 3 - 3
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java

@@ -38,7 +38,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
 import org.apache.hadoop.yarn.server.resourcemanager.Task;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory;
 import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
 import org.apache.hadoop.yarn.server.resourcemanager.resourcetracker.InlineDispatcher;
@@ -59,7 +59,7 @@ public class TestFifoScheduler {
   
   @Before
   public void setUp() throws Exception {
-    Store store = StoreFactory.getStore(new Configuration());
+    RMStateStore store = StoreFactory.getStore(new Configuration());
     resourceManager = new ResourceManager(store);
     Configuration conf = new Configuration();
     conf.setClass(YarnConfiguration.RM_SCHEDULER, 
@@ -91,7 +91,7 @@ public class TestFifoScheduler {
   @Test
   public void testAppAttemptMetrics() throws Exception {
     AsyncDispatcher dispatcher = new InlineDispatcher();
-    RMContext rmContext = new RMContextImpl(null, dispatcher, null,
+    RMContext rmContext = new RMContextImpl(dispatcher, null,
         null, null, null, null, null, null);
 
     FifoScheduler schedular = new FifoScheduler();

+ 2 - 3
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebApp.java

@@ -38,7 +38,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
 import org.apache.hadoop.yarn.server.resourcemanager.applicationsmanager.MockAsm;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
@@ -160,7 +159,7 @@ public class TestRMWebApp {
     for (RMNode node : deactivatedNodes) {
       deactivatedNodesMap.put(node.getHostName(), node);
     }
-   return new RMContextImpl(new MemStore(), null, null, null, null,
+   return new RMContextImpl(null, null, null, null,
        null, null, null, null) {
       @Override
       public ConcurrentMap<ApplicationId, RMApp> getRMApps() {
@@ -201,7 +200,7 @@ public class TestRMWebApp {
 
     CapacityScheduler cs = new CapacityScheduler();
     cs.setConf(new YarnConfiguration());
-    cs.reinitialize(conf, new RMContextImpl(null, null, null, null, null, null,
+    cs.reinitialize(conf, new RMContextImpl(null, null, null, null, null,
       null, new RMContainerTokenSecretManager(conf),
       new ClientToAMTokenSecretManagerInRM()));
     return cs;

+ 2 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java

@@ -48,7 +48,7 @@ import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater;
 import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceTrackerService;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory;
 import org.apache.hadoop.yarn.service.AbstractService;
 import org.apache.hadoop.yarn.service.CompositeService;
@@ -154,7 +154,7 @@ public class MiniYARNCluster extends CompositeService {
           getConfig().set(YarnConfiguration.RM_WEBAPP_ADDRESS,
               MiniYARNCluster.getHostname() + ":0");
         }
-        Store store = StoreFactory.getStore(getConfig());
+        RMStateStore store = StoreFactory.getStore(getConfig());
         resourceManager = new ResourceManager(store) {
           @Override
           protected void doSecureLogin() throws IOException {

+ 21 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm

@@ -53,6 +53,22 @@ Hadoop MapReduce Next Generation - Fair Scheduler
   capacity between the running apps. queues can also be given weights to share 
   the cluster non-proportionally in the config file.
 
+  The fair scheduler supports hierarchical queues. All queues descend from a
+  queue named "root". Available resources are distributed among the children
+  of the root queue in the typical fair scheduling fashion. Then, the children
+  distribute the resources assigned to them to their children in the same
+  fashion.  Applications may only be scheduled on leaf queues. Queues can be
+  specified as children of other queues by placing them as sub-elements of 
+  their parents in the fair scheduler configuration file.
+  
+  A queue's name starts with the names of its parents, with periods as
+  separators.  So a queue named "queue1" under the root queue would be
+  referred to as "root.queue1", and a queue named "queue2" under a queue
+  named "parent1" would be referred to as "root.parent1.queue2". When
+  referring to queues, the root part of the name is optional, so queue1 could
+  be referred to as just "queue1", and queue2 could be referred to as just
+  "parent1.queue2".
+
   In addition to providing fair sharing, the Fair Scheduler allows assigning 
   guaranteed minimum shares to queues, which is useful for ensuring that 
   certain users, groups or production applications always get sufficient 
@@ -163,11 +179,14 @@ Allocation file format
 <?xml version="1.0"?>
 <allocations>
   <queue name="sample_queue">
-    <minResources>100000</minResources>
-    <maxResources>900000</maxResources>
+    <minResources>10000</minResources>
+    <maxResources>90000</maxResources>
     <maxRunningApps>50</maxRunningApps>
     <weight>2.0</weight>
     <schedulingMode>fair</schedulingMode>
+    <queue name="sample_sub_queue">
+      <minResources>5000</minResources>
+    </queue>
   </queue>
   <user name="sample_user">
     <maxRunningApps>30</maxRunningApps>