
Merge branch 'trunk' into HDFS-7240

Xiaoyu Yao, 7 years ago
commit df3ff9042a
100 changed files with 4890 additions and 651 deletions
  1. BUILDING.txt (+14 -0)
  2. hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh (+7 -2)
  3. hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProvider.java (+7 -4)
  4. hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java (+21 -0)
  5. hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RPC.java (+26 -9)
  6. hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java (+90 -16)
  7. hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/concurrent/HadoopExecutors.java (+5 -4)
  8. hadoop-common-project/hadoop-common/src/site/markdown/HttpAuthentication.md (+1 -1)
  9. hadoop-common-project/hadoop-common/src/site/markdown/release/3.1.0/CHANGES.3.1.0.md (+1022 -0)
  10. hadoop-common-project/hadoop-common/src/site/markdown/release/3.1.0/RELEASENOTES.3.1.0.md (+199 -0)
  11. hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCreateTest.java (+6 -6)
  12. hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestIOUtils.java (+1 -1)
  13. hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/AclException.java (+10 -0)
  14. hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/RequestHedgingProxyProvider.java (+3 -0)
  15. hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRequestHedgingProxyProvider.java (+34 -0)
  16. hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java (+3 -0)
  17. hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ConnectionContext.java (+30 -5)
  18. hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ConnectionManager.java (+6 -4)
  19. hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ConnectionPool.java (+92 -6)
  20. hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ConnectionPoolId.java (+16 -3)
  21. hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RemoteMethod.java (+60 -8)
  22. hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterNamenodeProtocol.java (+187 -0)
  23. hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcClient.java (+45 -17)
  24. hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcServer.java (+132 -9)
  25. hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/SubClusterTimeoutException.java (+33 -0)
  26. hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileSystemImpl.java (+4 -2)
  27. hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/MiniRouterDFSCluster.java (+38 -1)
  28. hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestConnectionManager.java (+48 -8)
  29. hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouter.java (+54 -16)
  30. hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterQuota.java (+4 -0)
  31. hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRPCClientRetries.java (+125 -1)
  32. hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRpc.java (+122 -14)
  33. hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/resources/contract/webhdfs.xml (+5 -0)
  34. hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.1.0.xml (+11 -0)
  35. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockIdManager.java (+17 -0)
  36. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java (+3 -2)
  37. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerSafeMode.java (+1 -1)
  38. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlocksMap.java (+6 -6)
  39. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CorruptReplicasMap.java (+18 -17)
  40. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/InvalidateBlocks.java (+8 -5)
  41. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionZoneManager.java (+4 -4)
  42. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirAclOp.java (+12 -0)
  43. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSTreeTraverser.java (+339 -0)
  44. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ReencryptionHandler.java (+248 -367)
  45. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ReencryptionUpdater.java (+1 -1)
  46. hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/ArchivalStorage.md (+1 -1)
  47. hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/MemoryStorage.md (+1 -1)
  48. hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java (+51 -10)
  49. hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCorruptReplicaInfo.java (+32 -16)
  50. hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestReencryption.java (+0 -3)
  51. hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestReencryptionHandler.java (+7 -3)
  52. hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/net/TestNetworkTopology.java (+72 -3)
  53. hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testCryptoConf.xml (+19 -0)
  54. hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_Common_3.1.0.xml (+11 -0)
  55. hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_Core_3.1.0.xml (+11 -0)
  56. hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_JobClient_3.1.0.xml (+11 -0)
  57. hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryEventHandler.java (+1 -1)
  58. hadoop-project/src/site/site.xml (+4 -0)
  59. hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java (+14 -4)
  60. hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java (+12 -5)
  61. hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java (+12 -0)
  62. hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java (+4 -0)
  63. hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java (+13 -1)
  64. hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md (+9 -2)
  65. hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java (+13 -8)
  66. hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml (+7 -0)
  67. hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Client_3.1.0.xml (+11 -0)
  68. hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Common_3.1.0.xml (+11 -0)
  69. hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Server_Common_3.1.0.xml (+11 -0)
  70. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/AllocationTagNamespaceType.java (+1 -1)
  71. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/timelineservice/SubApplicationEntity.java (+50 -0)
  72. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java (+42 -0)
  73. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/pom.xml (+5 -0)
  74. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/main/java/org/apache/hadoop/yarn/service/client/SystemServiceManagerImpl.java (+381 -0)
  75. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/test/java/org/apache/hadoop/yarn/service/client/TestSystemServiceImpl.java (+180 -0)
  76. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/test/resources/users/sync/user1/example-app1.yarnfile (+16 -0)
  77. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/test/resources/users/sync/user1/example-app2.yarnfile (+16 -0)
  78. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/test/resources/users/sync/user1/example-app3.json (+16 -0)
  79. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/test/resources/users/sync/user2/example-app1.yarnfile (+16 -0)
  80. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/test/resources/users/sync/user2/example-app2.yarnfile (+16 -0)
  81. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/conf/YarnServiceConf.java (+2 -0)
  82. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestSystemServiceManager.java (+156 -0)
  83. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/TimelineV2Client.java (+41 -6)
  84. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java (+11 -12)
  85. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineV2ClientImpl.java (+25 -5)
  86. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/ifile/LogAggregationIndexedFileController.java (+2 -1)
  87. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/timeline/TimelineUtils.java (+1 -2)
  88. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml (+8 -0)
  89. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/service/SystemServiceManager.java (+25 -0)
  90. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/service/package-info.java (+27 -0)
  91. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/AMRMProxyService.java (+1 -1)
  92. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java (+152 -8)
  93. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java (+2 -1)
  94. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java (+7 -1)
  95. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java (+20 -0)
  96. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java (+1 -1)
  97. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java (+5 -0)
  98. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/TestAMRMProxyService.java (+42 -0)
  99. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestAuxServices.java (+156 -11)
  100. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java (+11 -3)

+ 14 - 0
BUILDING.txt

@@ -10,6 +10,8 @@ Requirements:
 * CMake 3.1 or newer (if compiling native code)
 * Zlib devel (if compiling native code)
 * Cyrus SASL devel (if compiling native code)
+* One of the compilers that support thread_local storage: GCC 4.8.1 or later, Visual Studio,
+  Clang (community version), Clang (version for iOS 9 and later) (if compiling native code)
 * openssl devel (if compiling native hadoop-pipes and to get the best HDFS encryption performance)
 * Linux FUSE (Filesystem in Userspace) version 2.6 or above (if compiling fuse_dfs)
 * Jansson C XML parsing library ( if compiling libwebhdfs )
@@ -138,6 +140,18 @@ Maven build goals:
   * Use -DskipShade to disable client jar shading to speed up build times (in
     development environments only, not to build release artifacts)
 
+ YARN Application Timeline Service V2 build options:
+
+   YARN Timeline Service v.2 uses Apache HBase as its primary backing storage. The supported
+   versions of Apache HBase are 1.2.6 (default) and 2.0.0-beta1.
+
+  * HBase 1.2.6 is used by default to build Hadoop. The official releases are ready to use if you
+    plan on running Timeline Service v2 with HBase 1.2.6.
+
+  * Use -Dhbase.profile=2.0 to build Hadoop with HBase 2.0.0-beta1. Provide this option if you plan
+    on running Timeline Service v2 with HBase 2.0.
+
+
  Snappy build options:
 
    Snappy is a compression library that can be utilized by the native code.

+ 7 - 2
hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh

@@ -1732,11 +1732,16 @@ function hadoop_status_daemon
   shift
 
   local pid
+  local pspid
 
   if [[ -f "${pidfile}" ]]; then
     pid=$(cat "${pidfile}")
-    if ps -p "${pid}" > /dev/null 2>&1; then
-      return 0
+    if pspid=$(ps -o args= -p"${pid}" 2>/dev/null); then
+      # this is to check that the running process we found is actually the same
+      # daemon that we're interested in
+      if [[ ${pspid} =~ -Dproc_${daemonname} ]]; then
+        return 0
+      fi
     fi
     return 1
   fi
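
The guard added here matches the live process's command line against the `-Dproc_<daemonname>` marker that the Hadoop launch scripts inject, so a recycled pid in a stale pidfile is not mistaken for the daemon. The same check, sketched in Java purely for illustration (the class name and the sample command-line strings are hypothetical):

```java
public class DaemonCheckDemo {
  /** True if the process command line carries the -Dproc_<daemonname> marker. */
  static boolean isExpectedDaemon(String psArgs, String daemonname) {
    return psArgs != null && psArgs.contains("-Dproc_" + daemonname);
  }

  public static void main(String[] args) {
    // Hypothetical output of `ps -o args= -p <pid>` for a NameNode process.
    String psArgs =
        "java -Dproc_namenode org.apache.hadoop.hdfs.server.namenode.NameNode";
    System.out.println(isExpectedDaemon(psArgs, "namenode")); // true
    System.out.println(isExpectedDaemon(psArgs, "datanode")); // false: pid reused
  }
}
```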

+ 7 - 4
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProvider.java

@@ -38,6 +38,7 @@ import org.apache.commons.lang.builder.HashCodeBuilder;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 
 import javax.crypto.KeyGenerator;
 
@@ -53,11 +54,13 @@ import javax.crypto.KeyGenerator;
 @InterfaceStability.Unstable
 public abstract class KeyProvider {
   public static final String DEFAULT_CIPHER_NAME =
-      "hadoop.security.key.default.cipher";
-  public static final String DEFAULT_CIPHER = "AES/CTR/NoPadding";
+      CommonConfigurationKeysPublic.HADOOP_SECURITY_KEY_DEFAULT_CIPHER_KEY;
+  public static final String DEFAULT_CIPHER =
+      CommonConfigurationKeysPublic.HADOOP_SECURITY_KEY_DEFAULT_CIPHER_DEFAULT;
   public static final String DEFAULT_BITLENGTH_NAME =
-      "hadoop.security.key.default.bitlength";
-  public static final int DEFAULT_BITLENGTH = 128;
+      CommonConfigurationKeysPublic.HADOOP_SECURITY_KEY_DEFAULT_BITLENGTH_KEY;
+  public static final int DEFAULT_BITLENGTH = CommonConfigurationKeysPublic.
+      HADOOP_SECURITY_KEY_DEFAULT_BITLENGTH_DEFAULT;
 
   private final Configuration conf;
 

+ 21 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java

@@ -678,6 +678,27 @@ public class CommonConfigurationKeysPublic {
   public static final String HADOOP_SECURITY_KEY_PROVIDER_PATH =
       "hadoop.security.key.provider.path";
 
+  /**
+   * @see
+   * <a href="{@docRoot}/../hadoop-project-dist/hadoop-common/core-default.xml">
+   * core-default.xml</a>
+   */
+  public static final String HADOOP_SECURITY_KEY_DEFAULT_BITLENGTH_KEY =
+      "hadoop.security.key.default.bitlength";
+  /** Default value for HADOOP_SECURITY_KEY_DEFAULT_BITLENGTH_KEY. */
+  public static final int HADOOP_SECURITY_KEY_DEFAULT_BITLENGTH_DEFAULT = 128;
+
+  /**
+   * @see
+   * <a href="{@docRoot}/../hadoop-project-dist/hadoop-common/core-default.xml">
+   * core-default.xml</a>
+   */
+  public static final String HADOOP_SECURITY_KEY_DEFAULT_CIPHER_KEY =
+      "hadoop.security.key.default.cipher";
+  /** Default value for HADOOP_SECURITY_KEY_DEFAULT_CIPHER_KEY. */
+  public static final String HADOOP_SECURITY_KEY_DEFAULT_CIPHER_DEFAULT =
+      "AES/CTR/NoPadding";
+
   //  <!-- KMSClientProvider configurations -->
   /**
    * @see
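
As a usage note: each new `*_KEY` constant names the configuration property and the matching `*_DEFAULT` constant is its fallback, so callers resolve the effective value through `Configuration`. A minimal sketch (the demo class name is illustrative, not part of the patch):

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;

public class KeyDefaultsDemo {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Falls back to 128 unless hadoop.security.key.default.bitlength is set.
    int bitLength = conf.getInt(
        CommonConfigurationKeysPublic.HADOOP_SECURITY_KEY_DEFAULT_BITLENGTH_KEY,
        CommonConfigurationKeysPublic.HADOOP_SECURITY_KEY_DEFAULT_BITLENGTH_DEFAULT);
    // Falls back to AES/CTR/NoPadding unless hadoop.security.key.default.cipher is set.
    String cipher = conf.get(
        CommonConfigurationKeysPublic.HADOOP_SECURITY_KEY_DEFAULT_CIPHER_KEY,
        CommonConfigurationKeysPublic.HADOOP_SECURITY_KEY_DEFAULT_CIPHER_DEFAULT);
    System.out.println(bitLength + " / " + cipher);
  }
}
```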

+ 26 - 9
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RPC.java

@@ -810,22 +810,39 @@ public class RPC {
   
   /** An RPC Server. */
   public abstract static class Server extends org.apache.hadoop.ipc.Server {
-   boolean verbose;
+    boolean verbose;
 
+    private static final Pattern COMPLEX_SERVER_NAME_PATTERN =
+        Pattern.compile("(?:[^\\$]*\\$)*([A-Za-z][^\\$]+)(?:\\$\\d+)?");
+
+    /**
+     * Get a meaningful and short name for a server based on a java class.
+     *
+     * The rules are defined to support the current naming schema of the
+     * generated protobuf classes, where the final class is usually an
+     * anonymous inner class of an inner class.
+     *
+     * 1. For a simple class, it returns the simple name of the class
+     *    (the name without the package prefix).
+     *
+     * 2. For an inner class, this is the simple name of the inner class.
+     *
+     * 3. For an object created from a class factory,
+     *    e.g. org.apache.hadoop.ipc.TestRPC$TestClass$2,
+     *    this method returns the parent class TestClass.
+     *
+     * 4. For an anonymous class, e.g. 'org.apache.hadoop.ipc.TestRPC$10',
+     *    serverNameFromClass returns the parent class TestRPC.
+     */
     static String serverNameFromClass(Class<?> clazz) {
-      //The basic idea here is to handle names like
-      //org.apache.hadoop.hdsl.protocol.proto.
-      //
-      // StorageDatanodeProtocolProtos$StorageContainerDatanodeProtocolService$2
-      //where the getSimpleName is also empty
       String name = clazz.getName();
       String[] names = clazz.getName().split("\\.", -1);
       if (names != null && names.length > 0) {
         name = names[names.length - 1];
       }
-      Pattern pattern =
-          Pattern.compile("(?:[^\\$]*\\$)*([A-Za-z][^\\$]+)(?:\\$\\d+)?");
-      Matcher matcher = pattern.matcher(name);
+      Matcher matcher = COMPLEX_SERVER_NAME_PATTERN.matcher(name);
       if (matcher.find()) {
         return matcher.group(1);
       } else {
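
A self-contained illustration of the four naming rules, reusing the exact pattern from the diff (the demo class and its `main` method are illustrative only):

```java
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class ServerNameDemo {
  // Same pattern as COMPLEX_SERVER_NAME_PATTERN in the diff above.
  private static final Pattern P =
      Pattern.compile("(?:[^\\$]*\\$)*([A-Za-z][^\\$]+)(?:\\$\\d+)?");

  static String serverNameFromClassName(String fqcn) {
    String[] names = fqcn.split("\\.", -1);
    String name = names[names.length - 1]; // strip the package prefix
    Matcher matcher = P.matcher(name);
    return matcher.find() ? matcher.group(1) : name;
  }

  public static void main(String[] args) {
    // Rule 3: object from a class factory -> "TestClass"
    System.out.println(serverNameFromClassName("org.apache.hadoop.ipc.TestRPC$TestClass$2"));
    // Rule 4: anonymous class -> "TestRPC"
    System.out.println(serverNameFromClassName("org.apache.hadoop.ipc.TestRPC$10"));
    // Rule 1: simple class -> "NetworkTopology"
    System.out.println(serverNameFromClassName("org.apache.hadoop.net.NetworkTopology"));
  }
}
```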

+ 90 - 16
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java

@@ -522,12 +522,12 @@ public class NetworkTopology {
         numOfDatanodes -= ((InnerNode)node).getNumOfLeaves();
       }
     }
-    if (numOfDatanodes == 0) {
-      LOG.debug("Failed to find datanode (scope=\"{}\" excludedScope=\"{}\").",
-          scope, excludedScope);
+    if (numOfDatanodes <= 0) {
+      LOG.debug("Failed to find datanode (scope=\"{}\" excludedScope=\"{}\")."
+              + " numOfDatanodes={}",
+          scope, excludedScope, numOfDatanodes);
       return null;
     }
-    Node ret = null;
     final int availableNodes;
     if (excludedScope == null) {
       availableNodes = countNumOfAvailableNodes(scope, excludedNodes);
@@ -536,22 +536,96 @@ public class NetworkTopology {
           countNumOfAvailableNodes("~" + excludedScope, excludedNodes);
     }
     LOG.debug("Choosing random from {} available nodes on node {},"
-        + " scope={}, excludedScope={}, excludeNodes={}", availableNodes,
-        innerNode, scope, excludedScope, excludedNodes);
+        + " scope={}, excludedScope={}, excludeNodes={}. numOfDatanodes={}.",
+        availableNodes, innerNode, scope, excludedScope, excludedNodes,
+        numOfDatanodes);
+    Node ret = null;
     if (availableNodes > 0) {
-      do {
-        int leaveIndex = r.nextInt(numOfDatanodes);
-        ret = innerNode.getLeaf(leaveIndex, node);
-        if (excludedNodes == null || !excludedNodes.contains(ret)) {
+      ret = chooseRandom(innerNode, node, excludedNodes, numOfDatanodes,
+          availableNodes);
+    }
+    LOG.debug("chooseRandom returning {}", ret);
+    return ret;
+  }
+
+  /**
+   * Randomly choose one node under <i>parentNode</i>, considering the exclude
+   * nodes and scope. Should be called with {@link #netlock}'s readlock held.
+   *
+   * @param parentNode        the parent node
+   * @param excludedScopeNode the node corresponding to the exclude scope.
+   * @param excludedNodes     a collection of nodes to be excluded from the choice
+   * @param totalInScopeNodes total number of nodes under parentNode, excluding
+   *                          the excludedScopeNode
+   * @param availableNodes    number of available nodes under parentNode that
+   *                          could be chosen, excluding excludedNodes
+   * @return the chosen node, or null if none can be chosen
+   */
+  private Node chooseRandom(final InnerNode parentNode,
+      final Node excludedScopeNode, final Collection<Node> excludedNodes,
+      final int totalInScopeNodes, final int availableNodes) {
+    Preconditions.checkArgument(
+        totalInScopeNodes >= availableNodes && availableNodes > 0, String
+            .format("%d should >= %d, and both should be positive.",
+                totalInScopeNodes, availableNodes));
+    if (excludedNodes == null || excludedNodes.isEmpty()) {
+      // if there are no excludedNodes, randomly choose a node
+      final int index = r.nextInt(totalInScopeNodes);
+      return parentNode.getLeaf(index, excludedScopeNode);
+    }
+
+    // excludedNodes non empty.
+    // Choose the nth VALID node, where n is random. VALID meaning it can be
+    // returned, after considering exclude scope and exclude nodes.
+    // The probability of being chosen should be equal for all VALID nodes.
+    // Notably, we do NOT choose nth node, and find the next valid node
+    // if n is excluded - this will make the probability of the node immediately
+    // after an excluded node higher.
+    //
+    // Start point is always 0 and that's fine, because the nth valid node
+    // logic provides equal randomness.
+    //
+    // Consider this example, where 1,3,5 out of the 10 nodes are excluded:
+    // 1 2 3 4 5 6 7 8 9 10
+    // x   x   x
+    // We will randomly choose the nth valid node where n is [0,6].
+    // We do NOT choose a random number n and just use the closest valid node,
+    // for example both n=3 and n=4 will choose 4, making it a 2/10 probability,
+    // higher than the expected 1/7
+    // totalInScopeNodes=10 and availableNodes=7 in this example.
+    int nthValidToReturn = r.nextInt(availableNodes);
+    LOG.debug("nthValidToReturn is {}", nthValidToReturn);
+    Node ret =
+        parentNode.getLeaf(r.nextInt(totalInScopeNodes), excludedScopeNode);
+    if (!excludedNodes.contains(ret)) {
+      // return if we're lucky enough to get a valid node at a random first pick
+      LOG.debug("Chosen node {} from first random", ret);
+      return ret;
+    } else {
+      ret = null;
+    }
+    Node lastValidNode = null;
+    for (int i = 0; i < totalInScopeNodes; ++i) {
+      ret = parentNode.getLeaf(i, excludedScopeNode);
+      if (!excludedNodes.contains(ret)) {
+        if (nthValidToReturn == 0) {
           break;
-        } else {
-          LOG.debug("Node {} is excluded, continuing.", ret);
         }
-        // We've counted numOfAvailableNodes inside the lock, so there must be
-        // at least 1 satisfying node. Keep trying until we found it.
-      } while (true);
+        --nthValidToReturn;
+        lastValidNode = ret;
+      } else {
+        LOG.debug("Node {} is excluded, continuing.", ret);
+        ret = null;
+      }
+    }
+    if (ret == null && lastValidNode != null) {
+      LOG.error("BUG: Found lastValidNode {} but not nth valid node. "
+              + "parentNode={}, excludedScopeNode={}, excludedNodes={}, "
+              + "totalInScopeNodes={}, availableNodes={}, nthValidToReturn={}.",
+          lastValidNode, parentNode, excludedScopeNode, excludedNodes,
+          totalInScopeNodes, availableNodes, nthValidToReturn);
+      ret = lastValidNode;
     }
-    LOG.debug("chooseRandom returning {}", ret);
     return ret;
   }
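
The long comment above carries the core idea: pick the n-th VALID node for a uniformly random n, rather than re-rolling or sliding past excluded entries, which would bias selection toward nodes that immediately follow excluded ones. A stripped-down sketch of that idea over a flat list (names are illustrative; the real code walks tree leaves via `getLeaf`):

```java
import java.util.List;
import java.util.Random;
import java.util.Set;

public final class NthValidPickSketch {
  private static final Random R = new Random();

  /**
   * Uniformly pick one non-excluded element by selecting the n-th VALID
   * element for a random n in [0, available), so every valid element has
   * the same 1/available probability of being chosen.
   */
  static <T> T chooseRandomValid(List<T> inScope, Set<T> excluded) {
    int available = 0;
    for (T node : inScope) {
      if (!excluded.contains(node)) {
        available++;
      }
    }
    if (available == 0) {
      return null; // nothing valid to choose
    }
    int nthValidToReturn = R.nextInt(available);
    for (T node : inScope) {
      if (!excluded.contains(node) && nthValidToReturn-- == 0) {
        return node;
      }
    }
    return null; // unreachable: we counted 'available' valid nodes above
  }
}
```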
 

+ 5 - 4
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/concurrent/HadoopExecutors.java

@@ -29,7 +29,7 @@ import java.util.concurrent.ScheduledExecutorService;
 import java.util.concurrent.SynchronousQueue;
 import java.util.concurrent.ThreadFactory;
 import java.util.concurrent.TimeUnit;
-
+import org.slf4j.Logger;
 
 /** Factory methods for ExecutorService, ScheduledExecutorService instances.
  * These executor service instances provide additional functionality (e.g
@@ -103,10 +103,11 @@ public final class HadoopExecutors {
 
   /**
   * Helper routine to shut down an executorService.
+   *
    * @param executorService - executorService
-   * @param logger - Logger
-   * @param timeout - Timeout
-   * @param unit - TimeUnits, generally seconds.
+   * @param logger          - Logger
+   * @param timeout         - Timeout
+   * @param unit            - TimeUnits, generally seconds.
    */
   public static void shutdown(ExecutorService executorService, Logger logger,
       long timeout, TimeUnit unit) {
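
The diff touches only the javadoc and the slf4j import, so the method body is not shown here. As a hedged sketch, a helper with this signature typically wraps the standard orderly-shutdown pattern from `java.util.concurrent` (assumed behavior for illustration, not the verbatim Hadoop implementation):

```java
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;

public final class ShutdownSketch {
  /** Attempt an orderly shutdown, then force one if tasks don't finish in time. */
  public static void shutdown(ExecutorService executorService, Logger logger,
      long timeout, TimeUnit unit) {
    if (executorService == null) {
      return;
    }
    try {
      executorService.shutdown(); // stop accepting new tasks
      if (!executorService.awaitTermination(timeout, unit)) {
        logger.debug("Executor did not terminate in {} {}; forcing shutdownNow",
            timeout, unit);
        executorService.shutdownNow();
      }
    } catch (InterruptedException e) {
      logger.debug("Interrupted while awaiting executor shutdown", e);
      executorService.shutdownNow();
      Thread.currentThread().interrupt(); // preserve the interrupt status
    }
  }
}
```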

+ 1 - 1
hadoop-common-project/hadoop-common/src/site/markdown/HttpAuthentication.md

@@ -28,7 +28,7 @@ Hadoop HTTP web-consoles can be configured to require Kerberos authentication us
 
 In addition, Hadoop HTTP web-consoles support the equivalent of Hadoop's Pseudo/Simple authentication. If this option is enabled, the user name must be specified in the first browser interaction using the user.name query string parameter. e.g. `http://localhost:8088/cluster?user.name=babu`.
 
-If a custom authentication mechanism is required for the HTTP web-consoles, it is possible to implement a plugin to support the alternate authentication mechanism (refer to Hadoop hadoop-auth for details on writing an `AuthenticatorHandler`).
+If a custom authentication mechanism is required for the HTTP web-consoles, it is possible to implement a plugin to support the alternate authentication mechanism (refer to Hadoop hadoop-auth for details on writing an `AuthenticationHandler`).
 
 The next section describes how to configure Hadoop HTTP web-consoles to require user authentication.
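
For orientation, a skeletal custom handler could look like the sketch below. The `AuthenticationHandler` interface lives in hadoop-auth (`org.apache.hadoop.security.authentication.server`); the method signatures here are quoted from memory and should be verified against the hadoop-auth source, and `TrivialAuthenticationHandler` is a made-up name:

```java
import java.io.IOException;
import java.util.Properties;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.hadoop.security.authentication.client.AuthenticationException;
import org.apache.hadoop.security.authentication.server.AuthenticationHandler;
import org.apache.hadoop.security.authentication.server.AuthenticationToken;

public class TrivialAuthenticationHandler implements AuthenticationHandler {
  public static final String TYPE = "trivial";

  @Override
  public String getType() {
    return TYPE;
  }

  @Override
  public void init(Properties config) throws ServletException {
    // read handler-specific properties here
  }

  @Override
  public void destroy() {
  }

  @Override
  public boolean managementOperation(AuthenticationToken token,
      HttpServletRequest request, HttpServletResponse response)
      throws IOException, AuthenticationException {
    return true; // no management operations; proceed to authenticate()
  }

  @Override
  public AuthenticationToken authenticate(HttpServletRequest request,
      HttpServletResponse response) throws IOException, AuthenticationException {
    // Toy scheme mirroring Pseudo/Simple auth: trust the user.name parameter.
    String user = request.getParameter("user.name");
    if (user == null) {
      response.setStatus(HttpServletResponse.SC_UNAUTHORIZED);
      return null; // challenge the client
    }
    return new AuthenticationToken(user, user, TYPE);
  }
}
```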
 

+ 1022 - 0
hadoop-common-project/hadoop-common/src/site/markdown/release/3.1.0/CHANGES.3.1.0.md

@@ -0,0 +1,1022 @@
+
+<!---
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+-->
+# Apache Hadoop Changelog
+
+## Release 3.1.0 - 2018-03-30
+
+### INCOMPATIBLE CHANGES:
+
+| JIRA | Summary | Priority | Component | Reporter | Contributor |
+|:---- |:---- | :--- |:---- |:---- |:---- |
+| [HADOOP-15008](https://issues.apache.org/jira/browse/HADOOP-15008) | Metrics sinks may emit too frequently if multiple sink periods are configured |  Minor | metrics | Erik Krogen | Erik Krogen |
+| [HDFS-12825](https://issues.apache.org/jira/browse/HDFS-12825) | Fsck report shows config key name for min replication issues |  Minor | hdfs | Harshakiran Reddy | Gabor Bota |
+| [HDFS-12883](https://issues.apache.org/jira/browse/HDFS-12883) | RBF: Document Router and State Store metrics |  Major | documentation | Yiqun Lin | Yiqun Lin |
+| [HDFS-12895](https://issues.apache.org/jira/browse/HDFS-12895) | RBF: Add ACL support for mount table |  Major | . | Yiqun Lin | Yiqun Lin |
+| [YARN-7190](https://issues.apache.org/jira/browse/YARN-7190) | Ensure only NM classpath in 2.x gets TSv2 related hbase jars, not the user classpath |  Major | timelineclient, timelinereader, timelineserver | Vrushali C | Varun Saxena |
+| [HADOOP-13282](https://issues.apache.org/jira/browse/HADOOP-13282) | S3 blob etags to be made visible in S3A status/getFileChecksum() calls |  Minor | fs/s3 | Steve Loughran | Steve Loughran |
+| [HDFS-13099](https://issues.apache.org/jira/browse/HDFS-13099) | RBF: Use the ZooKeeper as the default State Store |  Minor | documentation | Yiqun Lin | Yiqun Lin |
+| [YARN-7677](https://issues.apache.org/jira/browse/YARN-7677) | Docker image cannot set HADOOP\_CONF\_DIR |  Major | . | Eric Badger | Jim Brennan |
+
+
+### IMPORTANT ISSUES:
+
+| JIRA | Summary | Priority | Component | Reporter | Contributor |
+|:---- |:---- | :--- |:---- |:---- |:---- |
+| [HDFS-13083](https://issues.apache.org/jira/browse/HDFS-13083) | RBF: Fix doc error setting up client |  Major | federation | tartarus | tartarus |
+
+
+### NEW FEATURES:
+
+| JIRA | Summary | Priority | Component | Reporter | Contributor |
+|:---- |:---- | :--- |:---- |:---- |:---- |
+| [HADOOP-15005](https://issues.apache.org/jira/browse/HADOOP-15005) | Support meta tag element in Hadoop XML configurations |  Major | . | Ajay Kumar | Ajay Kumar |
+| [YARN-3926](https://issues.apache.org/jira/browse/YARN-3926) | [Umbrella] Extend the YARN resource model for easier resource-type management and profiles |  Major | nodemanager, resourcemanager | Varun Vasudev | Varun Vasudev |
+| [HDFS-7877](https://issues.apache.org/jira/browse/HDFS-7877) | [Umbrella] Support maintenance state for datanodes |  Major | datanode, namenode | Ming Ma | Ming Ma |
+| [HADOOP-13055](https://issues.apache.org/jira/browse/HADOOP-13055) | Implement linkMergeSlash and linkFallback for ViewFileSystem |  Major | fs, viewfs | Zhe Zhang | Manoj Govindassamy |
+| [YARN-6871](https://issues.apache.org/jira/browse/YARN-6871) | Add additional deSelects params in RMWebServices#getAppReport |  Major | resourcemanager, router | Giovanni Matteo Fumarola | Tanuj Nayak |
+| [HADOOP-14840](https://issues.apache.org/jira/browse/HADOOP-14840) | Tool to estimate resource requirements of an application pipeline based on prior executions |  Major | tools | Subru Krishnan | Rui Li |
+| [HDFS-206](https://issues.apache.org/jira/browse/HDFS-206) | Support for head in FSShell |  Minor | . | Olga Natkovich | Gabor Bota |
+| [YARN-5079](https://issues.apache.org/jira/browse/YARN-5079) | [Umbrella] Native YARN framework layer for services and beyond |  Major | . | Vinod Kumar Vavilapalli |  |
+| [YARN-4757](https://issues.apache.org/jira/browse/YARN-4757) | [Umbrella] Simplified discovery of services via DNS mechanisms |  Major | . | Vinod Kumar Vavilapalli |  |
+| [HADOOP-13786](https://issues.apache.org/jira/browse/HADOOP-13786) | Add S3A committer for zero-rename commits to S3 endpoints |  Major | fs/s3 | Steve Loughran | Steve Loughran |
+| [HDFS-9806](https://issues.apache.org/jira/browse/HDFS-9806) | Allow HDFS block replicas to be provided by an external storage system |  Major | . | Chris Douglas |  |
+| [YARN-6592](https://issues.apache.org/jira/browse/YARN-6592) | [Umbrella] Rich placement constraints in YARN |  Major | . | Konstantinos Karanasos |  |
+| [HDFS-12998](https://issues.apache.org/jira/browse/HDFS-12998) | SnapshotDiff - Provide an iterator-based listing API for calculating snapshotDiff |  Major | snapshots | Shashikant Banerjee | Shashikant Banerjee |
+
+
+### IMPROVEMENTS:
+
+| JIRA | Summary | Priority | Component | Reporter | Contributor |
+|:---- |:---- | :--- |:---- |:---- |:---- |
+| [YARN-7022](https://issues.apache.org/jira/browse/YARN-7022) | Improve click interaction in queue topology in new YARN UI |  Major | yarn-ui-v2 | Abdullah Yousufi | Abdullah Yousufi |
+| [YARN-7033](https://issues.apache.org/jira/browse/YARN-7033) | Add support for NM Recovery of assigned resources (e.g. GPU's, NUMA, FPGA's) to container |  Major | nodemanager | Devaraj K | Devaraj K |
+| [HADOOP-14850](https://issues.apache.org/jira/browse/HADOOP-14850) | Read HttpServer2 resources directly from the source tree (if exists) |  Major | . | Elek, Marton | Elek, Marton |
+| [HADOOP-14849](https://issues.apache.org/jira/browse/HADOOP-14849) | some wrong spelling words update |  Trivial | . | Chen Hongfei | Chen Hongfei |
+| [HADOOP-14844](https://issues.apache.org/jira/browse/HADOOP-14844) | Remove requirement to specify TenantGuid for MSI Token Provider |  Major | fs/adl | Atul Sikaria | Atul Sikaria |
+| [YARN-7057](https://issues.apache.org/jira/browse/YARN-7057) | FSAppAttempt#getResourceUsage doesn't need to consider resources queued for preemption |  Major | fairscheduler | Karthik Kambatla | Karthik Kambatla |
+| [HADOOP-14804](https://issues.apache.org/jira/browse/HADOOP-14804) | correct wrong parameters format order in core-default.xml |  Trivial | . | Chen Hongfei | Chen Hongfei |
+| [HADOOP-14864](https://issues.apache.org/jira/browse/HADOOP-14864) | FSDataInputStream#unbuffer UOE should include stream class name |  Minor | fs | John Zhuge | Bharat Viswanadham |
+| [HDFS-12441](https://issues.apache.org/jira/browse/HDFS-12441) | Suppress UnresolvedPathException in namenode log |  Minor | . | Kihwal Lee | Kihwal Lee |
+| [HADOOP-13714](https://issues.apache.org/jira/browse/HADOOP-13714) | Tighten up our compatibility guidelines for Hadoop 3 |  Blocker | documentation | Karthik Kambatla | Daniel Templeton |
+| [HADOOP-7308](https://issues.apache.org/jira/browse/HADOOP-7308) | Remove unused TaskLogAppender configurations from log4j.properties |  Major | conf | Todd Lipcon | Todd Lipcon |
+| [YARN-7045](https://issues.apache.org/jira/browse/YARN-7045) | Remove FSLeafQueue#addAppSchedulable |  Major | fairscheduler | Yufei Gu | Sen Zhao |
+| [HDFS-12486](https://issues.apache.org/jira/browse/HDFS-12486) | GetConf to get journalnodeslist |  Major | journal-node, shell | Bharat Viswanadham | Bharat Viswanadham |
+| [HDFS-12320](https://issues.apache.org/jira/browse/HDFS-12320) | Add  quantiles for transactions batched in Journal sync |  Major | metrics, namenode | Hanisha Koneru | Hanisha Koneru |
+| [HDFS-12516](https://issues.apache.org/jira/browse/HDFS-12516) | Suppress the fsnamesystem lock warning on nn startup |  Major | . | Ajay Kumar | Ajay Kumar |
+| [HDFS-12304](https://issues.apache.org/jira/browse/HDFS-12304) | Remove unused parameter from FsDatasetImpl#addVolume |  Minor | . | Chen Liang | Chen Liang |
+| [YARN-65](https://issues.apache.org/jira/browse/YARN-65) | Reduce RM app memory footprint once app has completed |  Major | resourcemanager | Jason Lowe | Manikandan R |
+| [HDFS-5040](https://issues.apache.org/jira/browse/HDFS-5040) | Audit log for admin commands/ logging output of all DFS admin commands |  Major | namenode | Raghu C Doppalapudi | Kuhu Shukla |
+| [HDFS-12560](https://issues.apache.org/jira/browse/HDFS-12560) |  Remove the extra word "it" in HdfsUserGuide.md |  Trivial | . | fang zhenyi | fang zhenyi |
+| [YARN-6333](https://issues.apache.org/jira/browse/YARN-6333) | Improve doc for minSharePreemptionTimeout, fairSharePreemptionTimeout and fairSharePreemptionThreshold |  Major | fairscheduler | Yufei Gu | Chetna Chaudhari |
+| [HDFS-12552](https://issues.apache.org/jira/browse/HDFS-12552) | Use slf4j instead of log4j in FSNamesystem |  Major | . | Ajay Kumar | Ajay Kumar |
+| [HADOOP-14908](https://issues.apache.org/jira/browse/HADOOP-14908) | CrossOriginFilter should trigger regex on more input |  Major | common, security | Allen Wittenauer | Johannes Alberti |
+| [HDFS-12455](https://issues.apache.org/jira/browse/HDFS-12455) | WebHDFS - Adding "snapshot enabled" status to ListStatus query result. |  Major | snapshots, webhdfs | Ajay Kumar | Ajay Kumar |
+| [HDFS-12420](https://issues.apache.org/jira/browse/HDFS-12420) | Add an option to disallow 'namenode format -force' |  Major | . | Ajay Kumar | Ajay Kumar |
+| [YARN-2162](https://issues.apache.org/jira/browse/YARN-2162) | add ability in Fair Scheduler to optionally configure maxResources in terms of percentage |  Major | fairscheduler, scheduler | Ashwin Shankar | Yufei Gu |
+| [YARN-7207](https://issues.apache.org/jira/browse/YARN-7207) | Cache the RM proxy server address |  Major | RM | Yufei Gu | Yufei Gu |
+| [HADOOP-14920](https://issues.apache.org/jira/browse/HADOOP-14920) | KMSClientProvider won't work with KMS delegation token retrieved from non-Java client. |  Major | kms | Xiaoyu Yao | Xiaoyu Yao |
+| [HADOOP-14184](https://issues.apache.org/jira/browse/HADOOP-14184) | Remove service loader config entry for ftp fs |  Minor | fs | John Zhuge | Sen Zhao |
+| [HDFS-12542](https://issues.apache.org/jira/browse/HDFS-12542) | Update javadoc and documentation for listStatus |  Major | documentation | Ajay Kumar | Ajay Kumar |
+| [YARN-7359](https://issues.apache.org/jira/browse/YARN-7359) | TestAppManager.testQueueSubmitWithNoPermission() should be scheduler agnostic |  Minor | . | Haibo Chen | Haibo Chen |
+| [YARN-7261](https://issues.apache.org/jira/browse/YARN-7261) | Add debug message for better download latency monitoring |  Major | nodemanager | Yufei Gu | Yufei Gu |
+| [HDFS-12650](https://issues.apache.org/jira/browse/HDFS-12650) | Use slf4j instead of log4j in LeaseManager |  Major | . | Ajay Kumar | Ajay Kumar |
+| [YARN-7357](https://issues.apache.org/jira/browse/YARN-7357) | Several methods in TestZKRMStateStore.TestZKRMStateStoreTester.TestZKRMStateStoreInternal should have @Override annotations |  Trivial | resourcemanager | Daniel Templeton | Sen Zhao |
+| [YARN-4163](https://issues.apache.org/jira/browse/YARN-4163) | Audit getQueueInfo and getApplications calls |  Major | . | Chang Li | Chang Li |
+| [HADOOP-9657](https://issues.apache.org/jira/browse/HADOOP-9657) | NetUtils.wrapException to have special handling for 0.0.0.0 addresses and :0 ports |  Minor | net | Steve Loughran | Varun Saxena |
+| [YARN-7397](https://issues.apache.org/jira/browse/YARN-7397) | Reduce lock contention in FairScheduler#getAppWeight() |  Major | fairscheduler | Daniel Templeton | Daniel Templeton |
+| [HDFS-7878](https://issues.apache.org/jira/browse/HDFS-7878) | API - expose a unique file identifier |  Major | . | Sergey Shelukhin | Chris Douglas |
+| [YARN-6413](https://issues.apache.org/jira/browse/YARN-6413) | FileSystem based Yarn Registry implementation |  Major | amrmproxy, api, resourcemanager | Ellen Hui | Ellen Hui |
+| [HDFS-12771](https://issues.apache.org/jira/browse/HDFS-12771) | Add genstamp and block size to metasave Corrupt blocks list |  Minor | . | Kuhu Shukla | Kuhu Shukla |
+| [HDFS-10528](https://issues.apache.org/jira/browse/HDFS-10528) | Add logging to successful standby checkpointing |  Major | namenode | Xiaoyu Yao | Xiaoyu Yao |
+| [YARN-7401](https://issues.apache.org/jira/browse/YARN-7401) | Reduce lock contention in ClusterNodeTracker#getClusterCapacity() |  Major | resourcemanager | Daniel Templeton | Daniel Templeton |
+| [HDFS-7060](https://issues.apache.org/jira/browse/HDFS-7060) | Avoid taking locks when sending heartbeats from the DataNode |  Major | . | Haohui Mai | Jiandan Yang |
+| [HADOOP-14872](https://issues.apache.org/jira/browse/HADOOP-14872) | CryptoInputStream should implement unbuffer |  Major | fs, security | John Zhuge | John Zhuge |
+| [YARN-7413](https://issues.apache.org/jira/browse/YARN-7413) | Support resource type in SLS |  Major | scheduler-load-simulator | Yufei Gu | Yufei Gu |
+| [HADOOP-14876](https://issues.apache.org/jira/browse/HADOOP-14876) | Create downstream developer docs from the compatibility guidelines |  Critical | documentation | Daniel Templeton | Daniel Templeton |
+| [YARN-7414](https://issues.apache.org/jira/browse/YARN-7414) | FairScheduler#getAppWeight() should be moved into FSAppAttempt#getWeight() |  Minor | fairscheduler | Daniel Templeton | Soumabrata Chakraborty |
+| [HDFS-12814](https://issues.apache.org/jira/browse/HDFS-12814) | Add blockId when warning slow mirror/disk in BlockReceiver |  Trivial | hdfs | Jiandan Yang | Jiandan Yang |
+| [HADOOP-13514](https://issues.apache.org/jira/browse/HADOOP-13514) | Upgrade maven surefire plugin to 2.20.1 |  Major | build | Ewan Higgs | Akira Ajisaka |
+| [YARN-7524](https://issues.apache.org/jira/browse/YARN-7524) | Remove unused FairSchedulerEventLog |  Major | fairscheduler | Wilfred Spiegelenburg | Wilfred Spiegelenburg |
+| [YARN-6851](https://issues.apache.org/jira/browse/YARN-6851) | Capacity Scheduler: document configs for controlling # containers allowed to be allocated per node heartbeat |  Minor | . | Wei Yan | Wei Yan |
+| [YARN-7495](https://issues.apache.org/jira/browse/YARN-7495) | Improve robustness of the AggregatedLogDeletionService |  Major | log-aggregation | Jonathan Eagles | Jonathan Eagles |
+| [HDFS-12594](https://issues.apache.org/jira/browse/HDFS-12594) | snapshotDiff fails if the report exceeds the RPC response limit |  Major | hdfs | Shashikant Banerjee | Shashikant Banerjee |
+| [HDFS-12877](https://issues.apache.org/jira/browse/HDFS-12877) | Add open(PathHandle) with default buffersize |  Trivial | . | Chris Douglas | Chris Douglas |
+| [HADOOP-14976](https://issues.apache.org/jira/browse/HADOOP-14976) | Set HADOOP\_SHELL\_EXECNAME explicitly in scripts |  Major | . | Arpit Agarwal | Arpit Agarwal |
+| [HADOOP-15039](https://issues.apache.org/jira/browse/HADOOP-15039) | Move SemaphoredDelegatingExecutor to hadoop-common |  Minor | fs, fs/oss, fs/s3 | Genmao Yu | Genmao Yu |
+| [YARN-6483](https://issues.apache.org/jira/browse/YARN-6483) | Add nodes transitioning to DECOMMISSIONING state to the list of updated nodes returned to the AM |  Major | resourcemanager | Juan Rodríguez Hortalá | Juan Rodríguez Hortalá |
+| [HADOOP-15056](https://issues.apache.org/jira/browse/HADOOP-15056) | Fix TestUnbuffer#testUnbufferException failure |  Minor | test | Jack Bearden | Jack Bearden |
+| [HADOOP-15012](https://issues.apache.org/jira/browse/HADOOP-15012) | Add readahead, dropbehind, and unbuffer to StreamCapabilities |  Major | fs | John Zhuge | John Zhuge |
+| [HADOOP-15104](https://issues.apache.org/jira/browse/HADOOP-15104) | AliyunOSS: change the default value of max error retry |  Major | fs/oss | wujinhu | wujinhu |
+| [YARN-7274](https://issues.apache.org/jira/browse/YARN-7274) | Ability to disable elasticity at leaf queue level |  Major | capacityscheduler | Scott Brokaw | Zian Chen |
+| [HDFS-12882](https://issues.apache.org/jira/browse/HDFS-12882) | Support full open(PathHandle) contract in HDFS |  Major | hdfs-client | Chris Douglas | Chris Douglas |
+| [YARN-7625](https://issues.apache.org/jira/browse/YARN-7625) | Expose NM node/containers resource utilization in JVM metrics |  Major | nodemanager | Weiwei Yang | Weiwei Yang |
+| [HADOOP-14914](https://issues.apache.org/jira/browse/HADOOP-14914) | Change to a safely casting long to int. |  Major | . | Yufei Gu | Ajay Kumar |
+| [HDFS-12910](https://issues.apache.org/jira/browse/HDFS-12910) | Secure Datanode Starter should log the port when it fails to bind |  Minor | datanode | Stephen O'Donnell | Stephen O'Donnell |
+| [YARN-7642](https://issues.apache.org/jira/browse/YARN-7642) | Add test case to verify context update after container promotion or demotion with or without auto update |  Minor | nodemanager | Weiwei Yang | Weiwei Yang |
+| [YARN-5418](https://issues.apache.org/jira/browse/YARN-5418) | When partial log aggregation is enabled, display the list of aggregated files on the container log page |  Major | . | Siddharth Seth | Xuan Gong |
+| [HADOOP-15106](https://issues.apache.org/jira/browse/HADOOP-15106) | FileSystem::open(PathHandle) should throw a specific exception on validation failure |  Minor | . | Chris Douglas | Chris Douglas |
+| [HDFS-12818](https://issues.apache.org/jira/browse/HDFS-12818) | Support multiple storages in DataNodeCluster / SimulatedFSDataset |  Minor | datanode, test | Erik Krogen | Erik Krogen |
+| [HDFS-12932](https://issues.apache.org/jira/browse/HDFS-12932) | Fix confusing LOG message for block replication |  Minor | hdfs | Chao Sun | Chao Sun |
+| [HDFS-9023](https://issues.apache.org/jira/browse/HDFS-9023) | When NN is not able to identify DN for replication, reason behind it can be logged |  Critical | hdfs-client, namenode | Surendra Singh Lilhore | Xiao Chen |
+| [YARN-7580](https://issues.apache.org/jira/browse/YARN-7580) | ContainersMonitorImpl logged message lacks detail when exceeding memory limits |  Major | nodemanager | Wilfred Spiegelenburg | Wilfred Spiegelenburg |
+| [HDFS-12351](https://issues.apache.org/jira/browse/HDFS-12351) | Explicitly describe the minimal number of DataNodes required to support an EC policy in EC document. |  Minor | documentation, erasure-coding | Lei (Eddy) Xu | Hanisha Koneru |
+| [HDFS-12629](https://issues.apache.org/jira/browse/HDFS-12629) | NameNode UI should report total blocks count by type - replicated and erasure coded |  Major | hdfs | Manoj Govindassamy | Manoj Govindassamy |
+| [YARN-7687](https://issues.apache.org/jira/browse/YARN-7687) | ContainerLogAppender Improvements |  Trivial | . | BELUGA BEHR |  |
+| [YARN-7688](https://issues.apache.org/jira/browse/YARN-7688) | Miscellaneous Improvements To ProcfsBasedProcessTree |  Minor | nodemanager | BELUGA BEHR |  |
+| [HDFS-11847](https://issues.apache.org/jira/browse/HDFS-11847) | Enhance dfsadmin listOpenFiles command to list files blocking datanode decommissioning |  Major | hdfs | Manoj Govindassamy | Manoj Govindassamy |
+| [YARN-7678](https://issues.apache.org/jira/browse/YARN-7678) | Ability to enable logging of container memory stats |  Major | nodemanager | Jim Brennan | Jim Brennan |
+| [HDFS-11848](https://issues.apache.org/jira/browse/HDFS-11848) | Enhance dfsadmin listOpenFiles command to list files under a given path |  Major | . | Manoj Govindassamy | Yiqun Lin |
+| [HDFS-12945](https://issues.apache.org/jira/browse/HDFS-12945) | Switch to ClientProtocol instead of NamenodeProtocols in NamenodeWebHdfsMethods |  Minor | . | Wei Yan | Wei Yan |
+| [HDFS-12808](https://issues.apache.org/jira/browse/HDFS-12808) | Add LOG.isDebugEnabled() guard for LOG.debug("...") |  Minor | . | Mehran Hassani | Bharat Viswanadham |
+| [YARN-7722](https://issues.apache.org/jira/browse/YARN-7722) | Rename variables in MockNM, MockRM for better clarity |  Trivial | . | lovekesh bansal | lovekesh bansal |
+| [YARN-7622](https://issues.apache.org/jira/browse/YARN-7622) | Allow fair-scheduler configuration on HDFS |  Minor | fairscheduler, resourcemanager | Greg Phillips | Greg Phillips |
+| [HADOOP-15033](https://issues.apache.org/jira/browse/HADOOP-15033) | Use java.util.zip.CRC32C for Java 9 and above |  Major | performance, util | Dmitry Chuyko | Dmitry Chuyko |
+| [YARN-7590](https://issues.apache.org/jira/browse/YARN-7590) | Improve container-executor validation check |  Major | security, yarn | Eric Yang | Eric Yang |
+| [HADOOP-15157](https://issues.apache.org/jira/browse/HADOOP-15157) | Zookeeper authentication related properties to support CredentialProviders |  Minor | security | Gergo Repas | Gergo Repas |
+| [MAPREDUCE-7029](https://issues.apache.org/jira/browse/MAPREDUCE-7029) | FileOutputCommitter is slow on filesystems lacking recursive delete |  Minor | . | Karthik Palaniappan | Karthik Palaniappan |
+| [HADOOP-15114](https://issues.apache.org/jira/browse/HADOOP-15114) | Add closeStreams(...) to IOUtils |  Major | . | Ajay Kumar | Ajay Kumar |
+| [MAPREDUCE-6984](https://issues.apache.org/jira/browse/MAPREDUCE-6984) | MR AM to clean up temporary files from previous attempt in case of no recovery |  Major | applicationmaster | Gergo Repas | Gergo Repas |
+| [HDFS-13036](https://issues.apache.org/jira/browse/HDFS-13036) | Reusing the volume storage ID obtained by replicaInfo |  Major | datanode | liaoyuxiangqin | liaoyuxiangqin |
+| [YARN-7755](https://issues.apache.org/jira/browse/YARN-7755) | Clean up deprecation messages for allocation increments in FS config |  Minor | fairscheduler | Wilfred Spiegelenburg | Wilfred Spiegelenburg |
+| [MAPREDUCE-7022](https://issues.apache.org/jira/browse/MAPREDUCE-7022) | Fast fail rogue jobs based on task scratch dir size |  Major | task | Johan Gustavsson | Johan Gustavsson |
+| [YARN-2185](https://issues.apache.org/jira/browse/YARN-2185) | Use pipes when localizing archives |  Major | nodemanager | Jason Lowe | Miklos Szegedi |
+| [HDFS-13092](https://issues.apache.org/jira/browse/HDFS-13092) | Reduce verbosity for ThrottledAsyncChecker.java:schedule |  Minor | datanode | Mukul Kumar Singh | Mukul Kumar Singh |
+| [HDFS-13062](https://issues.apache.org/jira/browse/HDFS-13062) | Provide support for JN to use separate journal disk per namespace |  Major | federation, journal-node | Bharat Viswanadham | Bharat Viswanadham |
+| [HADOOP-15170](https://issues.apache.org/jira/browse/HADOOP-15170) | Add symlink support to FileUtil#unTarUsingJava |  Minor | util | Jason Lowe | Ajay Kumar |
+| [HADOOP-15168](https://issues.apache.org/jira/browse/HADOOP-15168) | Add kdiag tool to hadoop command |  Minor | . | Bharat Viswanadham | Bharat Viswanadham |
+| [HDFS-13073](https://issues.apache.org/jira/browse/HDFS-13073) | Cleanup code in InterQJournalProtocol.proto |  Minor | journal-node | Bharat Viswanadham | Bharat Viswanadham |
+| [HADOOP-15212](https://issues.apache.org/jira/browse/HADOOP-15212) | Add independent secret manager method for logging expired tokens |  Major | security | Daryn Sharp | Daryn Sharp |
+| [YARN-7841](https://issues.apache.org/jira/browse/YARN-7841) | Cleanup AllocationFileLoaderService's reloadAllocations method |  Minor | yarn | Szilard Nemeth | Szilard Nemeth |
+| [HDFS-12947](https://issues.apache.org/jira/browse/HDFS-12947) | Limit the number of Snapshots allowed to be created for a Snapshottable Directory |  Major | snapshots | Shashikant Banerjee | Shashikant Banerjee |
+| [HDFS-12933](https://issues.apache.org/jira/browse/HDFS-12933) | Improve logging when DFSStripedOutputStream failed to write some blocks |  Minor | erasure-coding | Xiao Chen | chencan |
+| [YARN-7728](https://issues.apache.org/jira/browse/YARN-7728) | Expose container preemptions related information in Capacity Scheduler queue metrics |  Major | . | Eric Payne | Eric Payne |
+| [YARN-7655](https://issues.apache.org/jira/browse/YARN-7655) | Avoid AM preemption caused by RRs for specific nodes or racks |  Major | fairscheduler | Steven Rand | Steven Rand |
+| [HADOOP-15187](https://issues.apache.org/jira/browse/HADOOP-15187) | Remove ADL mock test dependency on REST call invoked from Java SDK |  Major | fs/adl | Vishwajeet Dusane | Vishwajeet Dusane |
+| [MAPREDUCE-7048](https://issues.apache.org/jira/browse/MAPREDUCE-7048) | Uber AM can crash due to unknown task in statusUpdate |  Major | mr-am | Peter Bacsko | Peter Bacsko |
+| [HADOOP-15195](https://issues.apache.org/jira/browse/HADOOP-15195) | With SELinux enabled, directories mounted with start-build-env.sh may not be accessible. |  Major | build | Grigori Rybkine | Grigori Rybkine |
+| [HADOOP-14531](https://issues.apache.org/jira/browse/HADOOP-14531) | [Umbrella] Improve S3A error handling & reporting |  Blocker | fs/s3 | Steve Loughran | Steve Loughran |
+| [HADOOP-15204](https://issues.apache.org/jira/browse/HADOOP-15204) | Add Configuration API for parsing storage sizes |  Minor | conf | Anu Engineer | Anu Engineer |
+| [HDFS-13142](https://issues.apache.org/jira/browse/HDFS-13142) | Define and Implement a DiifList Interface to store and manage SnapshotDiffs |  Major | snapshots | Shashikant Banerjee | Shashikant Banerjee |
+| [HADOOP-13972](https://issues.apache.org/jira/browse/HADOOP-13972) | ADLS to support per-store configuration |  Major | fs/adl | John Zhuge | Sharad Sonker |
+| [HDFS-13153](https://issues.apache.org/jira/browse/HDFS-13153) | Enable HDFS diskbalancer by default |  Major | diskbalancer | Ajay Kumar | Ajay Kumar |
+| [HADOOP-14875](https://issues.apache.org/jira/browse/HADOOP-14875) | Create end user documentation from the compatibility guidelines |  Critical | documentation | Daniel Templeton | Daniel Templeton |
+| [HADOOP-15070](https://issues.apache.org/jira/browse/HADOOP-15070) | add test to verify FileSystem and paths differentiate on user info |  Minor | fs, test | Steve Loughran | Steve Loughran |
+| [YARN-7813](https://issues.apache.org/jira/browse/YARN-7813) | Capacity Scheduler Intra-queue Preemption should be configurable for each queue |  Major | capacity scheduler, scheduler preemption | Eric Payne | Eric Payne |
+| [HDFS-13168](https://issues.apache.org/jira/browse/HDFS-13168) | XmlImageVisitor - Prefer Array over LinkedList |  Minor | hdfs | BELUGA BEHR | BELUGA BEHR |
+| [HDFS-13167](https://issues.apache.org/jira/browse/HDFS-13167) | DatanodeAdminManager Improvements |  Trivial | hdfs | BELUGA BEHR | BELUGA BEHR |
+| [HADOOP-15235](https://issues.apache.org/jira/browse/HADOOP-15235) | Authentication Tokens should use HMAC instead of MAC |  Major | security | Robert Kanter | Robert Kanter |
+| [HADOOP-12897](https://issues.apache.org/jira/browse/HADOOP-12897) | KerberosAuthenticator.authenticate to include URL on IO failures |  Minor | security | Steve Loughran | Ajay Kumar |
+| [HDFS-13175](https://issues.apache.org/jira/browse/HDFS-13175) | Add more information for checking argument in DiskBalancerVolume |  Minor | diskbalancer | Lei (Eddy) Xu | Lei (Eddy) Xu |
+| [HDFS-11187](https://issues.apache.org/jira/browse/HDFS-11187) | Optimize disk access for last partial chunk checksum of Finalized replica |  Major | datanode | Wei-Chiu Chuang | Gabor Bota |
+| [HADOOP-15255](https://issues.apache.org/jira/browse/HADOOP-15255) | Upper/Lower case conversion support for group names in LdapGroupsMapping |  Major | . | Nanda kumar | Nanda kumar |
+| [HADOOP-13374](https://issues.apache.org/jira/browse/HADOOP-13374) | Add the L&N verification script |  Major | . | Xiao Chen | Allen Wittenauer |
+| [HADOOP-15178](https://issues.apache.org/jira/browse/HADOOP-15178) | Generalize NetUtils#wrapException to handle other subclasses with String Constructor |  Major | . | Ajay Kumar | Ajay Kumar |
+| [HDFS-13193](https://issues.apache.org/jira/browse/HDFS-13193) | Various Improvements for BlockTokenSecretManager |  Trivial | hdfs | BELUGA BEHR | BELUGA BEHR |
+| [HADOOP-14959](https://issues.apache.org/jira/browse/HADOOP-14959) | DelegationTokenAuthenticator.authenticate() to wrap network exceptions |  Minor | net, security | Steve Loughran | Ajay Kumar |
+| [MAPREDUCE-7010](https://issues.apache.org/jira/browse/MAPREDUCE-7010) | Make Job History File Permissions configurable |  Major | . | Andras Bokor | Gergely Novák |
+| [HDFS-13192](https://issues.apache.org/jira/browse/HDFS-13192) | Change the code order in getFileEncryptionInfo to avoid unnecessary call of assignment |  Minor | encryption | LiXin Ge | LiXin Ge |
+| [MAPREDUCE-7061](https://issues.apache.org/jira/browse/MAPREDUCE-7061) | SingleCluster setup document needs to be updated |  Major | . | Bharat Viswanadham | Bharat Viswanadham |
+| [HADOOP-15263](https://issues.apache.org/jira/browse/HADOOP-15263) | hadoop cloud-storage module to mark hadoop-common as provided; add azure-datalake |  Minor | build | Steve Loughran | Steve Loughran |
+| [HADOOP-15007](https://issues.apache.org/jira/browse/HADOOP-15007) | Stabilize and document Configuration \<tag\> element |  Blocker | conf | Steve Loughran | Ajay Kumar |
+| [HDFS-13102](https://issues.apache.org/jira/browse/HDFS-13102) | Implement SnapshotSkipList class to store Multi level DirectoryDiffs |  Major | snapshots | Shashikant Banerjee | Shashikant Banerjee |
+| [HDFS-13202](https://issues.apache.org/jira/browse/HDFS-13202) | Fix the outdated javadocs in HAUtil |  Trivial | . | Chao Sun | Chao Sun |
+| [YARN-5028](https://issues.apache.org/jira/browse/YARN-5028) | RMStateStore should trim down app state for completed applications |  Major | resourcemanager | Karthik Kambatla | Gergo Repas |
+| [HADOOP-15279](https://issues.apache.org/jira/browse/HADOOP-15279) | increase maven heap size recommendations |  Minor | build, documentation, test | Allen Wittenauer | Allen Wittenauer |
+| [HDFS-13171](https://issues.apache.org/jira/browse/HDFS-13171) | Handle Deletion of nodes in SnapshotSkipList |  Major | snapshots | Shashikant Banerjee | Shashikant Banerjee |
+| [HADOOP-15252](https://issues.apache.org/jira/browse/HADOOP-15252) | Checkstyle version is not compatible with IDEA's checkstyle plugin |  Major | . | Andras Bokor | Andras Bokor |
+| [HDFS-13173](https://issues.apache.org/jira/browse/HDFS-13173) | Replace ArrayList with DirectoryDiffList(SnapshotSkipList) to store DirectoryDiffs |  Major | snapshots | Shashikant Banerjee | Shashikant Banerjee |
+| [HADOOP-15282](https://issues.apache.org/jira/browse/HADOOP-15282) | HADOOP-15235 broke TestHttpFSServerWebServer |  Major | test | Robert Kanter | Robert Kanter |
+| [HDFS-13170](https://issues.apache.org/jira/browse/HDFS-13170) | Port webhdfs unmaskedpermission parameter to HTTPFS |  Major | . | Stephen O'Donnell | Stephen O'Donnell |
+| [HDFS-13223](https://issues.apache.org/jira/browse/HDFS-13223) | Reduce DiffListBySkipList memory usage |  Major | snapshots | Tsz Wo Nicholas Sze | Shashikant Banerjee |
+| [HDFS-13227](https://issues.apache.org/jira/browse/HDFS-13227) | Add a method to calculate cumulative diff over multiple snapshots in DirectoryDiffList |  Minor | snapshots | Shashikant Banerjee | Shashikant Banerjee |
+| [HDFS-13222](https://issues.apache.org/jira/browse/HDFS-13222) | Update getBlocks method to take minBlockSize in RPC calls |  Major | balancer & mover | Bharat Viswanadham | Bharat Viswanadham |
+| [HDFS-13225](https://issues.apache.org/jira/browse/HDFS-13225) | StripeReader#checkMissingBlocks()'s IOException info is incomplete |  Major | erasure-coding, hdfs-client | lufei | lufei |
+| [HDFS-11394](https://issues.apache.org/jira/browse/HDFS-11394) | Support for getting erasure coding policy through WebHDFS#FileStatus |  Major | erasure-coding, namenode | Kai Sasaki | Kai Sasaki |
+| [HDFS-13252](https://issues.apache.org/jira/browse/HDFS-13252) | Code refactoring: Remove Diff.ListType |  Major | snapshots | Tsz Wo Nicholas Sze | Tsz Wo Nicholas Sze |
+| [HDFS-12780](https://issues.apache.org/jira/browse/HDFS-12780) | Fix spelling mistake in DistCpUtils.java |  Trivial | . | Jianfei Jiang | Jianfei Jiang |
+| [HADOOP-15311](https://issues.apache.org/jira/browse/HADOOP-15311) | HttpServer2 needs a way to configure the acceptor/selector count |  Major | common | Erik Krogen | Erik Krogen |
+| [HDFS-13235](https://issues.apache.org/jira/browse/HDFS-13235) | DiskBalancer: Update Documentation to add newly added options |  Major | diskbalancer, documentation | Bharat Viswanadham | Bharat Viswanadham |
+| [HDFS-336](https://issues.apache.org/jira/browse/HDFS-336) | dfsadmin -report should report number of blocks from datanode |  Minor | . | Lohit Vijayarenu | Bharat Viswanadham |
+| [HDFS-11600](https://issues.apache.org/jira/browse/HDFS-11600) | Refactor TestDFSStripedOutputStreamWithFailure test classes |  Minor | test | Andrew Wang | SammiChen |
+| [HDFS-13257](https://issues.apache.org/jira/browse/HDFS-13257) | Code cleanup: INode never throws QuotaExceededException |  Major | namenode | Tsz Wo Nicholas Sze | Tsz Wo Nicholas Sze |
+| [HDFS-13275](https://issues.apache.org/jira/browse/HDFS-13275) | Adding log for BlockPoolManager#refreshNamenodes failures |  Minor | datanode | Xiaoyu Yao | Ajay Kumar |
+| [HDFS-13246](https://issues.apache.org/jira/browse/HDFS-13246) | FileInputStream redundant closes in readReplicasFromCache |  Minor | datanode | liaoyuxiangqin | liaoyuxiangqin |
+| [HADOOP-15209](https://issues.apache.org/jira/browse/HADOOP-15209) | DistCp to eliminate needless deletion of files under already-deleted directories |  Major | tools/distcp | Steve Loughran | Steve Loughran |
+| [MAPREDUCE-7047](https://issues.apache.org/jira/browse/MAPREDUCE-7047) | Make HAR tool support IndexedLogAggregationController |  Major | . | Xuan Gong | Xuan Gong |
+| [HDFS-12884](https://issues.apache.org/jira/browse/HDFS-12884) | BlockUnderConstructionFeature.truncateBlock should be of type BlockInfo |  Major | namenode | Konstantin Shvachko | chencan |
+| [YARN-7064](https://issues.apache.org/jira/browse/YARN-7064) | Use cgroup to get container resource utilization |  Major | nodemanager | Miklos Szegedi | Miklos Szegedi |
+| [HADOOP-15334](https://issues.apache.org/jira/browse/HADOOP-15334) | Upgrade Maven surefire plugin |  Major | build | Arpit Agarwal | Arpit Agarwal |
+| [HADOOP-14825](https://issues.apache.org/jira/browse/HADOOP-14825) | Über-JIRA: S3Guard Phase II: Hadoop 3.1 features |  Major | fs/s3 | Steve Loughran | Steve Loughran |
+| [HADOOP-15312](https://issues.apache.org/jira/browse/HADOOP-15312) | Undocumented KeyProvider configuration keys |  Major | . | Wei-Chiu Chuang | LiXin Ge |
+| [YARN-7623](https://issues.apache.org/jira/browse/YARN-7623) | Fix the CapacityScheduler Queue configuration documentation |  Major | . | Arun Suresh | Jonathan Hung |
+| [HDFS-13314](https://issues.apache.org/jira/browse/HDFS-13314) | NameNode should optionally exit if it detects FsImage corruption |  Major | namenode | Arpit Agarwal | Arpit Agarwal |
+| [YARN-8076](https://issues.apache.org/jira/browse/YARN-8076) | Support to specify application tags in distributed shell |  Major | distributed-shell | Weiwei Yang | Weiwei Yang |
+
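+Of the improvements above, [HADOOP-15204](https://issues.apache.org/jira/browse/HADOOP-15204) is worth a quick illustration: it adds a storage-size parsing API to `Configuration`, so values such as "128m" or "2g" can be read and converted to a target unit. The sketch below is a minimal example only; the `getStorageSize`/`StorageUnit` names follow the JIRA description and are assumptions here, not code taken from the patch itself.
+
+```java
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.StorageUnit;
+
+public class StorageSizeExample {
+  public static void main(String[] args) {
+    Configuration conf = new Configuration();
+    // Storage-size values may carry a unit suffix, e.g. "10m" or "2g".
+    conf.set("dfs.blocksize", "128m");
+
+    // Assumed API from HADOOP-15204: parse the configured value and
+    // convert it to the requested target unit (bytes here); the second
+    // argument is the default used when the key is unset.
+    double blockSizeBytes =
+        conf.getStorageSize("dfs.blocksize", "64m", StorageUnit.BYTES);
+    System.out.println("dfs.blocksize in bytes: " + blockSizeBytes);
+  }
+}
+```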
+
+### BUG FIXES:
+
+| JIRA | Summary | Priority | Component | Reporter | Contributor |
+|:---- |:---- | :--- |:---- |:---- |:---- |
+| [YARN-7023](https://issues.apache.org/jira/browse/YARN-7023) | Incorrect ReservationId.compareTo() implementation |  Minor | reservation system | Oleg Danilov | Oleg Danilov |
+| [YARN-7152](https://issues.apache.org/jira/browse/YARN-7152) | [ATSv2] Registering timeline client before AMRMClient service init throws an exception. |  Major | timelineclient | Rohith Sharma K S | Rohith Sharma K S |
+| [YARN-6992](https://issues.apache.org/jira/browse/YARN-6992) | Kill application button is visible even if the application is FINISHED in RM UI |  Major | . | Sumana Sathish | Suma Shivaprasad |
+| [YARN-7140](https://issues.apache.org/jira/browse/YARN-7140) | CollectorInfo should have Public visibility |  Minor | . | Varun Saxena | Varun Saxena |
+| [YARN-7130](https://issues.apache.org/jira/browse/YARN-7130) | ATSv2 documentation changes post merge |  Major | timelineserver | Varun Saxena | Varun Saxena |
+| [HDFS-12406](https://issues.apache.org/jira/browse/HDFS-12406) | dfsadmin command prints "Exception encountered" even if there is no exception, when debug is enabled |  Minor | hdfs-client | Nanda kumar | Nanda kumar |
+| [YARN-4727](https://issues.apache.org/jira/browse/YARN-4727) | Unable to override the $HADOOP\_CONF\_DIR env variable for container |  Major | nodemanager | Terence Yim | Jason Lowe |
+| [YARN-7163](https://issues.apache.org/jira/browse/YARN-7163) | RMContext need not be injected into webapp and other always-running services. |  Blocker | resourcemanager | Rohith Sharma K S | Rohith Sharma K S |
+| [HDFS-12424](https://issues.apache.org/jira/browse/HDFS-12424) | Datatable sorting on the Datanode Information page in the Namenode UI is broken |  Major | . | Shawna Martell | Shawna Martell |
+| [HDFS-12323](https://issues.apache.org/jira/browse/HDFS-12323) | NameNode terminates after full GC thinking QJM unresponsive if full GC is much longer than timeout |  Major | namenode, qjm | Erik Krogen | Erik Krogen |
+| [YARN-7149](https://issues.apache.org/jira/browse/YARN-7149) | Cross-queue preemption sometimes starves an underserved queue |  Major | capacity scheduler | Eric Payne | Eric Payne |
+| [YARN-7172](https://issues.apache.org/jira/browse/YARN-7172) | ResourceCalculator.fitsIn() should not take a cluster resource parameter |  Major | scheduler | Daniel Templeton | Sen Zhao |
+| [YARN-7199](https://issues.apache.org/jira/browse/YARN-7199) | Fix TestAMRMClientContainerRequest.testOpportunisticAndGuaranteedRequests |  Blocker | . | Botong Huang | Botong Huang |
+| [MAPREDUCE-6960](https://issues.apache.org/jira/browse/MAPREDUCE-6960) | Shuffle Handler prints disk error stack traces for every read failure. |  Major | . | Kuhu Shukla | Kuhu Shukla |
+| [HDFS-12480](https://issues.apache.org/jira/browse/HDFS-12480) | TestNameNodeMetrics#testTransactionAndCheckpointMetrics Fails in trunk |  Blocker | test | Brahma Reddy Battula | Hanisha Koneru |
+| [HDFS-11799](https://issues.apache.org/jira/browse/HDFS-11799) | Introduce a config to allow setting up write pipeline with fewer nodes than replication factor |  Major | . | Yongjun Zhang | Brahma Reddy Battula |
+| [YARN-7196](https://issues.apache.org/jira/browse/YARN-7196) | Fix finicky TestContainerManager tests |  Major | . | Arun Suresh | Arun Suresh |
+| [YARN-6771](https://issues.apache.org/jira/browse/YARN-6771) | Use classloader inside configuration class to make new classes |  Major | . | Jongyoul Lee | Jongyoul Lee |
+| [HDFS-12526](https://issues.apache.org/jira/browse/HDFS-12526) | FSDirectory should use Time.monotonicNow for durations |  Minor | . | Chetna Chaudhari | Bharat Viswanadham |
+| [HDFS-12371](https://issues.apache.org/jira/browse/HDFS-12371) | "BlockVerificationFailures" and "BlocksVerified" show up as 0 in Datanode JMX |  Major | metrics | Sai Nukavarapu | Hanisha Koneru |
+| [YARN-7034](https://issues.apache.org/jira/browse/YARN-7034) | DefaultLinuxContainerRuntime and DockerLinuxContainerRuntime send client environment variables to container-executor |  Blocker | nodemanager | Miklos Szegedi | Miklos Szegedi |
+| [HDFS-12507](https://issues.apache.org/jira/browse/HDFS-12507) | StripedBlockUtil.java:694: warning - Tag @link: reference not found: StripingCell |  Minor | documentation | Tsz Wo Nicholas Sze | Mukul Kumar Singh |
+| [MAPREDUCE-6966](https://issues.apache.org/jira/browse/MAPREDUCE-6966) | DistSum should use Time.monotonicNow for measuring durations |  Minor | . | Chetna Chaudhari | Chetna Chaudhari |
+| [YARN-6878](https://issues.apache.org/jira/browse/YARN-6878) | TestCapacityScheduler.testDefaultNodeLabelExpressionQueueConfig() has the args to assertEqual() in the wrong order |  Trivial | capacity scheduler, test | Daniel Templeton | Sen Zhao |
+| [HDFS-12064](https://issues.apache.org/jira/browse/HDFS-12064) | Reuse object mapper in HDFS |  Minor | . | Mingliang Liu | Hanisha Koneru |
+| [HDFS-12535](https://issues.apache.org/jira/browse/HDFS-12535) | Change the Scope of the Class DFSUtilClient to Private |  Major | . | Bharat Viswanadham | Bharat Viswanadham |
+| [HDFS-12536](https://issues.apache.org/jira/browse/HDFS-12536) | Add documentation for getconf command with -journalnodes option |  Major | . | Bharat Viswanadham | Bharat Viswanadham |
+| [HADOOP-14905](https://issues.apache.org/jira/browse/HADOOP-14905) | Fix javadocs issues in Hadoop HDFS-NFS |  Major | nfs | Mukul Kumar Singh | Mukul Kumar Singh |
+| [HADOOP-14904](https://issues.apache.org/jira/browse/HADOOP-14904) | Fix javadocs issues in Hadoop HDFS |  Minor | . | Mukul Kumar Singh | Mukul Kumar Singh |
+| [HDFS-12339](https://issues.apache.org/jira/browse/HDFS-12339) | NFS Gateway on Shutdown Gives Unregistration Failure. Does Not Unregister with rpcbind Portmapper |  Major | nfs | Sailesh Patel | Mukul Kumar Singh |
+| [HDFS-12375](https://issues.apache.org/jira/browse/HDFS-12375) | Fail to start/stop journalnodes using start-dfs.sh/stop-dfs.sh. |  Major | federation, journal-node, scripts | Wenxin He | Bharat Viswanadham |
+| [YARN-7153](https://issues.apache.org/jira/browse/YARN-7153) | Remove duplicated code in AMRMClientAsyncImpl.java |  Minor | client | Sen Zhao | Sen Zhao |
+| [HADOOP-14897](https://issues.apache.org/jira/browse/HADOOP-14897) | Loosen compatibility guidelines for native dependencies |  Blocker | documentation, native | Chris Douglas | Daniel Templeton |
+| [HDFS-12529](https://issues.apache.org/jira/browse/HDFS-12529) | Get source for config tags from file name |  Major | . | Ajay Kumar | Ajay Kumar |
+| [YARN-7118](https://issues.apache.org/jira/browse/YARN-7118) | AHS REST API can return NullPointerException |  Major | . | Prabhu Joseph | Billie Rinaldi |
+| [HDFS-12495](https://issues.apache.org/jira/browse/HDFS-12495) | TestPendingInvalidateBlock#testPendingDeleteUnknownBlocks fails intermittently |  Major | . | Eric Badger | Eric Badger |
+| [HADOOP-14822](https://issues.apache.org/jira/browse/HADOOP-14822) | hadoop-project/pom.xml is executable |  Minor | . | Akira Ajisaka | Ajay Kumar |
+| [YARN-7157](https://issues.apache.org/jira/browse/YARN-7157) | Add admin configuration to filter per-user's apps in secure cluster |  Major | webapp | Sunil G | Sunil G |
+| [YARN-7257](https://issues.apache.org/jira/browse/YARN-7257) | AggregatedLogsBlock reports a bad 'end' value as a bad 'start' value |  Major | log-aggregation | Jason Lowe | Jason Lowe |
+| [YARN-7084](https://issues.apache.org/jira/browse/YARN-7084) | TestSchedulingMonitor#testRMStarts fails sporadically |  Major | . | Jason Lowe | Jason Lowe |
+| [HDFS-12271](https://issues.apache.org/jira/browse/HDFS-12271) | Incorrect statement in Downgrade section of HDFS Rolling Upgrade document |  Minor | documentation | Nanda kumar | Nanda kumar |
+| [HDFS-12576](https://issues.apache.org/jira/browse/HDFS-12576) | JournalNodes are getting started, even though dfs.namenode.shared.edits.dir is not configured |  Major | journal-node | Bharat Viswanadham | Bharat Viswanadham |
+| [HDFS-11968](https://issues.apache.org/jira/browse/HDFS-11968) | ViewFS: StoragePolicies commands fail with HDFS federation |  Major | hdfs | Mukul Kumar Singh | Mukul Kumar Singh |
+| [YARN-6943](https://issues.apache.org/jira/browse/YARN-6943) | Update Yarn to YARN in documentation |  Minor | documentation | Miklos Szegedi | Chetna Chaudhari |
+| [YARN-7211](https://issues.apache.org/jira/browse/YARN-7211) | AMSimulator in SLS doesn't work due to the refactoring of responseId |  Blocker | scheduler-load-simulator | Yufei Gu | Botong Huang |
+| [HADOOP-14459](https://issues.apache.org/jira/browse/HADOOP-14459) | SerializationFactory shouldn't throw a NullPointerException if the serializations list is not defined |  Minor | . | Nandor Kollar | Nandor Kollar |
+| [YARN-7279](https://issues.apache.org/jira/browse/YARN-7279) | Fix typo in helper message of ContainerLauncher |  Trivial | . | Elek, Marton | Elek, Marton |
+| [YARN-7258](https://issues.apache.org/jira/browse/YARN-7258) | Add Node and Rack Hints to Opportunistic Scheduler |  Major | . | Arun Suresh | kartheek muthyala |
+| [YARN-7285](https://issues.apache.org/jira/browse/YARN-7285) | ContainerExecutor always launches with priorities due to yarn-default property |  Minor | nodemanager | Jason Lowe | Jason Lowe |
+| [HDFS-12494](https://issues.apache.org/jira/browse/HDFS-12494) | libhdfs SIGSEGV in setTLSExceptionStrings |  Major | libhdfs | John Zhuge | John Zhuge |
+| [YARN-7245](https://issues.apache.org/jira/browse/YARN-7245) | Max AM Resource column in Active Users Info section of Capacity Scheduler UI page should be updated per-user |  Major | capacity scheduler, yarn | Eric Payne | Eric Payne |
+| [HDFS-11575](https://issues.apache.org/jira/browse/HDFS-11575) | Supporting HDFS NFS gateway with Federated HDFS |  Major | nfs | Mukul Kumar Singh | Mukul Kumar Singh |
+| [HADOOP-14910](https://issues.apache.org/jira/browse/HADOOP-14910) | Upgrade netty-all jar to latest 4.0.x.Final |  Critical | . | Vinayakumar B | Vinayakumar B |
+| [MAPREDUCE-6951](https://issues.apache.org/jira/browse/MAPREDUCE-6951) | Improve exception message when mapreduce.jobhistory.webapp.address is in wrong format |  Major | applicationmaster | Prabhu Joseph | Prabhu Joseph |
+| [HDFS-12627](https://issues.apache.org/jira/browse/HDFS-12627) | Fix typo in DFSAdmin command output |  Trivial | . | Ajay Kumar | Ajay Kumar |
+| [HADOOP-13102](https://issues.apache.org/jira/browse/HADOOP-13102) | Update GroupsMapping documentation to reflect the new changes |  Major | documentation | Anu Engineer | Esther Kundin |
+| [YARN-7270](https://issues.apache.org/jira/browse/YARN-7270) | Fix unsafe casting from long to int for class Resource and its sub-classes |  Major | resourcemanager | Yufei Gu | Yufei Gu |
+| [YARN-7124](https://issues.apache.org/jira/browse/YARN-7124) | LogAggregationTFileController deletes/renames while file is open |  Critical | nodemanager | Daryn Sharp | Jason Lowe |
+| [YARN-7341](https://issues.apache.org/jira/browse/YARN-7341) | TestRouterWebServiceUtil#testMergeMetrics is flaky |  Major | federation | Robert Kanter | Robert Kanter |
+| [HADOOP-14958](https://issues.apache.org/jira/browse/HADOOP-14958) | CLONE - Fix source-level compatibility after HADOOP-11252 |  Blocker | . | Junping Du | Junping Du |
+| [YARN-7294](https://issues.apache.org/jira/browse/YARN-7294) | TestSignalContainer#testSignalRequestDeliveryToNM fails intermittently with Fair scheduler |  Major | . | Miklos Szegedi | Miklos Szegedi |
+| [YARN-7355](https://issues.apache.org/jira/browse/YARN-7355) | TestDistributedShell should be scheduler agnostic |  Major | . | Haibo Chen | Haibo Chen |
+| [HDFS-12683](https://issues.apache.org/jira/browse/HDFS-12683) | DFSZKFailOverController: re-order logic for logging exceptions |  Major | . | Bharat Viswanadham | Bharat Viswanadham |
+| [HADOOP-14966](https://issues.apache.org/jira/browse/HADOOP-14966) | Handle JDK-8071638 for hadoop-common |  Blocker | . | Bibin A Chundatt | Bibin A Chundatt |
+| [HDFS-12695](https://issues.apache.org/jira/browse/HDFS-12695) | Add a link to HDFS router federation document in site.xml |  Minor | documentation | Yiqun Lin | Yiqun Lin |
+| [YARN-7385](https://issues.apache.org/jira/browse/YARN-7385) | TestFairScheduler#testUpdateDemand and TestFSLeafQueue#testUpdateDemand are failing with NPE |  Major | test | Robert Kanter | Yufei Gu |
+| [HADOOP-14977](https://issues.apache.org/jira/browse/HADOOP-14977) | Xenial dockerfile needs ant in main build for findbugs |  Trivial | build, test | Allen Wittenauer | Akira Ajisaka |
+| [HDFS-12579](https://issues.apache.org/jira/browse/HDFS-12579) | JournalNodeSyncer should use fromUrl field of EditLogManifestResponse to construct servlet Url |  Major | . | Hanisha Koneru | Hanisha Koneru |
+| [YARN-7375](https://issues.apache.org/jira/browse/YARN-7375) | Possible NPE in RMWebapp when HA is enabled and the active RM fails |  Major | . | Chandni Singh | Chandni Singh |
+| [YARN-6747](https://issues.apache.org/jira/browse/YARN-6747) | TestFSAppStarvation.testPreemptionEnable fails intermittently |  Major | . | Sunil G | Miklos Szegedi |
+| [YARN-7336](https://issues.apache.org/jira/browse/YARN-7336) | Unsafe cast from long to int in Resource.hashCode() method |  Critical | resourcemanager | Daniel Templeton | Miklos Szegedi |
+| [HADOOP-14990](https://issues.apache.org/jira/browse/HADOOP-14990) | Clean up jdiff xml files added for 2.8.2 release |  Blocker | . | Subru Krishnan | Junping Du |
+| [HADOOP-14980](https://issues.apache.org/jira/browse/HADOOP-14980) | [JDK9] Upgrade maven-javadoc-plugin to 3.0.0-M1 |  Minor | build | ligongyi | ligongyi |
+| [HDFS-12714](https://issues.apache.org/jira/browse/HDFS-12714) | Hadoop 3 missing fix for HDFS-5169 |  Major | native | Joe McDonnell | Joe McDonnell |
+| [YARN-7146](https://issues.apache.org/jira/browse/YARN-7146) | Many RM unit tests failing with FairScheduler |  Major | test | Robert Kanter | Robert Kanter |
+| [YARN-7396](https://issues.apache.org/jira/browse/YARN-7396) | NPE when accessing container logs due to null dirsHandler |  Major | . | Jonathan Hung | Jonathan Hung |
+| [YARN-7370](https://issues.apache.org/jira/browse/YARN-7370) | Preemption properties should be refreshable |  Major | capacity scheduler, scheduler preemption | Eric Payne | Gergely Novák |
+| [YARN-7400](https://issues.apache.org/jira/browse/YARN-7400) | incorrect log preview displayed in jobhistory server ui |  Major | yarn | Santhosh B Gowda | Xuan Gong |
+| [HADOOP-15013](https://issues.apache.org/jira/browse/HADOOP-15013) | Fix ResourceEstimator findbugs issues |  Blocker | . | Allen Wittenauer | Arun Suresh |
+| [YARN-7432](https://issues.apache.org/jira/browse/YARN-7432) | Fix DominantResourceFairnessPolicy serializable findbugs issues |  Blocker | . | Allen Wittenauer | Daniel Templeton |
+| [YARN-7434](https://issues.apache.org/jira/browse/YARN-7434) | Router getApps REST invocation fails with multiple RMs |  Critical | . | Subru Krishnan | Íñigo Goiri |
+| [HADOOP-15015](https://issues.apache.org/jira/browse/HADOOP-15015) | TestConfigurationFieldsBase to use SLF4J for logging |  Trivial | conf, test | Steve Loughran | Steve Loughran |
+| [YARN-7428](https://issues.apache.org/jira/browse/YARN-7428) | Add containerId to Localizer failed logs |  Minor | nodemanager | Prabhu Joseph | Prabhu Joseph |
+| [YARN-4793](https://issues.apache.org/jira/browse/YARN-4793) | [Umbrella] Simplified API layer for services and beyond |  Major | . | Vinod Kumar Vavilapalli |  |
+| [HADOOP-15018](https://issues.apache.org/jira/browse/HADOOP-15018) | Update JAVA\_HOME in create-release for Xenial Dockerfile |  Blocker | build | Andrew Wang | Andrew Wang |
+| [HDFS-12788](https://issues.apache.org/jira/browse/HDFS-12788) | Reset the upload button when file upload fails |  Critical | ui, webhdfs | Brahma Reddy Battula | Brahma Reddy Battula |
+| [YARN-7453](https://issues.apache.org/jira/browse/YARN-7453) | Fix issue where RM fails to switch to active after first successful start |  Blocker | resourcemanager | Rohith Sharma K S | Rohith Sharma K S |
+| [YARN-7458](https://issues.apache.org/jira/browse/YARN-7458) | TestContainerManagerSecurity is still flaky |  Major | test | Robert Kanter | Robert Kanter |
+| [YARN-7454](https://issues.apache.org/jira/browse/YARN-7454) | RMAppAttemptMetrics#getAggregateResourceUsage can NPE due to double lookup |  Minor | resourcemanager | Jason Lowe | Jason Lowe |
+| [YARN-7465](https://issues.apache.org/jira/browse/YARN-7465) | start-yarn.sh fails to start ResourceManager unless running as root |  Blocker | . | Sean Mackrory |  |
+| [HDFS-12791](https://issues.apache.org/jira/browse/HDFS-12791) | NameNode Fsck http Connection can timeout for directories with multiple levels |  Major | tools | Mukul Kumar Singh | Mukul Kumar Singh |
+| [HDFS-12797](https://issues.apache.org/jira/browse/HDFS-12797) | Add Test for NFS mount of not supported filesystems like (file:///) |  Minor | nfs | Mukul Kumar Singh | Mukul Kumar Singh |
+| [HADOOP-14929](https://issues.apache.org/jira/browse/HADOOP-14929) | Clean up usage of decodeComponent and use QueryStringDecoder from Netty |  Major | . | Bharat Viswanadham | Bharat Viswanadham |
+| [HDFS-12498](https://issues.apache.org/jira/browse/HDFS-12498) | Journal Syncer is not started in Federated + HA cluster |  Major | federation, journal-node | Bharat Viswanadham | Bharat Viswanadham |
+| [YARN-7452](https://issues.apache.org/jira/browse/YARN-7452) | Decommissioning node default value to be zero in new YARN UI |  Trivial | yarn-ui-v2 | Vasudevan Skm | Vasudevan Skm |
+| [YARN-7445](https://issues.apache.org/jira/browse/YARN-7445) | Render Applications and Services page with filters in new YARN UI |  Major | yarn-ui-v2 | Vasudevan Skm | Vasudevan Skm |
+| [HADOOP-15031](https://issues.apache.org/jira/browse/HADOOP-15031) | Fix javadoc issues in Hadoop Common |  Minor | common | Mukul Kumar Singh | Mukul Kumar Singh |
+| [HDFS-12705](https://issues.apache.org/jira/browse/HDFS-12705) | WebHdfsFileSystem exceptions should retain the caused by exception |  Major | hdfs | Daryn Sharp | Hanisha Koneru |
+| [YARN-7462](https://issues.apache.org/jira/browse/YARN-7462) | Render outstanding resource requests on application page of new YARN UI |  Major | yarn-ui-v2 | Vasudevan Skm | Vasudevan Skm |
+| [YARN-7464](https://issues.apache.org/jira/browse/YARN-7464) | Introduce filters in Nodes page of new YARN UI |  Major | yarn-ui-v2 | Vasudevan Skm | Vasudevan Skm |
+| [YARN-7361](https://issues.apache.org/jira/browse/YARN-7361) | Improve the docker container runtime documentation |  Major | . | Shane Kumpf | Shane Kumpf |
+| [YARN-7492](https://issues.apache.org/jira/browse/YARN-7492) | Set up SASS for new YARN UI styling |  Major | yarn-ui-v2 | Vasudevan Skm | Vasudevan Skm |
+| [YARN-7469](https://issues.apache.org/jira/browse/YARN-7469) | Capacity Scheduler Intra-queue preemption: User can starve if newest app is exactly at user limit |  Major | capacity scheduler, yarn | Eric Payne | Eric Payne |
+| [HADOOP-14982](https://issues.apache.org/jira/browse/HADOOP-14982) | Clients using FailoverOnNetworkExceptionRetry can go into a loop if they're used without authenticating with kerberos in HA env |  Major | common | Peter Bacsko | Peter Bacsko |
+| [YARN-7489](https://issues.apache.org/jira/browse/YARN-7489) | ConcurrentModificationException in RMAppImpl#getRMAppMetrics |  Major | capacityscheduler | Tao Yang | Tao Yang |
+| [YARN-7525](https://issues.apache.org/jira/browse/YARN-7525) | Incorrect query parameters in cluster nodes REST API document |  Minor | documentation | Tao Yang | Tao Yang |
+| [HDFS-12813](https://issues.apache.org/jira/browse/HDFS-12813) | RequestHedgingProxyProvider can hide Exception thrown from the Namenode for proxy size of 1 |  Major | ha | Mukul Kumar Singh | Mukul Kumar Singh |
+| [HADOOP-15046](https://issues.apache.org/jira/browse/HADOOP-15046) | Document Apache Hadoop does not support Java 9 in BUILDING.txt |  Major | documentation | Akira Ajisaka | Hanisha Koneru |
+| [YARN-7513](https://issues.apache.org/jira/browse/YARN-7513) | Remove scheduler lock in FSAppAttempt.getWeight() |  Minor | fairscheduler | Wilfred Spiegelenburg | Wilfred Spiegelenburg |
+| [YARN-7390](https://issues.apache.org/jira/browse/YARN-7390) | All reservation related test cases failed when TestYarnClient runs against Fair Scheduler. |  Major | fairscheduler, reservation system | Yufei Gu | Yufei Gu |
+| [YARN-7290](https://issues.apache.org/jira/browse/YARN-7290) | Method canContainerBePreempted can return true when it shouldn't |  Major | fairscheduler | Steven Rand | Steven Rand |
+| [MAPREDUCE-7014](https://issues.apache.org/jira/browse/MAPREDUCE-7014) | Fix javadoc errors in JDK 1.8 |  Major | . | Rohith Sharma K S | Steve Loughran |
+| [YARN-7363](https://issues.apache.org/jira/browse/YARN-7363) | ContainerLocalizer doesn't have a valid log4j config when using LinuxContainerExecutor |  Major | nodemanager | Yufei Gu | Yufei Gu |
+| [HDFS-12754](https://issues.apache.org/jira/browse/HDFS-12754) | Lease renewal can hit a deadlock |  Major | . | Kuhu Shukla | Kuhu Shukla |
+| [YARN-7499](https://issues.apache.org/jira/browse/YARN-7499) | Layout changes to Application details page in new YARN UI |  Major | yarn-ui-v2 | Vasudevan Skm | Vasudevan Skm |
+| [HDFS-12857](https://issues.apache.org/jira/browse/HDFS-12857) | StoragePolicyAdmin should support schema based path |  Major | namenode | Surendra Singh Lilhore | Surendra Singh Lilhore |
+| [HDFS-12832](https://issues.apache.org/jira/browse/HDFS-12832) | INode.getFullPathName may throw ArrayIndexOutOfBoundsException lead to NameNode exit |  Critical | namenode | DENG FEI | Konstantin Shvachko |
+| [HADOOP-15054](https://issues.apache.org/jira/browse/HADOOP-15054) | upgrade hadoop dependency on commons-codec to 1.11 |  Major | . | PJ Fanning | Bharat Viswanadham |
+| [HDFS-11754](https://issues.apache.org/jira/browse/HDFS-11754) | Make FsServerDefaults cache configurable. |  Minor | . | Rushabh S Shah | Mikhail Erofeev |
+| [HADOOP-15042](https://issues.apache.org/jira/browse/HADOOP-15042) | Azure PageBlobInputStream.skip() can return negative value when numberOfPagesRemaining is 0 |  Minor | fs/azure | Rajesh Balamohan | Rajesh Balamohan |
+| [YARN-7509](https://issues.apache.org/jira/browse/YARN-7509) | AsyncScheduleThread and ResourceCommitterService are still running after RM is transitioned to standby |  Critical | . | Tao Yang | Tao Yang |
+| [HDFS-12681](https://issues.apache.org/jira/browse/HDFS-12681) | Make HdfsLocatedFileStatus a subtype of LocatedFileStatus |  Major | . | Chris Douglas | Chris Douglas |
+| [YARN-7558](https://issues.apache.org/jira/browse/YARN-7558) | "yarn logs" command fails to get logs for running containers if UI authentication is enabled. |  Critical | . | Namit Maheshwari | Xuan Gong |
+| [HDFS-12638](https://issues.apache.org/jira/browse/HDFS-12638) | Delete copy-on-truncate block along with the original block, when deleting a file being truncated |  Blocker | hdfs | Jiandan Yang | Konstantin Shvachko |
+| [YARN-7546](https://issues.apache.org/jira/browse/YARN-7546) | Layout changes in Queue UI to show queue details on right pane |  Major | yarn-ui-v2 | Vasudevan Skm | Vasudevan Skm |
+| [HDFS-12836](https://issues.apache.org/jira/browse/HDFS-12836) | startTxId could be greater than endTxId when tailing in-progress edit log |  Major | hdfs | Chao Sun | Chao Sun |
+| [YARN-4813](https://issues.apache.org/jira/browse/YARN-4813) | TestRMWebServicesDelegationTokenAuthentication.testDoAs fails intermittently |  Major | resourcemanager | Daniel Templeton | Gergo Repas |
+| [MAPREDUCE-5124](https://issues.apache.org/jira/browse/MAPREDUCE-5124) | AM lacks flow control for task events |  Major | mr-am | Jason Lowe | Peter Bacsko |
+| [YARN-7589](https://issues.apache.org/jira/browse/YARN-7589) | TestPBImplRecords fails with NullPointerException |  Major | . | Jason Lowe | Daniel Templeton |
+| [YARN-7455](https://issues.apache.org/jira/browse/YARN-7455) | quote\_and\_append\_arg can overflow buffer |  Major | nodemanager | Jason Lowe | Jim Brennan |
+| [HADOOP-14600](https://issues.apache.org/jira/browse/HADOOP-14600) | LocatedFileStatus constructor forces RawLocalFS to exec a process to get the permissions |  Major | fs | Steve Loughran | Ping Liu |
+| [YARN-7594](https://issues.apache.org/jira/browse/YARN-7594) | TestNMWebServices#testGetNMResourceInfo fails on trunk |  Major | nodemanager, webapp | Gergely Novák | Gergely Novák |
+| [YARN-5594](https://issues.apache.org/jira/browse/YARN-5594) | Handle old RMDelegationToken format when recovering RM |  Major | resourcemanager | Tatyana But | Robert Kanter |
+| [HADOOP-15058](https://issues.apache.org/jira/browse/HADOOP-15058) | create-release site build outputs dummy shaded jars due to skipShade |  Blocker | . | Andrew Wang | Andrew Wang |
+| [HADOOP-14985](https://issues.apache.org/jira/browse/HADOOP-14985) | Remove subversion related code from VersionInfoMojo.java |  Minor | build | Akira Ajisaka | Ajay Kumar |
+| [YARN-7586](https://issues.apache.org/jira/browse/YARN-7586) | Application Placement should be done before ACL checks in ResourceManager |  Blocker | . | Suma Shivaprasad | Suma Shivaprasad |
+| [HDFS-11751](https://issues.apache.org/jira/browse/HDFS-11751) | DFSZKFailoverController daemon exits with wrong status code |  Major | auto-failover | Doris Gu | Bharat Viswanadham |
+| [HADOOP-15080](https://issues.apache.org/jira/browse/HADOOP-15080) | Aliyun OSS: update oss sdk from 2.8.1 to 2.8.3 to remove its dependency on Cat-x "json-lib" |  Blocker | fs/oss | Chris Douglas | SammiChen |
+| [HADOOP-15098](https://issues.apache.org/jira/browse/HADOOP-15098) | TestClusterTopology#testChooseRandom fails intermittently |  Major | test | Zsolt Venczel | Zsolt Venczel |
+| [YARN-7608](https://issues.apache.org/jira/browse/YARN-7608) | Incorrect sTarget column causing DataTable warning on RM application and scheduler web page |  Major | resourcemanager, webapp | Weiwei Yang | Gergely Novák |
+| [HDFS-12891](https://issues.apache.org/jira/browse/HDFS-12891) | Do not invalidate blocks if toInvalidate is empty |  Major | . | Zsolt Venczel | Zsolt Venczel |
+| [YARN-7635](https://issues.apache.org/jira/browse/YARN-7635) | TestRMWebServicesSchedulerActivities fails in trunk |  Major | test | Sunil G | Sunil G |
+| [HDFS-12833](https://issues.apache.org/jira/browse/HDFS-12833) | DistCp: update the usage of the delete option for its dependency on the update and overwrite options |  Minor | distcp, hdfs | Harshakiran Reddy | usharani |
+| [YARN-7647](https://issues.apache.org/jira/browse/YARN-7647) | NM prints inappropriate error log when node-labels is enabled |  Minor | . | Yang Wang | Yang Wang |
+| [YARN-7536](https://issues.apache.org/jira/browse/YARN-7536) | em-table improvement for better filtering in new YARN UI |  Minor | yarn-ui-v2 | Vasudevan Skm | Vasudevan Skm |
+| [HDFS-12907](https://issues.apache.org/jira/browse/HDFS-12907) | Allow read-only access to reserved raw for non-superusers |  Major | namenode | Daryn Sharp | Rushabh S Shah |
+| [HDFS-12881](https://issues.apache.org/jira/browse/HDFS-12881) | Output streams closed with IOUtils suppressing write errors |  Major | . | Jason Lowe | Ajay Kumar |
+| [YARN-7595](https://issues.apache.org/jira/browse/YARN-7595) | Container launching code suppresses close exceptions after writes |  Major | nodemanager | Jason Lowe | Jim Brennan |
+| [HADOOP-15085](https://issues.apache.org/jira/browse/HADOOP-15085) | Output streams closed with IOUtils suppressing write errors |  Major | . | Jason Lowe | Jim Brennan |
+| [YARN-7629](https://issues.apache.org/jira/browse/YARN-7629) | TestContainerLaunch# fails after YARN-7381 |  Major | . | Jason Lowe | Jason Lowe |
+| [YARN-7664](https://issues.apache.org/jira/browse/YARN-7664) | Several javadoc errors |  Blocker | . | Sean Mackrory | Sean Mackrory |
+| [HADOOP-15123](https://issues.apache.org/jira/browse/HADOOP-15123) | KDiag tries to load krb5.conf from KRB5CCNAME instead of KRB5\_CONFIG |  Minor | security | Vipin Rathor | Vipin Rathor |
+| [HADOOP-15109](https://issues.apache.org/jira/browse/HADOOP-15109) | TestDFSIO -read -random doesn't work on file sized 4GB |  Minor | fs, test | zhoutai.zt | Ajay Kumar |
+| [YARN-7661](https://issues.apache.org/jira/browse/YARN-7661) | NodeManager metrics return wrong value after updating node resource |  Major | . | Yang Wang | Yang Wang |
+| [HDFS-12930](https://issues.apache.org/jira/browse/HDFS-12930) | Remove the extra space in HdfsImageViewer.md |  Trivial | documentation | Yiqun Lin | Rahul Pathak |
+| [YARN-7662](https://issues.apache.org/jira/browse/YARN-7662) | [Atsv2] Define new set of configurations for reader and collectors to bind. |  Major | . | Rohith Sharma K S | Rohith Sharma K S |
+| [YARN-7466](https://issues.apache.org/jira/browse/YARN-7466) | ResourceRequest has a different default for allocationRequestId than Container |  Major | . | Chandni Singh | Chandni Singh |
+| [YARN-7674](https://issues.apache.org/jira/browse/YARN-7674) | Update Timeline Reader web app address in UI2 |  Major | . | Rohith Sharma K S | Sunil G |
+| [YARN-7577](https://issues.apache.org/jira/browse/YARN-7577) | Unit Fail: TestAMRestart#testPreemptedAMRestartOnRMRestart |  Major | . | Miklos Szegedi | Miklos Szegedi |
+| [HDFS-12949](https://issues.apache.org/jira/browse/HDFS-12949) | Fix findbugs warning in ImageWriter.java |  Major | . | Akira Ajisaka | Akira Ajisaka |
+| [HDFS-12938](https://issues.apache.org/jira/browse/HDFS-12938) | TestErasureCodingCLI testAll failing consistently. |  Major | erasure-coding, hdfs | Rushabh S Shah | Ajay Kumar |
+| [HDFS-12951](https://issues.apache.org/jira/browse/HDFS-12951) | Incorrect javadoc in SaslDataTransferServer.java#receive |  Major | encryption | Mukul Kumar Singh | Mukul Kumar Singh |
+| [HDFS-12959](https://issues.apache.org/jira/browse/HDFS-12959) | Fix TestOpenFilesWithSnapshot redundant configurations |  Minor | hdfs | Manoj Govindassamy | Manoj Govindassamy |
+| [YARN-7542](https://issues.apache.org/jira/browse/YARN-7542) | Fix issue that causes some Running Opportunistic Containers to be recovered as PAUSED |  Major | . | Arun Suresh | Sampada Dehankar |
+| [HDFS-12915](https://issues.apache.org/jira/browse/HDFS-12915) | Fix findbugs warning in INodeFile$HeaderFormat.getBlockLayoutRedundancy |  Major | namenode | Wei-Chiu Chuang | Chris Douglas |
+| [YARN-7555](https://issues.apache.org/jira/browse/YARN-7555) | Support multiple resource types in YARN native services |  Critical | yarn-native-services | Wangda Tan | Wangda Tan |
+| [HADOOP-15122](https://issues.apache.org/jira/browse/HADOOP-15122) | Lock down version of doxia-module-markdown plugin |  Blocker | . | Elek, Marton | Elek, Marton |
+| [HADOOP-15143](https://issues.apache.org/jira/browse/HADOOP-15143) | NPE due to Invalid KerberosTicket in UGI |  Major | . | Jitendra Nath Pandey | Mukul Kumar Singh |
+| [HADOOP-15152](https://issues.apache.org/jira/browse/HADOOP-15152) | Typo in javadoc of ReconfigurableBase#reconfigurePropertyImpl |  Trivial | common | Nanda kumar | Nanda kumar |
+| [HADOOP-15155](https://issues.apache.org/jira/browse/HADOOP-15155) | Error in javadoc of ReconfigurableBase#reconfigureProperty |  Minor | . | Ajay Kumar | Ajay Kumar |
+| [YARN-7585](https://issues.apache.org/jira/browse/YARN-7585) | NodeManager should go unhealthy when state store throws DBException |  Major | nodemanager | Wilfred Spiegelenburg | Wilfred Spiegelenburg |
+| [YARN-6894](https://issues.apache.org/jira/browse/YARN-6894) | RM Apps API returns only active apps when the queue query parameter is used |  Minor | resourcemanager, restapi | Grant Sohn | Gergely Novák |
+| [YARN-7692](https://issues.apache.org/jira/browse/YARN-7692) | Skip validating priority acls while recovering applications |  Blocker | resourcemanager | Charan Hebri | Sunil G |
+| [MAPREDUCE-7028](https://issues.apache.org/jira/browse/MAPREDUCE-7028) | Concurrent task progress updates causing NPE in Application Master |  Blocker | mr-am | Gergo Repas | Gergo Repas |
+| [YARN-7602](https://issues.apache.org/jira/browse/YARN-7602) | NM should reference the singleton JvmMetrics instance |  Major | nodemanager | Haibo Chen | Haibo Chen |
+| [HADOOP-15093](https://issues.apache.org/jira/browse/HADOOP-15093) | Deprecation of yarn.resourcemanager.zk-address is undocumented |  Major | documentation | Namit Maheshwari | Ajay Kumar |
+| [HDFS-12931](https://issues.apache.org/jira/browse/HDFS-12931) | Handle InvalidEncryptionKeyException during DistributedFileSystem#getFileChecksum |  Major | encryption | Mukul Kumar Singh | Mukul Kumar Singh |
+| [HDFS-12948](https://issues.apache.org/jira/browse/HDFS-12948) | DiskBalancer report command top option should only take positive numeric values |  Minor | diskbalancer | Namit Maheshwari | Shashikant Banerjee |
+| [HDFS-12913](https://issues.apache.org/jira/browse/HDFS-12913) | Fix "mini cluster not yet active" issue in TestDNFencingWithReplication.testFencingStress |  Major | . | Zsolt Venczel | Zsolt Venczel |
+| [HDFS-12987](https://issues.apache.org/jira/browse/HDFS-12987) | Document disabling the lazy persist file scrubber. |  Trivial | documentation, hdfs | Karthik Palanisamy | Karthik Palanisamy |
+| [HDFS-12860](https://issues.apache.org/jira/browse/HDFS-12860) | StripedBlockUtil#getRangesInternalBlocks throws exception for the block group size larger than 2GB |  Major | erasure-coding | Lei (Eddy) Xu | Lei (Eddy) Xu |
+| [YARN-7619](https://issues.apache.org/jira/browse/YARN-7619) | Max AM Resource value in Capacity Scheduler UI has to be refreshed for every user |  Major | capacity scheduler, yarn | Eric Payne | Eric Payne |
+| [YARN-7645](https://issues.apache.org/jira/browse/YARN-7645) | TestContainerResourceUsage#testUsageAfterAMRestartWithMultipleContainers is flaky with FairScheduler |  Major | test | Robert Kanter | Robert Kanter |
+| [YARN-7699](https://issues.apache.org/jira/browse/YARN-7699) | queueUsagePercentage is coming as INF for getApp REST api call |  Major | webapp | Sunil G | Sunil G |
+| [HDFS-12985](https://issues.apache.org/jira/browse/HDFS-12985) | NameNode crashes during restart after an OpenForWrite file present in the Snapshot got deleted |  Major | hdfs | Manoj Govindassamy | Manoj Govindassamy |
+| [YARN-4227](https://issues.apache.org/jira/browse/YARN-4227) | Ignore expired containers from removed nodes in FairScheduler |  Critical | fairscheduler | Wilfred Spiegelenburg | Wilfred Spiegelenburg |
+| [YARN-7718](https://issues.apache.org/jira/browse/YARN-7718) | DistributedShell failed to specify resource other than memory/vcores from container\_resources |  Critical | . | Wangda Tan | Wangda Tan |
+| [YARN-7508](https://issues.apache.org/jira/browse/YARN-7508) | NPE in FiCaSchedulerApp when debug log enabled in async-scheduling mode |  Major | capacityscheduler | Tao Yang | Tao Yang |
+| [YARN-7663](https://issues.apache.org/jira/browse/YARN-7663) | RMAppImpl: Invalid event: START at KILLED |  Minor | resourcemanager | lujie | lujie |
+| [YARN-6948](https://issues.apache.org/jira/browse/YARN-6948) | Invalid event: ATTEMPT\_ADDED at FINAL\_SAVING |  Minor | yarn | lujie | lujie |
+| [HDFS-12994](https://issues.apache.org/jira/browse/HDFS-12994) | TestReconstructStripedFile.testNNSendsErasureCodingTasks fails due to socket timeout |  Major | erasure-coding | Lei (Eddy) Xu | Lei (Eddy) Xu |
+| [YARN-7665](https://issues.apache.org/jira/browse/YARN-7665) | Allow FS scheduler state dump to be turned on/off separately from FS debug log |  Major | . | Wilfred Spiegelenburg | Wilfred Spiegelenburg |
+| [YARN-7689](https://issues.apache.org/jira/browse/YARN-7689) | TestRMContainerAllocator fails after YARN-6124 |  Major | scheduler | Wilfred Spiegelenburg | Wilfred Spiegelenburg |
+| [HADOOP-15163](https://issues.apache.org/jira/browse/HADOOP-15163) | Fix S3ACommitter documentation |  Minor | documentation, fs/s3 | Alessandro Andrioni | Alessandro Andrioni |
+| [HADOOP-15060](https://issues.apache.org/jira/browse/HADOOP-15060) | TestShellBasedUnixGroupsMapping.testFiniteGroupResolutionTime flaky |  Major | . | Miklos Szegedi | Miklos Szegedi |
+| [YARN-7735](https://issues.apache.org/jira/browse/YARN-7735) | Fix typo in YARN documentation |  Minor | documentation | Takanobu Asanuma | Takanobu Asanuma |
+| [YARN-7727](https://issues.apache.org/jira/browse/YARN-7727) | Incorrect log levels in few logs with QueuePriorityContainerCandidateSelector |  Minor | yarn | Prabhu Joseph | Prabhu Joseph |
+| [HDFS-12984](https://issues.apache.org/jira/browse/HDFS-12984) | BlockPoolSlice can leak in a mini dfs cluster |  Major | . | Robert Joseph Evans | Ajay Kumar |
+| [HDFS-11915](https://issues.apache.org/jira/browse/HDFS-11915) | Sync rbw dir on the first hsync() to avoid file lost on power failure |  Critical | . | Kanaka Kumar Avvaru | Vinayakumar B |
+| [YARN-7731](https://issues.apache.org/jira/browse/YARN-7731) | RegistryDNS should handle upstream DNS returning CNAME |  Major | . | Billie Rinaldi | Eric Yang |
+| [YARN-7671](https://issues.apache.org/jira/browse/YARN-7671) | Improve diagnostic message for stopping a YARN native service |  Major | . | Yesha Vora | Chandni Singh |
+| [YARN-7705](https://issues.apache.org/jira/browse/YARN-7705) | Create the container log directory with correct sticky bit in C code |  Major | nodemanager | Yufei Gu | Yufei Gu |
+| [HDFS-13016](https://issues.apache.org/jira/browse/HDFS-13016) | globStatus javadoc refers to glob pattern as "regular expression" |  Trivial | documentation, hdfs | Ryanne Dolan | Mukul Kumar Singh |
+| [HADOOP-15172](https://issues.apache.org/jira/browse/HADOOP-15172) | Fix the javadoc warning in WriteOperationHelper.java |  Minor | documentation, fs/s3 | Mukul Kumar Singh | Mukul Kumar Singh |
+| [YARN-7479](https://issues.apache.org/jira/browse/YARN-7479) | TestContainerManagerSecurity.testContainerManager[Simple] flaky in trunk |  Major | test | Botong Huang | Akira Ajisaka |
+| [HDFS-13004](https://issues.apache.org/jira/browse/HDFS-13004) | TestLeaseRecoveryStriped#testLeaseRecovery is failing when safeLength is 0MB or larger than the test file |  Major | hdfs | Zsolt Venczel | Zsolt Venczel |
+| [HDFS-9049](https://issues.apache.org/jira/browse/HDFS-9049) | Make Datanode Netty reverse proxy port to be configurable |  Major | datanode | Vinayakumar B | Vinayakumar B |
+| [YARN-7758](https://issues.apache.org/jira/browse/YARN-7758) | Add an additional check to the validity of container and application ids passed to container-executor |  Major | nodemanager | Miklos Szegedi | Yufei Gu |
+| [YARN-7717](https://issues.apache.org/jira/browse/YARN-7717) | Add configuration consistency for module.enabled and docker.privileged-containers.enabled |  Major | . | Yesha Vora | Eric Badger |
+| [HADOOP-15150](https://issues.apache.org/jira/browse/HADOOP-15150) | In FsShell, UGI params should be overridden through env vars (-D arg) |  Major | . | Brahma Reddy Battula | Brahma Reddy Battula |
+| [YARN-7750](https://issues.apache.org/jira/browse/YARN-7750) | [UI2] Render time related fields in all pages to the browser timezone |  Major | yarn-ui-v2 | Vasudevan Skm | Vasudevan Skm |
+| [YARN-7740](https://issues.apache.org/jira/browse/YARN-7740) | Fix logging in the destroy yarn service CLI when the app does not exist, and some minor bugs |  Major | yarn-native-services | Yesha Vora | Jian He |
+| [YARN-7139](https://issues.apache.org/jira/browse/YARN-7139) | FairScheduler: finished applications are always restored to default queue |  Major | fairscheduler | Wilfred Spiegelenburg | Wilfred Spiegelenburg |
+| [YARN-7753](https://issues.apache.org/jira/browse/YARN-7753) | [UI2] Application logs has to be pulled from ATS 1.5 instead of ATS2 |  Major | yarn-ui-v2 | Sunil G | Sunil G |
+| [HADOOP-14788](https://issues.apache.org/jira/browse/HADOOP-14788) | Credentials readTokenStorageFile to stop wrapping IOEs in IOEs |  Minor | security | Steve Loughran | Ajay Kumar |
+| [HDFS-13039](https://issues.apache.org/jira/browse/HDFS-13039) | StripedBlockReader#createBlockReader leaks socket on IOException |  Critical | datanode, erasure-coding | Lei (Eddy) Xu | Lei (Eddy) Xu |
+| [HADOOP-15181](https://issues.apache.org/jira/browse/HADOOP-15181) | Typo in SecureMode.md |  Trivial | documentation | Masahiro Tanaka | Masahiro Tanaka |
+| [YARN-7738](https://issues.apache.org/jira/browse/YARN-7738) | CapacityScheduler: Support refresh maximum allocation for multiple resource types |  Blocker | . | Sumana Sathish | Wangda Tan |
+| [YARN-7766](https://issues.apache.org/jira/browse/YARN-7766) | Introduce a new config property for YARN Service dependency tarball location |  Major | applications, client, yarn-native-services | Gour Saha | Gour Saha |
+| [HDFS-12963](https://issues.apache.org/jira/browse/HDFS-12963) | Error log level in ShortCircuitRegistry#removeShm |  Minor | . | hu xiaodong | hu xiaodong |
+| [YARN-7796](https://issues.apache.org/jira/browse/YARN-7796) | Container-executor fails with segfault on certain OS configurations |  Major | nodemanager | Gergo Repas | Gergo Repas |
+| [YARN-7749](https://issues.apache.org/jira/browse/YARN-7749) | [UI2] GPU information tab in left hand side disappears when we click other tabs below |  Major | . | Sumana Sathish | Vasudevan Skm |
+| [YARN-7806](https://issues.apache.org/jira/browse/YARN-7806) | Distributed Shell should use timeline async APIs |  Major | distributed-shell | Sumana Sathish | Rohith Sharma K S |
+| [HDFS-13023](https://issues.apache.org/jira/browse/HDFS-13023) | Journal Sync does not work on a secure cluster |  Major | journal-node | Namit Maheshwari | Bharat Viswanadham |
+| [MAPREDUCE-7015](https://issues.apache.org/jira/browse/MAPREDUCE-7015) | Possible race condition in JHS if the job is not loaded |  Major | jobhistoryserver | Peter Bacsko | Peter Bacsko |
+| [YARN-7737](https://issues.apache.org/jira/browse/YARN-7737) | prelaunch.err file not found exception on container failure |  Major | . | Jonathan Hung | Keqiu Hu |
+| [YARN-7777](https://issues.apache.org/jira/browse/YARN-7777) | Fix user name format in YARN Registry DNS name |  Major | . | Jian He | Jian He |
+| [YARN-7628](https://issues.apache.org/jira/browse/YARN-7628) | [Documentation] Documenting the ability to disable elasticity at leaf queue |  Major | capacity scheduler | Zian Chen | Zian Chen |
+| [HDFS-13063](https://issues.apache.org/jira/browse/HDFS-13063) | Fix the incorrect spelling in HDFSHighAvailabilityWithQJM.md |  Trivial | documentation | Jianfei Jiang | Jianfei Jiang |
+| [YARN-7102](https://issues.apache.org/jira/browse/YARN-7102) | NM heartbeat stuck when responseId overflows MAX\_INT |  Critical | . | Botong Huang | Botong Huang |
+| [MAPREDUCE-7041](https://issues.apache.org/jira/browse/MAPREDUCE-7041) | MR should not try to clean up at first job attempt |  Major | . | Takanobu Asanuma | Gergo Repas |
+| [YARN-7742](https://issues.apache.org/jira/browse/YARN-7742) | [UI2] Duplicated containers are rendered per attempt |  Major | . | Rohith Sharma K S | Vasudevan Skm |
+| [YARN-7760](https://issues.apache.org/jira/browse/YARN-7760) | [UI2] Clicking 'Master Node' or link next to 'AM Node Web UI' under application's appAttempt page goes to OLD RM UI |  Major | . | Sumana Sathish | Vasudevan Skm |
+| [MAPREDUCE-7020](https://issues.apache.org/jira/browse/MAPREDUCE-7020) | Task timeout in uber mode can crash AM |  Major | mr-am | Akira Ajisaka | Peter Bacsko |
+| [YARN-7765](https://issues.apache.org/jira/browse/YARN-7765) | [Atsv2] GSSException: No valid credentials provided - Failed to find any Kerberos tgt thrown by Timelinev2Client & HBaseClient in NM |  Blocker | . | Sumana Sathish | Rohith Sharma K S |
+| [HDFS-13065](https://issues.apache.org/jira/browse/HDFS-13065) | TestErasureCodingMultipleRacks#testSkewedRack3 is failing |  Major | hdfs | Gabor Bota | Gabor Bota |
+| [HDFS-12974](https://issues.apache.org/jira/browse/HDFS-12974) | Exception message is not printed when creating an encryption zone fails with AuthorizationException |  Minor | encryption | fang zhenyi | fang zhenyi |
+| [YARN-7698](https://issues.apache.org/jira/browse/YARN-7698) | A misleading variable name in ApplicationAttemptEventDispatcher |  Minor | resourcemanager | Jinjiang Ling | Jinjiang Ling |
+| [YARN-7790](https://issues.apache.org/jira/browse/YARN-7790) | Improve Capacity Scheduler Async Scheduling to better handle node failures |  Critical | . | Sumana Sathish | Wangda Tan |
+| [MAPREDUCE-7036](https://issues.apache.org/jira/browse/MAPREDUCE-7036) | ASF License warning in hadoop-mapreduce-client |  Minor | test | Takanobu Asanuma | Takanobu Asanuma |
+| [HDFS-12528](https://issues.apache.org/jira/browse/HDFS-12528) | Add an option to not disable short-circuit reads on failures |  Major | hdfs-client, performance | Andre Araujo | Xiao Chen |
+| [YARN-7861](https://issues.apache.org/jira/browse/YARN-7861) | [UI2] Logs page shows duplicated containers with ATS |  Major | yarn-ui-v2 | Sunil G | Sunil G |
+| [YARN-7828](https://issues.apache.org/jira/browse/YARN-7828) | Clicking on a YARN service should take the user to the component tab |  Major | yarn-ui-v2 | Yesha Vora | Sunil G |
+| [HDFS-13061](https://issues.apache.org/jira/browse/HDFS-13061) | SaslDataTransferClient#checkTrustAndSend should not trust a partially trusted channel |  Major | . | Xiaoyu Yao | Ajay Kumar |
+| [HDFS-13060](https://issues.apache.org/jira/browse/HDFS-13060) | Adding a BlacklistBasedTrustedChannelResolver for TrustedChannelResolver |  Major | datanode, security | Xiaoyu Yao | Ajay Kumar |
+| [HDFS-12897](https://issues.apache.org/jira/browse/HDFS-12897) | getErasureCodingPolicy should handle .snapshot dir better |  Major | erasure-coding, hdfs, snapshots | Harshakiran Reddy | LiXin Ge |
+| [MAPREDUCE-7033](https://issues.apache.org/jira/browse/MAPREDUCE-7033) | Map outputs implicitly rely on permissive umask for shuffle |  Critical | mrv2 | Jason Lowe | Jason Lowe |
+| [HDFS-13048](https://issues.apache.org/jira/browse/HDFS-13048) | LowRedundancyReplicatedBlocks metric can be negative |  Major | metrics | Akira Ajisaka | Akira Ajisaka |
+| [HADOOP-15198](https://issues.apache.org/jira/browse/HADOOP-15198) | Correct the spelling in CopyFilter.java |  Major | tools/distcp | Mukul Kumar Singh | Mukul Kumar Singh |
+| [YARN-7831](https://issues.apache.org/jira/browse/YARN-7831) | YARN Service CLI should use hadoop.http.authentication.type to determine authentication method |  Major | . | Eric Yang | Eric Yang |
+| [YARN-7879](https://issues.apache.org/jira/browse/YARN-7879) | NM user is unable to access the application filecache due to permissions |  Critical | . | Shane Kumpf | Jason Lowe |
+| [HDFS-13100](https://issues.apache.org/jira/browse/HDFS-13100) | Handle IllegalArgumentException when GETSERVERDEFAULTS is not implemented in webhdfs. |  Critical | hdfs, webhdfs | Yongjun Zhang | Yongjun Zhang |
+| [YARN-7876](https://issues.apache.org/jira/browse/YARN-7876) | Localized jars that are expanded after localization are not fully copied |  Blocker | . | Miklos Szegedi | Miklos Szegedi |
+| [YARN-7849](https://issues.apache.org/jira/browse/YARN-7849) | TestMiniYarnClusterNodeUtilization#testUpdateNodeUtilization fails due to heartbeat sync error |  Major | test | Jason Lowe | Botong Huang |
+| [YARN-7801](https://issues.apache.org/jira/browse/YARN-7801) | AmFilterInitializer should addFilter after filling all parameters |  Critical | . | Sumana Sathish | Wangda Tan |
+| [YARN-7889](https://issues.apache.org/jira/browse/YARN-7889) | Missing kerberos token when check for RM REST API availability |  Major | yarn-native-services | Eric Yang | Eric Yang |
+| [YARN-7850](https://issues.apache.org/jira/browse/YARN-7850) | [UI2] Log Aggregation status to be displayed in Application Page |  Major | yarn-ui-v2 | Yesha Vora | Gergely Novák |
+| [YARN-7866](https://issues.apache.org/jira/browse/YARN-7866) | [UI2] Error to be displayed correctly while accessing kerberized cluster without kinit |  Major | yarn-ui-v2 | Sumana Sathish | Sunil G |
+| [YARN-7890](https://issues.apache.org/jira/browse/YARN-7890) | NPE during container relaunch |  Major | . | Billie Rinaldi | Jason Lowe |
+| [HDFS-11701](https://issues.apache.org/jira/browse/HDFS-11701) | NPE from Unresolved Host causes permanent DFSInputStream failures |  Major | hdfs-client | James Moore | Lokesh Jain |
+| [HDFS-13115](https://issues.apache.org/jira/browse/HDFS-13115) | In getNumUnderConstructionBlocks(), ignore the inodeIds for which the inodes have been deleted |  Major | . | Yongjun Zhang | Yongjun Zhang |
+| [HDFS-12935](https://issues.apache.org/jira/browse/HDFS-12935) | Get ambiguous result for DFSAdmin command in HA mode when only one namenode is up |  Major | tools | Jianfei Jiang | Jianfei Jiang |
+| [YARN-7827](https://issues.apache.org/jira/browse/YARN-7827) | Stop and Delete Yarn Service from RM UI fails with HTTP ERROR 404 |  Critical | yarn-ui-v2 | Yesha Vora | Sunil G |
+| [HDFS-13120](https://issues.apache.org/jira/browse/HDFS-13120) | Snapshot diff could be corrupted after concat |  Major | namenode, snapshots | Xiaoyu Yao | Xiaoyu Yao |
+| [YARN-7909](https://issues.apache.org/jira/browse/YARN-7909) | YARN service REST API returns charset=null when kerberos enabled |  Major | yarn-native-services | Eric Yang | Eric Yang |
+| [HDFS-13130](https://issues.apache.org/jira/browse/HDFS-13130) | Log object instance obtained incorrectly in SlowDiskTracker |  Minor | . | Jianfei Jiang | Jianfei Jiang |
+| [YARN-7906](https://issues.apache.org/jira/browse/YARN-7906) | Fix mvn site fails with error: Multiple sources of package comments found for package "o.a.h.y.client.api.impl" |  Blocker | build, documentation | Akira Ajisaka | Akira Ajisaka |
+| [YARN-5848](https://issues.apache.org/jira/browse/YARN-5848) | Remove unnecessary public/crossdomain.xml from YARN UIv2 sub project |  Blocker | yarn-ui-v2 | Allen Wittenauer | Sunil G |
+| [YARN-7697](https://issues.apache.org/jira/browse/YARN-7697) | NM goes down with OOM due to leak in log-aggregation |  Blocker | . | Santhosh B Gowda | Xuan Gong |
+| [YARN-7739](https://issues.apache.org/jira/browse/YARN-7739) | DefaultAMSProcessor should properly check customized resource types against minimum/maximum allocation |  Blocker | . | Wangda Tan | Wangda Tan |
+| [HDFS-10453](https://issues.apache.org/jira/browse/HDFS-10453) | ReplicationMonitor thread could get stuck for a long time due to the race between replication and deletion of the same file in a large cluster. |  Major | namenode | He Xiaoqiao | He Xiaoqiao |
+| [HDFS-8693](https://issues.apache.org/jira/browse/HDFS-8693) | refreshNamenodes does not support adding a new standby to a running DN |  Critical | datanode, ha | Jian Fang | Ajith S |
+| [MAPREDUCE-7052](https://issues.apache.org/jira/browse/MAPREDUCE-7052) | TestFixedLengthInputFormat#testFormatCompressedIn is flaky |  Major | client, test | Peter Bacsko | Peter Bacsko |
+| [HDFS-13112](https://issues.apache.org/jira/browse/HDFS-13112) | Token expiration edits may cause log corruption or deadlock |  Critical | namenode | Daryn Sharp | Daryn Sharp |
+| [HDFS-13151](https://issues.apache.org/jira/browse/HDFS-13151) | Fix the javadoc error in ReplicaInfo |  Minor | . | Bharat Viswanadham | Bharat Viswanadham |
+| [MAPREDUCE-7053](https://issues.apache.org/jira/browse/MAPREDUCE-7053) | Timed out tasks can fail to produce thread dump |  Major | . | Jason Lowe | Jason Lowe |
+| [HDFS-13058](https://issues.apache.org/jira/browse/HDFS-13058) | Fix dfs.namenode.shared.edits.dir in TestJournalNode |  Major | journal-node, test | Bharat Viswanadham | Bharat Viswanadham |
+| [HADOOP-15206](https://issues.apache.org/jira/browse/HADOOP-15206) | BZip2 drops and duplicates records when input split size is small |  Major | . | Aki Tanaka | Aki Tanaka |
+| [YARN-7937](https://issues.apache.org/jira/browse/YARN-7937) | Fix http method name in Cluster Application Timeout Update API example request |  Minor | docs, documentation | Charan Hebri | Charan Hebri |
+| [HADOOP-15223](https://issues.apache.org/jira/browse/HADOOP-15223) | Replace Collections.EMPTY\* with empty\* when available |  Minor | . | Akira Ajisaka | fang zhenyi |
+| [HDFS-13159](https://issues.apache.org/jira/browse/HDFS-13159) | TestTruncateQuotaUpdate fails in trunk |  Major | test | Arpit Agarwal | Nanda kumar |
+| [YARN-7947](https://issues.apache.org/jira/browse/YARN-7947) | Capacity Scheduler intra-queue preemption can NPE for non-schedulable apps |  Major | capacity scheduler, scheduler preemption | Eric Payne | Eric Payne |
+| [HADOOP-10571](https://issues.apache.org/jira/browse/HADOOP-10571) | Use Log.\*(Object, Throwable) overload to log exceptions |  Major | . | Arpit Agarwal | Andras Bokor |
+| [HADOOP-6852](https://issues.apache.org/jira/browse/HADOOP-6852) | apparent bug in concatenated-bzip2 support (decoding) |  Major | io | Greg Roelofs | Zsolt Venczel |
+| [YARN-7942](https://issues.apache.org/jira/browse/YARN-7942) | Yarn ServiceClient does not delete znode from secure ZooKeeper |  Blocker | yarn-native-services | Eric Yang | Billie Rinaldi |
+| [HADOOP-15236](https://issues.apache.org/jira/browse/HADOOP-15236) | Fix typo in RequestHedgingProxyProvider and RequestHedgingRMFailoverProxyProvider |  Trivial | documentation | Akira Ajisaka | Gabor Bota |
+| [YARN-7675](https://issues.apache.org/jira/browse/YARN-7675) | [UI2] Support loading pre-2.8 version /scheduler REST response for queue page |  Major | yarn-ui-v2 | Gergely Novák | Gergely Novák |
+| [YARN-7949](https://issues.apache.org/jira/browse/YARN-7949) | [UI2] ArtifactsId should not be a compulsory field for new service |  Major | yarn-ui-v2 | Yesha Vora | Yesha Vora |
+| [YARN-5714](https://issues.apache.org/jira/browse/YARN-5714) | ContainerExecutor does not order environment map |  Trivial | nodemanager | Remi Catherinot | Remi Catherinot |
+| [MAPREDUCE-7027](https://issues.apache.org/jira/browse/MAPREDUCE-7027) | HadoopArchiveLogs shouldn't delete the original logs if the HAR creation fails |  Critical | mrv2 | Gergely Novák | Gergely Novák |
+| [HDFS-12865](https://issues.apache.org/jira/browse/HDFS-12865) | RequestHedgingProxyProvider should handle case when none of the proxies are available |  Major | ha | Mukul Kumar Singh | Mukul Kumar Singh |
+| [HADOOP-15254](https://issues.apache.org/jira/browse/HADOOP-15254) | Correct the wrong word spelling 'intialize' |  Minor | . | fang zhenyi | fang zhenyi |
+| [HDFS-12781](https://issues.apache.org/jira/browse/HDFS-12781) | After a Datanode goes down, the Datanode tab in the Namenode UI throws a warning message. |  Major | datanode | Harshakiran Reddy | Brahma Reddy Battula |
+| [HDFS-12070](https://issues.apache.org/jira/browse/HDFS-12070) | Failed block recovery leaves files open indefinitely and at risk for data loss |  Major | . | Daryn Sharp | Kihwal Lee |
+| [HADOOP-15265](https://issues.apache.org/jira/browse/HADOOP-15265) | Exclude json-smart explicitly in hadoop-auth to avoid being pulled in transitively |  Major | . | Nishant Bangarwa | Nishant Bangarwa |
+| [YARN-7963](https://issues.apache.org/jira/browse/YARN-7963) | TestServiceAM and TestServiceMonitor test cases are hanging |  Major | yarn-native-services | Eric Yang | Chandni Singh |
+| [HDFS-13145](https://issues.apache.org/jira/browse/HDFS-13145) | SBN crash when transition to ANN with in-progress edit tailing enabled |  Major | ha, namenode | Chao Sun | Chao Sun |
+| [HDFS-13181](https://issues.apache.org/jira/browse/HDFS-13181) | DiskBalancer: Add a configuration for valid plan hours |  Major | diskbalancer | Bharat Viswanadham | Bharat Viswanadham |
+| [HDFS-13143](https://issues.apache.org/jira/browse/HDFS-13143) | SnapshotDiff - snapshotDiffReport might be inconsistent if the snapshotDiff calculation happens between a snapshot and the current tree |  Major | snapshots | Shashikant Banerjee | Shashikant Banerjee |
+| [HDFS-13194](https://issues.apache.org/jira/browse/HDFS-13194) | CachePool permissions incorrectly checked |  Major | . | Yiqun Lin | Jianfei Jiang |
+| [HDFS-13114](https://issues.apache.org/jira/browse/HDFS-13114) | CryptoAdmin#ReencryptZoneCommand should resolve Namespace info from path |  Major | encryption, hdfs | Hanisha Koneru | Hanisha Koneru |
+| [HDFS-13081](https://issues.apache.org/jira/browse/HDFS-13081) | Datanode#checkSecureConfig should allow SASL and privileged HTTP |  Major | datanode, security | Xiaoyu Yao | Ajay Kumar |
+| [YARN-7985](https://issues.apache.org/jira/browse/YARN-7985) | Service name is validated twice in ServiceClient when a service is created |  Trivial | yarn-native-services | Chandni Singh | Chandni Singh |
+| [MAPREDUCE-7059](https://issues.apache.org/jira/browse/MAPREDUCE-7059) | Downward compatibility issue: MR job fails because of unknown setErasureCodingPolicy method when a 3.x client talks to an HDFS 2.x cluster |  Critical | job submission | Jiandan Yang | Jiandan Yang |
+| [YARN-7835](https://issues.apache.org/jira/browse/YARN-7835) | [Atsv2] Race condition in NM while publishing events if second attempt is launched on the same node |  Critical | . | Rohith Sharma K S | Rohith Sharma K S |
+| [HADOOP-15275](https://issues.apache.org/jira/browse/HADOOP-15275) | Incorrect javadoc for return type of RetryPolicy#shouldRetry |  Minor | documentation | Nanda kumar | Nanda kumar |
+| [YARN-7958](https://issues.apache.org/jira/browse/YARN-7958) | ServiceMaster should only wait for recovery of containers with id that match the current application id |  Critical | yarn | Chandni Singh | Chandni Singh |
+| [HDFS-13211](https://issues.apache.org/jira/browse/HDFS-13211) | Fix a bug in DirectoryDiffList.getMinListForRange |  Major | snapshots | Shashikant Banerjee | Shashikant Banerjee |
+| [HDFS-13210](https://issues.apache.org/jira/browse/HDFS-13210) | Fix the typo in MiniDFSCluster class |  Trivial | test | Yiqun Lin | fang zhenyi |
+| [YARN-7511](https://issues.apache.org/jira/browse/YARN-7511) | NPE in ContainerLocalizer when localization failed for running container |  Major | nodemanager | Tao Yang | Tao Yang |
+| [HADOOP-15261](https://issues.apache.org/jira/browse/HADOOP-15261) | Upgrade commons-io from 2.4 to 2.5 |  Major | minikdc | PandaMonkey | PandaMonkey |
+| [MAPREDUCE-7023](https://issues.apache.org/jira/browse/MAPREDUCE-7023) | TestHadoopArchiveLogs.testCheckFilesAndSeedApps fails on rerun |  Minor | test | Gergely Novák | Gergely Novák |
+| [HDFS-13178](https://issues.apache.org/jira/browse/HDFS-13178) | Disk Balancer: Add skipDateCheck option to DiskBalancer Execute command |  Major | diskbalancer | Bharat Viswanadham | Bharat Viswanadham |
+| [HADOOP-15286](https://issues.apache.org/jira/browse/HADOOP-15286) | Remove unused imports from TestKMSWithZK.java |  Minor | test | Akira Ajisaka | Ajay Kumar |
+| [YARN-7995](https://issues.apache.org/jira/browse/YARN-7995) | Remove unnecessary boxings and unboxings from PlacementConstraintParser.java |  Minor | . | Akira Ajisaka | Sen Zhao |
+| [HDFS-13040](https://issues.apache.org/jira/browse/HDFS-13040) | Kerberized inotify client fails despite proper kinit |  Major | namenode | Wei-Chiu Chuang | Xiao Chen |
+| [HADOOP-15288](https://issues.apache.org/jira/browse/HADOOP-15288) | TestSwiftFileSystemBlockLocation doesn't compile |  Critical | build, fs/swift | Steve Loughran | Steve Loughran |
+| [YARN-7736](https://issues.apache.org/jira/browse/YARN-7736) | Fix itemization in YARN federation document |  Minor | documentation | Akira Ajisaka | Sen Zhao |
+| [HDFS-13164](https://issues.apache.org/jira/browse/HDFS-13164) | File not closed if streamer fail with DSQuotaExceededException |  Major | hdfs-client | Xiao Chen | Xiao Chen |
+| [HADOOP-15289](https://issues.apache.org/jira/browse/HADOOP-15289) | FileStatus.readFields() assertion incorrect |  Critical | . | Steve Loughran | Steve Loughran |
+| [HDFS-13188](https://issues.apache.org/jira/browse/HDFS-13188) | Disk Balancer: Support multiple block pools during block move |  Major | diskbalancer | Bharat Viswanadham | Bharat Viswanadham |
+| [HDFS-13109](https://issues.apache.org/jira/browse/HDFS-13109) | Support fully qualified hdfs path in EZ commands |  Major | hdfs | Hanisha Koneru | Hanisha Koneru |
+| [HADOOP-15296](https://issues.apache.org/jira/browse/HADOOP-15296) | Fix a wrong link for RBF in the top page |  Minor | documentation | Takanobu Asanuma | Takanobu Asanuma |
+| [YARN-8011](https://issues.apache.org/jira/browse/YARN-8011) | TestOpportunisticContainerAllocatorAMService#testContainerPromoteAndDemoteBeforeContainerStart fails sometimes in trunk |  Minor | . | Tao Yang | Tao Yang |
+| [HADOOP-15292](https://issues.apache.org/jira/browse/HADOOP-15292) | Distcp's use of pread is slowing it down. |  Minor | tools/distcp | Virajith Jalaparti | Virajith Jalaparti |
+| [HADOOP-15273](https://issues.apache.org/jira/browse/HADOOP-15273) | distcp can't handle remote stores with different checksum algorithms |  Critical | tools/distcp | Steve Loughran | Steve Loughran |
+| [HADOOP-15280](https://issues.apache.org/jira/browse/HADOOP-15280) | TestKMS.testWebHDFSProxyUserKerb and TestKMS.testWebHDFSProxyUserSimple fail in trunk |  Major | . | Ray Chiang | Bharat Viswanadham |
+| [YARN-7944](https://issues.apache.org/jira/browse/YARN-7944) | [UI2] Remove master node link from headers of application pages |  Major | yarn-ui-v2 | Yesha Vora | Yesha Vora |
+| [MAPREDUCE-6930](https://issues.apache.org/jira/browse/MAPREDUCE-6930) | mapreduce.map.cpu.vcores and mapreduce.reduce.cpu.vcores are both present twice in mapred-default.xml |  Major | mrv2 | Daniel Templeton | Sen Zhao |
+| [YARN-8000](https://issues.apache.org/jira/browse/YARN-8000) | Yarn Service: component instance name shows up as component name in container record |  Major | . | Chandni Singh | Chandni Singh |
+| [HDFS-13190](https://issues.apache.org/jira/browse/HDFS-13190) | Document WebHDFS support for snapshot diff |  Major | documentation, webhdfs | Xiaoyu Yao | Lokesh Jain |
+| [HDFS-13244](https://issues.apache.org/jira/browse/HDFS-13244) | Add stack, conf, metrics links to utilities dropdown in NN webUI |  Major | . | Bharat Viswanadham | Bharat Viswanadham |
+| [HDFS-10618](https://issues.apache.org/jira/browse/HDFS-10618) | TestPendingReconstruction#testPendingAndInvalidate is flaky due to race condition |  Major | . | Eric Badger | Eric Badger |
+| [YARN-8024](https://issues.apache.org/jira/browse/YARN-8024) | LOG in class MaxRunningAppsEnforcer is initialized with the wrong class, FairScheduler |  Major | fairscheduler | Yufei Gu | Sen Zhao |
+| [HDFS-10803](https://issues.apache.org/jira/browse/HDFS-10803) | TestBalancerWithMultipleNameNodes#testBalancing2OutOf3Blockpools fails intermittently due to no free space available |  Major | . | Yiqun Lin | Yiqun Lin |
+| [HDFS-12156](https://issues.apache.org/jira/browse/HDFS-12156) | TestFSImage fails without -Pnative |  Major | test | Akira Ajisaka | Akira Ajisaka |
+| [HDFS-13271](https://issues.apache.org/jira/browse/HDFS-13271) | WebHDFS: Add constructor in SnapshottableDirectoryStatus with HdfsFileStatus as argument |  Major | webhdfs | Lokesh Jain | Lokesh Jain |
+| [HDFS-13239](https://issues.apache.org/jira/browse/HDFS-13239) | Fix non-empty dir warning message when setting default EC policy |  Minor | . | Hanisha Koneru | Bharat Viswanadham |
+| [HADOOP-15308](https://issues.apache.org/jira/browse/HADOOP-15308) | TestConfiguration fails on Windows because of paths |  Major | . | Íñigo Goiri | Xiao Liang |
+| [YARN-8022](https://issues.apache.org/jira/browse/YARN-8022) | ResourceManager UI cluster/app/\<app-id\> page fails to render |  Blocker | webapp | Tarun Parimi | Tarun Parimi |
+| [HDFS-13249](https://issues.apache.org/jira/browse/HDFS-13249) | Document webhdfs support for getting snapshottable directory list |  Major | documentation, webhdfs | Lokesh Jain | Lokesh Jain |
+| [MAPREDUCE-7064](https://issues.apache.org/jira/browse/MAPREDUCE-7064) | Flaky test TestTaskAttempt#testReducerCustomResourceTypes |  Major | client, test | Peter Bacsko | Peter Bacsko |
+| [HDFS-13261](https://issues.apache.org/jira/browse/HDFS-13261) | Fix incorrect null value check |  Minor | hdfs | Jianfei Jiang | Jianfei Jiang |
+| [HADOOP-15305](https://issues.apache.org/jira/browse/HADOOP-15305) | Replace FileUtils.writeStringToFile(File, String) with (File, String, Charset) to fix deprecation warnings |  Minor | . | Akira Ajisaka | fang zhenyi |
+| [HDFS-12723](https://issues.apache.org/jira/browse/HDFS-12723) | TestReadStripedFileWithMissingBlocks#testReadFileWithMissingBlocks failing consistently. |  Major | . | Rushabh S Shah | Ajay Kumar |
+| [HDFS-13251](https://issues.apache.org/jira/browse/HDFS-13251) | Avoid using hard coded datanode data dirs in unit tests |  Major | test | Xiaoyu Yao | Ajay Kumar |
+| [HDFS-13280](https://issues.apache.org/jira/browse/HDFS-13280) | WebHDFS: Fix NPE in get snapshottable directory list call |  Major | webhdfs | Lokesh Jain | Lokesh Jain |
+| [YARN-7952](https://issues.apache.org/jira/browse/YARN-7952) | RM should be able to recover log aggregation status after restart/fail-over |  Major | . | Xuan Gong | Xuan Gong |
+| [HADOOP-15234](https://issues.apache.org/jira/browse/HADOOP-15234) | Throw meaningful message on null when initializing KMSWebApp |  Major | kms | Xiao Chen | fang zhenyi |
+| [YARN-7636](https://issues.apache.org/jira/browse/YARN-7636) | Re-reservation count may overflow when cluster resources are exhausted for a long time |  Major | capacityscheduler | Tao Yang | Tao Yang |
+| [HDFS-12886](https://issues.apache.org/jira/browse/HDFS-12886) | Ignore minReplication for block recovery |  Major | hdfs, namenode | Lukas Majercak | Lukas Majercak |
+| [YARN-8039](https://issues.apache.org/jira/browse/YARN-8039) | Clean up log dir configuration in TestLinuxContainerExecutorWithMocks.testStartLocalizer |  Minor | . | Miklos Szegedi | Miklos Szegedi |
+| [HDFS-13296](https://issues.apache.org/jira/browse/HDFS-13296) | GenericTestUtils generates paths with a drive letter on Windows and fails webhdfs-related test cases |  Major | . | Xiao Liang | Xiao Liang |
+| [HDFS-13268](https://issues.apache.org/jira/browse/HDFS-13268) | TestWebHdfsFileContextMainOperations fails on Windows |  Major | . | Íñigo Goiri | Xiao Liang |
+| [YARN-8054](https://issues.apache.org/jira/browse/YARN-8054) | Improve robustness of the LocalDirsHandlerService MonitoringTimerTask thread |  Major | . | Jonathan Eagles | Jonathan Eagles |
+| [YARN-7873](https://issues.apache.org/jira/browse/YARN-7873) | Revert YARN-6078 |  Blocker | . | Billie Rinaldi | Billie Rinaldi |
+| [HDFS-13195](https://issues.apache.org/jira/browse/HDFS-13195) | DataNode conf page cannot display the current value after reconfig |  Minor | datanode | maobaolong | maobaolong |
+| [HADOOP-14067](https://issues.apache.org/jira/browse/HADOOP-14067) | VersionInfo should load version-info.properties from its own classloader |  Major | common | Thejas M Nair | Thejas M Nair |
+| [YARN-8063](https://issues.apache.org/jira/browse/YARN-8063) | DistributedShellTimelinePlugin wrongly check for entityId instead of entityType |  Major | . | Rohith Sharma K S | Rohith Sharma K S |
+| [YARN-8062](https://issues.apache.org/jira/browse/YARN-8062) | yarn rmadmin -getGroups returns group from which the user has been removed |  Critical | . | Sumana Sathish | Sunil G |
+| [YARN-8068](https://issues.apache.org/jira/browse/YARN-8068) | Application Priority field causes NPE in app timeline publish when Hadoop 2.7 based clients talk to 2.8+ |  Blocker | yarn | Sunil G | Sunil G |
+| [YARN-7794](https://issues.apache.org/jira/browse/YARN-7794) | SLSRunner is not loading timeline service jars causing failure |  Blocker | scheduler-load-simulator | Sunil G | Yufei Gu |
+| [YARN-8075](https://issues.apache.org/jira/browse/YARN-8075) | DShell does not fail when we ask for more GPUs than are available even though the AM throws 'InvalidResourceRequestException' |  Major | . | Sumana Sathish | Wangda Tan |
+| [YARN-6629](https://issues.apache.org/jira/browse/YARN-6629) | NPE occurred when container allocation proposal is applied but its resource requests are removed before |  Critical | . | Tao Yang | Tao Yang |
+| [HADOOP-15320](https://issues.apache.org/jira/browse/HADOOP-15320) | Remove customized getFileBlockLocations for hadoop-azure and hadoop-azure-datalake |  Major | fs/adl, fs/azure | shanyu zhao | shanyu zhao |
+| [YARN-8085](https://issues.apache.org/jira/browse/YARN-8085) | ResourceProfilesManager should be set in RMActiveServiceContext |  Blocker | capacityscheduler | Tao Yang | Tao Yang |
+| [YARN-8086](https://issues.apache.org/jira/browse/YARN-8086) | ManagedParentQueue with no leaf queues cause JS error in new UI |  Blocker | . | Suma Shivaprasad | Suma Shivaprasad |
+
+
+### TESTS:
+
+| JIRA | Summary | Priority | Component | Reporter | Contributor |
+|:---- |:---- | :--- |:---- |:---- |:---- |
+| [MAPREDUCE-6953](https://issues.apache.org/jira/browse/MAPREDUCE-6953) | Skip the testcase testJobWithChangePriority if FairScheduler is used |  Major | client | Peter Bacsko | Peter Bacsko |
+| [HDFS-12730](https://issues.apache.org/jira/browse/HDFS-12730) | Verify open files captured in the snapshots across config disable and enable |  Major | hdfs | Manoj Govindassamy | Manoj Govindassamy |
+| [HADOOP-15117](https://issues.apache.org/jira/browse/HADOOP-15117) | open(PathHandle) contract test should be exhaustive for default options |  Major | . | Chris Douglas | Chris Douglas |
+| [HDFS-13106](https://issues.apache.org/jira/browse/HDFS-13106) | Need to exercise all HDFS APIs for EC |  Major | hdfs | Haibo Yan | Haibo Yan |
+| [HDFS-13107](https://issues.apache.org/jira/browse/HDFS-13107) | Add Mover Cli Unit Tests for Federated cluster |  Major | balancer & mover, test | Bharat Viswanadham | Bharat Viswanadham |
+
+
+### SUB-TASKS:
+
+| JIRA | Summary | Priority | Component | Reporter | Contributor |
+|:---- |:---- | :--- |:---- |:---- |:---- |
+| [YARN-4081](https://issues.apache.org/jira/browse/YARN-4081) | Add support for multiple resource types in the Resource class |  Major | resourcemanager | Varun Vasudev | Varun Vasudev |
+| [YARN-4172](https://issues.apache.org/jira/browse/YARN-4172) | Extend DominantResourceCalculator to account for all resources |  Major | resourcemanager | Varun Vasudev | Varun Vasudev |
+| [YARN-4715](https://issues.apache.org/jira/browse/YARN-4715) | Add support to read resource types from a config file |  Major | nodemanager, resourcemanager | Varun Vasudev | Varun Vasudev |
+| [YARN-4829](https://issues.apache.org/jira/browse/YARN-4829) | Add support for binary units |  Major | nodemanager, resourcemanager | Varun Vasudev | Varun Vasudev |
+| [YARN-4830](https://issues.apache.org/jira/browse/YARN-4830) | Add support for resource types in the nodemanager |  Major | nodemanager | Varun Vasudev | Varun Vasudev |
+| [YARN-5242](https://issues.apache.org/jira/browse/YARN-5242) | Update DominantResourceCalculator to consider all resource types in calculations |  Major | resourcemanager | Varun Vasudev | Varun Vasudev |
+| [YARN-5586](https://issues.apache.org/jira/browse/YARN-5586) | Update the Resources class to consider all resource types |  Major | nodemanager, resourcemanager | Varun Vasudev | Varun Vasudev |
+| [YARN-5707](https://issues.apache.org/jira/browse/YARN-5707) | Add manager class for resource profiles |  Major | resourcemanager | Varun Vasudev | Varun Vasudev |
+| [YARN-5708](https://issues.apache.org/jira/browse/YARN-5708) | Implement APIs to get resource profiles from the RM |  Major | client | Varun Vasudev | Varun Vasudev |
+| [YARN-5587](https://issues.apache.org/jira/browse/YARN-5587) | Add support for resource profiles |  Major | nodemanager, resourcemanager | Varun Vasudev | Varun Vasudev |
+| [YARN-5951](https://issues.apache.org/jira/browse/YARN-5951) | Changes to allow CapacityScheduler to use configuration store |  Major | . | Jonathan Hung | Jonathan Hung |
+| [YARN-5946](https://issues.apache.org/jira/browse/YARN-5946) | Create YarnConfigurationStore interface and InMemoryConfigurationStore class |  Major | . | Jonathan Hung | Jonathan Hung |
+| [YARN-5588](https://issues.apache.org/jira/browse/YARN-5588) | Add support for resource profiles in distributed shell |  Major | nodemanager, resourcemanager | Varun Vasudev | Varun Vasudev |
+| [YARN-6232](https://issues.apache.org/jira/browse/YARN-6232) | Update resource usage and preempted resource calculations to take into account all resource types |  Major | resourcemanager | Varun Vasudev | Varun Vasudev |
+| [YARN-5948](https://issues.apache.org/jira/browse/YARN-5948) | Implement MutableConfigurationManager for handling storage into configuration store |  Major | . | Jonathan Hung | Jonathan Hung |
+| [YARN-5952](https://issues.apache.org/jira/browse/YARN-5952) | Create REST API for changing YARN scheduler configurations |  Major | . | Jonathan Hung | Jonathan Hung |
+| [HDFS-10706](https://issues.apache.org/jira/browse/HDFS-10706) | [READ] Add tool generating FSImage from external store |  Major | namenode, tools | Chris Douglas | Chris Douglas |
+| [YARN-6445](https://issues.apache.org/jira/browse/YARN-6445) | [YARN-3926] Performance improvements in resource profile branch with respect to SLS |  Major | nodemanager, resourcemanager | Varun Vasudev | Varun Vasudev |
+| [HDFS-11653](https://issues.apache.org/jira/browse/HDFS-11653) | [READ] ProvidedReplica should return an InputStream that is bounded by its length |  Major | . | Virajith Jalaparti | Virajith Jalaparti |
+| [HDFS-11663](https://issues.apache.org/jira/browse/HDFS-11663) | [READ] Fix NullPointerException in ProvidedBlocksBuilder |  Major | . | Virajith Jalaparti | Virajith Jalaparti |
+| [HDFS-11703](https://issues.apache.org/jira/browse/HDFS-11703) | [READ] Tests for ProvidedStorageMap |  Major | . | Virajith Jalaparti | Virajith Jalaparti |
+| [YARN-5949](https://issues.apache.org/jira/browse/YARN-5949) | Add pluggable configuration ACL policy interface and implementation |  Major | . | Jonathan Hung | Jonathan Hung |
+| [HDFS-11791](https://issues.apache.org/jira/browse/HDFS-11791) | [READ] Test for increasing replication of provided files. |  Major | . | Virajith Jalaparti | Virajith Jalaparti |
+| [HDFS-11792](https://issues.apache.org/jira/browse/HDFS-11792) | [READ] Test cases for ProvidedVolumeDF and ProviderBlockIteratorImpl |  Major | . | Virajith Jalaparti | Virajith Jalaparti |
+| [HDFS-11673](https://issues.apache.org/jira/browse/HDFS-11673) | [READ] Handle failures of Datanode with PROVIDED storage |  Major | . | Virajith Jalaparti | Virajith Jalaparti |
+| [YARN-6575](https://issues.apache.org/jira/browse/YARN-6575) | Support global configuration mutation in MutableConfProvider |  Major | . | Jonathan Hung | Jonathan Hung |
+| [YARN-5953](https://issues.apache.org/jira/browse/YARN-5953) | Create CLI for changing YARN configurations |  Major | . | Jonathan Hung | Jonathan Hung |
+| [YARN-6761](https://issues.apache.org/jira/browse/YARN-6761) | Fix build for YARN-3926 branch |  Major | nodemanager, resourcemanager | Varun Vasudev | Varun Vasudev |
+| [YARN-6786](https://issues.apache.org/jira/browse/YARN-6786) | ResourcePBImpl imports cleanup |  Trivial | resourcemanager | Daniel Templeton | Yeliang Cang |
+| [YARN-5947](https://issues.apache.org/jira/browse/YARN-5947) | Create LeveldbConfigurationStore class using Leveldb as backing store |  Major | . | Jonathan Hung | Jonathan Hung |
+| [YARN-6322](https://issues.apache.org/jira/browse/YARN-6322) | Disable queue refresh when configuration mutation is enabled |  Major | . | Jonathan Hung | Jonathan Hung |
+| [YARN-6593](https://issues.apache.org/jira/browse/YARN-6593) | [API] Introduce Placement Constraint object |  Major | . | Konstantinos Karanasos | Konstantinos Karanasos |
+| [YARN-6788](https://issues.apache.org/jira/browse/YARN-6788) | Improve performance of resource profile branch |  Blocker | nodemanager, resourcemanager | Sunil G | Sunil G |
+| [HDFS-12091](https://issues.apache.org/jira/browse/HDFS-12091) | [READ] Check that the replicas served from a ProvidedVolumeImpl belong to the correct external storage |  Major | . | Virajith Jalaparti | Virajith Jalaparti |
+| [HDFS-12093](https://issues.apache.org/jira/browse/HDFS-12093) | [READ] Share remoteFS between ProvidedReplica instances. |  Major | . | Ewan Higgs | Virajith Jalaparti |
+| [YARN-6471](https://issues.apache.org/jira/browse/YARN-6471) | Support to add min/max resource configuration for a queue |  Major | capacity scheduler | Sunil G | Sunil G |
+| [YARN-6935](https://issues.apache.org/jira/browse/YARN-6935) | ResourceProfilesManagerImpl.parseResource() has no need of the key parameter |  Major | resourcemanager | Daniel Templeton | Manikandan R |
+| [HDFS-12289](https://issues.apache.org/jira/browse/HDFS-12289) | [READ] HDFS-12091 breaks the tests for provided block reads |  Major | . | Virajith Jalaparti | Virajith Jalaparti |
+| [YARN-6994](https://issues.apache.org/jira/browse/YARN-6994) | Remove last uses of Long from resource types code |  Minor | resourcemanager | Daniel Templeton | Daniel Templeton |
+| [YARN-6892](https://issues.apache.org/jira/browse/YARN-6892) | Improve API implementation in Resources and DominantResourceCalculator class |  Major | nodemanager, resourcemanager | Sunil G | Sunil G |
+| [YARN-6908](https://issues.apache.org/jira/browse/YARN-6908) | ResourceProfilesManagerImpl is missing @Overrides on methods |  Minor | resourcemanager | Daniel Templeton | Sunil G |
+| [YARN-6610](https://issues.apache.org/jira/browse/YARN-6610) | DominantResourceCalculator#getResourceAsValue dominant param is updated to handle multiple resources |  Critical | resourcemanager | Daniel Templeton | Daniel Templeton |
+| [YARN-7030](https://issues.apache.org/jira/browse/YARN-7030) | Performance optimizations in Resource and ResourceUtils class |  Critical | nodemanager, resourcemanager | Wangda Tan | Wangda Tan |
+| [YARN-7042](https://issues.apache.org/jira/browse/YARN-7042) | Clean up unit tests after YARN-6610 |  Major | test | Daniel Templeton | Daniel Templeton |
+| [YARN-6789](https://issues.apache.org/jira/browse/YARN-6789) | Add Client API to get all supported resource types from RM |  Major | nodemanager, resourcemanager | Sunil G | Sunil G |
+| [YARN-6781](https://issues.apache.org/jira/browse/YARN-6781) | ResourceUtils#initializeResourcesMap takes an unnecessary Map parameter |  Minor | resourcemanager | Daniel Templeton | Yu-Tang Lin |
+| [YARN-7043](https://issues.apache.org/jira/browse/YARN-7043) | Cleanup ResourceProfileManager |  Critical | . | Wangda Tan | Wangda Tan |
+| [YARN-7067](https://issues.apache.org/jira/browse/YARN-7067) | Optimize ResourceType information display in UI |  Critical | nodemanager, resourcemanager | Wangda Tan | Wangda Tan |
+| [YARN-7039](https://issues.apache.org/jira/browse/YARN-7039) | Fix javac and javadoc errors in YARN-3926 branch |  Major | nodemanager, resourcemanager | Sunil G | Sunil G |
+| [YARN-7024](https://issues.apache.org/jira/browse/YARN-7024) | Fix issues on recovery in LevelDB store |  Major | . | Jonathan Hung | Jonathan Hung |
+| [YARN-7093](https://issues.apache.org/jira/browse/YARN-7093) | Improve log message in ResourceUtils |  Trivial | nodemanager, resourcemanager | Sunil G | Sunil G |
+| [YARN-7075](https://issues.apache.org/jira/browse/YARN-7075) | Better styling for donut charts in new YARN UI |  Major | . | Da Ding | Da Ding |
+| [HADOOP-14103](https://issues.apache.org/jira/browse/HADOOP-14103) | Sort out hadoop-aws contract-test-options.xml |  Minor | fs/s3, test | Steve Loughran | John Zhuge |
+| [YARN-6933](https://issues.apache.org/jira/browse/YARN-6933) | ResourceUtils.DISALLOWED\_NAMES check is duplicated |  Major | resourcemanager | Daniel Templeton | Manikandan R |
+| [YARN-5328](https://issues.apache.org/jira/browse/YARN-5328) | Plan/ResourceAllocation data structure enhancements required to support recurring reservations in ReservationSystem |  Major | resourcemanager | Subru Krishnan | Subru Krishnan |
+| [YARN-7056](https://issues.apache.org/jira/browse/YARN-7056) | Document Resource Profiles feature |  Major | nodemanager, resourcemanager | Sunil G | Sunil G |
+| [YARN-7144](https://issues.apache.org/jira/browse/YARN-7144) | Log Aggregation controller should not swallow the exceptions when it calls closeWriter and closeReader. |  Major | . | Xuan Gong | Xuan Gong |
+| [YARN-7104](https://issues.apache.org/jira/browse/YARN-7104) | Improve Nodes Heatmap in new YARN UI with better color coding |  Major | . | Da Ding | Da Ding |
+| [YARN-6600](https://issues.apache.org/jira/browse/YARN-6600) | Introduce default and max lifetime of application at LeafQueue level |  Major | capacity scheduler | Rohith Sharma K S | Rohith Sharma K S |
+| [YARN-5330](https://issues.apache.org/jira/browse/YARN-5330) | SharingPolicy enhancements required to support recurring reservations in ReservationSystem |  Major | resourcemanager | Subru Krishnan | Carlo Curino |
+| [YARN-7072](https://issues.apache.org/jira/browse/YARN-7072) | Add a new log aggregation file format controller |  Major | . | Xuan Gong | Xuan Gong |
+| [YARN-7136](https://issues.apache.org/jira/browse/YARN-7136) | Additional Performance Improvement for Resource Profile Feature |  Critical | nodemanager, resourcemanager | Wangda Tan | Wangda Tan |
+| [YARN-7137](https://issues.apache.org/jira/browse/YARN-7137) | Move newly added APIs to unstable in YARN-3926 branch |  Blocker | nodemanager, resourcemanager | Wangda Tan | Wangda Tan |
+| [YARN-7194](https://issues.apache.org/jira/browse/YARN-7194) | Log aggregation status is always Failed with the newly added log aggregation IndexedFileFormat |  Major | . | Xuan Gong | Xuan Gong |
+| [YARN-6612](https://issues.apache.org/jira/browse/YARN-6612) | Update fair scheduler policies to be aware of resource types |  Major | fairscheduler | Daniel Templeton | Daniel Templeton |
+| [YARN-7174](https://issues.apache.org/jira/browse/YARN-7174) | Add retry logic in LogsCLI when fetching running application logs |  Major | . | Xuan Gong | Xuan Gong |
+| [YARN-6840](https://issues.apache.org/jira/browse/YARN-6840) | Implement zookeeper based store for scheduler configuration updates |  Major | . | Wangda Tan | Jonathan Hung |
+| [HDFS-12473](https://issues.apache.org/jira/browse/HDFS-12473) | Change hosts JSON file format |  Major | . | Ming Ma | Ming Ma |
+| [HDFS-11035](https://issues.apache.org/jira/browse/HDFS-11035) | Better documentation for maintenance mode and upgrade domain |  Major | datanode, documentation | Wei-Chiu Chuang | Ming Ma |
+| [YARN-7046](https://issues.apache.org/jira/browse/YARN-7046) | Add closing logic to configuration store |  Major | . | Jonathan Hung | Jonathan Hung |
+| [MAPREDUCE-6947](https://issues.apache.org/jira/browse/MAPREDUCE-6947) | Moving logging APIs over to slf4j in hadoop-mapreduce-examples |  Major | . | Gergely Novák | Gergely Novák |
+| [HADOOP-14894](https://issues.apache.org/jira/browse/HADOOP-14894) | ReflectionUtils should use Time.monotonicNow to measure duration |  Minor | . | Bharat Viswanadham | Bharat Viswanadham |
+| [HADOOP-14892](https://issues.apache.org/jira/browse/HADOOP-14892) | MetricsSystemImpl should use Time.monotonicNow for measuring durations |  Minor | . | Chetna Chaudhari | Chetna Chaudhari |
+| [YARN-7238](https://issues.apache.org/jira/browse/YARN-7238) | Documentation for API based scheduler configuration management |  Major | . | Jonathan Hung | Jonathan Hung |
+| [HADOOP-14893](https://issues.apache.org/jira/browse/HADOOP-14893) | WritableRpcEngine should use Time.monotonicNow |  Minor | . | Chetna Chaudhari | Chetna Chaudhari |
+| [HDFS-12386](https://issues.apache.org/jira/browse/HDFS-12386) | Add fsserver defaults call to WebhdfsFileSystem. |  Minor | webhdfs | Rushabh S Shah | Rushabh S Shah |
+| [YARN-7252](https://issues.apache.org/jira/browse/YARN-7252) | Removing queue then failing over results in exception |  Critical | . | Jonathan Hung | Jonathan Hung |
+| [YARN-7251](https://issues.apache.org/jira/browse/YARN-7251) | Misc changes to YARN-5734 |  Major | . | Jonathan Hung | Jonathan Hung |
+| [YARN-6962](https://issues.apache.org/jira/browse/YARN-6962) | Add support for updateContainers when allocating using FederationInterceptor |  Minor | . | Botong Huang | Botong Huang |
+| [YARN-7259](https://issues.apache.org/jira/browse/YARN-7259) | Add size-based rolling policy to LogAggregationIndexedFileController |  Major | . | Xuan Gong | Xuan Gong |
+| [MAPREDUCE-6971](https://issues.apache.org/jira/browse/MAPREDUCE-6971) | Moving logging APIs over to slf4j in hadoop-mapreduce-client-app |  Major | . | Jinjiang Ling | Jinjiang Ling |
+| [YARN-6916](https://issues.apache.org/jira/browse/YARN-6916) | Moving logging APIs over to slf4j in hadoop-yarn-server-common |  Major | . | Akira Ajisaka | Akira Ajisaka |
+| [HDFS-12584](https://issues.apache.org/jira/browse/HDFS-12584) | [READ] Fix errors in image generation tool from latest rebase |  Major | . | Virajith Jalaparti | Virajith Jalaparti |
+| [YARN-6975](https://issues.apache.org/jira/browse/YARN-6975) | Moving logging APIs over to slf4j in hadoop-yarn-server-tests, hadoop-yarn-server-web-proxy and hadoop-yarn-server-router |  Major | . | Yeliang Cang | Yeliang Cang |
+| [YARN-2037](https://issues.apache.org/jira/browse/YARN-2037) | Add work preserving restart support for Unmanaged AMs |  Major | resourcemanager | Karthik Kambatla | Botong Huang |
+| [YARN-5329](https://issues.apache.org/jira/browse/YARN-5329) | Placement Agent enhancements required to support recurring reservations in ReservationSystem |  Blocker | resourcemanager | Subru Krishnan | Carlo Curino |
+| [YARN-6182](https://issues.apache.org/jira/browse/YARN-6182) | Fix alignment issues and missing information in new YARN UI's Queue page |  Major | yarn-ui-v2 | Akhil PB | Akhil PB |
+| [HADOOP-14845](https://issues.apache.org/jira/browse/HADOOP-14845) | Azure wasb: getFileStatus not making any auth checks |  Major | fs/azure, security | Sivaguru Sankaridurg | Sivaguru Sankaridurg |
+| [HADOOP-14899](https://issues.apache.org/jira/browse/HADOOP-14899) | Restrict Access to setPermission operation when authorization is enabled in WASB |  Major | fs/azure | Kannapiran Srinivasan | Kannapiran Srinivasan |
+| [YARN-7237](https://issues.apache.org/jira/browse/YARN-7237) | Cleanup usages of ResourceProfiles |  Critical | nodemanager, resourcemanager | Wangda Tan | Wangda Tan |
+| [YARN-7296](https://issues.apache.org/jira/browse/YARN-7296) | convertToProtoFormat(Resource r) does not set all resource types |  Major | . | lovekesh bansal | lovekesh bansal |
+| [HADOOP-14913](https://issues.apache.org/jira/browse/HADOOP-14913) | Sticky bit implementation for rename() operation in Azure WASB |  Major | fs, fs/azure | Varada Hemeswari | Varada Hemeswari |
+| [YARN-6620](https://issues.apache.org/jira/browse/YARN-6620) | Add support in NodeManager to isolate GPU devices by using CGroups |  Major | . | Wangda Tan | Wangda Tan |
+| [YARN-7205](https://issues.apache.org/jira/browse/YARN-7205) | Log improvements for the ResourceUtils |  Major | nodemanager, resourcemanager | Jian He | Sunil G |
+| [YARN-7180](https://issues.apache.org/jira/browse/YARN-7180) | Remove class ResourceType |  Major | resourcemanager, scheduler | Yufei Gu | Sunil G |
+| [HADOOP-14935](https://issues.apache.org/jira/browse/HADOOP-14935) | Azure: POSIX permissions are taking effect in access() method even when authorization is enabled |  Major | fs/azure | Santhosh G Nayak | Santhosh G Nayak |
+| [YARN-7254](https://issues.apache.org/jira/browse/YARN-7254) | UI and metrics changes related to absolute resource configuration |  Major | capacity scheduler | Sunil G | Sunil G |
+| [YARN-7311](https://issues.apache.org/jira/browse/YARN-7311) | Fix TestRMWebServicesReservation parametrization for fair scheduler |  Blocker | fairscheduler, reservation system | Yufei Gu | Yufei Gu |
+| [HDFS-12605](https://issues.apache.org/jira/browse/HDFS-12605) | [READ] TestNameNodeProvidedImplementation#testProvidedDatanodeFailures fails after rebase |  Major | . | Virajith Jalaparti | Virajith Jalaparti |
+| [YARN-7345](https://issues.apache.org/jira/browse/YARN-7345) | GPU Isolation: Incorrect minor device numbers written to devices.deny file |  Major | . | Jonathan Hung | Jonathan Hung |
+| [YARN-7338](https://issues.apache.org/jira/browse/YARN-7338) | Support same origin policy for cross site scripting prevention. |  Major | yarn-ui-v2 | Vrushali C | Sunil G |
+| [YARN-4090](https://issues.apache.org/jira/browse/YARN-4090) | Make Collections.sort() more efficient by caching resource usage |  Major | fairscheduler | Xianyin Xin | Yufei Gu |
+| [YARN-6984](https://issues.apache.org/jira/browse/YARN-6984) | DominantResourceCalculator.isAnyMajorResourceZero() should test all resources |  Major | scheduler | Daniel Templeton | Sunil G |
+| [YARN-4827](https://issues.apache.org/jira/browse/YARN-4827) | Document configuration of ReservationSystem for FairScheduler |  Blocker | capacity scheduler | Subru Krishnan | Yufei Gu |
+| [YARN-5516](https://issues.apache.org/jira/browse/YARN-5516) | Add REST API for supporting recurring reservations |  Major | resourcemanager | Sangeetha Abdu Jyothi | Sean Po |
+| [MAPREDUCE-6977](https://issues.apache.org/jira/browse/MAPREDUCE-6977) | Moving logging APIs over to slf4j in hadoop-mapreduce-client-common |  Major | client | Jinjiang Ling | Jinjiang Ling |
+| [YARN-6505](https://issues.apache.org/jira/browse/YARN-6505) | Define the strings used in SLS JSON input file format |  Major | scheduler-load-simulator | Yufei Gu | Gergely Novák |
+| [YARN-7332](https://issues.apache.org/jira/browse/YARN-7332) | Compute effectiveCapacity per each resource vector |  Major | capacity scheduler | Sunil G | Sunil G |
+| [YARN-7224](https://issues.apache.org/jira/browse/YARN-7224) | Support GPU isolation for docker container |  Major | . | Wangda Tan | Wangda Tan |
+| [YARN-7374](https://issues.apache.org/jira/browse/YARN-7374) | Improve performance of DRF comparisons for resource types in fair scheduler |  Critical | fairscheduler | Daniel Templeton | Daniel Templeton |
+| [YARN-6927](https://issues.apache.org/jira/browse/YARN-6927) | Add support for individual resource types requests in MapReduce |  Major | resourcemanager | Daniel Templeton | Gergo Repas |
+| [YARN-6594](https://issues.apache.org/jira/browse/YARN-6594) | [API] Introduce SchedulingRequest object |  Major | . | Konstantinos Karanasos | Konstantinos Karanasos |
+| [HADOOP-14997](https://issues.apache.org/jira/browse/HADOOP-14997) | Add hadoop-aliyun as dependency of hadoop-cloud-storage |  Minor | fs/oss | Genmao Yu | Genmao Yu |
+| [YARN-7289](https://issues.apache.org/jira/browse/YARN-7289) | Application lifetime does not work with FairScheduler |  Major | resourcemanager | Miklos Szegedi | Miklos Szegedi |
+| [YARN-7392](https://issues.apache.org/jira/browse/YARN-7392) | Render cluster information on new YARN web ui |  Major | webapp | Vasudevan Skm | Vasudevan Skm |
+| [HDFS-11902](https://issues.apache.org/jira/browse/HDFS-11902) | [READ] Merge BlockFormatProvider and FileRegionProvider. |  Major | . | Virajith Jalaparti | Virajith Jalaparti |
+| [YARN-7307](https://issues.apache.org/jira/browse/YARN-7307) | Allow client/AM update supported resource types via YARN APIs |  Blocker | nodemanager, resourcemanager | Wangda Tan | Sunil G |
+| [HDFS-12607](https://issues.apache.org/jira/browse/HDFS-12607) | [READ] Even one dead datanode with PROVIDED storage results in ProvidedStorageInfo being marked as FAILED |  Major | . | Virajith Jalaparti | Virajith Jalaparti |
+| [YARN-7394](https://issues.apache.org/jira/browse/YARN-7394) | Merge code paths for Reservation/Plan queues and Auto Created queues |  Major | capacity scheduler | Suma Shivaprasad | Suma Shivaprasad |
+| [HDFS-12671](https://issues.apache.org/jira/browse/HDFS-12671) | [READ] Test NameNode restarts when PROVIDED is configured |  Major | . | Virajith Jalaparti | Virajith Jalaparti |
+| [HDFS-12789](https://issues.apache.org/jira/browse/HDFS-12789) | [READ] Image generation tool does not close an opened stream |  Major | . | Virajith Jalaparti | Virajith Jalaparti |
+| [YARN-7166](https://issues.apache.org/jira/browse/YARN-7166) | Container REST endpoints should report resource types |  Major | resourcemanager | Daniel Templeton | Daniel Templeton |
+| [YARN-7143](https://issues.apache.org/jira/browse/YARN-7143) | FileNotFound handling in ResourceUtils is inconsistent |  Major | resourcemanager | Daniel Templeton | Daniel Templeton |
+| [HDFS-12776](https://issues.apache.org/jira/browse/HDFS-12776) | [READ] Increasing replication for PROVIDED files should create local replicas |  Major | . | Virajith Jalaparti | Virajith Jalaparti |
+| [HDFS-12779](https://issues.apache.org/jira/browse/HDFS-12779) | [READ] Allow cluster id to be specified to the Image generation tool |  Trivial | . | Virajith Jalaparti | Virajith Jalaparti |
+| [HDFS-12777](https://issues.apache.org/jira/browse/HDFS-12777) | [READ] Reduce memory and CPU footprint for PROVIDED volumes. |  Major | . | Virajith Jalaparti | Virajith Jalaparti |
+| [YARN-7406](https://issues.apache.org/jira/browse/YARN-7406) | Moving logging APIs over to slf4j in hadoop-yarn-api |  Major | . | Yeliang Cang | Yeliang Cang |
+| [YARN-7442](https://issues.apache.org/jira/browse/YARN-7442) | [YARN-7069] Limit format of resource type name |  Blocker | nodemanager, resourcemanager | Wangda Tan | Wangda Tan |
+| [YARN-7369](https://issues.apache.org/jira/browse/YARN-7369) | Improve the resource types docs |  Major | docs | Daniel Templeton | Daniel Templeton |
+| [YARN-6595](https://issues.apache.org/jira/browse/YARN-6595) | [API] Add Placement Constraints at the application level |  Major | . | Konstantinos Karanasos | Arun Suresh |
+| [YARN-7411](https://issues.apache.org/jira/browse/YARN-7411) | Inter-Queue preemption's computeFixpointAllocation needs to handle absolute resources while computing normalizedGuarantee |  Major | resourcemanager | Sunil G | Sunil G |
+| [YARN-7488](https://issues.apache.org/jira/browse/YARN-7488) | Make ServiceClient.getAppId method public to return ApplicationId for a service name |  Major | . | Gour Saha | Gour Saha |
+| [HADOOP-14993](https://issues.apache.org/jira/browse/HADOOP-14993) | AliyunOSS: Override listFiles and listLocatedStatus |  Major | fs/oss | Genmao Yu | Genmao Yu |
+| [YARN-6953](https://issues.apache.org/jira/browse/YARN-6953) | Clean up ResourceUtils.setMinimumAllocationForMandatoryResources() and setMaximumAllocationForMandatoryResources() |  Minor | resourcemanager | Daniel Templeton | Manikandan R |
+| [HDFS-12775](https://issues.apache.org/jira/browse/HDFS-12775) | [READ] Fix reporting of Provided volumes |  Major | . | Virajith Jalaparti | Virajith Jalaparti |
+| [YARN-7482](https://issues.apache.org/jira/browse/YARN-7482) | Max applications calculation per queue has to be revisited with absolute resource support |  Major | capacity scheduler | Sunil G | Sunil G |
+| [YARN-7486](https://issues.apache.org/jira/browse/YARN-7486) | Race condition in service AM that can cause NPE |  Major | . | Jian He | Jian He |
+| [YARN-7503](https://issues.apache.org/jira/browse/YARN-7503) | Configurable heap size / JVM opts in service AM |  Major | . | Jonathan Hung | Jonathan Hung |
+| [YARN-7419](https://issues.apache.org/jira/browse/YARN-7419) | CapacityScheduler: Allow auto leaf queue creation after queue mapping |  Major | capacity scheduler | Suma Shivaprasad | Suma Shivaprasad |
+| [YARN-7483](https://issues.apache.org/jira/browse/YARN-7483) | CapacityScheduler test cases cleanup post YARN-5881 |  Major | test | Sunil G | Sunil G |
+| [HDFS-12801](https://issues.apache.org/jira/browse/HDFS-12801) | RBF: Set MountTableResolver as default file resolver |  Minor | . | Íñigo Goiri | Íñigo Goiri |
+| [YARN-7430](https://issues.apache.org/jira/browse/YARN-7430) | Enable user re-mapping for Docker containers by default |  Blocker | security, yarn | Eric Yang | Eric Yang |
+| [YARN-7218](https://issues.apache.org/jira/browse/YARN-7218) | ApiServer REST API naming convention /ws/v1 is already used in Hadoop v2 |  Major | api, applications | Eric Yang | Eric Yang |
+| [YARN-7448](https://issues.apache.org/jira/browse/YARN-7448) | [API] Add SchedulingRequest to the AllocateRequest |  Major | . | Arun Suresh | Panagiotis Garefalakis |
+| [YARN-7529](https://issues.apache.org/jira/browse/YARN-7529) | TestYarnNativeServices#testRecoverComponentsAfterRMRestart() fails intermittently |  Major | . | Chandni Singh | Chandni Singh |
+| [YARN-6128](https://issues.apache.org/jira/browse/YARN-6128) | Add support for AMRMProxy HA |  Major | amrmproxy, nodemanager | Subru Krishnan | Botong Huang |
+| [HADOOP-15024](https://issues.apache.org/jira/browse/HADOOP-15024) | AliyunOSS: support user agent configuration and include it, along with the Hadoop version information, in requests to the OSS server |  Major | fs, fs/oss | SammiChen | SammiChen |
+| [HDFS-12778](https://issues.apache.org/jira/browse/HDFS-12778) | [READ] Report multiple locations for PROVIDED blocks |  Major | . | Virajith Jalaparti | Virajith Jalaparti |
+| [YARN-5534](https://issues.apache.org/jira/browse/YARN-5534) | Allow user provided Docker volume mount list |  Major | yarn | luhuichun | Shane Kumpf |
+| [YARN-7330](https://issues.apache.org/jira/browse/YARN-7330) | Add support to show GPU in UI including metrics |  Blocker | . | Wangda Tan | Wangda Tan |
+| [YARN-7538](https://issues.apache.org/jira/browse/YARN-7538) | Fix performance regression introduced by Capacity Scheduler absolute min/max resource refactoring |  Major | capacity scheduler | Sunil G | Sunil G |
+| [YARN-7544](https://issues.apache.org/jira/browse/YARN-7544) | Use queue-path.capacity/maximum-capacity to specify CapacityScheduler absolute min/max resources |  Major | capacity scheduler | Sunil G | Sunil G |
+| [YARN-6168](https://issues.apache.org/jira/browse/YARN-6168) | Restarted RM may not inform AM about all existing containers |  Major | . | Billie Rinaldi | Chandni Singh |
+| [HDFS-12809](https://issues.apache.org/jira/browse/HDFS-12809) | [READ] Fix the randomized selection of locations in ProvidedBlocksBuilder. |  Major | . | Virajith Jalaparti | Virajith Jalaparti |
+| [HDFS-12858](https://issues.apache.org/jira/browse/HDFS-12858) | RBF: Add router admin commands usage in HDFS commands reference doc |  Minor | documentation | Yiqun Lin | Yiqun Lin |
+| [YARN-7564](https://issues.apache.org/jira/browse/YARN-7564) | Cleanup to fix checkstyle issues of YARN-5881 branch |  Minor | . | Sunil G | Sunil G |
+| [YARN-7480](https://issues.apache.org/jira/browse/YARN-7480) | Render tooltips on columns where text is clipped in new YARN UI |  Major | yarn-ui-v2 | Vasudevan Skm | Vasudevan Skm |
+| [YARN-7575](https://issues.apache.org/jira/browse/YARN-7575) | NPE in scheduler UI when max-capacity is not configured |  Major | capacity scheduler | Eric Payne | Sunil G |
+| [YARN-7533](https://issues.apache.org/jira/browse/YARN-7533) | Documentation for absolute resource support in Capacity Scheduler |  Major | capacity scheduler | Sunil G | Sunil G |
+| [HDFS-12835](https://issues.apache.org/jira/browse/HDFS-12835) | RBF: Fix Javadoc parameter errors |  Minor | . | Wei Yan | Wei Yan |
+| [YARN-7541](https://issues.apache.org/jira/browse/YARN-7541) | Node updates don't update the maximum cluster capability for resources other than CPU and memory |  Critical | resourcemanager | Daniel Templeton | Daniel Templeton |
+| [YARN-7573](https://issues.apache.org/jira/browse/YARN-7573) | Gpu Information page could be empty for nodes without GPU |  Major | webapp, yarn-ui-v2 | Sunil G | Sunil G |
+| [HDFS-12685](https://issues.apache.org/jira/browse/HDFS-12685) | [READ] FsVolumeImpl exception when scanning Provided storage volume |  Major | . | Ewan Higgs | Virajith Jalaparti |
+| [HDFS-12665](https://issues.apache.org/jira/browse/HDFS-12665) | [AliasMap] Create a version of the AliasMap that runs in memory in the Namenode (leveldb) |  Major | . | Ewan Higgs | Ewan Higgs |
+| [YARN-7487](https://issues.apache.org/jira/browse/YARN-7487) | Ensure the volume includes GPU base libraries after being created by the plugin |  Major | . | Wangda Tan | Wangda Tan |
+| [YARN-6507](https://issues.apache.org/jira/browse/YARN-6507) | Add support in NodeManager to isolate FPGA devices with CGroups |  Major | yarn | Zhankun Tang | Zhankun Tang |
+| [MAPREDUCE-6994](https://issues.apache.org/jira/browse/MAPREDUCE-6994) | Uploader tool for Distributed Cache Deploy code changes |  Major | . | Miklos Szegedi | Miklos Szegedi |
+| [HDFS-12591](https://issues.apache.org/jira/browse/HDFS-12591) | [READ] Implement LevelDBFileRegionFormat |  Minor | hdfs | Ewan Higgs | Ewan Higgs |
+| [YARN-6907](https://issues.apache.org/jira/browse/YARN-6907) | Node information page in the old web UI should report resource types |  Major | resourcemanager | Daniel Templeton | Gergely Novák |
+| [YARN-7587](https://issues.apache.org/jira/browse/YARN-7587) | Skip dispatching opportunistic containers to nodes whose queue is already full |  Major | . | Weiwei Yang | Weiwei Yang |
+| [HDFS-12396](https://issues.apache.org/jira/browse/HDFS-12396) | Webhdfs file system should get delegation token from kms provider. |  Major | encryption, kms, webhdfs | Rushabh S Shah | Rushabh S Shah |
+| [YARN-7092](https://issues.apache.org/jira/browse/YARN-7092) | Render application specific log under application tab in new YARN UI |  Major | yarn-ui-v2 | Akhil PB | Akhil PB |
+| [HADOOP-15071](https://issues.apache.org/jira/browse/HADOOP-15071) | s3a troubleshooting docs to add a couple more failure modes |  Minor | documentation, fs/s3 | Steve Loughran | Steve Loughran |
+| [YARN-7438](https://issues.apache.org/jira/browse/YARN-7438) | Additional changes to make SchedulingPlacementSet agnostic to ResourceRequest / placement algorithm |  Major | . | Wangda Tan | Wangda Tan |
+| [HDFS-12885](https://issues.apache.org/jira/browse/HDFS-12885) | Add visibility/stability annotations |  Trivial | . | Chris Douglas | Chris Douglas |
+| [HADOOP-14475](https://issues.apache.org/jira/browse/HADOOP-14475) | Metrics of S3A don't print out when enabled in the Hadoop metrics property file |  Major | fs/s3 | Yonger | Yonger |
+| [HDFS-12713](https://issues.apache.org/jira/browse/HDFS-12713) | [READ] Refactor FileRegion and BlockAliasMap to separate out HDFS metadata and PROVIDED storage metadata |  Major | . | Virajith Jalaparti | Ewan Higgs |
+| [HDFS-12894](https://issues.apache.org/jira/browse/HDFS-12894) | [READ] Skip setting block count of ProvidedDatanodeStorageInfo on DN registration update |  Major | . | Virajith Jalaparti | Virajith Jalaparti |
+| [YARN-7610](https://issues.apache.org/jira/browse/YARN-7610) | Extend Distributed Shell to support launching job with opportunistic containers |  Major | applications/distributed-shell | Weiwei Yang | Weiwei Yang |
+| [HDFS-11640](https://issues.apache.org/jira/browse/HDFS-11640) | [READ] Datanodes should use a unique identifier when reading from external stores |  Major | . | Virajith Jalaparti | Virajith Jalaparti |
+| [HDFS-12887](https://issues.apache.org/jira/browse/HDFS-12887) | [READ] Allow Datanodes with Provided volumes to start when blocks with the same id exist locally |  Major | . | Virajith Jalaparti | Virajith Jalaparti |
+| [MAPREDUCE-6998](https://issues.apache.org/jira/browse/MAPREDUCE-6998) | Moving logging APIs over to slf4j in hadoop-mapreduce-client-jobclient |  Major | . | Akira Ajisaka | Gergely Novák |
+| [MAPREDUCE-7000](https://issues.apache.org/jira/browse/MAPREDUCE-7000) | Moving logging APIs over to slf4j in hadoop-mapreduce-client-nativetask |  Minor | . | Jinjiang Ling | Jinjiang Ling |
+| [HDFS-12874](https://issues.apache.org/jira/browse/HDFS-12874) | [READ] Documentation for provided storage |  Major | . | Chris Douglas | Virajith Jalaparti |
+| [YARN-7522](https://issues.apache.org/jira/browse/YARN-7522) | Introduce AllocationTagsManager to associate allocation tags to nodes |  Major | . | Wangda Tan | Wangda Tan |
+| [HDFS-12905](https://issues.apache.org/jira/browse/HDFS-12905) | [READ] Handle decommissioning and under-maintenance Datanodes with Provided storage. |  Major | . | Virajith Jalaparti | Virajith Jalaparti |
+| [HDFS-12893](https://issues.apache.org/jira/browse/HDFS-12893) | [READ] Support replication of Provided blocks with non-default topologies. |  Major | . | Virajith Jalaparti | Virajith Jalaparti |
+| [YARN-7443](https://issues.apache.org/jira/browse/YARN-7443) | Add native FPGA module support to do isolation with cgroups |  Major | yarn | Zhankun Tang | Zhankun Tang |
+| [YARN-7473](https://issues.apache.org/jira/browse/YARN-7473) | Implement Framework and policy for capacity management of auto created queues |  Major | capacity scheduler | Suma Shivaprasad | Suma Shivaprasad |
+| [YARN-7420](https://issues.apache.org/jira/browse/YARN-7420) | YARN UI changes to depict auto created queues |  Major | capacity scheduler | Suma Shivaprasad | Suma Shivaprasad |
+| [YARN-7520](https://issues.apache.org/jira/browse/YARN-7520) | Queue Ordering policy changes for ordering auto created leaf queues within Managed parent Queues |  Major | capacity scheduler | Suma Shivaprasad | Suma Shivaprasad |
+| [YARN-6704](https://issues.apache.org/jira/browse/YARN-6704) | Add support for work preserving NM restart when FederationInterceptor is enabled in AMRMProxyService |  Major | . | Botong Huang | Botong Huang |
+| [YARN-7632](https://issues.apache.org/jira/browse/YARN-7632) | Effective min and max resources need to be set for auto created leaf queues upon creation and capacity management |  Major | capacity scheduler | Suma Shivaprasad | Suma Shivaprasad |
+| [MAPREDUCE-7018](https://issues.apache.org/jira/browse/MAPREDUCE-7018) | Apply erasure coding properly to framework tarball and support plain tar |  Major | . | Miklos Szegedi | Miklos Szegedi |
+| [HDFS-12875](https://issues.apache.org/jira/browse/HDFS-12875) | RBF: Complete logic for -readonly option of dfsrouteradmin add command |  Major | . | Yiqun Lin | Íñigo Goiri |
+| [YARN-7634](https://issues.apache.org/jira/browse/YARN-7634) | Queue ACL validations should validate parent queue ACLs before auto-creating leaf queues |  Major | capacity scheduler | Suma Shivaprasad | Suma Shivaprasad |
+| [YARN-7641](https://issues.apache.org/jira/browse/YARN-7641) | Allow searchable filter for Application page log viewer in new YARN UI |  Major | yarn-ui-v2 | Vasudevan Skm | Vasudevan Skm |
+| [YARN-7383](https://issues.apache.org/jira/browse/YARN-7383) | Node resource is not parsed correctly for resource names containing dot |  Major | nodemanager, resourcemanager | Jonathan Hung | Gergely Novák |
+| [YARN-7643](https://issues.apache.org/jira/browse/YARN-7643) | Handle recovery of applications in case of auto-created leaf queue mapping |  Major | capacity scheduler | Suma Shivaprasad | Suma Shivaprasad |
+| [HDFS-12912](https://issues.apache.org/jira/browse/HDFS-12912) | [READ] Fix configuration and implementation of LevelDB-based alias maps |  Major | . | Virajith Jalaparti | Virajith Jalaparti |
+| [YARN-7119](https://issues.apache.org/jira/browse/YARN-7119) | Support multiple resource types in rmadmin updateNodeResource command |  Major | nodemanager, resourcemanager | Daniel Templeton | Manikandan R |
+| [YARN-7630](https://issues.apache.org/jira/browse/YARN-7630) | Fix AMRMToken rollover handling in AMRMProxy |  Minor | . | Botong Huang | Botong Huang |
+| [YARN-7565](https://issues.apache.org/jira/browse/YARN-7565) | Yarn service prematurely releases the container after AM restart |  Major | . | Chandni Singh | Chandni Singh |
+| [YARN-7638](https://issues.apache.org/jira/browse/YARN-7638) | Unit tests related to preemption for auto created leaf queues feature |  Major | capacity scheduler | Suma Shivaprasad | Suma Shivaprasad |
+| [YARN-7633](https://issues.apache.org/jira/browse/YARN-7633) | Documentation for auto queue creation feature and related configurations |  Major | capacity scheduler | Suma Shivaprasad | Suma Shivaprasad |
+| [HDFS-12712](https://issues.apache.org/jira/browse/HDFS-12712) | [9806] Code style cleanup |  Minor | . | Íñigo Goiri | Virajith Jalaparti |
+| [HDFS-12903](https://issues.apache.org/jira/browse/HDFS-12903) | [READ] Fix closing streams in ImageWriter |  Major | . | Íñigo Goiri | Virajith Jalaparti |
+| [YARN-7617](https://issues.apache.org/jira/browse/YARN-7617) | Add a flag in distributed shell to automatically PROMOTE opportunistic containers to guaranteed once they are started |  Minor | applications/distributed-shell | Weiwei Yang | Weiwei Yang |
+| [HDFS-12937](https://issues.apache.org/jira/browse/HDFS-12937) | RBF: Add more unit tests for router admin commands |  Major | test | Yiqun Lin | Yiqun Lin |
+| [YARN-7620](https://issues.apache.org/jira/browse/YARN-7620) | Allow node partition filters on Queues page of new YARN UI |  Major | yarn-ui-v2 | Vasudevan Skm | Vasudevan Skm |
+| [YARN-7670](https://issues.apache.org/jira/browse/YARN-7670) | Modifications to the ResourceScheduler to support SchedulingRequests |  Major | . | Arun Suresh | Arun Suresh |
+| [YARN-7032](https://issues.apache.org/jira/browse/YARN-7032) | [ATSv2] NPE while starting hbase co-processor when HBase authorization is enabled. |  Critical | . | Rohith Sharma K S | Rohith Sharma K S |
+| [HADOOP-14965](https://issues.apache.org/jira/browse/HADOOP-14965) | s3a input stream "normal" fadvise mode to be adaptive |  Major | fs/s3 | Steve Loughran | Steve Loughran |
+| [HADOOP-15133](https://issues.apache.org/jira/browse/HADOOP-15133) | [JDK9] Ignore com.sun.javadoc.\* and com.sun.tools.\* in animal-sniffer-maven-plugin to compile with Java 9 |  Major | . | Akira Ajisaka | Akira Ajisaka |
+| [YARN-7669](https://issues.apache.org/jira/browse/YARN-7669) | API and interface modifications for placement constraint processor |  Major | . | Arun Suresh | Arun Suresh |
+| [HADOOP-15113](https://issues.apache.org/jira/browse/HADOOP-15113) | NPE in S3A getFileStatus: null instrumentation on using closed instance |  Major | fs/s3 | Steve Loughran | Steve Loughran |
+| [HADOOP-15086](https://issues.apache.org/jira/browse/HADOOP-15086) | NativeAzureFileSystem file rename is not atomic |  Major | fs/azure | Shixiong Zhu | Thomas Marquardt |
+| [YARN-7653](https://issues.apache.org/jira/browse/YARN-7653) | Rack cardinality support for AllocationTagsManager |  Major | . | Panagiotis Garefalakis | Panagiotis Garefalakis |
+| [YARN-6596](https://issues.apache.org/jira/browse/YARN-6596) | Introduce Placement Constraint Manager module |  Major | . | Konstantinos Karanasos | Konstantinos Karanasos |
+| [YARN-7612](https://issues.apache.org/jira/browse/YARN-7612) | Add Processor Framework for Rich Placement Constraints |  Major | . | Arun Suresh | Arun Suresh |
+| [YARN-7613](https://issues.apache.org/jira/browse/YARN-7613) | Implement Basic algorithm for constraint based placement |  Major | . | Arun Suresh | Panagiotis Garefalakis |
+| [YARN-7682](https://issues.apache.org/jira/browse/YARN-7682) | Expose canSatisfyConstraints utility function to validate a placement against a constraint |  Major | . | Arun Suresh | Panagiotis Garefalakis |
+| [HDFS-12988](https://issues.apache.org/jira/browse/HDFS-12988) | RBF: Mount table entries not properly updated in the local cache |  Major | . | Íñigo Goiri | Íñigo Goiri |
+| [YARN-7557](https://issues.apache.org/jira/browse/YARN-7557) | It should be possible to specify resource types in the fair scheduler increment value |  Critical | fairscheduler | Daniel Templeton | Gergo Repas |
+| [YARN-7666](https://issues.apache.org/jira/browse/YARN-7666) | Introduce scheduler specific environment variable support in ApplicationSubmissionContext for better scheduling placement configurations |  Major | . | Sunil G | Sunil G |
+| [YARN-7242](https://issues.apache.org/jira/browse/YARN-7242) | Support to specify values of different resource types in DistributedShell for easier testing |  Critical | nodemanager, resourcemanager | Wangda Tan | Gergely Novák |
+| [YARN-7704](https://issues.apache.org/jira/browse/YARN-7704) | Document improvement for registry dns |  Major | . | Jian He | Jian He |
+| [HADOOP-15161](https://issues.apache.org/jira/browse/HADOOP-15161) | s3a: Stream and common statistics missing from metrics |  Major | . | Sean Mackrory | Sean Mackrory |
+| [HDFS-12802](https://issues.apache.org/jira/browse/HDFS-12802) | RBF: Control MountTableResolver cache size |  Major | . | Íñigo Goiri | Íñigo Goiri |
+| [HDFS-12934](https://issues.apache.org/jira/browse/HDFS-12934) | RBF: Federation supports global quota |  Major | . | Yiqun Lin | Yiqun Lin |
+| [YARN-7681](https://issues.apache.org/jira/browse/YARN-7681) | Double-check placement constraints in scheduling phase before actual allocation is made |  Major | RM, scheduler | Weiwei Yang | Weiwei Yang |
+| [YARN-5366](https://issues.apache.org/jira/browse/YARN-5366) | Improve handling of the Docker container life cycle |  Major | yarn | Shane Kumpf | Shane Kumpf |
+| [MAPREDUCE-7030](https://issues.apache.org/jira/browse/MAPREDUCE-7030) | Uploader tool should ignore symlinks to the same directory |  Minor | . | Miklos Szegedi | Miklos Szegedi |
+| [YARN-7724](https://issues.apache.org/jira/browse/YARN-7724) | yarn application status should support application name |  Major | yarn-native-services | Yesha Vora | Jian He |
+| [YARN-7696](https://issues.apache.org/jira/browse/YARN-7696) | Add container tags to ContainerTokenIdentifier, api.Container and NMContainerStatus to handle all recovery cases |  Major | . | Arun Suresh | Arun Suresh |
+| [HDFS-12972](https://issues.apache.org/jira/browse/HDFS-12972) | RBF: Display mount table quota info in Web UI and admin command |  Major | . | Yiqun Lin | Yiqun Lin |
+| [MAPREDUCE-7034](https://issues.apache.org/jira/browse/MAPREDUCE-7034) | Moving logging APIs over to slf4j the rest of all in hadoop-mapreduce |  Major | . | Takanobu Asanuma | Takanobu Asanuma |
+| [HADOOP-15079](https://issues.apache.org/jira/browse/HADOOP-15079) | ITestS3AFileOperationCost#testFakeDirectoryDeletion failing after OutputCommitter patch |  Critical | . | Sean Mackrory | Steve Loughran |
+| [HDFS-12919](https://issues.apache.org/jira/browse/HDFS-12919) | RBF: Support erasure coding methods in RouterRpcServer |  Critical | . | Íñigo Goiri | Íñigo Goiri |
+| [YARN-6736](https://issues.apache.org/jira/browse/YARN-6736) | Consider writing to both ats v1 & v2 from RM for smoother upgrades |  Major | timelineserver | Vrushali C | Aaron Gresch |
+| [MAPREDUCE-7032](https://issues.apache.org/jira/browse/MAPREDUCE-7032) | Add the ability to specify a delayed replication count |  Major | . | Miklos Szegedi | Miklos Szegedi |
+| [HADOOP-15141](https://issues.apache.org/jira/browse/HADOOP-15141) | Support IAM Assumed roles in S3A |  Major | fs/s3 | Steve Loughran | Steve Loughran |
+| [HADOOP-15027](https://issues.apache.org/jira/browse/HADOOP-15027) | AliyunOSS: Support multi-thread pre-read to improve sequential read from Hadoop to Aliyun OSS performance |  Major | fs/oss | wujinhu | wujinhu |
+| [YARN-6619](https://issues.apache.org/jira/browse/YARN-6619) | AMRMClient Changes to use the PlacementConstraint and SchedulingRequest objects |  Major | . | Arun Suresh | Arun Suresh |
+| [YARN-7709](https://issues.apache.org/jira/browse/YARN-7709) | Remove SELF from TargetExpression type |  Blocker | . | Wangda Tan | Konstantinos Karanasos |
+| [YARN-6599](https://issues.apache.org/jira/browse/YARN-6599) | Support anti-affinity constraint via AppPlacementAllocator |  Major | . | Wangda Tan | Wangda Tan |
+| [YARN-7745](https://issues.apache.org/jira/browse/YARN-7745) | Allow DistributedShell to take a placement specification for containers it wants to launch |  Major | . | Arun Suresh | Arun Suresh |
+| [HDFS-12973](https://issues.apache.org/jira/browse/HDFS-12973) | RBF: Document global quota supporting in federation |  Major | . | Yiqun Lin | Yiqun Lin |
+| [HDFS-13028](https://issues.apache.org/jira/browse/HDFS-13028) | RBF: Fix spurious TestRouterRpc#testProxyGetStats |  Minor | . | Íñigo Goiri | Íñigo Goiri |
+| [YARN-5094](https://issues.apache.org/jira/browse/YARN-5094) | some YARN container events have timestamp of -1 |  Critical | timelineserver | Sangjin Lee | Haibo Chen |
+| [MAPREDUCE-6995](https://issues.apache.org/jira/browse/MAPREDUCE-6995) | Uploader tool for Distributed Cache Deploy documentation |  Major | . | Miklos Szegedi | Miklos Szegedi |
+| [YARN-7774](https://issues.apache.org/jira/browse/YARN-7774) | Miscellaneous fixes to the PlacementProcessor |  Blocker | . | Arun Suresh | Arun Suresh |
+| [YARN-7763](https://issues.apache.org/jira/browse/YARN-7763) | Allow Constraints specified in the SchedulingRequest to override application level constraints |  Blocker | . | Wangda Tan | Weiwei Yang |
+| [YARN-7788](https://issues.apache.org/jira/browse/YARN-7788) | Factor out management of temp tags from AllocationTagsManager |  Major | . | Arun Suresh | Arun Suresh |
+| [YARN-7779](https://issues.apache.org/jira/browse/YARN-7779) | Display allocation tags in RM web UI and expose same through REST API |  Major | RM | Weiwei Yang | Weiwei Yang |
+| [YARN-7782](https://issues.apache.org/jira/browse/YARN-7782) | Enable user re-mapping for Docker containers in yarn-default.xml |  Blocker | security, yarn | Eric Yang | Eric Yang |
+| [YARN-7605](https://issues.apache.org/jira/browse/YARN-7605) | Implement doAs for Api Service REST API |  Major | . | Eric Yang | Eric Yang |
+| [YARN-7540](https://issues.apache.org/jira/browse/YARN-7540) | Convert yarn app cli to call yarn api services |  Major | . | Eric Yang | Eric Yang |
+| [HDFS-12772](https://issues.apache.org/jira/browse/HDFS-12772) | RBF: Federation Router State State Store internal API |  Major | . | Íñigo Goiri | Íñigo Goiri |
+| [YARN-7783](https://issues.apache.org/jira/browse/YARN-7783) | Add validation step to ensure constraints are not violated due to order in which a request is processed |  Blocker | . | Arun Suresh | Arun Suresh |
+| [YARN-7807](https://issues.apache.org/jira/browse/YARN-7807) | Assume intra-app anti-affinity as default for scheduling request inside AppPlacementAllocator |  Blocker | . | Wangda Tan | Wangda Tan |
+| [YARN-7795](https://issues.apache.org/jira/browse/YARN-7795) | Fix jenkins issues of YARN-6592 branch |  Blocker | . | Sunil G | Sunil G |
+| [YARN-7810](https://issues.apache.org/jira/browse/YARN-7810) | TestDockerContainerRuntime test failures due to UID lookup of a non-existent user |  Major | . | Shane Kumpf | Shane Kumpf |
+| [HDFS-13042](https://issues.apache.org/jira/browse/HDFS-13042) | RBF: Heartbeat Router State |  Major | . | Íñigo Goiri | Íñigo Goiri |
+| [YARN-7798](https://issues.apache.org/jira/browse/YARN-7798) | Refactor SLS Reservation Creation |  Minor | . | Young Chen | Young Chen |
+| [HDFS-13049](https://issues.apache.org/jira/browse/HDFS-13049) | RBF: Inconsistent Router OPTS config in branch-2 and branch-3 |  Minor | . | Wei Yan | Wei Yan |
+| [YARN-7814](https://issues.apache.org/jira/browse/YARN-7814) | Remove automatic mounting of the cgroups root directory into Docker containers |  Major | . | Shane Kumpf | Shane Kumpf |
+| [YARN-7784](https://issues.apache.org/jira/browse/YARN-7784) | Fix Cluster metrics when placement processor is enabled |  Major | metrics, RM | Weiwei Yang | Arun Suresh |
+| [YARN-6597](https://issues.apache.org/jira/browse/YARN-6597) | Add RMContainer recovery test to verify tag population in the AllocationTagsManager |  Major | . | Konstantinos Karanasos | Panagiotis Garefalakis |
+| [YARN-7817](https://issues.apache.org/jira/browse/YARN-7817) | Add Resource reference to RM's NodeInfo object so REST API can get non memory/vcore resource usages. |  Major | . | Sumana Sathish | Sunil G |
+| [YARN-7797](https://issues.apache.org/jira/browse/YARN-7797) | Docker host network can not obtain IP address for RegistryDNS |  Major | nodemanager | Eric Yang | Eric Yang |
+| [HDFS-12574](https://issues.apache.org/jira/browse/HDFS-12574) | Add CryptoInputStream to WebHdfsFileSystem read call. |  Major | encryption, kms, webhdfs | Rushabh S Shah | Rushabh S Shah |
+| [YARN-5148](https://issues.apache.org/jira/browse/YARN-5148) | [UI2] Add page to new YARN UI to view server side configurations/logs/JVM-metrics |  Major | webapp, yarn-ui-v2 | Wangda Tan | Kai Sasaki |
+| [YARN-7723](https://issues.apache.org/jira/browse/YARN-7723) | Avoid using docker volume --format option to run against older docker releases |  Major | . | Wangda Tan | Wangda Tan |
+| [YARN-7780](https://issues.apache.org/jira/browse/YARN-7780) | Documentation for Placement Constraints |  Major | . | Arun Suresh | Konstantinos Karanasos |
+| [YARN-7811](https://issues.apache.org/jira/browse/YARN-7811) | Service AM should use configured default docker network |  Major | yarn-native-services | Billie Rinaldi | Billie Rinaldi |
+| [YARN-7822](https://issues.apache.org/jira/browse/YARN-7822) | Constraint satisfaction checker support for composite OR and AND constraints |  Major | . | Arun Suresh | Weiwei Yang |
+| [HDFS-13044](https://issues.apache.org/jira/browse/HDFS-13044) | RBF: Add a safe mode for the Router |  Major | . | Íñigo Goiri | Íñigo Goiri |
+| [YARN-7816](https://issues.apache.org/jira/browse/YARN-7816) | YARN Service - Two different users are unable to launch a service of the same name |  Major | applications | Gour Saha | Gour Saha |
+| [HDFS-13043](https://issues.apache.org/jira/browse/HDFS-13043) | RBF: Expose the state of the Routers in the federation |  Major | . | Íñigo Goiri | Íñigo Goiri |
+| [HDFS-12997](https://issues.apache.org/jira/browse/HDFS-12997) | Move logging to slf4j in BlockPoolSliceStorage and Storage |  Major | . | Ajay Kumar | Ajay Kumar |
+| [HDFS-13068](https://issues.apache.org/jira/browse/HDFS-13068) | RBF: Add router admin option to manage safe mode |  Major | . | Íñigo Goiri | Yiqun Lin |
+| [YARN-7839](https://issues.apache.org/jira/browse/YARN-7839) | Modify PlacementAlgorithm to Check node capacity before placing request on node |  Major | . | Arun Suresh | Panagiotis Garefalakis |
+| [YARN-7868](https://issues.apache.org/jira/browse/YARN-7868) | Provide improved error message when YARN service is disabled |  Major | yarn-native-services | Eric Yang | Eric Yang |
+| [YARN-7778](https://issues.apache.org/jira/browse/YARN-7778) | Merging of placement constraints defined at different levels |  Major | . | Konstantinos Karanasos | Weiwei Yang |
+| [YARN-7860](https://issues.apache.org/jira/browse/YARN-7860) | Fix UT failure TestRMWebServiceAppsNodelabel#testAppsRunning |  Major | . | Weiwei Yang | Sunil G |
+| [YARN-7516](https://issues.apache.org/jira/browse/YARN-7516) | Security check for trusted docker image |  Major | . | Eric Yang | Eric Yang |
+| [YARN-7815](https://issues.apache.org/jira/browse/YARN-7815) | Make the YARN mounts added to Docker containers more restrictive |  Major | . | Shane Kumpf | Shane Kumpf |
+| [HADOOP-15214](https://issues.apache.org/jira/browse/HADOOP-15214) | Make Hadoop compatible with Guava 21.0 |  Minor | . | Igor Dvorzhak | Igor Dvorzhak |
+| [YARN-5428](https://issues.apache.org/jira/browse/YARN-5428) | Allow for specifying the docker client configuration directory |  Major | yarn | Shane Kumpf | Shane Kumpf |
+| [YARN-7838](https://issues.apache.org/jira/browse/YARN-7838) | Support AND/OR constraints in Distributed Shell |  Critical | distributed-shell | Weiwei Yang | Weiwei Yang |
+| [HADOOP-13974](https://issues.apache.org/jira/browse/HADOOP-13974) | S3Guard CLI to support list/purge of pending multipart commits |  Major | fs/s3 | Steve Loughran | Aaron Fabbri |
+| [YARN-7917](https://issues.apache.org/jira/browse/YARN-7917) | Fix failing test TestDockerContainerRuntime#testLaunchContainerWithDockerTokens |  Minor | nodemanager | Shane Kumpf | Shane Kumpf |
+| [YARN-7914](https://issues.apache.org/jira/browse/YARN-7914) | Fix exit code handling for short lived Docker containers |  Critical | . | Shane Kumpf | Shane Kumpf |
+| [HADOOP-15040](https://issues.apache.org/jira/browse/HADOOP-15040) | Upgrade AWS SDK to 1.11.271: NPE bug spams logs w/ Yarn Log Aggregation |  Blocker | fs/s3 | Aaron Fabbri | Aaron Fabbri |
+| [YARN-7789](https://issues.apache.org/jira/browse/YARN-7789) | Should fail RM if 3rd resource type is configured but RM uses DefaultResourceCalculator |  Critical | . | Sumana Sathish | Zian Chen |
+| [HADOOP-15076](https://issues.apache.org/jira/browse/HADOOP-15076) | Enhance S3A troubleshooting documents and add a performance document |  Blocker | documentation, fs/s3 | Steve Loughran | Steve Loughran |
+| [HADOOP-15176](https://issues.apache.org/jira/browse/HADOOP-15176) | Enhance IAM Assumed Role support in S3A client |  Blocker | fs/s3, test | Steve Loughran | Steve Loughran |
+| [YARN-7920](https://issues.apache.org/jira/browse/YARN-7920) | Simplify configuration for PlacementConstraints |  Blocker | . | Wangda Tan | Wangda Tan |
+| [YARN-7292](https://issues.apache.org/jira/browse/YARN-7292) | Retrospect Resource Profile Behavior for overriding capability |  Blocker | nodemanager, resourcemanager | Wangda Tan | Wangda Tan |
+| [HADOOP-14507](https://issues.apache.org/jira/browse/HADOOP-14507) | extend per-bucket secret key config with explicit getPassword() on fs.s3a.$bucket.secret.key |  Critical | fs/s3 | Steve Loughran | Steve Loughran |
+| [YARN-7328](https://issues.apache.org/jira/browse/YARN-7328) | ResourceUtils allows yarn.nodemanager.resource-types.memory-mb and .vcores to override yarn.nodemanager.resource.memory-mb and .cpu-vcores |  Critical | nodemanager | Daniel Templeton | lovekesh bansal |
+| [HDFS-13119](https://issues.apache.org/jira/browse/HDFS-13119) | RBF: Manage unavailable clusters |  Major | . | Íñigo Goiri | Yiqun Lin |
+| [YARN-7940](https://issues.apache.org/jira/browse/YARN-7940) | Service AM gets NoAuth with secure ZK |  Blocker | yarn-native-services | Billie Rinaldi | Billie Rinaldi |
+| [YARN-7223](https://issues.apache.org/jira/browse/YARN-7223) | Document GPU isolation feature |  Blocker | . | Wangda Tan | Wangda Tan |
+| [HADOOP-15247](https://issues.apache.org/jira/browse/HADOOP-15247) | Move commons-net up to 3.6 |  Minor | fs | Steve Loughran | Steve Loughran |
+| [YARN-7916](https://issues.apache.org/jira/browse/YARN-7916) | Remove call to docker logs on failure in container-executor |  Major | . | Shane Kumpf | Shane Kumpf |
+| [YARN-7836](https://issues.apache.org/jira/browse/YARN-7836) | YARN Service component update PUT API should not use component name from JSON body |  Major | api, yarn-native-services | Gour Saha | Gour Saha |
+| [YARN-7934](https://issues.apache.org/jira/browse/YARN-7934) | [GQ] Refactor preemption calculators to allow overriding for Federation Global Algos |  Major | . | Carlo Curino | Carlo Curino |
+| [YARN-7921](https://issues.apache.org/jira/browse/YARN-7921) | Transform a PlacementConstraint to a string expression |  Major | . | Weiwei Yang | Weiwei Yang |
+| [HDFS-13187](https://issues.apache.org/jira/browse/HDFS-13187) | RBF: Fix Routers information shown in the web UI |  Minor | . | Wei Yan | Wei Yan |
+| [HDFS-13184](https://issues.apache.org/jira/browse/HDFS-13184) | RBF: Improve the unit test TestRouterRPCClientRetries |  Minor | test | Yiqun Lin | Yiqun Lin |
+| [YARN-7893](https://issues.apache.org/jira/browse/YARN-7893) | Document the FPGA isolation feature |  Blocker | . | Zhankun Tang | Zhankun Tang |
+| [YARN-7959](https://issues.apache.org/jira/browse/YARN-7959) | Add .vm extension to PlacementConstraints.md to ensure proper filtering |  Critical | documentation | Weiwei Yang | Weiwei Yang |
+| [HDFS-13199](https://issues.apache.org/jira/browse/HDFS-13199) | RBF: Fix the hdfs router page missing label icon issue |  Major | federation, hdfs | maobaolong | maobaolong |
+| [YARN-7929](https://issues.apache.org/jira/browse/YARN-7929) | Support to set container execution type in SLS |  Major | scheduler-load-simulator | Jiandan Yang | Jiandan Yang |
+| [HADOOP-15264](https://issues.apache.org/jira/browse/HADOOP-15264) | AWS "shaded" SDK 1.11.271 is pulling in netty 4.1.17 |  Blocker | fs/s3 | Steve Loughran | Steve Loughran |
+| [YARN-7446](https://issues.apache.org/jira/browse/YARN-7446) | Docker container privileged mode and --user flag contradict each other |  Major | . | Eric Yang | Eric Yang |
+| [YARN-7954](https://issues.apache.org/jira/browse/YARN-7954) | Component status stays "Ready" when yarn service is stopped |  Major | . | Yesha Vora | Gour Saha |
+| [YARN-7955](https://issues.apache.org/jira/browse/YARN-7955) | Calling stop on an already stopped service says "Successfully stopped service" |  Major | . | Gour Saha | Gour Saha |
+| [YARN-7637](https://issues.apache.org/jira/browse/YARN-7637) | GPU volume creation command fails when work preserving is disabled at NM |  Critical | nodemanager | Sunil G | Zian Chen |
+| [HADOOP-15274](https://issues.apache.org/jira/browse/HADOOP-15274) | Move hadoop-openstack to slf4j |  Minor | fs/swift | Steve Loughran | fang zhenyi |
+| [HADOOP-14652](https://issues.apache.org/jira/browse/HADOOP-14652) | Update metrics-core version to 3.2.4 |  Major | . | Ray Chiang | Ray Chiang |
+| [HDFS-1686](https://issues.apache.org/jira/browse/HDFS-1686) | Federation: Add more Balancer tests with federation setting |  Minor | balancer & mover, test | Tsz Wo Nicholas Sze | Bharat Viswanadham |
+| [HADOOP-13761](https://issues.apache.org/jira/browse/HADOOP-13761) | S3Guard: implement retries for DDB failures and throttling; translate exceptions |  Blocker | fs/s3 | Aaron Fabbri | Aaron Fabbri |
+| [YARN-7915](https://issues.apache.org/jira/browse/YARN-7915) | Trusted image log message repeated multiple times |  Major | . | Eric Badger | Shane Kumpf |
+| [HADOOP-15090](https://issues.apache.org/jira/browse/HADOOP-15090) | Add ADL troubleshooting doc |  Major | documentation, fs/adl | Steve Loughran | Steve Loughran |
+| [YARN-7972](https://issues.apache.org/jira/browse/YARN-7972) | Support inter-app placement constraints for allocation tags by application ID |  Major | . | Weiwei Yang | Weiwei Yang |
+| [HADOOP-15271](https://issues.apache.org/jira/browse/HADOOP-15271) | Remove unicode multibyte characters from JavaDoc |  Major | documentation | Akira Ajisaka | Takanobu Asanuma |
+| [HADOOP-15287](https://issues.apache.org/jira/browse/HADOOP-15287) | JDK9 JavaDoc build fails due to one-character underscore identifiers in hadoop-yarn-common |  Major | documentation | Takanobu Asanuma | Takanobu Asanuma |
+| [HADOOP-15291](https://issues.apache.org/jira/browse/HADOOP-15291) | TestMiniKdc fails on Java 9 |  Major | test | Akira Ajisaka | Takanobu Asanuma |
+| [YARN-7346](https://issues.apache.org/jira/browse/YARN-7346) | Add a profile to allow optional compilation for ATSv2 with HBase-2.0 |  Major | . | Ted Yu | Haibo Chen |
+| [YARN-7919](https://issues.apache.org/jira/browse/YARN-7919) | Refactor timelineservice-hbase module into submodules |  Major | timelineservice | Haibo Chen | Haibo Chen |
+| [HDFS-13214](https://issues.apache.org/jira/browse/HDFS-13214) | RBF: Complete document of Router configuration |  Major | . | Tao Jie | Yiqun Lin |
+| [HADOOP-15267](https://issues.apache.org/jira/browse/HADOOP-15267) | S3A multipart upload fails when SSE-C encryption is enabled |  Critical | fs/s3 | Anis Elleuch | Anis Elleuch |
+| [YARN-7891](https://issues.apache.org/jira/browse/YARN-7891) | LogAggregationIndexedFileController should support read from HAR file |  Major | . | Xuan Gong | Xuan Gong |
+| [YARN-7626](https://issues.apache.org/jira/browse/YARN-7626) | Allow regular expression matching in container-executor.cfg for devices and named docker volumes mount |  Major | . | Zian Chen | Zian Chen |
+| [HDFS-13230](https://issues.apache.org/jira/browse/HDFS-13230) | RBF: ConnectionManager's cleanup task will compare each pool's own active conns with its total conns |  Minor | . | Wei Yan | Chao Sun |
+| [HDFS-13233](https://issues.apache.org/jira/browse/HDFS-13233) | RBF: MountTableResolver doesn't return the correct mount point of the given path |  Major | hdfs | wangzhiyuan | wangzhiyuan |
+| [HADOOP-15277](https://issues.apache.org/jira/browse/HADOOP-15277) | remove .FluentPropertyBeanIntrospector from CLI operation log output |  Minor | conf | Steve Loughran | Steve Loughran |
+| [HADOOP-15293](https://issues.apache.org/jira/browse/HADOOP-15293) | TestLogLevel fails on Java 9 |  Major | test | Akira Ajisaka | Takanobu Asanuma |
+| [HDFS-13212](https://issues.apache.org/jira/browse/HDFS-13212) | RBF: Fix router location cache issue |  Major | federation, hdfs | Weiwei Wu | Weiwei Wu |
+| [HDFS-13232](https://issues.apache.org/jira/browse/HDFS-13232) | RBF: ConnectionPool should return first usable connection |  Minor | . | Wei Yan | Ekanth S |
+| [HDFS-13240](https://issues.apache.org/jira/browse/HDFS-13240) | RBF: Update some inaccurate document descriptions |  Minor | . | Yiqun Lin | Yiqun Lin |
+| [YARN-7523](https://issues.apache.org/jira/browse/YARN-7523) | Introduce description and version field in Service record |  Critical | . | Gour Saha | Chandni Singh |
+| [HADOOP-15297](https://issues.apache.org/jira/browse/HADOOP-15297) | Make S3A etag =\> checksum feature optional |  Blocker | fs/s3 | Steve Loughran | Steve Loughran |
+| [HDFS-11399](https://issues.apache.org/jira/browse/HDFS-11399) | Many tests fail in Windows due to injecting disk failures |  Major | . | Yiqun Lin | Yiqun Lin |
+| [HDFS-12677](https://issues.apache.org/jira/browse/HDFS-12677) | Extend TestReconstructStripedFile with a random EC policy |  Major | erasure-coding, test | Takanobu Asanuma | Takanobu Asanuma |
+| [HDFS-13241](https://issues.apache.org/jira/browse/HDFS-13241) | RBF: TestRouterSafemode failed if the port 8888 is in use |  Major | hdfs, test | maobaolong | maobaolong |
+| [HDFS-13253](https://issues.apache.org/jira/browse/HDFS-13253) | RBF: Quota management incorrect parent-child relationship judgement |  Major | . | Yiqun Lin | Yiqun Lin |
+| [HDFS-13226](https://issues.apache.org/jira/browse/HDFS-13226) | RBF: Throw the exception if mount table entry validation failed |  Major | hdfs | maobaolong | maobaolong |
+| [HDFS-12505](https://issues.apache.org/jira/browse/HDFS-12505) | Extend TestFileStatusWithECPolicy with a random EC policy |  Major | erasure-coding, test | Takanobu Asanuma | Takanobu Asanuma |
+| [HDFS-12587](https://issues.apache.org/jira/browse/HDFS-12587) | Use Parameterized tests in TestBlockInfoStriped and TestLowRedundancyBlockQueues to test all EC policies |  Major | erasure-coding, test | Takanobu Asanuma | Takanobu Asanuma |
+| [YARN-5015](https://issues.apache.org/jira/browse/YARN-5015) | Support sliding window retry capability for container restart |  Major | nodemanager | Varun Vasudev | Chandni Singh |
+| [YARN-7657](https://issues.apache.org/jira/browse/YARN-7657) | Queue Mapping could provide options to provide 'user' specific auto-created queues under a specified group parent queue |  Major | capacity scheduler | Suma Shivaprasad | Suma Shivaprasad |
+| [HDFS-12773](https://issues.apache.org/jira/browse/HDFS-12773) | RBF: Improve State Store FS implementation |  Major | . | Íñigo Goiri | Íñigo Goiri |
+| [HADOOP-15294](https://issues.apache.org/jira/browse/HADOOP-15294) | TestUGILoginFromKeytab fails on Java9 |  Major | security | Takanobu Asanuma | Takanobu Asanuma |
+| [YARN-7999](https://issues.apache.org/jira/browse/YARN-7999) | Docker launch fails when user private filecache directory is missing |  Major | . | Eric Yang | Jason Lowe |
+| [HDFS-13198](https://issues.apache.org/jira/browse/HDFS-13198) | RBF: RouterHeartbeatService throws out CachedStateStore related exceptions when starting router |  Minor | . | Wei Yan | Wei Yan |
+| [HADOOP-15278](https://issues.apache.org/jira/browse/HADOOP-15278) | log s3a at info |  Major | fs/s3 | Steve Loughran | Steve Loughran |
+| [HDFS-13224](https://issues.apache.org/jira/browse/HDFS-13224) | RBF: Resolvers to support mount points across multiple subclusters |  Major | . | Íñigo Goiri | Íñigo Goiri |
+| [YARN-8027](https://issues.apache.org/jira/browse/YARN-8027) | Setting hostname of docker container breaks for --net=host in docker 1.13 |  Major | yarn | Jim Brennan | Jim Brennan |
+| [HDFS-13215](https://issues.apache.org/jira/browse/HDFS-13215) | RBF: Move Router to its own module |  Major | . | Íñigo Goiri | Wei Yan |
+| [YARN-8053](https://issues.apache.org/jira/browse/YARN-8053) | Add hadoop-distcp in exclusion in hbase-server dependencies for timelineservice-hbase packages. |  Major | . | Rohith Sharma K S | Rohith Sharma K S |
+| [HDFS-13250](https://issues.apache.org/jira/browse/HDFS-13250) | RBF: Router to manage requests across multiple subclusters |  Major | . | Íñigo Goiri | Íñigo Goiri |
+| [HDFS-11190](https://issues.apache.org/jira/browse/HDFS-11190) | [READ] Namenode support for data stored in external stores. |  Major | . | Virajith Jalaparti | Virajith Jalaparti |
+| [HDFS-10675](https://issues.apache.org/jira/browse/HDFS-10675) | [READ] Datanode support to read from external stores. |  Major | . | Virajith Jalaparti | Virajith Jalaparti |
+| [HDFS-13318](https://issues.apache.org/jira/browse/HDFS-13318) | RBF: Fix FindBugs in hadoop-hdfs-rbf |  Minor | . | Íñigo Goiri | Ekanth S |
+| [HDFS-12792](https://issues.apache.org/jira/browse/HDFS-12792) | RBF: Test Router-based federation using HDFSContract |  Major | . | Íñigo Goiri | Íñigo Goiri |
+| [YARN-7581](https://issues.apache.org/jira/browse/YARN-7581) | HBase filters are not constructed correctly in ATSv2 |  Major | ATSv2 | Haibo Chen | Haibo Chen |
+| [YARN-7986](https://issues.apache.org/jira/browse/YARN-7986) | ATSv2 REST API queries do not return results for uppercase application tags |  Critical | . | Charan Hebri | Charan Hebri |
+| [HDFS-12512](https://issues.apache.org/jira/browse/HDFS-12512) | RBF: Add WebHDFS |  Major | fs | Íñigo Goiri | Wei Yan |
+| [YARN-8070](https://issues.apache.org/jira/browse/YARN-8070) | Yarn Service API site doc broken due to unwanted character in YarnServiceAPI.md |  Blocker | site | Gour Saha | Gour Saha |
+| [HDFS-13291](https://issues.apache.org/jira/browse/HDFS-13291) | RBF: Implement available space based OrderResolver |  Major | . | Yiqun Lin | Yiqun Lin |
+| [HDFS-13204](https://issues.apache.org/jira/browse/HDFS-13204) | RBF: Optimize name service safe mode icon |  Minor | . | liuhongtong | liuhongtong |
+| [HDFS-13352](https://issues.apache.org/jira/browse/HDFS-13352) | RBF: Add xsl stylesheet for hdfs-rbf-default.xml |  Major | documentation | Takanobu Asanuma | Takanobu Asanuma |
+| [YARN-8010](https://issues.apache.org/jira/browse/YARN-8010) | Add config in FederationRMFailoverProxy to not bypass facade cache when failing over |  Minor | . | Botong Huang | Botong Huang |
+| [HDFS-13347](https://issues.apache.org/jira/browse/HDFS-13347) | RBF: Cache datanode reports |  Minor | . | Íñigo Goiri | Íñigo Goiri |
+| [YARN-8069](https://issues.apache.org/jira/browse/YARN-8069) | Clean up example hostnames |  Major | . | Billie Rinaldi | Billie Rinaldi |
+
+
+### OTHER:
+
+| JIRA | Summary | Priority | Component | Reporter | Contributor |
+|:---- |:---- |:---- |:---- |:---- |:---- |
+| [HDFS-12376](https://issues.apache.org/jira/browse/HDFS-12376) | Enable JournalNode Sync by default |  Major | hdfs | Hanisha Koneru | Hanisha Koneru |
+| [YARN-6499](https://issues.apache.org/jira/browse/YARN-6499) | Remove the doc about Schedulable#redistributeShare() |  Trivial | fairscheduler | Yufei Gu | Chetna Chaudhari |
+| [YARN-7343](https://issues.apache.org/jira/browse/YARN-7343) | Add a junit test for ContainerScheduler recovery |  Minor | . | kartheek muthyala | Sampada Dehankar |
+| [YARN-6124](https://issues.apache.org/jira/browse/YARN-6124) | Allow SchedulingEditPolicy to be enabled / disabled / updated with RMAdmin -refreshQueues |  Major | . | Wangda Tan | Zian Chen |
+| [HADOOP-15149](https://issues.apache.org/jira/browse/HADOOP-15149) | CryptoOutputStream should implement StreamCapabilities |  Major | fs | Mike Drob | Xiao Chen |
+| [YARN-7691](https://issues.apache.org/jira/browse/YARN-7691) | Add Unit Tests for ContainersLauncher |  Major | . | Sampada Dehankar | Sampada Dehankar |
+| [YARN-7468](https://issues.apache.org/jira/browse/YARN-7468) | Provide means for container network policy control |  Major | nodemanager | Clay B. | Xuan Gong |
+| [YARN-6486](https://issues.apache.org/jira/browse/YARN-6486) | FairScheduler: Deprecate continuous scheduling |  Major | fairscheduler | Wilfred Spiegelenburg | Wilfred Spiegelenburg |
+| [HADOOP-15177](https://issues.apache.org/jira/browse/HADOOP-15177) | Update the release year to 2018 |  Blocker | build | Akira Ajisaka | Bharat Viswanadham |
+| [HADOOP-15197](https://issues.apache.org/jira/browse/HADOOP-15197) | Remove tomcat from the Hadoop-auth test bundle |  Major | . | Xiao Chen | Xiao Chen |
+| [HADOOP-14325](https://issues.apache.org/jira/browse/HADOOP-14325) | [Umbrella] Stabilise S3A Server Side Encryption |  Major | documentation, fs/s3, test | Steve Loughran |  |
+| [YARN-7918](https://issues.apache.org/jira/browse/YARN-7918) | Fix TestAMRMClientPlacementConstraints |  Critical | . | Botong Huang | Gergely Novák |
+| [HDFS-13052](https://issues.apache.org/jira/browse/HDFS-13052) | WebHDFS: Add support for snapshot diff |  Major | . | Lokesh Jain | Lokesh Jain |
+| [HADOOP-14742](https://issues.apache.org/jira/browse/HADOOP-14742) | Document multi-URI replication Inode for ViewFS |  Major | documentation, viewfs | Chris Douglas | Gera Shegalov |
+| [HDFS-13141](https://issues.apache.org/jira/browse/HDFS-13141) | WebHDFS: Add support for getting snapshottable directory list |  Major | webhdfs | Lokesh Jain | Lokesh Jain |
+| [YARN-8072](https://issues.apache.org/jira/browse/YARN-8072) | RM log is getting flooded with MemoryPlacementConstraintManager info logs |  Critical | . | Zian Chen | Zian Chen |
+
+

+ 199 - 0
hadoop-common-project/hadoop-common/src/site/markdown/release/3.1.0/RELEASENOTES.3.1.0.md

@@ -0,0 +1,199 @@
+
+<!---
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+-->
+# Apache Hadoop  3.1.0 Release Notes
+
+These release notes cover new developer and user-facing incompatibilities, important issues, features, and major improvements.
+
+
+---
+
+* [HDFS-11799](https://issues.apache.org/jira/browse/HDFS-11799) | *Major* | **Introduce a config to allow setting up write pipeline with fewer nodes than replication factor**
+
+Added a new configuration, "dfs.client.block.write.replace-datanode-on-failure.min-replication".
+
+This is the minimum number of replications needed in order not to fail the
+write pipeline when new datanodes cannot be found to replace failed datanodes
+(for example, due to network failure). If the number of remaining datanodes in
+the write pipeline is greater than or equal to this property value, writing
+continues on the remaining nodes; otherwise an exception is thrown.
+
+If this is set to 0, an exception is thrown whenever a replacement cannot be
+found.
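+
+A minimal sketch of setting the new key programmatically, assuming a
+client-side Configuration; the value 2 is an arbitrary example, not a default:
+
+```java
+import org.apache.hadoop.conf.Configuration;
+
+// Sketch: tolerate pipeline recovery as long as at least 2 replicas remain.
+// Key name from HDFS-11799; the value here is only an example.
+Configuration conf = new Configuration();
+conf.setInt(
+    "dfs.client.block.write.replace-datanode-on-failure.min-replication", 2);
+```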
+
+
+---
+
+* [HDFS-12486](https://issues.apache.org/jira/browse/HDFS-12486) | *Major* | **GetConf to get journalnodeslist**
+
+Adds a getconf command option to list the journal nodes.
+Usage: hdfs getconf -journalnodes
+
+
+---
+
+* [HADOOP-14840](https://issues.apache.org/jira/browse/HADOOP-14840) | *Major* | **Tool to estimate resource requirements of an application pipeline based on prior executions**
+
+This adds the first version of the Resource Estimator service, a tool that captures the historical resource usage of an application and predicts its future resource requirements.
+
+
+---
+
+* [YARN-5079](https://issues.apache.org/jira/browse/YARN-5079) | *Major* | **[Umbrella] Native YARN framework layer for services and beyond**
+
+A framework is implemented to orchestrate containers on YARN.
+
+
+---
+
+* [YARN-4757](https://issues.apache.org/jira/browse/YARN-4757) | *Major* | **[Umbrella] Simplified discovery of services via DNS mechanisms**
+
+A DNS server backed by the YARN service registry is implemented to enable service discovery on YARN using standard DNS lookups.
+
+
+---
+
+* [YARN-4793](https://issues.apache.org/jira/browse/YARN-4793) | *Major* | **[Umbrella] Simplified API layer for services and beyond**
+
+A REST API service is implemented to enable users to launch and manage container-based services on YARN via a REST API.
+
+
+---
+
+* [HADOOP-15008](https://issues.apache.org/jira/browse/HADOOP-15008) | *Minor* | **Metrics sinks may emit too frequently if multiple sink periods are configured**
+
+Previously if multiple metrics sinks were configured with different periods, they may emit more frequently than configured, at a period as low as the GCD of the configured periods. This change makes all metrics sinks emit at their configured period.
+
+
+---
+
+* [HDFS-12825](https://issues.apache.org/jira/browse/HDFS-12825) | *Minor* | **Fsck report shows config key name for min replication issues**
+
+**WARNING: No release note provided for this change.**
+
+
+---
+
+* [HDFS-12883](https://issues.apache.org/jira/browse/HDFS-12883) | *Major* | **RBF: Document Router and State Store metrics**
+
+This JIRA makes the following change: the Router metrics context changes from 'router' to 'dfs'.
+
+
+---
+
+* [HDFS-12895](https://issues.apache.org/jira/browse/HDFS-12895) | *Major* | **RBF: Add ACL support for mount table**
+
+Mount table entries now support ACLs. Users will not be able to modify entries created before this change; such legacy (previously permission-less) entries are assumed to have owner:superuser, group:supergroup, permission:755 as the default permissions. To modify them, log in as the superuser.
+
+
+---
+
+* [YARN-7190](https://issues.apache.org/jira/browse/YARN-7190) | *Major* | **Ensure only NM classpath in 2.x gets TSv2 related hbase jars, not the user classpath**
+
+Ensure only NM classpath in 2.x gets TSv2 related hbase jars, not the user classpath.
+
+
+---
+
+* [HDFS-9806](https://issues.apache.org/jira/browse/HDFS-9806) | *Major* | **Allow HDFS block replicas to be provided by an external storage system**
+
+Provided storage allows data stored outside HDFS to be mapped to and addressed from HDFS. It builds on heterogeneous storage by introducing a new storage type, PROVIDED, to the set of media in a datanode. Clients accessing data in PROVIDED storages can cache replicas in local media, enforce HDFS invariants (e.g., security, quotas), and address more data than the cluster could persist in the storage attached to DataNodes.
+
+
+---
+
+* [HADOOP-13282](https://issues.apache.org/jira/browse/HADOOP-13282) | *Minor* | **S3 blob etags to be made visible in S3A status/getFileChecksum() calls**
+
+Now that S3A exposes a checksum, you need to explicitly disable checksum verification when uploading from HDFS: use -skipCrc.
+
+Checksum verification does work between S3A buckets, provided the block size on uploads was identical.
+
+
+---
+
+* [YARN-7688](https://issues.apache.org/jira/browse/YARN-7688) | *Minor* | **Miscellaneous Improvements To ProcfsBasedProcessTree**
+
+Fixes whitespace and some checkstyle items in ProcfsBasedProcessTree.
+
+
+---
+
+* [YARN-6486](https://issues.apache.org/jira/browse/YARN-6486) | *Major* | **FairScheduler: Deprecate continuous scheduling**
+
+FairScheduler Continuous Scheduling is deprecated starting from 3.1.0.
+
+
+---
+
+* [HADOOP-15027](https://issues.apache.org/jira/browse/HADOOP-15027) | *Major* | **AliyunOSS: Support multi-thread pre-read to improve sequential read from Hadoop to Aliyun OSS performance**
+
+Support multi-thread pre-read in AliyunOSSInputStream to improve the sequential read performance from Hadoop to Aliyun OSS.
+
+
+---
+
+* [MAPREDUCE-7029](https://issues.apache.org/jira/browse/MAPREDUCE-7029) | *Minor* | **FileOutputCommitter is slow on filesystems lacking recursive delete**
+
+MapReduce jobs that output to filesystems without direct support for recursive delete can set mapreduce.fileoutputcommitter.task.cleanup.enabled=true to have each task delete its intermediate work directory rather than waiting for the ApplicationMaster to clean up at the end of the job. This can significantly speed up the cleanup phase for large jobs on such filesystems.
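+
+A minimal sketch of opting a job in from setup code, assuming a standard
+MapReduce job Configuration:
+
+```java
+import org.apache.hadoop.conf.Configuration;
+
+// Sketch: enable per-task cleanup of intermediate work directories.
+// Key name from MAPREDUCE-7029; the feature is opt-in per the note above.
+Configuration jobConf = new Configuration();
+jobConf.setBoolean(
+    "mapreduce.fileoutputcommitter.task.cleanup.enabled", true);
+```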
+
+
+---
+
+* [HDFS-12528](https://issues.apache.org/jira/browse/HDFS-12528) | *Major* | **Add an option to not disable short-circuit reads on failures**
+
+Added an option to not disable short-circuit reads on failures, by setting dfs.domain.socket.disable.interval.seconds to 0.
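+
+A minimal sketch, assuming a client-side Configuration:
+
+```java
+import org.apache.hadoop.conf.Configuration;
+
+// Sketch: 0 means short-circuit reads stay enabled even after failures.
+Configuration conf = new Configuration();
+conf.setLong("dfs.domain.socket.disable.interval.seconds", 0);
+```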
+
+
+---
+
+* [HDFS-13083](https://issues.apache.org/jira/browse/HDFS-13083) | *Major* | **RBF: Fix doc error setting up client**
+
+Fixes a documentation error in setting up HDFS Router-based Federation.
+
+
+---
+
+* [HDFS-13099](https://issues.apache.org/jira/browse/HDFS-13099) | *Minor* | **RBF: Use the ZooKeeper as the default State Store**
+
+Changes the default State Store from the local file system to ZooKeeper. This requires an additional ZooKeeper address to be configured.
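+
+A hedged sketch of the implied configuration; the key names below are
+assumptions based on the RBF defaults, so verify them against
+hdfs-rbf-default.xml:
+
+```java
+import org.apache.hadoop.conf.Configuration;
+
+// Assumed key names; check hdfs-rbf-default.xml for the authoritative ones.
+Configuration conf = new Configuration();
+conf.set("hadoop.zk.address",
+    "zk-0.example.com:2181,zk-1.example.com:2181");
+// The ZooKeeper driver is now the default; set explicitly here for clarity.
+conf.set("dfs.federation.router.store.driver.class",
+    "org.apache.hadoop.hdfs.server.federation.store.driver.impl."
+        + "StateStoreZooKeeperImpl");
+```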
+
+
+---
+
+* [HADOOP-15252](https://issues.apache.org/jira/browse/HADOOP-15252) | *Major* | **Checkstyle version is not compatible with IDEA's checkstyle plugin**
+
+Updated checkstyle to 8.8 and updated maven-checkstyle-plugin to 3.0.0.
+
+
+---
+
+* [YARN-7919](https://issues.apache.org/jira/browse/YARN-7919) | *Major* | **Refactor timelineservice-hbase module into submodules**
+
+The HBase integration module mixed hbase-server and hbase-client dependencies. This JIRA splits it into submodules so that hbase-client-dependent and hbase-server-dependent modules are separated. This allows conditional compilation against different versions of HBase.
+
+
+---
+
+* [YARN-7677](https://issues.apache.org/jira/browse/YARN-7677) | *Major* | **Docker image cannot set HADOOP\_CONF\_DIR**
+
+The HADOOP\_CONF\_DIR environment variable is no longer unconditionally inherited by containers, even if it does not appear in the nodemanager whitelist variables specified by the yarn.nodemanager.env-whitelist property. If the whitelist property has been modified from the default so that it no longer includes HADOOP\_CONF\_DIR, yet containers need it to be inherited from the nodemanager's environment, then the whitelist settings need to be updated to include HADOOP\_CONF\_DIR.
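+
+A minimal sketch of a customized whitelist that keeps HADOOP\_CONF\_DIR
+inherited; the surrounding entries are illustrative, not the complete
+default value:
+
+```java
+import org.apache.hadoop.conf.Configuration;
+
+// Sketch: include HADOOP_CONF_DIR explicitly in a customized whitelist.
+// The other entries are examples, not the full default list.
+Configuration conf = new Configuration();
+conf.set("yarn.nodemanager.env-whitelist",
+    "JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR");
+```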
+
+
+

+ 6 - 6
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCreateTest.java

@@ -244,12 +244,12 @@ public abstract class AbstractContractCreateTest extends
       out.write('a');
       out.flush();
       if (!fs.exists(path)) {
-
-        if (isSupported(IS_BLOBSTORE)) {
-          // object store: downgrade to a skip so that the failure is visible
-          // in test results
-          skip("Filesystem is an object store and newly created files are not "
-              + "immediately visible");
+        if (isSupported(IS_BLOBSTORE) ||
+            isSupported(CREATE_VISIBILITY_DELAYED)) {
+          // object store or some file systems: downgrade to a skip so that the
+          // failure is visible in test results
+          skip("For object store or some file systems, newly created files are"
+              + " not immediately visible");
         }
         assertPathExists("expected path to be visible before file closed",
             path);

+ 1 - 1
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestIOUtils.java

@@ -39,7 +39,7 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 
-import org.apache.commons.io.FileUtils;;
+import org.apache.commons.io.FileUtils;
 import org.apache.hadoop.fs.PathIOException;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.test.LambdaTestUtils;

+ 10 - 0
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/AclException.java

@@ -36,4 +36,14 @@ public class AclException extends IOException {
   public AclException(String message) {
     super(message);
   }
+
+  /**
+   * Creates a new AclException.
+   *
+   * @param message String message
+   * @param cause The cause of the exception
+   */
+  public AclException(String message, Throwable cause) {
+    super(message, cause);
+  }
 }
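
A minimal usage sketch of the new cause-preserving constructor; the wrapped
operation is a hypothetical placeholder, not HDFS code:

```java
import java.io.IOException;
import org.apache.hadoop.hdfs.protocol.AclException;

// Sketch: wrap a lower-level failure while preserving the original cause.
try {
  modifyAclEntries();  // hypothetical placeholder for an ACL operation
} catch (IOException e) {
  throw new AclException("Failed to modify ACL entries", e);
}
```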

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/RequestHedgingProxyProvider.java

@@ -79,6 +79,9 @@ public class RequestHedgingProxyProvider<T> extends
     public Object
     invoke(Object proxy, final Method method, final Object[] args)
             throws Throwable {
+      if (currentUsedProxy != null) {
+        return method.invoke(currentUsedProxy.proxy, args);
+      }
       Map<Future<Object>, ProxyInfo<T>> proxyMap = new HashMap<>();
       int numAttempts = 0;
 

+ 34 - 0
hadoop-hdfs-project/hadoop-hdfs-client/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRequestHedgingProxyProvider.java

@@ -43,10 +43,13 @@ import org.junit.Assert;
 import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
+import static org.junit.Assert.assertEquals;
 import org.mockito.Matchers;
 import org.mockito.Mockito;
 import org.mockito.invocation.InvocationOnMock;
 import org.mockito.stubbing.Answer;
+import static org.mockito.Mockito.when;
+import static org.mockito.Mockito.mock;
 
 import com.google.common.collect.Lists;
 
@@ -99,6 +102,37 @@ public class TestRequestHedgingProxyProvider {
     Mockito.verify(goodMock).getStats();
   }
 
+  @Test
+  public void testRequestNNAfterOneSuccess() throws Exception {
+    final AtomicInteger count = new AtomicInteger(0);
+    final ClientProtocol goodMock = mock(ClientProtocol.class);
+    when(goodMock.getStats()).thenAnswer(new Answer<long[]>() {
+      @Override
+      public long[] answer(InvocationOnMock invocation) throws Throwable {
+        count.incrementAndGet();
+        Thread.sleep(1000);
+        return new long[]{1};
+      }
+    });
+    final ClientProtocol badMock = mock(ClientProtocol.class);
+    when(badMock.getStats()).thenAnswer(new Answer<long[]>() {
+      @Override
+      public long[] answer(InvocationOnMock invocation) throws Throwable {
+        count.incrementAndGet();
+        throw new IOException("Bad mock !!");
+      }
+    });
+
+    RequestHedgingProxyProvider<ClientProtocol> provider =
+        new RequestHedgingProxyProvider<>(conf, nnUri, ClientProtocol.class,
+            createFactory(badMock, goodMock, goodMock, badMock));
+    ClientProtocol proxy = provider.getProxy().proxy;
+    proxy.getStats();
+    assertEquals(2, count.get());
+    proxy.getStats();
+    assertEquals(3, count.get());
+  }
+
   @Test
   public void testHedgingWhenOneIsSlow() throws Exception {
     final ClientProtocol goodMock = Mockito.mock(ClientProtocol.class);

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java

@@ -45,6 +45,7 @@ import org.apache.hadoop.hdfs.server.federation.resolver.FederationNamespaceInfo
 import org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys;
 import org.apache.hadoop.hdfs.server.federation.router.Router;
 import org.apache.hadoop.hdfs.server.federation.router.RouterRpcServer;
+import org.apache.hadoop.hdfs.server.federation.router.SubClusterTimeoutException;
 import org.apache.hadoop.hdfs.server.federation.store.MembershipStore;
 import org.apache.hadoop.hdfs.server.federation.store.StateStoreService;
 import org.apache.hadoop.hdfs.server.federation.store.protocol.GetNamespaceInfoRequest;
@@ -396,6 +397,8 @@ public class NamenodeBeanMetrics
       }
     } catch (StandbyException e) {
       LOG.error("Cannot get {} nodes, Router in safe mode", type);
+    } catch (SubClusterTimeoutException e) {
+      LOG.error("Cannot get {} nodes, subclusters timed out responding", type);
     } catch (IOException e) {
       LOG.error("Cannot get " + type + " nodes", e);
     }

+ 30 - 5
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ConnectionContext.java

@@ -17,8 +17,9 @@
  */
 package org.apache.hadoop.hdfs.server.federation.router;
 
+import java.net.InetSocketAddress;
+
 import org.apache.hadoop.hdfs.NameNodeProxiesClient.ProxyAndInfo;
-import org.apache.hadoop.hdfs.protocol.ClientProtocol;
 import org.apache.hadoop.ipc.RPC;
 
 /**
@@ -26,18 +27,24 @@ import org.apache.hadoop.ipc.RPC;
  * a connection, it increments a counter to mark it as active. Once the client
  * is done with the connection, it decreases the counter. It also takes care of
  * closing the connection once is not active.
+ *
+ * The protocols currently used are:
+ * <ul>
+ * <li>{@link org.apache.hadoop.hdfs.protocol.ClientProtocol}
+ * <li>{@link org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol}
+ * </ul>
  */
 public class ConnectionContext {
 
   /** Client for the connection. */
-  private final ProxyAndInfo<ClientProtocol> client;
+  private final ProxyAndInfo<?> client;
   /** How many threads are using this connection. */
   private int numThreads = 0;
   /** If the connection is closed. */
   private boolean closed = false;
 
 
-  public ConnectionContext(ProxyAndInfo<ClientProtocol> connection) {
+  public ConnectionContext(ProxyAndInfo<?> connection) {
     this.client = connection;
   }
 
@@ -74,7 +81,7 @@ public class ConnectionContext {
    *
    * @return Connection client.
    */
-  public synchronized ProxyAndInfo<ClientProtocol> getClient() {
+  public synchronized ProxyAndInfo<?> getClient() {
     this.numThreads++;
     return this.client;
   }
@@ -96,9 +103,27 @@ public class ConnectionContext {
   public synchronized void close() {
     this.closed = true;
     if (this.numThreads == 0) {
-      ClientProtocol proxy = this.client.getProxy();
+      Object proxy = this.client.getProxy();
       // Nobody should be using this anymore so it should close right away
       RPC.stopProxy(proxy);
     }
   }
+
+  @Override
+  public String toString() {
+    InetSocketAddress addr = this.client.getAddress();
+    Object proxy = this.client.getProxy();
+    Class<?> clazz = proxy.getClass();
+
+    StringBuilder sb = new StringBuilder();
+    sb.append(clazz.getSimpleName());
+    sb.append("@");
+    sb.append(addr);
+    sb.append("x");
+    sb.append(numThreads);
+    if (closed) {
+      sb.append("[CLOSED]");
+    }
+    return sb.toString();
+  }
 }

+ 6 - 4
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ConnectionManager.java

@@ -166,11 +166,12 @@ public class ConnectionManager {
    *
    * @param ugi User group information.
    * @param nnAddress Namenode address for the connection.
+   * @param protocol Protocol for the connection.
    * @return Proxy client to connect to nnId as UGI.
    * @throws IOException If the connection cannot be obtained.
    */
-  public ConnectionContext getConnection(
-      UserGroupInformation ugi, String nnAddress) throws IOException {
+  public ConnectionContext getConnection(UserGroupInformation ugi,
+      String nnAddress, Class<?> protocol) throws IOException {
 
     // Check if the manager is shutdown
     if (!this.running) {
@@ -181,7 +182,8 @@ public class ConnectionManager {
     }
 
     // Try to get the pool if created
-    ConnectionPoolId connectionId = new ConnectionPoolId(ugi, nnAddress);
+    ConnectionPoolId connectionId =
+        new ConnectionPoolId(ugi, nnAddress, protocol);
     ConnectionPool pool = null;
     readLock.lock();
     try {
@@ -197,7 +199,7 @@ public class ConnectionManager {
         pool = this.pools.get(connectionId);
         if (pool == null) {
           pool = new ConnectionPool(
-              this.conf, nnAddress, ugi, this.minSize, this.maxSize);
+              this.conf, nnAddress, ugi, this.minSize, this.maxSize, protocol);
           this.pools.put(connectionId, pool);
         }
       } finally {

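A minimal sketch of the new protocol-aware lookup; the connectionManager
variable and the address are assumptions for illustration, not Router code:

```java
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
import org.apache.hadoop.security.UserGroupInformation;

// Sketch: ClientProtocol and NamenodeProtocol now resolve to separate pools
// because the protocol class is part of the ConnectionPoolId key.
// (getCurrentUser may throw IOException; handling omitted in this sketch.)
UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
String nnAddress = "namenode-0.example.com:8020";  // illustrative address
ConnectionContext clientConn =
    connectionManager.getConnection(ugi, nnAddress, ClientProtocol.class);
ConnectionContext nnConn =
    connectionManager.getConnection(ugi, nnAddress, NamenodeProtocol.class);
```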
+ 92 - 6
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ConnectionPool.java

@@ -38,6 +38,9 @@ import org.apache.hadoop.hdfs.protocol.ClientProtocol;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolPB;
 import org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB;
+import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolPB;
+import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolTranslatorPB;
+import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.retry.RetryPolicy;
 import org.apache.hadoop.io.retry.RetryUtils;
@@ -75,6 +78,8 @@ public class ConnectionPool {
   private final String namenodeAddress;
   /** User for this connections. */
   private final UserGroupInformation ugi;
+  /** Class of the protocol. */
+  private final Class<?> protocol;
 
   /** Pool of connections. We mimic a COW array. */
   private volatile List<ConnectionContext> connections = new ArrayList<>();
@@ -91,16 +96,17 @@ public class ConnectionPool {
 
 
   protected ConnectionPool(Configuration config, String address,
-      UserGroupInformation user, int minPoolSize, int maxPoolSize)
-          throws IOException {
+      UserGroupInformation user, int minPoolSize, int maxPoolSize,
+      Class<?> proto) throws IOException {
 
     this.conf = config;
 
     // Connection pool target
     this.ugi = user;
     this.namenodeAddress = address;
+    this.protocol = proto;
     this.connectionPoolId =
-        new ConnectionPoolId(this.ugi, this.namenodeAddress);
+        new ConnectionPoolId(this.ugi, this.namenodeAddress, this.protocol);
 
     // Set configuration parameters for the pool
     this.minSize = minPoolSize;
@@ -287,7 +293,8 @@ public class ConnectionPool {
    * @throws IOException
    */
   public ConnectionContext newConnection() throws IOException {
-    return newConnection(this.conf, this.namenodeAddress, this.ugi);
+    return newConnection(
+        this.conf, this.namenodeAddress, this.ugi, this.protocol);
   }
 
   /**
@@ -299,12 +306,46 @@ public class ConnectionPool {
    * @param conf Configuration for the connection.
    * @param nnAddress Address of server supporting the ClientProtocol.
    * @param ugi User context.
-   * @return Proxy for the target ClientProtocol that contains the user's
+   * @param proto Interface of the protocol.
+   * @return Proxy for the target protocol that contains the user's
    *         security context.
    * @throws IOException If it cannot be created.
    */
   protected static ConnectionContext newConnection(Configuration conf,
-      String nnAddress, UserGroupInformation ugi)
+      String nnAddress, UserGroupInformation ugi, Class<?> proto)
+          throws IOException {
+    ConnectionContext ret;
+    if (proto == ClientProtocol.class) {
+      ret = newClientConnection(conf, nnAddress, ugi);
+    } else if (proto == NamenodeProtocol.class) {
+      ret = newNamenodeConnection(conf, nnAddress, ugi);
+    } else {
+      String msg = "Unsupported protocol for connection to NameNode: " +
+          ((proto != null) ? proto.getName() : "null");
+      LOG.error(msg);
+      throw new IllegalStateException(msg);
+    }
+    return ret;
+  }
+
+  /**
+   * Creates a proxy wrapper for a client NN connection. Each proxy contains
+   * context for a single user/security context. To maximize throughput it is
+   * recommended to use multiple connections per user+server, allowing multiple
+   * writes and reads to be dispatched in parallel.
+   *
+   * Mostly based on NameNodeProxies#createNonHAProxy() but it needs the
+   * connection identifier.
+   *
+   * @param conf Configuration for the connection.
+   * @param nnAddress Address of server supporting the ClientProtocol.
+   * @param ugi User context.
+   * @return Proxy for the target ClientProtocol that contains the user's
+   *         security context.
+   * @throws IOException If it cannot be created.
+   */
+  private static ConnectionContext newClientConnection(
+      Configuration conf, String nnAddress, UserGroupInformation ugi)
           throws IOException {
     RPC.setProtocolEngine(
         conf, ClientNamenodeProtocolPB.class, ProtobufRpcEngine.class);
@@ -334,4 +375,49 @@ public class ConnectionPool {
     ConnectionContext connection = new ConnectionContext(clientProxy);
     return connection;
   }
+
+  /**
+   * Creates a proxy wrapper for a NN connection. Each proxy contains context
+   * for a single user/security context. To maximize throughput it is
+   * recommended to use multiple connections per user+server, allowing multiple
+   * writes and reads to be dispatched in parallel.
+   *
+   * @param conf Configuration for the connection.
+   * @param nnAddress Address of server supporting the NamenodeProtocol.
+   * @param ugi User context.
+   * @return Proxy for the target NamenodeProtocol that contains the user's
+   *         security context.
+   * @throws IOException If it cannot be created.
+   */
+  private static ConnectionContext newNamenodeConnection(
+      Configuration conf, String nnAddress, UserGroupInformation ugi)
+          throws IOException {
+    RPC.setProtocolEngine(
+        conf, NamenodeProtocolPB.class, ProtobufRpcEngine.class);
+
+    final RetryPolicy defaultPolicy = RetryUtils.getDefaultRetryPolicy(
+        conf,
+        HdfsClientConfigKeys.Retry.POLICY_ENABLED_KEY,
+        HdfsClientConfigKeys.Retry.POLICY_ENABLED_DEFAULT,
+        HdfsClientConfigKeys.Retry.POLICY_SPEC_KEY,
+        HdfsClientConfigKeys.Retry.POLICY_SPEC_DEFAULT,
+        HdfsConstants.SAFEMODE_EXCEPTION_CLASS_NAME);
+
+    SocketFactory factory = SocketFactory.getDefault();
+    if (UserGroupInformation.isSecurityEnabled()) {
+      SaslRpcServer.init(conf);
+    }
+    InetSocketAddress socket = NetUtils.createSocketAddr(nnAddress);
+    final long version = RPC.getProtocolVersion(NamenodeProtocolPB.class);
+    NamenodeProtocolPB proxy = RPC.getProtocolProxy(NamenodeProtocolPB.class,
+        version, socket, ugi, conf,
+        factory, RPC.getRpcTimeout(conf), defaultPolicy, null).getProxy();
+    NamenodeProtocol client = new NamenodeProtocolTranslatorPB(proxy);
+    Text dtService = SecurityUtil.buildTokenService(socket);
+
+    ProxyAndInfo<NamenodeProtocol> clientProxy =
+        new ProxyAndInfo<NamenodeProtocol>(client, dtService, socket);
+    ConnectionContext connection = new ConnectionContext(clientProxy);
+    return connection;
+  }
 }

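The static factory dispatches on the protocol class, so package-local callers can build either proxy flavor directly; any other class hits the IllegalStateException branch above. A sketch, where conf, ugi and the address stand in for the caller's own values:

    ConnectionContext clientConn = ConnectionPool.newConnection(
        conf, "nn0.example.com:8020", ugi, ClientProtocol.class);
    ConnectionContext nnConn = ConnectionPool.newConnection(
        conf, "nn0.example.com:8020", ugi, NamenodeProtocol.class);
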
+ 16 - 3
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ConnectionPoolId.java

@@ -42,16 +42,21 @@ public class ConnectionPoolId implements Comparable<ConnectionPoolId> {
   private final String nnId;
   /** Information about the user. */
   private final UserGroupInformation ugi;
+  /** Protocol for the connection. */
+  private final Class<?> protocol;
 
   /**
    * New connection pool identifier.
    *
    * @param ugi Information of the user issuing the request.
    * @param nnId Namenode address with port.
+   * @param proto Protocol of the connection.
    */
-  public ConnectionPoolId(final UserGroupInformation ugi, final String nnId) {
+  public ConnectionPoolId(final UserGroupInformation ugi, final String nnId,
+      final Class<?> proto) {
     this.nnId = nnId;
     this.ugi = ugi;
+    this.protocol = proto;
   }
 
   @Override
@@ -60,6 +65,7 @@ public class ConnectionPoolId implements Comparable<ConnectionPoolId> {
         .append(this.nnId)
         .append(this.ugi.toString())
         .append(this.getTokenIds())
+        .append(this.protocol)
         .toHashCode();
     return hash;
   }
@@ -76,14 +82,18 @@ public class ConnectionPoolId implements Comparable<ConnectionPoolId> {
       }
       String thisTokens = this.getTokenIds().toString();
       String otherTokens = other.getTokenIds().toString();
-      return thisTokens.equals(otherTokens);
+      if (!thisTokens.equals(otherTokens)) {
+        return false;
+      }
+      return this.protocol.equals(other.protocol);
     }
     return false;
   }
 
   @Override
   public String toString() {
-    return this.ugi + " " + this.getTokenIds() + "->" + this.nnId;
+    return this.ugi + " " + this.getTokenIds() + "->" + this.nnId + " [" +
+        this.protocol.getSimpleName() + "]";
   }
 
   @Override
@@ -97,6 +107,9 @@ public class ConnectionPoolId implements Comparable<ConnectionPoolId> {
       String otherTokens = other.getTokenIds().toString();
       ret = thisTokens.compareTo(otherTokens);
     }
+    if (ret == 0) {
+      ret = this.protocol.toString().compareTo(other.protocol.toString());
+    }
     return ret;
   }
 

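With the protocol folded into hashCode(), equals() and compareTo(), two pools to the same namenode for the same user no longer collide when they serve different protocols. A sketch of the resulting behavior (user and address are illustrative):

    UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
    ConnectionPoolId clientId =
        new ConnectionPoolId(ugi, "nn0.example.com:8020", ClientProtocol.class);
    ConnectionPoolId nnId =
        new ConnectionPoolId(ugi, "nn0.example.com:8020", NamenodeProtocol.class);
    assert !clientId.equals(nnId);        // distinct keys in the pool map
    assert clientId.compareTo(nnId) != 0; // distinct in sorted views
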
+ 60 - 8
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RemoteMethod.java

@@ -38,22 +38,35 @@ public class RemoteMethod {
   private final Object[] params;
   /** List of method parameters types, matches parameters. */
   private final Class<?>[] types;
+  /** Class of the protocol for the method. */
+  private final Class<?> protocol;
   /** String name of the ClientProtocol method. */
   private final String methodName;
 
+  /**
+   * Create a remote method generator for the ClientProtocol with no parameters.
+   *
+   * @param method The string name of the protocol method.
+   */
+  public RemoteMethod(String method) {
+    this(ClientProtocol.class, method);
+  }
+
   /**
    * Create a method with no parameters.
    *
+   * @param proto Protocol of the method.
   * @param method The string name of the protocol method.
    */
-  public RemoteMethod(String method) {
+  public RemoteMethod(Class<?> proto, String method) {
     this.params = null;
     this.types = null;
     this.methodName = method;
+    this.protocol = proto;
   }
 
   /**
-   * Creates a remote method generator.
+   * Create a remote method generator for the ClientProtocol.
    *
    * @param method The string name of the ClientProtocol method.
    * @param pTypes A list of types to use to locate the specific method.
@@ -70,16 +83,49 @@ public class RemoteMethod {
    */
   public RemoteMethod(String method, Class<?>[] pTypes, Object... pParams)
       throws IOException {
+    this(ClientProtocol.class, method, pTypes, pParams);
+  }
+
+  /**
+   * Creates a remote method generator.
+   *
+   * @param proto Protocol of the method.
+   * @param method The string name of the protocol method.
+   * @param pTypes A list of types to use to locate the specific method.
+   * @param pParams A list of parameters for the method. The order of the
+   *          parameter list must match the order and number of the types.
+   *          Parameters are grouped into 2 categories:
+   *          <ul>
+   *          <li>Static parameters that are immutable across locations.
+   *          <li>Dynamic parameters that are determined for each location by a
+   *          RemoteParam object. To specify a dynamic parameter, pass an
+   *          instance of RemoteParam in place of the parameter value.
+   *          </ul>
+   * @throws IOException If the types and parameter lists are not valid.
+   */
+  public RemoteMethod(Class<?> proto, String method, Class<?>[] pTypes,
+      Object... pParams) throws IOException {
 
     if (pParams.length != pTypes.length) {
       throw new IOException("Invalid parameters for method " + method);
     }
 
+    this.protocol = proto;
     this.params = pParams;
     this.types = Arrays.copyOf(pTypes, pTypes.length);
     this.methodName = method;
   }
 
+  /**
+   * Get the interface/protocol for this method. For example, ClientProtocol or
+   * NamenodeProtocol.
+   *
+   * @return Protocol for this method.
+   */
+  public Class<?> getProtocol() {
+    return this.protocol;
+  }
+
   /**
    * Get the represented java method.
    *
@@ -89,18 +135,18 @@ public class RemoteMethod {
   public Method getMethod() throws IOException {
     try {
       if (types != null) {
-        return ClientProtocol.class.getDeclaredMethod(methodName, types);
+        return protocol.getDeclaredMethod(methodName, types);
       } else {
-        return ClientProtocol.class.getDeclaredMethod(methodName);
+        return protocol.getDeclaredMethod(methodName);
       }
     } catch (NoSuchMethodException e) {
       // Re-throw as an IOException
-      LOG.error("Cannot get method {} with types {}",
-          methodName, Arrays.toString(types), e);
+      LOG.error("Cannot get method {} with types {} from {}",
+          methodName, Arrays.toString(types), protocol.getSimpleName(), e);
       throw new IOException(e);
     } catch (SecurityException e) {
-      LOG.error("Cannot access method {} with types {}",
-          methodName, Arrays.toString(types), e);
+      LOG.error("Cannot access method {} with types {} from {}",
+          methodName, Arrays.toString(types), protocol.getSimpleName(), e);
       throw new IOException(e);
     }
   }
@@ -161,4 +207,10 @@ public class RemoteMethod {
     }
     return objList;
   }
+
+  @Override
+  public String toString() {
+    return this.protocol.getSimpleName() + "#" + this.methodName + " " +
+        Arrays.toString(this.params);
+  }
 }

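The single-argument and (method, types, params) constructors keep their old ClientProtocol default, so existing call sites compile unchanged; only NamenodeProtocol calls need the new protocol-first form. A sketch of both, with illustrative parameter values:

    // Defaults to ClientProtocol, exactly as before:
    RemoteMethod delete = new RemoteMethod("delete",
        new Class<?>[] {String.class, boolean.class}, new RemoteParam(), true);

    // Explicit protocol for NamenodeProtocol methods:
    RemoteMethod blockKeys =
        new RemoteMethod(NamenodeProtocol.class, "getBlockKeys");
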
+ 187 - 0
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterNamenodeProtocol.java

@@ -0,0 +1,187 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.federation.router;
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
+import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
+import org.apache.hadoop.hdfs.security.token.block.ExportedBlockKeys;
+import org.apache.hadoop.hdfs.server.federation.resolver.FileSubclusterResolver;
+import org.apache.hadoop.hdfs.server.namenode.CheckpointSignature;
+import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory;
+import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations;
+import org.apache.hadoop.hdfs.server.protocol.DatanodeStorageReport;
+import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand;
+import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
+import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
+import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
+import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;
+
+/**
+ * Module that implements all the RPC calls in {@link NamenodeProtocol} in the
+ * {@link RouterRpcServer}.
+ */
+public class RouterNamenodeProtocol implements NamenodeProtocol {
+
+  /** RPC server to receive client calls. */
+  private final RouterRpcServer rpcServer;
+  /** RPC clients to connect to the Namenodes. */
+  private final RouterRpcClient rpcClient;
+  /** Interface to map global name space to HDFS subcluster name spaces. */
+  private final FileSubclusterResolver subclusterResolver;
+
+
+  public RouterNamenodeProtocol(RouterRpcServer server) {
+    this.rpcServer = server;
+    this.rpcClient = this.rpcServer.getRPCClient();
+    this.subclusterResolver = this.rpcServer.getSubclusterResolver();
+  }
+
+  @Override
+  public BlocksWithLocations getBlocks(DatanodeInfo datanode, long size,
+      long minBlockSize) throws IOException {
+    rpcServer.checkOperation(OperationCategory.READ);
+
+    // Get the namespace where the datanode is located
+    Map<String, DatanodeStorageReport[]> map =
+        rpcServer.getDatanodeStorageReportMap(DatanodeReportType.ALL);
+    String nsId = null;
+    for (Entry<String, DatanodeStorageReport[]> entry : map.entrySet()) {
+      DatanodeStorageReport[] dns = entry.getValue();
+      for (DatanodeStorageReport dn : dns) {
+        DatanodeInfo dnInfo = dn.getDatanodeInfo();
+        if (dnInfo.getDatanodeUuid().equals(datanode.getDatanodeUuid())) {
+          nsId = entry.getKey();
+          break;
+        }
+      }
+      // Break the loop if already found
+      if (nsId != null) {
+        break;
+      }
+    }
+
+    // Forward to the proper namenode
+    if (nsId != null) {
+      RemoteMethod method = new RemoteMethod(
+          NamenodeProtocol.class, "getBlocks",
+          new Class<?>[] {DatanodeInfo.class, long.class, long.class},
+          datanode, size, minBlockSize);
+      return rpcClient.invokeSingle(nsId, method, BlocksWithLocations.class);
+    }
+    return null;
+  }
+
+  @Override
+  public ExportedBlockKeys getBlockKeys() throws IOException {
+    rpcServer.checkOperation(OperationCategory.READ);
+
+    // We return the information from the default name space
+    String defaultNsId = subclusterResolver.getDefaultNamespace();
+    RemoteMethod method =
+        new RemoteMethod(NamenodeProtocol.class, "getBlockKeys");
+    return rpcClient.invokeSingle(defaultNsId, method, ExportedBlockKeys.class);
+  }
+
+  @Override
+  public long getTransactionID() throws IOException {
+    rpcServer.checkOperation(OperationCategory.READ);
+
+    // We return the information from the default name space
+    String defaultNsId = subclusterResolver.getDefaultNamespace();
+    RemoteMethod method =
+        new RemoteMethod(NamenodeProtocol.class, "getTransactionID");
+    return rpcClient.invokeSingle(defaultNsId, method, long.class);
+  }
+
+  @Override
+  public long getMostRecentCheckpointTxId() throws IOException {
+    rpcServer.checkOperation(OperationCategory.READ);
+
+    // We return the information from the default name space
+    String defaultNsId = subclusterResolver.getDefaultNamespace();
+    RemoteMethod method =
+        new RemoteMethod(NamenodeProtocol.class, "getMostRecentCheckpointTxId");
+    return rpcClient.invokeSingle(defaultNsId, method, long.class);
+  }
+
+  @Override
+  public CheckpointSignature rollEditLog() throws IOException {
+    rpcServer.checkOperation(OperationCategory.WRITE, false);
+    return null;
+  }
+
+  @Override
+  public NamespaceInfo versionRequest() throws IOException {
+    rpcServer.checkOperation(OperationCategory.READ);
+
+    // We return the information from the default name space
+    String defaultNsId = subclusterResolver.getDefaultNamespace();
+    RemoteMethod method =
+        new RemoteMethod(NamenodeProtocol.class, "versionRequest");
+    return rpcClient.invokeSingle(defaultNsId, method, NamespaceInfo.class);
+  }
+
+  @Override
+  public void errorReport(NamenodeRegistration registration, int errorCode,
+      String msg) throws IOException {
+    rpcServer.checkOperation(OperationCategory.UNCHECKED, false);
+  }
+
+  @Override
+  public NamenodeRegistration registerSubordinateNamenode(
+      NamenodeRegistration registration) throws IOException {
+    rpcServer.checkOperation(OperationCategory.WRITE, false);
+    return null;
+  }
+
+  @Override
+  public NamenodeCommand startCheckpoint(NamenodeRegistration registration)
+      throws IOException {
+    rpcServer.checkOperation(OperationCategory.WRITE, false);
+    return null;
+  }
+
+  @Override
+  public void endCheckpoint(NamenodeRegistration registration,
+      CheckpointSignature sig) throws IOException {
+    rpcServer.checkOperation(OperationCategory.WRITE, false);
+  }
+
+  @Override
+  public RemoteEditLogManifest getEditLogManifest(long sinceTxId)
+      throws IOException {
+    rpcServer.checkOperation(OperationCategory.READ, false);
+    return null;
+  }
+
+  @Override
+  public boolean isUpgradeFinalized() throws IOException {
+    rpcServer.checkOperation(OperationCategory.READ, false);
+    return false;
+  }
+
+  @Override
+  public boolean isRollingUpgrade() throws IOException {
+    rpcServer.checkOperation(OperationCategory.READ, false);
+    return false;
+  }
+}

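Since the Router now speaks NamenodeProtocol, a balancer-style client can point at the Router endpoint instead of a concrete namenode. A hedged sketch: the URI is illustrative (8888 is only the usual Router RPC port), and NameNodeProxies#createProxy is the same helper the tests below import.

    URI routerUri = URI.create("hdfs://router.example.com:8888");
    NamenodeProtocol nnProto = NameNodeProxies.createProxy(
        conf, routerUri, NamenodeProtocol.class).getProxy();
    ExportedBlockKeys keys = nnProto.getBlockKeys();  // served by default namespace
    long txid = nnProto.getTransactionID();           // served by default namespace
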
+ 45 - 17
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcClient.java

@@ -48,7 +48,6 @@ import java.util.regex.Pattern;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.NameNodeProxiesClient.ProxyAndInfo;
 import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
-import org.apache.hadoop.hdfs.protocol.ClientProtocol;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.server.federation.resolver.ActiveNamenodeResolver;
 import org.apache.hadoop.hdfs.server.federation.resolver.FederationNamenodeContext;
@@ -225,14 +224,14 @@ public class RouterRpcClient {
    *
    * @param ugi User group information.
    * @param nsId Nameservice identifier.
-   * @param rpcAddress ClientProtocol RPC server address of the NN.
+   * @param rpcAddress RPC server address of the NN.
+   * @param proto Protocol of the connection.
   * @return ConnectionContext containing a proxy client for the
    *         NN + current user.
    * @throws IOException If we cannot get a connection to the NameNode.
    */
-  private ConnectionContext getConnection(
-      UserGroupInformation ugi, String nsId, String rpcAddress)
-          throws IOException {
+  private ConnectionContext getConnection(UserGroupInformation ugi, String nsId,
+      String rpcAddress, Class<?> proto) throws IOException {
     ConnectionContext connection = null;
     try {
       // Each proxy holds the UGI info for the current user when it is created.
@@ -242,7 +241,7 @@ public class RouterRpcClient {
       // for each individual request.
 
       // TODO Add tokens from the federated UGI
-      connection = this.connectionManager.getConnection(ugi, rpcAddress);
+      connection = this.connectionManager.getConnection(ugi, rpcAddress, proto);
       LOG.debug("User {} NN {} is using connection {}",
           ugi.getUserName(), rpcAddress, connection);
     } catch (Exception ex) {
@@ -326,7 +325,8 @@ public class RouterRpcClient {
   private Object invokeMethod(
       final UserGroupInformation ugi,
       final List<? extends FederationNamenodeContext> namenodes,
-      final Method method, final Object... params) throws IOException {
+      final Class<?> protocol, final Method method, final Object... params)
+          throws IOException {
 
     if (namenodes == null || namenodes.isEmpty()) {
       throw new IOException("No namenodes to invoke " + method.getName() +
@@ -344,9 +344,10 @@ public class RouterRpcClient {
       try {
         String nsId = namenode.getNameserviceId();
         String rpcAddress = namenode.getRpcAddress();
-        connection = this.getConnection(ugi, nsId, rpcAddress);
-        ProxyAndInfo<ClientProtocol> client = connection.getClient();
-        ClientProtocol proxy = client.getProxy();
+        connection = this.getConnection(ugi, nsId, rpcAddress, protocol);
+        ProxyAndInfo<?> client = connection.getClient();
+        final Object proxy = client.getProxy();
+
         ret = invoke(nsId, 0, method, proxy, params);
         if (failover) {
           // Success on alternate server, update
@@ -611,7 +612,29 @@ public class RouterRpcClient {
     List<? extends FederationNamenodeContext> nns =
         getNamenodesForNameservice(nsId);
     RemoteLocationContext loc = new RemoteLocation(nsId, "/");
-    return invokeMethod(ugi, nns, method.getMethod(), method.getParams(loc));
+    Class<?> proto = method.getProtocol();
+    Method m = method.getMethod();
+    Object[] params = method.getParams(loc);
+    return invokeMethod(ugi, nns, proto, m, params);
+  }
+
+  /**
+   * Invokes a remote method against the specified namespace.
+   *
+   * Re-throws exceptions generated by the remote RPC call as either
+   * RemoteException or IOException.
+   *
+   * @param nsId Target namespace for the method.
+   * @param method The remote method and parameters to invoke.
+   * @param clazz Class for the return type.
+   * @return The result of invoking the method.
+   * @throws IOException If the invoke generated an error.
+   */
+  public <T> T invokeSingle(final String nsId, RemoteMethod method,
+      Class<T> clazz) throws IOException {
+    @SuppressWarnings("unchecked")
+    T ret = (T)invokeSingle(nsId, method);
+    return ret;
   }
 
   /**
@@ -689,8 +712,9 @@ public class RouterRpcClient {
       List<? extends FederationNamenodeContext> namenodes =
           getNamenodesForNameservice(ns);
       try {
+        Class<?> proto = remoteMethod.getProtocol();
         Object[] params = remoteMethod.getParams(loc);
-        Object result = invokeMethod(ugi, namenodes, m, params);
+        Object result = invokeMethod(ugi, namenodes, proto, m, params);
         // Check if the result is what we expected
         if (isExpectedClass(expectedResultClass, result) &&
             isExpectedValue(expectedResultValue, result)) {
@@ -908,14 +932,17 @@ public class RouterRpcClient {
     final UserGroupInformation ugi = RouterRpcServer.getRemoteUser();
     final Method m = method.getMethod();
 
-    if (locations.size() == 1) {
+    if (locations.isEmpty()) {
+      throw new IOException("No remote locations available");
+    } else if (locations.size() == 1) {
       // Shortcut, just one call
       T location = locations.iterator().next();
       String ns = location.getNameserviceId();
       final List<? extends FederationNamenodeContext> namenodes =
           getNamenodesForNameservice(ns);
+      Class<?> proto = method.getProtocol();
       Object[] paramList = method.getParams(location);
-      Object result = invokeMethod(ugi, namenodes, m, paramList);
+      Object result = invokeMethod(ugi, namenodes, proto, m, paramList);
       return Collections.singletonMap(location, clazz.cast(result));
     }
 
@@ -925,6 +952,7 @@ public class RouterRpcClient {
       String nsId = location.getNameserviceId();
       final List<? extends FederationNamenodeContext> namenodes =
           getNamenodesForNameservice(nsId);
+      final Class<?> proto = method.getProtocol();
       final Object[] paramList = method.getParams(location);
       if (standby) {
         // Call the objectGetter to all NNs (including standby)
@@ -939,7 +967,7 @@ public class RouterRpcClient {
           orderedLocations.add(nnLocation);
           callables.add(new Callable<Object>() {
             public Object call() throws Exception {
-              return invokeMethod(ugi, nnList, m, paramList);
+              return invokeMethod(ugi, nnList, proto, m, paramList);
             }
           });
         }
@@ -948,7 +976,7 @@ public class RouterRpcClient {
         orderedLocations.add(location);
         callables.add(new Callable<Object>() {
           public Object call() throws Exception {
-            return invokeMethod(ugi, namenodes, m, paramList);
+            return invokeMethod(ugi, namenodes, proto, m, paramList);
           }
         });
       }
@@ -979,7 +1007,7 @@ public class RouterRpcClient {
           String msg =
               "Invocation to \"" + loc + "\" for \"" + method + "\" timed out";
           LOG.error(msg);
-          IOException ioe = new IOException(msg);
+          IOException ioe = new SubClusterTimeoutException(msg);
           exceptions.put(location, ioe);
         } catch (ExecutionException ex) {
           Throwable cause = ex.getCause();

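The typed invokeSingle overload is a thin wrapper over the untyped one; the unchecked cast is localized there so callers such as RouterNamenodeProtocol stay cast-free. A sketch, where rpcClient and the nameservice id are illustrative:

    RemoteMethod method =
        new RemoteMethod(NamenodeProtocol.class, "getTransactionID");
    long txid = rpcClient.invokeSingle("ns0", method, long.class);
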
+ 132 - 9
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterRpcServer.java

@@ -101,9 +101,13 @@ import org.apache.hadoop.hdfs.protocol.SnapshotDiffReportListing;
 import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus;
 import org.apache.hadoop.hdfs.protocol.ZoneReencryptionStatus;
 import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.ClientNamenodeProtocol;
+import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.NamenodeProtocolService;
 import org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolPB;
 import org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB;
+import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolPB;
+import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolServerSideTranslatorPB;
 import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey;
+import org.apache.hadoop.hdfs.security.token.block.ExportedBlockKeys;
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
 import org.apache.hadoop.hdfs.server.federation.metrics.FederationRPCMetrics;
 import org.apache.hadoop.hdfs.server.federation.resolver.ActiveNamenodeResolver;
@@ -113,11 +117,18 @@ import org.apache.hadoop.hdfs.server.federation.resolver.MountTableResolver;
 import org.apache.hadoop.hdfs.server.federation.resolver.PathLocation;
 import org.apache.hadoop.hdfs.server.federation.resolver.RemoteLocation;
 import org.apache.hadoop.hdfs.server.federation.store.records.MountTable;
+import org.apache.hadoop.hdfs.server.namenode.CheckpointSignature;
 import org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException;
 import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory;
 import org.apache.hadoop.hdfs.server.namenode.NotReplicatedYetException;
 import org.apache.hadoop.hdfs.server.namenode.SafeModeException;
+import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeStorageReport;
+import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand;
+import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
+import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
+import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
+import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;
 import org.apache.hadoop.io.EnumSetWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.ipc.ProtobufRpcEngine;
@@ -145,7 +156,8 @@ import com.google.protobuf.BlockingService;
  * the requests to the active
  * {@link org.apache.hadoop.hdfs.server.namenode.NameNode NameNode}.
  */
-public class RouterRpcServer extends AbstractService implements ClientProtocol {
+public class RouterRpcServer extends AbstractService
+    implements ClientProtocol, NamenodeProtocol {
 
   private static final Logger LOG =
       LoggerFactory.getLogger(RouterRpcServer.class);
@@ -191,6 +203,8 @@ public class RouterRpcServer extends AbstractService implements ClientProtocol {
   private final Quota quotaCall;
   /** Erasure coding calls. */
   private final ErasureCoding erasureCoding;
+  /** NamenodeProtocol calls. */
+  private final RouterNamenodeProtocol nnProto;
 
 
   /**
@@ -243,6 +257,11 @@ public class RouterRpcServer extends AbstractService implements ClientProtocol {
     BlockingService clientNNPbService = ClientNamenodeProtocol
         .newReflectiveBlockingService(clientProtocolServerTranslator);
 
+    NamenodeProtocolServerSideTranslatorPB namenodeProtocolXlator =
+        new NamenodeProtocolServerSideTranslatorPB(this);
+    BlockingService nnPbService = NamenodeProtocolService
+        .newReflectiveBlockingService(namenodeProtocolXlator);
+
     InetSocketAddress confRpcAddress = conf.getSocketAddr(
         RBFConfigKeys.DFS_ROUTER_RPC_BIND_HOST_KEY,
         RBFConfigKeys.DFS_ROUTER_RPC_ADDRESS_KEY,
@@ -261,6 +280,11 @@ public class RouterRpcServer extends AbstractService implements ClientProtocol {
         .setQueueSizePerHandler(handlerQueueSize)
         .setVerbose(false)
         .build();
+
+    // Add all the RPC protocols that the Router implements
+    DFSUtil.addPBProtocol(
+        conf, NamenodeProtocolPB.class, nnPbService, this.rpcServer);
+
     // We don't want the server to log the full stack trace for some exceptions
     this.rpcServer.addTerseExceptions(
         RemoteException.class,
@@ -292,6 +316,7 @@ public class RouterRpcServer extends AbstractService implements ClientProtocol {
     // Initialize modules
     this.quotaCall = new Quota(this.router, this);
     this.erasureCoding = new ErasureCoding(this);
+    this.nnProto = new RouterNamenodeProtocol(this);
   }
 
   @Override
@@ -336,6 +361,15 @@ public class RouterRpcServer extends AbstractService implements ClientProtocol {
     return rpcClient;
   }
 
+  /**
+   * Get the subcluster resolver.
+   *
+   * @return Subcluster resolver.
+   */
+  public FileSubclusterResolver getSubclusterResolver() {
+    return subclusterResolver;
+  }
+
   /**
    * Get the RPC monitor and metrics.
    *
@@ -866,7 +900,8 @@ public class RouterRpcServer extends AbstractService implements ClientProtocol {
       throws IOException {
     checkOperation(OperationCategory.WRITE);
 
-    final List<RemoteLocation> srcLocations = getLocationsForPath(src, true);
+    final List<RemoteLocation> srcLocations =
+        getLocationsForPath(src, true, false);
     // srcLocations may be trimmed by getRenameDestinations()
     final List<RemoteLocation> locs = new LinkedList<>(srcLocations);
     RemoteParam dstParam = getRenameDestinations(locs, dst);
@@ -887,7 +922,8 @@ public class RouterRpcServer extends AbstractService implements ClientProtocol {
       final Options.Rename... options) throws IOException {
     checkOperation(OperationCategory.WRITE);
 
-    final List<RemoteLocation> srcLocations = getLocationsForPath(src, true);
+    final List<RemoteLocation> srcLocations =
+        getLocationsForPath(src, true, false);
     // srcLocations may be trimmed by getRenameDestinations()
     final List<RemoteLocation> locs = new LinkedList<>(srcLocations);
     RemoteParam dstParam = getRenameDestinations(locs, dst);
@@ -964,7 +1000,8 @@ public class RouterRpcServer extends AbstractService implements ClientProtocol {
   public boolean delete(String src, boolean recursive) throws IOException {
     checkOperation(OperationCategory.WRITE);
 
-    final List<RemoteLocation> locations = getLocationsForPath(src, true);
+    final List<RemoteLocation> locations =
+        getLocationsForPath(src, true, false);
     RemoteMethod method = new RemoteMethod("delete",
         new Class<?>[] {String.class, boolean.class}, new RemoteParam(),
         recursive);
@@ -1349,7 +1386,7 @@ public class RouterRpcServer extends AbstractService implements ClientProtocol {
         action, isChecked);
     Set<FederationNamespaceInfo> nss = namenodeResolver.getNamespaces();
     Map<FederationNamespaceInfo, Boolean> results =
-        rpcClient.invokeConcurrent(nss, method, true, true, boolean.class);
+        rpcClient.invokeConcurrent(nss, method, true, true, Boolean.class);
 
     // We only report true if all the name space are in safe mode
     int numSafemode = 0;
@@ -1369,7 +1406,7 @@ public class RouterRpcServer extends AbstractService implements ClientProtocol {
         new Class<?>[] {String.class}, arg);
     final Set<FederationNamespaceInfo> nss = namenodeResolver.getNamespaces();
     Map<FederationNamespaceInfo, Boolean> ret =
-        rpcClient.invokeConcurrent(nss, method, true, false, boolean.class);
+        rpcClient.invokeConcurrent(nss, method, true, false, Boolean.class);
 
     boolean success = true;
     for (boolean s : ret.values()) {
@@ -2070,6 +2107,77 @@ public class RouterRpcServer extends AbstractService implements ClientProtocol {
     return null;
   }
 
+  @Override // NamenodeProtocol
+  public BlocksWithLocations getBlocks(DatanodeInfo datanode, long size,
+      long minBlockSize) throws IOException {
+    return nnProto.getBlocks(datanode, size, minBlockSize);
+  }
+
+  @Override // NamenodeProtocol
+  public ExportedBlockKeys getBlockKeys() throws IOException {
+    return nnProto.getBlockKeys();
+  }
+
+  @Override // NamenodeProtocol
+  public long getTransactionID() throws IOException {
+    return nnProto.getTransactionID();
+  }
+
+  @Override // NamenodeProtocol
+  public long getMostRecentCheckpointTxId() throws IOException {
+    return nnProto.getMostRecentCheckpointTxId();
+  }
+
+  @Override // NamenodeProtocol
+  public CheckpointSignature rollEditLog() throws IOException {
+    return nnProto.rollEditLog();
+  }
+
+  @Override // NamenodeProtocol
+  public NamespaceInfo versionRequest() throws IOException {
+    return nnProto.versionRequest();
+  }
+
+  @Override // NamenodeProtocol
+  public void errorReport(NamenodeRegistration registration, int errorCode,
+      String msg) throws IOException {
+    nnProto.errorReport(registration, errorCode, msg);
+  }
+
+  @Override // NamenodeProtocol
+  public NamenodeRegistration registerSubordinateNamenode(
+      NamenodeRegistration registration) throws IOException {
+    return nnProto.registerSubordinateNamenode(registration);
+  }
+
+  @Override // NamenodeProtocol
+  public NamenodeCommand startCheckpoint(NamenodeRegistration registration)
+      throws IOException {
+    return nnProto.startCheckpoint(registration);
+  }
+
+  @Override // NamenodeProtocol
+  public void endCheckpoint(NamenodeRegistration registration,
+      CheckpointSignature sig) throws IOException {
+    nnProto.endCheckpoint(registration, sig);
+  }
+
+  @Override // NamenodeProtocol
+  public RemoteEditLogManifest getEditLogManifest(long sinceTxId)
+      throws IOException {
+    return nnProto.getEditLogManifest(sinceTxId);
+  }
+
+  @Override // NamenodeProtocol
+  public boolean isUpgradeFinalized() throws IOException {
+    return nnProto.isUpgradeFinalized();
+  }
+
+  @Override // NamenodeProtocol
+  public boolean isRollingUpgrade() throws IOException {
+    return nnProto.isRollingUpgrade();
+  }
+
   /**
    * Locate the location with the matching block pool id.
    *
@@ -2106,16 +2214,31 @@ public class RouterRpcServer extends AbstractService implements ClientProtocol {
         "Cannot locate a nameservice for block pool " + blockPoolId);
   }
 
+  /**
+   * Get the possible locations of a path in the federated cluster.
+   * This overload performs quota verification during the lookup.
+   *
+   * @param path Path to check.
+   * @param failIfLocked Fail the request if locked (top mount point).
+   * @return Prioritized list of locations in the federated cluster.
+   * @throws IOException If the location for this path cannot be determined.
+   */
+  protected List<RemoteLocation> getLocationsForPath(String path,
+      boolean failIfLocked) throws IOException {
+    return getLocationsForPath(path, failIfLocked, true);
+  }
+
   /**
    * Get the possible locations of a path in the federated cluster.
    *
    * @param path Path to check.
    * @param failIfLocked Fail the request if locked (top mount point).
+   * @param needQuotaVerify Whether to perform quota verification.
    * @return Prioritized list of locations in the federated cluster.
    * @throws IOException If the location for this path cannot be determined.
    */
-  protected List<RemoteLocation> getLocationsForPath(
-      String path, boolean failIfLocked) throws IOException {
+  protected List<RemoteLocation> getLocationsForPath(String path,
+      boolean failIfLocked, boolean needQuotaVerify) throws IOException {
     try {
       // Check the location for this path
       final PathLocation location =
@@ -2136,7 +2259,7 @@ public class RouterRpcServer extends AbstractService implements ClientProtocol {
         }
 
         // Check quota
-        if (this.router.isQuotaEnabled()) {
+        if (this.router.isQuotaEnabled() && needQuotaVerify) {
           RouterQuotaUsage quotaUsage = this.router.getQuotaManager()
               .getQuotaUsage(path);
           if (quotaUsage != null) {

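The needQuotaVerify flag exists so that delete and rename can still resolve locations for a mount point that is already over quota; otherwise data could never be removed to get back under the limit. The two call shapes, with src standing in for the caller's path:

    // Default overload: quota verified (create, mkdir, ...):
    List<RemoteLocation> locs = getLocationsForPath(src, true);
    // Verification skipped for delete/rename, per the hunks above:
    List<RemoteLocation> srcLocations = getLocationsForPath(src, true, false);
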
+ 33 - 0
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/SubClusterTimeoutException.java

@@ -0,0 +1,33 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.federation.router;
+
+import java.io.IOException;
+
+
+/**
+ * Exception thrown when a subcluster does not reply within the timeout.
+ */
+public class SubClusterTimeoutException extends IOException {
+
+  private static final long serialVersionUID = 1L;
+
+  public SubClusterTimeoutException(String msg) {
+    super(msg);
+  }
+}

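Because SubClusterTimeoutException extends IOException, all existing catch blocks keep working, while a router-side caller that prefers partial results can single out slow subclusters. A hedged sketch: rpcClient, nss, method and LOG stand in for the fields used throughout RouterRpcClient, and whether the timeout surfaces to the caller depends on the requireResponse flag.

    Map<FederationNamespaceInfo, DatanodeInfo[]> results = null;
    try {
      results = rpcClient.invokeConcurrent(
          nss, method, true, false, DatanodeInfo[].class);
    } catch (SubClusterTimeoutException e) {
      // One subcluster did not answer in time; fall back to a partial view.
      LOG.warn("Subcluster timed out: {}", e.getMessage());
    }
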
+ 4 - 2
hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileSystemImpl.java

@@ -35,13 +35,15 @@ import org.apache.hadoop.fs.Options;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.hdfs.server.federation.store.driver.StateStoreDriver;
 import org.apache.hadoop.hdfs.server.federation.store.records.BaseRecord;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /**
- * StateStoreDriver} implementation based on a filesystem. The most common uses
- * HDFS as a backend.
+ * {@link StateStoreDriver} implementation based on a filesystem. The common
+ * implementation uses HDFS as a backend. The path can be specified by setting
+ * dfs.federation.router.driver.fs.path=hdfs://host:port/path/to/store.
  */
 public class StateStoreFileSystemImpl extends StateStoreFileBaseImpl {
 

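A minimal sketch of pointing the file-system State Store at an HDFS path, using the configuration key named in the javadoc above; host, port and path are placeholders:

    Configuration conf = new Configuration();
    conf.set("dfs.federation.router.driver.fs.path",
        "hdfs://nn0.example.com:8020/router/state-store");
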
+ 38 - 1
hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/MiniRouterDFSCluster.java

@@ -20,6 +20,7 @@ package org.apache.hadoop.hdfs.server.federation;
 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NAMENODES_KEY_PREFIX;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NAMENODE_ID_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_INTERNAL_NAMESERVICES_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RPC_BIND_HOST_KEY;
@@ -132,6 +133,9 @@ public class MiniRouterDFSCluster {
   /** Namenode configuration overrides. */
   private Configuration namenodeOverrides;
 
+  /** Whether the DNs are shared across subclusters. */
+  private boolean sharedDNs = true;
+
 
   /**
    * Router context.
@@ -239,6 +243,10 @@ public class MiniRouterDFSCluster {
       }
       return client;
     }
+
+    public Configuration getConf() {
+      return conf;
+    }
   }
 
   /**
@@ -351,6 +359,10 @@ public class MiniRouterDFSCluster {
       }
       return suffix;
     }
+
+    public Configuration getConf() {
+      return conf;
+    }
   }
 
   public MiniRouterDFSCluster(
@@ -550,6 +562,13 @@ public class MiniRouterDFSCluster {
     this.numDatanodesPerNameservice = num;
   }
 
+  /**
+   * Set the DNs to belong to only one subcluster.
+   */
+  public void setIndependentDNs() {
+    this.sharedDNs = false;
+  }
+
   public String getNameservicesKey() {
     StringBuilder sb = new StringBuilder();
     for (String nsId : this.nameservices) {
@@ -669,15 +688,33 @@ public class MiniRouterDFSCluster {
       }
       topology.setFederation(true);
 
+      // Set independent DNs across subclusters
+      int numDNs = nameservices.size() * numDatanodesPerNameservice;
+      Configuration[] dnConfs = null;
+      if (!sharedDNs) {
+        dnConfs = new Configuration[numDNs];
+        int dnId = 0;
+        for (String nsId : nameservices) {
+          Configuration subclusterConf = new Configuration();
+          subclusterConf.set(DFS_INTERNAL_NAMESERVICES_KEY, nsId);
+          for (int i = 0; i < numDatanodesPerNameservice; i++) {
+            dnConfs[dnId] = subclusterConf;
+            dnId++;
+          }
+        }
+      }
+
       // Start mini DFS cluster
       String ns0 = nameservices.get(0);
       Configuration nnConf = generateNamenodeConfiguration(ns0);
       if (overrideConf != null) {
         nnConf.addResource(overrideConf);
       }
+
       cluster = new MiniDFSCluster.Builder(nnConf)
-          .numDataNodes(nameservices.size() * numDatanodesPerNameservice)
+          .numDataNodes(numDNs)
           .nnTopology(topology)
+          .dataNodeConfOverlays(dnConfs)
           .build();
       cluster.waitActive();
 

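setIndependentDNs() pins each datanode to its own nameservice through dfs.internal.nameservices, which is what lets the slow-namenode test below observe per-subcluster datanode reports. A sketch of a test setup; setNumDatanodesPerNameservice, startCluster and shutdown are assumed to be the existing harness methods used elsewhere in these tests:

    StateStoreDFSCluster cluster = new StateStoreDFSCluster(false, 2);
    cluster.setIndependentDNs();          // DNs register with one subcluster only
    cluster.setNumDatanodesPerNameservice(2);
    cluster.startCluster();
    // ... each nameservice now reports exactly its own two datanodes ...
    cluster.shutdown();
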
+ 48 - 8
hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestConnectionManager.java

@@ -18,6 +18,8 @@
 package org.apache.hadoop.hdfs.server.federation.router;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.protocol.ClientProtocol;
+import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.junit.After;
@@ -68,14 +70,18 @@ public class TestConnectionManager {
     Map<ConnectionPoolId, ConnectionPool> poolMap = connManager.getPools();
 
     ConnectionPool pool1 = new ConnectionPool(
-        conf, TEST_NN_ADDRESS, TEST_USER1, 0, 10);
+        conf, TEST_NN_ADDRESS, TEST_USER1, 0, 10, ClientProtocol.class);
     addConnectionsToPool(pool1, 9, 4);
-    poolMap.put(new ConnectionPoolId(TEST_USER1, TEST_NN_ADDRESS), pool1);
+    poolMap.put(
+        new ConnectionPoolId(TEST_USER1, TEST_NN_ADDRESS, ClientProtocol.class),
+        pool1);
 
     ConnectionPool pool2 = new ConnectionPool(
-        conf, TEST_NN_ADDRESS, TEST_USER2, 0, 10);
+        conf, TEST_NN_ADDRESS, TEST_USER2, 0, 10, ClientProtocol.class);
     addConnectionsToPool(pool2, 10, 10);
-    poolMap.put(new ConnectionPoolId(TEST_USER2, TEST_NN_ADDRESS), pool2);
+    poolMap.put(
+        new ConnectionPoolId(TEST_USER2, TEST_NN_ADDRESS, ClientProtocol.class),
+        pool2);
 
     checkPoolConnections(TEST_USER1, 9, 4);
     checkPoolConnections(TEST_USER2, 10, 10);
@@ -94,9 +100,11 @@ public class TestConnectionManager {
 
     // Make sure the number of connections doesn't go below minSize
     ConnectionPool pool3 = new ConnectionPool(
-        conf, TEST_NN_ADDRESS, TEST_USER3, 2, 10);
+        conf, TEST_NN_ADDRESS, TEST_USER3, 2, 10, ClientProtocol.class);
     addConnectionsToPool(pool3, 8, 0);
-    poolMap.put(new ConnectionPoolId(TEST_USER3, TEST_NN_ADDRESS), pool3);
+    poolMap.put(
+        new ConnectionPoolId(TEST_USER3, TEST_NN_ADDRESS, ClientProtocol.class),
+        pool3);
     checkPoolConnections(TEST_USER3, 10, 0);
     for (int i = 0; i < 10; i++) {
       connManager.cleanup(pool3);
@@ -119,9 +127,41 @@ public class TestConnectionManager {
     int activeConns = 5;
 
     ConnectionPool pool = new ConnectionPool(
-        conf, TEST_NN_ADDRESS, TEST_USER1, 0, 10);
+        conf, TEST_NN_ADDRESS, TEST_USER1, 0, 10, ClientProtocol.class);
     addConnectionsToPool(pool, totalConns, activeConns);
-    poolMap.put(new ConnectionPoolId(TEST_USER1, TEST_NN_ADDRESS), pool);
+    poolMap.put(
+        new ConnectionPoolId(TEST_USER1, TEST_NN_ADDRESS, ClientProtocol.class),
+        pool);
+
+    // All remaining connections should be usable
+    final int remainingSlots = totalConns - activeConns;
+    for (int i = 0; i < remainingSlots; i++) {
+      ConnectionContext cc = pool.getConnection();
+      assertTrue(cc.isUsable());
+      cc.getClient();
+      activeConns++;
+    }
+
+    checkPoolConnections(TEST_USER1, totalConns, activeConns);
+
+    // Ask for more and this returns an active connection
+    ConnectionContext cc = pool.getConnection();
+    assertTrue(cc.isActive());
+  }
+
+  @Test
+  public void testGetConnectionNamenodeProtocol() throws Exception {
+    Map<ConnectionPoolId, ConnectionPool> poolMap = connManager.getPools();
+    final int totalConns = 10;
+    int activeConns = 5;
+
+    ConnectionPool pool = new ConnectionPool(
+        conf, TEST_NN_ADDRESS, TEST_USER1, 0, 10, NamenodeProtocol.class);
+    addConnectionsToPool(pool, totalConns, activeConns);
+    poolMap.put(
+        new ConnectionPoolId(
+            TEST_USER1, TEST_NN_ADDRESS, NamenodeProtocol.class),
+        pool);
 
     // All remaining connections should be usable
     final int remainingSlots = totalConns - activeConns;

+ 54 - 16
hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouter.java

@@ -17,23 +17,25 @@
  */
 package org.apache.hadoop.hdfs.server.federation.router;
 
+import static org.apache.hadoop.test.GenericTestUtils.assertExceptionContains;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.fail;
 
 import java.io.IOException;
-import java.net.URISyntaxException;
+import java.net.InetSocketAddress;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeys;
+import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
 import org.apache.hadoop.hdfs.server.federation.MockResolver;
 import org.apache.hadoop.hdfs.server.federation.RouterConfigBuilder;
 import org.apache.hadoop.hdfs.server.federation.resolver.ActiveNamenodeResolver;
 import org.apache.hadoop.hdfs.server.federation.resolver.FileSubclusterResolver;
+import org.apache.hadoop.ipc.RemoteException;
 import org.apache.hadoop.service.Service.STATE;
-import org.junit.After;
-import org.junit.AfterClass;
-import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
@@ -77,27 +79,31 @@ public class TestRouter {
             "0.0.0.0");
   }
 
-  @AfterClass
-  public static void destroy() {
-  }
-
-  @Before
-  public void setup() throws IOException, URISyntaxException {
-  }
-
-  @After
-  public void cleanup() {
-  }
-
   private static void testRouterStartup(Configuration routerConfig)
       throws InterruptedException, IOException {
     Router router = new Router();
     assertEquals(STATE.NOTINITED, router.getServiceState());
+    assertEquals(RouterServiceState.UNINITIALIZED, router.getRouterState());
     router.init(routerConfig);
+    if (routerConfig.getBoolean(
+        RBFConfigKeys.DFS_ROUTER_SAFEMODE_ENABLE,
+        RBFConfigKeys.DFS_ROUTER_SAFEMODE_ENABLE_DEFAULT)) {
+      assertEquals(RouterServiceState.SAFEMODE, router.getRouterState());
+    } else {
+      assertEquals(RouterServiceState.INITIALIZING, router.getRouterState());
+    }
     assertEquals(STATE.INITED, router.getServiceState());
     router.start();
+    if (routerConfig.getBoolean(
+        RBFConfigKeys.DFS_ROUTER_SAFEMODE_ENABLE,
+        RBFConfigKeys.DFS_ROUTER_SAFEMODE_ENABLE_DEFAULT)) {
+      assertEquals(RouterServiceState.SAFEMODE, router.getRouterState());
+    } else {
+      assertEquals(RouterServiceState.RUNNING, router.getRouterState());
+    }
     assertEquals(STATE.STARTED, router.getServiceState());
     router.stop();
+    assertEquals(RouterServiceState.SHUTDOWN, router.getRouterState());
     assertEquals(STATE.STOPPED, router.getServiceState());
     router.close();
   }
@@ -114,6 +120,9 @@ public class TestRouter {
     // Rpc only
     testRouterStartup(new RouterConfigBuilder(conf).rpc().build());
 
+    // Safemode only
+    testRouterStartup(new RouterConfigBuilder(conf).rpc().safemode().build());
+
     // Metrics only
     testRouterStartup(new RouterConfigBuilder(conf).metrics().build());
 
@@ -147,4 +156,33 @@ public class TestRouter {
 
     router.close();
   }
+
+  @Test
+  public void testRouterRpcWithNoSubclusters() throws IOException {
+
+    Router router = new Router();
+    router.init(new RouterConfigBuilder(conf).rpc().build());
+    router.start();
+
+    InetSocketAddress serverAddress = router.getRpcServerAddress();
+    DFSClient dfsClient = new DFSClient(serverAddress, conf);
+
+    try {
+      dfsClient.create("/test.txt", false);
+      fail("Create with no subclusters should fail");
+    } catch (RemoteException e) {
+      assertExceptionContains("Cannot find locations for /test.txt", e);
+    }
+
+    try {
+      dfsClient.datanodeReport(DatanodeReportType.LIVE);
+      fail("Get datanode reports with no subclusters should fail");
+    } catch (IOException e) {
+      assertExceptionContains("No remote locations available", e);
+    }
+
+    dfsClient.close();
+    router.stop();
+    router.close();
+  }
 }

+ 4 - 0
hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterQuota.java

@@ -151,6 +151,10 @@ public class TestRouterQuota {
     // mkdir in real FileSystem should be okay
     nnFs1.mkdirs(new Path("/testdir1/" + UUID.randomUUID()));
     nnFs2.mkdirs(new Path("/testdir2/" + UUID.randomUUID()));
+
+    // delete/rename calls should still succeed
+    routerFs.delete(new Path("/nsquota"), true);
+    routerFs.rename(new Path("/nsquota/subdir"), new Path("/nsquota/subdir"));
   }
 
   @Test

+ 125 - 1
hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRPCClientRetries.java

@@ -18,11 +18,16 @@
 package org.apache.hadoop.hdfs.server.federation.router;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.spy;
 
 import java.io.IOException;
 import java.util.List;
+import java.util.concurrent.TimeUnit;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeys;
@@ -30,40 +35,65 @@ import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.protocol.ClientProtocol;
-import org.apache.hadoop.hdfs.server.federation.RouterConfigBuilder;
 import org.apache.hadoop.hdfs.server.federation.MiniRouterDFSCluster.NamenodeContext;
 import org.apache.hadoop.hdfs.server.federation.MiniRouterDFSCluster.RouterContext;
+import org.apache.hadoop.hdfs.server.federation.RouterConfigBuilder;
 import org.apache.hadoop.hdfs.server.federation.StateStoreDFSCluster;
 import org.apache.hadoop.hdfs.server.federation.metrics.FederationRPCMetrics;
+import org.apache.hadoop.hdfs.server.federation.metrics.NamenodeBeanMetrics;
 import org.apache.hadoop.hdfs.server.federation.resolver.FederationNamenodeContext;
 import org.apache.hadoop.hdfs.server.federation.resolver.MembershipNamenodeResolver;
 import org.apache.hadoop.hdfs.server.federation.resolver.NamenodeStatusReport;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
+import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory;
+import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
 import org.apache.hadoop.ipc.RemoteException;
 import org.apache.hadoop.test.GenericTestUtils;
+import org.codehaus.jettison.json.JSONException;
+import org.codehaus.jettison.json.JSONObject;
 import org.junit.After;
 import org.junit.Before;
+import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.Timeout;
+import org.mockito.internal.util.reflection.Whitebox;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Supplier;
 
 /**
  * Test retry behavior of the Router RPC Client.
  */
 public class TestRouterRPCClientRetries {
 
+  private static final Logger LOG =
+      LoggerFactory.getLogger(TestRouterRPCClientRetries.class);
+
   private static StateStoreDFSCluster cluster;
   private static NamenodeContext nnContext1;
   private static RouterContext routerContext;
   private static MembershipNamenodeResolver resolver;
   private static ClientProtocol routerProtocol;
 
+  @Rule
+  public final Timeout testTimeout = new Timeout(100000);
+
   @Before
   public void setUp() throws Exception {
     // Build and start a federated cluster
     cluster = new StateStoreDFSCluster(false, 2);
     Configuration routerConf = new RouterConfigBuilder()
         .stateStore()
+        .metrics()
         .admin()
         .rpc()
         .build();
+    routerConf.setTimeDuration(
+        NamenodeBeanMetrics.DN_REPORT_CACHE_EXPIRE, 1, TimeUnit.SECONDS);
 
     // reduce IPC client connection retry times and interval time
     Configuration clientConf = new Configuration(false);
@@ -72,6 +102,9 @@ public class TestRouterRPCClientRetries {
     clientConf.setInt(
         CommonConfigurationKeys.IPC_CLIENT_CONNECT_RETRY_INTERVAL_KEY, 100);
 
+    // Set the DNs to belong to only one subcluster
+    cluster.setIndependentDNs();
+
     cluster.addRouterOverrides(routerConf);
     // override some settings for the client
     cluster.startCluster(clientConf);
@@ -157,4 +190,95 @@ public class TestRouterRPCClientRetries {
     assertTrue(resolver.registerNamenode(report));
     resolver.loadCache(true);
   }
+
+  @Test
+  public void testNamenodeMetricsSlow() throws Exception {
+    final Router router = routerContext.getRouter();
+    final NamenodeBeanMetrics metrics = router.getNamenodeMetrics();
+
+    // Initially, there are 4 DNs in total
+    final String jsonString0 = metrics.getLiveNodes();
+    assertEquals(4, getNumDatanodes(jsonString0));
+
+    // The response should be cached
+    assertEquals(jsonString0, metrics.getLiveNodes());
+
+    // Check that the cached value gets updated eventually
+    waitUpdateLiveNodes(jsonString0, metrics);
+    final String jsonString2 = metrics.getLiveNodes();
+    assertNotEquals(jsonString0, jsonString2);
+    assertEquals(4, getNumDatanodes(jsonString2));
+
+    // Making subcluster0 slow to reply, should only get DNs from nn1
+    MiniDFSCluster dfsCluster = cluster.getCluster();
+    NameNode nn0 = dfsCluster.getNameNode(0);
+    simulateNNSlow(nn0);
+    waitUpdateLiveNodes(jsonString2, metrics);
+    final String jsonString3 = metrics.getLiveNodes();
+    assertEquals(2, getNumDatanodes(jsonString3));
+
+    // Making subcluster1 slow to reply, shouldn't get any DNs
+    NameNode nn1 = dfsCluster.getNameNode(1);
+    simulateNNSlow(nn1);
+    waitUpdateLiveNodes(jsonString3, metrics);
+    final String jsonString4 = metrics.getLiveNodes();
+    assertEquals(0, getNumDatanodes(jsonString4));
+  }
+
+  /**
+   * Get the number of nodes in a JSON string.
+   * @param jsonString JSON string containing nodes.
+   * @return Number of nodes.
+   * @throws JSONException If the JSON string is not properly formed.
+   */
+  private static int getNumDatanodes(final String jsonString)
+      throws JSONException {
+    JSONObject jsonObject = new JSONObject(jsonString);
+    if (jsonObject.length() == 0) {
+      return 0;
+    }
+    return jsonObject.names().length();
+  }
+
+  /**
+   * Wait until the cached live nodes value is updated.
+   * @param oldValue Old cached value.
+   * @param metrics Namenode metrics beans to get the live nodes from.
+   * @throws Exception If it cannot wait.
+   */
+  private static void waitUpdateLiveNodes(
+      final String oldValue, final NamenodeBeanMetrics metrics)
+          throws Exception {
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        return !oldValue.equals(metrics.getLiveNodes());
+      }
+    }, 500, 5 * 1000);
+  }
+
+  /**
+   * Simulate that a Namenode is slow by adding a sleep to the check operation
+   * in the NN.
+   * @param nn Namenode to make slow.
+   * @throws Exception If we cannot add the sleep time.
+   */
+  private static void simulateNNSlow(final NameNode nn) throws Exception {
+    FSNamesystem namesystem = nn.getNamesystem();
+    HAContext haContext = namesystem.getHAContext();
+    HAContext spyHAContext = spy(haContext);
+    doAnswer(new Answer<Object>() {
+      @Override
+      public Object answer(InvocationOnMock invocation) throws Throwable {
+        LOG.info("Simulating slow namenode {}", invocation.getMock());
+        try {
+          Thread.sleep(3 * 1000);
+        } catch (InterruptedException e) {
+          LOG.error("Simulating a slow namenode aborted");
+        }
+        return null;
+      }
+    }).when(spyHAContext).checkOperation(any(OperationCategory.class));
+    Whitebox.setInternalState(namesystem, "haContext", spyHAContext);
+  }
 }
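
Note on the Mockito pattern used in simulateNNSlow() above: doAnswer() is the only way to stub a void method such as HAContext#checkOperation. A minimal self-contained sketch of the same latency-injection technique, assuming a hypothetical Service class (all names below are illustrative, not part of this patch):

import static org.mockito.Mockito.any;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.spy;

public class SlowSpySketch {
  // Hypothetical collaborator; not part of this patch.
  static class Service {
    void check(String op) { /* normally fast */ }
  }

  public static void main(String[] args) throws Exception {
    Service slow = spy(new Service());
    // doAnswer is required for void methods; when(...).thenReturn(...)
    // does not compile for them.
    doAnswer(invocation -> {
      Thread.sleep(3000); // inject ~3s of latency, as simulateNNSlow does
      return null;        // a void method must answer null
    }).when(slow).check(any(String.class));
    slow.check("op"); // now takes ~3 seconds
  }
}

The spy keeps real behavior for every other method, which is why only the check operation slows down while the rest of the NN keeps working.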

+ 122 - 14
hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterRpc.java

@@ -59,6 +59,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.NameNodeProxies;
 import org.apache.hadoop.hdfs.client.HdfsDataOutputStream;
 import org.apache.hadoop.hdfs.protocol.AddErasureCodingPolicyResponse;
 import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy;
@@ -70,16 +71,23 @@ import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
 import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicyInfo;
 import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicyState;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
+import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
 import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
+import org.apache.hadoop.hdfs.security.token.block.ExportedBlockKeys;
 import org.apache.hadoop.hdfs.server.federation.MiniRouterDFSCluster;
 import org.apache.hadoop.hdfs.server.federation.MiniRouterDFSCluster.NamenodeContext;
 import org.apache.hadoop.hdfs.server.federation.MiniRouterDFSCluster.RouterContext;
+import org.apache.hadoop.hdfs.server.federation.MockResolver;
 import org.apache.hadoop.hdfs.server.federation.RouterConfigBuilder;
 import org.apache.hadoop.hdfs.server.federation.metrics.NamenodeBeanMetrics;
 import org.apache.hadoop.hdfs.server.federation.resolver.FileSubclusterResolver;
+import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations;
+import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations.BlockWithLocations;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeStorageReport;
+import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
+import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
 import org.apache.hadoop.io.EnumSetWritable;
 import org.apache.hadoop.io.erasurecode.ECSchema;
 import org.apache.hadoop.io.erasurecode.ErasureCodeConstants;
@@ -133,6 +141,11 @@ public class TestRouterRpc {
   /** Client interface to the Namenode. */
   private ClientProtocol nnProtocol;
 
+  /** NameNodeProtocol interface to the Router. */
+  private NamenodeProtocol routerNamenodeProtocol;
+  /** NameNodeProtocol interface to the Namenode. */
+  private NamenodeProtocol nnNamenodeProtocol;
+
   /** Filesystem interface to the Router. */
   private FileSystem routerFS;
   /** Filesystem interface to the Namenode. */
@@ -189,22 +202,18 @@ public class TestRouterRpc {
     // Wait to ensure NN has fully created its test directories
     Thread.sleep(100);
 
-    // Default namenode and random router for this test
-    this.router = cluster.getRandomRouter();
-    this.ns = cluster.getNameservices().get(0);
-    this.namenode = cluster.getNamenode(ns, null);
-
-    // Handles to the ClientProtocol interface
-    this.routerProtocol = router.getClient().getNamenode();
-    this.nnProtocol = namenode.getClient().getNamenode();
+    // Random router for this test
+    RouterContext rndRouter = cluster.getRandomRouter();
+    this.setRouter(rndRouter);
 
-    // Handles to the filesystem client
-    this.nnFS = namenode.getFileSystem();
-    this.routerFS = router.getFileSystem();
+    // Pick a namenode for this test
+    String ns0 = cluster.getNameservices().get(0);
+    this.setNs(ns0);
+    this.setNamenode(cluster.getNamenode(ns0, null));
 
     // Create a test file on the NN
-    Random r = new Random();
-    String randomFile = "testfile-" + r.nextInt();
+    Random rnd = new Random();
+    String randomFile = "testfile-" + rnd.nextInt();
     this.nnFile =
         cluster.getNamenodeTestDirectoryForNS(ns) + "/" + randomFile;
     this.routerFile =
@@ -245,6 +254,8 @@ public class TestRouterRpc {
     this.router = r;
     this.routerProtocol = r.getClient().getNamenode();
     this.routerFS = r.getFileSystem();
+    this.routerNamenodeProtocol = NameNodeProxies.createProxy(router.getConf(),
+        router.getFileSystem().getUri(), NamenodeProtocol.class).getProxy();
   }
 
   protected FileSystem getRouterFileSystem() {
@@ -288,6 +299,12 @@ public class TestRouterRpc {
     this.namenode = nn;
     this.nnProtocol = nn.getClient().getNamenode();
     this.nnFS = nn.getFileSystem();
+
+    // Namenode from the default namespace
+    String ns0 = cluster.getNameservices().get(0);
+    NamenodeContext nn0 = cluster.getNamenode(ns0, null);
+    this.nnNamenodeProtocol = NameNodeProxies.createProxy(nn0.getConf(),
+        nn0.getFileSystem().getUri(), NamenodeProtocol.class).getProxy();
   }
 
   protected String getNs() {
@@ -932,6 +949,79 @@ public class TestRouterRpc {
     assertEquals(routerFailure.getClass(), nnFailure.getClass());
   }
 
+  @Test
+  public void testProxyVersionRequest() throws Exception {
+    NamespaceInfo rVersion = routerNamenodeProtocol.versionRequest();
+    NamespaceInfo nnVersion = nnNamenodeProtocol.versionRequest();
+    assertEquals(nnVersion.getBlockPoolID(), rVersion.getBlockPoolID());
+    assertEquals(nnVersion.getNamespaceID(), rVersion.getNamespaceID());
+    assertEquals(nnVersion.getClusterID(), rVersion.getClusterID());
+    assertEquals(nnVersion.getLayoutVersion(), rVersion.getLayoutVersion());
+    assertEquals(nnVersion.getCTime(), rVersion.getCTime());
+  }
+
+  @Test
+  public void testProxyGetBlockKeys() throws Exception {
+    ExportedBlockKeys rKeys = routerNamenodeProtocol.getBlockKeys();
+    ExportedBlockKeys nnKeys = nnNamenodeProtocol.getBlockKeys();
+    assertEquals(nnKeys.getCurrentKey(), rKeys.getCurrentKey());
+    assertEquals(nnKeys.getKeyUpdateInterval(), rKeys.getKeyUpdateInterval());
+    assertEquals(nnKeys.getTokenLifetime(), rKeys.getTokenLifetime());
+  }
+
+  @Test
+  public void testProxyGetBlocks() throws Exception {
+    // Get datanodes
+    DatanodeInfo[] dns =
+        routerProtocol.getDatanodeReport(DatanodeReportType.ALL);
+    DatanodeInfo dn0 = dns[0];
+
+    // Verify that getting the blocks of that datanode works
+    BlocksWithLocations routerBlockLocations =
+        routerNamenodeProtocol.getBlocks(dn0, 1024, 0);
+    BlocksWithLocations nnBlockLocations =
+        nnNamenodeProtocol.getBlocks(dn0, 1024, 0);
+    BlockWithLocations[] routerBlocks = routerBlockLocations.getBlocks();
+    BlockWithLocations[] nnBlocks = nnBlockLocations.getBlocks();
+    assertEquals(nnBlocks.length, routerBlocks.length);
+    for (int i = 0; i < routerBlocks.length; i++) {
+      assertEquals(
+          nnBlocks[i].getBlock().getBlockId(),
+          routerBlocks[i].getBlock().getBlockId());
+    }
+  }
+
+  @Test
+  public void testProxyGetTransactionID() throws IOException {
+    long routerTransactionID = routerNamenodeProtocol.getTransactionID();
+    long nnTransactionID = nnNamenodeProtocol.getTransactionID();
+    assertEquals(nnTransactionID, routerTransactionID);
+  }
+
+  @Test
+  public void testProxyGetMostRecentCheckpointTxId() throws IOException {
+    long routerCheckPointId =
+        routerNamenodeProtocol.getMostRecentCheckpointTxId();
+    long nnCheckPointId = nnNamenodeProtocol.getMostRecentCheckpointTxId();
+    assertEquals(nnCheckPointId, routerCheckPointId);
+  }
+
+  @Test
+  public void testProxySetSafemode() throws Exception {
+    boolean routerSafemode =
+        routerProtocol.setSafeMode(SafeModeAction.SAFEMODE_GET, false);
+    boolean nnSafemode =
+        nnProtocol.setSafeMode(SafeModeAction.SAFEMODE_GET, false);
+    assertEquals(nnSafemode, routerSafemode);
+  }
+
+  @Test
+  public void testProxyRestoreFailedStorage() throws Exception {
+    boolean routerSuccess = routerProtocol.restoreFailedStorage("check");
+    boolean nnSuccess = nnProtocol.restoreFailedStorage("check");
+    assertEquals(nnSuccess, routerSuccess);
+  }
+
   @Test
   public void testErasureCoding() throws IOException {
 
@@ -1066,7 +1156,25 @@ public class TestRouterRpc {
     }, 500, 5 * 1000);
 
     // The cache should be updated now
-    assertNotEquals(jsonString0, metrics.getLiveNodes());
+    final String jsonString2 = metrics.getLiveNodes();
+    assertNotEquals(jsonString0, jsonString2);
+
+    // Without any subcluster available, we should return an empty list
+    MockResolver resolver =
+        (MockResolver) router.getRouter().getNamenodeResolver();
+    resolver.cleanRegistrations();
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        return !jsonString2.equals(metrics.getLiveNodes());
+      }
+    }, 500, 5 * 1000);
+    assertEquals("{}", metrics.getLiveNodes());
+
+    // Reset the registrations again
+    cluster.registerNamenodes();
+    cluster.waitNamenodeRegistration();
   }
 
   /**

+ 5 - 0
hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/resources/contract/webhdfs.xml

@@ -23,4 +23,9 @@
     <value>false</value>
   </property>
 
+  <property>
+    <name>fs.contract.create-visibility-delayed</name>
+    <value>true</value>
+  </property>
+
 </configuration>

File diff suppressed because it is too large
+ 11 - 0
hadoop-hdfs-project/hadoop-hdfs/dev-support/jdiff/Apache_Hadoop_HDFS_3.1.0.xml


+ 17 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockIdManager.java

@@ -239,6 +239,23 @@ public class BlockIdManager {
     legacyGenerationStampLimit = HdfsConstants.GRANDFATHER_GENERATION_STAMP;
   }
 
+  /**
+   * Return true if the block is a striped block.
+   *
+   * Before HDFS-4645, block IDs were randomly generated (legacy), so it is
+   * possible for a legacy block ID to be negative; such an ID should not be
+   * considered a striped block ID.
+   *
+   * @see #isLegacyBlock(Block) detecting legacy block IDs.
+   */
+  public boolean isStripedBlock(Block block) {
+    return isStripedBlockID(block.getBlockId()) && !isLegacyBlock(block);
+  }
+
+  /**
+   * See {@link #isStripedBlock(Block)}; this method should not be used alone
+   * to determine whether a block is a striped block.
+   */
   public static boolean isStripedBlockID(long id) {
     return BlockType.fromBlockId(id) == STRIPED;
   }
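
Why the new instance method matters: striped (erasure-coded) block IDs are allocated from the negative ID space, but a pre-HDFS-4645 cluster may also hold randomly generated negative IDs. A hedged sketch (the literal ID below is illustrative):

// Hypothetical legacy block ID from a pre-HDFS-4645 cluster; random
// generation means it may be negative.
long legacyId = -4215378029432784L;

// The static check looks only at the ID and would misclassify it:
assert BlockIdManager.isStripedBlockID(legacyId);

// isStripedBlock(Block) additionally consults isLegacyBlock(), which
// compares the block's generation stamp against the legacy generation
// stamp limit, so a legacy block is correctly treated as contiguous.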

+ 3 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java

@@ -448,7 +448,8 @@ public class BlockManager implements BlockStatsMXBean {
         DFSConfigKeys.DFS_NAMENODE_STARTUP_DELAY_BLOCK_DELETION_SEC_DEFAULT) * 1000L;
     invalidateBlocks = new InvalidateBlocks(
         datanodeManager.getBlockInvalidateLimit(),
-        startupDelayBlockDeletionInMs);
+        startupDelayBlockDeletionInMs,
+        blockIdManager);
 
     // Compute the map capacity by allocating 2% of total memory
     blocksMap = new BlocksMap(
@@ -1677,7 +1678,7 @@ public class BlockManager implements BlockStatsMXBean {
       corrupted.setBlockId(b.getStored().getBlockId());
     }
     corruptReplicas.addToCorruptReplicasMap(corrupted, node, b.getReason(),
-        b.getReasonCode());
+        b.getReasonCode(), b.getStored().isStriped());
 
     NumberReplicas numberOfReplicas = countNodes(b.getStored());
     boolean hasEnoughLiveReplicas = numberOfReplicas.liveReplicas() >=

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerSafeMode.java

@@ -486,7 +486,7 @@ class BlockManagerSafeMode {
 
     if (!blockManager.getShouldPostponeBlocksFromFuture() &&
         !inRollBack && blockManager.isGenStampInFuture(brr)) {
-      if (BlockIdManager.isStripedBlockID(brr.getBlockId())) {
+      if (blockManager.getBlockIdManager().isStripedBlock(brr)) {
         bytesInFutureECBlockGroups.add(brr.getBytesOnDisk());
       } else {
         bytesInFutureBlocks.add(brr.getBytesOnDisk());

+ 6 - 6
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlocksMap.java

@@ -93,7 +93,7 @@ class BlocksMap {
    * remove it from all data-node lists it belongs to;
    * and remove all data-node locations associated with the block.
    */
-  void removeBlock(Block block) {
+  void removeBlock(BlockInfo block) {
     BlockInfo blockInfo = blocks.remove(block);
     if (blockInfo == null) {
       return;
@@ -175,7 +175,7 @@ class BlocksMap {
     if (info.hasNoStorage()    // no datanodes left
         && info.isDeleted()) { // does not belong to a file
       blocks.remove(b);  // remove block from the map
-      decrementBlockStat(b);
+      decrementBlockStat(info);
     }
     return removed;
   }
@@ -207,16 +207,16 @@ class BlocksMap {
     return capacity;
   }
 
-  private void incrementBlockStat(Block block) {
-    if (BlockIdManager.isStripedBlockID(block.getBlockId())) {
+  private void incrementBlockStat(BlockInfo block) {
+    if (block.isStriped()) {
       totalECBlockGroups.increment();
     } else {
       totalReplicatedBlocks.increment();
     }
   }
 
-  private void decrementBlockStat(Block block) {
-    if (BlockIdManager.isStripedBlockID(block.getBlockId())) {
+  private void decrementBlockStat(BlockInfo block) {
+    if (block.isStriped()) {
       totalECBlockGroups.decrement();
       assert totalECBlockGroups.longValue() >= 0 :
           "Total number of ec block groups should be non-negative";

+ 18 - 17
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CorruptReplicasMap.java

@@ -69,12 +69,12 @@ public class CorruptReplicasMap{
    * @param reasonCode the enum representation of the reason
    */
   void addToCorruptReplicasMap(Block blk, DatanodeDescriptor dn,
-      String reason, Reason reasonCode) {
+      String reason, Reason reasonCode, boolean isStriped) {
     Map <DatanodeDescriptor, Reason> nodes = corruptReplicasMap.get(blk);
     if (nodes == null) {
       nodes = new HashMap<DatanodeDescriptor, Reason>();
       corruptReplicasMap.put(blk, nodes);
-      incrementBlockStat(blk);
+      incrementBlockStat(isStriped);
     }
     
     String reasonText;
@@ -103,11 +103,11 @@ public class CorruptReplicasMap{
    * Remove Block from CorruptBlocksMap.
    * @param blk Block to be removed
    */
-  void removeFromCorruptReplicasMap(Block blk) {
+  void removeFromCorruptReplicasMap(BlockInfo blk) {
     if (corruptReplicasMap != null) {
       Map<DatanodeDescriptor, Reason> value = corruptReplicasMap.remove(blk);
       if (value != null) {
-        decrementBlockStat(blk);
+        decrementBlockStat(blk.isStriped());
       }
     }
   }
@@ -119,12 +119,13 @@ public class CorruptReplicasMap{
    * @return true if the removal is successful; 
              false if the replica is not in the map
    */ 
-  boolean removeFromCorruptReplicasMap(Block blk, DatanodeDescriptor datanode) {
+  boolean removeFromCorruptReplicasMap(
+      BlockInfo blk, DatanodeDescriptor datanode) {
     return removeFromCorruptReplicasMap(blk, datanode, Reason.ANY);
   }
 
-  boolean removeFromCorruptReplicasMap(Block blk, DatanodeDescriptor datanode,
-      Reason reason) {
+  boolean removeFromCorruptReplicasMap(
+      BlockInfo blk, DatanodeDescriptor datanode, Reason reason) {
     Map <DatanodeDescriptor, Reason> datanodes = corruptReplicasMap.get(blk);
     if (datanodes == null) {
       return false;
@@ -141,23 +142,23 @@ public class CorruptReplicasMap{
       if (datanodes.isEmpty()) {
         // remove the block if there is no more corrupted replicas
         corruptReplicasMap.remove(blk);
-        decrementBlockStat(blk);
+        decrementBlockStat(blk.isStriped());
       }
       return true;
     }
     return false;
   }
 
-  private void incrementBlockStat(Block block) {
-    if (BlockIdManager.isStripedBlockID(block.getBlockId())) {
+  private void incrementBlockStat(boolean isStriped) {
+    if (isStriped) {
       totalCorruptECBlockGroups.increment();
     } else {
       totalCorruptBlocks.increment();
     }
   }
 
-  private void decrementBlockStat(Block block) {
-    if (BlockIdManager.isStripedBlockID(block.getBlockId())) {
+  private void decrementBlockStat(boolean isStriped) {
+    if (isStriped) {
       totalCorruptECBlockGroups.decrement();
     } else {
       totalCorruptBlocks.decrement();
@@ -205,6 +206,8 @@ public class CorruptReplicasMap{
    * is null, up to numExpectedBlocks blocks are returned from the beginning.
    * If startingBlockId cannot be found, null is returned.
    *
+   * @param bim BlockIdManager to determine the block type.
+   * @param blockType desired block type to return.
    * @param numExpectedBlocks Number of block ids to return.
    *  0 <= numExpectedBlocks <= 100
    * @param startingBlockId Block id from which to start. If null, start at
@@ -212,7 +215,7 @@ public class CorruptReplicasMap{
    * @return Up to numExpectedBlocks blocks from startingBlockId if it exists
    */
   @VisibleForTesting
-  long[] getCorruptBlockIdsForTesting(BlockType blockType,
+  long[] getCorruptBlockIdsForTesting(BlockIdManager bim, BlockType blockType,
       int numExpectedBlocks, Long startingBlockId) {
     if (numExpectedBlocks < 0 || numExpectedBlocks > 100) {
       return null;
@@ -223,11 +226,9 @@ public class CorruptReplicasMap{
         .stream()
         .filter(r -> {
           if (blockType == BlockType.STRIPED) {
-            return BlockIdManager.isStripedBlockID(r.getBlockId()) &&
-                r.getBlockId() >= cursorBlockId;
+            return bim.isStripedBlock(r) && r.getBlockId() >= cursorBlockId;
           } else {
-            return !BlockIdManager.isStripedBlockID(r.getBlockId()) &&
-                r.getBlockId() >= cursorBlockId;
+            return !bim.isStripedBlock(r) && r.getBlockId() >= cursorBlockId;
           }
         })
         .sorted()
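
The two filter branches above differ only in whether the striped check is negated; a hedged equivalent written as a single predicate (an illustrative simplification, not part of the patch):

.filter(r -> bim.isStripedBlock(r) == (blockType == BlockType.STRIPED)
    && r.getBlockId() >= cursorBlockId)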

+ 8 - 5
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/InvalidateBlocks.java

@@ -57,6 +57,7 @@ class InvalidateBlocks {
   private final LongAdder numBlocks = new LongAdder();
   private final LongAdder numECBlocks = new LongAdder();
   private final int blockInvalidateLimit;
+  private final BlockIdManager blockIdManager;
 
   /**
    * The period of pending time for block invalidation since the NameNode
@@ -66,9 +67,11 @@ class InvalidateBlocks {
   /** the startup time */
   private final long startupTime = Time.monotonicNow();
 
-  InvalidateBlocks(final int blockInvalidateLimit, long pendingPeriodInMs) {
+  InvalidateBlocks(final int blockInvalidateLimit, long pendingPeriodInMs,
+                   final BlockIdManager blockIdManager) {
     this.blockInvalidateLimit = blockInvalidateLimit;
     this.pendingPeriodInMs = pendingPeriodInMs;
+    this.blockIdManager = blockIdManager;
     printBlockDeletionTime(BlockManager.LOG);
   }
 
@@ -124,7 +127,7 @@ class InvalidateBlocks {
 
   private LightWeightHashSet<Block> getBlocksSet(final DatanodeInfo dn,
       final Block block) {
-    if (BlockIdManager.isStripedBlockID(block.getBlockId())) {
+    if (blockIdManager.isStripedBlock(block)) {
       return getECBlocksSet(dn);
     } else {
       return getBlocksSet(dn);
@@ -133,7 +136,7 @@ class InvalidateBlocks {
 
   private void putBlocksSet(final DatanodeInfo dn, final Block block,
       final LightWeightHashSet set) {
-    if (BlockIdManager.isStripedBlockID(block.getBlockId())) {
+    if (blockIdManager.isStripedBlock(block)) {
       assert getECBlocksSet(dn) == null;
       nodeToECBlocks.put(dn, set);
     } else {
@@ -178,7 +181,7 @@ class InvalidateBlocks {
       putBlocksSet(datanode, block, set);
     }
     if (set.add(block)) {
-      if (BlockIdManager.isStripedBlockID(block.getBlockId())) {
+      if (blockIdManager.isStripedBlock(block)) {
         numECBlocks.increment();
       } else {
         numBlocks.increment();
@@ -206,7 +209,7 @@ class InvalidateBlocks {
   synchronized void remove(final DatanodeInfo dn, final Block block) {
     final LightWeightHashSet<Block> v = getBlocksSet(dn, block);
     if (v != null && v.remove(block)) {
-      if (BlockIdManager.isStripedBlockID(block.getBlockId())) {
+      if (blockIdManager.isStripedBlock(block)) {
         numECBlocks.decrement();
       } else {
         numBlocks.decrement();

+ 4 - 4
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionZoneManager.java

@@ -541,10 +541,6 @@ public class EncryptionZoneManager {
     if (srcIIP.getLastINode() == null) {
       throw new FileNotFoundException("cannot find " + srcIIP.getPath());
     }
-    if (dir.isNonEmptyDirectory(srcIIP)) {
-      throw new IOException(
-          "Attempt to create an encryption zone for a non-empty directory.");
-    }
 
     INode srcINode = srcIIP.getLastINode();
     if (!srcINode.isDirectory()) {
@@ -557,6 +553,10 @@ public class EncryptionZoneManager {
           "Directory " + srcIIP.getPath() + " is already an encryption zone.");
     }
 
+    if (dir.isNonEmptyDirectory(srcIIP)) {
+      throw new IOException(
+          "Attempt to create an encryption zone for a non-empty directory.");
+    }
     final HdfsProtos.ZoneEncryptionInfoProto proto =
         PBHelperClient.convert(suite, version, keyName);
     final XAttr ezXAttr = XAttrHelper
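
Moving the non-empty check changes error precedence: a directory that is both non-empty and already an encryption zone now reports the EZ error first. A hedged, simplified sketch of the resulting validation order (the helper name and boolean parameters are illustrative, not the actual method):

static void validateEzTarget(INodesInPath srcIIP, boolean isEz,
    boolean isNonEmpty) throws IOException {
  if (srcIIP.getLastINode() == null) {
    throw new FileNotFoundException("cannot find " + srcIIP.getPath());
  }
  if (isEz) {
    // Checked before the emptiness test after this patch, so an existing
    // EZ is reported even when the directory is also non-empty.
    throw new IOException(
        "Directory " + srcIIP.getPath() + " is already an encryption zone.");
  }
  if (isNonEmpty) {
    throw new IOException(
        "Attempt to create an encryption zone for a non-empty directory.");
  }
}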

+ 12 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirAclOp.java

@@ -53,6 +53,8 @@ class FSDirAclOp {
           existingAcl, aclSpec);
       AclStorage.updateINodeAcl(inode, newAcl, snapshotId);
       fsd.getEditLog().logSetAcl(src, newAcl);
+    } catch (AclException e) {
+      throw new AclException(e.getMessage() + " Path: " + src, e);
     } finally {
       fsd.writeUnlock();
     }
@@ -77,6 +79,8 @@ class FSDirAclOp {
         existingAcl, aclSpec);
       AclStorage.updateINodeAcl(inode, newAcl, snapshotId);
       fsd.getEditLog().logSetAcl(src, newAcl);
+    } catch (AclException e) {
+      throw new AclException(e.getMessage() + " Path: " + src, e);
     } finally {
       fsd.writeUnlock();
     }
@@ -100,6 +104,8 @@ class FSDirAclOp {
         existingAcl);
       AclStorage.updateINodeAcl(inode, newAcl, snapshotId);
       fsd.getEditLog().logSetAcl(src, newAcl);
+    } catch (AclException e) {
+      throw new AclException(e.getMessage() + " Path: " + src, e);
     } finally {
       fsd.writeUnlock();
     }
@@ -117,6 +123,8 @@ class FSDirAclOp {
       src = iip.getPath();
       fsd.checkOwner(pc, iip);
       unprotectedRemoveAcl(fsd, iip);
+    } catch (AclException e) {
+      throw new AclException(e.getMessage() + " Path: " + src, e);
     } finally {
       fsd.writeUnlock();
     }
@@ -136,6 +144,8 @@ class FSDirAclOp {
       fsd.checkOwner(pc, iip);
       List<AclEntry> newAcl = unprotectedSetAcl(fsd, iip, aclSpec, false);
       fsd.getEditLog().logSetAcl(iip.getPath(), newAcl);
+    } catch (AclException e) {
+      throw new AclException(e.getMessage() + " Path: " + src, e);
     } finally {
       fsd.writeUnlock();
     }
@@ -162,6 +172,8 @@ class FSDirAclOp {
           .stickyBit(fsPermission.getStickyBit())
           .setPermission(fsPermission)
           .addEntries(acl).build();
+    } catch (AclException e) {
+      throw new AclException(e.getMessage() + " Path: " + src, e);
     } finally {
       fsd.readUnlock();
     }
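
Each method above repeats the same catch-and-rethrow to attach the offending path while keeping the original exception as the cause; it relies on the AclException(String, Throwable) constructor used in these hunks. A sketch of the pattern factored into a helper (the helper name is illustrative, not part of the patch):

// Hypothetical helper equivalent to the inline pattern above.
private static AclException withPathContext(AclException e, String src) {
  return new AclException(e.getMessage() + " Path: " + src, e);
}

// Usage inside a catch block:
//   } catch (AclException e) {
//     throw withPathContext(e, src);
//   }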

+ 339 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSTreeTraverser.java

@@ -0,0 +1,339 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_READ_LOCK_REPORTING_THRESHOLD_MS_DEFAULT;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_READ_LOCK_REPORTING_THRESHOLD_MS_KEY;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
+import org.apache.hadoop.hdfs.util.ReadOnlyList;
+import org.apache.hadoop.util.Timer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * FSTreeTraverser traverses directories recursively and processes files
+ * in batches.
+ */
+@InterfaceAudience.Private
+public abstract class FSTreeTraverser {
+
+  public static final Logger LOG = LoggerFactory
+      .getLogger(FSTreeTraverser.class);
+
+  private final FSDirectory dir;
+
+  private long readLockReportingThresholdMs;
+
+  private Timer timer;
+
+  public FSTreeTraverser(FSDirectory dir, Configuration conf) {
+    this.dir = dir;
+    this.readLockReportingThresholdMs = conf.getLong(
+        DFS_NAMENODE_READ_LOCK_REPORTING_THRESHOLD_MS_KEY,
+        DFS_NAMENODE_READ_LOCK_REPORTING_THRESHOLD_MS_DEFAULT);
+    timer = new Timer();
+  }
+
+  public FSDirectory getFSDirectory() {
+    return dir;
+  }
+
+  /**
+   * Iterate through all files directly inside parent, and recurse down
+   * directories. The listing is done in batches, and can optionally start
+   * after a position. The iteration of the inode tree is done in a
+   * depth-first fashion, but instead of holding all {@link INodeDirectory}'s
+   * in memory, only the path components to the current inode are held. This
+   * is to reduce memory consumption.
+   *
+   * @param parent
+   *          The inode id of parent directory
+   * @param startId
+   *          Id of the start inode.
+   * @param startAfter
+   *          Full path of a file the traverse should start after.
+   * @param traverseInfo
+   *          info which may be required for processing the children.
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  protected void traverseDir(final INodeDirectory parent, final long startId,
+      byte[] startAfter, final TraverseInfo traverseInfo)
+      throws IOException, InterruptedException {
+    List<byte[]> startAfters = new ArrayList<>();
+    if (parent == null) {
+      return;
+    }
+    INode curr = parent;
+    // construct startAfters all the way up to the zone inode.
+    startAfters.add(startAfter);
+    while (curr.getId() != startId) {
+      startAfters.add(0, curr.getLocalNameBytes());
+      curr = curr.getParent();
+    }
+    curr = traverseDirInt(startId, parent, startAfters, traverseInfo);
+    while (!startAfters.isEmpty()) {
+      if (curr == null) {
+        // lock was reacquired, re-resolve path.
+        curr = resolvePaths(startId, startAfters);
+      }
+      curr = traverseDirInt(startId, curr, startAfters, traverseInfo);
+    }
+  }
+
+  /**
+   * Iterates the parent directory, and adds direct child files to the
+   * current batch. If the batch size meets the configured threshold, the
+   * current batch will be submitted for processing.
+   * <p>
+   * Locks could be released and reacquired when a batch submission is
+   * finished.
+   *
+   * @param startId
+   *          Id of the start inode.
+   * @return The inode which was just processed, if the lock was held for the
+   *         entire process. Null if the lock was released.
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  protected INode traverseDirInt(final long startId, INode curr,
+      List<byte[]> startAfters, final TraverseInfo traverseInfo)
+      throws IOException, InterruptedException {
+    assert dir.hasReadLock();
+    assert dir.getFSNamesystem().hasReadLock();
+    long lockStartTime = timer.monotonicNow();
+    Preconditions.checkNotNull(curr, "Current inode can't be null");
+    checkINodeReady(startId);
+    final INodeDirectory parent = curr.isDirectory() ? curr.asDirectory()
+        : curr.getParent();
+    ReadOnlyList<INode> children = parent
+        .getChildrenList(Snapshot.CURRENT_STATE_ID);
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Traversing directory {}", parent.getFullPathName());
+    }
+
+    final byte[] startAfter = startAfters.get(startAfters.size() - 1);
+    boolean lockReleased = false;
+    for (int i = INodeDirectory.nextChild(children, startAfter); i < children
+        .size(); ++i) {
+      final INode inode = children.get(i);
+      if (!processFileInode(inode, traverseInfo)) {
+        // inode wasn't processed. Recurse down if it's a dir,
+        // skip otherwise.
+        if (!inode.isDirectory()) {
+          continue;
+        }
+
+        if (!canTraverseDir(inode)) {
+          continue;
+        }
+        // add 1 level to the depth-first search.
+        curr = inode;
+        if (!startAfters.isEmpty()) {
+          startAfters.remove(startAfters.size() - 1);
+          startAfters.add(curr.getLocalNameBytes());
+        }
+        startAfters.add(HdfsFileStatus.EMPTY_NAME);
+        return lockReleased ? null : curr;
+      }
+      if (shouldSubmitCurrentBatch()) {
+        final byte[] currentStartAfter = inode.getLocalNameBytes();
+        final String parentPath = parent.getFullPathName();
+        lockReleased = true;
+        readUnlock();
+        submitCurrentBatch(startId);
+        try {
+          throttle();
+          checkPauseForTesting();
+        } finally {
+          readLock();
+          lockStartTime = timer.monotonicNow();
+        }
+        checkINodeReady(startId);
+
+        // Things could have changed when the lock was released.
+        // Re-resolve the parent inode.
+        FSPermissionChecker pc = dir.getPermissionChecker();
+        INode newParent = dir
+            .resolvePath(pc, parentPath, FSDirectory.DirOp.READ)
+            .getLastINode();
+        if (newParent == null || !newParent.equals(parent)) {
+          // parent dir is deleted or recreated. We're done.
+          return null;
+        }
+        children = parent.getChildrenList(Snapshot.CURRENT_STATE_ID);
+        // -1 to counter the ++ on the for loop
+        i = INodeDirectory.nextChild(children, currentStartAfter) - 1;
+      }
+      if ((timer.monotonicNow()
+          - lockStartTime) > readLockReportingThresholdMs) {
+        readUnlock();
+        try {
+          throttle();
+        } finally {
+          readLock();
+          lockStartTime = timer.monotonicNow();
+        }
+      }
+    }
+    // Successfully finished this dir, adjust pointers to 1 level up, and
+    // startAfter this dir.
+    startAfters.remove(startAfters.size() - 1);
+    if (!startAfters.isEmpty()) {
+      startAfters.remove(startAfters.size() - 1);
+      startAfters.add(curr.getLocalNameBytes());
+    }
+    curr = curr.getParent();
+    return lockReleased ? null : curr;
+  }
+
+  /**
+   * Resolve the cursor of traverse to an inode.
+   * <p>
+   * The parent of the lowest level startAfter is returned. If somewhere in the
+   * middle of startAfters changed, the parent of the lowest unchanged level is
+   * returned.
+   *
+   * @param startId
+   *          Id of the start inode.
+   * @param startAfters
+   *          the cursor, represented by a list of path bytes.
+   * @return the parent inode corresponding to the startAfters, or null if the
+   *         furthest parent is deleted.
+   */
+  private INode resolvePaths(final long startId, List<byte[]> startAfters)
+      throws IOException {
+    // If the readlock was reacquired, we need to resolve the paths again
+    // in case things have changed. If our cursor file/dir is changed,
+    // continue from the next one.
+    INode zoneNode = dir.getInode(startId);
+    if (zoneNode == null) {
+      throw new FileNotFoundException("Zone " + startId + " is deleted.");
+    }
+    INodeDirectory parent = zoneNode.asDirectory();
+    for (int i = 0; i < startAfters.size(); ++i) {
+      if (i == startAfters.size() - 1) {
+        // last startAfter does not need to be resolved, since search for
+        // nextChild will cover that automatically.
+        break;
+      }
+      INode curr = parent.getChild(startAfters.get(i),
+          Snapshot.CURRENT_STATE_ID);
+      if (curr == null) {
+        // inode at this level has changed. Update startAfters to point to
+        // the next dir at the parent level (and dropping any startAfters
+        // at lower levels).
+        for (; i < startAfters.size(); ++i) {
+          startAfters.remove(startAfters.size() - 1);
+        }
+        break;
+      }
+      parent = curr.asDirectory();
+    }
+    return parent;
+  }
+
+  protected void readLock() {
+    dir.getFSNamesystem().readLock();
+    dir.readLock();
+  }
+
+  protected void readUnlock() {
+    dir.readUnlock();
+    dir.getFSNamesystem().readUnlock("FSTreeTraverser");
+  }
+
+  protected abstract void checkPauseForTesting() throws InterruptedException;
+
+  /**
+   * Process an Inode. Add to current batch if it's a file, no-op otherwise.
+   *
+   * @param inode
+   *          the inode
+   * @return true if the inode was added to currentBatch and should be
+   *         processed by the next operation. false otherwise, e.g. the inode
+   *         is not a file.
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  protected abstract boolean processFileInode(INode inode,
+      TraverseInfo traverseInfo) throws IOException, InterruptedException;
+
+  /**
+   * Check whether the current batch can be submitted for processing.
+   *
+   * @return true if the batch size meets the condition, otherwise false.
+   */
+  protected abstract boolean shouldSubmitCurrentBatch();
+
+  /**
+   * Check whether the inode is ready for traversal. Throws an IOE if not.
+   *
+   * @param startId
+   *          Id of the start inode.
+   * @throws IOException
+   */
+  protected abstract void checkINodeReady(long startId) throws IOException;
+
+  /**
+   * Submit the current batch for processing.
+   *
+   * @param startId
+   *          Id of the start inode.
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  protected abstract void submitCurrentBatch(long startId)
+      throws IOException, InterruptedException;
+
+  /**
+   * Throttles the FSTreeTraverser.
+   *
+   * @throws InterruptedException
+   */
+  protected abstract void throttle() throws InterruptedException;
+
+  /**
+   * Check whether the directory is traversable.
+   *
+   * @param inode
+   *          Dir inode
+   * @return true if the directory is traversable, otherwise false.
+   * @throws IOException
+   */
+  protected abstract boolean canTraverseDir(INode inode) throws IOException;
+
+  /**
+   * Class representing any additional info required for the traversal.
+   */
+  public static class TraverseInfo {
+
+  }
+}
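
FSTreeTraverser leaves the batching policy entirely to subclasses; ReencryptionHandler supplies the production implementation below. A minimal hypothetical subclass sketch showing the contract (PathCollector, MAX_BATCH, and the list-based batch are illustrative only; HDFS types are assumed from the surrounding package):

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;

// A minimal, hypothetical subclass illustrating the FSTreeTraverser
// contract; not part of this patch.
class PathCollector extends FSTreeTraverser {
  private static final int MAX_BATCH = 100;
  private final List<Long> currentBatch = new ArrayList<>();

  PathCollector(FSDirectory dir, Configuration conf) {
    super(dir, conf);
  }

  @Override
  protected boolean processFileInode(INode inode, TraverseInfo info) {
    if (!inode.isFile()) {
      return false; // not handled; the traverser recurses into directories
    }
    currentBatch.add(inode.getId());
    return true;
  }

  @Override
  protected boolean shouldSubmitCurrentBatch() {
    return currentBatch.size() >= MAX_BATCH;
  }

  @Override
  protected void submitCurrentBatch(long startId) {
    // hand the batch off for processing (locks are released around this),
    // then start a new batch
    currentBatch.clear();
  }

  @Override
  protected void checkINodeReady(long startId) throws IOException {
    // verify the start inode still exists and the work was not cancelled
    if (getFSDirectory().getInode(startId) == null) {
      throw new IOException("Start inode " + startId + " is deleted.");
    }
  }

  @Override
  protected boolean canTraverseDir(INode inode) {
    return true; // descend into every directory
  }

  @Override
  protected void throttle() throws InterruptedException {
    Thread.sleep(10); // trivial back-off between batches
  }

  @Override
  protected void checkPauseForTesting() {
    // no test pauses in this sketch
  }
}

A caller would kick off the traversal with traverseDir(rootDir, rootDir.getId(), HdfsFileStatus.EMPTY_NAME, new TraverseInfo()) while holding the FSN/FSD read locks, as ReencryptionHandler does below.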

+ 248 - 367
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ReencryptionHandler.java

@@ -29,18 +29,16 @@ import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
 import org.apache.hadoop.hdfs.protocol.ReencryptionStatus;
 import org.apache.hadoop.hdfs.protocol.ZoneReencryptionStatus;
 import org.apache.hadoop.hdfs.protocol.ZoneReencryptionStatus.State;
-import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
+import org.apache.hadoop.hdfs.server.namenode.FSTreeTraverser.TraverseInfo;
 import org.apache.hadoop.hdfs.server.namenode.ReencryptionUpdater.FileEdekInfo;
 import org.apache.hadoop.hdfs.server.namenode.ReencryptionUpdater.ReencryptionTask;
 import org.apache.hadoop.hdfs.server.namenode.ReencryptionUpdater.ZoneSubmissionTracker;
-import org.apache.hadoop.hdfs.util.ReadOnlyList;
 import org.apache.hadoop.ipc.RetriableException;
 import org.apache.hadoop.util.Daemon;
 import org.apache.hadoop.util.StopWatch;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.security.GeneralSecurityException;
 import java.util.ArrayList;
@@ -117,6 +115,8 @@ public class ReencryptionHandler implements Runnable {
   // be single-threaded, see class javadoc for more details.
   private ReencryptionBatch currentBatch;
 
+  private final ReencryptionPendingInodeIdCollector traverser;
+
   private final ReencryptionUpdater reencryptionUpdater;
   private ExecutorService updaterExecutor;
 
@@ -185,16 +185,6 @@ public class ReencryptionHandler implements Runnable {
     reencryptionUpdater.pauseForTestingAfterNthCheckpoint(zoneId, count);
   }
 
-  private synchronized void checkPauseForTesting() throws InterruptedException {
-    assert !dir.hasReadLock();
-    assert !dir.getFSNamesystem().hasReadLock();
-    while (shouldPauseForTesting) {
-      LOG.info("Sleeping in the re-encrypt handler for unit test.");
-      wait();
-      LOG.info("Continuing re-encrypt handler after pausing.");
-    }
-  }
-
   ReencryptionHandler(final EncryptionZoneManager ezMgr,
       final Configuration conf) {
     this.ezManager = ezMgr;
@@ -255,6 +245,7 @@ public class ReencryptionHandler implements Runnable {
     reencryptionUpdater =
         new ReencryptionUpdater(dir, batchService, this, conf);
     currentBatch = new ReencryptionBatch(reencryptBatchSize);
+    traverser = new ReencryptionPendingInodeIdCollector(dir, this, conf);
   }
 
   ReencryptionStatus getReencryptionStatus() {
@@ -338,7 +329,7 @@ public class ReencryptionHandler implements Runnable {
         synchronized (this) {
           wait(interval);
         }
-        checkPauseForTesting();
+        traverser.checkPauseForTesting();
       } catch (InterruptedException ie) {
         LOG.info("Re-encrypt handler interrupted. Exiting");
         Thread.currentThread().interrupt();
@@ -396,7 +387,7 @@ public class ReencryptionHandler implements Runnable {
     final INode zoneNode;
     final ZoneReencryptionStatus zs;
 
-    readLock();
+    traverser.readLock();
     try {
       zoneNode = dir.getInode(zoneId);
       // start re-encrypting the zone from the beginning
@@ -418,18 +409,19 @@ public class ReencryptionHandler implements Runnable {
           zoneId);
       if (zs.getLastCheckpointFile() == null) {
         // new re-encryption
-        reencryptDir(zoneNode.asDirectory(), zoneId, HdfsFileStatus.EMPTY_NAME,
-            zs.getEzKeyVersionName());
+        traverser.traverseDir(zoneNode.asDirectory(), zoneId,
+            HdfsFileStatus.EMPTY_NAME,
+            new ZoneTraverseInfo(zs.getEzKeyVersionName()));
       } else {
         // resuming from a past re-encryption
         restoreFromLastProcessedFile(zoneId, zs);
       }
       // save the last batch and mark complete
-      submitCurrentBatch(zoneId);
+      traverser.submitCurrentBatch(zoneId);
       LOG.info("Submission completed of zone {} for re-encryption.", zoneId);
       reencryptionUpdater.markZoneSubmissionDone(zoneId);
     } finally {
-      readUnlock();
+      traverser.readUnlock();
     }
   }
 
@@ -478,131 +470,8 @@ public class ReencryptionHandler implements Runnable {
         dir.getINodesInPath(zs.getLastCheckpointFile(), FSDirectory.DirOp.READ);
     parent = lpfIIP.getLastINode().getParent();
     startAfter = lpfIIP.getLastINode().getLocalNameBytes();
-    reencryptDir(parent, zoneId, startAfter, zs.getEzKeyVersionName());
-  }
-
-  /**
-   * Iterate through all files directly inside parent, and recurse down
-   * directories. The listing is done in batch, and can optionally start after
-   * a position.
-   * <p>
-   * Each batch is then send to the threadpool, where KMS will be contacted and
-   * edek re-encrypted. {@link ReencryptionUpdater} handles the tasks completed
-   * from the threadpool.
-   * <p>
-   * The iteration of the inode tree is done in a depth-first fashion. But
-   * instead of holding all INodeDirectory's in memory on the fly, only the
-   * path components to the current inode is held. This is to reduce memory
-   * consumption.
-   *
-   * @param parent     The inode id of parent directory
-   * @param zoneId     Id of the EZ inode
-   * @param startAfter Full path of a file the re-encrypt should start after.
-   * @throws IOException
-   * @throws InterruptedException
-   */
-  private void reencryptDir(final INodeDirectory parent, final long zoneId,
-      byte[] startAfter, final String ezKeyVerName)
-      throws IOException, InterruptedException {
-    List<byte[]> startAfters = new ArrayList<>();
-    if (parent == null) {
-      return;
-    }
-    INode curr = parent;
-    // construct startAfters all the way up to the zone inode.
-    startAfters.add(startAfter);
-    while (curr.getId() != zoneId) {
-      startAfters.add(0, curr.getLocalNameBytes());
-      curr = curr.getParent();
-    }
-    curr = reencryptDirInt(zoneId, parent, startAfters, ezKeyVerName);
-    while (!startAfters.isEmpty()) {
-      if (curr == null) {
-        // lock was reacquired, re-resolve path.
-        curr = resolvePaths(zoneId, startAfters);
-      }
-      curr = reencryptDirInt(zoneId, curr, startAfters, ezKeyVerName);
-    }
-  }
-
-  /**
-   * Resolve the cursor of re-encryption to an inode.
-   * <p>
-   * The parent of the lowest level startAfter is returned. If somewhere in the
-   * middle of startAfters changed, the parent of the lowest unchanged level is
-   * returned.
-   *
-   * @param zoneId      Id of the EZ inode.
-   * @param startAfters the cursor, represented by a list of path bytes.
-   * @return the parent inode corresponding to the startAfters, or null if
-   * the EZ node (furthest parent) is deleted.
-   */
-  private INode resolvePaths(final long zoneId, List<byte[]> startAfters)
-      throws IOException {
-    // If the readlock was reacquired, we need to resolve the paths again
-    // in case things have changed. If our cursor file/dir is changed,
-    // continue from the next one.
-    INode zoneNode = dir.getInode(zoneId);
-    if (zoneNode == null) {
-      throw new FileNotFoundException("Zone " + zoneId + " is deleted.");
-    }
-    INodeDirectory parent = zoneNode.asDirectory();
-    for (int i = 0; i < startAfters.size(); ++i) {
-      if (i == startAfters.size() - 1) {
-        // last startAfter does not need to be resolved, since search for
-        // nextChild will cover that automatically.
-        break;
-      }
-      INode curr =
-          parent.getChild(startAfters.get(i), Snapshot.CURRENT_STATE_ID);
-      if (curr == null) {
-        // inode at this level has changed. Update startAfters to point to
-        // the next dir at the parent level (and dropping any startAfters
-        // at lower levels).
-        for (; i < startAfters.size(); ++i) {
-          startAfters.remove(startAfters.size() - 1);
-        }
-        break;
-      }
-      parent = curr.asDirectory();
-    }
-    return parent;
-  }
-
-  /**
-   * Submit the current batch to the thread pool.
-   *
-   * @param zoneId Id of the EZ INode
-   * @throws IOException
-   * @throws InterruptedException
-   */
-  private void submitCurrentBatch(final long zoneId)
-      throws IOException, InterruptedException {
-    assert dir.hasReadLock();
-    if (currentBatch.isEmpty()) {
-      return;
-    }
-    ZoneSubmissionTracker zst;
-    synchronized (this) {
-      zst = submissions.get(zoneId);
-      if (zst == null) {
-        zst = new ZoneSubmissionTracker();
-        submissions.put(zoneId, zst);
-      }
-    }
-    Future future = batchService
-        .submit(new EDEKReencryptCallable(zoneId, currentBatch, this));
-    zst.addTask(future);
-    LOG.info("Submitted batch (start:{}, size:{}) of zone {} to re-encrypt.",
-        currentBatch.getFirstFilePath(), currentBatch.size(), zoneId);
-    currentBatch = new ReencryptionBatch(reencryptBatchSize);
-    // flip the pause flag if this is nth submission.
-    // The actual pause need to happen outside of the lock.
-    if (pauseAfterNthSubmission > 0) {
-      if (--pauseAfterNthSubmission == 0) {
-        shouldPauseForTesting = true;
-      }
-    }
+    traverser.traverseDir(parent, zoneId, startAfter,
+        new ZoneTraverseInfo(zs.getEzKeyVersionName()));
   }
 
   final class ReencryptionBatch {
@@ -710,256 +579,268 @@ public class ReencryptionHandler implements Runnable {
     }
   }
 
+
   /**
-   * Iterates the parent directory, and add direct children files to
-   * current batch. If batch size meets configured threshold, a Callable
-   * is created and sent to the thread pool, which will communicate to the KMS
-   * to get new edeks.
-   * <p>
-   * Locks could be released and reacquired when a Callable is created.
-   *
-   * @param zoneId Id of the EZ INode
-   * @return The inode which was just processed, if lock is held in the entire
-   * process. Null if lock is released.
-   * @throws IOException
-   * @throws InterruptedException
+   * Called when a new zone is submitted for re-encryption. This will interrupt
+   * the background thread if it's waiting for the next
+   * DFS_NAMENODE_REENCRYPT_SLEEP_INTERVAL_KEY.
    */
-  private INode reencryptDirInt(final long zoneId, INode curr,
-      List<byte[]> startAfters, final String ezKeyVerName)
-      throws IOException, InterruptedException {
-    assert dir.hasReadLock();
-    assert dir.getFSNamesystem().hasReadLock();
-    Preconditions.checkNotNull(curr, "Current inode can't be null");
-    checkZoneReady(zoneId);
-    final INodeDirectory parent =
-        curr.isDirectory() ? curr.asDirectory() : curr.getParent();
-    ReadOnlyList<INode> children =
-        parent.getChildrenList(Snapshot.CURRENT_STATE_ID);
-    if (LOG.isDebugEnabled()) {
-      LOG.debug("Re-encrypting directory {}", parent.getFullPathName());
-    }
-
-    final byte[] startAfter = startAfters.get(startAfters.size() - 1);
-    boolean lockReleased = false;
-    for (int i = INodeDirectory.nextChild(children, startAfter);
-         i < children.size(); ++i) {
-      final INode inode = children.get(i);
-      if (!reencryptINode(inode, ezKeyVerName)) {
-        // inode wasn't added for re-encryption. Recurse down if it's a dir,
-        // skip otherwise.
-        if (!inode.isDirectory()) {
-          continue;
-        }
-        if (ezManager.isEncryptionZoneRoot(inode, inode.getFullPathName())) {
-          // nested EZ, ignore.
-          LOG.info("{}({}) is a nested EZ, skipping for re-encryption",
-              inode.getFullPathName(), inode.getId());
-          continue;
+  synchronized void notifyNewSubmission() {
+    LOG.debug("Notifying handler for new re-encryption command.");
+    this.notify();
+  }
+
+  public ReencryptionPendingInodeIdCollector getTraverser() {
+    return traverser;
+  }
+
+  /**
+   * ReencryptionPendingInodeIdCollector, which throttles based on the
+   * configured throttle ratio.
+   */
+  class ReencryptionPendingInodeIdCollector extends FSTreeTraverser {
+
+    private final ReencryptionHandler reencryptionHandler;
+
+    ReencryptionPendingInodeIdCollector(FSDirectory dir,
+        ReencryptionHandler rHandler, Configuration conf) {
+      super(dir, conf);
+      this.reencryptionHandler = rHandler;
+    }
+
+    @Override
+    protected void checkPauseForTesting()
+        throws InterruptedException {
+      assert !dir.hasReadLock();
+      assert !dir.getFSNamesystem().hasReadLock();
+      while (shouldPauseForTesting) {
+        LOG.info("Sleeping in the re-encrypt handler for unit test.");
+        synchronized (reencryptionHandler) {
+          reencryptionHandler.wait(30000);
         }
-        // add 1 level to the depth-first search.
-        curr = inode;
-        if (!startAfters.isEmpty()) {
-          startAfters.remove(startAfters.size() - 1);
-          startAfters.add(curr.getLocalNameBytes());
+        LOG.info("Continuing re-encrypt handler after pausing.");
+      }
+    }
+
+    /**
+     * Process an Inode for re-encryption. Add to current batch if it's a file,
+     * no-op otherwise.
+     *
+     * @param inode
+     *          the inode
+     * @return true if the inode was added to currentBatch and should be
+     *         re-encrypted. false otherwise, e.g. the inode is not a file or
+     *         the inode's edek's key version has not changed.
+     * @throws IOException
+     * @throws InterruptedException
+     */
+    @Override
+    public boolean processFileInode(INode inode, TraverseInfo traverseInfo)
+        throws IOException, InterruptedException {
+      assert dir.hasReadLock();
+      if (LOG.isTraceEnabled()) {
+        LOG.trace("Processing {} for re-encryption", inode.getFullPathName());
+      }
+      if (!inode.isFile()) {
+        return false;
+      }
+      FileEncryptionInfo feInfo = FSDirEncryptionZoneOp.getFileEncryptionInfo(
+          dir, INodesInPath.fromINode(inode));
+      if (feInfo == null) {
+        LOG.warn("File {} skipped re-encryption because it is not encrypted! "
+            + "This is very likely a bug.", inode.getId());
+        return false;
+      }
+      if (traverseInfo instanceof ZoneTraverseInfo
+          && ((ZoneTraverseInfo) traverseInfo).getEzKeyVerName().equals(
+              feInfo.getEzKeyVersionName())) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("File {} skipped re-encryption because edek's key version"
+              + " name is not changed.", inode.getFullPathName());
         }
-        startAfters.add(HdfsFileStatus.EMPTY_NAME);
-        return lockReleased ? null : curr;
+        return false;
+      }
+      currentBatch.add(inode.asFile());
+      return true;
+    }
+
+    /**
+     * Check whether the zone is ready for re-encryption. Throws an IOE if it
+     * is not: 1. if the EZ is deleted; 2. if the re-encryption is canceled;
+     * 3. if the NN is not active or is in safe mode.
+     *
+     * @throws IOException
+     *           if zone does not exist / is cancelled, or if NN is not ready
+     *           for write.
+     */
+    @Override
+    protected void checkINodeReady(long zoneId) throws IOException {
+      final ZoneReencryptionStatus zs = getReencryptionStatus().getZoneStatus(
+          zoneId);
+      if (zs == null) {
+        throw new IOException("Zone " + zoneId + " status cannot be found.");
+      }
+      if (zs.isCanceled()) {
+        throw new IOException("Re-encryption is canceled for zone " + zoneId);
       }
-      if (currentBatch.size() >= reencryptBatchSize) {
-        final byte[] currentStartAfter = inode.getLocalNameBytes();
-        final String parentPath = parent.getFullPathName();
-        submitCurrentBatch(zoneId);
-        lockReleased = true;
-        readUnlock();
-        try {
-          throttle();
-          checkPauseForTesting();
-        } finally {
-          readLock();
+      dir.getFSNamesystem().checkNameNodeSafeMode(
+          "NN is in safe mode, cannot re-encrypt.");
+      // Re-encryption should be cancelled when the NN goes to standby;
+      // this is just a sanity double-check.
+      dir.getFSNamesystem().checkOperation(NameNode.OperationCategory.WRITE);
+    }
+
+    /**
+     * Submit the current batch to the thread pool.
+     *
+     * @param zoneId
+     *          Id of the EZ INode
+     * @throws IOException
+     * @throws InterruptedException
+     */
+    @Override
+    protected void submitCurrentBatch(final long zoneId) throws IOException,
+        InterruptedException {
+      if (currentBatch.isEmpty()) {
+        return;
+      }
+      ZoneSubmissionTracker zst;
+      synchronized (ReencryptionHandler.this) {
+        zst = submissions.get(zoneId);
+        if (zst == null) {
+          zst = new ZoneSubmissionTracker();
+          submissions.put(zoneId, zst);
         }
-        checkZoneReady(zoneId);
-
-        // Things could have changed when the lock was released.
-        // Re-resolve the parent inode.
-        FSPermissionChecker pc = dir.getPermissionChecker();
-        INode newParent =
-            dir.resolvePath(pc, parentPath, FSDirectory.DirOp.READ)
-                .getLastINode();
-        if (newParent == null || !newParent.equals(parent)) {
-          // parent dir is deleted or recreated. We're done.
-          return null;
+      }
+      Future future = batchService.submit(new EDEKReencryptCallable(zoneId,
+          currentBatch, reencryptionHandler));
+      zst.addTask(future);
+      LOG.info("Submitted batch (start:{}, size:{}) of zone {} to re-encrypt.",
+          currentBatch.getFirstFilePath(), currentBatch.size(), zoneId);
+      currentBatch = new ReencryptionBatch(reencryptBatchSize);
+      // Flip the pause flag if this is the nth submission.
+      // The actual pause needs to happen outside of the lock.
+      if (pauseAfterNthSubmission > 0) {
+        if (--pauseAfterNthSubmission == 0) {
+          shouldPauseForTesting = true;
         }
-        children = parent.getChildrenList(Snapshot.CURRENT_STATE_ID);
-        // -1 to counter the ++ on the for loop
-        i = INodeDirectory.nextChild(children, currentStartAfter) - 1;
       }
     }
-    // Successfully finished this dir, adjust pointers to 1 level up, and
-    // startAfter this dir.
-    startAfters.remove(startAfters.size() - 1);
-    if (!startAfters.isEmpty()) {
-      startAfters.remove(startAfters.size() - 1);
-      startAfters.add(curr.getLocalNameBytes());
-    }
-    curr = curr.getParent();
-    return lockReleased ? null : curr;
-  }
 
-  private void readLock() {
-    dir.getFSNamesystem().readLock();
-    dir.readLock();
-    throttleTimerLocked.start();
-  }
+    /**
+     * Throttles the ReencryptionHandler in 3 aspects:
+     * 1. Prevents generating more Callables than the CPU could possibly
+     * handle.
+     * 2. Prevents generating more Callables than the ReencryptionUpdater
+     * can handle, under its own throttling.
+     * 3. Prevents contending FSN/FSD read locks. This is done based
+     * on the DFS_NAMENODE_REENCRYPT_THROTTLE_LIMIT_RATIO_KEY configuration.
+     * <p>
+     * Item 1 and 2 are to control NN heap usage.
+     *
+     * @throws InterruptedException
+     */
+    @VisibleForTesting
+    @Override
+    protected void throttle() throws InterruptedException {
+      assert !dir.hasReadLock();
+      assert !dir.getFSNamesystem().hasReadLock();
+      final int numCores = Runtime.getRuntime().availableProcessors();
+      if (taskQueue.size() >= numCores) {
+        LOG.debug("Re-encryption handler throttling because queue size {} is"
+            + "larger than number of cores {}", taskQueue.size(), numCores);
+        while (taskQueue.size() >= numCores) {
+          Thread.sleep(100);
+        }
+      }
 
-  private void readUnlock() {
-    dir.readUnlock();
-    dir.getFSNamesystem().readUnlock("reencryptHandler");
-    throttleTimerLocked.stop();
-  }
+      // 2. if tasks are piling up on the updater, don't create new callables
+      // until the queue size goes down.
+      final int maxTasksPiled = Runtime.getRuntime().availableProcessors() * 2;
+      int numTasks = numTasksSubmitted();
+      if (numTasks >= maxTasksPiled) {
+        LOG.debug("Re-encryption handler throttling because total tasks pending"
+            + " re-encryption updater is {}", numTasks);
+        while (numTasks >= maxTasksPiled) {
+          Thread.sleep(500);
+          numTasks = numTasksSubmitted();
+        }
+      }
 
-  /**
-   * Throttles the ReencryptionHandler in 3 aspects:
-   * 1. Prevents generating more Callables than the CPU could possibly handle.
-   * 2. Prevents generating more Callables than the ReencryptionUpdater can
-   *   handle, under its own throttling
-   * 3. Prevents contending FSN/FSD read locks. This is done based on the
-   *   DFS_NAMENODE_REENCRYPT_THROTTLE_LIMIT_RATIO_KEY configuration.
-   * <p>
-   * Item 1 and 2 are to control NN heap usage.
-   *
-   * @throws InterruptedException
-   */
-  @VisibleForTesting
-  void throttle() throws InterruptedException {
-    // 1.
-    final int numCores = Runtime.getRuntime().availableProcessors();
-    if (taskQueue.size() >= numCores) {
-      LOG.debug("Re-encryption handler throttling because queue size {} is"
-          + "larger than number of cores {}", taskQueue.size(), numCores);
-      while (taskQueue.size() >= numCores) {
-        Thread.sleep(100);
+      // 3.
+      if (throttleLimitHandlerRatio >= 1.0) {
+        return;
+      }
+      final long expect = (long) (throttleTimerAll.now(TimeUnit.MILLISECONDS)
+          * throttleLimitHandlerRatio);
+      final long actual = throttleTimerLocked.now(TimeUnit.MILLISECONDS);
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Re-encryption handler throttling expect: {}, actual: {},"
+            + " throttleTimerAll:{}", expect, actual,
+            throttleTimerAll.now(TimeUnit.MILLISECONDS));
       }
+      if (expect - actual < 0) {
+        // If throttleLimitHandlerRatio is very small, expect will be 0, so
+        // sleepMs should not be calculated from expect in order to really
+        // meet the ratio. E.g. if ratio is 0.001, expect = 0 and actual = 1,
+        // sleepMs should be 1000 - throttleTimerAll.now().
+        final long sleepMs = (long) (actual / throttleLimitHandlerRatio)
+            - throttleTimerAll.now(TimeUnit.MILLISECONDS);
+        LOG.debug("Throttling re-encryption, sleeping for {} ms", sleepMs);
+        Thread.sleep(sleepMs);
+      }
+      throttleTimerAll.reset().start();
+      throttleTimerLocked.reset();
     }
 
-    // 2. if tasks are piling up on the updater, don't create new callables
-    // until the queue size goes down.
-    final int maxTasksPiled = Runtime.getRuntime().availableProcessors() * 2;
-    int numTasks = numTasksSubmitted();
-    if (numTasks >= maxTasksPiled) {
-      LOG.debug("Re-encryption handler throttling because total tasks pending"
-          + " re-encryption updater is {}", numTasks);
-      while (numTasks >= maxTasksPiled) {
-        Thread.sleep(500);
-        numTasks = numTasksSubmitted();
+    private int numTasksSubmitted() {
+      int ret = 0;
+      synchronized (ReencryptionHandler.this) {
+        for (ZoneSubmissionTracker zst : submissions.values()) {
+          ret += zst.getTasks().size();
+        }
       }
+      return ret;
     }
 
-    // 3.
-    if (throttleLimitHandlerRatio >= 1.0) {
-      return;
-    }
-    final long expect = (long) (throttleTimerAll.now(TimeUnit.MILLISECONDS)
-        * throttleLimitHandlerRatio);
-    final long actual = throttleTimerLocked.now(TimeUnit.MILLISECONDS);
-    if (LOG.isDebugEnabled()) {
-      LOG.debug("Re-encryption handler throttling expect: {}, actual: {},"
-              + " throttleTimerAll:{}", expect, actual,
-          throttleTimerAll.now(TimeUnit.MILLISECONDS));
-    }
-    if (expect - actual < 0) {
-      // in case throttleLimitHandlerRatio is very small, expect will be 0.
-      // so sleepMs should not be calculated from expect, to really meet the
-      // ratio. e.g. if ratio is 0.001, expect = 0 and actual = 1, sleepMs
-      // should be 1000 - throttleTimerAll.now()
-      final long sleepMs =
-          (long) (actual / throttleLimitHandlerRatio) - throttleTimerAll
-              .now(TimeUnit.MILLISECONDS);
-      LOG.debug("Throttling re-encryption, sleeping for {} ms", sleepMs);
-      Thread.sleep(sleepMs);
+    @Override
+    public boolean shouldSubmitCurrentBatch() {
+      return currentBatch.size() >= reencryptBatchSize;
     }
-    throttleTimerAll.reset().start();
-    throttleTimerLocked.reset();
-  }
 
-  private synchronized int numTasksSubmitted() {
-    int ret = 0;
-    for (ZoneSubmissionTracker zst : submissions.values()) {
-      ret += zst.getTasks().size();
+    @Override
+    public boolean canTraverseDir(INode inode) throws IOException {
+      if (ezManager.isEncryptionZoneRoot(inode, inode.getFullPathName())) {
+        // nested EZ, ignore.
+        LOG.info("{}({}) is a nested EZ, skipping for re-encryption",
+            inode.getFullPathName(), inode.getId());
+        return false;
+      }
+      return true;
     }
-    return ret;
-  }
 
-  /**
-   * Process an Inode for re-encryption. Add to current batch if it's a file,
-   * no-op otherwise.
-   *
-   * @param inode the inode
-   * @return true if inode is added to currentBatch and should be re-encrypted.
-   * false otherwise: could be inode is not a file, or inode's edek's
-   * key version is not changed.
-   * @throws IOException
-   * @throws InterruptedException
-   */
-  private boolean reencryptINode(final INode inode, final String ezKeyVerName)
-      throws IOException, InterruptedException {
-    assert dir.hasReadLock();
-    if (LOG.isTraceEnabled()) {
-      LOG.trace("Processing {} for re-encryption", inode.getFullPathName());
-    }
-    if (!inode.isFile()) {
-      return false;
-    }
-    FileEncryptionInfo feInfo = FSDirEncryptionZoneOp
-        .getFileEncryptionInfo(dir, INodesInPath.fromINode(inode));
-    if (feInfo == null) {
-      LOG.warn("File {} skipped re-encryption because it is not encrypted! "
-          + "This is very likely a bug.", inode.getId());
-      return false;
+    @Override
+    protected void readLock() {
+      super.readLock();
+      throttleTimerLocked.start();
     }
-    if (ezKeyVerName.equals(feInfo.getEzKeyVersionName())) {
-      if (LOG.isDebugEnabled()) {
-        LOG.debug("File {} skipped re-encryption because edek's key version"
-            + " name is not changed.", inode.getFullPathName());
-      }
-      return false;
+
+    @Override
+    protected void readUnlock() {
+      super.readUnlock();
+      throttleTimerLocked.stop();
     }
-    currentBatch.add(inode.asFile());
-    return true;
   }
 
-  /**
-   * Check whether zone is ready for re-encryption. Throws IOE if it's not.
-   * 1. If EZ is deleted.
-   * 2. if the re-encryption is canceled.
-   * 3. If NN is not active or is in safe mode.
-   *
-   * @throws IOException if zone does not exist / is cancelled, or if NN is not
-   *                     ready for write.
-   */
-  void checkZoneReady(final long zoneId)
-      throws RetriableException, SafeModeException, IOException {
-    final ZoneReencryptionStatus zs =
-        getReencryptionStatus().getZoneStatus(zoneId);
-    if (zs == null) {
-      throw new IOException("Zone " + zoneId + " status cannot be found.");
-    }
-    if (zs.isCanceled()) {
-      throw new IOException("Re-encryption is canceled for zone " + zoneId);
+  private class ZoneTraverseInfo extends TraverseInfo {
+    private String ezKeyVerName;
+
+    ZoneTraverseInfo(String ezKeyVerName) {
+      this.ezKeyVerName = ezKeyVerName;
     }
-    dir.getFSNamesystem()
-        .checkNameNodeSafeMode("NN is in safe mode, cannot re-encrypt.");
-    // re-encryption should be cancelled when NN goes to standby. Just
-    // double checking for sanity.
-    dir.getFSNamesystem().checkOperation(NameNode.OperationCategory.WRITE);
-  }
 
-  /**
-   * Called when a new zone is submitted for re-encryption. This will interrupt
-   * the background thread if it's waiting for the next
-   * DFS_NAMENODE_REENCRYPT_SLEEP_INTERVAL_KEY.
-   */
-  synchronized void notifyNewSubmission() {
-    LOG.debug("Notifying handler for new re-encryption command.");
-    this.notify();
+    public String getEzKeyVerName() {
+      return ezKeyVerName;
+    }
   }
 }
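
For reference, the lock-contention throttle above (item 3) reduces to a ratio check: the handler may hold the FSN/FSD read locks for at most `throttleLimitHandlerRatio` of the elapsed time, and otherwise sleeps just long enough to restore that ratio. A minimal standalone sketch of the arithmetic, with made-up values standing in for the timers (not part of the commit):

```java
// Illustrative sketch of the item-3 throttle math; names and values are
// stand-ins, not the actual NameNode classes.
public final class ThrottleMathSketch {
  public static void main(String[] args) throws InterruptedException {
    final double ratio = 0.5;   // stand-in for throttleLimitHandlerRatio
    final long allMs = 1000;    // elapsed time, as from throttleTimerAll
    final long lockedMs = 700;  // time under read locks (throttleTimerLocked)

    final long expect = (long) (allMs * ratio); // allowed locked time: 500
    if (expect - lockedMs < 0) {
      // Sleep so that lockedMs / (allMs + sleepMs) == ratio.
      final long sleepMs = (long) (lockedMs / ratio) - allMs; // 400 ms here
      System.out.println("would sleep " + sleepMs + " ms");
      Thread.sleep(sleepMs);
    }
  }
}
```

After the sleep, 700 ms of lock time out of 1400 ms elapsed meets the 0.5 ratio, which is why both timers are reset before the next cycle.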

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ReencryptionUpdater.java

@@ -464,7 +464,7 @@ public final class ReencryptionUpdater implements Runnable {
     final String zonePath;
     dir.writeLock();
     try {
-      handler.checkZoneReady(task.zoneId);
+      handler.getTraverser().checkINodeReady(task.zoneId);
       final INode zoneNode = dir.getInode(task.zoneId);
       if (zoneNode == null) {
         // ez removed.

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/ArchivalStorage.md

@@ -1,4 +1,4 @@
-<!---
+<!---
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/MemoryStorage.md

@@ -1,4 +1,4 @@
-<!---
+<!---
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

+ 51 - 10
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java

@@ -39,6 +39,8 @@ import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.StripedFileTestUtil;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
+import org.apache.hadoop.hdfs.protocol.BlockType;
+import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
@@ -114,10 +116,13 @@ import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
 import static org.mockito.Mockito.doReturn;
+import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.reset;
 import static org.mockito.Mockito.spy;
 import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
 
 public class TestBlockManager {
   private DatanodeStorageInfo[] storages;
@@ -1343,14 +1348,14 @@ public class TestBlockManager {
     spyBM.createLocatedBlocks(new BlockInfo[]{blockInfo}, 3L, false, 0L, 3L,
         false, false, null, null);
     verify(spyBM, Mockito.atLeast(0)).
-        isReplicaCorrupt(Mockito.any(BlockInfo.class),
-            Mockito.any(DatanodeDescriptor.class));
+        isReplicaCorrupt(any(BlockInfo.class),
+            any(DatanodeDescriptor.class));
     addCorruptBlockOnNodes(0, origNodes);
     spyBM.createLocatedBlocks(new BlockInfo[]{blockInfo}, 3L, false, 0L, 3L,
         false, false, null, null);
     verify(spyBM, Mockito.atLeast(1)).
-        isReplicaCorrupt(Mockito.any(BlockInfo.class),
-            Mockito.any(DatanodeDescriptor.class));
+        isReplicaCorrupt(any(BlockInfo.class),
+            any(DatanodeDescriptor.class));
   }
 
   @Test (timeout = 300000)
@@ -1506,8 +1511,8 @@ public class TestBlockManager {
         blockInfo.getGenerationStamp() + 1,
         blockInfo.getNumBytes(),
         new DatanodeStorageInfo[]{});
-    BlockCollection mockedBc = Mockito.mock(BlockCollection.class);
-    Mockito.when(mockedBc.getBlocks()).thenReturn(new BlockInfo[]{blockInfo});
+    BlockCollection mockedBc = mock(BlockCollection.class);
+    when(mockedBc.getBlocks()).thenReturn(new BlockInfo[]{blockInfo});
     bm.checkRedundancy(mockedBc);
     return blockInfo;
   }
@@ -1524,8 +1529,8 @@ public class TestBlockManager {
     Mockito.doReturn(bc).when(fsn).getBlockCollection(inodeId);
     bm.blocksMap.addBlockCollection(blockInfo, bc);
     nodesList.get(0).setInMaintenance();
-    BlockCollection mockedBc = Mockito.mock(BlockCollection.class);
-    Mockito.when(mockedBc.getBlocks()).thenReturn(new BlockInfo[]{blockInfo});
+    BlockCollection mockedBc = mock(BlockCollection.class);
+    when(mockedBc.getBlocks()).thenReturn(new BlockInfo[]{blockInfo});
     bm.checkRedundancy(mockedBc);
     return blockInfo;
   }
@@ -1580,8 +1585,8 @@ public class TestBlockManager {
     Mockito.doReturn(bc).when(fsn).getBlockCollection(inodeId);
     bm.blocksMap.addBlockCollection(blockInfo, bc);
     nodesList.get(0).startDecommission();
-    BlockCollection mockedBc = Mockito.mock(BlockCollection.class);
-    Mockito.when(mockedBc.getBlocks()).thenReturn(new BlockInfo[]{blockInfo});
+    BlockCollection mockedBc = mock(BlockCollection.class);
+    when(mockedBc.getBlocks()).thenReturn(new BlockInfo[]{blockInfo});
     bm.checkRedundancy(mockedBc);
     return blockInfo;
   }
@@ -1623,4 +1628,40 @@ public class TestBlockManager {
     }
   }
 
+  @Test
+  public void testLegacyBlockInInvalidateBlocks() {
+    final long legacyGenerationStampLimit = 10000;
+    BlockIdManager bim = Mockito.mock(BlockIdManager.class);
+
+    when(bim.getLegacyGenerationStampLimit())
+        .thenReturn(legacyGenerationStampLimit);
+    when(bim.isStripedBlock(any(Block.class))).thenCallRealMethod();
+    when(bim.isLegacyBlock(any(Block.class))).thenCallRealMethod();
+
+    InvalidateBlocks ibs = new InvalidateBlocks(100, 30000, bim);
+
+    Block legacy = new Block(-1, 10, legacyGenerationStampLimit / 10);
+    Block striped = new Block(
+        bm.nextBlockId(BlockType.STRIPED), 10,
+        legacyGenerationStampLimit + 10);
+
+    DatanodeInfo legacyDnInfo = DFSTestUtil.getLocalDatanodeInfo();
+    DatanodeInfo stripedDnInfo = DFSTestUtil.getLocalDatanodeInfo();
+
+    ibs.add(legacy, legacyDnInfo, false);
+    assertEquals(1, ibs.getBlocks());
+    assertEquals(0, ibs.getECBlocks());
+
+    ibs.add(striped, stripedDnInfo, false);
+    assertEquals(1, ibs.getBlocks());
+    assertEquals(1, ibs.getECBlocks());
+
+    ibs.remove(legacyDnInfo);
+    assertEquals(0, ibs.getBlocks());
+    assertEquals(1, ibs.getECBlocks());
+
+    ibs.remove(stripedDnInfo);
+    assertEquals(0, ibs.getBlocks());
+    assertEquals(0, ibs.getECBlocks());
+  }
 }

+ 32 - 16
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCorruptReplicaInfo.java

@@ -21,6 +21,8 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.when;
 
 import java.io.IOException;
 import java.util.Arrays;
@@ -30,10 +32,12 @@ import java.util.Map;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.StripedFileTestUtil;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.BlockType;
 import org.apache.hadoop.hdfs.server.blockmanagement.CorruptReplicasMap.Reason;
 import org.junit.Test;
+import org.mockito.Mockito;
 
 
 /**
@@ -46,27 +50,31 @@ public class TestCorruptReplicaInfo {
   
   private static final Log LOG = LogFactory.getLog(
       TestCorruptReplicaInfo.class);
-  private final Map<Long, Block> replicaMap = new HashMap<>();
-  private final Map<Long, Block> stripedBlocksMap = new HashMap<>();
+  private final Map<Long, BlockInfo> replicaMap = new HashMap<>();
+  private final Map<Long, BlockInfo> stripedBlocksMap = new HashMap<>();
 
   // Allow easy block creation by block id. Return existing
   // replica block if one with same block id already exists.
-  private Block getReplica(Long blockId) {
+  private BlockInfo getReplica(Long blockId) {
     if (!replicaMap.containsKey(blockId)) {
-      replicaMap.put(blockId, new Block(blockId, 0, 0));
+      short replFactor = 3;
+      replicaMap.put(blockId,
+          new BlockInfoContiguous(new Block(blockId, 0, 0), replFactor));
     }
     return replicaMap.get(blockId);
   }
 
-  private Block getReplica(int blkId) {
+  private BlockInfo getReplica(int blkId) {
     return getReplica(Long.valueOf(blkId));
   }
 
-  private Block getStripedBlock(int blkId) {
+  private BlockInfo getStripedBlock(int blkId) {
     Long stripedBlockId = (1L << 63) + blkId;
     assertTrue(BlockIdManager.isStripedBlockID(stripedBlockId));
     if (!stripedBlocksMap.containsKey(stripedBlockId)) {
-      stripedBlocksMap.put(stripedBlockId, new Block(stripedBlockId, 1024, 0));
+      stripedBlocksMap.put(stripedBlockId,
+          new BlockInfoStriped(new Block(stripedBlockId, 1024, 0),
+              StripedFileTestUtil.getDefaultECPolicy()));
     }
     return stripedBlocksMap.get(stripedBlockId);
   }
@@ -88,6 +96,10 @@ public class TestCorruptReplicaInfo {
   public void testCorruptReplicaInfo()
       throws IOException, InterruptedException {
     CorruptReplicasMap crm = new CorruptReplicasMap();
+    BlockIdManager bim = Mockito.mock(BlockIdManager.class);
+    when(bim.isLegacyBlock(any(Block.class))).thenReturn(false);
+    when(bim.isStripedBlock(any(Block.class))).thenCallRealMethod();
+    assertTrue(!bim.isLegacyBlock(new Block(-1)));
 
     // Make sure initial values are returned correctly
     assertEquals("Total number of corrupt blocks must initially be 0!",
@@ -97,10 +109,11 @@ public class TestCorruptReplicaInfo {
     assertEquals("Number of corrupt striped block groups must initially be 0!",
         0, crm.getCorruptECBlockGroups());
     assertNull("Param n cannot be less than 0",
-        crm.getCorruptBlockIdsForTesting(BlockType.CONTIGUOUS, -1, null));
+        crm.getCorruptBlockIdsForTesting(bim, BlockType.CONTIGUOUS, -1, null));
     assertNull("Param n cannot be greater than 100",
-        crm.getCorruptBlockIdsForTesting(BlockType.CONTIGUOUS, 101, null));
-    long[] l = crm.getCorruptBlockIdsForTesting(BlockType.CONTIGUOUS, 0, null);
+        crm.getCorruptBlockIdsForTesting(bim, BlockType.CONTIGUOUS, 101, null));
+    long[] l = crm.getCorruptBlockIdsForTesting(
+        bim, BlockType.CONTIGUOUS, 0, null);
     assertNotNull("n = 0 must return non-null", l);
     assertEquals("n = 0 must return an empty list", 0, l.length);
 
@@ -156,22 +169,25 @@ public class TestCorruptReplicaInfo {
         2 * blockCount, crm.size());
     assertTrue("First five corrupt replica blocks ids are not right!",
         Arrays.equals(Arrays.copyOfRange(replicaIds, 0, 5),
-            crm.getCorruptBlockIdsForTesting(BlockType.CONTIGUOUS, 5, null)));
+            crm.getCorruptBlockIdsForTesting(
+                bim, BlockType.CONTIGUOUS, 5, null)));
     assertTrue("First five corrupt striped blocks ids are not right!",
         Arrays.equals(Arrays.copyOfRange(stripedIds, 0, 5),
-            crm.getCorruptBlockIdsForTesting(BlockType.STRIPED, 5, null)));
+            crm.getCorruptBlockIdsForTesting(
+                bim, BlockType.STRIPED, 5, null)));
 
     assertTrue("10 replica blocks after 7 not returned correctly!",
         Arrays.equals(Arrays.copyOfRange(replicaIds, 7, 17),
-            crm.getCorruptBlockIdsForTesting(BlockType.CONTIGUOUS, 10, 7L)));
+            crm.getCorruptBlockIdsForTesting(
+                bim, BlockType.CONTIGUOUS, 10, 7L)));
     assertTrue("10 striped blocks after 7 not returned correctly!",
         Arrays.equals(Arrays.copyOfRange(stripedIds, 7, 17),
-            crm.getCorruptBlockIdsForTesting(BlockType.STRIPED,
+            crm.getCorruptBlockIdsForTesting(bim, BlockType.STRIPED,
                 10, getStripedBlock(7).getBlockId())));
   }
   
   private static void addToCorruptReplicasMap(CorruptReplicasMap crm,
-      Block blk, DatanodeDescriptor dn) {
-    crm.addToCorruptReplicasMap(blk, dn, "TEST", Reason.NONE);
+      BlockInfo blk, DatanodeDescriptor dn) {
+    crm.addToCorruptReplicasMap(blk, dn, "TEST", Reason.NONE, blk.isStriped());
   }
 }

+ 0 - 3
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestReencryption.java

@@ -32,7 +32,6 @@ import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicBoolean;
 
 import com.google.common.base.Supplier;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.crypto.key.JavaKeyStoreProvider;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
@@ -64,7 +63,6 @@ import org.junit.After;
 import org.junit.Before;
 import org.junit.Rule;
 import org.junit.Test;
-
 import static org.apache.hadoop.test.GenericTestUtils.assertExceptionContains;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
@@ -72,7 +70,6 @@ import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
-
 import org.junit.rules.Timeout;
 import org.mockito.internal.util.reflection.Whitebox;
 import org.slf4j.LoggerFactory;

+ 7 - 3
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestReencryptionHandler.java

@@ -75,6 +75,10 @@ public class TestReencryptionHandler {
         CommonConfigurationKeysPublic.HADOOP_SECURITY_KEY_PROVIDER_PATH);
     Mockito.when(ezm.getProvider()).thenReturn(
         KeyProviderCryptoExtension.createKeyProviderCryptoExtension(kp));
+    FSDirectory fsd = Mockito.mock(FSDirectory.class);
+    FSNamesystem fns = Mockito.mock(FSNamesystem.class);
+    Mockito.when(fsd.getFSNamesystem()).thenReturn(fns);
+    Mockito.when(ezm.getFSDirectory()).thenReturn(fsd);
     return new ReencryptionHandler(ezm, conf);
   }
 
@@ -99,7 +103,7 @@ public class TestReencryptionHandler {
     Whitebox.setInternalState(rh, "throttleTimerLocked", mockLocked);
     Whitebox.setInternalState(rh, "taskQueue", queue);
     final StopWatch sw = new StopWatch().start();
-    rh.throttle();
+    rh.getTraverser().throttle();
     sw.stop();
     assertTrue("should have throttled for at least 8 second",
         sw.now(TimeUnit.MILLISECONDS) > 8000);
@@ -130,7 +134,7 @@ public class TestReencryptionHandler {
         submissions = new HashMap<>();
     Whitebox.setInternalState(rh, "submissions", submissions);
     StopWatch sw = new StopWatch().start();
-    rh.throttle();
+    rh.getTraverser().throttle();
     sw.stop();
     assertTrue("should not have throttled",
         sw.now(TimeUnit.MILLISECONDS) < 1000);
@@ -189,7 +193,7 @@ public class TestReencryptionHandler {
     Whitebox.setInternalState(rh, "submissions", submissions);
     final StopWatch sw = new StopWatch().start();
     removeTaskThread.start();
-    rh.throttle();
+    rh.getTraverser().throttle();
     sw.stop();
     LOG.info("Throttle completed, consumed {}", sw.now(TimeUnit.MILLISECONDS));
     assertTrue("should have throttled for at least 3 second",

+ 72 - 3
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/net/TestNetworkTopology.java

@@ -27,10 +27,9 @@ import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
+import java.util.Random;
 import java.util.Set;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
@@ -39,14 +38,19 @@ import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
+import org.apache.hadoop.test.GenericTestUtils;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.Timeout;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.slf4j.event.Level;
 
 public class TestNetworkTopology {
-  private static final Log LOG = LogFactory.getLog(TestNetworkTopology.class);
+  private static final Logger LOG =
+      LoggerFactory.getLogger(TestNetworkTopology.class);
   private final static NetworkTopology cluster =
       NetworkTopology.getInstance(new Configuration());
   private DatanodeDescriptor dataNodes[];
@@ -83,6 +87,7 @@ public class TestNetworkTopology {
     }
     dataNodes[9].setDecommissioned();
     dataNodes[10].setDecommissioned();
+    GenericTestUtils.setLogLevel(NetworkTopology.LOG, Level.TRACE);
   }
   
   @Test
@@ -324,6 +329,7 @@ public class TestNetworkTopology {
         frequency.put(random, frequency.get(random) + 1);
       }
     }
+    LOG.info("Result:" + frequency);
     return frequency;
   }
 
@@ -471,4 +477,67 @@ public class TestNetworkTopology {
     }
   }
 
+  /**
+   * Tests chooseRandom with include scope, excluding a few nodes.
+   */
+  @Test
+  public void testChooseRandomInclude1() {
+    final String scope = "/d1";
+    final Set<Node> excludedNodes = new HashSet<>();
+    final Random r = new Random();
+    for (int i = 0; i < 4; ++i) {
+      final int index = r.nextInt(5);
+      excludedNodes.add(dataNodes[index]);
+    }
+    Map<Node, Integer> frequency = pickNodesAtRandom(100, scope, excludedNodes);
+
+    verifyResults(5, excludedNodes, frequency);
+  }
+
+  /**
+   * Tests chooseRandom with include scope at rack, excluding a node.
+   */
+  @Test
+  public void testChooseRandomInclude2() {
+    String scope = dataNodes[0].getNetworkLocation();
+    Set<Node> excludedNodes = new HashSet<>();
+    final Random r = new Random();
+    int index = r.nextInt(1);
+    excludedNodes.add(dataNodes[index]);
+    final int count = 100;
+    Map<Node, Integer> frequency =
+        pickNodesAtRandom(count, scope, excludedNodes);
+
+    verifyResults(1, excludedNodes, frequency);
+  }
+
+  private void verifyResults(int upperbound, Set<Node> excludedNodes,
+      Map<Node, Integer> frequency) {
+    LOG.info("Excluded nodes are: {}", excludedNodes);
+    for (int i = 0; i < upperbound; ++i) {
+      final Node n = dataNodes[i];
+      LOG.info("Verifying node {}", n);
+      if (excludedNodes.contains(n)) {
+        assertEquals(n + " should not have been chosen.", 0,
+            (int) frequency.get(n));
+      } else {
+        assertTrue(n + " should have been chosen", frequency.get(n) > 0);
+      }
+    }
+  }
+
+  /**
+   * Tests chooseRandom with include scope, no excluded nodes.
+   */
+  @Test
+  public void testChooseRandomInclude3() {
+    String scope = "/d1";
+    Map<Node, Integer> frequency = pickNodesAtRandom(200, scope, null);
+    LOG.info("No node is excluded.");
+    for (int i = 0; i < 5; ++i) {
+      // all nodes should be more than zero
+      assertTrue(dataNodes[i] + " should have been chosen.",
+          frequency.get(dataNodes[i]) > 0);
+    }
+  }
 }

+ 19 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testCryptoConf.xml

@@ -113,6 +113,25 @@
       </comparators>
     </test>
 
+    <test>
+      <description>Test failure of creating EZ on an existing EZ</description>
+      <test-commands>
+        <command>-fs NAMENODE -mkdir /foo</command>
+        <command>-fs NAMENODE -ls /</command>
+        <crypto-admin-command>-createZone -path /foo -keyName myKey</crypto-admin-command>
+        <crypto-admin-command>-createZone -path /foo -keyName myKey</crypto-admin-command>
+      </test-commands>
+      <cleanup-commands>
+        <command>-fs NAMENODE -rmdir /foo</command>
+      </cleanup-commands>
+      <comparators>
+        <comparator>
+          <type>SubstringComparator</type>
+          <expected-output>Directory /foo is already an encryption zone</expected-output>
+        </comparator>
+      </comparators>
+    </test>
+
     <test>
       <description>Test success of creating an EZ as a subdir of an existing EZ.</description>
       <test-commands>

File diff suppressed because it is too large
+ 11 - 0
hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_Common_3.1.0.xml


File diff suppressed because it is too large
+ 11 - 0
hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_Core_3.1.0.xml


File diff suppressed because it is too large
+ 11 - 0
hadoop-mapreduce-project/dev-support/jdiff/Apache_Hadoop_MapReduce_JobClient_3.1.0.xml


+ 1 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryEventHandler.java

@@ -269,7 +269,7 @@ public class JobHistoryEventHandler extends AbstractService
       LOG.info("Emitting job history data to the timeline service is enabled");
       if (YarnConfiguration.timelineServiceEnabled(conf)) {
         boolean timelineServiceV2Enabled =
-            ((int) YarnConfiguration.getTimelineServiceVersion(conf) == 2);
+            YarnConfiguration.timelineServiceV2Enabled(conf);
         if(timelineServiceV2Enabled) {
           timelineV2Client =
               ((MRAppMaster.RunningAppContext)context).getTimelineV2Client();

+ 4 - 0
hadoop-project/src/site/site.xml

@@ -106,6 +106,7 @@
       <item name="Upgrade Domain" href="hadoop-project-dist/hadoop-hdfs/HdfsUpgradeDomain.html"/>
       <item name="DataNode Admin" href="hadoop-project-dist/hadoop-hdfs/HdfsDataNodeAdminGuide.html"/>
       <item name="Router Federation" href="hadoop-project-dist/hadoop-hdfs-rbf/HDFSRouterFederation.html"/>
+      <item name="Provided Storage" href="hadoop-project-dist/hadoop-hdfs/HdfsProvidedStorage.html"/>
     </menu>
 
     <menu name="Ozone" inherit="top">
@@ -156,6 +157,9 @@
       <item name="Opportunistic Containers" href="hadoop-yarn/hadoop-yarn-site/OpportunisticContainers.html"/>
       <item name="YARN Federation" href="hadoop-yarn/hadoop-yarn-site/Federation.html"/>
       <item name="Shared Cache" href="hadoop-yarn/hadoop-yarn-site/SharedCache.html"/>
+      <item name="Using GPU" href="hadoop-yarn/hadoop-yarn-site/UsingGpus.html"/>
+      <item name="Using FPGA" href="hadoop-yarn/hadoop-yarn-site/UsingFPGA.html"/>
+      <item name="Placement Constraints" href="hadoop-yarn/hadoop-yarn-site/PlacementConstraints.html"/>
     </menu>
 
     <menu name="YARN REST APIs" inherit="top">

+ 14 - 4
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java

@@ -812,23 +812,33 @@ public class DynamoDBMetadataStore implements MetadataStore {
   }
 
   @Retries.OnceRaw
-  private ItemCollection<ScanOutcome> expiredFiles(long modTime) {
-    String filterExpression = "mod_time < :mod_time";
+  private ItemCollection<ScanOutcome> expiredFiles(long modTime,
+      String keyPrefix) {
+    String filterExpression =
+        "mod_time < :mod_time and begins_with(parent, :parent)";
     String projectionExpression = "parent,child";
-    ValueMap map = new ValueMap().withLong(":mod_time", modTime);
+    ValueMap map = new ValueMap()
+        .withLong(":mod_time", modTime)
+        .withString(":parent", keyPrefix);
     return table.scan(filterExpression, projectionExpression, null, map);
   }
 
   @Override
   @Retries.OnceRaw("once(batchWrite)")
   public void prune(long modTime) throws IOException {
+    prune(modTime, "/");
+  }
+
+  @Override
+  @Retries.OnceRaw("once(batchWrite)")
+  public void prune(long modTime, String keyPrefix) throws IOException {
     int itemCount = 0;
     try {
       Collection<Path> deletionBatch =
           new ArrayList<>(S3GUARD_DDB_BATCH_WRITE_REQUEST_LIMIT);
       int delay = conf.getInt(S3GUARD_DDB_BACKGROUND_SLEEP_MSEC_KEY,
           S3GUARD_DDB_BACKGROUND_SLEEP_MSEC_DEFAULT);
-      for (Item item : expiredFiles(modTime)) {
+      for (Item item : expiredFiles(modTime, keyPrefix)) {
         PathMetadata md = PathMetadataDynamoDBTranslation
             .itemToPathMetadata(item, username);
         Path path = md.getFileStatus().getPath();

+ 12 - 5
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java

@@ -303,12 +303,18 @@ public class LocalMetadataStore implements MetadataStore {
   }
 
   @Override
-  public synchronized void prune(long modTime) throws IOException {
+  public void prune(long modTime) throws IOException {
+    prune(modTime, "");
+  }
+
+  @Override
+  public synchronized void prune(long modTime, String keyPrefix)
+      throws IOException {
     Iterator<Map.Entry<Path, PathMetadata>> files =
         fileHash.entrySet().iterator();
     while (files.hasNext()) {
       Map.Entry<Path, PathMetadata> entry = files.next();
-      if (expired(entry.getValue().getFileStatus(), modTime)) {
+      if (expired(entry.getValue().getFileStatus(), modTime, keyPrefix)) {
         files.remove();
       }
     }
@@ -323,7 +329,7 @@ public class LocalMetadataStore implements MetadataStore {
 
       for (PathMetadata child : oldChildren) {
         FileStatus status = child.getFileStatus();
-        if (!expired(status, modTime)) {
+        if (!expired(status, modTime, keyPrefix)) {
           newChildren.add(child);
         }
       }
@@ -339,10 +345,11 @@ public class LocalMetadataStore implements MetadataStore {
     }
   }
 
-  private boolean expired(FileStatus status, long expiry) {
+  private boolean expired(FileStatus status, long expiry, String keyPrefix) {
     // Note: S3 doesn't track modification time on directories, so for
     // consistency with the DynamoDB implementation we ignore that here
-    return status.getModificationTime() < expiry && !status.isDirectory();
+    return status.getModificationTime() < expiry && !status.isDirectory()
+      && status.getPath().toString().startsWith(keyPrefix);
   }
 
   @VisibleForTesting

+ 12 - 0
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java

@@ -223,6 +223,18 @@ public interface MetadataStore extends Closeable {
    */
   void prune(long modTime) throws IOException, UnsupportedOperationException;
 
+  /**
+   * Same as {@link MetadataStore#prune(long)}, but with an additional
+   * keyPrefix parameter to filter the pruned keys with a prefix.
+   *
+   * @param modTime Oldest modification time to allow
+   * @param keyPrefix The prefix for the keys that should be removed
+   * @throws IOException if there is an error
+   * @throws UnsupportedOperationException if not implemented
+   */
+  void prune(long modTime, String keyPrefix)
+      throws IOException, UnsupportedOperationException;
+
   /**
    * Get any diagnostics information from a store, as a list of (key, value)
    * tuples for display. Arbitrary values; no guarantee of stability.
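
The prefix-aware `prune(modTime, keyPrefix)` contract added here is the same across the implementations in this commit: an entry is pruned when it is a file, its modification time is older than the cutoff, and its path starts with the given prefix. A self-contained sketch of that predicate, using a hypothetical `Entry` type rather than the Hadoop `PathMetadata` classes:

```java
import java.util.HashMap;
import java.util.Map;

// Standalone illustration of the prune(modTime, keyPrefix) predicate;
// Entry is a made-up stand-in for a stored metadata record.
public final class PrunePredicateSketch {
  static final class Entry {
    final String path; final long modTime; final boolean isDirectory;
    Entry(String path, long modTime, boolean isDirectory) {
      this.path = path; this.modTime = modTime; this.isDirectory = isDirectory;
    }
  }

  static boolean expired(Entry e, long cutoff, String keyPrefix) {
    // Directories are never pruned: S3 does not track their modification time.
    return e.modTime < cutoff && !e.isDirectory && e.path.startsWith(keyPrefix);
  }

  public static void main(String[] args) {
    Map<String, Entry> store = new HashMap<>();
    store.put("a", new Entry("/bucket/logs/old.txt", 100L, false));
    store.put("b", new Entry("/bucket/data/old.txt", 100L, false));
    store.put("c", new Entry("/bucket/logs/new.txt", 900L, false));

    // Prune files under /bucket/logs older than t=500; only "a" matches.
    store.values().removeIf(e -> expired(e, 500L, "/bucket/logs"));
    System.out.println(store.keySet()); // "a" pruned; "b" and "c" remain
  }
}
```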

+ 4 - 0
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java

@@ -99,6 +99,10 @@ public class NullMetadataStore implements MetadataStore {
   public void prune(long modTime) {
   }
 
+  @Override
+  public void prune(long modTime, String keyPrefix) {
+  }
+
   @Override
   public String toString() {
     return "NullMetadataStore";

+ 13 - 1
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java

@@ -966,7 +966,19 @@ public abstract class S3GuardTool extends Configured implements Tool {
       long now = System.currentTimeMillis();
       long divide = now - delta;
 
-      getStore().prune(divide);
+      // Remove the protocol from the path string to get the keyPrefix.
+      // By default the keyPrefix is "/", unless an S3 URL is provided.
+      String keyPrefix = "/";
+      if (paths.size() > 0) {
+        Path path = new Path(paths.get(0));
+        keyPrefix = PathMetadataDynamoDBTranslation.pathToParentKey(path);
+      }
+
+      try {
+        getStore().prune(divide, keyPrefix);
+      } catch (UnsupportedOperationException e) {
+        errorln("Prune operation not supported in metadata store.");
+      }
 
       out.flush();
       return SUCCESS;

+ 9 - 2
hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md

@@ -592,8 +592,8 @@ A time value of hours, minutes and/or seconds must be supplied.
 1. This does not delete the entries in the bucket itself.
 1. The modification time is effectively the creation time of the objects
 in the S3 Bucket.
-1. Even when an S3A URI is supplied, all entries in the table older than
-a specific age are deleted &mdash; even those from other buckets.
+1. If an S3A URI is supplied, only the entries in the table specified by the
+URI and older than a specific age are deleted.
 
 Example
 
@@ -604,6 +604,13 @@ hadoop s3guard prune -days 7 s3a://ireland-1
 Deletes all entries in the S3Guard table for files older than seven days from
 the table associated with `s3a://ireland-1`.
 
+```bash
+hadoop s3guard prune -days 7 s3a://ireland-1/path_prefix/
+```
+
+Deletes all entries in the S3Guard table for files older than seven days from
+the table associated with `s3a://ireland-1` and with the prefix "path_prefix".
+
 ```bash
 hadoop s3guard prune -hours 1 -minutes 30 -meta dynamodb://ireland-team -region eu-west-1
 ```

+ 13 - 8
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java

@@ -181,22 +181,26 @@ public abstract class AbstractS3GuardToolTestBase extends AbstractS3ATestBase {
     }
   }
 
-  private void testPruneCommand(Configuration cmdConf, String...args)
-      throws Exception {
-    Path parent = path("prune-cli");
+  private void testPruneCommand(Configuration cmdConf, Path parent,
+      String...args) throws Exception {
+    Path keepParent = path("prune-cli-keep");
     try {
       getFileSystem().mkdirs(parent);
+      getFileSystem().mkdirs(keepParent);
 
       S3GuardTool.Prune cmd = new S3GuardTool.Prune(cmdConf);
       cmd.setMetadataStore(ms);
 
       createFile(new Path(parent, "stale"), true, true);
+      createFile(new Path(keepParent, "stale-to-keep"), true, true);
       Thread.sleep(TimeUnit.SECONDS.toMillis(2));
       createFile(new Path(parent, "fresh"), true, true);
 
       assertMetastoreListingCount(parent, "Children count before pruning", 2);
       exec(cmd, args);
       assertMetastoreListingCount(parent, "Pruned children count", 1);
+      assertMetastoreListingCount(keepParent,
+          "This child should have been kept (prefix restriction).", 1);
     } finally {
       getFileSystem().delete(parent, true);
       ms.prune(Long.MAX_VALUE);
@@ -213,17 +217,18 @@ public abstract class AbstractS3GuardToolTestBase extends AbstractS3ATestBase {
 
   @Test
   public void testPruneCommandCLI() throws Exception {
-    String testPath = path("testPruneCommandCLI").toString();
-    testPruneCommand(getFileSystem().getConf(),
-        "prune", "-seconds", "1", testPath);
+    Path testPath = path("testPruneCommandCLI");
+    testPruneCommand(getFileSystem().getConf(), testPath,
+        "prune", "-seconds", "1", testPath.toString());
   }
 
   @Test
   public void testPruneCommandConf() throws Exception {
     getConfiguration().setLong(Constants.S3GUARD_CLI_PRUNE_AGE,
         TimeUnit.SECONDS.toMillis(1));
-    String testPath = path("testPruneCommandConf").toString();
-    testPruneCommand(getConfiguration(), "prune", testPath);
+    Path testPath = path("testPruneCommandConf");
+    testPruneCommand(getConfiguration(), testPath,
+        "prune", testPath.toString());
   }
 
   @Test

+ 7 - 0
hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml

@@ -658,4 +658,11 @@
     <Bug pattern="NP_NULL_ON_SOME_PATH" />
   </Match>
 
+  <!-- Expose the reference to avoid performance overhead -->
+  <Match>
+    <Class name="org.apache.hadoop.yarn.api.records.Resource" />
+    <Method name="getResources" />
+    <Bug pattern="EI_EXPOSE_REP" />
+  </Match>
+
 </FindBugsFilter>

File diff suppressed because it is too large
+ 11 - 0
hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Client_3.1.0.xml


File diff suppressed because it is too large
+ 11 - 0
hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Common_3.1.0.xml


File diff suppressed because it is too large
+ 11 - 0
hadoop-yarn-project/hadoop-yarn/dev-support/jdiff/Apache_Hadoop_YARN_Server_Common_3.1.0.xml


+ 1 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/AllocationTagNamespaceType.java

@@ -26,7 +26,7 @@ public enum AllocationTagNamespaceType {
   SELF("self"),
   NOT_SELF("not-self"),
   APP_ID("app-id"),
-  APP_LABEL("app-label"),
+  APP_TAG("app-tag"),
   ALL("all");
 
   private String typeKeyword;

+ 50 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/timelineservice/SubApplicationEntity.java

@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.api.records.timelineservice;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+/**
+ * This entity represents a user-defined entity to be stored under the sub
+ * application table.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Unstable
+public class SubApplicationEntity extends HierarchicalTimelineEntity {
+
+  public static final String YARN_APPLICATION_ID = "YARN_APPLICATION_ID";
+
+  public SubApplicationEntity(TimelineEntity entity) {
+    super(entity);
+  }
+
+  /**
+   * Checks if the input TimelineEntity object is a SubApplicationEntity.
+   *
+   * @param te TimelineEntity object.
+   * @return true if the input is a SubApplicationEntity, false otherwise.
+   */
+  public static boolean isSubApplicationEntity(TimelineEntity te) {
+    return te instanceof SubApplicationEntity;
+  }
+
+  public void setApplicationId(String appId) {
+    addInfo(YARN_APPLICATION_ID, appId);
+  }
+}
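
A possible use of the new wrapper, assuming the hadoop-yarn-api timeline classes from this branch are on the classpath (the entity type and id values below are made up for illustration):

```java
import org.apache.hadoop.yarn.api.records.timelineservice.SubApplicationEntity;
import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;

public final class SubApplicationEntityDemo {
  public static void main(String[] args) {
    TimelineEntity te = new TimelineEntity();
    te.setType("MY_SUB_APP_ENTITY"); // made-up entity type
    te.setId("entity-1");            // made-up entity id

    SubApplicationEntity sub = new SubApplicationEntity(te);
    sub.setApplicationId("application_1526000000000_0001");
    // true: the wrapper is itself a SubApplicationEntity
    System.out.println(SubApplicationEntity.isSubApplicationEntity(sub));
  }
}
```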

+ 42 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java

@@ -343,6 +343,10 @@ public class YarnConfiguration extends Configuration {
   public static final String YARN_API_SERVICES_ENABLE = "yarn."
       + "webapp.api-service.enable";
 
+  @Private
+  public static final String DEFAULT_YARN_API_SYSTEM_SERVICES_CLASS =
+      "org.apache.hadoop.yarn.service.client.SystemServiceManagerImpl";
+
   public static final String RM_RESOURCE_TRACKER_ADDRESS =
     RM_PREFIX + "resource-tracker.address";
   public static final int DEFAULT_RM_RESOURCE_TRACKER_PORT = 8031;
@@ -1947,6 +1951,20 @@ public class YarnConfiguration extends Configuration {
    */
   public static final boolean DEFAULT_NM_DOCKER_ALLOW_DELAYED_REMOVAL = false;
 
+  /**
+   * A configurable value to pass to the Docker Stop command. This value
+   * defines the number of seconds between the docker stop command sending
+   * a SIGTERM and a SIGKILL.
+   */
+  public static final String NM_DOCKER_STOP_GRACE_PERIOD =
+      DOCKER_CONTAINER_RUNTIME_PREFIX + "stop.grace-period";
+
+  /**
+   * The default value for the grace period between the SIGTERM and the
+   * SIGKILL in the Docker Stop command.
+   */
+  public static final int DEFAULT_NM_DOCKER_STOP_GRACE_PERIOD = 10;
+
   /** The mode in which the Java Container Sandbox should run detailed by
    *  the JavaSandboxLinuxContainerRuntime. */
   public static final String YARN_CONTAINER_SANDBOX =
@@ -2102,6 +2120,9 @@ public class YarnConfiguration extends Configuration {
   public static final String NM_AUX_SERVICES_CLASSPATH =
       NM_AUX_SERVICES + ".%s.classpath";
 
+  public static final String NM_AUX_SERVICE_REMOTE_CLASSPATH =
+      NM_AUX_SERVICES + ".%s.remote-classpath";
+
   public static final String NM_AUX_SERVICES_SYSTEM_CLASSES =
       NM_AUX_SERVICES + ".%s.system-classes";
 
@@ -3796,6 +3817,27 @@ public class YarnConfiguration extends Configuration {
     return enabled;
   }
 
+  /**
+   * Returns whether the timeline service v.1.5 is enabled via configuration.
+   *
+   * @param conf the configuration
+   * @return whether the timeline service v.1.5 is enabled. V.1.5 refers to a
+   * version equal to 1.5.
+   */
+  public static boolean timelineServiceV15Enabled(Configuration conf) {
+    boolean enabled = false;
+    if (timelineServiceEnabled(conf)) {
+      Collection<Float> versions = getTimelineServiceVersions(conf);
+      for (Float version : versions) {
+        if (Float.compare(version, 1.5f) == 0) {
+          enabled = true;
+          break;
+        }
+      }
+    }
+    return enabled;
+  }
+
   /**
    * Returns all the active timeline service versions. It does not check
    * whether the timeline service itself is enabled.
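
The new `timelineServiceV15Enabled` helper follows the same pattern as the existing v2 check: scan the configured versions and match on an exact float comparison. A standalone sketch of that matching logic, with the configuration plumbing omitted:

```java
import java.util.Arrays;
import java.util.Collection;

public final class TimelineVersionMatchSketch {
  // Mirrors the exact-match loop in the diff above, outside YarnConfiguration.
  static boolean hasVersion(Collection<Float> versions, float wanted) {
    for (Float v : versions) {
      if (Float.compare(v, wanted) == 0) {
        return true;
      }
    }
    return false;
  }

  public static void main(String[] args) {
    System.out.println(hasVersion(Arrays.asList(1.0f, 1.5f), 1.5f)); // true
    System.out.println(hasVersion(Arrays.asList(2.0f), 1.5f));       // false
  }
}
```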

+ 5 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/pom.xml

@@ -71,6 +71,7 @@
         <configuration>
           <excludes>
             <exclude>**/*.json</exclude>
+            <exclude>**/*.yarnfile</exclude>
           </excludes>
         </configuration>
       </plugin>
@@ -94,6 +95,10 @@
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-yarn-common</artifactId>
     </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-yarn-server-common</artifactId>
+    </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-common</artifactId>

+ 381 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/main/java/org/apache/hadoop/yarn/service/client/SystemServiceManagerImpl.java

@@ -0,0 +1,381 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.service.client;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.service.SystemServiceManager;
+import org.apache.hadoop.yarn.service.api.records.Service;
+import org.apache.hadoop.yarn.service.api.records.ServiceState;
+import org.apache.hadoop.yarn.service.conf.YarnServiceConf;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.lang.reflect.UndeclaredThrowableException;
+import java.security.PrivilegedExceptionAction;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import static org.apache.hadoop.yarn.service.utils.ServiceApiUtil.jsonSerDeser;
+
+/**
+ * SystemServiceManager implementation.
+ * Scans the configured system service path.
+ *
+ * The service path structure is as follows:
+ * SYSTEM_SERVICE_DIR_PATH
+ * |---- sync
+ * |     |--- user1
+ * |     |    |---- service1.yarnfile
+ * |     |    |---- service2.yarnfile
+ * |     |--- user2
+ * |     |    |---- service1.yarnfile
+ * |     |    ....
+ * |     |
+ * |---- async
+ * |     |--- user3
+ * |     |    |---- service1.yarnfile
+ * |     |    |---- service2.yarnfile
+ * |     |--- user4
+ * |     |    |---- service1.yarnfile
+ * |     |    ....
+ * |     |
+ *
+ * sync: These services are launched at the time of service start synchronously.
+ *       It is a blocking service start.
+ * async: These services are launched in a separate thread without any delay after
+ *       service start. Non-blocking service start.
+ */
+public class SystemServiceManagerImpl extends AbstractService
+    implements SystemServiceManager {
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(SystemServiceManagerImpl.class);
+
+  private static final String YARN_FILE_SUFFIX = ".yarnfile";
+  private static final String SYNC = "sync";
+  private static final String ASYNC = "async";
+
+  private FileSystem fs;
+  private Path systemServiceDir;
+  private AtomicBoolean stopExecutors = new AtomicBoolean(false);
+  private Map<String, Set<Service>> syncUserServices = new HashMap<>();
+  private Map<String, Set<Service>> asyncUserServices = new HashMap<>();
+  private UserGroupInformation loginUGI;
+  private Thread serviceLauncher;
+
+  @VisibleForTesting
+  private int skipCounter;
+  @VisibleForTesting
+  private Map<String, Integer> ignoredUserServices =
+      new HashMap<>();
+
+  public SystemServiceManagerImpl() {
+    super(SystemServiceManagerImpl.class.getName());
+  }
+
+  @Override
+  protected void serviceInit(Configuration conf) throws Exception {
+    String dirPath =
+        conf.get(YarnServiceConf.YARN_SERVICES_SYSTEM_SERVICE_DIRECTORY);
+    if (dirPath != null) {
+      systemServiceDir = new Path(dirPath);
+      LOG.info("System Service Directory is configured to {}",
+          systemServiceDir);
+      fs = systemServiceDir.getFileSystem(conf);
+      this.loginUGI = UserGroupInformation.isSecurityEnabled() ?
+          UserGroupInformation.getLoginUser() :
+          UserGroupInformation.getCurrentUser();
+      LOG.info("UserGroupInformation initialized to {}", loginUGI);
+    }
+  }
+
+  @Override
+  protected void serviceStart() throws Exception {
+    scanForUserServices();
+    launchUserService(syncUserServices);
+    // Create a thread and submit services in the background; otherwise it
+    // would block the RM transition to active.
+    serviceLauncher = new Thread(createRunnable());
+    serviceLauncher.setName("System service launcher");
+    serviceLauncher.start();
+  }
+
+  @Override
+  protected void serviceStop() throws Exception {
+    LOG.info("Stopping {}", getName());
+    stopExecutors.set(true);
+
+    if (serviceLauncher != null) {
+      serviceLauncher.interrupt();
+      try {
+        serviceLauncher.join();
+      } catch (InterruptedException ie) {
+        LOG.warn("Interrupted Exception while stopping", ie);
+      }
+    }
+  }
+
+  private Runnable createRunnable() {
+    return new Runnable() {
+      @Override
+      public void run() {
+        launchUserService(asyncUserServices);
+      }
+    };
+  }
+
+  void launchUserService(Map<String, Set<Service>> userServices) {
+    for (Map.Entry<String, Set<Service>> entry : userServices.entrySet()) {
+      String user = entry.getKey();
+      Set<Service> services = entry.getValue();
+      if (services.isEmpty()) {
+        continue;
+      }
+      ServiceClient serviceClient = null;
+      try {
+        UserGroupInformation userUgi = getProxyUser(user);
+        serviceClient = createServiceClient(userUgi);
+        for (Service service : services) {
+          LOG.info("POST: createService = {} user = {}", service, userUgi);
+          try {
+            launchServices(userUgi, serviceClient, service);
+          } catch (IOException | UndeclaredThrowableException e) {
+            if (e.getCause() != null) {
+              LOG.warn(e.getCause().getMessage());
+            } else {
+              String message =
+                  "Failed to create service " + service.getName() + " : ";
+              LOG.error(message, e);
+            }
+          }
+        }
+      } catch (InterruptedException e) {
+        LOG.warn("System service launcher thread interrupted", e);
+        break;
+      } catch (Exception e) {
+        LOG.error("Error while submitting services for user " + user, e);
+      } finally {
+        if (serviceClient != null) {
+          try {
+            serviceClient.close();
+          } catch (IOException e) {
+            LOG.warn("Error while closing serviceClient for user {}", user);
+          }
+        }
+      }
+    }
+  }
+
+  private ServiceClient createServiceClient(UserGroupInformation userUgi)
+      throws IOException, InterruptedException {
+    ServiceClient serviceClient =
+        userUgi.doAs(new PrivilegedExceptionAction<ServiceClient>() {
+          @Override public ServiceClient run()
+              throws IOException, YarnException {
+            ServiceClient sc = getServiceClient();
+            sc.init(getConfig());
+            sc.start();
+            return sc;
+          }
+        });
+    return serviceClient;
+  }
+
+  private void launchServices(UserGroupInformation userUgi,
+      ServiceClient serviceClient, Service service)
+      throws IOException, InterruptedException {
+    if (service.getState() == ServiceState.STOPPED) {
+      userUgi.doAs(new PrivilegedExceptionAction<Void>() {
+        @Override public Void run() throws IOException, YarnException {
+          serviceClient.actionBuild(service);
+          return null;
+        }
+      });
+      LOG.info("Service {} version {} saved.", service.getName(),
+          service.getVersion());
+    } else {
+      ApplicationId applicationId =
+          userUgi.doAs(new PrivilegedExceptionAction<ApplicationId>() {
+            @Override public ApplicationId run()
+                throws IOException, YarnException {
+              ApplicationId applicationId = serviceClient.actionCreate(service);
+              return applicationId;
+            }
+          });
+      LOG.info("Service {} submitted with Application ID: {}",
+          service.getName(), applicationId);
+    }
+  }
+
+  ServiceClient getServiceClient() {
+    return new ServiceClient();
+  }
+
+  private UserGroupInformation getProxyUser(String user) {
+    UserGroupInformation ugi;
+    if (UserGroupInformation.isSecurityEnabled()) {
+      ugi = UserGroupInformation.createProxyUser(user, loginUGI);
+    } else {
+      ugi = UserGroupInformation.createRemoteUser(user);
+    }
+    return ugi;
+  }
+
+  // Scan for both launch service types, i.e. sync and async.
+  void scanForUserServices() throws IOException {
+    if (systemServiceDir == null) {
+      return;
+    }
+    try {
+      LOG.info("Scan for launch type on {}", systemServiceDir);
+      RemoteIterator<FileStatus> iterLaunchType = list(systemServiceDir);
+      while (iterLaunchType.hasNext()) {
+        FileStatus launchType = iterLaunchType.next();
+        if (!launchType.isDirectory()) {
+          LOG.debug("Scanner skips for unknown file {}", launchType.getPath());
+          continue;
+        }
+        if (launchType.getPath().getName().equals(SYNC)) {
+          scanForUserServiceDefinition(launchType.getPath(), syncUserServices);
+        } else if (launchType.getPath().getName().equals(ASYNC)) {
+          scanForUserServiceDefinition(launchType.getPath(), asyncUserServices);
+        } else {
+          LOG.debug("Scanner skips for unknown dir {}.", launchType.getPath());
+        }
+      }
+    } catch (FileNotFoundException e) {
+      LOG.warn("System service directory {} doesn't not exist.",
+          systemServiceDir);
+    }
+  }
+
+  // Files are under <launch-type dir>/<user>. Scan two levels:
+  // 1st level for users
+  // 2nd level for service definitions under each user
+  private void scanForUserServiceDefinition(Path userDirPath,
+      Map<String, Set<Service>> userServices) throws IOException {
+    LOG.info("Scan for users on {}", userDirPath);
+    RemoteIterator<FileStatus> iterUsers = list(userDirPath);
+    while (iterUsers.hasNext()) {
+      FileStatus userDir = iterUsers.next();
+      // if 1st level is not user directory then skip it.
+      if (!userDir.isDirectory()) {
+        LOG.info(
+            "Service definition {} doesn't belong to any user. Ignoring.",
+            userDir.getPath().getName());
+        continue;
+      }
+      String userName = userDir.getPath().getName();
+      LOG.info("Scanning service definitions for user {}.", userName);
+
+      //2nd level scan
+      RemoteIterator<FileStatus> iterServices = list(userDir.getPath());
+      while (iterServices.hasNext()) {
+        FileStatus serviceCache = iterServices.next();
+        String filename = serviceCache.getPath().getName();
+        if (!serviceCache.isFile()) {
+          LOG.info("Scanner skips for unknown dir {}", filename);
+          continue;
+        }
+        if (!filename.endsWith(YARN_FILE_SUFFIX)) {
+          LOG.info("Scanner skips for unknown file extension, filename = {}",
+              filename);
+          skipCounter++;
+          continue;
+        }
+        Service service = getServiceDefinition(serviceCache.getPath());
+        if (service != null) {
+          Set<Service> services = userServices.get(userName);
+          if (services == null) {
+            services = new HashSet<>();
+            userServices.put(userName, services);
+          }
+          if (!services.add(service)) {
+            int count = ignoredUserServices.containsKey(userName) ?
+                ignoredUserServices.get(userName) : 0;
+            ignoredUserServices.put(userName, count + 1);
+            LOG.warn(
+                "Ignoring service {} for the user {} as it is already present,"
+                    + " filename = {}", service.getName(), userName, filename);
+          } else {
+            LOG.info("Added service {} for the user {}, filename = {}",
+                service.getName(), userName, filename);
+          }
+        }
+      }
+    }
+  }
+
+  private Service getServiceDefinition(Path filePath) {
+    Service service = null;
+    try {
+      LOG.debug("Loading service definition from FS: {}", filePath);
+      service = jsonSerDeser.load(fs, filePath);
+    } catch (IOException e) {
+      LOG.error("Error while loading service definition from FS: " + filePath,
+          e);
+    }
+    return service;
+  }
+
+  private RemoteIterator<FileStatus> list(Path path) throws IOException {
+    return new StoppableRemoteIterator(fs.listStatusIterator(path));
+  }
+
+  @VisibleForTesting Map<String, Integer> getIgnoredUserServices() {
+    return ignoredUserServices;
+  }
+
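+  // Wraps the underlying listing iterator so directory scans stop promptly
+  // once the executors have been signalled to shut down.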
+  private class StoppableRemoteIterator implements RemoteIterator<FileStatus> {
+    private final RemoteIterator<FileStatus> remote;
+
+    StoppableRemoteIterator(RemoteIterator<FileStatus> remote) {
+      this.remote = remote;
+    }
+
+    @Override public boolean hasNext() throws IOException {
+      return !stopExecutors.get() && remote.hasNext();
+    }
+
+    @Override public FileStatus next() throws IOException {
+      return remote.next();
+    }
+  }
+
+  @VisibleForTesting
+  Map<String, Set<Service>> getSyncUserServices() {
+    return syncUserServices;
+  }
+
+  @VisibleForTesting int getSkipCounter() {
+    return skipCounter;
+  }
+}
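For reference, the scanner above assumes a two-level layout per launch type under the configured system service directory; only regular files ending in the .yarnfile suffix are loaded, and anything else is skipped and counted. A sketch with illustrative user and file names:

    <system-service-dir>/
        sync/
            user1/
                example-app1.yarnfile
            user2/
                example-app1.yarnfile
        async/
            user1/
                example-app2.yarnfile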

+ 180 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/test/java/org/apache/hadoop/yarn/service/client/TestSystemServiceImpl.java

@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.service.client;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.service.api.records.Service;
+import org.apache.hadoop.yarn.service.conf.YarnServiceConf;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Test class for system service manager.
+ */
+public class TestSystemServiceImpl {
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(TestSystemServiceImpl.class);
+  private SystemServiceManagerImpl systemService;
+  private Configuration conf;
+  private String resourcePath = "users";
+
+  private String[] users = new String[] {"user1", "user2"};
+  private static Map<String, Set<String>> loadedServices = new HashMap<>();
+  private static Map<String, Set<String>> submittedServices = new HashMap<>();
+
+  @Before
+  public void setup() {
+    File file = new File(
+        getClass().getClassLoader().getResource(resourcePath).getFile());
+    conf = new Configuration();
+    conf.set(YarnServiceConf.YARN_SERVICES_SYSTEM_SERVICE_DIRECTORY,
+        file.getAbsolutePath());
+    systemService = new SystemServiceManagerImpl() {
+      @Override ServiceClient getServiceClient() {
+        return new TestServiceClient();
+      }
+    };
+    systemService.init(conf); // start is invoked explicitly by the tests
+
+    constructUserService(users[0], "example-app1");
+    constructUserService(users[1], "example-app1", "example-app2");
+  }
+
+  @After
+  public void tearDown() {
+    systemService.stop();
+  }
+
+  @Test
+  public void testSystemServiceSubmission() throws Exception {
+    systemService.start();
+
+    /* verify the ignored services count */
+    Map<String, Integer> ignoredUserServices =
+        systemService.getIgnoredUserServices();
+    Assert.assertEquals(1, ignoredUserServices.size());
+    Assert.assertTrue("User user1 doesn't exist.",
+        ignoredUserServices.containsKey(users[0]));
+    int count = ignoredUserServices.get(users[0]);
+    Assert.assertEquals(1, count);
+    Assert.assertEquals(1, systemService.getSkipCounter());
+
+    Map<String, Set<Service>> userServices =
+        systemService.getSyncUserServices();
+    Assert.assertEquals(loadedServices.size(), userServices.size());
+    verifyForScannedUserServices(userServices);
+
+    verifyForLaunchedUserServices();
+
+    // Launch the services a second time to cover the already-exists scenario.
+    systemService.launchUserService(userServices);
+    verifyForLaunchedUserServices();
+  }
+
+  private void verifyForScannedUserServices(
+      Map<String, Set<Service>> userServices) {
+    for (String user : users) {
+      Set<Service> services = userServices.get(user);
+      Set<String> serviceNames = loadedServices.get(user);
+      Assert.assertEquals(serviceNames.size(), services.size());
+      Iterator<Service> iterator = services.iterator();
+      while (iterator.hasNext()) {
+        Service next = iterator.next();
+        Assert.assertTrue(
+            "Service name doesn't exist in expected userServices "
+                + serviceNames, serviceNames.contains(next.getName()));
+      }
+    }
+  }
+
+  public void constructUserService(String user, String... serviceNames) {
+    Set<String> service = loadedServices.get(user);
+    if (service == null) {
+      service = new HashSet<>();
+      for (String serviceName : serviceNames) {
+        service.add(serviceName);
+      }
+      loadedServices.put(user, service);
+    }
+  }
+
+  class TestServiceClient extends ServiceClient {
+    @Override
+    protected void serviceStart() throws Exception {
+      // do nothing
+    }
+
+    @Override
+    protected void serviceStop() throws Exception {
+      // do nothing
+    }
+
+    @Override
+    protected void serviceInit(Configuration configuration)
+        throws Exception {
+      // do nothing
+    }
+
+    @Override
+    public ApplicationId actionCreate(Service service)
+        throws YarnException, IOException {
+      String userName =
+          UserGroupInformation.getCurrentUser().getShortUserName();
+      Set<String> services = submittedServices.get(userName);
+      if (services == null) {
+        services = new HashSet<>();
+        submittedServices.put(userName, services);
+      }
+      if (services.contains(service.getName())) {
+        String message = "Failed to create service " + service.getName()
+            + ", because it already exists.";
+        throw new YarnException(message);
+      }
+      services.add(service.getName());
+      return ApplicationId.newInstance(System.currentTimeMillis(), 1);
+    }
+  }
+
+  private void verifyForLaunchedUserServices() {
+    Assert.assertEquals(loadedServices.size(), submittedServices.size());
+    for (Map.Entry<String, Set<String>> entry : submittedServices.entrySet()) {
+      String user = entry.getKey();
+      Set<String> serviceSet = entry.getValue();
+      Assert.assertTrue(loadedServices.containsKey(user));
+      Set<String> services = loadedServices.get(user);
+      Assert.assertEquals(services.size(), serviceSet.size());
+      Assert.assertTrue(services.containsAll(serviceSet));
+    }
+  }
+}

+ 16 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/test/resources/users/sync/user1/example-app1.yarnfile

@@ -0,0 +1,16 @@
+{
+  "name": "example-app1",
+  "version": "1.0.0",
+  "components" :
+  [
+    {
+      "name": "simple",
+      "number_of_containers": 1,
+      "launch_command": "sleep 2",
+      "resource": {
+        "cpus": 1,
+        "memory": "128"
+      }
+    }
+  ]
+}

+ 16 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/test/resources/users/sync/user1/example-app2.yarnfile

@@ -0,0 +1,16 @@
+{
+  "name": "example-app1",
+  "version": "1.0.0",
+  "components" :
+  [
+    {
+      "name": "simple",
+      "number_of_containers": 1,
+      "launch_command": "sleep 2",
+      "resource": {
+        "cpus": 1,
+        "memory": "128"
+      }
+    }
+  ]
+}

+ 16 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/test/resources/users/sync/user1/example-app3.json

@@ -0,0 +1,16 @@
+{
+  "name": "example-app3",
+  "version": "1.0.0",
+  "components" :
+  [
+    {
+      "name": "simple",
+      "number_of_containers": 1,
+      "launch_command": "sleep 2",
+      "resource": {
+        "cpus": 1,
+        "memory": "128"
+      }
+    }
+  ]
+}

+ 16 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/test/resources/users/sync/user2/example-app1.yarnfile

@@ -0,0 +1,16 @@
+{
+  "name": "example-app1",
+  "version": "1.0.0",
+  "components" :
+  [
+    {
+      "name": "simple",
+      "number_of_containers": 1,
+      "launch_command": "sleep 2",
+      "resource": {
+        "cpus": 1,
+        "memory": "128"
+      }
+    }
+  ]
+}

+ 16 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services-api/src/test/resources/users/sync/user2/example-app2.yarnfile

@@ -0,0 +1,16 @@
+{
+  "name": "example-app2",
+  "version": "1.0.0",
+  "components" :
+  [
+    {
+      "name": "simple",
+      "number_of_containers": 1,
+      "launch_command": "sleep 2",
+      "resource": {
+        "cpus": 1,
+        "memory": "128"
+      }
+    }
+  ]
+}

+ 2 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/conf/YarnServiceConf.java

@@ -50,6 +50,8 @@ public class YarnServiceConf {
   public static final String ROLLING_LOG_INCLUSION_PATTERN = "yarn.service.rolling-log.include-pattern";
   public static final String ROLLING_LOG_EXCLUSION_PATTERN = "yarn.service.rolling-log.exclude-pattern";
 
+  public static final String YARN_SERVICES_SYSTEM_SERVICE_DIRECTORY =
+      YARN_SERVICE_PREFIX + "system-service.dir";
 
   /**
    * The yarn service base path:

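A minimal sketch of wiring the new key programmatically; the class name and path value here are assumptions for illustration:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.yarn.conf.YarnConfiguration;
    import org.apache.hadoop.yarn.service.conf.YarnServiceConf;

    public class SystemServiceDirSetup {
      public static void main(String[] args) {
        Configuration conf = new YarnConfiguration();
        // Scan root for system service definitions; illustrative path.
        conf.set(YarnServiceConf.YARN_SERVICES_SYSTEM_SERVICE_DIRECTORY,
            "/services/system-services");
        System.out.println(
            conf.get(YarnServiceConf.YARN_SERVICES_SYSTEM_SERVICE_DIRECTORY));
      }
    }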
+ 156 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestSystemServiceManager.java

@@ -0,0 +1,156 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.service;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.registry.client.api.RegistryOperations;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.service.api.records.Artifact;
+import org.apache.hadoop.yarn.service.api.records.ComponentState;
+import org.apache.hadoop.yarn.service.api.records.Service;
+import org.apache.hadoop.yarn.service.api.records.ServiceState;
+import org.apache.hadoop.yarn.service.exceptions.SliderException;
+import org.apache.hadoop.yarn.service.registry.YarnRegistryViewForProviders;
+import org.apache.hadoop.yarn.service.utils.ServiceApiUtil;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.Map;
+
+import static org.mockito.Mockito.mock;
+
+/**
+ * Tests for {@link ServiceManager}.
+ */
+public class TestSystemServiceManager {
+
+  @Rule
+  public ServiceTestUtils.ServiceFSWatcher rule =
+      new ServiceTestUtils.ServiceFSWatcher();
+
+  @Test
+  public void testUpgrade() throws IOException, SliderException {
+    ServiceManager serviceManager = createTestServiceManager("testUpgrade");
+    upgrade(serviceManager, "v2", false);
+    Assert.assertEquals("service not upgraded", ServiceState.UPGRADING,
+        serviceManager.getServiceSpec().getState());
+  }
+
+  @Test
+  public void testRestartNothingToUpgrade()
+      throws IOException, SliderException {
+    ServiceManager serviceManager = createTestServiceManager("testRestart");
+    upgrade(serviceManager, "v2", false);
+
+    //make components stable
+    serviceManager.getServiceSpec().getComponents().forEach(comp -> {
+      comp.setState(ComponentState.STABLE);
+    });
+    serviceManager.handle(new ServiceEvent(ServiceEventType.START));
+    Assert.assertEquals("service not re-started", ServiceState.STABLE,
+        serviceManager.getServiceSpec().getState());
+  }
+
+  @Test
+  public void testRestartWithPendingUpgrade()
+      throws IOException, SliderException {
+    ServiceManager serviceManager = createTestServiceManager("testRestart");
+    upgrade(serviceManager, "v2", true);
+    serviceManager.handle(new ServiceEvent(ServiceEventType.START));
+    Assert.assertEquals("service should still be upgrading",
+        ServiceState.UPGRADING, serviceManager.getServiceSpec().getState());
+  }
+
+
+  private void upgrade(ServiceManager service, String version,
+      boolean upgradeArtifact)
+      throws IOException, SliderException {
+    Service upgradedDef = ServiceTestUtils.createExampleApplication();
+    upgradedDef.setName(service.getName());
+    upgradedDef.setVersion(version);
+    if (upgradeArtifact) {
+      Artifact upgradedArtifact = createTestArtifact("2");
+      upgradedDef.getComponents().forEach(component -> {
+        component.setArtifact(upgradedArtifact);
+      });
+    }
+    writeUpgradedDef(upgradedDef);
+    ServiceEvent upgradeEvent = new ServiceEvent(ServiceEventType.UPGRADE);
+    upgradeEvent.setVersion("v2");
+    service.handle(upgradeEvent);
+  }
+
+  private ServiceManager createTestServiceManager(String name)
+      throws IOException {
+    ServiceContext context = new ServiceContext();
+    context.service = createBaseDef(name);
+    context.fs = rule.getFs();
+
+    context.scheduler = new ServiceScheduler(context) {
+      @Override
+      protected YarnRegistryViewForProviders createYarnRegistryOperations(
+          ServiceContext context, RegistryOperations registryClient) {
+        return mock(YarnRegistryViewForProviders.class);
+      }
+    };
+
+    context.scheduler.init(rule.getConf());
+
+    Map<String, org.apache.hadoop.yarn.service.component.Component>
+        componentState = context.scheduler.getAllComponents();
+    context.service.getComponents().forEach(component -> {
+      componentState.put(component.getName(),
+          new org.apache.hadoop.yarn.service.component.Component(component,
+              1L, context));
+    });
+    return new ServiceManager(context);
+  }
+
+  static Service createBaseDef(String name) {
+    ApplicationId applicationId = ApplicationId.newInstance(
+        System.currentTimeMillis(), 1);
+    Service serviceDef = ServiceTestUtils.createExampleApplication();
+    serviceDef.setId(applicationId.toString());
+    serviceDef.setName(name);
+    serviceDef.setState(ServiceState.STARTED);
+    Artifact artifact = createTestArtifact("1");
+
+    serviceDef.getComponents().forEach(component ->
+        component.setArtifact(artifact));
+    return serviceDef;
+  }
+
+  static Artifact createTestArtifact(String artifactId) {
+    Artifact artifact = new Artifact();
+    artifact.setId(artifactId);
+    artifact.setType(Artifact.TypeEnum.TARBALL);
+    return artifact;
+  }
+
+  private void writeUpgradedDef(Service upgradedDef)
+      throws IOException, SliderException {
+    Path upgradePath = rule.getFs().buildClusterUpgradeDirPath(
+        upgradedDef.getName(), upgradedDef.getVersion());
+    ServiceApiUtil.createDirAndPersistApp(rule.getFs(), upgradePath,
+        upgradedDef);
+  }
+
+}

+ 41 - 6
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/TimelineV2Client.java

@@ -54,9 +54,10 @@ public abstract class TimelineV2Client extends CompositeService {
 
   /**
    * <p>
-   * Send the information of a number of conceptual entities to the timeline
-   * service v.2 collector. It is a blocking API. The method will not return
-   * until all the put entities have been persisted.
+   * Send the information of a number of conceptual entities within the scope
+   * of a YARN application to the timeline service v.2 collector. It is a
+   * blocking API. The method will not return until all the put entities have
+   * been persisted.
    * </p>
    *
    * @param entities the collection of {@link TimelineEntity}
@@ -69,9 +70,10 @@ public abstract class TimelineV2Client extends CompositeService {
 
   /**
    * <p>
-   * Send the information of a number of conceptual entities to the timeline
-   * service v.2 collector. It is an asynchronous API. The method will return
-   * once all the entities are received.
+   * Send the information of a number of conceptual entities within the scope
+   * of a YARN application to the timeline service v.2 collector. It is an
+   * asynchronous API. The method will return once all the entities are
+   * received.
    * </p>
    *
    * @param entities the collection of {@link TimelineEntity}
@@ -93,4 +95,37 @@ public abstract class TimelineV2Client extends CompositeService {
    * address and timeline delegation token.
    */
   public abstract void setTimelineCollectorInfo(CollectorInfo collectorInfo);
+
+
+  /**
+   * <p>
+   * Send the information of a number of conceptual entities within the scope of
+   * a sub-application to the timeline service v.2 collector. It is a blocking
+   * API. The method will not return until all the put entities have been
+   * persisted.
+   * </p>
+   *
+   * @param entities the collection of {@link TimelineEntity}
+   * @throws IOException  if there are I/O errors
+   * @throws YarnException if entities are incomplete/invalid
+   */
+  @Public
+  public abstract void putSubAppEntities(TimelineEntity... entities)
+      throws IOException, YarnException;
+
+  /**
+   * <p>
+   * Send the information of a number of conceptual entities within the scope
+   * of a sub-application to the timeline service v.2 collector. It is an
+   * asynchronous API. The method will return once all the entities are
+   * received.
+   * </p>
+   *
+   * @param entities the collection of {@link TimelineEntity}
+   * @throws IOException  if there are I/O errors
+   * @throws YarnException if entities are incomplete/invalid
+   */
+  @Public
+  public abstract void putSubAppEntitiesAsync(TimelineEntity... entities)
+      throws IOException, YarnException;
 }
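A hedged caller-side sketch of the new sub-application publish path; the class name, ApplicationId, entity type, and id are illustrative, and in a real AM the collector address arrives via setTimelineCollectorInfo before any put:

    import org.apache.hadoop.yarn.api.records.ApplicationId;
    import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
    import org.apache.hadoop.yarn.client.api.TimelineV2Client;
    import org.apache.hadoop.yarn.conf.YarnConfiguration;

    public class SubAppPublishSketch {
      public static void main(String[] args) throws Exception {
        ApplicationId appId =
            ApplicationId.newInstance(System.currentTimeMillis(), 1);
        TimelineV2Client client = TimelineV2Client.createTimelineClient(appId);
        client.init(new YarnConfiguration());
        client.start();
        // In a real AM the collector address is supplied first via
        // client.setTimelineCollectorInfo(collectorInfo).

        TimelineEntity entity = new TimelineEntity();
        entity.setType("SUB_APP_METRIC"); // illustrative type
        entity.setId("entity-1");
        client.putSubAppEntities(entity);      // blocking
        client.putSubAppEntitiesAsync(entity); // returns once queued
        client.stop();
      }
    }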

+ 11 - 12
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java

@@ -82,7 +82,7 @@ public class TimelineClientImpl extends TimelineClient {
   @VisibleForTesting
   protected String doAsUser;
 
-  private float timelineServiceVersion;
+  private boolean timelineServiceV15Enabled;
   private TimelineWriter timelineWriter;
 
   private String timelineServiceAddress;
@@ -96,15 +96,15 @@ public class TimelineClientImpl extends TimelineClient {
   }
 
   protected void serviceInit(Configuration conf) throws Exception {
-    timelineServiceVersion =
-        conf.getFloat(YarnConfiguration.TIMELINE_SERVICE_VERSION,
-            YarnConfiguration.DEFAULT_TIMELINE_SERVICE_VERSION);
     if (!YarnConfiguration.timelineServiceV1Enabled(conf)) {
       throw new IOException("Timeline V1 client is not properly configured. "
           + "Either timeline service is not enabled or version is not set to"
           + " 1.x");
     }
-    LOG.info("Timeline service address: " + getTimelineServiceAddress());
+
+    timelineServiceV15Enabled =
+        YarnConfiguration.timelineServiceV15Enabled(conf);
+
     UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
     UserGroupInformation realUgi = ugi.getRealUser();
     if (realUgi != null) {
@@ -126,6 +126,7 @@ public class TimelineClientImpl extends TimelineClient {
           conf.get(YarnConfiguration.TIMELINE_SERVICE_WEBAPP_ADDRESS,
               YarnConfiguration.DEFAULT_TIMELINE_SERVICE_WEBAPP_ADDRESS);
     }
+    LOG.info("Timeline service address: " + getTimelineServiceAddress());
     super.serviceInit(conf);
   }
 
@@ -147,7 +148,7 @@ public class TimelineClientImpl extends TimelineClient {
   protected TimelineWriter createTimelineWriter(Configuration conf,
       UserGroupInformation ugi, Client webClient, URI uri)
       throws IOException {
-    if (Float.compare(this.timelineServiceVersion, 1.5f) == 0) {
+    if (timelineServiceV15Enabled) {
       return new FileSystemTimelineWriter(
           conf, ugi, webClient, uri);
     } else {
@@ -406,10 +407,9 @@ public class TimelineClientImpl extends TimelineClient {
   public TimelinePutResponse putEntities(ApplicationAttemptId appAttemptId,
       TimelineEntityGroupId groupId, TimelineEntity... entities)
       throws IOException, YarnException {
-    if (Float.compare(this.timelineServiceVersion, 1.5f) != 0) {
+    if (!timelineServiceV15Enabled) {
       throw new YarnException(
-        "This API is not supported under current Timeline Service Version: "
-            + timelineServiceVersion);
+        "This API is not supported under current Timeline Service Version:");
     }
 
     return timelineWriter.putEntities(appAttemptId, groupId, entities);
@@ -418,10 +418,9 @@ public class TimelineClientImpl extends TimelineClient {
   @Override
   public void putDomain(ApplicationAttemptId appAttemptId,
       TimelineDomain domain) throws IOException, YarnException {
-    if (Float.compare(this.timelineServiceVersion, 1.5f) != 0) {
+    if (!timelineServiceV15Enabled) {
       throw new YarnException(
-        "This API is not supported under current Timeline Service Version: "
-            + timelineServiceVersion);
+        "This API is not supported under current Timeline Service Version:");
     }
     timelineWriter.putDomain(appAttemptId, domain);
   }

+ 25 - 5
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineV2ClientImpl.java

@@ -69,6 +69,7 @@ public class TimelineV2ClientImpl extends TimelineV2Client {
   private static final String RESOURCE_URI_STR_V2 = "/ws/v2/timeline/";
 
   private TimelineEntityDispatcher entityDispatcher;
+  private TimelineEntityDispatcher subAppEntityDispatcher;
   private volatile String timelineServiceAddress;
   @VisibleForTesting
   volatile Token currentTimelineToken = null;
@@ -124,6 +125,7 @@ public class TimelineV2ClientImpl extends TimelineV2Client {
         YarnConfiguration.TIMELINE_SERVICE_CLIENT_RETRY_INTERVAL_MS,
         YarnConfiguration.DEFAULT_TIMELINE_SERVICE_CLIENT_RETRY_INTERVAL_MS);
     entityDispatcher = new TimelineEntityDispatcher(conf);
+    subAppEntityDispatcher = new TimelineEntityDispatcher(conf);
     super.serviceInit(conf);
   }
 
@@ -131,24 +133,38 @@ public class TimelineV2ClientImpl extends TimelineV2Client {
   protected void serviceStart() throws Exception {
     super.serviceStart();
     entityDispatcher.start();
+    subAppEntityDispatcher.start();
   }
 
   @Override
   protected void serviceStop() throws Exception {
     entityDispatcher.stop();
+    subAppEntityDispatcher.stop();
     super.serviceStop();
   }
 
   @Override
   public void putEntities(TimelineEntity... entities)
       throws IOException, YarnException {
-    entityDispatcher.dispatchEntities(true, entities);
+    entityDispatcher.dispatchEntities(true, entities, false);
   }
 
   @Override
   public void putEntitiesAsync(TimelineEntity... entities)
       throws IOException, YarnException {
-    entityDispatcher.dispatchEntities(false, entities);
+    entityDispatcher.dispatchEntities(false, entities, false);
+  }
+
+  @Override
+  public void putSubAppEntities(TimelineEntity... entities)
+      throws IOException, YarnException {
+    subAppEntityDispatcher.dispatchEntities(true, entities, true);
+  }
+
+  @Override
+  public void putSubAppEntitiesAsync(TimelineEntity... entities)
+      throws IOException, YarnException {
+    subAppEntityDispatcher.dispatchEntities(false, entities, true);
   }
 
   @Override
@@ -346,13 +362,15 @@ public class TimelineV2ClientImpl extends TimelineV2Client {
     private final TimelineEntities entities;
     private final boolean isSync;
 
-    EntitiesHolder(final TimelineEntities entities, final boolean isSync) {
+    EntitiesHolder(final TimelineEntities entities, final boolean isSync,
+        final boolean subappwrite) {
       super(new Callable<Void>() {
         // publishEntities()
         public Void call() throws Exception {
           MultivaluedMap<String, String> params = new MultivaluedMapImpl();
           params.add("appid", getContextAppId().toString());
           params.add("async", Boolean.toString(!isSync));
+          params.add("subappwrite", Boolean.toString(subappwrite));
           putObjects("entities", params, entities);
           return null;
         }
@@ -496,7 +514,8 @@ public class TimelineV2ClientImpl extends TimelineV2Client {
     }
 
     public void dispatchEntities(boolean sync,
-        TimelineEntity[] entitiesTobePublished) throws YarnException {
+        TimelineEntity[] entitiesTobePublished, boolean subappwrite)
+        throws YarnException {
       if (executor.isShutdown()) {
         throw new YarnException("Timeline client is in the process of stopping,"
             + " not accepting any more TimelineEntities");
@@ -509,7 +528,8 @@ public class TimelineV2ClientImpl extends TimelineV2Client {
       }
 
      // create a holder and place it in the queue
-      EntitiesHolder entitiesHolder = new EntitiesHolder(entities, sync);
+      EntitiesHolder entitiesHolder =
+          new EntitiesHolder(entities, sync, subappwrite);
       try {
         timelineEntityQueue.put(entitiesHolder);
       } catch (InterruptedException e) {

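Given the dispatcher changes above, each sub-application publish should reduce to a collector call of roughly this shape; the host and application id are illustrative, while the path and query parameters come from RESOURCE_URI_STR_V2 and the params set in EntitiesHolder:

    PUT http://<collector-host>/ws/v2/timeline/entities
        ?appid=application_1530000000000_0001&async=false&subappwrite=true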
+ 2 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/ifile/LogAggregationIndexedFileController.java

@@ -865,7 +865,8 @@ public class LogAggregationIndexedFileController
       byte[] array = new byte[offset];
       fsDataIStream.seek(
           fileLength - offset - Integer.SIZE/ Byte.SIZE - UUID_LENGTH);
-      int actual = fsDataIStream.read(array);
+      fsDataIStream.readFully(array);
+      int actual = array.length;
       if (actual != offset) {
         throw new IOException("Error on loading log meta from "
             + remoteLogPath);

+ 1 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/timeline/TimelineUtils.java

@@ -126,8 +126,7 @@ public class TimelineUtils {
    * version equal to 1.5.
    */
   public static boolean timelineServiceV1_5Enabled(Configuration conf) {
-    return timelineServiceEnabled(conf) &&
-        Math.abs(getTimelineServiceVersion(conf) - 1.5) < 0.00001;
+    return YarnConfiguration.timelineServiceV15Enabled(conf);
   }
 
   public static TimelineAbout createTimelineAbout(String about) {

+ 8 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml

@@ -1786,6 +1786,14 @@
     <value>false</value>
   </property>
 
+  <property>
+    <description>A configurable value to pass to the Docker Stop command. This value
+      defines the number of seconds between the docker stop command sending
+      a SIGTERM and a SIGKILL.</description>
+    <name>yarn.nodemanager.runtime.linux.docker.stop.grace-period</name>
+    <value>10</value>
+  </property>
+
   <property>
     <description>The mode in which the Java Container Sandbox should run detailed by
       the JavaSandboxLinuxContainerRuntime.</description>

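A sketch of overriding the grace period in code, assuming the NM_DOCKER_STOP_GRACE_PERIOD constant added in YarnConfiguration by this change; the class name and value are illustrative:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.yarn.conf.YarnConfiguration;

    public class DockerStopGraceSketch {
      public static void main(String[] args) {
        Configuration conf = new YarnConfiguration();
        // Give containers 30s (instead of the default 10s) to exit on SIGTERM.
        conf.setInt(YarnConfiguration.NM_DOCKER_STOP_GRACE_PERIOD, 30);
      }
    }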
+ 25 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/service/SystemServiceManager.java

@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.service;
+
+/**
+ * Marker interface for starting services from the RM. The implementation
+ * should launch the configured services.
+ */
+public interface SystemServiceManager {
+
+}

+ 27 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/service/package-info.java

@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Package org.apache.hadoop.yarn.server.service contains service related
+ * classes.
+ */
+@InterfaceAudience.Private @InterfaceStability.Unstable
+
+package org.apache.hadoop.yarn.server.service;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;

+ 1 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/AMRMProxyService.java

@@ -261,7 +261,7 @@ public class AMRMProxyService extends CompositeService implements
         // Create the intercepter pipeline for the AM
         initializePipeline(attemptId, user, amrmToken, localToken,
             entry.getValue(), true, amCred);
-      } catch (IOException e) {
+      } catch (Throwable e) {
         LOG.error("Exception when recovering " + attemptId
             + ", removing it from NMStateStore and move on", e);
         this.nmContext.getNMStateStore().removeAMRMProxyAppContext(attemptId);

+ 152 - 8
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java

@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.yarn.server.nodemanager.containermanager;
 
+import java.io.IOException;
+import java.net.URI;
 import java.nio.ByteBuffer;
 import java.util.Collection;
 import java.util.Collections;
@@ -29,45 +31,70 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileContext;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.service.AbstractService;
 import org.apache.hadoop.service.Service;
 import org.apache.hadoop.service.ServiceStateChangeListener;
 import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.LocalResourceType;
+import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
+import org.apache.hadoop.yarn.api.records.URL;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.event.EventHandler;
+import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
 import org.apache.hadoop.yarn.server.api.ApplicationInitializationContext;
 import org.apache.hadoop.yarn.server.api.ApplicationTerminationContext;
 import org.apache.hadoop.yarn.server.api.AuxiliaryLocalPathHandler;
 import org.apache.hadoop.yarn.server.api.AuxiliaryService;
 import org.apache.hadoop.yarn.server.api.ContainerInitializationContext;
 import org.apache.hadoop.yarn.server.api.ContainerTerminationContext;
-
+import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
+import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.deletion.task.FileDeletionTask;
+import org.apache.hadoop.yarn.util.FSDownload;
 import com.google.common.base.Preconditions;
 
 public class AuxServices extends AbstractService
     implements ServiceStateChangeListener, EventHandler<AuxServicesEvent> {
 
+  public static final String NM_AUX_SERVICE_DIR = "nmAuxService";
+  public static final FsPermission NM_AUX_SERVICE_DIR_PERM =
+      new FsPermission((short) 0700);
+
   static final String STATE_STORE_ROOT_NAME = "nm-aux-services";
 
   private static final Logger LOG =
        LoggerFactory.getLogger(AuxServices.class);
+  private static final String DEL_SUFFIX = "_DEL_";
 
   protected final Map<String,AuxiliaryService> serviceMap;
   protected final Map<String,ByteBuffer> serviceMetaData;
   private final AuxiliaryLocalPathHandler auxiliaryLocalPathHandler;
+  private final LocalDirsHandlerService dirsHandler;
+  private final DeletionService delService;
+  private final UserGroupInformation userUGI;
 
   private final Pattern p = Pattern.compile("^[A-Za-z_]+[A-Za-z0-9_]*$");
 
-  public AuxServices(AuxiliaryLocalPathHandler auxiliaryLocalPathHandler) {
+  public AuxServices(AuxiliaryLocalPathHandler auxiliaryLocalPathHandler,
+      Context nmContext, DeletionService deletionService) {
     super(AuxServices.class.getName());
     serviceMap =
       Collections.synchronizedMap(new HashMap<String,AuxiliaryService>());
     serviceMetaData =
       Collections.synchronizedMap(new HashMap<String,ByteBuffer>());
     this.auxiliaryLocalPathHandler = auxiliaryLocalPathHandler;
+    this.dirsHandler = nmContext.getLocalDirsHandler();
+    this.delService = deletionService;
+    this.userUGI = getRemoteUgi();
     // Obtain services from configuration in init()
   }
 
@@ -125,15 +152,103 @@ public class AuxServices extends AbstractService
         String classKey = String.format(
             YarnConfiguration.NM_AUX_SERVICE_FMT, sName);
         String className = conf.get(classKey);
-        final String appClassPath = conf.get(String.format(
+        final String appLocalClassPath = conf.get(String.format(
             YarnConfiguration.NM_AUX_SERVICES_CLASSPATH, sName));
+        final String appRemoteClassPath = conf.get(String.format(
+            YarnConfiguration.NM_AUX_SERVICE_REMOTE_CLASSPATH, sName));
         AuxiliaryService s = null;
-        boolean useCustomerClassLoader = appClassPath != null
-            && !appClassPath.isEmpty() && className != null
-            && !className.isEmpty();
+        boolean useCustomerClassLoader = ((appLocalClassPath != null
+            && !appLocalClassPath.isEmpty()) ||
+            (appRemoteClassPath != null && !appRemoteClassPath.isEmpty()))
+            && className != null && !className.isEmpty();
         if (useCustomerClassLoader) {
-          s = AuxiliaryServiceWithCustomClassLoader.getInstance(
-              conf, className, appClassPath);
+          // load AuxiliaryService from local class path
+          if (appRemoteClassPath == null || appRemoteClassPath.isEmpty()) {
+            s = AuxiliaryServiceWithCustomClassLoader.getInstance(
+                conf, className, appLocalClassPath);
+          } else {
+            // load AuxiliaryService from remote class path
+            if (appLocalClassPath != null && !appLocalClassPath.isEmpty()) {
+              throw new YarnRuntimeException("The aux serivce:" + sName
+                  + " has configured local classpath:" + appLocalClassPath
+                  + " and remote classpath:" + appRemoteClassPath
+                  + ". Only one of them should be configured.");
+            }
+            FileContext localLFS = getLocalFileContext(conf);
+            // create NM aux-service dir in NM localdir if it does not exist.
+            Path nmAuxDir = dirsHandler.getLocalPathForWrite("."
+                + Path.SEPARATOR + NM_AUX_SERVICE_DIR);
+            if (!localLFS.util().exists(nmAuxDir)) {
+              try {
+                localLFS.mkdir(nmAuxDir, NM_AUX_SERVICE_DIR_PERM, true);
+              } catch (IOException ex) {
+                throw new YarnRuntimeException("Fail to create dir:"
+                    + nmAuxDir.toString(), ex);
+              }
+            }
+            Path src = new Path(appRemoteClassPath);
+            FileContext remoteLFS = getRemoteFileContext(src.toUri(), conf);
+            FileStatus scFileStatus = remoteLFS.getFileStatus(src);
+            if (!scFileStatus.getOwner().equals(
+                this.userUGI.getShortUserName())) {
+              throw new YarnRuntimeException("The remote jarfile owner:"
+                  + scFileStatus.getOwner() + " is not the same as the NM user:"
+                  + this.userUGI.getShortUserName() + ".");
+            }
+            if ((scFileStatus.getPermission().toShort() & 0022) != 0) {
+              throw new YarnRuntimeException("The remote jarfile should not "
+                  + "be writable by group or others. "
+                  + "The current Permission is "
+                  + scFileStatus.getPermission().toShort());
+            }
+            Path dest = null;
+            Path downloadDest = new Path(nmAuxDir,
+                className + "_" + scFileStatus.getModificationTime());
+            // check whether we need to re-download the jar
+            // from remote directory
+            Path targetDirPath = new Path(downloadDest,
+                scFileStatus.getPath().getName());
+            FileStatus[] allSubDirs = localLFS.util().listStatus(nmAuxDir);
+            boolean reDownload = true;
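+            // A subdir named <className>_<modTime> matching the remote jar's
+            // timestamp means the previous download is still current; any
+            // other dir for this class is stale and queued for deletion.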
+            for (FileStatus sub : allSubDirs) {
+              if (sub.getPath().getName().equals(downloadDest.getName())) {
+                reDownload = false;
+                dest = new Path(targetDirPath + Path.SEPARATOR + "*");
+                break;
+              } else {
+                if (sub.getPath().getName().contains(className) &&
+                    !sub.getPath().getName().endsWith(DEL_SUFFIX)) {
+                  Path delPath = new Path(sub.getPath().getParent(),
+                      sub.getPath().getName() + DEL_SUFFIX);
+                  localLFS.rename(sub.getPath(), delPath);
+                  LOG.info("delete old aux service jar dir:"
+                      + delPath.toString());
+                  FileDeletionTask deletionTask = new FileDeletionTask(
+                      this.delService, null, delPath, null);
+                  this.delService.delete(deletionTask);
+                }
+              }
+            }
+            if (reDownload) {
+              LocalResource scRsrc = LocalResource.newInstance(
+                  URL.fromURI(src.toUri()),
+                  LocalResourceType.ARCHIVE, LocalResourceVisibility.PRIVATE,
+                  scFileStatus.getLen(), scFileStatus.getModificationTime());
+              FSDownload download = new FSDownload(localLFS, null, conf,
+                  downloadDest, scRsrc, null);
+              try {
+                Path downloaded = download.call();
+                dest = new Path(downloaded + Path.SEPARATOR + "*");
+              } catch (Exception ex) {
+                throw new YarnRuntimeException(
+                    "Exception happened while downloading files "
+                    + "for aux-service:" + sName + " and remote-file-path:"
+                    + src, ex);
+              }
+            }
+            s = AuxiliaryServiceWithCustomClassLoader.getInstance(
+                conf, className, dest.toString());
+          }
           LOG.info("The aux service:" + sName
               + " are using the custom classloader");
         } else {
@@ -289,4 +404,33 @@ public class AuxServices extends AbstractService
         : "The auxService name is " + service.getName())
         + " and it got an error at event: " + eventType, th);
   }
+
+  FileContext getLocalFileContext(Configuration conf) {
+    try {
+      return FileContext.getLocalFSFileContext(conf);
+    } catch (IOException e) {
+      throw new YarnRuntimeException("Failed to access local fs");
+    }
+  }
+
+  FileContext getRemoteFileContext(final URI path, Configuration conf) {
+    try {
+      return FileContext.getFileContext(path, conf);
+    } catch (IOException e) {
+      throw new YarnRuntimeException("Failed to access remote fs");
+    }
+  }
+
+  private UserGroupInformation getRemoteUgi() {
+    UserGroupInformation remoteUgi;
+    try {
+      remoteUgi = UserGroupInformation.getCurrentUser();
+    } catch (IOException e) {
+      String msg = "Cannot obtain the user-name. Got exception: "
+          + StringUtils.stringifyException(e);
+      LOG.warn(msg);
+      throw new YarnRuntimeException(msg);
+    }
+    return remoteUgi;
+  }
 }
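A hedged configuration sketch for the remote-classpath path above; the class, service name, and jar location are illustrative, and exactly one of the local and remote classpath keys may be set per service:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.yarn.conf.YarnConfiguration;

    public class RemoteAuxServiceConfSketch {
      public static void main(String[] args) {
        Configuration conf = new YarnConfiguration();
        conf.setStrings(YarnConfiguration.NM_AUX_SERVICES, "my_shuffle");
        conf.set(String.format(
            YarnConfiguration.NM_AUX_SERVICE_FMT, "my_shuffle"),
            "org.example.MyShuffleService"); // hypothetical service class
        conf.set(String.format(
            YarnConfiguration.NM_AUX_SERVICE_REMOTE_CLASSPATH, "my_shuffle"),
            "hdfs:///aux/my-shuffle.jar"); // must be NM-user-owned, not
                                           // group/other writable
      }
    }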

+ 2 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java

@@ -253,7 +253,8 @@ public class ContainerManagerImpl extends CompositeService implements
     AuxiliaryLocalPathHandler auxiliaryLocalPathHandler =
         new AuxiliaryLocalPathHandlerImpl(dirsHandler);
     // Start configurable services
-    auxiliaryServices = new AuxServices(auxiliaryLocalPathHandler);
+    auxiliaryServices = new AuxServices(auxiliaryLocalPathHandler,
+        this.context, this.deletionService);
     auxiliaryServices.registerServiceListener(this);
     addService(auxiliaryServices);
 

+ 7 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java

@@ -245,6 +245,7 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
   private int userRemappingGidThreshold;
   private Set<String> capabilities;
   private boolean delayedRemovalAllowed;
+  private int dockerStopGracePeriod;
 
   /**
    * Return whether the given environment variables indicate that the operation
@@ -348,6 +349,10 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
     delayedRemovalAllowed = conf.getBoolean(
         YarnConfiguration.NM_DOCKER_ALLOW_DELAYED_REMOVAL,
         YarnConfiguration.DEFAULT_NM_DOCKER_ALLOW_DELAYED_REMOVAL);
+
+    dockerStopGracePeriod = conf.getInt(
+        YarnConfiguration.NM_DOCKER_STOP_GRACE_PERIOD,
+        YarnConfiguration.DEFAULT_NM_DOCKER_STOP_GRACE_PERIOD);
   }
 
   private Set<String> getDockerCapabilitiesFromConf() throws
@@ -1138,7 +1143,8 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
         DockerCommandExecutor.getContainerStatus(containerId, conf,
             privilegedOperationExecutor);
     if (DockerCommandExecutor.isStoppable(containerStatus)) {
-      DockerStopCommand dockerStopCommand = new DockerStopCommand(containerId);
+      DockerStopCommand dockerStopCommand = new DockerStopCommand(
+          containerId).setGracePeriod(dockerStopGracePeriod);
       DockerCommandExecutor.executeDockerCommand(dockerStopCommand, containerId,
           env, conf, privilegedOperationExecutor, false);
     } else {

+ 20 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java

@@ -152,6 +152,8 @@ public class ResourceLocalizationService extends CompositeService
        LoggerFactory.getLogger(ResourceLocalizationService.class);
   public static final String NM_PRIVATE_DIR = "nmPrivate";
   public static final FsPermission NM_PRIVATE_PERM = new FsPermission((short) 0700);
+  private static final FsPermission PUBLIC_FILECACHE_FOLDER_PERMS =
+      new FsPermission((short) 0755);
 
   private Server server;
   private InetSocketAddress localizationServerAddress;
@@ -881,6 +883,7 @@ public class ResourceLocalizationService extends CompositeService
                 publicRsrc.getPathForLocalization(key, publicRootPath,
                     delService);
             if (!publicDirDestPath.getParent().equals(publicRootPath)) {
+              createParentDirs(publicDirDestPath, publicRootPath);
               if (diskValidator != null) {
                 diskValidator.checkStatus(
                     new File(publicDirDestPath.toUri().getPath()));
@@ -932,6 +935,23 @@ public class ResourceLocalizationService extends CompositeService
       }
     }
 
+    private void createParentDirs(Path destDirPath, Path destDirRoot)
+        throws IOException {
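+      // Recurse toward the cache root first so missing ancestors are created
+      // top-down, each with the public file-cache permissions.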
+      if (destDirPath == null || destDirPath.equals(destDirRoot)) {
+        return;
+      }
+      createParentDirs(destDirPath.getParent(), destDirRoot);
+      createDir(destDirPath, PUBLIC_FILECACHE_FOLDER_PERMS);
+    }
+
+    private void createDir(Path dirPath, FsPermission perms)
+        throws IOException {
+      lfs.mkdir(dirPath, perms, false);
+      if (!perms.equals(perms.applyUMask(lfs.getUMask()))) {
+        lfs.setPermission(dirPath, perms);
+      }
+    }
+
     @Override
     public void run() {
       try {

+ 1 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainerMetrics.java

@@ -274,7 +274,7 @@ public class ContainerMetrics implements MetricsSource {
   }
 
   public void recordProcessId(String processId) {
-    registry.tag(PROCESSID_INFO, processId);
+    registry.tag(PROCESSID_INFO, processId, true);
   }
 
   public void recordResourceLimit(int vmemLimit, int pmemLimit, int cpuVcores) {

+ 5 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java

@@ -112,6 +112,11 @@ public abstract class BaseAMRMProxyTest {
     return this.amrmProxyService;
   }
 
+  protected Context getNMContext() {
+    Assert.assertNotNull(this.nmContext);
+    return this.nmContext;
+  }
+
   @Before
   public void setUp() throws IOException {
     this.conf = createConfiguration();

+ 42 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/TestAMRMProxyService.java

@@ -44,6 +44,7 @@ import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
 import org.apache.hadoop.yarn.server.MockResourceManagerFacade;
 import org.apache.hadoop.yarn.server.nodemanager.amrmproxy.AMRMProxyService.RequestInterceptorChainWrapper;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService.RecoveredAMRMProxyState;
 import org.apache.hadoop.yarn.util.Records;
 import org.junit.Assert;
 import org.junit.Test;
@@ -633,6 +634,35 @@ public class TestAMRMProxyService extends BaseAMRMProxyTest {
     mockRM = null;
   }
 
+  /**
+   * Test AMRMProxy restart with application recovery failure.
+   */
+  @Test
+  public void testAppRecoveryFailure() throws Exception {
+    Configuration conf = createConfiguration();
+    // Use the BadRequestInterceptorAcrossRestart for the chain
+    conf.set(YarnConfiguration.AMRM_PROXY_INTERCEPTOR_CLASS_PIPELINE,
+        BadRequestInterceptorAcrossRestart.class.getName());
+
+    mockRM = new MockResourceManagerFacade(new YarnConfiguration(conf), 0);
+
+    createAndStartAMRMProxyService(conf);
+
+    // Create an app entry in NMSS
+    registerApplicationMaster(1);
+
+    RecoveredAMRMProxyState state =
+        getNMContext().getNMStateStore().loadAMRMProxyState();
+    Assert.assertEquals(1, state.getAppContexts().size());
+
+    // AMRMProxy restarts and recovers
+    createAndStartAMRMProxyService(conf);
+
+    state = getNMContext().getNMStateStore().loadAMRMProxyState();
+    // The app that failed to recover should have been removed from NMSS
+    Assert.assertEquals(0, state.getAppContexts().size());
+  }
+
   /**
    * A mock intercepter implementation that uses the same mockRM instance across
    * restart.
@@ -672,4 +702,16 @@ public class TestAMRMProxyService extends BaseAMRMProxyTest {
     }
   }
 
+  /**
+   * A mock interceptor implementation that throws when recovering.
+   */
+  public static class BadRequestInterceptorAcrossRestart
+      extends MockRequestInterceptorAcrossRestart {
+
+    @Override
+    public void recover(Map<String, byte[]> recoveredDataMap) {
+      throw new RuntimeException("Kaboom");
+    }
+  }
+
 }

+ 156 - 11
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestAuxServices.java

@@ -25,8 +25,12 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
-import org.mockito.Mockito;
 import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+import static org.mockito.Matchers.anyString;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -37,6 +41,10 @@ import java.io.IOException;
 import java.net.URL;
 import java.net.URLClassLoader;
 import java.nio.ByteBuffer;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.nio.file.attribute.FileTime;
+import java.nio.file.attribute.PosixFilePermission;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -46,6 +54,7 @@ import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Set;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;
@@ -61,6 +70,7 @@ import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
 import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
 import org.apache.hadoop.yarn.server.api.ApplicationInitializationContext;
 import org.apache.hadoop.yarn.server.api.ApplicationTerminationContext;
@@ -69,8 +79,11 @@ import org.apache.hadoop.yarn.server.api.AuxiliaryService;
 import org.apache.hadoop.yarn.server.api.ContainerInitializationContext;
 import org.apache.hadoop.yarn.server.api.ContainerTerminationContext;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
+import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.deletion.task.FileDeletionTask;
 import org.junit.Assert;
 import org.junit.Test;
 
@@ -82,7 +95,10 @@ public class TestAuxServices {
           System.getProperty("java.io.tmpdir")),
       TestAuxServices.class.getName());
   private final static AuxiliaryLocalPathHandler MOCK_AUX_PATH_HANDLER =
-      Mockito.mock(AuxiliaryLocalPathHandler.class);
+      mock(AuxiliaryLocalPathHandler.class);
+  private final static Context MOCK_CONTEXT = mock(Context.class);
+  private final static DeletionService MOCK_DEL_SERVICE = mock(
+      DeletionService.class);
 
   static class LightService extends AuxiliaryService implements Service
        {
@@ -188,6 +204,126 @@ public class TestAuxServices {
     }
   }
 
+  @SuppressWarnings("resource")
+  @Test
+  public void testRemoteAuxServiceClassPath() throws Exception {
+    Configuration conf = new YarnConfiguration();
+    FileSystem fs = FileSystem.get(conf);
+    conf.setStrings(YarnConfiguration.NM_AUX_SERVICES,
+        new String[] {"ServiceC"});
+    conf.setClass(String.format(YarnConfiguration.NM_AUX_SERVICE_FMT,
+        "ServiceC"), ServiceC.class, Service.class);
+
+    Context mockContext2 = mock(Context.class);
+    LocalDirsHandlerService mockDirsHandler = mock(
+        LocalDirsHandlerService.class);
+    String root = "target/LocalDir";
+    Path rootAuxServiceDirPath = new Path(root, "nmAuxService");
+    when(mockDirsHandler.getLocalPathForWrite(anyString())).thenReturn(
+        rootAuxServiceDirPath);
+    when(mockContext2.getLocalDirsHandler()).thenReturn(mockDirsHandler);
+
+    File rootDir = GenericTestUtils.getTestDir(getClass()
+        .getSimpleName());
+    if (!rootDir.exists()) {
+      rootDir.mkdirs();
+    }
+    AuxServices aux = null;
+    File testJar = null;
+    try {
+      // The remote jar file should not be writable by group or others.
+      try {
+        testJar = JarFinder.makeClassLoaderTestJar(this.getClass(), rootDir,
+            "test-runjar.jar", 2048, ServiceC.class.getName());
+        // Give the group write permission; the aux service must not be
+        // loaded from a group-writable remote jar file.
+        Set<PosixFilePermission> perms = new HashSet<PosixFilePermission>();
+        perms.add(PosixFilePermission.OWNER_READ);
+        perms.add(PosixFilePermission.OWNER_WRITE);
+        perms.add(PosixFilePermission.GROUP_WRITE);
+        Files.setPosixFilePermissions(Paths.get(testJar.getAbsolutePath()),
+            perms);
+        conf.set(String.format(
+            YarnConfiguration.NM_AUX_SERVICE_REMOTE_CLASSPATH, "ServiceC"),
+            testJar.getAbsolutePath());
+        aux = new AuxServices(MOCK_AUX_PATH_HANDLER,
+            mockContext2, MOCK_DEL_SERVICE);
+        aux.init(conf);
+        Assert.fail("The permission of the jar is wrong."
+            + "Should throw out exception.");
+      } catch (YarnRuntimeException ex) {
+        Assert.assertTrue(ex.getMessage(), ex.getMessage().contains(
+            "The remote jarfile should not be writable by group or others"));
+      }
+
+      Files.delete(Paths.get(testJar.getAbsolutePath()));
+
+      testJar = JarFinder.makeClassLoaderTestJar(this.getClass(), rootDir,
+          "test-runjar.jar", 2048, ServiceC.class.getName());
+      conf.set(String.format(
+          YarnConfiguration.NM_AUX_SERVICE_REMOTE_CLASSPATH, "ServiceC"),
+          testJar.getAbsolutePath());
+      aux = new AuxServices(MOCK_AUX_PATH_HANDLER,
+          mockContext2, MOCK_DEL_SERVICE);
+      aux.init(conf);
+      aux.start();
+      Map<String, ByteBuffer> meta = aux.getMetaData();
+      String auxName = "";
+      Assert.assertEquals(1, meta.size());
+      for (Entry<String, ByteBuffer> entry : meta.entrySet()) {
+        auxName = entry.getKey();
+      }
+      Assert.assertEquals("ServiceC", auxName);
+      aux.serviceStop();
+      FileStatus[] status = fs.listStatus(rootAuxServiceDirPath);
+      Assert.assertEquals(1, status.length);
+
+      // Initialize the same aux service again and make sure that we did
+      // not re-download the jar from the remote directory.
+      aux = new AuxServices(MOCK_AUX_PATH_HANDLER,
+          mockContext2, MOCK_DEL_SERVICE);
+      aux.init(conf);
+      aux.start();
+      meta = aux.getMetaData();
+      Assert.assertEquals(1, meta.size());
+      for (Entry<String, ByteBuffer> entry : meta.entrySet()) {
+        auxName = entry.getKey();
+      }
+      Assert.assertEquals("ServiceC", auxName);
+      verify(MOCK_DEL_SERVICE, times(0)).delete(any(FileDeletionTask.class));
+      status = fs.listStatus(rootAuxServiceDirPath);
+      Assert.assertEquals(1, status.length);
+      aux.serviceStop();
+
+      // Change the last modification time of the remote jar; the jar
+      // should be re-downloaded and the old copy cleaned up.
+      long time = System.currentTimeMillis() + 3600 * 1000;
+      FileTime fileTime = FileTime.fromMillis(time);
+      Files.setLastModifiedTime(Paths.get(testJar.getAbsolutePath()),
+          fileTime);
+      conf.set(
+          String.format(YarnConfiguration.NM_AUX_SERVICE_REMOTE_CLASSPATH,
+              "ServiceC"),
+          testJar.getAbsolutePath());
+      aux = new AuxServices(MOCK_AUX_PATH_HANDLER,
+          mockContext2, MOCK_DEL_SERVICE);
+      aux.init(conf);
+      aux.start();
+      verify(MOCK_DEL_SERVICE, times(1)).delete(any(FileDeletionTask.class));
+      status = fs.listStatus(rootAuxServiceDirPath);
+      Assert.assertEquals(2, status.length);
+      aux.serviceStop();
+    } finally {
+      if (testJar != null) {
+        testJar.delete();
+        rootDir.delete();
+      }
+      if (fs.exists(new Path(root))) {
+        fs.delete(new Path(root), true);
+      }
+    }
+  }
+
   // Verify that we can load a class from a customized classpath.
   // This test uses ServiceC: a separate jar file containing the ServiceC
   // class is created and added to the customized directory.
@@ -202,7 +338,8 @@ public class TestAuxServices {
     conf.setClass(String.format(YarnConfiguration.NM_AUX_SERVICE_FMT,
         "ServiceC"), ServiceC.class, Service.class);
     @SuppressWarnings("resource")
-    AuxServices aux = new AuxServices(MOCK_AUX_PATH_HANDLER);
+    AuxServices aux = new AuxServices(MOCK_AUX_PATH_HANDLER,
+        MOCK_CONTEXT, MOCK_DEL_SERVICE);
     aux.init(conf);
     aux.start();
     Map<String, ByteBuffer> meta = aux.getMetaData();
@@ -244,7 +381,8 @@ public class TestAuxServices {
       conf.set(String.format(
           YarnConfiguration.NM_AUX_SERVICES_SYSTEM_CLASSES,
           "ServiceC"), systemClasses);
-      aux = new AuxServices(MOCK_AUX_PATH_HANDLER);
+      aux = new AuxServices(MOCK_AUX_PATH_HANDLER,
+          MOCK_CONTEXT, MOCK_DEL_SERVICE);
       aux.init(conf);
       aux.start();
       meta = aux.getMetaData();
@@ -282,7 +420,8 @@ public class TestAuxServices {
         ServiceB.class, Service.class);
     conf.setInt("A.expected.init", 1);
     conf.setInt("B.expected.stop", 1);
-    final AuxServices aux = new AuxServices(MOCK_AUX_PATH_HANDLER);
+    final AuxServices aux = new AuxServices(MOCK_AUX_PATH_HANDLER,
+        MOCK_CONTEXT, MOCK_DEL_SERVICE);
     aux.init(conf);
     aux.start();
 
@@ -346,7 +485,8 @@ public class TestAuxServices {
         ServiceA.class, Service.class);
     conf.setClass(String.format(YarnConfiguration.NM_AUX_SERVICE_FMT, "Bsrv"),
         ServiceB.class, Service.class);
-    final AuxServices aux = new AuxServices(MOCK_AUX_PATH_HANDLER);
+    final AuxServices aux = new AuxServices(MOCK_AUX_PATH_HANDLER,
+        MOCK_CONTEXT, MOCK_DEL_SERVICE);
     aux.init(conf);
 
     int latch = 1;
@@ -379,7 +519,8 @@ public class TestAuxServices {
         ServiceA.class, Service.class);
     conf.setClass(String.format(YarnConfiguration.NM_AUX_SERVICE_FMT, "Bsrv"),
         ServiceB.class, Service.class);
-    final AuxServices aux = new AuxServices(MOCK_AUX_PATH_HANDLER);
+    final AuxServices aux = new AuxServices(MOCK_AUX_PATH_HANDLER,
+        MOCK_CONTEXT, MOCK_DEL_SERVICE);
     aux.init(conf);
 
     int latch = 1;
@@ -416,7 +557,8 @@ public class TestAuxServices {
         ServiceA.class, Service.class);
     conf.setClass(String.format(YarnConfiguration.NM_AUX_SERVICE_FMT, "Bsrv"),
         ServiceB.class, Service.class);
-    final AuxServices aux = new AuxServices(MOCK_AUX_PATH_HANDLER);
+    final AuxServices aux = new AuxServices(MOCK_AUX_PATH_HANDLER,
+        MOCK_CONTEXT, MOCK_DEL_SERVICE);
     aux.init(conf);
     aux.start();
 
@@ -429,7 +571,8 @@ public class TestAuxServices {
 
   @Test
   public void testValidAuxServiceName() {
-    final AuxServices aux = new AuxServices(MOCK_AUX_PATH_HANDLER);
+    final AuxServices aux = new AuxServices(MOCK_AUX_PATH_HANDLER,
+        MOCK_CONTEXT, MOCK_DEL_SERVICE);
     Configuration conf = new Configuration();
     conf.setStrings(YarnConfiguration.NM_AUX_SERVICES, new String[] {"Asrv1", "Bsrv_2"});
     conf.setClass(String.format(YarnConfiguration.NM_AUX_SERVICE_FMT, "Asrv1"),
@@ -443,7 +586,8 @@ public class TestAuxServices {
     }
 
     //Test bad auxService Name
-    final AuxServices aux1 = new AuxServices(MOCK_AUX_PATH_HANDLER);
+    final AuxServices aux1 = new AuxServices(MOCK_AUX_PATH_HANDLER,
+        MOCK_CONTEXT, MOCK_DEL_SERVICE);
     conf.setStrings(YarnConfiguration.NM_AUX_SERVICES, new String[] {"1Asrv1"});
     conf.setClass(String.format(YarnConfiguration.NM_AUX_SERVICE_FMT, "1Asrv1"),
         ServiceA.class, Service.class);
@@ -469,7 +613,8 @@ public class TestAuxServices {
     conf.setClass(String.format(YarnConfiguration.NM_AUX_SERVICE_FMT, "Bsrv"),
         RecoverableServiceB.class, Service.class);
     try {
-      final AuxServices aux = new AuxServices(MOCK_AUX_PATH_HANDLER);
+      final AuxServices aux = new AuxServices(MOCK_AUX_PATH_HANDLER,
+          MOCK_CONTEXT, MOCK_DEL_SERVICE);
       aux.init(conf);
       Assert.assertEquals(2, aux.getServices().size());
       File auxStorageDir = new File(TEST_DIR,

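The new testRemoteAuxServiceClassPath above exercises three behaviors in sequence: a remote jar writable by group or others is rejected at init time, an unchanged jar is served from the local copy without a re-download, and a newer modification time on the remote jar triggers a re-download plus a DeletionService cleanup of the stale copy. The sketch below shows the two underlying checks with plain java.nio.file calls; it is an illustration under those assumptions, not the actual AuxServices code (which works through Hadoop's FileSystem/FileStatus APIs and throws YarnRuntimeException):

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.attribute.FileTime;
import java.nio.file.attribute.PosixFilePermission;
import java.util.Set;

public final class RemoteJarChecks {

  // Mirror of the permission rule the test asserts: the remote jar must
  // not be writable by group or others.
  static void checkJarPermissions(Path remoteJar) throws IOException {
    Set<PosixFilePermission> perms = Files.getPosixFilePermissions(remoteJar);
    if (perms.contains(PosixFilePermission.GROUP_WRITE)
        || perms.contains(PosixFilePermission.OTHERS_WRITE)) {
      // The real code throws YarnRuntimeException with this message.
      throw new RuntimeException(
          "The remote jarfile should not be writable by group or others");
    }
  }

  // Re-download only when the remote jar is newer than the cached copy;
  // an equal or older modification time means the cache is reused.
  static boolean needsRedownload(Path remoteJar, Path cachedJar)
      throws IOException {
    if (!Files.exists(cachedJar)) {
      return true;
    }
    FileTime remote = Files.getLastModifiedTime(remoteJar);
    FileTime cached = Files.getLastModifiedTime(cachedJar);
    return remote.compareTo(cached) > 0;
  }
}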
+ 11 - 3
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java

@@ -146,6 +146,7 @@ public class TestDockerContainerRuntime {
   private final String whitelistedUser = "yoda";
   private String[] testCapabilities;
   private final String signalPid = "1234";
+  private int dockerStopGracePeriod;
 
   @Before
   public void setup() {
@@ -166,6 +167,10 @@ public class TestDockerContainerRuntime {
     env.put("FROM_CLIENT", "1");
     image = "busybox:latest";
 
+    dockerStopGracePeriod = conf.getInt(
+        YarnConfiguration.NM_DOCKER_STOP_GRACE_PERIOD,
+        YarnConfiguration.DEFAULT_NM_DOCKER_STOP_GRACE_PERIOD);
+
     env.put(DockerLinuxContainerRuntime.ENV_DOCKER_CONTAINER_IMAGE, image);
     when(container.getContainerId()).thenReturn(cId);
     when(cId.toString()).thenReturn(containerId);
@@ -1308,10 +1313,11 @@ public class TestDockerContainerRuntime {
     List<String> dockerCommands = getDockerCommandsForSignal(
         ContainerExecutor.Signal.TERM,
         DockerCommandExecutor.DockerContainerStatus.RUNNING);
-    Assert.assertEquals(3, dockerCommands.size());
+    Assert.assertEquals(4, dockerCommands.size());
     Assert.assertEquals("[docker-command-execution]", dockerCommands.get(0));
     Assert.assertEquals("  docker-command=stop", dockerCommands.get(1));
     Assert.assertEquals("  name=container_id", dockerCommands.get(2));
+    Assert.assertEquals("  time=10", dockerCommands.get(3));
   }
 
   @Test
@@ -1321,10 +1327,11 @@ public class TestDockerContainerRuntime {
     List<String> dockerCommands = getDockerCommandsForSignal(
         ContainerExecutor.Signal.KILL,
         DockerCommandExecutor.DockerContainerStatus.RUNNING);
-    Assert.assertEquals(3, dockerCommands.size());
+    Assert.assertEquals(4, dockerCommands.size());
     Assert.assertEquals("[docker-command-execution]", dockerCommands.get(0));
     Assert.assertEquals("  docker-command=stop", dockerCommands.get(1));
     Assert.assertEquals("  name=container_id", dockerCommands.get(2));
+    Assert.assertEquals("  time=10", dockerCommands.get(3));
   }
 
   @Test
@@ -1884,7 +1891,8 @@ public class TestDockerContainerRuntime {
             || ContainerExecutor.Signal.TERM.equals(signal)) {
           if (DockerCommandExecutor.isStoppable(containerStatus)) {
             DockerStopCommand dockerStopCommand =
-                new DockerStopCommand(containerName);
+                new DockerStopCommand(containerName)
+                .setGracePeriod(dockerStopGracePeriod);
             DockerCommandExecutor.executeDockerCommand(dockerStopCommand,
                 containerName, environment, conf, mockExecutor, false);
           }

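The runtime change behind these test updates is that docker stop is now issued with an explicit grace period, so the serialized command gains a fourth line, time=10 by default. Below is a hypothetical stand-in for DockerStopCommand (the real class lives under containermanager/linux/runtime/docker), showing how a setGracePeriod() builder step would produce exactly the lines the updated assertions expect:

import java.util.LinkedHashMap;
import java.util.Map;

// Hypothetical sketch, not the real DockerStopCommand: it only shows how
// setGracePeriod() contributes the "time" key to the serialized command.
public class DockerStopCommandSketch {

  private final Map<String, String> args = new LinkedHashMap<>();

  DockerStopCommandSketch(String containerName) {
    args.put("docker-command", "stop");
    args.put("name", containerName);
  }

  DockerStopCommandSketch setGracePeriod(int seconds) {
    args.put("time", String.valueOf(seconds));
    return this;
  }

  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder("[docker-command-execution]\n");
    for (Map.Entry<String, String> e : args.entrySet()) {
      sb.append("  ").append(e.getKey()).append('=').append(e.getValue())
          .append('\n');
    }
    return sb.toString();
  }

  public static void main(String[] unused) {
    // Prints the four lines asserted in the updated tests, using the
    // 10-second default grace period.
    System.out.print(new DockerStopCommandSketch("container_id")
        .setGracePeriod(10));
  }
}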
Some files were not shown because too many files changed in this diff