
Merge branch 'trunk' into HDFS-7240

Anu Engineer, 7 years ago
parent
commit
7c957451f3
50 changed files with 723 additions and 213 deletions
  1. +0 -12   hadoop-common-project/hadoop-common/src/main/conf/log4j.properties
  2. +6 -4    hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
  3. +2 -2    hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java
  4. +2 -2    hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsAdmin.java
  5. +6 -6    hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/AddErasureCodingPolicyResponse.java
  6. +1 -1    hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java
  7. +6 -5    hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java
  8. +12 -10  hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java
  9. +47 -20  hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/CombinedHostsFileReader.java
 10. +11 -12  hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/CombinedHostsFileWriter.java
 11. +1 -1    hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/erasurecoding.proto
 12. +1 -1    hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/hdfs.proto
 13. +4 -0    hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
 14. +7 -6    hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java
 15. +18 -21  hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java
 16. +1 -2    hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CombinedHostFileManager.java
 17. +6 -0    hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/VolumeScanner.java
 18. +2 -2    hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
 19. +8 -7    hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
 20. +5 -4    hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java
 21. +4 -3    hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/ECAdmin.java
 22. +11 -0   hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
 23. +165 -0  hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HdfsDataNodeAdminGuide.md
 24. +167 -0  hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HdfsUpgradeDomain.md
 25. +2 -2    hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java
 26. +4 -3    hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestErasureCodingPolicies.java
 27. +13 -12  hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocolPB/TestPBHelper.java
 28. +2 -2    hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java
 29. +3 -2    hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImage.java
 30. +27 -17  hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestCombinedHostsFileReader.java
 31. +9 -7    hadoop-hdfs-project/hadoop-hdfs/src/test/resources/dfs.hosts.json
 32. +7 -0    hadoop-hdfs-project/hadoop-hdfs/src/test/resources/legacy.dfs.hosts.json
 33. +6 -6    hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/BaileyBorweinPlouffe.java
 34. +3 -4    hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/DBCountPageView.java
 35. +4 -3    hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/dancing/DancingLinks.java
 36. +3 -3    hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/pi/DistSum.java
 37. +3 -3    hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraGen.java
 38. +3 -4    hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraOutputFormat.java
 39. +3 -4    hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraScheduler.java
 40. +3 -3    hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraSort.java
 41. +3 -3    hadoop-mapreduce-project/hadoop-mapreduce-examples/src/test/java/org/apache/hadoop/examples/terasort/TestTeraSort.java
 42. +3 -1    hadoop-project/src/site/site.xml
 43. +1 -2    hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/factories/impl/pb/RpcClientFactoryPBImpl.java
 44. +2 -3    hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/factories/impl/pb/RpcServerFactoryPBImpl.java
 45. +49 -0   hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/factories/impl/pb/TestRpcClientFactoryPBImpl.java
 46. +48 -0   hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/factories/impl/pb/TestRpcServerFactoryPBImpl.java
 47. +6 -0    hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandler.java
 48. +5 -0    hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandlerImpl.java
 49. +8 -5    hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java
 50. +10 -3   hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java

+ 0 - 12
hadoop-common-project/hadoop-common/src/main/conf/log4j.properties

@@ -78,19 +78,7 @@ log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
 #
 # TaskLog Appender
 #
-
-#Default values
-hadoop.tasklog.taskid=null
-hadoop.tasklog.iscleanup=false
-hadoop.tasklog.noKeepSplits=4
-hadoop.tasklog.totalLogFileSize=100
-hadoop.tasklog.purgeLogSplits=true
-hadoop.tasklog.logsRetainHours=12
-
 log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
-log4j.appender.TLA.taskId=${hadoop.tasklog.taskid}
-log4j.appender.TLA.isCleanup=${hadoop.tasklog.iscleanup}
-log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}
 
 log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
 log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n

+ 6 - 4
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java

@@ -102,7 +102,7 @@ import org.apache.hadoop.hdfs.client.impl.DfsClientConf;
 import org.apache.hadoop.hdfs.client.impl.LeaseRenewer;
 import org.apache.hadoop.hdfs.net.Peer;
 import org.apache.hadoop.hdfs.protocol.AclException;
-import org.apache.hadoop.hdfs.protocol.AddECPolicyResponse;
+import org.apache.hadoop.hdfs.protocol.AddErasureCodingPolicyResponse;
 import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy;
 import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
 import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
@@ -2807,13 +2807,14 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory,
     }
   }
 
-  public AddECPolicyResponse[] addErasureCodingPolicies(
+  public AddErasureCodingPolicyResponse[] addErasureCodingPolicies(
       ErasureCodingPolicy[] policies) throws IOException {
     checkOpen();
     try (TraceScope ignored = tracer.newScope("addErasureCodingPolicies")) {
       return namenode.addErasureCodingPolicies(policies);
     } catch (RemoteException re) {
-      throw re.unwrapRemoteException(AccessControlException.class);
+      throw re.unwrapRemoteException(AccessControlException.class,
+          SafeModeException.class);
     }
   }
 
@@ -2823,7 +2824,8 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory,
     try (TraceScope ignored = tracer.newScope("removeErasureCodingPolicy")) {
       namenode.removeErasureCodingPolicy(ecPolicyName);
     } catch (RemoteException re) {
-      throw re.unwrapRemoteException(AccessControlException.class);
+      throw re.unwrapRemoteException(AccessControlException.class,
+          SafeModeException.class);
     }
   }
 

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java

@@ -64,7 +64,7 @@ import org.apache.hadoop.hdfs.DFSOpsCountStatistics.OpType;
 import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
 import org.apache.hadoop.hdfs.client.HdfsDataOutputStream;
 import org.apache.hadoop.hdfs.client.impl.CorruptFileBlockIterator;
-import org.apache.hadoop.hdfs.protocol.AddECPolicyResponse;
+import org.apache.hadoop.hdfs.protocol.AddErasureCodingPolicyResponse;
 import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy;
 import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
 import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
@@ -2650,7 +2650,7 @@ public class DistributedFileSystem extends FileSystem {
    * @return Return the response list of adding operations.
    * @throws IOException
    */
-  public AddECPolicyResponse[] addErasureCodingPolicies(
+  public AddErasureCodingPolicyResponse[] addErasureCodingPolicies(
       ErasureCodingPolicy[] policies)  throws IOException {
     return dfs.addErasureCodingPolicies(policies);
   }

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsAdmin.java

@@ -35,7 +35,7 @@ import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.DFSInotifyEventInputStream;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
-import org.apache.hadoop.hdfs.protocol.AddECPolicyResponse;
+import org.apache.hadoop.hdfs.protocol.AddErasureCodingPolicyResponse;
 import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
 import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
 import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
@@ -562,7 +562,7 @@ public class HdfsAdmin {
    * @return Return the response list of adding operations.
    * @throws IOException
    */
-  public AddECPolicyResponse[] addErasureCodingPolicies(
+  public AddErasureCodingPolicyResponse[] addErasureCodingPolicies(
       ErasureCodingPolicy[] policies)  throws IOException {
     return dfs.addErasureCodingPolicies(policies);
   }
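
Reviewer note: the rename from `AddECPolicyResponse` to `AddErasureCodingPolicyResponse` is mechanical; the calling pattern is unchanged. A minimal caller-side sketch against the renamed API (the schema constant and cell size mirror the test code later in this change; `getName()` on the returned policy is taken from its use in the FSNamesystem hunk):

```java
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.AddErasureCodingPolicyResponse;
import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
import org.apache.hadoop.io.erasurecode.ErasureCodeConstants;

public class AddEcPolicyExample {
  // Adds one custom EC policy and reports per-policy success or failure.
  static void addPolicy(DistributedFileSystem dfs) throws Exception {
    ErasureCodingPolicy policy =
        new ErasureCodingPolicy(ErasureCodeConstants.RS_3_2_SCHEMA, 8 * 1024);
    AddErasureCodingPolicyResponse[] responses =
        dfs.addErasureCodingPolicies(new ErasureCodingPolicy[] {policy});
    for (AddErasureCodingPolicyResponse response : responses) {
      if (response.isSucceed()) {
        System.out.println("Added policy " + response.getPolicy().getName());
      } else {
        System.out.println("Failed: " + response);
      }
    }
  }
}
```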

+ 6 - 6
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/AddECPolicyResponse.java → hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/AddErasureCodingPolicyResponse.java

@@ -22,25 +22,25 @@ import org.apache.hadoop.HadoopIllegalArgumentException;
 /**
  * A response of add an ErasureCoding policy.
  */
-public class AddECPolicyResponse {
+public class AddErasureCodingPolicyResponse {
   private boolean succeed;
   private ErasureCodingPolicy policy;
   private String errorMsg;
 
-  public AddECPolicyResponse(ErasureCodingPolicy policy) {
+  public AddErasureCodingPolicyResponse(ErasureCodingPolicy policy) {
     this.policy = policy;
     this.succeed = true;
   }
 
-  public AddECPolicyResponse(ErasureCodingPolicy policy,
-      String errorMsg) {
+  public AddErasureCodingPolicyResponse(ErasureCodingPolicy policy,
+                                        String errorMsg) {
     this.policy = policy;
     this.errorMsg = errorMsg;
     this.succeed = false;
   }
 
-  public AddECPolicyResponse(ErasureCodingPolicy policy,
-      HadoopIllegalArgumentException e) {
+  public AddErasureCodingPolicyResponse(ErasureCodingPolicy policy,
+                                        HadoopIllegalArgumentException e) {
     this(policy, e.getMessage());
   }
 

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java

@@ -1584,7 +1584,7 @@ public interface ClientProtocol {
    * @throws IOException
    */
   @AtMostOnce
-  AddECPolicyResponse[] addErasureCodingPolicies(
+  AddErasureCodingPolicyResponse[] addErasureCodingPolicies(
       ErasureCodingPolicy[] policies) throws IOException;
 
   /**

+ 6 - 5
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java

@@ -50,7 +50,7 @@ import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.AddBlockFlag;
 import org.apache.hadoop.hdfs.inotify.EventBatchList;
-import org.apache.hadoop.hdfs.protocol.AddECPolicyResponse;
+import org.apache.hadoop.hdfs.protocol.AddErasureCodingPolicyResponse;
 import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy;
 import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
 import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
@@ -1718,7 +1718,7 @@ public class ClientNamenodeProtocolTranslatorPB implements
   }
 
   @Override
-  public AddECPolicyResponse[] addErasureCodingPolicies(
+  public AddErasureCodingPolicyResponse[] addErasureCodingPolicies(
       ErasureCodingPolicy[] policies) throws IOException {
     List<ErasureCodingPolicyProto> protos = Arrays.stream(policies)
         .map(PBHelperClient::convertErasureCodingPolicy)
@@ -1729,9 +1729,10 @@ public class ClientNamenodeProtocolTranslatorPB implements
     try {
       AddErasureCodingPoliciesResponseProto rep = rpcProxy
           .addErasureCodingPolicies(null, req);
-      AddECPolicyResponse[] responses = rep.getResponsesList().stream()
-          .map(PBHelperClient::convertAddECPolicyResponse)
-          .toArray(AddECPolicyResponse[]::new);
+      AddErasureCodingPolicyResponse[] responses =
+          rep.getResponsesList().stream()
+              .map(PBHelperClient::convertAddErasureCodingPolicyResponse)
+              .toArray(AddErasureCodingPolicyResponse[]::new);
       return responses;
     }  catch (ServiceException e) {
       throw ProtobufHelper.getRemoteException(e);

+ 12 - 10
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java

@@ -58,7 +58,7 @@ import org.apache.hadoop.hdfs.DFSUtilClient;
 import org.apache.hadoop.hdfs.inotify.Event;
 import org.apache.hadoop.hdfs.inotify.EventBatch;
 import org.apache.hadoop.hdfs.inotify.EventBatchList;
-import org.apache.hadoop.hdfs.protocol.AddECPolicyResponse;
+import org.apache.hadoop.hdfs.protocol.AddErasureCodingPolicyResponse;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy;
 import org.apache.hadoop.hdfs.protocol.BlockType;
@@ -137,7 +137,7 @@ import org.apache.hadoop.hdfs.protocol.proto.EncryptionZonesProtos.ReencryptionS
 import org.apache.hadoop.hdfs.protocol.proto.EncryptionZonesProtos.ZoneReencryptionStatusProto;
 import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos;
 import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.AccessModeProto;
-import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.AddECPolicyResponseProto;
+import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.AddErasureCodingPolicyResponseProto;
 import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto;
 import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockStoragePolicyProto;
 import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockTypeProto;
@@ -2981,10 +2981,11 @@ public class PBHelperClient {
     return builder.build();
   }
 
-  public static AddECPolicyResponseProto convertAddECPolicyResponse(
-      AddECPolicyResponse response) {
-    AddECPolicyResponseProto.Builder builder =
-        AddECPolicyResponseProto.newBuilder()
+  public static AddErasureCodingPolicyResponseProto
+      convertAddErasureCodingPolicyResponse(
+          AddErasureCodingPolicyResponse response) {
+    AddErasureCodingPolicyResponseProto.Builder builder =
+        AddErasureCodingPolicyResponseProto.newBuilder()
         .setPolicy(convertErasureCodingPolicy(response.getPolicy()))
         .setSucceed(response.isSucceed());
     if (!response.isSucceed()) {
@@ -2993,13 +2994,14 @@ public class PBHelperClient {
     return builder.build();
   }
 
-  public static AddECPolicyResponse convertAddECPolicyResponse(
-      AddECPolicyResponseProto proto) {
+  public static AddErasureCodingPolicyResponse
+      convertAddErasureCodingPolicyResponse(
+          AddErasureCodingPolicyResponseProto proto) {
     ErasureCodingPolicy policy = convertErasureCodingPolicy(proto.getPolicy());
     if (proto.getSucceed()) {
-      return new AddECPolicyResponse(policy);
+      return new AddErasureCodingPolicyResponse(policy);
     } else {
-      return new AddECPolicyResponse(policy, proto.getErrorMsg());
+      return new AddErasureCodingPolicyResponse(policy, proto.getErrorMsg());
     }
   }
 

+ 47 - 20
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/CombinedHostsFileReader.java

@@ -19,58 +19,85 @@
 package org.apache.hadoop.hdfs.util;
 
 import com.fasterxml.jackson.core.JsonFactory;
+import com.fasterxml.jackson.databind.JsonMappingException;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.ObjectReader;
+
 import java.io.FileInputStream;
 import java.io.InputStreamReader;
 import java.io.IOException;
 import java.io.Reader;
+import java.util.ArrayList;
 import java.util.Iterator;
-import java.util.Set;
-import java.util.HashSet;
+import java.util.List;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.hdfs.protocol.DatanodeAdminProperties;
 
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 /**
- * Reader support for JSON based datanode configuration, an alternative
+ * Reader support for JSON-based datanode configuration, an alternative format
  * to the exclude/include files configuration.
- * The JSON file format is the array of elements where each element
+ * The JSON file format defines the array of elements where each element
  * in the array describes the properties of a datanode. The properties of
- * a datanode is defined in {@link DatanodeAdminProperties}. For example,
+ * a datanode is defined by {@link DatanodeAdminProperties}. For example,
  *
- * {"hostName": "host1"}
- * {"hostName": "host2", "port": 50, "upgradeDomain": "ud0"}
- * {"hostName": "host3", "port": 0, "adminState": "DECOMMISSIONED"}
+ * [
+ *   {"hostName": "host1"},
+ *   {"hostName": "host2", "port": 50, "upgradeDomain": "ud0"},
+ *   {"hostName": "host3", "port": 0, "adminState": "DECOMMISSIONED"}
+ * ]
  */
 @InterfaceAudience.LimitedPrivate({"HDFS"})
 @InterfaceStability.Unstable
 public final class CombinedHostsFileReader {
-  private static final ObjectReader READER =
-      new ObjectMapper().readerFor(DatanodeAdminProperties.class);
-  private static final JsonFactory JSON_FACTORY = new JsonFactory();
+
+  public static final Logger LOG =
+      LoggerFactory.getLogger(CombinedHostsFileReader.class);
 
   private CombinedHostsFileReader() {
   }
 
   /**
    * Deserialize a set of DatanodeAdminProperties from a json file.
-   * @param hostsFile the input json file to read from.
+   * @param hostsFile the input json file to read from
    * @return the set of DatanodeAdminProperties
    * @throws IOException
    */
-  public static Set<DatanodeAdminProperties>
+  public static DatanodeAdminProperties[]
       readFile(final String hostsFile) throws IOException {
-    HashSet<DatanodeAdminProperties> allDNs = new HashSet<>();
+    DatanodeAdminProperties[] allDNs = new DatanodeAdminProperties[0];
+    ObjectMapper objectMapper = new ObjectMapper();
+    boolean tryOldFormat = false;
     try (Reader input =
-         new InputStreamReader(new FileInputStream(hostsFile), "UTF-8")) {
-      Iterator<DatanodeAdminProperties> iterator =
-          READER.readValues(JSON_FACTORY.createParser(input));
-      while (iterator.hasNext()) {
-        DatanodeAdminProperties properties = iterator.next();
-        allDNs.add(properties);
+        new InputStreamReader(new FileInputStream(hostsFile), "UTF-8")) {
+      allDNs = objectMapper.readValue(input, DatanodeAdminProperties[].class);
+    } catch (JsonMappingException jme) {
+      // The old format doesn't have json top-level token to enclose the array.
+      // For backward compatibility, try parsing the old format.
+      tryOldFormat = true;
+      LOG.warn("{} has invalid JSON format. " +
+          "Try the old format without top-level token defined.", hostsFile);
+    }
+
+    if (tryOldFormat) {
+      ObjectReader objectReader =
+          objectMapper.readerFor(DatanodeAdminProperties.class);
+      JsonFactory jsonFactory = new JsonFactory();
+      List<DatanodeAdminProperties> all = new ArrayList<>();
+      try (Reader input =
+          new InputStreamReader(new FileInputStream(hostsFile), "UTF-8")) {
+        Iterator<DatanodeAdminProperties> iterator =
+            objectReader.readValues(jsonFactory.createParser(input));
+        while (iterator.hasNext()) {
+          DatanodeAdminProperties properties = iterator.next();
+          all.add(properties);
+        }
       }
+      allDNs = all.toArray(new DatanodeAdminProperties[all.size()]);
     }
     return allDNs;
   }
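
Reviewer note: `readFile` now returns an array and transparently falls back to the legacy format on a `JsonMappingException`. A small hypothetical caller sketch (the `getHostName()` getter on `DatanodeAdminProperties` is assumed from its Jackson binding):

```java
import org.apache.hadoop.hdfs.protocol.DatanodeAdminProperties;
import org.apache.hadoop.hdfs.util.CombinedHostsFileReader;

public class ReadHostsExample {
  public static void main(String[] args) throws Exception {
    // Works for both dfs.hosts.json (new array format) and
    // legacy.dfs.hosts.json (old concatenated-objects format).
    DatanodeAdminProperties[] allDNs =
        CombinedHostsFileReader.readFile(args[0]);
    for (DatanodeAdminProperties dn : allDNs) {
      System.out.println(dn.getHostName());  // getter assumed
    }
  }
}
```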

+ 11 - 12
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/CombinedHostsFileWriter.java

@@ -32,20 +32,21 @@ import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.hdfs.protocol.DatanodeAdminProperties;
 
 /**
- * Writer support for JSON based datanode configuration, an alternative
+ * Writer support for JSON-based datanode configuration, an alternative format
  * to the exclude/include files configuration.
- * The JSON file format is the array of elements where each element
+ * The JSON file format defines the array of elements where each element
  * in the array describes the properties of a datanode. The properties of
- * a datanode is defined in {@link DatanodeAdminProperties}. For example,
+ * a datanode is defined by {@link DatanodeAdminProperties}. For example,
  *
- * {"hostName": "host1"}
- * {"hostName": "host2", "port": 50, "upgradeDomain": "ud0"}
- * {"hostName": "host3", "port": 0, "adminState": "DECOMMISSIONED"}
+ * [
+ *   {"hostName": "host1"},
+ *   {"hostName": "host2", "port": 50, "upgradeDomain": "ud0"},
+ *   {"hostName": "host3", "port": 0, "adminState": "DECOMMISSIONED"}
+ * ]
  */
 @InterfaceAudience.LimitedPrivate({"HDFS"})
 @InterfaceStability.Unstable
 public final class CombinedHostsFileWriter {
-  private static final ObjectMapper MAPPER = new ObjectMapper();
   private CombinedHostsFileWriter() {
   }
 
@@ -57,13 +58,11 @@ public final class CombinedHostsFileWriter {
    */
   public static void writeFile(final String hostsFile,
       final Set<DatanodeAdminProperties> allDNs) throws IOException {
-    StringBuilder configs = new StringBuilder();
+    final ObjectMapper objectMapper = new ObjectMapper();
+
     try (Writer output =
        new OutputStreamWriter(new FileOutputStream(hostsFile), "UTF-8")) {
-      for (DatanodeAdminProperties datanodeAdminProperties: allDNs) {
-        configs.append(MAPPER.writeValueAsString(datanodeAdminProperties));
-      }
-      output.write(configs.toString());
+      objectMapper.writeValue(output, allDNs);
     }
   }
 }
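
With the writer switched to a single `ObjectMapper.writeValue` over the whole set, the output is a proper JSON array rather than concatenated objects, so it round-trips through the new reader path directly. A sketch, assuming standard bean setters on `DatanodeAdminProperties`:

```java
import java.util.HashSet;
import java.util.Set;
import org.apache.hadoop.hdfs.protocol.DatanodeAdminProperties;
import org.apache.hadoop.hdfs.util.CombinedHostsFileWriter;

public class WriteHostsExample {
  public static void main(String[] args) throws Exception {
    Set<DatanodeAdminProperties> allDNs = new HashSet<>();
    DatanodeAdminProperties dn = new DatanodeAdminProperties();
    dn.setHostName("host1");  // setter assumed from the Jackson binding
    allDNs.add(dn);
    // Emits a JSON array, e.g. [{"hostName":"host1",...}]
    CombinedHostsFileWriter.writeFile("/tmp/dfs.hosts.json", allDNs);
  }
}
```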

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/erasurecoding.proto

@@ -58,7 +58,7 @@ message AddErasureCodingPoliciesRequestProto {
 }
 
 message AddErasureCodingPoliciesResponseProto {
-  repeated AddECPolicyResponseProto responses = 1;
+  repeated AddErasureCodingPolicyResponseProto responses = 1;
 }
 
 message RemoveErasureCodingPolicyRequestProto {

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/hdfs.proto

@@ -392,7 +392,7 @@ message ErasureCodingPolicyProto {
   optional ErasureCodingPolicyState state = 5 [default = ENABLED];
 }
 
-message AddECPolicyResponseProto {
+message AddErasureCodingPolicyResponseProto {
   required ErasureCodingPolicyProto policy = 1;
   required bool succeed = 2;
   optional string errorMsg = 3;

+ 4 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java

@@ -725,6 +725,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
       "dfs.edit.log.transfer.bandwidthPerSec";
   public static final long DFS_EDIT_LOG_TRANSFER_RATE_DEFAULT = 0; //no throttling
 
+  public static final String DFS_QJM_OPERATIONS_TIMEOUT =
+      "dfs.qjm.operations.timeout";
+  public static final long DFS_QJM_OPERATIONS_TIMEOUT_DEFAULT = 60000;
+
   // Datanode File IO Stats
   public static final String DFS_DATANODE_ENABLE_FILEIO_FAULT_INJECTION_KEY =
       "dfs.datanode.enable.fileio.fault.injection";

+ 7 - 6
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java

@@ -36,7 +36,7 @@ import org.apache.hadoop.fs.CreateFlag;
 import org.apache.hadoop.fs.FsServerDefaults;
 import org.apache.hadoop.fs.Options.Rename;
 import org.apache.hadoop.fs.QuotaUsage;
-import org.apache.hadoop.hdfs.protocol.AddECPolicyResponse;
+import org.apache.hadoop.hdfs.protocol.AddErasureCodingPolicyResponse;
 import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy;
 import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
 import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
@@ -1721,15 +1721,16 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
       ErasureCodingPolicy[] policies = request.getEcPoliciesList().stream()
           .map(PBHelperClient::convertErasureCodingPolicy)
           .toArray(ErasureCodingPolicy[]::new);
-      AddECPolicyResponse[] result = server
+      AddErasureCodingPolicyResponse[] result = server
           .addErasureCodingPolicies(policies);
 
-      List<HdfsProtos.AddECPolicyResponseProto> responseProtos = Arrays
-          .stream(result).map(PBHelperClient::convertAddECPolicyResponse)
-          .collect(Collectors.toList());
+      List<HdfsProtos.AddErasureCodingPolicyResponseProto> responseProtos =
+          Arrays.stream(result)
+              .map(PBHelperClient::convertAddErasureCodingPolicyResponse)
+              .collect(Collectors.toList());
       AddErasureCodingPoliciesResponseProto response =
           AddErasureCodingPoliciesResponseProto.newBuilder()
-            .addAllResponses(responseProtos).build();
+              .addAllResponses(responseProtos).build();
       return response;
     } catch (IOException e) {
       throw new ServiceException(e);

+ 18 - 21
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java

@@ -27,6 +27,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.PriorityQueue;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
 
 import org.apache.commons.logging.Log;
@@ -76,18 +77,10 @@ public class QuorumJournalManager implements JournalManager {
   private final int newEpochTimeoutMs;
   private final int writeTxnsTimeoutMs;
 
-  // Since these don't occur during normal operation, we can
-  // use rather lengthy timeouts, and don't need to make them
-  // configurable.
-  private static final int FORMAT_TIMEOUT_MS            = 60000;
-  private static final int HASDATA_TIMEOUT_MS           = 60000;
-  private static final int CAN_ROLL_BACK_TIMEOUT_MS     = 60000;
-  private static final int FINALIZE_TIMEOUT_MS          = 60000;
-  private static final int PRE_UPGRADE_TIMEOUT_MS       = 60000;
-  private static final int ROLL_BACK_TIMEOUT_MS         = 60000;
-  private static final int DISCARD_SEGMENTS_TIMEOUT_MS  = 60000;
-  private static final int UPGRADE_TIMEOUT_MS           = 60000;
-  private static final int GET_JOURNAL_CTIME_TIMEOUT_MS = 60000;
+  // This timeout is used for calls that don't occur during normal operation
+  // e.g. format, upgrade operations and a few others. So we can use rather
+  // lengthy timeouts by default.
+  private final int timeoutMs;
   
   private final Configuration conf;
   private final URI uri;
@@ -141,6 +134,10 @@ public class QuorumJournalManager implements JournalManager {
     this.writeTxnsTimeoutMs = conf.getInt(
         DFSConfigKeys.DFS_QJOURNAL_WRITE_TXNS_TIMEOUT_KEY,
         DFSConfigKeys.DFS_QJOURNAL_WRITE_TXNS_TIMEOUT_DEFAULT);
+    this.timeoutMs = (int) conf.getTimeDuration(DFSConfigKeys
+            .DFS_QJM_OPERATIONS_TIMEOUT,
+        DFSConfigKeys.DFS_QJM_OPERATIONS_TIMEOUT_DEFAULT, TimeUnit
+            .MILLISECONDS);
   }
   
   protected List<AsyncLogger> createLoggers(
@@ -201,7 +198,7 @@ public class QuorumJournalManager implements JournalManager {
   public void format(NamespaceInfo nsInfo) throws IOException {
     QuorumCall<AsyncLogger,Void> call = loggers.format(nsInfo);
     try {
-      call.waitFor(loggers.size(), loggers.size(), 0, FORMAT_TIMEOUT_MS,
+      call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs,
           "format");
     } catch (InterruptedException e) {
       throw new IOException("Interrupted waiting for format() response");
@@ -220,7 +217,7 @@ public class QuorumJournalManager implements JournalManager {
         loggers.isFormatted();
 
     try {
-      call.waitFor(loggers.size(), 0, 0, HASDATA_TIMEOUT_MS, "hasSomeData");
+      call.waitFor(loggers.size(), 0, 0, timeoutMs, "hasSomeData");
     } catch (InterruptedException e) {
       throw new IOException("Interrupted while determining if JNs have data");
     } catch (TimeoutException e) {
@@ -505,7 +502,7 @@ public class QuorumJournalManager implements JournalManager {
   public void doPreUpgrade() throws IOException {
     QuorumCall<AsyncLogger, Void> call = loggers.doPreUpgrade();
     try {
-      call.waitFor(loggers.size(), loggers.size(), 0, PRE_UPGRADE_TIMEOUT_MS,
+      call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs,
           "doPreUpgrade");
       
       if (call.countExceptions() > 0) {
@@ -522,7 +519,7 @@ public class QuorumJournalManager implements JournalManager {
   public void doUpgrade(Storage storage) throws IOException {
     QuorumCall<AsyncLogger, Void> call = loggers.doUpgrade(storage);
     try {
-      call.waitFor(loggers.size(), loggers.size(), 0, UPGRADE_TIMEOUT_MS,
+      call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs,
           "doUpgrade");
       
       if (call.countExceptions() > 0) {
@@ -539,7 +536,7 @@ public class QuorumJournalManager implements JournalManager {
   public void doFinalize() throws IOException {
     QuorumCall<AsyncLogger, Void> call = loggers.doFinalize();
     try {
-      call.waitFor(loggers.size(), loggers.size(), 0, FINALIZE_TIMEOUT_MS,
+      call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs,
           "doFinalize");
       
       if (call.countExceptions() > 0) {
@@ -558,7 +555,7 @@ public class QuorumJournalManager implements JournalManager {
     QuorumCall<AsyncLogger, Boolean> call = loggers.canRollBack(storage,
         prevStorage, targetLayoutVersion);
     try {
-      call.waitFor(loggers.size(), loggers.size(), 0, CAN_ROLL_BACK_TIMEOUT_MS,
+      call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs,
           "lockSharedStorage");
       
       if (call.countExceptions() > 0) {
@@ -591,7 +588,7 @@ public class QuorumJournalManager implements JournalManager {
   public void doRollback() throws IOException {
     QuorumCall<AsyncLogger, Void> call = loggers.doRollback();
     try {
-      call.waitFor(loggers.size(), loggers.size(), 0, ROLL_BACK_TIMEOUT_MS,
+      call.waitFor(loggers.size(), loggers.size(), 0, timeoutMs,
           "doRollback");
       
       if (call.countExceptions() > 0) {
@@ -609,7 +606,7 @@ public class QuorumJournalManager implements JournalManager {
     QuorumCall<AsyncLogger, Void> call = loggers.discardSegments(startTxId);
     try {
       call.waitFor(loggers.size(), loggers.size(), 0,
-          DISCARD_SEGMENTS_TIMEOUT_MS, "discardSegments");
+          timeoutMs, "discardSegments");
       if (call.countExceptions() > 0) {
         call.rethrowException(
             "Could not perform discardSegments of one or more JournalNodes");
@@ -628,7 +625,7 @@ public class QuorumJournalManager implements JournalManager {
     QuorumCall<AsyncLogger, Long> call = loggers.getJournalCTime();
     try {
       call.waitFor(loggers.size(), loggers.size(), 0,
-          GET_JOURNAL_CTIME_TIMEOUT_MS, "getJournalCTime");
+          timeoutMs, "getJournalCTime");
       
       if (call.countExceptions() > 0) {
         call.rethrowException("Could not journal CTime for one "

+ 1 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CombinedHostFileManager.java

@@ -39,7 +39,6 @@ import java.net.InetSocketAddress;
 import java.util.Collection;
 import java.util.Iterator;
 import java.util.Map;
-import java.util.Set;
 
 import com.google.common.base.Predicate;
 
@@ -212,7 +211,7 @@ public class CombinedHostFileManager extends HostConfigManager {
   }
   private void refresh(final String hostsFile) throws IOException {
     HostProperties hostProps = new HostProperties();
-    Set<DatanodeAdminProperties> all =
+    DatanodeAdminProperties[] all =
         CombinedHostsFileReader.readFile(hostsFile);
     for(DatanodeAdminProperties properties : all) {
       InetSocketAddress addr = parseEntry(hostsFile,

+ 6 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/VolumeScanner.java

@@ -37,6 +37,7 @@ import org.apache.hadoop.hdfs.server.datanode.BlockScanner.Conf;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeReference;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi.BlockIterator;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
+import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics;
 import org.apache.hadoop.hdfs.util.DataTransferThrottler;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.util.Time;
@@ -81,6 +82,8 @@ public class VolumeScanner extends Thread {
    */
   private final DataNode datanode;
 
+  private final DataNodeMetrics metrics;
+
   /**
    * A reference to the volume that we're scanning.
    */
@@ -299,6 +302,7 @@ public class VolumeScanner extends Thread {
   VolumeScanner(Conf conf, DataNode datanode, FsVolumeReference ref) {
     this.conf = conf;
     this.datanode = datanode;
+    this.metrics = datanode.getMetrics();
     this.ref = ref;
     this.volume = ref.getVolume();
     ScanResultHandler handler;
@@ -443,12 +447,14 @@ public class VolumeScanner extends Thread {
       throttler.setBandwidth(bytesPerSec);
       long bytesRead = blockSender.sendBlock(nullStream, null, throttler);
       resultHandler.handle(block, null);
+      metrics.incrBlocksVerified();
       return bytesRead;
     } catch (IOException e) {
       resultHandler.handle(block, e);
     } finally {
       IOUtils.cleanup(null, blockSender);
     }
+    metrics.incrBlockVerificationFailures();
     return -1;
   }
 

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java

@@ -752,7 +752,7 @@ public class FSDirectory implements Closeable {
     try {
       int threads = (initThreads < 1) ? 1 : initThreads;
       LOG.info("Initializing quota with " + threads + " thread(s)");
-      long start = Time.now();
+      long start = Time.monotonicNow();
       QuotaCounts counts = new QuotaCounts.Builder().build();
       ForkJoinPool p = new ForkJoinPool(threads);
       RecursiveAction task = new InitQuotaTask(getBlockStoragePolicySuite(),
@@ -760,7 +760,7 @@ public class FSDirectory implements Closeable {
       p.execute(task);
       task.join();
       p.shutdown();
-      LOG.info("Quota initialization completed in " + (Time.now() - start) +
+      LOG.info("Quota initialization completed in " + (Time.monotonicNow() - start) +
           " milliseconds\n" + counts);
     } finally {
       writeUnlock();
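
The switch from `Time.now()` to `Time.monotonicNow()` matters because wall-clock time can jump (NTP corrections, manual clock changes) while a duration is being measured. A minimal illustration of the pattern adopted above:

```java
import org.apache.hadoop.util.Time;

public class ElapsedExample {
  public static void main(String[] args) throws InterruptedException {
    long start = Time.monotonicNow();  // immune to wall-clock adjustments
    Thread.sleep(100);                 // stand-in for the measured work
    System.out.println("took " + (Time.monotonicNow() - start) + " ms");
  }
}
```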

+ 8 - 7
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -180,7 +180,7 @@ import org.apache.hadoop.hdfs.DFSUtilClient;
 import org.apache.hadoop.hdfs.HAUtil;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.UnknownCryptoProtocolVersionException;
-import org.apache.hadoop.hdfs.protocol.AddECPolicyResponse;
+import org.apache.hadoop.hdfs.protocol.AddErasureCodingPolicyResponse;
 import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.BlockType;
@@ -7193,12 +7193,13 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
    *                      rebuilding
    * @return The according result of add operation.
    */
-  AddECPolicyResponse[] addErasureCodingPolicies(ErasureCodingPolicy[] policies,
-      final boolean logRetryCache) throws IOException {
+  AddErasureCodingPolicyResponse[] addErasureCodingPolicies(
+      ErasureCodingPolicy[] policies, final boolean logRetryCache)
+      throws IOException {
     final String operationName = "addErasureCodingPolicies";
     String addECPolicyName = "";
     checkOperation(OperationCategory.WRITE);
-    List<AddECPolicyResponse> responses = new ArrayList<>();
+    List<AddErasureCodingPolicyResponse> responses = new ArrayList<>();
     boolean success = false;
     writeLock();
     try {
@@ -7210,13 +7211,13 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
               FSDirErasureCodingOp.addErasureCodingPolicy(this, policy,
                   logRetryCache);
           addECPolicyName = newPolicy.getName();
-          responses.add(new AddECPolicyResponse(newPolicy));
+          responses.add(new AddErasureCodingPolicyResponse(newPolicy));
         } catch (HadoopIllegalArgumentException e) {
-          responses.add(new AddECPolicyResponse(policy, e));
+          responses.add(new AddErasureCodingPolicyResponse(policy, e));
         }
       }
       success = true;
-      return responses.toArray(new AddECPolicyResponse[0]);
+      return responses.toArray(new AddErasureCodingPolicyResponse[0]);
     } finally {
       writeUnlock(operationName);
       if (success) {

+ 5 - 4
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java

@@ -85,7 +85,7 @@ import org.apache.hadoop.hdfs.HDFSPolicyProvider;
 import org.apache.hadoop.hdfs.inotify.EventBatch;
 import org.apache.hadoop.hdfs.inotify.EventBatchList;
 import org.apache.hadoop.hdfs.protocol.AclException;
-import org.apache.hadoop.hdfs.protocol.AddECPolicyResponse;
+import org.apache.hadoop.hdfs.protocol.AddErasureCodingPolicyResponse;
 import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
 import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
 import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy;
@@ -2333,17 +2333,18 @@ public class NameNodeRpcServer implements NamenodeProtocols {
   }
 
   @Override
-  public AddECPolicyResponse[] addErasureCodingPolicies(
+  public AddErasureCodingPolicyResponse[] addErasureCodingPolicies(
       ErasureCodingPolicy[] policies) throws IOException {
     checkNNStartup();
     namesystem.checkSuperuserPrivilege();
     final CacheEntryWithPayload cacheEntry =
         RetryCache.waitForCompletion(retryCache, null);
     if (cacheEntry != null && cacheEntry.isSuccess()) {
-      return (AddECPolicyResponse[]) cacheEntry.getPayload();
+      return (AddErasureCodingPolicyResponse[]) cacheEntry.getPayload();
     }
     boolean success = false;
-    AddECPolicyResponse[] responses = new AddECPolicyResponse[0];
+    AddErasureCodingPolicyResponse[] responses =
+        new AddErasureCodingPolicyResponse[0];
     try {
       responses =
           namesystem.addErasureCodingPolicies(policies, cacheEntry != null);

+ 4 - 3
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/ECAdmin.java

@@ -21,7 +21,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
-import org.apache.hadoop.hdfs.protocol.AddECPolicyResponse;
+import org.apache.hadoop.hdfs.protocol.AddErasureCodingPolicyResponse;
 import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
 import org.apache.hadoop.hdfs.util.ECPolicyLoader;
 import org.apache.hadoop.io.erasurecode.ErasureCodeConstants;
@@ -175,9 +175,10 @@ public class ECAdmin extends Configured implements Tool {
         List<ErasureCodingPolicy> policies =
             new ECPolicyLoader().loadPolicy(filePath);
         if (policies.size() > 0) {
-          AddECPolicyResponse[] responses = dfs.addErasureCodingPolicies(
+          AddErasureCodingPolicyResponse[] responses =
+              dfs.addErasureCodingPolicies(
             policies.toArray(new ErasureCodingPolicy[policies.size()]));
-          for (AddECPolicyResponse response : responses) {
+          for (AddErasureCodingPolicyResponse response : responses) {
             System.out.println(response);
           }
         } else {

+ 11 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml

@@ -4629,4 +4629,15 @@
     </description>
   </property>
 
+  <property>
+    <name>dfs.qjm.operations.timeout</name>
+    <value>60s</value>
+    <description>
+      Common key to set timeout for related operations in
+      QuorumJournalManager. This setting supports multiple time unit suffixes
+      as described in dfs.heartbeat.interval.
+      If no suffix is specified then milliseconds is assumed.
+    </description>
+  </property>
+
</configuration>
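
Since the new key is read through `Configuration.getTimeDuration` (see the QuorumJournalManager hunk above), unit suffixes work both in the XML value and programmatically. A sketch of overriding it, assuming a plain client-side `HdfsConfiguration`:

```java
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.HdfsConfiguration;

public class QjmTimeoutExample {
  public static void main(String[] args) {
    Configuration conf = new HdfsConfiguration();
    // Suffix form, as in hdfs-default.xml; bare numbers mean milliseconds.
    conf.set("dfs.qjm.operations.timeout", "120s");
    long timeoutMs = conf.getTimeDuration(
        "dfs.qjm.operations.timeout", 60000, TimeUnit.MILLISECONDS);
    System.out.println("QJM operations timeout: " + timeoutMs + " ms");
  }
}
```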

+ 165 - 0
hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HdfsDataNodeAdminGuide.md

@@ -0,0 +1,165 @@
+<!---
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+
+HDFS DataNode Admin Guide
+=================
+
+<!-- MACRO{toc|fromDepth=0|toDepth=3} -->
+
+Overview
+--------
+
+The Hadoop Distributed File System (HDFS) namenode maintains the state of all datanodes.
+There are two types of state. The first type describes the liveness of a datanode, indicating whether
+the node is live, dead, or stale. The second type describes the admin state, indicating whether the node
+is in service, decommissioned, or under maintenance.
+
+When an administrator decommissions a datanode, the datanode is first transitioned into the
+`DECOMMISSION_INPROGRESS` state. After all blocks belonging to that datanode have been fully replicated elsewhere
+based on each block's replication factor, the datanode is transitioned to the `DECOMMISSIONED` state. After that,
+the administrator can shut down the node to perform long-term repair and maintenance that could take days or weeks.
+After the machine has been repaired, it can be recommissioned back to the cluster.
+
+Sometimes administrators only need to take datanodes down for minutes or hours to perform short-term repair or maintenance.
+In such a scenario, the HDFS block replication overhead incurred by decommission might not be necessary and a lightweight process is desirable.
+That is what the maintenance state is for. When an administrator puts a datanode into maintenance state, the datanode is first transitioned
+to the `ENTERING_MAINTENANCE` state. Once all blocks belonging to that datanode are minimally replicated elsewhere, the datanode
+is immediately transitioned to the `IN_MAINTENANCE` state. After the maintenance has completed, the administrator can take the datanode
+out of the maintenance state. In addition, maintenance state supports a timeout that allows administrators to configure the maximum duration
+for which a datanode is allowed to stay in maintenance state. After the timeout, the datanode is transitioned out of maintenance state
+automatically by HDFS without human intervention.
+
+In summary, datanode admin operations include the following:
+
+* Decommission
+* Recommission
+* Putting nodes in maintenance state
+* Taking nodes out of maintenance state
+
+And the datanode admin states include the following:
+
+* `NORMAL` The node is in service.
+* `DECOMMISSIONED` The node has been decommissioned.
+* `DECOMMISSION_INPROGRESS` The node is being transitioned to DECOMMISSIONED state.
+* `IN_MAINTENANCE` The node is in maintenance state.
+* `ENTERING_MAINTENANCE` The node is being transitioned to maintenance state.
+
+
+Host-level settings
+-----------
+
+To perform any of the datanode admin operations, there are two steps.
+
+* Update host-level configuration files to indicate the desired admin states of targeted datanodes. There are two supported formats for configuration files.
+    * Hostname-only configuration. Each line includes the hostname/ip address for a datanode. That is the default format.
+    * JSON-based configuration. The configuration is in JSON format. Each element maps to one datanode and each datanode can have multiple properties. This format is required to put datanodes into maintenance state.
+
+* Run the following command to have the namenode reload the host-level configuration files.
+`hdfs dfsadmin [-refreshNodes]`
+
+### Hostname-only configuration
+This is the default configuration used by the namenode. It only supports node decommission and recommission; it doesn't support admin operations related to maintenance state. Use `dfs.hosts` and `dfs.hosts.exclude` as explained in [hdfs-default.xml](./hdfs-default.xml).
+
+In the following example, `host1` and `host2` need to be in service.
+`host3` and `host4` need to be in decommissioned state.
+
+dfs.hosts file
+```text
+host1
+host2
+host3
+host4
+```
+dfs.hosts.exclude file
+```text
+host3
+host4
+```
+
+### JSON-based configuration
+
+The JSON-based format is the new configuration format that supports generic properties on datanodes. Set the following
+configurations to enable JSON-based format as explained in [hdfs-default.xml](./hdfs-default.xml).
+
+
+| Setting | Value |
+|:---- |:---- |
+|`dfs.namenode.hosts.provider.classname`| `org.apache.hadoop.hdfs.server.blockmanagement.CombinedHostFileManager`|
+|`dfs.hosts`| the path of the json hosts file |
+
+Here is the list of currently supported properties by HDFS.
+
+
+| Property | Description |
+|:---- |:---- |
+|`hostName`| Required. The host name of the datanode. |
+|`upgradeDomain`| Optional. The upgrade domain id of the datanode. |
+|`adminState`| Optional. The expected admin state. The default value is `NORMAL`; `DECOMMISSIONED` for decommission; `IN_MAINTENANCE` for maintenance state. |
+|`port`| Optional. The port number of the datanode. |
+|`maintenanceExpireTimeInMS`| Optional. The epoch time in milliseconds until which the datanode will remain in maintenance state. The default value is forever. |
+
+In the following example, `host1` and `host2` need to be in service, `host3` needs to be in decommissioned state, and `host4` needs to be in maintenance state.
+
+dfs.hosts file
+```json
+[
+  {
+    "hostName": "host1"
+  },
+  {
+    "hostName": "host2",
+    "upgradeDomain": "ud0"
+  },
+  {
+    "hostName": "host3",
+    "adminState": "DECOMMISSIONED"
+  },
+  {
+    "hostName": "host4",
+    "upgradeDomain": "ud2",
+    "adminState": "IN_MAINTENANCE"
+  }
+]
+```
+
+
+Cluster-level settings
+-----------
+
+There are several cluster-level settings related to datanode administration.
+For common use cases, you should rely on the default values. Please refer to
+[hdfs-default.xml](./hdfs-default.xml) for descriptions and default values.
+
+```text
+dfs.namenode.maintenance.replication.min
+dfs.namenode.decommission.interval
+dfs.namenode.decommission.blocks.per.interval
+dfs.namenode.decommission.max.concurrent.tracked.nodes
+```
+
+Metrics
+-----------
+
+Admin states are part of the namenode's webUI and JMX. As explained in [HDFSCommands.html](./HDFSCommands.html), you can also verify admin states using the following commands.
+
+Use `dfsadmin` to check admin states at the cluster level.
+
+`hdfs dfsadmin -report`
+
+Use `fsck` to check admin states of datanodes storing data at a specific path. For backward compatibility, a special flag is required to return maintenance states.
+
+```text
+hdfs fsck <path> // only show decommission state
+hdfs fsck <path> -maintenance // include maintenance state
+```

+ 167 - 0
hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HdfsUpgradeDomain.md

@@ -0,0 +1,167 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+HDFS Upgrade Domain
+====================
+
+<!-- MACRO{toc|fromDepth=0|toDepth=3} -->
+
+
+Introduction
+------------
+
+The current default HDFS block placement policy guarantees that a block’s 3 replicas will be placed
+on at least 2 racks. Specifically, one replica is placed on one rack and the other two replicas
+are placed on another rack during the write pipeline. This is a good compromise between rack diversity and write-pipeline efficiency. Note that
+subsequent load balancing or machine membership changes might cause the 3 replicas of a block to be distributed
+across 3 different racks. Thus any 3 datanodes in different racks could store the 3 replicas of a block.
+
+
+However, the default placement policy impacts how we should perform datanode rolling upgrades.
+[HDFS Rolling Upgrade document](./HdfsRollingUpgrade.html) explains how the datanodes can be upgraded in a rolling
+fashion without downtime. Because any 3 datanodes in different racks could store all the replicas of a block, it is
+important to perform a sequential restart of datanodes one at a time in order to minimize the impact on data availability
+and read/write operations. Upgrading one rack at a time is another option, but that will increase the chance of
+data unavailability if there is a machine failure on another rack during the upgrade.
+
+The side effect of this sequential datanode rolling upgrade strategy is longer
+upgrade duration for larger clusters.
+
+
+Architecture
+-------
+
+To address the limitation of block placement policy on rolling upgrade, the concept of upgrade domain
+has been added to HDFS via a new block placement policy. The idea is to group datanodes in a new
+dimension called upgrade domain, in addition to the existing rack-based grouping.
+For example, we can assign all datanodes in the first position of any rack to upgrade domain ud_01,
+nodes in the second position to upgrade domain ud_02 and so on.
+
+The namenode provides the BlockPlacementPolicy interface to support any custom block placement besides
+the default block placement policy. A new upgrade domain block placement policy based on this interface
+is available in HDFS. It makes sure replicas of any given block are distributed across machines from different upgrade domains.
+By default, 3 replicas of any given block are placed on 3 different upgrade domains. This means all datanodes belonging to
+a specific upgrade domain collectively won't store more than one replica of any block.
+
+With upgrade domain block placement policy in place, we can upgrade all datanodes belonging to one upgrade domain at the
+same time without impacting data availability. Only after finishing one upgrade domain do we move to the next
+upgrade domain, until all upgrade domains have been upgraded. This procedure ensures that no two replicas of any given
+block are upgraded at the same time, which means we can upgrade many machines at the same time in a large cluster.
+As the cluster continues to scale, new machines are added to the existing upgrade domains without impacting the
+parallelism of the upgrade.
+
+For an existing cluster with the default block placement policy, after switching to the new upgrade domain block
+placement policy, any newly created blocks will conform to the new policy. Old blocks allocated under the old policy
+need to be migrated to the new policy. There is a migrator tool you can use; see HDFS-8789 for details.
+
+
+Settings
+-------
+
+To enable upgrade domain on your clusters, please follow these steps:
+
+* Assign datanodes to individual upgrade domain groups.
+* Enable upgrade domain block placement policy.
+* Migrate blocks allocated based on old block placement policy to the new upgrade domain policy.
+
+### Upgrade domain id assignment
+
+How a datanode maps to an upgrade domain id is defined by administrators and specific to the cluster layout.
+A common way is to use the rack position of the machine as its upgrade domain id.
+
+To configure the mapping from host name to upgrade domain id, we need to use the JSON-based host configuration file
+by setting the following properties as explained in [hdfs-default.xml](./hdfs-default.xml).
+
+| Setting | Value |
+|:---- |:---- |
+|`dfs.namenode.hosts.provider.classname` | `org.apache.hadoop.hdfs.server.blockmanagement.CombinedHostFileManager`|
+|`dfs.hosts`| the path of the json hosts file |
+
+The json hosts file defines the properties for all hosts. In the following example,
+there are 4 datanodes in 2 racks; the machines at rack position 01 belong to upgrade domain 01;
+the machines at rack position 02 belong to upgrade domain 02.
+
+```json
+[
+  {
+    "hostName": "dcA-rackA-01",
+    "upgradeDomain": "01"
+  },
+  {
+    "hostName": "dcA-rackA-02",
+    "upgradeDomain": "02"
+  },
+  {
+    "hostName": "dcA-rackB-01",
+    "upgradeDomain": "01"
+  },
+  {
+    "hostName": "dcA-rackB-02",
+    "upgradeDomain": "02"
+  }
+]
+```
+
+
+### Enable upgrade domain block placement policy
+
+After each datanode has been assigned an upgrade domain id, the next step is to enable
+upgrade domain block placement policy with the following configuration as explained in [hdfs-default.xml](./hdfs-default.xml).
+
+| Setting | Value |
+|:---- |:---- |
+|`dfs.block.replicator.classname`| `org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyWithUpgradeDomain` |
+
+After restarting the namenode, the new policy will be used for any new block allocation.
+
+
+### Migration
+
+If you change the block placement policy of an existing cluster, you will need to make sure the
+blocks allocated prior to the block placement policy change conform to the new block placement policy.
+
+HDFS-8789 provides the initial draft patch of a client-side migration tool. After the tool is committed,
+we will be able to describe how to use the tool.
+
+
+Rolling restart based on upgrade domains
+-------
+
+During cluster administration, we might need to restart datanodes to pick up a new configuration, a new Hadoop release,
+a new JVM version, and so on. With upgrade domains enabled and all blocks on the cluster conforming to the new policy, we can now
+restart datanodes in batches, one upgrade domain at a time. Whether it is a manual process or done via automation, the steps are
+
+* Group datanodes by upgrade domains based on dfsadmin or JMX's datanode information.
+* For each upgrade domain
+    * (Optional) Put all the nodes in that upgrade domain into maintenance state (refer to [HdfsDataNodeAdminGuide.html](./HdfsDataNodeAdminGuide.html)).
+    * Restart all those nodes.
+    * Check if all datanodes are healthy after restart. Unhealthy nodes should be decommissioned.
+    * (Optional) Take all those nodes out of maintenance state.
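+
+A minimal bash sketch of that per-domain loop is shown below. It assumes `hdfs dfsadmin -report` prints a `Hostname:` line and an `Upgrade domain:` line for each datanode; `restart_datanode` and `wait_for_healthy` are hypothetical helpers standing in for your own restart and health-check tooling.
+
+```bash
+#!/usr/bin/env bash
+# Hypothetical rolling-restart loop over upgrade domains. The report parsing
+# below assumes each datanode entry in "hdfs dfsadmin -report" contains
+# "Hostname:" and "Upgrade domain:" lines; adjust to your report format.
+report=$(hdfs dfsadmin -report)
+
+for domain in $(echo "$report" | awk '/^Upgrade domain:/ {print $3}' | sort -u); do
+  # Collect the hostnames belonging to this upgrade domain.
+  hosts=$(echo "$report" | awk -v d="$domain" '
+      /^Hostname:/ {h = $2}
+      /^Upgrade domain:/ && $3 == d {print h}')
+  for host in $hosts; do
+    restart_datanode "$host"   # placeholder: ssh, Ansible, etc.
+  done
+  # Placeholder health gate: wait until every node in this domain reports
+  # healthy again before moving on to the next upgrade domain.
+  wait_for_healthy $hosts
+done
+```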
+
+
+Metrics
+-----------
+
+Upgrade domains are part of the namenode's JMX output. As explained in [HDFSCommands.html](./HDFSCommands.html), you can also verify upgrade domains using the following commands.
+
+Use `dfsadmin` to check upgrade domains at the cluster level.
+
+`hdfs dfsadmin -report`
+
+Use `fsck` to check upgrade domains of datanodes storing data at a specific path.
+
+`hdfs fsck <path> -files -blocks -upgradedomains`
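+
+As a sketch, the same information is available from the namenode's JMX endpoint; the host is a placeholder and the HTTP port may differ on your cluster, and per-datanode upgrade domains appear in the `LiveNodes` attribute of the `NameNodeInfo` bean when assigned:
+
+`curl 'http://<namenode-host>:9870/jmx?qry=Hadoop:service=NameNode,name=NameNodeInfo'`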

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java

@@ -108,7 +108,7 @@ import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.MiniDFSCluster.NameNodeInfo;
 import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
 import org.apache.hadoop.hdfs.client.HdfsDataInputStream;
-import org.apache.hadoop.hdfs.protocol.AddECPolicyResponse;
+import org.apache.hadoop.hdfs.protocol.AddErasureCodingPolicyResponse;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
 import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
@@ -1472,7 +1472,7 @@ public class DFSTestUtil {
     ErasureCodingPolicy newPolicy1 =
         new ErasureCodingPolicy(ErasureCodeConstants.RS_3_2_SCHEMA, 8 * 1024);
     ErasureCodingPolicy[] policyArray = new ErasureCodingPolicy[] {newPolicy1};
-    AddECPolicyResponse[] responses =
+    AddErasureCodingPolicyResponse[] responses =
         filesystem.addErasureCodingPolicies(policyArray);
     newPolicy1 = responses[0].getPolicy();
 

+ 4 - 3
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestErasureCodingPolicies.java

@@ -23,7 +23,7 @@ import org.apache.hadoop.fs.CreateFlag;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
-import org.apache.hadoop.hdfs.protocol.AddECPolicyResponse;
+import org.apache.hadoop.hdfs.protocol.AddErasureCodingPolicyResponse;
 import org.apache.hadoop.hdfs.protocol.DirectoryListing;
 import org.apache.hadoop.hdfs.protocol.SystemErasureCodingPolicies;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
@@ -686,7 +686,7 @@ public class TestErasureCodingPolicies {
     ErasureCodingPolicy newPolicy =
         new ErasureCodingPolicy(toAddSchema, 128 * 1024);
     ErasureCodingPolicy[] policyArray = new ErasureCodingPolicy[]{newPolicy};
-    AddECPolicyResponse[] responses =
+    AddErasureCodingPolicyResponse[] responses =
         fs.addErasureCodingPolicies(policyArray);
     assertEquals(1, responses.length);
     assertFalse(responses[0].isSucceed());
@@ -839,7 +839,8 @@ public class TestErasureCodingPolicies {
         new ErasureCodingPolicy(ErasureCodeConstants.RS_3_2_SCHEMA, 8 * 1024);
     ErasureCodingPolicy[] policyArray =
         new ErasureCodingPolicy[] {newPolicy1};
-    AddECPolicyResponse[] responses = fs.addErasureCodingPolicies(policyArray);
+    AddErasureCodingPolicyResponse[] responses =
+        fs.addErasureCodingPolicies(policyArray);
     assertEquals(1, responses.length);
     assertTrue(responses[0].isSucceed());
     newPolicy1 = responses[0].getPolicy();

+ 13 - 12
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocolPB/TestPBHelper.java

@@ -19,7 +19,7 @@ package org.apache.hadoop.hdfs.protocolPB;
 
 
 import com.google.protobuf.UninitializedMessageException;
-import org.apache.hadoop.hdfs.protocol.AddECPolicyResponse;
+import org.apache.hadoop.hdfs.protocol.AddErasureCodingPolicyResponse;
 import org.apache.hadoop.hdfs.protocol.SystemErasureCodingPolicies;
 import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports;
 
@@ -913,14 +913,15 @@ public class TestPBHelper {
     // Check conversion of the built-in policies.
     for (ErasureCodingPolicy policy :
         SystemErasureCodingPolicies.getPolicies()) {
-      AddECPolicyResponse response = new AddECPolicyResponse(policy);
-      HdfsProtos.AddECPolicyResponseProto proto = PBHelperClient
-          .convertAddECPolicyResponse(response);
+      AddErasureCodingPolicyResponse response =
+          new AddErasureCodingPolicyResponse(policy);
+      HdfsProtos.AddErasureCodingPolicyResponseProto proto = PBHelperClient
+          .convertAddErasureCodingPolicyResponse(response);
       // Optional fields should not be set.
       assertFalse("Unnecessary field is set.", proto.hasErrorMsg());
       // Convert proto back to an object and check for equality.
-      AddECPolicyResponse convertedResponse = PBHelperClient
-          .convertAddECPolicyResponse(proto);
+      AddErasureCodingPolicyResponse convertedResponse = PBHelperClient
+          .convertAddErasureCodingPolicyResponse(proto);
       assertEquals("Converted policy not equal", response.getPolicy(),
           convertedResponse.getPolicy());
       assertEquals("Converted policy not equal", response.isSucceed(),
@@ -929,13 +930,13 @@ public class TestPBHelper {
 
     ErasureCodingPolicy policy = SystemErasureCodingPolicies
         .getPolicies().get(0);
-    AddECPolicyResponse response =
-        new AddECPolicyResponse(policy, "failed");
-    HdfsProtos.AddECPolicyResponseProto proto = PBHelperClient
-        .convertAddECPolicyResponse(response);
+    AddErasureCodingPolicyResponse response =
+        new AddErasureCodingPolicyResponse(policy, "failed");
+    HdfsProtos.AddErasureCodingPolicyResponseProto proto = PBHelperClient
+        .convertAddErasureCodingPolicyResponse(response);
     // Convert proto back to an object and check for equality.
-    AddECPolicyResponse convertedResponse = PBHelperClient
-        .convertAddECPolicyResponse(proto);
+    AddErasureCodingPolicyResponse convertedResponse = PBHelperClient
+        .convertAddErasureCodingPolicyResponse(proto);
     assertEquals("Converted policy not equal", response.getPolicy(),
         convertedResponse.getPolicy());
     assertEquals("Converted policy not equal", response.getErrorMsg(),

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java

@@ -47,7 +47,7 @@ import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.StripedFileTestUtil;
-import org.apache.hadoop.hdfs.protocol.AddECPolicyResponse;
+import org.apache.hadoop.hdfs.protocol.AddErasureCodingPolicyResponse;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
 import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicyState;
@@ -735,7 +735,7 @@ public class TestFSEditLogLoader {
       ErasureCodingPolicy newPolicy =
           new ErasureCodingPolicy(schema, cellSize, (byte) 0);
       ErasureCodingPolicy[] policyArray = new ErasureCodingPolicy[]{newPolicy};
-      AddECPolicyResponse[] responses =
+      AddErasureCodingPolicyResponse[] responses =
           fs.addErasureCodingPolicies(policyArray);
       assertEquals(1, responses.length);
       assertTrue(responses[0].isSucceed());

+ 3 - 2
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImage.java

@@ -35,7 +35,7 @@ import java.io.IOException;
 import java.util.EnumSet;
 
 import org.apache.hadoop.hdfs.StripedFileTestUtil;
-import org.apache.hadoop.hdfs.protocol.AddECPolicyResponse;
+import org.apache.hadoop.hdfs.protocol.AddErasureCodingPolicyResponse;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicyState;
 import org.apache.hadoop.hdfs.protocol.SystemErasureCodingPolicies;
@@ -847,7 +847,8 @@ public class TestFSImage {
       ErasureCodingPolicy newPolicy =
           new ErasureCodingPolicy(newSchema, 2 * 1024, (byte) 254);
       ErasureCodingPolicy[] policies = new ErasureCodingPolicy[]{newPolicy};
-      AddECPolicyResponse[] ret = fs.addErasureCodingPolicies(policies);
+      AddErasureCodingPolicyResponse[] ret =
+          fs.addErasureCodingPolicies(policies);
       assertEquals(1, ret.length);
       assertEquals(true, ret[0].isSucceed());
       newPolicy = ret[0].getPolicy();

+ 27 - 17
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestCombinedHostsFileReader.java

@@ -20,8 +20,6 @@ package org.apache.hadoop.hdfs.util;
 import java.io.File;
 import java.io.FileWriter;
 
-import java.util.Set;
-
 import org.apache.hadoop.hdfs.protocol.DatanodeAdminProperties;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.junit.Before;
@@ -30,19 +28,21 @@ import org.junit.Test;
 
 import static org.junit.Assert.assertEquals;
 
-/*
- * Test for JSON based HostsFileReader
+/**
+ * Test for JSON based HostsFileReader.
  */
 public class TestCombinedHostsFileReader {
 
   // Using /test/build/data/tmp directory to store temporary files
-  static final String HOSTS_TEST_DIR = GenericTestUtils.getTestDir()
+  static final String HOSTSTESTDIR = GenericTestUtils.getTestDir()
       .getAbsolutePath();
-  File NEW_FILE = new File(HOSTS_TEST_DIR, "dfs.hosts.new.json");
+  private final File newFile = new File(HOSTSTESTDIR, "dfs.hosts.new.json");
 
-  static final String TEST_CACHE_DATA_DIR =
+  static final String TESTCACHEDATADIR =
       System.getProperty("test.cache.data", "build/test/cache");
-  File EXISTING_FILE = new File(TEST_CACHE_DATA_DIR, "dfs.hosts.json");
+  private final File jsonFile = new File(TESTCACHEDATADIR, "dfs.hosts.json");
+  private final File legacyFile =
+      new File(TESTCACHEDATADIR, "legacy.dfs.hosts.json");
 
   @Before
   public void setUp() throws Exception {
@@ -51,18 +51,28 @@ public class TestCombinedHostsFileReader {
   @After
   public void tearDown() throws Exception {
     // Delete test file after running tests
-    NEW_FILE.delete();
+    newFile.delete();
+
+  }
 
+  /*
+   * Load the legacy test json file
+   */
+  @Test
+  public void testLoadLegacyJsonFile() throws Exception {
+    DatanodeAdminProperties[] all =
+        CombinedHostsFileReader.readFile(legacyFile.getAbsolutePath());
+    assertEquals(7, all.length);
   }
 
   /*
-   * Load the existing test json file
+   * Load the test json file
    */
   @Test
   public void testLoadExistingJsonFile() throws Exception {
-    Set<DatanodeAdminProperties> all =
-        CombinedHostsFileReader.readFile(EXISTING_FILE.getAbsolutePath());
-    assertEquals(7, all.size());
+    DatanodeAdminProperties[] all =
+        CombinedHostsFileReader.readFile(jsonFile.getAbsolutePath());
+    assertEquals(7, all.length);
   }
 
   /*
@@ -70,11 +80,11 @@ public class TestCombinedHostsFileReader {
    */
   @Test
   public void testEmptyCombinedHostsFileReader() throws Exception {
-    FileWriter hosts = new FileWriter(NEW_FILE);
+    FileWriter hosts = new FileWriter(newFile);
     hosts.write("");
     hosts.close();
-    Set<DatanodeAdminProperties> all =
-        CombinedHostsFileReader.readFile(NEW_FILE.getAbsolutePath());
-    assertEquals(0, all.size());
+    DatanodeAdminProperties[] all =
+        CombinedHostsFileReader.readFile(newFile.getAbsolutePath());
+    assertEquals(0, all.length);
   }
 }

+ 9 - 7
hadoop-hdfs-project/hadoop-hdfs/src/test/resources/dfs.hosts.json

@@ -1,7 +1,9 @@
-{"hostName": "host1"}
-{"hostName": "host2", "upgradeDomain": "ud0"}
-{"hostName": "host3", "adminState": "DECOMMISSIONED"}
-{"hostName": "host4", "upgradeDomain": "ud2", "adminState": "DECOMMISSIONED"}
-{"hostName": "host5", "port": 8090}
-{"hostName": "host6", "adminState": "IN_MAINTENANCE"}
-{"hostName": "host7", "adminState": "IN_MAINTENANCE", "maintenanceExpireTimeInMS": "112233"}
+[
+  {"hostName": "host1"},
+  {"hostName": "host2", "upgradeDomain": "ud0"},
+  {"hostName": "host3", "adminState": "DECOMMISSIONED"},
+  {"hostName": "host4", "upgradeDomain": "ud2", "adminState": "DECOMMISSIONED"},
+  {"hostName": "host5", "port": 8090},
+  {"hostName": "host6", "adminState": "IN_MAINTENANCE"},
+  {"hostName": "host7", "adminState": "IN_MAINTENANCE", "maintenanceExpireTimeInMS": "112233"}
+]

+ 7 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/resources/legacy.dfs.hosts.json

@@ -0,0 +1,7 @@
+{"hostName": "host1"}
+{"hostName": "host2", "upgradeDomain": "ud0"}
+{"hostName": "host3", "adminState": "DECOMMISSIONED"}
+{"hostName": "host4", "upgradeDomain": "ud2", "adminState": "DECOMMISSIONED"}
+{"hostName": "host5", "port": 8090}
+{"hostName": "host6", "adminState": "IN_MAINTENANCE"}
+{"hostName": "host7", "adminState": "IN_MAINTENANCE", "maintenanceExpireTimeInMS": "112233"}

+ 6 - 6
hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/BaileyBorweinPlouffe.java

@@ -29,6 +29,8 @@ import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.fs.FileSystem;
@@ -47,10 +49,9 @@ import org.apache.hadoop.mapreduce.RecordReader;
 import org.apache.hadoop.mapreduce.Reducer;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.util.Time;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import com.google.common.base.Charsets;
 
@@ -83,8 +84,7 @@ public class BaileyBorweinPlouffe extends Configured implements Tool {
   private static final String DIGIT_SIZE_PROPERTY = NAME + ".digit.size";
   private static final String DIGIT_PARTS_PROPERTY = NAME + ".digit.parts";
 
-  private static final Logger LOG =
-      LoggerFactory.getLogger(BaileyBorweinPlouffe.class);
+  private static final Log LOG = LogFactory.getLog(BaileyBorweinPlouffe.class);
 
   /** Mapper class computing digits of Pi. */
   public static class BbpMapper extends
@@ -380,7 +380,7 @@ public class BaileyBorweinPlouffe extends Configured implements Tool {
 
     // start a map/reduce job
     out.println("\nStarting Job ...");
-    final long startTime = System.currentTimeMillis();
+    final long startTime = Time.monotonicNow();
     try {
       if (!job.waitForCompletion(true)) {
         out.println("Job failed.");
@@ -389,7 +389,7 @@ public class BaileyBorweinPlouffe extends Configured implements Tool {
     } catch (Exception e) {
       throw new RuntimeException(e);
     } finally {
-      final double duration = (System.currentTimeMillis() - startTime)/1000.0;
+      final double duration = (Time.monotonicNow() - startTime)/1000.0;
       out.println("Duration is " + duration + " seconds.");
     }
     out.println("Output file: " + hexfile);

+ 3 - 4
hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/DBCountPageView.java

@@ -29,6 +29,8 @@ import java.sql.SQLException;
 import java.sql.Statement;
 import java.util.Random;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.io.LongWritable;
@@ -47,8 +49,6 @@ import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
 import org.hsqldb.server.Server;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 /**
  * This is a demonstrative program, which uses DBInputFormat for reading
@@ -77,8 +77,7 @@ import org.slf4j.LoggerFactory;
  */
 public class DBCountPageView extends Configured implements Tool {
 
-  private static final Logger LOG =
-      LoggerFactory.getLogger(DBCountPageView.class);
+  private static final Log LOG = LogFactory.getLog(DBCountPageView.class);
   
   private Connection connection;
   private boolean initialized = false;

+ 4 - 3
hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/dancing/DancingLinks.java

@@ -19,8 +19,8 @@ package org.apache.hadoop.examples.dancing;
 
 import java.util.*;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 
 /**
  * A generic solver for tile laying problems using Knuth's dancing link
@@ -35,7 +35,8 @@ import org.slf4j.LoggerFactory;
  * The type parameter ColumnName is the class of application's column names.
  */
 public class DancingLinks<ColumnName> {
-  private static final Logger LOG = LoggerFactory.getLogger(DancingLinks.class);
+  private static final Log LOG = 
+    LogFactory.getLog(DancingLinks.class.getName());
   
   /**
    * A cell in the table with up/down and left/right links that form doubly

+ 3 - 3
hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/pi/DistSum.java

@@ -28,6 +28,8 @@ import java.util.List;
 import java.util.Map;
 import java.util.concurrent.Callable;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.examples.pi.math.Summation;
@@ -53,8 +55,6 @@ import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 /**
  * The main class for computing sums using map/reduce jobs.
@@ -66,7 +66,7 @@ import org.slf4j.LoggerFactory;
  * a mix-type job may be executed on either side.
  */
 public final class DistSum extends Configured implements Tool {
-  private static final Logger LOG = LoggerFactory.getLogger(DistSum.class);
+  private static final Log LOG = LogFactory.getLog(DistSum.class);
 
   private static final String NAME = DistSum.class.getSimpleName();
   private static final String N_PARTS = "mapreduce.pi." + NAME + ".nParts";

+ 3 - 3
hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraGen.java

@@ -25,6 +25,8 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.zip.Checksum;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.fs.Path;
@@ -47,8 +49,6 @@ import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 import org.apache.hadoop.util.PureJavaCrc32;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 /**
  * Generate the official GraySort input data set.
@@ -66,7 +66,7 @@ import org.slf4j.LoggerFactory;
  * <b>bin/hadoop jar hadoop-*-examples.jar teragen 10000000000 in-dir</b>
  */
 public class TeraGen extends Configured implements Tool {
-  private static final Logger LOG = LoggerFactory.getLogger(TeraGen.class);
+  private static final Log LOG = LogFactory.getLog(TeraGen.class);
 
   public enum Counters {CHECKSUM}
 

+ 3 - 4
hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraOutputFormat.java

@@ -21,6 +21,8 @@ package org.apache.hadoop.examples.terasort;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileStatus;
@@ -36,15 +38,12 @@ import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 import org.apache.hadoop.mapreduce.security.TokenCache;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 /**
  * An output format that writes the key and value appended together.
  */
 public class TeraOutputFormat extends FileOutputFormat<Text,Text> {
-  private static final Logger LOG =
-      LoggerFactory.getLogger(TeraOutputFormat.class);
+  private static final Log LOG = LogFactory.getLog(TeraOutputFormat.class);
   private OutputCommitter committer = null;
 
   /**

+ 3 - 4
hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraScheduler.java

@@ -21,18 +21,17 @@ package org.apache.hadoop.examples.terasort;
 import java.io.*;
 import java.util.*;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapreduce.InputSplit;
 import org.apache.hadoop.mapreduce.lib.input.FileSplit;
 import org.apache.hadoop.mapreduce.server.tasktracker.TTConfig;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import com.google.common.base.Charsets;
 
 class TeraScheduler {
-  private static final Logger LOG =
-      LoggerFactory.getLogger(TeraScheduler.class);
+  private static final Log LOG = LogFactory.getLog(TeraScheduler.class);
   private Split[] splits;
   private List<Host> hosts = new ArrayList<Host>();
   private int slotsPerHost;

+ 3 - 3
hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraSort.java

@@ -23,6 +23,8 @@ import java.io.IOException;
 import java.io.PrintStream;
 import java.net.URI;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
@@ -36,8 +38,6 @@ import org.apache.hadoop.mapreduce.Partitioner;
 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 /**
  * Generates the sampled split points, launches the job, and waits for it to
@@ -47,7 +47,7 @@ import org.slf4j.LoggerFactory;
  * <b>bin/hadoop jar hadoop-*-examples.jar terasort in-dir out-dir</b>
  */
 public class TeraSort extends Configured implements Tool {
-  private static final Logger LOG = LoggerFactory.getLogger(TeraSort.class);
+  private static final Log LOG = LogFactory.getLog(TeraSort.class);
 
   /**
    * A partitioner that splits text keys into roughly equal partitions

+ 3 - 3
hadoop-mapreduce-project/hadoop-mapreduce-examples/src/test/java/org/apache/hadoop/examples/terasort/TestTeraSort.java

@@ -20,6 +20,8 @@ package org.apache.hadoop.examples.terasort;
 import java.io.File;
 import java.io.IOException;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.FileAlreadyExistsException;
@@ -27,14 +29,12 @@ import org.apache.hadoop.mapred.HadoopTestCase;
 import org.apache.hadoop.util.ToolRunner;
 import org.junit.After;
 import org.junit.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.fail;
 
 public class TestTeraSort extends HadoopTestCase {
-  private static final Logger LOG = LoggerFactory.getLogger(TestTeraSort.class);
+  private static Log LOG = LogFactory.getLog(TestTeraSort.class);
   
   public TestTeraSort()
       throws IOException {

+ 3 - 1
hadoop-project/src/site/site.xml

@@ -101,7 +101,9 @@
       <item name="Synthetic Load Generator" href="hadoop-project-dist/hadoop-hdfs/SLGUserGuide.html"/>
       <item name="Erasure Coding" href="hadoop-project-dist/hadoop-hdfs/HDFSErasureCoding.html"/>
       <item name="Disk Balancer" href="hadoop-project-dist/hadoop-hdfs/HDFSDiskbalancer.html"/>
-   </menu>
+      <item name="Upgrade Domain" href="hadoop-project-dist/hadoop-hdfs/HdfsUpgradeDomain.html"/>
+      <item name="DataNode Admin" href="hadoop-project-dist/hadoop-hdfs/HdfsDataNodeAdminGuide.html"/>
+    </menu>
 
     <menu name="Ozone" inherit="top">
       <item name="Getting Started" href="hadoop-project-dist/hadoop-hdfs/OzoneGettingStarted.html"/>

+ 1 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/factories/impl/pb/RpcClientFactoryPBImpl.java

@@ -45,7 +45,6 @@ public class RpcClientFactoryPBImpl implements RpcClientFactory {
   private static final String PB_IMPL_CLASS_SUFFIX = "PBClientImpl";
   
   private static final RpcClientFactoryPBImpl self = new RpcClientFactoryPBImpl();
-  private Configuration localConf = new Configuration();
   private ConcurrentMap<Class<?>, Constructor<?>> cache = new ConcurrentHashMap<Class<?>, Constructor<?>>();
   
   public static RpcClientFactoryPBImpl get() {
@@ -62,7 +61,7 @@ public class RpcClientFactoryPBImpl implements RpcClientFactory {
     if (constructor == null) {
       Class<?> pbClazz = null;
       try {
-        pbClazz = localConf.getClassByName(getPBImplClassName(protocol));
+        pbClazz = conf.getClassByName(getPBImplClassName(protocol));
       } catch (ClassNotFoundException e) {
         throw new YarnRuntimeException("Failed to load class: ["
             + getPBImplClassName(protocol) + "]", e);

+ 2 - 3
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/factories/impl/pb/RpcServerFactoryPBImpl.java

@@ -51,7 +51,6 @@ public class RpcServerFactoryPBImpl implements RpcServerFactory {
   
   private static final RpcServerFactoryPBImpl self = new RpcServerFactoryPBImpl();
 
-  private Configuration localConf = new Configuration();
   private ConcurrentMap<Class<?>, Constructor<?>> serviceCache = new ConcurrentHashMap<Class<?>, Constructor<?>>();
   private ConcurrentMap<Class<?>, Method> protoCache = new ConcurrentHashMap<Class<?>, Method>();
   
@@ -80,7 +79,7 @@ public class RpcServerFactoryPBImpl implements RpcServerFactory {
     if (constructor == null) {
       Class<?> pbServiceImplClazz = null;
       try {
-        pbServiceImplClazz = localConf
+        pbServiceImplClazz = conf
             .getClassByName(getPbServiceImplClassName(protocol));
       } catch (ClassNotFoundException e) {
         throw new YarnRuntimeException("Failed to load class: ["
@@ -113,7 +112,7 @@ public class RpcServerFactoryPBImpl implements RpcServerFactory {
     if (method == null) {
       Class<?> protoClazz = null;
       try {
-        protoClazz = localConf.getClassByName(getProtoClassName(protocol));
+        protoClazz = conf.getClassByName(getProtoClassName(protocol));
       } catch (ClassNotFoundException e) {
         throw new YarnRuntimeException("Failed to load class: ["
             + getProtoClassName(protocol) + "]", e);

+ 49 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/factories/impl/pb/TestRpcClientFactoryPBImpl.java

@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.factories.impl.pb;
+
+import org.apache.hadoop.conf.Configuration;
+import org.junit.Test;
+
+import java.net.InetSocketAddress;
+
+import static org.mockito.Matchers.anyString;
+import static org.mockito.Mockito.atLeastOnce;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+
+/**
+ * Test class for RpcClientFactoryPBImpl.
+ */
+public class TestRpcClientFactoryPBImpl {
+  @Test
+  public void testToUseCustomClassloader() throws Exception {
+    Configuration configuration = mock(Configuration.class);
+    RpcClientFactoryPBImpl rpcClientFactoryPB = RpcClientFactoryPBImpl.get();
+    try {
+      rpcClientFactoryPB.getClient(
+          Class.forName("org.apache.hadoop.yarn.api.ApplicationClientProtocol"),
+          -1, new InetSocketAddress(0), configuration);
+    } catch (Exception e) {
+      // Do nothing
+    }
+    verify(configuration, atLeastOnce()).getClassByName(anyString());
+  }
+
+}

+ 48 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/factories/impl/pb/TestRpcServerFactoryPBImpl.java

@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.factories.impl.pb;
+
+import org.apache.hadoop.conf.Configuration;
+import org.junit.Test;
+
+import java.net.InetSocketAddress;
+
+import static org.mockito.Matchers.anyString;
+import static org.mockito.Mockito.atLeastOnce;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+
+/**
+ * Test class for RpcServerFactoryPBImpl.
+ */
+public class TestRpcServerFactoryPBImpl {
+  @Test
+  public void testToUseCustomClassloader() throws Exception {
+    Configuration configuration = mock(Configuration.class);
+    RpcServerFactoryPBImpl rpcServerFactoryPB = RpcServerFactoryPBImpl.get();
+    try {
+      rpcServerFactoryPB.getServer(
+          Class.forName("org.apache.hadoop.yarn.api.ApplicationClientProtocol"),
+          -1, new InetSocketAddress(0), configuration, null, 1);
+    } catch (Exception e) {
+      // Do nothing
+    }
+    verify(configuration, atLeastOnce()).getClassByName(anyString());
+  }
+}

+ 6 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandler.java

@@ -173,4 +173,10 @@ public interface CGroupsHandler {
    */
   String getCGroupParam(CGroupController controller, String cGroupId,
       String param) throws ResourceHandlerException;
+
+  /**
+   * Returns CGroup Mount Path.
+   * @return parameter value as read from the parameter file
+   */
+  String getCGroupMountPath();
 }

+ 5 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandlerImpl.java

@@ -603,4 +603,9 @@ class CGroupsHandlerImpl implements CGroupsHandler {
           "Unable to read from " + cGroupParamPath);
     }
   }
+
+  @Override
+  public String getCGroupMountPath() {
+    return cGroupMountPath;
+  }
 }

+ 8 - 5
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/DockerLinuxContainerRuntime.java

@@ -167,13 +167,12 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
   public static final String ENV_DOCKER_CONTAINER_LOCAL_RESOURCE_MOUNTS =
       "YARN_CONTAINER_RUNTIME_DOCKER_LOCAL_RESOURCE_MOUNTS";
 
-  static final String CGROUPS_ROOT_DIRECTORY = "/sys/fs/cgroup";
-
   private Configuration conf;
   private DockerClient dockerClient;
   private PrivilegedOperationExecutor privilegedOperationExecutor;
   private Set<String> allowedNetworks = new HashSet<>();
   private String defaultNetwork;
+  private String cgroupsRootDirectory;
   private CGroupsHandler cGroupsHandler;
   private AccessControlList privilegedContainersAcl;
 
@@ -228,6 +227,7 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
       LOG.info("cGroupsHandler is null - cgroups not in use.");
     } else {
       this.cGroupsHandler = cGroupsHandler;
+      this.cgroupsRootDirectory = cGroupsHandler.getCGroupMountPath();
     }
   }
 
@@ -486,9 +486,12 @@ public class DockerLinuxContainerRuntime implements LinuxContainerRuntime {
         .setContainerWorkDir(containerWorkDir.toString())
         .setNetworkType(network);
     setHostname(runCommand, containerIdStr, hostname);
-    runCommand.setCapabilities(capabilities)
-        .addMountLocation(CGROUPS_ROOT_DIRECTORY,
-            CGROUPS_ROOT_DIRECTORY + ":ro", false);
+    runCommand.setCapabilities(capabilities);
+
+    if(cgroupsRootDirectory != null) {
+      runCommand.addMountLocation(cgroupsRootDirectory,
+          cgroupsRootDirectory + ":ro", false);
+    }
 
     List<String> allDirs = new ArrayList<>(containerLocalDirs);
     allDirs.addAll(filecacheDirs);

+ 10 - 3
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java

@@ -33,6 +33,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileg
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationException;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerModule;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.docker.DockerRunCommand;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerExecutionException;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.runtime.ContainerRuntimeConstants;
@@ -261,12 +262,18 @@ public class TestDockerContainerRuntime {
   }
 
   private String getExpectedCGroupsMountString() {
+    CGroupsHandler cgroupsHandler = ResourceHandlerModule.getCGroupsHandler();
+    if(cgroupsHandler == null) {
+      return "";
+    }
+
+    String cgroupMountPath = cgroupsHandler.getCGroupMountPath();
     boolean cGroupsMountExists = new File(
-        DockerLinuxContainerRuntime.CGROUPS_ROOT_DIRECTORY).exists();
+        cgroupMountPath).exists();
 
     if(cGroupsMountExists) {
-      return "-v " + DockerLinuxContainerRuntime.CGROUPS_ROOT_DIRECTORY
-          + ":" + DockerLinuxContainerRuntime.CGROUPS_ROOT_DIRECTORY + ":ro ";
+      return "-v " + cgroupMountPath
+          + ":" + cgroupMountPath + ":ro ";
     } else {
       return "";
     }