
Merging r1560794 through r1561801 from trunk.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-5698@1561802 13f79535-47bb-0310-9956-ffa450edef68
Jing Zhao · 11 years ago · parent commit 8e7372bbc4
100 changed files with 6616 additions and 988 deletions (per-file counts below are additions and deletions):
  1. 7 0
      hadoop-assemblies/src/main/resources/assemblies/hadoop-yarn-dist.xml
  2. 12 3
      hadoop-common-project/hadoop-common/CHANGES.txt
  3. 1 1
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
  4. 10 1
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java
  5. 11 5
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/Jets3tNativeFileSystemStore.java
  6. 0 434
      hadoop-common-project/hadoop-common/src/site/apt/ClusterSetup.apt.vm
  7. 637 0
      hadoop-common-project/hadoop-common/src/site/apt/SecureMode.apt.vm
  8. 3 0
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java
  9. 5 0
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestSymlinkLocalFS.java
  10. 17 0
      hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
  11. 5 0
      hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml
  12. 33 0
      hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperJournalManager.java
  13. 1 1
      hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/java/org/apache/hadoop/contrib/bkjournal/TestBookKeeperAsHASharedDir.java
  14. 3 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java
  15. 44 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java
  16. 58 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java
  17. 14 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLogger.java
  18. 68 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLoggerSet.java
  19. 68 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannel.java
  20. 138 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java
  21. 14 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocol/QJournalProtocol.java
  22. 94 5
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolServerSideTranslatorPB.java
  23. 92 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolTranslatorPB.java
  24. 18 11
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/GetJournalEditServlet.java
  25. 9 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JNStorage.java
  26. 65 5
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java
  27. 28 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java
  28. 32 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeRpcServer.java
  29. 77 132
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java
  30. 119 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/StorageInfo.java
  31. 1 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceStorage.java
  32. 19 4
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java
  33. 33 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupJournalManager.java
  34. 4 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java
  35. 58 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
  36. 19 7
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOpCodes.java
  37. 91 118
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
  38. 16 7
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
  39. 47 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java
  40. 51 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalManager.java
  41. 54 5
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java
  42. 3 3
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java
  43. 174 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNUpgradeUtil.java
  44. 28 23
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
  45. 2 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java
  46. 20 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java
  47. 78 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/proto/QJournalProtocol.proto
  48. 46 0
      hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HDFSHighAvailabilityWithQJM.apt.vm
  49. 3 16
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java
  50. 47 19
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java
  51. 20 25
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSRollback.java
  52. 34 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java
  53. 59 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
  54. 9 1
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/MiniJournalCluster.java
  55. 9 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/MiniQJMHACluster.java
  56. 25 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestGenericJournalConf.java
  57. 1 1
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java
  58. 674 49
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDFSUpgradeWithHA.java
  59. 2 2
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestInitializeSharedEdits.java
  60. 64 3
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotFileLength.java
  61. 29 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java
  62. 0 1
      hadoop-mapreduce-project/pom.xml
  63. 0 4
      hadoop-project/pom.xml
  64. 1 0
      hadoop-project/src/site/site.xml
  65. 0 1
      hadoop-tools/hadoop-distcp/pom.xml
  66. 0 1
      hadoop-tools/hadoop-openstack/pom.xml
  67. 97 0
      hadoop-yarn-project/CHANGES.txt
  68. 18 1
      hadoop-yarn-project/hadoop-yarn/bin/yarn
  69. 31 2
      hadoop-yarn-project/hadoop-yarn/bin/yarn.cmd
  70. 9 0
      hadoop-yarn-project/hadoop-yarn/conf/yarn-env.sh
  71. 2 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml
  72. 334 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationHistoryProtocol.java
  73. 75 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetApplicationAttemptReportRequest.java
  74. 74 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetApplicationAttemptReportResponse.java
  75. 67 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetApplicationAttemptsRequest.java
  76. 76 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetApplicationAttemptsResponse.java
  77. 64 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetContainerReportRequest.java
  78. 63 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetContainerReportResponse.java
  79. 67 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetContainersRequest.java
  80. 81 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetContainersResponse.java
  81. 165 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationAttemptReport.java
  82. 202 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerReport.java
  83. 66 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/YarnApplicationAttemptState.java
  84. 68 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
  85. 48 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/ApplicationAttemptNotFoundException.java
  86. 7 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/ApplicationNotFoundException.java
  87. 48 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/ContainerNotFoundException.java
  88. 39 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/application_history_client.proto
  89. 113 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/application_history_server.proto
  90. 38 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto
  91. 36 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto
  92. 180 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/AHSClient.java
  93. 76 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/YarnClient.java
  94. 155 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AHSClientImpl.java
  95. 81 6
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java
  96. 244 69
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java
  97. 415 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAHSClient.java
  98. 0 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestYarnClient.java
  99. 172 4
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java
  100. 1 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/pom.xml

+ 7 - 0
hadoop-assemblies/src/main/resources/assemblies/hadoop-yarn-dist.xml

@@ -122,6 +122,13 @@
         <include>*-sources.jar</include>
       </includes>
     </fileSet>
+    <fileSet>
+      <directory>hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/target</directory>
+      <outputDirectory>/share/hadoop/${hadoop.component}/sources</outputDirectory>
+      <includes>
+        <include>*-sources.jar</include>
+      </includes>
+    </fileSet>
     <fileSet>
       <directory>hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/target</directory>
       <outputDirectory>/share/hadoop/${hadoop.component}/sources</outputDirectory>

+ 12 - 3
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -424,6 +424,9 @@ Release 2.4.0 - UNRELEASED
     HADOOP-10143 replace WritableFactories's hashmap with ConcurrentHashMap
     (Liang Xie via stack)
 
+    HADOOP-9652. Allow RawLocalFs#getFileLinkStatus to fill in the link owner
+    and mode if requested. (Andrew Wang via Colin Patrick McCabe)
+
   OPTIMIZATIONS
 
     HADOOP-9748. Reduce blocking on UGI.ensureInitialized (daryn)
@@ -450,9 +453,6 @@ Release 2.4.0 - UNRELEASED
     HADOOP-9817. FileSystem#globStatus and FileContext#globStatus need to work
     with symlinks. (Colin Patrick McCabe via Andrew Wang)
 
-    HADOOP-9652.  RawLocalFs#getFileLinkStatus does not fill in the link owner
-    and mode.  (Andrew Wang via Colin Patrick McCabe)
-
     HADOOP-9875.  TestDoAsEffectiveUser can fail on JDK 7.  (Aaron T. Myers via
     Colin Patrick McCabe)
 
@@ -536,6 +536,9 @@ Release 2.4.0 - UNRELEASED
     HADOOP-10252. HttpServer can't start if hostname is not specified. (Jimmy
     Xiang via atm)
 
+    HADOOP-10203. Connection leak in
+    Jets3tNativeFileSystemStore#retrieveMetadata. (Andrei Savu via atm)
+
 Release 2.3.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES
@@ -556,6 +559,9 @@ Release 2.3.0 - UNRELEASED
     HADOOP-10132. RPC#stopProxy() should log the class of proxy when IllegalArgumentException 
     is encountered (Ted yu via umamahesh)
 
+    HADOOP-10248. Property name should be included in the exception where property value 
+    is null (Akira AJISAKA via umamahesh)
+
   OPTIMIZATIONS
 
     HADOOP-10142. Avoid groups lookup for unprivileged users such as "dr.who"
@@ -631,6 +637,9 @@ Release 2.3.0 - UNRELEASED
 
     HADOOP-10112. har file listing doesn't work with wild card. (brandonli)
 
+    HADOOP-10167. Mark hadoop-common source as UTF-8 in Maven pom files / refactoring
+    (Mikhail Antonov via cos)
+
 Release 2.2.0 - 2013-10-13
 
   INCOMPATIBLE CHANGES

+ 1 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java

@@ -963,7 +963,7 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
         "Property name must not be null");
     Preconditions.checkArgument(
         value != null,
-        "Property value must not be null");
+        "The value of property " + name + " must not be null");
     DeprecationContext deprecations = deprecationContext.get();
     if (deprecations.getDeprecatedKeyMap().isEmpty()) {
       getProps();
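
A quick illustration of the sharper diagnostic introduced here (a sketch, assuming the patched Configuration is on the classpath; the property name is arbitrary):

    import org.apache.hadoop.conf.Configuration;

    public class NullValueDemo {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        try {
          conf.set("fs.defaultFS", null);  // null values are rejected up front
        } catch (IllegalArgumentException e) {
          // Before the patch: "Property value must not be null"
          // After the patch:  "The value of property fs.defaultFS must not be null"
          System.out.println(e.getMessage());
        }
      }
    }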

+ 10 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java

@@ -16,8 +16,11 @@
  * limitations under the License.
  */
 
+
 package org.apache.hadoop.fs;
 
+import com.google.common.annotations.VisibleForTesting;
+
 import java.io.BufferedOutputStream;
 import java.io.DataOutput;
 import java.io.File;
@@ -51,7 +54,13 @@ import org.apache.hadoop.util.StringUtils;
 public class RawLocalFileSystem extends FileSystem {
   static final URI NAME = URI.create("file:///");
   private Path workingDir;
-  private static final boolean useDeprecatedFileStatus = !Stat.isAvailable();
+  // Temporary workaround for HADOOP-9652.
+  private static boolean useDeprecatedFileStatus = true;
+
+  @VisibleForTesting
+  public static void useStatIfAvailable() {
+    useDeprecatedFileStatus = !Stat.isAvailable();
+  }
   
   public RawLocalFileSystem() {
     workingDir = getInitialWorkingDirectory();
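
The Stat-based path is now off by default and only re-enabled on demand. A minimal sketch of how a test might opt back in via the new @VisibleForTesting hook (hypothetical demo class; assumes the patched jar on the classpath):

    import org.apache.hadoop.fs.RawLocalFileSystem;

    public class StatOptInDemo {
      public static void main(String[] args) {
        // Re-enable the Stat-based FileStatus path when the platform supports it;
        // where no usable `stat` exists, the deprecated path stays active.
        RawLocalFileSystem.useStatIfAvailable();
      }
    }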

+ 11 - 5
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/Jets3tNativeFileSystemStore.java

@@ -110,23 +110,29 @@ class Jets3tNativeFileSystemStore implements NativeFileSystemStore {
       handleS3ServiceException(e);
     }
   }
-  
+
   @Override
   public FileMetadata retrieveMetadata(String key) throws IOException {
+    StorageObject object = null;
     try {
       if(LOG.isDebugEnabled()) {
         LOG.debug("Getting metadata for key: " + key + " from bucket:" + bucket.getName());
       }
-      S3Object object = s3Service.getObject(bucket.getName(), key);
+      object = s3Service.getObjectDetails(bucket.getName(), key);
       return new FileMetadata(key, object.getContentLength(),
           object.getLastModifiedDate().getTime());
-    } catch (S3ServiceException e) {
+
+    } catch (ServiceException e) {
       // Following is brittle. Is there a better way?
-      if (e.getS3ErrorCode().matches("NoSuchKey")) {
+      if ("NoSuchKey".equals(e.getErrorCode())) {
         return null; //return null if key not found
       }
-      handleS3ServiceException(e);
+      handleServiceException(e);
       return null; //never returned - keep compiler happy
+    } finally {
+      if (object != null) {
+        object.closeDataInputStream();
+      }
     }
   }
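
The leak came from fetching the whole object (which opens a data stream) just to read its metadata, and never closing that stream. The patch switches to getObjectDetails and releases any stream in a finally block. A minimal sketch of that acquire/use/release pattern, with a plain Closeable standing in for the jets3t StorageObject (hypothetical demo class):

    import java.io.Closeable;
    import java.io.IOException;

    public class FinallyCleanupDemo {
      // Use the resource, then release it even if the use throws.
      static String describe(Closeable resource) throws IOException {
        try {
          return "metadata only";   // read just what we need
        } finally {
          resource.close();         // guaranteed cleanup, as in the patch
        }
      }

      public static void main(String[] args) throws IOException {
        Closeable stub = () -> System.out.println("stream closed");
        System.out.println(describe(stub));
      }
    }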
 

+ 0 - 434
hadoop-common-project/hadoop-common/src/site/apt/ClusterSetup.apt.vm

@@ -571,440 +571,6 @@ $ $HADOOP_YARN_HOME/sbin/yarn-daemon.sh stop proxyserver --config $HADOOP_CONF_D
 $ $HADOOP_PREFIX/sbin/mr-jobhistory-daemon.sh stop historyserver --config $HADOOP_CONF_DIR
 ----    	
 
-* {Running Hadoop in Secure Mode}
-
-  This section deals with important parameters to be specified in
-  to run Hadoop in <<secure mode>> with strong, Kerberos-based
-  authentication.
-
-  * <<<User Accounts for Hadoop Daemons>>>
-
-  Ensure that HDFS and YARN daemons run as different Unix users, for e.g.
-  <<<hdfs>>> and <<<yarn>>>. Also, ensure that the MapReduce JobHistory
-  server runs as user <<<mapred>>>.
-
-  It's recommended to have them share a Unix group, for e.g. <<<hadoop>>>.
-
-*---------------+----------------------------------------------------------------------+
-|| User:Group   || Daemons                                                             |
-*---------------+----------------------------------------------------------------------+
-| hdfs:hadoop   | NameNode, Secondary NameNode, Checkpoint Node, Backup Node, DataNode |
-*---------------+----------------------------------------------------------------------+
-| yarn:hadoop   | ResourceManager, NodeManager                                         |
-*---------------+----------------------------------------------------------------------+
-| mapred:hadoop | MapReduce JobHistory Server                                          |
-*---------------+----------------------------------------------------------------------+
-
-  * <<<Permissions for both HDFS and local fileSystem paths>>>
-
-  The following table lists various paths on HDFS and local filesystems (on
-  all nodes) and recommended permissions:
-
-*-------------------+-------------------+------------------+------------------+
-|| Filesystem       || Path             || User:Group      || Permissions     |
-*-------------------+-------------------+------------------+------------------+
-| local | <<<dfs.namenode.name.dir>>> | hdfs:hadoop | drwx------ |
-*-------------------+-------------------+------------------+------------------+
-| local | <<<dfs.datanode.data.dir>>> | hdfs:hadoop | drwx------ |
-*-------------------+-------------------+------------------+------------------+
-| local | $HADOOP_LOG_DIR | hdfs:hadoop | drwxrwxr-x |
-*-------------------+-------------------+------------------+------------------+
-| local | $YARN_LOG_DIR | yarn:hadoop | drwxrwxr-x |
-*-------------------+-------------------+------------------+------------------+
-| local | <<<yarn.nodemanager.local-dirs>>> | yarn:hadoop | drwxr-xr-x |
-*-------------------+-------------------+------------------+------------------+
-| local | <<<yarn.nodemanager.log-dirs>>> | yarn:hadoop | drwxr-xr-x |
-*-------------------+-------------------+------------------+------------------+
-| local | container-executor | root:hadoop | --Sr-s--- |
-*-------------------+-------------------+------------------+------------------+
-| local | <<<conf/container-executor.cfg>>> | root:hadoop | r-------- |
-*-------------------+-------------------+------------------+------------------+
-| hdfs | / | hdfs:hadoop | drwxr-xr-x |
-*-------------------+-------------------+------------------+------------------+
-| hdfs | /tmp | hdfs:hadoop | drwxrwxrwxt |
-*-------------------+-------------------+------------------+------------------+
-| hdfs | /user | hdfs:hadoop | drwxr-xr-x |
-*-------------------+-------------------+------------------+------------------+
-| hdfs | <<<yarn.nodemanager.remote-app-log-dir>>> | yarn:hadoop | drwxrwxrwxt |
-*-------------------+-------------------+------------------+------------------+
-| hdfs | <<<mapreduce.jobhistory.intermediate-done-dir>>> | mapred:hadoop | |
-| | | | drwxrwxrwxt |
-*-------------------+-------------------+------------------+------------------+
-| hdfs | <<<mapreduce.jobhistory.done-dir>>> | mapred:hadoop | |
-| | | | drwxr-x--- |
-*-------------------+-------------------+------------------+------------------+
-
-  * Kerberos Keytab files
-
-    * HDFS
-
-    The NameNode keytab file, on the NameNode host, should look like the
-    following:
-
-----
-$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/nn.service.keytab
-Keytab name: FILE:/etc/security/keytab/nn.service.keytab
-KVNO Timestamp         Principal
-   4 07/18/11 21:08:09 nn/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
-   4 07/18/11 21:08:09 nn/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
-   4 07/18/11 21:08:09 nn/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
-   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
-   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
-   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
-----
-
-    The Secondary NameNode keytab file, on that host, should look like the
-    following:
-
-----
-$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/sn.service.keytab
-Keytab name: FILE:/etc/security/keytab/sn.service.keytab
-KVNO Timestamp         Principal
-   4 07/18/11 21:08:09 sn/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
-   4 07/18/11 21:08:09 sn/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
-   4 07/18/11 21:08:09 sn/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
-   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
-   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
-   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
-----
-
-    The DataNode keytab file, on each host, should look like the following:
-
-----
-$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/dn.service.keytab
-Keytab name: FILE:/etc/security/keytab/dn.service.keytab
-KVNO Timestamp         Principal
-   4 07/18/11 21:08:09 dn/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
-   4 07/18/11 21:08:09 dn/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
-   4 07/18/11 21:08:09 dn/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
-   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
-   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
-   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
-----
-
-    * YARN
-
-    The ResourceManager keytab file, on the ResourceManager host, should look
-    like the following:
-
-----
-$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/rm.service.keytab
-Keytab name: FILE:/etc/security/keytab/rm.service.keytab
-KVNO Timestamp         Principal
-   4 07/18/11 21:08:09 rm/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
-   4 07/18/11 21:08:09 rm/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
-   4 07/18/11 21:08:09 rm/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
-   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
-   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
-   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
-----
-
-    The NodeManager keytab file, on each host, should look like the following:
-
-----
-$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/nm.service.keytab
-Keytab name: FILE:/etc/security/keytab/nm.service.keytab
-KVNO Timestamp         Principal
-   4 07/18/11 21:08:09 nm/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
-   4 07/18/11 21:08:09 nm/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
-   4 07/18/11 21:08:09 nm/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
-   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
-   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
-   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
-----
-
-    * MapReduce JobHistory Server
-
-    The MapReduce JobHistory Server keytab file, on that host, should look
-    like the following:
-
-----
-$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/jhs.service.keytab
-Keytab name: FILE:/etc/security/keytab/jhs.service.keytab
-KVNO Timestamp         Principal
-   4 07/18/11 21:08:09 jhs/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
-   4 07/18/11 21:08:09 jhs/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
-   4 07/18/11 21:08:09 jhs/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
-   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
-   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
-   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
-----
-
-** Configuration in Secure Mode
-
-  * <<<conf/core-site.xml>>>
-
-*-------------------------+-------------------------+------------------------+
-|| Parameter              || Value                  || Notes                 |
-*-------------------------+-------------------------+------------------------+
-| <<<hadoop.security.authentication>>> | <kerberos> | <simple> is non-secure. |
-*-------------------------+-------------------------+------------------------+
-| <<<hadoop.security.authorization>>> | <true> | |
-| | | Enable RPC service-level authorization. |
-*-------------------------+-------------------------+------------------------+
-
-  * <<<conf/hdfs-site.xml>>>
-
-    * Configurations for NameNode:
-
-*-------------------------+-------------------------+------------------------+
-|| Parameter              || Value                  || Notes                 |
-*-------------------------+-------------------------+------------------------+
-| <<<dfs.block.access.token.enable>>> | <true> |  |
-| | | Enable HDFS block access tokens for secure operations. |
-*-------------------------+-------------------------+------------------------+
-| <<<dfs.https.enable>>> | <true> | |
-| | | This value is deprecated. Use dfs.http.policy |
-*-------------------------+-------------------------+------------------------+
-| <<<dfs.http.policy>>> | <HTTP_ONLY> or <HTTPS_ONLY> or <HTTP_AND_HTTPS> | |
-| | | HTTPS_ONLY turns off http access |
-*-------------------------+-------------------------+------------------------+
-| <<<dfs.namenode.https-address>>> | <nn_host_fqdn:50470> | |
-*-------------------------+-------------------------+------------------------+
-| <<<dfs.https.port>>> | <50470> | |
-*-------------------------+-------------------------+------------------------+
-| <<<dfs.namenode.keytab.file>>> | </etc/security/keytab/nn.service.keytab> | |
-| | | Kerberos keytab file for the NameNode. |
-*-------------------------+-------------------------+------------------------+
-| <<<dfs.namenode.kerberos.principal>>> | nn/_HOST@REALM.TLD | |
-| | | Kerberos principal name for the NameNode. |
-*-------------------------+-------------------------+------------------------+
-| <<<dfs.namenode.kerberos.https.principal>>> | host/_HOST@REALM.TLD | |
-| | | HTTPS Kerberos principal name for the NameNode. |
-*-------------------------+-------------------------+------------------------+
-
-    * Configurations for Secondary NameNode:
-
-*-------------------------+-------------------------+------------------------+
-|| Parameter              || Value                  || Notes                 |
-*-------------------------+-------------------------+------------------------+
-| <<<dfs.namenode.secondary.http-address>>> | <c_nn_host_fqdn:50090> | |
-*-------------------------+-------------------------+------------------------+
-| <<<dfs.namenode.secondary.https-port>>> | <50470> | |
-*-------------------------+-------------------------+------------------------+
-| <<<dfs.namenode.secondary.keytab.file>>> | | |
-| | </etc/security/keytab/sn.service.keytab> | |
-| | | Kerberos keytab file for the NameNode. |
-*-------------------------+-------------------------+------------------------+
-| <<<dfs.namenode.secondary.kerberos.principal>>> | sn/_HOST@REALM.TLD | |
-| | | Kerberos principal name for the Secondary NameNode. |
-*-------------------------+-------------------------+------------------------+
-| <<<dfs.namenode.secondary.kerberos.https.principal>>> | | |
-| | host/_HOST@REALM.TLD | |
-| | | HTTPS Kerberos principal name for the Secondary NameNode. |
-*-------------------------+-------------------------+------------------------+
-
-    * Configurations for DataNode:
-
-*-------------------------+-------------------------+------------------------+
-|| Parameter              || Value                  || Notes                 |
-*-------------------------+-------------------------+------------------------+
-| <<<dfs.datanode.data.dir.perm>>> | 700 | |
-*-------------------------+-------------------------+------------------------+
-| <<<dfs.datanode.address>>> | <0.0.0.0:2003> | |
-*-------------------------+-------------------------+------------------------+
-| <<<dfs.datanode.https.address>>> | <0.0.0.0:2005> | |
-*-------------------------+-------------------------+------------------------+
-| <<<dfs.datanode.keytab.file>>> | </etc/security/keytab/dn.service.keytab> | |
-| | | Kerberos keytab file for the DataNode. |
-*-------------------------+-------------------------+------------------------+
-| <<<dfs.datanode.kerberos.principal>>> | dn/_HOST@REALM.TLD | |
-| | | Kerberos principal name for the DataNode. |
-*-------------------------+-------------------------+------------------------+
-| <<<dfs.datanode.kerberos.https.principal>>> | | |
-| | host/_HOST@REALM.TLD | |
-| | | HTTPS Kerberos principal name for the DataNode. |
-*-------------------------+-------------------------+------------------------+
-
-  * <<<conf/yarn-site.xml>>>
-
-    * WebAppProxy
-
-    The <<<WebAppProxy>>> provides a proxy between the web applications
-    exported by an application and an end user.  If security is enabled
-    it will warn users before accessing a potentially unsafe web application.
-    Authentication and authorization using the proxy is handled just like
-    any other privileged web application.
-
-*-------------------------+-------------------------+------------------------+
-|| Parameter              || Value                  || Notes                 |
-*-------------------------+-------------------------+------------------------+
-| <<<yarn.web-proxy.address>>> | | |
-| | <<<WebAppProxy>>> host:port for proxy to AM web apps. | |
-| | | <host:port> if this is the same as <<<yarn.resourcemanager.webapp.address>>>|
-| | | or it is not defined then the <<<ResourceManager>>> will run the proxy|
-| | | otherwise a standalone proxy server will need to be launched.|
-*-------------------------+-------------------------+------------------------+
-| <<<yarn.web-proxy.keytab>>> | | |
-| | </etc/security/keytab/web-app.service.keytab> | |
-| | | Kerberos keytab file for the WebAppProxy. |
-*-------------------------+-------------------------+------------------------+
-| <<<yarn.web-proxy.principal>>> | wap/_HOST@REALM.TLD | |
-| | | Kerberos principal name for the WebAppProxy. |
-*-------------------------+-------------------------+------------------------+
-
-    * LinuxContainerExecutor
-
-    A <<<ContainerExecutor>>> used by YARN framework which define how any
-    <container> launched and controlled.
-
-    The following are the available in Hadoop YARN:
-
-*--------------------------------------+--------------------------------------+
-|| ContainerExecutor                   || Description                         |
-*--------------------------------------+--------------------------------------+
-| <<<DefaultContainerExecutor>>>             | |
-| | The default executor which YARN uses to manage container execution. |
-| | The container process has the same Unix user as the NodeManager.  |
-*--------------------------------------+--------------------------------------+
-| <<<LinuxContainerExecutor>>>               | |
-| | Supported only on GNU/Linux, this executor runs the containers as either the |
-| | YARN user who submitted the application (when full security is enabled) or |
-| | as a dedicated user (defaults to nobody) when full security is not enabled. |
-| | When full security is enabled, this executor requires all user accounts to be |
-| | created on the cluster nodes where the containers are launched. It uses |
-| | a <setuid> executable that is included in the Hadoop distribution. |
-| | The NodeManager uses this executable to launch and kill containers. |
-| | The setuid executable switches to the user who has submitted the |
-| | application and launches or kills the containers. For maximum security, |
-| | this executor sets up restricted permissions and user/group ownership of |
-| | local files and directories used by the containers such as the shared |
-| | objects, jars, intermediate files, log files etc. Particularly note that, |
-| | because of this, except the application owner and NodeManager, no other |
-| | user can access any of the local files/directories including those |
-| | localized as part of the distributed cache. |
-*--------------------------------------+--------------------------------------+
-
-    To build the LinuxContainerExecutor executable run:
-
-----
- $ mvn package -Dcontainer-executor.conf.dir=/etc/hadoop/
-----
-
-    The path passed in <<<-Dcontainer-executor.conf.dir>>> should be the
-    path on the cluster nodes where a configuration file for the setuid
-    executable should be located. The executable should be installed in
-    $HADOOP_YARN_HOME/bin.
-
-    The executable must have specific permissions: 6050 or --Sr-s---
-    permissions user-owned by <root> (super-user) and group-owned by a
-    special group (e.g. <<<hadoop>>>) of which the NodeManager Unix user is
-    the group member and no ordinary application user is. If any application
-    user belongs to this special group, security will be compromised. This
-    special group name should be specified for the configuration property
-    <<<yarn.nodemanager.linux-container-executor.group>>> in both
-    <<<conf/yarn-site.xml>>> and <<<conf/container-executor.cfg>>>.
-
-    For example, let's say that the NodeManager is run as user <yarn> who is
-    part of the groups users and <hadoop>, any of them being the primary group.
-    Let also be that <users> has both <yarn> and another user
-    (application submitter) <alice> as its members, and <alice> does not
-    belong to <hadoop>. Going by the above description, the setuid/setgid
-    executable should be set 6050 or --Sr-s--- with user-owner as <yarn> and
-    group-owner as <hadoop> which has <yarn> as its member (and not <users>
-    which has <alice> also as its member besides <yarn>).
-
-    The LinuxTaskController requires that paths including and leading up to
-    the directories specified in <<<yarn.nodemanager.local-dirs>>> and
-    <<<yarn.nodemanager.log-dirs>>> to be set 755 permissions as described
-    above in the table on permissions on directories.
-
-      * <<<conf/container-executor.cfg>>>
-
-    The executable requires a configuration file called
-    <<<container-executor.cfg>>> to be present in the configuration
-    directory passed to the mvn target mentioned above.
-
-    The configuration file must be owned by the user running NodeManager
-    (user <<<yarn>>> in the above example), group-owned by anyone and
-    should have the permissions 0400 or r--------.
-
-    The executable requires following configuration items to be present
-    in the <<<conf/container-executor.cfg>>> file. The items should be
-    mentioned as simple key=value pairs, one per-line:
-
-*-------------------------+-------------------------+------------------------+
-|| Parameter              || Value                  || Notes                 |
-*-------------------------+-------------------------+------------------------+
-| <<<yarn.nodemanager.linux-container-executor.group>>> | <hadoop> | |
-| | | Unix group of the NodeManager. The group owner of the |
-| | |<container-executor> binary should be this group. Should be same as the |
-| | | value with which the NodeManager is configured. This configuration is |
-| | | required for validating the secure access of the <container-executor> |
-| | | binary. |
-*-------------------------+-------------------------+------------------------+
-| <<<banned.users>>> | hfds,yarn,mapred,bin | Banned users. |
-*-------------------------+-------------------------+------------------------+
-| <<<allowed.system.users>>> | foo,bar | Allowed system users. |
-*-------------------------+-------------------------+------------------------+
-| <<<min.user.id>>> | 1000 | Prevent other super-users. |
-*-------------------------+-------------------------+------------------------+
-
-      To re-cap, here are the local file-sysytem permissions required for the
-      various paths related to the <<<LinuxContainerExecutor>>>:
-
-*-------------------+-------------------+------------------+------------------+
-|| Filesystem       || Path             || User:Group      || Permissions     |
-*-------------------+-------------------+------------------+------------------+
-| local | container-executor | root:hadoop | --Sr-s--- |
-*-------------------+-------------------+------------------+------------------+
-| local | <<<conf/container-executor.cfg>>> | root:hadoop | r-------- |
-*-------------------+-------------------+------------------+------------------+
-| local | <<<yarn.nodemanager.local-dirs>>> | yarn:hadoop | drwxr-xr-x |
-*-------------------+-------------------+------------------+------------------+
-| local | <<<yarn.nodemanager.log-dirs>>> | yarn:hadoop | drwxr-xr-x |
-*-------------------+-------------------+------------------+------------------+
-
-      * Configurations for ResourceManager:
-
-*-------------------------+-------------------------+------------------------+
-|| Parameter              || Value                  || Notes                 |
-*-------------------------+-------------------------+------------------------+
-| <<<yarn.resourcemanager.keytab>>> | | |
-| | </etc/security/keytab/rm.service.keytab> | |
-| | | Kerberos keytab file for the ResourceManager. |
-*-------------------------+-------------------------+------------------------+
-| <<<yarn.resourcemanager.principal>>> | rm/_HOST@REALM.TLD | |
-| | | Kerberos principal name for the ResourceManager. |
-*-------------------------+-------------------------+------------------------+
-
-      * Configurations for NodeManager:
-
-*-------------------------+-------------------------+------------------------+
-|| Parameter              || Value                  || Notes                 |
-*-------------------------+-------------------------+------------------------+
-| <<<yarn.nodemanager.keytab>>> | </etc/security/keytab/nm.service.keytab> | |
-| | | Kerberos keytab file for the NodeManager. |
-*-------------------------+-------------------------+------------------------+
-| <<<yarn.nodemanager.principal>>> | nm/_HOST@REALM.TLD | |
-| | | Kerberos principal name for the NodeManager. |
-*-------------------------+-------------------------+------------------------+
-| <<<yarn.nodemanager.container-executor.class>>> | | |
-| | <<<org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor>>> |
-| | | Use LinuxContainerExecutor. |
-*-------------------------+-------------------------+------------------------+
-| <<<yarn.nodemanager.linux-container-executor.group>>> | <hadoop> | |
-| | | Unix group of the NodeManager. |
-*-------------------------+-------------------------+------------------------+
-
-    * <<<conf/mapred-site.xml>>>
-
-      * Configurations for MapReduce JobHistory Server:
-
-*-------------------------+-------------------------+------------------------+
-|| Parameter              || Value                  || Notes                 |
-*-------------------------+-------------------------+------------------------+
-| <<<mapreduce.jobhistory.address>>> | | |
-| | MapReduce JobHistory Server <host:port> | Default port is 10020. |
-*-------------------------+-------------------------+------------------------+
-| <<<mapreduce.jobhistory.keytab>>> | |
-| | </etc/security/keytab/jhs.service.keytab> | |
-| | | Kerberos keytab file for the MapReduce JobHistory Server. |
-*-------------------------+-------------------------+------------------------+
-| <<<mapreduce.jobhistory.principal>>> | jhs/_HOST@REALM.TLD | |
-| | | Kerberos principal name for the MapReduce JobHistory Server. |
-*-------------------------+-------------------------+------------------------+
-
 
 * {Operating the Hadoop Cluster}
 

+ 637 - 0
hadoop-common-project/hadoop-common/src/site/apt/SecureMode.apt.vm

@@ -0,0 +1,637 @@
+~~ Licensed under the Apache License, Version 2.0 (the "License");
+~~ you may not use this file except in compliance with the License.
+~~ You may obtain a copy of the License at
+~~
+~~   http://www.apache.org/licenses/LICENSE-2.0
+~~
+~~ Unless required by applicable law or agreed to in writing, software
+~~ distributed under the License is distributed on an "AS IS" BASIS,
+~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+~~ See the License for the specific language governing permissions and
+~~ limitations under the License. See accompanying LICENSE file.
+
+  ---
+  Hadoop in Secure Mode
+  ---
+  ---
+  ${maven.build.timestamp}
+
+%{toc|section=0|fromDepth=0|toDepth=3}
+
+Hadoop in Secure Mode
+
+* Introduction
+
+  This document describes how to configure authentication for Hadoop in
+  secure mode.
+
+  By default Hadoop runs in non-secure mode, in which no actual
+  authentication is required.
+  When Hadoop is configured to run in secure mode,
+  each user and service must be authenticated by Kerberos
+  in order to use Hadoop services.
+
+  Security features of Hadoop consist of
+  {{{Authentication}authentication}},
+  {{{./ServiceLevelAuth.html}service level authorization}},
+  {{{./HttpAuthentication.html}authentication for Web consoles}}
+  and {{{Data confidentiality}data confidentiality}}.
+
+
+* Authentication
+
+** End User Accounts
+
+  When service-level authentication is turned on,
+  end users of Hadoop in secure mode need to be authenticated by Kerberos.
+  The simplest way to authenticate is with the <<<kinit>>> command of Kerberos.
+
+** User Accounts for Hadoop Daemons
+
+  Ensure that HDFS and YARN daemons run as different Unix users,
+  e.g. <<<hdfs>>> and <<<yarn>>>.
+  Also, ensure that the MapReduce JobHistory server runs as
+  a different user, such as <<<mapred>>>.
+
+  It's recommended to have them share a Unix group, for example <<<hadoop>>>.
+  See also "{{Mapping from user to group}}" for group management.
+
+*---------------+----------------------------------------------------------------------+
+|| User:Group   || Daemons                                                             |
+*---------------+----------------------------------------------------------------------+
+| hdfs:hadoop   | NameNode, Secondary NameNode, JournalNode, DataNode                  |
+*---------------+----------------------------------------------------------------------+
+| yarn:hadoop   | ResourceManager, NodeManager                                         |
+*---------------+----------------------------------------------------------------------+
+| mapred:hadoop | MapReduce JobHistory Server                                          |
+*---------------+----------------------------------------------------------------------+
+
+** Kerberos principals for Hadoop Daemons and Users
+
+  To run Hadoop service daemons in secure mode,
+  Kerberos principals are required.
+  Each service reads its authentication credentials from a keytab file
+  with appropriate permissions.
+
+  HTTP web-consoles should be served by a principal different from the RPC one.
+
+  The subsections below show example credentials for Hadoop services.
+
+*** HDFS
+
+    The NameNode keytab file, on the NameNode host, should look like the
+    following:
+
+----
+$ klist -e -k -t /etc/security/keytab/nn.service.keytab
+Keytab name: FILE:/etc/security/keytab/nn.service.keytab
+KVNO Timestamp         Principal
+   4 07/18/11 21:08:09 nn/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
+   4 07/18/11 21:08:09 nn/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
+   4 07/18/11 21:08:09 nn/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
+   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
+   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
+   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
+----
+
+    The Secondary NameNode keytab file, on that host, should look like the
+    following:
+
+----
+$ klist -e -k -t /etc/security/keytab/sn.service.keytab
+Keytab name: FILE:/etc/security/keytab/sn.service.keytab
+KVNO Timestamp         Principal
+   4 07/18/11 21:08:09 sn/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
+   4 07/18/11 21:08:09 sn/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
+   4 07/18/11 21:08:09 sn/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
+   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
+   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
+   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
+----
+
+    The DataNode keytab file, on each host, should look like the following:
+
+----
+$ klist -e -k -t /etc/security/keytab/dn.service.keytab
+Keytab name: FILE:/etc/security/keytab/dn.service.keytab
+KVNO Timestamp         Principal
+   4 07/18/11 21:08:09 dn/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
+   4 07/18/11 21:08:09 dn/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
+   4 07/18/11 21:08:09 dn/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
+   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
+   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
+   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
+----
+
+*** YARN
+
+    The ResourceManager keytab file, on the ResourceManager host, should look
+    like the following:
+
+----
+$ klist -e -k -t /etc/security/keytab/rm.service.keytab
+Keytab name: FILE:/etc/security/keytab/rm.service.keytab
+KVNO Timestamp         Principal
+   4 07/18/11 21:08:09 rm/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
+   4 07/18/11 21:08:09 rm/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
+   4 07/18/11 21:08:09 rm/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
+   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
+   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
+   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
+----
+
+    The NodeManager keytab file, on each host, should look like the following:
+
+----
+$ klist -e -k -t /etc/security/keytab/nm.service.keytab
+Keytab name: FILE:/etc/security/keytab/nm.service.keytab
+KVNO Timestamp         Principal
+   4 07/18/11 21:08:09 nm/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
+   4 07/18/11 21:08:09 nm/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
+   4 07/18/11 21:08:09 nm/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
+   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
+   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
+   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
+----
+
+*** MapReduce JobHistory Server
+
+    The MapReduce JobHistory Server keytab file, on that host, should look
+    like the following:
+
+----
+$ klist -e -k -t /etc/security/keytab/jhs.service.keytab
+Keytab name: FILE:/etc/security/keytab/jhs.service.keytab
+KVNO Timestamp         Principal
+   4 07/18/11 21:08:09 jhs/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
+   4 07/18/11 21:08:09 jhs/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
+   4 07/18/11 21:08:09 jhs/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
+   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
+   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
+   4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
+----
+
+** Mapping from Kerberos principal to OS user account
+
+  Hadoop maps Kerberos principals to OS user accounts using
+  the rules specified by <<<hadoop.security.auth_to_local>>>,
+  which works in the same way as the <<<auth_to_local>>> setting in the
+  {{{http://web.mit.edu/Kerberos/krb5-latest/doc/admin/conf_files/krb5_conf.html}Kerberos configuration file (krb5.conf)}}.
+
+  By default, it picks the first component of the principal name as the user name
+  if the realm matches the <<<default_realm>>> (usually defined in /etc/krb5.conf).
+  For example, <<<host/full.qualified.domain.name@REALM.TLD>>> is mapped to <<<host>>>
+  by the default rule.
+
+** Mapping from user to group
+
+  Though files on HDFS are associated with an owner and a group,
+  Hadoop itself has no notion of a group.
+  Mapping from users to groups is done by the OS or LDAP.
+
+  You can change the mapping by specifying the name of the mapping provider
+  as the value of <<<hadoop.security.group.mapping>>>.
+  See {{{../hadoop-hdfs/HdfsPermissionsGuide.html}HDFS Permissions Guide}} for details.
+
+  In practice you need to manage an SSO environment using Kerberos with LDAP
+  for Hadoop in secure mode.
+
+** Proxy user
+
+  Some products, such as Apache Oozie, access Hadoop services
+  on behalf of end users and need to be able to impersonate them.
+  You can configure a proxy user using the properties
+  <<<hadoop.proxyuser.${superuser}.hosts>>> and <<<hadoop.proxyuser.${superuser}.groups>>>.
+
+  For example, with the settings below in core-site.xml,
+  the user named <<<oozie>>> accessing from any host
+  can impersonate any user belonging to any group.
+
+----
+  <property>
+    <name>hadoop.proxyuser.oozie.hosts</name>
+    <value>*</value>
+  </property>
+  <property>
+    <name>hadoop.proxyuser.oozie.groups</name>
+    <value>*</value>
+  </property>
+----
+
+** Secure DataNode
+
+  Because the data transfer protocol of the DataNode
+  does not use the RPC framework of Hadoop,
+  DataNodes must authenticate themselves by
+  using privileged ports, which are specified by
+  <<<dfs.datanode.address>>> and <<<dfs.datanode.http.address>>>.
+  This authentication is based on the assumption
+  that an attacker won't be able to obtain root privileges.
+
+  When you execute the <<<hdfs datanode>>> command as root,
+  the server process binds the privileged ports first,
+  then drops privileges and runs as the user account specified by
+  <<<HADOOP_SECURE_DN_USER>>>.
+  This startup process uses jsvc installed in <<<JSVC_HOME>>>.
+  You must specify <<<HADOOP_SECURE_DN_USER>>> and <<<JSVC_HOME>>>
+  as environment variables at startup (in hadoop-env.sh).
+
+
+* Data confidentiality
+
+** Data Encryption on RPC
+
+  This covers the data transferred between Hadoop services and clients over RPC.
+  Setting <<<hadoop.rpc.protection>>> to <<<"privacy">>> in core-site.xml
+  activates data encryption.
+
+** Data Encryption on Block Data Transfer
+
+  You need to set <<<dfs.encrypt.data.transfer>>> to <<<"true">>> in hdfs-site.xml
+  in order to activate data encryption for the data transfer protocol of the DataNode.
+
+** Data Encryption on HTTP
+
+  Data transfer between the web consoles and clients is protected by SSL (HTTPS).
+
+
+* Configuration
+
+** Permissions for both HDFS and local filesystem paths
+
+  The following table lists various paths on HDFS and local filesystems (on
+  all nodes) and recommended permissions:
+
+*-------------------+-------------------+------------------+------------------+
+|| Filesystem       || Path             || User:Group      || Permissions     |
+*-------------------+-------------------+------------------+------------------+
+| local | <<<dfs.namenode.name.dir>>> | hdfs:hadoop | drwx------ |
+*-------------------+-------------------+------------------+------------------+
+| local | <<<dfs.datanode.data.dir>>> | hdfs:hadoop | drwx------ |
+*-------------------+-------------------+------------------+------------------+
+| local | $HADOOP_LOG_DIR | hdfs:hadoop | drwxrwxr-x |
+*-------------------+-------------------+------------------+------------------+
+| local | $YARN_LOG_DIR | yarn:hadoop | drwxrwxr-x |
+*-------------------+-------------------+------------------+------------------+
+| local | <<<yarn.nodemanager.local-dirs>>> | yarn:hadoop | drwxr-xr-x |
+*-------------------+-------------------+------------------+------------------+
+| local | <<<yarn.nodemanager.log-dirs>>> | yarn:hadoop | drwxr-xr-x |
+*-------------------+-------------------+------------------+------------------+
+| local | container-executor | root:hadoop | --Sr-s--- |
+*-------------------+-------------------+------------------+------------------+
+| local | <<<conf/container-executor.cfg>>> | root:hadoop | r-------- |
+*-------------------+-------------------+------------------+------------------+
+| hdfs | / | hdfs:hadoop | drwxr-xr-x |
+*-------------------+-------------------+------------------+------------------+
+| hdfs | /tmp | hdfs:hadoop | drwxrwxrwxt |
+*-------------------+-------------------+------------------+------------------+
+| hdfs | /user | hdfs:hadoop | drwxr-xr-x |
+*-------------------+-------------------+------------------+------------------+
+| hdfs | <<<yarn.nodemanager.remote-app-log-dir>>> | yarn:hadoop | drwxrwxrwxt |
+*-------------------+-------------------+------------------+------------------+
+| hdfs | <<<mapreduce.jobhistory.intermediate-done-dir>>> | mapred:hadoop | |
+| | | | drwxrwxrwxt |
+*-------------------+-------------------+------------------+------------------+
+| hdfs | <<<mapreduce.jobhistory.done-dir>>> | mapred:hadoop | |
+| | | | drwxr-x--- |
+*-------------------+-------------------+------------------+------------------+
+
+** Common Configurations
+
+  In order to turn on RPC authentication in hadoop, set the value of the
+  <<<hadoop.security.authentication>>> property to <<<"kerberos">>>, and
+  set the security-related settings listed below appropriately.
+
+  The following properties should be in the <<<core-site.xml>>> of all the
+  nodes in the cluster.
+
+*-------------------------+-------------------------+------------------------+
+|| Parameter              || Value                  || Notes                 |
+*-------------------------+-------------------------+------------------------+
+| <<<hadoop.security.authentication>>> | <kerberos> | |
+| | | <<<simple>>> : No authentication. (default) \
+| | | <<<kerberos>>> : Enable authentication by Kerberos. |
+*-------------------------+-------------------------+------------------------+
+| <<<hadoop.security.authorization>>> | <true> | |
+| | | Enable {{{./ServiceLevelAuth.html}RPC service-level authorization}}. |
+*-------------------------+-------------------------+------------------------+
+| <<<hadoop.rpc.protection>>> | <authentication> |
+| | | <authentication> : authentication only (default) \
+| | | <integrity> : integrity check in addition to authentication \
+| | | <privacy> : data encryption in addition to integrity |
+*-------------------------+-------------------------+------------------------+
+| <<<hadoop.security.auth_to_local>>> | | |
+| |  <<<RULE:>>><exp1>\
+| |  <<<RULE:>>><exp2>\
+| |  <...>\
+| |  DEFAULT |
+| | | The value is a string containing new-line characters.
+| | | See the
+| | | {{{http://web.mit.edu/Kerberos/krb5-latest/doc/admin/conf_files/krb5_conf.html}Kerberos documentation}}
+| | | for the format of <exp>.
+*-------------------------+-------------------------+------------------------+
+| <<<hadoop.proxyuser.>>><superuser><<<.hosts>>> | | |
+| | | Comma-separated list of hosts from which <superuser> is allowed to perform impersonation. |
+| | | <<<*>>> means wildcard. |
+*-------------------------+-------------------------+------------------------+
+| <<<hadoop.proxyuser.>>><superuser><<<.groups>>> | | |
+| | | Comma-separated list of groups to which the users impersonated by <superuser> belong. |
+| | | <<<*>>> means wildcard. |
+*-------------------------+-------------------------+------------------------+
+Configuration for <<<conf/core-site.xml>>>
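+
+  For example, a sketch of these settings in <<<conf/core-site.xml>>>
+  (the superuser name <<<super>>> and the host/group lists are
+  illustrative assumptions):
+
+----
+<property>
+  <name>hadoop.security.authentication</name>
+  <value>kerberos</value>
+</property>
+<property>
+  <name>hadoop.security.authorization</name>
+  <value>true</value>
+</property>
+<property>
+  <name>hadoop.proxyuser.super.hosts</name>
+  <value>host1,host2</value>
+</property>
+<property>
+  <name>hadoop.proxyuser.super.groups</name>
+  <value>group1,group2</value>
+</property>
+----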
+
+**  NameNode
+
+*-------------------------+-------------------------+------------------------+
+|| Parameter              || Value                  || Notes                 |
+*-------------------------+-------------------------+------------------------+
+| <<<dfs.block.access.token.enable>>> | <true> |  |
+| | | Enable HDFS block access tokens for secure operations. |
+*-------------------------+-------------------------+------------------------+
+| <<<dfs.https.enable>>> | <true> | |
+| | | This value is deprecated; use <<<dfs.http.policy>>> instead. |
+*-------------------------+-------------------------+------------------------+
+| <<<dfs.http.policy>>> | <HTTP_ONLY> or <HTTPS_ONLY> or <HTTP_AND_HTTPS> | |
+| | | <HTTPS_ONLY> turns off HTTP access. |
+*-------------------------+-------------------------+------------------------+
+| <<<dfs.namenode.https-address>>> | <nn_host_fqdn:50470> | |
+*-------------------------+-------------------------+------------------------+
+| <<<dfs.https.port>>> | <50470> | |
+*-------------------------+-------------------------+------------------------+
+| <<<dfs.namenode.keytab.file>>> | </etc/security/keytab/nn.service.keytab> | |
+| | | Kerberos keytab file for the NameNode. |
+*-------------------------+-------------------------+------------------------+
+| <<<dfs.namenode.kerberos.principal>>> | nn/_HOST@REALM.TLD | |
+| | | Kerberos principal name for the NameNode. |
+*-------------------------+-------------------------+------------------------+
+| <<<dfs.namenode.kerberos.https.principal>>> | host/_HOST@REALM.TLD | |
+| | | HTTPS Kerberos principal name for the NameNode. |
+*-------------------------+-------------------------+------------------------+
+Configuration for <<<conf/hdfs-site.xml>>>
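+
+  For example, a sketch of the NameNode entries in <<<conf/hdfs-site.xml>>>
+  (the keytab path and <<<REALM.TLD>>> are illustrative):
+
+----
+<property>
+  <name>dfs.block.access.token.enable</name>
+  <value>true</value>
+</property>
+<property>
+  <name>dfs.namenode.keytab.file</name>
+  <value>/etc/security/keytab/nn.service.keytab</value>
+</property>
+<property>
+  <name>dfs.namenode.kerberos.principal</name>
+  <value>nn/_HOST@REALM.TLD</value>
+</property>
+----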
+
+**  Secondary NameNode
+
+*-------------------------+-------------------------+------------------------+
+|| Parameter              || Value                  || Notes                 |
+*-------------------------+-------------------------+------------------------+
+| <<<dfs.namenode.secondary.http-address>>> | <c_nn_host_fqdn:50090> | |
+*-------------------------+-------------------------+------------------------+
+| <<<dfs.namenode.secondary.https-port>>> | <50470> | |
+*-------------------------+-------------------------+------------------------+
+| <<<dfs.namenode.secondary.keytab.file>>> | | |
+| | </etc/security/keytab/sn.service.keytab> | |
+| | | Kerberos keytab file for the Secondary NameNode. |
+*-------------------------+-------------------------+------------------------+
+| <<<dfs.namenode.secondary.kerberos.principal>>> | sn/_HOST@REALM.TLD | |
+| | | Kerberos principal name for the Secondary NameNode. |
+*-------------------------+-------------------------+------------------------+
+| <<<dfs.namenode.secondary.kerberos.https.principal>>> | | |
+| | host/_HOST@REALM.TLD | |
+| | | HTTPS Kerberos principal name for the Secondary NameNode. |
+*-------------------------+-------------------------+------------------------+
+Configuration for <<<conf/hdfs-site.xml>>>
+
+** DataNode
+
+*-------------------------+-------------------------+------------------------+
+|| Parameter              || Value                  || Notes                 |
+*-------------------------+-------------------------+------------------------+
+| <<<dfs.datanode.data.dir.perm>>> | 700 | |
+*-------------------------+-------------------------+------------------------+
+| <<<dfs.datanode.address>>> | <0.0.0.0:1004> | |
+| | | Secure DataNode must use privileged port |
+| | | in order to assure that the server was started securely. |
+| | | This means that the server must be started via jsvc. |
+*-------------------------+-------------------------+------------------------+
+| <<<dfs.datanode.http.address>>> | <0.0.0.0:1006> | |
+| | | Secure DataNode must use privileged port |
+| | | in order to assure that the server was started securely. |
+| | | This means that the server must be started via jsvc. |
+*-------------------------+-------------------------+------------------------+
+| <<<dfs.datanode.https.address>>> | <0.0.0.0:50470> | |
+*-------------------------+-------------------------+------------------------+
+| <<<dfs.datanode.keytab.file>>> | </etc/security/keytab/dn.service.keytab> | |
+| | | Kerberos keytab file for the DataNode. |
+*-------------------------+-------------------------+------------------------+
+| <<<dfs.datanode.kerberos.principal>>> | dn/_HOST@REALM.TLD | |
+| | | Kerberos principal name for the DataNode. |
+*-------------------------+-------------------------+------------------------+
+| <<<dfs.datanode.kerberos.https.principal>>> | | |
+| | host/_HOST@REALM.TLD | |
+| | | HTTPS Kerberos principal name for the DataNode. |
+*-------------------------+-------------------------+------------------------+
+| <<<dfs.encrypt.data.transfer>>> | <false> | |
+| | | set to <<<true>>> when using data encryption |
+*-------------------------+-------------------------+------------------------+
+Configuration for <<<conf/hdfs-site.xml>>>
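+
+  For example, a sketch of the DataNode entries in <<<conf/hdfs-site.xml>>>
+  (the keytab path and realm are illustrative):
+
+----
+<property>
+  <name>dfs.datanode.address</name>
+  <value>0.0.0.0:1004</value>
+</property>
+<property>
+  <name>dfs.datanode.keytab.file</name>
+  <value>/etc/security/keytab/dn.service.keytab</value>
+</property>
+<property>
+  <name>dfs.datanode.kerberos.principal</name>
+  <value>dn/_HOST@REALM.TLD</value>
+</property>
+----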
+
+
+** WebHDFS
+
+*-------------------------+-------------------------+------------------------+
+|| Parameter              || Value                  || Notes                 |
+*-------------------------+-------------------------+------------------------+
+| <<<dfs.webhdfs.enabled>>> | <true> | |
+| | | Enable WebHDFS. |
+*-------------------------+-------------------------+------------------------+
+| <<<dfs.web.authentication.kerberos.principal>>> | HTTP/_HOST@REALM.TLD | |
+| | | Kerberos principal name for WebHDFS. |
+*-------------------------+-------------------------+------------------------+
+| <<<dfs.web.authentication.kerberos.keytab>>> | </etc/security/keytab/http.service.keytab> | |
+| | | Kerberos keytab file for WebHDFS. |
+*-------------------------+-------------------------+------------------------+
+Configuration for <<<conf/hdfs-site.xml>>>
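+
+  For example, a sketch of the WebHDFS entries in <<<conf/hdfs-site.xml>>>
+  (the keytab path and realm are illustrative):
+
+----
+<property>
+  <name>dfs.web.authentication.kerberos.principal</name>
+  <value>HTTP/_HOST@REALM.TLD</value>
+</property>
+<property>
+  <name>dfs.web.authentication.kerberos.keytab</name>
+  <value>/etc/security/keytab/http.service.keytab</value>
+</property>
+----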
+
+
+** ResourceManager
+
+*-------------------------+-------------------------+------------------------+
+|| Parameter              || Value                  || Notes                 |
+*-------------------------+-------------------------+------------------------+
+| <<<yarn.resourcemanager.keytab>>> | | |
+| | </etc/security/keytab/rm.service.keytab> | |
+| | | Kerberos keytab file for the ResourceManager. |
+*-------------------------+-------------------------+------------------------+
+| <<<yarn.resourcemanager.principal>>> | rm/_HOST@REALM.TLD | |
+| | | Kerberos principal name for the ResourceManager. |
+*-------------------------+-------------------------+------------------------+
+Configuration for  <<<conf/yarn-site.xml>>>
+
+**  NodeManager
+
+*-------------------------+-------------------------+------------------------+
+|| Parameter              || Value                  || Notes                 |
+*-------------------------+-------------------------+------------------------+
+| <<<yarn.nodemanager.keytab>>> | </etc/security/keytab/nm.service.keytab> | |
+| | | Kerberos keytab file for the NodeManager. |
+*-------------------------+-------------------------+------------------------+
+| <<<yarn.nodemanager.principal>>> | nm/_HOST@REALM.TLD | |
+| | | Kerberos principal name for the NodeManager. |
+*-------------------------+-------------------------+------------------------+
+| <<<yarn.nodemanager.container-executor.class>>> | | |
+| | <<<org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor>>> |
+| | | Use LinuxContainerExecutor. |
+*-------------------------+-------------------------+------------------------+
+| <<<yarn.nodemanager.linux-container-executor.group>>> | <hadoop> | |
+| | | Unix group of the NodeManager. |
+*-------------------------+-------------------------+------------------------+
+| <<<yarn.nodemanager.linux-container-executor.path>>> | </path/to/bin/container-executor> | |
+| | | The path to the executable of Linux container executor. |
+*-------------------------+-------------------------+------------------------+
+Configuration for  <<<conf/yarn-site.xml>>>
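+
+  For example, a sketch of the NodeManager entries in
+  <<<conf/yarn-site.xml>>> (the realm is illustrative):
+
+----
+<property>
+  <name>yarn.nodemanager.principal</name>
+  <value>nm/_HOST@REALM.TLD</value>
+</property>
+<property>
+  <name>yarn.nodemanager.container-executor.class</name>
+  <value>org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor</value>
+</property>
+<property>
+  <name>yarn.nodemanager.linux-container-executor.group</name>
+  <value>hadoop</value>
+</property>
+----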
+
+** Configuration for WebAppProxy
+
+    The <<<WebAppProxy>>> provides a proxy between the web applications
+    exported by an application and an end user.  If security is enabled,
+    it will warn users before they access a potentially unsafe web
+    application. Authentication and authorization using the proxy are
+    handled just like for any other privileged web application.
+
+*-------------------------+-------------------------+------------------------+
+|| Parameter              || Value                  || Notes                 |
+*-------------------------+-------------------------+------------------------+
+| <<<yarn.web-proxy.address>>> | | |
+| | <<<WebAppProxy>>> <host:port> for proxy to AM web apps. | |
+| | | If <host:port> is the same as <<<yarn.resourcemanager.webapp.address>>> |
+| | | or it is not defined, then the <<<ResourceManager>>> will run the |
+| | | proxy; otherwise a standalone proxy server will need to be launched. |
+*-------------------------+-------------------------+------------------------+
+| <<<yarn.web-proxy.keytab>>> | | |
+| | </etc/security/keytab/web-app.service.keytab> | |
+| | | Kerberos keytab file for the WebAppProxy. |
+*-------------------------+-------------------------+------------------------+
+| <<<yarn.web-proxy.principal>>> | wap/_HOST@REALM.TLD | |
+| | | Kerberos principal name for the WebAppProxy. |
+*-------------------------+-------------------------+------------------------+
+Configuration for  <<<conf/yarn-site.xml>>>
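+
+  For example, a sketch of the standalone proxy entries in
+  <<<conf/yarn-site.xml>>> (the host name, port, keytab path and realm
+  are illustrative):
+
+----
+<property>
+  <name>yarn.web-proxy.address</name>
+  <value>proxy_host:8089</value>
+</property>
+<property>
+  <name>yarn.web-proxy.principal</name>
+  <value>wap/_HOST@REALM.TLD</value>
+</property>
+----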
+
+** LinuxContainerExecutor
+
+    A <<<ContainerExecutor>>> is used by the YARN framework to define how
+    a <container> is launched and controlled.
+
+    The following container executors are available in Hadoop YARN:
+
+*--------------------------------------+--------------------------------------+
+|| ContainerExecutor                   || Description                         |
+*--------------------------------------+--------------------------------------+
+| <<<DefaultContainerExecutor>>>             | |
+| | The default executor which YARN uses to manage container execution. |
+| | The container process has the same Unix user as the NodeManager.  |
+*--------------------------------------+--------------------------------------+
+| <<<LinuxContainerExecutor>>>               | |
+| | Supported only on GNU/Linux, this executor runs the containers as either the |
+| | YARN user who submitted the application (when full security is enabled) or |
+| | as a dedicated user (defaults to nobody) when full security is not enabled. |
+| | When full security is enabled, this executor requires all user accounts to be |
+| | created on the cluster nodes where the containers are launched. It uses |
+| | a <setuid> executable that is included in the Hadoop distribution. |
+| | The NodeManager uses this executable to launch and kill containers. |
+| | The setuid executable switches to the user who has submitted the |
+| | application and launches or kills the containers. For maximum security, |
+| | this executor sets up restricted permissions and user/group ownership of |
+| | local files and directories used by the containers such as the shared |
+| | objects, jars, intermediate files, log files etc. In particular, |
+| | note that, because of this, no user other than the application owner |
+| | and the NodeManager can access any of the local files/directories, |
+| | including those localized as part of the distributed cache. |
+*--------------------------------------+--------------------------------------+
+
+    To build the LinuxContainerExecutor executable run:
+
+----
+ $ mvn package -Dcontainer-executor.conf.dir=/etc/hadoop/
+----
+
+    The path passed in <<<-Dcontainer-executor.conf.dir>>> should be the
+    path on the cluster nodes where a configuration file for the setuid
+    executable should be located. The executable should be installed in
+    $HADOOP_YARN_HOME/bin.
+
+    The executable must have specific permissions: 6050 or <<<--Sr-s--->>>,
+    user-owned by <root> (super-user) and group-owned by a
+    special group (e.g. <<<hadoop>>>) of which the NodeManager Unix user
+    is a member and no ordinary application user is. If any application
+    user belongs to this special group, security will be compromised. This
+    special group name should be specified for the configuration property
+    <<<yarn.nodemanager.linux-container-executor.group>>> in both
+    <<<conf/yarn-site.xml>>> and <<<conf/container-executor.cfg>>>.
+
+    For example, suppose that the NodeManager is run as user <yarn>, who
+    is part of the groups <users> and <hadoop>, either of them being its
+    primary group. Suppose also that <users> has both <yarn> and another
+    user (an application submitter) <alice> as its members, and that
+    <alice> does not belong to <hadoop>. Going by the above description,
+    the setuid/setgid executable should be set 6050 or --Sr-s--- with
+    user-owner as <root> and group-owner as <hadoop>, which has <yarn> as
+    its member (and not <users>, which has <alice> also as its member
+    besides <yarn>).
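+
+    Under these assumptions, the ownership and mode could be set as
+    follows; the install path is an illustrative assumption:
+
+----
+ $ chown root:hadoop $HADOOP_YARN_HOME/bin/container-executor
+ $ chmod 6050 $HADOOP_YARN_HOME/bin/container-executor
+----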
+
+    The LinuxContainerExecutor requires that the paths including and
+    leading up to the directories specified in
+    <<<yarn.nodemanager.local-dirs>>> and <<<yarn.nodemanager.log-dirs>>>
+    be set to 755 permissions, as described above in the table on
+    permissions on directories.
+
+      * <<<conf/container-executor.cfg>>>
+
+    The executable requires a configuration file called
+    <<<container-executor.cfg>>> to be present in the configuration
+    directory passed to the mvn target mentioned above.
+
+    The configuration file must be owned by <root> (as shown in the
+    permissions tables above), group-owned by anyone, and should have
+    the permissions 0400 or r--------.
+
+    The executable requires the following configuration items to be
+    present in the <<<conf/container-executor.cfg>>> file. The items
+    should be specified as simple key=value pairs, one per line:
+
+*-------------------------+-------------------------+------------------------+
+|| Parameter              || Value                  || Notes                 |
+*-------------------------+-------------------------+------------------------+
+| <<<yarn.nodemanager.linux-container-executor.group>>> | <hadoop> | |
+| | | Unix group of the NodeManager. The group owner of the |
+| | |<container-executor> binary should be this group. Should be same as the |
+| | | value with which the NodeManager is configured. This configuration is |
+| | | required for validating the secure access of the <container-executor> |
+| | | binary. |
+*-------------------------+-------------------------+------------------------+
+| <<<banned.users>>> | hdfs,yarn,mapred,bin | Banned users. |
+*-------------------------+-------------------------+------------------------+
+| <<<allowed.system.users>>> | foo,bar | Allowed system users. |
+*-------------------------+-------------------------+------------------------+
+| <<<min.user.id>>> | 1000 | Prevent other super-users. |
+*-------------------------+-------------------------+------------------------+
+Configuration for <<<conf/container-executor.cfg>>>
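+
+    A sketch of a minimal <<<conf/container-executor.cfg>>> matching the
+    table above (the user and group names are illustrative):
+
+----
+yarn.nodemanager.linux-container-executor.group=hadoop
+banned.users=hdfs,yarn,mapred,bin
+allowed.system.users=foo,bar
+min.user.id=1000
+----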
+
+      To recap, here are the local file-system permissions required for
+      the various paths related to the <<<LinuxContainerExecutor>>>:
+
+*-------------------+-------------------+------------------+------------------+
+|| Filesystem       || Path             || User:Group      || Permissions     |
+*-------------------+-------------------+------------------+------------------+
+| local | container-executor | root:hadoop | --Sr-s--- |
+*-------------------+-------------------+------------------+------------------+
+| local | <<<conf/container-executor.cfg>>> | root:hadoop | r-------- |
+*-------------------+-------------------+------------------+------------------+
+| local | <<<yarn.nodemanager.local-dirs>>> | yarn:hadoop | drwxr-xr-x |
+*-------------------+-------------------+------------------+------------------+
+| local | <<<yarn.nodemanager.log-dirs>>> | yarn:hadoop | drwxr-xr-x |
+*-------------------+-------------------+------------------+------------------+
+
+**  MapReduce JobHistory Server
+
+*-------------------------+-------------------------+------------------------+
+|| Parameter              || Value                  || Notes                 |
+*-------------------------+-------------------------+------------------------+
+| <<<mapreduce.jobhistory.address>>> | | |
+| | MapReduce JobHistory Server <host:port> | Default port is 10020. |
+*-------------------------+-------------------------+------------------------+
+| <<<mapreduce.jobhistory.keytab>>> | |
+| | </etc/security/keytab/jhs.service.keytab> | |
+| | | Kerberos keytab file for the MapReduce JobHistory Server. |
+*-------------------------+-------------------------+------------------------+
+| <<<mapreduce.jobhistory.principal>>> | jhs/_HOST@REALM.TLD | |
+| | | Kerberos principal name for the MapReduce JobHistory Server. |
+*-------------------------+-------------------------+------------------------+
+Configuration for  <<<conf/mapred-site.xml>>>
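+
+  For example, a sketch of these entries in <<<conf/mapred-site.xml>>>
+  (the host name, keytab path and realm are illustrative):
+
+----
+<property>
+  <name>mapreduce.jobhistory.address</name>
+  <value>jhs_host:10020</value>
+</property>
+<property>
+  <name>mapreduce.jobhistory.keytab</name>
+  <value>/etc/security/keytab/jhs.service.keytab</value>
+</property>
+<property>
+  <name>mapreduce.jobhistory.principal</name>
+  <value>jhs/_HOST@REALM.TLD</value>
+</property>
+----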

+ 3 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java

@@ -1183,6 +1183,8 @@ public class TestConfiguration extends TestCase {
       fail("Should throw an IllegalArgumentException exception ");
     } catch (Exception e) {
       assertTrue(e instanceof IllegalArgumentException);
+      assertEquals(e.getMessage(),
+          "The value of property testClassName must not be null");
     }
   }
 
@@ -1193,6 +1195,7 @@ public class TestConfiguration extends TestCase {
       fail("Should throw an IllegalArgumentException exception ");
     } catch (Exception e) {
       assertTrue(e instanceof IllegalArgumentException);
+      assertEquals(e.getMessage(), "Property name must not be null");
     }
   }
 

+ 5 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestSymlinkLocalFS.java

@@ -38,6 +38,11 @@ import org.junit.Test;
  * Test symbolic links using LocalFs.
  */
 abstract public class TestSymlinkLocalFS extends SymlinkBaseTest {
+
+  // Workaround for HADOOP-9652
+  static {
+    RawLocalFileSystem.useStatIfAvailable();
+  }
   
   @Override
   protected String getScheme() {

+ 17 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -148,6 +148,8 @@ Trunk (Unreleased)
     HDFS-5721. sharedEditsImage in Namenode#initializeSharedEdits() should be 
     closed before method returns. (Ted Yu via junping_du)
 
+    HDFS-5138. Support HDFS upgrade in HA. (atm via todd)
+
   OPTIMIZATIONS
 
   BUG FIXES
@@ -502,6 +504,9 @@ Release 2.4.0 - UNRELEASED
     HDFS-5788. listLocatedStatus response can be very large. (Nathan Roberts
     via kihwal)
 
+    HDFS-5781. Use an array to record the mapping between FSEditLogOpCode and 
+    the corresponding byte value. (jing9)
+
   OPTIMIZATIONS
 
     HDFS-5239.  Allow FSNamesystem lock fairness to be configurable (daryn)
@@ -594,6 +599,9 @@ Release 2.4.0 - UNRELEASED
     HDFS-5806. balancer should set SoTimeout to avoid indefinite hangs.
     (Nathan Roberts via Andrew Wang).
 
+    HDFS-5728. Block recovery will fail if the metafile does not have crc 
+    for all chunks of the block (Vinay via kihwal)
+
   BREAKDOWN OF HDFS-2832 SUBTASKS AND RELATED JIRAS
 
     HDFS-4985. Add storage type to the protocol and expose it in block report
@@ -974,6 +982,12 @@ Release 2.3.0 - UNRELEASED
     HDFS-5677. Need error checking for HA cluster configuration.
     (Vincent Sheffer via cos)
 
+    HADOOP-10086. User document for authentication in secure cluster.
+    (Masatake Iwasaki via Arpit Agarwal)
+
+    HDFS-5825. Use FileUtils.copyFile() to implement DFSTestUtils.copyFile().
+    (Haohui Mai via Arpit Agarwal)
+
   OPTIMIZATIONS
 
   BUG FIXES
@@ -1134,6 +1148,9 @@ Release 2.3.0 - UNRELEASED
 
     HDFS-5789. Some of snapshot APIs missing checkOperation double check in fsn. (umamahesh)
 
+    HDFS-5343. When cat command is issued on snapshot files getting unexpected result.
+    (Sathish via umamahesh)
+
 Release 2.2.0 - 2013-10-13
 
   INCOMPATIBLE CHANGES

+ 5 - 0
hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml

@@ -361,5 +361,10 @@
       <Class name="org.apache.hadoop.hdfs.server.blockmanagement.CacheReplicationMonitor" />
       <Bug pattern="RV_RETURN_VALUE_IGNORED_BAD_PRACTICE" />
     </Match>
+    <Match>
+      <Class name="org.apache.hadoop.hdfs.DFSUtil"/>
+      <Method name="assertAllResultsEqual" />
+      <Bug pattern="NP_LOAD_OF_KNOWN_NULL_VALUE" />
+    </Match>
 
  </FindBugsFilter>

+ 33 - 0
hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperJournalManager.java

@@ -18,6 +18,8 @@
 package org.apache.hadoop.contrib.bkjournal;
 
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
+import org.apache.hadoop.hdfs.server.common.Storage;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
 import org.apache.hadoop.hdfs.server.namenode.JournalManager;
 import org.apache.hadoop.hdfs.server.namenode.EditLogOutputStream;
 import org.apache.hadoop.hdfs.server.namenode.EditLogInputStream;
@@ -659,6 +661,37 @@ public class BookKeeperJournalManager implements JournalManager {
     }
   }
 
+  @Override
+  public void doPreUpgrade() throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public void doUpgrade(Storage storage) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public long getJournalCTime() throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public void doFinalize() throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public boolean canRollBack(StorageInfo storage, StorageInfo prevStorage,
+      int targetLayoutVersion) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public void doRollback() throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
   @Override
   public void close() throws IOException {
     try {

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/java/org/apache/hadoop/contrib/bkjournal/TestBookKeeperAsHASharedDir.java

@@ -316,7 +316,7 @@ public class TestBookKeeperAsHASharedDir {
     } catch (IOException ioe) {
       LOG.info("Got expected exception", ioe);
       GenericTestUtils.assertExceptionContains(
-          "Cannot start an HA namenode with name dirs that need recovery", ioe);
+          "storage directory does not exist or is not accessible", ioe);
     }
   }
 

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java

@@ -792,6 +792,9 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead,
             currentNode = blockSeekTo(pos);
           }
           int realLen = (int) Math.min(len, (blockEnd - pos + 1L));
+          if (locatedBlocks.isLastBlockComplete()) {
+            realLen = (int) Math.min(realLen, locatedBlocks.getFileLength());
+          }
           int result = readBuffer(strategy, off, realLen, corruptedBlockMap);
           
           if (result >= 0) {

+ 44 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java

@@ -43,6 +43,7 @@ import java.net.URISyntaxException;
 import java.security.SecureRandom;
 import java.text.SimpleDateFormat;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
@@ -574,10 +575,24 @@ public class DFSUtil {
     }
     return ret;
   }
+  
+  /**
+   * Get all of the RPC addresses of the individual NNs in a given nameservice.
+   * 
+   * @param conf Configuration
+   * @param nsId the nameservice whose NNs addresses we want.
+   * @param defaultValue default address to return in case key is not found.
+   * @return A map from nnId -> RPC address of each NN in the nameservice.
+   */
+  public static Map<String, InetSocketAddress> getRpcAddressesForNameserviceId(
+      Configuration conf, String nsId, String defaultValue) {
+    return getAddressesForNameserviceId(conf, nsId, defaultValue,
+        DFS_NAMENODE_RPC_ADDRESS_KEY);
+  }
 
   private static Map<String, InetSocketAddress> getAddressesForNameserviceId(
       Configuration conf, String nsId, String defaultValue,
-      String[] keys) {
+      String... keys) {
     Collection<String> nnIds = getNameNodeIds(conf, nsId);
     Map<String, InetSocketAddress> ret = Maps.newHashMap();
     for (String nnId : emptyAsSingletonNull(nnIds)) {
@@ -1670,4 +1685,32 @@ public class DFSUtil {
     }
     return builder;
   }
+
+  /**
+   * Assert that all objects in the collection are equal. Returns silently if
+   * so, throws an AssertionError if any object is not equal. All null values
+   * are considered equal.
+   * 
+   * @param objects the collection of objects to check for equality.
+   */
+  public static void assertAllResultsEqual(Collection<?> objects) {
+    Object[] resultsArray = objects.toArray();
+    
+    if (resultsArray.length == 0)
+      return;
+    
+    for (int i = 0; i < resultsArray.length; i++) {
+      if (i == 0)
+        continue;
+      else {
+        Object currElement = resultsArray[i];
+        Object lastElement = resultsArray[i - 1];
+        if ((currElement == null && currElement != lastElement) ||
+            (currElement != null && !currElement.equals(lastElement))) {
+          throw new AssertionError("Not all elements match in results: " +
+            Arrays.toString(resultsArray));
+        }
+      }
+    }
+  }
 }

+ 58 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java

@@ -26,22 +26,29 @@ import org.apache.hadoop.HadoopIllegalArgumentException;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.UnresolvedLinkException;
+import org.apache.hadoop.hdfs.protocol.ClientProtocol;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSelector;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.ipc.RPC;
+import org.apache.hadoop.ipc.RemoteException;
+import org.apache.hadoop.ipc.StandbyException;
+import org.apache.hadoop.security.AccessControlException;
 import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.token.Token;
 
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.net.InetSocketAddress;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.List;
 import java.util.Map;
 
 import static org.apache.hadoop.hdfs.DFSConfigKeys.*;
@@ -305,4 +312,55 @@ public class HAUtil {
     DFSClient dfsClient = dfs.getClient();
     return RPC.getServerAddress(dfsClient.getNamenode());
   }
+  
+  /**
+   * Get an RPC proxy for each NN in an HA nameservice. Used when a given RPC
+   * call should be made on every NN in an HA nameservice, not just the active.
+   * 
+   * @param conf configuration
+   * @param nsId the nameservice to get all of the proxies for.
+   * @return a list of RPC proxies for each NN in the nameservice.
+   * @throws IOException in the event of error.
+   */
+  public static List<ClientProtocol> getProxiesForAllNameNodesInNameservice(
+      Configuration conf, String nsId) throws IOException {
+    Map<String, InetSocketAddress> nnAddresses =
+        DFSUtil.getRpcAddressesForNameserviceId(conf, nsId, null);
+    
+    List<ClientProtocol> namenodes = new ArrayList<ClientProtocol>();
+    for (InetSocketAddress nnAddress : nnAddresses.values()) {
+      NameNodeProxies.ProxyAndInfo<ClientProtocol> proxyInfo = null;
+      proxyInfo = NameNodeProxies.createNonHAProxy(conf,
+          nnAddress, ClientProtocol.class,
+          UserGroupInformation.getCurrentUser(), false);
+      namenodes.add(proxyInfo.getProxy());
+    }
+    return namenodes;
+  }
+  
+  /**
+   * Used to ensure that at least one of the given HA NNs is currently in the
+   * active state.
+   * 
+   * @param namenodes list of RPC proxies for each NN to check.
+   * @return true if at least one NN is active, false if all are in the standby state.
+   * @throws IOException in the event of error.
+   */
+  public static boolean isAtLeastOneActive(List<ClientProtocol> namenodes)
+      throws IOException {
+    for (ClientProtocol namenode : namenodes) {
+      try {
+        namenode.getFileInfo("/");
+        return true;
+      } catch (RemoteException re) {
+        IOException cause = re.unwrapRemoteException();
+        if (cause instanceof StandbyException) {
+          // This is expected to happen for a standby NN.
+        } else {
+          throw re;
+        }
+      }
+    }
+    return false;
+  }
 }

+ 14 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLogger.java

@@ -27,6 +27,7 @@ import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.NewEpochR
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.SegmentStateProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.RequestInfo;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
 import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;
 
@@ -151,4 +152,17 @@ interface AsyncLogger {
    * StringBuilder. This is displayed on the NN web UI.
    */
   public void appendReport(StringBuilder sb);
+
+  public ListenableFuture<Void> doPreUpgrade();
+
+  public ListenableFuture<Void> doUpgrade(StorageInfo sInfo);
+
+  public ListenableFuture<Void> doFinalize();
+
+  public ListenableFuture<Boolean> canRollBack(StorageInfo storage,
+      StorageInfo prevStorage, int targetLayoutVersion);
+
+  public ListenableFuture<Void> doRollback();
+
+  public ListenableFuture<Long> getJournalCTime();
 }

+ 68 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/AsyncLoggerSet.java

@@ -29,6 +29,7 @@ import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetJourna
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.NewEpochResponseProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.SegmentStateProto;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
 import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;
 
@@ -308,4 +309,71 @@ class AsyncLoggerSet {
     }
     return QuorumCall.create(calls);
   }
+  
+  QuorumCall<AsyncLogger, Void> doPreUpgrade() {
+    Map<AsyncLogger, ListenableFuture<Void>> calls =
+        Maps.newHashMap();
+    for (AsyncLogger logger : loggers) {
+      ListenableFuture<Void> future =
+          logger.doPreUpgrade();
+      calls.put(logger, future);
+    }
+    return QuorumCall.create(calls);
+  }
+
+  public QuorumCall<AsyncLogger, Void> doUpgrade(StorageInfo sInfo) {
+    Map<AsyncLogger, ListenableFuture<Void>> calls =
+        Maps.newHashMap();
+    for (AsyncLogger logger : loggers) {
+      ListenableFuture<Void> future =
+          logger.doUpgrade(sInfo);
+      calls.put(logger, future);
+    }
+    return QuorumCall.create(calls);
+  }
+
+  public QuorumCall<AsyncLogger, Void> doFinalize() {
+    Map<AsyncLogger, ListenableFuture<Void>> calls =
+        Maps.newHashMap();
+    for (AsyncLogger logger : loggers) {
+      ListenableFuture<Void> future =
+          logger.doFinalize();
+      calls.put(logger, future);
+    }
+    return QuorumCall.create(calls);
+  }
+
+  public QuorumCall<AsyncLogger, Boolean> canRollBack(StorageInfo storage,
+      StorageInfo prevStorage, int targetLayoutVersion) {
+    Map<AsyncLogger, ListenableFuture<Boolean>> calls =
+        Maps.newHashMap();
+    for (AsyncLogger logger : loggers) {
+      ListenableFuture<Boolean> future =
+          logger.canRollBack(storage, prevStorage, targetLayoutVersion);
+      calls.put(logger, future);
+    }
+    return QuorumCall.create(calls);
+  }
+
+  public QuorumCall<AsyncLogger, Void> doRollback() {
+    Map<AsyncLogger, ListenableFuture<Void>> calls =
+        Maps.newHashMap();
+    for (AsyncLogger logger : loggers) {
+      ListenableFuture<Void> future =
+          logger.doRollback();
+      calls.put(logger, future);
+    }
+    return QuorumCall.create(calls);
+  }
+
+  public QuorumCall<AsyncLogger, Long> getJournalCTime() {
+    Map<AsyncLogger, ListenableFuture<Long>> calls =
+        Maps.newHashMap();
+    for (AsyncLogger logger : loggers) {
+      ListenableFuture<Long> future = logger.getJournalCTime();
+      calls.put(logger, future);
+    }
+    return QuorumCall.create(calls);
+  }
+
 }

+ 68 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannel.java

@@ -46,6 +46,7 @@ import org.apache.hadoop.hdfs.qjournal.protocol.RequestInfo;
 import org.apache.hadoop.hdfs.qjournal.protocolPB.QJournalProtocolPB;
 import org.apache.hadoop.hdfs.qjournal.protocolPB.QJournalProtocolTranslatorPB;
 import org.apache.hadoop.hdfs.qjournal.server.GetJournalEditServlet;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
 import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;
 import org.apache.hadoop.ipc.ProtobufRpcEngine;
@@ -564,6 +565,72 @@ public class IPCLoggerChannel implements AsyncLogger {
       }
     });
   }
+  
+  @Override
+  public ListenableFuture<Void> doPreUpgrade() {
+    return executor.submit(new Callable<Void>() {
+      @Override
+      public Void call() throws IOException {
+        getProxy().doPreUpgrade(journalId);
+        return null;
+      }
+    });
+  }
+  
+  @Override
+  public ListenableFuture<Void> doUpgrade(final StorageInfo sInfo) {
+    return executor.submit(new Callable<Void>() {
+      @Override
+      public Void call() throws IOException {
+        getProxy().doUpgrade(journalId, sInfo);
+        return null;
+      }
+    });
+  }
+  
+  @Override
+  public ListenableFuture<Void> doFinalize() {
+    return executor.submit(new Callable<Void>() {
+      @Override
+      public Void call() throws IOException {
+        getProxy().doFinalize(journalId);
+        return null;
+      }
+    });
+  }
+  
+  @Override
+  public ListenableFuture<Boolean> canRollBack(final StorageInfo storage,
+      final StorageInfo prevStorage, final int targetLayoutVersion) {
+    return executor.submit(new Callable<Boolean>() {
+      @Override
+      public Boolean call() throws IOException {
+        return getProxy().canRollBack(journalId, storage, prevStorage,
+            targetLayoutVersion);
+      }
+    });
+  }
+
+  @Override
+  public ListenableFuture<Void> doRollback() {
+    return executor.submit(new Callable<Void>() {
+      @Override
+      public Void call() throws IOException {
+        getProxy().doRollback(journalId);
+        return null;
+      }
+    });
+  }
+  
+  @Override
+  public ListenableFuture<Long> getJournalCTime() {
+    return executor.submit(new Callable<Long>() {
+      @Override
+      public Long call() throws IOException {
+        return getProxy().getJournalCTime(journalId);
+      }
+    });
+  }
 
   @Override
   public String toString() {
@@ -636,4 +703,5 @@ public class IPCLoggerChannel implements AsyncLogger {
   private boolean hasHttpServerEndPoint() {
    return httpServerURL != null;
   }
+
 }

+ 138 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java

@@ -34,10 +34,13 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetJournalStateResponseProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.NewEpochResponseProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.SegmentStateProto;
+import org.apache.hadoop.hdfs.server.common.Storage;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
 import org.apache.hadoop.hdfs.server.namenode.EditLogFileInputStream;
 import org.apache.hadoop.hdfs.server.namenode.EditLogInputStream;
 import org.apache.hadoop.hdfs.server.namenode.EditLogOutputStream;
@@ -77,8 +80,14 @@ public class QuorumJournalManager implements JournalManager {
   // Since these don't occur during normal operation, we can
   // use rather lengthy timeouts, and don't need to make them
   // configurable.
-  private static final int FORMAT_TIMEOUT_MS = 60000;
-  private static final int HASDATA_TIMEOUT_MS = 60000;
+  private static final int FORMAT_TIMEOUT_MS            = 60000;
+  private static final int HASDATA_TIMEOUT_MS           = 60000;
+  private static final int CAN_ROLL_BACK_TIMEOUT_MS     = 60000;
+  private static final int FINALIZE_TIMEOUT_MS          = 60000;
+  private static final int PRE_UPGRADE_TIMEOUT_MS       = 60000;
+  private static final int ROLL_BACK_TIMEOUT_MS         = 60000;
+  private static final int UPGRADE_TIMEOUT_MS           = 60000;
+  private static final int GET_JOURNAL_CTIME_TIMEOUT_MS = 60000;
   
   private final Configuration conf;
   private final URI uri;
@@ -492,4 +501,131 @@ public class QuorumJournalManager implements JournalManager {
     return loggers;
   }
 
+  @Override
+  public void doPreUpgrade() throws IOException {
+    QuorumCall<AsyncLogger, Void> call = loggers.doPreUpgrade();
+    try {
+      call.waitFor(loggers.size(), loggers.size(), 0, PRE_UPGRADE_TIMEOUT_MS,
+          "doPreUpgrade");
+      
+      if (call.countExceptions() > 0) {
+        call.rethrowException("Could not do pre-upgrade of one or more JournalNodes");
+      }
+    } catch (InterruptedException e) {
+      throw new IOException("Interrupted waiting for doPreUpgrade() response");
+    } catch (TimeoutException e) {
+      throw new IOException("Timed out waiting for doPreUpgrade() response");
+    }
+  }
+
+  @Override
+  public void doUpgrade(Storage storage) throws IOException {
+    QuorumCall<AsyncLogger, Void> call = loggers.doUpgrade(storage);
+    try {
+      call.waitFor(loggers.size(), loggers.size(), 0, UPGRADE_TIMEOUT_MS,
+          "doUpgrade");
+      
+      if (call.countExceptions() > 0) {
+        call.rethrowException("Could not perform upgrade of one or more JournalNodes");
+      }
+    } catch (InterruptedException e) {
+      throw new IOException("Interrupted waiting for doUpgrade() response");
+    } catch (TimeoutException e) {
+      throw new IOException("Timed out waiting for doUpgrade() response");
+    }
+  }
+  
+  @Override
+  public void doFinalize() throws IOException {
+    QuorumCall<AsyncLogger, Void> call = loggers.doFinalize();
+    try {
+      call.waitFor(loggers.size(), loggers.size(), 0, FINALIZE_TIMEOUT_MS,
+          "doFinalize");
+      
+      if (call.countExceptions() > 0) {
+        call.rethrowException("Could not finalize one or more JournalNodes");
+      }
+    } catch (InterruptedException e) {
+      throw new IOException("Interrupted waiting for doFinalize() response");
+    } catch (TimeoutException e) {
+      throw new IOException("Timed out waiting for doFinalize() response");
+    }
+  }
+  
+  @Override
+  public boolean canRollBack(StorageInfo storage, StorageInfo prevStorage,
+      int targetLayoutVersion) throws IOException {
+    QuorumCall<AsyncLogger, Boolean> call = loggers.canRollBack(storage,
+        prevStorage, targetLayoutVersion);
+    try {
+      call.waitFor(loggers.size(), loggers.size(), 0, CAN_ROLL_BACK_TIMEOUT_MS,
+          "lockSharedStorage");
+      
+      if (call.countExceptions() > 0) {
+        call.rethrowException("Could not check if roll back possible for"
+            + " one or more JournalNodes");
+      }
+      
+      // Either they all return the same thing or this call fails, so we can
+      // just return the first result.
+      DFSUtil.assertAllResultsEqual(call.getResults().values());
+      for (Boolean result : call.getResults().values()) {
+        return result;
+      }
+    } catch (InterruptedException e) {
+      throw new IOException("Interrupted waiting for lockSharedStorage() " +
+          "response");
+    } catch (TimeoutException e) {
+      throw new IOException("Timed out waiting for lockSharedStorage() " +
+          "response");
+    }
+    
+    throw new AssertionError("Unreachable code.");
+  }
+
+  @Override
+  public void doRollback() throws IOException {
+    QuorumCall<AsyncLogger, Void> call = loggers.doRollback();
+    try {
+      call.waitFor(loggers.size(), loggers.size(), 0, ROLL_BACK_TIMEOUT_MS,
+          "doRollback");
+      
+      if (call.countExceptions() > 0) {
+        call.rethrowException("Could not perform rollback of one or more JournalNodes");
+      }
+    } catch (InterruptedException e) {
+      throw new IOException("Interrupted waiting for doFinalize() response");
+    } catch (TimeoutException e) {
+      throw new IOException("Timed out waiting for doFinalize() response");
+    }
+  }
+  
+  @Override
+  public long getJournalCTime() throws IOException {
+    QuorumCall<AsyncLogger, Long> call = loggers.getJournalCTime();
+    try {
+      call.waitFor(loggers.size(), loggers.size(), 0,
+          GET_JOURNAL_CTIME_TIMEOUT_MS, "getJournalCTime");
+      
+      if (call.countExceptions() > 0) {
+        call.rethrowException("Could not journal CTime for one "
+            + "more JournalNodes");
+      }
+      
+      // Either they all return the same thing or this call fails, so we can
+      // just return the first result.
+      DFSUtil.assertAllResultsEqual(call.getResults().values());
+      for (Long result : call.getResults().values()) {
+        return result;
+      }
+    } catch (InterruptedException e) {
+      throw new IOException("Interrupted waiting for getJournalCTime() " +
+          "response");
+    } catch (TimeoutException e) {
+      throw new IOException("Timed out waiting for getJournalCTime() " +
+          "response");
+    }
+    
+    throw new AssertionError("Unreachable code.");
+  }
 }

+ 14 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocol/QJournalProtocol.java

@@ -29,6 +29,7 @@ import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.NewEpochR
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.SegmentStateProto;
 import org.apache.hadoop.hdfs.qjournal.server.JournalNode;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
 import org.apache.hadoop.hdfs.server.namenode.JournalManager;
 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
 import org.apache.hadoop.security.KerberosInfo;
@@ -143,4 +144,17 @@ public interface QJournalProtocol {
    */
   public void acceptRecovery(RequestInfo reqInfo,
       SegmentStateProto stateToAccept, URL fromUrl) throws IOException;
+
+  public void doPreUpgrade(String journalId) throws IOException;
+
+  public void doUpgrade(String journalId, StorageInfo sInfo) throws IOException;
+
+  public void doFinalize(String journalId) throws IOException;
+
+  public Boolean canRollBack(String journalId, StorageInfo storage,
+      StorageInfo prevStorage, int targetLayoutVersion) throws IOException;
+
+  public void doRollback(String journalId) throws IOException;
+
+  public Long getJournalCTime(String journalId) throws IOException;
 }

+ 94 - 5
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolServerSideTranslatorPB.java

@@ -17,17 +17,35 @@
  */
 package org.apache.hadoop.hdfs.qjournal.protocolPB;
 
+import java.io.IOException;
+import java.net.URL;
+
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
+import org.apache.hadoop.hdfs.protocolPB.JournalProtocolPB;
 import org.apache.hadoop.hdfs.protocolPB.PBHelper;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocol;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.AcceptRecoveryRequestProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.AcceptRecoveryResponseProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.CanRollBackRequestProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.CanRollBackResponseProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DoFinalizeRequestProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DoFinalizeResponseProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DoPreUpgradeRequestProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DoPreUpgradeResponseProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DoRollbackRequestProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DoRollbackResponseProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DoUpgradeRequestProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DoUpgradeResponseProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.FinalizeLogSegmentRequestProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.FinalizeLogSegmentResponseProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.FormatRequestProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.FormatResponseProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetEditLogManifestRequestProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetEditLogManifestResponseProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetJournalCTimeRequestProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetJournalCTimeResponseProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetJournalStateRequestProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetJournalStateResponseProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.HeartbeatRequestProto;
@@ -39,8 +57,6 @@ import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.JournalRe
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.JournalResponseProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.NewEpochRequestProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.NewEpochResponseProto;
-import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.AcceptRecoveryRequestProto;
-import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.AcceptRecoveryResponseProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryRequestProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PurgeLogsRequestProto;
@@ -48,13 +64,11 @@ import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PurgeLogs
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.StartLogSegmentRequestProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.StartLogSegmentResponseProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.RequestInfo;
+import org.apache.hadoop.hdfs.server.protocol.JournalProtocol;
 
 import com.google.protobuf.RpcController;
 import com.google.protobuf.ServiceException;
 
-import java.io.IOException;
-import java.net.URL;
-
 /**
  * Implementation for protobuf service that forwards requests
  * received on {@link JournalProtocolPB} to the 
@@ -244,4 +258,79 @@ public class QJournalProtocolServerSideTranslatorPB implements QJournalProtocolP
         reqInfo.hasCommittedTxId() ?
           reqInfo.getCommittedTxId() : HdfsConstants.INVALID_TXID);
   }
+
+
+  @Override
+  public DoPreUpgradeResponseProto doPreUpgrade(RpcController controller,
+      DoPreUpgradeRequestProto request) throws ServiceException {
+    try {
+      impl.doPreUpgrade(convert(request.getJid()));
+      return DoPreUpgradeResponseProto.getDefaultInstance();
+    } catch (IOException e) {
+      throw new ServiceException(e);
+    }
+  }
+
+  @Override
+  public DoUpgradeResponseProto doUpgrade(RpcController controller,
+      DoUpgradeRequestProto request) throws ServiceException {
+    try {
+      impl.doUpgrade(convert(request.getJid()),
+          PBHelper.convert(request.getSInfo()));
+      return DoUpgradeResponseProto.getDefaultInstance();
+    } catch (IOException e) {
+      throw new ServiceException(e);
+    }
+  }
+
+  @Override
+  public DoFinalizeResponseProto doFinalize(RpcController controller,
+      DoFinalizeRequestProto request) throws ServiceException {
+    try {
+      impl.doFinalize(convert(request.getJid()));
+      return DoFinalizeResponseProto.getDefaultInstance();
+    } catch (IOException e) {
+      throw new ServiceException(e);
+    }
+  }
+
+  @Override
+  public CanRollBackResponseProto canRollBack(RpcController controller,
+      CanRollBackRequestProto request) throws ServiceException {
+    try {
+      Boolean result = impl.canRollBack(convert(request.getJid()),
+          PBHelper.convert(request.getStorage()),
+          PBHelper.convert(request.getPrevStorage()),
+          request.getTargetLayoutVersion());
+      return CanRollBackResponseProto.newBuilder()
+          .setCanRollBack(result)
+          .build();
+    } catch (IOException e) {
+      throw new ServiceException(e);
+    }
+  }
+
+  @Override
+  public DoRollbackResponseProto doRollback(RpcController controller, DoRollbackRequestProto request)
+      throws ServiceException {
+    try {
+      impl.doRollback(convert(request.getJid()));
+      return DoRollbackResponseProto.getDefaultInstance();
+    } catch (IOException e) {
+      throw new ServiceException(e);
+    }
+  }
+
+  @Override
+  public GetJournalCTimeResponseProto getJournalCTime(RpcController controller,
+      GetJournalCTimeRequestProto request) throws ServiceException {
+    try {
+      Long resultCTime = impl.getJournalCTime(convert(request.getJid()));
+      return GetJournalCTimeResponseProto.newBuilder()
+          .setResultCTime(resultCTime)
+          .build();
+    } catch (IOException e) {
+      throw new ServiceException(e);
+    }
+  }
 }

+ 92 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolTranslatorPB.java

@@ -23,13 +23,23 @@ import java.net.URL;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hdfs.protocolPB.JournalProtocolPB;
 import org.apache.hadoop.hdfs.protocolPB.PBHelper;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocol;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.AcceptRecoveryRequestProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.CanRollBackRequestProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.CanRollBackResponseProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DoFinalizeRequestProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DoPreUpgradeRequestProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DoRollbackRequestProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.DoUpgradeRequestProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.FinalizeLogSegmentRequestProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.FormatRequestProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetEditLogManifestRequestProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetEditLogManifestResponseProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetJournalCTimeRequestProto;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetJournalCTimeResponseProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetJournalStateRequestProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetJournalStateResponseProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.HeartbeatRequestProto;
@@ -39,7 +49,6 @@ import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.JournalId
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.JournalRequestProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.NewEpochRequestProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.NewEpochResponseProto;
-import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.AcceptRecoveryRequestProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryRequestProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PurgeLogsRequestProto;
@@ -47,6 +56,7 @@ import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.RequestIn
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.SegmentStateProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.StartLogSegmentRequestProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.RequestInfo;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
 import org.apache.hadoop.hdfs.server.protocol.JournalProtocol;
 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
 import org.apache.hadoop.ipc.ProtobufHelper;
@@ -277,4 +287,85 @@ public class QJournalProtocolTranslatorPB implements ProtocolMetaInterface,
         RPC.getProtocolVersion(QJournalProtocolPB.class), methodName);
   }
 
+  @Override
+  public void doPreUpgrade(String jid) throws IOException {
+    try {
+      rpcProxy.doPreUpgrade(NULL_CONTROLLER,
+          DoPreUpgradeRequestProto.newBuilder()
+            .setJid(convertJournalId(jid))
+            .build());
+    } catch (ServiceException e) {
+      throw ProtobufHelper.getRemoteException(e);
+    }
+  }
+
+  @Override
+  public void doUpgrade(String journalId, StorageInfo sInfo) throws IOException {
+    try {
+      rpcProxy.doUpgrade(NULL_CONTROLLER,
+          DoUpgradeRequestProto.newBuilder()
+            .setJid(convertJournalId(journalId))
+            .setSInfo(PBHelper.convert(sInfo))
+            .build());
+    } catch (ServiceException e) {
+      throw ProtobufHelper.getRemoteException(e);
+    }
+  }
+  
+  @Override
+  public void doFinalize(String jid) throws IOException {
+    try {
+      rpcProxy.doFinalize(NULL_CONTROLLER,
+          DoFinalizeRequestProto.newBuilder()
+            .setJid(convertJournalId(jid))
+            .build());
+    } catch (ServiceException e) {
+      throw ProtobufHelper.getRemoteException(e);
+    }
+  }
+
+  @Override
+  public Boolean canRollBack(String journalId, StorageInfo storage,
+      StorageInfo prevStorage, int targetLayoutVersion) throws IOException {
+    try {
+      CanRollBackResponseProto response = rpcProxy.canRollBack(
+          NULL_CONTROLLER,
+          CanRollBackRequestProto.newBuilder()
+            .setJid(convertJournalId(journalId))
+            .setStorage(PBHelper.convert(storage))
+            .setPrevStorage(PBHelper.convert(prevStorage))
+            .setTargetLayoutVersion(targetLayoutVersion)
+            .build());
+      return response.getCanRollBack();
+    } catch (ServiceException e) {
+      throw ProtobufHelper.getRemoteException(e);
+    }
+  }
+
+  @Override
+  public void doRollback(String journalId) throws IOException {
+    try {
+      rpcProxy.doRollback(NULL_CONTROLLER,
+          DoRollbackRequestProto.newBuilder()
+            .setJid(convertJournalId(journalId))
+            .build());
+    } catch (ServiceException e) {
+      throw ProtobufHelper.getRemoteException(e);
+    }
+  }
+
+  @Override
+  public Long getJournalCTime(String journalId) throws IOException {
+    try {
+      GetJournalCTimeResponseProto response = rpcProxy.getJournalCTime(
+          NULL_CONTROLLER,
+          GetJournalCTimeRequestProto.newBuilder()
+            .setJid(convertJournalId(journalId))
+            .build());
+      return response.getResultCTime();
+    } catch (ServiceException e) {
+      throw ProtobufHelper.getRemoteException(e);
+    }
+  }
+
 }

+ 18 - 11
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/GetJournalEditServlet.java

@@ -40,6 +40,7 @@ import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager;
 import org.apache.hadoop.hdfs.server.common.JspHelper;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
 import org.apache.hadoop.hdfs.server.namenode.FileJournalManager;
 import org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile;
 import org.apache.hadoop.hdfs.server.namenode.GetImageServlet;
@@ -139,20 +140,26 @@ public class GetJournalEditServlet extends HttpServlet {
   private boolean checkStorageInfoOrSendError(JNStorage storage,
       HttpServletRequest request, HttpServletResponse response)
       throws IOException {
-    String myStorageInfoString = storage.toColonSeparatedString();
+    int myNsId = storage.getNamespaceID();
+    String myClusterId = storage.getClusterID();
+    
     String theirStorageInfoString = StringEscapeUtils.escapeHtml(
         request.getParameter(STORAGEINFO_PARAM));
 
-    if (theirStorageInfoString != null
-        && !myStorageInfoString.equals(theirStorageInfoString)) {
-      String msg = "This node has storage info '" + myStorageInfoString
-          + "' but the requesting node expected '"
-          + theirStorageInfoString + "'";
-      
-      response.sendError(HttpServletResponse.SC_FORBIDDEN, msg);
-      LOG.warn("Received an invalid request file transfer request from " +
-          request.getRemoteAddr() + ": " + msg);
-      return false;
+    if (theirStorageInfoString != null) {
+      int theirNsId = StorageInfo.getNsIdFromColonSeparatedString(
+          theirStorageInfoString);
+      String theirClusterId = StorageInfo.getClusterIdFromColonSeparatedString(
+          theirStorageInfoString);
+      if (myNsId != theirNsId || !myClusterId.equals(theirClusterId)) {
+        String msg = "This node has namespaceId '" + myNsId + "' and clusterId '"
+            + myClusterId + "' but the requesting node expected '" + theirNsId
+            + "' and '" + theirClusterId + "'";
+        response.sendError(HttpServletResponse.SC_FORBIDDEN, msg);
+        LOG.warn("Received an invalid file transfer request from " +
+            request.getRemoteAddr() + ": " + msg);
+        return false;
+      }
     }
     return true;
   }
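Since StorageInfo.toColonSeparatedString() joins the fields as layoutVersion:namespaceID:cTime:clusterID (see the StorageInfo.java diff further down), the servlet now compares only the namespaceID and clusterID fields, presumably so that nodes at different layout versions, e.g. mid-upgrade, can still serve edit files to one another. A minimal sketch of the parse; the string value below is hypothetical:

    import org.apache.hadoop.hdfs.server.common.StorageInfo;

    class StorageInfoStringDemo {
      public static void main(String[] args) {
        // Hypothetical storage-info string, in the field order produced by
        // StorageInfo.toColonSeparatedString(): layoutVersion:namespaceID:cTime:clusterID
        String theirs = "-51:463031076:0:CID-f7212e82";
        System.out.println(StorageInfo.getNsIdFromColonSeparatedString(theirs));      // 463031076
        System.out.println(StorageInfo.getClusterIdFromColonSeparatedString(theirs)); // CID-f7212e82
      }
    }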

+ 9 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JNStorage.java

@@ -130,6 +130,10 @@ class JNStorage extends Storage {
     return new File(sd.getCurrentDir(), "paxos");
   }
   
+  File getRoot() {
+    return sd.getRoot();
+  }
+  
   /**
    * Remove any log files and associated paxos files which are older than
    * the given txid.
@@ -182,12 +186,15 @@ class JNStorage extends Storage {
     unlockAll();
     sd.clearDirectory();
     writeProperties(sd);
+    createPaxosDir();
+    analyzeStorage();
+  }
+  
+  void createPaxosDir() throws IOException {
     if (!getPaxosDir().mkdirs()) {
       throw new IOException("Could not create paxos dir: " + getPaxosDir());
     }
-    analyzeStorage();
   }
-
   
   void analyzeStorage() throws IOException {
     this.state = sd.analyzeStorage(StartupOption.REGULAR, this);

+ 65 - 5
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java

@@ -37,12 +37,14 @@ import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.qjournal.protocol.JournalNotFormattedException;
 import org.apache.hadoop.hdfs.qjournal.protocol.JournalOutOfSyncException;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocol;
+import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.NewEpochResponseProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PersistedRecoveryPaxosData;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.PrepareRecoveryResponseProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.SegmentStateProto;
 import org.apache.hadoop.hdfs.qjournal.protocol.RequestInfo;
 import org.apache.hadoop.hdfs.server.common.StorageErrorReporter;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
 import org.apache.hadoop.hdfs.server.namenode.EditLogOutputStream;
 import org.apache.hadoop.hdfs.server.namenode.FileJournalManager;
 import org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile;
@@ -73,7 +75,7 @@ import com.google.protobuf.TextFormat;
  * Each such journal is entirely independent despite being hosted by
  * the same JVM.
  */
-class Journal implements Closeable {
+public class Journal implements Closeable {
   static final Log LOG = LogFactory.getLog(Journal.class);
 
 
@@ -122,8 +124,8 @@ class Journal implements Closeable {
    */
   private BestEffortLongFile committedTxnId;
   
-  private static final String LAST_PROMISED_FILENAME = "last-promised-epoch";
-  private static final String LAST_WRITER_EPOCH = "last-writer-epoch";
+  public static final String LAST_PROMISED_FILENAME = "last-promised-epoch";
+  public static final String LAST_WRITER_EPOCH = "last-writer-epoch";
   private static final String COMMITTED_TXID_FILENAME = "committed-txid";
   
   private final FileJournalManager fjm;
@@ -627,7 +629,7 @@ class Journal implements Closeable {
   }
 
   /**
-   * @see QJournalProtocol#getEditLogManifest(String, long)
+   * @see QJournalProtocol#getEditLogManifest(String, long, boolean)
    */
   public RemoteEditLogManifest getEditLogManifest(long sinceTxId,
       boolean inProgressOk) throws IOException {
@@ -728,7 +730,7 @@ class Journal implements Closeable {
   }
   
   /**
-   * @see QJournalProtocol#acceptRecovery(RequestInfo, SegmentStateProto, URL)
+   * @see QJournalProtocol#acceptRecovery(RequestInfo, QJournalProtocolProtos.SegmentStateProto, URL)
    */
   public synchronized void acceptRecovery(RequestInfo reqInfo,
       SegmentStateProto segment, URL fromUrl)
@@ -980,4 +982,62 @@ class Journal implements Closeable {
       }
     }
   }
+
+  public synchronized void doPreUpgrade() throws IOException {
+    storage.getJournalManager().doPreUpgrade();
+  }
+
+  public synchronized void doUpgrade(StorageInfo sInfo) throws IOException {
+    long oldCTime = storage.getCTime();
+    storage.cTime = sInfo.cTime;
+    int oldLV = storage.getLayoutVersion();
+    storage.layoutVersion = sInfo.layoutVersion;
+    LOG.info("Starting upgrade of edits directory: "
+        + ".\n   old LV = " + oldLV
+        + "; old CTime = " + oldCTime
+        + ".\n   new LV = " + storage.getLayoutVersion()
+        + "; new CTime = " + storage.getCTime());
+    storage.getJournalManager().doUpgrade(storage);
+    storage.createPaxosDir();
+    
+    // Copy over the contents of the epoch data files to the new dir.
+    File currentDir = storage.getSingularStorageDir().getCurrentDir();
+    File previousDir = storage.getSingularStorageDir().getPreviousDir();
+    
+    PersistentLongFile prevLastPromisedEpoch = new PersistentLongFile(
+        new File(previousDir, LAST_PROMISED_FILENAME), 0);
+    PersistentLongFile prevLastWriterEpoch = new PersistentLongFile(
+        new File(previousDir, LAST_WRITER_EPOCH), 0);
+    
+    lastPromisedEpoch = new PersistentLongFile(
+        new File(currentDir, LAST_PROMISED_FILENAME), 0);
+    lastWriterEpoch = new PersistentLongFile(
+        new File(currentDir, LAST_WRITER_EPOCH), 0);
+    
+    lastPromisedEpoch.set(prevLastPromisedEpoch.get());
+    lastWriterEpoch.set(prevLastWriterEpoch.get());
+  }
+
+  public synchronized void doFinalize() throws IOException {
+    LOG.info("Finalizing upgrade for journal " 
+          + storage.getRoot() + "."
+          + (storage.getLayoutVersion()==0 ? "" :
+            "\n   cur LV = " + storage.getLayoutVersion()
+            + "; cur CTime = " + storage.getCTime()));
+    storage.getJournalManager().doFinalize();
+  }
+
+  public Boolean canRollBack(StorageInfo storage, StorageInfo prevStorage,
+      int targetLayoutVersion) throws IOException {
+    return this.storage.getJournalManager().canRollBack(storage, prevStorage,
+        targetLayoutVersion);
+  }
+
+  public void doRollback() throws IOException {
+    storage.getJournalManager().doRollback();
+  }
+
+  public Long getJournalCTime() throws IOException {
+    return storage.getJournalManager().getJournalCTime();
+  }
 }

+ 28 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java

@@ -35,6 +35,7 @@ import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager;
 import org.apache.hadoop.hdfs.server.common.StorageErrorReporter;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.metrics2.source.JvmMetrics;
@@ -285,4 +286,31 @@ public class JournalNode implements Tool, Configurable, JournalNodeMXBean {
     StringUtils.startupShutdownMessage(JournalNode.class, args, LOG);
     System.exit(ToolRunner.run(new JournalNode(), args));
   }
+
+  public void doPreUpgrade(String journalId) throws IOException {
+    getOrCreateJournal(journalId).doPreUpgrade();
+  }
+
+  public void doUpgrade(String journalId, StorageInfo sInfo) throws IOException {
+    getOrCreateJournal(journalId).doUpgrade(sInfo);
+  }
+
+  public void doFinalize(String journalId) throws IOException {
+    getOrCreateJournal(journalId).doFinalize();
+  }
+
+  public Boolean canRollBack(String journalId, StorageInfo storage,
+      StorageInfo prevStorage, int targetLayoutVersion) throws IOException {
+    return getOrCreateJournal(journalId).canRollBack(storage, prevStorage,
+        targetLayoutVersion);
+  }
+
+  public void doRollback(String journalId) throws IOException {
+    getOrCreateJournal(journalId).doRollback();
+  }
+
+  public Long getJournalCTime(String journalId) throws IOException {
+    return getOrCreateJournal(journalId).getJournalCTime();
+  }
+
 }

+ 32 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeRpcServer.java

@@ -37,6 +37,7 @@ import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.SegmentSt
 import org.apache.hadoop.hdfs.qjournal.protocol.RequestInfo;
 import org.apache.hadoop.hdfs.qjournal.protocolPB.QJournalProtocolPB;
 import org.apache.hadoop.hdfs.qjournal.protocolPB.QJournalProtocolServerSideTranslatorPB;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
 import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;
 import org.apache.hadoop.ipc.ProtobufRpcEngine;
@@ -205,4 +206,35 @@ class JournalNodeRpcServer implements QJournalProtocol {
         .acceptRecovery(reqInfo, log, fromUrl);
   }
 
+  @Override
+  public void doPreUpgrade(String journalId) throws IOException {
+    jn.doPreUpgrade(journalId);
+  }
+
+  @Override
+  public void doUpgrade(String journalId, StorageInfo sInfo) throws IOException {
+    jn.doUpgrade(journalId, sInfo);
+  }
+
+  @Override
+  public void doFinalize(String journalId) throws IOException {
+    jn.doFinalize(journalId);
+  }
+
+  @Override
+  public Boolean canRollBack(String journalId, StorageInfo storage,
+      StorageInfo prevStorage, int targetLayoutVersion)
+      throws IOException {
+    return jn.canRollBack(journalId, storage, prevStorage, targetLayoutVersion);
+  }
+
+  @Override
+  public void doRollback(String journalId) throws IOException {
+    jn.doRollback(journalId);
+  }
+
+  @Override
+  public Long getJournalCTime(String journalId) throws IOException {
+    return jn.getJournalCTime(journalId);
+  }
 }
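Taken together, these methods expose the full per-journal upgrade lifecycle over RPC. A minimal sketch of how a client might drive it, assuming a QJournalProtocol proxy, journal id, and StorageInfo instances obtained elsewhere (all hypothetical here; the real driver is the quorum client):

    import java.io.IOException;

    import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocol;
    import org.apache.hadoop.hdfs.server.common.StorageInfo;

    class JournalUpgradeSketch {
      static void upgrade(QJournalProtocol jn, String jid, StorageInfo newInfo)
          throws IOException {
        jn.doPreUpgrade(jid);       // set the pre-upgrade state aside
        jn.doUpgrade(jid, newInfo); // write new storage info; the JN also recreates its paxos dir
        jn.doFinalize(jid);         // discard the previous state once the upgrade is committed
      }

      static void rollBack(QJournalProtocol jn, String jid, StorageInfo cur,
          StorageInfo prev, int targetLayoutVersion) throws IOException {
        if (jn.canRollBack(jid, cur, prev, targetLayoutVersion)) {
          jn.doRollback(jid);
        }
      }
    }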

+ 77 - 132
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java

@@ -18,7 +18,6 @@
 package org.apache.hadoop.hdfs.server.common;
 
 import java.io.File;
-import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.RandomAccessFile;
@@ -26,26 +25,23 @@ import java.lang.management.ManagementFactory;
 import java.nio.channels.FileLock;
 import java.nio.channels.OverlappingFileLockException;
 import java.util.ArrayList;
-import java.util.List;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Properties;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
-import org.apache.hadoop.hdfs.protocol.LayoutVersion;
-import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
-import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.util.ToolRunner;
 import org.apache.hadoop.util.VersionInfo;
 
-import com.google.common.base.Preconditions;
-
 import com.google.common.base.Charsets;
+import com.google.common.base.Preconditions;
 
 
 
@@ -82,7 +78,6 @@ public abstract class Storage extends StorageInfo {
   public static final int[] LAYOUT_VERSIONS_203 = {-19, -31};
 
   public    static final String STORAGE_FILE_LOCK     = "in_use.lock";
-  protected static final String STORAGE_FILE_VERSION  = "VERSION";
   public    static final String STORAGE_DIR_CURRENT   = "current";
   public    static final String STORAGE_DIR_PREVIOUS  = "previous";
   public    static final String STORAGE_TMP_REMOVED   = "removed.tmp";
@@ -126,22 +121,24 @@ public abstract class Storage extends StorageInfo {
   
   private class DirIterator implements Iterator<StorageDirectory> {
     StorageDirType dirType;
+    boolean includeShared;
     int prevIndex; // for remove()
     int nextIndex; // for next()
     
-    DirIterator(StorageDirType dirType) {
+    DirIterator(StorageDirType dirType, boolean includeShared) {
       this.dirType = dirType;
       this.nextIndex = 0;
       this.prevIndex = 0;
+      this.includeShared = includeShared;
     }
     
     @Override
     public boolean hasNext() {
       if (storageDirs.isEmpty() || nextIndex >= storageDirs.size())
         return false;
-      if (dirType != null) {
+      if (dirType != null || !includeShared) {
         while (nextIndex < storageDirs.size()) {
-          if (getStorageDir(nextIndex).getStorageDirType().isOfType(dirType))
+          if (shouldReturnNextDir())
             break;
           nextIndex++;
         }
@@ -156,9 +153,9 @@ public abstract class Storage extends StorageInfo {
       StorageDirectory sd = getStorageDir(nextIndex);
       prevIndex = nextIndex;
       nextIndex++;
-      if (dirType != null) {
+      if (dirType != null || !includeShared) {
         while (nextIndex < storageDirs.size()) {
-          if (getStorageDir(nextIndex).getStorageDirType().isOfType(dirType))
+          if (shouldReturnNextDir())
             break;
           nextIndex++;
         }
@@ -172,6 +169,12 @@ public abstract class Storage extends StorageInfo {
       storageDirs.remove(prevIndex); // remove last returned element
       hasNext(); // reset nextIndex to correct place
     }
+    
+    private boolean shouldReturnNextDir() {
+      StorageDirectory sd = getStorageDir(nextIndex);
+      return (dirType == null || sd.getStorageDirType().isOfType(dirType)) &&
+          (includeShared || !sd.isShared());
+    }
   }
   
   /**
@@ -203,7 +206,27 @@ public abstract class Storage extends StorageInfo {
    * them via the Iterator
    */
   public Iterator<StorageDirectory> dirIterator(StorageDirType dirType) {
-    return new DirIterator(dirType);
+    return dirIterator(dirType, true);
+  }
+  
+  /**
+   * Return all entries in storageDirs, potentially excluding shared dirs.
+   * @param includeShared whether or not to include shared dirs.
+   * @return an iterator over the configured storage dirs.
+   */
+  public Iterator<StorageDirectory> dirIterator(boolean includeShared) {
+    return dirIterator(null, includeShared);
+  }
+  
+  /**
+   * @param dirType all entries will be of this type of dir
+   * @param includeShared true to include any shared directories,
+   *        false otherwise
+   * @return an iterator over the configured storage dirs.
+   */
+  public Iterator<StorageDirectory> dirIterator(StorageDirType dirType,
+      boolean includeShared) {
+    return new DirIterator(dirType, includeShared);
   }
   
   public Iterable<StorageDirectory> dirIterable(final StorageDirType dirType) {
@@ -233,7 +256,9 @@ public abstract class Storage extends StorageInfo {
   @InterfaceAudience.Private
   public static class StorageDirectory implements FormatConfirmable {
     final File root;              // root directory
-    final boolean useLock;        // flag to enable storage lock
+    // whether or not this dir is shared between two separate NNs for HA, or
+    // between multiple block pools in the case of federation.
+    final boolean isShared;
     final StorageDirType dirType; // storage dir type
     FileLock lock;                // storage lock
 
@@ -241,11 +266,11 @@ public abstract class Storage extends StorageInfo {
     
     public StorageDirectory(File dir) {
       // default dirType is null
-      this(dir, null, true);
+      this(dir, null, false);
     }
     
     public StorageDirectory(File dir, StorageDirType dirType) {
-      this(dir, dirType, true);
+      this(dir, dirType, false);
     }
     
     public void setStorageUuid(String storageUuid) {
@@ -260,14 +285,14 @@ public abstract class Storage extends StorageInfo {
      * Constructor
      * @param dir directory corresponding to the storage
      * @param dirType storage directory type
-     * @param useLock true - enables locking on the storage directory and false
-     *          disables locking
+     * @param isShared whether or not this dir is shared between two NNs. If
+     *          true, locking on the storage directory is disabled; if false,
+     *          it is enabled.
      */
-    public StorageDirectory(File dir, StorageDirType dirType, boolean useLock) {
+    public StorageDirectory(File dir, StorageDirType dirType, boolean isShared) {
       this.root = dir;
       this.lock = null;
       this.dirType = dirType;
-      this.useLock = useLock;
+      this.isShared = isShared;
     }
     
     /**
@@ -621,6 +646,10 @@ public abstract class Storage extends StorageInfo {
       
       return true;
     }
+    
+    public boolean isShared() {
+      return isShared;
+    }
 
 
     /**
@@ -635,7 +664,7 @@ public abstract class Storage extends StorageInfo {
      * @throws IOException if locking fails
      */
     public void lock() throws IOException {
-      if (!useLock) {
+      if (isShared()) {
         LOG.info("Locking is disabled");
         return;
       }
@@ -889,22 +918,6 @@ public abstract class Storage extends StorageInfo {
     public String toString();
   }
   
-  /**
-   * Get common storage fields.
-   * Should be overloaded if additional fields need to be get.
-   * 
-   * @param props
-   * @throws IOException
-   */
-  protected void setFieldsFromProperties(
-      Properties props, StorageDirectory sd) throws IOException {
-    setLayoutVersion(props, sd);
-    setNamespaceID(props, sd);
-    setStorageType(props, sd);
-    setcTime(props, sd);
-    setClusterId(props, layoutVersion, sd);
-  }
-  
   /**
    * Set common storage fields into the given properties object.
    * Should be overloaded if additional fields need to be set.
@@ -923,22 +936,29 @@ public abstract class Storage extends StorageInfo {
     }
     props.setProperty("cTime", String.valueOf(cTime));
   }
-
+  
   /**
-   * Read properties from the VERSION file in the given storage directory.
+   * Get common storage fields.
+   * Should be overloaded if additional fields need to be read.
+   * 
+   * @param props properties object holding the persisted storage fields
+   * @throws IOException if a required field is missing or inconsistent
    */
-  public void readProperties(StorageDirectory sd) throws IOException {
-    Properties props = readPropertiesFile(sd.getVersionFile());
-    setFieldsFromProperties(props, sd);
+  protected void setFieldsFromProperties(
+      Properties props, StorageDirectory sd) throws IOException {
+    super.setFieldsFromProperties(props, sd);
+    setStorageType(props, sd);
   }
-
-  /**
-   * Read properties from the the previous/VERSION file in the given storage directory.
-   */
-  public void readPreviousVersionProperties(StorageDirectory sd)
-      throws IOException {
-    Properties props = readPropertiesFile(sd.getPreviousVersionFile());
-    setFieldsFromProperties(props, sd);
+  
+  /** Validate and set storage type from {@link Properties}*/
+  protected void setStorageType(Properties props, StorageDirectory sd)
+      throws InconsistentFSStateException {
+    NodeType type = NodeType.valueOf(getProperty(props, sd, "storageType"));
+    if (!storageType.equals(type)) {
+      throw new InconsistentFSStateException(sd.root,
+          "node type is incompatible with others.");
+    }
+    storageType = type;
   }
 
   /**
@@ -947,10 +967,15 @@ public abstract class Storage extends StorageInfo {
   public void writeProperties(StorageDirectory sd) throws IOException {
     writeProperties(sd.getVersionFile(), sd);
   }
-
+  
   public void writeProperties(File to, StorageDirectory sd) throws IOException {
     Properties props = new Properties();
     setPropertiesFromFields(props, sd);
+    writeProperties(to, sd, props);
+  }
+
+  public static void writeProperties(File to, StorageDirectory sd,
+      Properties props) throws IOException {
     RandomAccessFile file = new RandomAccessFile(to, "rws");
     FileOutputStream out = null;
     try {
@@ -977,23 +1002,6 @@ public abstract class Storage extends StorageInfo {
       file.close();
     }
   }
-  
-  public static Properties readPropertiesFile(File from) throws IOException {
-    RandomAccessFile file = new RandomAccessFile(from, "rws");
-    FileInputStream in = null;
-    Properties props = new Properties();
-    try {
-      in = new FileInputStream(file.getFD());
-      file.seek(0);
-      props.load(in);
-    } finally {
-      if (in != null) {
-        in.close();
-      }
-      file.close();
-    }
-    return props;
-  }
 
   public static void rename(File from, File to) throws IOException {
     if (!from.renameTo(to))
@@ -1044,69 +1052,6 @@ public abstract class Storage extends StorageInfo {
       + "-" + Long.toString(storage.getCTime());
   }
   
-  String getProperty(Properties props, StorageDirectory sd,
-      String name) throws InconsistentFSStateException {
-    String property = props.getProperty(name);
-    if (property == null) {
-      throw new InconsistentFSStateException(sd.root, "file "
-          + STORAGE_FILE_VERSION + " has " + name + " missing.");
-    }
-    return property;
-  }
-  
-  /** Validate and set storage type from {@link Properties}*/
-  protected void setStorageType(Properties props, StorageDirectory sd)
-      throws InconsistentFSStateException {
-    NodeType type = NodeType.valueOf(getProperty(props, sd, "storageType"));
-    if (!storageType.equals(type)) {
-      throw new InconsistentFSStateException(sd.root,
-          "node type is incompatible with others.");
-    }
-    storageType = type;
-  }
-  
-  /** Validate and set ctime from {@link Properties}*/
-  protected void setcTime(Properties props, StorageDirectory sd)
-      throws InconsistentFSStateException {
-    cTime = Long.parseLong(getProperty(props, sd, "cTime"));
-  }
-
-  /** Validate and set clusterId from {@link Properties}*/
-  protected void setClusterId(Properties props, int layoutVersion,
-      StorageDirectory sd) throws InconsistentFSStateException {
-    // Set cluster ID in version that supports federation
-    if (LayoutVersion.supports(Feature.FEDERATION, layoutVersion)) {
-      String cid = getProperty(props, sd, "clusterID");
-      if (!(clusterID.equals("") || cid.equals("") || clusterID.equals(cid))) {
-        throw new InconsistentFSStateException(sd.getRoot(),
-            "cluster Id is incompatible with others.");
-      }
-      clusterID = cid;
-    }
-  }
-  
-  /** Validate and set layout version from {@link Properties}*/
-  protected void setLayoutVersion(Properties props, StorageDirectory sd)
-      throws IncorrectVersionException, InconsistentFSStateException {
-    int lv = Integer.parseInt(getProperty(props, sd, "layoutVersion"));
-    if (lv < HdfsConstants.LAYOUT_VERSION) { // future version
-      throw new IncorrectVersionException(lv, "storage directory "
-          + sd.root.getAbsolutePath());
-    }
-    layoutVersion = lv;
-  }
-  
-  /** Validate and set namespaceID version from {@link Properties}*/
-  protected void setNamespaceID(Properties props, StorageDirectory sd)
-      throws InconsistentFSStateException {
-    int nsId = Integer.parseInt(getProperty(props, sd, "namespaceID"));
-    if (namespaceID != 0 && nsId != 0 && namespaceID != nsId) {
-      throw new InconsistentFSStateException(sd.root,
-          "namespaceID is incompatible with others.");
-    }
-    namespaceID = nsId;
-  }
-  
   public static boolean is203LayoutVersion(int layoutVersion) {
     for (int lv203 : LAYOUT_VERSIONS_203) {
       if (lv203 == layoutVersion) {
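The practical effect of includeShared: callers that mutate directory contents during upgrade or rollback can iterate only the local directories, leaving shared HA edits dirs to the shared-log code paths (see the FSImage.java diff below). A minimal sketch, assuming a populated Storage instance:

    import java.util.Iterator;

    import org.apache.hadoop.hdfs.server.common.Storage;
    import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;

    class DirIterationSketch {
      static void listLocalDirs(Storage storage) {
        // dirIterator(false) filters shared dirs out via shouldReturnNextDir(), so
        // the dirs seen here can be renamed or upgraded without racing the other NN.
        for (Iterator<StorageDirectory> it = storage.dirIterator(false); it.hasNext();) {
          StorageDirectory sd = it.next();
          System.out.println(sd.getRoot() + " shared=" + sd.isShared()); // always false here
        }
      }
    }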

+ 119 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/StorageInfo.java

@@ -17,9 +17,17 @@
  */
 package org.apache.hadoop.hdfs.server.common;
 
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.util.Properties;
+
 import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.LayoutVersion;
 import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;
+import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
 
 import com.google.common.base.Joiner;
 
@@ -34,6 +42,8 @@ public class StorageInfo {
   public int   namespaceID;     // id of the file system
   public String clusterID;      // id of the cluster
   public long  cTime;           // creation time of the file system state
+  
+  protected static final String STORAGE_FILE_VERSION    = "VERSION";
  
   public StorageInfo () {
     this(0, 0, "", 0L);
@@ -96,4 +106,113 @@ public class StorageInfo {
     return Joiner.on(":").join(
         layoutVersion, namespaceID, cTime, clusterID);
   }
+  
+  public static int getNsIdFromColonSeparatedString(String in) {
+    return Integer.parseInt(in.split(":")[1]);
+  }
+  
+  public static String getClusterIdFromColonSeparatedString(String in) {
+    return in.split(":")[3];
+  }
+  
+  /**
+   * Read properties from the VERSION file in the given storage directory.
+   */
+  public void readProperties(StorageDirectory sd) throws IOException {
+    Properties props = readPropertiesFile(sd.getVersionFile());
+    setFieldsFromProperties(props, sd);
+  }
+  
+  /**
+   * Read properties from the previous/VERSION file in the given storage directory.
+   */
+  public void readPreviousVersionProperties(StorageDirectory sd)
+      throws IOException {
+    Properties props = readPropertiesFile(sd.getPreviousVersionFile());
+    setFieldsFromProperties(props, sd);
+  }
+  
+  /**
+   * Get common storage fields.
+   * Should be overloaded if additional fields need to be read.
+   * 
+   * @param props properties object holding the persisted storage fields
+   * @throws IOException if a required field is missing or inconsistent
+   */
+  protected void setFieldsFromProperties(
+      Properties props, StorageDirectory sd) throws IOException {
+    setLayoutVersion(props, sd);
+    setNamespaceID(props, sd);
+    setcTime(props, sd);
+    setClusterId(props, layoutVersion, sd);
+  }
+  
+  /** Validate and set ctime from {@link Properties}*/
+  protected void setcTime(Properties props, StorageDirectory sd)
+      throws InconsistentFSStateException {
+    cTime = Long.parseLong(getProperty(props, sd, "cTime"));
+  }
+
+  /** Validate and set clusterId from {@link Properties}*/
+  protected void setClusterId(Properties props, int layoutVersion,
+      StorageDirectory sd) throws InconsistentFSStateException {
+    // Set cluster ID in version that supports federation
+    if (LayoutVersion.supports(Feature.FEDERATION, layoutVersion)) {
+      String cid = getProperty(props, sd, "clusterID");
+      if (!(clusterID.equals("") || cid.equals("") || clusterID.equals(cid))) {
+        throw new InconsistentFSStateException(sd.getRoot(),
+            "cluster Id is incompatible with others.");
+      }
+      clusterID = cid;
+    }
+  }
+  
+  /** Validate and set layout version from {@link Properties}*/
+  protected void setLayoutVersion(Properties props, StorageDirectory sd)
+      throws IncorrectVersionException, InconsistentFSStateException {
+    int lv = Integer.parseInt(getProperty(props, sd, "layoutVersion"));
+    if (lv < HdfsConstants.LAYOUT_VERSION) { // future version
+      throw new IncorrectVersionException(lv, "storage directory "
+          + sd.root.getAbsolutePath());
+    }
+    layoutVersion = lv;
+  }
+  
+  /** Validate and set namespaceID version from {@link Properties}*/
+  protected void setNamespaceID(Properties props, StorageDirectory sd)
+      throws InconsistentFSStateException {
+    int nsId = Integer.parseInt(getProperty(props, sd, "namespaceID"));
+    if (namespaceID != 0 && nsId != 0 && namespaceID != nsId) {
+      throw new InconsistentFSStateException(sd.root,
+          "namespaceID is incompatible with others.");
+    }
+    namespaceID = nsId;
+  }
+  
+  static String getProperty(Properties props, StorageDirectory sd,
+      String name) throws InconsistentFSStateException {
+    String property = props.getProperty(name);
+    if (property == null) {
+      throw new InconsistentFSStateException(sd.root, "file "
+          + STORAGE_FILE_VERSION + " has " + name + " missing.");
+    }
+    return property;
+  }
+  
+  public static Properties readPropertiesFile(File from) throws IOException {
+    RandomAccessFile file = new RandomAccessFile(from, "rws");
+    FileInputStream in = null;
+    Properties props = new Properties();
+    try {
+      in = new FileInputStream(file.getFD());
+      file.seek(0);
+      props.load(in);
+    } finally {
+      if (in != null) {
+        in.close();
+      }
+      file.close();
+    }
+    return props;
+  }
 }
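Since readPropertiesFile() returns a plain java.util.Properties, a VERSION file can also be inspected directly; the keys are exactly the ones consumed by the setters above. A minimal sketch with a hypothetical storage path:

    import java.io.File;
    import java.io.IOException;
    import java.util.Properties;

    import org.apache.hadoop.hdfs.server.common.StorageInfo;

    class VersionFileSketch {
      public static void main(String[] args) throws IOException {
        // Hypothetical NameNode storage dir; VERSION lives under current/.
        File version = new File("/data/dfs/name/current/VERSION");
        Properties props = StorageInfo.readPropertiesFile(version);
        System.out.println("layoutVersion = " + props.getProperty("layoutVersion"));
        System.out.println("namespaceID   = " + props.getProperty("namespaceID"));
        System.out.println("cTime         = " + props.getProperty("cTime"));
        System.out.println("clusterID     = " + props.getProperty("clusterID"));   // federation layouts only
        System.out.println("storageType   = " + props.getProperty("storageType")); // validated by Storage.setStorageType()
      }
    }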

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceStorage.java

@@ -103,7 +103,7 @@ public class BlockPoolSliceStorage extends Storage {
         dataDirs.size());
     for (Iterator<File> it = dataDirs.iterator(); it.hasNext();) {
       File dataDir = it.next();
-      StorageDirectory sd = new StorageDirectory(dataDir, null, false);
+      StorageDirectory sd = new StorageDirectory(dataDir, null, true);
       StorageState curState;
       try {
         curState = sd.analyzeStorage(startOpt, this);

+ 19 - 4
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java

@@ -23,6 +23,7 @@ import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.RandomAccessFile;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.DU;
@@ -191,7 +192,7 @@ class BlockPoolSlice {
             blockFile.length(), genStamp, volume, blockFile.getParentFile());
       } else {
         newReplica = new ReplicaWaitingToBeRecovered(blockId,
-            validateIntegrity(blockFile, genStamp), 
+            validateIntegrityAndSetLength(blockFile, genStamp), 
             genStamp, volume, blockFile.getParentFile());
       }
 
@@ -214,7 +215,7 @@ class BlockPoolSlice {
    * @param genStamp generation stamp of the block
    * @return the number of valid bytes
    */
-  private long validateIntegrity(File blockFile, long genStamp) {
+  private long validateIntegrityAndSetLength(File blockFile, long genStamp) {
     DataInputStream checksumIn = null;
     InputStream blockIn = null;
     try {
@@ -257,11 +258,25 @@ class BlockPoolSlice {
       IOUtils.readFully(blockIn, buf, 0, lastChunkSize);
 
       checksum.update(buf, 0, lastChunkSize);
+      long validFileLength;
       if (checksum.compare(buf, lastChunkSize)) { // last chunk matches crc
-        return lastChunkStartPos + lastChunkSize;
+        validFileLength = lastChunkStartPos + lastChunkSize;
      } else { // last chunk is corrupt
-        return lastChunkStartPos;
+        validFileLength = lastChunkStartPos;
       }
+
+      // truncate if extra bytes are present without CRC
+      if (blockFile.length() > validFileLength) {
+        RandomAccessFile blockRAF = new RandomAccessFile(blockFile, "rw");
+        try {
+          // truncate blockFile
+          blockRAF.setLength(validFileLength);
+        } finally {
+          blockRAF.close();
+        }
+      }
+
+      return validFileLength;
     } catch (IOException e) {
       FsDatasetImpl.LOG.warn(e);
       return 0;
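The rename to validateIntegrityAndSetLength() reflects the new side effect: bytes beyond the last chunk covered by a CRC are now physically truncated rather than merely excluded from the reported length. The truncation step in isolation, as a minimal standalone sketch with hypothetical inputs:

    import java.io.File;
    import java.io.IOException;
    import java.io.RandomAccessFile;

    class TruncateSketch {
      // Trim a block file down to the prefix that the checksums verified.
      static void truncateTo(File blockFile, long validFileLength) throws IOException {
        if (blockFile.length() > validFileLength) {
          RandomAccessFile blockRAF = new RandomAccessFile(blockFile, "rw");
          try {
            blockRAF.setLength(validFileLength); // drop the trailing bytes that have no CRC
          } finally {
            blockRAF.close();
          }
        }
      }
    }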

+ 33 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupJournalManager.java

@@ -20,6 +20,8 @@ package org.apache.hadoop.hdfs.server.namenode;
 import java.io.IOException;
 import java.util.Collection;
 
+import org.apache.hadoop.hdfs.server.common.Storage;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
 import org.apache.hadoop.hdfs.server.protocol.JournalInfo;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration;
 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
@@ -97,4 +99,35 @@ class BackupJournalManager implements JournalManager {
   public String toString() {
     return "BackupJournalManager";
   }
+  
+  @Override
+  public void doPreUpgrade() throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public void doUpgrade(Storage storage) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+  
+  @Override
+  public void doFinalize() throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public boolean canRollBack(StorageInfo storage, StorageInfo prevStorage,
+      int targetLayoutVersion) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public void doRollback() throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public long getJournalCTime() throws IOException {
+    throw new UnsupportedOperationException();
+  }
 }

+ 4 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java

@@ -36,6 +36,7 @@ import org.apache.hadoop.hdfs.protocol.proto.JournalProtocolProtos.JournalProtoc
 import org.apache.hadoop.hdfs.protocolPB.JournalProtocolPB;
 import org.apache.hadoop.hdfs.protocolPB.JournalProtocolServerSideTranslatorPB;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
 import org.apache.hadoop.hdfs.server.common.Storage;
 import org.apache.hadoop.hdfs.server.namenode.ha.HAState;
 import org.apache.hadoop.hdfs.server.protocol.FenceResponse;
@@ -367,7 +368,7 @@ public class BackupNode extends NameNode {
     } else {
       nsInfo.validateStorage(storage);
     }
-    bnImage.initEditLog();
+    bnImage.initEditLog(StartupOption.REGULAR);
     setRegistration();
     NamenodeRegistration nnReg = null;
     while(!isStopRequested()) {
@@ -423,7 +424,8 @@ public class BackupNode extends NameNode {
     return DFSUtil.getBackupNameServiceId(conf);
   }
 
-  protected HAState createHAState() {
+  @Override
+  protected HAState createHAState(StartupOption startOpt) {
     return new BackupState();
   }
 

+ 58 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java

@@ -42,6 +42,7 @@ import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
+import org.apache.hadoop.hdfs.server.common.Storage;
 import org.apache.hadoop.hdfs.server.common.Storage.FormatConfirmable;
 import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddBlockOp;
@@ -252,10 +253,12 @@ public class FSEditLog implements LogsPurgeable {
       if (u.getScheme().equals(NNStorage.LOCAL_URI_SCHEME)) {
         StorageDirectory sd = storage.getStorageDirectory(u);
         if (sd != null) {
-          journalSet.add(new FileJournalManager(conf, sd, storage), required);
+          journalSet.add(new FileJournalManager(conf, sd, storage),
+              required, sharedEditsDirs.contains(u));
         }
       } else {
-        journalSet.add(createJournal(u), required);
+        journalSet.add(createJournal(u), required,
+            sharedEditsDirs.contains(u));
       }
     }
  
@@ -1339,6 +1342,58 @@ public class FSEditLog implements LogsPurgeable {
     }
   }
   
+  public long getSharedLogCTime() throws IOException {
+    for (JournalAndStream jas : journalSet.getAllJournalStreams()) {
+      if (jas.isShared()) {
+        return jas.getManager().getJournalCTime();
+      }
+    }
+    throw new IOException("No shared log found.");
+  }
+  
+  public synchronized void doPreUpgradeOfSharedLog() throws IOException {
+    for (JournalAndStream jas : journalSet.getAllJournalStreams()) {
+      if (jas.isShared()) {
+        jas.getManager().doPreUpgrade();
+      }
+    }
+  }
+  
+  public synchronized void doUpgradeOfSharedLog() throws IOException {
+    for (JournalAndStream jas : journalSet.getAllJournalStreams()) {
+      if (jas.isShared()) {
+        jas.getManager().doUpgrade(storage);
+      }
+    }
+  }
+  
+  public synchronized void doFinalizeOfSharedLog() throws IOException {
+    for (JournalAndStream jas : journalSet.getAllJournalStreams()) {
+      if (jas.isShared()) {
+        jas.getManager().doFinalize();
+      }
+    }
+  }
+  
+  public synchronized boolean canRollBackSharedLog(Storage prevStorage,
+      int targetLayoutVersion) throws IOException {
+    for (JournalAndStream jas : journalSet.getAllJournalStreams()) {
+      if (jas.isShared()) {
+        return jas.getManager().canRollBack(storage, prevStorage,
+            targetLayoutVersion);
+      }
+    }
+    throw new IOException("No shared log found.");
+  }
+  
+  public synchronized void doRollback() throws IOException {
+    for (JournalAndStream jas : journalSet.getAllJournalStreams()) {
+      if (jas.isShared()) {
+        jas.getManager().doRollback();
+      }
+    }
+  }
+  
   @Override
   public void selectInputStreams(Collection<EditLogInputStream> streams,
       long fromTxId, boolean inProgressOk) throws IOException {
@@ -1470,4 +1525,5 @@ public class FSEditLog implements LogsPurgeable {
                                          + uri, e);
     }
   }
+
 }

+ 19 - 7
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOpCodes.java

@@ -68,7 +68,7 @@ public enum FSEditLogOpCodes {
   OP_REMOVE_CACHE_POOL                    ((byte) 38),
   OP_MODIFY_CACHE_DIRECTIVE     ((byte) 39),
 
-  // Note that fromByte(..) depends on OP_INVALID being at the last position.  
+  // Note that the valid opcode range is currently 0 through 127.
   OP_INVALID                    ((byte) -1);
 
   private final byte opCode;
@@ -91,7 +91,22 @@ public enum FSEditLogOpCodes {
     return opCode;
   }
 
-  private static final FSEditLogOpCodes[] VALUES = FSEditLogOpCodes.values();
+  private static FSEditLogOpCodes[] VALUES;
+  
+  static {
+    byte max = 0;
+    for (FSEditLogOpCodes code : FSEditLogOpCodes.values()) {
+      if (code.getOpCode() > max) {
+        max = code.getOpCode();
+      }
+    }
+    VALUES = new FSEditLogOpCodes[max + 1];
+    for (FSEditLogOpCodes code : FSEditLogOpCodes.values()) {
+      if (code.getOpCode() >= 0) {
+        VALUES[code.getOpCode()] = code;
+      }
+    }
+  }
 
   /**
    * Converts byte to FSEditLogOpCodes enum value
@@ -100,12 +115,9 @@ public enum FSEditLogOpCodes {
    * @return enum with byte value of opCode
    */
   public static FSEditLogOpCodes fromByte(byte opCode) {
-    if (opCode == -1) {
-      return OP_INVALID;
-    }
-    if (opCode >= 0 && opCode < OP_INVALID.ordinal()) {
+    if (opCode >= 0 && opCode < VALUES.length) {
       return VALUES[opCode];
     }
-    return null;
+    return opCode == -1 ? OP_INVALID : null;
   }
 }
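With the lookup table sized by the largest opcode instead of relying on ordinal positions, fromByte() no longer depends on OP_INVALID being declared last, and unassigned byte values map to null. A minimal sketch of the resulting behavior, using constants from the enum above:

    import org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes;

    class OpCodeLookupSketch {
      public static void main(String[] args) {
        System.out.println(FSEditLogOpCodes.fromByte((byte) 38));  // OP_REMOVE_CACHE_POOL
        System.out.println(FSEditLogOpCodes.fromByte((byte) -1));  // OP_INVALID
        System.out.println(FSEditLogOpCodes.fromByte((byte) 120)); // null: within 0..127 but unassigned
      }
    }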

+ 91 - 118
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java

@@ -178,7 +178,8 @@ public class FSImage implements Closeable {
    * @return true if the image needs to be saved or false otherwise
    */
   boolean recoverTransitionRead(StartupOption startOpt, FSNamesystem target,
-      MetaRecoveryContext recovery) throws IOException {
+      MetaRecoveryContext recovery)
+      throws IOException {
     assert startOpt != StartupOption.FORMAT : 
       "NameNode formatting should be performed before reading the image";
     
@@ -252,14 +253,14 @@ public class FSImage implements Closeable {
       doImportCheckpoint(target);
       return false; // import checkpoint saved image already
     case ROLLBACK:
-      doRollback();
-      break;
+      throw new AssertionError("Rollback is now a standalone command, " +
+          "NameNode should not be starting with this option.");
     case REGULAR:
     default:
       // just load the image
     }
     
-    return loadFSImage(target, recovery);
+    return loadFSImage(target, recovery, startOpt);
   }
   
   /**
@@ -272,17 +273,15 @@ public class FSImage implements Closeable {
   private boolean recoverStorageDirs(StartupOption startOpt,
       Map<StorageDirectory, StorageState> dataDirStates) throws IOException {
     boolean isFormatted = false;
+    // This loop needs to cover all storage dirs, even shared ones, so that we
+    // properly examine their state; the loop body below, however, must not
+    // mutate any shared dir.
     for (Iterator<StorageDirectory> it = 
                       storage.dirIterator(); it.hasNext();) {
       StorageDirectory sd = it.next();
       StorageState curState;
       try {
         curState = sd.analyzeStorage(startOpt, storage);
-        String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf);
-        if (curState != StorageState.NORMAL && HAUtil.isHAEnabled(conf, nameserviceId)) {
-          throw new IOException("Cannot start an HA namenode with name dirs " +
-              "that need recovery. Dir: " + sd + " state: " + curState);
-        }
         // sd is locked but not opened
         switch(curState) {
         case NON_EXISTENT:
@@ -294,7 +293,7 @@ public class FSImage implements Closeable {
         case NORMAL:
           break;
         default:  // recovery is possible
-          sd.doRecover(curState);      
+          sd.doRecover(curState);
         }
         if (curState != StorageState.NOT_FORMATTED 
             && startOpt != StartupOption.ROLLBACK) {
@@ -315,10 +314,10 @@ public class FSImage implements Closeable {
     return isFormatted;
   }
 
-  private void doUpgrade(FSNamesystem target) throws IOException {
+  void doUpgrade(FSNamesystem target) throws IOException {
     // Upgrade is allowed only if there are 
-    // no previous fs states in any of the directories
-    for (Iterator<StorageDirectory> it = storage.dirIterator(); it.hasNext();) {
+    // no previous fs states in any of the local directories
+    for (Iterator<StorageDirectory> it = storage.dirIterator(false); it.hasNext();) {
       StorageDirectory sd = it.next();
       if (sd.getPreviousDir().exists())
         throw new InconsistentFSStateException(sd.getRoot(),
@@ -327,9 +326,9 @@ public class FSImage implements Closeable {
     }
 
     // load the latest image
-    this.loadFSImage(target, null);
-
     // Do upgrade for each directory
+    this.loadFSImage(target, null, StartupOption.UPGRADE);
+    
     long oldCTime = storage.getCTime();
     storage.cTime = now();  // generate new cTime for the state
     int oldLV = storage.getLayoutVersion();
@@ -337,28 +336,17 @@ public class FSImage implements Closeable {
     
     List<StorageDirectory> errorSDs =
       Collections.synchronizedList(new ArrayList<StorageDirectory>());
-    for (Iterator<StorageDirectory> it = storage.dirIterator(); it.hasNext();) {
+    assert !editLog.isSegmentOpen() : "Edits log must not be open.";
+    LOG.info("Starting upgrade of local storage directories."
+        + "\n   old LV = " + oldLV
+        + "; old CTime = " + oldCTime
+        + ".\n   new LV = " + storage.getLayoutVersion()
+        + "; new CTime = " + storage.getCTime());
+    // Do upgrade for each directory
+    for (Iterator<StorageDirectory> it = storage.dirIterator(false); it.hasNext();) {
       StorageDirectory sd = it.next();
-      LOG.info("Starting upgrade of image directory " + sd.getRoot()
-               + ".\n   old LV = " + oldLV
-               + "; old CTime = " + oldCTime
-               + ".\n   new LV = " + storage.getLayoutVersion()
-               + "; new CTime = " + storage.getCTime());
       try {
-        File curDir = sd.getCurrentDir();
-        File prevDir = sd.getPreviousDir();
-        File tmpDir = sd.getPreviousTmp();
-        assert curDir.exists() : "Current directory must exist.";
-        assert !prevDir.exists() : "previous directory must not exist.";
-        assert !tmpDir.exists() : "previous.tmp directory must not exist.";
-        assert !editLog.isSegmentOpen() : "Edits log must not be open.";
-
-        // rename current to tmp
-        NNStorage.rename(curDir, tmpDir);
-        
-        if (!curDir.mkdir()) {
-          throw new IOException("Cannot create directory " + curDir);
-        }
+        NNUpgradeUtil.doPreUpgrade(sd);
       } catch (Exception e) {
         LOG.error("Failed to move aside pre-upgrade storage " +
             "in image directory " + sd.getRoot(), e);
@@ -366,41 +354,38 @@ public class FSImage implements Closeable {
         continue;
       }
     }
+    if (target.isHaEnabled()) {
+      editLog.doPreUpgradeOfSharedLog();
+    }
     storage.reportErrorsOnDirectories(errorSDs);
     errorSDs.clear();
 
     saveFSImageInAllDirs(target, editLog.getLastWrittenTxId());
 
-    for (Iterator<StorageDirectory> it = storage.dirIterator(); it.hasNext();) {
+    for (Iterator<StorageDirectory> it = storage.dirIterator(false); it.hasNext();) {
       StorageDirectory sd = it.next();
       try {
-        // Write the version file, since saveFsImage above only makes the
-        // fsimage_<txid>, and the directory is otherwise empty.
-        storage.writeProperties(sd);
-        
-        File prevDir = sd.getPreviousDir();
-        File tmpDir = sd.getPreviousTmp();
-        // rename tmp to previous
-        NNStorage.rename(tmpDir, prevDir);
+        NNUpgradeUtil.doUpgrade(sd, storage);
       } catch (IOException ioe) {
-        LOG.error("Unable to rename temp to previous for " + sd.getRoot(), ioe);
         errorSDs.add(sd);
         continue;
       }
-      LOG.info("Upgrade of " + sd.getRoot() + " is complete.");
+    }
+    if (target.isHaEnabled()) {
+      editLog.doUpgradeOfSharedLog();
     }
     storage.reportErrorsOnDirectories(errorSDs);
-
+    
     isUpgradeFinalized = false;
     if (!storage.getRemovedStorageDirs().isEmpty()) {
-      //during upgrade, it's a fatal error to fail any storage directory
+      // during upgrade, it's a fatal error to fail any storage directory
       throw new IOException("Upgrade failed in "
           + storage.getRemovedStorageDirs().size()
           + " storage directory(ies), previously logged.");
     }
   }
 
-  private void doRollback() throws IOException {
+  void doRollback(FSNamesystem fsns) throws IOException {
     // Rollback is allowed only if there is 
     // a previous fs states in at least one of the storage directories.
     // Directories that don't have previous state do not rollback
@@ -408,85 +393,46 @@ public class FSImage implements Closeable {
     FSImage prevState = new FSImage(conf);
     try {
       prevState.getStorage().layoutVersion = HdfsConstants.LAYOUT_VERSION;
-      for (Iterator<StorageDirectory> it = storage.dirIterator(); it.hasNext();) {
+      for (Iterator<StorageDirectory> it = storage.dirIterator(false); it.hasNext();) {
         StorageDirectory sd = it.next();
-        File prevDir = sd.getPreviousDir();
-        if (!prevDir.exists()) {  // use current directory then
-          LOG.info("Storage directory " + sd.getRoot()
-            + " does not contain previous fs state.");
-          // read and verify consistency with other directories
-          storage.readProperties(sd);
+        if (!NNUpgradeUtil.canRollBack(sd, storage, prevState.getStorage(),
+            HdfsConstants.LAYOUT_VERSION)) {
           continue;
         }
-
-        // read and verify consistency of the prev dir
-        prevState.getStorage().readPreviousVersionProperties(sd);
-
-        if (prevState.getLayoutVersion() != HdfsConstants.LAYOUT_VERSION) {
-          throw new IOException(
-            "Cannot rollback to storage version " +
-                prevState.getLayoutVersion() +
-                " using this version of the NameNode, which uses storage version " +
-                HdfsConstants.LAYOUT_VERSION + ". " +
-              "Please use the previous version of HDFS to perform the rollback.");
-        }
         canRollback = true;
       }
+      
+      if (fsns.isHaEnabled()) {
+        // If HA is enabled, check if the shared log can be rolled back as well.
+        editLog.initJournalsForWrite();
+        canRollback |= editLog.canRollBackSharedLog(prevState.getStorage(),
+            HdfsConstants.LAYOUT_VERSION);
+      }
+      
       if (!canRollback)
         throw new IOException("Cannot rollback. None of the storage "
             + "directories contain previous fs state.");
-
+  
       // Now that we know all directories are going to be consistent
       // Do rollback for each directory containing previous state
-      for (Iterator<StorageDirectory> it = storage.dirIterator(); it.hasNext();) {
+      for (Iterator<StorageDirectory> it = storage.dirIterator(false); it.hasNext();) {
         StorageDirectory sd = it.next();
-        File prevDir = sd.getPreviousDir();
-        if (!prevDir.exists())
-          continue;
-
         LOG.info("Rolling back storage directory " + sd.getRoot()
-          + ".\n   new LV = " + prevState.getStorage().getLayoutVersion()
-          + "; new CTime = " + prevState.getStorage().getCTime());
-        File tmpDir = sd.getRemovedTmp();
-        assert !tmpDir.exists() : "removed.tmp directory must not exist.";
-        // rename current to tmp
-        File curDir = sd.getCurrentDir();
-        assert curDir.exists() : "Current directory must exist.";
-        NNStorage.rename(curDir, tmpDir);
-        // rename previous to current
-        NNStorage.rename(prevDir, curDir);
-
-        // delete tmp dir
-        NNStorage.deleteDir(tmpDir);
-        LOG.info("Rollback of " + sd.getRoot()+ " is complete.");
+                 + ".\n   new LV = " + prevState.getStorage().getLayoutVersion()
+                 + "; new CTime = " + prevState.getStorage().getCTime());
+        NNUpgradeUtil.doRollBack(sd);
       }
+      if (fsns.isHaEnabled()) {
+        // If HA is enabled, try to roll back the shared log as well.
+        editLog.doRollback();
+      }
+      
       isUpgradeFinalized = true;
     } finally {
       prevState.close();
     }
   }
 
-  private void doFinalize(StorageDirectory sd) throws IOException {
-    File prevDir = sd.getPreviousDir();
-    if (!prevDir.exists()) { // already discarded
-      LOG.info("Directory " + prevDir + " does not exist.");
-      LOG.info("Finalize upgrade for " + sd.getRoot()+ " is not required.");
-      return;
-    }
-    LOG.info("Finalizing upgrade for storage directory " 
-             + sd.getRoot() + "."
-             + (storage.getLayoutVersion()==0 ? "" :
-                   "\n   cur LV = " + storage.getLayoutVersion()
-                   + "; cur CTime = " + storage.getCTime()));
-    assert sd.getCurrentDir().exists() : "Current directory must exist.";
-    final File tmpDir = sd.getFinalizedTmp();
-    // rename previous to tmp and remove
-    NNStorage.rename(prevDir, tmpDir);
-    NNStorage.deleteDir(tmpDir);
-    isUpgradeFinalized = true;
-    LOG.info("Finalize upgrade for " + sd.getRoot()+ " is complete.");
-  }
-
   /**
    * Load image from a checkpoint directory and save it into the current one.
    * @param target the NameSystem to import into
@@ -521,7 +467,7 @@ public class FSImage implements Closeable {
     // return back the real image
     realImage.getStorage().setStorageInfo(ckptImage.getStorage());
     realImage.getEditLog().setNextTxId(ckptImage.getEditLog().getLastWrittenTxId()+1);
-    realImage.initEditLog();
+    realImage.initEditLog(StartupOption.IMPORT);
 
     target.dir.fsImage = realImage;
     realImage.getStorage().setBlockPoolID(ckptImage.getBlockPoolID());
@@ -530,12 +476,23 @@ public class FSImage implements Closeable {
     saveNamespace(target);
     getStorage().writeAll();
   }
-
-  void finalizeUpgrade() throws IOException {
-    for (Iterator<StorageDirectory> it = storage.dirIterator(); it.hasNext();) {
+  
+  void finalizeUpgrade(boolean finalizeEditLog) throws IOException {
+    LOG.info("Finalizing upgrade for local dirs. " +
+        (storage.getLayoutVersion() == 0 ? "" : 
+          "\n   cur LV = " + storage.getLayoutVersion()
+          + "; cur CTime = " + storage.getCTime()));
+    for (Iterator<StorageDirectory> it = storage.dirIterator(false); it.hasNext();) {
       StorageDirectory sd = it.next();
-      doFinalize(sd);
+      NNUpgradeUtil.doFinalize(sd);
+    }
+    if (finalizeEditLog) {
+      // We only do this in the case that HA is enabled and we're active. In any
+      // other case the NN will have done the upgrade of the edits directories
+      // already by virtue of the fact that they're local.
+      editLog.doFinalizeOfSharedLog();
     }
+    isUpgradeFinalized = true;
   }
 
   boolean isUpgradeFinalized() {
@@ -582,8 +539,8 @@ public class FSImage implements Closeable {
    * @return whether the image should be saved
    * @throws IOException
    */
-  boolean loadFSImage(FSNamesystem target, MetaRecoveryContext recovery)
-      throws IOException {
+  boolean loadFSImage(FSNamesystem target, MetaRecoveryContext recovery,
+      StartupOption startOpt) throws IOException {
     FSImageStorageInspector inspector = storage.readAndInspectDirs();
     FSImageFile imageFile = null;
     
@@ -600,7 +557,7 @@ public class FSImage implements Closeable {
 
     Iterable<EditLogInputStream> editStreams = null;
 
-    initEditLog();
+    initEditLog(startOpt);
 
     if (LayoutVersion.supports(Feature.TXID_BASED_LAYOUT, 
                                getLayoutVersion())) {
@@ -682,14 +639,30 @@ public class FSImage implements Closeable {
     }
   }
 
-  public void initEditLog() {
+  public void initEditLog(StartupOption startOpt) throws IOException {
     Preconditions.checkState(getNamespaceID() != 0,
         "Must know namespace ID before initting edit log");
     String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf);
     if (!HAUtil.isHAEnabled(conf, nameserviceId)) {
+      // If this NN is not HA
       editLog.initJournalsForWrite();
       editLog.recoverUnclosedStreams();
+    } else if (HAUtil.isHAEnabled(conf, nameserviceId) &&
+        startOpt == StartupOption.UPGRADE) {
+      // This NN is HA, but we're doing an upgrade so init the edit log for
+      // write.
+      editLog.initJournalsForWrite();
+      long sharedLogCTime = editLog.getSharedLogCTime();
+      if (this.storage.getCTime() < sharedLogCTime) {
+        throw new IOException("It looks like the shared log is already " +
+            "being upgraded but this NN has not been upgraded yet. You " +
+            "should restart this NameNode with the '" +
+            StartupOption.BOOTSTRAPSTANDBY.getName() + "' option to bring " +
+            "this NN in sync with the other.");
+      }
+      editLog.recoverUnclosedStreams();
     } else {
+      // This NN is HA and we're not doing an upgrade.
       editLog.initSharedJournalsForRead();
     }
   }

+ 16 - 7
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -552,6 +552,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
     return leaseManager;
   }
   
+  boolean isHaEnabled() {
+    return haEnabled;
+  }
+  
   /**
    * Check the supplied configuration for correctness.
    * @param conf Supplies the configuration to validate.
@@ -881,7 +885,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
       }
       // This will start a new log segment and write to the seen_txid file, so
       // we shouldn't do it when coming up in standby state
-      if (!haEnabled) {
+      if (!haEnabled || (haEnabled && startOpt == StartupOption.UPGRADE)) {
         fsImage.openEditLogForWrite();
       }
       success = true;
@@ -1008,6 +1012,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
 
         dir.fsImage.editLog.openForWrite();
       }
+      
       if (haEnabled) {
         // Renew all of the leases before becoming active.
         // This is because, while we were in standby mode,
@@ -1034,14 +1039,17 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
     }
   }
   
+  private boolean inActiveState() {
+    return haContext != null &&
+        haContext.getState().getServiceState() == HAServiceState.ACTIVE;
+  }
+  
   /**
    * @return Whether the namenode is transitioning to active state and is in the
    *         middle of the {@link #startActiveServices()}
    */
   public boolean inTransitionToActive() {
-    return haEnabled && haContext != null
-        && haContext.getState().getServiceState() == HAServiceState.ACTIVE
-        && startingActiveService;
+    return haEnabled && inActiveState() && startingActiveService;
   }
 
   private boolean shouldUseDelegationTokens() {
@@ -4515,11 +4523,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
     
   void finalizeUpgrade() throws IOException {
     checkSuperuserPrivilege();
-    checkOperation(OperationCategory.WRITE);
+    checkOperation(OperationCategory.UNCHECKED);
     writeLock();
     try {
-      checkOperation(OperationCategory.WRITE);
-      getFSImage().finalizeUpgrade();
+      checkOperation(OperationCategory.UNCHECKED);
+      getFSImage().finalizeUpgrade(this.isHaEnabled() && inActiveState());
     } finally {
       writeUnlock();
     }
@@ -7443,5 +7451,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
       logger.addAppender(asyncAppender);        
     }
   }
+
 }
 

+ 47 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java

@@ -33,14 +33,15 @@ import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
+import org.apache.hadoop.hdfs.server.common.Storage;
 import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
 import org.apache.hadoop.hdfs.server.common.StorageErrorReporter;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
 import org.apache.hadoop.hdfs.server.namenode.NNStorageRetentionManager.StoragePurger;
 import org.apache.hadoop.hdfs.server.namenode.FSEditLogLoader.EditLogValidation;
 import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
 import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
-
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
@@ -489,4 +490,49 @@ public class FileJournalManager implements JournalManager {
                            isInProgress(), hasCorruptHeader);
     }
   }
+  
+  @Override
+  public void doPreUpgrade() throws IOException {
+    LOG.info("Starting upgrade of edits directory " + sd.getRoot());
+    try {
+      NNUpgradeUtil.doPreUpgrade(sd);
+    } catch (IOException ioe) {
+      LOG.error("Failed to move aside pre-upgrade storage " +
+          "in edits directory " + sd.getRoot(), ioe);
+      throw ioe;
+    }
+  }
+  
+  /**
+   * This method assumes that the fields of the {@link Storage} object have
+   * already been updated to the appropriate new values for the upgrade.
+   */
+  @Override
+  public void doUpgrade(Storage storage) throws IOException {
+    NNUpgradeUtil.doUpgrade(sd, storage);
+  }
+  
+  @Override
+  public void doFinalize() throws IOException {
+    NNUpgradeUtil.doFinalize(sd);
+  }
+
+  @Override
+  public boolean canRollBack(StorageInfo storage, StorageInfo prevStorage,
+      int targetLayoutVersion) throws IOException {
+    return NNUpgradeUtil.canRollBack(sd, storage,
+        prevStorage, targetLayoutVersion);
+  }
+
+  @Override
+  public void doRollback() throws IOException {
+    NNUpgradeUtil.doRollBack(sd);
+  }
+
+  @Override
+  public long getJournalCTime() throws IOException {
+    StorageInfo sInfo = new StorageInfo();
+    sInfo.readProperties(sd);
+    return sInfo.getCTime();
+  }
 }

+ 51 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalManager.java

@@ -22,7 +22,9 @@ import java.io.IOException;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hdfs.server.common.Storage;
 import org.apache.hadoop.hdfs.server.common.Storage.FormatConfirmable;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
 
 /**
@@ -64,6 +66,54 @@ public interface JournalManager extends Closeable, FormatConfirmable,
    * Recover segments which have not been finalized.
    */
   void recoverUnfinalizedSegments() throws IOException;
+  
+  /**
+   * Perform any steps that must succeed across all JournalManagers involved in
+   * an upgrade before proceeding onto the actual upgrade stage. If a call to
+   * any JM's doPreUpgrade method fails, then doUpgrade will not be called for
+   * any JM.
+   */
+  void doPreUpgrade() throws IOException;
+  
+  /**
+   * Perform the actual upgrade of the JM. After this is completed, the NN can
+   * begin to use the new upgraded metadata. This metadata may later be either
+   * finalized or rolled back to the previous state.
+   * 
+   * @param storage info about the new upgraded versions.
+   */
+  void doUpgrade(Storage storage) throws IOException;
+  
+  /**
+   * Finalize the upgrade. JMs should purge any state that they had been keeping
+   * around during the upgrade process. After this is completed, rollback is no
+   * longer allowed.
+   */
+  void doFinalize() throws IOException;
+  
+  /**
+   * Return true if this JM can roll back to the previous storage state, false
+   * otherwise. The NN will refuse to run the rollback operation unless at least
+   * one JM or fsimage storage directory can roll back.
+   * 
+   * @param storage the storage info for the current state
+   * @param prevStorage the storage info for the previous (unupgraded) state
+   * @param targetLayoutVersion the layout version we intend to roll back to
+   * @return true if this JM can roll back, false otherwise.
+   */
+  boolean canRollBack(StorageInfo storage, StorageInfo prevStorage,
+      int targetLayoutVersion) throws IOException;
+  
+  /**
+   * Perform the rollback to the previous FS state. JMs which do not need to
+   * roll back their state should just return without error.
+   */
+  void doRollback() throws IOException;
+  
+  /**
+   * @return the CTime of the journal manager.
+   */
+  long getJournalCTime() throws IOException;
 
   /**
    * Close the journal manager, freeing any resources it may hold.
@@ -84,4 +134,5 @@ public interface JournalManager extends Closeable, FormatConfirmable,
       super(reason);
     }
   }
+
 }
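
  The javadocs above define a two-phase contract: doPreUpgrade must succeed on
  every JournalManager before doUpgrade is invoked on any of them. A minimal
  sketch of a caller honoring that ordering (UpgradeDriver is hypothetical and
  assumed to sit in the same package as JournalManager; the real orchestration
  lives in FSEditLog/FSImage):

    import java.io.IOException;
    import java.util.List;

    import org.apache.hadoop.hdfs.server.common.Storage;

    class UpgradeDriver {
      private final List<JournalManager> jms;
      private final Storage newStorage;

      UpgradeDriver(List<JournalManager> jms, Storage newStorage) {
        this.jms = jms;
        this.newStorage = newStorage;
      }

      void upgradeAll() throws IOException {
        // Phase 1: pre-upgrade everywhere. If any JM fails here, abort
        // before doUpgrade has been called on any JM.
        for (JournalManager jm : jms) {
          jm.doPreUpgrade();
        }
        // Phase 2: reached only if every pre-upgrade succeeded.
        for (JournalManager jm : jms) {
          jm.doUpgrade(newStorage);
        }
        // Later, exactly one of these ends the upgrade window per JM:
        //   jm.doFinalize() -- previous state discarded, rollback disallowed
        //   jm.doRollback() -- previous state restored (check canRollBack first)
      }
    }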

+ 54 - 5
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java

@@ -33,6 +33,8 @@ import java.util.concurrent.CopyOnWriteArrayList;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hdfs.server.common.Storage;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
 import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
 import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;
@@ -77,11 +79,14 @@ public class JournalSet implements JournalManager {
     private final JournalManager journal;
     private boolean disabled = false;
     private EditLogOutputStream stream;
-    private boolean required = false;
+    private final boolean required;
+    private final boolean shared;
     
-    public JournalAndStream(JournalManager manager, boolean required) {
+    public JournalAndStream(JournalManager manager, boolean required,
+        boolean shared) {
       this.journal = manager;
       this.required = required;
+      this.shared = shared;
     }
 
     public void startLogSegment(long txId) throws IOException {
@@ -163,6 +168,10 @@ public class JournalSet implements JournalManager {
     public boolean isRequired() {
       return required;
     }
+    
+    public boolean isShared() {
+      return shared;
+    }
   }
  
   // COW implementation is necessary since some users (eg the web ui) call
@@ -178,7 +187,7 @@ public class JournalSet implements JournalManager {
   
   @Override
   public void format(NamespaceInfo nsInfo) throws IOException {
-    // The iteration is done by FSEditLog itself
+    // The operation is done by FSEditLog itself
     throw new UnsupportedOperationException();
   }
 
@@ -537,9 +546,13 @@ public class JournalSet implements JournalManager {
     }
     return jList;
   }
-
+  
   void add(JournalManager j, boolean required) {
-    JournalAndStream jas = new JournalAndStream(j, required);
+    add(j, required, false);
+  }
+  
+  void add(JournalManager j, boolean required, boolean shared) {
+    JournalAndStream jas = new JournalAndStream(j, required, shared);
     journals.add(jas);
   }
   
@@ -655,4 +668,40 @@ public class JournalSet implements JournalManager {
     }
     return buf.toString();
   }
+
+  @Override
+  public void doPreUpgrade() throws IOException {
+    // This operation is handled by FSEditLog directly.
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public void doUpgrade(Storage storage) throws IOException {
+    // This operation is handled by FSEditLog directly.
+    throw new UnsupportedOperationException();
+  }
+  
+  @Override
+  public void doFinalize() throws IOException {
+    // This operation is handled by FSEditLog directly.
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public boolean canRollBack(StorageInfo storage, StorageInfo prevStorage,
+      int targetLayoutVersion) throws IOException {
+    // This operation is handled by FSEditLog directly.
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public void doRollback() throws IOException {
+    // This operation is handled by FSEditLog directly.
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public long getJournalCTime() throws IOException {
+    // This operation is handled by FSEditLog directly.
+    throw new UnsupportedOperationException();
+  }
 }

+ 3 - 3
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java

@@ -299,7 +299,7 @@ public class NNStorage extends Storage implements Closeable,
       if(dirName.getScheme().compareTo("file") == 0) {
         this.addStorageDir(new StorageDirectory(new File(dirName.getPath()),
             dirType,
-            !sharedEditsDirs.contains(dirName))); // Don't lock the dir if it's shared.
+            sharedEditsDirs.contains(dirName))); // Don't lock the dir if it's shared.
       }
     }
 
@@ -310,7 +310,7 @@ public class NNStorage extends Storage implements Closeable,
       // URI is of type file://
       if(dirName.getScheme().compareTo("file") == 0)
         this.addStorageDir(new StorageDirectory(new File(dirName.getPath()),
-                    NameNodeDirType.EDITS, !sharedEditsDirs.contains(dirName)));
+                    NameNodeDirType.EDITS, sharedEditsDirs.contains(dirName)));
     }
   }
 
@@ -976,7 +976,7 @@ public class NNStorage extends Storage implements Closeable,
     StringBuilder layoutVersions = new StringBuilder();
 
     // First determine what range of layout versions we're going to inspect
-    for (Iterator<StorageDirectory> it = dirIterator();
+    for (Iterator<StorageDirectory> it = dirIterator(false);
          it.hasNext();) {
       StorageDirectory sd = it.next();
       if (!sd.getVersionFile().exists()) {

+ 174 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNUpgradeUtil.java

@@ -0,0 +1,174 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hdfs.server.common.Storage;
+import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
+
+abstract class NNUpgradeUtil {
+  
+  private static final Log LOG = LogFactory.getLog(NNUpgradeUtil.class);
+  
+  /**
+   * Return true if this storage dir can roll back to the previous storage
+   * state, false otherwise. The NN will refuse to run the rollback operation
+   * unless at least one JM or fsimage storage directory can roll back.
+   *
+   * @param sd the storage directory to inspect
+   * @param storage the storage info for the current state
+   * @param prevStorage the storage info for the previous (unupgraded) state
+   * @param targetLayoutVersion the layout version we intend to roll back to
+   * @return true if this storage directory can roll back, false otherwise.
+   * @throws IOException in the event of error
+   */
+  static boolean canRollBack(StorageDirectory sd, StorageInfo storage,
+      StorageInfo prevStorage, int targetLayoutVersion) throws IOException {
+    File prevDir = sd.getPreviousDir();
+    if (!prevDir.exists()) {  // nothing to roll back to
+      LOG.info("Storage directory " + sd.getRoot()
+               + " does not contain previous fs state.");
+      // read and verify consistency with other directories
+      storage.readProperties(sd);
+      return false;
+    }
+
+    // read and verify consistency of the prev dir
+    prevStorage.readPreviousVersionProperties(sd);
+
+    if (prevStorage.getLayoutVersion() != targetLayoutVersion) {
+      throw new IOException(
+        "Cannot rollback to storage version " +
+        prevStorage.getLayoutVersion() +
+        " using this version of the NameNode, which uses storage version " +
+        targetLayoutVersion + ". " +
+        "Please use the previous version of HDFS to perform the rollback.");
+    }
+    
+    return true;
+  }
+
+  /**
+   * Finalize the upgrade. The previous dir, if any, will be renamed and
+   * removed. After this is completed, rollback is no longer allowed.
+   * 
+   * @param sd the storage directory to finalize
+   * @throws IOException in the event of error
+   */
+  static void doFinalize(StorageDirectory sd) throws IOException {
+    File prevDir = sd.getPreviousDir();
+    if (!prevDir.exists()) { // already discarded
+      LOG.info("Directory " + prevDir + " does not exist.");
+      LOG.info("Finalize upgrade for " + sd.getRoot()+ " is not required.");
+      return;
+    }
+    LOG.info("Finalizing upgrade of storage directory " + sd.getRoot());
+    assert sd.getCurrentDir().exists() : "Current directory must exist.";
+    final File tmpDir = sd.getFinalizedTmp();
+    // rename previous to tmp and remove
+    NNStorage.rename(prevDir, tmpDir);
+    NNStorage.deleteDir(tmpDir);
+    LOG.info("Finalize upgrade for " + sd.getRoot()+ " is complete.");
+  }
+  
+  /**
+   * Perform any steps that must succeed across all storage dirs/JournalManagers
+   * involved in an upgrade before proceeding onto the actual upgrade stage. If
+   * a call to any JM's or local storage dir's doPreUpgrade method fails, then
+   * doUpgrade will not be called for any JM. The existing current dir is
+   * renamed to previous.tmp, and then a new, empty current dir is created.
+   * 
+   * @param sd the storage directory on which to perform the pre-upgrade procedure.
+   * @throws IOException in the event of error
+   */
+  static void doPreUpgrade(StorageDirectory sd) throws IOException {
+    LOG.info("Starting upgrade of storage directory " + sd.getRoot());
+    File curDir = sd.getCurrentDir();
+    File prevDir = sd.getPreviousDir();
+    File tmpDir = sd.getPreviousTmp();
+    assert curDir.exists() : "Current directory must exist.";
+    assert !prevDir.exists() : "previous directory must not exist.";
+    assert !tmpDir.exists() : "previous.tmp directory must not exist.";
+
+    // rename current to tmp
+    NNStorage.rename(curDir, tmpDir);
+    
+    if (!curDir.mkdir()) {
+      throw new IOException("Cannot create directory " + curDir);
+    }
+  }
+  
+  /**
+   * Perform the upgrade of the storage dir to the given storage info. The new
+   * storage info is written into the current directory, and the previous.tmp
+   * directory is renamed to previous.
+   * 
+   * @param sd the storage directory to upgrade
+   * @param storage info about the new upgraded versions.
+   * @throws IOException in the event of error
+   */
+  static void doUpgrade(StorageDirectory sd, Storage storage) throws
+      IOException {
+    LOG.info("Performing upgrade of storage directory " + sd.getRoot());
+    try {
+      // Write the version file, since saveFsImage only makes the
+      // fsimage_<txid>, and the directory is otherwise empty.
+      storage.writeProperties(sd);
+      
+      File prevDir = sd.getPreviousDir();
+      File tmpDir = sd.getPreviousTmp();
+      // rename tmp to previous
+      NNStorage.rename(tmpDir, prevDir);
+    } catch (IOException ioe) {
+      LOG.error("Unable to rename temp to previous for " + sd.getRoot(), ioe);
+      throw ioe;
+    }
+  }
+
+  /**
+   * Perform rollback of the storage dir to the previous state. The existing
+   * current dir is removed, and the previous dir is renamed to current.
+   * 
+   * @param sd the storage directory to roll back.
+   * @throws IOException in the event of error
+   */
+  static void doRollBack(StorageDirectory sd)
+      throws IOException {
+    File prevDir = sd.getPreviousDir();
+    if (!prevDir.exists())
+      return;
+
+    File tmpDir = sd.getRemovedTmp();
+    assert !tmpDir.exists() : "removed.tmp directory must not exist.";
+    // rename current to tmp
+    File curDir = sd.getCurrentDir();
+    assert curDir.exists() : "Current directory must exist.";
+    NNStorage.rename(curDir, tmpDir);
+    // rename previous to current
+    NNStorage.rename(prevDir, curDir);
+
+    // delete tmp dir
+    NNStorage.deleteDir(tmpDir);
+    LOG.info("Rollback of " + sd.getRoot() + " is complete.");
+  }
+  
+}
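
  Taken together, these helpers implement a small per-directory state machine.
  A sketch of the two possible lifecycles, with comments tracing the directory
  renames each step performs (StorageDirLifecycle is illustrative and assumed
  to sit in the same package as the package-private NNUpgradeUtil):

    import java.io.IOException;

    import org.apache.hadoop.hdfs.server.common.Storage;
    import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;

    class StorageDirLifecycle {
      /** Upgrade and keep the result. */
      static void upgradeThenFinalize(StorageDirectory sd, Storage storage)
          throws IOException {
        NNUpgradeUtil.doPreUpgrade(sd);       // current -> previous.tmp; new empty current
        NNUpgradeUtil.doUpgrade(sd, storage); // write VERSION; previous.tmp -> previous
        NNUpgradeUtil.doFinalize(sd);         // previous -> finalized.tmp, then deleted
      }

      /** Upgrade, then return to the pre-upgrade state. */
      static void upgradeThenRollBack(StorageDirectory sd, Storage storage)
          throws IOException {
        NNUpgradeUtil.doPreUpgrade(sd);
        NNUpgradeUtil.doUpgrade(sd, storage);
        NNUpgradeUtil.doRollBack(sd);         // current -> removed.tmp; previous -> current
      }
    }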

+ 28 - 23
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java

@@ -648,7 +648,7 @@ public class NameNode implements NameNodeStatusMXBean {
     String nsId = getNameServiceId(conf);
     String namenodeId = HAUtil.getNameNodeId(conf, nsId);
     this.haEnabled = HAUtil.isHAEnabled(conf, nsId);
-    state = createHAState();
+    state = createHAState(getStartupOption(conf));
     this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf);
     this.haContext = createHAContext();
     try {
@@ -670,8 +670,12 @@ public class NameNode implements NameNodeStatusMXBean {
     }
   }
 
-  protected HAState createHAState() {
-    return !haEnabled ? ACTIVE_STATE : STANDBY_STATE;
+  protected HAState createHAState(StartupOption startOpt) {
+    if (!haEnabled || startOpt == StartupOption.UPGRADE) {
+      return ACTIVE_STATE;
+    } else {
+      return STANDBY_STATE;
+    }
   }
 
   protected HAContext createHAContext() {
@@ -1023,26 +1027,28 @@ public class NameNode implements NameNodeStatusMXBean {
       }
     }
   }
-
-  private static boolean finalize(Configuration conf,
-                               boolean isConfirmationNeeded
-                               ) throws IOException {
+  
+  @VisibleForTesting
+  public static boolean doRollback(Configuration conf,
+      boolean isConfirmationNeeded) throws IOException {
     String nsId = DFSUtil.getNamenodeNameServiceId(conf);
     String namenodeId = HAUtil.getNameNodeId(conf, nsId);
     initializeGenericKeys(conf, nsId, namenodeId);
 
     FSNamesystem nsys = new FSNamesystem(conf, new FSImage(conf));
     System.err.print(
-        "\"finalize\" will remove the previous state of the files system.\n"
-        + "Recent upgrade will become permanent.\n"
-        + "Rollback option will not be available anymore.\n");
+        "\"rollBack\" will remove the current state of the file system,\n"
+        + "returning you to the state prior to initiating your recent.\n"
+        + "upgrade. This action is permanent and cannot be undone. If you\n"
+        + "are performing a rollback in an HA environment, you should be\n"
+        + "certain that no NameNode process is running on any host.");
     if (isConfirmationNeeded) {
-      if (!confirmPrompt("Finalize filesystem state?")) {
-        System.err.println("Finalize aborted.");
+      if (!confirmPrompt("Roll back file system state?")) {
+        System.err.println("Rollback aborted.");
         return true;
       }
     }
-    nsys.dir.fsImage.finalizeUpgrade();
+    nsys.dir.fsImage.doRollback(nsys);
     return false;
   }
 
@@ -1206,14 +1212,6 @@ public class NameNode implements NameNodeStatusMXBean {
       return null;
     }
     setStartupOption(conf, startOpt);
-    
-    if (HAUtil.isHAEnabled(conf, DFSUtil.getNamenodeNameServiceId(conf)) &&
-        (startOpt == StartupOption.UPGRADE ||
-         startOpt == StartupOption.ROLLBACK ||
-         startOpt == StartupOption.FINALIZE)) {
-      throw new HadoopIllegalArgumentException("Invalid startup option. " +
-          "Cannot perform DFS upgrade with HA enabled.");
-    }
 
     switch (startOpt) {
       case FORMAT: {
@@ -1229,10 +1227,17 @@ public class NameNode implements NameNodeStatusMXBean {
         return null;
       }
       case FINALIZE: {
-        boolean aborted = finalize(conf, true);
-        terminate(aborted ? 1 : 0);
+        System.err.println("Use of the argument '" + StartupOption.FINALIZE +
+            "' is no longer supported. To finalize an upgrade, start the NN " +
+            " and then run `hdfs dfsadmin -finalizeUpgrade'");
+        terminate(1);
         return null; // avoid javac warning
       }
+      case ROLLBACK: {
+        boolean aborted = doRollback(conf, true);
+        terminate(aborted ? 1 : 0);
+        return null; // avoid warning
+      }
       case BOOTSTRAPSTANDBY: {
         String toolArgs[] = Arrays.copyOfRange(argv, 1, argv.length);
         int rc = BootstrapStandby.run(toolArgs, conf);

+ 2 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BootstrapStandby.java

@@ -39,6 +39,7 @@ import org.apache.hadoop.hdfs.HAUtil;
 import org.apache.hadoop.hdfs.NameNodeProxies;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.server.common.Storage;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
 import org.apache.hadoop.hdfs.server.namenode.EditLogInputStream;
 import org.apache.hadoop.hdfs.server.namenode.FSImage;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
@@ -192,7 +193,7 @@ public class BootstrapStandby implements Tool, Configurable {
     FSImage image = new FSImage(conf);
     try {
       image.getStorage().setStorageInfo(storage);
-      image.initEditLog();
+      image.initEditLog(StartupOption.REGULAR);
       assert image.getEditLog().isOpenForRead() :
         "Expected edit log to be open for read";
 

+ 20 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java

@@ -20,6 +20,7 @@ package org.apache.hadoop.hdfs.tools;
 import java.io.File;
 import java.io.IOException;
 import java.net.InetSocketAddress;
+import java.net.URI;
 import java.net.URL;
 import java.security.PrivilegedExceptionAction;
 import java.util.ArrayList;
@@ -46,6 +47,7 @@ import org.apache.hadoop.hdfs.HAUtil;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.NameNodeProxies;
 import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
+import org.apache.hadoop.hdfs.protocol.ClientProtocol;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
@@ -769,7 +771,24 @@ public class DFSAdmin extends FsShell {
    */
   public int finalizeUpgrade() throws IOException {
     DistributedFileSystem dfs = getDFS();
-    dfs.finalizeUpgrade();
+    
+    Configuration dfsConf = dfs.getConf();
+    URI dfsUri = dfs.getUri();
+    boolean isHaEnabled = HAUtil.isLogicalUri(dfsConf, dfsUri);
+    if (isHaEnabled) {
+      // In the case of HA, run finalizeUpgrade for all NNs in this nameservice
+      String nsId = dfsUri.getHost();
+      List<ClientProtocol> namenodes =
+          HAUtil.getProxiesForAllNameNodesInNameservice(dfsConf, nsId);
+      if (!HAUtil.isAtLeastOneActive(namenodes)) {
+        throw new IOException("Cannot finalize with no NameNode active");
+      }
+      for (ClientProtocol haNn : namenodes) {
+        haNn.finalizeUpgrade();
+      }
+    } else {
+      dfs.finalizeUpgrade();
+    }
     
     return 0;
   }
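
  One detail worth noting in the HA branch above: for a logical HA URI the
  nameservice ID is recovered from the URI's host component. A self-contained
  illustration (hdfs://mycluster is an illustrative logical URI, not a value
  from the patch):

    import java.net.URI;

    public class LogicalUriDemo {
      public static void main(String[] args) {
        // A logical HA URI has no real hostname; its authority is the
        // nameservice ID, which is why DFSAdmin reads dfsUri.getHost().
        URI dfsUri = URI.create("hdfs://mycluster");
        System.out.println(dfsUri.getHost()); // prints: mycluster
      }
    }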

+ 78 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/proto/QJournalProtocol.proto

@@ -133,6 +133,72 @@ message IsFormattedResponseProto {
   required bool isFormatted = 1;
 }
 
+/**
+ * getJournalCTime()
+ */
+message GetJournalCTimeRequestProto {
+  required JournalIdProto jid = 1;
+}
+
+message GetJournalCTimeResponseProto {
+  required int64 resultCTime = 1;
+}
+
+/**
+ * doPreUpgrade()
+ */
+message DoPreUpgradeRequestProto {
+  required JournalIdProto jid = 1;
+}
+
+message DoPreUpgradeResponseProto {
+}
+
+/**
+ * doUpgrade()
+ */
+message DoUpgradeRequestProto {
+  required JournalIdProto jid = 1;
+  required StorageInfoProto sInfo = 2;
+}
+
+message DoUpgradeResponseProto {
+}
+
+/**
+ * doFinalize()
+ */
+message DoFinalizeRequestProto {
+  required JournalIdProto jid = 1;
+}
+
+message DoFinalizeResponseProto {
+}
+
+/**
+ * canRollBack()
+ */
+message CanRollBackRequestProto {
+  required JournalIdProto jid = 1;
+  required StorageInfoProto storage = 2;
+  required StorageInfoProto prevStorage = 3;
+  required int32 targetLayoutVersion = 4;
+}
+
+message CanRollBackResponseProto {
+  required bool canRollBack = 1;
+}
+
+/**
+ * doRollback()
+ */
+message DoRollbackRequestProto {
+  required JournalIdProto jid = 1;
+}
+
+message DoRollbackResponseProto {
+}
+
 /**
  * getJournalState()
  */
@@ -236,6 +302,18 @@ message AcceptRecoveryResponseProto {
 service QJournalProtocolService {
   rpc isFormatted(IsFormattedRequestProto) returns (IsFormattedResponseProto);
 
+  rpc getJournalCTime(GetJournalCTimeRequestProto) returns (GetJournalCTimeResponseProto);
+  
+  rpc doPreUpgrade(DoPreUpgradeRequestProto) returns (DoPreUpgradeResponseProto);
+  
+  rpc doUpgrade(DoUpgradeRequestProto) returns (DoUpgradeResponseProto);
+
+  rpc doFinalize(DoFinalizeRequestProto) returns (DoFinalizeResponseProto);
+
+  rpc canRollBack(CanRollBackRequestProto) returns (CanRollBackResponseProto);
+
+  rpc doRollback(DoRollbackRequestProto) returns (DoRollbackResponseProto);
+
   rpc getJournalState(GetJournalStateRequestProto) returns (GetJournalStateResponseProto);
 
   rpc newEpoch(NewEpochRequestProto) returns (NewEpochResponseProto);
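
  These RPCs mirror the JournalManager upgrade methods one-to-one, which lets
  the quorum client fan each call out to all JournalNodes. A hedged sketch of
  combining per-JournalNode canRollBack answers (QuorumFanOut and JnClient are
  illustrative stand-ins, not the real QuorumJournalManager code; the OR
  mirrors the "canRollback |=" treatment in FSImage.doRollback above):

    import java.io.IOException;
    import java.util.List;

    class QuorumFanOut {
      /** Stand-in for one JournalNode's RPC proxy. */
      interface JnClient {
        boolean canRollBack() throws IOException;
      }

      private final List<JnClient> journalNodes;

      QuorumFanOut(List<JnClient> journalNodes) {
        this.journalNodes = journalNodes;
      }

      boolean canRollBackSharedLog() throws IOException {
        boolean result = false;
        for (JnClient jn : journalNodes) {
          // Rollback remains possible if any journal retains previous state.
          result |= jn.canRollBack();
        }
        return result;
      }
    }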

+ 46 - 0
hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HDFSHighAvailabilityWithQJM.apt.vm

@@ -765,3 +765,49 @@ digest:hdfs-zkfcs:vlUvLnd8MlacsE80rDuu6ONESbM=:rwcda
   Even if automatic failover is configured, you may initiate a manual failover
   using the same <<<hdfs haadmin>>> command. It will perform a coordinated
   failover.
+
+* HDFS Upgrade/Finalization/Rollback with HA Enabled
+
+  When moving between versions of HDFS, sometimes the newer software can simply
+  be installed and the cluster restarted. Sometimes, however, upgrading the
+  version of HDFS you're running may require changing on-disk data. In this case,
+  one must use the HDFS Upgrade/Finalize/Rollback facility after installing the
+  new software. This process is more complex in an HA environment, since the
+  on-disk metadata that the NN relies upon is by definition distributed: it
+  lives on both NNs in the HA pair, and on the JournalNodes when QJM is used
+  for the shared edits storage. This documentation section describes
+  the procedure to use the HDFS Upgrade/Finalize/Rollback facility in an HA setup.
+
+  <<To perform an HA upgrade>>, the operator must do the following:
+
+    [[1]] Shut down all of the NNs as normal, and install the newer software.
+
+    [[2]] Start one of the NNs with the <<<'-upgrade'>>> flag.
+  
+    [[3]] On start, this NN will not enter the standby state as usual in an HA
+    setup. Rather, this NN will immediately enter the active state, perform an
+    upgrade of its local storage dirs, and also perform an upgrade of the shared
+    edit log.
+  
+    [[4]] At this point the other NN in the HA pair will be out of sync with
+    the upgraded NN. In order to bring it back in sync and once again have a highly
+    available setup, you should re-bootstrap this NameNode by running the NN with
+    the <<<'-bootstrapStandby'>>> flag. It is an error to start this second NN with
+    the <<<'-upgrade'>>> flag.
+  
+  Note that if at any time you want to restart the NameNodes before finalizing
+  or rolling back the upgrade, you should start the NNs as normal, i.e. without
+  any special startup flag.
+
+  <<To finalize an HA upgrade>>, the operator will use the <<<`hdfs dfsadmin
+  -finalizeUpgrade'>>> command while the NNs are running and one of them
+  is active. The active NN at the time this happens will perform the finalization
+  of the shared log, and the NN whose local storage directories contain the
+  previous FS state will delete that previous state.
+
+  <<To perform a rollback>> of an upgrade, both NNs should first be shut down.
+  The operator should run the rollback command on the NN where they initiated
+  the upgrade procedure, which will perform the rollback on the local dirs
+  there, as well as on the shared log, whether that resides on NFS or on the
+  JNs. Afterward, this NN should be started and the operator should run
+  <<<`-bootstrapStandby'>>> on the other NN to bring the two NNs in sync with
+  this rolled-back file system state.
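
  The three procedures above condense to the following command sequence (a
  sketch; which NN drives each step is illustrative, and either NN of the
  pair may initiate the upgrade):

----
# Upgrade: after installing the new software on all nodes, on one NN:
hdfs namenode -upgrade
# then, on the other NN:
hdfs namenode -bootstrapStandby

# Finalize: while both NNs are running and one is active:
hdfs dfsadmin -finalizeUpgrade

# Rollback: with both NNs shut down, on the NN that ran the upgrade:
hdfs namenode -rollback
# then, on the other NN:
hdfs namenode -bootstrapStandby
----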

+ 3 - 16
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java

@@ -20,6 +20,8 @@ package org.apache.hadoop.hdfs;
 
 import com.google.common.base.Charsets;
 import com.google.common.base.Joiner;
+
+import org.apache.commons.io.FileUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -27,7 +29,6 @@ import org.apache.hadoop.fs.*;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileSystem.Statistics;
 import org.apache.hadoop.fs.Options.Rename;
-import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.MiniDFSCluster.NameNodeInfo;
 import org.apache.hadoop.hdfs.client.HdfsDataInputStream;
@@ -889,21 +890,7 @@ public class DFSTestUtil {
   
   /** Copy one file's contents into the other **/
   public static void copyFile(File src, File dest) throws IOException {
-    InputStream in = null;
-    OutputStream out = null;
-    
-    try {
-      in = new FileInputStream(src);
-      out = new FileOutputStream(dest);
-
-      byte [] b = new byte[1024];
-      while( in.read(b)  > 0 ) {
-        out.write(b);
-      }
-    } finally {
-      if(in != null) in.close();
-      if(out != null) out.close();
-    }
+    FileUtils.copyFile(src, dest);
   }
 
   public static class Builder {

+ 47 - 19
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java

@@ -100,7 +100,6 @@ import org.apache.hadoop.net.StaticMapping;
 import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.authorize.ProxyUsers;
-import org.apache.hadoop.security.ssl.SSLFactory;
 import org.apache.hadoop.util.ExitUtil;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.ToolRunner;
@@ -147,6 +146,7 @@ public class MiniDFSCluster {
     private boolean enableManagedDfsDirsRedundancy = true;
     private boolean manageDataDfsDirs = true;
     private StartupOption option = null;
+    private StartupOption dnOption = null;
     private String[] racks = null; 
     private String [] hosts = null;
     private long [] simulatedCapacities = null;
@@ -241,6 +241,14 @@ public class MiniDFSCluster {
       this.option = val;
       return this;
     }
+    
+    /**
+     * Set the startup option to pass to DataNodes. Default: null
+     */
+    public Builder dnStartupOption(StartupOption val) {
+      this.dnOption = val;
+      return this;
+    }
 
     /**
      * Default: null
@@ -357,6 +365,7 @@ public class MiniDFSCluster {
                        builder.enableManagedDfsDirsRedundancy,
                        builder.manageDataDfsDirs,
                        builder.option,
+                       builder.dnOption,
                        builder.racks,
                        builder.hosts,
                        builder.simulatedCapacities,
@@ -406,18 +415,24 @@ public class MiniDFSCluster {
   /**
    * Stores the information related to a namenode in the cluster
    */
-  static class NameNodeInfo {
+  public static class NameNodeInfo {
     final NameNode nameNode;
     final Configuration conf;
     final String nameserviceId;
     final String nnId;
+    StartupOption startOpt;
     NameNodeInfo(NameNode nn, String nameserviceId, String nnId,
-        Configuration conf) {
+        StartupOption startOpt, Configuration conf) {
       this.nameNode = nn;
       this.nameserviceId = nameserviceId;
       this.nnId = nnId;
+      this.startOpt = startOpt;
       this.conf = conf;
     }
+    
+    public void setStartOpt(StartupOption startOpt) {
+      this.startOpt = startOpt;
+    }
   }
   
   /**
@@ -603,8 +618,8 @@ public class MiniDFSCluster {
                         long[] simulatedCapacities) throws IOException {
     this.nameNodes = new NameNodeInfo[1]; // Single namenode in the cluster
     initMiniDFSCluster(conf, numDataNodes, StorageType.DEFAULT, format,
-        manageNameDfsDirs, true, manageDataDfsDirs, manageDataDfsDirs,
-        operation, racks, hosts,
+        manageNameDfsDirs, true, manageDataDfsDirs, manageDataDfsDirs, 
+        operation, null, racks, hosts,
         simulatedCapacities, null, true, false,
         MiniDFSNNTopology.simpleSingleNN(nameNodePort, 0), true, false, false);
   }
@@ -613,7 +628,8 @@ public class MiniDFSCluster {
       Configuration conf,
       int numDataNodes, StorageType storageType, boolean format, boolean manageNameDfsDirs,
       boolean manageNameDfsSharedDirs, boolean enableManagedDfsDirsRedundancy,
-      boolean manageDataDfsDirs, StartupOption operation, String[] racks,
+      boolean manageDataDfsDirs, StartupOption startOpt,
+      StartupOption dnStartOpt, String[] racks,
       String[] hosts, long[] simulatedCapacities, String clusterId,
       boolean waitSafeMode, boolean setupHostsFile,
       MiniDFSNNTopology nnTopology, boolean checkExitOnShutdown,
@@ -662,7 +678,7 @@ public class MiniDFSCluster {
       createNameNodesAndSetConf(
           nnTopology, manageNameDfsDirs, manageNameDfsSharedDirs,
           enableManagedDfsDirsRedundancy,
-          format, operation, clusterId, conf);
+          format, startOpt, clusterId, conf);
     } catch (IOException ioe) {
       LOG.error("IOE creating namenodes. Permissions dump:\n" +
           createPermissionsDiagnosisString(data_dir));
@@ -675,13 +691,15 @@ public class MiniDFSCluster {
       }
     }
     
-    if (operation == StartupOption.RECOVER) {
+    if (startOpt == StartupOption.RECOVER) {
       return;
     }
 
     // Start the DataNodes
-    startDataNodes(conf, numDataNodes, storageType, manageDataDfsDirs, operation, racks,
-        hosts, simulatedCapacities, setupHostsFile, checkDataNodeAddrConfig, checkDataNodeHostConfig);
+    startDataNodes(conf, numDataNodes, storageType, manageDataDfsDirs,
+        dnStartOpt != null ? dnStartOpt : startOpt,
+        racks, hosts, simulatedCapacities, setupHostsFile,
+        checkDataNodeAddrConfig, checkDataNodeHostConfig);
     waitClusterUp();
     //make sure ProxyUsers uses the latest conf
     ProxyUsers.refreshSuperUserGroupsConfiguration(conf);
@@ -759,6 +777,8 @@ public class MiniDFSCluster {
         if (manageNameDfsSharedDirs) {
           URI sharedEditsUri = getSharedEditsDir(nnCounter, nnCounter+nnIds.size()-1); 
           conf.set(DFS_NAMENODE_SHARED_EDITS_DIR_KEY, sharedEditsUri.toString());
+          // Clean out the shared edits dir completely, including all subdirectories.
+          FileUtil.fullyDelete(new File(sharedEditsUri));
         }
       }
 
@@ -858,7 +878,8 @@ public class MiniDFSCluster {
     URI srcDir = Lists.newArrayList(srcDirs).get(0);
     FileSystem dstFS = FileSystem.getLocal(dstConf).getRaw();
     for (URI dstDir : dstDirs) {
-      Preconditions.checkArgument(!dstDir.equals(srcDir));
+      Preconditions.checkArgument(!dstDir.equals(srcDir),
+          "src and dst are the same: " + dstDir);
       File dstDirF = new File(dstDir);
       if (dstDirF.exists()) {
         if (!FileUtil.fullyDelete(dstDirF)) {
@@ -892,6 +913,14 @@ public class MiniDFSCluster {
     conf.set(key, "127.0.0.1:" + nnConf.getIpcPort());
   }
   
+  private static String[] createArgs(StartupOption operation) {
+    String[] args = (operation == null ||
+        operation == StartupOption.FORMAT ||
+        operation == StartupOption.REGULAR) ?
+            new String[] {} : new String[] {operation.getName()};
+    return args;
+  }
+  
   private void createNameNode(int nnIndex, Configuration conf,
       int numDataNodes, boolean format, StartupOption operation,
       String clusterId, String nameserviceId,
@@ -906,10 +935,7 @@ public class MiniDFSCluster {
     }
     
     // Start the NameNode
-    String[] args = (operation == null ||
-                     operation == StartupOption.FORMAT ||
-                     operation == StartupOption.REGULAR) ?
-      new String[] {} : new String[] {operation.getName()};
+    String[] args = createArgs(operation);
     NameNode nn =  NameNode.createNameNode(args, conf);
     if (operation == StartupOption.RECOVER) {
       return;
@@ -931,7 +957,7 @@ public class MiniDFSCluster {
     DFSUtil.setGenericConf(conf, nameserviceId, nnId,
         DFS_NAMENODE_HTTP_ADDRESS_KEY);
     nameNodes[nnIndex] = new NameNodeInfo(nn, nameserviceId, nnId,
-        new Configuration(conf));
+        operation, new Configuration(conf));
   }
 
   /**
@@ -1499,7 +1525,7 @@ public class MiniDFSCluster {
       nn.stop();
       nn.join();
       Configuration conf = nameNodes[nnIndex].conf;
-      nameNodes[nnIndex] = new NameNodeInfo(null, null, null, conf);
+      nameNodes[nnIndex] = new NameNodeInfo(null, null, null, null, conf);
     }
   }
   
@@ -1545,10 +1571,12 @@ public class MiniDFSCluster {
       throws IOException {
     String nameserviceId = nameNodes[nnIndex].nameserviceId;
     String nnId = nameNodes[nnIndex].nnId;
+    StartupOption startOpt = nameNodes[nnIndex].startOpt;
     Configuration conf = nameNodes[nnIndex].conf;
     shutdownNameNode(nnIndex);
-    NameNode nn = NameNode.createNameNode(new String[] {}, conf);
-    nameNodes[nnIndex] = new NameNodeInfo(nn, nameserviceId, nnId, conf);
+    NameNode nn = NameNode.createNameNode(createArgs(startOpt), conf);
+    nameNodes[nnIndex] = new NameNodeInfo(nn, nameserviceId, nnId, startOpt,
+        conf);
     if (waitActive) {
       waitClusterUp();
       LOG.info("Restarted the namenode");

+ 20 - 25
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSRollback.java

@@ -34,6 +34,7 @@ import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
 import org.apache.hadoop.hdfs.server.common.StorageInfo;
 import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.util.StringUtils;
 import org.junit.After;
 import org.junit.Test;
@@ -97,10 +98,10 @@ public class TestDFSRollback {
    * Attempts to start a NameNode with the given operation.  Starting
    * the NameNode should throw an exception.
    */
-  void startNameNodeShouldFail(StartupOption operation, String searchString) {
+  void startNameNodeShouldFail(String searchString) {
     try {
+      NameNode.doRollback(conf, false);
       cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
-                                                .startupOption(operation)
                                                 .format(false)
                                                 .manageDataDfsDirs(false)
                                                 .manageNameDfsDirs(false)
@@ -149,24 +150,19 @@ public class TestDFSRollback {
       log("Normal NameNode rollback", numDirs);
       UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
       UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "previous");
-      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
-                                                .format(false)
-                                                .manageDataDfsDirs(false)
-                                                .manageNameDfsDirs(false)
-                                                .startupOption(StartupOption.ROLLBACK)
-                                                .build();
+      NameNode.doRollback(conf, false);
       checkResult(NAME_NODE, nameNodeDirs);
-      cluster.shutdown();
       UpgradeUtilities.createEmptyDirs(nameNodeDirs);
       
       log("Normal DataNode rollback", numDirs);
       UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
       UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "previous");
+      NameNode.doRollback(conf, false);
       cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
                                                 .format(false)
                                                 .manageDataDfsDirs(false)
                                                 .manageNameDfsDirs(false)
-                                                .startupOption(StartupOption.ROLLBACK)
+                                                .dnStartupOption(StartupOption.ROLLBACK)
                                                 .build();
       UpgradeUtilities.createDataNodeStorageDirs(dataNodeDirs, "current");
       UpgradeUtilities.createDataNodeStorageDirs(dataNodeDirs, "previous");
@@ -179,11 +175,12 @@ public class TestDFSRollback {
       log("Normal BlockPool rollback", numDirs);
       UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
       UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "previous");
+      NameNode.doRollback(conf, false);
       cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
                                                 .format(false)
                                                 .manageDataDfsDirs(false)
                                                 .manageNameDfsDirs(false)
-                                                .startupOption(StartupOption.ROLLBACK)
+                                                .dnStartupOption(StartupOption.ROLLBACK)
                                                 .build();
       UpgradeUtilities.createDataNodeStorageDirs(dataNodeDirs, "current");
       UpgradeUtilities.createBlockPoolStorageDirs(dataNodeDirs, "current",
@@ -217,10 +214,10 @@ public class TestDFSRollback {
       cluster.shutdown();
       UpgradeUtilities.createEmptyDirs(nameNodeDirs);
       UpgradeUtilities.createEmptyDirs(dataNodeDirs);
-
+      
       log("NameNode rollback without existing previous dir", numDirs);
       UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
-      startNameNodeShouldFail(StartupOption.ROLLBACK,
+      startNameNodeShouldFail(
           "None of the storage directories contain previous fs state");
       UpgradeUtilities.createEmptyDirs(nameNodeDirs);
       
@@ -237,15 +234,16 @@ public class TestDFSRollback {
       cluster.shutdown();
       UpgradeUtilities.createEmptyDirs(nameNodeDirs);
       UpgradeUtilities.createEmptyDirs(dataNodeDirs);
-
+      
       log("DataNode rollback with future stored layout version in previous", numDirs);
       UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
       UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "previous");
+      NameNode.doRollback(conf, false);
       cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
                                                 .format(false)
                                                 .manageDataDfsDirs(false)
                                                 .manageNameDfsDirs(false)
-                                                .startupOption(StartupOption.ROLLBACK)
+                                                .dnStartupOption(StartupOption.ROLLBACK)
                                                 .build();
       UpgradeUtilities.createDataNodeStorageDirs(dataNodeDirs, "current");
       baseDirs = UpgradeUtilities.createDataNodeStorageDirs(dataNodeDirs, "previous");
@@ -266,11 +264,12 @@ public class TestDFSRollback {
       log("DataNode rollback with newer fsscTime in previous", numDirs);
       UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
       UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "previous");
+      NameNode.doRollback(conf, false);
       cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
                                                 .format(false)
                                                 .manageDataDfsDirs(false)
                                                 .manageNameDfsDirs(false)
-                                                .startupOption(StartupOption.ROLLBACK)
+                                                .dnStartupOption(StartupOption.ROLLBACK)
                                                 .build();
       
       UpgradeUtilities.createDataNodeStorageDirs(dataNodeDirs, "current");
@@ -287,21 +286,19 @@ public class TestDFSRollback {
       cluster.shutdown();
       UpgradeUtilities.createEmptyDirs(nameNodeDirs);
       UpgradeUtilities.createEmptyDirs(dataNodeDirs);
-
+      
       log("NameNode rollback with no edits file", numDirs);
       UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
       baseDirs = UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "previous");
       deleteMatchingFiles(baseDirs, "edits.*");
-      startNameNodeShouldFail(StartupOption.ROLLBACK,
-          "Gap in transactions");
+      startNameNodeShouldFail("Gap in transactions");
       UpgradeUtilities.createEmptyDirs(nameNodeDirs);
       
       log("NameNode rollback with no image file", numDirs);
       UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "current");
       baseDirs = UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "previous");
       deleteMatchingFiles(baseDirs, "fsimage_.*");
-      startNameNodeShouldFail(StartupOption.ROLLBACK,
-          "No valid image files found");
+      startNameNodeShouldFail("No valid image files found");
       UpgradeUtilities.createEmptyDirs(nameNodeDirs);
       
       log("NameNode rollback with corrupt version file", numDirs);
@@ -313,8 +310,7 @@ public class TestDFSRollback {
             "layoutVersion".getBytes(Charsets.UTF_8),
             "xxxxxxxxxxxxx".getBytes(Charsets.UTF_8));
       }
-      startNameNodeShouldFail(StartupOption.ROLLBACK,
-          "file VERSION has layoutVersion missing");
+      startNameNodeShouldFail("file VERSION has layoutVersion missing");
 
       UpgradeUtilities.createEmptyDirs(nameNodeDirs);
       
@@ -328,8 +324,7 @@ public class TestDFSRollback {
       
       UpgradeUtilities.createNameNodeVersionFile(conf, baseDirs,
           storageInfo, UpgradeUtilities.getCurrentBlockPoolID(cluster));
-      startNameNodeShouldFail(StartupOption.ROLLBACK,
-          "Cannot rollback to storage version 1 using this version");
+      startNameNodeShouldFail("Cannot rollback to storage version 1 using this version");
       UpgradeUtilities.createEmptyDirs(nameNodeDirs);
     } // end numDir loop
   }

+ 34 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java

@@ -62,6 +62,7 @@ import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.util.Shell;
 import org.junit.Assume;
 import org.junit.Before;
@@ -764,4 +765,37 @@ public class TestDFSUtil {
     assertEquals(4*24*60*60*1000l, DFSUtil.parseRelativeTime("4d"));
     assertEquals(999*24*60*60*1000l, DFSUtil.parseRelativeTime("999d"));
   }
+  
+  @Test
+  public void testAssertAllResultsEqual() {
+    checkAllResults(new Long[]{}, true);
+    checkAllResults(new Long[]{1l}, true);
+    checkAllResults(new Long[]{1l, 1l}, true);
+    checkAllResults(new Long[]{1l, 1l, 1l}, true);
+    checkAllResults(new Long[]{new Long(1), new Long(1)}, true);
+    checkAllResults(new Long[]{null, null, null}, true);
+    
+    checkAllResults(new Long[]{1l, 2l}, false);
+    checkAllResults(new Long[]{2l, 1l}, false);
+    checkAllResults(new Long[]{1l, 2l, 1l}, false);
+    checkAllResults(new Long[]{2l, 1l, 1l}, false);
+    checkAllResults(new Long[]{1l, 1l, 2l}, false);
+    checkAllResults(new Long[]{1l, null}, false);
+    checkAllResults(new Long[]{null, 1l}, false);
+    checkAllResults(new Long[]{1l, null, 1l}, false);
+  }
+  
+  private static void checkAllResults(Long[] toCheck, boolean shouldSucceed) {
+    if (shouldSucceed) {
+      DFSUtil.assertAllResultsEqual(Arrays.asList(toCheck));
+    } else {
+      try {
+        DFSUtil.assertAllResultsEqual(Arrays.asList(toCheck));
+        fail("Should not have succeeded with input: " +
+            Arrays.toString(toCheck));
+      } catch (AssertionError ae) {
+        GenericTestUtils.assertExceptionContains("Not all elements match", ae);
+      }
+    }
+  }
 }

+ 59 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java

@@ -19,20 +19,28 @@ package org.apache.hadoop.hdfs;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
+import java.io.File;
 import java.io.IOException;
+import java.io.RandomAccessFile;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
 import org.apache.hadoop.hdfs.protocol.Block;
+import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
+import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.TestInterDatanodeProtocol;
 import org.apache.hadoop.hdfs.server.namenode.LeaseManager;
 import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
+import org.apache.hadoop.security.UserGroupInformation;
 import org.junit.Test;
 
 public class TestLeaseRecovery {
@@ -148,4 +156,55 @@ public class TestLeaseRecovery {
       if (cluster != null) {cluster.shutdown();}
     }
   }
+
+  /**
+   * Test block recovery when the meta file does not have CRCs for all
+   * chunks of the block file.
+   */
+  @Test
+  public void testBlockRecoveryWithLessMetafile() throws Exception {
+    Configuration conf = new Configuration();
+    conf.set(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY,
+        UserGroupInformation.getCurrentUser().getShortUserName());
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1)
+        .build();
+    Path file = new Path("/testRecoveryFile");
+    DistributedFileSystem dfs = cluster.getFileSystem();
+    FSDataOutputStream out = dfs.create(file);
+    int count = 0;
+    while (count < 2 * 1024 * 1024) {
+      out.writeBytes("Data");
+      count += 4;
+    }
+    out.hsync();
+    // abort the original stream
+    ((DFSOutputStream) out.getWrappedStream()).abort();
+
+    LocatedBlocks locations = cluster.getNameNodeRpc().getBlockLocations(
+        file.toString(), 0, count);
+    ExtendedBlock block = locations.get(0).getBlock();
+    DataNode dn = cluster.getDataNodes().get(0);
+    BlockLocalPathInfo localPathInfo = dn.getBlockLocalPathInfo(block, null);
+    File metafile = new File(localPathInfo.getMetaPath());
+    assertTrue(metafile.exists());
+
+    // reduce the block meta file size
+    RandomAccessFile raf = new RandomAccessFile(metafile, "rw");
+    raf.setLength(metafile.length() - 20);
+    raf.close();
+
+    // restart DN to make replica to RWR
+    DataNodeProperties dnProp = cluster.stopDataNode(0);
+    cluster.restartDataNode(dnProp, true);
+
+    // try to recover the lease
+    DistributedFileSystem newdfs = (DistributedFileSystem) FileSystem
+        .newInstance(cluster.getConfiguration(0));
+    count = 0;
+    while (++count < 10 && !newdfs.recoverLease(file)) {
+      Thread.sleep(1000);
+    }
+    assertTrue("File should be closed", newdfs.recoverLease(file));
+
+  }
 }
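
The polling loop above is the standard lease-recovery idiom. Here is a small self-contained helper, assuming only the public DistributedFileSystem#recoverLease(Path) API; the helper name and retry budget are illustrative:

    // Illustrative helper: polls recoverLease() until the NameNode closes
    // the file or the retry budget runs out. recoverLease() returns true
    // once the file is closed and its lease released.
    static boolean waitForLeaseRecovery(DistributedFileSystem dfs, Path path,
        int maxAttempts, long sleepMs)
        throws IOException, InterruptedException {
      for (int i = 0; i < maxAttempts; i++) {
        if (dfs.recoverLease(path)) {
          return true;
        }
        Thread.sleep(sleepMs);
      }
      return false;
    }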

+ 9 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/MiniJournalCluster.java

@@ -167,8 +167,16 @@ public class MiniJournalCluster {
     return new File(baseDir, "journalnode-" + idx).getAbsoluteFile();
   }
   
+  public File getJournalDir(int idx, String jid) {
+    return new File(getStorageDir(idx), jid);
+  }
+  
   public File getCurrentDir(int idx, String jid) {
-    return new File(new File(getStorageDir(idx), jid), "current");
+    return new File(getJournalDir(idx, jid), "current");
+  }
+  
+  public File getPreviousDir(int idx, String jid) {
+    return new File(getJournalDir(idx, jid), "previous");
   }
 
   public JournalNode getJournalNode(int i) {

+ 9 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/MiniQJMHACluster.java

@@ -29,6 +29,7 @@ import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.MiniDFSNNTopology;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider;
 
@@ -47,6 +48,7 @@ public class MiniQJMHACluster {
 
   public static class Builder {
     private final Configuration conf;
+    private StartupOption startOpt = null;
     private final MiniDFSCluster.Builder dfsBuilder;
     
     public Builder(Configuration conf) {
@@ -61,6 +63,10 @@ public class MiniQJMHACluster {
     public MiniQJMHACluster build() throws IOException {
       return new MiniQJMHACluster(this);
     }
+
+    public void startupOption(StartupOption startOpt) {
+      this.startOpt = startOpt;
+    }
   }
   
   public static MiniDFSNNTopology createDefaultTopology() {
@@ -95,6 +101,9 @@ public class MiniQJMHACluster {
     Configuration confNN0 = cluster.getConfiguration(0);
     NameNode.initializeSharedEdits(confNN0, true);
     
+    cluster.getNameNodeInfos()[0].setStartOpt(builder.startOpt);
+    cluster.getNameNodeInfos()[1].setStartOpt(builder.startOpt);
+    
     // restart the cluster
     cluster.restartNameNodes();
   }
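
A hedged usage sketch of the new startupOption() hook follows. Note that it returns void, so unlike the other builder calls it cannot be chained; this is hypothetical usage, not code from the changeset:

    // Hypothetical usage of the API added above: both NameNodes in the
    // mini QJM HA cluster will be restarted with the given startup option.
    MiniQJMHACluster.Builder builder = new MiniQJMHACluster.Builder(conf);
    builder.getDfsBuilder().numDataNodes(0);
    builder.startupOption(StartupOption.UPGRADE);
    MiniQJMHACluster qjCluster = builder.build();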

+ 25 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestGenericJournalConf.java

@@ -27,6 +27,8 @@ import java.util.Collection;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.server.common.Storage;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
 import org.junit.Test;
 
@@ -191,6 +193,29 @@ public class TestGenericJournalConf {
       shouldPromptCalled = true;
       return false;
     }
+
+    @Override
+    public void doPreUpgrade() throws IOException {}
+
+    @Override
+    public void doUpgrade(Storage storage) throws IOException {}
+
+    @Override
+    public void doFinalize() throws IOException {}
+
+    @Override
+    public boolean canRollBack(StorageInfo storage, StorageInfo prevStorage, int targetLayoutVersion)
+        throws IOException {
+      return false;
+    }
+
+    @Override
+    public void doRollback() throws IOException {}
+
+    @Override
+    public long getJournalCTime() throws IOException {
+      return -1;
+    }
   }
 
   public static class BadConstructorJournalManager extends DummyJournalManager {
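
The no-op overrides above trace the new upgrade surface added to the JournalManager interface. A hedged sketch of the call order an upgrade presumably drives; the driver code below is illustrative, and jm, storage, prevStorage and targetLayoutVersion are assumed to be initialized:

    // Presumed lifecycle of the new JournalManager upgrade methods.
    jm.doPreUpgrade();        // preserve pre-upgrade state for rollback
    jm.doUpgrade(storage);    // move the journal to the new layout
    // ...then either make the upgrade permanent:
    jm.doFinalize();          // discard the preserved state
    // ...or roll back, if the previous state is still compatible:
    if (jm.canRollBack(storage, prevStorage, targetLayoutVersion)) {
      jm.doRollback();
    }
    long cTime = jm.getJournalCTime();  // exposed for consistency checks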

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestBootstrapStandby.java

@@ -91,7 +91,7 @@ public class TestBootstrapStandby {
       fail("Did not throw");
     } catch (IOException ioe) {
       GenericTestUtils.assertExceptionContains(
-          "Cannot start an HA namenode with name dirs that need recovery",
+          "storage directory does not exist or is not accessible",
           ioe);
     }
     

+ 674 - 49
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDFSUpgradeWithHA.java

@@ -1,89 +1,506 @@
 /**
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.hadoop.hdfs.server.namenode.ha;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
 import java.io.File;
 import java.io.IOException;
 import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.Collection;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.MiniDFSNNTopology;
+import org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster;
+import org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster.Builder;
+import org.apache.hadoop.hdfs.qjournal.server.Journal;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
 import org.apache.hadoop.hdfs.server.common.Storage;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
+import org.apache.hadoop.hdfs.tools.DFSAdmin;
+import org.apache.hadoop.hdfs.util.PersistentLongFile;
 import org.apache.hadoop.test.GenericTestUtils;
+import org.junit.Before;
 import org.junit.Test;
 
-import com.google.common.collect.Lists;
+import com.google.common.base.Joiner;
 
 /**
  * Tests for upgrading with HA enabled.
  */
 public class TestDFSUpgradeWithHA {
-  
+
   private static final Log LOG = LogFactory.getLog(TestDFSUpgradeWithHA.class);
+  
+  private Configuration conf;
+  
+  @Before
+  public void createConfiguration() {
+    conf = new HdfsConfiguration();
+    // Turn off persistent IPC, so that the DFSClient can survive NN restart
+    conf.setInt(
+        CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY,
+        0);
+  }
+
+  private static void assertCTimesEqual(MiniDFSCluster cluster) {
+    long nn1CTime = cluster.getNamesystem(0).getFSImage().getStorage().getCTime();
+    long nn2CTime = cluster.getNamesystem(1).getFSImage().getStorage().getCTime();
+    assertEquals(nn1CTime, nn2CTime);
+  }
+
+  private static void checkClusterPreviousDirExistence(MiniDFSCluster cluster,
+      boolean shouldExist) {
+    for (int i = 0; i < 2; i++) {
+      checkNnPreviousDirExistence(cluster, i, shouldExist);
+    }
+  }
+
+  private static void checkNnPreviousDirExistence(MiniDFSCluster cluster,
+      int index, boolean shouldExist) {
+    Collection<URI> nameDirs = cluster.getNameDirs(index);
+    for (URI nnDir : nameDirs) {
+      checkPreviousDirExistence(new File(nnDir), shouldExist);
+    }
+  }
 
+  private static void checkJnPreviousDirExistence(MiniQJMHACluster jnCluster,
+      boolean shouldExist) throws IOException {
+    for (int i = 0; i < 3; i++) {
+      checkPreviousDirExistence(
+          jnCluster.getJournalCluster().getJournalDir(i, "ns1"), shouldExist);
+    }
+    if (shouldExist) {
+      assertEpochFilesCopied(jnCluster);
+    }
+  }
+
+  private static void assertEpochFilesCopied(MiniQJMHACluster jnCluster)
+      throws IOException {
+    for (int i = 0; i < 3; i++) {
+      File journalDir = jnCluster.getJournalCluster().getJournalDir(i, "ns1");
+      File currDir = new File(journalDir, "current");
+      File prevDir = new File(journalDir, "previous");
+      for (String fileName : new String[]{ Journal.LAST_PROMISED_FILENAME,
+          Journal.LAST_WRITER_EPOCH }) {
+        File prevFile = new File(prevDir, fileName);
+        // The prev file may not exist, e.g. if there has never been a
+        // writer before the upgrade.
+        if (prevFile.exists()) {
+          PersistentLongFile prevLongFile = new PersistentLongFile(prevFile, -10);
+          PersistentLongFile currLongFile = new PersistentLongFile(new File(currDir,
+              fileName), -11);
+          assertTrue("Value in " + fileName + " has decreased on upgrade in "
+              + journalDir, prevLongFile.get() <= currLongFile.get());
+        }
+      }
+    }
+  }
+
+  private static void checkPreviousDirExistence(File rootDir,
+      boolean shouldExist) {
+    File previousDir = new File(rootDir, "previous");
+    if (shouldExist) {
+      assertTrue(previousDir + " does not exist", previousDir.exists());
+    } else {
+      assertFalse(previousDir + " does exist", previousDir.exists());
+    }
+  }
+  
+  private void runFinalizeCommand(MiniDFSCluster cluster)
+      throws IOException {
+    HATestUtil.setFailoverConfigurations(cluster, conf);
+    new DFSAdmin(conf).finalizeUpgrade();
+  }
+  
   /**
-   * Make sure that an HA NN refuses to start if given an upgrade-related
-   * startup option.
+   * Ensure that an admin cannot finalize an HA upgrade without at least one NN
+   * being active.
    */
   @Test
-  public void testStartingWithUpgradeOptionsFails() throws IOException {
-    for (StartupOption startOpt : Lists.newArrayList(new StartupOption[] {
-        StartupOption.UPGRADE, StartupOption.FINALIZE,
-        StartupOption.ROLLBACK })) {
-      MiniDFSCluster cluster = null;
+  public void testCannotFinalizeIfNoActive() throws IOException,
+      URISyntaxException {
+    MiniDFSCluster cluster = null;
+    FileSystem fs = null;
+    try {
+      cluster = new MiniDFSCluster.Builder(conf)
+          .nnTopology(MiniDFSNNTopology.simpleHATopology())
+          .numDataNodes(0)
+          .build();
+
+      File sharedDir = new File(cluster.getSharedEditsDir(0, 1));
+      
+      // No upgrade is in progress at the moment.
+      checkClusterPreviousDirExistence(cluster, false);
+      assertCTimesEqual(cluster);
+      checkPreviousDirExistence(sharedDir, false);
+      
+      // Transition NN0 to active and do some FS ops.
+      cluster.transitionToActive(0);
+      fs = HATestUtil.configureFailoverFs(cluster, conf);
+      assertTrue(fs.mkdirs(new Path("/foo1")));
+      
+      // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
+      // flag.
+      cluster.shutdownNameNode(1);
+      cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
+      cluster.restartNameNode(0, false);
+      
+      checkNnPreviousDirExistence(cluster, 0, true);
+      checkNnPreviousDirExistence(cluster, 1, false);
+      checkPreviousDirExistence(sharedDir, true);
+      
+      // NN0 should come up in the active state when given the -upgrade option,
+      // so no need to transition it to active.
+      assertTrue(fs.mkdirs(new Path("/foo2")));
+      
+      // Restart NN0 without the -upgrade flag, to make sure that works.
+      cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
+      cluster.restartNameNode(0, false);
+      
+      // Make sure we can still do FS ops after upgrading.
+      cluster.transitionToActive(0);
+      assertTrue(fs.mkdirs(new Path("/foo3")));
+      
+      // Now bootstrap the standby with the upgraded info.
+      int rc = BootstrapStandby.run(
+          new String[]{"-force"},
+          cluster.getConfiguration(1));
+      assertEquals(0, rc);
+      
+      // Now restart NN1 and make sure that we can do ops against that as well.
+      cluster.restartNameNode(1);
+      cluster.transitionToStandby(0);
+      cluster.transitionToActive(1);
+      assertTrue(fs.mkdirs(new Path("/foo4")));
+      
+      assertCTimesEqual(cluster);
+      
+      // Now there's no active NN.
+      cluster.transitionToStandby(1);
+
       try {
-        cluster = new MiniDFSCluster.Builder(new Configuration())
-            .nnTopology(MiniDFSNNTopology.simpleHATopology())
-            .startupOption(startOpt)
-            .numDataNodes(0)
-            .build();
-        fail("Should not have been able to start an HA NN in upgrade mode");
-      } catch (IllegalArgumentException iae) {
+        runFinalizeCommand(cluster);
+        fail("Should not have been able to finalize upgrade with no NN active");
+      } catch (IOException ioe) {
         GenericTestUtils.assertExceptionContains(
-            "Cannot perform DFS upgrade with HA enabled.", iae);
-        LOG.info("Got expected exception", iae);
-      } finally {
-        if (cluster != null) {
-          cluster.shutdown();
-        }
+            "Cannot finalize with no NameNode active", ioe);
+      }
+    } finally {
+      if (fs != null) {
+        fs.close();
+      }
+      if (cluster != null) {
+        cluster.shutdown();
       }
     }
   }
-  
+
   /**
-   * Make sure that an HA NN won't start if a previous upgrade was in progress.
+   * Make sure that an HA NN using NFS-based shared storage can successfully
+   * start and upgrade.
    */
   @Test
-  public void testStartingWithUpgradeInProgressFails() throws Exception {
+  public void testNfsUpgrade() throws IOException, URISyntaxException {
     MiniDFSCluster cluster = null;
+    FileSystem fs = null;
     try {
-      cluster = new MiniDFSCluster.Builder(new Configuration())
+      cluster = new MiniDFSCluster.Builder(conf)
           .nnTopology(MiniDFSNNTopology.simpleHATopology())
           .numDataNodes(0)
           .build();
       
+      File sharedDir = new File(cluster.getSharedEditsDir(0, 1));
+      
+      // No upgrade is in progress at the moment.
+      checkClusterPreviousDirExistence(cluster, false);
+      assertCTimesEqual(cluster);
+      checkPreviousDirExistence(sharedDir, false);
+      
+      // Transition NN0 to active and do some FS ops.
+      cluster.transitionToActive(0);
+      fs = HATestUtil.configureFailoverFs(cluster, conf);
+      assertTrue(fs.mkdirs(new Path("/foo1")));
+      
+      // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
+      // flag.
+      cluster.shutdownNameNode(1);
+      cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
+      cluster.restartNameNode(0, false);
+      
+      checkNnPreviousDirExistence(cluster, 0, true);
+      checkNnPreviousDirExistence(cluster, 1, false);
+      checkPreviousDirExistence(sharedDir, true);
+      
+      // NN0 should come up in the active state when given the -upgrade option,
+      // so no need to transition it to active.
+      assertTrue(fs.mkdirs(new Path("/foo2")));
+      
+      // Restart NN0 without the -upgrade flag, to make sure that works.
+      cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
+      cluster.restartNameNode(0, false);
+      
+      // Make sure we can still do FS ops after upgrading.
+      cluster.transitionToActive(0);
+      assertTrue(fs.mkdirs(new Path("/foo3")));
+      
+      // Now bootstrap the standby with the upgraded info.
+      int rc = BootstrapStandby.run(
+          new String[]{"-force"},
+          cluster.getConfiguration(1));
+      assertEquals(0, rc);
+      
+      // Now restart NN1 and make sure that we can do ops against that as well.
+      cluster.restartNameNode(1);
+      cluster.transitionToStandby(0);
+      cluster.transitionToActive(1);
+      assertTrue(fs.mkdirs(new Path("/foo4")));
+      
+      assertCTimesEqual(cluster);
+    } finally {
+      if (fs != null) {
+        fs.close();
+      }
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+
+  /**
+   * Make sure that an HA NN can successfully upgrade when configured using
+   * JournalNodes.
+   */
+  @Test
+  public void testUpgradeWithJournalNodes() throws IOException,
+      URISyntaxException {
+    MiniQJMHACluster qjCluster = null;
+    FileSystem fs = null;
+    try {
+      Builder builder = new MiniQJMHACluster.Builder(conf);
+      builder.getDfsBuilder()
+          .numDataNodes(0);
+      qjCluster = builder.build();
+
+      MiniDFSCluster cluster = qjCluster.getDfsCluster();
+      
+      // No upgrade is in progress at the moment.
+      checkJnPreviousDirExistence(qjCluster, false);
+      checkClusterPreviousDirExistence(cluster, false);
+      assertCTimesEqual(cluster);
+      
+      // Transition NN0 to active and do some FS ops.
+      cluster.transitionToActive(0);
+      fs = HATestUtil.configureFailoverFs(cluster, conf);
+      assertTrue(fs.mkdirs(new Path("/foo1")));
+      
+      // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
+      // flag.
+      cluster.shutdownNameNode(1);
+      cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
+      cluster.restartNameNode(0, false);
+      
+      checkNnPreviousDirExistence(cluster, 0, true);
+      checkNnPreviousDirExistence(cluster, 1, false);
+      checkJnPreviousDirExistence(qjCluster, true);
+      
+      // NN0 should come up in the active state when given the -upgrade option,
+      // so no need to transition it to active.
+      assertTrue(fs.mkdirs(new Path("/foo2")));
+      
+      // Restart NN0 without the -upgrade flag, to make sure that works.
+      cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
+      cluster.restartNameNode(0, false);
+      
+      // Make sure we can still do FS ops after upgrading.
+      cluster.transitionToActive(0);
+      assertTrue(fs.mkdirs(new Path("/foo3")));
+      
+      // Now bootstrap the standby with the upgraded info.
+      int rc = BootstrapStandby.run(
+          new String[]{"-force"},
+          cluster.getConfiguration(1));
+      assertEquals(0, rc);
+      
+      // Now restart NN1 and make sure that we can do ops against that as well.
+      cluster.restartNameNode(1);
+      cluster.transitionToStandby(0);
+      cluster.transitionToActive(1);
+      assertTrue(fs.mkdirs(new Path("/foo4")));
+      
+      assertCTimesEqual(cluster);
+    } finally {
+      if (fs != null) {
+        fs.close();
+      }
+      if (qjCluster != null) {
+        qjCluster.shutdown();
+      }
+    }
+  }
+
+  @Test
+  public void testFinalizeWithJournalNodes() throws IOException,
+      URISyntaxException {
+    MiniQJMHACluster qjCluster = null;
+    FileSystem fs = null;
+    try {
+      Builder builder = new MiniQJMHACluster.Builder(conf);
+      builder.getDfsBuilder()
+          .numDataNodes(0);
+      qjCluster = builder.build();
+
+      MiniDFSCluster cluster = qjCluster.getDfsCluster();
+      
+      // No upgrade is in progress at the moment.
+      checkJnPreviousDirExistence(qjCluster, false);
+      checkClusterPreviousDirExistence(cluster, false);
+      assertCTimesEqual(cluster);
+      
+      // Transition NN0 to active and do some FS ops.
+      cluster.transitionToActive(0);
+      fs = HATestUtil.configureFailoverFs(cluster, conf);
+      assertTrue(fs.mkdirs(new Path("/foo1")));
+      
+      // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
+      // flag.
+      cluster.shutdownNameNode(1);
+      cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
+      cluster.restartNameNode(0, false);
+      
+      assertTrue(fs.mkdirs(new Path("/foo2")));
+      
+      checkNnPreviousDirExistence(cluster, 0, true);
+      checkNnPreviousDirExistence(cluster, 1, false);
+      checkJnPreviousDirExistence(qjCluster, true);
+      
+      // Now bootstrap the standby with the upgraded info.
+      int rc = BootstrapStandby.run(
+          new String[]{"-force"},
+          cluster.getConfiguration(1));
+      assertEquals(0, rc);
+      
+      cluster.restartNameNode(1);
+      
+      runFinalizeCommand(cluster);
+      
+      checkClusterPreviousDirExistence(cluster, false);
+      checkJnPreviousDirExistence(qjCluster, false);
+      assertCTimesEqual(cluster);
+    } finally {
+      if (fs != null) {
+        fs.close();
+      }
+      if (qjCluster != null) {
+        qjCluster.shutdown();
+      }
+    }
+  }
+  
+  /**
+   * Make sure that we're allowed to finalize even if the NN that initiated
+   * the upgrade is in the standby state.
+   */
+  @Test
+  public void testFinalizeFromSecondNameNodeWithJournalNodes()
+      throws IOException, URISyntaxException {
+    MiniQJMHACluster qjCluster = null;
+    FileSystem fs = null;
+    try {
+      Builder builder = new MiniQJMHACluster.Builder(conf);
+      builder.getDfsBuilder()
+          .numDataNodes(0);
+      qjCluster = builder.build();
+
+      MiniDFSCluster cluster = qjCluster.getDfsCluster();
+      
+      // No upgrade is in progress at the moment.
+      checkJnPreviousDirExistence(qjCluster, false);
+      checkClusterPreviousDirExistence(cluster, false);
+      assertCTimesEqual(cluster);
+      
+      // Transition NN0 to active and do some FS ops.
+      cluster.transitionToActive(0);
+      fs = HATestUtil.configureFailoverFs(cluster, conf);
+      assertTrue(fs.mkdirs(new Path("/foo1")));
+      
+      // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
+      // flag.
+      cluster.shutdownNameNode(1);
+      cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
+      cluster.restartNameNode(0, false);
+      
+      checkNnPreviousDirExistence(cluster, 0, true);
+      checkNnPreviousDirExistence(cluster, 1, false);
+      checkJnPreviousDirExistence(qjCluster, true);
+      
+      // Now bootstrap the standby with the upgraded info.
+      int rc = BootstrapStandby.run(
+          new String[]{"-force"},
+          cluster.getConfiguration(1));
+      assertEquals(0, rc);
+      
+      cluster.restartNameNode(1);
+      
+      // Make the second NN (not the one that initiated the upgrade) active when
+      // the finalize command is run.
+      cluster.transitionToStandby(0);
+      cluster.transitionToActive(1);
+      
+      runFinalizeCommand(cluster);
+      
+      checkClusterPreviousDirExistence(cluster, false);
+      checkJnPreviousDirExistence(qjCluster, false);
+      assertCTimesEqual(cluster);
+    } finally {
+      if (fs != null) {
+        fs.close();
+      }
+      if (qjCluster != null) {
+        qjCluster.shutdown();
+      }
+    }
+  }
+
+  /**
+   * Make sure that an HA NN will start if a previous upgrade was in progress.
+   */
+  @Test
+  public void testStartingWithUpgradeInProgressSucceeds() throws Exception {
+    MiniDFSCluster cluster = null;
+    try {
+      cluster = new MiniDFSCluster.Builder(conf)
+          .nnTopology(MiniDFSNNTopology.simpleHATopology())
+          .numDataNodes(0)
+          .build();
+
       // Simulate an upgrade having started.
       for (int i = 0; i < 2; i++) {
         for (URI uri : cluster.getNameDirs(i)) {
@@ -92,18 +509,226 @@ public class TestDFSUpgradeWithHA {
           assertTrue(prevTmp.mkdirs());
         }
       }
-      
+
       cluster.restartNameNodes();
-      fail("Should not have been able to start an HA NN with an in-progress upgrade");
-    } catch (IOException ioe) {
-      GenericTestUtils.assertExceptionContains(
-          "Cannot start an HA namenode with name dirs that need recovery.",
-          ioe);
-      LOG.info("Got expected exception", ioe);
     } finally {
       if (cluster != null) {
         cluster.shutdown();
       }
     }
   }
+
+  /**
+   * Test rollback with NFS shared dir.
+   */
+  @Test
+  public void testRollbackWithNfs() throws Exception {
+    MiniDFSCluster cluster = null;
+    FileSystem fs = null;
+    try {
+      cluster = new MiniDFSCluster.Builder(conf)
+          .nnTopology(MiniDFSNNTopology.simpleHATopology())
+          .numDataNodes(0)
+          .build();
+
+      File sharedDir = new File(cluster.getSharedEditsDir(0, 1));
+      
+      // No upgrade is in progress at the moment.
+      checkClusterPreviousDirExistence(cluster, false);
+      assertCTimesEqual(cluster);
+      checkPreviousDirExistence(sharedDir, false);
+      
+      // Transition NN0 to active and do some FS ops.
+      cluster.transitionToActive(0);
+      fs = HATestUtil.configureFailoverFs(cluster, conf);
+      assertTrue(fs.mkdirs(new Path("/foo1")));
+      
+      // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
+      // flag.
+      cluster.shutdownNameNode(1);
+      cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
+      cluster.restartNameNode(0, false);
+      
+      checkNnPreviousDirExistence(cluster, 0, true);
+      checkNnPreviousDirExistence(cluster, 1, false);
+      checkPreviousDirExistence(sharedDir, true);
+      
+      // NN0 should come up in the active state when given the -upgrade option,
+      // so no need to transition it to active.
+      assertTrue(fs.mkdirs(new Path("/foo2")));
+      
+      // Now bootstrap the standby with the upgraded info.
+      int rc = BootstrapStandby.run(
+          new String[]{"-force"},
+          cluster.getConfiguration(1));
+      assertEquals(0, rc);
+      
+      cluster.restartNameNode(1);
+      
+      checkNnPreviousDirExistence(cluster, 0, true);
+      checkNnPreviousDirExistence(cluster, 1, false);
+      checkPreviousDirExistence(sharedDir, true);
+      assertCTimesEqual(cluster);
+      
+      // Now shut down the cluster and do the rollback.
+      Collection<URI> nn1NameDirs = cluster.getNameDirs(0);
+      cluster.shutdown();
+
+      conf.setStrings(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, Joiner.on(",").join(nn1NameDirs));
+      NameNode.doRollback(conf, false);
+
+      // The rollback operation should have rolled back the first NN's local
+      // dirs, and the shared dir, but not the other NN's dirs. Those have to be
+      // done by bootstrapping the standby.
+      checkNnPreviousDirExistence(cluster, 0, false);
+      checkPreviousDirExistence(sharedDir, false);
+    } finally {
+      if (fs != null) {
+        fs.close();
+      }
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+  
+  @Test
+  public void testRollbackWithJournalNodes() throws IOException,
+      URISyntaxException {
+    MiniQJMHACluster qjCluster = null;
+    FileSystem fs = null;
+    try {
+      Builder builder = new MiniQJMHACluster.Builder(conf);
+      builder.getDfsBuilder()
+          .numDataNodes(0);
+      qjCluster = builder.build();
+
+      MiniDFSCluster cluster = qjCluster.getDfsCluster();
+      
+      // No upgrade is in progress at the moment.
+      checkClusterPreviousDirExistence(cluster, false);
+      assertCTimesEqual(cluster);
+      checkJnPreviousDirExistence(qjCluster, false);
+      
+      // Transition NN0 to active and do some FS ops.
+      cluster.transitionToActive(0);
+      fs = HATestUtil.configureFailoverFs(cluster, conf);
+      assertTrue(fs.mkdirs(new Path("/foo1")));
+      
+      // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
+      // flag.
+      cluster.shutdownNameNode(1);
+      cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
+      cluster.restartNameNode(0, false);
+      
+      checkNnPreviousDirExistence(cluster, 0, true);
+      checkNnPreviousDirExistence(cluster, 1, false);
+      checkJnPreviousDirExistence(qjCluster, true);
+      
+      // NN0 should come up in the active state when given the -upgrade option,
+      // so no need to transition it to active.
+      assertTrue(fs.mkdirs(new Path("/foo2")));
+      
+      // Now bootstrap the standby with the upgraded info.
+      int rc = BootstrapStandby.run(
+          new String[]{"-force"},
+          cluster.getConfiguration(1));
+      assertEquals(0, rc);
+      
+      cluster.restartNameNode(1);
+      
+      checkNnPreviousDirExistence(cluster, 0, true);
+      checkNnPreviousDirExistence(cluster, 1, false);
+      checkJnPreviousDirExistence(qjCluster, true);
+      assertCTimesEqual(cluster);
+      
+      // Shut down the NNs, but deliberately leave the JNs up and running.
+      Collection<URI> nn1NameDirs = cluster.getNameDirs(0);
+      cluster.shutdown();
+
+      conf.setStrings(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, Joiner.on(",").join(nn1NameDirs));
+      NameNode.doRollback(conf, false);
+
+      // The rollback operation should have rolled back the first NN's local
+      // dirs, and the shared dir, but not the other NN's dirs. Those have to be
+      // done by bootstrapping the standby.
+      checkNnPreviousDirExistence(cluster, 0, false);
+      checkJnPreviousDirExistence(qjCluster, false);
+    } finally {
+      if (fs != null) {
+        fs.close();
+      }
+      if (qjCluster != null) {
+        qjCluster.shutdown();
+      }
+    }
+  }
+  
+  /**
+   * Make sure that starting a second NN with the -upgrade flag fails if the
+   * other NN has already done that.
+   */
+  @Test
+  public void testCannotUpgradeSecondNameNode() throws IOException,
+      URISyntaxException {
+    MiniDFSCluster cluster = null;
+    FileSystem fs = null;
+    try {
+      cluster = new MiniDFSCluster.Builder(conf)
+      .nnTopology(MiniDFSNNTopology.simpleHATopology())
+      .numDataNodes(0)
+      .build();
+  
+      File sharedDir = new File(cluster.getSharedEditsDir(0, 1));
+      
+      // No upgrade is in progress at the moment.
+      checkClusterPreviousDirExistence(cluster, false);
+      assertCTimesEqual(cluster);
+      checkPreviousDirExistence(sharedDir, false);
+      
+      // Transition NN0 to active and do some FS ops.
+      cluster.transitionToActive(0);
+      fs = HATestUtil.configureFailoverFs(cluster, conf);
+      assertTrue(fs.mkdirs(new Path("/foo1")));
+      
+      // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
+      // flag.
+      cluster.shutdownNameNode(1);
+      cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
+      cluster.restartNameNode(0, false);
+      
+      checkNnPreviousDirExistence(cluster, 0, true);
+      checkNnPreviousDirExistence(cluster, 1, false);
+      checkPreviousDirExistence(sharedDir, true);
+      
+      // NN0 should come up in the active state when given the -upgrade option,
+      // so no need to transition it to active.
+      assertTrue(fs.mkdirs(new Path("/foo2")));
+      
+      // Restart NN0 without the -upgrade flag, to make sure that works.
+      cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
+      cluster.restartNameNode(0, false);
+      
+      // Make sure we can still do FS ops after upgrading.
+      cluster.transitionToActive(0);
+      assertTrue(fs.mkdirs(new Path("/foo3")));
+      
+      // Make sure that starting the second NN with the -upgrade flag fails.
+      cluster.getNameNodeInfos()[1].setStartOpt(StartupOption.UPGRADE);
+      try {
+        cluster.restartNameNode(1, false);
+        fail("Should not have been able to start second NN with -upgrade");
+      } catch (IOException ioe) {
+        GenericTestUtils.assertExceptionContains(
+            "It looks like the shared log is already being upgraded", ioe);
+      }
+    } finally {
+      if (fs != null) {
+        fs.close();
+      }
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
 }
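
The tests above all share the same upgrade choreography. Condensed into one hedged sketch; every name comes from this diff, and the sequence mirrors the tests rather than any separate admin API:

    // Condensed HA upgrade flow, as exercised by the tests above.
    cluster.shutdownNameNode(1);                   // stop the standby
    cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
    cluster.restartNameNode(0, false);             // NN0 upgrades, comes up active
    BootstrapStandby.run(new String[]{"-force"},   // re-sync the standby
        cluster.getConfiguration(1));
    cluster.restartNameNode(1);                    // standby rejoins
    new DFSAdmin(conf).finalizeUpgrade();          // finalize (needs an active NN)
    // ...or, with the NNs shut down, roll back instead:
    // NameNode.doRollback(conf, false);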

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestInitializeSharedEdits.java

@@ -96,7 +96,7 @@ public class TestInitializeSharedEdits {
     } catch (IOException ioe) {
       LOG.info("Got expected exception", ioe);
       GenericTestUtils.assertExceptionContains(
-          "Cannot start an HA namenode with name dirs that need recovery", ioe);
+          "storage directory does not exist or is not accessible", ioe);
     }
     try {
       cluster.restartNameNode(1, false);
@@ -104,7 +104,7 @@ public class TestInitializeSharedEdits {
     } catch (IOException ioe) {
       LOG.info("Got expected exception", ioe);
       GenericTestUtils.assertExceptionContains(
-          "Cannot start an HA namenode with name dirs that need recovery", ioe);
+          "storage directory does not exist or is not accessible", ioe);
     }
   }
   

+ 64 - 3
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotFileLength.java

@@ -17,22 +17,26 @@
  */
 package org.apache.hadoop.hdfs.server.namenode.snapshot;
 
-import java.util.Random;
+import java.io.ByteArrayOutputStream;
+import java.io.PrintStream;
+
 
 import org.apache.hadoop.fs.FileStatus;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
 import static org.hamcrest.CoreMatchers.is;
-import static org.junit.Assert.assertThat;
-import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.*;
+
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FsShell;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.util.ToolRunner;
 
 public class TestSnapshotFileLength {
 
@@ -112,4 +116,61 @@ public class TestSnapshotFileLength {
     assertThat(bytesRead, is(BLOCKSIZE));
     fis.close();
   }
+
+  /**
+   * Added as part of HDFS-5343.
+   * Tests that the cat command on a snapshot path cannot read a file
+   * beyond the snapshot file length.
+   * @throws Exception
+   */
+  @Test (timeout = 600000)
+  public void testSnapshotFileLengthWithCatCommand() throws Exception {
+
+    FSDataInputStream fis = null;
+    FileStatus fileStatus = null;
+
+    int bytesRead;
+    byte[] buffer = new byte[BLOCKSIZE * 8];
+
+    hdfs.mkdirs(sub);
+    Path file1 = new Path(sub, file1Name);
+    DFSTestUtil.createFile(hdfs, file1, BLOCKSIZE, REPLICATION, SEED);
+
+    hdfs.allowSnapshot(sub);
+    hdfs.createSnapshot(sub, snapshot1);
+
+    DFSTestUtil.appendFile(hdfs, file1, BLOCKSIZE);
+
+    // Make sure we can read the entire file via its non-snapshot path.
+    fileStatus = hdfs.getFileStatus(file1);
+    assertEquals(fileStatus.getLen(), BLOCKSIZE * 2);
+    fis = hdfs.open(file1);
+    bytesRead = fis.read(buffer, 0, buffer.length);
+    assertEquals(bytesRead, BLOCKSIZE * 2);
+    fis.close();
+
+    Path file1snap1 =
+        SnapshotTestHelper.getSnapshotPath(sub, snapshot1, file1Name);
+    fis = hdfs.open(file1snap1);
+    fileStatus = hdfs.getFileStatus(file1snap1);
+    assertEquals(fileStatus.getLen(), BLOCKSIZE);
+    // Make sure we can only read up to the snapshot length.
+    bytesRead = fis.read(buffer, 0, buffer.length);
+    assertEquals(bytesRead, BLOCKSIZE);
+    fis.close();
+
+    PrintStream psBackup = System.out;
+    PrintStream psErrBackup = System.err;
+    ByteArrayOutputStream bao = new ByteArrayOutputStream();
+    System.setOut(new PrintStream(bao));
+    System.setErr(new PrintStream(bao));
+    // Make sure cat can only read the file up to the snapshot length.
+    FsShell shell = new FsShell();
+    try {
+      ToolRunner.run(conf, shell, new String[] { "-cat",
+          "/TestSnapshotFileLength/sub1/.snapshot/snapshot1/file1" });
+      assertEquals(bao.size(), BLOCKSIZE);
+    } finally {
+      // Restore both streams, since both were redirected above.
+      System.setOut(psBackup);
+      System.setErr(psErrBackup);
+    }
+  }
 }
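
Redirecting System.out to capture FsShell output, as above, is a recurring test idiom. A minimal reusable sketch follows; the helper name is illustrative:

    // Illustrative helper: runs an FsShell command under ToolRunner and
    // returns whatever the command printed to stdout.
    static String runShellCaptured(Configuration conf, String... args)
        throws Exception {
      PrintStream oldOut = System.out;
      ByteArrayOutputStream bao = new ByteArrayOutputStream();
      System.setOut(new PrintStream(bao));
      try {
        ToolRunner.run(conf, new FsShell(), args);
      } finally {
        System.setOut(oldOut);
      }
      return bao.toString();
    }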

+ 29 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java

@@ -44,9 +44,13 @@ import org.apache.hadoop.mapreduce.v2.util.MRApps;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.yarn.api.ApplicationClientProtocol;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ApplicationReport;
 import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerReport;
 import org.apache.hadoop.yarn.api.records.NodeReport;
 import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
 import org.apache.hadoop.yarn.api.records.NodeState;
@@ -371,4 +375,29 @@ public class ResourceMgrDelegate extends YarnClient {
       IOException {
     return client.getQueueAclsInfo();
   }
+
+  @Override
+  public ApplicationAttemptReport getApplicationAttemptReport(
+      ApplicationAttemptId appAttemptId) throws YarnException, IOException {
+    return client.getApplicationAttemptReport(appAttemptId);
+  }
+
+  @Override
+  public List<ApplicationAttemptReport> getApplicationAttempts(
+      ApplicationId appId) throws YarnException, IOException {
+    return client.getApplicationAttempts(appId);
+  }
+
+  @Override
+  public ContainerReport getContainerReport(ContainerId containerId)
+      throws YarnException, IOException {
+    return client.getContainerReport(containerId);
+  }
+
+  @Override
+  public List<ContainerReport> getContainers(
+      ApplicationAttemptId applicationAttemptId) throws YarnException,
+      IOException {
+    return client.getContainers(applicationAttemptId);
+  }
 }
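
Since ResourceMgrDelegate extends YarnClient and forwards each of these calls to the wrapped client, the new report APIs can be used directly. A hedged usage sketch; delegate and appId are assumed to exist, and the types come from java.util and org.apache.hadoop.yarn.api.records:

    // Illustrative walk of an application's attempts and their containers
    // via the newly delegated APIs.
    List<ApplicationAttemptReport> attempts =
        delegate.getApplicationAttempts(appId);
    for (ApplicationAttemptReport attempt : attempts) {
      ApplicationAttemptId attemptId = attempt.getApplicationAttemptId();
      for (ContainerReport container : delegate.getContainers(attemptId)) {
        System.out.println(container.getContainerId());
      }
    }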

+ 0 - 1
hadoop-mapreduce-project/pom.xml

@@ -29,7 +29,6 @@
   <url>http://hadoop.apache.org/mapreduce/</url>
 
   <properties>
-    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
     <test.logs>true</test.logs>
     <test.timeout>600000</test.timeout>
     <fork.mode>once</fork.mode>

+ 0 - 4
hadoop-project/pom.xml

@@ -39,10 +39,6 @@
     <!-- number of threads/forks to use when running tests in parallel, see parallel-tests profile -->
     <testsThreadCount>4</testsThreadCount>
 
-    <!-- platform encoding override -->
-    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
-    <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
-
    <!-- These 2 versions are defined here because they are used -->
     <!-- JDIFF generation from embedded ant in the antrun plugin -->
     <jdiff.version>1.0.9</jdiff.version>

+ 1 - 0
hadoop-project/src/site/site.xml

@@ -59,6 +59,7 @@
       <item name="CLI Mini Cluster" href="hadoop-project-dist/hadoop-common/CLIMiniCluster.html"/>
       <item name="Native Libraries" href="hadoop-project-dist/hadoop-common/NativeLibraries.html"/>
       <item name="Superusers" href="hadoop-project-dist/hadoop-common/Superusers.html"/>
+      <item name="Secure Mode" href="hadoop-project-dist/hadoop-common/SecureMode.html"/>
       <item name="Service Level Authorization" href="hadoop-project-dist/hadoop-common/ServiceLevelAuth.html"/>
       <item name="HTTP Authentication" href="hadoop-project-dist/hadoop-common/HttpAuthentication.html"/>
     </menu>

+ 0 - 1
hadoop-tools/hadoop-distcp/pom.xml

@@ -33,7 +33,6 @@
   <properties>
     <file.encoding>UTF-8</file.encoding>
     <downloadSources>true</downloadSources>
-    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
   </properties>
 
   <dependencies>

+ 0 - 1
hadoop-tools/hadoop-openstack/pom.xml

@@ -35,7 +35,6 @@
   <properties>
     <file.encoding>UTF-8</file.encoding>
     <downloadSources>true</downloadSources>
-    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
   </properties>
 
   <profiles>

+ 97 - 0
hadoop-yarn-project/CHANGES.txt

@@ -9,8 +9,67 @@ Trunk - Unreleased
     YARN-1496. Protocol additions to allow moving apps between queues (Sandy
     Ryza)
 
+    YARN-930. Bootstrapping ApplicationHistoryService module. (vinodkv)
+  
+    YARN-947. Implementing the data objects to be used by the History reader
+    and writer interfaces. (Zhijie Shen via vinodkv)
+  
+    YARN-934. Defined a Writer Interface for HistoryStorage. (Zhijie Shen via
+    vinodkv)
+  
+    YARN-925. Defined a Reader Interface for HistoryStorage. (Mayank Bansal via
+    vinodkv)
+  
+    YARN-978. Created ApplicationAttemptReport. (Mayank Bansal via vinodkv)
+  
+    YARN-956. Added a testable in-memory HistoryStorage. (Mayank Bansal via
+    vinodkv)
+  
+    YARN-975. Added a file-system implementation for HistoryStorage. (Zhijie Shen
+    via vinodkv)
+  
+    YARN-1123. Added a new ContainerReport and its Protobuf implementation. (Mayank
+    Bansal via vinodkv)
+  
+    YARN-979. Added more APIs for getting information about ApplicationAttempts
+    and Containers from ApplicationHistoryProtocol. (Mayank Bansal and Zhijie Shen
+    via vinodkv)
+  
+    YARN-953. Changed ResourceManager to start writing history data. (Zhijie Shen
+    via vinodkv)
+  
+    YARN-1266. Implemented PB service and client wrappers for
+    ApplicationHistoryProtocol. (Mayank Bansal via vinodkv)
+  
+    YARN-955. Implemented ApplicationHistoryProtocol handler. (Mayank Bansal via
+    vinodkv)
+  
+    YARN-1242. Changed yarn scripts to be able to start ApplicationHistoryServer
+    as an individual process. (Mayank Bansal via vinodkv)
+  
+    YARN-954. Implemented web UI for the ApplicationHistoryServer and wired it into
+    the HistoryStorage. (Zhijie Shen via vinodkv)
+  
+    YARN-967. Added the client and CLI interfaces for obtaining ApplicationHistory
+    data. (Mayank Bansal via vinodkv)
+  
+    YARN-1023. Added Webservices REST APIs support for Application History. (Zhijie
+    Shen via vinodkv)
+  
+    YARN-1413. Implemented serving of aggregated-logs in the ApplicationHistory
+    server. (Mayank Bansal via vinodkv)
+
   IMPROVEMENTS
 
+    YARN-1007. Enhance History Reader interface for Containers. (Mayank Bansal via
+    devaraj)
+  
+    YARN-974. Added more information to RMContainer to be collected and recorded in
+    Application-History. (Zhijie Shen via vinodkv)
+  
+    YARN-987. Added ApplicationHistoryManager responsible for exposing reports to
+    all clients. (Mayank Bansal via vinodkv)
+
   OPTIMIZATIONS
 
   BUG FIXES
@@ -18,6 +77,41 @@ Trunk - Unreleased
     YARN-524 TestYarnVersionInfo failing if generated properties doesn't
     include an SVN URL. (stevel)
 
+    YARN-935. Correcting pom.xml to build applicationhistoryserver module
+    successfully. (Zhijie Shen via vinodkv)
+  
+    YARN-962. Fixed bug in application-history proto file and renamed it be just
+    a client proto file. (Zhijie Shen via vinodkv)
+  
+    YARN-984. Renamed the incorrectly named applicationhistoryservice.records.pb.impl
+    package to be applicationhistoryservice.records.impl.pb. (Devaraj K via vinodkv)
+  
+    YARN-1534. Fixed failure of test TestAHSWebApp. (Shinichi Yamashita via vinodkv)
+  
+    YARN-1555. Fixed test failures in applicationhistoryservice.* (Vinod Kumar 
+    Vavilapalli via mayank)
+  
+    YARN-1594. Updated pom.xml of applicationhistoryservice sub-project according to
+    YARN-888. (Vinod Kumar Vavilapalli via zjshen)
+  
+    YARN-1596. Fixed Javadoc warnings on branch YARN-321. (Vinod Kumar Vavilapalli
+    via zjshen)
+  
+    YARN-1597. Fixed Findbugs warnings on branch YARN-321. (Vinod Kumar Vavilapalli
+    via zjshen)
+  
+    YARN-1595. Made enabling history service configurable and fixed test failures on
+    branch YARN-321. (Vinod Kumar Vavilapalli via zjshen)
+  
+    YARN-1605. Fixed formatting issues in the new module on branch YARN-321. (Vinod
+    Kumar Vavilapalli via zjshen)
+
+    YARN-1625. Fixed RAT warnings after YARN-321 merge. (Shinichi Yamashita via
+    vinodkv)
+
+    YARN-1613. Fixed the typo with the configuration name
+    YARN_HISTORY_SERVICE_ENABLED. (Akira Ajisaka via vinodkv)
+
 Release 2.4.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES
@@ -366,6 +460,9 @@ Release 2.4.0 - UNRELEASED
     YARN-1607. TestRM relies on the scheduler assigning multiple containers in
     a single node update (Sandy Ryza)
 
+    YARN-1575. Public localizer crashes with "Localized unkown resource"
+    (jlowe)
+
 Release 2.3.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 18 - 1
hadoop-yarn-project/hadoop-yarn/bin/yarn

@@ -58,10 +58,13 @@ function print_usage(){
   echo "where COMMAND is one of:"
   echo "  resourcemanager      run the ResourceManager" 
   echo "  nodemanager          run a nodemanager on each slave" 
+  echo "  historyserver        run the application history server" 
   echo "  rmadmin              admin tools" 
   echo "  version              print the version"
   echo "  jar <jar>            run a jar file"
   echo "  application          prints application(s) report/kill application"
+  echo "  applicationattempt   prints applicationattempt(s) report"
+  echo "  container            prints container(s) report"
   echo "  node                 prints node report(s)"
   echo "  logs                 dump container logs"
   echo "  classpath            prints the class path needed to get the"
@@ -145,6 +148,10 @@ if [ -d "$HADOOP_YARN_HOME/yarn-server/yarn-server-common/target/classes" ]; the
 fi
 if [ -d "$HADOOP_YARN_HOME/yarn-server/yarn-server-resourcemanager/target/classes" ]; then
   CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-server/yarn-server-resourcemanager/target/classes
+  CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-server/yarn-server-applicationhistoryservice/target/classes
+fi
+if [ -d "$HADOOP_YARN_HOME/yarn-server/yarn-server-applicationhistoryservice/target/classes" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/yarn-server/yarn-server-applicationhistoryservice/target/classes
 fi
 if [ -d "$HADOOP_YARN_HOME/build/test/classes" ]; then
   CLASSPATH=${CLASSPATH}:$HADOOP_YARN_HOME/target/test/classes
@@ -177,9 +184,12 @@ if [ "$COMMAND" = "classpath" ] ; then
 elif [ "$COMMAND" = "rmadmin" ] ; then
   CLASS='org.apache.hadoop.yarn.client.cli.RMAdminCLI'
   YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
-elif [ "$COMMAND" = "application" ] ; then
+elif [ "$COMMAND" = "application" ] || 
+     [ "$COMMAND" = "applicationattempt" ] || 
+     [ "$COMMAND" = "container" ]; then
   CLASS=org.apache.hadoop.yarn.client.cli.ApplicationCLI
   YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
+  set -- $COMMAND $@
 elif [ "$COMMAND" = "node" ] ; then
   CLASS=org.apache.hadoop.yarn.client.cli.NodeCLI
   YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
@@ -190,6 +200,13 @@ elif [ "$COMMAND" = "resourcemanager" ] ; then
   if [ "$YARN_RESOURCEMANAGER_HEAPSIZE" != "" ]; then
     JAVA_HEAP_MAX="-Xmx""$YARN_RESOURCEMANAGER_HEAPSIZE""m"
   fi
+elif [ "$COMMAND" = "historyserver" ] ; then
+  CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/ahs-config/log4j.properties
+  CLASS='org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer'
+  YARN_OPTS="$YARN_OPTS $YARN_HISTORYSERVER_OPTS"
+  if [ "$YARN_RESOURCEMANAGER_HEAPSIZE" != "" ]; then
+    JAVA_HEAP_MAX="-Xmx""$YARN_HISTORYSERVER_HEAPSIZE""m"
+  fi  
 elif [ "$COMMAND" = "nodemanager" ] ; then
   CLASSPATH=${CLASSPATH}:$YARN_CONF_DIR/nm-config/log4j.properties
   CLASS='org.apache.hadoop.yarn.server.nodemanager.NodeManager'

+ 31 - 2
hadoop-yarn-project/hadoop-yarn/bin/yarn.cmd

@@ -120,8 +120,11 @@ if "%1" == "--config" (
 
   if exist %HADOOP_YARN_HOME%\yarn-server\yarn-server-resourcemanager\target\classes (
     set CLASSPATH=%CLASSPATH%;%HADOOP_YARN_HOME%\yarn-server\yarn-server-resourcemanager\target\classes
+    set CLASSPATH=%CLASSPATH%;%HADOOP_YARN_HOME%\yarn-server\yarn-server-applicationhistoryservice\target\classes
+  )
+  if exist %HADOOP_YARN_HOME%\yarn-server\yarn-server-applicationhistoryservice\target\classes (
+    set CLASSPATH=%CLASSPATH%;%HADOOP_YARN_HOME%\yarn-server\yarn-server-applicationhistoryservice\target\classes
   )
-
   if exist %HADOOP_YARN_HOME%\build\test\classes (
     set CLASSPATH=%CLASSPATH%;%HADOOP_YARN_HOME%\build\test\classes
   )
@@ -138,7 +141,8 @@ if "%1" == "--config" (
     goto :eof
   )
 
-  set yarncommands=resourcemanager nodemanager proxyserver rmadmin version jar application node logs daemonlog
+  set yarncommands=resourcemanager nodemanager proxyserver rmadmin version jar ^
+     application applicationattempt container node logs daemonlog historyserver
   for %%i in ( %yarncommands% ) do (
     if %yarn-command% == %%i set yarncommand=true
   )
@@ -170,8 +174,21 @@ goto :eof
 :application
   set CLASS=org.apache.hadoop.yarn.client.cli.ApplicationCLI
   set YARN_OPTS=%YARN_OPTS% %YARN_CLIENT_OPTS%
+  set yarn-command-arguments=%yarn-command% %yarn-command-arguments%
   goto :eof
 
+:applicationattempt
+  set CLASS=org.apache.hadoop.yarn.client.cli.ApplicationCLI
+  set YARN_OPTS=%YARN_OPTS% %YARN_CLIENT_OPTS%
+  set yarn-command-arguments=%yarn-command% %yarn-command-arguments%
+  goto :eof
+
+:container
+  set CLASS=org.apache.hadoop.yarn.client.cli.ApplicationCLI
+  set YARN_OPTS=%YARN_OPTS% %YARN_CLIENT_OPTS%
+  set yarn-command-arguments=%yarn-command% %yarn-command-arguments%
+  goto :eof  
+
 :node
   set CLASS=org.apache.hadoop.yarn.client.cli.NodeCLI
   set YARN_OPTS=%YARN_OPTS% %YARN_CLIENT_OPTS%
@@ -186,6 +203,15 @@ goto :eof
   )
   goto :eof
 
+:historyserver
+  set CLASSPATH=%CLASSPATH%;%YARN_CONF_DIR%\ahs-config\log4j.properties
+  set CLASS=org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer
+  set YARN_OPTS=%YARN_OPTS% %YARN_HISTORYSERVER_OPTS%
+  if defined YARN_HISTORYSERVER_HEAPSIZE (
+    set JAVA_HEAP_MAX=-Xmx%YARN_HISTORYSERVER_HEAPSIZE%m
+  )
+  goto :eof
+
 :nodemanager
   set CLASSPATH=%CLASSPATH%;%YARN_CONF_DIR%\nm-config\log4j.properties
   set CLASS=org.apache.hadoop.yarn.server.nodemanager.NodeManager
@@ -251,10 +277,13 @@ goto :eof
   @echo        where COMMAND is one of:
   @echo   resourcemanager      run the ResourceManager
   @echo   nodemanager          run a nodemanager on each slave
+  @echo   historyserver        run the application history server  
   @echo   rmadmin              admin tools
   @echo   version              print the version
   @echo   jar ^<jar^>          run a jar file
   @echo   application          prints application(s) report/kill application
+  @echo   applicationattempt   prints applicationattempt(s) report
+  @echo   container            prints container(s) report
   @echo   node                 prints node report(s)
   @echo   logs                 dump container logs
   @echo   classpath            prints the class path needed to get the

+ 9 - 0
hadoop-yarn-project/hadoop-yarn/conf/yarn-env.sh

@@ -54,6 +54,15 @@ fi
 # or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
 #export YARN_RESOURCEMANAGER_HEAPSIZE=1000
 
+# Specify the max heap size for the HistoryServer as a numerical value in MB.
+# For example, to specify a JVM option of -Xmx1000m, set
+# the value to 1000.
+# This value will be overridden by an Xmx setting specified in either YARN_OPTS
+# and/or YARN_HISTORYSERVER_OPTS.
+# If not specified, the default value will be picked from either YARN_HEAPMAX
+# or JAVA_HEAP_MAX with YARN_HEAPMAX as the preferred option of the two.
+#export YARN_HISTORYSERVER_HEAPSIZE=1000
+
 # Specify the JVM options to be used when starting the ResourceManager.
 # These options will be appended to the options specified as YARN_OPTS
 # and therefore may override any similar flags set in YARN_OPTS

+ 2 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml

@@ -112,6 +112,8 @@
                   <include>containermanagement_protocol.proto</include>
                   <include>server/yarn_server_resourcemanager_service_protos.proto</include>
                   <include>server/resourcemanager_administration_protocol.proto</include>
+                  <include>application_history_client.proto</include>
+                  <include>server/application_history_server.proto</include>
                 </includes>
               </source>
               <output>${project.build.directory}/generated-sources/java</output>

+ 334 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationHistoryProtocol.java

@@ -0,0 +1,334 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.api;
+
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience.Private;
+import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.yarn.api.protocolrecords.CancelDelegationTokenRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.CancelDelegationTokenResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.GetContainerReportRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetContainerReportResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.GetContainersRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetContainersResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.GetDelegationTokenRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetDelegationTokenResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.RenewDelegationTokenRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.RenewDelegationTokenResponse;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ApplicationReport;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerReport;
+import org.apache.hadoop.yarn.api.records.Token;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+
+/**
+ * <p>
+ * The protocol between clients and the <code>ApplicationHistoryServer</code> to
+ * get information about completed applications, application attempts, and containers.
+ * </p>
+ */
+@Public
+@Unstable
+public interface ApplicationHistoryProtocol {
+
+  /**
+   * <p>
+   * The interface used by clients to get a report of an Application from the
+   * <code>ApplicationHistoryServer</code>.
+   * </p>
+   * 
+   * <p>
+   * The client, via {@link GetApplicationReportRequest} provides the
+   * {@link ApplicationId} of the application.
+   * </p>
+   * 
+   * <p>
+   * In secure mode, the <code>ApplicationHistoryServer</code> verifies access to
+   * the application, queue, etc. before accepting the request.
+   * </p>
+   * 
+   * <p>
+   * The <code>ApplicationHistoryServer</code> responds with a
+   * {@link GetApplicationReportResponse} which includes the
+   * {@link ApplicationReport} for the application.
+   * </p>
+   * 
+   * <p>
+   * If the user does not have <code>VIEW_APP</code> access then the following
+   * fields in the report will be set to stubbed values:
+   * <ul>
+   * <li>host - set to "N/A"</li>
+   * <li>RPC port - set to -1</li>
+   * <li>client token - set to "N/A"</li>
+   * <li>diagnostics - set to "N/A"</li>
+   * <li>tracking URL - set to "N/A"</li>
+   * <li>original tracking URL - set to "N/A"</li>
+   * <li>resource usage report - all values are -1</li>
+   * </ul>
+   * </p>
+   * 
+   * @param request
+   *          request for an application report
+   * @return application report
+   * @throws YarnException
+   * @throws IOException
+   */
+  @Public
+  @Unstable
+  public GetApplicationReportResponse getApplicationReport(
+      GetApplicationReportRequest request) throws YarnException, IOException;
+
+  /**
+   * <p>
+   * The interface used by clients to get a report of all Applications in the
+   * cluster from the <code>ApplicationHistoryServer</code>.
+   * </p>
+   * 
+   * <p>
+   * The <code>ApplicationHistoryServer</code> responds with a
+   * {@link GetApplicationsResponse} which includes a list of
+   * {@link ApplicationReport} for all the applications.
+   * </p>
+   * 
+   * <p>
+   * If the user does not have <code>VIEW_APP</code> access for an application
+   * then the corresponding report will be filtered as described in
+   * {@link #getApplicationReport(GetApplicationReportRequest)}.
+   * </p>
+   * 
+   * @param request
+   *          request for reports on all the applications
+   * @return report on applications matching the given application types defined
+   *         in the request
+   * @throws YarnException
+   * @throws IOException
+   */
+  @Public
+  @Unstable
+  public GetApplicationsResponse
+      getApplications(GetApplicationsRequest request) throws YarnException,
+          IOException;
+
+  /**
+   * <p>
+   * The interface used by clients to get a report of an Application Attempt
+   * from the <code>ApplicationHistoryServer</code>.
+   * </p>
+   * 
+   * <p>
+   * The client, via {@link GetApplicationAttemptReportRequest} provides the
+   * {@link ApplicationAttemptId} of the application attempt.
+   * </p>
+   * 
+   * <p>
+   * In secure mode, the <code>ApplicationHistoryServer</code> verifies access to
+   * the method before accepting the request.
+   * </p>
+   * 
+   * <p>
+   * The <code>ApplicationHistoryServer</code> responds with a
+   * {@link GetApplicationAttemptReportResponse} which includes the
+   * {@link ApplicationAttemptReport} for the application attempt.
+   * </p>
+   * 
+   * <p>
+   * If the user does not have <code>VIEW_APP</code> access then the following
+   * fields in the report will be set to stubbed values:
+   * <ul>
+   * <li>host</li>
+   * <li>RPC port</li>
+   * <li>client token</li>
+   * <li>diagnostics - set to "N/A"</li>
+   * <li>tracking URL</li>
+   * </ul>
+   * </p>
+   * 
+   * @param request
+   *          request for an application attempt report
+   * @return application attempt report
+   * @throws YarnException
+   * @throws IOException
+   */
+  @Public
+  @Unstable
+  public GetApplicationAttemptReportResponse getApplicationAttemptReport(
+      GetApplicationAttemptReportRequest request) throws YarnException,
+      IOException;
+
+  /**
+   * <p>
+   * The interface used by clients to get a report of all Application attempts
+   * in the cluster from the <code>ApplicationHistoryServer</code>.
+   * </p>
+   * 
+   * <p>
+   * The <code>ApplicationHistoryServer</code> responds with a
+   * {@link GetApplicationAttemptsResponse} which includes a list of
+   * {@link ApplicationAttemptReport} for all the application attempts of the
+   * specified application.
+   * </p>
+   * 
+   * <p>
+   * If the user does not have <code>VIEW_APP</code> access for an application
+   * then the corresponding report will be filtered as described in
+   * {@link #getApplicationAttemptReport(GetApplicationAttemptReportRequest)}.
+   * </p>
+   * 
+   * @param request
+   *          request for reports on all application attempts of an application
+   * @return reports on all application attempts of an application
+   * @throws YarnException
+   * @throws IOException
+   */
+  @Public
+  @Unstable
+  public GetApplicationAttemptsResponse getApplicationAttempts(
+      GetApplicationAttemptsRequest request) throws YarnException, IOException;
+
+  /**
+   * <p>
+   * The interface used by clients to get a report of a Container from the
+   * <code>ApplicationHistoryServer</code>.
+   * </p>
+   * 
+   * <p>
+   * The client, via {@link GetContainerReportRequest} provides the
+   * {@link ContainerId} of the container.
+   * </p>
+   * 
+   * <p>
+   * In secure mode, the <code>ApplicationHistoryServer</code> verifies access to
+   * the method before accepting the request.
+   * </p>
+   * 
+   * <p>
+   * The <code>ApplicationHistoryServer</code> responds with a
+   * {@link GetContainerReportResponse} which includes the
+   * {@link ContainerReport} for the container.
+   * </p>
+   * 
+   * @param request
+   *          request for a container report
+   * @return container report
+   * @throws YarnException
+   * @throws IOException
+   */
+  @Public
+  @Unstable
+  public GetContainerReportResponse getContainerReport(
+      GetContainerReportRequest request) throws YarnException, IOException;
+
+  /**
+   * <p>
+   * The interface used by clients to get a report of Containers for an
+   * application attempt from the <code>ApplicationHistoryServer</code>.
+   * </p>
+   * 
+   * <p>
+   * The client, via {@link GetContainersRequest} provides the
+   * {@link ApplicationAttemptId} of the application attempt.
+   * </p>
+   * 
+   * <p>
+   * In secure mode, the <code>ApplicationHistoryServer</code> verifies access to
+   * the method before accepting the request.
+   * </p>
+   * 
+   * <p>
+   * The <code>ApplicationHistoryServer</code> responds with a
+   * {@link GetContainersResponse} which includes a list of
+   * {@link ContainerReport} for all the containers of a specific application
+   * attempt.
+   * </p>
+   * 
+   * @param request
+   *          request for a list of container reports of an application attempt.
+   * @return reports on all containers of an application attempt
+   * @throws YarnException
+   * @throws IOException
+   */
+  @Public
+  @Unstable
+  public GetContainersResponse getContainers(GetContainersRequest request)
+      throws YarnException, IOException;
+
+  /**
+   * <p>
+   * The interface used by clients to get a delegation token, enabling them to
+   * talk to the service securely using that token.
+   * </p>
+   * 
+   * <p>
+   * The <code>ApplicationHistoryServer</code> responds with the delegation
+   * token {@link Token} that can be used by the client to speak to this
+   * service.
+   * </p>
+   * 
+   * @param request
+   *          request to get a delegation token for the client.
+   * @return delegation token that can be used to talk to this service
+   * @throws YarnException
+   * @throws IOException
+   */
+  @Public
+  @Unstable
+  public GetDelegationTokenResponse getDelegationToken(
+      GetDelegationTokenRequest request) throws YarnException, IOException;
+
+  /**
+   * Renew an existing delegation token.
+   * 
+   * @param request
+   *          the delegation token to be renewed.
+   * @return the new expiry time for the delegation token.
+   * @throws YarnException
+   * @throws IOException
+   */
+  @Private
+  @Unstable
+  public RenewDelegationTokenResponse renewDelegationToken(
+      RenewDelegationTokenRequest request) throws YarnException, IOException;
+
+  /**
+   * Cancel an existing delegation token.
+   * 
+   * @param request
+   *          the delegation token to be cancelled.
+   * @return an empty response.
+   * @throws YarnException
+   * @throws IOException
+   */
+  @Private
+  @Unstable
+  public CancelDelegationTokenResponse cancelDelegationToken(
+      CancelDelegationTokenRequest request) throws YarnException, IOException;
+}
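
A minimal client-side sketch of driving this protocol. Nothing below is part of this patch: it assumes a YarnRPC proxy can be built for ApplicationHistoryProtocol the same way it is for other YARN protocols, and the application id is illustrative only.

    // Hypothetical, minimal AHS client; assumptions are noted inline.
    import java.net.InetSocketAddress;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.yarn.api.ApplicationHistoryProtocol;
    import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest;
    import org.apache.hadoop.yarn.api.records.ApplicationId;
    import org.apache.hadoop.yarn.api.records.ApplicationReport;
    import org.apache.hadoop.yarn.conf.YarnConfiguration;
    import org.apache.hadoop.yarn.ipc.YarnRPC;

    public class AhsClientSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new YarnConfiguration();
        // Resolve the AHS RPC address from the new configuration keys.
        InetSocketAddress ahsAddress = conf.getSocketAddr(
            YarnConfiguration.AHS_ADDRESS,
            YarnConfiguration.DEFAULT_AHS_ADDRESS,
            YarnConfiguration.DEFAULT_AHS_PORT);
        // Assumption: proxy creation works for this protocol as for others.
        ApplicationHistoryProtocol history = (ApplicationHistoryProtocol)
            YarnRPC.create(conf).getProxy(ApplicationHistoryProtocol.class,
                ahsAddress, conf);
        // Illustrative id only: cluster timestamp plus a sequence number.
        ApplicationId appId = ApplicationId.newInstance(1390000000000L, 1);
        ApplicationReport report = history.getApplicationReport(
            GetApplicationReportRequest.newInstance(appId))
            .getApplicationReport();
        System.out.println(report.getName() + ": "
            + report.getYarnApplicationState());
      }
    }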

+ 75 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetApplicationAttemptReportRequest.java

@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.api.protocolrecords;
+
+import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.yarn.api.ApplicationHistoryProtocol;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport;
+import org.apache.hadoop.yarn.util.Records;
+
+/**
+ * <p>
+ * The request sent by a client to the <code>ResourceManager</code> to get an
+ * {@link ApplicationAttemptReport} for an application attempt.
+ * </p>
+ * 
+ * <p>
+ * The request should include the {@link ApplicationAttemptId} of the
+ * application attempt.
+ * </p>
+ * 
+ * @see ApplicationAttemptReport
+ * @see ApplicationHistoryProtocol#getApplicationAttemptReport(GetApplicationAttemptReportRequest)
+ */
+@Public
+@Unstable
+public abstract class GetApplicationAttemptReportRequest {
+
+  @Public
+  @Unstable
+  public static GetApplicationAttemptReportRequest newInstance(
+      ApplicationAttemptId applicationAttemptId) {
+    GetApplicationAttemptReportRequest request =
+        Records.newRecord(GetApplicationAttemptReportRequest.class);
+    request.setApplicationAttemptId(applicationAttemptId);
+    return request;
+  }
+
+  /**
+   * Get the <code>ApplicationAttemptId</code> of an application attempt.
+   * 
+   * @return <code>ApplicationAttemptId</code> of an application attempt
+   */
+  @Public
+  @Unstable
+  public abstract ApplicationAttemptId getApplicationAttemptId();
+
+  /**
+   * Set the <code>ApplicationAttemptId</code> of an application attempt
+   * 
+   * @param applicationAttemptId
+   *          <code>ApplicationAttemptId</code> of an application attempt
+   */
+  @Public
+  @Unstable
+  public abstract void setApplicationAttemptId(
+      ApplicationAttemptId applicationAttemptId);
+}
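
A hedged usage sketch for this record, reusing the history proxy from the AhsClientSketch above and assuming an attemptId obtained elsewhere:

    // Build the request via the factory above and unwrap the response record.
    GetApplicationAttemptReportRequest request =
        GetApplicationAttemptReportRequest.newInstance(attemptId);
    ApplicationAttemptReport report =
        history.getApplicationAttemptReport(request)
            .getApplicationAttemptReport();
    System.out.println("AM ran on " + report.getHost() + ":"
        + report.getRpcPort());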

+ 74 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetApplicationAttemptReportResponse.java

@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.api.protocolrecords;
+
+import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.yarn.api.ApplicationHistoryProtocol;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport;
+import org.apache.hadoop.yarn.util.Records;
+
+/**
+ * <p>
+ * The response sent by the <code>ResourceManager</code> to a client requesting
+ * an application attempt report.
+ * </p>
+ * 
+ * <p>
+ * The response includes an {@link ApplicationAttemptReport} which has the
+ * details of the particular application attempt.
+ * </p>
+ * 
+ * @see ApplicationAttemptReport
+ * @see ApplicationHistoryProtocol#getApplicationAttemptReport(GetApplicationAttemptReportRequest)
+ */
+@Public
+@Unstable
+public abstract class GetApplicationAttemptReportResponse {
+
+  @Public
+  @Unstable
+  public static GetApplicationAttemptReportResponse newInstance(
+      ApplicationAttemptReport applicationAttemptReport) {
+    GetApplicationAttemptReportResponse response =
+        Records.newRecord(GetApplicationAttemptReportResponse.class);
+    response.setApplicationAttemptReport(applicationAttemptReport);
+    return response;
+  }
+
+  /**
+   * Get the <code>ApplicationAttemptReport</code> for the application attempt.
+   * 
+   * @return <code>ApplicationAttemptReport</code> for the application attempt
+   */
+  @Public
+  @Unstable
+  public abstract ApplicationAttemptReport getApplicationAttemptReport();
+
+  /**
+   * Set the <code>ApplicationAttemptReport</code> for the application attempt.
+   * 
+   * @param applicationAttemptReport
+   *          <code>ApplicationAttemptReport</code> for the application attempt
+   */
+  @Public
+  @Unstable
+  public abstract void setApplicationAttemptReport(
+      ApplicationAttemptReport applicationAttemptReport);
+}

+ 67 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetApplicationAttemptsRequest.java

@@ -0,0 +1,67 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.api.protocolrecords;
+
+import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.yarn.api.ApplicationHistoryProtocol;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.util.Records;
+
+/**
+ * <p>
+ * The request from clients to get a list of application attempt reports of an
+ * application from the <code>ResourceManager</code>.
+ * </p>
+ * 
+ * @see ApplicationHistoryProtocol#getApplicationAttempts(GetApplicationAttemptsRequest)
+ */
+@Public
+@Unstable
+public abstract class GetApplicationAttemptsRequest {
+
+  @Public
+  @Unstable
+  public static GetApplicationAttemptsRequest newInstance(
+      ApplicationId applicationId) {
+    GetApplicationAttemptsRequest request =
+        Records.newRecord(GetApplicationAttemptsRequest.class);
+    request.setApplicationId(applicationId);
+    return request;
+  }
+
+  /**
+   * Get the <code>ApplicationId</code> of an application
+   * 
+   * @return <code>ApplicationId</code> of an application
+   */
+  @Public
+  @Unstable
+  public abstract ApplicationId getApplicationId();
+
+  /**
+   * Set the <code>ApplicationId</code> of an application
+   * 
+   * @param applicationId
+   *          <code>ApplicationId</code> of an application
+   */
+  @Public
+  @Unstable
+  public abstract void setApplicationId(ApplicationId applicationId);
+}

+ 76 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetApplicationAttemptsResponse.java

@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.api.protocolrecords;
+
+import java.util.List;
+
+import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.yarn.api.ApplicationHistoryProtocol;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport;
+import org.apache.hadoop.yarn.util.Records;
+
+/**
+ * <p>
+ * The response sent by the <code>ResourceManager</code> to a client requesting
+ * a list of {@link ApplicationAttemptReport} for application attempts.
+ * </p>
+ * 
+ * <p>
+ * The <code>ApplicationAttemptReport</code> for each attempt includes the
+ * details of that application attempt.
+ * </p>
+ * 
+ * @see ApplicationAttemptReport
+ * @see ApplicationHistoryProtocol#getApplicationAttempts(GetApplicationAttemptsRequest)
+ */
+@Public
+@Unstable
+public abstract class GetApplicationAttemptsResponse {
+
+  @Public
+  @Unstable
+  public static GetApplicationAttemptsResponse newInstance(
+      List<ApplicationAttemptReport> applicationAttempts) {
+    GetApplicationAttemptsResponse response =
+        Records.newRecord(GetApplicationAttemptsResponse.class);
+    response.setApplicationAttemptList(applicationAttempts);
+    return response;
+  }
+
+  /**
+   * Get a list of <code>ApplicationAttemptReport</code> of an application.
+   * 
+   * @return a list of <code>ApplicationAttemptReport</code> of an application
+   */
+  @Public
+  @Unstable
+  public abstract List<ApplicationAttemptReport> getApplicationAttemptList();
+
+  /**
+   * Set a list of <code>ApplicationAttemptReport</code> of an application.
+   * 
+   * @param applicationAttempts
+   *          a list of <code>ApplicationAttemptReport</code> of an application
+   */
+  @Public
+  @Unstable
+  public abstract void setApplicationAttemptList(
+      List<ApplicationAttemptReport> applicationAttempts);
+}
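
A hedged sketch that lists every recorded attempt of one application; history and appId carry over from the AhsClientSketch above:

    // Enumerate all attempts of an application and print each AM container.
    GetApplicationAttemptsRequest request =
        GetApplicationAttemptsRequest.newInstance(appId);
    for (ApplicationAttemptReport attempt :
        history.getApplicationAttempts(request).getApplicationAttemptList()) {
      System.out.println(attempt.getApplicationAttemptId()
          + " -> AM container " + attempt.getAMContainerId());
    }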

+ 64 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetContainerReportRequest.java

@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.api.protocolrecords;
+
+import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerReport;
+import org.apache.hadoop.yarn.util.Records;
+
+/**
+ * <p>
+ * The request sent by a client to the <code>ResourceManager</code> to get a
+ * {@link ContainerReport} for a container.
+ * </p>
+ */
+@Public
+@Unstable
+public abstract class GetContainerReportRequest {
+
+  @Public
+  @Unstable
+  public static GetContainerReportRequest newInstance(ContainerId containerId) {
+    GetContainerReportRequest request =
+        Records.newRecord(GetContainerReportRequest.class);
+    request.setContainerId(containerId);
+    return request;
+  }
+
+  /**
+   * Get the <code>ContainerId</code> of the Container.
+   * 
+   * @return <code>ContainerId</code> of the Container
+   */
+  @Public
+  @Unstable
+  public abstract ContainerId getContainerId();
+
+  /**
+   * Set the <code>ContainerId</code> of the container
+   * 
+   * @param containerId
+   *          <code>ContainerId</code> of the container
+   */
+  @Public
+  @Unstable
+  public abstract void setContainerId(ContainerId containerId);
+}

+ 63 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetContainerReportResponse.java

@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.api.protocolrecords;
+
+import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.yarn.api.records.ContainerReport;
+import org.apache.hadoop.yarn.util.Records;
+
+/**
+ * <p>
+ * The response sent by the <code>ResourceManager</code> to a client requesting
+ * a container report.
+ * </p>
+ * 
+ * <p>
+ * The response includes a {@link ContainerReport} which has details of a
+ * container.
+ * </p>
+ * 
+ */
+@Public
+@Unstable
+public abstract class GetContainerReportResponse {
+  @Public
+  @Unstable
+  public static GetContainerReportResponse newInstance(
+      ContainerReport containerReport) {
+    GetContainerReportResponse response =
+        Records.newRecord(GetContainerReportResponse.class);
+    response.setContainerReport(containerReport);
+    return response;
+  }
+
+  /**
+   * Get the <code>ContainerReport</code> for the container.
+   * 
+   * @return <code>ContainerReport</code> for the container
+   */
+  @Public
+  @Unstable
+  public abstract ContainerReport getContainerReport();
+
+  @Public
+  @Unstable
+  public abstract void setContainerReport(ContainerReport containerReport);
+}
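
A hedged sketch fetching one container's report; history is the proxy from the AhsClientSketch above and containerId is assumed to come from elsewhere:

    // Fetch a single container report and inspect its outcome.
    GetContainerReportRequest request =
        GetContainerReportRequest.newInstance(containerId);
    ContainerReport report =
        history.getContainerReport(request).getContainerReport();
    System.out.println(report.getContainerId() + " exited with "
        + report.getContainerExitStatus() + ", logs: " + report.getLogUrl());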

+ 67 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetContainersRequest.java

@@ -0,0 +1,67 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.api.protocolrecords;
+
+import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.yarn.api.ApplicationHistoryProtocol;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.util.Records;
+
+/**
+ * <p>
+ * The request from clients to get a list of container reports belonging to an
+ * application attempt from the <code>ResourceManager</code>.
+ * </p>
+ * 
+ * @see ApplicationHistoryProtocol#getContainers(GetContainersRequest)
+ */
+@Public
+@Unstable
+public abstract class GetContainersRequest {
+  @Public
+  @Unstable
+  public static GetContainersRequest newInstance(
+      ApplicationAttemptId applicationAttemptId) {
+    GetContainersRequest request =
+        Records.newRecord(GetContainersRequest.class);
+    request.setApplicationAttemptId(applicationAttemptId);
+    return request;
+  }
+
+  /**
+   * Get the <code>ApplicationAttemptId</code> of an application attempt.
+   * 
+   * @return <code>ApplicationAttemptId</code> of an application attempt
+   */
+  @Public
+  @Unstable
+  public abstract ApplicationAttemptId getApplicationAttemptId();
+
+  /**
+   * Set the <code>ApplicationAttemptId</code> of an application attempt
+   * 
+   * @param applicationAttemptId
+   *          <code>ApplicationAttemptId</code> of an application attempt
+   */
+  @Public
+  @Unstable
+  public abstract void setApplicationAttemptId(
+      ApplicationAttemptId applicationAttemptId);
+}

+ 81 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetContainersResponse.java

@@ -0,0 +1,81 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.api.protocolrecords;
+
+import java.util.List;
+
+import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.yarn.api.ApplicationHistoryProtocol;
+import org.apache.hadoop.yarn.api.records.ContainerReport;
+import org.apache.hadoop.yarn.util.Records;
+
+/**
+ * <p>
+ * The response sent by the <code>ResourceManager</code> to a client requesting
+ * a list of {@link ContainerReport} for containers.
+ * </p>
+ * 
+ * <p>
+ * The <code>ContainerReport</code> for each container includes the container
+ * details.
+ * </p>
+ * 
+ * @see ContainerReport
+ * @see ApplicationHistoryProtocol#getContainers(GetContainersRequest)
+ */
+@Public
+@Unstable
+public abstract class GetContainersResponse {
+
+  @Public
+  @Unstable
+  public static GetContainersResponse newInstance(
+      List<ContainerReport> containers) {
+    GetContainersResponse response =
+        Records.newRecord(GetContainersResponse.class);
+    response.setContainerList(containers);
+    return response;
+  }
+
+  /**
+   * Get a list of <code>ContainerReport</code> for all the containers of an
+   * application attempt.
+   * 
+   * @return a list of <code>ContainerReport</code> for all the containers of an
+   *         application attempt
+   * 
+   */
+  @Public
+  @Unstable
+  public abstract List<ContainerReport> getContainerList();
+
+  /**
+   * Set a list of <code>ContainerReport</code> for all the containers of an
+   * application attempt.
+   * 
+   * @param containers
+   *          a list of <code>ContainerReport</code> for all the containers of
+   *          an application attempt
+   * 
+   */
+  @Public
+  @Unstable
+  public abstract void setContainerList(List<ContainerReport> containers);
+}
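
A hedged sketch enumerating the containers recorded for one attempt; history and attemptId are assumptions carried over from the earlier sketches:

    // List every container of an application attempt with its node and state.
    GetContainersRequest request = GetContainersRequest.newInstance(attemptId);
    for (ContainerReport container :
        history.getContainers(request).getContainerList()) {
      System.out.println(container.getContainerId() + " on "
          + container.getAssignedNode() + " in state "
          + container.getContainerState());
    }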

+ 165 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationAttemptReport.java

@@ -0,0 +1,165 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.api.records;
+
+import org.apache.hadoop.classification.InterfaceAudience.Private;
+import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.yarn.util.Records;
+
+/**
+ * <p>
+ * <code>ApplicationAttemptReport</code> is a report of an application attempt.
+ * </p>
+ * 
+ * <p>
+ * It includes details such as:
+ * <ul>
+ * <li>{@link ApplicationAttemptId} of the application attempt.</li>
+ * <li>Host on which the <code>ApplicationMaster</code> of this attempt is
+ * running.</li>
+ * <li>RPC port of the <code>ApplicationMaster</code> of this attempt.</li>
+ * <li>Tracking URL.</li>
+ * <li>Diagnostic information in case of errors.</li>
+ * <li>{@link YarnApplicationAttemptState} of the application attempt.</li>
+ * <li>{@link ContainerId} of the master Container.</li>
+ * </ul>
+ * </p>
+ * 
+ */
+@Public
+@Unstable
+public abstract class ApplicationAttemptReport {
+
+  @Private
+  @Unstable
+  public static ApplicationAttemptReport newInstance(
+      ApplicationAttemptId applicationAttemptId, String host, int rpcPort,
+      String url, String diagnostics, YarnApplicationAttemptState state,
+      ContainerId amContainerId) {
+    ApplicationAttemptReport report =
+        Records.newRecord(ApplicationAttemptReport.class);
+    report.setApplicationAttemptId(applicationAttemptId);
+    report.setHost(host);
+    report.setRpcPort(rpcPort);
+    report.setTrackingUrl(url);
+    report.setDiagnostics(diagnostics);
+    report.setYarnApplicationAttemptState(state);
+    report.setAMContainerId(amContainerId);
+    return report;
+  }
+
+  /**
+   * Get the <em>YarnApplicationAttemptState</em> of the application attempt.
+   * 
+   * @return <em>YarnApplicationAttemptState</em> of the application attempt
+   */
+  @Public
+  @Unstable
+  public abstract YarnApplicationAttemptState getYarnApplicationAttemptState();
+
+  @Private
+  @Unstable
+  public abstract void setYarnApplicationAttemptState(
+      YarnApplicationAttemptState yarnApplicationAttemptState);
+
+  /**
+   * Get the <em>RPC port</em> of this attempt <code>ApplicationMaster</code>.
+   * 
+   * @return <em>RPC port</em> of this attempt <code>ApplicationMaster</code>
+   */
+  @Public
+  @Unstable
+  public abstract int getRpcPort();
+
+  @Private
+  @Unstable
+  public abstract void setRpcPort(int rpcPort);
+
+  /**
+   * Get the <em>host</em> on which this attempt of
+   * <code>ApplicationMaster</code> is running.
+   * 
+   * @return <em>host</em> on which this attempt of
+   *         <code>ApplicationMaster</code> is running
+   */
+  @Public
+  @Unstable
+  public abstract String getHost();
+
+  @Private
+  @Unstable
+  public abstract void setHost(String host);
+
+  /**
+   * Get the <em>diagnostic information</em> of the application attempt in case
+   * of errors.
+   * 
+   * @return <em>diagnostic information</em> of the application attempt in case
+   *         of errors
+   */
+  @Public
+  @Unstable
+  public abstract String getDiagnostics();
+
+  @Private
+  @Unstable
+  public abstract void setDiagnostics(String diagnostics);
+
+  /**
+   * Get the <em>tracking url</em> for the application attempt.
+   * 
+   * @return <em>tracking url</em> for the application attempt
+   */
+  @Public
+  @Unstable
+  public abstract String getTrackingUrl();
+
+  @Private
+  @Unstable
+  public abstract void setTrackingUrl(String url);
+
+  /**
+   * Get the <code>ApplicationAttemptId</code> of this attempt of the
+   * application.
+   * 
+   * @return <code>ApplicationAttemptId</code> of the attempt
+   */
+  @Public
+  @Unstable
+  public abstract ApplicationAttemptId getApplicationAttemptId();
+
+  @Private
+  @Unstable
+  public abstract void setApplicationAttemptId(
+      ApplicationAttemptId applicationAttemptId);
+
+  /**
+   * Get the <code>ContainerId</code> of the AM container for this attempt.
+   * 
+   * @return <code>ContainerId</code> of the AM container of this attempt
+   */
+  @Public
+  @Unstable
+  public abstract ContainerId getAMContainerId();
+
+  @Private
+  @Unstable
+  public abstract void setAMContainerId(ContainerId amContainerId);
+}
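
A hedged sketch of assembling one of these records with the newInstance factory above, as a history writer might; every concrete value is illustrative:

    ApplicationAttemptReport report = ApplicationAttemptReport.newInstance(
        attemptId,                             // ApplicationAttemptId (assumed)
        "am-host.example.com",                 // host that ran the AM
        45454,                                 // AM RPC port
        "http://rm.example.com:8088/proxy",    // tracking URL (illustrative)
        "",                                    // diagnostics; empty on success
        YarnApplicationAttemptState.FINISHED,  // final attempt state
        amContainerId);                        // AM container id (assumed)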

+ 202 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerReport.java

@@ -0,0 +1,202 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.api.records;
+
+import org.apache.hadoop.classification.InterfaceAudience.Private;
+import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.yarn.util.Records;
+
+/**
+ * <p>
+ * <code>ContainerReport</code> is a report of a container.
+ * </p>
+ * 
+ * <p>
+ * It includes details such as:
+ * <ul>
+ * <li>{@link ContainerId} of the container.</li>
+ * <li>Allocated Resources to the container.</li>
+ * <li>Assigned Node id.</li>
+ * <li>Assigned Priority.</li>
+ * <li>Start Time.</li>
+ * <li>Finish Time.</li>
+ * <li>Container Exit Status.</li>
+ * <li>{@link ContainerState} of the container.</li>
+ * <li>Diagnostic information in case of errors.</li>
+ * <li>Log URL.</li>
+ * </ul>
+ * </p>
+ * 
+ */
+
+@Public
+@Unstable
+public abstract class ContainerReport {
+  @Private
+  @Unstable
+  public static ContainerReport newInstance(ContainerId containerId,
+      Resource allocatedResource, NodeId assignedNode, Priority priority,
+      long startTime, long finishTime, String diagnosticInfo, String logUrl,
+      int containerExitStatus, ContainerState containerState) {
+    ContainerReport report = Records.newRecord(ContainerReport.class);
+    report.setContainerId(containerId);
+    report.setAllocatedResource(allocatedResource);
+    report.setAssignedNode(assignedNode);
+    report.setPriority(priority);
+    report.setStartTime(startTime);
+    report.setFinishTime(finishTime);
+    report.setDiagnosticsInfo(diagnosticInfo);
+    report.setLogUrl(logUrl);
+    report.setContainerExitStatus(containerExitStatus);
+    report.setContainerState(containerState);
+    return report;
+  }
+
+  /**
+   * Get the <code>ContainerId</code> of the container.
+   * 
+   * @return <code>ContainerId</code> of the container.
+   */
+  @Public
+  @Unstable
+  public abstract ContainerId getContainerId();
+
+  @Public
+  @Unstable
+  public abstract void setContainerId(ContainerId containerId);
+
+  /**
+   * Get the allocated <code>Resource</code> of the container.
+   * 
+   * @return allocated <code>Resource</code> of the container.
+   */
+  @Public
+  @Unstable
+  public abstract Resource getAllocatedResource();
+
+  @Public
+  @Unstable
+  public abstract void setAllocatedResource(Resource resource);
+
+  /**
+   * Get the allocated <code>NodeId</code> where container is running.
+   * 
+   * @return allocated <code>NodeId</code> where container is running.
+   */
+  @Public
+  @Unstable
+  public abstract NodeId getAssignedNode();
+
+  @Public
+  @Unstable
+  public abstract void setAssignedNode(NodeId nodeId);
+
+  /**
+   * Get the allocated <code>Priority</code> of the container.
+   * 
+   * @return allocated <code>Priority</code> of the container.
+   */
+  @Public
+  @Unstable
+  public abstract Priority getPriority();
+
+  @Public
+  @Unstable
+  public abstract void setPriority(Priority priority);
+
+  /**
+   * Get the Start time of the container.
+   * 
+   * @return Start time of the container
+   */
+  @Public
+  @Unstable
+  public abstract long getStartTime();
+
+  @Public
+  @Unstable
+  public abstract void setStartTime(long startTime);
+
+  /**
+   * Get the Finish time of the container.
+   * 
+   * @return Finish time of the container
+   */
+  @Public
+  @Unstable
+  public abstract long getFinishTime();
+
+  @Public
+  @Unstable
+  public abstract void setFinishTime(long finishTime);
+
+  /**
+   * Get the DiagnosticsInfo of the container.
+   * 
+   * @return DiagnosticsInfo of the container
+   */
+  @Public
+  @Unstable
+  public abstract String getDiagnosticsInfo();
+
+  @Public
+  @Unstable
+  public abstract void setDiagnosticsInfo(String diagnosticsInfo);
+
+  /**
+   * Get the LogURL of the container.
+   * 
+   * @return LogURL of the container
+   */
+  @Public
+  @Unstable
+  public abstract String getLogUrl();
+
+  @Public
+  @Unstable
+  public abstract void setLogUrl(String logUrl);
+
+  /**
+   * Get the final <code>ContainerState</code> of the container.
+   * 
+   * @return final <code>ContainerState</code> of the container.
+   */
+  @Public
+  @Unstable
+  public abstract ContainerState getContainerState();
+
+  @Public
+  @Unstable
+  public abstract void setContainerState(ContainerState containerState);
+
+  /**
+   * Get the final <code>exit status</code> of the container.
+   * 
+   * @return final <code>exit status</code> of the container.
+   */
+  @Public
+  @Unstable
+  public abstract int getContainerExitStatus();
+
+  @Public
+  @Unstable
+  public abstract void setContainerExitStatus(int containerExitStatus);
+
+}
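
A hedged sketch mirroring the newInstance factory above; containerId, startTime, and finishTime are assumed, and all other values are illustrative:

    ContainerReport report = ContainerReport.newInstance(
        containerId,                           // ContainerId (assumed)
        Resource.newInstance(1024, 1),         // 1024 MB, 1 vcore
        NodeId.newInstance("nm-host.example.com", 45454),
        Priority.newInstance(0),
        startTime, finishTime,                 // epoch millis (assumed)
        "",                                    // diagnostics; empty on success
        "http://nm-host.example.com:8042/node/containerlogs",
        0,                                     // exit status: success
        ContainerState.COMPLETE);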

+ 66 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/YarnApplicationAttemptState.java

@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.api.records;
+
+import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability.Stable;
+
+/**
+ * Enumeration of various states of a <code>RMAppAttempt</code>.
+ */
+@Public
+@Stable
+public enum YarnApplicationAttemptState {
+  /** AppAttempt was just created. */
+  NEW,
+
+  /** AppAttempt has been submitted. */
+  SUBMITTED,
+
+  /** AppAttempt was scheduled */
+  SCHEDULED,
+
+  /** Acquired AM Container from Scheduler and Saving AppAttempt Data */
+  ALLOCATED_SAVING,
+
+  /** AppAttempt Data was saved */
+  ALLOCATED,
+
+  /** AppAttempt was launched */
+  LAUNCHED,
+
+  /** AppAttempt failed. */
+  FAILED,
+
+  /** AppAttempt is currently running. */
+  RUNNING,
+
+  /** AppAttempt is waiting for its state to be saved. */
+  FINAL_SAVING,
+
+  /** AppAttempt is finishing. */
+  FINISHING,
+
+  /** AppAttempt finished successfully. */
+  FINISHED,
+
+  /** AppAttempt was terminated by a user or admin. */
+  KILLED
+
+}
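
A hedged helper sketch over this enum; treating FINISHED, FAILED, and KILLED as terminal is this note's reading, not something the patch defines:

    static boolean isTerminal(YarnApplicationAttemptState state) {
      switch (state) {
        case FINISHED:
        case FAILED:
        case KILLED:
          return true;   // no further transitions expected
        default:
          return false;  // attempt is still in flight or being saved
      }
    }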

+ 68 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java

@@ -263,6 +263,17 @@ public class YarnConfiguration extends Configuration {
       RM_PREFIX + "nodemanagers.heartbeat-interval-ms";
   public static final long DEFAULT_RM_NM_HEARTBEAT_INTERVAL_MS = 1000;
 
+  /** Number of worker threads that write the history data. */
+  public static final String RM_HISTORY_WRITER_MULTI_THREADED_DISPATCHER_POOL_SIZE =
+      RM_PREFIX + "history-writer.multi-threaded-dispatcher.pool-size";
+  public static final int DEFAULT_RM_HISTORY_WRITER_MULTI_THREADED_DISPATCHER_POOL_SIZE =
+      10;
+
+  /** The implementation class of ApplicationHistoryStore, which is to be used
+   *  by RMApplicationHistoryWriter. */
+  public static final String RM_HISTORY_WRITER_CLASS = RM_PREFIX
+      + "history-writer.class";
+
   //Delegation token related keys
   public static final String  DELEGATION_KEY_UPDATE_INTERVAL_KEY = 
     RM_PREFIX + "delegation.key.update-interval";
@@ -931,6 +942,63 @@ public class YarnConfiguration extends Configuration {
   public static final String YARN_APP_CONTAINER_LOG_BACKUPS =
       YARN_PREFIX + "app.container.log.backups";
 
+  ////////////////////////////////
+  // AHS Configs
+  ////////////////////////////////
+
+  public static final String AHS_PREFIX = YARN_PREFIX + "ahs.";
+
+  /** The setting that controls whether the application history service is enabled. */
+  public static final String YARN_HISTORY_SERVICE_ENABLED = AHS_PREFIX
+      + "enabled";
+  public static final boolean DEFAULT_YARN_HISTORY_SERVICE_ENABLED = false;
+
+  /** URI for FileSystemApplicationHistoryStore */
+  public static final String FS_HISTORY_STORE_URI = AHS_PREFIX + "fs-history-store.uri";
+
+  /** T-file compression type used to compress history data. */
+  public static final String FS_HISTORY_STORE_COMPRESSION_TYPE = AHS_PREFIX + "fs-history-store.compression-type";
+  public static final String DEFAULT_FS_HISTORY_STORE_COMPRESSION_TYPE = "none";
+
+  /** AHS store class */
+  public static final String AHS_STORE = AHS_PREFIX + "store.class";
+
+  /** host:port address for Application History Server API. */
+  public static final String AHS_ADDRESS = AHS_PREFIX + "address";
+  public static final int DEFAULT_AHS_PORT = 10200;
+  public static final String DEFAULT_AHS_ADDRESS = "0.0.0.0:"
+      + DEFAULT_AHS_PORT;
+
+  /** The number of threads to handle client API requests. */
+  public static final String AHS_CLIENT_THREAD_COUNT = AHS_PREFIX
+      + "client.thread-count";
+  public static final int DEFAULT_AHS_CLIENT_THREAD_COUNT = 10;
+  
+
+  /** The address of the AHS web application.*/
+  public static final String AHS_WEBAPP_ADDRESS = AHS_PREFIX
+      + "webapp.address";
+
+  public static final int DEFAULT_AHS_WEBAPP_PORT = 8188;
+  public static final String DEFAULT_AHS_WEBAPP_ADDRESS = "0.0.0.0:"
+      + DEFAULT_AHS_WEBAPP_PORT;
+
+  /** The https address of the AHS web application.*/
+  public static final String AHS_WEBAPP_HTTPS_ADDRESS = AHS_PREFIX
+      + "webapp.https.address";
+
+  public static final int DEFAULT_AHS_WEBAPP_HTTPS_PORT = 8190;
+  public static final String DEFAULT_AHS_WEBAPP_HTTPS_ADDRESS = "0.0.0.0:"
+      + DEFAULT_AHS_WEBAPP_HTTPS_PORT;
+
+  /**The kerberos principal to be used for spnego filter for AHS.*/
+  public static final String AHS_WEBAPP_SPNEGO_USER_NAME_KEY =
+      AHS_PREFIX + "webapp.spnego-principal";
+
+  /**The kerberos keytab to be used for spnego filter for AHS.*/
+  public static final String AHS_WEBAPP_SPNEGO_KEYTAB_FILE_KEY =
+      AHS_PREFIX + "webapp.spnego-keytab-file";
+
   ////////////////////////////////
   // Other Configs
   ////////////////////////////////
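
A hedged configuration sketch wiring the new keys together programmatically; the host names, store URI, and pool size are examples only, not recommended defaults:

    YarnConfiguration conf = new YarnConfiguration();
    conf.setBoolean(YarnConfiguration.YARN_HISTORY_SERVICE_ENABLED, true);
    conf.set(YarnConfiguration.AHS_ADDRESS, "ahs.example.com:10200");
    conf.set(YarnConfiguration.FS_HISTORY_STORE_URI,
        "hdfs://nn.example.com:8020/yarn/history");
    conf.setInt(
        YarnConfiguration.RM_HISTORY_WRITER_MULTI_THREADED_DISPATCHER_POOL_SIZE,
        20);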

+ 48 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/ApplicationAttemptNotFoundException.java

@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.exceptions;
+
+import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.yarn.api.ApplicationHistoryProtocol;
+import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportRequest;
+
+/**
+ * This exception is thrown on
+ * {@link ApplicationHistoryProtocol#getApplicationAttemptReport (GetApplicationAttemptReportRequest)}
+ * API when the Application Attempt doesn't exist in Application History Server
+ */
+@Public
+@Unstable
+public class ApplicationAttemptNotFoundException extends YarnException {
+
+  private static final long serialVersionUID = 8694508L;
+
+  public ApplicationAttemptNotFoundException(Throwable cause) {
+    super(cause);
+  }
+
+  public ApplicationAttemptNotFoundException(String message) {
+    super(message);
+  }
+
+  public ApplicationAttemptNotFoundException(String message, Throwable cause) {
+    super(message, cause);
+  }
+}

+ 7 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/ApplicationNotFoundException.java

@@ -18,14 +18,19 @@
 
 package org.apache.hadoop.yarn.exceptions;
 
+import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
 import org.apache.hadoop.yarn.api.ApplicationClientProtocol;
 import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest;
 
 /**
  * This exception is thrown on
- * {@link ApplicationClientProtocol#getApplicationReport(GetApplicationReportRequest)} API
- * when the Application doesn't exist in RM
+ * {@link ApplicationClientProtocol#getApplicationReport
+ * (GetApplicationReportRequest)} API
+ * when the Application doesn't exist in RM and AHS
  */
+@Public
+@Unstable
 public class ApplicationNotFoundException extends YarnException{
 
   private static final long serialVersionUID = 8694408L;

+ 48 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/exceptions/ContainerNotFoundException.java

@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.exceptions;
+
+import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.yarn.api.ApplicationHistoryProtocol;
+import org.apache.hadoop.yarn.api.protocolrecords.GetContainerReportRequest;
+
+/**
+ * This exception is thrown on
+ * {@link ApplicationHistoryProtocol#getContainerReport (GetContainerReportRequest)}
+ * API when the container doesn't exist in AHS
+ */
+@Public
+@Unstable
+public class ContainerNotFoundException extends YarnException {
+
+  private static final long serialVersionUID = 8694608L;
+
+  public ContainerNotFoundException(Throwable cause) {
+    super(cause);
+  }
+
+  public ContainerNotFoundException(String message) {
+    super(message);
+  }
+
+  public ContainerNotFoundException(String message, Throwable cause) {
+    super(message, cause);
+  }
+}
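
A hedged sketch of distinguishing the two not-found cases on the client side; history, attemptRequest, and containerRequest are assumed to exist:

    try {
      history.getApplicationAttemptReport(attemptRequest);
      history.getContainerReport(containerRequest);
    } catch (ApplicationAttemptNotFoundException e) {
      System.err.println("unknown attempt: " + e.getMessage());
    } catch (ContainerNotFoundException e) {
      System.err.println("unknown container: " + e.getMessage());
    } catch (YarnException | IOException e) {
      throw new RuntimeException(e);   // other RPC or I/O failures
    }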

+ 39 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/application_history_client.proto

@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+option java_package = "org.apache.hadoop.yarn.proto";
+option java_outer_classname = "ApplicationHistoryProtocol";
+option java_generic_services = true;
+option java_generate_equals_and_hash = true;
+package hadoop.yarn;
+
+import "Security.proto";
+import "yarn_service_protos.proto";
+
+service ApplicationHistoryProtocolService {
+  rpc getApplicationReport (GetApplicationReportRequestProto) returns (GetApplicationReportResponseProto);
+  rpc getApplications (GetApplicationsRequestProto) returns (GetApplicationsResponseProto);
+  rpc getApplicationAttemptReport (GetApplicationAttemptReportRequestProto) returns (GetApplicationAttemptReportResponseProto);
+  rpc getApplicationAttempts (GetApplicationAttemptsRequestProto) returns (GetApplicationAttemptsResponseProto);
+  rpc getContainerReport (GetContainerReportRequestProto) returns (GetContainerReportResponseProto);
+  rpc getContainers (GetContainersRequestProto) returns (GetContainersResponseProto);
+  rpc getDelegationToken(hadoop.common.GetDelegationTokenRequestProto) returns (hadoop.common.GetDelegationTokenResponseProto);
+  rpc renewDelegationToken(hadoop.common.RenewDelegationTokenRequestProto) returns (hadoop.common.RenewDelegationTokenResponseProto);
+  rpc cancelDelegationToken(hadoop.common.CancelDelegationTokenRequestProto) returns (hadoop.common.CancelDelegationTokenResponseProto);
+}
+

+ 113 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/application_history_server.proto

@@ -0,0 +1,113 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+option java_package = "org.apache.hadoop.yarn.proto";
+option java_outer_classname = "ApplicationHistoryServerProtos";
+option java_generic_services = true;
+option java_generate_equals_and_hash = true;
+package hadoop.yarn;
+
+import "yarn_protos.proto";
+
+message ApplicationHistoryDataProto {
+  optional ApplicationIdProto application_id = 1;
+  optional string application_name = 2;
+  optional string application_type = 3;
+  optional string user = 4;
+  optional string queue = 5;
+  optional int64 submit_time = 6;
+  optional int64 start_time = 7;
+  optional int64 finish_time = 8;
+  optional string diagnostics_info = 9;
+  optional FinalApplicationStatusProto final_application_status = 10;
+  optional YarnApplicationStateProto yarn_application_state = 11;
+}
+
+message ApplicationStartDataProto {
+  optional ApplicationIdProto application_id = 1;
+  optional string application_name = 2;
+  optional string application_type = 3;
+  optional string user = 4;
+  optional string queue = 5;
+  optional int64 submit_time = 6;
+  optional int64 start_time = 7;
+}
+
+message ApplicationFinishDataProto {
+  optional ApplicationIdProto application_id = 1;
+  optional int64 finish_time = 2;
+  optional string diagnostics_info = 3;
+  optional FinalApplicationStatusProto final_application_status = 4;
+  optional YarnApplicationStateProto yarn_application_state = 5;
+}
+
+message ApplicationAttemptHistoryDataProto {
+  optional ApplicationAttemptIdProto application_attempt_id = 1;
+  optional string host = 2;
+  optional int32 rpc_port = 3;
+  optional string tracking_url = 4;
+  optional string diagnostics_info = 5;
+  optional FinalApplicationStatusProto final_application_status = 6;
+  optional ContainerIdProto master_container_id = 7;
+  optional YarnApplicationAttemptStateProto yarn_application_attempt_state = 8;
+}
+
+message ApplicationAttemptStartDataProto {
+  optional ApplicationAttemptIdProto application_attempt_id = 1;
+  optional string host = 2;
+  optional int32 rpc_port = 3;
+  optional ContainerIdProto master_container_id = 4;
+}
+
+message ApplicationAttemptFinishDataProto {
+  optional ApplicationAttemptIdProto application_attempt_id = 1;
+  optional string tracking_url = 2;
+  optional string diagnostics_info = 3;
+  optional FinalApplicationStatusProto final_application_status = 4;
+  optional YarnApplicationAttemptStateProto yarn_application_attempt_state = 5;
+}
+
+message ContainerHistoryDataProto {
+  optional ContainerIdProto container_id = 1;
+  optional ResourceProto allocated_resource = 2;
+  optional NodeIdProto assigned_node_id = 3;
+  optional PriorityProto priority = 4;
+  optional int64 start_time = 5;
+  optional int64 finish_time = 6;
+  optional string diagnostics_info = 7;
+  optional string log_url = 8;
+  optional int32 container_exit_status = 9;
+  optional ContainerStateProto container_state = 10;
+}
+
+message ContainerStartDataProto {
+  optional ContainerIdProto container_id = 1;
+  optional ResourceProto allocated_resource = 2;
+  optional NodeIdProto assigned_node_id = 3;
+  optional PriorityProto priority = 4;
+  optional int64 start_time = 5;
+}
+
+message ContainerFinishDataProto {
+  optional ContainerIdProto container_id = 1;
+  optional int64 finish_time = 2;
+  optional string diagnostics_info = 3;
+  optional string log_url = 4;
+  optional int32 container_exit_status = 5;
+  optional ContainerStateProto container_state = 6;
+}

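Note the start/finish split: each entity is written as a start record when it begins and a finish record when it completes, and the union of the two covers every field of the combined history record. A sketch of building the application pair, assuming the standard protoc-generated builder API and the YarnProtos outer class for yarn_protos.proto (all values illustrative):

    import org.apache.hadoop.yarn.proto.ApplicationHistoryServerProtos.ApplicationFinishDataProto;
    import org.apache.hadoop.yarn.proto.ApplicationHistoryServerProtos.ApplicationStartDataProto;
    import org.apache.hadoop.yarn.proto.YarnProtos.FinalApplicationStatusProto;
    import org.apache.hadoop.yarn.proto.YarnProtos.YarnApplicationStateProto;

    public class HistoryDataSketch {
      public static void main(String[] args) {
        // Fields known at submission time go into the start record...
        ApplicationStartDataProto start = ApplicationStartDataProto.newBuilder()
            .setApplicationName("wordcount")
            .setApplicationType("MAPREDUCE")
            .setUser("alice")
            .setQueue("default")
            .setSubmitTime(System.currentTimeMillis())
            .build();
        // ...fields known only at completion go into the finish record; together
        // the two record types cover every field of ApplicationHistoryDataProto.
        ApplicationFinishDataProto finish = ApplicationFinishDataProto.newBuilder()
            .setFinishTime(System.currentTimeMillis())
            .setFinalApplicationStatus(FinalApplicationStatusProto.APP_SUCCEEDED)
            .setYarnApplicationState(YarnApplicationStateProto.FINISHED)
            .build();
        System.out.println(start.getApplicationName() + " -> "
            + finish.getFinalApplicationStatus());
      }
    }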
+ 38 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto

@@ -87,6 +87,19 @@ message ContainerProto {
   optional hadoop.common.TokenProto container_token = 6;
 }
 
+message ContainerReportProto {
+  optional ContainerIdProto container_id = 1;
+  optional ResourceProto resource = 2;
+  optional NodeIdProto node_id = 3;
+  optional PriorityProto priority = 4;
+  optional int64 start_time = 5;
+  optional int64 finish_time = 6;
+  optional string diagnostics_info = 7 [default = "N/A"];
+  optional string log_url = 8;
+  optional int32 container_exit_status = 9;
+  optional ContainerStateProto container_state = 10;
+}
+
 enum YarnApplicationStateProto {
   NEW = 1;
   NEW_SAVING = 2;
@@ -98,6 +111,21 @@ enum YarnApplicationStateProto {
   KILLED = 8;
 }
 
+enum YarnApplicationAttemptStateProto {
+  APP_ATTEMPT_NEW = 1;
+  APP_ATTEMPT_SUBMITTED = 2;
+  APP_ATTEMPT_SCHEDULED = 3;
+  APP_ATTEMPT_ALLOCATED_SAVING = 4;
+  APP_ATTEMPT_ALLOCATED = 5;
+  APP_ATTEMPT_LAUNCHED = 6;
+  APP_ATTEMPT_FAILED = 7;
+  APP_ATTEMPT_RUNNING = 8;
+  APP_ATTEMPT_FINAL_SAVING = 9;
+  APP_ATTEMPT_FINISHING = 10;
+  APP_ATTEMPT_FINISHED = 11;
+  APP_ATTEMPT_KILLED = 12;
+}
+
 enum FinalApplicationStatusProto {
   APP_UNDEFINED = 0;
   APP_SUCCEEDED = 1;
@@ -164,6 +192,16 @@ message ApplicationReportProto {
   optional hadoop.common.TokenProto am_rm_token = 19;
 }
 
+message ApplicationAttemptReportProto {
+  optional ApplicationAttemptIdProto application_attempt_id = 1;
+  optional string host = 2;
+  optional int32 rpc_port = 3;
+  optional string tracking_url = 4;
+  optional string diagnostics = 5 [default = "N/A"];
+  optional YarnApplicationAttemptStateProto yarn_application_attempt_state = 6;
+  optional ContainerIdProto am_container_id = 7;
+}
+
 enum NodeStateProto {
   NS_NEW = 1; 
   NS_RUNNING = 2; 

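One detail worth noting in ContainerReportProto and ApplicationAttemptReportProto above is the [default = "N/A"] annotation on the diagnostics fields: with proto2 semantics, an unset field reads back as the declared default rather than the empty string, which matches the "N/A" stubbing described in the AHSClient javadoc later in this change. A small sketch, assuming the standard YarnProtos generated class:

    import org.apache.hadoop.yarn.proto.YarnProtos.ContainerReportProto;

    public class DefaultDiagnosticsSketch {
      public static void main(String[] args) {
        ContainerReportProto empty = ContainerReportProto.getDefaultInstance();
        // The field was never set, yet the declared default is returned.
        System.out.println(empty.hasDiagnosticsInfo()); // false
        System.out.println(empty.getDiagnosticsInfo()); // N/A
      }
    }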
+ 36 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto

@@ -240,3 +240,39 @@ message GetContainerStatusesResponseProto {
   repeated ContainerStatusProto status = 1;
   repeated ContainerExceptionMapProto failed_requests = 2;
 }
+
+//////////////////////////////////////////////////////
+/////// Application_History_Protocol /////////////////
+//////////////////////////////////////////////////////
+
+message GetApplicationAttemptReportRequestProto {
+  optional ApplicationAttemptIdProto application_attempt_id = 1;
+}
+
+message GetApplicationAttemptReportResponseProto {
+  optional ApplicationAttemptReportProto application_attempt_report = 1;
+}
+
+message GetApplicationAttemptsRequestProto {
+  optional ApplicationIdProto application_id = 1;
+}
+
+message GetApplicationAttemptsResponseProto {
+  repeated ApplicationAttemptReportProto application_attempts = 1;
+}
+
+message GetContainerReportRequestProto {
+  optional ContainerIdProto container_id = 1;
+}
+
+message GetContainerReportResponseProto {
+  optional ContainerReportProto container_report = 1;
+}
+
+message GetContainersRequestProto {
+  optional ApplicationAttemptIdProto application_attempt_id = 1;
+}
+
+message GetContainersResponseProto {
+  repeated ContainerReportProto containers = 1;
+}

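Each request/response pair above is mirrored by a record class in org.apache.hadoop.yarn.api.protocolrecords whose PB implementation marshals to these protos; the newInstance factories below are the same ones the client code later in this change calls (the ids are illustrative):

    import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsRequest;
    import org.apache.hadoop.yarn.api.protocolrecords.GetContainerReportRequest;
    import org.apache.hadoop.yarn.api.protocolrecords.GetContainersRequest;
    import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
    import org.apache.hadoop.yarn.api.records.ApplicationId;
    import org.apache.hadoop.yarn.api.records.ContainerId;

    public class HistoryRequestSketch {
      public static void main(String[] args) {
        ApplicationId appId = ApplicationId.newInstance(1234, 5);
        ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 1);
        // One request record per rpc declared in ApplicationHistoryProtocolService.
        GetApplicationAttemptsRequest attempts =
            GetApplicationAttemptsRequest.newInstance(appId);
        GetContainersRequest containers = GetContainersRequest.newInstance(attemptId);
        GetContainerReportRequest oneContainer = GetContainerReportRequest
            .newInstance(ContainerId.newInstance(attemptId, 1));
        System.out.println(attempts + " " + containers + " " + oneContainer);
      }
    }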
+ 180 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/AHSClient.java

@@ -0,0 +1,180 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.client.api;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceAudience.Private;
+import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ApplicationReport;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerReport;
+import org.apache.hadoop.yarn.client.api.impl.AHSClientImpl;
+import org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException;
+import org.apache.hadoop.yarn.exceptions.ContainerNotFoundException;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+
+@InterfaceAudience.Public
+@InterfaceStability.Stable
+public abstract class AHSClient extends AbstractService {
+
+  /**
+   * Create a new instance of AHSClient.
+   */
+  @Public
+  public static AHSClient createAHSClient() {
+    AHSClient client = new AHSClientImpl();
+    return client;
+  }
+
+  @Private
+  public AHSClient(String name) {
+    super(name);
+  }
+
+  /**
+   * <p>
+   * Get a report of the given Application.
+   * </p>
+   * 
+   * <p>
+   * In secure mode, <code>YARN</code> verifies access to the application, queue
+   * etc. before accepting the request.
+   * </p>
+   * 
+   * <p>
+   * If the user does not have <code>VIEW_APP</code> access then the following
+   * fields in the report will be set to stubbed values:
+   * <ul>
+   * <li>host - set to "N/A"</li>
+   * <li>RPC port - set to -1</li>
+   * <li>client token - set to "N/A"</li>
+   * <li>diagnostics - set to "N/A"</li>
+   * <li>tracking URL - set to "N/A"</li>
+   * <li>original tracking URL - set to "N/A"</li>
+   * <li>resource usage report - all values are -1</li>
+   * </ul>
+   * </p>
+   * 
+   * @param appId
+   *          {@link ApplicationId} of the application that needs a report
+   * @return application report
+   * @throws YarnException
+   * @throws IOException
+   */
+  public abstract ApplicationReport getApplicationReport(ApplicationId appId)
+      throws YarnException, IOException;
+
+  /**
+   * <p>
+   * Get a report (ApplicationReport) of all Applications in the cluster.
+   * </p>
+   * 
+   * <p>
+   * If the user does not have <code>VIEW_APP</code> access for an application
+   * then the corresponding report will be filtered as described in
+   * {@link #getApplicationReport(ApplicationId)}.
+   * </p>
+   * 
+   * @return a list of reports for all applications
+   * @throws YarnException
+   * @throws IOException
+   */
+  public abstract List<ApplicationReport> getApplications()
+      throws YarnException, IOException;
+
+  /**
+   * <p>
+   * Get a report of the given ApplicationAttempt.
+   * </p>
+   * 
+   * <p>
+   * In secure mode, <code>YARN</code> verifies access to the application, queue
+   * etc. before accepting the request.
+   * </p>
+   * 
+   * @param applicationAttemptId
+   *          {@link ApplicationAttemptId} of the application attempt that needs
+   *          a report
+   * @return application attempt report
+   * @throws YarnException
+   * @throws ApplicationAttemptNotFoundException if the application attempt is
+   *         not found
+   * @throws IOException
+   */
+  public abstract ApplicationAttemptReport getApplicationAttemptReport(
+      ApplicationAttemptId applicationAttemptId) throws YarnException,
+      IOException;
+
+  /**
+   * <p>
+   * Get reports of all application attempts of an application in the cluster.
+   * </p>
+   * 
+   * @param applicationId
+   * @return a list of reports for all application attempts for specified
+   *         application
+   * @throws YarnException
+   * @throws IOException
+   */
+  public abstract List<ApplicationAttemptReport> getApplicationAttempts(
+      ApplicationId applicationId) throws YarnException, IOException;
+
+  /**
+   * <p>
+   * Get a report of the given Container.
+   * </p>
+   * 
+   * <p>
+   * In secure mode, <code>YARN</code> verifies access to the application, queue
+   * etc. before accepting the request.
+   * </p>
+   * 
+   * @param containerId
+   *          {@link ContainerId} of the container that needs a report
+   * @return container report
+   * @throws YarnException
+   * @throws ContainerNotFoundException if the container is not found
+   * @throws IOException
+   */
+  public abstract ContainerReport getContainerReport(ContainerId containerId)
+      throws YarnException, IOException;
+
+  /**
+   * <p>
+   * Get reports of all containers of an application attempt in the cluster.
+   * </p>
+   * 
+   * @param applicationAttemptId
+   * @return a list of reports of all containers for specified application
+   *         attempt
+   * @throws YarnException
+   * @throws IOException
+   */
+  public abstract List<ContainerReport> getContainers(
+      ApplicationAttemptId applicationAttemptId) throws YarnException,
+      IOException;
+}

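A minimal usage sketch of the abstract client above, following the same init/start/stop lifecycle the tests in this change exercise (the application id is illustrative):

    import org.apache.hadoop.yarn.api.records.ApplicationId;
    import org.apache.hadoop.yarn.api.records.ApplicationReport;
    import org.apache.hadoop.yarn.client.api.AHSClient;
    import org.apache.hadoop.yarn.conf.YarnConfiguration;

    public class AhsClientSketch {
      public static void main(String[] args) throws Exception {
        AHSClient client = AHSClient.createAHSClient();
        client.init(new YarnConfiguration());
        client.start();
        try {
          // Reports come from the history server, so finished applications the
          // RM has already purged are still visible here.
          ApplicationReport report =
              client.getApplicationReport(ApplicationId.newInstance(1234, 5));
          System.out.println(report.getApplicationId() + " is "
              + report.getYarnApplicationState());
        } finally {
          client.stop();
        }
      }
    }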
+ 76 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/YarnClient.java

@@ -29,9 +29,13 @@ import org.apache.hadoop.classification.InterfaceAudience.Public;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ApplicationReport;
 import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerReport;
 import org.apache.hadoop.yarn.api.records.NodeReport;
 import org.apache.hadoop.yarn.api.records.NodeState;
 import org.apache.hadoop.yarn.api.records.QueueInfo;
@@ -40,6 +44,7 @@ import org.apache.hadoop.yarn.api.records.Token;
 import org.apache.hadoop.yarn.api.records.YarnApplicationState;
 import org.apache.hadoop.yarn.api.records.YarnClusterMetrics;
 import org.apache.hadoop.yarn.client.api.impl.YarnClientImpl;
+import org.apache.hadoop.yarn.exceptions.ContainerNotFoundException;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
 
@@ -360,4 +365,75 @@ public abstract class YarnClient extends AbstractService {
    */
   public abstract List<QueueUserACLInfo> getQueueAclsInfo() throws YarnException,
       IOException;
+  
+  /**
+   * <p>
+   * Get a report of the given ApplicationAttempt.
+   * </p>
+   * 
+   * <p>
+   * In secure mode, <code>YARN</code> verifies access to the application, queue
+   * etc. before accepting the request.
+   * </p>
+   * 
+   * @param applicationAttemptId
+   *          {@link ApplicationAttemptId} of the application attempt that needs
+   *          a report
+   * @return application attempt report
+   * @throws YarnException
+   * @throws ApplicationAttemptNotFoundException if the application attempt is
+   *         not found
+   * @throws IOException
+   */
+  public abstract ApplicationAttemptReport getApplicationAttemptReport(
+      ApplicationAttemptId applicationAttemptId) throws YarnException, IOException;
+
+  /**
+   * <p>
+   * Get reports of all application attempts of an application in the cluster.
+   * </p>
+   * 
+   * @param applicationId
+   * @return a list of reports for all application attempts for specified
+   *         application.
+   * @throws YarnException
+   * @throws IOException
+   */
+  public abstract List<ApplicationAttemptReport> getApplicationAttempts(
+      ApplicationId applicationId) throws YarnException, IOException;
+
+  /**
+   * <p>
+   * Get a report of the given Container.
+   * </p>
+   * 
+   * <p>
+   * In secure mode, <code>YARN</code> verifies access to the application, queue
+   * etc. before accepting the request.
+   * </p>
+   * 
+   * @param containerId
+   *          {@link ContainerId} of the container that needs a report
+   * @return container report
+   * @throws YarnException
+   * @throws ContainerNotFoundException if the container is not found.
+   * @throws IOException
+   */
+  public abstract ContainerReport getContainerReport(ContainerId containerId)
+      throws YarnException, IOException;
+
+  /**
+   * <p>
+   * Get reports of all containers of an application attempt in the cluster.
+   * </p>
+   * 
+   * @param applicationAttemptId
+   * @return a list of reports of all containers for the specified application
+   *         attempt
+   * @throws YarnException
+   * @throws IOException
+   */
+  public abstract List<ContainerReport> getContainers(
+      ApplicationAttemptId applicationAttemptId) throws YarnException,
+      IOException;
 }

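Because the four new methods delegate to the AHS (see YarnClientImpl below), they require the history service to be enabled or they throw. A sketch that walks every attempt of an application and the containers of each attempt, assuming the existing YarnClient.createYarnClient() factory and an illustrative application id:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport;
    import org.apache.hadoop.yarn.api.records.ApplicationId;
    import org.apache.hadoop.yarn.api.records.ContainerReport;
    import org.apache.hadoop.yarn.client.api.YarnClient;
    import org.apache.hadoop.yarn.conf.YarnConfiguration;

    public class AttemptWalkSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new YarnConfiguration();
        // Without this flag, every call below throws "History service is not enabled."
        conf.setBoolean(YarnConfiguration.YARN_HISTORY_SERVICE_ENABLED, true);
        YarnClient client = YarnClient.createYarnClient();
        client.init(conf);
        client.start();
        try {
          ApplicationId appId = ApplicationId.newInstance(1234, 5);
          for (ApplicationAttemptReport attempt :
              client.getApplicationAttempts(appId)) {
            for (ContainerReport c :
                client.getContainers(attempt.getApplicationAttemptId())) {
              System.out.println(attempt.getApplicationAttemptId()
                  + " -> " + c.getContainerId());
            }
          }
        } finally {
          client.stop();
        }
      }
    }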
+ 155 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AHSClientImpl.java

@@ -0,0 +1,155 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.client.api.impl;
+
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.util.List;
+
+import org.apache.hadoop.classification.InterfaceAudience.Private;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.ipc.RPC;
+import org.apache.hadoop.yarn.api.ApplicationHistoryProtocol;
+import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.GetContainerReportRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetContainerReportResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.GetContainersRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetContainersResponse;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ApplicationReport;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerReport;
+import org.apache.hadoop.yarn.client.AHSProxy;
+import org.apache.hadoop.yarn.client.api.AHSClient;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
+
+@Private
+@Unstable
+public class AHSClientImpl extends AHSClient {
+
+  protected ApplicationHistoryProtocol ahsClient;
+  protected InetSocketAddress ahsAddress;
+
+  public AHSClientImpl() {
+    super(AHSClientImpl.class.getName());
+  }
+
+  private static InetSocketAddress getAHSAddress(Configuration conf) {
+    return conf.getSocketAddr(YarnConfiguration.AHS_ADDRESS,
+        YarnConfiguration.DEFAULT_AHS_ADDRESS,
+        YarnConfiguration.DEFAULT_AHS_PORT);
+  }
+
+  @Override
+  protected void serviceInit(Configuration conf) throws Exception {
+    this.ahsAddress = getAHSAddress(conf);
+    super.serviceInit(conf);
+  }
+
+  @Override
+  protected void serviceStart() throws Exception {
+    try {
+      ahsClient = AHSProxy.createAHSProxy(getConfig(),
+          ApplicationHistoryProtocol.class, this.ahsAddress);
+    } catch (IOException e) {
+      throw new YarnRuntimeException(e);
+    }
+    super.serviceStart();
+  }
+
+  @Override
+  protected void serviceStop() throws Exception {
+    if (this.ahsClient != null) {
+      RPC.stopProxy(this.ahsClient);
+    }
+    super.serviceStop();
+  }
+
+  @Override
+  public ApplicationReport getApplicationReport(ApplicationId appId)
+      throws YarnException, IOException {
+    GetApplicationReportRequest request = GetApplicationReportRequest
+        .newInstance(appId);
+    GetApplicationReportResponse response = ahsClient
+        .getApplicationReport(request);
+    return response.getApplicationReport();
+  }
+
+  @Override
+  public List<ApplicationReport> getApplications() throws YarnException,
+      IOException {
+    GetApplicationsRequest request = GetApplicationsRequest.newInstance(null,
+        null);
+    GetApplicationsResponse response = ahsClient.getApplications(request);
+    return response.getApplicationList();
+  }
+
+  @Override
+  public ApplicationAttemptReport getApplicationAttemptReport(
+      ApplicationAttemptId applicationAttemptId) throws YarnException,
+      IOException {
+    GetApplicationAttemptReportRequest request = GetApplicationAttemptReportRequest
+        .newInstance(applicationAttemptId);
+    GetApplicationAttemptReportResponse response = ahsClient
+        .getApplicationAttemptReport(request);
+    return response.getApplicationAttemptReport();
+  }
+
+  @Override
+  public List<ApplicationAttemptReport> getApplicationAttempts(
+      ApplicationId appId) throws YarnException, IOException {
+    GetApplicationAttemptsRequest request = GetApplicationAttemptsRequest
+        .newInstance(appId);
+    GetApplicationAttemptsResponse response = ahsClient
+        .getApplicationAttempts(request);
+    return response.getApplicationAttemptList();
+  }
+
+  @Override
+  public ContainerReport getContainerReport(ContainerId containerId)
+      throws YarnException, IOException {
+    GetContainerReportRequest request = GetContainerReportRequest
+        .newInstance(containerId);
+    GetContainerReportResponse response = ahsClient.getContainerReport(request);
+    return response.getContainerReport();
+  }
+
+  @Override
+  public List<ContainerReport> getContainers(
+      ApplicationAttemptId applicationAttemptId) throws YarnException,
+      IOException {
+    GetContainersRequest request = GetContainersRequest
+        .newInstance(applicationAttemptId);
+    GetContainersResponse response = ahsClient.getContainers(request);
+    return response.getContainerList();
+  }
+
+}

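serviceInit resolves the history server endpoint from configuration before serviceStart builds the proxy, so pointing a client at a non-default AHS is just a matter of setting the address key first; a sketch with a hypothetical host and port:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.yarn.client.api.AHSClient;
    import org.apache.hadoop.yarn.conf.YarnConfiguration;

    public class CustomAhsAddressSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new YarnConfiguration();
        // "history.example.com:10200" is a placeholder; getAHSAddress() parses
        // whatever host:port pair is stored under YarnConfiguration.AHS_ADDRESS.
        conf.set(YarnConfiguration.AHS_ADDRESS, "history.example.com:10200");
        AHSClient client = AHSClient.createAHSClient();
        client.init(conf);  // serviceInit caches the resolved InetSocketAddress
        client.start();     // serviceStart builds the ApplicationHistoryProtocol proxy
        client.stop();
      }
    }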
+ 81 - 6
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java

@@ -49,9 +49,13 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ApplicationReport;
 import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerReport;
 import org.apache.hadoop.yarn.api.records.NodeReport;
 import org.apache.hadoop.yarn.api.records.NodeState;
 import org.apache.hadoop.yarn.api.records.QueueInfo;
@@ -60,9 +64,11 @@ import org.apache.hadoop.yarn.api.records.Token;
 import org.apache.hadoop.yarn.api.records.YarnApplicationState;
 import org.apache.hadoop.yarn.api.records.YarnClusterMetrics;
 import org.apache.hadoop.yarn.client.ClientRMProxy;
+import org.apache.hadoop.yarn.client.api.AHSClient;
 import org.apache.hadoop.yarn.client.api.YarnClient;
 import org.apache.hadoop.yarn.client.api.YarnClientApplication;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
 import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
@@ -80,6 +86,8 @@ public class YarnClientImpl extends YarnClient {
   protected ApplicationClientProtocol rmClient;
   protected long submitPollIntervalMillis;
   private long asyncApiPollIntervalMillis;
+  protected AHSClient historyClient;
+  private boolean historyServiceEnabled;
 
   private static final String ROOT = "root";
 
@@ -100,6 +108,14 @@ public class YarnClientImpl extends YarnClient {
         YarnConfiguration.YARN_CLIENT_APP_SUBMISSION_POLL_INTERVAL_MS,
         YarnConfiguration.DEFAULT_YARN_CLIENT_APPLICATION_CLIENT_PROTOCOL_POLL_INTERVAL_MS);
     }
+
+    if (conf.getBoolean(YarnConfiguration.YARN_HISTORY_SERVICE_ENABLED,
+      YarnConfiguration.DEFAULT_YARN_HISTORY_SERVICE_ENABLED)) {
+      historyServiceEnabled = true;
+      historyClient = AHSClientImpl.createAHSClient();
+      historyClient.init(getConfig());
+    }
+
     super.serviceInit(conf);
   }
 
@@ -107,7 +123,10 @@ public class YarnClientImpl extends YarnClient {
   protected void serviceStart() throws Exception {
     try {
       rmClient = ClientRMProxy.createRMProxy(getConfig(),
-            ApplicationClientProtocol.class);
+          ApplicationClientProtocol.class);
+      if (historyServiceEnabled) {
+        historyClient.start();
+      }
     } catch (IOException e) {
       throw new YarnRuntimeException(e);
     }
@@ -119,6 +138,9 @@ public class YarnClientImpl extends YarnClient {
     if (this.rmClient != null) {
       RPC.stopProxy(this.rmClient);
     }
+    if (historyServiceEnabled) {
+      historyClient.stop();
+    }
     super.serviceStop();
   }
 
@@ -207,11 +229,27 @@ public class YarnClientImpl extends YarnClient {
   @Override
   public ApplicationReport getApplicationReport(ApplicationId appId)
       throws YarnException, IOException {
-    GetApplicationReportRequest request =
-        Records.newRecord(GetApplicationReportRequest.class);
-    request.setApplicationId(appId);
-    GetApplicationReportResponse response =
-        rmClient.getApplicationReport(request);
+    GetApplicationReportResponse response = null;
+    try {
+      GetApplicationReportRequest request = Records
+          .newRecord(GetApplicationReportRequest.class);
+      request.setApplicationId(appId);
+      response = rmClient.getApplicationReport(request);
+    } catch (YarnException e) {
+
+      if (!historyServiceEnabled) {
+        // Just throw it as usual if historyService is not enabled.
+        throw e;
+      }
+
+      // Even if the history service is enabled, rethrow every exception except
+      // ApplicationNotFoundException: only that case falls back to the AHS.
+      if (!(e.getClass() == ApplicationNotFoundException.class)) {
+        throw e;
+      }
+
+      return historyClient.getApplicationReport(appId);
+    }
     return response.getApplicationReport();
   }
 
@@ -373,4 +411,41 @@ public class YarnClientImpl extends YarnClient {
   public void setRMClient(ApplicationClientProtocol rmClient) {
     this.rmClient = rmClient;
   }
+
+  @Override
+  public ApplicationAttemptReport getApplicationAttemptReport(
+      ApplicationAttemptId appAttemptId) throws YarnException, IOException {
+    if (historyServiceEnabled) {
+      return historyClient.getApplicationAttemptReport(appAttemptId);
+    }
+    throw new YarnException("History service is not enabled.");
+  }
+
+  @Override
+  public List<ApplicationAttemptReport> getApplicationAttempts(
+      ApplicationId appId) throws YarnException, IOException {
+    if (historyServiceEnabled) {
+      return historyClient.getApplicationAttempts(appId);
+    }
+    throw new YarnException("History service is not enabled.");
+  }
+
+  @Override
+  public ContainerReport getContainerReport(ContainerId containerId)
+      throws YarnException, IOException {
+    if (historyServiceEnabled) {
+      return historyClient.getContainerReport(containerId);
+    }
+    throw new YarnException("History service is not enabled.");
+  }
+
+  @Override
+  public List<ContainerReport> getContainers(
+      ApplicationAttemptId applicationAttemptId) throws YarnException,
+      IOException {
+    if (historyServiceEnabled) {
+      return historyClient.getContainers(applicationAttemptId);
+    }
+    throw new YarnException("History service is not enabled.");
+  }
 }

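The net effect of the getApplicationReport change above: with the history service enabled, an ApplicationNotFoundException from the RM is swallowed and the same query is retried against historyClient, so callers see one seamless lookup across live and finished applications. A sketch of the caller's view (the id is illustrative):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.yarn.api.records.ApplicationId;
    import org.apache.hadoop.yarn.client.api.YarnClient;
    import org.apache.hadoop.yarn.conf.YarnConfiguration;

    public class FallbackSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new YarnConfiguration();
        conf.setBoolean(YarnConfiguration.YARN_HISTORY_SERVICE_ENABLED, true);
        YarnClient client = YarnClient.createYarnClient();
        client.init(conf);
        client.start();
        try {
          // If the RM no longer remembers this application, the call transparently
          // falls back to historyClient.getApplicationReport(...) instead of
          // propagating ApplicationNotFoundException.
          System.out.println(client.getApplicationReport(
              ApplicationId.newInstance(1234, 5)).getFinalApplicationStatus());
        } finally {
          client.stop();
        }
      }
    }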
+ 244 - 69
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java

@@ -35,8 +35,10 @@ import org.apache.commons.cli.Options;
 import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.classification.InterfaceStability.Unstable;
 import org.apache.hadoop.util.ToolRunner;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ApplicationReport;
+import org.apache.hadoop.yarn.api.records.ContainerReport;
 import org.apache.hadoop.yarn.api.records.YarnApplicationState;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.util.ConverterUtils;
@@ -46,13 +48,22 @@ import com.google.common.annotations.VisibleForTesting;
 @Private
 @Unstable
 public class ApplicationCLI extends YarnCLI {
-  private static final String APPLICATIONS_PATTERN =
-    "%30s\t%20s\t%20s\t%10s\t%10s\t%18s\t%18s\t%15s\t%35s" +
-    System.getProperty("line.separator");
+  private static final String APPLICATIONS_PATTERN = 
+    "%30s\t%20s\t%20s\t%10s\t%10s\t%18s\t%18s\t%15s\t%35s"
+      + System.getProperty("line.separator");
+  private static final String APPLICATION_ATTEMPTS_PATTERN =
+    "%30s\t%20s\t%35s\t%35s"
+      + System.getProperty("line.separator");
+  private static final String CONTAINER_PATTERN = 
+    "%30s\t%20s\t%20s\t%20s\t%20s\t%35s"
+      + System.getProperty("line.separator");
 
   private static final String APP_TYPE_CMD = "appTypes";
-  private static final String APP_STATE_CMD ="appStates";
+  private static final String APP_STATE_CMD = "appStates";
   private static final String ALLSTATES_OPTION = "ALL";
+  public static final String APPLICATION = "application";
+  public static final String APPLICATION_ATTEMPT = "applicationattempt";
+  public static final String CONTAINER = "container";
 
   private boolean allAppStates;
 
@@ -69,23 +80,33 @@ public class ApplicationCLI extends YarnCLI {
   public int run(String[] args) throws Exception {
 
     Options opts = new Options();
-    opts.addOption(STATUS_CMD, true, "Prints the status of the application.");
-    opts.addOption(LIST_CMD, false, "List applications from the RM. " +
-        "Supports optional use of -appTypes to filter applications " +
-        "based on application type, " +
-        "and -appStates to filter applications based on application state");
+    opts.addOption(STATUS_CMD, true,
+        "Prints the status of the application.");
+    if (args.length > 0
+        && args[0].compareToIgnoreCase(APPLICATION_ATTEMPT) == 0) {
+      opts.addOption(LIST_CMD, true,
+          "List application attempts for aplication from AHS. ");
+    } else if (args.length > 0 && args[0].compareToIgnoreCase("container") == 0) {
+      opts.addOption(LIST_CMD, true,
+          "List containers for application attempts from AHS. ");
+    } else {
+      opts.addOption(LIST_CMD, false, "List applications from the RM. "
+          + "Supports optional use of -appTypes to filter applications "
+          + "based on application type, "
+          + "and -appStates to filter applications based on application state");
+    }
     opts.addOption(KILL_CMD, true, "Kills the application.");
     opts.addOption(HELP_CMD, false, "Displays help for all commands.");
-    Option appTypeOpt = new Option(APP_TYPE_CMD, true, "Works with -list to " +
-        "filter applications based on " +
-        "input comma-separated list of application types.");
+    Option appTypeOpt = new Option(APP_TYPE_CMD, true, "Works with -list to "
+        + "filter applications based on "
+        + "input comma-separated list of application types.");
     appTypeOpt.setValueSeparator(',');
     appTypeOpt.setArgs(Option.UNLIMITED_VALUES);
     appTypeOpt.setArgName("Types");
     opts.addOption(appTypeOpt);
-    Option appStateOpt = new Option(APP_STATE_CMD, true, "Works with -list " +
-        "to filter applications based on input comma-separated list of " +
-        "application states. " + getAllValidApplicationStates());
+    Option appStateOpt = new Option(APP_STATE_CMD, true, "Works with -list "
+        + "to filter applications based on input comma-separated list of "
+        + "application states. " + getAllValidApplicationStates());
     appStateOpt.setValueSeparator(',');
     appStateOpt.setArgs(Option.UNLIMITED_VALUES);
     appStateOpt.setArgName("States");
@@ -104,50 +125,77 @@ public class ApplicationCLI extends YarnCLI {
     }
 
     if (cliParser.hasOption(STATUS_CMD)) {
-      if (args.length != 2) {
+      if ((args[0].compareToIgnoreCase(APPLICATION) == 0)
+          || (args[0].compareToIgnoreCase(APPLICATION_ATTEMPT) == 0)
+          || (args[0].compareToIgnoreCase(CONTAINER) == 0)) {
+        if (args.length != 3) {
+          printUsage(opts);
+          return exitCode;
+        }
+      } else if (args.length != 2) {
         printUsage(opts);
         return exitCode;
       }
-      printApplicationReport(cliParser.getOptionValue(STATUS_CMD));
+      if (args[0].compareToIgnoreCase(APPLICATION_ATTEMPT) == 0) {
+        printApplicationAttemptReport(cliParser.getOptionValue(STATUS_CMD));
+      } else if (args[0].compareToIgnoreCase(CONTAINER) == 0) {
+        printContainerReport(cliParser.getOptionValue(STATUS_CMD));
+      } else {
+        printApplicationReport(cliParser.getOptionValue(STATUS_CMD));
+      }
     } else if (cliParser.hasOption(LIST_CMD)) {
-      allAppStates = false;
-      Set<String> appTypes = new HashSet<String>();
-      if(cliParser.hasOption(APP_TYPE_CMD)) {
-        String[] types = cliParser.getOptionValues(APP_TYPE_CMD);
-        if (types != null) {
-          for (String type : types) {
-            if (!type.trim().isEmpty()) {
-              appTypes.add(type.toUpperCase().trim());
+      if (args[0].compareToIgnoreCase(APPLICATION_ATTEMPT) == 0) {
+        if (args.length != 3) {
+          printUsage(opts);
+          return exitCode;
+        }
+        listApplicationAttempts(cliParser.getOptionValue(LIST_CMD));
+      } else if (args[0].compareToIgnoreCase(CONTAINER) == 0) {
+        if (args.length != 3) {
+          printUsage(opts);
+          return exitCode;
+        }
+        listContainers(cliParser.getOptionValue(LIST_CMD));
+      } else {
+        allAppStates = false;
+        Set<String> appTypes = new HashSet<String>();
+        if (cliParser.hasOption(APP_TYPE_CMD)) {
+          String[] types = cliParser.getOptionValues(APP_TYPE_CMD);
+          if (types != null) {
+            for (String type : types) {
+              if (!type.trim().isEmpty()) {
+                appTypes.add(type.toUpperCase().trim());
+              }
             }
           }
         }
-      }
 
-      EnumSet<YarnApplicationState> appStates =
-          EnumSet.noneOf(YarnApplicationState.class);
-      if (cliParser.hasOption(APP_STATE_CMD)) {
-        String[] states = cliParser.getOptionValues(APP_STATE_CMD);
-        if (states != null) {
-          for (String state : states) {
-            if (!state.trim().isEmpty()) {
-              if (state.trim().equalsIgnoreCase(ALLSTATES_OPTION)) {
-                allAppStates = true;
-                break;
-              }
-              try {
-                appStates.add(YarnApplicationState.valueOf(state.toUpperCase()
-                    .trim()));
-              } catch (IllegalArgumentException ex) {
-                sysout.println("The application state " + state
-                    + " is invalid.");
-                sysout.println(getAllValidApplicationStates());
-                return exitCode;
+        EnumSet<YarnApplicationState> appStates = EnumSet
+            .noneOf(YarnApplicationState.class);
+        if (cliParser.hasOption(APP_STATE_CMD)) {
+          String[] states = cliParser.getOptionValues(APP_STATE_CMD);
+          if (states != null) {
+            for (String state : states) {
+              if (!state.trim().isEmpty()) {
+                if (state.trim().equalsIgnoreCase(ALLSTATES_OPTION)) {
+                  allAppStates = true;
+                  break;
+                }
+                try {
+                  appStates.add(YarnApplicationState.valueOf(state
+                      .toUpperCase().trim()));
+                } catch (IllegalArgumentException ex) {
+                  sysout.println("The application state " + state
+                      + " is invalid.");
+                  sysout.println(getAllValidApplicationStates());
+                  return exitCode;
+                }
               }
             }
           }
         }
+        listApplications(appTypes, appStates);
       }
-      listApplications(appTypes, appStates);
     } else if (cliParser.hasOption(KILL_CMD)) {
       if (args.length != 2) {
         printUsage(opts);
@@ -175,8 +223,85 @@ public class ApplicationCLI extends YarnCLI {
   }
 
   /**
-   * Lists the applications matching the given application Types
-   * And application States present in the Resource Manager
+   * Prints the application attempt report for an application attempt id.
+   * 
+   * @param applicationAttemptId
+   * @throws YarnException
+   */
+  private void printApplicationAttemptReport(String applicationAttemptId)
+      throws YarnException, IOException {
+    ApplicationAttemptReport appAttemptReport = client
+        .getApplicationAttemptReport(ConverterUtils
+            .toApplicationAttemptId(applicationAttemptId));
+    // Use PrintWriter.println, which uses correct platform line ending.
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    PrintWriter appAttemptReportStr = new PrintWriter(baos);
+    if (appAttemptReport != null) {
+      appAttemptReportStr.println("Application Attempt Report : ");
+      appAttemptReportStr.print("\tApplicationAttempt-Id : ");
+      appAttemptReportStr.println(appAttemptReport.getApplicationAttemptId());
+      appAttemptReportStr.print("\tState : ");
+      appAttemptReportStr.println(appAttemptReport
+          .getYarnApplicationAttemptState());
+      appAttemptReportStr.print("\tAMContainer : ");
+      appAttemptReportStr.println(appAttemptReport.getAMContainerId()
+          .toString());
+      appAttemptReportStr.print("\tTracking-URL : ");
+      appAttemptReportStr.println(appAttemptReport.getTrackingUrl());
+      appAttemptReportStr.print("\tRPC Port : ");
+      appAttemptReportStr.println(appAttemptReport.getRpcPort());
+      appAttemptReportStr.print("\tAM Host : ");
+      appAttemptReportStr.println(appAttemptReport.getHost());
+      appAttemptReportStr.print("\tDiagnostics : ");
+      appAttemptReportStr.print(appAttemptReport.getDiagnostics());
+    } else {
+      appAttemptReportStr.print("Application Attempt with id '"
+          + applicationAttemptId + "' doesn't exist in History Server.");
+    }
+    appAttemptReportStr.close();
+    sysout.println(baos.toString("UTF-8"));
+  }
+
+  /**
+   * Prints the container report for a container id.
+   * 
+   * @param containerId
+   * @throws YarnException
+   */
+  private void printContainerReport(String containerId) throws YarnException,
+      IOException {
+    ContainerReport containerReport = client.getContainerReport((ConverterUtils
+        .toContainerId(containerId)));
+    // Use PrintWriter.println, which uses correct platform line ending.
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    PrintWriter containerReportStr = new PrintWriter(baos);
+    if (containerReport != null) {
+      containerReportStr.println("Container Report : ");
+      containerReportStr.print("\tContainer-Id : ");
+      containerReportStr.println(containerReport.getContainerId());
+      containerReportStr.print("\tStart-Time : ");
+      containerReportStr.println(containerReport.getStartTime());
+      containerReportStr.print("\tFinish-Time : ");
+      containerReportStr.println(containerReport.getFinishTime());
+      containerReportStr.print("\tState : ");
+      containerReportStr.println(containerReport.getContainerState());
+      containerReportStr.print("\tLOG-URL : ");
+      containerReportStr.println(containerReport.getLogUrl());
+      containerReportStr.print("\tHost : ");
+      containerReportStr.println(containerReport.getAssignedNode());
+      containerReportStr.print("\tDiagnostics : ");
+      containerReportStr.print(containerReport.getDiagnosticsInfo());
+    } else {
+      containerReportStr.print("Container with id '" + containerId
+          + "' doesn't exist in Hostory Server.");
+    }
+    containerReportStr.close();
+    sysout.println(baos.toString("UTF-8"));
+  }
+
+  /**
+   * Lists the applications matching the given application types and
+   * application states present in the Resource Manager.
    * 
    * @param appTypes
    * @param appStates
@@ -188,7 +313,7 @@ public class ApplicationCLI extends YarnCLI {
       IOException {
     PrintWriter writer = new PrintWriter(sysout);
     if (allAppStates) {
-      for(YarnApplicationState appState : YarnApplicationState.values()) {
+      for (YarnApplicationState appState : YarnApplicationState.values()) {
         appStates.add(appState);
       }
     } else {
@@ -199,23 +324,24 @@ public class ApplicationCLI extends YarnCLI {
       }
     }
 
-    List<ApplicationReport> appsReport =
-        client.getApplications(appTypes, appStates);
+    List<ApplicationReport> appsReport = client.getApplications(appTypes,
+        appStates);
 
-    writer
-        .println("Total number of applications (application-types: " + appTypes
-            + " and states: " + appStates + ")" + ":" + appsReport.size());
-    writer.printf(APPLICATIONS_PATTERN, "Application-Id",
-        "Application-Name","Application-Type", "User", "Queue", 
-        "State", "Final-State","Progress", "Tracking-URL");
+    writer.println("Total number of applications (application-types: "
+        + appTypes + " and states: " + appStates + ")" + ":"
+        + appsReport.size());
+    writer.printf(APPLICATIONS_PATTERN, "Application-Id", "Application-Name",
+        "Application-Type", "User", "Queue", "State", "Final-State",
+        "Progress", "Tracking-URL");
     for (ApplicationReport appReport : appsReport) {
       DecimalFormat formatter = new DecimalFormat("###.##%");
       String progress = formatter.format(appReport.getProgress());
       writer.printf(APPLICATIONS_PATTERN, appReport.getApplicationId(),
-          appReport.getName(),appReport.getApplicationType(), appReport.getUser(),
-          appReport.getQueue(),appReport.getYarnApplicationState(),
-          appReport.getFinalApplicationStatus(),progress,
-          appReport.getOriginalTrackingUrl());
+          appReport.getName(), appReport.getApplicationType(), appReport
+              .getUser(), appReport.getQueue(), appReport
+              .getYarnApplicationState(),
+          appReport.getFinalApplicationStatus(), progress, appReport
+              .getOriginalTrackingUrl());
     }
     writer.flush();
   }
@@ -227,8 +353,8 @@ public class ApplicationCLI extends YarnCLI {
    * @throws YarnException
    * @throws IOException
    */
-  private void killApplication(String applicationId)
-      throws YarnException, IOException {
+  private void killApplication(String applicationId) throws YarnException,
+      IOException {
     ApplicationId appId = ConverterUtils.toApplicationId(applicationId);
     ApplicationReport appReport = client.getApplicationReport(appId);
     if (appReport.getYarnApplicationState() == YarnApplicationState.FINISHED
@@ -296,14 +422,63 @@ public class ApplicationCLI extends YarnCLI {
 
   private String getAllValidApplicationStates() {
     StringBuilder sb = new StringBuilder();
-    sb.append("The valid application state can be"
-        + " one of the following: ");
+    sb.append("The valid application state can be" + " one of the following: ");
     sb.append(ALLSTATES_OPTION + ",");
-    for (YarnApplicationState appState : YarnApplicationState
-        .values()) {
-      sb.append(appState+",");
+    for (YarnApplicationState appState : YarnApplicationState.values()) {
+      sb.append(appState + ",");
     }
     String output = sb.toString();
-    return output.substring(0, output.length()-1);
+    return output.substring(0, output.length() - 1);
+  }
+
+  /**
+   * Lists the application attempts matching the given application id.
+   * 
+   * @param appId
+   * @throws YarnException
+   * @throws IOException
+   */
+  private void listApplicationAttempts(String appId) throws YarnException,
+      IOException {
+    PrintWriter writer = new PrintWriter(sysout);
+
+    List<ApplicationAttemptReport> appAttemptsReport = client
+        .getApplicationAttempts(ConverterUtils.toApplicationId(appId));
+    writer.println("Total number of application attempts " + ":"
+        + appAttemptsReport.size());
+    writer.printf(APPLICATION_ATTEMPTS_PATTERN, "ApplicationAttempt-Id",
+        "State", "AM-Container-Id", "Tracking-URL");
+    for (ApplicationAttemptReport appAttemptReport : appAttemptsReport) {
+      writer.printf(APPLICATION_ATTEMPTS_PATTERN, appAttemptReport
+          .getApplicationAttemptId(), appAttemptReport
+          .getYarnApplicationAttemptState(), appAttemptReport
+          .getAMContainerId().toString(), appAttemptReport.getTrackingUrl());
+    }
+    writer.flush();
+  }
+
+  /**
+   * Lists the containers matching the given application attempt.
+   * 
+   * @param appAttemptId
+   * @throws YarnException
+   * @throws IOException
+   */
+  private void listContainers(String appAttemptId) throws YarnException,
+      IOException {
+    PrintWriter writer = new PrintWriter(sysout);
+
+    List<ContainerReport> appsReport = client
+        .getContainers(ConverterUtils.toApplicationAttemptId(appAttemptId));
+    writer.println("Total number of containers " + ":" + appsReport.size());
+    writer.printf(CONTAINER_PATTERN, "Container-Id", "Start Time",
+        "Finish Time", "State", "Host", "LOG-URL");
+    for (ContainerReport containerReport : appsReport) {
+      writer.printf(CONTAINER_PATTERN, containerReport.getContainerId(),
+          containerReport.getStartTime(), containerReport.getFinishTime(),
+          containerReport.getContainerState(), containerReport
+              .getAssignedNode(), containerReport.getLogUrl());
+    }
+    writer.flush();
   }
 }

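Putting the new subcommands together, and assuming the standard yarn launcher script with illustrative ids, the run() logic above supports invocations along these lines:

    $ yarn applicationattempt -list application_1234_0005
    $ yarn applicationattempt -status appattempt_1234_0005_000001
    $ yarn container -list appattempt_1234_0005_000001
    $ yarn container -status container_1234_0005_01_000001

Each -status call fetches the corresponding report from the history server and pretty-prints it through printApplicationAttemptReport or printContainerReport; each -list call renders the tabular output driven by the new format patterns.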
+ 415 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAHSClient.java

@@ -0,0 +1,415 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.client.api.impl;
+
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import junit.framework.Assert;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.api.ApplicationHistoryProtocol;
+import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptReportResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.GetContainerReportRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetContainerReportResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.GetContainersRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetContainersResponse;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ApplicationReport;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerReport;
+import org.apache.hadoop.yarn.api.records.ContainerState;
+import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
+import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.hadoop.yarn.api.records.Priority;
+import org.apache.hadoop.yarn.api.records.YarnApplicationAttemptState;
+import org.apache.hadoop.yarn.api.records.YarnApplicationState;
+import org.apache.hadoop.yarn.client.api.AHSClient;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.junit.Test;
+
+public class TestAHSClient {
+
+  @Test
+  public void testClientStop() {
+    Configuration conf = new Configuration();
+    AHSClient client = AHSClient.createAHSClient();
+    client.init(conf);
+    client.start();
+    client.stop();
+  }
+
+  @Test(timeout = 10000)
+  public void testGetApplications() throws YarnException, IOException {
+    Configuration conf = new Configuration();
+    final AHSClient client = new MockAHSClient();
+    client.init(conf);
+    client.start();
+
+    List<ApplicationReport> expectedReports =
+        ((MockAHSClient) client).getReports();
+
+    List<ApplicationReport> reports = client.getApplications();
+    Assert.assertEquals(reports, expectedReports);
+
+    reports = client.getApplications();
+    Assert.assertEquals(reports.size(), 4);
+    client.stop();
+  }
+
+  @Test(timeout = 10000)
+  public void testGetApplicationReport() throws YarnException, IOException {
+    Configuration conf = new Configuration();
+    final AHSClient client = new MockAHSClient();
+    client.init(conf);
+    client.start();
+
+    List<ApplicationReport> expectedReports =
+        ((MockAHSClient) client).getReports();
+    ApplicationId applicationId = ApplicationId.newInstance(1234, 5);
+    ApplicationReport report = client.getApplicationReport(applicationId);
+    Assert.assertEquals(report, expectedReports.get(0));
+    Assert.assertEquals(report.getApplicationId().toString(), expectedReports
+      .get(0).getApplicationId().toString());
+    client.stop();
+  }
+
+  @Test(timeout = 10000)
+  public void testGetApplicationAttempts() throws YarnException, IOException {
+    Configuration conf = new Configuration();
+    final AHSClient client = new MockAHSClient();
+    client.init(conf);
+    client.start();
+
+    ApplicationId applicationId = ApplicationId.newInstance(1234, 5);
+    List<ApplicationAttemptReport> reports =
+        client.getApplicationAttempts(applicationId);
+    Assert.assertNotNull(reports);
+    Assert.assertEquals(reports.get(0).getApplicationAttemptId(),
+      ApplicationAttemptId.newInstance(applicationId, 1));
+    Assert.assertEquals(reports.get(1).getApplicationAttemptId(),
+      ApplicationAttemptId.newInstance(applicationId, 2));
+    client.stop();
+  }
+
+  @Test(timeout = 10000)
+  public void testGetApplicationAttempt() throws YarnException, IOException {
+    Configuration conf = new Configuration();
+    final AHSClient client = new MockAHSClient();
+    client.init(conf);
+    client.start();
+
+    List<ApplicationReport> expectedReports =
+        ((MockAHSClient) client).getReports();
+
+    ApplicationId applicationId = ApplicationId.newInstance(1234, 5);
+    ApplicationAttemptId appAttemptId =
+        ApplicationAttemptId.newInstance(applicationId, 1);
+    ApplicationAttemptReport report =
+        client.getApplicationAttemptReport(appAttemptId);
+    Assert.assertNotNull(report);
+    Assert.assertEquals(report.getApplicationAttemptId().toString(),
+      expectedReports.get(0).getCurrentApplicationAttemptId().toString());
+    client.stop();
+  }
+
+  @Test(timeout = 10000)
+  public void testGetContainers() throws YarnException, IOException {
+    Configuration conf = new Configuration();
+    final AHSClient client = new MockAHSClient();
+    client.init(conf);
+    client.start();
+
+    ApplicationId applicationId = ApplicationId.newInstance(1234, 5);
+    ApplicationAttemptId appAttemptId =
+        ApplicationAttemptId.newInstance(applicationId, 1);
+    List<ContainerReport> reports = client.getContainers(appAttemptId);
+    Assert.assertNotNull(reports);
+    Assert.assertEquals(reports.get(0).getContainerId(),
+      (ContainerId.newInstance(appAttemptId, 1)));
+    Assert.assertEquals(reports.get(1).getContainerId(),
+      (ContainerId.newInstance(appAttemptId, 2)));
+    client.stop();
+  }
+
+  @Test(timeout = 10000)
+  public void testGetContainerReport() throws YarnException, IOException {
+    Configuration conf = new Configuration();
+    final AHSClient client = new MockAHSClient();
+    client.init(conf);
+    client.start();
+
+    List<ApplicationReport> expectedReports =
+        ((MockAHSClient) client).getReports();
+
+    ApplicationId applicationId = ApplicationId.newInstance(1234, 5);
+    ApplicationAttemptId appAttemptId =
+        ApplicationAttemptId.newInstance(applicationId, 1);
+    ContainerId containerId = ContainerId.newInstance(appAttemptId, 1);
+    ContainerReport report = client.getContainerReport(containerId);
+    Assert.assertNotNull(report);
+    Assert.assertEquals(report.getContainerId().toString(), (ContainerId
+      .newInstance(expectedReports.get(0).getCurrentApplicationAttemptId(), 1))
+      .toString());
+    client.stop();
+  }
+
+  private static class MockAHSClient extends AHSClientImpl {
+    // private ApplicationReport mockReport;
+    private List<ApplicationReport> reports =
+        new ArrayList<ApplicationReport>();
+    private HashMap<ApplicationId, List<ApplicationAttemptReport>> attempts =
+        new HashMap<ApplicationId, List<ApplicationAttemptReport>>();
+    private HashMap<ApplicationAttemptId, List<ContainerReport>> containers =
+        new HashMap<ApplicationAttemptId, List<ContainerReport>>();
+    GetApplicationsResponse mockAppResponse =
+        mock(GetApplicationsResponse.class);
+    GetApplicationReportResponse mockResponse =
+        mock(GetApplicationReportResponse.class);
+    GetApplicationAttemptsResponse mockAppAttemptsResponse =
+        mock(GetApplicationAttemptsResponse.class);
+    GetApplicationAttemptReportResponse mockAttemptResponse =
+        mock(GetApplicationAttemptReportResponse.class);
+    GetContainersResponse mockContainersResponse =
+        mock(GetContainersResponse.class);
+    GetContainerReportResponse mockContainerResponse =
+        mock(GetContainerReportResponse.class);
+
+    public MockAHSClient() {
+      super();
+      createAppReports();
+    }
+
+    @Override
+    public void start() {
+      ahsClient = mock(ApplicationHistoryProtocol.class);
+
+      try {
+        when(
+          ahsClient
+            .getApplicationReport(any(GetApplicationReportRequest.class)))
+          .thenReturn(mockResponse);
+        when(ahsClient.getApplications(any(GetApplicationsRequest.class)))
+          .thenReturn(mockAppResponse);
+        when(
+          ahsClient
+            .getApplicationAttemptReport(any(GetApplicationAttemptReportRequest.class)))
+          .thenReturn(mockAttemptResponse);
+        when(
+          ahsClient
+            .getApplicationAttempts(any(GetApplicationAttemptsRequest.class)))
+          .thenReturn(mockAppAttemptsResponse);
+        when(ahsClient.getContainers(any(GetContainersRequest.class)))
+          .thenReturn(mockContainersResponse);
+
+        when(ahsClient.getContainerReport(any(GetContainerReportRequest.class)))
+          .thenReturn(mockContainerResponse);
+
+      } catch (YarnException e) {
+        Assert.fail("Unexpected YarnException: " + e.getMessage());
+      } catch (IOException e) {
+        Assert.fail("Unexpected IOException: " + e.getMessage());
+      }
+    }
+
+    @Override
+    public List<ApplicationReport> getApplications() throws YarnException,
+        IOException {
+      when(mockAppResponse.getApplicationList()).thenReturn(reports);
+      return super.getApplications();
+    }
+
+    @Override
+    public ApplicationReport getApplicationReport(ApplicationId appId)
+        throws YarnException, IOException {
+      when(mockResponse.getApplicationReport()).thenReturn(getReport(appId));
+      return super.getApplicationReport(appId);
+    }
+
+    @Override
+    public List<ApplicationAttemptReport> getApplicationAttempts(
+        ApplicationId appId) throws YarnException, IOException {
+      when(mockAppAttemptsResponse.getApplicationAttemptList()).thenReturn(
+        getAttempts(appId));
+      return super.getApplicationAttempts(appId);
+    }
+
+    @Override
+    public ApplicationAttemptReport getApplicationAttemptReport(
+        ApplicationAttemptId appAttemptId) throws YarnException, IOException {
+      when(mockAttemptResponse.getApplicationAttemptReport()).thenReturn(
+        getAttempt(appAttemptId));
+      return super.getApplicationAttemptReport(appAttemptId);
+    }
+
+    @Override
+    public List<ContainerReport> getContainers(
+        ApplicationAttemptId appAttemptId) throws YarnException, IOException {
+      when(mockContainersResponse.getContainerList()).thenReturn(
+        getContainersReport(appAttemptId));
+      return super.getContainers(appAttemptId);
+    }
+
+    @Override
+    public ContainerReport getContainerReport(ContainerId containerId)
+        throws YarnException, IOException {
+      when(mockContainerResponse.getContainerReport()).thenReturn(
+        getContainer(containerId));
+      return super.getContainerReport(containerId);
+    }
+
+    @Override
+    public void stop() {
+    }
+
+    public ApplicationReport getReport(ApplicationId appId) {
+      for (ApplicationReport report : reports) {
+        if (appId.equals(report.getApplicationId())) {
+          return report;
+        }
+      }
+      return null;
+    }
+
+    public List<ApplicationAttemptReport> getAttempts(ApplicationId appId) {
+      return attempts.get(appId);
+    }
+
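+    // Attempts are keyed by application id, so this always returns the
+    // first attempt recorded for the attempt's application.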
+    public ApplicationAttemptReport getAttempt(
+        ApplicationAttemptId appAttemptId) {
+      return attempts.get(appAttemptId.getApplicationId()).get(0);
+    }
+
+    public List<ContainerReport> getContainersReport(
+        ApplicationAttemptId appAttemptId) {
+      return containers.get(appAttemptId);
+    }
+
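+    // Containers are keyed by attempt id, so this always returns the first
+    // container recorded for the container's attempt.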
+    public ContainerReport getContainer(ContainerId containerId) {
+      return containers.get(containerId.getApplicationAttemptId()).get(0);
+    }
+
+    public List<ApplicationReport> getReports() {
+      return this.reports;
+    }
+
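+    // Builds the canned fixtures: application_1234_0005 (RUNNING) with two
+    // FINISHED attempts and two COMPLETE containers, plus three more
+    // applications in FINISHED, RUNNING and FAILED states.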
+    private void createAppReports() {
+      ApplicationId applicationId = ApplicationId.newInstance(1234, 5);
+      ApplicationReport newApplicationReport =
+          ApplicationReport.newInstance(applicationId,
+            ApplicationAttemptId.newInstance(applicationId, 1), "user",
+            "queue", "appname", "host", 124, null,
+            YarnApplicationState.RUNNING, "diagnostics", "url", 0, 0,
+            FinalApplicationStatus.SUCCEEDED, null, "N/A", 0.53789f, "YARN",
+            null);
+      List<ApplicationReport> applicationReports =
+          new ArrayList<ApplicationReport>();
+      applicationReports.add(newApplicationReport);
+      List<ApplicationAttemptReport> appAttempts =
+          new ArrayList<ApplicationAttemptReport>();
+      ApplicationAttemptReport attempt =
+          ApplicationAttemptReport.newInstance(
+            ApplicationAttemptId.newInstance(applicationId, 1),
+            "host",
+            124,
+            "url",
+            "diagnostics",
+            YarnApplicationAttemptState.FINISHED,
+            ContainerId.newInstance(
+              newApplicationReport.getCurrentApplicationAttemptId(), 1));
+      appAttempts.add(attempt);
+      ApplicationAttemptReport attempt1 =
+          ApplicationAttemptReport.newInstance(
+            ApplicationAttemptId.newInstance(applicationId, 2),
+            "host",
+            124,
+            "url",
+            "diagnostics",
+            YarnApplicationAttemptState.FINISHED,
+            ContainerId.newInstance(
+              newApplicationReport.getCurrentApplicationAttemptId(), 2));
+      appAttempts.add(attempt1);
+      attempts.put(applicationId, appAttempts);
+
+      List<ContainerReport> containerReports = new ArrayList<ContainerReport>();
+      ContainerReport container =
+          ContainerReport.newInstance(
+            ContainerId.newInstance(attempt.getApplicationAttemptId(), 1),
+            null, NodeId.newInstance("host", 1234), Priority.UNDEFINED, 1234,
+            5678, "diagnosticInfo", "logURL", 0, ContainerState.COMPLETE);
+      containerReports.add(container);
+
+      ContainerReport container1 =
+          ContainerReport.newInstance(
+            ContainerId.newInstance(attempt.getApplicationAttemptId(), 2),
+            null, NodeId.newInstance("host", 1234), Priority.UNDEFINED, 1234,
+            5678, "diagnosticInfo", "logURL", 0, ContainerState.COMPLETE);
+      containerReports.add(container1);
+      containers.put(attempt.getApplicationAttemptId(), containerReports);
+
+      ApplicationId applicationId2 = ApplicationId.newInstance(1234, 6);
+      ApplicationReport newApplicationReport2 =
+          ApplicationReport.newInstance(applicationId2,
+            ApplicationAttemptId.newInstance(applicationId2, 2), "user2",
+            "queue2", "appname2", "host2", 125, null,
+            YarnApplicationState.FINISHED, "diagnostics2", "url2", 2, 2,
+            FinalApplicationStatus.SUCCEEDED, null, "N/A", 0.63789f,
+            "NON-YARN", null);
+      applicationReports.add(newApplicationReport2);
+
+      ApplicationId applicationId3 = ApplicationId.newInstance(1234, 7);
+      ApplicationReport newApplicationReport3 =
+          ApplicationReport.newInstance(applicationId3,
+            ApplicationAttemptId.newInstance(applicationId3, 3), "user3",
+            "queue3", "appname3", "host3", 126, null,
+            YarnApplicationState.RUNNING, "diagnostics3", "url3", 3, 3,
+            FinalApplicationStatus.SUCCEEDED, null, "N/A", 0.73789f,
+            "MAPREDUCE", null);
+      applicationReports.add(newApplicationReport3);
+
+      ApplicationId applicationId4 = ApplicationId.newInstance(1234, 8);
+      ApplicationReport newApplicationReport4 =
+          ApplicationReport.newInstance(applicationId4,
+            ApplicationAttemptId.newInstance(applicationId4, 4), "user4",
+            "queue4", "appname4", "host4", 127, null,
+            YarnApplicationState.FAILED, "diagnostics4", "url4", 4, 4,
+            FinalApplicationStatus.SUCCEEDED, null, "N/A", 0.83789f,
+            "NON-MAPREDUCE", null);
+      applicationReports.add(newApplicationReport4);
+      reports = applicationReports;
+    }
+  }
+}

+ 0 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestYarnClient.java

@@ -58,12 +58,10 @@ import org.apache.hadoop.yarn.client.api.YarnClient;
 import org.apache.hadoop.yarn.client.api.YarnClientApplication;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.YarnException;
-import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
 import org.apache.hadoop.yarn.server.MiniYARNCluster;
 import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
-import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
 import org.apache.hadoop.yarn.util.Records;
 import org.apache.log4j.Level;
 import org.apache.log4j.LogManager;

+ 172 - 4
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java

@@ -43,19 +43,26 @@ import junit.framework.Assert;
 
 import org.apache.commons.lang.time.DateFormatUtils;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ApplicationReport;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerReport;
+import org.apache.hadoop.yarn.api.records.ContainerState;
 import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
 import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.api.records.NodeReport;
 import org.apache.hadoop.yarn.api.records.NodeState;
+import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.YarnApplicationAttemptState;
 import org.apache.hadoop.yarn.api.records.YarnApplicationState;
 import org.apache.hadoop.yarn.client.api.YarnClient;
 import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException;
 import org.apache.hadoop.yarn.util.Records;
 import org.junit.Before;
 import org.junit.Test;
 
 import org.apache.commons.cli.Options;
 
@@ -113,20 +120,181 @@ public class TestYarnCLI {
     verify(sysOut, times(1)).println(isA(String.class));
   }
 
+  @Test
+  public void testGetApplicationAttemptReport() throws Exception {
+    ApplicationCLI cli = createAndGetAppCLI();
+    ApplicationId applicationId = ApplicationId.newInstance(1234, 5);
+    ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(
+        applicationId, 1);
+    ApplicationAttemptReport attemptReport = ApplicationAttemptReport
+        .newInstance(attemptId, "host", 124, "url", "diagnostics",
+            YarnApplicationAttemptState.FINISHED, ContainerId.newInstance(
+                attemptId, 1));
+    when(
+        client
+            .getApplicationAttemptReport(any(ApplicationAttemptId.class)))
+        .thenReturn(attemptReport);
+    int result = cli.run(new String[] { "applicationattempt", "-status",
+        attemptId.toString() });
+    assertEquals(0, result);
+    verify(client).getApplicationAttemptReport(attemptId);
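+    // appattempt_1234_0005_000001 encodes the cluster timestamp (1234),
+    // application id (5) and attempt number (1).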
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    PrintWriter pw = new PrintWriter(baos);
+    pw.println("Application Attempt Report : ");
+    pw.println("\tApplicationAttempt-Id : appattempt_1234_0005_000001");
+    pw.println("\tState : FINISHED");
+    pw.println("\tAMContainer : container_1234_0005_01_000001");
+    pw.println("\tTracking-URL : url");
+    pw.println("\tRPC Port : 124");
+    pw.println("\tAM Host : host");
+    pw.println("\tDiagnostics : diagnostics");
+    pw.close();
+    String appReportStr = baos.toString("UTF-8");
+    Assert.assertEquals(appReportStr, sysOutStream.toString());
+    verify(sysOut, times(1)).println(isA(String.class));
+  }
+
+  @Test
+  public void testGetApplicationAttempts() throws Exception {
+    ApplicationCLI cli = createAndGetAppCLI();
+    ApplicationId applicationId = ApplicationId.newInstance(1234, 5);
+    ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(
+        applicationId, 1);
+    ApplicationAttemptId attemptId1 = ApplicationAttemptId.newInstance(
+        applicationId, 2);
+    ApplicationAttemptReport attemptReport = ApplicationAttemptReport
+        .newInstance(attemptId, "host", 124, "url", "diagnostics",
+            YarnApplicationAttemptState.FINISHED, ContainerId.newInstance(
+                attemptId, 1));
+    ApplicationAttemptReport attemptReport1 = ApplicationAttemptReport
+        .newInstance(attemptId1, "host", 124, "url", "diagnostics",
+            YarnApplicationAttemptState.FINISHED, ContainerId.newInstance(
+                attemptId1, 1));
+    List<ApplicationAttemptReport> reports = new ArrayList<ApplicationAttemptReport>();
+    reports.add(attemptReport);
+    reports.add(attemptReport1);
+    when(client.getApplicationAttempts(any(ApplicationId.class)))
+        .thenReturn(reports);
+    int result = cli.run(new String[] { "applicationattempt", "-list",
+        applicationId.toString() });
+    assertEquals(0, result);
+    verify(client).getApplicationAttempts(applicationId);
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    PrintWriter pw = new PrintWriter(baos);
+    pw.println("Total number of application attempts :2");
+    pw.print("         ApplicationAttempt-Id");
+    pw.print("\t               State");
+    pw.print("\t                    AM-Container-Id");
+    pw.println("\t                       Tracking-URL");
+    pw.print("   appattempt_1234_0005_000001");
+    pw.print("\t            FINISHED");
+    pw.print("\t      container_1234_0005_01_000001");
+    pw.println("\t                                url");
+    pw.print("   appattempt_1234_0005_000002");
+    pw.print("\t            FINISHED");
+    pw.print("\t      container_1234_0005_02_000001");
+    pw.println("\t                                url");
+    pw.close();
+    String appReportStr = baos.toString("UTF-8");
+    Assert.assertEquals(appReportStr, sysOutStream.toString());
+  }
+
+  @Test
+  public void testGetContainerReport() throws Exception {
+    ApplicationCLI cli = createAndGetAppCLI();
+    ApplicationId applicationId = ApplicationId.newInstance(1234, 5);
+    ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(
+        applicationId, 1);
+    ContainerId containerId = ContainerId.newInstance(attemptId, 1);
+    ContainerReport container = ContainerReport.newInstance(containerId, null,
+        NodeId.newInstance("host", 1234), Priority.UNDEFINED, 1234, 5678,
+        "diagnosticInfo", "logURL", 0, ContainerState.COMPLETE);
+    when(client.getContainerReport(any(ContainerId.class))).thenReturn(
+        container);
+    int result = cli.run(new String[] { "container", "-status",
+        containerId.toString() });
+    assertEquals(0, result);
+    verify(client).getContainerReport(containerId);
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    PrintWriter pw = new PrintWriter(baos);
+    pw.println("Container Report : ");
+    pw.println("\tContainer-Id : container_1234_0005_01_000001");
+    pw.println("\tStart-Time : 1234");
+    pw.println("\tFinish-Time : 5678");
+    pw.println("\tState : COMPLETE");
+    pw.println("\tLOG-URL : logURL");
+    pw.println("\tHost : host:1234");
+    pw.println("\tDiagnostics : diagnosticInfo");
+    pw.close();
+    String appReportStr = baos.toString("UTF-8");
+    Assert.assertEquals(appReportStr, sysOutStream.toString());
+    verify(sysOut, times(1)).println(isA(String.class));
+  }
+
+  @Test
+  public void testGetContainers() throws Exception {
+    ApplicationCLI cli = createAndGetAppCLI();
+    ApplicationId applicationId = ApplicationId.newInstance(1234, 5);
+    ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(
+        applicationId, 1);
+    ContainerId containerId = ContainerId.newInstance(attemptId, 1);
+    ContainerId containerId1 = ContainerId.newInstance(attemptId, 2);
+    ContainerReport container = ContainerReport.newInstance(containerId, null,
+        NodeId.newInstance("host", 1234), Priority.UNDEFINED, 1234, 5678,
+        "diagnosticInfo", "logURL", 0, ContainerState.COMPLETE);
+    ContainerReport container1 = ContainerReport.newInstance(containerId1, null,
+        NodeId.newInstance("host", 1234), Priority.UNDEFINED, 1234, 5678,
+        "diagnosticInfo", "logURL", 0, ContainerState.COMPLETE);
+    List<ContainerReport> reports = new ArrayList<ContainerReport>();
+    reports.add(container);
+    reports.add(container1);
+    when(client.getContainers(any(ApplicationAttemptId.class))).thenReturn(
+        reports);
+    int result = cli.run(new String[] { "container", "-list",
+        attemptId.toString() });
+    assertEquals(0, result);
+    verify(client).getContainers(attemptId);
+    // Expected listing mirrors ApplicationCLI's fixed-width container table.
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    PrintWriter pw = new PrintWriter(baos);
+    pw.println("Total number of containers :2");
+    pw.print("                  Container-Id");
+    pw.print("\t          Start Time");
+    pw.print("\t         Finish Time");
+    pw.print("\t               State");
+    pw.print("\t                Host");
+    pw.println("\t                            LOG-URL");
+    pw.print(" container_1234_0005_01_000001");
+    pw.print("\t                1234");
+    pw.print("\t                5678");
+    pw.print("\t            COMPLETE");
+    pw.print("\t           host:1234");
+    pw.println("\t                             logURL");
+    pw.print(" container_1234_0005_01_000002");
+    pw.print("\t                1234");
+    pw.print("\t                5678");
+    pw.print("\t            COMPLETE");
+    pw.print("\t           host:1234");
+    pw.println("\t                             logURL");
+    pw.close();
+    String appReportStr = baos.toString("UTF-8");
+    Assert.assertEquals(appReportStr, sysOutStream.toString());
+  }
+
   @Test
   public void testGetApplicationReportException() throws Exception {
     ApplicationCLI cli = createAndGetAppCLI();
     ApplicationId applicationId = ApplicationId.newInstance(1234, 5);
     when(client.getApplicationReport(any(ApplicationId.class))).thenThrow(
-        new ApplicationNotFoundException("Application with id '"
-            + applicationId + "' doesn't exist in RM."));
+        new ApplicationNotFoundException("History file for application"
+            + applicationId + " is not found"));
     try {
       cli.run(new String[] { "-status", applicationId.toString() });
       Assert.fail();
     } catch (Exception ex) {
       Assert.assertTrue(ex instanceof ApplicationNotFoundException);
-      Assert.assertEquals("Application with id '" + applicationId
-          + "' doesn't exist in RM.", ex.getMessage());
+      Assert.assertEquals("History file for application"
+          + applicationId + " is not found", ex.getMessage());
     }
   }
 

+ 1 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/pom.xml

@@ -199,6 +199,7 @@
             <exclude>src/main/resources/webapps/mapreduce/.keep</exclude>
             <exclude>src/main/resources/webapps/jobhistory/.keep</exclude>
             <exclude>src/main/resources/webapps/yarn/.keep</exclude>
+            <exclude>src/main/resources/webapps/applicationhistory/.keep</exclude>
             <exclude>src/main/resources/webapps/cluster/.keep</exclude>
             <exclude>src/main/resources/webapps/test/.keep</exclude>
             <exclude>src/main/resources/webapps/proxy/.keep</exclude>

Some files were not shown because too many files changed in this diff