
Merge from trunk to HDFS-2006

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-2006@1594906 13f79535-47bb-0310-9956-ffa450edef68
Uma Maheswara Rao G committed 11 years ago
Commit 0506fadf6a
100 files changed, 6411 additions and 767 deletions (per-file additions and deletions are listed below)
  1. 14 0
      hadoop-common-project/hadoop-common/CHANGES.txt
  2. 38 33
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProvider.java
  3. 88 80
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyShell.java
  4. 7 2
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java
  5. 21 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/FsPermission.java
  6. 11 19
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/AclCommands.java
  7. 1 46
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Ls.java
  8. 65 2
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java
  9. 1 1
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java
  10. 14 1
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ShellBasedUnixGroupsMapping.java
  11. 53 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ProxyServers.java
  12. 1 21
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ProxyUsers.java
  13. 8 2
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java
  14. 2 2
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestKeyShell.java
  15. 54 5
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestRetryProxy.java
  16. 38 0
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/authorize/TestProxyServers.java
  17. 0 11
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/authorize/TestProxyUsers.java
  18. 35 1
      hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
  19. 12 0
      hadoop-hdfs-project/hadoop-hdfs/src/CMakeLists.txt
  20. 2 0
      hadoop-hdfs-project/hadoop-hdfs/src/config.h.cmake
  21. 3 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs
  22. 5 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
  23. 41 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java
  24. 58 31
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java
  25. 63 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/FsAclPermission.java
  26. 3 4
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java
  27. 75 10
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java
  28. 5 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
  29. 46 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/InvalidateBlocks.java
  30. 2 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java
  31. 14 13
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceScanner.java
  32. 4 8
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
  33. 67 56
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
  34. 2 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/resources/DatanodeWebHdfsMethods.java
  35. 57 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java
  36. 8 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointConf.java
  37. 20 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
  38. 19 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
  39. 381 4
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java
  40. 213 4
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java
  41. 61 21
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
  42. 12 6
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSPermissionChecker.java
  43. 1 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeReference.java
  44. 6 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java
  45. 56 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorageRetentionManager.java
  46. 5 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
  47. 16 3
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java
  48. 35 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/AbstractNNFailoverProxyProvider.java
  49. 11 3
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java
  50. 133 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/IPFailoverProxyProvider.java
  51. 6 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
  52. 80 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/WrappedFailoverProxyProvider.java
  53. 13 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/AbstractINodeDiff.java
  54. 52 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java
  55. 20 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileDiff.java
  56. 9 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/Snapshot.java
  57. 72 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotFSImageFormat.java
  58. 17 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java
  59. 4 3
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java
  60. 11 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java
  61. 172 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/DelimitedImageVisitor.java
  62. 36 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/DepthCounter.java
  63. 193 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionVisitor.java
  64. 83 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoader.java
  65. 821 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java
  66. 212 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageVisitor.java
  67. 111 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/IndentedImageVisitor.java
  68. 178 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/LsImageVisitor.java
  69. 118 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/NameDistributionVisitor.java
  70. 274 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewer.java
  71. 109 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TextWriterImageVisitor.java
  72. 88 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/XmlImageVisitor.java
  73. 10 4
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java
  74. 290 227
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java
  75. 1 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/HttpOpParam.java
  76. 804 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/native/libhdfs/test/vecsum.c
  77. 1 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/native/libhdfs/test_libhdfs_threaded.c
  78. 1 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/datanode/index.html
  79. 6 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.html
  80. 2 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.html
  81. 1 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/journal/index.html
  82. 1 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/secondary/status.html
  83. 7 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dfs-dust.js
  84. 1 1
      hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsNfsGateway.apt.vm
  85. 79 5
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java
  86. 27 7
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSPermission.java
  87. 9 9
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeBlockScanner.java
  88. 274 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFsShellPermission.java
  89. 162 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingInvalidateBlock.java
  90. 2 2
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/common/TestJspHelper.java
  91. 18 13
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDiskError.java
  92. 5 1
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/AclTestHelpers.java
  93. 52 34
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSAclBaseTest.java
  94. 3 2
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogger.java
  95. 43 2
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java
  96. 6 6
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithAcl.java
  97. 2 2
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSPermissionChecker.java
  98. 1 4
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java
  99. 9 2
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
  100. 28 28
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestAclWithSnapshot.java

+ 14 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -326,6 +326,8 @@ Trunk (Unreleased)
 
     HADOOP-10431. Change visibility of KeyStore.Options getter methods to public. (tucu)
 
+    HADOOP-10583. bin/hadoop key throws NPE with no args and assorted other fixups. (clamb via tucu)
+
   OPTIMIZATIONS
 
     HADOOP-7761. Improve the performance of raw comparisons. (todd)
@@ -380,6 +382,12 @@ Release 2.5.0 - UNRELEASED
     HADOOP-10158. SPNEGO should work with multiple interfaces/SPNs.
     (daryn via kihwal)
 
+    HADOOP-10566. Refactor proxyservers out of ProxyUsers.
+    (Benoy Antony via suresh)
+
+    HADOOP-10572. Example NFS mount command must pass noacl as it isn't
+    supported by the server yet. (Harsh J via brandonli)
+
   OPTIMIZATIONS
 
   BUG FIXES
@@ -465,6 +473,12 @@ Release 2.5.0 - UNRELEASED
     because groups stored in Set and ArrayList are compared.
     (Mit Desai via kihwal)
 
+    HADOOP-10585. Retry polices ignore interrupted exceptions (Daryn Sharp via
+    jeagles)
+
+    HADOOP-10401. ShellBasedUnixGroupsMapping#getGroups does not always return
+    primary group first (Akira AJISAKA via Colin Patrick McCabe)
+
 Release 2.4.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 38 - 33
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProvider.java

@@ -27,9 +27,7 @@ import java.net.URI;
 import java.security.NoSuchAlgorithmException;
 import java.security.NoSuchAlgorithmException;
 import java.text.MessageFormat;
 import java.text.MessageFormat;
 import java.util.Date;
 import java.util.Date;
-import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.List;
-import java.util.Map;
 
 
 import com.google.gson.stream.JsonReader;
 import com.google.gson.stream.JsonReader;
 import com.google.gson.stream.JsonWriter;
 import com.google.gson.stream.JsonWriter;
@@ -176,22 +174,26 @@ public abstract class KeyProvider {
     protected byte[] serialize() throws IOException {
     protected byte[] serialize() throws IOException {
       ByteArrayOutputStream buffer = new ByteArrayOutputStream();
       ByteArrayOutputStream buffer = new ByteArrayOutputStream();
       JsonWriter writer = new JsonWriter(new OutputStreamWriter(buffer));
       JsonWriter writer = new JsonWriter(new OutputStreamWriter(buffer));
-      writer.beginObject();
-      if (cipher != null) {
-        writer.name(CIPHER_FIELD).value(cipher);
-      }
-      if (bitLength != 0) {
-        writer.name(BIT_LENGTH_FIELD).value(bitLength);
-      }
-      if (created != null) {
-        writer.name(CREATED_FIELD).value(created.getTime());
-      }
-      if (description != null) {
-        writer.name(DESCRIPTION_FIELD).value(description);
+      try {
+        writer.beginObject();
+        if (cipher != null) {
+          writer.name(CIPHER_FIELD).value(cipher);
+        }
+        if (bitLength != 0) {
+          writer.name(BIT_LENGTH_FIELD).value(bitLength);
+        }
+        if (created != null) {
+          writer.name(CREATED_FIELD).value(created.getTime());
+        }
+        if (description != null) {
+          writer.name(DESCRIPTION_FIELD).value(description);
+        }
+        writer.name(VERSIONS_FIELD).value(versions);
+        writer.endObject();
+        writer.flush();
+      } finally {
+        writer.close();
       }
       }
-      writer.name(VERSIONS_FIELD).value(versions);
-      writer.endObject();
-      writer.flush();
       return buffer.toByteArray();
       return buffer.toByteArray();
     }
     }
 
 
@@ -207,23 +209,27 @@ public abstract class KeyProvider {
       int versions = 0;
       int versions = 0;
       String description = null;
       String description = null;
       JsonReader reader = new JsonReader(new InputStreamReader
       JsonReader reader = new JsonReader(new InputStreamReader
-          (new ByteArrayInputStream(bytes)));
-      reader.beginObject();
-      while (reader.hasNext()) {
-        String field = reader.nextName();
-        if (CIPHER_FIELD.equals(field)) {
-          cipher = reader.nextString();
-        } else if (BIT_LENGTH_FIELD.equals(field)) {
-          bitLength = reader.nextInt();
-        } else if (CREATED_FIELD.equals(field)) {
-          created = new Date(reader.nextLong());
-        } else if (VERSIONS_FIELD.equals(field)) {
-          versions = reader.nextInt();
-        } else if (DESCRIPTION_FIELD.equals(field)) {
-          description = reader.nextString();
+        (new ByteArrayInputStream(bytes)));
+      try {
+        reader.beginObject();
+        while (reader.hasNext()) {
+          String field = reader.nextName();
+          if (CIPHER_FIELD.equals(field)) {
+            cipher = reader.nextString();
+          } else if (BIT_LENGTH_FIELD.equals(field)) {
+            bitLength = reader.nextInt();
+          } else if (CREATED_FIELD.equals(field)) {
+            created = new Date(reader.nextLong());
+          } else if (VERSIONS_FIELD.equals(field)) {
+            versions = reader.nextInt();
+          } else if (DESCRIPTION_FIELD.equals(field)) {
+            description = reader.nextString();
+          }
         }
         }
+        reader.endObject();
+      } finally {
+        reader.close();
       }
       }
-      reader.endObject();
       this.cipher = cipher;
       this.cipher = cipher;
       this.bitLength = bitLength;
       this.bitLength = bitLength;
       this.created = created;
       this.created = created;
@@ -310,7 +316,6 @@ public abstract class KeyProvider {
    */
    */
   public abstract List<String> getKeys() throws IOException;
   public abstract List<String> getKeys() throws IOException;
 
 
-
   /**
   /**
    * Get key metadata in bulk.
    * Get key metadata in bulk.
    * @param names the names of the keys to get
    * @param names the names of the keys to get

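Note on the KeyProvider change above: serialize() and the metadata constructor now wrap Gson's JsonWriter and JsonReader in try/finally so the underlying stream is closed even when writing or parsing fails partway. A minimal standalone sketch of the same pattern, assuming Gson on the classpath; the field names here are illustrative, not the actual KeyProvider constants:

```java
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;

import com.google.gson.stream.JsonWriter;

public class JsonCloseSketch {
  // Serialize a couple of fields; the finally block closes the writer on both
  // the success path and any exception thrown while writing.
  static byte[] serialize(String cipher, int bitLength) throws IOException {
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    JsonWriter writer = new JsonWriter(new OutputStreamWriter(buffer));
    try {
      writer.beginObject();
      if (cipher != null) {
        writer.name("cipher").value(cipher);
      }
      writer.name("bitLength").value(bitLength);
      writer.endObject();
      writer.flush();
    } finally {
      writer.close();
    }
    return buffer.toByteArray();
  }

  public static void main(String[] args) throws IOException {
    System.out.println(new String(serialize("AES/CTR/NoPadding", 256)));
  }
}
```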
+ 88 - 80
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyShell.java

@@ -23,9 +23,6 @@ import java.io.PrintStream;
 import java.security.InvalidParameterException;
 import java.security.InvalidParameterException;
 import java.security.NoSuchAlgorithmException;
 import java.security.NoSuchAlgorithmException;
 import java.util.List;
 import java.util.List;
-import java.util.Map;
-
-import javax.crypto.KeyGenerator;
 
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.conf.Configured;
@@ -93,41 +90,54 @@ public class KeyShell extends Configured implements Tool {
    */
    */
   private int init(String[] args) throws IOException {
   private int init(String[] args) throws IOException {
     for (int i = 0; i < args.length; i++) { // parse command line
     for (int i = 0; i < args.length; i++) { // parse command line
+      boolean moreTokens = (i < args.length - 1);
       if (args[i].equals("create")) {
       if (args[i].equals("create")) {
-        String keyName = args[++i];
+        String keyName = "--help";
+        if (moreTokens) {
+          keyName = args[++i];
+        }
+
         command = new CreateCommand(keyName);
         command = new CreateCommand(keyName);
-        if (keyName.equals("--help")) {
+        if ("--help".equals(keyName)) {
           printKeyShellUsage();
           printKeyShellUsage();
           return -1;
           return -1;
         }
         }
       } else if (args[i].equals("delete")) {
       } else if (args[i].equals("delete")) {
-        String keyName = args[++i];
+        String keyName = "--help";
+        if (moreTokens) {
+          keyName = args[++i];
+        }
+
         command = new DeleteCommand(keyName);
         command = new DeleteCommand(keyName);
-        if (keyName.equals("--help")) {
+        if ("--help".equals(keyName)) {
           printKeyShellUsage();
           printKeyShellUsage();
           return -1;
           return -1;
         }
         }
       } else if (args[i].equals("roll")) {
       } else if (args[i].equals("roll")) {
-        String keyName = args[++i];
+        String keyName = "--help";
+        if (moreTokens) {
+          keyName = args[++i];
+        }
+
         command = new RollCommand(keyName);
         command = new RollCommand(keyName);
-        if (keyName.equals("--help")) {
+        if ("--help".equals(keyName)) {
           printKeyShellUsage();
           printKeyShellUsage();
           return -1;
           return -1;
         }
         }
-      } else if (args[i].equals("list")) {
+      } else if ("list".equals(args[i])) {
         command = new ListCommand();
         command = new ListCommand();
-      } else if (args[i].equals("--size")) {
+      } else if ("--size".equals(args[i]) && moreTokens) {
         getConf().set(KeyProvider.DEFAULT_BITLENGTH_NAME, args[++i]);
         getConf().set(KeyProvider.DEFAULT_BITLENGTH_NAME, args[++i]);
-      } else if (args[i].equals("--cipher")) {
+      } else if ("--cipher".equals(args[i]) && moreTokens) {
         getConf().set(KeyProvider.DEFAULT_CIPHER_NAME, args[++i]);
         getConf().set(KeyProvider.DEFAULT_CIPHER_NAME, args[++i]);
-      } else if (args[i].equals("--provider")) {
+      } else if ("--provider".equals(args[i]) && moreTokens) {
         userSuppliedProvider = true;
         userSuppliedProvider = true;
         getConf().set(KeyProviderFactory.KEY_PROVIDER_PATH, args[++i]);
         getConf().set(KeyProviderFactory.KEY_PROVIDER_PATH, args[++i]);
-      } else if (args[i].equals("--metadata")) {
+      } else if ("--metadata".equals(args[i])) {
         getConf().setBoolean(LIST_METADATA, true);
         getConf().setBoolean(LIST_METADATA, true);
-      } else if (args[i].equals("-i") || (args[i].equals("--interactive"))) {
+      } else if ("-i".equals(args[i]) || ("--interactive".equals(args[i]))) {
         interactive = true;
         interactive = true;
-      } else if (args[i].equals("--help")) {
+      } else if ("--help".equals(args[i])) {
         printKeyShellUsage();
         printKeyShellUsage();
         return -1;
         return -1;
       } else {
       } else {
@@ -136,6 +146,12 @@ public class KeyShell extends Configured implements Tool {
         return -1;
         return -1;
       }
       }
     }
     }
+
+    if (command == null) {
+      printKeyShellUsage();
+      return -1;
+    }
+
     return 0;
     return 0;
   }
   }
 
 
@@ -143,8 +159,7 @@ public class KeyShell extends Configured implements Tool {
     out.println(USAGE_PREFIX + COMMANDS);
     out.println(USAGE_PREFIX + COMMANDS);
     if (command != null) {
     if (command != null) {
       out.println(command.getUsage());
       out.println(command.getUsage());
-    }
-    else {
+    } else {
       out.println("=========================================================" +
       out.println("=========================================================" +
       		"======");
       		"======");
       out.println(CreateCommand.USAGE + ":\n\n" + CreateCommand.DESC);
       out.println(CreateCommand.USAGE + ":\n\n" + CreateCommand.DESC);
@@ -174,8 +189,7 @@ public class KeyShell extends Configured implements Tool {
         providers = KeyProviderFactory.getProviders(getConf());
         providers = KeyProviderFactory.getProviders(getConf());
         if (userSuppliedProvider) {
         if (userSuppliedProvider) {
           provider = providers.get(0);
           provider = providers.get(0);
-        }
-        else {
+        } else {
           for (KeyProvider p : providers) {
           for (KeyProvider p : providers) {
             if (!p.isTransient()) {
             if (!p.isTransient()) {
               provider = p;
               provider = p;
@@ -190,7 +204,7 @@ public class KeyShell extends Configured implements Tool {
     }
     }
 
 
     protected void printProviderWritten() {
     protected void printProviderWritten() {
-        out.println(provider.getClass().getName() + " has been updated.");
+        out.println(provider + " has been updated.");
     }
     }
 
 
     protected void warnIfTransientProvider() {
     protected void warnIfTransientProvider() {
@@ -206,12 +220,12 @@ public class KeyShell extends Configured implements Tool {
 
 
   private class ListCommand extends Command {
   private class ListCommand extends Command {
     public static final String USAGE =
     public static final String USAGE =
-        "list [--provider] [--metadata] [--help]";
+        "list [--provider <provider>] [--metadata] [--help]";
     public static final String DESC =
     public static final String DESC =
-        "The list subcommand displays the keynames contained within \n" +
-        "a particular provider - as configured in core-site.xml or " +
-        "indicated\nthrough the --provider argument.\n" +
-        "If the --metadata option is used, the keys metadata will be printed";
+        "The list subcommand displays the keynames contained within\n" +
+        "a particular provider as configured in core-site.xml or\n" +
+        "specified with the --provider argument. --metadata displays\n" +
+        "the metadata.";
 
 
     private boolean metadata = false;
     private boolean metadata = false;
 
 
@@ -220,9 +234,9 @@ public class KeyShell extends Configured implements Tool {
       provider = getKeyProvider();
       provider = getKeyProvider();
       if (provider == null) {
       if (provider == null) {
         out.println("There are no non-transient KeyProviders configured.\n"
         out.println("There are no non-transient KeyProviders configured.\n"
-            + "Consider using the --provider option to indicate the provider\n"
-            + "to use. If you want to list a transient provider then you\n"
-            + "you MUST use the --provider argument.");
+          + "Use the --provider option to specify a provider. If you\n"
+          + "want to list a transient provider then you must use the\n"
+          + "--provider argument.");
         rc = false;
         rc = false;
       }
       }
       metadata = getConf().getBoolean(LIST_METADATA, false);
       metadata = getConf().getBoolean(LIST_METADATA, false);
@@ -231,12 +245,12 @@ public class KeyShell extends Configured implements Tool {
 
 
     public void execute() throws IOException {
     public void execute() throws IOException {
       try {
       try {
-        List<String> keys = provider.getKeys();
-        out.println("Listing keys for KeyProvider: " + provider.toString());
+        final List<String> keys = provider.getKeys();
+        out.println("Listing keys for KeyProvider: " + provider);
         if (metadata) {
         if (metadata) {
-          Metadata[] meta =
+          final Metadata[] meta =
             provider.getKeysMetadata(keys.toArray(new String[keys.size()]));
             provider.getKeysMetadata(keys.toArray(new String[keys.size()]));
-          for(int i=0; i < meta.length; ++i) {
+          for (int i = 0; i < meta.length; ++i) {
             out.println(keys.get(i) + " : " + meta[i]);
             out.println(keys.get(i) + " : " + meta[i]);
           }
           }
         } else {
         } else {
@@ -245,7 +259,7 @@ public class KeyShell extends Configured implements Tool {
           }
           }
         }
         }
       } catch (IOException e) {
       } catch (IOException e) {
-        out.println("Cannot list keys for KeyProvider: " + provider.toString()
+        out.println("Cannot list keys for KeyProvider: " + provider
             + ": " + e.getMessage());
             + ": " + e.getMessage());
         throw e;
         throw e;
       }
       }
@@ -258,11 +272,10 @@ public class KeyShell extends Configured implements Tool {
   }
   }
 
 
   private class RollCommand extends Command {
   private class RollCommand extends Command {
-    public static final String USAGE = "roll <keyname> [--provider] [--help]";
+    public static final String USAGE = "roll <keyname> [--provider <provider>] [--help]";
     public static final String DESC =
     public static final String DESC =
-        "The roll subcommand creates a new version of the key specified\n" +
-        "through the <keyname> argument within the provider indicated using\n" +
-        "the --provider argument";
+      "The roll subcommand creates a new version for the specified key\n" +
+      "within the provider indicated using the --provider argument\n";
 
 
     String keyName = null;
     String keyName = null;
 
 
@@ -274,15 +287,14 @@ public class KeyShell extends Configured implements Tool {
       boolean rc = true;
       boolean rc = true;
       provider = getKeyProvider();
       provider = getKeyProvider();
       if (provider == null) {
       if (provider == null) {
-        out.println("There are no valid KeyProviders configured.\n"
-            + "Key will not be rolled.\n"
-            + "Consider using the --provider option to indicate the provider"
-            + " to use.");
+        out.println("There are no valid KeyProviders configured. The key\n" +
+          "has not been rolled. Use the --provider option to specify\n" +
+          "a provider.");
         rc = false;
         rc = false;
       }
       }
       if (keyName == null) {
       if (keyName == null) {
-        out.println("There is no keyName specified. Please provide the" +
-            "mandatory <keyname>. See the usage description with --help.");
+        out.println("Please provide a <keyname>.\n" +
+          "See the usage description by using --help.");
         rc = false;
         rc = false;
       }
       }
       return rc;
       return rc;
@@ -290,10 +302,9 @@ public class KeyShell extends Configured implements Tool {
 
 
     public void execute() throws NoSuchAlgorithmException, IOException {
     public void execute() throws NoSuchAlgorithmException, IOException {
       try {
       try {
-        Metadata md = provider.getMetadata(keyName);
         warnIfTransientProvider();
         warnIfTransientProvider();
         out.println("Rolling key version from KeyProvider: "
         out.println("Rolling key version from KeyProvider: "
-            + provider.toString() + " for key name: " + keyName);
+            + provider + "\n  for key name: " + keyName);
         try {
         try {
           provider.rollNewVersion(keyName);
           provider.rollNewVersion(keyName);
           out.println(keyName + " has been successfully rolled.");
           out.println(keyName + " has been successfully rolled.");
@@ -301,12 +312,12 @@ public class KeyShell extends Configured implements Tool {
           printProviderWritten();
           printProviderWritten();
         } catch (NoSuchAlgorithmException e) {
         } catch (NoSuchAlgorithmException e) {
           out.println("Cannot roll key: " + keyName + " within KeyProvider: "
           out.println("Cannot roll key: " + keyName + " within KeyProvider: "
-              + provider.toString());
+              + provider);
           throw e;
           throw e;
         }
         }
       } catch (IOException e1) {
       } catch (IOException e1) {
         out.println("Cannot roll key: " + keyName + " within KeyProvider: "
         out.println("Cannot roll key: " + keyName + " within KeyProvider: "
-            + provider.toString());
+            + provider);
         throw e1;
         throw e1;
       }
       }
     }
     }
@@ -318,11 +329,11 @@ public class KeyShell extends Configured implements Tool {
   }
   }
 
 
   private class DeleteCommand extends Command {
   private class DeleteCommand extends Command {
-    public static final String USAGE = "delete <keyname> [--provider] [--help]";
+    public static final String USAGE = "delete <keyname> [--provider <provider>] [--help]";
     public static final String DESC =
     public static final String DESC =
-        "The delete subcommand deletes all of the versions of the key\n" +
-        "specified as the <keyname> argument from within the provider\n" +
-        "indicated through the --provider argument";
+        "The delete subcommand deletes all versions of the key\n" +
+        "specified by the <keyname> argument from within the\n" +
+        "provider specified --provider.";
 
 
     String keyName = null;
     String keyName = null;
     boolean cont = true;
     boolean cont = true;
@@ -335,23 +346,21 @@ public class KeyShell extends Configured implements Tool {
     public boolean validate() {
     public boolean validate() {
       provider = getKeyProvider();
       provider = getKeyProvider();
       if (provider == null) {
       if (provider == null) {
-        out.println("There are no valid KeyProviders configured.\n"
-            + "Nothing will be deleted.\n"
-            + "Consider using the --provider option to indicate the provider"
-            + " to use.");
+        out.println("There are no valid KeyProviders configured. Nothing\n"
+          + "was deleted. Use the --provider option to specify a provider.");
         return false;
         return false;
       }
       }
       if (keyName == null) {
       if (keyName == null) {
-        out.println("There is no keyName specified. Please provide the" +
-            "mandatory <keyname>. See the usage description with --help.");
+        out.println("There is no keyName specified. Please specify a " +
+            "<keyname>. See the usage description with --help.");
         return false;
         return false;
       }
       }
       if (interactive) {
       if (interactive) {
         try {
         try {
           cont = ToolRunner
           cont = ToolRunner
               .confirmPrompt("You are about to DELETE all versions of "
               .confirmPrompt("You are about to DELETE all versions of "
-                  + "the key: " + keyName + " from KeyProvider "
-                  + provider.toString() + ". Continue?:");
+                  + " key: " + keyName + " from KeyProvider "
+                  + provider + ". Continue?:");
           if (!cont) {
           if (!cont) {
             out.println("Nothing has been be deleted.");
             out.println("Nothing has been be deleted.");
           }
           }
@@ -367,7 +376,7 @@ public class KeyShell extends Configured implements Tool {
     public void execute() throws IOException {
     public void execute() throws IOException {
       warnIfTransientProvider();
       warnIfTransientProvider();
       out.println("Deleting key: " + keyName + " from KeyProvider: "
       out.println("Deleting key: " + keyName + " from KeyProvider: "
-          + provider.toString());
+          + provider);
       if (cont) {
       if (cont) {
         try {
         try {
           provider.deleteKey(keyName);
           provider.deleteKey(keyName);
@@ -375,7 +384,7 @@ public class KeyShell extends Configured implements Tool {
           provider.flush();
           provider.flush();
           printProviderWritten();
           printProviderWritten();
         } catch (IOException e) {
         } catch (IOException e) {
-          out.println(keyName + "has NOT been deleted.");
+          out.println(keyName + " has not been deleted.");
           throw e;
           throw e;
         }
         }
       }
       }
@@ -388,16 +397,16 @@ public class KeyShell extends Configured implements Tool {
   }
   }
 
 
   private class CreateCommand extends Command {
   private class CreateCommand extends Command {
-    public static final String USAGE = "create <keyname> [--cipher] " +
-    		"[--size] [--provider] [--help]";
+    public static final String USAGE =
+      "create <keyname> [--cipher <cipher>] [--size <size>]\n" +
+      "                     [--provider <provider>] [--help]";
     public static final String DESC =
     public static final String DESC =
-        "The create subcommand creates a new key for the name specified\n" +
-        "as the <keyname> argument within the provider indicated through\n" +
-        "the --provider argument. You may also indicate the specific\n" +
-        "cipher through the --cipher argument. The default for cipher is\n" +
-        "currently \"AES/CTR/NoPadding\". The default keysize is \"256\".\n" +
-        "You may also indicate the requested key length through the --size\n" +
-        "argument.";
+      "The create subcommand creates a new key for the name specified\n" +
+      "by the <keyname> argument within the provider specified by the\n" +
+      "--provider argument. You may specify a cipher with the --cipher\n" +
+      "argument. The default cipher is currently \"AES/CTR/NoPadding\".\n" +
+      "The default keysize is 256. You may specify the requested key\n" +
+      "length using the --size argument.\n";
 
 
     String keyName = null;
     String keyName = null;
 
 
@@ -409,15 +418,14 @@ public class KeyShell extends Configured implements Tool {
       boolean rc = true;
       boolean rc = true;
       provider = getKeyProvider();
       provider = getKeyProvider();
       if (provider == null) {
       if (provider == null) {
-        out.println("There are no valid KeyProviders configured.\nKey" +
-        		" will not be created.\n"
-            + "Consider using the --provider option to indicate the provider" +
-            " to use.");
+        out.println("There are no valid KeyProviders configured. No key\n" +
+          " was created. You can use the --provider option to specify\n" +
+          " a provider to use.");
         rc = false;
         rc = false;
       }
       }
       if (keyName == null) {
       if (keyName == null) {
-        out.println("There is no keyName specified. Please provide the" +
-        		"mandatory <keyname>. See the usage description with --help.");
+        out.println("Please provide a <keyname>. See the usage description" +
+          " with --help.");
         rc = false;
         rc = false;
       }
       }
       return rc;
       return rc;
@@ -432,13 +440,13 @@ public class KeyShell extends Configured implements Tool {
         provider.flush();
         provider.flush();
         printProviderWritten();
         printProviderWritten();
       } catch (InvalidParameterException e) {
       } catch (InvalidParameterException e) {
-        out.println(keyName + " has NOT been created. " + e.getMessage());
+        out.println(keyName + " has not been created. " + e.getMessage());
         throw e;
         throw e;
       } catch (IOException e) {
       } catch (IOException e) {
-        out.println(keyName + " has NOT been created. " + e.getMessage());
+        out.println(keyName + " has not been created. " + e.getMessage());
         throw e;
         throw e;
       } catch (NoSuchAlgorithmException e) {
       } catch (NoSuchAlgorithmException e) {
-        out.println(keyName + " has NOT been created. " + e.getMessage());
+        out.println(keyName + " has not been created. " + e.getMessage());
         throw e;
         throw e;
       }
       }
     }
     }

+ 7 - 2
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java

@@ -126,7 +126,6 @@ public class KMSClientProvider extends KeyProvider {
     return o;
     return o;
   }
   }
 
 
-
   public static String checkNotEmpty(String s, String name)
   public static String checkNotEmpty(String s, String name)
       throws IllegalArgumentException {
       throws IllegalArgumentException {
     checkNotNull(s, name);
     checkNotNull(s, name);
@@ -140,6 +139,13 @@ public class KMSClientProvider extends KeyProvider {
   private String kmsUrl;
   private String kmsUrl;
   private SSLFactory sslFactory;
   private SSLFactory sslFactory;
 
 
+  @Override
+  public String toString() {
+    final StringBuilder sb = new StringBuilder("KMSClientProvider[");
+    sb.append(kmsUrl).append("]");
+    return sb.toString();
+  }
+
   public KMSClientProvider(URI uri, Configuration conf) throws IOException {
   public KMSClientProvider(URI uri, Configuration conf) throws IOException {
     Path path = unnestUri(uri);
     Path path = unnestUri(uri);
     URL url = path.toUri().toURL();
     URL url = path.toUri().toURL();
@@ -515,5 +521,4 @@ public class KMSClientProvider extends KeyProvider {
   public static String buildVersionName(String name, int version) {
   public static String buildVersionName(String name, int version) {
     return KeyProvider.buildVersionName(name, version);
     return KeyProvider.buildVersionName(name, version);
   }
   }
-
 }
 }

+ 21 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/FsPermission.java

@@ -158,6 +158,17 @@ public class FsPermission implements Writable {
     return (short)s;
     return (short)s;
   }
   }
 
 
+  /**
+   * Encodes the object to a short.  Unlike {@link #toShort()}, this method may
+   * return values outside the fixed range 00000 - 01777 if extended features
+   * are encoded into this permission, such as the ACL bit.
+   *
+   * @return short extended short representation of this permission
+   */
+  public short toExtendedShort() {
+    return toShort();
+  }
+
   @Override
   @Override
   public boolean equals(Object obj) {
   public boolean equals(Object obj) {
     if (obj instanceof FsPermission) {
     if (obj instanceof FsPermission) {
@@ -273,6 +284,16 @@ public class FsPermission implements Writable {
     return stickyBit;
     return stickyBit;
   }
   }
 
 
+  /**
+   * Returns true if there is also an ACL (access control list).
+   *
+   * @return boolean true if there is also an ACL (access control list).
+   */
+  public boolean getAclBit() {
+    // File system subclasses that support the ACL bit would override this.
+    return false;
+  }
+
   /** Set the user file creation mask (umask) */
   /** Set the user file creation mask (umask) */
   public static void setUMask(Configuration conf, FsPermission umask) {
   public static void setUMask(Configuration conf, FsPermission umask) {
     conf.set(UMASK_LABEL, String.format("%1$03o", umask.toShort()));
     conf.set(UMASK_LABEL, String.format("%1$03o", umask.toShort()));

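The new FsPermission#getAclBit() above intentionally returns false; as the comment notes, file system subclasses that support the ACL bit are expected to override it (the HDFS side of this merge adds FsAclPermission for that purpose, per the file list). A hypothetical minimal override, only to illustrate the contract; the class name and constructor are made up and are not the HDFS implementation:

```java
import org.apache.hadoop.fs.permission.FsPermission;

// Hypothetical permission subclass that records whether the inode also carries an ACL.
class AclAwarePermission extends FsPermission {
  private final boolean hasAcl;

  AclAwarePermission(short mode, boolean hasAcl) {
    super(mode);
    this.hasAcl = hasAcl;
  }

  @Override
  public boolean getAclBit() {
    // Lets shell commands such as ls print the trailing '+' without an extra RPC.
    return hasAcl;
  }
}
```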
+ 11 - 19
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/AclCommands.java

@@ -75,29 +75,21 @@ class AclCommands extends FsCommand {
 
 
     @Override
     @Override
     protected void processPath(PathData item) throws IOException {
     protected void processPath(PathData item) throws IOException {
-      AclStatus aclStatus = item.fs.getAclStatus(item.path);
       out.println("# file: " + item);
       out.println("# file: " + item);
-      out.println("# owner: " + aclStatus.getOwner());
-      out.println("# group: " + aclStatus.getGroup());
-      List<AclEntry> entries = aclStatus.getEntries();
-      if (aclStatus.isStickyBit()) {
-        String stickyFlag = "T";
-        for (AclEntry aclEntry : entries) {
-          if (aclEntry.getType() == AclEntryType.OTHER
-              && aclEntry.getScope() == AclEntryScope.ACCESS
-              && aclEntry.getPermission().implies(FsAction.EXECUTE)) {
-            stickyFlag = "t";
-            break;
-          }
-        }
-        out.println("# flags: --" + stickyFlag);
+      out.println("# owner: " + item.stat.getOwner());
+      out.println("# group: " + item.stat.getGroup());
+      FsPermission perm = item.stat.getPermission();
+      if (perm.getStickyBit()) {
+        out.println("# flags: --" +
+          (perm.getOtherAction().implies(FsAction.EXECUTE) ? "t" : "T"));
       }
       }
 
 
-      FsPermission perm = item.stat.getPermission();
-      if (entries.isEmpty()) {
-        printMinimalAcl(perm);
-      } else {
+      if (perm.getAclBit()) {
+        AclStatus aclStatus = item.fs.getAclStatus(item.path);
+        List<AclEntry> entries = aclStatus.getEntries();
         printExtendedAcl(perm, entries);
         printExtendedAcl(perm, entries);
+      } else {
+        printMinimalAcl(perm);
       }
       }
 
 
       out.println();
       out.println();

+ 1 - 46
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Ls.java

@@ -31,8 +31,6 @@ import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.ipc.RemoteException;
-import org.apache.hadoop.ipc.RpcNoSuchMethodException;
 
 
 import com.google.common.collect.Sets;
 import com.google.common.collect.Sets;
 
 
@@ -116,7 +114,7 @@ class Ls extends FsCommand {
     FileStatus stat = item.stat;
     FileStatus stat = item.stat;
     String line = String.format(lineFormat,
     String line = String.format(lineFormat,
         (stat.isDirectory() ? "d" : "-"),
         (stat.isDirectory() ? "d" : "-"),
-        stat.getPermission() + (hasAcl(item) ? "+" : " "),
+        stat.getPermission() + (stat.getPermission().getAclBit() ? "+" : " "),
         (stat.isFile() ? stat.getReplication() : "-"),
         (stat.isFile() ? stat.getReplication() : "-"),
         stat.getOwner(),
         stat.getOwner(),
         stat.getGroup(),
         stat.getGroup(),
@@ -153,49 +151,6 @@ class Ls extends FsCommand {
     lineFormat = fmt.toString();
     lineFormat = fmt.toString();
   }
   }
 
 
-  /**
-   * Calls getAclStatus to determine if the given item has an ACL.  For
-   * compatibility, this method traps errors caused by the RPC method missing
-   * from the server side.  This would happen if the client was connected to an
-   * old NameNode that didn't have the ACL APIs.  This method also traps the
-   * case of the client-side FileSystem not implementing the ACL APIs.
-   * FileSystem instances that do not support ACLs are remembered.  This
-   * prevents the client from sending multiple failing RPC calls during a
-   * recursive ls.
-   *
-   * @param item PathData item to check
-   * @return boolean true if item has an ACL
-   * @throws IOException if there is a failure
-   */
-  private boolean hasAcl(PathData item) throws IOException {
-    FileSystem fs = item.fs;
-    if (aclNotSupportedFsSet.contains(fs.getUri())) {
-      // This FileSystem failed to run the ACL API in an earlier iteration.
-      return false;
-    }
-    try {
-      return !fs.getAclStatus(item.path).getEntries().isEmpty();
-    } catch (RemoteException e) {
-      // If this is a RpcNoSuchMethodException, then the client is connected to
-      // an older NameNode that doesn't support ACLs.  Keep going.
-      IOException e2 = e.unwrapRemoteException(RpcNoSuchMethodException.class);
-      if (!(e2 instanceof RpcNoSuchMethodException)) {
-        throw e;
-      }
-    } catch (IOException e) {
-      // The NameNode supports ACLs, but they are not enabled.  Keep going.
-      String message = e.getMessage();
-      if (message != null && !message.contains("ACLs has been disabled")) {
-        throw e;
-      }
-    } catch (UnsupportedOperationException e) {
-      // The underlying FileSystem doesn't implement ACLs.  Keep going.
-    }
-    // Remember that this FileSystem cannot support ACLs.
-    aclNotSupportedFsSet.add(fs.getUri());
-    return false;
-  }
-
   private int maxLength(int n, Object value) {
   private int maxLength(int n, Object value) {
     return Math.max(n, (value != null) ? String.valueOf(value).length() : 0);
     return Math.max(n, (value != null) ? String.valueOf(value).length() : 0);
   }
   }

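With the Ls change above, the trailing '+' in the permissions column is driven by the ACL bit already present on the FileStatus, so the per-path getAclStatus probe (and its fallback handling for old NameNodes) is no longer needed. A hedged sketch of the same pattern for client code that only fetches the full ACL when one is actually present; the method name is illustrative:

```java
import java.io.IOException;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.AclStatus;

class AclBitSketch {
  // Fetch ACL entries only when the already-retrieved FileStatus says an ACL exists.
  static void printAclIfPresent(FileSystem fs, Path path) throws IOException {
    FileStatus stat = fs.getFileStatus(path);
    if (stat.getPermission().getAclBit()) {
      AclStatus acl = fs.getAclStatus(path);  // one extra RPC, but only for ACL'd paths
      System.out.println(path + ": " + acl.getEntries());
    } else {
      System.out.println(path + ": " + stat.getPermission());
    }
  }
}
```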
+ 65 - 2
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java

@@ -20,6 +20,7 @@ package org.apache.hadoop.ha;
 import java.io.IOException;
 import java.io.IOException;
 import java.io.PrintStream;
 import java.io.PrintStream;
 import java.util.Arrays;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Map;
 import java.util.Map;
 
 
 import org.apache.commons.cli.Options;
 import org.apache.commons.cli.Options;
@@ -33,6 +34,7 @@ import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.fs.CommonConfigurationKeys;
 import org.apache.hadoop.fs.CommonConfigurationKeys;
+import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
 import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
 import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
 import org.apache.hadoop.ha.HAServiceProtocol.RequestSource;
 import org.apache.hadoop.ha.HAServiceProtocol.RequestSource;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.Tool;
@@ -66,7 +68,7 @@ public abstract class HAAdmin extends Configured implements Tool {
   protected final static Map<String, UsageInfo> USAGE =
   protected final static Map<String, UsageInfo> USAGE =
     ImmutableMap.<String, UsageInfo>builder()
     ImmutableMap.<String, UsageInfo>builder()
     .put("-transitionToActive",
     .put("-transitionToActive",
-        new UsageInfo("<serviceId>", "Transitions the service into Active state"))
+        new UsageInfo(" <serviceId> [--"+FORCEACTIVE+"]", "Transitions the service into Active state"))
     .put("-transitionToStandby",
     .put("-transitionToStandby",
         new UsageInfo("<serviceId>", "Transitions the service into Standby state"))
         new UsageInfo("<serviceId>", "Transitions the service into Standby state"))
     .put("-failover",
     .put("-failover",
@@ -100,6 +102,10 @@ public abstract class HAAdmin extends Configured implements Tool {
   }
   }
 
 
   protected abstract HAServiceTarget resolveTarget(String string);
   protected abstract HAServiceTarget resolveTarget(String string);
+  
+  protected Collection<String> getTargetIds(String targetNodeToActivate) {
+    return Arrays.asList(new String[]{targetNodeToActivate});
+  }
 
 
   protected String getUsageString() {
   protected String getUsageString() {
     return "Usage: HAAdmin";
     return "Usage: HAAdmin";
@@ -133,6 +139,11 @@ public abstract class HAAdmin extends Configured implements Tool {
       printUsage(errOut, "-transitionToActive");
       printUsage(errOut, "-transitionToActive");
       return -1;
       return -1;
     }
     }
+    /*  returns true if other target node is active or some exception occurred 
+        and forceActive was not set  */
+    if(isOtherTargetNodeActive(argv[0], cmd.hasOption(FORCEACTIVE))) {
+      return -1;
+    }
     HAServiceTarget target = resolveTarget(argv[0]);
     HAServiceTarget target = resolveTarget(argv[0]);
     if (!checkManualStateManagementOK(target)) {
     if (!checkManualStateManagementOK(target)) {
       return -1;
       return -1;
@@ -142,7 +153,48 @@ public abstract class HAAdmin extends Configured implements Tool {
     HAServiceProtocolHelper.transitionToActive(proto, createReqInfo());
     HAServiceProtocolHelper.transitionToActive(proto, createReqInfo());
     return 0;
     return 0;
   }
   }
-
+  
+  /**
+   * Checks whether other target node is active or not
+   * @param targetNodeToActivate
+   * @return true if other target node is active or some other exception 
+   * occurred and forceActive was set otherwise false
+   * @throws IOException
+   */
+  private boolean isOtherTargetNodeActive(String targetNodeToActivate, boolean forceActive)
+      throws IOException  {
+    Collection<String> targetIds = getTargetIds(targetNodeToActivate);
+    if(targetIds == null) {
+      errOut.println("transitionToActive: No target node in the "
+          + "current configuration");
+      printUsage(errOut, "-transitionToActive");
+      return true;
+    }
+    targetIds.remove(targetNodeToActivate);
+    for(String targetId : targetIds) {
+      HAServiceTarget target = resolveTarget(targetId);
+      if (!checkManualStateManagementOK(target)) {
+        return true;
+      }
+      try {
+        HAServiceProtocol proto = target.getProxy(getConf(), 5000);
+        if(proto.getServiceStatus().getState() == HAServiceState.ACTIVE) {
+          errOut.println("transitionToActive: Node " +  targetId +" is already active");
+          printUsage(errOut, "-transitionToActive");
+          return true;
+        }
+      } catch (Exception e) {
+        //If forceActive switch is false then return true
+        if(!forceActive) {
+          errOut.println("Unexpected error occurred  " + e.getMessage());
+          printUsage(errOut, "-transitionToActive");
+          return true; 
+        }
+      }
+    }
+    return false;
+  }
+  
   private int transitionToStandby(final CommandLine cmd)
   private int transitionToStandby(final CommandLine cmd)
       throws IOException, ServiceFailedException {
       throws IOException, ServiceFailedException {
     String[] argv = cmd.getArgs();
     String[] argv = cmd.getArgs();
@@ -364,6 +416,9 @@ public abstract class HAAdmin extends Configured implements Tool {
     if ("-failover".equals(cmd)) {
     if ("-failover".equals(cmd)) {
       addFailoverCliOpts(opts);
       addFailoverCliOpts(opts);
     }
     }
+    if("-transitionToActive".equals(cmd)) {
+      addTransitionToActiveCliOpts(opts);
+    }
     // Mutative commands take FORCEMANUAL option
     // Mutative commands take FORCEMANUAL option
     if ("-transitionToActive".equals(cmd) ||
     if ("-transitionToActive".equals(cmd) ||
         "-transitionToStandby".equals(cmd) ||
         "-transitionToStandby".equals(cmd) ||
@@ -433,6 +488,14 @@ public abstract class HAAdmin extends Configured implements Tool {
     // that change state.
     // that change state.
   }
   }
   
   
+  /**
+   * Add CLI options which are specific to the transitionToActive command and
+   * no others.
+   */
+  private void addTransitionToActiveCliOpts(Options transitionToActiveCliOpts) {
+    transitionToActiveCliOpts.addOption(FORCEACTIVE, false, "force active");
+  }
+  
   private CommandLine parseOpts(String cmdName, Options opts, String[] argv) {
   private CommandLine parseOpts(String cmdName, Options opts, String[] argv) {
     try {
     try {
       // Strip off the first arg, since that's just the command name
       // Strip off the first arg, since that's just the command name

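The new protected getTargetIds() hook above defaults to just the node being activated; an admin subclass is expected to return every service ID in the nameservice so transitionToActive can first verify that no other node is already active (the HDFS side of this merge updates DFSHAAdmin for this, per the file list). A hypothetical sketch of such an override, with made-up IDs and a stubbed resolveTarget:

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;

import org.apache.hadoop.ha.HAAdmin;
import org.apache.hadoop.ha.HAServiceTarget;

// Hypothetical two-node admin; real subclasses resolve IDs and targets from configuration.
class TwoNodeHAAdmin extends HAAdmin {
  @Override
  protected HAServiceTarget resolveTarget(String id) {
    throw new UnsupportedOperationException("target resolution is out of scope for this sketch");
  }

  @Override
  protected Collection<String> getTargetIds(String targetNodeToActivate) {
    // Return a mutable collection: isOtherTargetNodeActive() removes the node being
    // activated before querying the HA state of the remaining nodes.
    return new ArrayList<String>(Arrays.asList("nn1", "nn2"));
  }
}
```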
+ 1 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java

@@ -150,7 +150,7 @@ public class RetryInvocationHandler<T> implements RpcInvocationHandler {
           }
           }
           
           
           if (action.delayMillis > 0) {
           if (action.delayMillis > 0) {
-            ThreadUtil.sleepAtLeastIgnoreInterrupts(action.delayMillis);
+            Thread.sleep(action.delayMillis);
           }
           }
           
           
           if (action.action == RetryAction.RetryDecision.FAILOVER_AND_RETRY) {
           if (action.action == RetryAction.RetryDecision.FAILOVER_AND_RETRY) {

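The one-line RetryInvocationHandler change above (HADOOP-10585) matters because ThreadUtil.sleepAtLeastIgnoreInterrupts, as its name suggests, swallows interrupts, while Thread.sleep throws InterruptedException, so an interrupted caller now breaks out of the retry backoff instead of sleeping the full delay. A small standalone demonstration of that behavior (plain Java, not Hadoop code):

```java
public class InterruptibleBackoffSketch {
  public static void main(String[] args) throws InterruptedException {
    Thread retrying = new Thread(new Runnable() {
      @Override
      public void run() {
        try {
          Thread.sleep(10000);  // stands in for the retry delay
          System.out.println("slept the full delay");
        } catch (InterruptedException e) {
          System.out.println("retry aborted by interrupt");  // expected path
        }
      }
    });
    retrying.start();
    Thread.sleep(200);
    retrying.interrupt();  // caller cancels; the sleeping thread wakes immediately
    retrying.join();
  }
}
```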
+ 14 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ShellBasedUnixGroupsMapping.java

@@ -74,7 +74,8 @@ public class ShellBasedUnixGroupsMapping
    * Get the current user's group list from Unix by running the command 'groups'
    * Get the current user's group list from Unix by running the command 'groups'
    * NOTE. For non-existing user it will return EMPTY list
    * NOTE. For non-existing user it will return EMPTY list
    * @param user user name
    * @param user user name
-   * @return the groups list that the <code>user</code> belongs to
+   * @return the groups list that the <code>user</code> belongs to. The primary
+   *         group is returned first.
    * @throws IOException if encounter any error when running the command
    * @throws IOException if encounter any error when running the command
    */
    */
   private static List<String> getUnixGroups(final String user) throws IOException {
   private static List<String> getUnixGroups(final String user) throws IOException {
@@ -84,6 +85,7 @@ public class ShellBasedUnixGroupsMapping
     } catch (ExitCodeException e) {
       // if we didn't get the group - just return empty list;
       LOG.warn("got exception trying to get groups for user " + user, e);
+      return new LinkedList<String>();
     }
     
     StringTokenizer tokenizer =
@@ -92,6 +94,17 @@ public class ShellBasedUnixGroupsMapping
     while (tokenizer.hasMoreTokens()) {
       groups.add(tokenizer.nextToken());
     }
+
+    // remove duplicated primary group
+    if (!Shell.WINDOWS) {
+      for (int i = 1; i < groups.size(); i++) {
+        if (groups.get(i).equals(groups.get(0))) {
+          groups.remove(i);
+          break;
+        }
+      }
+    }
+
     return groups;
   }
 }
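A standalone sketch of the de-duplication step above, applied to a plausible combined output of `id -gn <user> && id -Gn <user>`. The group names are illustrative, and the simple whitespace tokenizer stands in for Shell's token separator.

```java
import java.util.LinkedList;
import java.util.List;
import java.util.StringTokenizer;

public class PrimaryGroupDedupSketch {
  public static void main(String[] args) {
    // Illustrative output: "id -gn" prints the primary group, "id -Gn" prints
    // the full list, so the primary group appears twice.
    String commandOutput = "staff\nstaff wheel docker";

    List<String> groups = new LinkedList<String>();
    StringTokenizer tokenizer = new StringTokenizer(commandOutput, " \t\n");
    while (tokenizer.hasMoreTokens()) {
      groups.add(tokenizer.nextToken());
    }

    // Same idea as the patch: keep the primary group first and drop its
    // second occurrence further down the list.
    for (int i = 1; i < groups.size(); i++) {
      if (groups.get(i).equals(groups.get(0))) {
        groups.remove(i);
        break;
      }
    }
    System.out.println(groups); // [staff, wheel, docker]
  }
}
```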

+ 53 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ProxyServers.java

@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.security.authorize;
+
+import java.net.InetSocketAddress;
+import java.util.Collection;
+import java.util.HashSet;
+
+import org.apache.hadoop.conf.Configuration;
+
+public class ProxyServers {
+  public static final String CONF_HADOOP_PROXYSERVERS = "hadoop.proxyservers";
+  private static volatile Collection<String> proxyServers;
+
+  public static void refresh() {
+    refresh(new Configuration());
+  }
+
+  public static void refresh(Configuration conf){
+    Collection<String> tempServers = new HashSet<String>();
+    // trusted proxy servers such as http proxies
+    for (String host : conf.getTrimmedStrings(CONF_HADOOP_PROXYSERVERS)) {
+      InetSocketAddress addr = new InetSocketAddress(host, 0);
+      if (!addr.isUnresolved()) {
+        tempServers.add(addr.getAddress().getHostAddress());
+      }
+    }
+    proxyServers = tempServers;
+  }
+
+  public static boolean isProxyServer(String remoteAddr) { 
+    if (proxyServers == null) {
+      refresh(); 
+    }
+    return proxyServers.contains(remoteAddr);
+  }
+}
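A hedged usage sketch for the new class; the addresses are illustrative and, in a real deployment, `hadoop.proxyservers` would list trusted HTTP proxies.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.authorize.ProxyServers;

public class ProxyServersUsageSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Illustrative addresses for trusted proxies.
    conf.set(ProxyServers.CONF_HADOOP_PROXYSERVERS, "10.0.0.5, 10.0.0.6");

    // Resolve and cache the configured addresses.
    ProxyServers.refresh(conf);

    System.out.println(ProxyServers.isProxyServer("10.0.0.5")); // true
    System.out.println(ProxyServers.isProxyServer("10.0.0.9")); // false
  }
}
```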

+ 1 - 21
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ProxyUsers.java

@@ -19,12 +19,10 @@
 package org.apache.hadoop.security.authorize;
 
 import java.net.InetAddress;
-import java.net.InetSocketAddress;
 import java.net.UnknownHostException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.Map;
 import java.util.Map.Entry;
 
@@ -44,7 +42,6 @@ public class ProxyUsers {
   private static final String CONF_GROUPS = ".groups";
   private static final String CONF_GROUPS = ".groups";
   private static final String CONF_HADOOP_PROXYUSER = "hadoop.proxyuser.";
   private static final String CONF_HADOOP_PROXYUSER = "hadoop.proxyuser.";
   private static final String CONF_HADOOP_PROXYUSER_RE = "hadoop\\.proxyuser\\.";
   private static final String CONF_HADOOP_PROXYUSER_RE = "hadoop\\.proxyuser\\.";
-  public static final String CONF_HADOOP_PROXYSERVERS = "hadoop.proxyservers";
   
   
   private static boolean init = false;
   private static boolean init = false;
   //list of users, groups and hosts per proxyuser
   //list of users, groups and hosts per proxyuser
@@ -54,8 +51,6 @@ public class ProxyUsers {
     new HashMap<String, Collection<String>>();
   private static Map<String, Collection<String>> proxyHosts = 
     new HashMap<String, Collection<String>>();
-  private static Collection<String> proxyServers =
-    new HashSet<String>();
 
   /**
    * reread the conf and get new values for "hadoop.proxyuser.*.groups/users/hosts"
@@ -75,7 +70,6 @@ public class ProxyUsers {
     proxyGroups.clear();
     proxyHosts.clear();
     proxyUsers.clear();
-    proxyServers.clear();
     
     // get all the new keys for users
     String regex = CONF_HADOOP_PROXYUSER_RE+"[^.]*\\"+CONF_USERS;
@@ -103,22 +97,8 @@ public class ProxyUsers {
       proxyHosts.put(entry.getKey(),
           StringUtils.getTrimmedStringCollection(entry.getValue()));
     }
-    
-    // trusted proxy servers such as http proxies
-    for (String host : conf.getTrimmedStrings(CONF_HADOOP_PROXYSERVERS)) {
-      InetSocketAddress addr = new InetSocketAddress(host, 0);
-      if (!addr.isUnresolved()) {
-        proxyServers.add(addr.getAddress().getHostAddress());
-      }
-    }
     init = true;
-  }
-
-  public static synchronized boolean isProxyServer(String remoteAddr) { 
-    if(!init) {
-      refreshSuperUserGroupsConfiguration(); 
-    }
-    return proxyServers.contains(remoteAddr);
+    ProxyServers.refresh(conf);
   }
   
   /**

+ 8 - 2
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java

@@ -132,11 +132,17 @@ abstract public class Shell {
                     : new String[]{"bash", "-c", "groups"};
                     : new String[]{"bash", "-c", "groups"};
   }
   }
 
 
-  /** a Unix command to get a given user's groups list */
+  /**
+   * a Unix command to get a given user's groups list.
+   * If the OS is not WINDOWS, the command will get the user's primary group
+   * first and finally get the groups list which includes the primary group.
+   * i.e. the user's primary group will be included twice.
+   */
   public static String[] getGroupsForUserCommand(final String user) {
     //'groups username' command return is non-consistent across different unixes
     return (WINDOWS)? new String[] { WINUTILS, "groups", "-F", "\"" + user + "\""}
-                    : new String [] {"bash", "-c", "id -Gn " + user};
+                    : new String [] {"bash", "-c", "id -gn " + user
+                                     + "&& id -Gn " + user};
   }
 
   /** a Unix command to get a given netgroup's user list */

+ 2 - 2
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestKeyShell.java

@@ -121,7 +121,7 @@ public class TestKeyShell {
     ks.setConf(new Configuration());
     rc = ks.run(args1);
     assertEquals(-1, rc);
-    assertTrue(outContent.toString().contains("key1 has NOT been created."));
+    assertTrue(outContent.toString().contains("key1 has not been created."));
   }
 
   @Test
@@ -134,7 +134,7 @@ public class TestKeyShell {
     ks.setConf(new Configuration());
     rc = ks.run(args1);
     assertEquals(-1, rc);
-    assertTrue(outContent.toString().contains("key1 has NOT been created."));
+    assertTrue(outContent.toString().contains("key1 has not been created."));
   }
 
   @Test

+ 54 - 5
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestRetryProxy.java

@@ -26,27 +26,37 @@ import static org.apache.hadoop.io.retry.RetryPolicies.retryUpToMaximumCountWith
 import static org.apache.hadoop.io.retry.RetryPolicies.retryUpToMaximumCountWithProportionalSleep;
 import static org.apache.hadoop.io.retry.RetryPolicies.retryUpToMaximumTimeWithFixedSleep;
 import static org.apache.hadoop.io.retry.RetryPolicies.exponentialBackoffRetry;
+import static org.junit.Assert.*;
 
 import java.util.Collections;
 import java.util.Map;
+import java.util.concurrent.Callable;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
-
-import junit.framework.TestCase;
+import java.util.concurrent.atomic.AtomicReference;
 
 import org.apache.hadoop.io.retry.UnreliableInterface.FatalException;
 import org.apache.hadoop.io.retry.UnreliableInterface.UnreliableException;
 import org.apache.hadoop.ipc.ProtocolTranslator;
 import org.apache.hadoop.ipc.RemoteException;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.lang.reflect.UndeclaredThrowableException;
 
-public class TestRetryProxy extends TestCase {
+public class TestRetryProxy {
   
   private UnreliableImplementation unreliableImpl;
   
-  @Override
-  protected void setUp() throws Exception {
+  @Before
+  public void setUp() throws Exception {
     unreliableImpl = new UnreliableImplementation();
   }
 
+  @Test
   public void testTryOnceThenFail() throws UnreliableException {
     UnreliableInterface unreliable = (UnreliableInterface)
       RetryProxy.create(UnreliableInterface.class, unreliableImpl, TRY_ONCE_THEN_FAIL);
@@ -62,6 +72,7 @@ public class TestRetryProxy extends TestCase {
   /**
    * Test for {@link RetryInvocationHandler#isRpcInvocation(Object)}
    */
+  @Test
   public void testRpcInvocation() throws Exception {
     // For a proxy method should return true
     final UnreliableInterface unreliable = (UnreliableInterface)
@@ -91,6 +102,7 @@ public class TestRetryProxy extends TestCase {
     assertFalse(RetryInvocationHandler.isRpcInvocation(new Object()));
   }
   
+  @Test
   public void testRetryForever() throws UnreliableException {
     UnreliableInterface unreliable = (UnreliableInterface)
       RetryProxy.create(UnreliableInterface.class, unreliableImpl, RETRY_FOREVER);
@@ -99,6 +111,7 @@ public class TestRetryProxy extends TestCase {
     unreliable.failsTenTimesThenSucceeds();
   }
   
+  @Test
   public void testRetryUpToMaximumCountWithFixedSleep() throws UnreliableException {
     UnreliableInterface unreliable = (UnreliableInterface)
       RetryProxy.create(UnreliableInterface.class, unreliableImpl,
@@ -113,6 +126,7 @@ public class TestRetryProxy extends TestCase {
     }
   }
   
+  @Test
   public void testRetryUpToMaximumTimeWithFixedSleep() throws UnreliableException {
     UnreliableInterface unreliable = (UnreliableInterface)
       RetryProxy.create(UnreliableInterface.class, unreliableImpl,
@@ -127,6 +141,7 @@ public class TestRetryProxy extends TestCase {
     }
   }
   
+  @Test
   public void testRetryUpToMaximumCountWithProportionalSleep() throws UnreliableException {
     UnreliableInterface unreliable = (UnreliableInterface)
       RetryProxy.create(UnreliableInterface.class, unreliableImpl,
@@ -141,6 +156,7 @@ public class TestRetryProxy extends TestCase {
     }
   }
   
+  @Test
   public void testExponentialRetry() throws UnreliableException {
     UnreliableInterface unreliable = (UnreliableInterface)
       RetryProxy.create(UnreliableInterface.class, unreliableImpl,
@@ -155,6 +171,7 @@ public class TestRetryProxy extends TestCase {
     }
   }
   
+  @Test
   public void testRetryByException() throws UnreliableException {
     Map<Class<? extends Exception>, RetryPolicy> exceptionToPolicyMap =
       Collections.<Class<? extends Exception>, RetryPolicy>singletonMap(FatalException.class, TRY_ONCE_THEN_FAIL);
@@ -171,6 +188,7 @@ public class TestRetryProxy extends TestCase {
     }
   }
   
+  @Test
   public void testRetryByRemoteException() {
     Map<Class<? extends Exception>, RetryPolicy> exceptionToPolicyMap =
       Collections.<Class<? extends Exception>, RetryPolicy>singletonMap(FatalException.class, TRY_ONCE_THEN_FAIL);
@@ -186,4 +204,35 @@ public class TestRetryProxy extends TestCase {
     }
   }  
   
+  @Test
+  public void testRetryInterruptible() throws Throwable {
+    final UnreliableInterface unreliable = (UnreliableInterface)
+        RetryProxy.create(UnreliableInterface.class, unreliableImpl,
+            retryUpToMaximumTimeWithFixedSleep(10, 10, TimeUnit.SECONDS));
+    
+    final CountDownLatch latch = new CountDownLatch(1);
+    final AtomicReference<Thread> futureThread = new AtomicReference<Thread>();
+    ExecutorService exec = Executors.newSingleThreadExecutor();
+    Future<Throwable> future = exec.submit(new Callable<Throwable>(){
+      @Override
+      public Throwable call() throws Exception {
+        futureThread.set(Thread.currentThread());
+        latch.countDown();
+        try {
+          unreliable.alwaysFailsWithFatalException();
+        } catch (UndeclaredThrowableException ute) {
+          return ute.getCause();
+        }
+        return null;
+      }
+    });
+    latch.await();
+    Thread.sleep(1000); // time to fail and sleep
+    assertTrue(futureThread.get().isAlive());
+    futureThread.get().interrupt();
+    Throwable e = future.get(1, TimeUnit.SECONDS); // should return immediately 
+    assertNotNull(e);
+    assertEquals(InterruptedException.class, e.getClass());
+    assertEquals("sleep interrupted", e.getMessage());
+  }
 }

+ 38 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/authorize/TestProxyServers.java

@@ -0,0 +1,38 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.security.authorize;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.hadoop.conf.Configuration;
+import org.junit.Test;
+
+public class TestProxyServers {
+
+  @Test
+  public void testProxyServer() {
+    Configuration conf = new Configuration();
+    assertFalse(ProxyServers.isProxyServer("1.1.1.1"));
+    conf.set(ProxyServers.CONF_HADOOP_PROXYSERVERS, "2.2.2.2, 3.3.3.3");
+    ProxyUsers.refreshSuperUserGroupsConfiguration(conf);
+    assertFalse(ProxyServers.isProxyServer("1.1.1.1"));
+    assertTrue(ProxyServers.isProxyServer("2.2.2.2"));
+    assertTrue(ProxyServers.isProxyServer("3.3.3.3"));
+  }
+}

+ 0 - 11
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/authorize/TestProxyUsers.java

@@ -327,17 +327,6 @@ public class TestProxyUsers {
     assertEquals (1,hosts.size());
   }
 
-  @Test
-  public void testProxyServer() {
-    Configuration conf = new Configuration();
-    assertFalse(ProxyUsers.isProxyServer("1.1.1.1"));
-    conf.set(ProxyUsers.CONF_HADOOP_PROXYSERVERS, "2.2.2.2, 3.3.3.3");
-    ProxyUsers.refreshSuperUserGroupsConfiguration(conf);
-    assertFalse(ProxyUsers.isProxyServer("1.1.1.1"));
-    assertTrue(ProxyUsers.isProxyServer("2.2.2.2"));
-    assertTrue(ProxyUsers.isProxyServer("3.3.3.3"));
-  }
-
   private void assertNotAuthorized(UserGroupInformation proxyUgi, String host) {
     try {
       ProxyUsers.authorize(proxyUgi, host);

+ 35 - 1
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -270,6 +270,9 @@ Release 2.5.0 - UNRELEASED
     HDFS-5168. Add cross node dependency support to BlockPlacementPolicy.
     (Nikola Vujic via szetszwo)
 
+    HDFS-6334. Client failover proxy provider for IP failover based NN HA.
+    (kihwal)
+
   IMPROVEMENTS
 
     HDFS-6007. Update documentation about short-circuit local reads (iwasakims
@@ -348,6 +351,18 @@ Release 2.5.0 - UNRELEASED
 
 
     HDFS-6328. Clean up dead code in FSDirectory. (wheat9)
 
+    HDFS-6230. Expose upgrade status through NameNode web UI.
+    (Mit Desai via wheat9)
+
+    HDFS-6186. Pause deletion of blocks when the namenode starts up. (jing9)
+
+    HDFS-6293. Issues with OIV processing PB-based fsimages. (kihwal)
+
+    HDFS-2949. Add check to active state transition to prevent operator-induced
+    split brain. (Rushabh S Shah via kihwal)
+
+    HDFS-6287. Add vecsum test of libhdfs read access times (cmccabe)
+
   OPTIMIZATIONS
 
     HDFS-6214. Webhdfs has poor throughput for files >2GB (daryn)
@@ -440,15 +455,32 @@ Release 2.5.0 - UNRELEASED
     HDFS-6337. Setfacl testcase is failing due to dash character in username
     in TestAclCLI (umamahesh)
 
-    HDFS-5381. ExtendedBlock#hashCode should use both blockId and block pool ID    
+    HDFS-5381. ExtendedBlock#hashCode should use both blockId and block pool ID
     (Benoy Antony via Colin Patrick McCabe)
 
     HDFS-6240. WebImageViewer returns 404 if LISTSTATUS to an empty directory.
     (Akira Ajisaka via wheat9)
 
+    HDFS-6351. Command hdfs dfs -rm -r can't remove empty directory.
+    (Yongjun Zhang via wang)
+
+    HDFS-5522. Datanode disk error check may be incorrectly skipped.
+    (Rushabh S Shah via kihwal)
+
     HDFS-6367. EnumSetParam$Domain#parse fails for parameter containing more than one enum.
     (Yi Liu via umamahesh)
 
+    HDFS-6305. WebHdfs response decoding may throw RuntimeExceptions (Daryn
+    Sharp via jeagles)
+
+    HDFS-6355. Fix divide-by-zero, improper use of wall-clock time in
+    BlockPoolSliceScanner (cmccabe)
+
+    HDFS-6370. Web UI fails to display in intranet under IE.
+    (Haohui Mai via cnauroth)
+
+    HDFS-6381. Fix a typo in INodeReference.java. (Binglin Chang via jing9)
+
 Release 2.4.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES
@@ -515,6 +547,8 @@ Release 2.4.1 - UNRELEASED
     HDFS-6313. WebHdfs may use the wrong NN when configured for multiple HA NNs
     (kihwal)
 
+    HDFS-6326. WebHdfs ACL compatibility is broken. (cnauroth)
+
 Release 2.4.0 - 2014-04-07 
 
   INCOMPATIBLE CHANGES

+ 12 - 0
hadoop-hdfs-project/hadoop-hdfs/src/CMakeLists.txt

@@ -62,6 +62,9 @@ endfunction()
 INCLUDE(CheckCSourceCompiles)
 CHECK_C_SOURCE_COMPILES("int main(void) { static __thread int i = 0; return 0; }" HAVE_BETTER_TLS)
 
+# Check to see if we have Intel SSE intrinsics.
+CHECK_C_SOURCE_COMPILES("#include <emmintrin.h>\nint main(void) { __m128d sum0 = _mm_set_pd(0.0,0.0); return 0; }" HAVE_INTEL_SSE_INTRINSICS)
+
 # Check if we need to link dl library to get dlopen.
 # dlopen on Linux is in separate library but on FreeBSD its in libc
 INCLUDE(CheckLibraryExists)
@@ -170,6 +173,15 @@ target_link_libraries(test_libhdfs_zerocopy
     pthread
 )
 
+add_executable(test_libhdfs_vecsum
+    main/native/libhdfs/test/vecsum.c
+)
+target_link_libraries(test_libhdfs_vecsum
+    hdfs
+    pthread
+    rt
+)
+
 IF(REQUIRE_LIBWEBHDFS)
     add_subdirectory(contrib/libwebhdfs)
 ENDIF(REQUIRE_LIBWEBHDFS)

+ 2 - 0
hadoop-hdfs-project/hadoop-hdfs/src/config.h.cmake

@@ -22,4 +22,6 @@
 
 
 #cmakedefine HAVE_BETTER_TLS
 
+#cmakedefine HAVE_INTEL_SSE_INTRINSICS
+
 #endif

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs

@@ -49,6 +49,7 @@ function print_usage(){
   echo "  balancer             run a cluster balancing utility"
   echo "  balancer             run a cluster balancing utility"
   echo "  jmxget               get JMX exported values from NameNode or DataNode."
   echo "  jmxget               get JMX exported values from NameNode or DataNode."
   echo "  oiv                  apply the offline fsimage viewer to an fsimage"
   echo "  oiv                  apply the offline fsimage viewer to an fsimage"
+  echo "  oiv_legacy           apply the offline fsimage viewer to an legacy fsimage"
   echo "  oev                  apply the offline edits viewer to an edits file"
   echo "  oev                  apply the offline edits viewer to an edits file"
   echo "  fetchdt              fetch a delegation token from the NameNode"
   echo "  fetchdt              fetch a delegation token from the NameNode"
   echo "  getconf              get config values from configuration"
   echo "  getconf              get config values from configuration"
@@ -161,6 +162,8 @@ elif [ "$COMMAND" = "jmxget" ] ; then
   CLASS=org.apache.hadoop.hdfs.tools.JMXGet
 elif [ "$COMMAND" = "oiv" ] ; then
   CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewerPB
+elif [ "COMMAND" = "oiv_legacy" ] ; then
+  CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer
 elif [ "$COMMAND" = "oev" ] ; then
 elif [ "$COMMAND" = "oev" ] ; then
   CLASS=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer
   CLASS=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer
 elif [ "$COMMAND" = "fetchdt" ] ; then
 elif [ "$COMMAND" = "fetchdt" ] ; then

+ 5 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java

@@ -247,6 +247,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
       "dfs.namenode.path.based.cache.refresh.interval.ms";
       "dfs.namenode.path.based.cache.refresh.interval.ms";
   public static final long    DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS_DEFAULT = 30000L;
   public static final long    DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS_DEFAULT = 30000L;
 
 
+  /** Pending period of block deletion since NameNode startup */
+  public static final String  DFS_NAMENODE_STARTUP_DELAY_BLOCK_DELETION_MS_KEY = "dfs.namenode.startup.delay.block.deletion.ms";
+  public static final long    DFS_NAMENODE_STARTUP_DELAY_BLOCK_DELETION_MS_DEFAULT = 0L;
+
   // Whether to enable datanode's stale state detection and usage for reads
   // Whether to enable datanode's stale state detection and usage for reads
   public static final String DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_KEY = "dfs.namenode.avoid.read.stale.datanode";
   public static final String DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_KEY = "dfs.namenode.avoid.read.stale.datanode";
   public static final boolean DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_DEFAULT = false;
   public static final boolean DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_DEFAULT = false;
@@ -500,6 +504,7 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final String  DFS_SECONDARY_NAMENODE_KERBEROS_INTERNAL_SPNEGO_PRINCIPAL_KEY = "dfs.secondary.namenode.kerberos.internal.spnego.principal";
   public static final String  DFS_NAMENODE_NAME_CACHE_THRESHOLD_KEY = "dfs.namenode.name.cache.threshold";
   public static final int     DFS_NAMENODE_NAME_CACHE_THRESHOLD_DEFAULT = 10;
+  public static final String  DFS_NAMENODE_LEGACY_OIV_IMAGE_DIR_KEY = "dfs.namenode.legacy-oiv-image.dir";
   
   public static final String  DFS_NAMESERVICES = "dfs.nameservices";
   public static final String  DFS_NAMESERVICE_ID = "dfs.nameservice.id";
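A small sketch of how the new startup-delay key might be set programmatically; the 10-minute value is only an example, and 0 (the default) keeps the old behaviour of deleting blocks immediately.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;

public class StartupBlockDeletionDelaySketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Example: hold back block deletion for 10 minutes after NameNode startup.
    conf.setLong(
        DFSConfigKeys.DFS_NAMENODE_STARTUP_DELAY_BLOCK_DELETION_MS_KEY,
        10 * 60 * 1000L);

    long delayMs = conf.getLong(
        DFSConfigKeys.DFS_NAMENODE_STARTUP_DELAY_BLOCK_DELETION_MS_KEY,
        DFSConfigKeys.DFS_NAMENODE_STARTUP_DELAY_BLOCK_DELETION_MS_DEFAULT);
    System.out.println("startup block deletion delay: " + delayMs + " ms");
  }
}
```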

+ 41 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java

@@ -38,10 +38,12 @@ import org.apache.hadoop.HadoopIllegalArgumentException;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.NameNodeProxies;
 import org.apache.hadoop.hdfs.protocol.ClientProtocol;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSelector;
+import org.apache.hadoop.hdfs.server.namenode.ha.AbstractNNFailoverProxyProvider;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.ipc.RPC;
@@ -205,17 +207,54 @@ public class HAUtil {
  
  
   /**
    * @return true if the given nameNodeUri appears to be a logical URI.
-   * This is the case if there is a failover proxy provider configured
-   * for it in the given configuration.
    */
   public static boolean isLogicalUri(
       Configuration conf, URI nameNodeUri) {
     String host = nameNodeUri.getHost();
+    // A logical name must be one of the service IDs.
+    return DFSUtil.getNameServiceIds(conf).contains(host);
+  }
+
+  /**
+   * Check whether the client has a failover proxy provider configured
+   * for the namenode/nameservice.
+   *
+   * @param conf Configuration
+   * @param nameNodeUri The URI of namenode
+   * @return true if failover is configured.
+   */
+  public static boolean isClientFailoverConfigured(
+      Configuration conf, URI nameNodeUri) {
+    String host = nameNodeUri.getHost();
     String configKey = DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "."
     String configKey = DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "."
         + host;
         + host;
     return conf.get(configKey) != null;
     return conf.get(configKey) != null;
   }
   }
 
 
+  /**
+   * Check whether logical URI is needed for the namenode and
+   * the corresponding failover proxy provider in the config.
+   *
+   * @param conf Configuration
+   * @param nameNodeUri The URI of namenode
+   * @return true if logical URI is needed. false, if not needed.
+   * @throws IOException most likely due to misconfiguration.
+   */
+  public static boolean useLogicalUri(Configuration conf, URI nameNodeUri) 
+      throws IOException {
+    // Create the proxy provider. Actual proxy is not created.
+    AbstractNNFailoverProxyProvider<ClientProtocol> provider = NameNodeProxies
+        .createFailoverProxyProvider(conf, nameNodeUri, ClientProtocol.class,
+        false);
+
+    // No need to use logical URI since failover is not configured.
+    if (provider == null) {
+      return false;
+    }
+    // Check whether the failover proxy provider uses logical URI.
+    return provider.useLogicalURI();
+  }
+
   /**
    * Parse the file system URI out of the provided token.
    */
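A hedged client-side sketch of the split between the two checks; "hdfs://mycluster" is an illustrative nameservice URI and the calls assume the signatures shown in this hunk.

```java
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.HAUtil;

public class LogicalUriCheckSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    URI nnUri = new URI("hdfs://mycluster"); // illustrative nameservice URI

    // isLogicalUri() now only asks whether the host is a configured
    // nameservice ID; useLogicalUri() additionally consults the failover
    // proxy provider, if one is configured.
    boolean logical = HAUtil.isLogicalUri(conf, nnUri);
    boolean useLogical = logical && HAUtil.useLogicalUri(conf, nnUri);
    System.out.println("logical=" + logical + ", useLogicalUri=" + useLogical);
  }
}
```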

+ 58 - 31
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java

@@ -50,6 +50,8 @@ import org.apache.hadoop.hdfs.protocolPB.JournalProtocolPB;
 import org.apache.hadoop.hdfs.protocolPB.JournalProtocolTranslatorPB;
 import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolPB;
 import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolTranslatorPB;
+import org.apache.hadoop.hdfs.server.namenode.ha.AbstractNNFailoverProxyProvider;
+import org.apache.hadoop.hdfs.server.namenode.ha.WrappedFailoverProxyProvider;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.namenode.SafeModeException;
 import org.apache.hadoop.hdfs.server.protocol.JournalProtocol;
@@ -136,26 +138,29 @@ public class NameNodeProxies {
   @SuppressWarnings("unchecked")
   @SuppressWarnings("unchecked")
   public static <T> ProxyAndInfo<T> createProxy(Configuration conf,
   public static <T> ProxyAndInfo<T> createProxy(Configuration conf,
       URI nameNodeUri, Class<T> xface) throws IOException {
       URI nameNodeUri, Class<T> xface) throws IOException {
-    Class<FailoverProxyProvider<T>> failoverProxyProviderClass =
-        getFailoverProxyProviderClass(conf, nameNodeUri, xface);
+    AbstractNNFailoverProxyProvider<T> failoverProxyProvider =
+        createFailoverProxyProvider(conf, nameNodeUri, xface, true);
   
   
-    if (failoverProxyProviderClass == null) {
+    if (failoverProxyProvider == null) {
       // Non-HA case
       // Non-HA case
       return createNonHAProxy(conf, NameNode.getAddress(nameNodeUri), xface,
       return createNonHAProxy(conf, NameNode.getAddress(nameNodeUri), xface,
           UserGroupInformation.getCurrentUser(), true);
           UserGroupInformation.getCurrentUser(), true);
     } else {
     } else {
       // HA case
       // HA case
-      FailoverProxyProvider<T> failoverProxyProvider = NameNodeProxies
-          .createFailoverProxyProvider(conf, failoverProxyProviderClass, xface,
-              nameNodeUri);
       Conf config = new Conf(conf);
       Conf config = new Conf(conf);
       T proxy = (T) RetryProxy.create(xface, failoverProxyProvider,
       T proxy = (T) RetryProxy.create(xface, failoverProxyProvider,
           RetryPolicies.failoverOnNetworkException(
           RetryPolicies.failoverOnNetworkException(
               RetryPolicies.TRY_ONCE_THEN_FAIL, config.maxFailoverAttempts,
               RetryPolicies.TRY_ONCE_THEN_FAIL, config.maxFailoverAttempts,
               config.maxRetryAttempts, config.failoverSleepBaseMillis,
               config.maxRetryAttempts, config.failoverSleepBaseMillis,
               config.failoverSleepMaxMillis));
               config.failoverSleepMaxMillis));
-      
-      Text dtService = HAUtil.buildTokenServiceForLogicalUri(nameNodeUri);
+
+      Text dtService;
+      if (failoverProxyProvider.useLogicalURI()) {
+        dtService = HAUtil.buildTokenServiceForLogicalUri(nameNodeUri);
+      } else {
+        dtService = SecurityUtil.buildTokenService(
+            NameNode.getAddress(nameNodeUri));
+      }
       return new ProxyAndInfo<T>(proxy, dtService);
     }
   }
@@ -183,12 +188,10 @@ public class NameNodeProxies {
       Configuration config, URI nameNodeUri, Class<T> xface,
       int numResponseToDrop) throws IOException {
     Preconditions.checkArgument(numResponseToDrop > 0);
-    Class<FailoverProxyProvider<T>> failoverProxyProviderClass = 
-        getFailoverProxyProviderClass(config, nameNodeUri, xface);
-    if (failoverProxyProviderClass != null) { // HA case
-      FailoverProxyProvider<T> failoverProxyProvider = 
-          createFailoverProxyProvider(config, failoverProxyProviderClass, 
-              xface, nameNodeUri);
+    AbstractNNFailoverProxyProvider<T> failoverProxyProvider =
+        createFailoverProxyProvider(config, nameNodeUri, xface, true);
+
+    if (failoverProxyProvider != null) { // HA case
       int delay = config.getInt(
           DFS_CLIENT_FAILOVER_SLEEPTIME_BASE_KEY,
           DFS_CLIENT_FAILOVER_SLEEPTIME_BASE_DEFAULT);
@@ -211,7 +214,13 @@ public class NameNodeProxies {
       T proxy = (T) Proxy.newProxyInstance(
           failoverProxyProvider.getInterface().getClassLoader(),
           new Class[] { xface }, dummyHandler);
-      Text dtService = HAUtil.buildTokenServiceForLogicalUri(nameNodeUri);
+      Text dtService;
+      if (failoverProxyProvider.useLogicalURI()) {
+        dtService = HAUtil.buildTokenServiceForLogicalUri(nameNodeUri);
+      } else {
+        dtService = SecurityUtil.buildTokenService(
+            NameNode.getAddress(nameNodeUri));
+      }
       return new ProxyAndInfo<T>(proxy, dtService);
     } else {
       LOG.warn("Currently creating proxy using " +
@@ -396,7 +405,7 @@ public class NameNodeProxies {
   /** Gets the configured Failover proxy provider's class */
   @VisibleForTesting
   public static <T> Class<FailoverProxyProvider<T>> getFailoverProxyProviderClass(
-      Configuration conf, URI nameNodeUri, Class<T> xface) throws IOException {
+      Configuration conf, URI nameNodeUri) throws IOException {
     if (nameNodeUri == null) {
       return null;
     }
@@ -408,17 +417,6 @@ public class NameNodeProxies {
       @SuppressWarnings("unchecked")
       @SuppressWarnings("unchecked")
       Class<FailoverProxyProvider<T>> ret = (Class<FailoverProxyProvider<T>>) conf
       Class<FailoverProxyProvider<T>> ret = (Class<FailoverProxyProvider<T>>) conf
           .getClass(configKey, null, FailoverProxyProvider.class);
           .getClass(configKey, null, FailoverProxyProvider.class);
-      if (ret != null) {
-        // If we found a proxy provider, then this URI should be a logical NN.
-        // Given that, it shouldn't have a non-default port number.
-        int port = nameNodeUri.getPort();
-        if (port > 0 && port != NameNode.DEFAULT_PORT) {
-          throw new IOException("Port " + port + " specified in URI "
-              + nameNodeUri + " but host '" + host
-              + "' is a logical (HA) namenode"
-              + " and does not use port information.");
-        }
-      }
       return ret;
     } catch (RuntimeException e) {
       if (e.getCause() instanceof ClassNotFoundException) {
@@ -433,18 +431,33 @@ public class NameNodeProxies {
 
 
   /** Creates the Failover proxy provider instance*/
   @VisibleForTesting
-  public static <T> FailoverProxyProvider<T> createFailoverProxyProvider(
-      Configuration conf, Class<FailoverProxyProvider<T>> failoverProxyProviderClass,
-      Class<T> xface, URI nameNodeUri) throws IOException {
+  public static <T> AbstractNNFailoverProxyProvider<T> createFailoverProxyProvider(
+      Configuration conf, URI nameNodeUri, Class<T> xface, boolean checkPort)
+      throws IOException {
+    Class<FailoverProxyProvider<T>> failoverProxyProviderClass = null;
+    AbstractNNFailoverProxyProvider<T> providerNN;
     Preconditions.checkArgument(
         xface.isAssignableFrom(NamenodeProtocols.class),
         "Interface %s is not a NameNode protocol", xface);
     try {
+      // Obtain the class of the proxy provider
+      failoverProxyProviderClass = getFailoverProxyProviderClass(conf,
+          nameNodeUri);
+      if (failoverProxyProviderClass == null) {
+        return null;
+      }
+      // Create a proxy provider instance.
       Constructor<FailoverProxyProvider<T>> ctor = failoverProxyProviderClass
           .getConstructor(Configuration.class, URI.class, Class.class);
       FailoverProxyProvider<T> provider = ctor.newInstance(conf, nameNodeUri,
           xface);
-      return provider;
+
+      // If the proxy provider is of an old implementation, wrap it.
+      if (!(provider instanceof AbstractNNFailoverProxyProvider)) {
+        providerNN = new WrappedFailoverProxyProvider<T>(provider);
+      } else {
+        providerNN = (AbstractNNFailoverProxyProvider<T>)provider;
+      }
     } catch (Exception e) {
       String message = "Couldn't create proxy provider " + failoverProxyProviderClass;
       if (LOG.isDebugEnabled()) {
@@ -456,6 +469,20 @@ public class NameNodeProxies {
         throw new IOException(message, e);
       }
     }
+
+    // Check the port in the URI, if it is logical.
+    if (checkPort && providerNN.useLogicalURI()) {
+      int port = nameNodeUri.getPort();
+      if (port > 0 && port != NameNode.DEFAULT_PORT) {
+        // Throwing here without any cleanup is fine since we have not
+        // actually created the underlying proxies yet.
+        throw new IOException("Port " + port + " specified in URI "
+            + nameNodeUri + " but host '" + nameNodeUri.getHost()
+            + "' is a logical (HA) namenode"
+            + " and does not use port information.");
+      }
+    }
+    return providerNN;
   }
 
 }

+ 63 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/FsAclPermission.java

@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.protocol;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.fs.permission.FsPermission;
+
+/**
+ * HDFS permission subclass used to indicate an ACL is present.  The ACL bit is
+ * not visible directly to users of {@link FsPermission} serialization.  This is
+ * done for backwards compatibility in case any existing clients assume the
+ * value of FsPermission is in a particular range.
+ */
+@InterfaceAudience.Private
+public class FsAclPermission extends FsPermission {
+  private final static short ACL_BIT = 1 << 12;
+  private final boolean aclBit;
+
+  /**
+   * Constructs a new FsAclPermission based on the given FsPermission.
+   *
+   * @param perm FsPermission containing permission bits
+   */
+  public FsAclPermission(FsPermission perm) {
+    super(perm.toShort());
+    aclBit = true;
+  }
+
+  /**
+   * Creates a new FsAclPermission by calling the base class constructor.
+   *
+   * @param perm short containing permission bits
+   */
+  public FsAclPermission(short perm) {
+    super(perm);
+    aclBit = (perm & ACL_BIT) != 0;
+  }
+
+  @Override
+  public short toExtendedShort() {
+    return (short)(toShort() | (aclBit ? ACL_BIT : 0));
+  }
+
+  @Override
+  public boolean getAclBit() {
+    return aclBit;
+  }
+}
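A short sketch of how the ACL bit rides along with the ordinary permission bits; it assumes the companion FsPermission#toExtendedShort()/getAclBit() changes elsewhere in this merge.

```java
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.protocol.FsAclPermission;

public class FsAclPermissionSketch {
  public static void main(String[] args) {
    FsPermission plain = new FsPermission((short) 0755);
    FsAclPermission withAcl = new FsAclPermission(plain);

    // toShort() stays in the traditional range for old clients, while
    // toExtendedShort() carries the extra ACL bit (1 << 12).
    System.out.println(Integer.toOctalString(withAcl.toShort()));         // 755
    System.out.println(Integer.toOctalString(withAcl.toExtendedShort())); // 10755
    System.out.println(withAcl.getAclBit());                              // true
  }
}
```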

+ 3 - 4
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java

@@ -59,6 +59,7 @@ import org.apache.hadoop.hdfs.protocol.DatanodeInfo.AdminStates;
 import org.apache.hadoop.hdfs.protocol.DatanodeLocalInfo;
 import org.apache.hadoop.hdfs.protocol.DirectoryListing;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.hdfs.protocol.FsAclPermission;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.RollingUpgradeAction;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
@@ -1194,13 +1195,11 @@ public class PBHelper {
   }
   
   public static FsPermissionProto convert(FsPermission p) {
-    if (p == null) return null;
-    return FsPermissionProto.newBuilder().setPerm(p.toShort()).build();
+    return FsPermissionProto.newBuilder().setPerm(p.toExtendedShort()).build();
   }
   
   public static FsPermission convert(FsPermissionProto p) {
-    if (p == null) return null;
-    return new FsPermission((short)p.getPerm());
+    return new FsAclPermission((short)p.getPerm());
   }
   
   

+ 75 - 10
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java

@@ -18,9 +18,16 @@
 
 
 package org.apache.hadoop.hdfs.security.token.delegation;
 
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-import com.google.protobuf.ByteString;
+import java.io.DataInput;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.InterruptedIOException;
+import java.net.InetSocketAddress;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map.Entry;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -43,13 +50,9 @@ import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager;
 import org.apache.hadoop.security.token.delegation.DelegationKey;
 
-import java.io.DataInput;
-import java.io.IOException;
-import java.io.InterruptedIOException;
-import java.net.InetSocketAddress;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map.Entry;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+import com.google.protobuf.ByteString;
 
 
 /**
  * A HDFS specific delegation token secret manager.
@@ -211,6 +214,18 @@ public class DelegationTokenSecretManager
     }
   }
 
+  /**
+   * Store the current state of the SecretManager for persistence
+   *
+   * @param out Output stream for writing into fsimage.
+   * @param sdPath String storage directory path
+   * @throws IOException
+   */
+  public synchronized void saveSecretManagerStateCompat(DataOutputStream out,
+      String sdPath) throws IOException {
+    serializerCompat.save(out, sdPath);
+  }
+
   public synchronized SecretManagerState saveSecretManagerState() {
     SecretManagerSection s = SecretManagerSection.newBuilder()
         .setCurrentId(currentId)
@@ -406,6 +421,56 @@ public class DelegationTokenSecretManager
       loadCurrentTokens(in);
     }
 
+    private void save(DataOutputStream out, String sdPath) throws IOException {
+      out.writeInt(currentId);
+      saveAllKeys(out, sdPath);
+      out.writeInt(delegationTokenSequenceNumber);
+      saveCurrentTokens(out, sdPath);
+    }
+
+    /**
+     * Private helper methods to save delegation keys and tokens in fsimage
+     */
+    private synchronized void saveCurrentTokens(DataOutputStream out,
+        String sdPath) throws IOException {
+      StartupProgress prog = NameNode.getStartupProgress();
+      Step step = new Step(StepType.DELEGATION_TOKENS, sdPath);
+      prog.beginStep(Phase.SAVING_CHECKPOINT, step);
+      prog.setTotal(Phase.SAVING_CHECKPOINT, step, currentTokens.size());
+      Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step);
+      out.writeInt(currentTokens.size());
+      Iterator<DelegationTokenIdentifier> iter = currentTokens.keySet()
+          .iterator();
+      while (iter.hasNext()) {
+        DelegationTokenIdentifier id = iter.next();
+        id.write(out);
+        DelegationTokenInformation info = currentTokens.get(id);
+        out.writeLong(info.getRenewDate());
+        counter.increment();
+      }
+      prog.endStep(Phase.SAVING_CHECKPOINT, step);
+    }
+
+    /*
+     * Save the current state of allKeys
+     */
+    private synchronized void saveAllKeys(DataOutputStream out, String sdPath)
+        throws IOException {
+      StartupProgress prog = NameNode.getStartupProgress();
+      Step step = new Step(StepType.DELEGATION_KEYS, sdPath);
+      prog.beginStep(Phase.SAVING_CHECKPOINT, step);
+      prog.setTotal(Phase.SAVING_CHECKPOINT, step, allKeys.size());
+      Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step);
+      out.writeInt(allKeys.size());
+      Iterator<Integer> iter = allKeys.keySet().iterator();
+      while (iter.hasNext()) {
+        Integer key = iter.next();
+        allKeys.get(key).write(out);
+        counter.increment();
+      }
+      prog.endStep(Phase.SAVING_CHECKPOINT, step);
+    }
+
     /**
      * Private helper methods to load Delegation tokens from fsimage
      */

+ 5 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java

@@ -261,7 +261,11 @@ public class BlockManager {
     this.namesystem = namesystem;
     datanodeManager = new DatanodeManager(this, namesystem, conf);
     heartbeatManager = datanodeManager.getHeartbeatManager();
-    invalidateBlocks = new InvalidateBlocks(datanodeManager);
+
+    final long pendingPeriod = conf.getLong(
+        DFSConfigKeys.DFS_NAMENODE_STARTUP_DELAY_BLOCK_DELETION_MS_KEY,
+        DFSConfigKeys.DFS_NAMENODE_STARTUP_DELAY_BLOCK_DELETION_MS_DEFAULT);
+    invalidateBlocks = new InvalidateBlocks(datanodeManager, pendingPeriod);
 
 
     // Compute the map capacity by allocating 2% of total memory
     blocksMap = new BlocksMap(

+ 46 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/InvalidateBlocks.java

@@ -18,16 +18,24 @@
 package org.apache.hadoop.hdfs.server.blockmanagement;
 
 import java.io.PrintWriter;
+import java.text.SimpleDateFormat;
 import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.GregorianCalendar;
 import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
 
+import org.apache.commons.logging.Log;
 import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.util.LightWeightHashSet;
+import org.apache.hadoop.util.Time;
+
+import com.google.common.annotations.VisibleForTesting;
 
 /**
  * Keeps a Collection for every named machine containing blocks
@@ -44,8 +52,28 @@ class InvalidateBlocks {
 
 
   private final DatanodeManager datanodeManager;
 
-  InvalidateBlocks(final DatanodeManager datanodeManager) {
+  /**
+   * The period of pending time for block invalidation since the NameNode
+   * startup
+   */
+  private final long pendingPeriodInMs;
+  /** the startup time */
+  private final long startupTime = Time.monotonicNow();
+
+  InvalidateBlocks(final DatanodeManager datanodeManager, long pendingPeriodInMs) {
     this.datanodeManager = datanodeManager;
+    this.pendingPeriodInMs = pendingPeriodInMs;
+    printBlockDeletionTime(BlockManager.LOG);
+  }
+
+  private void printBlockDeletionTime(final Log log) {
+    log.info(DFSConfigKeys.DFS_NAMENODE_STARTUP_DELAY_BLOCK_DELETION_MS_KEY
+        + " is set to " + pendingPeriodInMs + " ms.");
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy MMM dd HH:mm:ss");
+    Calendar calendar = new GregorianCalendar();
+    calendar.add(Calendar.SECOND, (int) (this.pendingPeriodInMs / 1000));
+    log.info("The block deletion will start around "
+        + sdf.format(calendar.getTime()));
   }
   }
 
 
   /** @return the number of blocks to be invalidated . */
   /** @return the number of blocks to be invalidated . */
@@ -134,8 +162,25 @@ class InvalidateBlocks {
     return new ArrayList<String>(node2blocks.keySet());
     return new ArrayList<String>(node2blocks.keySet());
   }
   }
 
 
+  /**
+   * @return the remaining pending time
+   */
+  @VisibleForTesting
+  long getInvalidationDelay() {
+    return pendingPeriodInMs - (Time.monotonicNow() - startupTime);
+  }
+
   synchronized List<Block> invalidateWork(
       final String storageId, final DatanodeDescriptor dn) {
+    final long delay = getInvalidationDelay();
+    if (delay > 0) {
+      if (BlockManager.LOG.isDebugEnabled()) {
+        BlockManager.LOG
+            .debug("Block deletion is delayed during NameNode startup. "
+                + "The deletion will start after " + delay + " ms.");
+      }
+      return null;
+    }
     final LightWeightHashSet<Block> set = node2blocks.get(storageId);
     if (set == null) {
       return null;
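
The new constructor and the check at the top of invalidateWork implement a startup grace period: block deletions are suppressed until a configurable amount of monotonic time has elapsed since the NameNode came up. A minimal standalone sketch of that gate (class and method names here are illustrative, not part of the commit):

    import java.util.concurrent.TimeUnit;

    // Illustrative startup-delay gate; mirrors the pendingPeriodInMs/startupTime logic above.
    class StartupDelayGate {
      private final long pendingPeriodMs;
      // Monotonic start time, analogous to Time.monotonicNow() in Hadoop.
      private final long startupNanos = System.nanoTime();

      StartupDelayGate(long pendingPeriodMs) {
        this.pendingPeriodMs = pendingPeriodMs;
      }

      /** Remaining delay in milliseconds; zero or negative once work may proceed. */
      long remainingDelayMs() {
        long elapsedMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startupNanos);
        return pendingPeriodMs - elapsedMs;
      }

      /** True once the grace period has fully elapsed. */
      boolean mayProceed() {
        return remainingDelayMs() <= 0;
      }
    }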

+ 2 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java

@@ -34,6 +34,7 @@ import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
 import org.apache.hadoop.security.authentication.util.KerberosName;
+import org.apache.hadoop.security.authorize.ProxyServers;
 import org.apache.hadoop.security.authorize.ProxyUsers;
 import org.apache.hadoop.security.token.Token;
 
@@ -193,7 +194,7 @@ public class JspHelper {
   public static String getRemoteAddr(HttpServletRequest request) {
     String remoteAddr = request.getRemoteAddr();
     String proxyHeader = request.getHeader("X-Forwarded-For");
-    if (proxyHeader != null && ProxyUsers.isProxyServer(remoteAddr)) {
+    if (proxyHeader != null && ProxyServers.isProxyServer(remoteAddr)) {
       final String clientAddr = proxyHeader.split(",")[0].trim();
       if (!clientAddr.isEmpty()) {
         remoteAddr = clientAddr;
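
For context, the pattern above only trusts X-Forwarded-For when the direct peer is a known proxy server, and then takes the first (client-most) entry of the header. A hedged, standalone illustration; the ProxyServers lookup is replaced here by a hypothetical set of trusted addresses:

    import java.util.Set;

    // Illustrative helper: resolve the effective client address behind a trusted proxy.
    final class ClientAddrResolver {
      static String resolve(String remoteAddr, String xForwardedFor,
          Set<String> trustedProxies) {
        // Only honor the header when the direct peer is a trusted proxy.
        if (xForwardedFor != null && trustedProxies.contains(remoteAddr)) {
          String clientAddr = xForwardedFor.split(",")[0].trim();
          if (!clientAddr.isEmpty()) {
            return clientAddr;
          }
        }
        return remoteAddr;
      }
    }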

+ 14 - 13
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceScanner.java

@@ -97,7 +97,7 @@ class BlockPoolSliceScanner {
   private long totalTransientErrors = 0;
   private final AtomicInteger totalBlocksScannedInLastRun = new AtomicInteger(); // Used for test only
   
-  private long currentPeriodStart = Time.now();
+  private long currentPeriodStart = Time.monotonicNow();
   private long bytesLeft = 0; // Bytes to scan in this period
   private long totalBytesToScan = 0;
   private boolean isNewPeriod = true;
@@ -260,7 +260,7 @@ class BlockPoolSliceScanner {
     long period = Math.min(scanPeriod, 
                            Math.max(blockMap.size(),1) * 600 * 1000L);
     int periodInt = Math.abs((int)period);
-    return Time.now() - scanPeriod + 
+    return Time.monotonicNow() - scanPeriod +
         DFSUtil.getRandom().nextInt(periodInt);
   }
 
@@ -322,7 +322,7 @@ class BlockPoolSliceScanner {
       info = new BlockScanInfo(block);
     }
     
-    long now = Time.now();
+    long now = Time.monotonicNow();
     info.lastScanType = type;
     info.lastScanTime = now;
     info.lastScanOk = scanOk;
@@ -399,8 +399,9 @@ class BlockPoolSliceScanner {
   }
   
   private synchronized void adjustThrottler() {
-    long timeLeft = currentPeriodStart+scanPeriod - Time.now();
-    long bw = Math.max(bytesLeft*1000/timeLeft, MIN_SCAN_RATE);
+    long timeLeft = Math.max(1L,
+        currentPeriodStart + scanPeriod - Time.monotonicNow());
+    long bw = Math.max((bytesLeft * 1000) / timeLeft, MIN_SCAN_RATE);
     throttler.setBandwidth(Math.min(bw, MAX_SCAN_RATE));
   }
   
@@ -523,7 +524,7 @@ class BlockPoolSliceScanner {
   private boolean assignInitialVerificationTimes() {
     //First updates the last verification times from the log file.
     if (verificationLog != null) {
-      long now = Time.now();
+      long now = Time.monotonicNow();
       RollingLogs.LineIterator logIterator = null;
       try {
         logIterator = verificationLog.logs.iterator(false);
@@ -574,7 +575,7 @@ class BlockPoolSliceScanner {
       // Initially spread the block reads over half of scan period
       // so that we don't keep scanning the blocks too quickly when restarted.
       long verifyInterval = Math.min(scanPeriod/(2L * numBlocks), 10*60*1000L);
-      long lastScanTime = Time.now() - scanPeriod;
+      long lastScanTime = Time.monotonicNow() - scanPeriod;
 
       if (!blockInfoSet.isEmpty()) {
         BlockScanInfo info;
@@ -601,16 +602,16 @@ class BlockPoolSliceScanner {
 
     // reset the byte counts :
     bytesLeft = totalBytesToScan;
-    currentPeriodStart = Time.now();
+    currentPeriodStart = Time.monotonicNow();
     isNewPeriod = true;
   }
   
   private synchronized boolean workRemainingInCurrentPeriod() {
-    if (bytesLeft <= 0 && Time.now() < currentPeriodStart + scanPeriod) {
+    if (bytesLeft <= 0 && Time.monotonicNow() < currentPeriodStart + scanPeriod) {
       if (LOG.isDebugEnabled()) {
         LOG.debug("Skipping scan since bytesLeft=" + bytesLeft + ", Start=" +
                   currentPeriodStart + ", period=" + scanPeriod + ", now=" +
-                  Time.now() + " " + blockPoolId);
+                  Time.monotonicNow() + " " + blockPoolId);
       }
       return false;
     } else {
@@ -633,7 +634,7 @@ class BlockPoolSliceScanner {
       scan();
     } finally {
       totalBlocksScannedInLastRun.set(processedBlocks.size());
-      lastScanTime.set(Time.now());
+      lastScanTime.set(Time.monotonicNow());
     }
   }
 
@@ -656,7 +657,7 @@ class BlockPoolSliceScanner {
       while (datanode.shouldRun
           && !datanode.blockScanner.blockScannerThread.isInterrupted()
           && datanode.isBPServiceAlive(blockPoolId)) {
-        long now = Time.now();
+        long now = Time.monotonicNow();
         synchronized (this) {
           if ( now >= (currentPeriodStart + scanPeriod)) {
             startNewPeriod();
@@ -714,7 +715,7 @@ class BlockPoolSliceScanner {
     
     int total = blockInfoSet.size();
     
-    long now = Time.now();
+    long now = Time.monotonicNow();
     
     Date date = new Date();
     

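All of the Time.now() to Time.monotonicNow() substitutions above follow the same rule: wall-clock time can jump (NTP corrections, manual changes), so elapsed intervals should be measured on a monotonic clock. A small sketch of the idea using plain System.nanoTime() (names here are illustrative):

    import java.util.concurrent.TimeUnit;

    // Measuring an interval with a monotonic clock instead of wall-clock time.
    class ScanPeriodTimer {
      private final long periodMs;
      private long periodStartNanos = System.nanoTime();

      ScanPeriodTimer(long periodMs) {
        this.periodMs = periodMs;
      }

      void startNewPeriod() {
        periodStartNanos = System.nanoTime();
      }

      /** True while the current period is still running, regardless of wall-clock jumps. */
      boolean inCurrentPeriod() {
        long elapsedMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - periodStartNanos);
        return elapsedMs < periodMs;
      }
    }
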
+ 4 - 8
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java

@@ -248,7 +248,7 @@ class BlockReceiver implements Closeable {
       
       if (cause != null) { // possible disk error
         ioe = cause;
-        datanode.checkDiskError(ioe); // may throw an exception here
+        datanode.checkDiskError();
       }
       
       throw ioe;
@@ -324,7 +324,7 @@ class BlockReceiver implements Closeable {
     }
     // disk check
     if(ioe != null) {
-      datanode.checkDiskError(ioe);
+      datanode.checkDiskError();
       throw ioe;
     }
   }
@@ -615,7 +615,7 @@ class BlockReceiver implements Closeable {
           manageWriterOsCache(offsetInBlock);
         }
       } catch (IOException iex) {
-        datanode.checkDiskError(iex);
+        datanode.checkDiskError();
         throw iex;
       }
     }
@@ -1171,11 +1171,7 @@ class BlockReceiver implements Closeable {
         } catch (IOException e) {
           LOG.warn("IOException in BlockReceiver.run(): ", e);
           if (running) {
-            try {
-              datanode.checkDiskError(e); // may throw an exception here
-            } catch (IOException ioe) {
-              LOG.warn("DataNode.checkDiskError failed in run() with: ", ioe);
-            }
+            datanode.checkDiskError();
             LOG.info(myString, e);
             running = false;
             if (!Thread.interrupted()) { // failure not caused by interruption

+ 67 - 56
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java

@@ -84,7 +84,6 @@ import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.token.TokenIdentifier;
 import org.apache.hadoop.util.*;
 import org.apache.hadoop.util.DiskChecker.DiskErrorException;
-import org.apache.hadoop.util.DiskChecker.DiskOutOfSpaceException;
 import org.mortbay.util.ajax.JSON;
 
 import javax.management.ObjectName;
@@ -92,8 +91,6 @@ import javax.management.ObjectName;
 import java.io.*;
 import java.lang.management.ManagementFactory;
 import java.net.*;
-import java.nio.channels.ClosedByInterruptException;
-import java.nio.channels.ClosedChannelException;
 import java.nio.channels.SocketChannel;
 import java.security.PrivilegedExceptionAction;
 import java.util.*;
@@ -229,6 +226,11 @@ public class DataNode extends Configured
   ReadaheadPool readaheadPool;
   private final boolean getHdfsBlockLocationsEnabled;
   private ObjectName dataNodeInfoBeanName;
+  private Thread checkDiskErrorThread = null;
+  protected final int checkDiskErrorInterval = 5*1000;
+  private boolean checkDiskErrorFlag = false;
+  private Object checkDiskErrorMutex = new Object();
+  private long lastDiskErrorCheck;
 
   /**
    * Create the DataNode given a configuration, an array of dataDirs,
@@ -238,6 +240,7 @@ public class DataNode extends Configured
            final List<StorageLocation> dataDirs,
            final SecureResources resources) throws IOException {
     super(conf);
+    this.lastDiskErrorCheck = 0;
     this.maxNumberOfBlocksToLog = conf.getLong(DFS_MAX_NUM_BLOCKS_TO_LOG_KEY,
         DFS_MAX_NUM_BLOCKS_TO_LOG_DEFAULT);
 
@@ -1212,6 +1215,11 @@ public class DataNode extends Configured
       this.dataXceiverServer.interrupt();
     }
 
+    // Interrupt the checkDiskErrorThread and terminate it.
+    if(this.checkDiskErrorThread != null) {
+      this.checkDiskErrorThread.interrupt();
+    }
+    
     // Record the time of initial notification
     long timeNotified = Time.now();
 
@@ -1321,55 +1329,17 @@ public class DataNode extends Configured
   }
   
   
-  /** Check if there is no space in disk 
-   *  @param e that caused this checkDiskError call
-   **/
-  protected void checkDiskError(Exception e ) throws IOException {
-    
-    LOG.warn("checkDiskError: exception: ", e);
-    if (isNetworkRelatedException(e)) {
-      LOG.info("Not checking disk as checkDiskError was called on a network" +
-      		" related exception");	
-      return;
-    }
-    if (e.getMessage() != null &&
-        e.getMessage().startsWith("No space left on device")) {
-      throw new DiskOutOfSpaceException("No space left on device");
-    } else {
-      checkDiskError();
-    }
-  }
-  
-  /**
-   * Check if the provided exception looks like it's from a network error
-   * @param e the exception from a checkDiskError call
-   * @return true if this exception is network related, false otherwise
-   */
-  protected boolean isNetworkRelatedException(Exception e) {
-    if (e instanceof SocketException 
-        || e instanceof SocketTimeoutException
-        || e instanceof ClosedChannelException 
-        || e instanceof ClosedByInterruptException) {
-      return true;
-    }
-    
-    String msg = e.getMessage();
-    
-    return null != msg 
-        && (msg.startsWith("An established connection was aborted")
-            || msg.startsWith("Broken pipe")
-            || msg.startsWith("Connection reset")
-            || msg.contains("java.nio.channels.SocketChannel"));
-  }
-  
   /**
    *  Check if there is a disk failure and if so, handle the error
    */
   public void checkDiskError() {
-    try {
-      data.checkDataDir();
-    } catch (DiskErrorException de) {
-      handleDiskError(de.getMessage());
+    synchronized(checkDiskErrorMutex) {
+      checkDiskErrorFlag = true;
+      if(checkDiskErrorThread == null) {
+        startCheckDiskErrorThread();
+        checkDiskErrorThread.start();
+        LOG.info("Starting CheckDiskError Thread");
+      }
     }
   }
   
@@ -1669,13 +1639,8 @@ public class DataNode extends Configured
       } catch (IOException ie) {
         LOG.warn(bpReg + ":Failed to transfer " + b + " to " +
             targets[0] + " got ", ie);
-          // check if there are any disk problem
-        try{
-          checkDiskError(ie);
-        } catch(IOException e) {
-            LOG.warn("DataNode.checkDiskError failed in run() with: ", e);
-        }
-        
+        // check if there are any disk problem
+        checkDiskError();
       } finally {
         xmitsInProgress.getAndDecrement();
         IOUtils.closeStream(blockSender);
@@ -2590,4 +2555,50 @@ public class DataNode extends Configured
   public ShortCircuitRegistry getShortCircuitRegistry() {
     return shortCircuitRegistry;
   }
-}
+  
+  /**
+   * Starts a new thread which will check for disk error check request 
+   * every 5 sec
+   */
+  private void startCheckDiskErrorThread() {
+    checkDiskErrorThread = new Thread(new Runnable() {
+          @Override
+          public void run() {
+            while(shouldRun) {
+              boolean tempFlag ;
+              synchronized(checkDiskErrorMutex) {
+                tempFlag = checkDiskErrorFlag;
+                checkDiskErrorFlag = false;
+              }
+              if(tempFlag) {
+                try {
+                  data.checkDataDir();
+                } catch (DiskErrorException de) {
+                  handleDiskError(de.getMessage());
+                } catch (Exception e) {
+                  LOG.warn("Unexpected exception occurred while checking disk error  " + e);
+                  checkDiskErrorThread = null;
+                  return;
+                }
+                synchronized(checkDiskErrorMutex) {
+                  lastDiskErrorCheck = System.currentTimeMillis();
+                }
+              }
+              try {
+                Thread.sleep(checkDiskErrorInterval);
+              } catch (InterruptedException e) {
+                LOG.debug("InterruptedException in check disk error thread", e);
+                checkDiskErrorThread = null;
+                return;
+              }
+            }
+          }
+    });
+  }
+  
+  public long getLastDiskErrorCheck() {
+    synchronized(checkDiskErrorMutex) {
+      return lastDiskErrorCheck;
+    }
+  }
+}
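
The reworked checkDiskError() above turns a blocking, caller-side disk scan into a request flag serviced by a single background thread that wakes every few seconds. A compact, self-contained sketch of that pattern under the same assumptions (names and the actual check body are placeholders, not the DataNode code):

    // Illustrative request-flag + background checker pattern, similar in spirit
    // to the checkDiskErrorThread added above.
    class AsyncDiskChecker {
      private final Object mutex = new Object();
      private final long intervalMs;
      private boolean checkRequested = false;
      private Thread checker;
      private volatile boolean running = true;
      private long lastCheckMs;

      AsyncDiskChecker(long intervalMs) {
        this.intervalMs = intervalMs;
      }

      /** Callers just set a flag; the actual check happens off the I/O path. */
      void requestCheck() {
        synchronized (mutex) {
          checkRequested = true;
          if (checker == null) {
            checker = new Thread(this::loop, "disk-checker");
            checker.setDaemon(true);
            checker.start();
          }
        }
      }

      private void loop() {
        while (running) {
          boolean doCheck;
          synchronized (mutex) {
            doCheck = checkRequested;
            checkRequested = false;
          }
          if (doCheck) {
            checkDataDirs();              // placeholder for the real disk scan
            synchronized (mutex) {
              lastCheckMs = System.currentTimeMillis();
            }
          }
          try {
            Thread.sleep(intervalMs);
          } catch (InterruptedException e) {
            return;                       // shut down when interrupted
          }
        }
      }

      private void checkDataDirs() { /* scan data directories, handle failures */ }

      void shutdown() {
        running = false;
        synchronized (mutex) {
          if (checker != null) {
            checker.interrupt();
          }
        }
      }

      long lastCheckMillis() {
        synchronized (mutex) {
          return lastCheckMs;
        }
      }
    }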

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/resources/DatanodeWebHdfsMethods.java

@@ -127,8 +127,8 @@ public class DatanodeWebHdfsMethods {
     token.decodeFromUrlString(delegation);
     URI nnUri = URI.create(HdfsConstants.HDFS_URI_SCHEME +
             "://" + nnId);
-    boolean isHA = HAUtil.isLogicalUri(conf, nnUri);
-    if (isHA) {
+    boolean isLogical = HAUtil.isLogicalUri(conf, nnUri);
+    if (isLogical) {
       token.setService(HAUtil.buildTokenServiceForLogicalUri(nnUri));
     } else {
       token.setService(SecurityUtil.buildTokenService(nnUri));

+ 57 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java

@@ -27,6 +27,7 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS_DEFAULT;
 
 import java.io.DataInput;
+import java.io.DataOutputStream;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -61,10 +62,10 @@ import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo.Expiration;
 import org.apache.hadoop.hdfs.protocol.CacheDirectiveStats;
 import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
 import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
-import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto;
-import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto;
 import org.apache.hadoop.hdfs.protocol.DatanodeID;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
+import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto;
+import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto;
 import org.apache.hadoop.hdfs.protocolPB.PBHelper;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
 import org.apache.hadoop.hdfs.server.blockmanagement.CacheReplicationMonitor;
@@ -953,6 +954,18 @@ public final class CacheManager {
     }
   }
 
+  /**
+   * Saves the current state of the CacheManager to the DataOutput. Used
+   * to persist CacheManager state in the FSImage.
+   * @param out DataOutput to persist state
+   * @param sdPath path of the storage directory
+   * @throws IOException
+   */
+  public void saveStateCompat(DataOutputStream out, String sdPath)
+      throws IOException {
+    serializerCompat.save(out, sdPath);
+  }
+
   public PersistState saveState() throws IOException {
     ArrayList<CachePoolInfoProto> pools = Lists
         .newArrayListWithCapacity(cachePools.size());
@@ -1072,6 +1085,12 @@ public final class CacheManager {
   }
 
   private final class SerializerCompat {
+    private void save(DataOutputStream out, String sdPath) throws IOException {
+      out.writeLong(nextDirectiveId);
+      savePools(out, sdPath);
+      saveDirectives(out, sdPath);
+    }
+
     private void load(DataInput in) throws IOException {
       nextDirectiveId = in.readLong();
       // pools need to be loaded first since directives point to their parent pool
@@ -1079,6 +1098,42 @@ public final class CacheManager {
       loadDirectives(in);
     }
 
+    /**
+     * Save cache pools to fsimage
+     */
+    private void savePools(DataOutputStream out,
+        String sdPath) throws IOException {
+      StartupProgress prog = NameNode.getStartupProgress();
+      Step step = new Step(StepType.CACHE_POOLS, sdPath);
+      prog.beginStep(Phase.SAVING_CHECKPOINT, step);
+      prog.setTotal(Phase.SAVING_CHECKPOINT, step, cachePools.size());
+      Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step);
+      out.writeInt(cachePools.size());
+      for (CachePool pool: cachePools.values()) {
+        FSImageSerialization.writeCachePoolInfo(out, pool.getInfo(true));
+        counter.increment();
+      }
+      prog.endStep(Phase.SAVING_CHECKPOINT, step);
+    }
+
+    /*
+     * Save cache entries to fsimage
+     */
+    private void saveDirectives(DataOutputStream out, String sdPath)
+        throws IOException {
+      StartupProgress prog = NameNode.getStartupProgress();
+      Step step = new Step(StepType.CACHE_ENTRIES, sdPath);
+      prog.beginStep(Phase.SAVING_CHECKPOINT, step);
+      prog.setTotal(Phase.SAVING_CHECKPOINT, step, directivesById.size());
+      Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step);
+      out.writeInt(directivesById.size());
+      for (CacheDirective directive : directivesById.values()) {
+        FSImageSerialization.writeCacheDirectiveInfo(out, directive.toInfo());
+        counter.increment();
+      }
+      prog.endStep(Phase.SAVING_CHECKPOINT, step);
+    }
+
     /**
      * Load cache pools from fsimage
      */
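
savePools and saveDirectives above use the usual DataOutputStream convention for persisting a collection: write the element count first, then each element, so the loader can read it back symmetrically. A minimal sketch of that convention, with a made-up record type standing in for the real pool and directive objects:

    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;

    // Length-prefixed collection serialization: writeInt(count) then each element.
    final class PoolListCodec {
      static void write(DataOutputStream out, List<String> poolNames) throws IOException {
        out.writeInt(poolNames.size());
        for (String name : poolNames) {
          out.writeUTF(name);          // stand-in for a full CachePoolInfo record
        }
      }

      static List<String> read(DataInputStream in) throws IOException {
        int count = in.readInt();
        List<String> poolNames = new ArrayList<>(count);
        for (int i = 0; i < count; i++) {
          poolNames.add(in.readUTF());
        }
        return poolNames;
      }
    }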

+ 8 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointConf.java

@@ -41,6 +41,9 @@ public class CheckpointConf {
 
   /** maxium number of retries when merge errors occur */
   private final int maxRetriesOnMergeError;
+
+  /** The output dir for legacy OIV image */
+  private final String legacyOivImageDir;
   
   public CheckpointConf(Configuration conf) {
     checkpointCheckPeriod = conf.getLong(
@@ -53,6 +56,7 @@ public class CheckpointConf {
                                   DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT);
     maxRetriesOnMergeError = conf.getInt(DFS_NAMENODE_CHECKPOINT_MAX_RETRIES_KEY,
                                   DFS_NAMENODE_CHECKPOINT_MAX_RETRIES_DEFAULT);
+    legacyOivImageDir = conf.get(DFS_NAMENODE_LEGACY_OIV_IMAGE_DIR_KEY);
     warnForDeprecatedConfigs(conf);
   }
   
@@ -83,4 +87,8 @@ public class CheckpointConf {
   public int getMaxRetriesOnMergeError() {
     return maxRetriesOnMergeError;
   }
+
+  public String getLegacyOivImageDir() {
+    return legacyOivImageDir;
+  }
 }

+ 20 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java

@@ -57,6 +57,7 @@ import org.apache.hadoop.hdfs.protocol.ClientProtocol;
 import org.apache.hadoop.hdfs.protocol.DirectoryListing;
 import org.apache.hadoop.hdfs.protocol.FSLimitException.MaxDirectoryItemsExceededException;
 import org.apache.hadoop.hdfs.protocol.FSLimitException.PathComponentTooLongException;
+import org.apache.hadoop.hdfs.protocol.FsAclPermission;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
 import org.apache.hadoop.hdfs.protocol.HdfsLocatedFileStatus;
@@ -2604,7 +2605,7 @@ public class FSDirectory implements Closeable {
         blocksize,
         node.getModificationTime(snapshot),
         node.getAccessTime(snapshot),
-        node.getFsPermission(snapshot),
+        getPermissionForFileStatus(node, snapshot),
         node.getUserName(snapshot),
         node.getGroupName(snapshot),
         node.isSymlink() ? node.asSymlink().getSymlink() : null,
@@ -2646,7 +2647,8 @@ public class FSDirectory implements Closeable {
     HdfsLocatedFileStatus status =
         new HdfsLocatedFileStatus(size, node.isDirectory(), replication,
           blocksize, node.getModificationTime(snapshot),
-          node.getAccessTime(snapshot), node.getFsPermission(snapshot),
+          node.getAccessTime(snapshot),
+          getPermissionForFileStatus(node, snapshot),
           node.getUserName(snapshot), node.getGroupName(snapshot),
           node.isSymlink() ? node.asSymlink().getSymlink() : null, path,
           node.getId(), loc, childrenNum);
@@ -2660,6 +2662,22 @@ public class FSDirectory implements Closeable {
     return status;
   }
 
+  /**
+   * Returns an inode's FsPermission for use in an outbound FileStatus.  If the
+   * inode has an ACL, then this method will convert to a FsAclPermission.
+   *
+   * @param node INode to check
+   * @param snapshot int snapshot ID
+   * @return FsPermission from inode, with ACL bit on if the inode has an ACL
+   */
+  private static FsPermission getPermissionForFileStatus(INode node,
+      int snapshot) {
+    FsPermission perm = node.getFsPermission(snapshot);
+    if (node.getAclFeature(snapshot) != null) {
+      perm = new FsAclPermission(perm);
+    }
+    return perm;
+  }
     
   /**
    * Add the given symbolic link to the fs. Record it in the edits log.
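
getPermissionForFileStatus above wraps the plain permission in FsAclPermission when the inode carries an ACL, so a client listing can mark ACL-bearing entries (for example with a trailing '+') without an extra RPC per path. A rough, standalone sketch of that wrapper idea, deliberately not using HDFS's actual FsAclPermission encoding:

    // Hypothetical wrapper: carries the same mode bits plus a "has ACL" marker.
    final class PermissionView {
      private final short mode;     // e.g. 0750
      private final boolean hasAcl; // rendered as a trailing '+' in listings

      PermissionView(short mode, boolean hasAcl) {
        this.mode = mode;
        this.hasAcl = hasAcl;
      }

      short toShort() { return mode; }

      boolean getAclBit() { return hasAcl; }

      @Override
      public String toString() {
        return String.format("%03o%s", mode, hasAcl ? "+" : "");
      }
    }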

+ 19 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java

@@ -934,6 +934,25 @@ public class FSImage implements Closeable {
     storage.setMostRecentCheckpointInfo(txid, Time.now());
   }
 
+  /**
+   * Save FSimage in the legacy format. This is not for NN consumption,
+   * but for tools like OIV.
+   */
+  public void saveLegacyOIVImage(FSNamesystem source, String targetDir,
+      Canceler canceler) throws IOException {
+    FSImageCompression compression =
+        FSImageCompression.createCompression(conf);
+    long txid = getLastAppliedOrWrittenTxId();
+    SaveNamespaceContext ctx = new SaveNamespaceContext(source, txid,
+        canceler);
+    FSImageFormat.Saver saver = new FSImageFormat.Saver(ctx);
+    String imageFileName = NNStorage.getLegacyOIVImageFileName(txid);
+    File imageFile = new File(targetDir, imageFileName);
+    saver.save(imageFile, compression);
+    archivalManager.purgeOldLegacyOIVImages(targetDir, txid);
+  }
+
+
   /**
    * FSImageSaver is being run in a separate thread when saving
    * FSImage. There is one thread per each copy of the image.

+ 381 - 4
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java

@@ -21,14 +21,20 @@ import static org.apache.hadoop.util.Time.now;
 
 import java.io.DataInput;
 import java.io.DataInputStream;
+import java.io.DataOutputStream;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.security.DigestInputStream;
+import java.security.DigestOutputStream;
 import java.security.MessageDigest;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
 
@@ -50,6 +56,7 @@ import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
 import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.FileDiffList;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
@@ -60,6 +67,7 @@ import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
 import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType;
+import org.apache.hadoop.hdfs.util.ReadOnlyList;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.io.MD5Hash;
 import org.apache.hadoop.io.Text;
@@ -69,8 +77,105 @@ import com.google.common.base.Preconditions;
 import com.google.common.annotations.VisibleForTesting;
 
 /**
- * This class loads and stores the FSImage of the NameNode. The file
- * src/main/proto/fsimage.proto describes the on-disk layout of the FSImage.
+ * Contains inner classes for reading or writing the on-disk format for
+ * FSImages.
+ *
+ * In particular, the format of the FSImage looks like:
+ * <pre>
+ * FSImage {
+ *   layoutVersion: int, namespaceID: int, numberItemsInFSDirectoryTree: long,
+ *   namesystemGenerationStampV1: long, namesystemGenerationStampV2: long,
+ *   generationStampAtBlockIdSwitch:long, lastAllocatedBlockId:
+ *   long transactionID: long, snapshotCounter: int, numberOfSnapshots: int,
+ *   numOfSnapshottableDirs: int,
+ *   {FSDirectoryTree, FilesUnderConstruction, SecretManagerState} (can be compressed)
+ * }
+ *
+ * FSDirectoryTree (if {@link Feature#FSIMAGE_NAME_OPTIMIZATION} is supported) {
+ *   INodeInfo of root, numberOfChildren of root: int
+ *   [list of INodeInfo of root's children],
+ *   [list of INodeDirectoryInfo of root's directory children]
+ * }
+ *
+ * FSDirectoryTree (if {@link Feature#FSIMAGE_NAME_OPTIMIZATION} not supported){
+ *   [list of INodeInfo of INodes in topological order]
+ * }
+ *
+ * INodeInfo {
+ *   {
+ *     localName: short + byte[]
+ *   } when {@link Feature#FSIMAGE_NAME_OPTIMIZATION} is supported
+ *   or
+ *   {
+ *     fullPath: byte[]
+ *   } when {@link Feature#FSIMAGE_NAME_OPTIMIZATION} is not supported
+ *   replicationFactor: short, modificationTime: long,
+ *   accessTime: long, preferredBlockSize: long,
+ *   numberOfBlocks: int (-1 for INodeDirectory, -2 for INodeSymLink),
+ *   {
+ *     nsQuota: long, dsQuota: long,
+ *     {
+ *       isINodeSnapshottable: byte,
+ *       isINodeWithSnapshot: byte (if isINodeSnapshottable is false)
+ *     } (when {@link Feature#SNAPSHOT} is supported),
+ *     fsPermission: short, PermissionStatus
+ *   } for INodeDirectory
+ *   or
+ *   {
+ *     symlinkString, fsPermission: short, PermissionStatus
+ *   } for INodeSymlink
+ *   or
+ *   {
+ *     [list of BlockInfo]
+ *     [list of FileDiff]
+ *     {
+ *       isINodeFileUnderConstructionSnapshot: byte,
+ *       {clientName: short + byte[], clientMachine: short + byte[]} (when
+ *       isINodeFileUnderConstructionSnapshot is true),
+ *     } (when {@link Feature#SNAPSHOT} is supported and writing snapshotINode),
+ *     fsPermission: short, PermissionStatus
+ *   } for INodeFile
+ * }
+ *
+ * INodeDirectoryInfo {
+ *   fullPath of the directory: short + byte[],
+ *   numberOfChildren: int, [list of INodeInfo of children INode],
+ *   {
+ *     numberOfSnapshots: int,
+ *     [list of Snapshot] (when NumberOfSnapshots is positive),
+ *     numberOfDirectoryDiffs: int,
+ *     [list of DirectoryDiff] (NumberOfDirectoryDiffs is positive),
+ *     number of children that are directories,
+ *     [list of INodeDirectoryInfo of the directory children] (includes
+ *     snapshot copies of deleted sub-directories)
+ *   } (when {@link Feature#SNAPSHOT} is supported),
+ * }
+ *
+ * Snapshot {
+ *   snapshotID: int, root of Snapshot: INodeDirectoryInfo (its local name is
+ *   the name of the snapshot)
+ * }
+ *
+ * DirectoryDiff {
+ *   full path of the root of the associated Snapshot: short + byte[],
+ *   childrenSize: int,
+ *   isSnapshotRoot: byte,
+ *   snapshotINodeIsNotNull: byte (when isSnapshotRoot is false),
+ *   snapshotINode: INodeDirectory (when SnapshotINodeIsNotNull is true), Diff
+ * }
+ *
+ * Diff {
+ *   createdListSize: int, [Local name of INode in created list],
+ *   deletedListSize: int, [INode in deleted list: INodeInfo]
+ * }
+ *
+ * FileDiff {
+ *   full path of the root of the associated Snapshot: short + byte[],
+ *   fileSize: long,
+ *   snapshotINodeIsNotNull: byte,
+ *   snapshotINode: INodeFile (when SnapshotINodeIsNotNull is true), Diff
+ * }
+ * </pre>
  */
 @InterfaceAudience.Private
 @InterfaceStability.Evolving
@@ -580,6 +685,11 @@ public class FSImageFormat {
       }
     }
 
+    /** @return The FSDirectory of the namesystem where the fsimage is loaded */
+    public FSDirectory getFSDirectoryInLoading() {
+      return namesystem.dir;
+    }
+
     public INode loadINodeWithLocalName(boolean isSnapshotINode, DataInput in,
         boolean updateINodeMap) throws IOException {
       return loadINodeWithLocalName(isSnapshotINode, in, updateINodeMap, null);
@@ -1009,7 +1119,7 @@ public class FSImageFormat {
       + " option to automatically rename these paths during upgrade.";
 
   /**
-   * Same as {@link #renameReservedPathsOnUpgrade}, but for a single
+   * Same as {@link #renameReservedPathsOnUpgrade(String)}, but for a single
    * byte array path component.
    */
   private static byte[] renameReservedComponentOnUpgrade(byte[] component,
@@ -1029,7 +1139,7 @@ public class FSImageFormat {
   }
 
   /**
-   * Same as {@link #renameReservedPathsOnUpgrade}, but for a single
+   * Same as {@link #renameReservedPathsOnUpgrade(String)}, but for a single
    * byte array path component.
    */
   private static byte[] renameReservedRootComponentOnUpgrade(byte[] component,
@@ -1050,4 +1160,271 @@ public class FSImageFormat {
     }
     return component;
   }
+
+  /**
+   * A one-shot class responsible for writing an image file.
+   * The write() function should be called once, after which the getter
+   * functions may be used to retrieve information about the file that was written.
+   *
+   * This is replaced by the PB-based FSImage. The class is to maintain
+   * compatibility for the external fsimage tool.
+   */
+  @Deprecated
+  static class Saver {
+    private static final int LAYOUT_VERSION = -51;
+    private final SaveNamespaceContext context;
+    /** Set to true once an image has been written */
+    private boolean saved = false;
+
+    /** The MD5 checksum of the file that was written */
+    private MD5Hash savedDigest;
+    private final ReferenceMap referenceMap = new ReferenceMap();
+
+    private final Map<Long, INodeFile> snapshotUCMap =
+        new HashMap<Long, INodeFile>();
+
+    /** @throws IllegalStateException if the instance has not yet saved an image */
+    private void checkSaved() {
+      if (!saved) {
+        throw new IllegalStateException("FSImageSaver has not saved an image");
+      }
+    }
+
+    /** @throws IllegalStateException if the instance has already saved an image */
+    private void checkNotSaved() {
+      if (saved) {
+        throw new IllegalStateException("FSImageSaver has already saved an image");
+      }
+    }
+
+
+    Saver(SaveNamespaceContext context) {
+      this.context = context;
+    }
+
+    /**
+     * Return the MD5 checksum of the image file that was saved.
+     */
+    MD5Hash getSavedDigest() {
+      checkSaved();
+      return savedDigest;
+    }
+
+    void save(File newFile, FSImageCompression compression) throws IOException {
+      checkNotSaved();
+
+      final FSNamesystem sourceNamesystem = context.getSourceNamesystem();
+      final INodeDirectory rootDir = sourceNamesystem.dir.rootDir;
+      final long numINodes = rootDir.getDirectoryWithQuotaFeature()
+          .getSpaceConsumed().get(Quota.NAMESPACE);
+      String sdPath = newFile.getParentFile().getParentFile().getAbsolutePath();
+      Step step = new Step(StepType.INODES, sdPath);
+      StartupProgress prog = NameNode.getStartupProgress();
+      prog.beginStep(Phase.SAVING_CHECKPOINT, step);
+      prog.setTotal(Phase.SAVING_CHECKPOINT, step, numINodes);
+      Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step);
+      long startTime = now();
+      //
+      // Write out data
+      //
+      MessageDigest digester = MD5Hash.getDigester();
+      FileOutputStream fout = new FileOutputStream(newFile);
+      DigestOutputStream fos = new DigestOutputStream(fout, digester);
+      DataOutputStream out = new DataOutputStream(fos);
+      try {
+        out.writeInt(LAYOUT_VERSION);
+        LayoutFlags.write(out);
+        // We use the non-locked version of getNamespaceInfo here since
+        // the coordinating thread of saveNamespace already has read-locked
+        // the namespace for us. If we attempt to take another readlock
+        // from the actual saver thread, there's a potential of a
+        // fairness-related deadlock. See the comments on HDFS-2223.
+        out.writeInt(sourceNamesystem.unprotectedGetNamespaceInfo()
+            .getNamespaceID());
+        out.writeLong(numINodes);
+        out.writeLong(sourceNamesystem.getGenerationStampV1());
+        out.writeLong(sourceNamesystem.getGenerationStampV2());
+        out.writeLong(sourceNamesystem.getGenerationStampAtblockIdSwitch());
+        out.writeLong(sourceNamesystem.getLastAllocatedBlockId());
+        out.writeLong(context.getTxId());
+        out.writeLong(sourceNamesystem.getLastInodeId());
+
+
+        sourceNamesystem.getSnapshotManager().write(out);
+
+        // write compression info and set up compressed stream
+        out = compression.writeHeaderAndWrapStream(fos);
+        LOG.info("Saving image file " + newFile +
+                 " using " + compression);
+
+        // save the root
+        saveINode2Image(rootDir, out, false, referenceMap, counter);
+        // save the rest of the nodes
+        saveImage(rootDir, out, true, false, counter);
+        prog.endStep(Phase.SAVING_CHECKPOINT, step);
+        // Now that the step is finished, set counter equal to total to adjust
+        // for possible under-counting due to reference inodes.
+        prog.setCount(Phase.SAVING_CHECKPOINT, step, numINodes);
+        // save files under construction
+        // TODO: for HDFS-5428, since we cannot break the compatibility of
+        // fsimage, we store part of the under-construction files that are only
+        // in snapshots in this "under-construction-file" section. As a
+        // temporary solution, we use "/.reserved/.inodes/<inodeid>" as their
+        // paths, so that when loading fsimage we do not put them into the lease
+        // map. In the future, we can remove this hack when we can bump the
+        // layout version.
+        sourceNamesystem.saveFilesUnderConstruction(out, snapshotUCMap);
+
+        context.checkCancelled();
+        sourceNamesystem.saveSecretManagerStateCompat(out, sdPath);
+        context.checkCancelled();
+        sourceNamesystem.getCacheManager().saveStateCompat(out, sdPath);
+        context.checkCancelled();
+        out.flush();
+        context.checkCancelled();
+        fout.getChannel().force(true);
+      } finally {
+        out.close();
+      }
+
+      saved = true;
+      // set md5 of the saved image
+      savedDigest = new MD5Hash(digester.digest());
+
+      LOG.info("Image file " + newFile + " of size " + newFile.length() +
+          " bytes saved in " + (now() - startTime)/1000 + " seconds.");
+    }
+
+    /**
+     * Save children INodes.
+     * @param children The list of children INodes
+     * @param out The DataOutputStream to write
+     * @param inSnapshot Whether the parent directory or its ancestor is in
+     *                   the deleted list of some snapshot (caused by rename or
+     *                   deletion)
+     * @param counter Counter to increment for namenode startup progress
+     * @return Number of children that are directory
+     */
+    private int saveChildren(ReadOnlyList<INode> children,
+        DataOutputStream out, boolean inSnapshot, Counter counter)
+        throws IOException {
+      // Write normal children INode.
+      out.writeInt(children.size());
+      int dirNum = 0;
+      int i = 0;
+      for(INode child : children) {
+        // print all children first
+        // TODO: for HDFS-5428, we cannot change the format/content of fsimage
+        // here, thus even if the parent directory is in snapshot, we still
+        // do not handle INodeUC as those stored in deleted list
+        saveINode2Image(child, out, false, referenceMap, counter);
+        if (child.isDirectory()) {
+          dirNum++;
+        } else if (inSnapshot && child.isFile()
+            && child.asFile().isUnderConstruction()) {
+          this.snapshotUCMap.put(child.getId(), child.asFile());
+        }
+        if (i++ % 50 == 0) {
+          context.checkCancelled();
+        }
+      }
+      return dirNum;
+    }
+
+    /**
+     * Save file tree image starting from the given root.
+     * This is a recursive procedure, which first saves all children and
+     * snapshot diffs of a current directory and then moves inside the
+     * sub-directories.
+     *
+     * @param current The current node
+     * @param out The DataOutputStream to write the image
+     * @param toSaveSubtree Whether or not to save the subtree to fsimage. For
+     *                      reference node, its subtree may already have been
+     *                      saved before.
+     * @param inSnapshot Whether the current directory is in snapshot
+     * @param counter Counter to increment for namenode startup progress
+     */
+    private void saveImage(INodeDirectory current, DataOutputStream out,
+        boolean toSaveSubtree, boolean inSnapshot, Counter counter)
+        throws IOException {
+      // write the inode id of the directory
+      out.writeLong(current.getId());
+
+      if (!toSaveSubtree) {
+        return;
+      }
+
+      final ReadOnlyList<INode> children = current
+          .getChildrenList(Snapshot.CURRENT_STATE_ID);
+      int dirNum = 0;
+      List<INodeDirectory> snapshotDirs = null;
+      DirectoryWithSnapshotFeature sf = current.getDirectoryWithSnapshotFeature();
+      if (sf != null) {
+        snapshotDirs = new ArrayList<INodeDirectory>();
+        sf.getSnapshotDirectory(snapshotDirs);
+        dirNum += snapshotDirs.size();
+      }
+
+      // 2. Write INodeDirectorySnapshottable#snapshotsByNames to record all
+      // Snapshots
+      if (current instanceof INodeDirectorySnapshottable) {
+        INodeDirectorySnapshottable snapshottableNode =
+            (INodeDirectorySnapshottable) current;
+        SnapshotFSImageFormat.saveSnapshots(snapshottableNode, out);
+      } else {
+        out.writeInt(-1); // # of snapshots
+      }
+
+      // 3. Write children INode
+      dirNum += saveChildren(children, out, inSnapshot, counter);
+
+      // 4. Write DirectoryDiff lists, if there is any.
+      SnapshotFSImageFormat.saveDirectoryDiffList(current, out, referenceMap);
+
+      // Write sub-tree of sub-directories, including possible snapshots of
+      // deleted sub-directories
+      out.writeInt(dirNum); // the number of sub-directories
+      for(INode child : children) {
+        if(!child.isDirectory()) {
+          continue;
+        }
+        // make sure we only save the subtree under a reference node once
+        boolean toSave = child.isReference() ?
+            referenceMap.toProcessSubtree(child.getId()) : true;
+        saveImage(child.asDirectory(), out, toSave, inSnapshot, counter);
+      }
+      if (snapshotDirs != null) {
+        for (INodeDirectory subDir : snapshotDirs) {
+          // make sure we only save the subtree under a reference node once
+          boolean toSave = subDir.getParentReference() != null ?
+              referenceMap.toProcessSubtree(subDir.getId()) : true;
+          saveImage(subDir, out, toSave, true, counter);
+        }
+      }
+    }
+
+    /**
+     * Saves inode and increments progress counter.
+     *
+     * @param inode INode to save
+     * @param out DataOutputStream to receive inode
+     * @param writeUnderConstruction boolean true if this is under construction
+     * @param referenceMap ReferenceMap containing reference inodes
+     * @param counter Counter to increment for namenode startup progress
+     * @throws IOException thrown if there is an I/O error
+     */
+    private void saveINode2Image(INode inode, DataOutputStream out,
+        boolean writeUnderConstruction, ReferenceMap referenceMap,
+        Counter counter) throws IOException {
+      FSImageSerialization.saveINode2Image(inode, out, writeUnderConstruction,
+        referenceMap);
+      // Intentionally do not increment counter for reference inodes, because it
+      // is too difficult at this point to assess whether or not this is a
+      // reference that counts toward quota.
+      if (!(inode instanceof INodeReference)) {
+        counter.increment();
+      }
+    }
+  }
 }
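
The Saver.save method above layers a DataOutputStream over a DigestOutputStream so the image's MD5 is computed while the bytes are written, without a second pass over the file. A standalone sketch of that stream stacking under the same idea (file name and payload here are arbitrary):

    import java.io.DataOutputStream;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.security.DigestOutputStream;
    import java.security.MessageDigest;
    import java.security.NoSuchAlgorithmException;

    // Compute an MD5 digest of a file's contents while writing it, in one pass.
    public class DigestWhileWriting {
      public static void main(String[] args) throws IOException, NoSuchAlgorithmException {
        MessageDigest digester = MessageDigest.getInstance("MD5");
        try (FileOutputStream fout = new FileOutputStream("example.img");
             DigestOutputStream digestOut = new DigestOutputStream(fout, digester);
             DataOutputStream out = new DataOutputStream(digestOut)) {
          out.writeInt(-51);              // a header field, e.g. a layout version
          out.writeLong(42L);             // some payload
          out.flush();
          fout.getChannel().force(true);  // push bytes to disk before trusting the digest
        }
        byte[] md5 = digester.digest();   // digest of everything that passed through
        System.out.println("MD5 bytes: " + md5.length);
      }
    }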

+ 213 - 4
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java

@@ -17,6 +17,11 @@
  */
 package org.apache.hadoop.hdfs.server.namenode;
 
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.DataOutputStream;
+import java.io.IOException;
+
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.fs.Path;
@@ -31,20 +36,21 @@ import org.apache.hadoop.hdfs.protocol.LayoutVersion;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat.ReferenceMap;
 import org.apache.hadoop.hdfs.util.XMLUtils;
 import org.apache.hadoop.hdfs.util.XMLUtils.InvalidXmlException;
 import org.apache.hadoop.hdfs.util.XMLUtils.Stanza;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.ShortWritable;
+import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.WritableUtils;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.DataOutputStream;
-import java.io.IOException;
+import com.google.common.base.Preconditions;
 
 /**
  * Static utility functions for serializing various pieces of data in the correct
@@ -82,6 +88,26 @@ public class FSImageSerialization {
     final ShortWritable U_SHORT = new ShortWritable();
     final IntWritable U_INT = new IntWritable();
     final LongWritable U_LONG = new LongWritable();
+    final FsPermission FILE_PERM = new FsPermission((short) 0);
+  }
+
+  private static void writePermissionStatus(INodeAttributes inode,
+      DataOutput out) throws IOException {
+    final FsPermission p = TL_DATA.get().FILE_PERM;
+    p.fromShort(inode.getFsPermissionShort());
+    PermissionStatus.write(out, inode.getUserName(), inode.getGroupName(), p);
+  }
+
+  private static void writeBlocks(final Block[] blocks,
+      final DataOutput out) throws IOException {
+    if (blocks == null) {
+      out.writeInt(0);
+    } else {
+      out.writeInt(blocks.length);
+      for (Block blk : blocks) {
+        blk.write(out);
+      }
+    }
   }
   }
 
   // Helper function that reads in an INodeUnderConstruction
@@ -127,6 +153,183 @@ public class FSImageSerialization {
     return file;
   }
 
+  // into the input stream
+  //
+  static void writeINodeUnderConstruction(DataOutputStream out, INodeFile cons,
+      String path) throws IOException {
+    writeString(path, out);
+    out.writeLong(cons.getId());
+    out.writeShort(cons.getFileReplication());
+    out.writeLong(cons.getModificationTime());
+    out.writeLong(cons.getPreferredBlockSize());
+
+    writeBlocks(cons.getBlocks(), out);
+    cons.getPermissionStatus().write(out);
+
+    FileUnderConstructionFeature uc = cons.getFileUnderConstructionFeature();
+    writeString(uc.getClientName(), out);
+    writeString(uc.getClientMachine(), out);
+
+    out.writeInt(0); //  do not store locations of last block
+  }
+
+  /**
+   * Serialize a {@link INodeFile} node
+   * @param file The node to write
+   * @param out The {@link DataOutput} where the fields are written
+   * @param writeUnderConstruction Whether to write under-construction information
+   */
+  public static void writeINodeFile(INodeFile file, DataOutput out,
+      boolean writeUnderConstruction) throws IOException {
+    writeLocalName(file, out);
+    out.writeLong(file.getId());
+    out.writeShort(file.getFileReplication());
+    out.writeLong(file.getModificationTime());
+    out.writeLong(file.getAccessTime());
+    out.writeLong(file.getPreferredBlockSize());
+
+    writeBlocks(file.getBlocks(), out);
+    SnapshotFSImageFormat.saveFileDiffList(file, out);
+
+    if (writeUnderConstruction) {
+      if (file.isUnderConstruction()) {
+        out.writeBoolean(true);
+        final FileUnderConstructionFeature uc = file.getFileUnderConstructionFeature();
+        writeString(uc.getClientName(), out);
+        writeString(uc.getClientMachine(), out);
+      } else {
+        out.writeBoolean(false);
+      }
+    }
+
+    writePermissionStatus(file, out);
+  }
+
+  /** Serialize an {@link INodeFileAttributes}. */
+  public static void writeINodeFileAttributes(INodeFileAttributes file,
+      DataOutput out) throws IOException {
+    writeLocalName(file, out);
+    writePermissionStatus(file, out);
+    out.writeLong(file.getModificationTime());
+    out.writeLong(file.getAccessTime());
+
+    out.writeShort(file.getFileReplication());
+    out.writeLong(file.getPreferredBlockSize());
+  }
+
+  private static void writeQuota(Quota.Counts quota, DataOutput out)
+      throws IOException {
+    out.writeLong(quota.get(Quota.NAMESPACE));
+    out.writeLong(quota.get(Quota.DISKSPACE));
+  }
+
+  /**
+   * Serialize a {@link INodeDirectory}
+   * @param node The node to write
+   * @param out The {@link DataOutput} where the fields are written
+   */
+  public static void writeINodeDirectory(INodeDirectory node, DataOutput out)
+      throws IOException {
+    writeLocalName(node, out);
+    out.writeLong(node.getId());
+    out.writeShort(0);  // replication
+    out.writeLong(node.getModificationTime());
+    out.writeLong(0);   // access time
+    out.writeLong(0);   // preferred block size
+    out.writeInt(-1);   // # of blocks
+
+    writeQuota(node.getQuotaCounts(), out);
+
+    if (node instanceof INodeDirectorySnapshottable) {
+      out.writeBoolean(true);
+    } else {
+      out.writeBoolean(false);
+      out.writeBoolean(node.isWithSnapshot());
+    }
+
+    writePermissionStatus(node, out);
+  }
+
+  /**
+   * Serialize an {@link INodeDirectoryAttributes}
+   * @param a The attributes to write
+   * @param out The {@link DataOutput} where the fields are written
+   */
+  public static void writeINodeDirectoryAttributes(
+      INodeDirectoryAttributes a, DataOutput out) throws IOException {
+    writeLocalName(a, out);
+    writePermissionStatus(a, out);
+    out.writeLong(a.getModificationTime());
+    writeQuota(a.getQuotaCounts(), out);
+  }
+
+  /**
+   * Serialize a {@link INodeSymlink} node
+   * @param node The node to write
+   * @param out The {@link DataOutput} where the fields are written
+   */
+  private static void writeINodeSymlink(INodeSymlink node, DataOutput out)
+      throws IOException {
+    writeLocalName(node, out);
+    out.writeLong(node.getId());
+    out.writeShort(0);  // replication
+    out.writeLong(0);   // modification time
+    out.writeLong(0);   // access time
+    out.writeLong(0);   // preferred block size
+    out.writeInt(-2);   // # of blocks
+
+    Text.writeString(out, node.getSymlinkString());
+    writePermissionStatus(node, out);
+  }
+
+  /** Serialize a {@link INodeReference} node */
+  private static void writeINodeReference(INodeReference ref, DataOutput out,
+      boolean writeUnderConstruction, ReferenceMap referenceMap
+      ) throws IOException {
+    writeLocalName(ref, out);
+    out.writeLong(ref.getId());
+    out.writeShort(0);  // replication
+    out.writeLong(0);   // modification time
+    out.writeLong(0);   // access time
+    out.writeLong(0);   // preferred block size
+    out.writeInt(-3);   // # of blocks
+
+    final boolean isWithName = ref instanceof INodeReference.WithName;
+    out.writeBoolean(isWithName);
+
+    if (!isWithName) {
+      Preconditions.checkState(ref instanceof INodeReference.DstReference);
+      // dst snapshot id
+      out.writeInt(((INodeReference.DstReference) ref).getDstSnapshotId());
+    } else {
+      out.writeInt(((INodeReference.WithName) ref).getLastSnapshotId());
+    }
+
+    final INodeReference.WithCount withCount
+        = (INodeReference.WithCount)ref.getReferredINode();
+    referenceMap.writeINodeReferenceWithCount(withCount, out,
+        writeUnderConstruction);
+  }
+
+  /**
+   * Save one inode's attributes to the image.
+   */
+  public static void saveINode2Image(INode node, DataOutput out,
+      boolean writeUnderConstruction, ReferenceMap referenceMap)
+      throws IOException {
+    if (node.isReference()) {
+      writeINodeReference(node.asReference(), out, writeUnderConstruction,
+          referenceMap);
+    } else if (node.isDirectory()) {
+      writeINodeDirectory(node.asDirectory(), out);
+    } else if (node.isSymlink()) {
+      writeINodeSymlink(node.asSymlink(), out);
+    } else if (node.isFile()) {
+      writeINodeFile(node.asFile(), out, writeUnderConstruction);
+    }
+  }
+
   // This should be reverted to package private once the ImageLoader
   // code is moved into this package. This method should not be called
   // by other code.
@@ -226,6 +429,12 @@ public class FSImageSerialization {
     in.readFully(createdNodeName);
     return createdNodeName;
   }
+
+  private static void writeLocalName(INodeAttributes inode, DataOutput out)
+      throws IOException {
+    final byte[] name = inode.getLocalNameBytes();
+    writeBytes(name, out);
+  }
   
   
   public static void writeBytes(byte[] data, DataOutput out)
       throws IOException {

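The int that a file record writes where its block count goes doubles as a record-type tag in the serializers above (0 or more blocks for a file, -1 for a directory, -2 for a symlink, -3 for a reference), which is how a loader can dispatch without a separate type byte. The following standalone sketch only illustrates that convention; the class and method names are invented for illustration and are not part of this patch.

    import java.io.*;

    public class InodeRecordTagDemo {
      static void writeTag(DataOutput out, int blocksOrTag) throws IOException {
        out.writeInt(blocksOrTag);
      }

      static String classify(DataInput in) throws IOException {
        int n = in.readInt();
        if (n >= 0)  return "file with " + n + " blocks";
        if (n == -1) return "directory";
        if (n == -2) return "symlink";
        if (n == -3) return "reference";
        throw new IOException("unknown inode record tag: " + n);
      }

      public static void main(String[] args) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(bytes);
        writeTag(out, 2);   // a file with two blocks
        writeTag(out, -1);  // a directory
        writeTag(out, -3);  // a reference
        DataInputStream in = new DataInputStream(
            new ByteArrayInputStream(bytes.toByteArray()));
        for (int i = 0; i < 3; i++) {
          System.out.println(classify(in));
        }
      }
    }
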
+ 61 - 21
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -85,17 +85,7 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY;
 import static org.apache.hadoop.util.Time.now;
 
 
-import java.io.BufferedWriter;
-import java.io.ByteArrayInputStream;
-import java.io.DataInput;
-import java.io.DataInputStream;
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.io.PrintWriter;
-import java.io.StringWriter;
+import java.io.*;
 import java.io.UnsupportedEncodingException;
 import java.lang.management.ManagementFactory;
 import java.net.InetAddress;
@@ -140,6 +130,7 @@ import org.apache.hadoop.fs.Options;
 import org.apache.hadoop.fs.Options.Rename;
 import org.apache.hadoop.fs.ParentNotDirectoryException;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathIsNotEmptyDirectoryException;
 import org.apache.hadoop.fs.UnresolvedLinkException;
 import org.apache.hadoop.fs.XAttr;
 import org.apache.hadoop.fs.XAttrSetFlag;
@@ -3248,10 +3239,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
       // Rename does not operates on link targets
       // Do not resolveLink when checking permissions of src and dst
       // Check write access to parent of src
-      checkPermission(pc, src, false, null, FsAction.WRITE, null, null, false);
+      checkPermission(pc, src, false, null, FsAction.WRITE, null, null,
+          false, false);
       // Check write access to ancestor of dst
       checkPermission(pc, actualdst, false, FsAction.WRITE, null, null, null,
-          false);
+          false, false);
     }
 
     if (dir.renameTo(src, dst, logRetryCache)) {
@@ -3312,9 +3304,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
       // Rename does not operates on link targets
       // Do not resolveLink when checking permissions of src and dst
       // Check write access to parent of src
-      checkPermission(pc, src, false, null, FsAction.WRITE, null, null, false);
+      checkPermission(pc, src, false, null, FsAction.WRITE, null, null, false,
+          false);
       // Check write access to ancestor of dst
-      checkPermission(pc, dst, false, FsAction.WRITE, null, null, null, false);
+      checkPermission(pc, dst, false, FsAction.WRITE, null, null, null, false,
+          false);
     }
 
     dir.renameTo(src, dst, logRetryCache, options);
@@ -3394,11 +3388,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
       checkNameNodeSafeMode("Cannot delete " + src);
       src = FSDirectory.resolvePath(src, pathComponents, dir);
       if (!recursive && dir.isNonEmptyDirectory(src)) {
-        throw new IOException(src + " is non empty");
+        throw new PathIsNotEmptyDirectoryException(src + " is non empty");
       }
       if (enforcePermission && isPermissionEnabled) {
         checkPermission(pc, src, false, null, FsAction.WRITE, null,
-            FsAction.ALL, false);
+            FsAction.ALL, true, false);
       }
       // Unlink the target directory from directory tree
       if (!dir.delete(src, collectedBlocks, removedINodes, logRetryCache)) {
@@ -3550,7 +3544,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
       checkOperation(OperationCategory.READ);
       src = FSDirectory.resolvePath(src, pathComponents, dir);
       if (isPermissionEnabled) {
-        checkPermission(pc, src, false, null, null, null, null, resolveLink);
+        checkPermission(pc, src, false, null, null, null, null, false,
+            resolveLink);
       }
       stat = dir.getFileInfo(src, resolveLink);
     } catch (AccessControlException e) {
@@ -5549,7 +5544,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
       FsAction parentAccess, FsAction access, FsAction subAccess)
       throws AccessControlException, UnresolvedLinkException {
         checkPermission(pc, path, doCheckOwner, ancestorAccess,
-            parentAccess, access, subAccess, true);
+            parentAccess, access, subAccess, false, true);
   }
 
   /**
@@ -5560,14 +5555,14 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
   private void checkPermission(FSPermissionChecker pc,
       String path, boolean doCheckOwner, FsAction ancestorAccess,
       FsAction parentAccess, FsAction access, FsAction subAccess,
-      boolean resolveLink)
+      boolean ignoreEmptyDir, boolean resolveLink)
       throws AccessControlException, UnresolvedLinkException {
     if (!pc.isSuperUser()) {
       dir.waitForReady();
       readLock();
       try {
         pc.checkPermission(path, dir, doCheckOwner, ancestorAccess,
-            parentAccess, access, subAccess, resolveLink);
+            parentAccess, access, subAccess, ignoreEmptyDir, resolveLink);
       } finally {
         readUnlock();
       }
@@ -6091,6 +6086,42 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
     leaseManager.changeLease(src, dst);
   }
 
 
+  /**
+   * Serializes leases.
+   */
+  void saveFilesUnderConstruction(DataOutputStream out,
+      Map<Long, INodeFile> snapshotUCMap) throws IOException {
+    // This is run by an inferior thread of saveNamespace, which holds a read
+    // lock on our behalf. If we took the read lock here, we could block
+    // for fairness if a writer is waiting on the lock.
+    synchronized (leaseManager) {
+      Map<String, INodeFile> nodes = leaseManager.getINodesUnderConstruction();
+      for (Map.Entry<String, INodeFile> entry : nodes.entrySet()) {
+        // TODO: for HDFS-5428, because of rename operations, some
+        // under-construction files that are
+        // in the current fs directory can also be captured in the
+        // snapshotUCMap. We should remove them from the snapshotUCMap.
+        snapshotUCMap.remove(entry.getValue().getId());
+      }
+
+      out.writeInt(nodes.size() + snapshotUCMap.size()); // write the size
+      for (Map.Entry<String, INodeFile> entry : nodes.entrySet()) {
+        FSImageSerialization.writeINodeUnderConstruction(
+            out, entry.getValue(), entry.getKey());
+      }
+      for (Map.Entry<Long, INodeFile> entry : snapshotUCMap.entrySet()) {
+        // for those snapshot INodeFileUC, we use "/.reserved/.inodes/<inodeid>"
+        // as their paths
+        StringBuilder b = new StringBuilder();
+        b.append(FSDirectory.DOT_RESERVED_PATH_PREFIX)
+            .append(Path.SEPARATOR).append(FSDirectory.DOT_INODES_STRING)
+            .append(Path.SEPARATOR).append(entry.getValue().getId());
+        FSImageSerialization.writeINodeUnderConstruction(
+            out, entry.getValue(), b.toString());
+      }
+    }
+  }
+
   /**
    * @return all the under-construction files in the lease map
    */
@@ -6377,6 +6408,15 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
     getEditLog().logSync();
   }
 
 
+  /**
+   * @param out the {@link DataOutputStream} to which the secret manager state is saved
+   * @param sdPath the storage directory path
+   */
+  void saveSecretManagerStateCompat(DataOutputStream out, String sdPath)
+      throws IOException {
+    dtSecretManager.saveSecretManagerStateCompat(out, sdPath);
+  }
+
   SecretManagerState saveSecretManagerState() {
     return dtSecretManager.saveSecretManagerState();
   }

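saveFilesUnderConstruction above names snapshot-only under-construction files by inode id under the reserved tree rather than by their (possibly renamed) paths. A self-contained sketch of that path construction; the literal "/.reserved" and ".inodes" values are assumptions standing in for FSDirectory.DOT_RESERVED_PATH_PREFIX and FSDirectory.DOT_INODES_STRING.

    public class ReservedInodePathDemo {
      static final String DOT_RESERVED_PATH_PREFIX = "/.reserved"; // assumed value
      static final String DOT_INODES_STRING = ".inodes";           // assumed value
      static final String SEPARATOR = "/";

      // Build "/.reserved/.inodes/<inodeid>" for a given inode id.
      static String inodePath(long inodeId) {
        return new StringBuilder()
            .append(DOT_RESERVED_PATH_PREFIX)
            .append(SEPARATOR).append(DOT_INODES_STRING)
            .append(SEPARATOR).append(inodeId)
            .toString();
      }

      public static void main(String[] args) {
        System.out.println(inodePath(16387L)); // /.reserved/.inodes/16387
      }
    }
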
+ 12 - 6
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSPermissionChecker.java

@@ -32,6 +32,7 @@ import org.apache.hadoop.fs.permission.AclEntryScope;
 import org.apache.hadoop.fs.permission.AclEntryType;
 import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hdfs.util.ReadOnlyList;
 import org.apache.hadoop.security.AccessControlException;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.StringUtils;
@@ -136,6 +137,7 @@ class FSPermissionChecker {
    * @param subAccess If path is a directory,
    * it is the access required of the path and all the sub-directories.
    * If path is not a directory, there is no effect.
+   * @param ignoreEmptyDir Whether to skip the permission check when a directory is empty
    * @param resolveLink whether to resolve the final path component if it is
    * a symlink
    * @throws AccessControlException
@@ -146,7 +148,7 @@ class FSPermissionChecker {
    */
   void checkPermission(String path, FSDirectory dir, boolean doCheckOwner,
       FsAction ancestorAccess, FsAction parentAccess, FsAction access,
-      FsAction subAccess, boolean resolveLink)
+      FsAction subAccess, boolean ignoreEmptyDir, boolean resolveLink)
       throws AccessControlException, UnresolvedLinkException {
     if (LOG.isDebugEnabled()) {
       LOG.debug("ACCESS CHECK: " + this
@@ -155,6 +157,7 @@ class FSPermissionChecker {
           + ", parentAccess=" + parentAccess
           + ", access=" + access
           + ", subAccess=" + subAccess
+          + ", ignoreEmptyDir=" + ignoreEmptyDir
           + ", resolveLink=" + resolveLink);
     }
     // check if (parentAccess != null) && file exists, then check sb
@@ -182,7 +185,7 @@ class FSPermissionChecker {
       check(last, snapshotId, access);
     }
     if (subAccess != null) {
-      checkSubAccess(last, snapshotId, subAccess);
+      checkSubAccess(last, snapshotId, subAccess, ignoreEmptyDir);
     }
     if (doCheckOwner) {
       checkOwner(last, snapshotId);
@@ -207,8 +210,8 @@ class FSPermissionChecker {
   }
 
   /** Guarded by {@link FSNamesystem#readLock()} */
-  private void checkSubAccess(INode inode, int snapshotId, FsAction access
-      ) throws AccessControlException {
+  private void checkSubAccess(INode inode, int snapshotId, FsAction access,
+      boolean ignoreEmptyDir) throws AccessControlException {
     if (inode == null || !inode.isDirectory()) {
       return;
     }
@@ -216,9 +219,12 @@ class FSPermissionChecker {
     Stack<INodeDirectory> directories = new Stack<INodeDirectory>();
     for(directories.push(inode.asDirectory()); !directories.isEmpty(); ) {
       INodeDirectory d = directories.pop();
-      check(d, snapshotId, access);
+      ReadOnlyList<INode> cList = d.getChildrenList(snapshotId);
+      if (!(cList.isEmpty() && ignoreEmptyDir)) {
+        check(d, snapshotId, access);
+      }
 
 
-      for(INode child : d.getChildrenList(snapshotId)) {
+      for(INode child : cList) {
         if (child.isDirectory()) {
           directories.push(child.asDirectory());
         }

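The reworked checkSubAccess walks the subtree with an explicit stack and, when ignoreEmptyDir is true, skips the access check on directories that have no children. A toy version of the same traversal pattern; Dir and check() below are stand-ins, not HDFS types.

    import java.util.*;

    public class SubAccessWalkDemo {
      static class Dir {
        final String name;
        final List<Dir> children = new ArrayList<>();
        Dir(String name) { this.name = name; }
      }

      static void check(Dir d) { System.out.println("checked " + d.name); }

      static void checkSubAccess(Dir root, boolean ignoreEmptyDir) {
        Deque<Dir> stack = new ArrayDeque<>();
        stack.push(root);
        while (!stack.isEmpty()) {
          Dir d = stack.pop();
          // Skip the check for a childless directory when ignoreEmptyDir is set.
          if (!(d.children.isEmpty() && ignoreEmptyDir)) {
            check(d);
          }
          for (Dir child : d.children) {
            stack.push(child);
          }
        }
      }

      public static void main(String[] args) {
        Dir root = new Dir("root");
        root.children.add(new Dir("empty"));
        checkSubAccess(root, true); // "empty" is skipped, "root" is checked
      }
    }
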
+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeReference.java

@@ -40,7 +40,7 @@ import com.google.common.base.Preconditions;
  * snapshots and it is renamed/moved to other locations.
  * 
  * For example,
- * (1) Support we have /abc/foo, say the inode of foo is inode(id=1000,name=foo)
+ * (1) Suppose we have /abc/foo, say the inode of foo is inode(id=1000,name=foo)
  * (2) create snapshot s0 for /abc
  * (3) mv /abc/foo /xyz/bar, i.e. inode(id=1000,name=...) is renamed from "foo"
  *     to "bar" and its parent becomes /xyz.

+ 6 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java

@@ -77,7 +77,8 @@ public class NNStorage extends Storage implements Closeable,
     IMAGE_ROLLBACK("fsimage_rollback"),
     EDITS_NEW ("edits.new"), // from "old" pre-HDFS-1073 format
     EDITS_INPROGRESS ("edits_inprogress"),
-    EDITS_TMP ("edits_tmp");
+    EDITS_TMP ("edits_tmp"),
+    IMAGE_LEGACY_OIV ("fsimage_legacy_oiv");  // For pre-PB format
 
 
     private String fileName = null;
     private NameNodeFile(String name) { this.fileName = name; }
@@ -693,6 +694,10 @@ public class NNStorage extends Storage implements Closeable,
     return getNameNodeFileName(NameNodeFile.IMAGE_ROLLBACK, txid);
   }
 
 
+  public static String getLegacyOIVImageFileName(long txid) {
+    return getNameNodeFileName(NameNodeFile.IMAGE_LEGACY_OIV, txid);
+  }
+
   private static String getNameNodeFileName(NameNodeFile nnf, long txid) {
     return String.format("%s_%019d", nnf.getName(), txid);
   }

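getLegacyOIVImageFileName reuses the existing "%s_%019d" naming scheme, so a transaction id can be recovered from a file name by stripping the prefix and the underscore. A standalone round trip; the file names here are examples only.

    public class LegacyOivNameDemo {
      static final String PREFIX = "fsimage_legacy_oiv";

      static String fileName(long txid) {
        return String.format("%s_%019d", PREFIX, txid);   // same scheme as above
      }

      static long txIdOf(String name) {
        return Long.parseLong(name.substring(PREFIX.length() + 1));
      }

      public static void main(String[] args) {
        String n = fileName(42);
        System.out.println(n);          // prefix + "_" + txid zero-padded to 19 digits
        System.out.println(txIdOf(n));  // 42
      }
    }
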
+ 56 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorageRetentionManager.java

@@ -18,11 +18,13 @@
 package org.apache.hadoop.hdfs.server.namenode;
 
 import java.io.File;
+import java.io.FilenameFilter;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.EnumSet;
+import java.util.Iterator;
 import java.util.List;
 import java.util.TreeSet;
 
 
@@ -233,4 +235,58 @@ public class NNStorageRetentionManager {
       }
     }
   }
+
+  /**
+   * Delete old OIV fsimages. Since the target dir is not a full blown
+   * storage directory, we simply list and keep the latest ones. For the
+   * same reason, no storage inspector is used.
+   */
+  void purgeOldLegacyOIVImages(String dir, long txid) {
+    File oivImageDir = new File(dir);
+    final String oivImagePrefix = NameNodeFile.IMAGE_LEGACY_OIV.getName();
+    String filesInStorage[];
+
+    // Get the listing
+    filesInStorage = oivImageDir.list(new FilenameFilter() {
+      @Override
+      public boolean accept(File dir, String name) {
+        return name.matches(oivImagePrefix + "_(\\d+)");
+      }
+    });
+
+    // Check whether there is any work to do.
+    if (filesInStorage.length <= numCheckpointsToRetain) {
+      return;
+    }
+
+    // Create a sorted list of txids from the file names.
+    TreeSet<Long> sortedTxIds = new TreeSet<Long>();
+    for (String fName : filesInStorage) {
+      // Extract the transaction id from the file name.
+      long fTxId;
+      try {
+        fTxId = Long.parseLong(fName.substring(oivImagePrefix.length() + 1));
+      } catch (NumberFormatException nfe) {
+        // This should not happen since we have already filtered it.
+        // Log and continue.
+        LOG.warn("Invalid file name. Skipping " + fName);
+        continue;
+      }
+      sortedTxIds.add(Long.valueOf(fTxId));
+    }
+
+    int numFilesToDelete = sortedTxIds.size() - numCheckpointsToRetain;
+    Iterator<Long> iter = sortedTxIds.iterator();
+    while (numFilesToDelete > 0 && iter.hasNext()) {
+      long txIdVal = iter.next().longValue();
+      String fileName = NNStorage.getLegacyOIVImageFileName(txIdVal);
+      LOG.info("Deleting " + fileName);
+      File fileToDelete = new File(oivImageDir, fileName);
+      if (!fileToDelete.delete()) {
+        // deletion failed.
+        LOG.warn("Failed to delete image file: " + fileToDelete);
+      }
+      numFilesToDelete--;
+    }
+  }
 }

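purgeOldLegacyOIVImages keeps only the newest numCheckpointsToRetain images, ordered by the txid parsed from each file name. The retention rule in isolation, with deletion replaced by a println so the sketch has no side effects.

    import java.util.*;

    public class OivRetentionDemo {
      static List<Long> idsToPurge(Collection<Long> txids, int numToRetain) {
        TreeSet<Long> sorted = new TreeSet<>(txids);     // ascending by txid
        List<Long> purge = new ArrayList<>();
        int numToDelete = sorted.size() - numToRetain;
        Iterator<Long> iter = sorted.iterator();
        while (numToDelete > 0 && iter.hasNext()) {
          purge.add(iter.next());                        // oldest first
          numToDelete--;
        }
        return purge;
      }

      public static void main(String[] args) {
        List<Long> txids = Arrays.asList(10L, 30L, 20L, 40L);
        // Keep the two newest checkpoints (30, 40); purge 10 and 20.
        for (long txid : idsToPurge(txids, 2)) {
          System.out.println("would delete txid " + txid);
        }
      }
    }
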
+ 5 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java

@@ -1677,7 +1677,11 @@ public class NameNode implements NameNodeStatusMXBean {
   public boolean isStandbyState() {
     return (state.equals(STANDBY_STATE));
   }
-
+  
+  public boolean isActiveState() {
+    return (state.equals(ACTIVE_STATE));
+  }
+  
   /**
    * Check that a request to change this node's HA state is valid.
    * In particular, verifies that, if auto failover is enabled, non-forced

+ 16 - 3
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java

@@ -62,6 +62,7 @@ import org.apache.hadoop.hdfs.server.namenode.NNStorageRetentionManager.StorageP
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
 import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
 import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;
+import org.apache.hadoop.hdfs.util.Canceler;
 import org.apache.hadoop.http.HttpConfig;
 import org.apache.hadoop.http.HttpServer2;
 import org.apache.hadoop.io.MD5Hash;
@@ -125,6 +126,7 @@ public class SecondaryNameNode implements Runnable,
 
 
   private Thread checkpointThread;
   private ObjectName nameNodeStatusBeanName;
+  private String legacyOivImageDir;
 
 
   @Override
   public String toString() {
@@ -289,6 +291,9 @@ public class SecondaryNameNode implements Runnable,
           NetUtils.getHostPortString(httpsAddress));
     }
 
 
+    legacyOivImageDir = conf.get(
+        DFSConfigKeys.DFS_NAMENODE_LEGACY_OIV_IMAGE_DIR_KEY);
+
     LOG.info("Checkpoint Period   :" + checkpointConf.getPeriod() + " secs "
         + "(" + checkpointConf.getPeriod() / 60 + " min)");
     LOG.info("Log Size Trigger    :" + checkpointConf.getTxnCount() + " txns");
@@ -497,6 +502,7 @@ public class SecondaryNameNode implements Runnable,
    * @return if the image is fetched from primary or not
    */
   @VisibleForTesting
+  @SuppressWarnings("deprecation")
   public boolean doCheckpoint() throws IOException {
     checkpointImage.ensureCurrentDirExists();
     NNStorage dstStorage = checkpointImage.getStorage();
@@ -559,11 +565,18 @@ public class SecondaryNameNode implements Runnable,
 
 
     LOG.warn("Checkpoint done. New Image Size: " 
              + dstStorage.getFsImageName(txid).length());
-    
+
+    if (legacyOivImageDir != null && !legacyOivImageDir.isEmpty()) {
+      try {
+        checkpointImage.saveLegacyOIVImage(namesystem, legacyOivImageDir,
+            new Canceler());
+      } catch (IOException e) {
+        LOG.warn("Failed to write legacy OIV image: ", e);
+      }
+    }
     return loadImage;
   }
-  
-  
+
   /**
    * @param opts The parameters passed to this program.
    * @exception Exception if the filesystem does not exist.

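The secondary writes a legacy OIV image only when the output directory is configured and non-empty. A minimal sketch of that guard; the key string "dfs.namenode.legacy-oiv-image.dir" is an assumption standing in for DFSConfigKeys.DFS_NAMENODE_LEGACY_OIV_IMAGE_DIR_KEY, and java.util.Properties stands in for Hadoop's Configuration.

    import java.util.Properties;

    public class LegacyOivDirGuardDemo {
      public static void main(String[] args) {
        Properties conf = new Properties();  // stand-in for Configuration
        conf.setProperty("dfs.namenode.legacy-oiv-image.dir", "/tmp/oiv"); // assumed key
        String legacyOivImageDir = conf.getProperty("dfs.namenode.legacy-oiv-image.dir");
        // Same null/empty guard as in doCheckpoint() above.
        if (legacyOivImageDir != null && !legacyOivImageDir.isEmpty()) {
          System.out.println("would save legacy OIV image under " + legacyOivImageDir);
        }
      }
    }
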
+ 35 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/AbstractNNFailoverProxyProvider.java

@@ -0,0 +1,35 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfs.server.namenode.ha;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.retry.FailoverProxyProvider;
+
+public abstract class AbstractNNFailoverProxyProvider<T> implements
+   FailoverProxyProvider <T> {
+
+  /**
+   * Inquire whether logical HA URI is used for the implementation. If it is
+   * used, a special token handling may be needed to make sure a token acquired 
+   * from a node in the HA pair can be used against the other node. 
+   *
+   * @return true if logical HA URI is used. false, if not used.
+   */
+  public abstract boolean useLogicalURI(); 
+}

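useLogicalURI lets a caller ask whether a provider addresses the cluster through a logical HA URI, which determines whether special token handling is needed. A toy model of the two kinds of answers; these are illustrative classes, not HDFS code.

    public class UseLogicalUriDemo {
      abstract static class Provider {
        abstract boolean useLogicalURI();
      }
      static class LogicalUriProvider extends Provider {
        @Override boolean useLogicalURI() { return true; }   // like ConfiguredFailoverProxyProvider
      }
      static class IpFailoverProvider extends Provider {
        @Override boolean useLogicalURI() { return false; }  // like IPFailoverProxyProvider
      }
      public static void main(String[] args) {
        Provider[] providers = { new LogicalUriProvider(), new IpFailoverProvider() };
        for (Provider p : providers) {
          System.out.println(p.getClass().getSimpleName()
              + " uses logical URI: " + p.useLogicalURI());
        }
      }
    }
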
+ 11 - 3
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java

@@ -34,8 +34,8 @@ import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.HAUtil;
 import org.apache.hadoop.hdfs.NameNodeProxies;
+import org.apache.hadoop.hdfs.server.namenode.ha.AbstractNNFailoverProxyProvider;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
-import org.apache.hadoop.io.retry.FailoverProxyProvider;
 import org.apache.hadoop.ipc.RPC;
 import org.apache.hadoop.security.UserGroupInformation;
 
 
@@ -46,8 +46,8 @@ import com.google.common.base.Preconditions;
  * to connect to during fail-over. The first configured address is tried first,
  * and on a fail-over event the other address is tried.
  */
-public class ConfiguredFailoverProxyProvider<T> implements
-    FailoverProxyProvider<T> {
+public class ConfiguredFailoverProxyProvider<T> extends
+    AbstractNNFailoverProxyProvider<T> {
   
   
   private static final Log LOG =
       LogFactory.getLog(ConfiguredFailoverProxyProvider.class);
@@ -165,4 +165,12 @@ public class ConfiguredFailoverProxyProvider<T> implements
       }
     }
   }
+
+  /**
+   * Logical URI is required for this failover proxy provider.
+   */
+  @Override
+  public boolean useLogicalURI() {
+    return true;
+  }
 }

+ 133 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/IPFailoverProxyProvider.java

@@ -0,0 +1,133 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode.ha;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.net.URI;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.NameNodeProxies;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
+import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
+import org.apache.hadoop.io.retry.FailoverProxyProvider;
+import org.apache.hadoop.ipc.RPC;
+import org.apache.hadoop.security.UserGroupInformation;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * A NNFailoverProxyProvider implementation which works on IP failover setup.
+ * Only one proxy is used to connect to both servers and switching between
+ * the servers is done by the environment/infrastructure, which guarantees
+ * clients can consistently reach only one node at a time.
+ *
+ * Clients with a live connection will likely get connection reset after an
+ * IP failover. This case will be handled by the 
+ * FailoverOnNetworkExceptionRetry retry policy. I.e. if the call is
+ * not idempotent, it won't get retried.
+ *
+ * A connection reset while setting up a connection (i.e. before sending a
+ * request) will be handled in ipc client.
+ *
+ * The namenode URI must contain a resolvable host name.
+ */
+public class IPFailoverProxyProvider<T> extends
+    AbstractNNFailoverProxyProvider<T> {
+  private final Configuration conf;
+  private final Class<T> xface;
+  private final URI nameNodeUri;
+  private ProxyInfo<T> nnProxyInfo = null;
+  
+  public IPFailoverProxyProvider(Configuration conf, URI uri,
+      Class<T> xface) {
+    Preconditions.checkArgument(
+        xface.isAssignableFrom(NamenodeProtocols.class),
+        "Interface class %s is not a valid NameNode protocol!", xface);
+    this.xface = xface;
+    this.nameNodeUri = uri;
+
+    this.conf = new Configuration(conf);
+    int maxRetries = this.conf.getInt(
+        DFSConfigKeys.DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_KEY,
+        DFSConfigKeys.DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_DEFAULT);
+    this.conf.setInt(
+        CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY,
+        maxRetries);
+    
+    int maxRetriesOnSocketTimeouts = this.conf.getInt(
+        DFSConfigKeys.DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_ON_SOCKET_TIMEOUTS_KEY,
+        DFSConfigKeys.DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_ON_SOCKET_TIMEOUTS_DEFAULT);
+    this.conf.setInt(
+        CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY,
+        maxRetriesOnSocketTimeouts);
+  }
+    
+  @Override
+  public Class<T> getInterface() {
+    return xface;
+  }
+
+  @Override
+  public synchronized ProxyInfo<T> getProxy() {
+    // Create a non-ha proxy if not already created.
+    if (nnProxyInfo == null) {
+      try {
+        // Create a proxy that is not wrapped in RetryProxy
+        InetSocketAddress nnAddr = NameNode.getAddress(nameNodeUri);
+        nnProxyInfo = new ProxyInfo<T>(NameNodeProxies.createNonHAProxy(
+            conf, nnAddr, xface, UserGroupInformation.getCurrentUser(), 
+            false).getProxy(), nnAddr.toString());
+      } catch (IOException ioe) {
+        throw new RuntimeException(ioe);
+      }
+    }
+    return nnProxyInfo;
+  }
+
+  /** Nothing to do for IP failover */
+  @Override
+  public void performFailover(T currentProxy) {
+  }
+
+  /**
+   * Close the proxy,
+   */
+  @Override
+  public synchronized void close() throws IOException {
+    if (nnProxyInfo == null) {
+      return;
+    }
+    if (nnProxyInfo.proxy instanceof Closeable) {
+      ((Closeable)nnProxyInfo.proxy).close();
+    } else {
+      RPC.stopProxy(nnProxyInfo.proxy);
+    }
+  }
+
+  /**
+   * Logical URI is not used for IP failover.
+   */
+  @Override
+  public boolean useLogicalURI() {
+    return false;
+  }
+}

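IPFailoverProxyProvider.getProxy() builds a single non-HA proxy lazily and keeps returning the same instance, because switching between namenodes is done by the infrastructure rather than by the client. A toy sketch of that caching pattern; the NamenodeProxy interface below is a stand-in, not the real RPC proxy.

    public class LazySingleProxyDemo {
      interface NamenodeProxy { String address(); }

      static class Provider {
        private NamenodeProxy cached;                    // created at most once
        synchronized NamenodeProxy getProxy() {
          if (cached == null) {
            cached = () -> "nn.example.com:8020";        // stand-in for proxy creation
          }
          return cached;
        }
        void performFailover(NamenodeProxy current) {
          // nothing to do: IP failover is handled outside the client
        }
      }

      public static void main(String[] args) {
        Provider p = new Provider();
        System.out.println(p.getProxy() == p.getProxy()); // true: same instance reused
        System.out.println(p.getProxy().address());
      }
    }
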
+ 6 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java

@@ -183,6 +183,12 @@ public class StandbyCheckpointer {
       txid = img.getStorage().getMostRecentCheckpointTxId();
       assert txid == thisCheckpointTxId : "expected to save checkpoint at txid=" +
         thisCheckpointTxId + " but instead saved at txid=" + txid;
+
+      // Save the legacy OIV image, if the output dir is defined.
+      String outputDir = checkpointConf.getLegacyOivImageDir();
+      if (outputDir != null && !outputDir.isEmpty()) {
+        img.saveLegacyOIVImage(namesystem, outputDir, canceler);
+      }
     } finally {
       namesystem.longReadUnlock();
     }

+ 80 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/WrappedFailoverProxyProvider.java

@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode.ha;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.net.URI;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
+import org.apache.hadoop.io.retry.FailoverProxyProvider;
+import org.apache.hadoop.ipc.RPC;
+import org.apache.hadoop.security.UserGroupInformation;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * A NNFailoverProxyProvider implementation which wraps old implementations
+ * directly implementing the {@link FailoverProxyProvider} interface.
+ *
+ * It is assumed that the old implementation uses a logical URI.
+ */
+public class WrappedFailoverProxyProvider<T> extends
+    AbstractNNFailoverProxyProvider<T> {
+  private final FailoverProxyProvider<T> proxyProvider;
+  
+  /**
+   * Wrap the given instance of an old FailoverProxyProvider.
+   */
+  public WrappedFailoverProxyProvider(FailoverProxyProvider<T> provider) {
+    proxyProvider = provider;
+  }
+    
+  @Override
+  public Class<T> getInterface() {
+    return proxyProvider.getInterface();
+  }
+
+  @Override
+  public synchronized ProxyInfo<T> getProxy() {
+    return proxyProvider.getProxy();
+  }
+
+  @Override
+  public void performFailover(T currentProxy) {
+    proxyProvider.performFailover(currentProxy);
+  }
+
+  /**
+   * Close the proxy,
+   */
+  @Override
+  public synchronized void close() throws IOException {
+    proxyProvider.close();
+  }
+
+  /**
+   * Assume logical URI is used for old proxy provider implementations.
+   */
+  @Override
+  public boolean useLogicalURI() {
+    return true;
+  }
+}

+ 13 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/AbstractINodeDiff.java

@@ -17,13 +17,17 @@
  */
 package org.apache.hadoop.hdfs.server.namenode.snapshot;
 
 
-import com.google.common.base.Preconditions;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.List;
+
 import org.apache.hadoop.hdfs.server.namenode.INode;
 import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
 import org.apache.hadoop.hdfs.server.namenode.INodeAttributes;
 import org.apache.hadoop.hdfs.server.namenode.Quota;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat.ReferenceMap;
 
 
-import java.util.List;
+import com.google.common.base.Preconditions;
 
 
 /**
  * The difference of an inode between in two snapshots.
@@ -128,4 +132,11 @@ abstract class AbstractINodeDiff<N extends INode,
     return getClass().getSimpleName() + ": " + this.getSnapshotId() + " (post="
         + (posteriorDiff == null? null: posteriorDiff.getSnapshotId()) + ")";
   }
+
+  void writeSnapshot(DataOutput out) throws IOException {
+    out.writeInt(snapshotId);
+  }
+
+  abstract void write(DataOutput out, ReferenceMap referenceMap
+      ) throws IOException;
 }

+ 52 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java

@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hdfs.server.namenode.snapshot;
 
 
+import java.io.DataOutput;
+import java.io.IOException;
 import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.Collections;
@@ -32,6 +34,7 @@ import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffReportEntry;
 import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffType;
 import org.apache.hadoop.hdfs.server.namenode.Content;
 import org.apache.hadoop.hdfs.server.namenode.ContentSummaryComputationContext;
+import org.apache.hadoop.hdfs.server.namenode.FSImageSerialization;
 import org.apache.hadoop.hdfs.server.namenode.INode;
 import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
 import org.apache.hadoop.hdfs.server.namenode.INodeDirectory;
@@ -39,6 +42,7 @@ import org.apache.hadoop.hdfs.server.namenode.INodeDirectoryAttributes;
 import org.apache.hadoop.hdfs.server.namenode.INodeFile;
 import org.apache.hadoop.hdfs.server.namenode.INodeReference;
 import org.apache.hadoop.hdfs.server.namenode.Quota;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat.ReferenceMap;
 import org.apache.hadoop.hdfs.util.Diff;
 import org.apache.hadoop.hdfs.util.Diff.Container;
 import org.apache.hadoop.hdfs.util.Diff.ListType;
@@ -120,6 +124,35 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
       return counts;
     }
 
 
+    /** Serialize {@link #created} */
+    private void writeCreated(DataOutput out) throws IOException {
+      final List<INode> created = getList(ListType.CREATED);
+      out.writeInt(created.size());
+      for (INode node : created) {
+        // For INode in created list, we only need to record its local name
+        byte[] name = node.getLocalNameBytes();
+        out.writeShort(name.length);
+        out.write(name);
+      }
+    }
+
+    /** Serialize {@link #deleted} */
+    private void writeDeleted(DataOutput out,
+        ReferenceMap referenceMap) throws IOException {
+      final List<INode> deleted = getList(ListType.DELETED);
+      out.writeInt(deleted.size());
+      for (INode node : deleted) {
+        FSImageSerialization.saveINode2Image(node, out, true, referenceMap);
+      }
+    }
+
+    /** Serialize to out */
+    private void write(DataOutput out, ReferenceMap referenceMap
+        ) throws IOException {
+      writeCreated(out);
+      writeDeleted(out, referenceMap);
+    }
+
     /** Get the list of INodeDirectory contained in the deleted list */
     private void getDirsInDeleted(List<INodeDirectory> dirList) {
       for (INode node : getList(ListType.DELETED)) {
@@ -314,6 +347,25 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
       return childrenSize;
     }
 
 
+    @Override
+    void write(DataOutput out, ReferenceMap referenceMap) throws IOException {
+      writeSnapshot(out);
+      out.writeInt(childrenSize);
+
+      // Write snapshotINode
+      out.writeBoolean(isSnapshotRoot);
+      if (!isSnapshotRoot) {
+        if (snapshotINode != null) {
+          out.writeBoolean(true);
+          FSImageSerialization.writeINodeDirectoryAttributes(snapshotINode, out);
+        } else {
+          out.writeBoolean(false);
+        }
+      }
+      // Write diff. No need to write posteriorDiff, since diffs is a list.
+      diff.write(out, referenceMap);
+    }
+
     @Override
     Quota.Counts destroyDiffAndCollectBlocks(INodeDirectory currentINode,
         BlocksMapUpdateInfo collectedBlocks, final List<INode> removedINodes) {

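writeCreated above stores only the local name of each inode in the created list, as an int count followed by short-length-prefixed byte arrays. A standalone round trip of that encoding, using plain java.io types only.

    import java.io.*;
    import java.nio.charset.StandardCharsets;
    import java.util.*;

    public class CreatedListEncodingDemo {
      static void writeCreated(List<byte[]> created, DataOutput out) throws IOException {
        out.writeInt(created.size());
        for (byte[] name : created) {
          out.writeShort(name.length);   // short length prefix
          out.write(name);               // raw name bytes
        }
      }

      static List<String> readCreated(DataInput in) throws IOException {
        int size = in.readInt();
        List<String> names = new ArrayList<>();
        for (int i = 0; i < size; i++) {
          byte[] name = new byte[in.readShort()];
          in.readFully(name);
          names.add(new String(name, StandardCharsets.UTF_8));
        }
        return names;
      }

      public static void main(String[] args) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        List<byte[]> created = Arrays.asList(
            "a.txt".getBytes(StandardCharsets.UTF_8),
            "b.txt".getBytes(StandardCharsets.UTF_8));
        writeCreated(created, new DataOutputStream(bytes));
        System.out.println(readCreated(new DataInputStream(
            new ByteArrayInputStream(bytes.toByteArray())))); // [a.txt, b.txt]
      }
    }
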
+ 20 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileDiff.java

@@ -17,13 +17,17 @@
  */
 package org.apache.hadoop.hdfs.server.namenode.snapshot;
 
 
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.hdfs.server.namenode.FSImageSerialization;
 import org.apache.hadoop.hdfs.server.namenode.INode;
 import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
 import org.apache.hadoop.hdfs.server.namenode.INodeFile;
 import org.apache.hadoop.hdfs.server.namenode.INodeFileAttributes;
 import org.apache.hadoop.hdfs.server.namenode.Quota;
-
-import java.util.List;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat.ReferenceMap;
 
 
 /**
  * The difference of an {@link INodeFile} between two snapshots.
@@ -66,6 +70,20 @@ public class FileDiff extends
         + (snapshotINode == null? "?": snapshotINode.getFileReplication());
   }
 
 
+  @Override
+  void write(DataOutput out, ReferenceMap referenceMap) throws IOException {
+    writeSnapshot(out);
+    out.writeLong(fileSize);
+
+    // write snapshotINode
+    if (snapshotINode != null) {
+      out.writeBoolean(true);
+      FSImageSerialization.writeINodeFileAttributes(snapshotINode, out);
+    } else {
+      out.writeBoolean(false);
+    }
+  }
+
   @Override
   Quota.Counts destroyDiffAndCollectBlocks(INodeFile currentINode,
       BlocksMapUpdateInfo collectedBlocks, final List<INode> removedINodes) {

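FileDiff.write, like DirectoryDiff.write above, serializes the optional snapshot copy of the inode attributes behind a boolean presence flag. The same pattern in isolation, with a String payload standing in for the attributes.

    import java.io.*;

    public class OptionalFieldEncodingDemo {
      static void writeOptional(String snapshotCopy, DataOutput out) throws IOException {
        if (snapshotCopy != null) {
          out.writeBoolean(true);        // payload present
          out.writeUTF(snapshotCopy);
        } else {
          out.writeBoolean(false);       // payload absent
        }
      }

      static String readOptional(DataInput in) throws IOException {
        return in.readBoolean() ? in.readUTF() : null;
      }

      public static void main(String[] args) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(bytes);
        writeOptional("attrs@snapshot", out);
        writeOptional(null, out);
        DataInputStream in = new DataInputStream(
            new ByteArrayInputStream(bytes.toByteArray()));
        System.out.println(readOptional(in)); // attrs@snapshot
        System.out.println(readOptional(in)); // null
      }
    }
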
+ 9 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/Snapshot.java

@@ -18,6 +18,7 @@
 package org.apache.hadoop.hdfs.server.namenode.snapshot;
 
 import java.io.DataInput;
+import java.io.DataOutput;
 import java.io.IOException;
 import java.text.SimpleDateFormat;
 import java.util.Arrays;
@@ -30,6 +31,7 @@ import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.server.namenode.AclFeature;
 import org.apache.hadoop.hdfs.server.namenode.FSImageFormat;
+import org.apache.hadoop.hdfs.server.namenode.FSImageSerialization;
 import org.apache.hadoop.hdfs.server.namenode.INode;
 import org.apache.hadoop.hdfs.server.namenode.INodeDirectory;
 import org.apache.hadoop.hdfs.server.namenode.XAttrFeature;
@@ -227,4 +229,11 @@ public class Snapshot implements Comparable<byte[]> {
   public String toString() {
     return getClass().getSimpleName() + "." + root.getLocalName() + "(id=" + id + ")";
   }
+
+  /** Serialize the fields to out */
+  void write(DataOutput out) throws IOException {
+    out.writeInt(id);
+    // write root
+    FSImageSerialization.writeINodeDirectory(root, out);
+  }
 }

+ 72 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotFSImageFormat.java

@@ -29,21 +29,75 @@ import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.server.namenode.FSImageFormat;
 import org.apache.hadoop.hdfs.server.namenode.FSImageSerialization;
 import org.apache.hadoop.hdfs.server.namenode.INode;
+import org.apache.hadoop.hdfs.server.namenode.INodeAttributes;
 import org.apache.hadoop.hdfs.server.namenode.INodeDirectory;
 import org.apache.hadoop.hdfs.server.namenode.INodeDirectoryAttributes;
+import org.apache.hadoop.hdfs.server.namenode.INodeFile;
 import org.apache.hadoop.hdfs.server.namenode.INodeFileAttributes;
 import org.apache.hadoop.hdfs.server.namenode.INodeReference;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature.DirectoryDiff;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature.DirectoryDiffList;
 import org.apache.hadoop.hdfs.tools.snapshot.SnapshotDiff;
 import org.apache.hadoop.hdfs.util.Diff.ListType;
-import org.apache.hadoop.hdfs.server.namenode.FSImageFormat.Loader;
+import org.apache.hadoop.hdfs.util.ReadOnlyList;
 
 
 /**
  * A helper class defining static methods for reading/writing snapshot related
  * information from/to FSImage.
  */
 public class SnapshotFSImageFormat {
+  /**
+   * Save snapshots and snapshot quota for a snapshottable directory.
+   * @param current The directory that the snapshots belong to.
+   * @param out The {@link DataOutput} to write to.
+   * @throws IOException
+   */
+  public static void saveSnapshots(INodeDirectorySnapshottable current,
+      DataOutput out) throws IOException {
+    // list of snapshots in snapshotsByNames
+    ReadOnlyList<Snapshot> snapshots = current.getSnapshotsByNames();
+    out.writeInt(snapshots.size());
+    for (Snapshot s : snapshots) {
+      // write the snapshot id
+      out.writeInt(s.getId());
+    }
+    // snapshot quota
+    out.writeInt(current.getSnapshotQuota());
+  }
+
+  /**
+   * Save SnapshotDiff list for an INodeDirectoryWithSnapshot.
+   * @param sNode The directory that the SnapshotDiff list belongs to.
+   * @param out The {@link DataOutput} to write.
+   */
+  private static <N extends INode, A extends INodeAttributes, D extends AbstractINodeDiff<N, A, D>>
+      void saveINodeDiffs(final AbstractINodeDiffList<N, A, D> diffs,
+      final DataOutput out, ReferenceMap referenceMap) throws IOException {
+    // Record the diffs in reversed order, so that we can find the correct
+    // reference for INodes in the created list when loading the FSImage
+    if (diffs == null) {
+      out.writeInt(-1); // no diffs
+    } else {
+      final List<D> list = diffs.asList();
+      final int size = list.size();
+      out.writeInt(size);
+      for (int i = size - 1; i >= 0; i--) {
+        list.get(i).write(out, referenceMap);
+      }
+    }
+  }
+
+  public static void saveDirectoryDiffList(final INodeDirectory dir,
+      final DataOutput out, final ReferenceMap referenceMap
+      ) throws IOException {
+    saveINodeDiffs(dir.getDiffs(), out, referenceMap);
+  }
+
+  public static void saveFileDiffList(final INodeFile file,
+      final DataOutput out) throws IOException {
+    saveINodeDiffs(file.getDiffs(), out, null);
+  }
+
   public static FileDiffList loadFileDiffList(DataInput in,
       FSImageFormat.Loader loader) throws IOException {
     final int size = in.readInt();
@@ -264,6 +318,23 @@ public class SnapshotFSImageFormat {
      * Used to record whether the subtree of the reference node has been saved 
      */
     private final Map<Long, Long> dirMap = new HashMap<Long, Long>();
+
+    public void writeINodeReferenceWithCount(
+        INodeReference.WithCount withCount, DataOutput out,
+        boolean writeUnderConstruction) throws IOException {
+      final INode referred = withCount.getReferredINode();
+      final long id = withCount.getId();
+      final boolean firstReferred = !referenceMap.containsKey(id);
+      out.writeBoolean(firstReferred);
+
+      if (firstReferred) {
+        FSImageSerialization.saveINode2Image(referred, out,
+            writeUnderConstruction, this);
+        referenceMap.put(id, withCount);
+      } else {
+        out.writeLong(id);
+      }
+    }
     
     public boolean toProcessSubtree(long id) {
       if (dirMap.containsKey(id)) {

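The saveINodeDiffs() helper above encodes a possibly-missing diff list with a -1 sentinel and writes the entries back to front. A minimal sketch of that convention over plain integers (which stand in for the AbstractINodeDiff records), together with the matching read side:

    import java.io.*;
    import java.util.*;

    // Illustration of the "-1 means no diffs, otherwise size then entries in
    // reverse" convention used by saveINodeDiffs above.
    public class DiffListSketch {
      static void save(List<Integer> diffs, DataOutput out) throws IOException {
        if (diffs == null) {
          out.writeInt(-1);                 // sentinel: no diff list
        } else {
          out.writeInt(diffs.size());
          for (int i = diffs.size() - 1; i >= 0; i--) {
            out.writeInt(diffs.get(i));     // newest diff is written first
          }
        }
      }

      static List<Integer> load(DataInput in) throws IOException {
        int size = in.readInt();
        if (size < 0) {
          return null;                      // sentinel seen: nothing to load
        }
        List<Integer> diffs = new ArrayList<>(size);
        for (int i = 0; i < size; i++) {
          diffs.add(in.readInt());          // entries arrive newest-first
        }
        return diffs;
      }

      public static void main(String[] args) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        save(Arrays.asList(10, 11, 12), new DataOutputStream(bytes));
        System.out.println(load(new DataInputStream(
            new ByteArrayInputStream(bytes.toByteArray()))));  // [12, 11, 10]
      }
    }
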
+ 17 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java

@@ -18,6 +18,7 @@
 package org.apache.hadoop.hdfs.server.namenode.snapshot;
 
 import java.io.DataInput;
+import java.io.DataOutput;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
@@ -287,6 +288,22 @@ public class SnapshotManager implements SnapshotStatsMXBean {
     return snapshottables.values().toArray(
         new INodeDirectorySnapshottable[snapshottables.size()]);
   }
+
+  /**
+   * Write {@link #snapshotCounter}, {@link #numSnapshots},
+   * and all snapshots to the DataOutput.
+   */
+  public void write(DataOutput out) throws IOException {
+    out.writeInt(snapshotCounter);
+    out.writeInt(numSnapshots.get());
+
+    // write all snapshots.
+    for(INodeDirectorySnapshottable snapshottableDir : snapshottables.values()) {
+      for(Snapshot s : snapshottableDir.getSnapshotsByNames()) {
+        s.write(out);
+      }
+    }
+  }
   
   /**
    * Read values of {@link #snapshotCounter}, {@link #numSnapshots}, and

+ 4 - 3
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java

@@ -913,9 +913,10 @@ public class DFSAdmin extends FsShell {
     
     Configuration dfsConf = dfs.getConf();
     URI dfsUri = dfs.getUri();
-    boolean isHaEnabled = HAUtil.isLogicalUri(dfsConf, dfsUri);
-    if (isHaEnabled) {
-      // In the case of HA, run finalizeUpgrade for all NNs in this nameservice
+    boolean isHaAndLogicalUri = HAUtil.isLogicalUri(dfsConf, dfsUri);
+    if (isHaAndLogicalUri) {
+      // In the case of HA and logical URI, run finalizeUpgrade for all
+      // NNs in this nameservice.
       String nsId = dfsUri.getHost();
       String nsId = dfsUri.getHost();
       List<ClientProtocol> namenodes =
           HAUtil.getProxiesForAllNameNodesInNameservice(dfsConf, nsId);

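The hunk above ends before the loop that actually uses these proxies. Presumably (an assumption; the rest of finalizeUpgrade() is outside this diff) each NameNode in the nameservice is finalized in turn, roughly along these lines (requires the hadoop-hdfs client classes on the classpath):

    import java.io.IOException;
    import java.util.List;

    import org.apache.hadoop.hdfs.protocol.ClientProtocol;

    // Sketch of the assumed loop body; not copied from the patch.
    class FinalizeAllSketch {
      static void finalizeAll(List<ClientProtocol> namenodes) throws IOException {
        for (ClientProtocol nn : namenodes) {
          nn.finalizeUpgrade();  // ClientProtocol RPC against one NameNode
        }
      }
    }
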
+ 11 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java

@@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.tools;
 
 import java.io.PrintStream;
 import java.util.Arrays;
+import java.util.Collection;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -27,6 +28,7 @@ import org.apache.hadoop.fs.CommonConfigurationKeys;
 import org.apache.hadoop.ha.HAAdmin;
 import org.apache.hadoop.ha.HAServiceTarget;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.util.ToolRunner;
 
@@ -117,7 +119,15 @@ public class DFSHAAdmin extends HAAdmin {
     
     return super.runCmd(argv);
   }
-
+  
+  /**
+   * returns the list of all namenode ids for the given configuration 
+   */
+  @Override
+  protected Collection<String> getTargetIds(String namenodeToActivate) {
+    return DFSUtil.getNameNodeIds(getConf(), (nameserviceId != null)? nameserviceId : DFSUtil.getNamenodeNameServiceId(getConf()));
+  }
+  
   public static void main(String[] argv) throws Exception {
     int res = ToolRunner.run(new DFSHAAdmin(), argv);
     System.exit(res);

+ 172 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/DelimitedImageVisitor.java

@@ -0,0 +1,172 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.IOException;
+import java.util.AbstractMap;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedList;
+
+/**
+ * A DelimitedImageVisitor generates a text representation of the fsimage,
+ * with each element separated by a delimiter string.  All of the elements
+ * common to both inodes and inodes-under-construction are included. When 
+ * processing an fsimage with a layout version that did not include an 
+ * element, such as AccessTime, the output file will include a column
+ * for the value, but no value will be included.
+ * 
+ * Individual block information for each file is not currently included.
+ * 
+ * The default delimiter is tab, as this is an unlikely value to be included
+ * in an inode path or other text metadata.  The delimiter value can be
+ * changed via the constructor.
+ */
+class DelimitedImageVisitor extends TextWriterImageVisitor {
+  private static final String defaultDelimiter = "\t"; 
+  
+  final private LinkedList<ImageElement> elemQ = new LinkedList<ImageElement>();
+  private long fileSize = 0l;
+  // Elements of fsimage we're interested in tracking
+  private final Collection<ImageElement> elementsToTrack;
+  // Values for each of the elements in elementsToTrack
+  private final AbstractMap<ImageElement, String> elements = 
+                                            new HashMap<ImageElement, String>();
+  private final String delimiter;
+
+  {
+    elementsToTrack = new ArrayList<ImageElement>();
+    
+    // This collection determines what elements are tracked and the order
+    // in which they are output
+    Collections.addAll(elementsToTrack,  ImageElement.INODE_PATH,
+                                         ImageElement.REPLICATION,
+                                         ImageElement.MODIFICATION_TIME,
+                                         ImageElement.ACCESS_TIME,
+                                         ImageElement.BLOCK_SIZE,
+                                         ImageElement.NUM_BLOCKS,
+                                         ImageElement.NUM_BYTES,
+                                         ImageElement.NS_QUOTA,
+                                         ImageElement.DS_QUOTA,
+                                         ImageElement.PERMISSION_STRING,
+                                         ImageElement.USER_NAME,
+                                         ImageElement.GROUP_NAME);
+  }
+  
+  public DelimitedImageVisitor(String filename) throws IOException {
+    this(filename, false);
+  }
+
+  public DelimitedImageVisitor(String outputFile, boolean printToScreen) 
+                                                           throws IOException {
+    this(outputFile, printToScreen, defaultDelimiter);
+  }
+  
+  public DelimitedImageVisitor(String outputFile, boolean printToScreen, 
+                               String delimiter) throws IOException {
+    super(outputFile, printToScreen);
+    this.delimiter = delimiter;
+    reset();
+  }
+
+  /**
+   * Reset the values of the elements we're tracking in order to handle
+   * the next file
+   */
+  private void reset() {
+    elements.clear();
+    for(ImageElement e : elementsToTrack) 
+      elements.put(e, null);
+    
+    fileSize = 0l;
+  }
+  
+  @Override
+  void leaveEnclosingElement() throws IOException {
+    ImageElement elem = elemQ.pop();
+
+    // If we're done with an inode, write out our results and start over
+    if(elem == ImageElement.INODE || 
+       elem == ImageElement.INODE_UNDER_CONSTRUCTION) {
+      writeLine();
+      write("\n");
+      reset();
+    }
+  }
+
+  /**
+   * Iterate through all the elements we're tracking and, if a value was
+   * recorded for it, write it out.
+   */
+  private void writeLine() throws IOException {
+    Iterator<ImageElement> it = elementsToTrack.iterator();
+    
+    while(it.hasNext()) {
+      ImageElement e = it.next();
+      
+      String v = null;
+      if(e == ImageElement.NUM_BYTES)
+        v = String.valueOf(fileSize);
+      else
+        v = elements.get(e);
+      
+      if(v != null)
+        write(v);
+      
+      if(it.hasNext())
+        write(delimiter);
+    }
+  }
+
+  @Override
+  void visit(ImageElement element, String value) throws IOException {
+    // Explicitly label the root path
+    if(element == ImageElement.INODE_PATH && value.equals(""))
+      value = "/";
+    
+    // Special case of file size, which is sum of the num bytes in each block
+    if(element == ImageElement.NUM_BYTES)
+      fileSize += Long.valueOf(value);
+    
+    if(elements.containsKey(element) && element != ImageElement.NUM_BYTES)
+      elements.put(element, value);
+    
+  }
+
+  @Override
+  void visitEnclosingElement(ImageElement element) throws IOException {
+    elemQ.push(element);
+  }
+
+  @Override
+  void visitEnclosingElement(ImageElement element, ImageElement key,
+      String value) throws IOException {
+    // Special case as numBlocks is an attribute of the blocks element
+    if(key == ImageElement.NUM_BLOCKS 
+        && elements.containsKey(ImageElement.NUM_BLOCKS))
+      elements.put(key, value);
+    
+    elemQ.push(element);
+  }
+  
+  @Override
+  void start() throws IOException { /* Nothing to do */ }
+}
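
A hypothetical, same-package use of the constructors above (the class is package-private); the output paths and the pipe delimiter are made up. Each completed inode becomes one line of delimiter-separated columns in the order listed in elementsToTrack:

    package org.apache.hadoop.hdfs.tools.offlineImageViewer;

    import java.io.IOException;

    // Hypothetical usage only; file names and the custom delimiter are invented.
    class DelimitedVisitorUsageSketch {
      public static void main(String[] args) throws IOException {
        // default tab delimiter, write to file only
        DelimitedImageVisitor tsv = new DelimitedImageVisitor("/tmp/fsimage.tsv");
        // custom delimiter, also echoing to the console
        DelimitedImageVisitor psv =
            new DelimitedImageVisitor("/tmp/fsimage.psv", true, "|");
      }
    }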

+ 36 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/DepthCounter.java

@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+/**
+ * Utility class for tracking descent into the structure of the
+ * Visitor class (ImageVisitor, EditsVisitor etc.)
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+public class DepthCounter {
+  private int depth = 0;
+
+  public void incLevel() { depth++; }
+  public void decLevel() { if(depth >= 1) depth--; }
+  public int  getLevel() { return depth; }
+}
+

+ 193 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionVisitor.java

@@ -0,0 +1,193 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.IOException;
+import java.util.LinkedList;
+
+/**
+ * File size distribution visitor.
+ * 
+ * <h3>Description.</h3>
+ * This is the tool for analyzing file sizes in the namespace image.
+ * In order to run the tool one should define a range of integers
+ * <tt>[0, maxSize]</tt> by specifying <tt>maxSize</tt> and a <tt>step</tt>.
+ * The range of integers is divided into segments of size <tt>step</tt>: 
+ * <tt>[0, s<sub>1</sub>, ..., s<sub>n-1</sub>, maxSize]</tt>,
+ * and the visitor calculates how many files in the system fall into 
+ * each segment <tt>[s<sub>i-1</sub>, s<sub>i</sub>)</tt>. 
+ * Note that files larger than <tt>maxSize</tt> always fall into 
+ * the very last segment.
+ * 
+ * <h3>Input.</h3>
+ * <ul>
+ * <li><tt>filename</tt> specifies the location of the image file;</li>
+ * <li><tt>maxSize</tt> determines the range <tt>[0, maxSize]</tt> of files
+ * sizes considered by the visitor;</li>
+ * <li><tt>step</tt> the range is divided into segments of size step.</li>
+ * </ul>
+ *
+ * <h3>Output.</h3>
+ * The output file is formatted as a tab separated two column table:
+ * Size and NumFiles. Where Size represents the start of the segment,
+ * and numFiles is the number of files form the image which size falls in 
+ * this segment.
+ */
+class FileDistributionVisitor extends TextWriterImageVisitor {
+  final private LinkedList<ImageElement> elemS = new LinkedList<ImageElement>();
+
+  private final static long MAX_SIZE_DEFAULT = 0x2000000000L;   // 1/8 TB = 2^37
+  private final static int INTERVAL_DEFAULT = 0x200000;         // 2 MB = 2^21
+
+  private int[] distribution;
+  private long maxSize;
+  private int step;
+
+  private int totalFiles;
+  private int totalDirectories;
+  private int totalBlocks;
+  private long totalSpace;
+  private long maxFileSize;
+
+  private FileContext current;
+
+  private boolean inInode = false;
+
+  /**
+   * File or directory information.
+   */
+  private static class FileContext {
+    String path;
+    long fileSize;
+    int numBlocks;
+    int replication;
+  }
+
+  public FileDistributionVisitor(String filename,
+                                 long maxSize,
+                                 int step) throws IOException {
+    super(filename, false);
+    this.maxSize = (maxSize == 0 ? MAX_SIZE_DEFAULT : maxSize);
+    this.step = (step == 0 ? INTERVAL_DEFAULT : step);
+    long numIntervals = this.maxSize / this.step;
+    if(numIntervals >= Integer.MAX_VALUE)
+      throw new IOException("Too many distribution intervals " + numIntervals);
+    this.distribution = new int[1 + (int)(numIntervals)];
+    this.totalFiles = 0;
+    this.totalDirectories = 0;
+    this.totalBlocks = 0;
+    this.totalSpace = 0;
+    this.maxFileSize = 0;
+  }
+
+  @Override
+  void start() throws IOException {}
+
+  @Override
+  void finish() throws IOException {
+    output();
+    super.finish();
+  }
+
+  @Override
+  void finishAbnormally() throws IOException {
+    System.out.println("*** Image processing finished abnormally.  Ending ***");
+    output();
+    super.finishAbnormally();
+  }
+
+  private void output() throws IOException {
+    // write the distribution into the output file
+    write("Size\tNumFiles\n");
+    for(int i = 0; i < distribution.length; i++)
+      write(((long)i * step) + "\t" + distribution[i] + "\n");
+    System.out.println("totalFiles = " + totalFiles);
+    System.out.println("totalDirectories = " + totalDirectories);
+    System.out.println("totalBlocks = " + totalBlocks);
+    System.out.println("totalSpace = " + totalSpace);
+    System.out.println("maxFileSize = " + maxFileSize);
+  }
+
+  @Override
+  void leaveEnclosingElement() throws IOException {
+    ImageElement elem = elemS.pop();
+
+    if(elem != ImageElement.INODE &&
+       elem != ImageElement.INODE_UNDER_CONSTRUCTION)
+      return;
+    inInode = false;
+    if(current.numBlocks < 0) {
+      totalDirectories ++;
+      return;
+    }
+    totalFiles++;
+    totalBlocks += current.numBlocks;
+    totalSpace += current.fileSize * current.replication;
+    if(maxFileSize < current.fileSize)
+      maxFileSize = current.fileSize;
+    int high;
+    if(current.fileSize > maxSize)
+      high = distribution.length-1;
+    else
+      high = (int)Math.ceil((double)current.fileSize / step);
+    distribution[high]++;
+    if(totalFiles % 1000000 == 1)
+      System.out.println("Files processed: " + totalFiles
+          + "  Current: " + current.path);
+  }
+
+  @Override
+  void visit(ImageElement element, String value) throws IOException {
+    if(inInode) {
+      switch(element) {
+      case INODE_PATH:
+        current.path = (value.equals("") ? "/" : value);
+        break;
+      case REPLICATION:
+        current.replication = Integer.valueOf(value);
+        break;
+      case NUM_BYTES:
+        current.fileSize += Long.valueOf(value);
+        break;
+      default:
+        break;
+      }
+    }
+  }
+
+  @Override
+  void visitEnclosingElement(ImageElement element) throws IOException {
+    elemS.push(element);
+    if(element == ImageElement.INODE ||
+       element == ImageElement.INODE_UNDER_CONSTRUCTION) {
+      current = new FileContext();
+      inInode = true;
+    }
+  }
+
+  @Override
+  void visitEnclosingElement(ImageElement element,
+      ImageElement key, String value) throws IOException {
+    elemS.push(element);
+    if(element == ImageElement.INODE ||
+       element == ImageElement.INODE_UNDER_CONSTRUCTION)
+      inInode = true;
+    else if(element == ImageElement.BLOCKS)
+      current.numBlocks = Integer.parseInt(value);
+  }
+}
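
The bucketing in leaveEnclosingElement() above maps a file size to ceil(size / step), with anything larger than maxSize collected in the final bucket. A small standalone restatement of that arithmetic (the sizes in main are arbitrary examples):

    // Sketch of the FileDistributionVisitor bucket computation; not NameNode code.
    public class FileSizeBucketSketch {
      static int bucket(long fileSize, long maxSize, int step) {
        int buckets = 1 + (int) (maxSize / step);    // matches distribution.length above
        if (fileSize > maxSize) {
          return buckets - 1;                        // overflow bucket
        }
        return (int) Math.ceil((double) fileSize / step);
      }

      public static void main(String[] args) {
        long maxSize = 1L << 30;   // 1 GB range (example)
        int step = 1 << 21;        // 2 MB segments (the INTERVAL_DEFAULT above)
        System.out.println(bucket(0, maxSize, step));           // 0
        System.out.println(bucket(3_000_000, maxSize, step));   // 2
        System.out.println(bucket(5L << 30, maxSize, step));    // last bucket
      }
    }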

+ 83 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoader.java

@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/**
+ * An ImageLoader can accept a DataInputStream to an Hadoop FSImage file
+ * and walk over its structure using the supplied ImageVisitor.
+ *
+ * Each implementation of ImageLoader is designed to rapidly process an
+ * image file.  As long as minor changes are made from one layout version
+ * to another, it is acceptable to tweak one implementation to read the next.
+ * However, if the layout version changes enough that it would make a
+ * processor slow or difficult to read, another processor should be created.
+ * This allows each processor to quickly read an image without getting
+ * bogged down in dealing with significant differences between layout versions.
+ */
+interface ImageLoader {
+
+  /**
+   * @param in DataInputStream pointing to an Hadoop FSImage file
+   * @param v Visitor to apply to the FSImage file
+   * @param enumerateBlocks Should visitor visit each of the file blocks?
+   */
+  public void loadImage(DataInputStream in, ImageVisitor v,
+      boolean enumerateBlocks) throws IOException;
+
+  /**
+   * Can this processor handle the specified version of FSImage file?
+   *
+   * @param version FSImage layout version
+   * @return True if this instance can process the file
+   */
+  public boolean canLoadVersion(int version);
+
+  /**
+   * Factory for obtaining version of image loader that can read
+   * a particular image format.
+   */
+  @InterfaceAudience.Private
+  public class LoaderFactory {
+    // Java doesn't support static methods on interfaces, which necessitates
+    // this factory class
+
+    /**
+     * Find an image loader capable of interpreting the specified
+     * layout version number.  If none, return null;
+     *
+     * @param version fsimage layout version number to be processed
+     * @return ImageLoader that can interpret specified version, or null
+     */
+    static public ImageLoader getLoader(int version) {
+      // Easy to add more image processors as they are written
+      ImageLoader[] loaders = { new ImageLoaderCurrent() };
+
+      for (ImageLoader l : loaders) {
+        if (l.canLoadVersion(version))
+          return l;
+      }
+
+      return null;
+    }
+  }
+}
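
How the pieces fit together is only implied by the factory above; a rough, same-package sketch follows (the fsimage path is hypothetical, and mark/reset is used because the loader implementation re-reads the layout version itself, see ImageLoaderCurrent.loadImage() below):

    package org.apache.hadoop.hdfs.tools.offlineImageViewer;

    import java.io.BufferedInputStream;
    import java.io.DataInputStream;
    import java.io.FileInputStream;
    import java.io.IOException;

    // Rough wiring sketch, not the real offline image viewer entry point.
    class LoaderWiringSketch {
      public static void main(String[] args) throws IOException {
        DataInputStream in = new DataInputStream(
            new BufferedInputStream(new FileInputStream("/tmp/fsimage")));
        in.mark(8);
        int layoutVersion = in.readInt();  // peek at the first int of the file
        in.reset();                        // loadImage() will read it again

        ImageLoader loader = ImageLoader.LoaderFactory.getLoader(layoutVersion);
        if (loader == null) {
          throw new IOException("Unsupported layout version " + layoutVersion);
        }
        loader.loadImage(in, new DelimitedImageVisitor("/tmp/fsimage.tsv"),
            false /* block-handling flag declared on the interface above */);
        in.close();
      }
    }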

+ 821 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java

@@ -0,0 +1,821 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hdfs.protocol.DatanodeInfo.AdminStates;
+import org.apache.hadoop.hdfs.protocol.LayoutFlags;
+import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;
+import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
+import org.apache.hadoop.hdfs.server.namenode.FSImageSerialization;
+import org.apache.hadoop.hdfs.server.namenode.INodeId;
+import org.apache.hadoop.hdfs.server.namenode.NameNodeLayoutVersion;
+import org.apache.hadoop.hdfs.tools.offlineImageViewer.ImageVisitor.ImageElement;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.WritableUtils;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressionCodecFactory;
+import org.apache.hadoop.security.token.delegation.DelegationKey;
+
+/**
+ * ImageLoaderCurrent processes Hadoop FSImage files and walks over
+ * them using a provided ImageVisitor, calling the visitor at each element
+ * enumerated below.
+ *
+ * The only difference between v18 and v19 was the utilization of the
+ * stickybit.  Therefore, the same viewer can read either format.
+ *
+ * Versions -19 fsimage layout (with changes from -16 up):
+ * Image version (int)
+ * Namespace ID (int)
+ * NumFiles (long)
+ * Generation stamp (long)
+ * INodes (count = NumFiles)
+ *  INode
+ *    Path (String)
+ *    Replication (short)
+ *    Modification Time (long as date)
+ *    Access Time (long) // added in -16
+ *    Block size (long)
+ *    Num blocks (int)
+ *    Blocks (count = Num blocks)
+ *      Block
+ *        Block ID (long)
+ *        Num bytes (long)
+ *        Generation stamp (long)
+ *    Namespace Quota (long)
+ *    Diskspace Quota (long) // added in -18
+ *    Permissions
+ *      Username (String)
+ *      Groupname (String)
+ *      OctalPerms (short -> String)  // Modified in -19
+ *    Symlink (String) // added in -23
+ * NumINodesUnderConstruction (int)
+ * INodesUnderConstruction (count = NumINodesUnderConstruction)
+ *  INodeUnderConstruction
+ *    Path (bytes as string)
+ *    Replication (short)
+ *    Modification time (long as date)
+ *    Preferred block size (long)
+ *    Num blocks (int)
+ *    Blocks
+ *      Block
+ *        Block ID (long)
+ *        Num bytes (long)
+ *        Generation stamp (long)
+ *    Permissions
+ *      Username (String)
+ *      Groupname (String)
+ *      OctalPerms (short -> String)
+ *    Client Name (String)
+ *    Client Machine (String)
+ *    NumLocations (int)
+ *    DatanodeDescriptors (count = numLocations) // not loaded into memory
+ *      short                                    // but still in file
+ *      long
+ *      string
+ *      long
+ *      int
+ *      string
+ *      string
+ *      enum
+ *    CurrentDelegationKeyId (int)
+ *    NumDelegationKeys (int)
+ *      DelegationKeys (count = NumDelegationKeys)
+ *        DelegationKeyLength (vint)
+ *        DelegationKey (bytes)
+ *    DelegationTokenSequenceNumber (int)
+ *    NumDelegationTokens (int)
+ *    DelegationTokens (count = NumDelegationTokens)
+ *      DelegationTokenIdentifier
+ *        owner (String)
+ *        renewer (String)
+ *        realUser (String)
+ *        issueDate (vlong)
+ *        maxDate (vlong)
+ *        sequenceNumber (vint)
+ *        masterKeyId (vint)
+ *      expiryTime (long)     
+ *
+ */
+class ImageLoaderCurrent implements ImageLoader {
+  protected final DateFormat dateFormat = 
+                                      new SimpleDateFormat("yyyy-MM-dd HH:mm");
+  private static int[] versions = { -16, -17, -18, -19, -20, -21, -22, -23,
+      -24, -25, -26, -27, -28, -30, -31, -32, -33, -34, -35, -36, -37, -38, -39,
+      -40, -41, -42, -43, -44, -45, -46, -47, -48, -49, -50, -51 };
+  private int imageVersion = 0;
+  
+  private final Map<Long, Boolean> subtreeMap = new HashMap<Long, Boolean>();
+  private final Map<Long, String> dirNodeMap = new HashMap<Long, String>();
+
+  /* (non-Javadoc)
+   * @see ImageLoader#canProcessVersion(int)
+   */
+  @Override
+  public boolean canLoadVersion(int version) {
+    for(int v : versions)
+      if(v == version) return true;
+
+    return false;
+  }
+
+  /* (non-Javadoc)
+   * @see ImageLoader#processImage(java.io.DataInputStream, ImageVisitor, boolean)
+   */
+  @Override
+  public void loadImage(DataInputStream in, ImageVisitor v,
+      boolean skipBlocks) throws IOException {
+    boolean done = false;
+    try {
+      v.start();
+      v.visitEnclosingElement(ImageElement.FS_IMAGE);
+
+      imageVersion = in.readInt();
+      if( !canLoadVersion(imageVersion))
+        throw new IOException("Cannot process fslayout version " + imageVersion);
+      if (NameNodeLayoutVersion.supports(Feature.ADD_LAYOUT_FLAGS, imageVersion)) {
+        LayoutFlags.read(in);
+      }
+
+      v.visit(ImageElement.IMAGE_VERSION, imageVersion);
+      v.visit(ImageElement.NAMESPACE_ID, in.readInt());
+
+      long numInodes = in.readLong();
+
+      v.visit(ImageElement.GENERATION_STAMP, in.readLong());
+
+      if (NameNodeLayoutVersion.supports(Feature.SEQUENTIAL_BLOCK_ID, imageVersion)) {
+        v.visit(ImageElement.GENERATION_STAMP_V2, in.readLong());
+        v.visit(ImageElement.GENERATION_STAMP_V1_LIMIT, in.readLong());
+        v.visit(ImageElement.LAST_ALLOCATED_BLOCK_ID, in.readLong());
+      }
+
+      if (NameNodeLayoutVersion.supports(Feature.STORED_TXIDS, imageVersion)) {
+        v.visit(ImageElement.TRANSACTION_ID, in.readLong());
+      }
+      
+      if (NameNodeLayoutVersion.supports(Feature.ADD_INODE_ID, imageVersion)) {
+        v.visit(ImageElement.LAST_INODE_ID, in.readLong());
+      }
+      
+      boolean supportSnapshot = NameNodeLayoutVersion.supports(Feature.SNAPSHOT,
+          imageVersion);
+      if (supportSnapshot) {
+        v.visit(ImageElement.SNAPSHOT_COUNTER, in.readInt());
+        int numSnapshots = in.readInt();
+        v.visit(ImageElement.NUM_SNAPSHOTS_TOTAL, numSnapshots);
+        for (int i = 0; i < numSnapshots; i++) {
+          processSnapshot(in, v);
+        }
+      }
+      
+      if (NameNodeLayoutVersion.supports(Feature.FSIMAGE_COMPRESSION, imageVersion)) {
+        boolean isCompressed = in.readBoolean();
+        v.visit(ImageElement.IS_COMPRESSED, String.valueOf(isCompressed));
+        if (isCompressed) {
+          String codecClassName = Text.readString(in);
+          v.visit(ImageElement.COMPRESS_CODEC, codecClassName);
+          CompressionCodecFactory codecFac = new CompressionCodecFactory(
+              new Configuration());
+          CompressionCodec codec = codecFac.getCodecByClassName(codecClassName);
+          if (codec == null) {
+            throw new IOException("Image compression codec not supported: "
+                + codecClassName);
+          }
+          in = new DataInputStream(codec.createInputStream(in));
+        }
+      }
+      processINodes(in, v, numInodes, skipBlocks, supportSnapshot);
+      subtreeMap.clear();
+      dirNodeMap.clear();
+
+      processINodesUC(in, v, skipBlocks);
+
+      if (NameNodeLayoutVersion.supports(Feature.DELEGATION_TOKEN, imageVersion)) {
+        processDelegationTokens(in, v);
+      }
+      
+      if (NameNodeLayoutVersion.supports(Feature.CACHING, imageVersion)) {
+        processCacheManagerState(in, v);
+      }
+      v.leaveEnclosingElement(); // FSImage
+      done = true;
+    } finally {
+      if (done) {
+        v.finish();
+      } else {
+        v.finishAbnormally();
+      }
+    }
+  }
+
+  /**
+   * Process CacheManager state from the fsimage.
+   */
+  private void processCacheManagerState(DataInputStream in, ImageVisitor v)
+      throws IOException {
+    v.visit(ImageElement.CACHE_NEXT_ENTRY_ID, in.readLong());
+    final int numPools = in.readInt();
+    for (int i=0; i<numPools; i++) {
+      v.visit(ImageElement.CACHE_POOL_NAME, Text.readString(in));
+      processCachePoolPermission(in, v);
+      v.visit(ImageElement.CACHE_POOL_WEIGHT, in.readInt());
+    }
+    final int numEntries = in.readInt();
+    for (int i=0; i<numEntries; i++) {
+      v.visit(ImageElement.CACHE_ENTRY_PATH, Text.readString(in));
+      v.visit(ImageElement.CACHE_ENTRY_REPLICATION, in.readShort());
+      v.visit(ImageElement.CACHE_ENTRY_POOL_NAME, Text.readString(in));
+    }
+  }
+  /**
+   * Process the Delegation Token related section in fsimage.
+   * 
+   * @param in DataInputStream to process
+   * @param v Visitor to walk over records
+   */
+  private void processDelegationTokens(DataInputStream in, ImageVisitor v)
+      throws IOException {
+    v.visit(ImageElement.CURRENT_DELEGATION_KEY_ID, in.readInt());
+    int numDKeys = in.readInt();
+    v.visitEnclosingElement(ImageElement.DELEGATION_KEYS,
+        ImageElement.NUM_DELEGATION_KEYS, numDKeys);
+    for(int i =0; i < numDKeys; i++) {
+      DelegationKey key = new DelegationKey();
+      key.readFields(in);
+      v.visit(ImageElement.DELEGATION_KEY, key.toString());
+    }
+    v.leaveEnclosingElement();
+    v.visit(ImageElement.DELEGATION_TOKEN_SEQUENCE_NUMBER, in.readInt());
+    int numDTokens = in.readInt();
+    v.visitEnclosingElement(ImageElement.DELEGATION_TOKENS,
+        ImageElement.NUM_DELEGATION_TOKENS, numDTokens);
+    for(int i=0; i<numDTokens; i++){
+      DelegationTokenIdentifier id = new  DelegationTokenIdentifier();
+      id.readFields(in);
+      long expiryTime = in.readLong();
+      v.visitEnclosingElement(ImageElement.DELEGATION_TOKEN_IDENTIFIER);
+      v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_KIND,
+          id.getKind().toString());
+      v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_SEQNO,
+          id.getSequenceNumber());
+      v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_OWNER,
+          id.getOwner().toString());
+      v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_RENEWER,
+          id.getRenewer().toString());
+      v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_REALUSER,
+          id.getRealUser().toString());
+      v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_ISSUE_DATE,
+          id.getIssueDate());
+      v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_MAX_DATE,
+          id.getMaxDate());
+      v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_EXPIRY_TIME,
+          expiryTime);
+      v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_MASTER_KEY_ID,
+          id.getMasterKeyId());
+      v.leaveEnclosingElement(); // DELEGATION_TOKEN_IDENTIFIER
+    }
+    v.leaveEnclosingElement(); // DELEGATION_TOKENS
+  }
+
+  /**
+   * Process the INodes under construction section of the fsimage.
+   *
+   * @param in DataInputStream to process
+   * @param v Visitor to walk over inodes
+   * @param skipBlocks Walk over each block?
+   */
+  private void processINodesUC(DataInputStream in, ImageVisitor v,
+      boolean skipBlocks) throws IOException {
+    int numINUC = in.readInt();
+
+    v.visitEnclosingElement(ImageElement.INODES_UNDER_CONSTRUCTION,
+                           ImageElement.NUM_INODES_UNDER_CONSTRUCTION, numINUC);
+
+    for(int i = 0; i < numINUC; i++) {
+      v.visitEnclosingElement(ImageElement.INODE_UNDER_CONSTRUCTION);
+      byte [] name = FSImageSerialization.readBytes(in);
+      String n = new String(name, "UTF8");
+      v.visit(ImageElement.INODE_PATH, n);
+      
+      if (NameNodeLayoutVersion.supports(Feature.ADD_INODE_ID, imageVersion)) {
+        long inodeId = in.readLong();
+        v.visit(ImageElement.INODE_ID, inodeId);
+      }
+      
+      v.visit(ImageElement.REPLICATION, in.readShort());
+      v.visit(ImageElement.MODIFICATION_TIME, formatDate(in.readLong()));
+
+      v.visit(ImageElement.PREFERRED_BLOCK_SIZE, in.readLong());
+      int numBlocks = in.readInt();
+      processBlocks(in, v, numBlocks, skipBlocks);
+
+      processPermission(in, v);
+      v.visit(ImageElement.CLIENT_NAME, FSImageSerialization.readString(in));
+      v.visit(ImageElement.CLIENT_MACHINE, FSImageSerialization.readString(in));
+
+      // Skip over the datanode descriptors, which are still stored in the
+      // file but are not used by the datanode or loaded into memory
+      int numLocs = in.readInt();
+      for(int j = 0; j < numLocs; j++) {
+        in.readShort();
+        in.readLong();
+        in.readLong();
+        in.readLong();
+        in.readInt();
+        FSImageSerialization.readString(in);
+        FSImageSerialization.readString(in);
+        WritableUtils.readEnum(in, AdminStates.class);
+      }
+
+      v.leaveEnclosingElement(); // INodeUnderConstruction
+    }
+
+    v.leaveEnclosingElement(); // INodesUnderConstruction
+  }
+
+  /**
+   * Process the blocks section of the fsimage.
+   *
+   * @param in Datastream to process
+   * @param v Visitor to walk over inodes
+   * @param skipBlocks Walk over each block?
+   */
+  private void processBlocks(DataInputStream in, ImageVisitor v,
+      int numBlocks, boolean skipBlocks) throws IOException {
+    v.visitEnclosingElement(ImageElement.BLOCKS,
+                            ImageElement.NUM_BLOCKS, numBlocks);
+    
+    // directory or symlink or reference node, no blocks to process    
+    if(numBlocks < 0) { 
+      v.leaveEnclosingElement(); // Blocks
+      return;
+    }
+    
+    if(skipBlocks) {
+      int bytesToSkip = ((Long.SIZE * 3 /* fields */) / 8 /*bits*/) * numBlocks;
+      if(in.skipBytes(bytesToSkip) != bytesToSkip)
+        throw new IOException("Error skipping over blocks");
+      
+    } else {
+      for(int j = 0; j < numBlocks; j++) {
+        v.visitEnclosingElement(ImageElement.BLOCK);
+        v.visit(ImageElement.BLOCK_ID, in.readLong());
+        v.visit(ImageElement.NUM_BYTES, in.readLong());
+        v.visit(ImageElement.GENERATION_STAMP, in.readLong());
+        v.leaveEnclosingElement(); // Block
+      }
+    }
+    v.leaveEnclosingElement(); // Blocks
+  }
+
+  /**
+   * Extract the INode permissions stored in the fsimage file.
+   *
+   * @param in Datastream to process
+   * @param v Visitor to walk over inodes
+   */
+  private void processPermission(DataInputStream in, ImageVisitor v)
+      throws IOException {
+    v.visitEnclosingElement(ImageElement.PERMISSIONS);
+    v.visit(ImageElement.USER_NAME, Text.readString(in));
+    v.visit(ImageElement.GROUP_NAME, Text.readString(in));
+    FsPermission fsp = new FsPermission(in.readShort());
+    v.visit(ImageElement.PERMISSION_STRING, fsp.toString());
+    v.leaveEnclosingElement(); // Permissions
+  }
+
+  /**
+   * Extract CachePool permissions stored in the fsimage file.
+   *
+   * @param in Datastream to process
+   * @param v Visitor to walk over inodes
+   */
+  private void processCachePoolPermission(DataInputStream in, ImageVisitor v)
+      throws IOException {
+    v.visitEnclosingElement(ImageElement.PERMISSIONS);
+    v.visit(ImageElement.CACHE_POOL_OWNER_NAME, Text.readString(in));
+    v.visit(ImageElement.CACHE_POOL_GROUP_NAME, Text.readString(in));
+    FsPermission fsp = new FsPermission(in.readShort());
+    v.visit(ImageElement.CACHE_POOL_PERMISSION_STRING, fsp.toString());
+    v.leaveEnclosingElement(); // Permissions
+  }
+
+  /**
+   * Process the INode records stored in the fsimage.
+   *
+   * @param in Datastream to process
+   * @param v Visitor to walk over INodes
+   * @param numInodes Number of INodes stored in file
+   * @param skipBlocks Process all the blocks within the INode?
+   * @param supportSnapshot Whether or not the imageVersion supports snapshot
+   * @throws VisitException
+   * @throws IOException
+   */
+  private void processINodes(DataInputStream in, ImageVisitor v,
+      long numInodes, boolean skipBlocks, boolean supportSnapshot)
+      throws IOException {
+    v.visitEnclosingElement(ImageElement.INODES,
+        ImageElement.NUM_INODES, numInodes);
+    
+    if (NameNodeLayoutVersion.supports(Feature.FSIMAGE_NAME_OPTIMIZATION, imageVersion)) {
+      if (!supportSnapshot) {
+        processLocalNameINodes(in, v, numInodes, skipBlocks);
+      } else {
+        processLocalNameINodesWithSnapshot(in, v, skipBlocks);
+      }
+    } else { // full path name
+      processFullNameINodes(in, v, numInodes, skipBlocks);
+    }
+
+    
+    v.leaveEnclosingElement(); // INodes
+  }
+  
+  /**
+   * Process image with local path name
+   * 
+   * @param in image stream
+   * @param v visitor
+   * @param numInodes number of inodes to read
+   * @param skipBlocks skip blocks or not
+   * @throws IOException if any error occurs
+   */
+  private void processLocalNameINodes(DataInputStream in, ImageVisitor v,
+      long numInodes, boolean skipBlocks) throws IOException {
+    // process root
+    processINode(in, v, skipBlocks, "", false);
+    numInodes--;
+    while (numInodes > 0) {
+      numInodes -= processDirectory(in, v, skipBlocks);
+    }
+  }
+  
+  private int processDirectory(DataInputStream in, ImageVisitor v,
+     boolean skipBlocks) throws IOException {
+    String parentName = FSImageSerialization.readString(in);
+    return processChildren(in, v, skipBlocks, parentName);
+  }
+  
+  /**
+   * Process image with local path name and snapshot support
+   * 
+   * @param in image stream
+   * @param v visitor
+   * @param skipBlocks skip blocks or not
+   */
+  private void processLocalNameINodesWithSnapshot(DataInputStream in,
+      ImageVisitor v, boolean skipBlocks) throws IOException {
+    // process root
+    processINode(in, v, skipBlocks, "", false);
+    processDirectoryWithSnapshot(in, v, skipBlocks);
+  }
+  
+  /**
+   * Process directories when snapshot is supported.
+   */
+  private void processDirectoryWithSnapshot(DataInputStream in, ImageVisitor v,
+      boolean skipBlocks) throws IOException {
+    // 1. load dir node id
+    long inodeId = in.readLong();
+    
+    String dirName = dirNodeMap.remove(inodeId);
+    Boolean visitedRef = subtreeMap.get(inodeId);
+    if (visitedRef != null) {
+      if (visitedRef.booleanValue()) { // the subtree has been visited
+        return;
+      } else { // first time to visit
+        subtreeMap.put(inodeId, true);
+      }
+    } // else the dir is not linked by a RefNode, thus cannot be revisited
+    
+    // 2. load possible snapshots
+    processSnapshots(in, v, dirName);
+    // 3. load children nodes
+    processChildren(in, v, skipBlocks, dirName);
+    // 4. load possible directory diff list
+    processDirectoryDiffList(in, v, dirName);
+    // recursively process sub-directories
+    final int numSubTree = in.readInt();
+    for (int i = 0; i < numSubTree; i++) {
+      processDirectoryWithSnapshot(in, v, skipBlocks);
+    }
+  }
+  
+  /**
+   * Process snapshots of a snapshottable directory
+   */
+  private void processSnapshots(DataInputStream in, ImageVisitor v,
+      String rootName) throws IOException {
+    final int numSnapshots = in.readInt();
+    if (numSnapshots >= 0) {
+      v.visitEnclosingElement(ImageElement.SNAPSHOTS,
+          ImageElement.NUM_SNAPSHOTS, numSnapshots);
+      for (int i = 0; i < numSnapshots; i++) {
+        // process snapshot
+        v.visitEnclosingElement(ImageElement.SNAPSHOT);
+        v.visit(ImageElement.SNAPSHOT_ID, in.readInt());
+        v.leaveEnclosingElement();
+      }
+      v.visit(ImageElement.SNAPSHOT_QUOTA, in.readInt());
+      v.leaveEnclosingElement();
+    }
+  }
+  
+  private void processSnapshot(DataInputStream in, ImageVisitor v)
+      throws IOException {
+    v.visitEnclosingElement(ImageElement.SNAPSHOT);
+    v.visit(ImageElement.SNAPSHOT_ID, in.readInt());
+    // process root of snapshot
+    v.visitEnclosingElement(ImageElement.SNAPSHOT_ROOT);
+    processINode(in, v, true, "", false);
+    v.leaveEnclosingElement();
+    v.leaveEnclosingElement();
+  }
+  
+  private void processDirectoryDiffList(DataInputStream in, ImageVisitor v,
+      String currentINodeName) throws IOException {
+    final int numDirDiff = in.readInt();
+    if (numDirDiff >= 0) {
+      v.visitEnclosingElement(ImageElement.SNAPSHOT_DIR_DIFFS,
+          ImageElement.NUM_SNAPSHOT_DIR_DIFF, numDirDiff);
+      for (int i = 0; i < numDirDiff; i++) {
+        // process directory diffs in reverse chronological order
+        processDirectoryDiff(in, v, currentINodeName); 
+      }
+      v.leaveEnclosingElement();
+    }
+  }
+  
+  private void processDirectoryDiff(DataInputStream in, ImageVisitor v,
+      String currentINodeName) throws IOException {
+    v.visitEnclosingElement(ImageElement.SNAPSHOT_DIR_DIFF);
+    int snapshotId = in.readInt();
+    v.visit(ImageElement.SNAPSHOT_DIFF_SNAPSHOTID, snapshotId);
+    v.visit(ImageElement.SNAPSHOT_DIR_DIFF_CHILDREN_SIZE, in.readInt());
+    
+    // process snapshotINode
+    boolean useRoot = in.readBoolean();
+    if (!useRoot) {
+      if (in.readBoolean()) {
+        v.visitEnclosingElement(ImageElement.SNAPSHOT_INODE_DIRECTORY_ATTRIBUTES);
+        if (NameNodeLayoutVersion.supports(Feature.OPTIMIZE_SNAPSHOT_INODES, imageVersion)) {
+          processINodeDirectoryAttributes(in, v, currentINodeName);
+        } else {
+          processINode(in, v, true, currentINodeName, true);
+        }
+        v.leaveEnclosingElement();
+      }
+    }
+    
+    // process createdList
+    int createdSize = in.readInt();
+    v.visitEnclosingElement(ImageElement.SNAPSHOT_DIR_DIFF_CREATEDLIST,
+        ImageElement.SNAPSHOT_DIR_DIFF_CREATEDLIST_SIZE, createdSize);
+    for (int i = 0; i < createdSize; i++) {
+      String createdNode = FSImageSerialization.readString(in);
+      v.visit(ImageElement.SNAPSHOT_DIR_DIFF_CREATED_INODE, createdNode);
+    }
+    v.leaveEnclosingElement();
+    
+    // process deletedList
+    int deletedSize = in.readInt();
+    v.visitEnclosingElement(ImageElement.SNAPSHOT_DIR_DIFF_DELETEDLIST,
+        ImageElement.SNAPSHOT_DIR_DIFF_DELETEDLIST_SIZE, deletedSize);
+    for (int i = 0; i < deletedSize; i++) {
+      v.visitEnclosingElement(ImageElement.SNAPSHOT_DIR_DIFF_DELETED_INODE);
+      processINode(in, v, false, currentINodeName, true);
+      v.leaveEnclosingElement();
+    }
+    v.leaveEnclosingElement();
+    v.leaveEnclosingElement();
+  }
+
+  private void processINodeDirectoryAttributes(DataInputStream in, ImageVisitor v,
+      String parentName) throws IOException {
+    final String pathName = readINodePath(in, parentName);
+    v.visit(ImageElement.INODE_PATH, pathName);
+    processPermission(in, v);
+    v.visit(ImageElement.MODIFICATION_TIME, formatDate(in.readLong()));
+
+    v.visit(ImageElement.NS_QUOTA, in.readLong());
+    v.visit(ImageElement.DS_QUOTA, in.readLong());
+  }
+
+  /** Process children under a directory */
+  private int processChildren(DataInputStream in, ImageVisitor v,
+      boolean skipBlocks, String parentName) throws IOException {
+    int numChildren = in.readInt();
+    for (int i = 0; i < numChildren; i++) {
+      processINode(in, v, skipBlocks, parentName, false);
+    }
+    return numChildren;
+  }
+  
+  /**
+   * Process image with full path name
+   * 
+   * @param in image stream
+   * @param v visitor
+   * @param numInodes number of inodes to read
+   * @param skipBlocks skip blocks or not
+   * @throws IOException if any error occurs
+   */
+  private void processFullNameINodes(DataInputStream in, ImageVisitor v,
+      long numInodes, boolean skipBlocks) throws IOException {
+    for(long i = 0; i < numInodes; i++) {
+      processINode(in, v, skipBlocks, null, false);
+    }
+  }
+ 
+  private String readINodePath(DataInputStream in, String parentName)
+      throws IOException {
+    String pathName = FSImageSerialization.readString(in);
+    if (parentName != null) {  // local name
+      pathName = "/" + pathName;
+      if (!"/".equals(parentName)) { // children of non-root directory
+        pathName = parentName + pathName;
+      }
+    }
+    return pathName;
+  }
+
+  /**
+   * Process an INode
+   * 
+   * @param in image stream
+   * @param v visitor
+   * @param skipBlocks skip blocks or not
+   * @param parentName the name of its parent node
+   * @param isSnapshotCopy whether or not the inode is a snapshot copy
+   * @throws IOException
+   */
+  private void processINode(DataInputStream in, ImageVisitor v,
+      boolean skipBlocks, String parentName, boolean isSnapshotCopy)
+      throws IOException {
+    boolean supportSnapshot = 
+        NameNodeLayoutVersion.supports(Feature.SNAPSHOT, imageVersion);
+    boolean supportInodeId = 
+        NameNodeLayoutVersion.supports(Feature.ADD_INODE_ID, imageVersion);
+    
+    v.visitEnclosingElement(ImageElement.INODE);
+    final String pathName = readINodePath(in, parentName);
+    v.visit(ImageElement.INODE_PATH, pathName);
+
+    long inodeId = INodeId.GRANDFATHER_INODE_ID;
+    if (supportInodeId) {
+      inodeId = in.readLong();
+      v.visit(ImageElement.INODE_ID, inodeId);
+    }
+    v.visit(ImageElement.REPLICATION, in.readShort());
+    v.visit(ImageElement.MODIFICATION_TIME, formatDate(in.readLong()));
+    if(NameNodeLayoutVersion.supports(Feature.FILE_ACCESS_TIME, imageVersion))
+      v.visit(ImageElement.ACCESS_TIME, formatDate(in.readLong()));
+    v.visit(ImageElement.BLOCK_SIZE, in.readLong());
+    int numBlocks = in.readInt();
+
+    processBlocks(in, v, numBlocks, skipBlocks);
+    
+    if (numBlocks >= 0) { // File
+      if (supportSnapshot) {
+        // make sure subtreeMap only contains entry for directory
+        subtreeMap.remove(inodeId);
+        // process file diffs
+        processFileDiffList(in, v, parentName);
+        if (isSnapshotCopy) {
+          boolean underConstruction = in.readBoolean();
+          if (underConstruction) {
+            v.visit(ImageElement.CLIENT_NAME,
+                FSImageSerialization.readString(in));
+            v.visit(ImageElement.CLIENT_MACHINE,
+                FSImageSerialization.readString(in));
+          }
+        }
+      }
+      processPermission(in, v);
+    } else if (numBlocks == -1) { // Directory
+      if (supportSnapshot && supportInodeId) {
+        dirNodeMap.put(inodeId, pathName);
+      }
+      v.visit(ImageElement.NS_QUOTA, numBlocks == -1 ? in.readLong() : -1);
+      if (NameNodeLayoutVersion.supports(Feature.DISKSPACE_QUOTA, imageVersion))
+        v.visit(ImageElement.DS_QUOTA, numBlocks == -1 ? in.readLong() : -1);
+      if (supportSnapshot) {
+        boolean snapshottable = in.readBoolean();
+        if (!snapshottable) {
+          boolean withSnapshot = in.readBoolean();
+          v.visit(ImageElement.IS_WITHSNAPSHOT_DIR, Boolean.toString(withSnapshot));
+        } else {
+          v.visit(ImageElement.IS_SNAPSHOTTABLE_DIR, Boolean.toString(snapshottable));
+        }
+      }
+      processPermission(in, v);
+    } else if (numBlocks == -2) {
+      v.visit(ImageElement.SYMLINK, Text.readString(in));
+      processPermission(in, v);
+    } else if (numBlocks == -3) { // reference node
+      final boolean isWithName = in.readBoolean();
+      int snapshotId = in.readInt();
+      if (isWithName) {
+        v.visit(ImageElement.SNAPSHOT_LAST_SNAPSHOT_ID, snapshotId);
+      } else {
+        v.visit(ImageElement.SNAPSHOT_DST_SNAPSHOT_ID, snapshotId);
+      }
+      
+      final boolean firstReferred = in.readBoolean();
+      if (firstReferred) {
+        // if a subtree is linked by multiple "parents", the corresponding dir
+        // must be referred by a reference node. we put the reference node into
+        // the subtreeMap here and let its value be false. when we later visit
+        // the subtree for the first time, we change the value to true.
+        subtreeMap.put(inodeId, false);
+        v.visitEnclosingElement(ImageElement.SNAPSHOT_REF_INODE);
+        processINode(in, v, skipBlocks, parentName, isSnapshotCopy);
+        v.leaveEnclosingElement();  // referred inode    
+      } else {
+        v.visit(ImageElement.SNAPSHOT_REF_INODE_ID, in.readLong());
+      }
+    }
+
+    v.leaveEnclosingElement(); // INode
+  }
+
+  private void processINodeFileAttributes(DataInputStream in, ImageVisitor v,
+      String parentName) throws IOException {
+    final String pathName = readINodePath(in, parentName);
+    v.visit(ImageElement.INODE_PATH, pathName);
+    processPermission(in, v);
+    v.visit(ImageElement.MODIFICATION_TIME, formatDate(in.readLong()));
+    if(NameNodeLayoutVersion.supports(Feature.FILE_ACCESS_TIME, imageVersion)) {
+      v.visit(ImageElement.ACCESS_TIME, formatDate(in.readLong()));
+    }
+
+    v.visit(ImageElement.REPLICATION, in.readShort());
+    v.visit(ImageElement.BLOCK_SIZE, in.readLong());
+  }
+  
+  private void processFileDiffList(DataInputStream in, ImageVisitor v,
+      String currentINodeName) throws IOException {
+    final int size = in.readInt();
+    if (size >= 0) {
+      v.visitEnclosingElement(ImageElement.SNAPSHOT_FILE_DIFFS,
+          ImageElement.NUM_SNAPSHOT_FILE_DIFF, size);
+      for (int i = 0; i < size; i++) {
+        processFileDiff(in, v, currentINodeName);
+      }
+      v.leaveEnclosingElement();
+    }
+  }
+  
+  private void processFileDiff(DataInputStream in, ImageVisitor v,
+      String currentINodeName) throws IOException {
+    int snapshotId = in.readInt();
+    v.visitEnclosingElement(ImageElement.SNAPSHOT_FILE_DIFF,
+        ImageElement.SNAPSHOT_DIFF_SNAPSHOTID, snapshotId);
+    v.visit(ImageElement.SNAPSHOT_FILE_SIZE, in.readLong());
+    if (in.readBoolean()) {
+      v.visitEnclosingElement(ImageElement.SNAPSHOT_INODE_FILE_ATTRIBUTES);
+      if (NameNodeLayoutVersion.supports(Feature.OPTIMIZE_SNAPSHOT_INODES, imageVersion)) {
+        processINodeFileAttributes(in, v, currentINodeName);
+      } else {
+        processINode(in, v, true, currentINodeName, true);
+      }
+      v.leaveEnclosingElement();
+    }
+    v.leaveEnclosingElement();
+  }
+  
+  /**
+   * Helper method to format dates during processing.
+   * @param date Date as read from image file
+   * @return String version of date format
+   */
+  private String formatDate(long date) {
+    return dateFormat.format(new Date(date));
+  }
+}
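
One detail of processINode() above that the layout comment does not spell out: the numBlocks field doubles as a record-type tag. A compact restatement of the branches (a sketch only, mirroring the if/else chain above):

    // Non-negative means a regular file; negative values select other kinds.
    public class INodeKindSketch {
      static String kind(int numBlocks) {
        if (numBlocks >= 0) {
          return "file with " + numBlocks + " blocks";
        }
        switch (numBlocks) {
          case -1: return "directory";
          case -2: return "symlink";
          case -3: return "reference node";
          default: return "unknown tag " + numBlocks;
        }
      }

      public static void main(String[] args) {
        for (int tag : new int[] {2, -1, -2, -3}) {
          System.out.println(tag + " -> " + kind(tag));
        }
      }
    }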

+ 212 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageVisitor.java

@@ -0,0 +1,212 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.IOException;
+
+/**
+ * An implementation of ImageVisitor can traverse the structure of a
+ * Hadoop fsimage and respond to each of the structures within the file.
+ */
+abstract class ImageVisitor {
+
+  /**
+   * Structural elements of an FSImage that may be encountered within the
+   * file. ImageVisitors are able to handle processing any of these elements.
+   */
+  public enum ImageElement {
+    FS_IMAGE,
+    IMAGE_VERSION,
+    NAMESPACE_ID,
+    IS_COMPRESSED,
+    COMPRESS_CODEC,
+    LAYOUT_VERSION,
+    NUM_INODES,
+    GENERATION_STAMP,
+    GENERATION_STAMP_V2,
+    GENERATION_STAMP_V1_LIMIT,
+    LAST_ALLOCATED_BLOCK_ID,
+    INODES,
+    INODE,
+    INODE_PATH,
+    REPLICATION,
+    MODIFICATION_TIME,
+    ACCESS_TIME,
+    BLOCK_SIZE,
+    NUM_BLOCKS,
+    BLOCKS,
+    BLOCK,
+    BLOCK_ID,
+    NUM_BYTES,
+    NS_QUOTA,
+    DS_QUOTA,
+    PERMISSIONS,
+    SYMLINK,
+    NUM_INODES_UNDER_CONSTRUCTION,
+    INODES_UNDER_CONSTRUCTION,
+    INODE_UNDER_CONSTRUCTION,
+    PREFERRED_BLOCK_SIZE,
+    CLIENT_NAME,
+    CLIENT_MACHINE,
+    USER_NAME,
+    GROUP_NAME,
+    PERMISSION_STRING,
+    CURRENT_DELEGATION_KEY_ID,
+    NUM_DELEGATION_KEYS,
+    DELEGATION_KEYS,
+    DELEGATION_KEY,
+    DELEGATION_TOKEN_SEQUENCE_NUMBER,
+    NUM_DELEGATION_TOKENS,
+    DELEGATION_TOKENS,
+    DELEGATION_TOKEN_IDENTIFIER,
+    DELEGATION_TOKEN_IDENTIFIER_KIND,
+    DELEGATION_TOKEN_IDENTIFIER_SEQNO,
+    DELEGATION_TOKEN_IDENTIFIER_OWNER,
+    DELEGATION_TOKEN_IDENTIFIER_RENEWER,
+    DELEGATION_TOKEN_IDENTIFIER_REALUSER,
+    DELEGATION_TOKEN_IDENTIFIER_ISSUE_DATE,
+    DELEGATION_TOKEN_IDENTIFIER_MAX_DATE,
+    DELEGATION_TOKEN_IDENTIFIER_EXPIRY_TIME,
+    DELEGATION_TOKEN_IDENTIFIER_MASTER_KEY_ID,
+    TRANSACTION_ID,
+    LAST_INODE_ID,
+    INODE_ID,
+
+    SNAPSHOT_COUNTER,
+    NUM_SNAPSHOTS_TOTAL,
+    NUM_SNAPSHOTS,
+    SNAPSHOTS,
+    SNAPSHOT,
+    SNAPSHOT_ID,
+    SNAPSHOT_ROOT,
+    SNAPSHOT_QUOTA,
+    NUM_SNAPSHOT_DIR_DIFF,
+    SNAPSHOT_DIR_DIFFS,
+    SNAPSHOT_DIR_DIFF,
+    SNAPSHOT_DIFF_SNAPSHOTID,
+    SNAPSHOT_DIR_DIFF_CHILDREN_SIZE,
+    SNAPSHOT_INODE_FILE_ATTRIBUTES,
+    SNAPSHOT_INODE_DIRECTORY_ATTRIBUTES,
+    SNAPSHOT_DIR_DIFF_CREATEDLIST,
+    SNAPSHOT_DIR_DIFF_CREATEDLIST_SIZE,
+    SNAPSHOT_DIR_DIFF_CREATED_INODE,
+    SNAPSHOT_DIR_DIFF_DELETEDLIST,
+    SNAPSHOT_DIR_DIFF_DELETEDLIST_SIZE,
+    SNAPSHOT_DIR_DIFF_DELETED_INODE,
+    IS_SNAPSHOTTABLE_DIR,
+    IS_WITHSNAPSHOT_DIR,
+    SNAPSHOT_FILE_DIFFS,
+    SNAPSHOT_FILE_DIFF,
+    NUM_SNAPSHOT_FILE_DIFF,
+    SNAPSHOT_FILE_SIZE,
+    SNAPSHOT_DST_SNAPSHOT_ID,
+    SNAPSHOT_LAST_SNAPSHOT_ID,
+    SNAPSHOT_REF_INODE_ID,
+    SNAPSHOT_REF_INODE,
+
+    CACHE_NEXT_ENTRY_ID,
+    CACHE_NUM_POOLS,
+    CACHE_POOL_NAME,
+    CACHE_POOL_OWNER_NAME,
+    CACHE_POOL_GROUP_NAME,
+    CACHE_POOL_PERMISSION_STRING,
+    CACHE_POOL_WEIGHT,
+    CACHE_NUM_ENTRIES,
+    CACHE_ENTRY_PATH,
+    CACHE_ENTRY_REPLICATION,
+    CACHE_ENTRY_POOL_NAME
+  }
+  
+  /**
+   * Begin visiting the fsimage structure.  Opportunity to perform
+   * any initialization necessary for the implementing visitor.
+   */
+  abstract void start() throws IOException;
+
+  /**
+   * Finish visiting the fsimage structure.  Opportunity to perform any
+   * clean up necessary for the implementing visitor.
+   */
+  abstract void finish() throws IOException;
+
+  /**
+   * Finish visiting the fsimage structure after an error has occurred
+   * during the processing.  Opportunity to perform any clean up necessary
+   * for the implementing visitor.
+   */
+  abstract void finishAbnormally() throws IOException;
+
+  /**
+   * Visit a non-enclosing element of the fsimage with the specified value.
+   *
+   * @param element FSImage element
+   * @param value Element's value
+   */
+  abstract void visit(ImageElement element, String value) throws IOException;
+
+  // Convenience methods to automatically convert numeric value types to strings
+  void visit(ImageElement element, int value) throws IOException {
+    visit(element, Integer.toString(value));
+  }
+
+  void visit(ImageElement element, long value) throws IOException {
+    visit(element, Long.toString(value));
+  }
+
+  /**
+   * Begin visiting an element that encloses another element, such as
+   * the beginning of the list of blocks that comprise a file.
+   *
+   * @param element Element being visited
+   */
+  abstract void visitEnclosingElement(ImageElement element)
+     throws IOException;
+
+  /**
+   * Begin visiting an element that encloses another element, such as
+   * the beginning of the list of blocks that comprise a file.
+   *
+   * Also provide an additional key and value for the element, such as the
+   * number of items within the element.
+   *
+   * @param element Element being visited
+   * @param key Key describing the element being visited
+   * @param value Value associated with element being visited
+   */
+  abstract void visitEnclosingElement(ImageElement element,
+      ImageElement key, String value) throws IOException;
+
+  // Convenience methods to automatically convert value types to strings
+  void visitEnclosingElement(ImageElement element,
+      ImageElement key, int value)
+     throws IOException {
+    visitEnclosingElement(element, key, Integer.toString(value));
+  }
+
+  void visitEnclosingElement(ImageElement element,
+      ImageElement key, long value)
+     throws IOException {
+    visitEnclosingElement(element, key, Long.toString(value));
+  }
+
+  /**
+   * Leave current enclosing element.  Called, for instance, at the end of
+   * processing the blocks that comprise a file.
+   */
+  abstract void leaveEnclosingElement() throws IOException;
+}
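
To make the callback contract above concrete, here is a minimal sketch of a visitor, hypothetical and not part of the patch, that only counts INODE elements and prints the total when the walk ends; every other callback is a no-op. It would have to live in the offlineImageViewer package, since ImageVisitor and its methods are package-private.

package org.apache.hadoop.hdfs.tools.offlineImageViewer;

import java.io.IOException;

// Hypothetical example, not part of the patch.
class InodeCountingVisitor extends ImageVisitor {
  private long inodes = 0;

  @Override
  void start() throws IOException {}

  @Override
  void finish() throws IOException {
    System.out.println("Total inodes: " + inodes);
  }

  @Override
  void finishAbnormally() throws IOException {
    System.out.println("Walk aborted after " + inodes + " inodes.");
  }

  @Override
  void visit(ImageElement element, String value) throws IOException {}

  @Override
  void visitEnclosingElement(ImageElement element) throws IOException {
    if (element == ImageElement.INODE) {
      inodes++;
    }
  }

  @Override
  void visitEnclosingElement(ImageElement element, ImageElement key,
      String value) throws IOException {
    visitEnclosingElement(element);
  }

  @Override
  void leaveEnclosingElement() throws IOException {}
}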

+ 111 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/IndentedImageVisitor.java

@@ -0,0 +1,111 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.IOException;
+import java.util.Date;
+
+/**
+ * IndentedImageVisitor walks over an FSImage and displays its structure 
+ * using indenting to organize sections within the image file.
+ */
+class IndentedImageVisitor extends TextWriterImageVisitor {
+  
+  public IndentedImageVisitor(String filename) throws IOException {
+    super(filename);
+  }
+
+  public IndentedImageVisitor(String filename, boolean printToScreen) throws IOException {
+    super(filename, printToScreen);
+  }
+
+  final private DepthCounter dc = new DepthCounter(); // to track leading spacing
+
+  @Override
+  void start() throws IOException {}
+
+  @Override
+  void finish() throws IOException { super.finish(); }
+
+  @Override
+  void finishAbnormally() throws IOException {
+    System.out.println("*** Image processing finished abnormally.  Ending ***");
+    super.finishAbnormally();
+  }
+
+  @Override
+  void leaveEnclosingElement() throws IOException {
+    dc.decLevel();
+  }
+
+  @Override
+  void visit(ImageElement element, String value) throws IOException {
+    printIndents();
+    write(element + " = " + value + "\n");
+  }
+
+  @Override
+  void visit(ImageElement element, long value) throws IOException {
+    if ((element == ImageElement.DELEGATION_TOKEN_IDENTIFIER_EXPIRY_TIME) || 
+        (element == ImageElement.DELEGATION_TOKEN_IDENTIFIER_ISSUE_DATE) || 
+        (element == ImageElement.DELEGATION_TOKEN_IDENTIFIER_MAX_DATE)) {
+      visit(element, new Date(value).toString());
+    } else {
+      visit(element, Long.toString(value));
+    }
+  }
+  
+  @Override
+  void visitEnclosingElement(ImageElement element) throws IOException {
+    printIndents();
+    write(element + "\n");
+    dc.incLevel();
+  }
+
+  // Print element, along with associated key/value pair, in brackets
+  @Override
+  void visitEnclosingElement(ImageElement element,
+      ImageElement key, String value)
+      throws IOException {
+    printIndents();
+    write(element + " [" + key + " = " + value + "]\n");
+    dc.incLevel();
+  }
+
+  /**
+  * Print an appropriate number of spaces for the current level.
+  * FsImages can potentially be millions of lines long, so caching can
+  * significantly speed up output.
+  */
+  final private static String [] indents = { "",
+                                             "  ",
+                                             "    ",
+                                             "      ",
+                                             "        ",
+                                             "          ",
+                                             "            "};
+  private void printIndents() throws IOException {
+    try {
+      write(indents[dc.getLevel()]);
+    } catch (IndexOutOfBoundsException e) {
+      // There's no reason an fsimage would need a deeper indent
+      for(int i = 0; i < dc.getLevel(); i++)
+        write(" ");
+    }
+   }
+}
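
DepthCounter is defined elsewhere in this package and is not shown in this patch. Purely for orientation, a counter consistent with the incLevel()/decLevel()/getLevel() calls above might look like the following hypothetical sketch.

// Hypothetical sketch only; the real DepthCounter is a separate class in the
// offlineImageViewer package. It just tracks how deeply nested the current
// element is, which printIndents() converts into leading spaces.
class DepthCounterSketch {
  private int level = 0;

  void incLevel() {
    level++;
  }

  void decLevel() {
    if (level > 0) {
      level--;
    }
  }

  int getLevel() {
    return level;
  }
}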

+ 178 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/LsImageVisitor.java

@@ -0,0 +1,178 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.IOException;
+import java.util.Formatter;
+import java.util.LinkedList;
+
+/**
+ * LsImageVisitor displays the file entries of the namespace in a format very
+ * similar to the output of ls/lsr.  Each entry shows whether it is a
+ * directory, its permissions, replication, username and groupname, along with
+ * its size, modification date and full path.
+ *
+ * Note: A significant difference between the output of the lsr command
+ * and this image visitor is that this class cannot sort the file entries;
+ * they are listed in the order they are stored within the fsimage file. 
+ * Therefore, the output of this class cannot be directly compared to the
+ * output of the lsr command.
+ */
+class LsImageVisitor extends TextWriterImageVisitor {
+  final private LinkedList<ImageElement> elemQ = new LinkedList<ImageElement>();
+
+  private int numBlocks;
+  private String perms;
+  private int replication;
+  private String username;
+  private String group;
+  private long filesize;
+  private String modTime;
+  private String path;
+  private String linkTarget;
+
+  private boolean inInode = false;
+  final private StringBuilder sb = new StringBuilder();
+  final private Formatter formatter = new Formatter(sb);
+
+  public LsImageVisitor(String filename) throws IOException {
+    super(filename);
+  }
+
+  public LsImageVisitor(String filename, boolean printToScreen) throws IOException {
+    super(filename, printToScreen);
+  }
+
+  /**
+   * Start a new line of output, reset values.
+   */
+  private void newLine() {
+    numBlocks = 0;
+    perms = username = group = path = linkTarget = "";
+    filesize = 0l;
+    replication = 0;
+
+    inInode = true;
+  }
+
+  /**
+   * All the values have been gathered.  Print them to the console in an
+   * ls-style format.
+   */
+  private final static int widthRepl = 2;  
+  private final static int widthUser = 8; 
+  private final static int widthGroup = 10; 
+  private final static int widthSize = 10;
+  private final static int widthMod = 10;
+  private final static String lsStr = " %" + widthRepl + "s %" + widthUser + 
+                                       "s %" + widthGroup + "s %" + widthSize +
+                                       "d %" + widthMod + "s %s";
+  private void printLine() throws IOException {
+    sb.append(numBlocks < 0 ? "d" : "-");
+    sb.append(perms);
+
+    if (0 != linkTarget.length()) {
+      path = path + " -> " + linkTarget; 
+    }
+    formatter.format(lsStr, replication > 0 ? replication : "-",
+                           username, group, filesize, modTime, path);
+    sb.append("\n");
+
+    write(sb.toString());
+    sb.setLength(0); // clear string builder
+
+    inInode = false;
+  }
+
+  @Override
+  void start() throws IOException {}
+
+  @Override
+  void finish() throws IOException {
+    super.finish();
+  }
+
+  @Override
+  void finishAbnormally() throws IOException {
+    System.out.println("Input ended unexpectedly.");
+    super.finishAbnormally();
+  }
+
+  @Override
+  void leaveEnclosingElement() throws IOException {
+    ImageElement elem = elemQ.pop();
+
+    if(elem == ImageElement.INODE)
+      printLine();
+  }
+
+  // Maintain state of location within the image tree and record
+  // values needed to display the inode in ls-style format.
+  @Override
+  void visit(ImageElement element, String value) throws IOException {
+    if(inInode) {
+      switch(element) {
+      case INODE_PATH:
+        if(value.equals("")) path = "/";
+        else path = value;
+        break;
+      case PERMISSION_STRING:
+        perms = value;
+        break;
+      case REPLICATION:
+        replication = Integer.valueOf(value);
+        break;
+      case USER_NAME:
+        username = value;
+        break;
+      case GROUP_NAME:
+        group = value;
+        break;
+      case NUM_BYTES:
+        filesize += Long.valueOf(value);
+        break;
+      case MODIFICATION_TIME:
+        modTime = value;
+        break;
+      case SYMLINK:
+        linkTarget = value;
+        break;
+      default:
+        // This is OK.  We're not looking for all the values.
+        break;
+      }
+    }
+  }
+
+  @Override
+  void visitEnclosingElement(ImageElement element) throws IOException {
+    elemQ.push(element);
+    if(element == ImageElement.INODE)
+      newLine();
+  }
+
+  @Override
+  void visitEnclosingElement(ImageElement element,
+      ImageElement key, String value) throws IOException {
+    elemQ.push(element);
+    if(element == ImageElement.INODE)
+      newLine();
+    else if (element == ImageElement.BLOCKS)
+      numBlocks = Integer.valueOf(value);
+  }
+}
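
For reference, printLine() above produces one fixed-width, ls-like line per inode using the lsStr format string. A standalone sketch with made-up values (the path, owner, group, size and date below are hypothetical) shows the shape of such a line.

import java.util.Formatter;

// Illustrative only: renders one entry with the same format string that
// printLine() uses, to show the ls-style layout it produces.
public class LsLineDemo {
  public static void main(String[] args) {
    StringBuilder sb = new StringBuilder();
    Formatter formatter = new Formatter(sb);
    String lsStr = " %2s %8s %10s %10d %10s %s";   // same widths as above
    sb.append("-");                                // file entry (numBlocks >= 0)
    sb.append("rw-r--r--");                        // PERMISSION_STRING
    formatter.format(lsStr, 3, "hdfs", "supergroup",
        134217728L, "2014-05-15 10:00", "/user/hdfs/data/part-00000");
    System.out.println(sb.toString());
    formatter.close();
  }
}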

+ 118 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/NameDistributionVisitor.java

@@ -0,0 +1,118 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map.Entry;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/**
+ * File name distribution visitor. 
+ * <p>
+ * It analyzes file names in fsimage and prints the following information: 
+ * <li>Number of unique file names</li> 
+ * <li>For each frequency range, the number of file names in that range and the
+ * number of files that use them</li>
+ * <li>Heap saved if the file name objects are reused</li>
+ */
+@InterfaceAudience.Private
+public class NameDistributionVisitor extends TextWriterImageVisitor {
+  HashMap<String, Integer> counts = new HashMap<String, Integer>();
+
+  public NameDistributionVisitor(String filename, boolean printToScreen)
+      throws IOException {
+    super(filename, printToScreen);
+  }
+
+  @Override
+  void finish() throws IOException {
+    final int BYTEARRAY_OVERHEAD = 24;
+
+    write("Total unique file names " + counts.size());
+    // Columns: Frequency of file occurrence, savings in heap, total files using
+    // the name and number of file names
+    final long stats[][] = { { 100000, 0, 0, 0 },
+                             { 10000, 0, 0, 0 },
+                             { 1000, 0, 0, 0 },
+                             { 100, 0, 0, 0 },
+                             { 10, 0, 0, 0 },
+                             { 5, 0, 0, 0 },
+                             { 4, 0, 0, 0 },
+                             { 3, 0, 0, 0 },
+                             { 2, 0, 0, 0 }};
+
+    int highbound = Integer.MIN_VALUE;
+    for (Entry<String, Integer> entry : counts.entrySet()) {
+      highbound = Math.max(highbound, entry.getValue());
+      for (int i = 0; i < stats.length; i++) {
+        if (entry.getValue() >= stats[i][0]) {
+          stats[i][1] += (BYTEARRAY_OVERHEAD + entry.getKey().length())
+              * (entry.getValue() - 1);
+          stats[i][2] += entry.getValue();
+          stats[i][3]++;
+          break;
+        }
+      }
+    }
+
+    long lowbound = 0;
+    long totalsavings = 0;
+    for (long[] stat : stats) {
+      lowbound = stat[0];
+      totalsavings += stat[1];
+      String range = lowbound == highbound ? " " + lowbound :
+          " between " + lowbound + "-" + highbound;
+      write("\n" + stat[3] + " names are used by " + stat[2] + " files"
+          + range + " times. Heap savings ~" + stat[1] + " bytes.");
+      highbound = (int) stat[0] - 1;
+    }
+    write("\n\nTotal saved heap ~" + totalsavings + "bytes.\n");
+    super.finish();
+  }
+
+  @Override
+  void visit(ImageElement element, String value) throws IOException {
+    if (element == ImageElement.INODE_PATH) {
+      String filename = value.substring(value.lastIndexOf("/") + 1);
+      if (counts.containsKey(filename)) {
+        counts.put(filename, counts.get(filename) + 1);
+      } else {
+        counts.put(filename, 1);
+      }
+    }
+  }
+
+  @Override
+  void leaveEnclosingElement() throws IOException {
+  }
+
+  @Override
+  void start() throws IOException {
+  }
+
+  @Override
+  void visitEnclosingElement(ImageElement element) throws IOException {
+  }
+
+  @Override
+  void visitEnclosingElement(ImageElement element, ImageElement key,
+      String value) throws IOException {
+  }
+}
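
The heap-savings figure reported by finish() above is the estimate (BYTEARRAY_OVERHEAD + name length) * (occurrences - 1) per name, i.e. the bytes saved if all duplicate copies of a name shared a single byte array. A quick check with hypothetical numbers:

// Hypothetical numbers, only to make the estimate above concrete.
public class NameSavingsDemo {
  public static void main(String[] args) {
    final int BYTEARRAY_OVERHEAD = 24;       // same constant used in finish()
    String name = "part-00000";              // 10 characters (made up)
    int occurrences = 1000;                  // made-up reuse count
    long savings = (BYTEARRAY_OVERHEAD + name.length()) * (occurrences - 1L);
    System.out.println(savings + " bytes");  // 33966 bytes, roughly 33 KB
  }
}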

+ 274 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewer.java

@@ -0,0 +1,274 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.BufferedInputStream;
+import java.io.DataInputStream;
+import java.io.EOFException;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.cli.PosixParser;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.hdfs.server.namenode.FSEditLogLoader.PositionTrackingInputStream;
+
+/**
+ * OfflineImageViewer dumps the contents of a Hadoop fsimage file to XML
+ * or the console.  Main entry point into the utility, either via the
+ * command line or programmatically.
+ */
+@InterfaceAudience.Private
+public class OfflineImageViewer {
+  public static final Log LOG = LogFactory.getLog(OfflineImageViewer.class);
+  
+  private final static String usage = 
+    "Usage: bin/hdfs oiv [OPTIONS] -i INPUTFILE -o OUTPUTFILE\n" +
+    "Offline Image Viewer\n" + 
+    "View a Hadoop fsimage INPUTFILE using the specified PROCESSOR,\n" +
+    "saving the results in OUTPUTFILE.\n" +
+    "\n" +
+    "The oiv utility will attempt to parse correctly formed image files\n" +
+    "and will abort fail with mal-formed image files.\n" +
+    "\n" +
+    "The tool works offline and does not require a running cluster in\n" +
+    "order to process an image file.\n" +
+    "\n" +
+    "The following image processors are available:\n" +
+    "  * Ls: The default image processor generates an lsr-style listing\n" +
+    "    of the files in the namespace, with the same fields in the same\n" +
+    "    order.  Note that in order to correctly determine file sizes,\n" +
+    "    this formatter cannot skip blocks and will override the\n" +
+    "    -skipBlocks option.\n" +
+    "  * Indented: This processor enumerates over all of the elements in\n" +
+    "    the fsimage file, using levels of indentation to delineate\n" +
+    "    sections within the file.\n" +
+    "  * Delimited: Generate a text file with all of the elements common\n" +
+    "    to both inodes and inodes-under-construction, separated by a\n" +
+    "    delimiter. The default delimiter is \u0001, though this may be\n" +
+    "    changed via the -delimiter argument. This processor also overrides\n" +
+    "    the -skipBlocks option for the same reason as the Ls processor\n" +
+    "  * XML: This processor creates an XML document with all elements of\n" +
+    "    the fsimage enumerated, suitable for further analysis by XML\n" +
+    "    tools.\n" +
+    "  * FileDistribution: This processor analyzes the file size\n" +
+    "    distribution in the image.\n" +
+    "    -maxSize specifies the range [0, maxSize] of file sizes to be\n" +
+    "     analyzed (128GB by default).\n" +
+    "    -step defines the granularity of the distribution. (2MB by default)\n" +
+    "  * NameDistribution: This processor analyzes the file names\n" +
+    "    in the image and prints total number of file names and how frequently\n" +
+    "    file names are reused.\n" +
+    "\n" + 
+    "Required command line arguments:\n" +
+    "-i,--inputFile <arg>   FSImage file to process.\n" +
+    "-o,--outputFile <arg>  Name of output file. If the specified\n" +
+    "                       file exists, it will be overwritten.\n" +
+    "\n" + 
+    "Optional command line arguments:\n" +
+    "-p,--processor <arg>   Select which type of processor to apply\n" +
+    "                       against image file." +
+    " (Ls|XML|Delimited|Indented|FileDistribution).\n" +
+    "-h,--help              Display usage information and exit\n" +
+    "-printToScreen         For processors that write to a file, also\n" +
+    "                       output to screen. On large image files this\n" +
+    "                       will dramatically increase processing time.\n" +
+    "-skipBlocks            Skip inodes' blocks information. May\n" +
+    "                       significantly decrease output.\n" +
+    "                       (default = false).\n" +
+    "-delimiter <arg>       Delimiting string to use with Delimited processor\n";
+
+  private final boolean skipBlocks;
+  private final String inputFile;
+  private final ImageVisitor processor;
+  
+  public OfflineImageViewer(String inputFile, ImageVisitor processor, 
+             boolean skipBlocks) {
+    this.inputFile = inputFile;
+    this.processor = processor;
+    this.skipBlocks = skipBlocks;
+  }
+
+  /**
+   * Process image file.
+   */
+  public void go() throws IOException  {
+    DataInputStream in = null;
+    PositionTrackingInputStream tracker = null;
+    ImageLoader fsip = null;
+    boolean done = false;
+    try {
+      tracker = new PositionTrackingInputStream(new BufferedInputStream(
+               new FileInputStream(new File(inputFile))));
+      in = new DataInputStream(tracker);
+
+      int imageVersionFile = findImageVersion(in);
+
+      fsip = ImageLoader.LoaderFactory.getLoader(imageVersionFile);
+
+      if(fsip == null) 
+        throw new IOException("No image processor to read version " +
+            imageVersionFile + " is available.");
+      fsip.loadImage(in, processor, skipBlocks);
+      done = true;
+    } finally {
+      if (!done) {
+        LOG.error("image loading failed at offset " + tracker.getPos());
+      }
+      IOUtils.cleanup(LOG, in, tracker);
+    }
+  }
+
+  /**
+   * Check an fsimage DataInputStream's version number.
+   *
+   * The stream is left at the same position at which it was passed in;
+   * this method has no effect on the stream's read pointer.
+   *
+   * @param in Datainputstream of fsimage
+   * @return Filesystem layout version of fsimage represented by stream
+   * @throws IOException If problem reading from in
+   */
+  private int findImageVersion(DataInputStream in) throws IOException {
+    in.mark(42); // arbitrary amount, resetting immediately
+
+    int version = in.readInt();
+    in.reset();
+
+    return version;
+  }
+  
+  /**
+   * Build command-line options and descriptions
+   */
+  public static Options buildOptions() {
+    Options options = new Options();
+
+    // Build in/output file arguments, which are required, but there is no 
+    // addOption method that can specify this
+    OptionBuilder.isRequired();
+    OptionBuilder.hasArgs();
+    OptionBuilder.withLongOpt("outputFile");
+    options.addOption(OptionBuilder.create("o"));
+    
+    OptionBuilder.isRequired();
+    OptionBuilder.hasArgs();
+    OptionBuilder.withLongOpt("inputFile");
+    options.addOption(OptionBuilder.create("i"));
+    
+    options.addOption("p", "processor", true, "");
+    options.addOption("h", "help", false, "");
+    options.addOption("skipBlocks", false, "");
+    options.addOption("printToScreen", false, "");
+    options.addOption("delimiter", true, "");
+
+    return options;
+  }
+  
+  /**
+   * Entry point to command-line-driven operation.  User may specify
+   * options and start fsimage viewer from the command line.  Program
+   * will process image file and exit cleanly or, if an error is
+   * encountered, inform user and exit.
+   *
+   * @param args Command line options
+   * @throws IOException 
+   */
+  public static void main(String[] args) throws IOException {
+    Options options = buildOptions();
+    if(args.length == 0) {
+      printUsage();
+      return;
+    }
+    
+    CommandLineParser parser = new PosixParser();
+    CommandLine cmd;
+
+    try {
+      cmd = parser.parse(options, args);
+    } catch (ParseException e) {
+      System.out.println("Error parsing command-line options: ");
+      printUsage();
+      return;
+    }
+
+    if(cmd.hasOption("h")) { // print help and exit
+      printUsage();
+      return;
+    }
+
+    boolean skipBlocks = cmd.hasOption("skipBlocks");
+    boolean printToScreen = cmd.hasOption("printToScreen");
+    String inputFile = cmd.getOptionValue("i");
+    String processor = cmd.getOptionValue("p", "Ls");
+    String outputFile = cmd.getOptionValue("o");
+    String delimiter = cmd.getOptionValue("delimiter");
+    
+    if( !(delimiter == null || processor.equals("Delimited")) ) {
+      System.out.println("Can only specify -delimiter with Delimited processor");
+      printUsage();
+      return;
+    }
+    
+    ImageVisitor v;
+    if(processor.equals("Indented")) {
+      v = new IndentedImageVisitor(outputFile, printToScreen);
+    } else if (processor.equals("XML")) {
+      v = new XmlImageVisitor(outputFile, printToScreen);
+    } else if (processor.equals("Delimited")) {
+      v = delimiter == null ?  
+                 new DelimitedImageVisitor(outputFile, printToScreen) :
+                 new DelimitedImageVisitor(outputFile, printToScreen, delimiter);
+      skipBlocks = false;
+    } else if (processor.equals("FileDistribution")) {
+      long maxSize = Long.parseLong(cmd.getOptionValue("maxSize", "0"));
+      int step = Integer.parseInt(cmd.getOptionValue("step", "0"));
+      v = new FileDistributionVisitor(outputFile, maxSize, step);
+    } else if (processor.equals("NameDistribution")) {
+      v = new NameDistributionVisitor(outputFile, printToScreen);
+    } else {
+      v = new LsImageVisitor(outputFile, printToScreen);
+      skipBlocks = false;
+    }
+    
+    try {
+      OfflineImageViewer d = new OfflineImageViewer(inputFile, v, skipBlocks);
+      d.go();
+    } catch (EOFException e) {
+      System.err.println("Input file ended unexpectedly.  Exiting");
+    } catch(IOException e) {
+      System.err.println("Encountered exception.  Exiting: " + e.getMessage());
+    }
+  }
+
+  /**
+   * Print application usage instructions.
+   */
+  private static void printUsage() {
+    System.out.println(usage);
+  }
+}
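
Besides the command line, the class comment above mentions programmatic use. A minimal sketch with placeholder paths, assuming the caller lives in the offlineImageViewer package (the visitor classes are package-private):

package org.apache.hadoop.hdfs.tools.offlineImageViewer;

import java.io.IOException;

// Hypothetical example, not part of the patch; both paths are placeholders.
class OivProgrammaticDemo {
  public static void main(String[] args) throws IOException {
    ImageVisitor visitor = new IndentedImageVisitor("/tmp/fsimage.txt", false);
    OfflineImageViewer viewer =
        new OfflineImageViewer("/tmp/fsimage", visitor, false /* skipBlocks */);
    viewer.go();
  }
}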

+ 109 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TextWriterImageVisitor.java

@@ -0,0 +1,109 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+
+import com.google.common.base.Charsets;
+
+/**
+ * TextWriterImageVisitor mixes in the ability for ImageVisitor
+ * implementations to easily write their output to a text file.
+ *
+ * Implementing classes should be sure to call the super methods for the
+ * constructors, finish and finishAbnormally methods, in order that the
+ * underlying file may be opened and closed correctly.
+ *
+ * Note, this class does not add newlines to text written to file or (if
+ * enabled) screen.  This is the implementing class' responsibility.
+ */
+abstract class TextWriterImageVisitor extends ImageVisitor {
+  private boolean printToScreen = false;
+  private boolean okToWrite = false;
+  final private OutputStreamWriter fw;
+
+  /**
+   * Create a processor that writes to the file named.
+   *
+   * @param filename Name of file to write output to
+   */
+  public TextWriterImageVisitor(String filename) throws IOException {
+    this(filename, false);
+  }
+
+  /**
+   * Create a processor that writes to the file named and may or may not
+   * also output to the screen, as specified.
+   *
+   * @param filename Name of file to write output to
+   * @param printToScreen Mirror output to screen?
+   */
+  public TextWriterImageVisitor(String filename, boolean printToScreen)
+         throws IOException {
+    super();
+    this.printToScreen = printToScreen;
+    fw = new OutputStreamWriter(new FileOutputStream(filename), Charsets.UTF_8);
+    okToWrite = true;
+  }
+  
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.hdfs.tools.offlineImageViewer.ImageVisitor#finish()
+   */
+  @Override
+  void finish() throws IOException {
+    close();
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.hdfs.tools.offlineImageViewer.ImageVisitor#finishAbnormally()
+   */
+  @Override
+  void finishAbnormally() throws IOException {
+    close();
+  }
+
+  /**
+   * Close output stream and prevent further writing
+   */
+  private void close() throws IOException {
+    fw.close();
+    okToWrite = false;
+  }
+
+  /**
+   * Write parameter to output file (and possibly screen).
+   *
+   * @param toWrite Text to write to file
+   */
+  protected void write(String toWrite) throws IOException  {
+    if(!okToWrite)
+      throw new IOException("file not open for writing.");
+
+    if(printToScreen)
+      System.out.print(toWrite);
+
+    try {
+      fw.write(toWrite);
+    } catch (IOException e) {
+      okToWrite = false;
+      throw e;
+    }
+  }
+}

+ 88 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/XmlImageVisitor.java

@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.IOException;
+import java.util.LinkedList;
+
+/**
+ * An XmlImageVisitor walks over an fsimage structure and writes out
+ * an equivalent XML document that contains the fsimage's components.
+ */
+public class XmlImageVisitor extends TextWriterImageVisitor {
+  final private LinkedList<ImageElement> tagQ =
+                                          new LinkedList<ImageElement>();
+
+  public XmlImageVisitor(String filename) throws IOException {
+    super(filename, false);
+  }
+
+  public XmlImageVisitor(String filename, boolean printToScreen)
+       throws IOException {
+    super(filename, printToScreen);
+  }
+
+  @Override
+  void finish() throws IOException {
+    super.finish();
+  }
+
+  @Override
+  void finishAbnormally() throws IOException {
+    write("\n<!-- Error processing image file.  Exiting -->\n");
+    super.finishAbnormally();
+  }
+
+  @Override
+  void leaveEnclosingElement() throws IOException {
+    if(tagQ.size() == 0)
+      throw new IOException("Tried to exit non-existent enclosing element " +
+                "in FSImage file");
+
+    ImageElement element = tagQ.pop();
+    write("</" + element.toString() + ">\n");
+  }
+
+  @Override
+  void start() throws IOException {
+    write("<?xml version=\"1.0\" ?>\n");
+  }
+
+  @Override
+  void visit(ImageElement element, String value) throws IOException {
+    writeTag(element.toString(), value);
+  }
+
+  @Override
+  void visitEnclosingElement(ImageElement element) throws IOException {
+    write("<" + element.toString() + ">\n");
+    tagQ.push(element);
+  }
+
+  @Override
+  void visitEnclosingElement(ImageElement element,
+      ImageElement key, String value)
+       throws IOException {
+    write("<" + element.toString() + " " + key + "=\"" + value +"\">\n");
+    tagQ.push(element);
+  }
+
+  private void writeTag(String tag, String value) throws IOException {
+    write("<" + tag + ">" + value + "</" + tag + ">\n");
+  }
+}
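
The enclosing-element calls above must be strictly paired for the output to be well-formed XML; leaveEnclosingElement() pops the most recently opened tag and throws if none is open. A small in-package sketch with made-up values shows the pairing (expected output is noted in the comments):

package org.apache.hadoop.hdfs.tools.offlineImageViewer;

import java.io.IOException;

// Hypothetical example, not part of the patch; the output path and inode
// path are placeholders.
class XmlVisitorDemo {
  public static void main(String[] args) throws IOException {
    XmlImageVisitor v = new XmlImageVisitor("/tmp/fsimage-demo.xml");
    v.start();                                                // <?xml version="1.0" ?>
    v.visitEnclosingElement(ImageVisitor.ImageElement.INODE); // <INODE>
    v.visit(ImageVisitor.ImageElement.INODE_PATH, "/tmp/a");  // <INODE_PATH>/tmp/a</INODE_PATH>
    v.leaveEnclosingElement();                                // </INODE>
    v.finish();                                               // closes the output file
  }
}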

+ 10 - 4
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java

@@ -176,8 +176,9 @@ public class JsonUtil {
   }

   /** Convert a string to a FsPermission object. */
-  private static FsPermission toFsPermission(final String s) {
-    return new FsPermission(Short.parseShort(s, 8));
+  private static FsPermission toFsPermission(final String s, Boolean aclBit) {
+    FsPermission perm = new FsPermission(Short.parseShort(s, 8));
+    return (aclBit != null && aclBit) ? new FsAclPermission(perm) : perm;
   }

   static enum PathType {
@@ -204,7 +205,11 @@ public class JsonUtil {
     m.put("length", status.getLen());
     m.put("length", status.getLen());
     m.put("owner", status.getOwner());
     m.put("owner", status.getOwner());
     m.put("group", status.getGroup());
     m.put("group", status.getGroup());
-    m.put("permission", toString(status.getPermission()));
+    FsPermission perm = status.getPermission();
+    m.put("permission", toString(perm));
+    if (perm.getAclBit()) {
+      m.put("aclBit", true);
+    }
     m.put("accessTime", status.getAccessTime());
     m.put("accessTime", status.getAccessTime());
     m.put("modificationTime", status.getModificationTime());
     m.put("modificationTime", status.getModificationTime());
     m.put("blockSize", status.getBlockSize());
     m.put("blockSize", status.getBlockSize());
@@ -230,7 +235,8 @@ public class JsonUtil {
     final long len = (Long) m.get("length");
     final String owner = (String) m.get("owner");
     final String group = (String) m.get("group");
-    final FsPermission permission = toFsPermission((String) m.get("permission"));
+    final FsPermission permission = toFsPermission((String) m.get("permission"),
+      (Boolean)m.get("aclBit"));
     final long aTime = (Long) m.get("accessTime");
     final long mTime = (Long) m.get("modificationTime");
     final long blockSize = (Long) m.get("blockSize");

+ 290 - 227
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java

@@ -58,34 +58,8 @@ import org.apache.hadoop.hdfs.HAUtil;
 import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
 import org.apache.hadoop.hdfs.server.namenode.SafeModeException;
-import org.apache.hadoop.hdfs.web.resources.AccessTimeParam;
-import org.apache.hadoop.hdfs.web.resources.AclPermissionParam;
-import org.apache.hadoop.hdfs.web.resources.BlockSizeParam;
-import org.apache.hadoop.hdfs.web.resources.BufferSizeParam;
-import org.apache.hadoop.hdfs.web.resources.ConcatSourcesParam;
-import org.apache.hadoop.hdfs.web.resources.CreateParentParam;
-import org.apache.hadoop.hdfs.web.resources.DelegationParam;
-import org.apache.hadoop.hdfs.web.resources.DeleteOpParam;
-import org.apache.hadoop.hdfs.web.resources.DestinationParam;
-import org.apache.hadoop.hdfs.web.resources.DoAsParam;
-import org.apache.hadoop.hdfs.web.resources.GetOpParam;
-import org.apache.hadoop.hdfs.web.resources.GroupParam;
-import org.apache.hadoop.hdfs.web.resources.HttpOpParam;
-import org.apache.hadoop.hdfs.web.resources.LengthParam;
-import org.apache.hadoop.hdfs.web.resources.ModificationTimeParam;
-import org.apache.hadoop.hdfs.web.resources.OffsetParam;
-import org.apache.hadoop.hdfs.web.resources.OverwriteParam;
-import org.apache.hadoop.hdfs.web.resources.OwnerParam;
-import org.apache.hadoop.hdfs.web.resources.Param;
-import org.apache.hadoop.hdfs.web.resources.PermissionParam;
-import org.apache.hadoop.hdfs.web.resources.PostOpParam;
-import org.apache.hadoop.hdfs.web.resources.PutOpParam;
-import org.apache.hadoop.hdfs.web.resources.RecursiveParam;
-import org.apache.hadoop.hdfs.web.resources.RenameOptionSetParam;
-import org.apache.hadoop.hdfs.web.resources.RenewerParam;
-import org.apache.hadoop.hdfs.web.resources.ReplicationParam;
-import org.apache.hadoop.hdfs.web.resources.TokenArgumentParam;
-import org.apache.hadoop.hdfs.web.resources.UserParam;
+import org.apache.hadoop.hdfs.web.resources.*;
+import org.apache.hadoop.hdfs.web.resources.HttpOpParam.Op;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.retry.RetryPolicies;
 import org.apache.hadoop.io.retry.RetryPolicy;
@@ -176,10 +150,13 @@ public class WebHdfsFileSystem extends FileSystem
     this.uri = URI.create(uri.getScheme() + "://" + uri.getAuthority());
     this.nnAddrs = resolveNNAddr();

-    boolean isHA = HAUtil.isLogicalUri(conf, this.uri);
-    // In non-HA case, the code needs to call getCanonicalUri() in order to
-    // handle the case where no port is specified in the URI
-    this.tokenServiceName = isHA ? HAUtil.buildTokenServiceForLogicalUri(uri)
+    boolean isHA = HAUtil.isClientFailoverConfigured(conf, this.uri);
+    boolean isLogicalUri = isHA && HAUtil.isLogicalUri(conf, this.uri);
+    // In non-HA or non-logical URI case, the code needs to call
+    // getCanonicalUri() in order to handle the case where no port is
+    // specified in the URI
+    this.tokenServiceName = isLogicalUri ?
+        HAUtil.buildTokenServiceForLogicalUri(uri)
         : SecurityUtil.buildTokenService(getCanonicalUri());
     initializeTokenAspect();

@@ -422,41 +399,24 @@ public class WebHdfsFileSystem extends FileSystem
     return url;
   }

-  /**
-   * Run a http operation.
-   * Connect to the http server, validate response, and obtain the JSON output.
-   * 
-   * @param op http operation
-   * @param fspath file system path
-   * @param parameters parameters for the operation
-   * @return a JSON object, e.g. Object[], Map<?, ?>, etc.
-   * @throws IOException
-   */
-  private Map<?, ?> run(final HttpOpParam.Op op, final Path fspath,
-      final Param<?,?>... parameters) throws IOException {
-    return new FsPathRunner(op, fspath, parameters).run().json;
-  }
-
   /**
    * This class is for initialing a HTTP connection, connecting to server,
    * obtaining a response, and also handling retry on failures.
    */
-  abstract class AbstractRunner {
+  abstract class AbstractRunner<T> {
     abstract protected URL getUrl() throws IOException;

     protected final HttpOpParam.Op op;
     private final boolean redirected;

     private boolean checkRetry;
-    protected HttpURLConnection conn = null;
-    private Map<?, ?> json = null;

     protected AbstractRunner(final HttpOpParam.Op op, boolean redirected) {
       this.op = op;
       this.redirected = redirected;
     }

-    AbstractRunner run() throws IOException {
+    T run() throws IOException {
       UserGroupInformation connectUgi = ugi.getRealUser();
       if (connectUgi == null) {
         connectUgi = ugi;
@@ -468,9 +428,9 @@ public class WebHdfsFileSystem extends FileSystem
         // the entire lifecycle of the connection must be run inside the
         // doAs to ensure authentication is performed correctly
         return connectUgi.doAs(
-            new PrivilegedExceptionAction<AbstractRunner>() {
+            new PrivilegedExceptionAction<T>() {
               @Override
-              public AbstractRunner run() throws IOException {
+              public T run() throws IOException {
                 return runWithRetry();
               }
             });
@@ -478,18 +438,51 @@ public class WebHdfsFileSystem extends FileSystem
         throw new IOException(e);
       }
     }
-    
-    private void init() throws IOException {
-      checkRetry = !redirected;
-      URL url = getUrl();
-      conn = (HttpURLConnection) connectionFactory.openConnection(url);
-    }
-    
-    private void connect() throws IOException {
-      connect(op.getDoOutput());
+
+    /**
+     * Two-step requests redirected to a DN
+     * 
+     * Create/Append:
+     * Step 1) Submit a Http request with neither auto-redirect nor data. 
+     * Step 2) Submit another Http request with the URL from the Location header with data.
+     * 
+     * The reason for having two-step create/append is to prevent clients from
+     * sending out the data before the redirect. This issue is addressed by the
+     * "Expect: 100-continue" header in HTTP/1.1; see RFC 2616, Section 8.2.3.
+     * Unfortunately, there are software library bugs (e.g. Jetty 6 http server
+     * and Java 6 http client), which do not correctly implement "Expect:
+     * 100-continue". The two-step create/append is a temporary workaround for
+     * the software library bugs.
+     * 
+     * Open/Checksum
+     * Also implements two-step connects for other operations redirected to
+     * a DN such as open and checksum
+     */
+    private HttpURLConnection connect(URL url) throws IOException {
+      // resolve redirects for a DN operation unless already resolved
+      if (op.getRedirect() && !redirected) {
+        final HttpOpParam.Op redirectOp =
+            HttpOpParam.TemporaryRedirectOp.valueOf(op);
+        final HttpURLConnection conn = connect(redirectOp, url);
+        // application level proxy like httpfs might not issue a redirect
+        if (conn.getResponseCode() == op.getExpectedHttpResponseCode()) {
+          return conn;
+        }
+        try {
+          validateResponse(redirectOp, conn, false);
+          url = new URL(conn.getHeaderField("Location"));
+        } finally {
+          conn.disconnect();
+        }
+      }
+      return connect(op, url);
     }

-    private void connect(boolean doOutput) throws IOException {
+    private HttpURLConnection connect(final HttpOpParam.Op op, final URL url)
+        throws IOException {
+      final HttpURLConnection conn =
+          (HttpURLConnection)connectionFactory.openConnection(url);
+      final boolean doOutput = op.getDoOutput();
       conn.setRequestMethod(op.getType().toString());
       conn.setInstanceFollowRedirects(false);
       switch (op.getType()) {
@@ -502,6 +495,10 @@ public class WebHdfsFileSystem extends FileSystem
             // explicitly setting content-length to 0 won't do spnego!!
             // opening and closing the stream will send "Content-Length: 0"
             conn.getOutputStream().close();
+          } else {
+            conn.setRequestProperty("Content-Type",
+                MediaType.APPLICATION_OCTET_STREAM);
+            conn.setChunkedStreamingMode(32 << 10); //32kB-chunk
           }
           break;
         }
@@ -511,16 +508,10 @@ public class WebHdfsFileSystem extends FileSystem
         }
       }
       conn.connect();
+      return conn;
     }

-    private void disconnect() {
-      if (conn != null) {
-        conn.disconnect();
-        conn = null;
-      }
-    }
-
-    private AbstractRunner runWithRetry() throws IOException {
+    private T runWithRetry() throws IOException {
       /**
        * Do the real work.
        *
@@ -538,15 +529,16 @@ public class WebHdfsFileSystem extends FileSystem
        * examines the exception and swallows it if it decides to rerun the work.
        */
       for(int retry = 0; ; retry++) {
+        checkRetry = !redirected;
+        final URL url = getUrl();
         try {
-          init();
-          if (op.getDoOutput()) {
-            twoStepWrite();
-          } else {
-            getResponse(op != GetOpParam.Op.OPEN);
+          final HttpURLConnection conn = connect(url);
+          // output streams will validate on close
+          if (!op.getDoOutput()) {
+            validateResponse(op, conn, false);
           }
-          return this;
-        } catch(IOException ioe) {
+          return getResponse(conn);
+        } catch (IOException ioe) {
           Throwable cause = ioe.getCause();
           if (cause != null && cause instanceof AuthenticationException) {
             throw ioe; // no retries for auth failures
@@ -588,87 +580,129 @@ public class WebHdfsFileSystem extends FileSystem
       throw toIOException(ioe);
     }

-    /**
-     * Two-step Create/Append:
-     * Step 1) Submit a Http request with neither auto-redirect nor data. 
-     * Step 2) Submit another Http request with the URL from the Location header with data.
-     * 
-     * The reason of having two-step create/append is for preventing clients to
-     * send out the data before the redirect. This issue is addressed by the
-     * "Expect: 100-continue" header in HTTP/1.1; see RFC 2616, Section 8.2.3.
-     * Unfortunately, there are software library bugs (e.g. Jetty 6 http server
-     * and Java 6 http client), which do not correctly implement "Expect:
-     * 100-continue". The two-step create/append is a temporary workaround for
-     * the software library bugs.
-     */
-    HttpURLConnection twoStepWrite() throws IOException {
-      //Step 1) Submit a Http request with neither auto-redirect nor data. 
-      connect(false);
-      validateResponse(HttpOpParam.TemporaryRedirectOp.valueOf(op), conn, false);
-      final String redirect = conn.getHeaderField("Location");
-      disconnect();
-      checkRetry = false;
-      
-      //Step 2) Submit another Http request with the URL from the Location header with data.
-      conn = (HttpURLConnection) connectionFactory.openConnection(new URL(
-          redirect));
-      conn.setRequestProperty("Content-Type",
-          MediaType.APPLICATION_OCTET_STREAM);
-      conn.setChunkedStreamingMode(32 << 10); //32kB-chunk
-      connect();
-      return conn;
+    abstract T getResponse(HttpURLConnection conn) throws IOException;
+  }
+
+  /**
+   * Abstract base class to handle path-based operations with params
+   */
+  abstract class AbstractFsPathRunner<T> extends AbstractRunner<T> {
+    private final Path fspath;
+    private final Param<?,?>[] parameters;
+    
+    AbstractFsPathRunner(final HttpOpParam.Op op, final Path fspath,
+        Param<?,?>... parameters) {
+      super(op, false);
+      this.fspath = fspath;
+      this.parameters = parameters;
+    }
+    
+    @Override
+    protected URL getUrl() throws IOException {
+      return toUrl(op, fspath, parameters);
     }
+  }

-    FSDataOutputStream write(final int bufferSize) throws IOException {
-      return WebHdfsFileSystem.this.write(op, conn, bufferSize);
+  /**
+   * Default path-based implementation expects no json response
+   */
+  class FsPathRunner extends AbstractFsPathRunner<Void> {
+    FsPathRunner(Op op, Path fspath, Param<?,?>... parameters) {
+      super(op, fspath, parameters);
     }
+    
+    @Override
+    Void getResponse(HttpURLConnection conn) throws IOException {
+      return null;
+    }
+  }

-    void getResponse(boolean getJsonAndDisconnect) throws IOException {
+  /**
+   * Handle path-based operations with a json response
+   */
+  abstract class FsPathResponseRunner<T> extends AbstractFsPathRunner<T> {
+    FsPathResponseRunner(final HttpOpParam.Op op, final Path fspath,
+        Param<?,?>... parameters) {
+      super(op, fspath, parameters);
+    }
+    
+    @Override
+    final T getResponse(HttpURLConnection conn) throws IOException {
       try {
-        connect();
-        final int code = conn.getResponseCode();
-        if (!redirected && op.getRedirect()
-            && code != op.getExpectedHttpResponseCode()) {
-          final String redirect = conn.getHeaderField("Location");
-          json = validateResponse(HttpOpParam.TemporaryRedirectOp.valueOf(op),
-              conn, false);
-          disconnect();
-  
-          checkRetry = false;
-          conn = (HttpURLConnection) connectionFactory.openConnection(new URL(
-              redirect));
-          connect();
+        final Map<?,?> json = jsonParse(conn, false);
+        if (json == null) {
+          // match exception class thrown by parser
+          throw new IllegalStateException("Missing response");
         }
-
-        json = validateResponse(op, conn, false);
-        if (json == null && getJsonAndDisconnect) {
-          json = jsonParse(conn, false);
+        return decodeResponse(json);
+      } catch (IOException ioe) {
+        throw ioe;
+      } catch (Exception e) { // catch json parser errors
+        final IOException ioe =
+            new IOException("Response decoding failure: "+e.toString(), e);
+        if (LOG.isDebugEnabled()) {
+          LOG.debug(ioe);
         }
+        throw ioe;
       } finally {
-        if (getJsonAndDisconnect) {
-          disconnect();
-        }
+        conn.disconnect();
       }
     }
+    
+    abstract T decodeResponse(Map<?,?> json) throws IOException;
   }
 
-  final class FsPathRunner extends AbstractRunner {
-    private final Path fspath;
-    private final Param<?, ?>[] parameters;
-
-    FsPathRunner(final HttpOpParam.Op op, final Path fspath, final Param<?,?>... parameters) {
-      super(op, false);
-      this.fspath = fspath;
-      this.parameters = parameters;
+  /**
+   * Handle path-based operations with json boolean response
+   */
+  class FsPathBooleanRunner extends FsPathResponseRunner<Boolean> {
+    FsPathBooleanRunner(Op op, Path fspath, Param<?,?>... parameters) {
+      super(op, fspath, parameters);
     }
-
+    
     @Override
-    protected URL getUrl() throws IOException {
-      return toUrl(op, fspath, parameters);
+    Boolean decodeResponse(Map<?,?> json) throws IOException {
+      return (Boolean)json.get("boolean");
     }
   }
 
-  final class URLRunner extends AbstractRunner {
+  /**
+   * Handle create/append output streams
+   */
+  class FsPathOutputStreamRunner extends AbstractFsPathRunner<FSDataOutputStream> {
+    private final int bufferSize;
+    
+    FsPathOutputStreamRunner(Op op, Path fspath, int bufferSize,
+        Param<?,?>... parameters) {
+      super(op, fspath, parameters);
+      this.bufferSize = bufferSize;
+    }
+    
+    @Override
+    FSDataOutputStream getResponse(final HttpURLConnection conn)
+        throws IOException {
+      return new FSDataOutputStream(new BufferedOutputStream(
+          conn.getOutputStream(), bufferSize), statistics) {
+        @Override
+        public void close() throws IOException {
+          try {
+            super.close();
+          } finally {
+            try {
+              validateResponse(op, conn, true);
+            } finally {
+              conn.disconnect();
+            }
+          }
+        }
+      };
+    }
+  }
+  
+  /**
+   * Used by open() which tracks the resolved url itself
+   */
+  final class URLRunner extends AbstractRunner<HttpURLConnection> {
     private final URL url;
     @Override
     protected URL getUrl() {
@@ -679,6 +713,11 @@ public class WebHdfsFileSystem extends FileSystem
       super(op, redirected);
       this.url = url;
     }
+
+    @Override
+    HttpURLConnection getResponse(HttpURLConnection conn) throws IOException {
+      return conn;
+    }
   }
 
   private FsPermission applyUMask(FsPermission permission) {
@@ -690,8 +729,12 @@ public class WebHdfsFileSystem extends FileSystem
 
   private HdfsFileStatus getHdfsFileStatus(Path f) throws IOException {
     final HttpOpParam.Op op = GetOpParam.Op.GETFILESTATUS;
-    final Map<?, ?> json = run(op, f);
-    final HdfsFileStatus status = JsonUtil.toFileStatus(json, true);
+    HdfsFileStatus status = new FsPathResponseRunner<HdfsFileStatus>(op, f) {
+      @Override
+      HdfsFileStatus decodeResponse(Map<?,?> json) {
+        return JsonUtil.toFileStatus(json, true);
+      }
+    }.run();
     if (status == null) {
       throw new FileNotFoundException("File does not exist: " + f);
     }
@@ -715,8 +758,12 @@ public class WebHdfsFileSystem extends FileSystem
   @Override
   public AclStatus getAclStatus(Path f) throws IOException {
     final HttpOpParam.Op op = GetOpParam.Op.GETACLSTATUS;
-    final Map<?, ?> json = run(op, f);
-    AclStatus status = JsonUtil.toAclStatus(json);
+    AclStatus status = new FsPathResponseRunner<AclStatus>(op, f) {
+      @Override
+      AclStatus decodeResponse(Map<?,?> json) {
+        return JsonUtil.toAclStatus(json);
+      }
+    }.run();
     if (status == null) {
       throw new FileNotFoundException("File does not exist: " + f);
     }
@@ -727,9 +774,9 @@ public class WebHdfsFileSystem extends FileSystem
   public boolean mkdirs(Path f, FsPermission permission) throws IOException {
     statistics.incrementWriteOps(1);
     final HttpOpParam.Op op = PutOpParam.Op.MKDIRS;
-    final Map<?, ?> json = run(op, f,
-        new PermissionParam(applyUMask(permission)));
-    return (Boolean)json.get("boolean");
+    return new FsPathBooleanRunner(op, f,
+        new PermissionParam(applyUMask(permission))
+    ).run();
   }
 
   /**
@@ -740,17 +787,19 @@ public class WebHdfsFileSystem extends FileSystem
       ) throws IOException {
     statistics.incrementWriteOps(1);
     final HttpOpParam.Op op = PutOpParam.Op.CREATESYMLINK;
-    run(op, f, new DestinationParam(makeQualified(destination).toUri().getPath()),
-        new CreateParentParam(createParent));
+    new FsPathRunner(op, f,
+        new DestinationParam(makeQualified(destination).toUri().getPath()),
+        new CreateParentParam(createParent)
+    ).run();
   }
 
   @Override
   public boolean rename(final Path src, final Path dst) throws IOException {
     statistics.incrementWriteOps(1);
     final HttpOpParam.Op op = PutOpParam.Op.RENAME;
-    final Map<?, ?> json = run(op, src,
-        new DestinationParam(makeQualified(dst).toUri().getPath()));
-    return (Boolean)json.get("boolean");
+    return new FsPathBooleanRunner(op, src,
+        new DestinationParam(makeQualified(dst).toUri().getPath())
+    ).run();
   }
 
   @SuppressWarnings("deprecation")
@@ -759,8 +808,10 @@ public class WebHdfsFileSystem extends FileSystem
       final Options.Rename... options) throws IOException {
     statistics.incrementWriteOps(1);
     final HttpOpParam.Op op = PutOpParam.Op.RENAME;
-    run(op, src, new DestinationParam(makeQualified(dst).toUri().getPath()),
-        new RenameOptionSetParam(options));
+    new FsPathRunner(op, src,
+        new DestinationParam(makeQualified(dst).toUri().getPath()),
+        new RenameOptionSetParam(options)
+    ).run();
   }
 
   @Override
@@ -772,7 +823,9 @@ public class WebHdfsFileSystem extends FileSystem
 
     statistics.incrementWriteOps(1);
     final HttpOpParam.Op op = PutOpParam.Op.SETOWNER;
-    run(op, p, new OwnerParam(owner), new GroupParam(group));
+    new FsPathRunner(op, p,
+        new OwnerParam(owner), new GroupParam(group)
+    ).run();
   }
 
   @Override
@@ -780,7 +833,7 @@ public class WebHdfsFileSystem extends FileSystem
       ) throws IOException {
     statistics.incrementWriteOps(1);
     final HttpOpParam.Op op = PutOpParam.Op.SETPERMISSION;
-    run(op, p, new PermissionParam(permission));
+    new FsPathRunner(op, p, new PermissionParam(permission)).run();
   }
 
   @Override
@@ -788,7 +841,7 @@ public class WebHdfsFileSystem extends FileSystem
       throws IOException {
     statistics.incrementWriteOps(1);
     final HttpOpParam.Op op = PutOpParam.Op.MODIFYACLENTRIES;
-    run(op, path, new AclPermissionParam(aclSpec));
+    new FsPathRunner(op, path, new AclPermissionParam(aclSpec)).run();
   }
 
   @Override
@@ -796,21 +849,21 @@ public class WebHdfsFileSystem extends FileSystem
       throws IOException {
     statistics.incrementWriteOps(1);
     final HttpOpParam.Op op = PutOpParam.Op.REMOVEACLENTRIES;
-    run(op, path, new AclPermissionParam(aclSpec));
+    new FsPathRunner(op, path, new AclPermissionParam(aclSpec)).run();
   }
 
   @Override
   public void removeDefaultAcl(Path path) throws IOException {
     statistics.incrementWriteOps(1);
     final HttpOpParam.Op op = PutOpParam.Op.REMOVEDEFAULTACL;
-    run(op, path);
+    new FsPathRunner(op, path).run();
   }
 
   @Override
   public void removeAcl(Path path) throws IOException {
     statistics.incrementWriteOps(1);
     final HttpOpParam.Op op = PutOpParam.Op.REMOVEACL;
-    run(op, path);
+    new FsPathRunner(op, path).run();
   }
 
   @Override
@@ -818,7 +871,7 @@ public class WebHdfsFileSystem extends FileSystem
       throws IOException {
     statistics.incrementWriteOps(1);
     final HttpOpParam.Op op = PutOpParam.Op.SETACL;
-    run(op, p, new AclPermissionParam(aclSpec));
+    new FsPathRunner(op, p, new AclPermissionParam(aclSpec)).run();
   }
 
   @Override
@@ -826,8 +879,9 @@ public class WebHdfsFileSystem extends FileSystem
      ) throws IOException {
     statistics.incrementWriteOps(1);
     final HttpOpParam.Op op = PutOpParam.Op.SETREPLICATION;
-    final Map<?, ?> json = run(op, p, new ReplicationParam(replication));
-    return (Boolean)json.get("boolean");
+    return new FsPathBooleanRunner(op, p,
+        new ReplicationParam(replication)
+    ).run();
   }
 
   @Override
@@ -835,7 +889,10 @@ public class WebHdfsFileSystem extends FileSystem
       ) throws IOException {
     statistics.incrementWriteOps(1);
     final HttpOpParam.Op op = PutOpParam.Op.SETTIMES;
-    run(op, p, new ModificationTimeParam(mtime), new AccessTimeParam(atime));
+    new FsPathRunner(op, p,
+        new ModificationTimeParam(mtime),
+        new AccessTimeParam(atime)
+    ).run();
   }
 
   @Override
@@ -850,32 +907,11 @@ public class WebHdfsFileSystem extends FileSystem
         DFSConfigKeys.DFS_REPLICATION_DEFAULT);
   }
 
-  FSDataOutputStream write(final HttpOpParam.Op op,
-      final HttpURLConnection conn, final int bufferSize) throws IOException {
-    return new FSDataOutputStream(new BufferedOutputStream(
-        conn.getOutputStream(), bufferSize), statistics) {
-      @Override
-      public void close() throws IOException {
-        try {
-          super.close();
-        } finally {
-          try {
-            validateResponse(op, conn, true);
-          } finally {
-            conn.disconnect();
-          }
-        }
-      }
-    };
-  }
-
   @Override
   public void concat(final Path trg, final Path [] srcs) throws IOException {
     statistics.incrementWriteOps(1);
     final HttpOpParam.Op op = PostOpParam.Op.CONCAT;
-
-    ConcatSourcesParam param = new ConcatSourcesParam(srcs);
-    run(op, trg, param);
+    new FsPathRunner(op, trg, new ConcatSourcesParam(srcs)).run();
   }
 
   @Override
@@ -885,14 +921,13 @@ public class WebHdfsFileSystem extends FileSystem
     statistics.incrementWriteOps(1);
 
     final HttpOpParam.Op op = PutOpParam.Op.CREATE;
-    return new FsPathRunner(op, f,
+    return new FsPathOutputStreamRunner(op, f, bufferSize,
         new PermissionParam(applyUMask(permission)),
         new OverwriteParam(overwrite),
         new BufferSizeParam(bufferSize),
         new ReplicationParam(replication),
-        new BlockSizeParam(blockSize))
-      .run()
-      .write(bufferSize);
+        new BlockSizeParam(blockSize)
+    ).run();
   }
 
   @Override
@@ -901,16 +936,17 @@ public class WebHdfsFileSystem extends FileSystem
     statistics.incrementWriteOps(1);
 
     final HttpOpParam.Op op = PostOpParam.Op.APPEND;
-    return new FsPathRunner(op, f, new BufferSizeParam(bufferSize))
-      .run()
-      .write(bufferSize);
+    return new FsPathOutputStreamRunner(op, f, bufferSize,
+        new BufferSizeParam(bufferSize)
+    ).run();
   }
 
   @Override
   public boolean delete(Path f, boolean recursive) throws IOException {
     final HttpOpParam.Op op = DeleteOpParam.Op.DELETE;
-    final Map<?, ?> json = run(op, f, new RecursiveParam(recursive));
-    return (Boolean)json.get("boolean");
+    return new FsPathBooleanRunner(op, f,
+        new RecursiveParam(recursive)
+    ).run();
   }
 
   @Override
@@ -942,7 +978,7 @@ public class WebHdfsFileSystem extends FileSystem
         final boolean resolved) throws IOException {
       final URL offsetUrl = offset == 0L? url
           : new URL(url + "&" + new OffsetParam(offset));
-      return new URLRunner(GetOpParam.Op.OPEN, offsetUrl, resolved).run().conn;
+      return new URLRunner(GetOpParam.Op.OPEN, offsetUrl, resolved).run();
     }
   }
 
@@ -998,25 +1034,36 @@ public class WebHdfsFileSystem extends FileSystem
     statistics.incrementReadOps(1);
 
     final HttpOpParam.Op op = GetOpParam.Op.LISTSTATUS;
-    final Map<?, ?> json  = run(op, f);
-    final Map<?, ?> rootmap = (Map<?, ?>)json.get(FileStatus.class.getSimpleName() + "es");
-    final Object[] array = (Object[])rootmap.get(FileStatus.class.getSimpleName());
-
-    //convert FileStatus
-    final FileStatus[] statuses = new FileStatus[array.length];
-    for(int i = 0; i < array.length; i++) {
-      final Map<?, ?> m = (Map<?, ?>)array[i];
-      statuses[i] = makeQualified(JsonUtil.toFileStatus(m, false), f);
-    }
-    return statuses;
+    return new FsPathResponseRunner<FileStatus[]>(op, f) {
+      @Override
+      FileStatus[] decodeResponse(Map<?,?> json) {
+        final Map<?, ?> rootmap = (Map<?, ?>)json.get(FileStatus.class.getSimpleName() + "es");
+        final Object[] array = (Object[])rootmap.get(FileStatus.class.getSimpleName());
+
+        //convert FileStatus
+        final FileStatus[] statuses = new FileStatus[array.length];
+        for (int i = 0; i < array.length; i++) {
+          final Map<?, ?> m = (Map<?, ?>)array[i];
+          statuses[i] = makeQualified(JsonUtil.toFileStatus(m, false), f);
+        }
+        return statuses;
+      }
+    }.run();
   }
 
   @Override
   public Token<DelegationTokenIdentifier> getDelegationToken(
       final String renewer) throws IOException {
     final HttpOpParam.Op op = GetOpParam.Op.GETDELEGATIONTOKEN;
-    final Map<?, ?> m = run(op, null, new RenewerParam(renewer));
-    final Token<DelegationTokenIdentifier> token = JsonUtil.toDelegationToken(m);
+    Token<DelegationTokenIdentifier> token =
+        new FsPathResponseRunner<Token<DelegationTokenIdentifier>>(
+            op, null, new RenewerParam(renewer)) {
+      @Override
+      Token<DelegationTokenIdentifier> decodeResponse(Map<?,?> json)
+          throws IOException {
+        return JsonUtil.toDelegationToken(json);
+      }
+    }.run();
     token.setService(tokenServiceName);
     return token;
   }
@@ -1038,19 +1085,22 @@ public class WebHdfsFileSystem extends FileSystem
   public synchronized long renewDelegationToken(final Token<?> token
       ) throws IOException {
     final HttpOpParam.Op op = PutOpParam.Op.RENEWDELEGATIONTOKEN;
-    TokenArgumentParam dtargParam = new TokenArgumentParam(
-        token.encodeToUrlString());
-    final Map<?, ?> m = run(op, null, dtargParam);
-    return (Long) m.get("long");
+    return new FsPathResponseRunner<Long>(op, null,
+        new TokenArgumentParam(token.encodeToUrlString())) {
+      @Override
+      Long decodeResponse(Map<?,?> json) throws IOException {
+        return (Long) json.get("long");
+      }
+    }.run();
   }
 
   @Override
   public synchronized void cancelDelegationToken(final Token<?> token
       ) throws IOException {
     final HttpOpParam.Op op = PutOpParam.Op.CANCELDELEGATIONTOKEN;
-    TokenArgumentParam dtargParam = new TokenArgumentParam(
-        token.encodeToUrlString());
-    run(op, null, dtargParam);
+    new FsPathRunner(op, null,
+        new TokenArgumentParam(token.encodeToUrlString())
+    ).run();
   }
 
   @Override
@@ -1068,9 +1118,14 @@ public class WebHdfsFileSystem extends FileSystem
     statistics.incrementReadOps(1);
 
     final HttpOpParam.Op op = GetOpParam.Op.GET_BLOCK_LOCATIONS;
-    final Map<?, ?> m = run(op, p, new OffsetParam(offset),
-        new LengthParam(length));
-    return DFSUtil.locatedBlocks2Locations(JsonUtil.toLocatedBlocks(m));
+    return new FsPathResponseRunner<BlockLocation[]>(op, p,
+        new OffsetParam(offset), new LengthParam(length)) {
+      @Override
+      BlockLocation[] decodeResponse(Map<?,?> json) throws IOException {
+        return DFSUtil.locatedBlocks2Locations(
+            JsonUtil.toLocatedBlocks(json));
+      }
+    }.run();
   }
 
   @Override
@@ -1078,8 +1133,12 @@ public class WebHdfsFileSystem extends FileSystem
     statistics.incrementReadOps(1);
 
     final HttpOpParam.Op op = GetOpParam.Op.GETCONTENTSUMMARY;
-    final Map<?, ?> m = run(op, p);
-    return JsonUtil.toContentSummary(m);
+    return new FsPathResponseRunner<ContentSummary>(op, p) {
+      @Override
+      ContentSummary decodeResponse(Map<?,?> json) {
+        return JsonUtil.toContentSummary(json);        
+      }
+    }.run();
   }
 
   @Override
@@ -1088,15 +1147,19 @@ public class WebHdfsFileSystem extends FileSystem
     statistics.incrementReadOps(1);
 
     final HttpOpParam.Op op = GetOpParam.Op.GETFILECHECKSUM;
-    final Map<?, ?> m = run(op, p);
-    return JsonUtil.toMD5MD5CRC32FileChecksum(m);
+    return new FsPathResponseRunner<MD5MD5CRC32FileChecksum>(op, p) {
+      @Override
+      MD5MD5CRC32FileChecksum decodeResponse(Map<?,?> json) throws IOException {
+        return JsonUtil.toMD5MD5CRC32FileChecksum(json);
+      }
+    }.run();
   }
 
   /**
    * Resolve an HDFS URL into real INetSocketAddress. It works like a DNS
    * resolver when the URL points to an non-HA cluster. When the URL points to
-   * an HA cluster, the resolver further resolves the logical name (i.e., the
-   * authority in the URL) into real namenode addresses.
+   * an HA cluster with its logical name, the resolver further resolves the
+   * logical name (i.e., the authority in the URL) into real namenode addresses.
    */
   private InetSocketAddress[] resolveNNAddr() throws IOException {
     Configuration conf = getConf();
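The WebHdfsFileSystem change above replaces the old untyped run(op, path, params) call, which returned a raw Map for every operation, with a small hierarchy of typed runners: AbstractRunner owns the HTTP mechanics, FsPathResponseRunner parses the JSON body, and thin subclasses such as FsPathBooleanRunner only decode a single field. A minimal standalone sketch of that pattern (hypothetical Demo* names, with the HTTP and retry handling stubbed out; not the actual Hadoop classes):

import java.io.IOException;
import java.util.Collections;
import java.util.Map;

// Generic runner: subclasses only say how to turn the parsed JSON into a T.
abstract class DemoResponseRunner<T> {
  final T run(Map<?, ?> json) throws IOException {
    if (json == null) {
      throw new IOException("Missing response");
    }
    return decodeResponse(json);
  }
  abstract T decodeResponse(Map<?, ?> json) throws IOException;
}

// Typed subclass: pulls the single "boolean" field, as MKDIRS/RENAME/DELETE do.
class DemoBooleanRunner extends DemoResponseRunner<Boolean> {
  @Override
  Boolean decodeResponse(Map<?, ?> json) {
    return (Boolean) json.get("boolean");
  }
}

public class DemoRunnerUsage {
  public static void main(String[] args) throws IOException {
    Map<String, Object> json =
        Collections.<String, Object>singletonMap("boolean", Boolean.TRUE);
    System.out.println(new DemoBooleanRunner().run(json)); // prints: true
  }
}

In the real client the map comes from jsonParse(conn, false), and run() additionally handles retries and connection cleanup, as the diff shows.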

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/HttpOpParam.java

@@ -102,7 +102,7 @@ public abstract class HttpOpParam<E extends Enum<E> & HttpOpParam.Op>
 
     @Override
     public boolean getDoOutput() {
-      return op.getDoOutput();
+      return false;
     }
 
     @Override

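The getDoOutput() change above is tied to the two-step create/append that the removed twoStepWrite() method in WebHdfsFileSystem used to implement: TemporaryRedirectOp models step one, which sends no request body and only reads the Location header, while step two streams the data to the datanode URL named in that header. A hedged, standalone sketch of the handshake (hypothetical host, port and path; error handling and authentication omitted):

import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;

public class TwoStepWriteSketch {
  public static void main(String[] args) throws Exception {
    // Step 1: PUT with no body and no auto-redirect; the namenode answers
    // with a temporary redirect whose Location header names a datanode.
    URL nnUrl = new URL("http://namenode:50070/webhdfs/v1/tmp/demo?op=CREATE");
    HttpURLConnection step1 = (HttpURLConnection) nnUrl.openConnection();
    step1.setRequestMethod("PUT");
    step1.setInstanceFollowRedirects(false);
    String redirect = step1.getHeaderField("Location");
    step1.disconnect();

    // Step 2: PUT the actual bytes to the redirected datanode URL.
    HttpURLConnection step2 =
        (HttpURLConnection) new URL(redirect).openConnection();
    step2.setRequestMethod("PUT");
    step2.setDoOutput(true);
    step2.setChunkedStreamingMode(32 << 10); // 32 KB chunks, as in the client
    try (OutputStream out = step2.getOutputStream()) {
      out.write("hello webhdfs".getBytes("UTF-8"));
    }
    System.out.println("HTTP " + step2.getResponseCode());
    step2.disconnect();
  }
}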
+ 804 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/native/libhdfs/test/vecsum.c

@@ -0,0 +1,804 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <malloc.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "config.h"
+#include "hdfs.h"
+
+#define VECSUM_CHUNK_SIZE (8 * 1024 * 1024)
+#define ZCR_READ_CHUNK_SIZE (1024 * 1024 * 8)
+#define NORMAL_READ_CHUNK_SIZE (8 * 1024 * 1024)
+#define DOUBLES_PER_LOOP_ITER 16
+
+static double timespec_to_double(const struct timespec *ts)
+{
+    double sec = ts->tv_sec;
+    double nsec = ts->tv_nsec;
+    return sec + (nsec / 1000000000L);
+}
+
+struct stopwatch {
+    struct timespec start;
+    struct timespec stop;
+    struct rusage rusage;
+};
+
+static struct stopwatch *stopwatch_create(void)
+{
+    struct stopwatch *watch;
+
+    watch = calloc(1, sizeof(struct stopwatch));
+    if (!watch) {
+        fprintf(stderr, "failed to allocate memory for stopwatch\n");
+        goto error;
+    }
+    if (clock_gettime(CLOCK_MONOTONIC, &watch->start)) {
+        int err = errno;
+        fprintf(stderr, "clock_gettime(CLOCK_MONOTONIC) failed with "
+            "error %d (%s)\n", err, strerror(err));
+        goto error;
+    }
+    if (getrusage(RUSAGE_THREAD, &watch->rusage) < 0) {
+        int err = errno;
+        fprintf(stderr, "getrusage failed: error %d (%s)\n",
+            err, strerror(err));
+        goto error;
+    }
+    return watch;
+
+error:
+    free(watch);
+    return NULL;
+}
+
+static void stopwatch_stop(struct stopwatch *watch,
+        long long bytes_read)
+{
+    double elapsed, rate;
+
+    if (clock_gettime(CLOCK_MONOTONIC, &watch->stop)) {
+        int err = errno;
+        fprintf(stderr, "clock_gettime(CLOCK_MONOTONIC) failed with "
+            "error %d (%s)\n", err, strerror(err));
+        goto done;
+    }
+    elapsed = timespec_to_double(&watch->stop) -
+        timespec_to_double(&watch->start);
+    rate = (bytes_read / elapsed) / (1024 * 1024 * 1024);
+    printf("stopwatch: took %.5g seconds to read %lld bytes, "
+        "for %.5g GB/s\n", elapsed, bytes_read, rate);
+    printf("stopwatch:  %.5g seconds\n", elapsed);
+done:
+    free(watch);
+}
+
+enum vecsum_type {
+    VECSUM_LOCAL = 0,
+    VECSUM_LIBHDFS,
+    VECSUM_ZCR,
+};
+
+#define VECSUM_TYPE_VALID_VALUES "libhdfs, zcr, or local"
+
+int parse_vecsum_type(const char *str)
+{
+    if (strcasecmp(str, "local") == 0)
+        return VECSUM_LOCAL;
+    else if (strcasecmp(str, "libhdfs") == 0)
+        return VECSUM_LIBHDFS;
+    else if (strcasecmp(str, "zcr") == 0)
+        return VECSUM_ZCR;
+    else
+        return -1;
+}
+
+struct options {
+    // The path to read.
+    const char *path;
+
+    // Length of the file.
+    long long length;
+
+    // The number of times to read the path.
+    int passes;
+
+    // Type of vecsum to do
+    enum vecsum_type ty;
+
+    // RPC address to use for HDFS
+    const char *rpc_address;
+};
+
+static struct options *options_create(void)
+{
+    struct options *opts = NULL;
+    const char *pass_str;
+    const char *ty_str;
+    const char *length_str;
+    int ty;
+
+    opts = calloc(1, sizeof(struct options));
+    if (!opts) {
+        fprintf(stderr, "failed to calloc options\n");
+        goto error;
+    }
+    opts->path = getenv("VECSUM_PATH");
+    if (!opts->path) {
+        fprintf(stderr, "You must set the VECSUM_PATH environment "
+            "variable to the path of the file to read.\n");
+        goto error;
+    }
+    length_str = getenv("VECSUM_LENGTH");
+    if (!length_str) {
+        length_str = "2147483648";
+    }
+    opts->length = atoll(length_str);
+    if (!opts->length) {
+        fprintf(stderr, "Can't parse VECSUM_LENGTH of '%s'.\n",
+                length_str);
+        goto error;
+    }
+    if (opts->length % VECSUM_CHUNK_SIZE) {
+        fprintf(stderr, "VECSUM_LENGTH must be a multiple of '%lld'.  The "
+                "currently specified length of '%lld' is not.\n",
+                (long long)VECSUM_CHUNK_SIZE, (long long)opts->length);
+        goto error;
+    }
+    pass_str = getenv("VECSUM_PASSES");
+    if (!pass_str) {
+        fprintf(stderr, "You must set the VECSUM_PASSES environment "
+            "variable to the number of passes to make.\n");
+        goto error;
+    }
+    opts->passes = atoi(pass_str);
+    if (opts->passes <= 0) {
+        fprintf(stderr, "Invalid value for the VECSUM_PASSES "
+            "environment variable.  You must set this to a "
+            "number greater than 0.\n");
+        goto error;
+    }
+    ty_str = getenv("VECSUM_TYPE");
+    if (!ty_str) {
+        fprintf(stderr, "You must set the VECSUM_TYPE environment "
+            "variable to " VECSUM_TYPE_VALID_VALUES "\n");
+        goto error;
+    }
+    ty = parse_vecsum_type(ty_str);
+    if (ty < 0) {
+        fprintf(stderr, "Invalid VECSUM_TYPE environment variable.  "
+            "Valid values are " VECSUM_TYPE_VALID_VALUES "\n");
+        goto error;
+    }
+    opts->ty = ty;
+    opts->rpc_address = getenv("VECSUM_RPC_ADDRESS");
+    if (!opts->rpc_address) {
+        opts->rpc_address = "default";
+    }
+    return opts;
+error:
+    free(opts);
+    return NULL;
+}
+
+static int test_file_chunk_setup(double **chunk)
+{
+    int i;
+    double *c, val;
+
+    c = malloc(VECSUM_CHUNK_SIZE);
+    if (!c) {
+        fprintf(stderr, "test_file_create: failed to malloc "
+                "a buffer of size '%lld'\n",
+                (long long) VECSUM_CHUNK_SIZE);
+        return EIO;
+    }
+    val = 0.0;
+    for (i = 0; i < VECSUM_CHUNK_SIZE / sizeof(double); i++) {
+        c[i] = val;
+        val += 0.5;
+    }
+    *chunk = c;
+    return 0;
+}
+
+static void options_free(struct options *opts)
+{
+    free(opts);
+}
+
+struct local_data {
+    int fd;
+    double *mmap;
+    long long length;
+};
+
+static int local_data_create_file(struct local_data *cdata,
+                                  const struct options *opts)
+{
+    int ret = EIO;
+    int dup_fd = -1;
+    FILE *fp = NULL;
+    double *chunk = NULL;
+    long long offset = 0;
+
+    dup_fd = dup(cdata->fd);
+    if (dup_fd < 0) {
+        ret = errno;
+        fprintf(stderr, "local_data_create_file: dup failed: %s (%d)\n",
+                strerror(ret), ret);
+        goto done;
+    }
+    fp = fdopen(dup_fd, "w");
+    if (!fp) {
+        ret = errno;
+        fprintf(stderr, "local_data_create_file: fdopen failed: %s (%d)\n",
+                strerror(ret), ret);
+        goto done;
+    }
+    ret = test_file_chunk_setup(&chunk);
+    if (ret)
+        goto done;
+    while (offset < opts->length) {
+        if (fwrite(chunk, VECSUM_CHUNK_SIZE, 1, fp) != 1) {
+            fprintf(stderr, "local_data_create_file: failed to write to "
+                    "the local file '%s' at offset %lld\n",
+                    opts->path, offset);
+            ret = EIO;
+            goto done;
+        }
+        offset += VECSUM_CHUNK_SIZE;
+    }
+    fprintf(stderr, "local_data_create_file: successfully re-wrote %s as "
+            "a file of length %lld\n", opts->path, opts->length);
+    ret = 0;
+
+done:
+    if (dup_fd >= 0) {
+        close(dup_fd);
+    }
+    if (fp) {
+        fclose(fp);
+    }
+    free(chunk);
+    return ret;
+}
+
+static struct local_data *local_data_create(const struct options *opts)
+{
+    struct local_data *cdata = NULL;
+    struct stat st_buf;
+
+    cdata = malloc(sizeof(*cdata));
+    if (!cdata) {
+        fprintf(stderr, "Failed to allocate local test data.\n");
+        goto error;
+    }
+    cdata->fd = -1;
+    cdata->mmap = MAP_FAILED;
+    cdata->length = opts->length;
+
+    cdata->fd = open(opts->path, O_RDWR | O_CREAT, 0777);
+    if (cdata->fd < 0) {
+        int err = errno;
+        fprintf(stderr, "local_data_create: failed to open %s "
+            "for read/write: error %d (%s)\n", opts->path, err, strerror(err));
+        goto error;
+    }
+    if (fstat(cdata->fd, &st_buf)) {
+        int err = errno;
+        fprintf(stderr, "local_data_create: fstat(%s) failed: "
+            "error %d (%s)\n", opts->path, err, strerror(err));
+        goto error;
+    }
+    if (st_buf.st_size != opts->length) {
+        int err;
+        fprintf(stderr, "local_data_create: current size of %s is %lld, but "
+                "we want %lld.  Re-writing the file.\n",
+                opts->path, (long long)st_buf.st_size,
+                (long long)opts->length);
+        err = local_data_create_file(cdata, opts);
+        if (err)
+            goto error;
+    }
+    cdata->mmap = mmap(NULL, cdata->length, PROT_READ,
+                       MAP_PRIVATE, cdata->fd, 0);
+    if (cdata->mmap == MAP_FAILED) {
+        int err = errno;
+        fprintf(stderr, "local_data_create: mmap(%s) failed: "
+            "error %d (%s)\n", opts->path, err, strerror(err));
+        goto error;
+    }
+    return cdata;
+
+error:
+    if (cdata) {
+        if (cdata->fd >= 0) {
+            close(cdata->fd);
+        }
+        free(cdata);
+    }
+    return NULL;
+}
+
+static void local_data_free(struct local_data *cdata)
+{
+    close(cdata->fd);
+    munmap(cdata->mmap, cdata->length);
+}
+
+struct libhdfs_data {
+    hdfsFS fs;
+    hdfsFile file;
+    long long length;
+    double *buf;
+};
+
+static void libhdfs_data_free(struct libhdfs_data *ldata)
+{
+    if (ldata->fs) {
+        free(ldata->buf);
+        if (ldata->file) {
+            hdfsCloseFile(ldata->fs, ldata->file);
+        }
+        hdfsDisconnect(ldata->fs);
+    }
+    free(ldata);
+}
+
+static int libhdfs_data_create_file(struct libhdfs_data *ldata,
+                                    const struct options *opts)
+{
+    int ret;
+    double *chunk = NULL;
+    long long offset = 0;
+
+    ldata->file = hdfsOpenFile(ldata->fs, opts->path, O_WRONLY, 0, 1, 0);
+    if (!ldata->file) {
+        ret = errno;
+        fprintf(stderr, "libhdfs_data_create_file: hdfsOpenFile(%s, "
+            "O_WRONLY) failed: error %d (%s)\n", opts->path, ret,
+            strerror(ret));
+        goto done;
+    }
+    ret = test_file_chunk_setup(&chunk);
+    if (ret)
+        goto done;
+    while (offset < opts->length) {
+        ret = hdfsWrite(ldata->fs, ldata->file, chunk, VECSUM_CHUNK_SIZE);
+        if (ret < 0) {
+            ret = errno;
+            fprintf(stderr, "libhdfs_data_create_file: got error %d (%s) at "
+                    "offset %lld of %s\n", ret, strerror(ret),
+                    offset, opts->path);
+            goto done;
+        } else if (ret < VECSUM_CHUNK_SIZE) {
+            fprintf(stderr, "libhdfs_data_create_file: got short write "
+                    "of %d at offset %lld of %s\n", ret, offset, opts->path);
+            goto done;
+        }
+        offset += VECSUM_CHUNK_SIZE;
+    }
+    ret = 0;
+done:
+    free(chunk);
+    if (ldata->file) {
+        if (hdfsCloseFile(ldata->fs, ldata->file)) {
+            fprintf(stderr, "libhdfs_data_create_file: hdfsCloseFile error.");
+            ret = EIO;
+        }
+        ldata->file = NULL;
+    }
+    return ret;
+}
+
+static struct libhdfs_data *libhdfs_data_create(const struct options *opts)
+{
+    struct libhdfs_data *ldata = NULL;
+    struct hdfsBuilder *builder = NULL;
+    hdfsFileInfo *pinfo = NULL;
+
+    ldata = calloc(1, sizeof(struct libhdfs_data));
+    if (!ldata) {
+        fprintf(stderr, "Failed to allocate libhdfs test data.\n");
+        goto error;
+    }
+    builder = hdfsNewBuilder();
+    if (!builder) {
+        fprintf(stderr, "Failed to create builder.\n");
+        goto error;
+    }
+    hdfsBuilderSetNameNode(builder, opts->rpc_address);
+    hdfsBuilderConfSetStr(builder,
+        "dfs.client.read.shortcircuit.skip.checksum", "true");
+    ldata->fs = hdfsBuilderConnect(builder);
+    if (!ldata->fs) {
+        fprintf(stderr, "Could not connect to default namenode!\n");
+        goto error;
+    }
+    pinfo = hdfsGetPathInfo(ldata->fs, opts->path);
+    if (!pinfo) {
+        int err = errno;
+        fprintf(stderr, "hdfsGetPathInfo(%s) failed: error %d (%s).  "
+                "Attempting to re-create file.\n",
+            opts->path, err, strerror(err));
+        if (libhdfs_data_create_file(ldata, opts))
+            goto error;
+    } else if (pinfo->mSize != opts->length) {
+        fprintf(stderr, "hdfsGetPathInfo(%s) failed: length was %lld, "
+                "but we want length %lld.  Attempting to re-create file.\n",
+                opts->path, (long long)pinfo->mSize, (long long)opts->length);
+        if (libhdfs_data_create_file(ldata, opts))
+            goto error;
+    }
+    ldata->file = hdfsOpenFile(ldata->fs, opts->path, O_RDONLY, 0, 0, 0);
+    if (!ldata->file) {
+        int err = errno;
+        fprintf(stderr, "hdfsOpenFile(%s) failed: error %d (%s)\n",
+            opts->path, err, strerror(err));
+        goto error;
+    }
+    ldata->length = opts->length;
+    return ldata;
+
+error:
+    if (pinfo)
+        hdfsFreeFileInfo(pinfo, 1);
+    if (ldata)
+        libhdfs_data_free(ldata);
+    return NULL;
+}
+
+static int check_byte_size(int byte_size, const char *const str)
+{
+    if (byte_size % sizeof(double)) {
+        fprintf(stderr, "%s is not a multiple "
+            "of sizeof(double)\n", str);
+        return EINVAL;
+    }
+    if ((byte_size / sizeof(double)) % DOUBLES_PER_LOOP_ITER) {
+        fprintf(stderr, "The number of doubles contained in "
+            "%s is not a multiple of DOUBLES_PER_LOOP_ITER\n",
+            str);
+        return EINVAL;
+    }
+    return 0;
+}
+
+#ifdef HAVE_INTEL_SSE_INTRINSICS
+
+#include <emmintrin.h>
+
+static double vecsum(const double *buf, int num_doubles)
+{
+    int i;
+    double hi, lo;
+    __m128d x0, x1, x2, x3, x4, x5, x6, x7;
+    __m128d sum0 = _mm_set_pd(0.0,0.0);
+    __m128d sum1 = _mm_set_pd(0.0,0.0);
+    __m128d sum2 = _mm_set_pd(0.0,0.0);
+    __m128d sum3 = _mm_set_pd(0.0,0.0);
+    __m128d sum4 = _mm_set_pd(0.0,0.0);
+    __m128d sum5 = _mm_set_pd(0.0,0.0);
+    __m128d sum6 = _mm_set_pd(0.0,0.0);
+    __m128d sum7 = _mm_set_pd(0.0,0.0);
+    for (i = 0; i < num_doubles; i+=DOUBLES_PER_LOOP_ITER) {
+        x0 = _mm_load_pd(buf + i + 0);
+        x1 = _mm_load_pd(buf + i + 2);
+        x2 = _mm_load_pd(buf + i + 4);
+        x3 = _mm_load_pd(buf + i + 6);
+        x4 = _mm_load_pd(buf + i + 8);
+        x5 = _mm_load_pd(buf + i + 10);
+        x6 = _mm_load_pd(buf + i + 12);
+        x7 = _mm_load_pd(buf + i + 14);
+        sum0 = _mm_add_pd(sum0, x0);
+        sum1 = _mm_add_pd(sum1, x1);
+        sum2 = _mm_add_pd(sum2, x2);
+        sum3 = _mm_add_pd(sum3, x3);
+        sum4 = _mm_add_pd(sum4, x4);
+        sum5 = _mm_add_pd(sum5, x5);
+        sum6 = _mm_add_pd(sum6, x6);
+        sum7 = _mm_add_pd(sum7, x7);
+    }
+    x0 = _mm_add_pd(sum0, sum1);
+    x1 = _mm_add_pd(sum2, sum3);
+    x2 = _mm_add_pd(sum4, sum5);
+    x3 = _mm_add_pd(sum6, sum7);
+    x4 = _mm_add_pd(x0, x1);
+    x5 = _mm_add_pd(x2, x3);
+    x6 = _mm_add_pd(x4, x5);
+    _mm_storeh_pd(&hi, x6);
+    _mm_storel_pd(&lo, x6);
+    return hi + lo;
+}
+
+#else
+
+static double vecsum(const double *buf, int num_doubles)
+{
+    int i;
+    double sum = 0.0;
+    for (i = 0; i < num_doubles; i++) {
+        sum += buf[i];
+    }
+    return sum;
+}
+
+#endif
+
+static int vecsum_zcr_loop(int pass, struct libhdfs_data *ldata,
+        struct hadoopRzOptions *zopts,
+        const struct options *opts)
+{
+    int32_t len;
+    double sum = 0.0;
+    const double *buf;
+    struct hadoopRzBuffer *rzbuf = NULL;
+    int ret;
+
+    while (1) {
+        rzbuf = hadoopReadZero(ldata->file, zopts, ZCR_READ_CHUNK_SIZE);
+        if (!rzbuf) {
+            ret = errno;
+            fprintf(stderr, "hadoopReadZero failed with error "
+                "code %d (%s)\n", ret, strerror(ret));
+            goto done;
+        }
+        buf = hadoopRzBufferGet(rzbuf);
+        if (!buf) break;
+        len = hadoopRzBufferLength(rzbuf);
+        if (len < ZCR_READ_CHUNK_SIZE) {
+            fprintf(stderr, "hadoopReadZero got a partial read "
+                "of length %d\n", len);
+            ret = EINVAL;
+            goto done;
+        }
+        sum += vecsum(buf,
+            ZCR_READ_CHUNK_SIZE / sizeof(double));
+        hadoopRzBufferFree(ldata->file, rzbuf);
+    }
+    printf("finished zcr pass %d.  sum = %g\n", pass, sum);
+    ret = 0;
+
+done:
+    if (rzbuf)
+        hadoopRzBufferFree(ldata->file, rzbuf);
+    return ret;
+}
+
+static int vecsum_zcr(struct libhdfs_data *ldata,
+        const struct options *opts)
+{
+    int ret, pass;
+    struct hadoopRzOptions *zopts = NULL;
+
+    zopts = hadoopRzOptionsAlloc();
+    if (!zopts) {
+        fprintf(stderr, "hadoopRzOptionsAlloc failed.\n");
+        ret = ENOMEM;
+        goto done;
+    }
+    if (hadoopRzOptionsSetSkipChecksum(zopts, 1)) {
+        ret = errno;
+        perror("hadoopRzOptionsSetSkipChecksum failed: ");
+        goto done;
+    }
+    if (hadoopRzOptionsSetByteBufferPool(zopts, NULL)) {
+        ret = errno;
+        perror("hadoopRzOptionsSetByteBufferPool failed: ");
+        goto done;
+    }
+    for (pass = 0; pass < opts->passes; ++pass) {
+        ret = vecsum_zcr_loop(pass, ldata, zopts, opts);
+        if (ret) {
+            fprintf(stderr, "vecsum_zcr_loop pass %d failed "
+                "with error %d\n", pass, ret);
+            goto done;
+        }
+        hdfsSeek(ldata->fs, ldata->file, 0);
+    }
+    ret = 0;
+done:
+    if (zopts)
+        hadoopRzOptionsFree(zopts);
+    return ret;
+}
+
+tSize hdfsReadFully(hdfsFS fs, hdfsFile f, void* buffer, tSize length)
+{
+    uint8_t *buf = buffer;
+    tSize ret, nread = 0;
+
+    while (length > 0) {
+        ret = hdfsRead(fs, f, buf, length);
+        if (ret < 0) {
+            if (errno != EINTR) {
+                return -1;
+            }
+        }
+        if (ret == 0) {
+            break;
+        }
+        nread += ret;
+        length -= ret;
+        buf += ret;
+    }
+    return nread;
+}
+
+static int vecsum_normal_loop(int pass, const struct libhdfs_data *ldata,
+            const struct options *opts)
+{
+    double sum = 0.0;
+
+    while (1) {
+        int res = hdfsReadFully(ldata->fs, ldata->file, ldata->buf,
+                NORMAL_READ_CHUNK_SIZE);
+        if (res == 0) // EOF
+            break;
+        if (res < 0) {
+            int err = errno;
+            fprintf(stderr, "hdfsRead failed with error %d (%s)\n",
+                err, strerror(err));
+            return err;
+        }
+        if (res < NORMAL_READ_CHUNK_SIZE) {
+            fprintf(stderr, "hdfsRead got a partial read of "
+                "length %d\n", res);
+            return EINVAL;
+        }
+        sum += vecsum(ldata->buf,
+                  NORMAL_READ_CHUNK_SIZE / sizeof(double));
+    }
+    printf("finished normal pass %d.  sum = %g\n", pass, sum);
+    return 0;
+}
+
+static int vecsum_libhdfs(struct libhdfs_data *ldata,
+            const struct options *opts)
+{
+    int pass;
+
+    ldata->buf = malloc(NORMAL_READ_CHUNK_SIZE);
+    if (!ldata->buf) {
+        fprintf(stderr, "failed to malloc buffer of size %d\n",
+            NORMAL_READ_CHUNK_SIZE);
+        return ENOMEM;
+    }
+    for (pass = 0; pass < opts->passes; ++pass) {
+        int ret = vecsum_normal_loop(pass, ldata, opts);
+        if (ret) {
+            fprintf(stderr, "vecsum_normal_loop pass %d failed "
+                "with error %d\n", pass, ret);
+            return ret;
+        }
+        hdfsSeek(ldata->fs, ldata->file, 0);
+    }
+    return 0;
+}
+
+static void vecsum_local(struct local_data *cdata, const struct options *opts)
+{
+    int pass;
+
+    for (pass = 0; pass < opts->passes; pass++) {
+        double sum = vecsum(cdata->mmap, cdata->length / sizeof(double));
+        printf("finished vecsum_local pass %d.  sum = %g\n", pass, sum);
+    }
+}
+
+static long long vecsum_length(const struct options *opts,
+                const struct libhdfs_data *ldata)
+{
+    if (opts->ty == VECSUM_LOCAL) {
+        struct stat st_buf = { 0 };
+        if (stat(opts->path, &st_buf)) {
+            int err = errno;
+            fprintf(stderr, "vecsum_length: stat(%s) failed: "
+                "error %d (%s)\n", opts->path, err, strerror(err));
+            return -EIO;
+        }
+        return st_buf.st_size;
+    } else {
+        return ldata->length;
+    }
+}
+
+/*
+ * vecsum is a microbenchmark which measures the speed of various ways of
+ * reading from HDFS.  It creates a file containing floating-point 'doubles',
+ * and computes the sum of all the doubles several times.  For some CPUs,
+ * assembly optimizations are used for the summation (SSE, etc).
+ */
+int main(void)
+{
+    int ret = 1;
+    struct options *opts = NULL;
+    struct local_data *cdata = NULL;
+    struct libhdfs_data *ldata = NULL;
+    struct stopwatch *watch = NULL;
+
+    if (check_byte_size(VECSUM_CHUNK_SIZE, "VECSUM_CHUNK_SIZE") ||
+        check_byte_size(ZCR_READ_CHUNK_SIZE,
+                "ZCR_READ_CHUNK_SIZE") ||
+        check_byte_size(NORMAL_READ_CHUNK_SIZE,
+                "NORMAL_READ_CHUNK_SIZE")) {
+        goto done;
+    }
+    opts = options_create();
+    if (!opts)
+        goto done;
+    if (opts->ty == VECSUM_LOCAL) {
+        cdata = local_data_create(opts);
+        if (!cdata)
+            goto done;
+    } else {
+        ldata = libhdfs_data_create(opts);
+        if (!ldata)
+            goto done;
+    }
+    watch = stopwatch_create();
+    if (!watch)
+        goto done;
+    switch (opts->ty) {
+    case VECSUM_LOCAL:
+        vecsum_local(cdata, opts);
+        ret = 0;
+        break;
+    case VECSUM_LIBHDFS:
+        ret = vecsum_libhdfs(ldata, opts);
+        break;
+    case VECSUM_ZCR:
+        ret = vecsum_zcr(ldata, opts);
+        break;
+    }
+    if (ret) {
+        fprintf(stderr, "vecsum failed with error %d\n", ret);
+        goto done;
+    }
+    ret = 0;
+done:
+    fprintf(stderr, "cleaning up...\n");
+    if (watch && (ret == 0)) {
+        long long length = vecsum_length(opts, ldata);
+        if (length >= 0) {
+            stopwatch_stop(watch, length * opts->passes);
+        }
+    }
+    if (cdata)
+        local_data_free(cdata);
+    if (ldata)
+        libhdfs_data_free(ldata);
+    if (opts)
+        options_free(opts);
+    return ret;
+}
+
+// vim: ts=4:sw=4:tw=79:et

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/native/libhdfs/test_libhdfs_threaded.c

@@ -122,7 +122,7 @@ struct tlhPaths {
 
 static int setupPaths(const struct tlhThreadInfo *ti, struct tlhPaths *paths)
 {
-    memset(paths, sizeof(*paths), 0);
+    memset(paths, 0, sizeof(*paths));
     if (snprintf(paths->prefix, sizeof(paths->prefix), "/tlhData%04d",
                  ti->threadIdx) >= sizeof(paths->prefix)) {
         return ENAMETOOLONG;

+ 1 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/datanode/index.html

@@ -18,6 +18,7 @@
 -->
 <html xmlns="http://www.w3.org/1999/xhtml">
 <head>
+<meta http-equiv="X-UA-Compatible" content="IE=9" />
 <link rel="stylesheet" type="text/css" href="/static/bootstrap-3.0.2/css/bootstrap.min.css" />
 <link rel="stylesheet" type="text/css" href="/static/hadoop.css" />
 <title>DataNode Information</title>

+ 6 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.html

@@ -18,6 +18,7 @@
 -->
 <html xmlns="http://www.w3.org/1999/xhtml">
 <head>
+<meta http-equiv="X-UA-Compatible" content="IE=9" />
 <link rel="stylesheet" type="text/css" href="/static/bootstrap-3.0.2/css/bootstrap.min.css" />
 <link rel="stylesheet" type="text/css" href="/static/hadoop.css" />
 <title>Namenode information</title>
@@ -72,7 +73,7 @@
 <script type="text/x-dust-template" id="tmpl-dfshealth">
 
 {#nn}
-{@if cond="{DistinctVersionCount} > 1 || '{RollingUpgradeStatus}'.length"}
+{@if cond="{DistinctVersionCount} > 1 || '{RollingUpgradeStatus}'.length || !'{UpgradeFinalized}'"}
 <div class="alert alert-dismissable alert-info">
   <button type="button" class="close" data-dismiss="alert" aria-hidden="true">&times;</button>
 
@@ -92,6 +93,10 @@
     {key} ({value}) {@sep},{/sep}
     {/DistinctVersions}
   {/if}
+
+  {^UpgradeFinalized}
+     <p>Upgrade in progress. Not yet finalized.</p>
+  {/UpgradeFinalized}
 </div>
 {/if}
 

+ 2 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.html

@@ -18,6 +18,7 @@
   -->
 <html xmlns="http://www.w3.org/1999/xhtml">
   <head>
+    <meta http-equiv="X-UA-Compatible" content="IE=9" />
     <link rel="stylesheet" type="text/css" href="/static/bootstrap-3.0.2/css/bootstrap.min.css" />
     <link rel="stylesheet" type="text/css" href="/static/hadoop.css" />
     <title>Browsing HDFS</title>
@@ -90,7 +91,7 @@
         <tbody>
           {#FileStatus}
           <tr>
-            <td>{type|helper_to_directory}{permission|helper_to_permission}</td>
+            <td>{type|helper_to_directory}{permission|helper_to_permission}{aclBit|helper_to_acl_bit}</td>
             <td>{owner}</td>
             <td>{group}</td>
             <td>{length|fmt_bytes}</td>

+ 1 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/journal/index.html

@@ -18,6 +18,7 @@
 -->
 <html xmlns="http://www.w3.org/1999/xhtml">
 <head>
+<meta http-equiv="X-UA-Compatible" content="IE=9" />
 <link rel="stylesheet" type="text/css" href="/static/bootstrap-3.0.2/css/bootstrap.min.css" />
 <link rel="stylesheet" type="text/css" href="/static/hadoop.css" />
 <title>JournalNode Information</title>

+ 1 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/secondary/status.html

@@ -18,6 +18,7 @@
 -->
 <html xmlns="http://www.w3.org/1999/xhtml">
 <head>
+  <meta http-equiv="X-UA-Compatible" content="IE=9" />
   <link rel="stylesheet" type="text/css"
        href="/static/bootstrap-3.0.2/css/bootstrap.min.css" />
   <link rel="stylesheet" type="text/css" href="/static/hadoop.css" />

+ 7 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dfs-dust.js

@@ -63,7 +63,8 @@
 
     'helper_to_permission': function (v) {
       var symbols = [ '---', '--x', '-w-', '-wx', 'r--', 'r-x', 'rw-', 'rwx' ];
-      var sticky = v > 1000;
+      var vInt = parseInt(v, 8);
+      var sticky = (vInt & (1 << 9)) != 0;
 
       var res = "";
       for (var i = 0; i < 3; ++i) {
@@ -72,7 +73,7 @@
       }
 
       if (sticky) {
-        var otherExec = ((v % 10) & 1) == 1;
+        var otherExec = (vInt & 1) == 1;
         res = res.substr(0, res.length - 1) + (otherExec ? 't' : 'T');
       }
 
@@ -81,6 +82,10 @@
 
     'helper_to_directory' : function (v) {
       return v === 'DIRECTORY' ? 'd' : '-';
+    },
+
+    'helper_to_acl_bit': function (v) {
+      return v ? '+' : "";
     }
   };
   $.extend(dust.filters, filters);
+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsNfsGateway.apt.vm

@@ -318,7 +318,7 @@ HDFS NFS Gateway
   The users can mount the HDFS namespace as shown below:
 
 -------------------------------------------------------------------
-       mount -t nfs -o vers=3,proto=tcp,nolock $server:/  $mount_point
+       mount -t nfs -o vers=3,proto=tcp,nolock,noacl $server:/  $mount_point
 -------------------------------------------------------------------
 
   Then the users can access HDFS as part of the local file system except that, 

+ 79 - 5
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java

@@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs;
 
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
@@ -41,12 +42,17 @@ import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.HAUtil;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider;
+import org.apache.hadoop.hdfs.server.namenode.ha.IPFailoverProxyProvider;
 import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil;
 import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.retry.DefaultFailoverProxyProvider;
+import org.apache.hadoop.io.retry.FailoverProxyProvider;
 import org.apache.hadoop.net.ConnectTimeoutException;
 import org.apache.hadoop.net.StandardSocketFactory;
+import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.util.StringUtils;
 import org.hamcrest.BaseMatcher;
@@ -172,12 +178,12 @@ public class TestDFSClientFailover {
    */
   @Test
   public void testLogicalUriShouldNotHavePorts() {
-    Configuration conf = new HdfsConfiguration();
-    conf.set(DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + ".foo",
-        ConfiguredFailoverProxyProvider.class.getName());
-    Path p = new Path("hdfs://foo:12345/");
+    Configuration config = new HdfsConfiguration(conf);
+    String logicalName = HATestUtil.getLogicalHostname(cluster);
+    HATestUtil.setFailoverConfigurations(cluster, config, logicalName);
+    Path p = new Path("hdfs://" + logicalName + ":12345/");
     try {
-      p.getFileSystem(conf).exists(p);
+      p.getFileSystem(config).exists(p);
       fail("Did not fail with fake FS");
     } catch (IOException ioe) {
       GenericTestUtils.assertExceptionContains(
@@ -278,4 +284,72 @@ public class TestDFSClientFailover {
     // Ensure that the logical hostname was never resolved.
     Mockito.verify(spyNS, Mockito.never()).lookupAllHostAddr(Mockito.eq(logicalHost));
   }
+
+  /** Dummy implementation of plain FailoverProxyProvider */
+  public static class DummyLegacyFailoverProxyProvider<T>
+      implements FailoverProxyProvider<T> {
+    private Class<T> xface;
+    private T proxy;
+    public DummyLegacyFailoverProxyProvider(Configuration conf, URI uri,
+        Class<T> xface) {
+      try {
+        this.proxy = NameNodeProxies.createNonHAProxy(conf,
+            NameNode.getAddress(uri), xface,
+            UserGroupInformation.getCurrentUser(), false).getProxy();
+        this.xface = xface;
+      } catch (IOException ioe) {
+      }
+    }
+
+    @Override
+    public Class<T> getInterface() {
+      return xface;
+    }
+
+    @Override
+    public ProxyInfo<T> getProxy() {
+      return new ProxyInfo<T>(proxy, "dummy");
+    }
+
+    @Override
+    public void performFailover(T currentProxy) {
+    }
+
+    @Override
+    public void close() throws IOException {
+    }
+  }
+
+  /**
+   * Test to verify legacy proxy providers are correctly wrapped.
+   */
+  @Test
+  public void testWrappedFailoverProxyProvider() throws Exception {
+    // setup the config with the dummy provider class
+    Configuration config = new HdfsConfiguration(conf);
+    String logicalName = HATestUtil.getLogicalHostname(cluster);
+    HATestUtil.setFailoverConfigurations(cluster, config, logicalName);
+    config.set(DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "." + logicalName,
+        DummyLegacyFailoverProxyProvider.class.getName());
+    Path p = new Path("hdfs://" + logicalName + "/");
+
+    // Logical URI should be used.
+    assertTrue("Legacy proxy providers should use logical URI.",
+        HAUtil.useLogicalUri(config, p.toUri()));
+  }
+
+  /**
+   * Test to verify that IPFailoverProxyProvider does not require a logical URI.
+   */
+  @Test
+  public void testIPFailoverProxyProviderLogicalUri() throws Exception {
+    // setup the config with the IP failover proxy provider class
+    Configuration config = new HdfsConfiguration(conf);
+    URI nnUri = cluster.getURI(0);
+    config.set(DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "." +
+        nnUri.getHost(),
+        IPFailoverProxyProvider.class.getName());
+
+    assertFalse("IPFailoverProxyProvider should not use logical URI.",
+        HAUtil.useLogicalUri(config, nnUri));
+  }
+
 }
 }

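The two new tests above turn on whether a given failover proxy provider requires the client to keep using the logical (unresolved) URI. As a hedged sketch of that distinction only -- the class and method names below are invented and are not the HAUtil/NameNodeProxies code:

    /** Illustrative sketch, not the Hadoop implementation. */
    interface ProxyProviderSketch {
      /** true if the client should keep the logical URI instead of resolving a host. */
      boolean useLogicalURI();
    }

    /** A configured HA provider keeps the logical name-service URI. */
    class ConfiguredProviderSketch implements ProxyProviderSketch {
      @Override public boolean useLogicalURI() { return true; }
    }

    /** An IP-failover provider talks to a concrete address, so no logical URI is needed. */
    class IpFailoverProviderSketch implements ProxyProviderSketch {
      @Override public boolean useLogicalURI() { return false; }
    }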
+ 27 - 7
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSPermission.java

@@ -429,6 +429,7 @@ public class TestDFSPermission {
       short[] ancestorPermission, short[] parentPermission,
       short[] ancestorPermission, short[] parentPermission,
       short[] filePermission, Path[] parentDirs, Path[] files, Path[] dirs)
       short[] filePermission, Path[] parentDirs, Path[] files, Path[] dirs)
       throws Exception {
       throws Exception {
+    boolean[] isDirEmpty = new boolean[NUM_TEST_PERMISSIONS];
     login(SUPERUSER);
     login(SUPERUSER);
     for (int i = 0; i < NUM_TEST_PERMISSIONS; i++) {
     for (int i = 0; i < NUM_TEST_PERMISSIONS; i++) {
       create(OpType.CREATE, files[i]);
       create(OpType.CREATE, files[i]);
@@ -441,6 +442,8 @@ public class TestDFSPermission {
       FsPermission fsPermission = new FsPermission(filePermission[i]);
       FsPermission fsPermission = new FsPermission(filePermission[i]);
       fs.setPermission(files[i], fsPermission);
       fs.setPermission(files[i], fsPermission);
       fs.setPermission(dirs[i], fsPermission);
       fs.setPermission(dirs[i], fsPermission);
+
+      isDirEmpty[i] = (fs.listStatus(dirs[i]).length == 0);
     }
     }
 
 
     login(ugi);
     login(ugi);
@@ -461,7 +464,7 @@ public class TestDFSPermission {
           parentPermission[i], ancestorPermission[next], parentPermission[next]);
           parentPermission[i], ancestorPermission[next], parentPermission[next]);
       testDeleteFile(ugi, files[i], ancestorPermission[i], parentPermission[i]);
       testDeleteFile(ugi, files[i], ancestorPermission[i], parentPermission[i]);
       testDeleteDir(ugi, dirs[i], ancestorPermission[i], parentPermission[i],
       testDeleteDir(ugi, dirs[i], ancestorPermission[i], parentPermission[i],
-          filePermission[i], null);
+          filePermission[i], null, isDirEmpty[i]);
     }
     }
     
     
     // test non existent file
     // test non existent file
@@ -924,7 +927,8 @@ public class TestDFSPermission {
   }
   }
 
 
   /* A class that verifies the permission checking is correct for
   /* A class that verifies the permission checking is correct for
-   * directory deletion */
+   * directory deletion
+   */
   private class DeleteDirPermissionVerifier extends DeletePermissionVerifier {
   private class DeleteDirPermissionVerifier extends DeletePermissionVerifier {
     private short[] childPermissions;
     private short[] childPermissions;
 
 
@@ -958,6 +962,17 @@ public class TestDFSPermission {
     }
     }
   }
   }
 
 
+  /* A class that verifies the permission checking is correct for
+   * empty-directory deletion
+   */
+  private class DeleteEmptyDirPermissionVerifier extends DeleteDirPermissionVerifier {
+    @Override
+    void setOpPermission() {
+      this.opParentPermission = SEARCH_MASK | WRITE_MASK;
+      this.opPermission = NULL_MASK;
+    }
+  }
+
   final DeletePermissionVerifier fileDeletionVerifier =
   final DeletePermissionVerifier fileDeletionVerifier =
     new DeletePermissionVerifier();
     new DeletePermissionVerifier();
 
 
@@ -971,14 +986,19 @@ public class TestDFSPermission {
   final DeleteDirPermissionVerifier dirDeletionVerifier =
   final DeleteDirPermissionVerifier dirDeletionVerifier =
     new DeleteDirPermissionVerifier();
     new DeleteDirPermissionVerifier();
 
 
+  final DeleteEmptyDirPermissionVerifier emptyDirDeletionVerifier =
+      new DeleteEmptyDirPermissionVerifier();
+
   /* test if the permission checking of directory deletion is correct */
   /* test if the permission checking of directory deletion is correct */
   private void testDeleteDir(UserGroupInformation ugi, Path path,
   private void testDeleteDir(UserGroupInformation ugi, Path path,
       short ancestorPermission, short parentPermission, short permission,
       short ancestorPermission, short parentPermission, short permission,
-      short[] childPermissions) throws Exception {
-    dirDeletionVerifier.set(path, ancestorPermission, parentPermission,
-        permission, childPermissions);
-    dirDeletionVerifier.verifyPermission(ugi);
-
+      short[] childPermissions,
+      final boolean isDirEmpty) throws Exception {
+    DeleteDirPermissionVerifier ddpv = isDirEmpty ?
+        emptyDirDeletionVerifier : dirDeletionVerifier;
+    ddpv.set(path, ancestorPermission, parentPermission, permission,
+        childPermissions);
+    ddpv.verifyPermission(ugi);
   }
   }
 
 
   /* log into dfs as the given user */
   /* log into dfs as the given user */

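The TestDFSPermission change records whether each directory was empty at setup time and then routes deletion through a verifier matching that state. A minimal sketch of that dispatch, with simplified names that are not the test's actual classes:

    interface PermissionVerifier {
      void verify() throws Exception;
    }

    class DeleteVerifierDispatchSketch {
      private final PermissionVerifier emptyDirVerifier;
      private final PermissionVerifier nonEmptyDirVerifier;

      DeleteVerifierDispatchSketch(PermissionVerifier empty, PermissionVerifier nonEmpty) {
        this.emptyDirVerifier = empty;
        this.nonEmptyDirVerifier = nonEmpty;
      }

      // Mirror of the isDirEmpty branch added to testDeleteDir(): empty directories only
      // need write+search on the parent, non-empty ones also need access to the children.
      void verifyDelete(boolean isDirEmpty) throws Exception {
        (isDirEmpty ? emptyDirVerifier : nonEmptyDirVerifier).verify();
      }
    }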
+ 9 - 9
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeBlockScanner.java

@@ -87,15 +87,15 @@ public class TestDatanodeBlockScanner {
   throws IOException, TimeoutException {
   throws IOException, TimeoutException {
     URL url = new URL("http://localhost:" + infoPort +
     URL url = new URL("http://localhost:" + infoPort +
                       "/blockScannerReport?listblocks");
                       "/blockScannerReport?listblocks");
-    long lastWarnTime = Time.now();
+    long lastWarnTime = Time.monotonicNow();
     if (newTime <= 0) newTime = 1L;
     if (newTime <= 0) newTime = 1L;
     long verificationTime = 0;
     long verificationTime = 0;
     
     
     String block = DFSTestUtil.getFirstBlock(fs, file).getBlockName();
     String block = DFSTestUtil.getFirstBlock(fs, file).getBlockName();
     long failtime = (timeout <= 0) ? Long.MAX_VALUE 
     long failtime = (timeout <= 0) ? Long.MAX_VALUE 
-        : Time.now() + timeout;
+        : Time.monotonicNow() + timeout;
     while (verificationTime < newTime) {
     while (verificationTime < newTime) {
-      if (failtime < Time.now()) {
+      if (failtime < Time.monotonicNow()) {
         throw new TimeoutException("failed to achieve block verification after "
         throw new TimeoutException("failed to achieve block verification after "
             + timeout + " msec.  Current verification timestamp = "
             + timeout + " msec.  Current verification timestamp = "
             + verificationTime + ", requested verification time > " 
             + verificationTime + ", requested verification time > " 
@@ -118,7 +118,7 @@ public class TestDatanodeBlockScanner {
       }
       }
       
       
       if (verificationTime < newTime) {
       if (verificationTime < newTime) {
-        long now = Time.now();
+        long now = Time.monotonicNow();
         if ((now - lastWarnTime) >= 5*1000) {
         if ((now - lastWarnTime) >= 5*1000) {
           LOG.info("Waiting for verification of " + block);
           LOG.info("Waiting for verification of " + block);
           lastWarnTime = now; 
           lastWarnTime = now; 
@@ -134,7 +134,7 @@ public class TestDatanodeBlockScanner {
 
 
   @Test
   @Test
   public void testDatanodeBlockScanner() throws IOException, TimeoutException {
   public void testDatanodeBlockScanner() throws IOException, TimeoutException {
-    long startTime = Time.now();
+    long startTime = Time.monotonicNow();
     
     
     Configuration conf = new HdfsConfiguration();
     Configuration conf = new HdfsConfiguration();
     MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
     MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
@@ -344,7 +344,7 @@ public class TestDatanodeBlockScanner {
     conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 3L);
     conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 3L);
     conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REPLICATION_CONSIDERLOAD_KEY, false);
     conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REPLICATION_CONSIDERLOAD_KEY, false);
 
 
-    long startTime = Time.now();
+    long startTime = Time.monotonicNow();
     MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
     MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
                                                .numDataNodes(REPLICATION_FACTOR)
                                                .numDataNodes(REPLICATION_FACTOR)
                                                .build();
                                                .build();
@@ -428,10 +428,10 @@ public class TestDatanodeBlockScanner {
   private static void waitForBlockDeleted(ExtendedBlock blk, int dnIndex,
   private static void waitForBlockDeleted(ExtendedBlock blk, int dnIndex,
       long timeout) throws TimeoutException, InterruptedException {
       long timeout) throws TimeoutException, InterruptedException {
     File blockFile = MiniDFSCluster.getBlockFile(dnIndex, blk);
     File blockFile = MiniDFSCluster.getBlockFile(dnIndex, blk);
-    long failtime = Time.now() 
+    long failtime = Time.monotonicNow()
                     + ((timeout > 0) ? timeout : Long.MAX_VALUE);
                     + ((timeout > 0) ? timeout : Long.MAX_VALUE);
     while (blockFile != null && blockFile.exists()) {
     while (blockFile != null && blockFile.exists()) {
-      if (failtime < Time.now()) {
+      if (failtime < Time.monotonicNow()) {
         throw new TimeoutException("waited too long for blocks to be deleted: "
         throw new TimeoutException("waited too long for blocks to be deleted: "
             + blockFile.getPath() + (blockFile.exists() ? " still exists; " : " is absent; "));
             + blockFile.getPath() + (blockFile.exists() ? " still exists; " : " is absent; "));
       }
       }
@@ -462,7 +462,7 @@ public class TestDatanodeBlockScanner {
 
 
   @Test
   @Test
   public void testDuplicateScans() throws Exception {
   public void testDuplicateScans() throws Exception {
-    long startTime = Time.now();
+    long startTime = Time.monotonicNow();
     MiniDFSCluster cluster = new MiniDFSCluster.Builder(new Configuration())
     MiniDFSCluster cluster = new MiniDFSCluster.Builder(new Configuration())
         .numDataNodes(1).build();
         .numDataNodes(1).build();
     FileSystem fs = null;
     FileSystem fs = null;

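The TestDatanodeBlockScanner edits replace wall-clock reads with a monotonic clock so the timeout math cannot be broken by clock adjustments. A self-contained sketch of the pattern, using System.nanoTime() as a stand-in for Hadoop's Time.monotonicNow():

    import java.util.concurrent.TimeUnit;
    import java.util.concurrent.TimeoutException;
    import java.util.function.BooleanSupplier;

    class MonotonicWaitSketch {
      /** Polls until the condition holds or the monotonic deadline passes. */
      static void awaitCondition(BooleanSupplier done, long timeoutMs)
          throws TimeoutException, InterruptedException {
        final long deadline = System.nanoTime() + TimeUnit.MILLISECONDS.toNanos(timeoutMs);
        while (!done.getAsBoolean()) {
          if (System.nanoTime() - deadline > 0) {
            throw new TimeoutException("condition not met within " + timeoutMs + " ms");
          }
          Thread.sleep(200);
        }
      }
    }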
+ 274 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFsShellPermission.java

@@ -0,0 +1,274 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfs;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.net.URI;
+import java.security.PrivilegedExceptionAction;
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileSystemTestHelper;
+import org.apache.hadoop.fs.FsShell;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.junit.Test;
+
+/**
+ * This test covers privilege-related aspects of FsShell.
+ */
+public class TestFsShellPermission {
+
+  static private final String TEST_ROOT = "/testroot";
+
+  static UserGroupInformation createUGI(String ownername, String groupName) {
+    return UserGroupInformation.createUserForTesting(ownername,
+        new String[]{groupName});
+  }
+
+  private class FileEntry {
+    private String path;
+    private boolean isDir;
+    private String owner;
+    private String group;
+    private String permission;
+    public FileEntry(String path, boolean isDir,
+        String owner, String group, String permission) {
+      this.path = path;
+      this.isDir = isDir;
+      this.owner = owner;
+      this.group = group;
+      this.permission = permission;
+    }
+    String getPath() { return path; }
+    boolean isDirectory() { return isDir; }
+    String getOwner() { return owner; }
+    String getGroup() { return group; }
+    String getPermission() { return permission; }
+  }
+
+  private void createFiles(FileSystem fs, String topdir,
+      FileEntry[] entries) throws IOException {
+    for (FileEntry entry : entries) {
+      String newPathStr = topdir + "/" + entry.getPath();
+      Path newPath = new Path(newPathStr);
+      if (entry.isDirectory()) {
+        fs.mkdirs(newPath);
+      } else {
+        FileSystemTestHelper.createFile(fs,  newPath);
+      }
+      fs.setPermission(newPath, new FsPermission(entry.getPermission()));
+      fs.setOwner(newPath, entry.getOwner(), entry.getGroup());
+    }
+  }
+
+  /** Delete a directory and everything underneath it. */
+  private static void deldir(FileSystem fs, String topdir) throws IOException {
+    fs.delete(new Path(topdir), true);
+  }
+
+  static String execCmd(FsShell shell, final String[] args) throws Exception {
+    ByteArrayOutputStream baout = new ByteArrayOutputStream();
+    PrintStream out = new PrintStream(baout, true);
+    PrintStream old = System.out;
+    System.setOut(out);
+    int ret = shell.run(args);
+    out.close();
+    System.setOut(old);
+    return String.valueOf(ret);
+  }
+
+  /*
+   * Each instance of TestDeleteHelper captures one test scenario:
+   * create all files listed in fileEntries, then, running as doAsUser, delete
+   * deleteEntry with the command and options given in cmdAndOptions.
+   *
+   * When expectedToDelete is true, deleteEntry is expected to be deleted;
+   * otherwise it is not. At the end of the test, the existence of deleteEntry
+   * is checked against expectedToDelete to ensure the command finished with
+   * the expected result.
+   */
+  private class TestDeleteHelper {
+    private FileEntry[] fileEntries;
+    private FileEntry deleteEntry;
+    private String cmdAndOptions;
+    private boolean expectedToDelete;
+
+    final String doAsGroup;
+    final UserGroupInformation userUgi;
+
+    public TestDeleteHelper(
+        FileEntry[] fileEntries,
+        FileEntry deleteEntry,
+        String cmdAndOptions,
+        String doAsUser,
+        boolean expectedToDelete) {
+      this.fileEntries = fileEntries;
+      this.deleteEntry = deleteEntry;
+      this.cmdAndOptions = cmdAndOptions;
+      this.expectedToDelete = expectedToDelete;
+
+      doAsGroup = doAsUser.equals("hdfs")? "supergroup" : "users";
+      userUgi = createUGI(doAsUser, doAsGroup);
+    }
+
+    public void execute(Configuration conf, FileSystem fs) throws Exception {
+      fs.mkdirs(new Path(TEST_ROOT));
+
+      createFiles(fs, TEST_ROOT, fileEntries);
+      final FsShell fsShell = new FsShell(conf);
+      final String deletePath =  TEST_ROOT + "/" + deleteEntry.getPath();
+
+      String[] tmpCmdOpts = StringUtils.split(cmdAndOptions);
+      ArrayList<String> tmpArray = new ArrayList<String>(Arrays.asList(tmpCmdOpts));
+      tmpArray.add(deletePath);
+      final String[] cmdOpts = tmpArray.toArray(new String[tmpArray.size()]);
+      userUgi.doAs(new PrivilegedExceptionAction<String>() {
+        public String run() throws Exception {
+          return execCmd(fsShell, cmdOpts);
+        }
+      });
+
+      boolean deleted = !fs.exists(new Path(deletePath));
+      assertEquals(expectedToDelete, deleted);
+
+      deldir(fs, TEST_ROOT);
+    }
+  }
+
+  private TestDeleteHelper genDeleteEmptyDirHelper(final String cmdOpts,
+      final String targetPerm,
+      final String asUser,
+      boolean expectedToDelete) {
+    FileEntry[] files = {
+        new FileEntry("userA", true, "userA", "users", "755"),
+        new FileEntry("userA/userB", true, "userB", "users", targetPerm)
+    };
+    FileEntry deleteEntry = files[1];
+    return new TestDeleteHelper(files, deleteEntry, cmdOpts, asUser,
+        expectedToDelete);
+  }
+
+  // Expect target to be deleted
+  private TestDeleteHelper genRmrEmptyDirWithReadPerm() {
+    return genDeleteEmptyDirHelper("-rm -r", "744", "userA", true);
+  }
+
+  // Expect target to be deleted
+  private TestDeleteHelper genRmrEmptyDirWithNoPerm() {
+    return genDeleteEmptyDirHelper("-rm -r", "700", "userA", true);
+  }
+
+  // Expect target to be deleted
+  private TestDeleteHelper genRmrfEmptyDirWithNoPerm() {
+    return genDeleteEmptyDirHelper("-rm -r -f", "700", "userA", true);
+  }
+
+  private TestDeleteHelper genDeleteNonEmptyDirHelper(final String cmd,
+      final String targetPerm,
+      final String asUser,
+      boolean expectedToDelete) {
+    FileEntry[] files = {
+        new FileEntry("userA", true, "userA", "users", "755"),
+        new FileEntry("userA/userB", true, "userB", "users", targetPerm),
+        new FileEntry("userA/userB/xyzfile", false, "userB", "users",
+            targetPerm)
+    };
+    FileEntry deleteEntry = files[1];
+    return new TestDeleteHelper(files, deleteEntry, cmd, asUser,
+        expectedToDelete);
+  }
+
+  // Expect target not to be deleted
+  private TestDeleteHelper genRmrNonEmptyDirWithReadPerm() {
+    return genDeleteNonEmptyDirHelper("-rm -r", "744", "userA", false);
+  }
+
+  // Expect target not to be deleted
+  private TestDeleteHelper genRmrNonEmptyDirWithNoPerm() {
+    return genDeleteNonEmptyDirHelper("-rm -r", "700", "userA", false);
+  }
+
+  // Expect target to be deleted
+  private TestDeleteHelper genRmrNonEmptyDirWithAllPerm() {
+    return genDeleteNonEmptyDirHelper("-rm -r", "777", "userA", true);
+  }
+
+  // Expect target not to be deleted
+  private TestDeleteHelper genRmrfNonEmptyDirWithNoPerm() {
+    return genDeleteNonEmptyDirHelper("-rm -r -f", "700", "userA", false);
+  }
+
+  // Expect target to be deleted
+  public TestDeleteHelper genDeleteSingleFileNotAsOwner() throws Exception {
+    FileEntry[] files = {
+        new FileEntry("userA", true, "userA", "users", "755"),
+        new FileEntry("userA/userB", false, "userB", "users", "700")
+    };
+    FileEntry deleteEntry = files[1];
+    return new TestDeleteHelper(files, deleteEntry, "-rm -r", "userA", true);
+  }
+
+  @Test
+  public void testDelete() throws Exception {
+    Configuration conf = null;
+    MiniDFSCluster cluster = null;
+    try {
+      conf = new Configuration();
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
+
+      String nnUri = FileSystem.getDefaultUri(conf).toString();
+      FileSystem fs = FileSystem.get(URI.create(nnUri), conf);
+
+      ArrayList<TestDeleteHelper> ta = new ArrayList<TestDeleteHelper>();
+
+      // Add empty dir tests
+      ta.add(genRmrEmptyDirWithReadPerm());
+      ta.add(genRmrEmptyDirWithNoPerm());
+      ta.add(genRmrfEmptyDirWithNoPerm());
+
+      // Add non-empty dir tests
+      ta.add(genRmrNonEmptyDirWithReadPerm());
+      ta.add(genRmrNonEmptyDirWithNoPerm());
+      ta.add(genRmrNonEmptyDirWithAllPerm());
+      ta.add(genRmrfNonEmptyDirWithNoPerm());
+
+      // Add single file test
+      ta.add(genDeleteSingleFileNotAsOwner());
+
+      // Run all tests
+      for(TestDeleteHelper t : ta) {
+        t.execute(conf,  fs);
+      }
+    } finally {
+      if (cluster != null) { cluster.shutdown(); }
+    }
+  }
+}

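The execCmd() helper above relies on temporarily redirecting System.out so FsShell's console output can be inspected. The same capture pattern in isolation (illustrative, not part of the patch):

    import java.io.ByteArrayOutputStream;
    import java.io.PrintStream;

    public class StdoutCaptureSketch {
      /** Runs a command while capturing everything it prints to System.out. */
      static String captureStdout(Runnable command) {
        ByteArrayOutputStream buf = new ByteArrayOutputStream();
        PrintStream previous = System.out;
        System.setOut(new PrintStream(buf, true));
        try {
          command.run();
        } finally {
          System.setOut(previous);  // always restore, even if the command throws
        }
        return buf.toString();
      }

      public static void main(String[] args) {
        String out = captureStdout(() -> System.out.println("hello"));
        System.out.println("captured: " + out.trim());
      }
    }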
+ 162 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingInvalidateBlock.java

@@ -0,0 +1,162 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.blockmanagement;
+
+import org.apache.commons.logging.impl.Log4JLogger;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeys;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
+import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
+import org.apache.log4j.Level;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mockito;
+import org.mockito.internal.util.reflection.Whitebox;
+
+/**
+ * Test if we can correctly delay the deletion of blocks.
+ */
+public class TestPendingInvalidateBlock {
+  {
+    ((Log4JLogger)BlockManager.LOG).getLogger().setLevel(Level.DEBUG);    
+  }
+
+  private static final int BLOCKSIZE = 1024;
+  private static final short REPLICATION = 2;
+
+  private Configuration conf;
+  private MiniDFSCluster cluster;
+  private DistributedFileSystem dfs;
+
+  @Before
+  public void setUp() throws Exception {
+    conf = new Configuration();
+    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCKSIZE);
+    // block deletion pending period
+    conf.setLong(DFSConfigKeys.DFS_NAMENODE_STARTUP_DELAY_BLOCK_DELETION_MS_KEY, 1000 * 5);
+    // set the block report interval to 2s
+    conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 2000);
+    conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
+    conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY, 1);
+    // disable the RPC timeout for debug
+    conf.setLong(CommonConfigurationKeys.IPC_PING_INTERVAL_KEY, 0);
+    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(REPLICATION)
+        .build();
+    cluster.waitActive();
+    dfs = cluster.getFileSystem();
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    if (cluster != null) {
+      cluster.shutdown();
+    }
+  }
+
+  @Test
+  public void testPendingDeletion() throws Exception {
+    final Path foo = new Path("/foo");
+    DFSTestUtil.createFile(dfs, foo, BLOCKSIZE, REPLICATION, 0);
+    // restart NN
+    cluster.restartNameNode(true);
+    dfs.delete(foo, true);
+    Assert.assertEquals(0, cluster.getNamesystem().getBlocksTotal());
+    Assert.assertEquals(REPLICATION, cluster.getNamesystem()
+        .getPendingDeletionBlocks());
+    Thread.sleep(6000);
+    Assert.assertEquals(0, cluster.getNamesystem().getBlocksTotal());
+    Assert.assertEquals(0, cluster.getNamesystem().getPendingDeletionBlocks());
+  }
+
+  /**
+   * Test whether we can delay the deletion of unknown blocks in DataNode's
+   * first several block reports.
+   */
+  @Test
+  public void testPendingDeleteUnknownBlocks() throws Exception {
+    final int fileNum = 5; // 5 files
+    final Path[] files = new Path[fileNum];
+    final DataNodeProperties[] dnprops = new DataNodeProperties[REPLICATION];
+    // create a group of files, each file contains 1 block
+    for (int i = 0; i < fileNum; i++) {
+      files[i] = new Path("/file" + i);
+      DFSTestUtil.createFile(dfs, files[i], BLOCKSIZE, REPLICATION, i);
+    }
+    // wait until all DataNodes have replicas
+    waitForReplication();
+    for (int i = REPLICATION - 1; i >= 0; i--) {
+      dnprops[i] = cluster.stopDataNode(i);
+    }
+    Thread.sleep(2000);
+    // delete 2 files, we still have 3 files remaining so that we can cover
+    // every DN storage
+    for (int i = 0; i < 2; i++) {
+      dfs.delete(files[i], true);
+    }
+
+    // restart NameNode
+    cluster.restartNameNode(false);
+    InvalidateBlocks invalidateBlocks = (InvalidateBlocks) Whitebox
+        .getInternalState(cluster.getNamesystem().getBlockManager(),
+            "invalidateBlocks");
+    InvalidateBlocks mockIb = Mockito.spy(invalidateBlocks);
+    Mockito.doReturn(1L).when(mockIb).getInvalidationDelay();
+    Whitebox.setInternalState(cluster.getNamesystem().getBlockManager(),
+        "invalidateBlocks", mockIb);
+
+    Assert.assertEquals(0L, cluster.getNamesystem().getPendingDeletionBlocks());
+    // restart DataNodes
+    for (int i = 0; i < REPLICATION; i++) {
+      cluster.restartDataNode(dnprops[i], true);
+    }
+    cluster.waitActive();
+
+    for (int i = 0; i < REPLICATION; i++) {
+      DataNodeTestUtils.triggerBlockReport(cluster.getDataNodes().get(i));
+    }
+    Thread.sleep(2000);
+    // make sure we have received block reports by checking the total block #
+    Assert.assertEquals(3, cluster.getNamesystem().getBlocksTotal());
+    Assert.assertEquals(4, cluster.getNamesystem().getPendingDeletionBlocks());
+
+    cluster.restartNameNode(true);
+    Thread.sleep(6000);
+    Assert.assertEquals(3, cluster.getNamesystem().getBlocksTotal());
+    Assert.assertEquals(0, cluster.getNamesystem().getPendingDeletionBlocks());
+  }
+
+  private long waitForReplication() throws Exception {
+    for (int i = 0; i < 10; i++) {
+      long ur = cluster.getNamesystem().getUnderReplicatedBlocks();
+      if (ur == 0) {
+        return 0;
+      } else {
+        Thread.sleep(1000);
+      }
+    }
+    return cluster.getNamesystem().getUnderReplicatedBlocks();
+  }
+
+}

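testPendingDeleteUnknownBlocks() shortens the invalidation delay by wrapping the real InvalidateBlocks object in a Mockito spy and stubbing a single getter. The technique in isolation, with an invented class standing in for InvalidateBlocks:

    import static org.mockito.Mockito.doReturn;
    import static org.mockito.Mockito.spy;

    class DelayPolicy {
      long getInvalidationDelayMs() {
        return 5_000L;  // production default in this sketch
      }
    }

    class SpyStubSketch {
      /** Returns a spy that behaves like the real object except for one stubbed call. */
      static DelayPolicy withShortDelay(DelayPolicy real) {
        DelayPolicy spied = spy(real);
        doReturn(1L).when(spied).getInvalidationDelayMs();
        return spied;
      }
    }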
+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/common/TestJspHelper.java

@@ -30,6 +30,7 @@ import org.apache.hadoop.io.Text;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
 import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
 import org.apache.hadoop.security.authorize.AuthorizationException;
 import org.apache.hadoop.security.authorize.AuthorizationException;
+import org.apache.hadoop.security.authorize.ProxyServers;
 import org.apache.hadoop.security.authorize.ProxyUsers;
 import org.apache.hadoop.security.authorize.ProxyUsers;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.token.TokenIdentifier;
 import org.apache.hadoop.security.token.TokenIdentifier;
@@ -51,7 +52,6 @@ import static org.mockito.Mockito.when;
 public class TestJspHelper {
 public class TestJspHelper {
 
 
   private final Configuration conf = new HdfsConfiguration();
   private final Configuration conf = new HdfsConfiguration();
-  private String jspWriterOutput = "";
 
 
   // allow user with TGT to run tests
   // allow user with TGT to run tests
   @BeforeClass
   @BeforeClass
@@ -450,7 +450,7 @@ public class TestJspHelper {
       when(req.getRemoteAddr()).thenReturn(proxyAddr);
       when(req.getRemoteAddr()).thenReturn(proxyAddr);
       when(req.getHeader("X-Forwarded-For")).thenReturn(clientAddr);
       when(req.getHeader("X-Forwarded-For")).thenReturn(clientAddr);
       if (trusted) {
       if (trusted) {
-        conf.set(ProxyUsers.CONF_HADOOP_PROXYSERVERS, proxyAddr);
+        conf.set(ProxyServers.CONF_HADOOP_PROXYSERVERS, proxyAddr);
       }
       }
     }
     }
     ProxyUsers.refreshSuperUserGroupsConfiguration(conf);
     ProxyUsers.refreshSuperUserGroupsConfiguration(conf);

+ 18 - 13
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDiskError.java

@@ -18,16 +18,13 @@
 package org.apache.hadoop.hdfs.server.datanode;
 package org.apache.hadoop.hdfs.server.datanode;
 
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.assertTrue;
 
 
 import java.io.DataOutputStream;
 import java.io.DataOutputStream;
 import java.io.File;
 import java.io.File;
+import java.io.IOException;
 import java.net.InetSocketAddress;
 import java.net.InetSocketAddress;
 import java.net.Socket;
 import java.net.Socket;
-import java.net.SocketException;
-import java.net.SocketTimeoutException;
-import java.nio.channels.ClosedChannelException;
 
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileSystem;
@@ -201,15 +198,23 @@ public class TestDiskError {
     }
     }
   }
   }
   
   
+  /**
+   * Checks whether {@link DataNode#checkDiskError()} is being called or not.
+   * Before the code was refactored, this method was not getting called.
+   * @throws IOException if triggering the disk check fails
+   * @throws InterruptedException if the wait is interrupted
+   */
   @Test
   @Test
-  public void testNetworkErrorsIgnored() {
-    DataNode dn = cluster.getDataNodes().iterator().next();
-    
-    assertTrue(dn.isNetworkRelatedException(new SocketException()));
-    assertTrue(dn.isNetworkRelatedException(new SocketTimeoutException()));
-    assertTrue(dn.isNetworkRelatedException(new ClosedChannelException()));
-    assertTrue(dn.isNetworkRelatedException(new Exception("Broken pipe foo bar")));
-    assertFalse(dn.isNetworkRelatedException(new Exception()));
-    assertFalse(dn.isNetworkRelatedException(new Exception("random problem")));
+  public void testcheckDiskError() throws IOException, InterruptedException {
+    if(cluster.getDataNodes().size() <= 0) {
+      cluster.startDataNodes(conf, 1, true, null, null);
+      cluster.waitActive();
+    }
+    DataNode dataNode = cluster.getDataNodes().get(0);
+    long slackTime = dataNode.checkDiskErrorInterval/2;
+    // trigger a disk error check
+    dataNode.checkDiskError();
+    Thread.sleep(dataNode.checkDiskErrorInterval);
+    long lastDiskErrorCheck = dataNode.getLastDiskErrorCheck();
+    assertTrue("Disk error check was not performed within "
+        + dataNode.checkDiskErrorInterval + " ms",
+        (System.currentTimeMillis() - lastDiskErrorCheck)
+            < (dataNode.checkDiskErrorInterval + slackTime));
   }
   }
 }
 }

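The rewritten TestDiskError case asserts that a periodic background check ran recently, allowing the configured interval plus some slack. The timing rule it applies, reduced to a stand-alone helper (names are illustrative):

    class RecentRunAssertSketch {
      /** Fails unless the check's last-run timestamp falls within interval + slack. */
      static void assertRanRecently(long lastRunMillis, long intervalMillis) {
        long slack = intervalMillis / 2;
        long elapsed = System.currentTimeMillis() - lastRunMillis;
        if (elapsed >= intervalMillis + slack) {
          throw new AssertionError(
              "check not performed within " + (intervalMillis + slack) + " ms");
        }
      }
    }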
+ 5 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/AclTestHelpers.java

@@ -27,6 +27,7 @@ import org.apache.hadoop.fs.permission.AclEntry;
 import org.apache.hadoop.fs.permission.AclEntryScope;
 import org.apache.hadoop.fs.permission.AclEntryScope;
 import org.apache.hadoop.fs.permission.AclEntryType;
 import org.apache.hadoop.fs.permission.AclEntryType;
 import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.fs.permission.FsAction;
+import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.security.AccessControlException;
 import org.apache.hadoop.security.AccessControlException;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.UserGroupInformation;
@@ -150,6 +151,9 @@ public final class AclTestHelpers {
    */
    */
   public static void assertPermission(FileSystem fs, Path pathToCheck,
   public static void assertPermission(FileSystem fs, Path pathToCheck,
       short perm) throws IOException {
       short perm) throws IOException {
-    assertEquals(perm, fs.getFileStatus(pathToCheck).getPermission().toShort());
+    short filteredPerm = (short)(perm & 01777);
+    FsPermission fsPermission = fs.getFileStatus(pathToCheck).getPermission();
+    assertEquals(filteredPerm, fsPermission.toShort());
+    assertEquals(((perm & (1 << 12)) != 0), fsPermission.getAclBit());
   }
   }
 }
 }

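The updated assertPermission() compares against an "extended" permission short in which bit 12 carries the ACL flag on top of the classic 12 permission bits. A small decoding example of that encoding (values chosen for illustration):

    public class ExtendedPermissionDecodeSketch {
      public static void main(String[] args) {
        short extended = (short) 010750;                  // octal: ACL bit set + rwxr-x---
        short classicBits = (short) (extended & 01777);   // -> 0750
        boolean aclBit = (extended & (1 << 12)) != 0;     // -> true
        System.out.printf("classic=%o aclBit=%b%n", classicBits, aclBit);
      }
    }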
+ 52 - 34
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSAclBaseTest.java

@@ -38,6 +38,7 @@ import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.protocol.AclException;
 import org.apache.hadoop.hdfs.protocol.AclException;
+import org.apache.hadoop.hdfs.protocol.FsAclPermission;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.security.AccessControlException;
 import org.apache.hadoop.security.AccessControlException;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.UserGroupInformation;
@@ -118,7 +119,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, READ_EXECUTE),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)0750);
+    assertPermission((short)010750);
     assertAclFeature(true);
     assertAclFeature(true);
   }
   }
 
 
@@ -140,7 +141,7 @@ public abstract class FSAclBaseTest {
     assertArrayEquals(new AclEntry[] {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "foo", READ_EXECUTE),
       aclEntry(ACCESS, USER, "foo", READ_EXECUTE),
       aclEntry(ACCESS, GROUP, READ_EXECUTE) }, returned);
       aclEntry(ACCESS, GROUP, READ_EXECUTE) }, returned);
-    assertPermission((short)0750);
+    assertPermission((short)010750);
     assertAclFeature(true);
     assertAclFeature(true);
   }
   }
 
 
@@ -161,7 +162,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, READ_EXECUTE),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)0750);
+    assertPermission((short)010750);
     assertAclFeature(true);
     assertAclFeature(true);
   }
   }
 
 
@@ -177,7 +178,7 @@ public abstract class FSAclBaseTest {
     assertArrayEquals(new AclEntry[] {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "foo", READ_WRITE),
       aclEntry(ACCESS, USER, "foo", READ_WRITE),
       aclEntry(ACCESS, GROUP, READ) }, returned);
       aclEntry(ACCESS, GROUP, READ) }, returned);
-    assertPermission((short)0660);
+    assertPermission((short)010660);
     assertAclFeature(true);
     assertAclFeature(true);
   }
   }
 
 
@@ -195,7 +196,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, USER, ALL),
       aclEntry(DEFAULT, USER, ALL),
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)0750);
+    assertPermission((short)010750);
     assertAclFeature(true);
     assertAclFeature(true);
   }
   }
 
 
@@ -212,7 +213,7 @@ public abstract class FSAclBaseTest {
     assertArrayEquals(new AclEntry[] {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "foo", ALL),
       aclEntry(ACCESS, USER, "foo", ALL),
       aclEntry(ACCESS, GROUP, READ) }, returned);
       aclEntry(ACCESS, GROUP, READ) }, returned);
-    assertPermission((short)0600);
+    assertPermission((short)010600);
     assertAclFeature(true);
     assertAclFeature(true);
   }
   }
 
 
@@ -240,7 +241,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, READ_EXECUTE),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)01750);
+    assertPermission((short)011750);
     assertAclFeature(true);
     assertAclFeature(true);
   }
   }
 
 
@@ -286,7 +287,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, READ_EXECUTE),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)0750);
+    assertPermission((short)010750);
     assertAclFeature(true);
     assertAclFeature(true);
   }
   }
 
 
@@ -309,7 +310,7 @@ public abstract class FSAclBaseTest {
     assertArrayEquals(new AclEntry[] {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "bar", READ_WRITE),
       aclEntry(ACCESS, USER, "bar", READ_WRITE),
       aclEntry(ACCESS, GROUP, READ_WRITE) }, returned);
       aclEntry(ACCESS, GROUP, READ_WRITE) }, returned);
-    assertPermission((short)0760);
+    assertPermission((short)010760);
     assertAclFeature(true);
     assertAclFeature(true);
   }
   }
 
 
@@ -334,7 +335,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, READ_EXECUTE),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)0750);
+    assertPermission((short)010750);
     assertAclFeature(true);
     assertAclFeature(true);
   }
   }
 
 
@@ -382,7 +383,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, USER, ALL),
       aclEntry(DEFAULT, USER, ALL),
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)0750);
+    assertPermission((short)010750);
     assertAclFeature(true);
     assertAclFeature(true);
   }
   }
 
 
@@ -408,7 +409,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, READ_EXECUTE),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)01750);
+    assertPermission((short)011750);
     assertAclFeature(true);
     assertAclFeature(true);
   }
   }
 
 
@@ -436,7 +437,7 @@ public abstract class FSAclBaseTest {
     assertArrayEquals(new AclEntry[] {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "foo", ALL),
       aclEntry(ACCESS, USER, "foo", ALL),
       aclEntry(ACCESS, GROUP, READ_EXECUTE) }, returned);
       aclEntry(ACCESS, GROUP, READ_EXECUTE) }, returned);
-    assertPermission((short)0770);
+    assertPermission((short)010770);
     assertAclFeature(true);
     assertAclFeature(true);
   }
   }
 
 
@@ -456,7 +457,7 @@ public abstract class FSAclBaseTest {
     assertArrayEquals(new AclEntry[] {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "foo", ALL),
       aclEntry(ACCESS, USER, "foo", ALL),
       aclEntry(ACCESS, GROUP, READ_EXECUTE) }, returned);
       aclEntry(ACCESS, GROUP, READ_EXECUTE) }, returned);
-    assertPermission((short)0770);
+    assertPermission((short)010770);
     assertAclFeature(true);
     assertAclFeature(true);
   }
   }
 
 
@@ -501,7 +502,7 @@ public abstract class FSAclBaseTest {
     assertArrayEquals(new AclEntry[] {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "foo", ALL),
       aclEntry(ACCESS, USER, "foo", ALL),
       aclEntry(ACCESS, GROUP, READ_EXECUTE) }, returned);
       aclEntry(ACCESS, GROUP, READ_EXECUTE) }, returned);
-    assertPermission((short)01770);
+    assertPermission((short)011770);
     assertAclFeature(true);
     assertAclFeature(true);
   }
   }
 
 
@@ -602,7 +603,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, ALL),
       aclEntry(DEFAULT, MASK, ALL),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)0770);
+    assertPermission((short)010770);
     assertAclFeature(true);
     assertAclFeature(true);
   }
   }
 
 
@@ -621,7 +622,7 @@ public abstract class FSAclBaseTest {
     assertArrayEquals(new AclEntry[] {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "foo", READ),
       aclEntry(ACCESS, USER, "foo", READ),
       aclEntry(ACCESS, GROUP, READ) }, returned);
       aclEntry(ACCESS, GROUP, READ) }, returned);
-    assertPermission((short)0640);
+    assertPermission((short)010640);
     assertAclFeature(true);
     assertAclFeature(true);
   }
   }
 
 
@@ -639,7 +640,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, ALL),
       aclEntry(DEFAULT, MASK, ALL),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)0750);
+    assertPermission((short)010750);
     assertAclFeature(true);
     assertAclFeature(true);
   }
   }
 
 
@@ -679,7 +680,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, USER, ALL),
       aclEntry(DEFAULT, USER, ALL),
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)0750);
+    assertPermission((short)010750);
     assertAclFeature(true);
     assertAclFeature(true);
   }
   }
 
 
@@ -699,7 +700,7 @@ public abstract class FSAclBaseTest {
     assertArrayEquals(new AclEntry[] {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "foo", READ),
       aclEntry(ACCESS, USER, "foo", READ),
       aclEntry(ACCESS, GROUP, READ) }, returned);
       aclEntry(ACCESS, GROUP, READ) }, returned);
-    assertPermission((short)0670);
+    assertPermission((short)010670);
     assertAclFeature(true);
     assertAclFeature(true);
   }
   }
 
 
@@ -723,7 +724,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, ALL),
       aclEntry(DEFAULT, MASK, ALL),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)01770);
+    assertPermission((short)011770);
     assertAclFeature(true);
     assertAclFeature(true);
   }
   }
 
 
@@ -768,7 +769,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, ALL),
       aclEntry(DEFAULT, MASK, ALL),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)0700);
+    assertPermission((short)010700);
     assertAclFeature(true);
     assertAclFeature(true);
   }
   }
 
 
@@ -788,7 +789,7 @@ public abstract class FSAclBaseTest {
     assertArrayEquals(new AclEntry[] {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "foo", READ),
       aclEntry(ACCESS, USER, "foo", READ),
       aclEntry(ACCESS, GROUP, READ) }, returned);
       aclEntry(ACCESS, GROUP, READ) }, returned);
-    assertPermission((short)0600);
+    assertPermission((short)010600);
     assertAclFeature(true);
     assertAclFeature(true);
   }
   }
 
 
@@ -810,10 +811,27 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, ALL),
       aclEntry(DEFAULT, MASK, ALL),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)0700);
+    assertPermission((short)010700);
     assertAclFeature(true);
     assertAclFeature(true);
   }
   }
 
 
+  @Test
+  public void testSetPermissionCannotSetAclBit() throws IOException {
+    FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short)0750));
+    fs.setPermission(path, FsPermission.createImmutable((short)0700));
+    assertPermission((short)0700);
+    fs.setPermission(path,
+      new FsAclPermission(FsPermission.createImmutable((short)0755)));
+    INode inode = cluster.getNamesystem().getFSDirectory().getNode(
+      path.toUri().getPath(), false);
+    assertNotNull(inode);
+    FsPermission perm = inode.getFsPermission();
+    assertNotNull(perm);
+    assertEquals(0755, perm.toShort());
+    assertEquals(0755, perm.toExtendedShort());
+    assertAclFeature(false);
+  }
+
   @Test
   @Test
   public void testDefaultAclNewFile() throws Exception {
   public void testDefaultAclNewFile() throws Exception {
     FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short)0750));
     FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short)0750));
@@ -827,7 +845,7 @@ public abstract class FSAclBaseTest {
     assertArrayEquals(new AclEntry[] {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "foo", ALL),
       aclEntry(ACCESS, USER, "foo", ALL),
       aclEntry(ACCESS, GROUP, READ_EXECUTE) }, returned);
       aclEntry(ACCESS, GROUP, READ_EXECUTE) }, returned);
-    assertPermission(filePath, (short)0640);
+    assertPermission(filePath, (short)010640);
     assertAclFeature(filePath, true);
     assertAclFeature(filePath, true);
   }
   }
 
 
@@ -881,7 +899,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, ALL),
       aclEntry(DEFAULT, MASK, ALL),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission(dirPath, (short)0750);
+    assertPermission(dirPath, (short)010750);
     assertAclFeature(dirPath, true);
     assertAclFeature(dirPath, true);
   }
   }
 
 
@@ -916,7 +934,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, USER, ALL),
       aclEntry(DEFAULT, USER, ALL),
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission(dirPath, (short)0750);
+    assertPermission(dirPath, (short)010750);
     assertAclFeature(dirPath, true);
     assertAclFeature(dirPath, true);
   }
   }
 
 
@@ -940,7 +958,7 @@ public abstract class FSAclBaseTest {
     AclStatus s = fs.getAclStatus(dirPath);
     AclStatus s = fs.getAclStatus(dirPath);
     AclEntry[] returned = s.getEntries().toArray(new AclEntry[0]);
     AclEntry[] returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
     assertArrayEquals(expected, returned);
-    assertPermission(dirPath, (short)0750);
+    assertPermission(dirPath, (short)010750);
     assertAclFeature(dirPath, true);
     assertAclFeature(dirPath, true);
     expected = new AclEntry[] {
     expected = new AclEntry[] {
       aclEntry(ACCESS, USER, "foo", ALL),
       aclEntry(ACCESS, USER, "foo", ALL),
@@ -948,7 +966,7 @@ public abstract class FSAclBaseTest {
     s = fs.getAclStatus(filePath);
     s = fs.getAclStatus(filePath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
     assertArrayEquals(expected, returned);
-    assertPermission(filePath, (short)0640);
+    assertPermission(filePath, (short)010640);
     assertAclFeature(filePath, true);
     assertAclFeature(filePath, true);
   }
   }
 
 
@@ -972,12 +990,12 @@ public abstract class FSAclBaseTest {
     AclStatus s = fs.getAclStatus(dirPath);
     AclStatus s = fs.getAclStatus(dirPath);
     AclEntry[] returned = s.getEntries().toArray(new AclEntry[0]);
     AclEntry[] returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
     assertArrayEquals(expected, returned);
-    assertPermission(dirPath, (short)0750);
+    assertPermission(dirPath, (short)010750);
     assertAclFeature(dirPath, true);
     assertAclFeature(dirPath, true);
     s = fs.getAclStatus(subdirPath);
     s = fs.getAclStatus(subdirPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
     assertArrayEquals(expected, returned);
-    assertPermission(subdirPath, (short)0750);
+    assertPermission(subdirPath, (short)010750);
     assertAclFeature(subdirPath, true);
     assertAclFeature(subdirPath, true);
   }
   }
 
 
@@ -1004,7 +1022,7 @@ public abstract class FSAclBaseTest {
     AclStatus s = fs.getAclStatus(dirPath);
     AclStatus s = fs.getAclStatus(dirPath);
     AclEntry[] returned = s.getEntries().toArray(new AclEntry[0]);
     AclEntry[] returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
     assertArrayEquals(expected, returned);
-    assertPermission(dirPath, (short)0750);
+    assertPermission(dirPath, (short)010750);
     assertAclFeature(dirPath, true);
     assertAclFeature(dirPath, true);
     expected = new AclEntry[] { };
     expected = new AclEntry[] { };
     s = fs.getAclStatus(linkPath);
     s = fs.getAclStatus(linkPath);
@@ -1037,7 +1055,7 @@ public abstract class FSAclBaseTest {
     assertArrayEquals(new AclEntry[] {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "foo", ALL),
       aclEntry(ACCESS, USER, "foo", ALL),
       aclEntry(ACCESS, GROUP, READ_EXECUTE) }, returned);
       aclEntry(ACCESS, GROUP, READ_EXECUTE) }, returned);
-    assertPermission(filePath, (short)0740);
+    assertPermission(filePath, (short)010740);
     assertAclFeature(filePath, true);
     assertAclFeature(filePath, true);
   }
   }
 
 
@@ -1059,7 +1077,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, ALL),
       aclEntry(DEFAULT, MASK, ALL),
       aclEntry(DEFAULT, OTHER, READ_EXECUTE) }, returned);
       aclEntry(DEFAULT, OTHER, READ_EXECUTE) }, returned);
-    assertPermission(dirPath, (short)0740);
+    assertPermission(dirPath, (short)010740);
     assertAclFeature(dirPath, true);
     assertAclFeature(dirPath, true);
   }
   }
 
 

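The new testSetPermissionCannotSetAclBit() pins down an invariant: whatever short a caller passes to setPermission(), the stored permission keeps only the classic bits, so the ACL flag cannot be forged from the client side. A hedged sketch of that masking rule (not the actual FSDirectory code):

    class PermissionSanitizeSketch {
      /** Drops everything above the sticky bit, i.e. the ACL flag in the extended short. */
      static short sanitize(short requested) {
        return (short) (requested & 01777);
      }

      public static void main(String[] args) {
        // 010755 requests the ACL bit plus 0755; only 0755 survives.
        System.out.printf("stored=%o%n", sanitize((short) 010755));
      }
    }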
+ 3 - 2
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogger.java

@@ -32,14 +32,15 @@ import java.net.URISyntaxException;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.web.resources.GetOpParam;
 import org.apache.hadoop.hdfs.web.resources.GetOpParam;
 import org.apache.hadoop.ipc.RemoteException;
 import org.apache.hadoop.ipc.RemoteException;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.authorize.ProxyUsers;
 import org.apache.hadoop.security.authorize.ProxyUsers;
+import org.apache.hadoop.security.authorize.ProxyServers;
 import org.junit.Before;
 import org.junit.Before;
 import org.junit.Test;
 import org.junit.Test;
 
 
@@ -120,7 +121,7 @@ public class TestAuditLogger {
       assertEquals("127.0.0.1", DummyAuditLogger.remoteAddr);
       assertEquals("127.0.0.1", DummyAuditLogger.remoteAddr);
       
       
       // trusted proxied request
       // trusted proxied request
-      conf.set(ProxyUsers.CONF_HADOOP_PROXYSERVERS, "127.0.0.1");
+      conf.set(ProxyServers.CONF_HADOOP_PROXYSERVERS, "127.0.0.1");
       ProxyUsers.refreshSuperUserGroupsConfiguration(conf);
       ProxyUsers.refreshSuperUserGroupsConfiguration(conf);
       conn = (HttpURLConnection) uri.toURL().openConnection();
       conn = (HttpURLConnection) uri.toURL().openConnection();
       conn.setRequestMethod(op.getType().toString());
       conn.setRequestMethod(op.getType().toString());

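The TestAuditLogger change only swaps the constant's home from ProxyUsers to ProxyServers; the behaviour it exercises is that X-Forwarded-For is honoured solely when the direct peer is a trusted proxy. That rule, reduced to a plain-Java sketch with invented names:

    import java.util.Set;

    class AuditAddressSketch {
      /** Returns the address that should be written to the audit log. */
      static String effectiveClientAddr(String remoteAddr, String forwardedFor,
                                        Set<String> trustedProxies) {
        if (forwardedFor != null && trustedProxies.contains(remoteAddr)) {
          return forwardedFor;  // request came through a proxy we trust
        }
        return remoteAddr;      // otherwise log the direct peer
      }
    }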
+ 43 - 2
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java

@@ -27,6 +27,7 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertSame;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;

@@ -43,6 +44,7 @@ import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;

+import com.google.common.io.Files;
 import org.apache.commons.cli.ParseException;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -87,7 +89,6 @@ import org.apache.hadoop.util.ExitUtil.ExitException;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.log4j.Level;
 import org.junit.After;
-import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
 import org.mockito.ArgumentMatcher;
@@ -1084,7 +1085,7 @@ public class TestCheckpoint {

       FSDirectory secondaryFsDir = secondary.getFSNamesystem().dir;
       INode rootInMap = secondaryFsDir.getInode(secondaryFsDir.rootDir.getId());
-      Assert.assertSame(rootInMap, secondaryFsDir.rootDir);
+      assertSame(rootInMap, secondaryFsDir.rootDir);

       fileSys.delete(tmpDir, true);
       fileSys.mkdirs(tmpDir);
@@ -2404,6 +2405,46 @@ public class TestCheckpoint {
     }
   }

+  @Test
+  public void testLegacyOivImage() throws Exception {
+    MiniDFSCluster cluster = null;
+    SecondaryNameNode secondary = null;
+    File tmpDir = Files.createTempDir();
+    Configuration conf = new HdfsConfiguration();
+    conf.set(DFSConfigKeys.DFS_NAMENODE_LEGACY_OIV_IMAGE_DIR_KEY,
+        tmpDir.getAbsolutePath());
+    conf.set(DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY,
+        "2");
+
+    try {
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
+              .format(true).build();
+
+      secondary = startSecondaryNameNode(conf);
+
+      // Checkpoint once
+      secondary.doCheckpoint();
+      String files1[] = tmpDir.list();
+      assertEquals("Only one file is expected", 1, files1.length);
+
+      // Perform more checkpointings and check whether retention management
+      // is working.
+      secondary.doCheckpoint();
+      secondary.doCheckpoint();
+      String files2[] = tmpDir.list();
+      assertEquals("Two files are expected", 2, files2.length);
+
+      // Verify that the first file is deleted.
+      for (String fName : files2) {
+        assertFalse(fName.equals(files1[0]));
+      }
+    } finally {
+      cleanup(secondary);
+      cleanup(cluster);
+      tmpDir.delete();
+    }
+  }
+
   private static void cleanup(SecondaryNameNode snn) {
     if (snn != null) {
       try {

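A note on the new testLegacyOivImage above: it checkpoints three times with DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY set to 2 and expects only the two newest legacy OIV images to survive in the configured directory. The helper below is a hypothetical stand-in that illustrates that "keep the newest N files" retention idea; it is not the NameNode's actual purge code:

import java.io.File;
import java.util.Arrays;
import java.util.Comparator;

public class OivRetentionSketch {
  // Keep only the 'retained' newest files in 'dir'; delete the rest.
  static void pruneOldest(File dir, int retained) {
    File[] images = dir.listFiles();
    if (images == null || images.length <= retained) {
      return;
    }
    Arrays.sort(images, Comparator.comparingLong(File::lastModified).reversed());
    for (int i = retained; i < images.length; i++) {
      images[i].delete();
    }
  }
}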
+ 6 - 6
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithAcl.java

@@ -142,7 +142,7 @@ public class TestFSImageWithAcl {
     AclEntry[] subdirReturned = fs.getAclStatus(subdirPath).getEntries()
       .toArray(new AclEntry[0]);
     Assert.assertArrayEquals(subdirExpected, subdirReturned);
-    assertPermission(fs, subdirPath, (short)0755);
+    assertPermission(fs, subdirPath, (short)010755);

     restart(fs, persistNamespace);

@@ -152,7 +152,7 @@ public class TestFSImageWithAcl {
     subdirReturned = fs.getAclStatus(subdirPath).getEntries()
       .toArray(new AclEntry[0]);
     Assert.assertArrayEquals(subdirExpected, subdirReturned);
-    assertPermission(fs, subdirPath, (short)0755);
+    assertPermission(fs, subdirPath, (short)010755);

     aclSpec = Lists.newArrayList(aclEntry(DEFAULT, USER, "foo", READ_WRITE));
     fs.modifyAclEntries(dirPath, aclSpec);
@@ -163,7 +163,7 @@ public class TestFSImageWithAcl {
     subdirReturned = fs.getAclStatus(subdirPath).getEntries()
       .toArray(new AclEntry[0]);
     Assert.assertArrayEquals(subdirExpected, subdirReturned);
-    assertPermission(fs, subdirPath, (short)0755);
+    assertPermission(fs, subdirPath, (short)010755);

     restart(fs, persistNamespace);

@@ -173,7 +173,7 @@ public class TestFSImageWithAcl {
     subdirReturned = fs.getAclStatus(subdirPath).getEntries()
       .toArray(new AclEntry[0]);
     Assert.assertArrayEquals(subdirExpected, subdirReturned);
-    assertPermission(fs, subdirPath, (short)0755);
+    assertPermission(fs, subdirPath, (short)010755);

     fs.removeAcl(dirPath);

@@ -183,7 +183,7 @@ public class TestFSImageWithAcl {
     subdirReturned = fs.getAclStatus(subdirPath).getEntries()
       .toArray(new AclEntry[0]);
     Assert.assertArrayEquals(subdirExpected, subdirReturned);
-    assertPermission(fs, subdirPath, (short)0755);
+    assertPermission(fs, subdirPath, (short)010755);

     restart(fs, persistNamespace);

@@ -193,7 +193,7 @@ public class TestFSImageWithAcl {
     subdirReturned = fs.getAclStatus(subdirPath).getEntries()
       .toArray(new AclEntry[0]);
     Assert.assertArrayEquals(subdirExpected, subdirReturned);
-    assertPermission(fs, subdirPath, (short)0755);
+    assertPermission(fs, subdirPath, (short)010755);
   }

   @Test

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSPermissionChecker.java

@@ -393,14 +393,14 @@ public class TestFSPermissionChecker {
   private void assertPermissionGranted(UserGroupInformation user, String path,
       FsAction access) throws IOException {
     new FSPermissionChecker(SUPERUSER, SUPERGROUP, user).checkPermission(path,
-      dir, false, null, null, access, null, true);
+      dir, false, null, null, access, null, false, true);
   }

   private void assertPermissionDenied(UserGroupInformation user, String path,
       FsAction access) throws IOException {
     try {
       new FSPermissionChecker(SUPERUSER, SUPERGROUP, user).checkPermission(path,
-        dir, false, null, null, access, null, true);
+        dir, false, null, null, access, null, false, true);
       fail("expected AccessControlException for user + " + user + ", path = " +
         path + ", access = " + access);
     } catch (AccessControlException e) {

+ 1 - 4
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java

@@ -191,12 +191,9 @@ public class TestRetryCacheWithHA {

   private DFSClient genClientWithDummyHandler() throws IOException {
     URI nnUri = dfs.getUri();
-    Class<FailoverProxyProvider<ClientProtocol>> failoverProxyProviderClass = 
-        NameNodeProxies.getFailoverProxyProviderClass(conf, nnUri, 
-            ClientProtocol.class);
     FailoverProxyProvider<ClientProtocol> failoverProxyProvider = 
         NameNodeProxies.createFailoverProxyProvider(conf, 
-            failoverProxyProviderClass, ClientProtocol.class, nnUri);
+            nnUri, ClientProtocol.class, true);
     InvocationHandler dummyHandler = new DummyRetryInvocationHandler(
         failoverProxyProvider, RetryPolicies
         .failoverOnNetworkException(RetryPolicies.TRY_ONCE_THEN_FAIL,

+ 9 - 2
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java

@@ -66,24 +66,28 @@ import com.google.common.base.Supplier;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Lists;
-
+import com.google.common.io.Files;

 public class TestStandbyCheckpoints {
   private static final int NUM_DIRS_IN_LOG = 200000;
   protected MiniDFSCluster cluster;
   protected NameNode nn0, nn1;
   protected FileSystem fs;
+  protected File tmpOivImgDir;

   private static final Log LOG = LogFactory.getLog(TestStandbyCheckpoints.class);

   @SuppressWarnings("rawtypes")
   @Before
   public void setupCluster() throws Exception {
+    tmpOivImgDir = Files.createTempDir();
     Configuration conf = new Configuration();
     conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_KEY, 1);
     conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 5);
     conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
-    
+    conf.set(DFSConfigKeys.DFS_NAMENODE_LEGACY_OIV_IMAGE_DIR_KEY,
+        tmpOivImgDir.getAbsolutePath());
+
     // Dial down the retention of extra edits and checkpoints. This is to
     // help catch regressions of HDFS-4238 (SBN should not purge shared edits)
     conf.setInt(DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY, 1);
@@ -129,6 +133,9 @@ public class TestStandbyCheckpoints {
     // Once the standby catches up, it should notice that it needs to
     // do a checkpoint and save one to its local directories.
     HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(12));
+
+    // It should have saved the oiv image too.
+    assertEquals("One file is expected", 1, tmpOivImgDir.list().length);

     // It should also upload it back to the active.
     HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(12));

+ 28 - 28
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestAclWithSnapshot.java

@@ -119,14 +119,14 @@ public class TestAclWithSnapshot {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "bruce", READ_EXECUTE),
       aclEntry(ACCESS, GROUP, NONE) }, returned);
-    assertPermission((short)0750, path);
+    assertPermission((short)010750, path);

     s = hdfs.getAclStatus(snapshotPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "bruce", READ_EXECUTE),
       aclEntry(ACCESS, GROUP, NONE) }, returned);
-    assertPermission((short)0750, snapshotPath);
+    assertPermission((short)010750, snapshotPath);

     assertDirPermissionGranted(fsAsBruce, BRUCE, snapshotPath);
     assertDirPermissionDenied(fsAsDiana, DIANA, snapshotPath);
@@ -153,14 +153,14 @@ public class TestAclWithSnapshot {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "diana", READ_EXECUTE),
       aclEntry(ACCESS, GROUP, NONE) }, returned);
-    assertPermission((short)0550, path);
+    assertPermission((short)010550, path);

     s = hdfs.getAclStatus(snapshotPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "bruce", READ_EXECUTE),
       aclEntry(ACCESS, GROUP, NONE) }, returned);
-    assertPermission((short)0750, snapshotPath);
+    assertPermission((short)010750, snapshotPath);

     assertDirPermissionDenied(fsAsBruce, BRUCE, path);
     assertDirPermissionGranted(fsAsDiana, DIANA, path);
@@ -202,24 +202,24 @@ public class TestAclWithSnapshot {
     AclStatus s = hdfs.getAclStatus(filePath);
     AclEntry[] returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission((short)0550, filePath);
+    assertPermission((short)010550, filePath);

     s = hdfs.getAclStatus(subdirPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission((short)0550, subdirPath);
+    assertPermission((short)010550, subdirPath);

     s = hdfs.getAclStatus(fileSnapshotPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission((short)0550, fileSnapshotPath);
+    assertPermission((short)010550, fileSnapshotPath);
     assertFilePermissionGranted(fsAsBruce, BRUCE, fileSnapshotPath);
     assertFilePermissionDenied(fsAsDiana, DIANA, fileSnapshotPath);

     s = hdfs.getAclStatus(subdirSnapshotPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission((short)0550, subdirSnapshotPath);
+    assertPermission((short)010550, subdirSnapshotPath);
     assertDirPermissionGranted(fsAsBruce, BRUCE, subdirSnapshotPath);
     assertDirPermissionDenied(fsAsDiana, DIANA, subdirSnapshotPath);

@@ -251,14 +251,14 @@ public class TestAclWithSnapshot {
     AclStatus s = hdfs.getAclStatus(filePath);
     AclEntry[] returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission((short)0570, filePath);
+    assertPermission((short)010570, filePath);
     assertFilePermissionDenied(fsAsBruce, BRUCE, filePath);
     assertFilePermissionGranted(fsAsDiana, DIANA, filePath);

     s = hdfs.getAclStatus(subdirPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission((short)0570, subdirPath);
+    assertPermission((short)010570, subdirPath);
     assertDirPermissionDenied(fsAsBruce, BRUCE, subdirPath);
     assertDirPermissionGranted(fsAsDiana, DIANA, subdirPath);

@@ -268,14 +268,14 @@ public class TestAclWithSnapshot {
     s = hdfs.getAclStatus(fileSnapshotPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission((short)0550, fileSnapshotPath);
+    assertPermission((short)010550, fileSnapshotPath);
     assertFilePermissionGranted(fsAsBruce, BRUCE, fileSnapshotPath);
     assertFilePermissionDenied(fsAsDiana, DIANA, fileSnapshotPath);

     s = hdfs.getAclStatus(subdirSnapshotPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission((short)0550, subdirSnapshotPath);
+    assertPermission((short)010550, subdirSnapshotPath);
     assertDirPermissionGranted(fsAsBruce, BRUCE, subdirSnapshotPath);
     assertDirPermissionDenied(fsAsDiana, DIANA, subdirSnapshotPath);
   }
@@ -302,14 +302,14 @@ public class TestAclWithSnapshot {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "bruce", READ_EXECUTE),
       aclEntry(ACCESS, GROUP, NONE) }, returned);
-    assertPermission((short)0750, path);
+    assertPermission((short)010750, path);

     s = hdfs.getAclStatus(snapshotPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "bruce", READ_EXECUTE),
       aclEntry(ACCESS, GROUP, NONE) }, returned);
-    assertPermission((short)0750, snapshotPath);
+    assertPermission((short)010750, snapshotPath);

     assertDirPermissionGranted(fsAsBruce, BRUCE, snapshotPath);
     assertDirPermissionDenied(fsAsDiana, DIANA, snapshotPath);
@@ -336,7 +336,7 @@ public class TestAclWithSnapshot {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "bruce", READ_EXECUTE),
       aclEntry(ACCESS, GROUP, NONE) }, returned);
-    assertPermission((short)0750, snapshotPath);
+    assertPermission((short)010750, snapshotPath);

     assertDirPermissionDenied(fsAsBruce, BRUCE, path);
     assertDirPermissionDenied(fsAsDiana, DIANA, path);
@@ -378,24 +378,24 @@ public class TestAclWithSnapshot {
     AclStatus s = hdfs.getAclStatus(filePath);
     AclEntry[] returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission((short)0550, filePath);
+    assertPermission((short)010550, filePath);

     s = hdfs.getAclStatus(subdirPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission((short)0550, subdirPath);
+    assertPermission((short)010550, subdirPath);

     s = hdfs.getAclStatus(fileSnapshotPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission((short)0550, fileSnapshotPath);
+    assertPermission((short)010550, fileSnapshotPath);
     assertFilePermissionGranted(fsAsBruce, BRUCE, fileSnapshotPath);
     assertFilePermissionDenied(fsAsDiana, DIANA, fileSnapshotPath);

     s = hdfs.getAclStatus(subdirSnapshotPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission((short)0550, subdirSnapshotPath);
+    assertPermission((short)010550, subdirSnapshotPath);
     assertDirPermissionGranted(fsAsBruce, BRUCE, subdirSnapshotPath);
     assertDirPermissionDenied(fsAsDiana, DIANA, subdirSnapshotPath);

@@ -437,14 +437,14 @@ public class TestAclWithSnapshot {
     s = hdfs.getAclStatus(fileSnapshotPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission((short)0550, fileSnapshotPath);
+    assertPermission((short)010550, fileSnapshotPath);
     assertFilePermissionGranted(fsAsBruce, BRUCE, fileSnapshotPath);
     assertFilePermissionDenied(fsAsDiana, DIANA, fileSnapshotPath);

     s = hdfs.getAclStatus(subdirSnapshotPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission((short)0550, subdirSnapshotPath);
+    assertPermission((short)010550, subdirSnapshotPath);
     assertDirPermissionGranted(fsAsBruce, BRUCE, subdirSnapshotPath);
     assertDirPermissionDenied(fsAsDiana, DIANA, subdirSnapshotPath);
   }
@@ -470,7 +470,7 @@ public class TestAclWithSnapshot {
     AclStatus s = hdfs.getAclStatus(path);
     AclEntry[] returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission((short)0770, path);
+    assertPermission((short)010770, path);
     assertDirPermissionGranted(fsAsBruce, BRUCE, path);
     assertDirPermissionGranted(fsAsDiana, DIANA, path);
   }
@@ -514,7 +514,7 @@ public class TestAclWithSnapshot {
       aclEntry(DEFAULT, GROUP, NONE),
       aclEntry(DEFAULT, MASK, READ_EXECUTE),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)0700, path);
+    assertPermission((short)010700, path);

     s = hdfs.getAclStatus(snapshotPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
@@ -524,7 +524,7 @@ public class TestAclWithSnapshot {
       aclEntry(DEFAULT, GROUP, NONE),
       aclEntry(DEFAULT, MASK, READ_EXECUTE),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)0700, snapshotPath);
+    assertPermission((short)010700, snapshotPath);

     assertDirPermissionDenied(fsAsBruce, BRUCE, snapshotPath);
   }
@@ -596,14 +596,14 @@ public class TestAclWithSnapshot {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "bruce", READ_WRITE),
       aclEntry(ACCESS, GROUP, NONE) }, returned);
-    assertPermission((short)0660, filePath);
+    assertPermission((short)010660, filePath);

     s = hdfs.getAclStatus(fileSnapshotPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "bruce", READ_WRITE),
       aclEntry(ACCESS, GROUP, NONE) }, returned);
-    assertPermission((short)0660, filePath);
+    assertPermission((short)010660, filePath);

     aclSpec = Lists.newArrayList(
       aclEntry(ACCESS, USER, "bruce", READ));
@@ -632,14 +632,14 @@ public class TestAclWithSnapshot {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "bruce", READ_WRITE),
       aclEntry(ACCESS, GROUP, NONE) }, returned);
-    assertPermission((short)0660, filePath);
+    assertPermission((short)010660, filePath);

     s = hdfs.getAclStatus(fileSnapshotPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "bruce", READ_WRITE),
       aclEntry(ACCESS, GROUP, NONE) }, returned);
-    assertPermission((short)0660, filePath);
+    assertPermission((short)010660, filePath);

     aclSpec = Lists.newArrayList(
       aclEntry(ACCESS, USER, "bruce", READ));

Because a large number of files were changed in this diff, some files are not shown.