
Merge from trunk to HDFS-2006

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-2006@1594906 13f79535-47bb-0310-9956-ffa450edef68
Uma Maheswara Rao G 11 years ago
parent commit 0506fadf6a
100 changed files with 6411 additions and 767 deletions
  1. 14 0
      hadoop-common-project/hadoop-common/CHANGES.txt
  2. 38 33
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProvider.java
  3. 88 80
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyShell.java
  4. 7 2
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java
  5. 21 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/FsPermission.java
  6. 11 19
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/AclCommands.java
  7. 1 46
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Ls.java
  8. 65 2
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java
  9. 1 1
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java
  10. 14 1
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ShellBasedUnixGroupsMapping.java
  11. 53 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ProxyServers.java
  12. 1 21
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ProxyUsers.java
  13. 8 2
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java
  14. 2 2
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestKeyShell.java
  15. 54 5
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestRetryProxy.java
  16. 38 0
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/authorize/TestProxyServers.java
  17. 0 11
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/authorize/TestProxyUsers.java
  18. 35 1
      hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
  19. 12 0
      hadoop-hdfs-project/hadoop-hdfs/src/CMakeLists.txt
  20. 2 0
      hadoop-hdfs-project/hadoop-hdfs/src/config.h.cmake
  21. 3 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs
  22. 5 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
  23. 41 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java
  24. 58 31
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java
  25. 63 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/FsAclPermission.java
  26. 3 4
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java
  27. 75 10
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java
  28. 5 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
  29. 46 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/InvalidateBlocks.java
  30. 2 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java
  31. 14 13
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceScanner.java
  32. 4 8
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
  33. 67 56
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
  34. 2 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/resources/DatanodeWebHdfsMethods.java
  35. 57 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java
  36. 8 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointConf.java
  37. 20 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
  38. 19 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
  39. 381 4
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java
  40. 213 4
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java
  41. 61 21
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
  42. 12 6
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSPermissionChecker.java
  43. 1 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeReference.java
  44. 6 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java
  45. 56 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorageRetentionManager.java
  46. 5 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
  47. 16 3
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java
  48. 35 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/AbstractNNFailoverProxyProvider.java
  49. 11 3
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java
  50. 133 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/IPFailoverProxyProvider.java
  51. 6 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java
  52. 80 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/WrappedFailoverProxyProvider.java
  53. 13 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/AbstractINodeDiff.java
  54. 52 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java
  55. 20 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileDiff.java
  56. 9 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/Snapshot.java
  57. 72 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotFSImageFormat.java
  58. 17 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java
  59. 4 3
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java
  60. 11 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java
  61. 172 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/DelimitedImageVisitor.java
  62. 36 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/DepthCounter.java
  63. 193 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionVisitor.java
  64. 83 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoader.java
  65. 821 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java
  66. 212 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageVisitor.java
  67. 111 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/IndentedImageVisitor.java
  68. 178 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/LsImageVisitor.java
  69. 118 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/NameDistributionVisitor.java
  70. 274 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewer.java
  71. 109 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TextWriterImageVisitor.java
  72. 88 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/XmlImageVisitor.java
  73. 10 4
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java
  74. 290 227
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java
  75. 1 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/HttpOpParam.java
  76. 804 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/native/libhdfs/test/vecsum.c
  77. 1 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/native/libhdfs/test_libhdfs_threaded.c
  78. 1 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/datanode/index.html
  79. 6 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.html
  80. 2 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.html
  81. 1 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/journal/index.html
  82. 1 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/secondary/status.html
  83. 7 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dfs-dust.js
  84. 1 1
      hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsNfsGateway.apt.vm
  85. 79 5
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java
  86. 27 7
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSPermission.java
  87. 9 9
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeBlockScanner.java
  88. 274 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFsShellPermission.java
  89. 162 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingInvalidateBlock.java
  90. 2 2
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/common/TestJspHelper.java
  91. 18 13
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDiskError.java
  92. 5 1
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/AclTestHelpers.java
  93. 52 34
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSAclBaseTest.java
  94. 3 2
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogger.java
  95. 43 2
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java
  96. 6 6
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithAcl.java
  97. 2 2
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSPermissionChecker.java
  98. 1 4
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java
  99. 9 2
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java
  100. 28 28
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestAclWithSnapshot.java

+ 14 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -326,6 +326,8 @@ Trunk (Unreleased)
 
     HADOOP-10431. Change visibility of KeyStore.Options getter methods to public. (tucu)
 
+    HADOOP-10583. bin/hadoop key throws NPE with no args and assorted other fixups. (clamb via tucu)
+
   OPTIMIZATIONS
 
     HADOOP-7761. Improve the performance of raw comparisons. (todd)
@@ -380,6 +382,12 @@ Release 2.5.0 - UNRELEASED
     HADOOP-10158. SPNEGO should work with multiple interfaces/SPNs.
     (daryn via kihwal)
 
+    HADOOP-10566. Refactor proxyservers out of ProxyUsers.
+    (Benoy Antony via suresh)
+
+    HADOOP-10572. Example NFS mount command must pass noacl as it isn't
+    supported by the server yet. (Harsh J via brandonli)
+
   OPTIMIZATIONS
 
   BUG FIXES 
@@ -465,6 +473,12 @@ Release 2.5.0 - UNRELEASED
     because groups stored in Set and ArrayList are compared. 
     (Mit Desai via kihwal)
 
+    HADOOP-10585. Retry polices ignore interrupted exceptions (Daryn Sharp via
+    jeagles)
+
+    HADOOP-10401. ShellBasedUnixGroupsMapping#getGroups does not always return
+    primary group first (Akira AJISAKA via Colin Patrick McCabe)
+
 Release 2.4.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 38 - 33
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProvider.java

@@ -27,9 +27,7 @@ import java.net.URI;
 import java.security.NoSuchAlgorithmException;
 import java.text.MessageFormat;
 import java.util.Date;
-import java.util.LinkedHashMap;
 import java.util.List;
-import java.util.Map;
 
 import com.google.gson.stream.JsonReader;
 import com.google.gson.stream.JsonWriter;
@@ -176,22 +174,26 @@ public abstract class KeyProvider {
     protected byte[] serialize() throws IOException {
       ByteArrayOutputStream buffer = new ByteArrayOutputStream();
       JsonWriter writer = new JsonWriter(new OutputStreamWriter(buffer));
-      writer.beginObject();
-      if (cipher != null) {
-        writer.name(CIPHER_FIELD).value(cipher);
-      }
-      if (bitLength != 0) {
-        writer.name(BIT_LENGTH_FIELD).value(bitLength);
-      }
-      if (created != null) {
-        writer.name(CREATED_FIELD).value(created.getTime());
-      }
-      if (description != null) {
-        writer.name(DESCRIPTION_FIELD).value(description);
+      try {
+        writer.beginObject();
+        if (cipher != null) {
+          writer.name(CIPHER_FIELD).value(cipher);
+        }
+        if (bitLength != 0) {
+          writer.name(BIT_LENGTH_FIELD).value(bitLength);
+        }
+        if (created != null) {
+          writer.name(CREATED_FIELD).value(created.getTime());
+        }
+        if (description != null) {
+          writer.name(DESCRIPTION_FIELD).value(description);
+        }
+        writer.name(VERSIONS_FIELD).value(versions);
+        writer.endObject();
+        writer.flush();
+      } finally {
+        writer.close();
       }
-      writer.name(VERSIONS_FIELD).value(versions);
-      writer.endObject();
-      writer.flush();
       return buffer.toByteArray();
     }
 
@@ -207,23 +209,27 @@ public abstract class KeyProvider {
       int versions = 0;
       String description = null;
       JsonReader reader = new JsonReader(new InputStreamReader
-          (new ByteArrayInputStream(bytes)));
-      reader.beginObject();
-      while (reader.hasNext()) {
-        String field = reader.nextName();
-        if (CIPHER_FIELD.equals(field)) {
-          cipher = reader.nextString();
-        } else if (BIT_LENGTH_FIELD.equals(field)) {
-          bitLength = reader.nextInt();
-        } else if (CREATED_FIELD.equals(field)) {
-          created = new Date(reader.nextLong());
-        } else if (VERSIONS_FIELD.equals(field)) {
-          versions = reader.nextInt();
-        } else if (DESCRIPTION_FIELD.equals(field)) {
-          description = reader.nextString();
+        (new ByteArrayInputStream(bytes)));
+      try {
+        reader.beginObject();
+        while (reader.hasNext()) {
+          String field = reader.nextName();
+          if (CIPHER_FIELD.equals(field)) {
+            cipher = reader.nextString();
+          } else if (BIT_LENGTH_FIELD.equals(field)) {
+            bitLength = reader.nextInt();
+          } else if (CREATED_FIELD.equals(field)) {
+            created = new Date(reader.nextLong());
+          } else if (VERSIONS_FIELD.equals(field)) {
+            versions = reader.nextInt();
+          } else if (DESCRIPTION_FIELD.equals(field)) {
+            description = reader.nextString();
+          }
         }
+        reader.endObject();
+      } finally {
+        reader.close();
       }
-      reader.endObject();
       this.cipher = cipher;
       this.bitLength = bitLength;
       this.created = created;
@@ -310,7 +316,6 @@ public abstract class KeyProvider {
    */
   public abstract List<String> getKeys() throws IOException;
 
-
   /**
    * Get key metadata in bulk.
    * @param names the names of the keys to get

+ 88 - 80
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyShell.java

@@ -23,9 +23,6 @@ import java.io.PrintStream;
 import java.security.InvalidParameterException;
 import java.security.NoSuchAlgorithmException;
 import java.util.List;
-import java.util.Map;
-
-import javax.crypto.KeyGenerator;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
@@ -93,41 +90,54 @@ public class KeyShell extends Configured implements Tool {
    */
   private int init(String[] args) throws IOException {
     for (int i = 0; i < args.length; i++) { // parse command line
+      boolean moreTokens = (i < args.length - 1);
       if (args[i].equals("create")) {
-        String keyName = args[++i];
+        String keyName = "--help";
+        if (moreTokens) {
+          keyName = args[++i];
+        }
+
         command = new CreateCommand(keyName);
-        if (keyName.equals("--help")) {
+        if ("--help".equals(keyName)) {
           printKeyShellUsage();
           return -1;
         }
       } else if (args[i].equals("delete")) {
-        String keyName = args[++i];
+        String keyName = "--help";
+        if (moreTokens) {
+          keyName = args[++i];
+        }
+
         command = new DeleteCommand(keyName);
-        if (keyName.equals("--help")) {
+        if ("--help".equals(keyName)) {
           printKeyShellUsage();
           return -1;
         }
       } else if (args[i].equals("roll")) {
-        String keyName = args[++i];
+        String keyName = "--help";
+        if (moreTokens) {
+          keyName = args[++i];
+        }
+
         command = new RollCommand(keyName);
-        if (keyName.equals("--help")) {
+        if ("--help".equals(keyName)) {
           printKeyShellUsage();
           return -1;
         }
-      } else if (args[i].equals("list")) {
+      } else if ("list".equals(args[i])) {
         command = new ListCommand();
-      } else if (args[i].equals("--size")) {
+      } else if ("--size".equals(args[i]) && moreTokens) {
         getConf().set(KeyProvider.DEFAULT_BITLENGTH_NAME, args[++i]);
-      } else if (args[i].equals("--cipher")) {
+      } else if ("--cipher".equals(args[i]) && moreTokens) {
         getConf().set(KeyProvider.DEFAULT_CIPHER_NAME, args[++i]);
-      } else if (args[i].equals("--provider")) {
+      } else if ("--provider".equals(args[i]) && moreTokens) {
         userSuppliedProvider = true;
         getConf().set(KeyProviderFactory.KEY_PROVIDER_PATH, args[++i]);
-      } else if (args[i].equals("--metadata")) {
+      } else if ("--metadata".equals(args[i])) {
         getConf().setBoolean(LIST_METADATA, true);
-      } else if (args[i].equals("-i") || (args[i].equals("--interactive"))) {
+      } else if ("-i".equals(args[i]) || ("--interactive".equals(args[i]))) {
         interactive = true;
-      } else if (args[i].equals("--help")) {
+      } else if ("--help".equals(args[i])) {
         printKeyShellUsage();
         return -1;
       } else {
@@ -136,6 +146,12 @@ public class KeyShell extends Configured implements Tool {
         return -1;
       }
     }
+
+    if (command == null) {
+      printKeyShellUsage();
+      return -1;
+    }
+
     return 0;
   }
 
@@ -143,8 +159,7 @@ public class KeyShell extends Configured implements Tool {
     out.println(USAGE_PREFIX + COMMANDS);
     if (command != null) {
       out.println(command.getUsage());
-    }
-    else {
+    } else {
       out.println("=========================================================" +
       		"======");
       out.println(CreateCommand.USAGE + ":\n\n" + CreateCommand.DESC);
@@ -174,8 +189,7 @@ public class KeyShell extends Configured implements Tool {
         providers = KeyProviderFactory.getProviders(getConf());
         if (userSuppliedProvider) {
           provider = providers.get(0);
-        }
-        else {
+        } else {
           for (KeyProvider p : providers) {
             if (!p.isTransient()) {
               provider = p;
@@ -190,7 +204,7 @@ public class KeyShell extends Configured implements Tool {
     }
 
     protected void printProviderWritten() {
-        out.println(provider.getClass().getName() + " has been updated.");
+        out.println(provider + " has been updated.");
     }
 
     protected void warnIfTransientProvider() {
@@ -206,12 +220,12 @@ public class KeyShell extends Configured implements Tool {
 
   private class ListCommand extends Command {
     public static final String USAGE =
-        "list [--provider] [--metadata] [--help]";
+        "list [--provider <provider>] [--metadata] [--help]";
     public static final String DESC =
-        "The list subcommand displays the keynames contained within \n" +
-        "a particular provider - as configured in core-site.xml or " +
-        "indicated\nthrough the --provider argument.\n" +
-        "If the --metadata option is used, the keys metadata will be printed";
+        "The list subcommand displays the keynames contained within\n" +
+        "a particular provider as configured in core-site.xml or\n" +
+        "specified with the --provider argument. --metadata displays\n" +
+        "the metadata.";
 
     private boolean metadata = false;
 
@@ -220,9 +234,9 @@ public class KeyShell extends Configured implements Tool {
       provider = getKeyProvider();
       if (provider == null) {
         out.println("There are no non-transient KeyProviders configured.\n"
-            + "Consider using the --provider option to indicate the provider\n"
-            + "to use. If you want to list a transient provider then you\n"
-            + "you MUST use the --provider argument.");
+          + "Use the --provider option to specify a provider. If you\n"
+          + "want to list a transient provider then you must use the\n"
+          + "--provider argument.");
         rc = false;
       }
       metadata = getConf().getBoolean(LIST_METADATA, false);
@@ -231,12 +245,12 @@ public class KeyShell extends Configured implements Tool {
 
     public void execute() throws IOException {
       try {
-        List<String> keys = provider.getKeys();
-        out.println("Listing keys for KeyProvider: " + provider.toString());
+        final List<String> keys = provider.getKeys();
+        out.println("Listing keys for KeyProvider: " + provider);
         if (metadata) {
-          Metadata[] meta =
+          final Metadata[] meta =
             provider.getKeysMetadata(keys.toArray(new String[keys.size()]));
-          for(int i=0; i < meta.length; ++i) {
+          for (int i = 0; i < meta.length; ++i) {
             out.println(keys.get(i) + " : " + meta[i]);
           }
         } else {
@@ -245,7 +259,7 @@ public class KeyShell extends Configured implements Tool {
           }
         }
       } catch (IOException e) {
-        out.println("Cannot list keys for KeyProvider: " + provider.toString()
+        out.println("Cannot list keys for KeyProvider: " + provider
             + ": " + e.getMessage());
         throw e;
       }
@@ -258,11 +272,10 @@ public class KeyShell extends Configured implements Tool {
   }
 
   private class RollCommand extends Command {
-    public static final String USAGE = "roll <keyname> [--provider] [--help]";
+    public static final String USAGE = "roll <keyname> [--provider <provider>] [--help]";
     public static final String DESC =
-        "The roll subcommand creates a new version of the key specified\n" +
-        "through the <keyname> argument within the provider indicated using\n" +
-        "the --provider argument";
+      "The roll subcommand creates a new version for the specified key\n" +
+      "within the provider indicated using the --provider argument\n";
 
     String keyName = null;
 
@@ -274,15 +287,14 @@ public class KeyShell extends Configured implements Tool {
       boolean rc = true;
       provider = getKeyProvider();
       if (provider == null) {
-        out.println("There are no valid KeyProviders configured.\n"
-            + "Key will not be rolled.\n"
-            + "Consider using the --provider option to indicate the provider"
-            + " to use.");
+        out.println("There are no valid KeyProviders configured. The key\n" +
+          "has not been rolled. Use the --provider option to specify\n" +
+          "a provider.");
         rc = false;
       }
       if (keyName == null) {
-        out.println("There is no keyName specified. Please provide the" +
-            "mandatory <keyname>. See the usage description with --help.");
+        out.println("Please provide a <keyname>.\n" +
+          "See the usage description by using --help.");
         rc = false;
       }
       return rc;
@@ -290,10 +302,9 @@ public class KeyShell extends Configured implements Tool {
 
     public void execute() throws NoSuchAlgorithmException, IOException {
       try {
-        Metadata md = provider.getMetadata(keyName);
         warnIfTransientProvider();
         out.println("Rolling key version from KeyProvider: "
-            + provider.toString() + " for key name: " + keyName);
+            + provider + "\n  for key name: " + keyName);
         try {
           provider.rollNewVersion(keyName);
           out.println(keyName + " has been successfully rolled.");
@@ -301,12 +312,12 @@ public class KeyShell extends Configured implements Tool {
           printProviderWritten();
         } catch (NoSuchAlgorithmException e) {
           out.println("Cannot roll key: " + keyName + " within KeyProvider: "
-              + provider.toString());
+              + provider);
           throw e;
         }
       } catch (IOException e1) {
         out.println("Cannot roll key: " + keyName + " within KeyProvider: "
-            + provider.toString());
+            + provider);
         throw e1;
       }
     }
@@ -318,11 +329,11 @@ public class KeyShell extends Configured implements Tool {
   }
 
   private class DeleteCommand extends Command {
-    public static final String USAGE = "delete <keyname> [--provider] [--help]";
+    public static final String USAGE = "delete <keyname> [--provider <provider>] [--help]";
     public static final String DESC =
-        "The delete subcommand deletes all of the versions of the key\n" +
-        "specified as the <keyname> argument from within the provider\n" +
-        "indicated through the --provider argument";
+        "The delete subcommand deletes all versions of the key\n" +
+        "specified by the <keyname> argument from within the\n" +
+        "provider specified --provider.";
 
     String keyName = null;
     boolean cont = true;
@@ -335,23 +346,21 @@ public class KeyShell extends Configured implements Tool {
     public boolean validate() {
       provider = getKeyProvider();
       if (provider == null) {
-        out.println("There are no valid KeyProviders configured.\n"
-            + "Nothing will be deleted.\n"
-            + "Consider using the --provider option to indicate the provider"
-            + " to use.");
+        out.println("There are no valid KeyProviders configured. Nothing\n"
+          + "was deleted. Use the --provider option to specify a provider.");
         return false;
       }
       if (keyName == null) {
-        out.println("There is no keyName specified. Please provide the" +
-            "mandatory <keyname>. See the usage description with --help.");
+        out.println("There is no keyName specified. Please specify a " +
+            "<keyname>. See the usage description with --help.");
         return false;
       }
       if (interactive) {
         try {
           cont = ToolRunner
               .confirmPrompt("You are about to DELETE all versions of "
-                  + "the key: " + keyName + " from KeyProvider "
-                  + provider.toString() + ". Continue?:");
+                  + " key: " + keyName + " from KeyProvider "
+                  + provider + ". Continue?:");
           if (!cont) {
             out.println("Nothing has been be deleted.");
           }
@@ -367,7 +376,7 @@ public class KeyShell extends Configured implements Tool {
     public void execute() throws IOException {
       warnIfTransientProvider();
       out.println("Deleting key: " + keyName + " from KeyProvider: "
-          + provider.toString());
+          + provider);
       if (cont) {
         try {
           provider.deleteKey(keyName);
@@ -375,7 +384,7 @@ public class KeyShell extends Configured implements Tool {
           provider.flush();
           printProviderWritten();
         } catch (IOException e) {
-          out.println(keyName + "has NOT been deleted.");
+          out.println(keyName + " has not been deleted.");
           throw e;
         }
       }
@@ -388,16 +397,16 @@ public class KeyShell extends Configured implements Tool {
   }
 
   private class CreateCommand extends Command {
-    public static final String USAGE = "create <keyname> [--cipher] " +
-    		"[--size] [--provider] [--help]";
+    public static final String USAGE =
+      "create <keyname> [--cipher <cipher>] [--size <size>]\n" +
+      "                     [--provider <provider>] [--help]";
     public static final String DESC =
-        "The create subcommand creates a new key for the name specified\n" +
-        "as the <keyname> argument within the provider indicated through\n" +
-        "the --provider argument. You may also indicate the specific\n" +
-        "cipher through the --cipher argument. The default for cipher is\n" +
-        "currently \"AES/CTR/NoPadding\". The default keysize is \"256\".\n" +
-        "You may also indicate the requested key length through the --size\n" +
-        "argument.";
+      "The create subcommand creates a new key for the name specified\n" +
+      "by the <keyname> argument within the provider specified by the\n" +
+      "--provider argument. You may specify a cipher with the --cipher\n" +
+      "argument. The default cipher is currently \"AES/CTR/NoPadding\".\n" +
+      "The default keysize is 256. You may specify the requested key\n" +
+      "length using the --size argument.\n";
 
     String keyName = null;
 
@@ -409,15 +418,14 @@ public class KeyShell extends Configured implements Tool {
       boolean rc = true;
       provider = getKeyProvider();
       if (provider == null) {
-        out.println("There are no valid KeyProviders configured.\nKey" +
-        		" will not be created.\n"
-            + "Consider using the --provider option to indicate the provider" +
-            " to use.");
+        out.println("There are no valid KeyProviders configured. No key\n" +
+          " was created. You can use the --provider option to specify\n" +
+          " a provider to use.");
         rc = false;
       }
       if (keyName == null) {
-        out.println("There is no keyName specified. Please provide the" +
-        		"mandatory <keyname>. See the usage description with --help.");
+        out.println("Please provide a <keyname>. See the usage description" +
+          " with --help.");
         rc = false;
       }
       return rc;
@@ -432,13 +440,13 @@ public class KeyShell extends Configured implements Tool {
         provider.flush();
         printProviderWritten();
       } catch (InvalidParameterException e) {
-        out.println(keyName + " has NOT been created. " + e.getMessage());
+        out.println(keyName + " has not been created. " + e.getMessage());
         throw e;
       } catch (IOException e) {
-        out.println(keyName + " has NOT been created. " + e.getMessage());
+        out.println(keyName + " has not been created. " + e.getMessage());
         throw e;
       } catch (NoSuchAlgorithmException e) {
-        out.println(keyName + " has NOT been created. " + e.getMessage());
+        out.println(keyName + " has not been created. " + e.getMessage());
         throw e;
       }
     }

+ 7 - 2
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java

@@ -126,7 +126,6 @@ public class KMSClientProvider extends KeyProvider {
     return o;
   }
 
-
   public static String checkNotEmpty(String s, String name)
       throws IllegalArgumentException {
     checkNotNull(s, name);
@@ -140,6 +139,13 @@ public class KMSClientProvider extends KeyProvider {
   private String kmsUrl;
   private SSLFactory sslFactory;
 
+  @Override
+  public String toString() {
+    final StringBuilder sb = new StringBuilder("KMSClientProvider[");
+    sb.append(kmsUrl).append("]");
+    return sb.toString();
+  }
+
   public KMSClientProvider(URI uri, Configuration conf) throws IOException {
     Path path = unnestUri(uri);
     URL url = path.toUri().toURL();
@@ -515,5 +521,4 @@ public class KMSClientProvider extends KeyProvider {
   public static String buildVersionName(String name, int version) {
     return KeyProvider.buildVersionName(name, version);
   }
-
 }

+ 21 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/FsPermission.java

@@ -158,6 +158,17 @@ public class FsPermission implements Writable {
     return (short)s;
   }
 
+  /**
+   * Encodes the object to a short.  Unlike {@link #toShort()}, this method may
+   * return values outside the fixed range 00000 - 01777 if extended features
+   * are encoded into this permission, such as the ACL bit.
+   *
+   * @return short extended short representation of this permission
+   */
+  public short toExtendedShort() {
+    return toShort();
+  }
+
   @Override
   public boolean equals(Object obj) {
     if (obj instanceof FsPermission) {
@@ -273,6 +284,16 @@ public class FsPermission implements Writable {
     return stickyBit;
   }
 
+  /**
+   * Returns true if there is also an ACL (access control list).
+   *
+   * @return boolean true if there is also an ACL (access control list).
+   */
+  public boolean getAclBit() {
+    // File system subclasses that support the ACL bit would override this.
+    return false;
+  }
+
   /** Set the user file creation mask (umask) */
   public static void setUMask(Configuration conf, FsPermission umask) {
     conf.set(UMASK_LABEL, String.format("%1$03o", umask.toShort()));
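
The new getAclBit() hook defaults to false; as its comment notes, file-system-specific subclasses are expected to override it, and the AclCommands and Ls changes further down consume it. Below is a minimal illustrative sketch, not part of this commit, of such an override using only the FsPermission API shown above; the class name AclFlaggedPermission is hypothetical (HDFS adds its own FsAclPermission elsewhere in this changeset).

import org.apache.hadoop.fs.permission.FsPermission;

// Illustrative sketch only, not part of the commit: a permission subclass that
// reports the ACL bit, as the new getAclBit() javadoc suggests subclasses do.
class AclFlaggedPermission extends FsPermission {
  AclFlaggedPermission(FsPermission perm) {
    super(perm.toShort());
  }

  @Override
  public boolean getAclBit() {
    return true; // this permission is known to carry an extended ACL
  }
}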

+ 11 - 19
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/AclCommands.java

@@ -75,29 +75,21 @@ class AclCommands extends FsCommand {
 
     @Override
     protected void processPath(PathData item) throws IOException {
-      AclStatus aclStatus = item.fs.getAclStatus(item.path);
       out.println("# file: " + item);
-      out.println("# owner: " + aclStatus.getOwner());
-      out.println("# group: " + aclStatus.getGroup());
-      List<AclEntry> entries = aclStatus.getEntries();
-      if (aclStatus.isStickyBit()) {
-        String stickyFlag = "T";
-        for (AclEntry aclEntry : entries) {
-          if (aclEntry.getType() == AclEntryType.OTHER
-              && aclEntry.getScope() == AclEntryScope.ACCESS
-              && aclEntry.getPermission().implies(FsAction.EXECUTE)) {
-            stickyFlag = "t";
-            break;
-          }
-        }
-        out.println("# flags: --" + stickyFlag);
+      out.println("# owner: " + item.stat.getOwner());
+      out.println("# group: " + item.stat.getGroup());
+      FsPermission perm = item.stat.getPermission();
+      if (perm.getStickyBit()) {
+        out.println("# flags: --" +
+          (perm.getOtherAction().implies(FsAction.EXECUTE) ? "t" : "T"));
       }
 
-      FsPermission perm = item.stat.getPermission();
-      if (entries.isEmpty()) {
-        printMinimalAcl(perm);
-      } else {
+      if (perm.getAclBit()) {
+        AclStatus aclStatus = item.fs.getAclStatus(item.path);
+        List<AclEntry> entries = aclStatus.getEntries();
         printExtendedAcl(perm, entries);
+      } else {
+        printMinimalAcl(perm);
       }
 
       out.println();

+ 1 - 46
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Ls.java

@@ -31,8 +31,6 @@ import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.ipc.RemoteException;
-import org.apache.hadoop.ipc.RpcNoSuchMethodException;
 
 import com.google.common.collect.Sets;
 
@@ -116,7 +114,7 @@ class Ls extends FsCommand {
     FileStatus stat = item.stat;
     String line = String.format(lineFormat,
         (stat.isDirectory() ? "d" : "-"),
-        stat.getPermission() + (hasAcl(item) ? "+" : " "),
+        stat.getPermission() + (stat.getPermission().getAclBit() ? "+" : " "),
         (stat.isFile() ? stat.getReplication() : "-"),
         stat.getOwner(),
         stat.getGroup(),
@@ -153,49 +151,6 @@ class Ls extends FsCommand {
     lineFormat = fmt.toString();
   }
 
-  /**
-   * Calls getAclStatus to determine if the given item has an ACL.  For
-   * compatibility, this method traps errors caused by the RPC method missing
-   * from the server side.  This would happen if the client was connected to an
-   * old NameNode that didn't have the ACL APIs.  This method also traps the
-   * case of the client-side FileSystem not implementing the ACL APIs.
-   * FileSystem instances that do not support ACLs are remembered.  This
-   * prevents the client from sending multiple failing RPC calls during a
-   * recursive ls.
-   *
-   * @param item PathData item to check
-   * @return boolean true if item has an ACL
-   * @throws IOException if there is a failure
-   */
-  private boolean hasAcl(PathData item) throws IOException {
-    FileSystem fs = item.fs;
-    if (aclNotSupportedFsSet.contains(fs.getUri())) {
-      // This FileSystem failed to run the ACL API in an earlier iteration.
-      return false;
-    }
-    try {
-      return !fs.getAclStatus(item.path).getEntries().isEmpty();
-    } catch (RemoteException e) {
-      // If this is a RpcNoSuchMethodException, then the client is connected to
-      // an older NameNode that doesn't support ACLs.  Keep going.
-      IOException e2 = e.unwrapRemoteException(RpcNoSuchMethodException.class);
-      if (!(e2 instanceof RpcNoSuchMethodException)) {
-        throw e;
-      }
-    } catch (IOException e) {
-      // The NameNode supports ACLs, but they are not enabled.  Keep going.
-      String message = e.getMessage();
-      if (message != null && !message.contains("ACLs has been disabled")) {
-        throw e;
-      }
-    } catch (UnsupportedOperationException e) {
-      // The underlying FileSystem doesn't implement ACLs.  Keep going.
-    }
-    // Remember that this FileSystem cannot support ACLs.
-    aclNotSupportedFsSet.add(fs.getUri());
-    return false;
-  }
-
   private int maxLength(int n, Object value) {
     return Math.max(n, (value != null) ? String.valueOf(value).length() : 0);
   }

+ 65 - 2
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java

@@ -20,6 +20,7 @@ package org.apache.hadoop.ha;
 import java.io.IOException;
 import java.io.PrintStream;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Map;
 
 import org.apache.commons.cli.Options;
@@ -33,6 +34,7 @@ import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.fs.CommonConfigurationKeys;
+import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
 import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
 import org.apache.hadoop.ha.HAServiceProtocol.RequestSource;
 import org.apache.hadoop.util.Tool;
@@ -66,7 +68,7 @@ public abstract class HAAdmin extends Configured implements Tool {
   protected final static Map<String, UsageInfo> USAGE =
     ImmutableMap.<String, UsageInfo>builder()
     .put("-transitionToActive",
-        new UsageInfo("<serviceId>", "Transitions the service into Active state"))
+        new UsageInfo(" <serviceId> [--"+FORCEACTIVE+"]", "Transitions the service into Active state"))
     .put("-transitionToStandby",
         new UsageInfo("<serviceId>", "Transitions the service into Standby state"))
     .put("-failover",
@@ -100,6 +102,10 @@ public abstract class HAAdmin extends Configured implements Tool {
   }
 
   protected abstract HAServiceTarget resolveTarget(String string);
+  
+  protected Collection<String> getTargetIds(String targetNodeToActivate) {
+    return Arrays.asList(new String[]{targetNodeToActivate});
+  }
 
   protected String getUsageString() {
     return "Usage: HAAdmin";
@@ -133,6 +139,11 @@ public abstract class HAAdmin extends Configured implements Tool {
       printUsage(errOut, "-transitionToActive");
       return -1;
     }
+    /*  returns true if other target node is active or some exception occurred 
+        and forceActive was not set  */
+    if(isOtherTargetNodeActive(argv[0], cmd.hasOption(FORCEACTIVE))) {
+      return -1;
+    }
     HAServiceTarget target = resolveTarget(argv[0]);
     if (!checkManualStateManagementOK(target)) {
       return -1;
@@ -142,7 +153,48 @@ public abstract class HAAdmin extends Configured implements Tool {
     HAServiceProtocolHelper.transitionToActive(proto, createReqInfo());
     return 0;
   }
-
+  
+  /**
+   * Checks whether other target node is active or not
+   * @param targetNodeToActivate
+   * @return true if other target node is active or some other exception 
+   * occurred and forceActive was set otherwise false
+   * @throws IOException
+   */
+  private boolean isOtherTargetNodeActive(String targetNodeToActivate, boolean forceActive)
+      throws IOException  {
+    Collection<String> targetIds = getTargetIds(targetNodeToActivate);
+    if(targetIds == null) {
+      errOut.println("transitionToActive: No target node in the "
+          + "current configuration");
+      printUsage(errOut, "-transitionToActive");
+      return true;
+    }
+    targetIds.remove(targetNodeToActivate);
+    for(String targetId : targetIds) {
+      HAServiceTarget target = resolveTarget(targetId);
+      if (!checkManualStateManagementOK(target)) {
+        return true;
+      }
+      try {
+        HAServiceProtocol proto = target.getProxy(getConf(), 5000);
+        if(proto.getServiceStatus().getState() == HAServiceState.ACTIVE) {
+          errOut.println("transitionToActive: Node " +  targetId +" is already active");
+          printUsage(errOut, "-transitionToActive");
+          return true;
+        }
+      } catch (Exception e) {
+        //If forceActive switch is false then return true
+        if(!forceActive) {
+          errOut.println("Unexpected error occurred  " + e.getMessage());
+          printUsage(errOut, "-transitionToActive");
+          return true; 
+        }
+      }
+    }
+    return false;
+  }
+  
   private int transitionToStandby(final CommandLine cmd)
       throws IOException, ServiceFailedException {
     String[] argv = cmd.getArgs();
@@ -364,6 +416,9 @@ public abstract class HAAdmin extends Configured implements Tool {
     if ("-failover".equals(cmd)) {
       addFailoverCliOpts(opts);
     }
+    if("-transitionToActive".equals(cmd)) {
+      addTransitionToActiveCliOpts(opts);
+    }
     // Mutative commands take FORCEMANUAL option
     if ("-transitionToActive".equals(cmd) ||
         "-transitionToStandby".equals(cmd) ||
@@ -433,6 +488,14 @@ public abstract class HAAdmin extends Configured implements Tool {
     // that change state.
   }
   
+  /**
+   * Add CLI options which are specific to the transitionToActive command and
+   * no others.
+   */
+  private void addTransitionToActiveCliOpts(Options transitionToActiveCliOpts) {
+    transitionToActiveCliOpts.addOption(FORCEACTIVE, false, "force active");
+  }
+  
   private CommandLine parseOpts(String cmdName, Options opts, String[] argv) {
     try {
       // Strip off the first arg, since that's just the command name

+ 1 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java

@@ -150,7 +150,7 @@ public class RetryInvocationHandler<T> implements RpcInvocationHandler {
           }
           
           if (action.delayMillis > 0) {
-            ThreadUtil.sleepAtLeastIgnoreInterrupts(action.delayMillis);
+            Thread.sleep(action.delayMillis);
           }
           
           if (action.action == RetryAction.RetryDecision.FAILOVER_AND_RETRY) {
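
Replacing ThreadUtil.sleepAtLeastIgnoreInterrupts with Thread.sleep is what lets an interrupt break out of the retry delay (HADOOP-10585); the testRetryInterruptible case added to TestRetryProxy below exercises this. The following stand-alone sketch, not part of the commit, shows the JDK behavior the handler now relies on: interrupting a thread blocked in Thread.sleep raises InterruptedException immediately rather than letting the thread sleep out the full delay.

// Illustrative sketch only, not part of the commit.
public class SleepInterruptDemo {
  public static void main(String[] args) throws Exception {
    Thread sleeper = new Thread(new Runnable() {
      @Override
      public void run() {
        try {
          Thread.sleep(10000);   // stands in for action.delayMillis
          System.out.println("slept the full delay");
        } catch (InterruptedException e) {
          // message is typically "sleep interrupted", as the new test asserts
          System.out.println("interrupted: " + e.getMessage());
        }
      }
    });
    sleeper.start();
    Thread.sleep(200);           // give the sleeper time to reach sleep()
    sleeper.interrupt();         // the retry loop would now observe this
    sleeper.join();
  }
}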

+ 14 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ShellBasedUnixGroupsMapping.java

@@ -74,7 +74,8 @@ public class ShellBasedUnixGroupsMapping
    * Get the current user's group list from Unix by running the command 'groups'
    * NOTE. For non-existing user it will return EMPTY list
    * @param user user name
-   * @return the groups list that the <code>user</code> belongs to
+   * @return the groups list that the <code>user</code> belongs to. The primary
+   *         group is returned first.
    * @throws IOException if encounter any error when running the command
    */
   private static List<String> getUnixGroups(final String user) throws IOException {
@@ -84,6 +85,7 @@ public class ShellBasedUnixGroupsMapping
     } catch (ExitCodeException e) {
       // if we didn't get the group - just return empty list;
       LOG.warn("got exception trying to get groups for user " + user, e);
+      return new LinkedList<String>();
     }
     
     StringTokenizer tokenizer =
@@ -92,6 +94,17 @@ public class ShellBasedUnixGroupsMapping
     while (tokenizer.hasMoreTokens()) {
       groups.add(tokenizer.nextToken());
     }
+
+    // remove duplicated primary group
+    if (!Shell.WINDOWS) {
+      for (int i = 1; i < groups.size(); i++) {
+        if (groups.get(i).equals(groups.get(0))) {
+          groups.remove(i);
+          break;
+        }
+      }
+    }
+
     return groups;
   }
 }

+ 53 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ProxyServers.java

@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.security.authorize;
+
+import java.net.InetSocketAddress;
+import java.util.Collection;
+import java.util.HashSet;
+
+import org.apache.hadoop.conf.Configuration;
+
+public class ProxyServers {
+  public static final String CONF_HADOOP_PROXYSERVERS = "hadoop.proxyservers";
+  private static volatile Collection<String> proxyServers;
+
+  public static void refresh() {
+    refresh(new Configuration());
+  }
+
+  public static void refresh(Configuration conf){
+    Collection<String> tempServers = new HashSet<String>();
+    // trusted proxy servers such as http proxies
+    for (String host : conf.getTrimmedStrings(CONF_HADOOP_PROXYSERVERS)) {
+      InetSocketAddress addr = new InetSocketAddress(host, 0);
+      if (!addr.isUnresolved()) {
+        tempServers.add(addr.getAddress().getHostAddress());
+      }
+    }
+    proxyServers = tempServers;
+  }
+
+  public static boolean isProxyServer(String remoteAddr) { 
+    if (proxyServers == null) {
+      refresh(); 
+    }
+    return proxyServers.contains(remoteAddr);
+  }
+}
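
A short usage sketch, not part of the commit, of the new ProxyServers class, mirroring what the added TestProxyServers does further down: set hadoop.proxyservers, refresh, then query isProxyServer with a remote address (hosts are resolved to IP addresses at refresh time).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.authorize.ProxyServers;

// Illustrative usage sketch only, not part of the commit.
public class ProxyServersExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.set(ProxyServers.CONF_HADOOP_PROXYSERVERS, "2.2.2.2, 3.3.3.3");
    ProxyServers.refresh(conf);

    System.out.println(ProxyServers.isProxyServer("2.2.2.2")); // true
    System.out.println(ProxyServers.isProxyServer("1.1.1.1")); // false
  }
}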

+ 1 - 21
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ProxyUsers.java

@@ -19,12 +19,10 @@
 package org.apache.hadoop.security.authorize;
 
 import java.net.InetAddress;
-import java.net.InetSocketAddress;
 import java.net.UnknownHostException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.Map;
 import java.util.Map.Entry;
 
@@ -44,7 +42,6 @@ public class ProxyUsers {
   private static final String CONF_GROUPS = ".groups";
   private static final String CONF_HADOOP_PROXYUSER = "hadoop.proxyuser.";
   private static final String CONF_HADOOP_PROXYUSER_RE = "hadoop\\.proxyuser\\.";
-  public static final String CONF_HADOOP_PROXYSERVERS = "hadoop.proxyservers";
   
   private static boolean init = false;
   //list of users, groups and hosts per proxyuser
@@ -54,8 +51,6 @@ public class ProxyUsers {
     new HashMap<String, Collection<String>>();
   private static Map<String, Collection<String>> proxyHosts = 
     new HashMap<String, Collection<String>>();
-  private static Collection<String> proxyServers =
-    new HashSet<String>();
 
   /**
    * reread the conf and get new values for "hadoop.proxyuser.*.groups/users/hosts"
@@ -75,7 +70,6 @@ public class ProxyUsers {
     proxyGroups.clear();
     proxyHosts.clear();
     proxyUsers.clear();
-    proxyServers.clear();
     
     // get all the new keys for users
     String regex = CONF_HADOOP_PROXYUSER_RE+"[^.]*\\"+CONF_USERS;
@@ -103,22 +97,8 @@ public class ProxyUsers {
       proxyHosts.put(entry.getKey(),
           StringUtils.getTrimmedStringCollection(entry.getValue()));
     }
-    
-    // trusted proxy servers such as http proxies
-    for (String host : conf.getTrimmedStrings(CONF_HADOOP_PROXYSERVERS)) {
-      InetSocketAddress addr = new InetSocketAddress(host, 0);
-      if (!addr.isUnresolved()) {
-        proxyServers.add(addr.getAddress().getHostAddress());
-      }
-    }
     init = true;
-  }
-
-  public static synchronized boolean isProxyServer(String remoteAddr) { 
-    if(!init) {
-      refreshSuperUserGroupsConfiguration(); 
-    }
-    return proxyServers.contains(remoteAddr);
+    ProxyServers.refresh(conf);
   }
   
   /**

+ 8 - 2
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java

@@ -132,11 +132,17 @@ abstract public class Shell {
                     : new String[]{"bash", "-c", "groups"};
   }
 
-  /** a Unix command to get a given user's groups list */
+  /**
+   * a Unix command to get a given user's groups list.
+   * If the OS is not WINDOWS, the command will get the user's primary group
+   * first and finally get the groups list which includes the primary group.
+   * i.e. the user's primary group will be included twice.
+   */
   public static String[] getGroupsForUserCommand(final String user) {
     //'groups username' command return is non-consistent across different unixes
     return (WINDOWS)? new String[] { WINUTILS, "groups", "-F", "\"" + user + "\""}
-                    : new String [] {"bash", "-c", "id -Gn " + user};
+                    : new String [] {"bash", "-c", "id -gn " + user
+                                     + "&& id -Gn " + user};
   }
 
   /** a Unix command to get a given netgroup's user list */

+ 2 - 2
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestKeyShell.java

@@ -121,7 +121,7 @@ public class TestKeyShell {
     ks.setConf(new Configuration());
     rc = ks.run(args1);
     assertEquals(-1, rc);
-    assertTrue(outContent.toString().contains("key1 has NOT been created."));
+    assertTrue(outContent.toString().contains("key1 has not been created."));
   }
 
   @Test
@@ -134,7 +134,7 @@ public class TestKeyShell {
     ks.setConf(new Configuration());
     rc = ks.run(args1);
     assertEquals(-1, rc);
-    assertTrue(outContent.toString().contains("key1 has NOT been created."));
+    assertTrue(outContent.toString().contains("key1 has not been created."));
   }
 
   @Test

+ 54 - 5
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestRetryProxy.java

@@ -26,27 +26,37 @@ import static org.apache.hadoop.io.retry.RetryPolicies.retryUpToMaximumCountWith
 import static org.apache.hadoop.io.retry.RetryPolicies.retryUpToMaximumCountWithProportionalSleep;
 import static org.apache.hadoop.io.retry.RetryPolicies.retryUpToMaximumTimeWithFixedSleep;
 import static org.apache.hadoop.io.retry.RetryPolicies.exponentialBackoffRetry;
+import static org.junit.Assert.*;
 
 import java.util.Collections;
 import java.util.Map;
+import java.util.concurrent.Callable;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
-
-import junit.framework.TestCase;
+import java.util.concurrent.atomic.AtomicReference;
 
 import org.apache.hadoop.io.retry.UnreliableInterface.FatalException;
 import org.apache.hadoop.io.retry.UnreliableInterface.UnreliableException;
 import org.apache.hadoop.ipc.ProtocolTranslator;
 import org.apache.hadoop.ipc.RemoteException;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.lang.reflect.UndeclaredThrowableException;
 
-public class TestRetryProxy extends TestCase {
+public class TestRetryProxy {
   
   private UnreliableImplementation unreliableImpl;
   
-  @Override
-  protected void setUp() throws Exception {
+  @Before
+  public void setUp() throws Exception {
     unreliableImpl = new UnreliableImplementation();
   }
 
+  @Test
   public void testTryOnceThenFail() throws UnreliableException {
     UnreliableInterface unreliable = (UnreliableInterface)
       RetryProxy.create(UnreliableInterface.class, unreliableImpl, TRY_ONCE_THEN_FAIL);
@@ -62,6 +72,7 @@ public class TestRetryProxy extends TestCase {
   /**
    * Test for {@link RetryInvocationHandler#isRpcInvocation(Object)}
    */
+  @Test
   public void testRpcInvocation() throws Exception {
     // For a proxy method should return true
     final UnreliableInterface unreliable = (UnreliableInterface)
@@ -91,6 +102,7 @@ public class TestRetryProxy extends TestCase {
     assertFalse(RetryInvocationHandler.isRpcInvocation(new Object()));
   }
   
+  @Test
   public void testRetryForever() throws UnreliableException {
     UnreliableInterface unreliable = (UnreliableInterface)
       RetryProxy.create(UnreliableInterface.class, unreliableImpl, RETRY_FOREVER);
@@ -99,6 +111,7 @@ public class TestRetryProxy extends TestCase {
     unreliable.failsTenTimesThenSucceeds();
   }
   
+  @Test
   public void testRetryUpToMaximumCountWithFixedSleep() throws UnreliableException {
     UnreliableInterface unreliable = (UnreliableInterface)
       RetryProxy.create(UnreliableInterface.class, unreliableImpl,
@@ -113,6 +126,7 @@ public class TestRetryProxy extends TestCase {
     }
   }
   
+  @Test
   public void testRetryUpToMaximumTimeWithFixedSleep() throws UnreliableException {
     UnreliableInterface unreliable = (UnreliableInterface)
       RetryProxy.create(UnreliableInterface.class, unreliableImpl,
@@ -127,6 +141,7 @@ public class TestRetryProxy extends TestCase {
     }
   }
   
+  @Test
   public void testRetryUpToMaximumCountWithProportionalSleep() throws UnreliableException {
     UnreliableInterface unreliable = (UnreliableInterface)
       RetryProxy.create(UnreliableInterface.class, unreliableImpl,
@@ -141,6 +156,7 @@ public class TestRetryProxy extends TestCase {
     }
   }
   
+  @Test
   public void testExponentialRetry() throws UnreliableException {
     UnreliableInterface unreliable = (UnreliableInterface)
       RetryProxy.create(UnreliableInterface.class, unreliableImpl,
@@ -155,6 +171,7 @@ public class TestRetryProxy extends TestCase {
     }
   }
   
+  @Test
   public void testRetryByException() throws UnreliableException {
     Map<Class<? extends Exception>, RetryPolicy> exceptionToPolicyMap =
       Collections.<Class<? extends Exception>, RetryPolicy>singletonMap(FatalException.class, TRY_ONCE_THEN_FAIL);
@@ -171,6 +188,7 @@ public class TestRetryProxy extends TestCase {
     }
   }
   
+  @Test
   public void testRetryByRemoteException() {
     Map<Class<? extends Exception>, RetryPolicy> exceptionToPolicyMap =
       Collections.<Class<? extends Exception>, RetryPolicy>singletonMap(FatalException.class, TRY_ONCE_THEN_FAIL);
@@ -186,4 +204,35 @@ public class TestRetryProxy extends TestCase {
     }
   }  
   
+  @Test
+  public void testRetryInterruptible() throws Throwable {
+    final UnreliableInterface unreliable = (UnreliableInterface)
+        RetryProxy.create(UnreliableInterface.class, unreliableImpl,
+            retryUpToMaximumTimeWithFixedSleep(10, 10, TimeUnit.SECONDS));
+    
+    final CountDownLatch latch = new CountDownLatch(1);
+    final AtomicReference<Thread> futureThread = new AtomicReference<Thread>();
+    ExecutorService exec = Executors.newSingleThreadExecutor();
+    Future<Throwable> future = exec.submit(new Callable<Throwable>(){
+      @Override
+      public Throwable call() throws Exception {
+        futureThread.set(Thread.currentThread());
+        latch.countDown();
+        try {
+          unreliable.alwaysFailsWithFatalException();
+        } catch (UndeclaredThrowableException ute) {
+          return ute.getCause();
+        }
+        return null;
+      }
+    });
+    latch.await();
+    Thread.sleep(1000); // time to fail and sleep
+    assertTrue(futureThread.get().isAlive());
+    futureThread.get().interrupt();
+    Throwable e = future.get(1, TimeUnit.SECONDS); // should return immediately 
+    assertNotNull(e);
+    assertEquals(InterruptedException.class, e.getClass());
+    assertEquals("sleep interrupted", e.getMessage());
+  }
 }
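
For readers unfamiliar with the retry machinery exercised above, a minimal sketch of wrapping an arbitrary interface with RetryProxy follows; MyService and RetryProxySketch are hypothetical names for illustration only and are not part of this patch.

    import java.util.concurrent.TimeUnit;

    import org.apache.hadoop.io.retry.RetryPolicies;
    import org.apache.hadoop.io.retry.RetryProxy;

    public class RetryProxySketch {
      // Hypothetical service interface; any Java interface can be wrapped the same way.
      interface MyService {
        String fetch() throws Exception;
      }

      static MyService withRetries(MyService impl) {
        // Retry failing calls for up to 10 seconds, sleeping 1 second between
        // attempts, the same policy family used by the tests above.
        return (MyService) RetryProxy.create(MyService.class, impl,
            RetryPolicies.retryUpToMaximumTimeWithFixedSleep(10, 1, TimeUnit.SECONDS));
      }
    }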

+ 38 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/authorize/TestProxyServers.java

@@ -0,0 +1,38 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.security.authorize;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.hadoop.conf.Configuration;
+import org.junit.Test;
+
+public class TestProxyServers {
+
+  @Test
+  public void testProxyServer() {
+    Configuration conf = new Configuration();
+    assertFalse(ProxyServers.isProxyServer("1.1.1.1"));
+    conf.set(ProxyServers.CONF_HADOOP_PROXYSERVERS, "2.2.2.2, 3.3.3.3");
+    ProxyUsers.refreshSuperUserGroupsConfiguration(conf);
+    assertFalse(ProxyServers.isProxyServer("1.1.1.1"));
+    assertTrue(ProxyServers.isProxyServer("2.2.2.2"));
+    assertTrue(ProxyServers.isProxyServer("3.3.3.3"));
+  }
+}

+ 0 - 11
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/authorize/TestProxyUsers.java

@@ -327,17 +327,6 @@ public class TestProxyUsers {
     assertEquals (1,hosts.size());
   }
 
-  @Test
-  public void testProxyServer() {
-    Configuration conf = new Configuration();
-    assertFalse(ProxyUsers.isProxyServer("1.1.1.1"));
-    conf.set(ProxyUsers.CONF_HADOOP_PROXYSERVERS, "2.2.2.2, 3.3.3.3");
-    ProxyUsers.refreshSuperUserGroupsConfiguration(conf);
-    assertFalse(ProxyUsers.isProxyServer("1.1.1.1"));
-    assertTrue(ProxyUsers.isProxyServer("2.2.2.2"));
-    assertTrue(ProxyUsers.isProxyServer("3.3.3.3"));
-  }
-
   private void assertNotAuthorized(UserGroupInformation proxyUgi, String host) {
     try {
       ProxyUsers.authorize(proxyUgi, host);

+ 35 - 1
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -270,6 +270,9 @@ Release 2.5.0 - UNRELEASED
     HDFS-5168. Add cross node dependency support to BlockPlacementPolicy.
     (Nikola Vujic via szetszwo)
 
+    HDFS-6334. Client failover proxy provider for IP failover based NN HA.
+    (kihwal)
+
   IMPROVEMENTS
 
     HDFS-6007. Update documentation about short-circuit local reads (iwasakims
@@ -348,6 +351,18 @@ Release 2.5.0 - UNRELEASED
 
     HDFS-6328. Clean up dead code in FSDirectory. (wheat9)
 
+    HDFS-6230. Expose upgrade status through NameNode web UI.
+    (Mit Desai via wheat9)
+
+    HDFS-6186. Pause deletion of blocks when the namenode starts up. (jing9)
+
+    HDFS-6293. Issues with OIV processing PB-based fsimages. (kihwal)
+
+    HDFS-2949. Add check to active state transition to prevent operator-induced
+    split brain. (Rushabh S Shah via kihwal)
+
+    HDFS-6287. Add vecsum test of libhdfs read access times (cmccabe)
+
   OPTIMIZATIONS
 
     HDFS-6214. Webhdfs has poor throughput for files >2GB (daryn)
@@ -440,15 +455,32 @@ Release 2.5.0 - UNRELEASED
     HDFS-6337. Setfacl testcase is failing due to dash character in username
     in TestAclCLI (umamahesh)
 
-    HDFS-5381. ExtendedBlock#hashCode should use both blockId and block pool ID    
+    HDFS-5381. ExtendedBlock#hashCode should use both blockId and block pool ID
     (Benoy Antony via Colin Patrick McCabe)
 
     HDFS-6240. WebImageViewer returns 404 if LISTSTATUS to an empty directory.
     (Akira Ajisaka via wheat9)
 
+    HDFS-6351. Command hdfs dfs -rm -r can't remove empty directory.
+    (Yongjun Zhang via wang)
+
+    HDFS-5522. Datanode disk error check may be incorrectly skipped.
+    (Rushabh S Shah via kihwal)
+
     HDFS-6367. EnumSetParam$Domain#parse fails for parameter containing more than one enum.
     (Yi Liu via umamahesh)
 
+    HDFS-6305. WebHdfs response decoding may throw RuntimeExceptions (Daryn
+    Sharp via jeagles)
+
+    HDFS-6355. Fix divide-by-zero, improper use of wall-clock time in
+    BlockPoolSliceScanner (cmccabe)
+
+    HDFS-6370. Web UI fails to display in intranet under IE.
+    (Haohui Mai via cnauroth)
+
+    HDFS-6381. Fix a typo in INodeReference.java. (Binglin Chang via jing9)
+
 Release 2.4.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES
@@ -515,6 +547,8 @@ Release 2.4.1 - UNRELEASED
     HDFS-6313. WebHdfs may use the wrong NN when configured for multiple HA NNs
     (kihwal)
 
+    HDFS-6326. WebHdfs ACL compatibility is broken. (cnauroth)
+
 Release 2.4.0 - 2014-04-07 
 
   INCOMPATIBLE CHANGES

+ 12 - 0
hadoop-hdfs-project/hadoop-hdfs/src/CMakeLists.txt

@@ -62,6 +62,9 @@ endfunction()
 INCLUDE(CheckCSourceCompiles)
 CHECK_C_SOURCE_COMPILES("int main(void) { static __thread int i = 0; return 0; }" HAVE_BETTER_TLS)
 
+# Check to see if we have Intel SSE intrinsics.
+CHECK_C_SOURCE_COMPILES("#include <emmintrin.h>\nint main(void) { __m128d sum0 = _mm_set_pd(0.0,0.0); return 0; }" HAVE_INTEL_SSE_INTRINSICS)
+
 # Check if we need to link dl library to get dlopen.
 # dlopen on Linux is in separate library but on FreeBSD its in libc
 INCLUDE(CheckLibraryExists)
@@ -170,6 +173,15 @@ target_link_libraries(test_libhdfs_zerocopy
     pthread
 )
 
+add_executable(test_libhdfs_vecsum
+    main/native/libhdfs/test/vecsum.c
+)
+target_link_libraries(test_libhdfs_vecsum
+    hdfs
+    pthread
+    rt
+)
+
 IF(REQUIRE_LIBWEBHDFS)
     add_subdirectory(contrib/libwebhdfs)
 ENDIF(REQUIRE_LIBWEBHDFS)

+ 2 - 0
hadoop-hdfs-project/hadoop-hdfs/src/config.h.cmake

@@ -22,4 +22,6 @@
 
 #cmakedefine HAVE_BETTER_TLS
 
+#cmakedefine HAVE_INTEL_SSE_INTRINSICS
+
 #endif

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs

@@ -49,6 +49,7 @@ function print_usage(){
   echo "  balancer             run a cluster balancing utility"
   echo "  jmxget               get JMX exported values from NameNode or DataNode."
   echo "  oiv                  apply the offline fsimage viewer to an fsimage"
+  echo "  oiv_legacy           apply the offline fsimage viewer to an legacy fsimage"
   echo "  oev                  apply the offline edits viewer to an edits file"
   echo "  fetchdt              fetch a delegation token from the NameNode"
   echo "  getconf              get config values from configuration"
@@ -161,6 +162,8 @@ elif [ "$COMMAND" = "jmxget" ] ; then
   CLASS=org.apache.hadoop.hdfs.tools.JMXGet
 elif [ "$COMMAND" = "oiv" ] ; then
   CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewerPB
+elif [ "COMMAND" = "oiv_legacy" ] ; then
+  CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer
 elif [ "$COMMAND" = "oev" ] ; then
   CLASS=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer
 elif [ "$COMMAND" = "fetchdt" ] ; then

+ 5 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java

@@ -247,6 +247,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
       "dfs.namenode.path.based.cache.refresh.interval.ms";
   public static final long    DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS_DEFAULT = 30000L;
 
+  /** Pending period of block deletion since NameNode startup */
+  public static final String  DFS_NAMENODE_STARTUP_DELAY_BLOCK_DELETION_MS_KEY = "dfs.namenode.startup.delay.block.deletion.ms";
+  public static final long    DFS_NAMENODE_STARTUP_DELAY_BLOCK_DELETION_MS_DEFAULT = 0L;
+
   // Whether to enable datanode's stale state detection and usage for reads
   public static final String DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_KEY = "dfs.namenode.avoid.read.stale.datanode";
   public static final boolean DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_DEFAULT = false;
@@ -500,6 +504,7 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final String  DFS_SECONDARY_NAMENODE_KERBEROS_INTERNAL_SPNEGO_PRINCIPAL_KEY = "dfs.secondary.namenode.kerberos.internal.spnego.principal";
   public static final String  DFS_NAMENODE_NAME_CACHE_THRESHOLD_KEY = "dfs.namenode.name.cache.threshold";
   public static final int     DFS_NAMENODE_NAME_CACHE_THRESHOLD_DEFAULT = 10;
+  public static final String  DFS_NAMENODE_LEGACY_OIV_IMAGE_DIR_KEY = "dfs.namenode.legacy-oiv-image.dir";
   
   public static final String  DFS_NAMESERVICES = "dfs.nameservices";
   public static final String  DFS_NAMESERVICE_ID = "dfs.nameservice.id";

+ 41 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java

@@ -38,10 +38,12 @@ import org.apache.hadoop.HadoopIllegalArgumentException;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.NameNodeProxies;
 import org.apache.hadoop.hdfs.protocol.ClientProtocol;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSelector;
+import org.apache.hadoop.hdfs.server.namenode.ha.AbstractNNFailoverProxyProvider;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.ipc.RPC;
@@ -205,17 +207,54 @@ public class HAUtil {
  
   /**
    * @return true if the given nameNodeUri appears to be a logical URI.
-   * This is the case if there is a failover proxy provider configured
-   * for it in the given configuration.
    */
   public static boolean isLogicalUri(
       Configuration conf, URI nameNodeUri) {
     String host = nameNodeUri.getHost();
+    // A logical name must be one of the service IDs.
+    return DFSUtil.getNameServiceIds(conf).contains(host);
+  }
+
+  /**
+   * Check whether the client has a failover proxy provider configured
+   * for the namenode/nameservice.
+   *
+   * @param conf Configuration
+   * @param nameNodeUri The URI of namenode
+   * @return true if failover is configured.
+   */
+  public static boolean isClientFailoverConfigured(
+      Configuration conf, URI nameNodeUri) {
+    String host = nameNodeUri.getHost();
     String configKey = DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "."
         + host;
     return conf.get(configKey) != null;
   }
 
+  /**
+   * Check whether logical URI is needed for the namenode and
+   * the corresponding failover proxy provider in the config.
+   *
+   * @param conf Configuration
+   * @param nameNodeUri The URI of namenode
+   * @return true if logical URI is needed. false, if not needed.
+   * @throws IOException most likely due to misconfiguration.
+   */
+  public static boolean useLogicalUri(Configuration conf, URI nameNodeUri) 
+      throws IOException {
+    // Create the proxy provider. Actual proxy is not created.
+    AbstractNNFailoverProxyProvider<ClientProtocol> provider = NameNodeProxies
+        .createFailoverProxyProvider(conf, nameNodeUri, ClientProtocol.class,
+        false);
+
+    // No need to use logical URI since failover is not configured.
+    if (provider == null) {
+      return false;
+    }
+    // Check whether the failover proxy provider uses logical URI.
+    return provider.useLogicalURI();
+  }
+
   /**
    * Parse the file system URI out of the provided token.
    */
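
A small sketch (the URI value is hypothetical) of how a client would use the three checks above: isLogicalUri now only consults the configured nameservice IDs, while useLogicalUri defers to the failover proxy provider itself.

    static void describeNameNodeUri(Configuration conf) throws IOException {
      URI nnUri = URI.create("hdfs://mycluster");  // "mycluster" is a nameservice ID, not host:port
      boolean logical = HAUtil.isLogicalUri(conf, nnUri);                // host matches a nameservice ID
      boolean failover = HAUtil.isClientFailoverConfigured(conf, nnUri); // a proxy provider class is set
      boolean useLogical = HAUtil.useLogicalUri(conf, nnUri);            // provider resolves logical URIs itself
      System.out.println(logical + " " + failover + " " + useLogical);
    }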

+ 58 - 31
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java

@@ -50,6 +50,8 @@ import org.apache.hadoop.hdfs.protocolPB.JournalProtocolPB;
 import org.apache.hadoop.hdfs.protocolPB.JournalProtocolTranslatorPB;
 import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolPB;
 import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolTranslatorPB;
+import org.apache.hadoop.hdfs.server.namenode.ha.AbstractNNFailoverProxyProvider;
+import org.apache.hadoop.hdfs.server.namenode.ha.WrappedFailoverProxyProvider;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.namenode.SafeModeException;
 import org.apache.hadoop.hdfs.server.protocol.JournalProtocol;
@@ -136,26 +138,29 @@ public class NameNodeProxies {
   @SuppressWarnings("unchecked")
   public static <T> ProxyAndInfo<T> createProxy(Configuration conf,
       URI nameNodeUri, Class<T> xface) throws IOException {
-    Class<FailoverProxyProvider<T>> failoverProxyProviderClass =
-        getFailoverProxyProviderClass(conf, nameNodeUri, xface);
+    AbstractNNFailoverProxyProvider<T> failoverProxyProvider =
+        createFailoverProxyProvider(conf, nameNodeUri, xface, true);
   
-    if (failoverProxyProviderClass == null) {
+    if (failoverProxyProvider == null) {
       // Non-HA case
       return createNonHAProxy(conf, NameNode.getAddress(nameNodeUri), xface,
           UserGroupInformation.getCurrentUser(), true);
     } else {
       // HA case
-      FailoverProxyProvider<T> failoverProxyProvider = NameNodeProxies
-          .createFailoverProxyProvider(conf, failoverProxyProviderClass, xface,
-              nameNodeUri);
       Conf config = new Conf(conf);
       T proxy = (T) RetryProxy.create(xface, failoverProxyProvider,
           RetryPolicies.failoverOnNetworkException(
               RetryPolicies.TRY_ONCE_THEN_FAIL, config.maxFailoverAttempts,
               config.maxRetryAttempts, config.failoverSleepBaseMillis,
               config.failoverSleepMaxMillis));
-      
-      Text dtService = HAUtil.buildTokenServiceForLogicalUri(nameNodeUri);
+
+      Text dtService;
+      if (failoverProxyProvider.useLogicalURI()) {
+        dtService = HAUtil.buildTokenServiceForLogicalUri(nameNodeUri);
+      } else {
+        dtService = SecurityUtil.buildTokenService(
+            NameNode.getAddress(nameNodeUri));
+      }
       return new ProxyAndInfo<T>(proxy, dtService);
     }
   }
@@ -183,12 +188,10 @@ public class NameNodeProxies {
       Configuration config, URI nameNodeUri, Class<T> xface,
       int numResponseToDrop) throws IOException {
     Preconditions.checkArgument(numResponseToDrop > 0);
-    Class<FailoverProxyProvider<T>> failoverProxyProviderClass = 
-        getFailoverProxyProviderClass(config, nameNodeUri, xface);
-    if (failoverProxyProviderClass != null) { // HA case
-      FailoverProxyProvider<T> failoverProxyProvider = 
-          createFailoverProxyProvider(config, failoverProxyProviderClass, 
-              xface, nameNodeUri);
+    AbstractNNFailoverProxyProvider<T> failoverProxyProvider =
+        createFailoverProxyProvider(config, nameNodeUri, xface, true);
+
+    if (failoverProxyProvider != null) { // HA case
       int delay = config.getInt(
           DFS_CLIENT_FAILOVER_SLEEPTIME_BASE_KEY,
           DFS_CLIENT_FAILOVER_SLEEPTIME_BASE_DEFAULT);
@@ -211,7 +214,13 @@ public class NameNodeProxies {
       T proxy = (T) Proxy.newProxyInstance(
           failoverProxyProvider.getInterface().getClassLoader(),
           new Class[] { xface }, dummyHandler);
-      Text dtService = HAUtil.buildTokenServiceForLogicalUri(nameNodeUri);
+      Text dtService;
+      if (failoverProxyProvider.useLogicalURI()) {
+        dtService = HAUtil.buildTokenServiceForLogicalUri(nameNodeUri);
+      } else {
+        dtService = SecurityUtil.buildTokenService(
+            NameNode.getAddress(nameNodeUri));
+      }
       return new ProxyAndInfo<T>(proxy, dtService);
     } else {
       LOG.warn("Currently creating proxy using " +
@@ -396,7 +405,7 @@ public class NameNodeProxies {
   /** Gets the configured Failover proxy provider's class */
   @VisibleForTesting
   public static <T> Class<FailoverProxyProvider<T>> getFailoverProxyProviderClass(
-      Configuration conf, URI nameNodeUri, Class<T> xface) throws IOException {
+      Configuration conf, URI nameNodeUri) throws IOException {
     if (nameNodeUri == null) {
       return null;
     }
@@ -408,17 +417,6 @@ public class NameNodeProxies {
       @SuppressWarnings("unchecked")
       Class<FailoverProxyProvider<T>> ret = (Class<FailoverProxyProvider<T>>) conf
           .getClass(configKey, null, FailoverProxyProvider.class);
-      if (ret != null) {
-        // If we found a proxy provider, then this URI should be a logical NN.
-        // Given that, it shouldn't have a non-default port number.
-        int port = nameNodeUri.getPort();
-        if (port > 0 && port != NameNode.DEFAULT_PORT) {
-          throw new IOException("Port " + port + " specified in URI "
-              + nameNodeUri + " but host '" + host
-              + "' is a logical (HA) namenode"
-              + " and does not use port information.");
-        }
-      }
       return ret;
     } catch (RuntimeException e) {
       if (e.getCause() instanceof ClassNotFoundException) {
@@ -433,18 +431,33 @@ public class NameNodeProxies {
 
   /** Creates the Failover proxy provider instance*/
   @VisibleForTesting
-  public static <T> FailoverProxyProvider<T> createFailoverProxyProvider(
-      Configuration conf, Class<FailoverProxyProvider<T>> failoverProxyProviderClass,
-      Class<T> xface, URI nameNodeUri) throws IOException {
+  public static <T> AbstractNNFailoverProxyProvider<T> createFailoverProxyProvider(
+      Configuration conf, URI nameNodeUri, Class<T> xface, boolean checkPort)
+      throws IOException {
+    Class<FailoverProxyProvider<T>> failoverProxyProviderClass = null;
+    AbstractNNFailoverProxyProvider<T> providerNN;
     Preconditions.checkArgument(
         xface.isAssignableFrom(NamenodeProtocols.class),
         "Interface %s is not a NameNode protocol", xface);
     try {
+      // Obtain the class of the proxy provider
+      failoverProxyProviderClass = getFailoverProxyProviderClass(conf,
+          nameNodeUri);
+      if (failoverProxyProviderClass == null) {
+        return null;
+      }
+      // Create a proxy provider instance.
       Constructor<FailoverProxyProvider<T>> ctor = failoverProxyProviderClass
           .getConstructor(Configuration.class, URI.class, Class.class);
       FailoverProxyProvider<T> provider = ctor.newInstance(conf, nameNodeUri,
           xface);
-      return provider;
+
+      // If the proxy provider is of an old implementation, wrap it.
+      if (!(provider instanceof AbstractNNFailoverProxyProvider)) {
+        providerNN = new WrappedFailoverProxyProvider<T>(provider);
+      } else {
+        providerNN = (AbstractNNFailoverProxyProvider<T>)provider;
+      }
     } catch (Exception e) {
       String message = "Couldn't create proxy provider " + failoverProxyProviderClass;
       if (LOG.isDebugEnabled()) {
@@ -456,6 +469,20 @@ public class NameNodeProxies {
         throw new IOException(message, e);
       }
     }
+
+    // Check the port in the URI, if it is logical.
+    if (checkPort && providerNN.useLogicalURI()) {
+      int port = nameNodeUri.getPort();
+      if (port > 0 && port != NameNode.DEFAULT_PORT) {
+        // Throwing here without any cleanup is fine since we have not
+        // actually created the underlying proxies yet.
+        throw new IOException("Port " + port + " specified in URI "
+            + nameNodeUri + " but host '" + nameNodeUri.getHost()
+            + "' is a logical (HA) namenode"
+            + " and does not use port information.");
+      }
+    }
+    return providerNN;
   }
 
 }
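
The net effect for callers can be condensed into a short sketch; this restates the token-service decision made in createProxy above using only methods visible in this patch, and is not itself part of the change.

    AbstractNNFailoverProxyProvider<ClientProtocol> provider =
        NameNodeProxies.createFailoverProxyProvider(conf, nameNodeUri,
            ClientProtocol.class, true);
    Text dtService = (provider != null && provider.useLogicalURI())
        ? HAUtil.buildTokenServiceForLogicalUri(nameNodeUri)                 // logical (HA) service name
        : SecurityUtil.buildTokenService(NameNode.getAddress(nameNodeUri));  // physical host:port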

+ 63 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/FsAclPermission.java

@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.protocol;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.fs.permission.FsPermission;
+
+/**
+ * HDFS permission subclass used to indicate an ACL is present.  The ACL bit is
+ * not visible directly to users of {@link FsPermission} serialization.  This is
+ * done for backwards compatibility in case any existing clients assume the
+ * value of FsPermission is in a particular range.
+ */
+@InterfaceAudience.Private
+public class FsAclPermission extends FsPermission {
+  private final static short ACL_BIT = 1 << 12;
+  private final boolean aclBit;
+
+  /**
+   * Constructs a new FsAclPermission based on the given FsPermission.
+   *
+   * @param perm FsPermission containing permission bits
+   */
+  public FsAclPermission(FsPermission perm) {
+    super(perm.toShort());
+    aclBit = true;
+  }
+
+  /**
+   * Creates a new FsAclPermission by calling the base class constructor.
+   *
+   * @param perm short containing permission bits
+   */
+  public FsAclPermission(short perm) {
+    super(perm);
+    aclBit = (perm & ACL_BIT) != 0;
+  }
+
+  @Override
+  public short toExtendedShort() {
+    return (short)(toShort() | (aclBit ? ACL_BIT : 0));
+  }
+
+  @Override
+  public boolean getAclBit() {
+    return aclBit;
+  }
+}
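
A brief sketch of the intended round-trip behavior; the assertions below follow directly from the constructors and overrides above and are illustrative only.

    FsPermission plain = new FsAclPermission((short) 0750);             // no ACL bit in the short
    assert !plain.getAclBit();
    FsPermission withAcl = new FsAclPermission(new FsPermission((short) 0750));
    assert withAcl.getAclBit();
    assert withAcl.toShort() == 0750;                                   // legacy serialization unchanged
    assert (withAcl.toExtendedShort() & (1 << 12)) != 0;                // ACL bit only in the extended form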

+ 3 - 4
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java

@@ -59,6 +59,7 @@ import org.apache.hadoop.hdfs.protocol.DatanodeInfo.AdminStates;
 import org.apache.hadoop.hdfs.protocol.DatanodeLocalInfo;
 import org.apache.hadoop.hdfs.protocol.DirectoryListing;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.hdfs.protocol.FsAclPermission;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.RollingUpgradeAction;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
@@ -1194,13 +1195,11 @@ public class PBHelper {
   }
   
   public static FsPermissionProto convert(FsPermission p) {
-    if (p == null) return null;
-    return FsPermissionProto.newBuilder().setPerm(p.toShort()).build();
+    return FsPermissionProto.newBuilder().setPerm(p.toExtendedShort()).build();
   }
   
   public static FsPermission convert(FsPermissionProto p) {
-    if (p == null) return null;
-    return new FsPermission((short)p.getPerm());
+    return new FsAclPermission((short)p.getPerm());
   }
   
   

+ 75 - 10
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java

@@ -18,9 +18,16 @@
 
 package org.apache.hadoop.hdfs.security.token.delegation;
 
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
-import com.google.protobuf.ByteString;
+import java.io.DataInput;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.InterruptedIOException;
+import java.net.InetSocketAddress;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map.Entry;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -43,13 +50,9 @@ import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager;
 import org.apache.hadoop.security.token.delegation.DelegationKey;
 
-import java.io.DataInput;
-import java.io.IOException;
-import java.io.InterruptedIOException;
-import java.net.InetSocketAddress;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map.Entry;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+import com.google.protobuf.ByteString;
 
 /**
  * A HDFS specific delegation token secret manager.
@@ -211,6 +214,18 @@ public class DelegationTokenSecretManager
     }
   }
 
+  /**
+   * Store the current state of the SecretManager for persistence
+   *
+   * @param out Output stream for writing into fsimage.
+   * @param sdPath String storage directory path
+   * @throws IOException
+   */
+  public synchronized void saveSecretManagerStateCompat(DataOutputStream out,
+      String sdPath) throws IOException {
+    serializerCompat.save(out, sdPath);
+  }
+
   public synchronized SecretManagerState saveSecretManagerState() {
     SecretManagerSection s = SecretManagerSection.newBuilder()
         .setCurrentId(currentId)
@@ -406,6 +421,56 @@ public class DelegationTokenSecretManager
       loadCurrentTokens(in);
     }
 
+    private void save(DataOutputStream out, String sdPath) throws IOException {
+      out.writeInt(currentId);
+      saveAllKeys(out, sdPath);
+      out.writeInt(delegationTokenSequenceNumber);
+      saveCurrentTokens(out, sdPath);
+    }
+
+    /**
+     * Private helper methods to save delegation keys and tokens in fsimage
+     */
+    private synchronized void saveCurrentTokens(DataOutputStream out,
+        String sdPath) throws IOException {
+      StartupProgress prog = NameNode.getStartupProgress();
+      Step step = new Step(StepType.DELEGATION_TOKENS, sdPath);
+      prog.beginStep(Phase.SAVING_CHECKPOINT, step);
+      prog.setTotal(Phase.SAVING_CHECKPOINT, step, currentTokens.size());
+      Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step);
+      out.writeInt(currentTokens.size());
+      Iterator<DelegationTokenIdentifier> iter = currentTokens.keySet()
+          .iterator();
+      while (iter.hasNext()) {
+        DelegationTokenIdentifier id = iter.next();
+        id.write(out);
+        DelegationTokenInformation info = currentTokens.get(id);
+        out.writeLong(info.getRenewDate());
+        counter.increment();
+      }
+      prog.endStep(Phase.SAVING_CHECKPOINT, step);
+    }
+
+    /*
+     * Save the current state of allKeys
+     */
+    private synchronized void saveAllKeys(DataOutputStream out, String sdPath)
+        throws IOException {
+      StartupProgress prog = NameNode.getStartupProgress();
+      Step step = new Step(StepType.DELEGATION_KEYS, sdPath);
+      prog.beginStep(Phase.SAVING_CHECKPOINT, step);
+      prog.setTotal(Phase.SAVING_CHECKPOINT, step, allKeys.size());
+      Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step);
+      out.writeInt(allKeys.size());
+      Iterator<Integer> iter = allKeys.keySet().iterator();
+      while (iter.hasNext()) {
+        Integer key = iter.next();
+        allKeys.get(key).write(out);
+        counter.increment();
+      }
+      prog.endStep(Phase.SAVING_CHECKPOINT, step);
+    }
+
     /**
      * Private helper methods to load Delegation tokens from fsimage
      */

+ 5 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java

@@ -261,7 +261,11 @@ public class BlockManager {
     this.namesystem = namesystem;
     datanodeManager = new DatanodeManager(this, namesystem, conf);
     heartbeatManager = datanodeManager.getHeartbeatManager();
-    invalidateBlocks = new InvalidateBlocks(datanodeManager);
+
+    final long pendingPeriod = conf.getLong(
+        DFSConfigKeys.DFS_NAMENODE_STARTUP_DELAY_BLOCK_DELETION_MS_KEY,
+        DFSConfigKeys.DFS_NAMENODE_STARTUP_DELAY_BLOCK_DELETION_MS_DEFAULT);
+    invalidateBlocks = new InvalidateBlocks(datanodeManager, pendingPeriod);
 
     // Compute the map capacity by allocating 2% of total memory
     blocksMap = new BlocksMap(

+ 46 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/InvalidateBlocks.java

@@ -18,16 +18,24 @@
 package org.apache.hadoop.hdfs.server.blockmanagement;
 
 import java.io.PrintWriter;
+import java.text.SimpleDateFormat;
 import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.GregorianCalendar;
 import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
 
+import org.apache.commons.logging.Log;
 import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.util.LightWeightHashSet;
+import org.apache.hadoop.util.Time;
+
+import com.google.common.annotations.VisibleForTesting;
 
 /**
  * Keeps a Collection for every named machine containing blocks
@@ -44,8 +52,28 @@ class InvalidateBlocks {
 
   private final DatanodeManager datanodeManager;
 
-  InvalidateBlocks(final DatanodeManager datanodeManager) {
+  /**
+   * Pending period before block invalidation starts, measured from NameNode
+   * startup.
+   */
+  private final long pendingPeriodInMs;
+  /** the startup time */
+  private final long startupTime = Time.monotonicNow();
+
+  InvalidateBlocks(final DatanodeManager datanodeManager, long pendingPeriodInMs) {
     this.datanodeManager = datanodeManager;
+    this.pendingPeriodInMs = pendingPeriodInMs;
+    printBlockDeletionTime(BlockManager.LOG);
+  }
+
+  private void printBlockDeletionTime(final Log log) {
+    log.info(DFSConfigKeys.DFS_NAMENODE_STARTUP_DELAY_BLOCK_DELETION_MS_KEY
+        + " is set to " + pendingPeriodInMs + " ms.");
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy MMM dd HH:mm:ss");
+    Calendar calendar = new GregorianCalendar();
+    calendar.add(Calendar.SECOND, (int) (this.pendingPeriodInMs / 1000));
+    log.info("The block deletion will start around "
+        + sdf.format(calendar.getTime()));
   }
 
   /** @return the number of blocks to be invalidated . */
@@ -134,8 +162,25 @@ class InvalidateBlocks {
     return new ArrayList<String>(node2blocks.keySet());
   }
 
+  /**
+   * @return the remaining pending time
+   */
+  @VisibleForTesting
+  long getInvalidationDelay() {
+    return pendingPeriodInMs - (Time.monotonicNow() - startupTime);
+  }
+
   synchronized List<Block> invalidateWork(
       final String storageId, final DatanodeDescriptor dn) {
+    final long delay = getInvalidationDelay();
+    if (delay > 0) {
+      if (BlockManager.LOG.isDebugEnabled()) {
+        BlockManager.LOG
+            .debug("Block deletion is delayed during NameNode startup. "
+                + "The deletion will start after " + delay + " ms.");
+      }
+      return null;
+    }
     final LightWeightHashSet<Block> set = node2blocks.get(storageId);
     if (set == null) {
       return null;
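
A hedged configuration sketch (value illustrative): with the key below set, getInvalidationDelay() stays positive for the first ten minutes after startup and invalidateWork() returns null, so no deletion work is handed out to DataNodes until the window elapses.

    Configuration conf = new Configuration();
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_STARTUP_DELAY_BLOCK_DELETION_MS_KEY,
        10 * 60 * 1000L);   // delay block deletion for 10 minutes after NameNode startup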

+ 2 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java

@@ -34,6 +34,7 @@ import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
 import org.apache.hadoop.security.authentication.util.KerberosName;
+import org.apache.hadoop.security.authorize.ProxyServers;
 import org.apache.hadoop.security.authorize.ProxyUsers;
 import org.apache.hadoop.security.token.Token;
 
@@ -193,7 +194,7 @@ public class JspHelper {
   public static String getRemoteAddr(HttpServletRequest request) {
     String remoteAddr = request.getRemoteAddr();
     String proxyHeader = request.getHeader("X-Forwarded-For");
-    if (proxyHeader != null && ProxyUsers.isProxyServer(remoteAddr)) {
+    if (proxyHeader != null && ProxyServers.isProxyServer(remoteAddr)) {
       final String clientAddr = proxyHeader.split(",")[0].trim();
       if (!clientAddr.isEmpty()) {
         remoteAddr = clientAddr;
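
Illustrative behavior of the relocated check (addresses hypothetical): only peers listed under ProxyServers.CONF_HADOOP_PROXYSERVERS have their X-Forwarded-For header honored.

    Configuration conf = new Configuration();
    conf.set(ProxyServers.CONF_HADOOP_PROXYSERVERS, "10.0.0.5");
    ProxyUsers.refreshSuperUserGroupsConfiguration(conf);
    // A request arriving from 10.0.0.5 with "X-Forwarded-For: 192.168.1.20" is
    // attributed to 192.168.1.20; the same header from any other peer is ignored
    // and the TCP peer address is used as the remote address.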

+ 14 - 13
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceScanner.java

@@ -97,7 +97,7 @@ class BlockPoolSliceScanner {
   private long totalTransientErrors = 0;
   private final AtomicInteger totalBlocksScannedInLastRun = new AtomicInteger(); // Used for test only
   
-  private long currentPeriodStart = Time.now();
+  private long currentPeriodStart = Time.monotonicNow();
   private long bytesLeft = 0; // Bytes to scan in this period
   private long totalBytesToScan = 0;
   private boolean isNewPeriod = true;
@@ -260,7 +260,7 @@ class BlockPoolSliceScanner {
     long period = Math.min(scanPeriod, 
                            Math.max(blockMap.size(),1) * 600 * 1000L);
     int periodInt = Math.abs((int)period);
-    return Time.now() - scanPeriod + 
+    return Time.monotonicNow() - scanPeriod +
         DFSUtil.getRandom().nextInt(periodInt);
   }
 
@@ -322,7 +322,7 @@ class BlockPoolSliceScanner {
       info = new BlockScanInfo(block);
     }
     
-    long now = Time.now();
+    long now = Time.monotonicNow();
     info.lastScanType = type;
     info.lastScanTime = now;
     info.lastScanOk = scanOk;
@@ -399,8 +399,9 @@ class BlockPoolSliceScanner {
   }
   
   private synchronized void adjustThrottler() {
-    long timeLeft = currentPeriodStart+scanPeriod - Time.now();
-    long bw = Math.max(bytesLeft*1000/timeLeft, MIN_SCAN_RATE);
+    long timeLeft = Math.max(1L,
+        currentPeriodStart + scanPeriod - Time.monotonicNow());
+    long bw = Math.max((bytesLeft * 1000) / timeLeft, MIN_SCAN_RATE);
     throttler.setBandwidth(Math.min(bw, MAX_SCAN_RATE));
   }
   
@@ -523,7 +524,7 @@ class BlockPoolSliceScanner {
   private boolean assignInitialVerificationTimes() {
     //First updates the last verification times from the log file.
     if (verificationLog != null) {
-      long now = Time.now();
+      long now = Time.monotonicNow();
       RollingLogs.LineIterator logIterator = null;
       try {
         logIterator = verificationLog.logs.iterator(false);
@@ -574,7 +575,7 @@ class BlockPoolSliceScanner {
       // Initially spread the block reads over half of scan period
       // so that we don't keep scanning the blocks too quickly when restarted.
       long verifyInterval = Math.min(scanPeriod/(2L * numBlocks), 10*60*1000L);
-      long lastScanTime = Time.now() - scanPeriod;
+      long lastScanTime = Time.monotonicNow() - scanPeriod;
 
       if (!blockInfoSet.isEmpty()) {
         BlockScanInfo info;
@@ -601,16 +602,16 @@ class BlockPoolSliceScanner {
 
     // reset the byte counts :
     bytesLeft = totalBytesToScan;
-    currentPeriodStart = Time.now();
+    currentPeriodStart = Time.monotonicNow();
     isNewPeriod = true;
   }
   
   private synchronized boolean workRemainingInCurrentPeriod() {
-    if (bytesLeft <= 0 && Time.now() < currentPeriodStart + scanPeriod) {
+    if (bytesLeft <= 0 && Time.monotonicNow() < currentPeriodStart + scanPeriod) {
       if (LOG.isDebugEnabled()) {
         LOG.debug("Skipping scan since bytesLeft=" + bytesLeft + ", Start=" +
                   currentPeriodStart + ", period=" + scanPeriod + ", now=" +
-                  Time.now() + " " + blockPoolId);
+                  Time.monotonicNow() + " " + blockPoolId);
       }
       return false;
     } else {
@@ -633,7 +634,7 @@ class BlockPoolSliceScanner {
       scan();
     } finally {
       totalBlocksScannedInLastRun.set(processedBlocks.size());
-      lastScanTime.set(Time.now());
+      lastScanTime.set(Time.monotonicNow());
     }
   }
 
@@ -656,7 +657,7 @@ class BlockPoolSliceScanner {
       while (datanode.shouldRun
           && !datanode.blockScanner.blockScannerThread.isInterrupted()
           && datanode.isBPServiceAlive(blockPoolId)) {
-        long now = Time.now();
+        long now = Time.monotonicNow();
         synchronized (this) {
           if ( now >= (currentPeriodStart + scanPeriod)) {
             startNewPeriod();
@@ -714,7 +715,7 @@ class BlockPoolSliceScanner {
     
     int total = blockInfoSet.size();
     
-    long now = Time.now();
+    long now = Time.monotonicNow();
     
     Date date = new Date();
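
A worked example of the adjusted throttler arithmetic (numbers illustrative): clamping timeLeft to at least 1 ms keeps the computed bandwidth finite even when the scan period has already expired, which is the divide-by-zero being fixed here.

    long bytesLeft = 1L << 30;                           // 1 GiB still to scan
    long timeLeft = Math.max(1L, 2 * 60 * 60 * 1000L);   // 2 hours left in the period, in ms
    long bw = bytesLeft * 1000 / timeLeft;               // = 149,130 bytes/s, roughly 146 KiB/s
    // Had the period already expired, timeLeft would clamp to 1 ms rather than
    // reaching zero or going negative, so the division can no longer throw.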
     

+ 4 - 8
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java

@@ -248,7 +248,7 @@ class BlockReceiver implements Closeable {
       
       if (cause != null) { // possible disk error
         ioe = cause;
-        datanode.checkDiskError(ioe); // may throw an exception here
+        datanode.checkDiskError();
       }
       
       throw ioe;
@@ -324,7 +324,7 @@ class BlockReceiver implements Closeable {
     }
     // disk check
     if(ioe != null) {
-      datanode.checkDiskError(ioe);
+      datanode.checkDiskError();
       throw ioe;
     }
   }
@@ -615,7 +615,7 @@ class BlockReceiver implements Closeable {
           manageWriterOsCache(offsetInBlock);
         }
       } catch (IOException iex) {
-        datanode.checkDiskError(iex);
+        datanode.checkDiskError();
         throw iex;
       }
     }
@@ -1171,11 +1171,7 @@ class BlockReceiver implements Closeable {
         } catch (IOException e) {
           LOG.warn("IOException in BlockReceiver.run(): ", e);
           if (running) {
-            try {
-              datanode.checkDiskError(e); // may throw an exception here
-            } catch (IOException ioe) {
-              LOG.warn("DataNode.checkDiskError failed in run() with: ", ioe);
-            }
+            datanode.checkDiskError();
             LOG.info(myString, e);
             running = false;
             if (!Thread.interrupted()) { // failure not caused by interruption

+ 67 - 56
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java

@@ -84,7 +84,6 @@ import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.token.TokenIdentifier;
 import org.apache.hadoop.util.*;
 import org.apache.hadoop.util.DiskChecker.DiskErrorException;
-import org.apache.hadoop.util.DiskChecker.DiskOutOfSpaceException;
 import org.mortbay.util.ajax.JSON;
 
 import javax.management.ObjectName;
@@ -92,8 +91,6 @@ import javax.management.ObjectName;
 import java.io.*;
 import java.lang.management.ManagementFactory;
 import java.net.*;
-import java.nio.channels.ClosedByInterruptException;
-import java.nio.channels.ClosedChannelException;
 import java.nio.channels.SocketChannel;
 import java.security.PrivilegedExceptionAction;
 import java.util.*;
@@ -229,6 +226,11 @@ public class DataNode extends Configured
   ReadaheadPool readaheadPool;
   private final boolean getHdfsBlockLocationsEnabled;
   private ObjectName dataNodeInfoBeanName;
+  private Thread checkDiskErrorThread = null;
+  protected final int checkDiskErrorInterval = 5*1000;
+  private boolean checkDiskErrorFlag = false;
+  private Object checkDiskErrorMutex = new Object();
+  private long lastDiskErrorCheck;
 
   /**
    * Create the DataNode given a configuration, an array of dataDirs,
@@ -238,6 +240,7 @@ public class DataNode extends Configured
            final List<StorageLocation> dataDirs,
            final SecureResources resources) throws IOException {
     super(conf);
+    this.lastDiskErrorCheck = 0;
     this.maxNumberOfBlocksToLog = conf.getLong(DFS_MAX_NUM_BLOCKS_TO_LOG_KEY,
         DFS_MAX_NUM_BLOCKS_TO_LOG_DEFAULT);
 
@@ -1212,6 +1215,11 @@ public class DataNode extends Configured
       this.dataXceiverServer.interrupt();
     }
 
+    // Interrupt the checkDiskErrorThread and terminate it.
+    if(this.checkDiskErrorThread != null) {
+      this.checkDiskErrorThread.interrupt();
+    }
+    
     // Record the time of initial notification
     long timeNotified = Time.now();
 
@@ -1321,55 +1329,17 @@ public class DataNode extends Configured
   }
   
   
-  /** Check if there is no space in disk 
-   *  @param e that caused this checkDiskError call
-   **/
-  protected void checkDiskError(Exception e ) throws IOException {
-    
-    LOG.warn("checkDiskError: exception: ", e);
-    if (isNetworkRelatedException(e)) {
-      LOG.info("Not checking disk as checkDiskError was called on a network" +
-      		" related exception");	
-      return;
-    }
-    if (e.getMessage() != null &&
-        e.getMessage().startsWith("No space left on device")) {
-      throw new DiskOutOfSpaceException("No space left on device");
-    } else {
-      checkDiskError();
-    }
-  }
-  
-  /**
-   * Check if the provided exception looks like it's from a network error
-   * @param e the exception from a checkDiskError call
-   * @return true if this exception is network related, false otherwise
-   */
-  protected boolean isNetworkRelatedException(Exception e) {
-    if (e instanceof SocketException 
-        || e instanceof SocketTimeoutException
-        || e instanceof ClosedChannelException 
-        || e instanceof ClosedByInterruptException) {
-      return true;
-    }
-    
-    String msg = e.getMessage();
-    
-    return null != msg 
-        && (msg.startsWith("An established connection was aborted")
-            || msg.startsWith("Broken pipe")
-            || msg.startsWith("Connection reset")
-            || msg.contains("java.nio.channels.SocketChannel"));
-  }
-  
   /**
    *  Check if there is a disk failure and if so, handle the error
    */
   public void checkDiskError() {
-    try {
-      data.checkDataDir();
-    } catch (DiskErrorException de) {
-      handleDiskError(de.getMessage());
+    synchronized(checkDiskErrorMutex) {
+      checkDiskErrorFlag = true;
+      if(checkDiskErrorThread == null) {
+        startCheckDiskErrorThread();
+        checkDiskErrorThread.start();
+        LOG.info("Starting CheckDiskError Thread");
+      }
     }
   }
   
@@ -1669,13 +1639,8 @@ public class DataNode extends Configured
       } catch (IOException ie) {
         LOG.warn(bpReg + ":Failed to transfer " + b + " to " +
             targets[0] + " got ", ie);
-          // check if there are any disk problem
-        try{
-          checkDiskError(ie);
-        } catch(IOException e) {
-            LOG.warn("DataNode.checkDiskError failed in run() with: ", e);
-        }
-        
+        // check if there are any disk problem
+        checkDiskError();
       } finally {
         xmitsInProgress.getAndDecrement();
         IOUtils.closeStream(blockSender);
@@ -2590,4 +2555,50 @@ public class DataNode extends Configured
   public ShortCircuitRegistry getShortCircuitRegistry() {
     return shortCircuitRegistry;
   }
-}
+  
+  /**
+   * Starts a new thread which checks for disk error check requests
+   * every 5 seconds.
+   */
+  private void startCheckDiskErrorThread() {
+    checkDiskErrorThread = new Thread(new Runnable() {
+          @Override
+          public void run() {
+            while(shouldRun) {
+              boolean tempFlag;
+              synchronized(checkDiskErrorMutex) {
+                tempFlag = checkDiskErrorFlag;
+                checkDiskErrorFlag = false;
+              }
+              if(tempFlag) {
+                try {
+                  data.checkDataDir();
+                } catch (DiskErrorException de) {
+                  handleDiskError(de.getMessage());
+                } catch (Exception e) {
+                  LOG.warn("Unexpected exception occurred while checking disk error  " + e);
+                  checkDiskErrorThread = null;
+                  return;
+                }
+                synchronized(checkDiskErrorMutex) {
+                  lastDiskErrorCheck = System.currentTimeMillis();
+                }
+              }
+              try {
+                Thread.sleep(checkDiskErrorInterval);
+              } catch (InterruptedException e) {
+                LOG.debug("InterruptedException in check disk error thread", e);
+                checkDiskErrorThread = null;
+                return;
+              }
+            }
+          }
+    });
+  }
+  
+  public long getLastDiskErrorCheck() {
+    synchronized(checkDiskErrorMutex) {
+      return lastDiskErrorCheck;
+    }
+  }
+}
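
Since checkDiskError() is now only a request to the background thread, a caller that needs to know the check actually ran must poll getLastDiskErrorCheck(); a test-style sketch (helper name hypothetical, not part of this patch):

    static void awaitDiskCheck(DataNode dn) throws InterruptedException {
      long before = dn.getLastDiskErrorCheck();
      dn.checkDiskError();                    // only sets the flag; returns immediately
      while (dn.getLastDiskErrorCheck() == before) {
        Thread.sleep(100);                    // checker thread polls the flag every 5 seconds
      }
    }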

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/resources/DatanodeWebHdfsMethods.java

@@ -127,8 +127,8 @@ public class DatanodeWebHdfsMethods {
     token.decodeFromUrlString(delegation);
     URI nnUri = URI.create(HdfsConstants.HDFS_URI_SCHEME +
             "://" + nnId);
-    boolean isHA = HAUtil.isLogicalUri(conf, nnUri);
-    if (isHA) {
+    boolean isLogical = HAUtil.isLogicalUri(conf, nnUri);
+    if (isLogical) {
       token.setService(HAUtil.buildTokenServiceForLogicalUri(nnUri));
     } else {
       token.setService(SecurityUtil.buildTokenService(nnUri));

+ 57 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java

@@ -27,6 +27,7 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS_DEFAULT;
 
 import java.io.DataInput;
+import java.io.DataOutputStream;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -61,10 +62,10 @@ import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo.Expiration;
 import org.apache.hadoop.hdfs.protocol.CacheDirectiveStats;
 import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
 import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
-import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto;
-import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto;
 import org.apache.hadoop.hdfs.protocol.DatanodeID;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
+import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto;
+import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto;
 import org.apache.hadoop.hdfs.protocolPB.PBHelper;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
 import org.apache.hadoop.hdfs.server.blockmanagement.CacheReplicationMonitor;
@@ -953,6 +954,18 @@ public final class CacheManager {
     }
   }
 
+  /**
+   * Saves the current state of the CacheManager to the DataOutput. Used
+   * to persist CacheManager state in the FSImage.
+   * @param out DataOutput to persist state
+   * @param sdPath path of the storage directory
+   * @throws IOException
+   */
+  public void saveStateCompat(DataOutputStream out, String sdPath)
+      throws IOException {
+    serializerCompat.save(out, sdPath);
+  }
+
   public PersistState saveState() throws IOException {
     ArrayList<CachePoolInfoProto> pools = Lists
         .newArrayListWithCapacity(cachePools.size());
@@ -1072,6 +1085,12 @@ public final class CacheManager {
   }
 
   private final class SerializerCompat {
+    private void save(DataOutputStream out, String sdPath) throws IOException {
+      out.writeLong(nextDirectiveId);
+      savePools(out, sdPath);
+      saveDirectives(out, sdPath);
+    }
+
     private void load(DataInput in) throws IOException {
       nextDirectiveId = in.readLong();
       // pools need to be loaded first since directives point to their parent pool
@@ -1079,6 +1098,42 @@ public final class CacheManager {
       loadDirectives(in);
     }
 
+    /**
+     * Save cache pools to fsimage
+     */
+    private void savePools(DataOutputStream out,
+        String sdPath) throws IOException {
+      StartupProgress prog = NameNode.getStartupProgress();
+      Step step = new Step(StepType.CACHE_POOLS, sdPath);
+      prog.beginStep(Phase.SAVING_CHECKPOINT, step);
+      prog.setTotal(Phase.SAVING_CHECKPOINT, step, cachePools.size());
+      Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step);
+      out.writeInt(cachePools.size());
+      for (CachePool pool: cachePools.values()) {
+        FSImageSerialization.writeCachePoolInfo(out, pool.getInfo(true));
+        counter.increment();
+      }
+      prog.endStep(Phase.SAVING_CHECKPOINT, step);
+    }
+
+    /*
+     * Save cache entries to fsimage
+     */
+    private void saveDirectives(DataOutputStream out, String sdPath)
+        throws IOException {
+      StartupProgress prog = NameNode.getStartupProgress();
+      Step step = new Step(StepType.CACHE_ENTRIES, sdPath);
+      prog.beginStep(Phase.SAVING_CHECKPOINT, step);
+      prog.setTotal(Phase.SAVING_CHECKPOINT, step, directivesById.size());
+      Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step);
+      out.writeInt(directivesById.size());
+      for (CacheDirective directive : directivesById.values()) {
+        FSImageSerialization.writeCacheDirectiveInfo(out, directive.toInfo());
+        counter.increment();
+      }
+      prog.endStep(Phase.SAVING_CHECKPOINT, step);
+    }
+
     /**
      * Load cache pools from fsimage
      */

+ 8 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointConf.java

@@ -41,6 +41,9 @@ public class CheckpointConf {
 
   /** maximum number of retries when merge errors occur */
   private final int maxRetriesOnMergeError;
+
+  /** The output dir for legacy OIV image */
+  private final String legacyOivImageDir;
   
   public CheckpointConf(Configuration conf) {
     checkpointCheckPeriod = conf.getLong(
@@ -53,6 +56,7 @@ public class CheckpointConf {
                                   DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT);
     maxRetriesOnMergeError = conf.getInt(DFS_NAMENODE_CHECKPOINT_MAX_RETRIES_KEY,
                                   DFS_NAMENODE_CHECKPOINT_MAX_RETRIES_DEFAULT);
+    legacyOivImageDir = conf.get(DFS_NAMENODE_LEGACY_OIV_IMAGE_DIR_KEY);
     warnForDeprecatedConfigs(conf);
   }
   
@@ -83,4 +87,8 @@ public class CheckpointConf {
   public int getMaxRetriesOnMergeError() {
     return maxRetriesOnMergeError;
   }
+
+  public String getLegacyOivImageDir() {
+    return legacyOivImageDir;
+  }
 }
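
A small sketch (path hypothetical) of wiring the new key through CheckpointConf:

    Configuration conf = new Configuration();
    conf.set(DFSConfigKeys.DFS_NAMENODE_LEGACY_OIV_IMAGE_DIR_KEY, "/data/oiv-images");
    CheckpointConf cc = new CheckpointConf(conf);
    assert "/data/oiv-images".equals(cc.getLegacyOivImageDir());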

+ 20 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java

@@ -57,6 +57,7 @@ import org.apache.hadoop.hdfs.protocol.ClientProtocol;
 import org.apache.hadoop.hdfs.protocol.DirectoryListing;
 import org.apache.hadoop.hdfs.protocol.FSLimitException.MaxDirectoryItemsExceededException;
 import org.apache.hadoop.hdfs.protocol.FSLimitException.PathComponentTooLongException;
+import org.apache.hadoop.hdfs.protocol.FsAclPermission;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
 import org.apache.hadoop.hdfs.protocol.HdfsLocatedFileStatus;
@@ -2604,7 +2605,7 @@ public class FSDirectory implements Closeable {
         blocksize,
         node.getModificationTime(snapshot),
         node.getAccessTime(snapshot),
-        node.getFsPermission(snapshot),
+        getPermissionForFileStatus(node, snapshot),
         node.getUserName(snapshot),
         node.getGroupName(snapshot),
         node.isSymlink() ? node.asSymlink().getSymlink() : null,
@@ -2646,7 +2647,8 @@ public class FSDirectory implements Closeable {
     HdfsLocatedFileStatus status =
         new HdfsLocatedFileStatus(size, node.isDirectory(), replication,
           blocksize, node.getModificationTime(snapshot),
-          node.getAccessTime(snapshot), node.getFsPermission(snapshot),
+          node.getAccessTime(snapshot),
+          getPermissionForFileStatus(node, snapshot),
           node.getUserName(snapshot), node.getGroupName(snapshot),
           node.isSymlink() ? node.asSymlink().getSymlink() : null, path,
           node.getId(), loc, childrenNum);
@@ -2660,6 +2662,22 @@ public class FSDirectory implements Closeable {
     return status;
   }
 
+  /**
+   * Returns an inode's FsPermission for use in an outbound FileStatus.  If the
+   * inode has an ACL, then this method will convert to a FsAclPermission.
+   *
+   * @param node INode to check
+   * @param snapshot int snapshot ID
+   * @return FsPermission from inode, with ACL bit on if the inode has an ACL
+   */
+  private static FsPermission getPermissionForFileStatus(INode node,
+      int snapshot) {
+    FsPermission perm = node.getFsPermission(snapshot);
+    if (node.getAclFeature(snapshot) != null) {
+      perm = new FsAclPermission(perm);
+    }
+    return perm;
+  }
     
   /**
    * Add the given symbolic link to the fs. Record it in the edits log.

+ 19 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java

@@ -934,6 +934,25 @@ public class FSImage implements Closeable {
     storage.setMostRecentCheckpointInfo(txid, Time.now());
   }
 
+  /**
+   * Save the FSImage in the legacy format. This is not for NN consumption,
+   * but for tools like OIV.
+   */
+  public void saveLegacyOIVImage(FSNamesystem source, String targetDir,
+      Canceler canceler) throws IOException {
+    FSImageCompression compression =
+        FSImageCompression.createCompression(conf);
+    long txid = getLastAppliedOrWrittenTxId();
+    SaveNamespaceContext ctx = new SaveNamespaceContext(source, txid,
+        canceler);
+    FSImageFormat.Saver saver = new FSImageFormat.Saver(ctx);
+    String imageFileName = NNStorage.getLegacyOIVImageFileName(txid);
+    File imageFile = new File(targetDir, imageFileName);
+    saver.save(imageFile, compression);
+    archivalManager.purgeOldLegacyOIVImages(targetDir, txid);
+  }
+
+
   /**
    * FSImageSaver is being run in a separate thread when saving
    * FSImage. There is one thread per each copy of the image.

+ 381 - 4
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java

@@ -21,14 +21,20 @@ import static org.apache.hadoop.util.Time.now;
 
 import java.io.DataInput;
 import java.io.DataInputStream;
+import java.io.DataOutputStream;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.security.DigestInputStream;
+import java.security.DigestOutputStream;
 import java.security.MessageDigest;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
 
@@ -50,6 +56,7 @@ import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
 import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.FileDiffList;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
@@ -60,6 +67,7 @@ import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
 import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType;
+import org.apache.hadoop.hdfs.util.ReadOnlyList;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.io.MD5Hash;
 import org.apache.hadoop.io.Text;
@@ -69,8 +77,105 @@ import com.google.common.base.Preconditions;
 import com.google.common.annotations.VisibleForTesting;
 
 /**
- * This class loads and stores the FSImage of the NameNode. The file
- * src/main/proto/fsimage.proto describes the on-disk layout of the FSImage.
+ * Contains inner classes for reading or writing the on-disk format for
+ * FSImages.
+ *
+ * In particular, the format of the FSImage looks like:
+ * <pre>
+ * FSImage {
+ *   layoutVersion: int, namespaceID: int, numberItemsInFSDirectoryTree: long,
+ *   namesystemGenerationStampV1: long, namesystemGenerationStampV2: long,
+ *   generationStampAtBlockIdSwitch:long, lastAllocatedBlockId:
+ *   long transactionID: long, snapshotCounter: int, numberOfSnapshots: int,
+ *   numOfSnapshottableDirs: int,
+ *   {FSDirectoryTree, FilesUnderConstruction, SecretManagerState} (can be compressed)
+ * }
+ *
+ * FSDirectoryTree (if {@link Feature#FSIMAGE_NAME_OPTIMIZATION} is supported) {
+ *   INodeInfo of root, numberOfChildren of root: int
+ *   [list of INodeInfo of root's children],
+ *   [list of INodeDirectoryInfo of root's directory children]
+ * }
+ *
+ * FSDirectoryTree (if {@link Feature#FSIMAGE_NAME_OPTIMIZATION} not supported){
+ *   [list of INodeInfo of INodes in topological order]
+ * }
+ *
+ * INodeInfo {
+ *   {
+ *     localName: short + byte[]
+ *   } when {@link Feature#FSIMAGE_NAME_OPTIMIZATION} is supported
+ *   or
+ *   {
+ *     fullPath: byte[]
+ *   } when {@link Feature#FSIMAGE_NAME_OPTIMIZATION} is not supported
+ *   replicationFactor: short, modificationTime: long,
+ *   accessTime: long, preferredBlockSize: long,
+ *   numberOfBlocks: int (-1 for INodeDirectory, -2 for INodeSymLink),
+ *   {
+ *     nsQuota: long, dsQuota: long,
+ *     {
+ *       isINodeSnapshottable: byte,
+ *       isINodeWithSnapshot: byte (if isINodeSnapshottable is false)
+ *     } (when {@link Feature#SNAPSHOT} is supported),
+ *     fsPermission: short, PermissionStatus
+ *   } for INodeDirectory
+ *   or
+ *   {
+ *     symlinkString, fsPermission: short, PermissionStatus
+ *   } for INodeSymlink
+ *   or
+ *   {
+ *     [list of BlockInfo]
+ *     [list of FileDiff]
+ *     {
+ *       isINodeFileUnderConstructionSnapshot: byte,
+ *       {clientName: short + byte[], clientMachine: short + byte[]} (when
+ *       isINodeFileUnderConstructionSnapshot is true),
+ *     } (when {@link Feature#SNAPSHOT} is supported and writing snapshotINode),
+ *     fsPermission: short, PermissionStatus
+ *   } for INodeFile
+ * }
+ *
+ * INodeDirectoryInfo {
+ *   fullPath of the directory: short + byte[],
+ *   numberOfChildren: int, [list of INodeInfo of children INode],
+ *   {
+ *     numberOfSnapshots: int,
+ *     [list of Snapshot] (when NumberOfSnapshots is positive),
+ *     numberOfDirectoryDiffs: int,
+ *     [list of DirectoryDiff] (when numberOfDirectoryDiffs is positive),
+ *     number of children that are directories,
+ *     [list of INodeDirectoryInfo of the directory children] (includes
+ *     snapshot copies of deleted sub-directories)
+ *   } (when {@link Feature#SNAPSHOT} is supported),
+ * }
+ *
+ * Snapshot {
+ *   snapshotID: int, root of Snapshot: INodeDirectoryInfo (its local name is
+ *   the name of the snapshot)
+ * }
+ *
+ * DirectoryDiff {
+ *   full path of the root of the associated Snapshot: short + byte[],
+ *   childrenSize: int,
+ *   isSnapshotRoot: byte,
+ *   snapshotINodeIsNotNull: byte (when isSnapshotRoot is false),
+ *   snapshotINode: INodeDirectory (when SnapshotINodeIsNotNull is true), Diff
+ * }
+ *
+ * Diff {
+ *   createdListSize: int, [Local name of INode in created list],
+ *   deletedListSize: int, [INode in deleted list: INodeInfo]
+ * }
+ *
+ * FileDiff {
+ *   full path of the root of the associated Snapshot: short + byte[],
+ *   fileSize: long,
+ *   snapshotINodeIsNotNull: byte,
+ *   snapshotINode: INodeFile (when SnapshotINodeIsNotNull is true), Diff
+ * }
+ * </pre>
  */
 @InterfaceAudience.Private
 @InterfaceStability.Evolving
@@ -580,6 +685,11 @@ public class FSImageFormat {
       }
     }
 
+    /** @return The FSDirectory of the namesystem where the fsimage is loaded */
+    public FSDirectory getFSDirectoryInLoading() {
+      return namesystem.dir;
+    }
+
     public INode loadINodeWithLocalName(boolean isSnapshotINode, DataInput in,
         boolean updateINodeMap) throws IOException {
       return loadINodeWithLocalName(isSnapshotINode, in, updateINodeMap, null);
@@ -1009,7 +1119,7 @@ public class FSImageFormat {
       + " option to automatically rename these paths during upgrade.";
 
   /**
-   * Same as {@link #renameReservedPathsOnUpgrade}, but for a single
+   * Same as {@link #renameReservedPathsOnUpgrade(String)}, but for a single
    * byte array path component.
    */
   private static byte[] renameReservedComponentOnUpgrade(byte[] component,
@@ -1029,7 +1139,7 @@ public class FSImageFormat {
   }
 
   /**
-   * Same as {@link #renameReservedPathsOnUpgrade}, but for a single
+   * Same as {@link #renameReservedPathsOnUpgrade(String)}, but for a single
    * byte array path component.
    */
   private static byte[] renameReservedRootComponentOnUpgrade(byte[] component,
@@ -1050,4 +1160,271 @@ public class FSImageFormat {
     }
     return component;
   }
+
+  /**
+   * A one-shot class responsible for writing an image file.
+   * The save() function should be called once, after which the getter
+   * functions may be used to retrieve information about the file that was written.
+   *
+   * This is replaced by the PB-based FSImage. The class is to maintain
+   * compatibility for the external fsimage tool.
+   */
+  @Deprecated
+  static class Saver {
+    private static final int LAYOUT_VERSION = -51;
+    private final SaveNamespaceContext context;
+    /** Set to true once an image has been written */
+    private boolean saved = false;
+
+    /** The MD5 checksum of the file that was written */
+    private MD5Hash savedDigest;
+    private final ReferenceMap referenceMap = new ReferenceMap();
+
+    private final Map<Long, INodeFile> snapshotUCMap =
+        new HashMap<Long, INodeFile>();
+
+    /** @throws IllegalStateException if the instance has not yet saved an image */
+    private void checkSaved() {
+      if (!saved) {
+        throw new IllegalStateException("FSImageSaver has not saved an image");
+      }
+    }
+
+    /** @throws IllegalStateException if the instance has already saved an image */
+    private void checkNotSaved() {
+      if (saved) {
+        throw new IllegalStateException("FSImageSaver has already saved an image");
+      }
+    }
+
+
+    Saver(SaveNamespaceContext context) {
+      this.context = context;
+    }
+
+    /**
+     * Return the MD5 checksum of the image file that was saved.
+     */
+    MD5Hash getSavedDigest() {
+      checkSaved();
+      return savedDigest;
+    }
+
+    void save(File newFile, FSImageCompression compression) throws IOException {
+      checkNotSaved();
+
+      final FSNamesystem sourceNamesystem = context.getSourceNamesystem();
+      final INodeDirectory rootDir = sourceNamesystem.dir.rootDir;
+      final long numINodes = rootDir.getDirectoryWithQuotaFeature()
+          .getSpaceConsumed().get(Quota.NAMESPACE);
+      String sdPath = newFile.getParentFile().getParentFile().getAbsolutePath();
+      Step step = new Step(StepType.INODES, sdPath);
+      StartupProgress prog = NameNode.getStartupProgress();
+      prog.beginStep(Phase.SAVING_CHECKPOINT, step);
+      prog.setTotal(Phase.SAVING_CHECKPOINT, step, numINodes);
+      Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step);
+      long startTime = now();
+      //
+      // Write out data
+      //
+      MessageDigest digester = MD5Hash.getDigester();
+      FileOutputStream fout = new FileOutputStream(newFile);
+      DigestOutputStream fos = new DigestOutputStream(fout, digester);
+      DataOutputStream out = new DataOutputStream(fos);
+      try {
+        out.writeInt(LAYOUT_VERSION);
+        LayoutFlags.write(out);
+        // We use the non-locked version of getNamespaceInfo here since
+        // the coordinating thread of saveNamespace already has read-locked
+        // the namespace for us. If we attempt to take another readlock
+        // from the actual saver thread, there's a potential of a
+        // fairness-related deadlock. See the comments on HDFS-2223.
+        out.writeInt(sourceNamesystem.unprotectedGetNamespaceInfo()
+            .getNamespaceID());
+        out.writeLong(numINodes);
+        out.writeLong(sourceNamesystem.getGenerationStampV1());
+        out.writeLong(sourceNamesystem.getGenerationStampV2());
+        out.writeLong(sourceNamesystem.getGenerationStampAtblockIdSwitch());
+        out.writeLong(sourceNamesystem.getLastAllocatedBlockId());
+        out.writeLong(context.getTxId());
+        out.writeLong(sourceNamesystem.getLastInodeId());
+
+
+        sourceNamesystem.getSnapshotManager().write(out);
+
+        // write compression info and set up compressed stream
+        out = compression.writeHeaderAndWrapStream(fos);
+        LOG.info("Saving image file " + newFile +
+                 " using " + compression);
+
+        // save the root
+        saveINode2Image(rootDir, out, false, referenceMap, counter);
+        // save the rest of the nodes
+        saveImage(rootDir, out, true, false, counter);
+        prog.endStep(Phase.SAVING_CHECKPOINT, step);
+        // Now that the step is finished, set counter equal to total to adjust
+        // for possible under-counting due to reference inodes.
+        prog.setCount(Phase.SAVING_CHECKPOINT, step, numINodes);
+        // save files under construction
+        // TODO: for HDFS-5428, since we cannot break the compatibility of
+        // fsimage, we store part of the under-construction files that are only
+        // in snapshots in this "under-construction-file" section. As a
+        // temporary solution, we use "/.reserved/.inodes/<inodeid>" as their
+        // paths, so that when loading fsimage we do not put them into the lease
+        // map. In the future, we can remove this hack when we can bump the
+        // layout version.
+        sourceNamesystem.saveFilesUnderConstruction(out, snapshotUCMap);
+
+        context.checkCancelled();
+        sourceNamesystem.saveSecretManagerStateCompat(out, sdPath);
+        context.checkCancelled();
+        sourceNamesystem.getCacheManager().saveStateCompat(out, sdPath);
+        context.checkCancelled();
+        out.flush();
+        context.checkCancelled();
+        fout.getChannel().force(true);
+      } finally {
+        out.close();
+      }
+
+      saved = true;
+      // set md5 of the saved image
+      savedDigest = new MD5Hash(digester.digest());
+
+      LOG.info("Image file " + newFile + " of size " + newFile.length() +
+          " bytes saved in " + (now() - startTime)/1000 + " seconds.");
+    }
+
+    /**
+     * Save children INodes.
+     * @param children The list of children INodes
+     * @param out The DataOutputStream to write
+     * @param inSnapshot Whether the parent directory or its ancestor is in
+     *                   the deleted list of some snapshot (caused by rename or
+     *                   deletion)
+     * @param counter Counter to increment for namenode startup progress
+     * @return Number of children that are directories
+     */
+    private int saveChildren(ReadOnlyList<INode> children,
+        DataOutputStream out, boolean inSnapshot, Counter counter)
+        throws IOException {
+      // Write normal children INode.
+      out.writeInt(children.size());
+      int dirNum = 0;
+      int i = 0;
+      for(INode child : children) {
+        // print all children first
+        // TODO: for HDFS-5428, we cannot change the format/content of fsimage
+        // here, thus even if the parent directory is in snapshot, we still
+        // do not handle INodeUC as those stored in deleted list
+        saveINode2Image(child, out, false, referenceMap, counter);
+        if (child.isDirectory()) {
+          dirNum++;
+        } else if (inSnapshot && child.isFile()
+            && child.asFile().isUnderConstruction()) {
+          this.snapshotUCMap.put(child.getId(), child.asFile());
+        }
+        if (i++ % 50 == 0) {
+          context.checkCancelled();
+        }
+      }
+      return dirNum;
+    }
+
+    /**
+     * Save file tree image starting from the given root.
+     * This is a recursive procedure, which first saves all children and
+     * snapshot diffs of a current directory and then moves inside the
+     * sub-directories.
+     *
+     * @param current The current node
+     * @param out The DataOutputStream to write the image
+     * @param toSaveSubtree Whether or not to save the subtree to fsimage. For
+     *                      a reference node, its subtree may already have been
+     *                      saved before.
+     * @param inSnapshot Whether the current directory is in snapshot
+     * @param counter Counter to increment for namenode startup progress
+     */
+    private void saveImage(INodeDirectory current, DataOutputStream out,
+        boolean toSaveSubtree, boolean inSnapshot, Counter counter)
+        throws IOException {
+      // write the inode id of the directory
+      out.writeLong(current.getId());
+
+      if (!toSaveSubtree) {
+        return;
+      }
+
+      final ReadOnlyList<INode> children = current
+          .getChildrenList(Snapshot.CURRENT_STATE_ID);
+      int dirNum = 0;
+      List<INodeDirectory> snapshotDirs = null;
+      DirectoryWithSnapshotFeature sf = current.getDirectoryWithSnapshotFeature();
+      if (sf != null) {
+        snapshotDirs = new ArrayList<INodeDirectory>();
+        sf.getSnapshotDirectory(snapshotDirs);
+        dirNum += snapshotDirs.size();
+      }
+
+      // 2. Write INodeDirectorySnapshottable#snapshotsByNames to record all
+      // Snapshots
+      if (current instanceof INodeDirectorySnapshottable) {
+        INodeDirectorySnapshottable snapshottableNode =
+            (INodeDirectorySnapshottable) current;
+        SnapshotFSImageFormat.saveSnapshots(snapshottableNode, out);
+      } else {
+        out.writeInt(-1); // # of snapshots
+      }
+
+      // 3. Write children INode
+      dirNum += saveChildren(children, out, inSnapshot, counter);
+
+      // 4. Write DirectoryDiff lists, if there is any.
+      SnapshotFSImageFormat.saveDirectoryDiffList(current, out, referenceMap);
+
+      // Write sub-tree of sub-directories, including possible snapshots of
+      // deleted sub-directories
+      out.writeInt(dirNum); // the number of sub-directories
+      for(INode child : children) {
+        if(!child.isDirectory()) {
+          continue;
+        }
+        // make sure we only save the subtree under a reference node once
+        boolean toSave = child.isReference() ?
+            referenceMap.toProcessSubtree(child.getId()) : true;
+        saveImage(child.asDirectory(), out, toSave, inSnapshot, counter);
+      }
+      if (snapshotDirs != null) {
+        for (INodeDirectory subDir : snapshotDirs) {
+          // make sure we only save the subtree under a reference node once
+          boolean toSave = subDir.getParentReference() != null ?
+              referenceMap.toProcessSubtree(subDir.getId()) : true;
+          saveImage(subDir, out, toSave, true, counter);
+        }
+      }
+    }
+
+    /**
+     * Saves inode and increments progress counter.
+     *
+     * @param inode INode to save
+     * @param out DataOutputStream to receive inode
+     * @param writeUnderConstruction boolean true if this is under construction
+     * @param referenceMap ReferenceMap containing reference inodes
+     * @param counter Counter to increment for namenode startup progress
+     * @throws IOException thrown if there is an I/O error
+     */
+    private void saveINode2Image(INode inode, DataOutputStream out,
+        boolean writeUnderConstruction, ReferenceMap referenceMap,
+        Counter counter) throws IOException {
+      FSImageSerialization.saveINode2Image(inode, out, writeUnderConstruction,
+        referenceMap);
+      // Intentionally do not increment counter for reference inodes, because it
+      // is too difficult at this point to assess whether or not this is a
+      // reference that counts toward quota.
+      if (!(inode instanceof INodeReference)) {
+        counter.increment();
+      }
+    }
+  }
 }

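The revived Saver writes a short uncompressed header (layout version, layout flags, namespace ID, inode count, generation stamps, last block and inode IDs, transaction ID) before switching to the compressed body. A minimal sketch of reading just that header back, assuming the field order matches Saver.save() above and that LayoutFlags.write() emits a single int flag count; the file name is illustrative and the compressed remainder is not handled:

    import java.io.DataInputStream;
    import java.io.FileInputStream;
    import java.io.IOException;

    public class LegacyImageHeaderDump {
      public static void main(String[] args) throws IOException {
        try (DataInputStream in = new DataInputStream(
            new FileInputStream("fsimage_legacy_oiv_0000000000000000042"))) {
          int layoutVersion = in.readInt();
          int numLayoutFlags = in.readInt();   // assumed: count written by LayoutFlags.write()
          int namespaceId = in.readInt();
          long numINodes = in.readLong();
          long genStampV1 = in.readLong();
          long genStampV2 = in.readLong();
          long genStampAtBlockIdSwitch = in.readLong();
          long lastAllocatedBlockId = in.readLong();
          long txId = in.readLong();
          long lastInodeId = in.readLong();
          System.out.println("layoutVersion=" + layoutVersion
              + " namespaceId=" + namespaceId
              + " numINodes=" + numINodes
              + " lastAllocatedBlockId=" + lastAllocatedBlockId
              + " txId=" + txId
              + " lastInodeId=" + lastInodeId);
          // Snapshot manager state and the compressed FSDirectoryTree follow;
          // parsing them requires the full loader, not this sketch.
        }
      }
    }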
+ 213 - 4
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java

@@ -17,6 +17,11 @@
  */
 package org.apache.hadoop.hdfs.server.namenode;
 
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.DataOutputStream;
+import java.io.IOException;
+
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.fs.Path;
@@ -31,20 +36,21 @@ import org.apache.hadoop.hdfs.protocol.LayoutVersion;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat.ReferenceMap;
 import org.apache.hadoop.hdfs.util.XMLUtils;
 import org.apache.hadoop.hdfs.util.XMLUtils.InvalidXmlException;
 import org.apache.hadoop.hdfs.util.XMLUtils.Stanza;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.ShortWritable;
+import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.WritableUtils;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.DataOutputStream;
-import java.io.IOException;
+import com.google.common.base.Preconditions;
 
 /**
  * Static utility functions for serializing various pieces of data in the correct
@@ -82,6 +88,26 @@ public class FSImageSerialization {
     final ShortWritable U_SHORT = new ShortWritable();
     final IntWritable U_INT = new IntWritable();
     final LongWritable U_LONG = new LongWritable();
+    final FsPermission FILE_PERM = new FsPermission((short) 0);
+  }
+
+  private static void writePermissionStatus(INodeAttributes inode,
+      DataOutput out) throws IOException {
+    final FsPermission p = TL_DATA.get().FILE_PERM;
+    p.fromShort(inode.getFsPermissionShort());
+    PermissionStatus.write(out, inode.getUserName(), inode.getGroupName(), p);
+  }
+
+  private static void writeBlocks(final Block[] blocks,
+      final DataOutput out) throws IOException {
+    if (blocks == null) {
+      out.writeInt(0);
+    } else {
+      out.writeInt(blocks.length);
+      for (Block blk : blocks) {
+        blk.write(out);
+      }
+    }
   }
 
   // Helper function that reads in an INodeUnderConstruction
@@ -127,6 +153,183 @@ public class FSImageSerialization {
     return file;
   }
 
+  // Helper function that writes an INodeUnderConstruction
+  // into the output stream
+  //
+  static void writeINodeUnderConstruction(DataOutputStream out, INodeFile cons,
+      String path) throws IOException {
+    writeString(path, out);
+    out.writeLong(cons.getId());
+    out.writeShort(cons.getFileReplication());
+    out.writeLong(cons.getModificationTime());
+    out.writeLong(cons.getPreferredBlockSize());
+
+    writeBlocks(cons.getBlocks(), out);
+    cons.getPermissionStatus().write(out);
+
+    FileUnderConstructionFeature uc = cons.getFileUnderConstructionFeature();
+    writeString(uc.getClientName(), out);
+    writeString(uc.getClientMachine(), out);
+
+    out.writeInt(0); //  do not store locations of last block
+  }
+
+  /**
+   * Serialize an {@link INodeFile} node
+   * @param file The node to write
+   * @param out The {@link DataOutput} where the fields are written
+   * @param writeUnderConstruction Whether to write under-construction file information
+   */
+  public static void writeINodeFile(INodeFile file, DataOutput out,
+      boolean writeUnderConstruction) throws IOException {
+    writeLocalName(file, out);
+    out.writeLong(file.getId());
+    out.writeShort(file.getFileReplication());
+    out.writeLong(file.getModificationTime());
+    out.writeLong(file.getAccessTime());
+    out.writeLong(file.getPreferredBlockSize());
+
+    writeBlocks(file.getBlocks(), out);
+    SnapshotFSImageFormat.saveFileDiffList(file, out);
+
+    if (writeUnderConstruction) {
+      if (file.isUnderConstruction()) {
+        out.writeBoolean(true);
+        final FileUnderConstructionFeature uc = file.getFileUnderConstructionFeature();
+        writeString(uc.getClientName(), out);
+        writeString(uc.getClientMachine(), out);
+      } else {
+        out.writeBoolean(false);
+      }
+    }
+
+    writePermissionStatus(file, out);
+  }
+
+  /** Serialize an {@link INodeFileAttributes}. */
+  public static void writeINodeFileAttributes(INodeFileAttributes file,
+      DataOutput out) throws IOException {
+    writeLocalName(file, out);
+    writePermissionStatus(file, out);
+    out.writeLong(file.getModificationTime());
+    out.writeLong(file.getAccessTime());
+
+    out.writeShort(file.getFileReplication());
+    out.writeLong(file.getPreferredBlockSize());
+  }
+
+  private static void writeQuota(Quota.Counts quota, DataOutput out)
+      throws IOException {
+    out.writeLong(quota.get(Quota.NAMESPACE));
+    out.writeLong(quota.get(Quota.DISKSPACE));
+  }
+
+  /**
+   * Serialize an {@link INodeDirectory}
+   * @param node The node to write
+   * @param out The {@link DataOutput} where the fields are written
+   */
+  public static void writeINodeDirectory(INodeDirectory node, DataOutput out)
+      throws IOException {
+    writeLocalName(node, out);
+    out.writeLong(node.getId());
+    out.writeShort(0);  // replication
+    out.writeLong(node.getModificationTime());
+    out.writeLong(0);   // access time
+    out.writeLong(0);   // preferred block size
+    out.writeInt(-1);   // # of blocks
+
+    writeQuota(node.getQuotaCounts(), out);
+
+    if (node instanceof INodeDirectorySnapshottable) {
+      out.writeBoolean(true);
+    } else {
+      out.writeBoolean(false);
+      out.writeBoolean(node.isWithSnapshot());
+    }
+
+    writePermissionStatus(node, out);
+  }
+
+  /**
+   * Serialize an {@link INodeDirectoryAttributes}
+   * @param a The directory attributes to write
+   * @param out The {@link DataOutput} where the fields are written
+   */
+  public static void writeINodeDirectoryAttributes(
+      INodeDirectoryAttributes a, DataOutput out) throws IOException {
+    writeLocalName(a, out);
+    writePermissionStatus(a, out);
+    out.writeLong(a.getModificationTime());
+    writeQuota(a.getQuotaCounts(), out);
+  }
+
+  /**
+   * Serialize an {@link INodeSymlink} node
+   * @param node The node to write
+   * @param out The {@link DataOutput} where the fields are written
+   */
+  private static void writeINodeSymlink(INodeSymlink node, DataOutput out)
+      throws IOException {
+    writeLocalName(node, out);
+    out.writeLong(node.getId());
+    out.writeShort(0);  // replication
+    out.writeLong(0);   // modification time
+    out.writeLong(0);   // access time
+    out.writeLong(0);   // preferred block size
+    out.writeInt(-2);   // # of blocks
+
+    Text.writeString(out, node.getSymlinkString());
+    writePermissionStatus(node, out);
+  }
+
+  /** Serialize an {@link INodeReference} node */
+  private static void writeINodeReference(INodeReference ref, DataOutput out,
+      boolean writeUnderConstruction, ReferenceMap referenceMap
+      ) throws IOException {
+    writeLocalName(ref, out);
+    out.writeLong(ref.getId());
+    out.writeShort(0);  // replication
+    out.writeLong(0);   // modification time
+    out.writeLong(0);   // access time
+    out.writeLong(0);   // preferred block size
+    out.writeInt(-3);   // # of blocks
+
+    final boolean isWithName = ref instanceof INodeReference.WithName;
+    out.writeBoolean(isWithName);
+
+    if (!isWithName) {
+      Preconditions.checkState(ref instanceof INodeReference.DstReference);
+      // dst snapshot id
+      out.writeInt(((INodeReference.DstReference) ref).getDstSnapshotId());
+    } else {
+      out.writeInt(((INodeReference.WithName) ref).getLastSnapshotId());
+    }
+
+    final INodeReference.WithCount withCount
+        = (INodeReference.WithCount)ref.getReferredINode();
+    referenceMap.writeINodeReferenceWithCount(withCount, out,
+        writeUnderConstruction);
+  }
+
+  /**
+   * Save one inode's attributes to the image.
+   */
+  public static void saveINode2Image(INode node, DataOutput out,
+      boolean writeUnderConstruction, ReferenceMap referenceMap)
+      throws IOException {
+    if (node.isReference()) {
+      writeINodeReference(node.asReference(), out, writeUnderConstruction,
+          referenceMap);
+    } else if (node.isDirectory()) {
+      writeINodeDirectory(node.asDirectory(), out);
+    } else if (node.isSymlink()) {
+      writeINodeSymlink(node.asSymlink(), out);
+    } else if (node.isFile()) {
+      writeINodeFile(node.asFile(), out, writeUnderConstruction);
+    }
+  }
+
   // This should be reverted to package private once the ImageLoader
   // code is moved into this package. This method should not be called
   // by other code.
@@ -226,6 +429,12 @@ public class FSImageSerialization {
     in.readFully(createdNodeName);
     return createdNodeName;
   }
+
+  private static void writeLocalName(INodeAttributes inode, DataOutput out)
+      throws IOException {
+    final byte[] name = inode.getLocalNameBytes();
+    writeBytes(name, out);
+  }
   
   public static void writeBytes(byte[] data, DataOutput out)
       throws IOException {

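writeBlocks() above stores a signed count followed by each block's Writable fields, and a null block array is collapsed to a zero count. A symmetric reader sketch, assuming Block's Writable readFields() is the counterpart (the real loading path lives in the image loader, not here):

    import java.io.DataInput;
    import java.io.IOException;

    import org.apache.hadoop.hdfs.protocol.Block;

    public class BlockListReaderSketch {
      /** Reads the list written by FSImageSerialization#writeBlocks. */
      public static Block[] readBlocks(DataInput in) throws IOException {
        int numBlocks = in.readInt();       // 0 when the writer saw a null array
        Block[] blocks = new Block[numBlocks];
        for (int i = 0; i < numBlocks; i++) {
          blocks[i] = new Block();
          blocks[i].readFields(in);         // Block implements Writable
        }
        return blocks;
      }
    }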
+ 61 - 21
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -85,17 +85,7 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY;
 import static org.apache.hadoop.util.Time.now;
 
-import java.io.BufferedWriter;
-import java.io.ByteArrayInputStream;
-import java.io.DataInput;
-import java.io.DataInputStream;
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.io.PrintWriter;
-import java.io.StringWriter;
+import java.io.*;
 import java.io.UnsupportedEncodingException;
 import java.lang.management.ManagementFactory;
 import java.net.InetAddress;
@@ -140,6 +130,7 @@ import org.apache.hadoop.fs.Options;
 import org.apache.hadoop.fs.Options.Rename;
 import org.apache.hadoop.fs.ParentNotDirectoryException;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathIsNotEmptyDirectoryException;
 import org.apache.hadoop.fs.UnresolvedLinkException;
 import org.apache.hadoop.fs.XAttr;
 import org.apache.hadoop.fs.XAttrSetFlag;
@@ -3248,10 +3239,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
       // Rename does not operate on link targets
       // Do not resolveLink when checking permissions of src and dst
       // Check write access to parent of src
-      checkPermission(pc, src, false, null, FsAction.WRITE, null, null, false);
+      checkPermission(pc, src, false, null, FsAction.WRITE, null, null,
+          false, false);
       // Check write access to ancestor of dst
       checkPermission(pc, actualdst, false, FsAction.WRITE, null, null, null,
-          false);
+          false, false);
     }
 
     if (dir.renameTo(src, dst, logRetryCache)) {
@@ -3312,9 +3304,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
       // Rename does not operate on link targets
       // Do not resolveLink when checking permissions of src and dst
       // Check write access to parent of src
-      checkPermission(pc, src, false, null, FsAction.WRITE, null, null, false);
+      checkPermission(pc, src, false, null, FsAction.WRITE, null, null, false,
+          false);
       // Check write access to ancestor of dst
-      checkPermission(pc, dst, false, FsAction.WRITE, null, null, null, false);
+      checkPermission(pc, dst, false, FsAction.WRITE, null, null, null, false,
+          false);
     }
 
     dir.renameTo(src, dst, logRetryCache, options);
@@ -3394,11 +3388,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
       checkNameNodeSafeMode("Cannot delete " + src);
       src = FSDirectory.resolvePath(src, pathComponents, dir);
       if (!recursive && dir.isNonEmptyDirectory(src)) {
-        throw new IOException(src + " is non empty");
+        throw new PathIsNotEmptyDirectoryException(src + " is non empty");
       }
       if (enforcePermission && isPermissionEnabled) {
         checkPermission(pc, src, false, null, FsAction.WRITE, null,
-            FsAction.ALL, false);
+            FsAction.ALL, true, false);
       }
       // Unlink the target directory from directory tree
       if (!dir.delete(src, collectedBlocks, removedINodes, logRetryCache)) {
@@ -3550,7 +3544,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
       checkOperation(OperationCategory.READ);
       src = FSDirectory.resolvePath(src, pathComponents, dir);
       if (isPermissionEnabled) {
-        checkPermission(pc, src, false, null, null, null, null, resolveLink);
+        checkPermission(pc, src, false, null, null, null, null, false,
+            resolveLink);
       }
       stat = dir.getFileInfo(src, resolveLink);
     } catch (AccessControlException e) {
@@ -5549,7 +5544,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
       FsAction parentAccess, FsAction access, FsAction subAccess)
       throws AccessControlException, UnresolvedLinkException {
         checkPermission(pc, path, doCheckOwner, ancestorAccess,
-            parentAccess, access, subAccess, true);
+            parentAccess, access, subAccess, false, true);
   }
 
   /**
@@ -5560,14 +5555,14 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
   private void checkPermission(FSPermissionChecker pc,
       String path, boolean doCheckOwner, FsAction ancestorAccess,
       FsAction parentAccess, FsAction access, FsAction subAccess,
-      boolean resolveLink)
+      boolean ignoreEmptyDir, boolean resolveLink)
       throws AccessControlException, UnresolvedLinkException {
     if (!pc.isSuperUser()) {
       dir.waitForReady();
       readLock();
       try {
         pc.checkPermission(path, dir, doCheckOwner, ancestorAccess,
-            parentAccess, access, subAccess, resolveLink);
+            parentAccess, access, subAccess, ignoreEmptyDir, resolveLink);
       } finally {
         readUnlock();
       }
@@ -6091,6 +6086,42 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
     leaseManager.changeLease(src, dst);
   }
 
+  /**
+   * Serializes leases.
+   */
+  void saveFilesUnderConstruction(DataOutputStream out,
+      Map<Long, INodeFile> snapshotUCMap) throws IOException {
+    // This is run by an inferior thread of saveNamespace, which holds a read
+    // lock on our behalf. If we took the read lock here, we could block
+    // for fairness if a writer is waiting on the lock.
+    synchronized (leaseManager) {
+      Map<String, INodeFile> nodes = leaseManager.getINodesUnderConstruction();
+      for (Map.Entry<String, INodeFile> entry : nodes.entrySet()) {
+        // TODO: for HDFS-5428, because of rename operations, some
+        // under-construction files that are
+        // in the current fs directory can also be captured in the
+        // snapshotUCMap. We should remove them from the snapshotUCMap.
+        snapshotUCMap.remove(entry.getValue().getId());
+      }
+
+      out.writeInt(nodes.size() + snapshotUCMap.size()); // write the size
+      for (Map.Entry<String, INodeFile> entry : nodes.entrySet()) {
+        FSImageSerialization.writeINodeUnderConstruction(
+            out, entry.getValue(), entry.getKey());
+      }
+      for (Map.Entry<Long, INodeFile> entry : snapshotUCMap.entrySet()) {
+        // for those snapshot INodeFileUC, we use "/.reserved/.inodes/<inodeid>"
+        // as their paths
+        StringBuilder b = new StringBuilder();
+        b.append(FSDirectory.DOT_RESERVED_PATH_PREFIX)
+            .append(Path.SEPARATOR).append(FSDirectory.DOT_INODES_STRING)
+            .append(Path.SEPARATOR).append(entry.getValue().getId());
+        FSImageSerialization.writeINodeUnderConstruction(
+            out, entry.getValue(), b.toString());
+      }
+    }
+  }
+
   /**
    * @return all the under-construction files in the lease map
    */
@@ -6377,6 +6408,15 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
     getEditLog().logSync();
   }
 
+  /**
+   * @param out the output stream to which the secret manager state is written
+   * @param sdPath the storage directory path
+   */
+  void saveSecretManagerStateCompat(DataOutputStream out, String sdPath)
+      throws IOException {
+    dtSecretManager.saveSecretManagerStateCompat(out, sdPath);
+  }
+
   SecretManagerState saveSecretManagerState() {
     return dtSecretManager.saveSecretManagerState();
   }

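saveFilesUnderConstruction() records snapshot-only under-construction files under synthetic /.reserved/.inodes/&lt;inodeid&gt; lease paths so that they are not re-added to the lease map on load. A tiny sketch of what those strings look like; the literal "/.reserved" and ".inodes" components are assumptions standing in for FSDirectory.DOT_RESERVED_PATH_PREFIX and FSDirectory.DOT_INODES_STRING:

    public class ReservedInodePathSketch {
      /** Builds the synthetic lease path used for a snapshot-only UC file. */
      public static String forInode(long inodeId) {
        return "/.reserved" + "/" + ".inodes" + "/" + inodeId;
      }

      public static void main(String[] args) {
        System.out.println(forInode(16386L)); // /.reserved/.inodes/16386
      }
    }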
+ 12 - 6
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSPermissionChecker.java

@@ -32,6 +32,7 @@ import org.apache.hadoop.fs.permission.AclEntryScope;
 import org.apache.hadoop.fs.permission.AclEntryType;
 import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hdfs.util.ReadOnlyList;
 import org.apache.hadoop.security.AccessControlException;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.StringUtils;
@@ -136,6 +137,7 @@ class FSPermissionChecker {
    * @param subAccess If path is a directory,
    * it is the access required of the path and all the sub-directories.
    * If path is not a directory, there is no effect.
+   * @param ignoreEmptyDir Whether to skip the permission check for empty directories
    * @param resolveLink whether to resolve the final path component if it is
    * a symlink
    * @throws AccessControlException
@@ -146,7 +148,7 @@ class FSPermissionChecker {
    */
   void checkPermission(String path, FSDirectory dir, boolean doCheckOwner,
       FsAction ancestorAccess, FsAction parentAccess, FsAction access,
-      FsAction subAccess, boolean resolveLink)
+      FsAction subAccess, boolean ignoreEmptyDir, boolean resolveLink)
       throws AccessControlException, UnresolvedLinkException {
     if (LOG.isDebugEnabled()) {
       LOG.debug("ACCESS CHECK: " + this
@@ -155,6 +157,7 @@ class FSPermissionChecker {
           + ", parentAccess=" + parentAccess
           + ", access=" + access
           + ", subAccess=" + subAccess
+          + ", ignoreEmptyDir=" + ignoreEmptyDir
           + ", resolveLink=" + resolveLink);
     }
     // check if (parentAccess != null) && file exists, then check sb
@@ -182,7 +185,7 @@ class FSPermissionChecker {
       check(last, snapshotId, access);
     }
     if (subAccess != null) {
-      checkSubAccess(last, snapshotId, subAccess);
+      checkSubAccess(last, snapshotId, subAccess, ignoreEmptyDir);
     }
     if (doCheckOwner) {
       checkOwner(last, snapshotId);
@@ -207,8 +210,8 @@ class FSPermissionChecker {
   }
 
   /** Guarded by {@link FSNamesystem#readLock()} */
-  private void checkSubAccess(INode inode, int snapshotId, FsAction access
-      ) throws AccessControlException {
+  private void checkSubAccess(INode inode, int snapshotId, FsAction access,
+      boolean ignoreEmptyDir) throws AccessControlException {
     if (inode == null || !inode.isDirectory()) {
       return;
     }
@@ -216,9 +219,12 @@ class FSPermissionChecker {
     Stack<INodeDirectory> directories = new Stack<INodeDirectory>();
     for(directories.push(inode.asDirectory()); !directories.isEmpty(); ) {
       INodeDirectory d = directories.pop();
-      check(d, snapshotId, access);
+      ReadOnlyList<INode> cList = d.getChildrenList(snapshotId);
+      if (!(cList.isEmpty() && ignoreEmptyDir)) {
+        check(d, snapshotId, access);
+      }
 
-      for(INode child : d.getChildrenList(snapshotId)) {
+      for(INode child : cList) {
         if (child.isDirectory()) {
           directories.push(child.asDirectory());
         }

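The new ignoreEmptyDir flag only matters for recursive checks: with it set (as the delete path above now does), a directory with no children is not itself permission-checked, while non-empty directories still are. A standalone toy model of that traversal, using a stand-in tree type rather than INodes and covering directories only:

    import java.util.ArrayDeque;
    import java.util.ArrayList;
    import java.util.Deque;
    import java.util.List;

    public class SubAccessSketch {
      static class Dir {
        final String name;
        final List<Dir> children = new ArrayList<Dir>();
        Dir(String name) { this.name = name; }
      }

      /** Returns the directories that would be permission-checked. */
      static List<String> checkedDirs(Dir root, boolean ignoreEmptyDir) {
        List<String> checked = new ArrayList<String>();
        Deque<Dir> stack = new ArrayDeque<Dir>();
        stack.push(root);
        while (!stack.isEmpty()) {
          Dir d = stack.pop();
          if (!(d.children.isEmpty() && ignoreEmptyDir)) {
            checked.add(d.name);            // the real code calls check(d, snapshotId, access)
          }
          for (Dir child : d.children) {
            stack.push(child);
          }
        }
        return checked;
      }

      public static void main(String[] args) {
        Dir root = new Dir("/a");
        root.children.add(new Dir("/a/empty"));
        System.out.println(checkedDirs(root, false)); // [/a, /a/empty]
        System.out.println(checkedDirs(root, true));  // [/a]
      }
    }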
+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeReference.java

@@ -40,7 +40,7 @@ import com.google.common.base.Preconditions;
  * snapshots and it is renamed/moved to other locations.
  * 
  * For example,
- * (1) Support we have /abc/foo, say the inode of foo is inode(id=1000,name=foo)
+ * (1) Suppose we have /abc/foo, say the inode of foo is inode(id=1000,name=foo)
  * (2) create snapshot s0 for /abc
  * (3) mv /abc/foo /xyz/bar, i.e. inode(id=1000,name=...) is renamed from "foo"
  *     to "bar" and its parent becomes /xyz.

+ 6 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java

@@ -77,7 +77,8 @@ public class NNStorage extends Storage implements Closeable,
     IMAGE_ROLLBACK("fsimage_rollback"),
     EDITS_NEW ("edits.new"), // from "old" pre-HDFS-1073 format
     EDITS_INPROGRESS ("edits_inprogress"),
-    EDITS_TMP ("edits_tmp");
+    EDITS_TMP ("edits_tmp"),
+    IMAGE_LEGACY_OIV ("fsimage_legacy_oiv");  // For pre-PB format
 
     private String fileName = null;
     private NameNodeFile(String name) { this.fileName = name; }
@@ -693,6 +694,10 @@ public class NNStorage extends Storage implements Closeable,
     return getNameNodeFileName(NameNodeFile.IMAGE_ROLLBACK, txid);
   }
 
+  public static String getLegacyOIVImageFileName(long txid) {
+    return getNameNodeFileName(NameNodeFile.IMAGE_LEGACY_OIV, txid);
+  }
+
   private static String getNameNodeFileName(NameNodeFile nnf, long txid) {
     return String.format("%s_%019d", nnf.getName(), txid);
   }

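getLegacyOIVImageFileName() reuses the existing zero-padded naming scheme, so the OIV copies sort lexicographically by transaction ID alongside the regular checkpoint files. A quick demo of the resulting name:

    public class LegacyOivFileNameDemo {
      public static void main(String[] args) {
        // Mirrors getNameNodeFileName() with the new "fsimage_legacy_oiv" prefix.
        String name = String.format("%s_%019d", "fsimage_legacy_oiv", 42L);
        System.out.println(name); // fsimage_legacy_oiv_0000000000000000042
      }
    }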
+ 56 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorageRetentionManager.java

@@ -18,11 +18,13 @@
 package org.apache.hadoop.hdfs.server.namenode;
 
 import java.io.File;
+import java.io.FilenameFilter;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.EnumSet;
+import java.util.Iterator;
 import java.util.List;
 import java.util.TreeSet;
 
@@ -233,4 +235,58 @@ public class NNStorageRetentionManager {
       }      
     }
   }
+
+  /**
+   * Delete old OIV fsimages. Since the target dir is not a full blown
+   * storage directory, we simply list and keep the latest ones. For the
+   * same reason, no storage inspector is used.
+   */
+  void purgeOldLegacyOIVImages(String dir, long txid) {
+    File oivImageDir = new File(dir);
+    final String oivImagePrefix = NameNodeFile.IMAGE_LEGACY_OIV.getName();
+    String filesInStorage[];
+
+    // Get the listing
+    filesInStorage = oivImageDir.list(new FilenameFilter() {
+      @Override
+      public boolean accept(File dir, String name) {
+        return name.matches(oivImagePrefix + "_(\\d+)");
+      }
+    });
+
+    // Check whether there is any work to do.
+    if (filesInStorage.length <= numCheckpointsToRetain) {
+      return;
+    }
+
+    // Create a sorted list of txids from the file names.
+    TreeSet<Long> sortedTxIds = new TreeSet<Long>();
+    for (String fName : filesInStorage) {
+      // Extract the transaction id from the file name.
+      long fTxId;
+      try {
+        fTxId = Long.parseLong(fName.substring(oivImagePrefix.length() + 1));
+      } catch (NumberFormatException nfe) {
+        // This should not happen since we have already filtered it.
+        // Log and continue.
+        LOG.warn("Invalid file name. Skipping " + fName);
+        continue;
+      }
+      sortedTxIds.add(Long.valueOf(fTxId));
+    }
+
+    int numFilesToDelete = sortedTxIds.size() - numCheckpointsToRetain;
+    Iterator<Long> iter = sortedTxIds.iterator();
+    while (numFilesToDelete > 0 && iter.hasNext()) {
+      long txIdVal = iter.next().longValue();
+      String fileName = NNStorage.getLegacyOIVImageFileName(txIdVal);
+      LOG.info("Deleting " + fileName);
+      File fileToDelete = new File(oivImageDir, fileName);
+      if (!fileToDelete.delete()) {
+        // deletion failed.
+        LOG.warn("Failed to delete image file: " + fileToDelete);
+      }
+      numFilesToDelete--;
+    }
+  }
 }

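purgeOldLegacyOIVImages() keeps the newest numCheckpointsToRetain images by transaction ID and deletes the rest, driven purely by file names since the target directory is not a managed storage directory. A standalone model of that selection (not the HDFS class itself), using the same name pattern:

    import java.util.ArrayList;
    import java.util.Iterator;
    import java.util.List;
    import java.util.TreeSet;

    public class OivRetentionSketch {
      static List<String> filesToDelete(String[] names, int retain) {
        final String prefix = "fsimage_legacy_oiv";
        TreeSet<Long> txIds = new TreeSet<Long>();
        for (String name : names) {
          if (name.matches(prefix + "_(\\d+)")) {
            txIds.add(Long.parseLong(name.substring(prefix.length() + 1)));
          }
        }
        List<String> toDelete = new ArrayList<String>();
        int numToDelete = txIds.size() - retain;
        Iterator<Long> iter = txIds.iterator();           // ascending: oldest first
        while (numToDelete-- > 0 && iter.hasNext()) {
          toDelete.add(String.format("%s_%019d", prefix, iter.next()));
        }
        return toDelete;
      }

      public static void main(String[] args) {
        String[] names = {
            "fsimage_legacy_oiv_0000000000000000010",
            "fsimage_legacy_oiv_0000000000000000020",
            "fsimage_legacy_oiv_0000000000000000030"};
        // With two checkpoints retained, only the oldest image is deletable.
        System.out.println(filesToDelete(names, 2));
      }
    }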
+ 5 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java

@@ -1677,7 +1677,11 @@ public class NameNode implements NameNodeStatusMXBean {
   public boolean isStandbyState() {
     return (state.equals(STANDBY_STATE));
   }
-
+  
+  public boolean isActiveState() {
+    return (state.equals(ACTIVE_STATE));
+  }
+  
   /**
    * Check that a request to change this node's HA state is valid.
    * In particular, verifies that, if auto failover is enabled, non-forced

+ 16 - 3
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java

@@ -62,6 +62,7 @@ import org.apache.hadoop.hdfs.server.namenode.NNStorageRetentionManager.StorageP
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
 import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
 import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;
+import org.apache.hadoop.hdfs.util.Canceler;
 import org.apache.hadoop.http.HttpConfig;
 import org.apache.hadoop.http.HttpServer2;
 import org.apache.hadoop.io.MD5Hash;
@@ -125,6 +126,7 @@ public class SecondaryNameNode implements Runnable,
 
   private Thread checkpointThread;
   private ObjectName nameNodeStatusBeanName;
+  private String legacyOivImageDir;
 
   @Override
   public String toString() {
@@ -289,6 +291,9 @@ public class SecondaryNameNode implements Runnable,
           NetUtils.getHostPortString(httpsAddress));
     }
 
+    legacyOivImageDir = conf.get(
+        DFSConfigKeys.DFS_NAMENODE_LEGACY_OIV_IMAGE_DIR_KEY);
+
     LOG.info("Checkpoint Period   :" + checkpointConf.getPeriod() + " secs "
         + "(" + checkpointConf.getPeriod() / 60 + " min)");
     LOG.info("Log Size Trigger    :" + checkpointConf.getTxnCount() + " txns");
@@ -497,6 +502,7 @@ public class SecondaryNameNode implements Runnable,
    * @return if the image is fetched from primary or not
    */
   @VisibleForTesting
+  @SuppressWarnings("deprecation")
   public boolean doCheckpoint() throws IOException {
     checkpointImage.ensureCurrentDirExists();
     NNStorage dstStorage = checkpointImage.getStorage();
@@ -559,11 +565,18 @@ public class SecondaryNameNode implements Runnable,
 
     LOG.warn("Checkpoint done. New Image Size: " 
              + dstStorage.getFsImageName(txid).length());
-    
+
+    if (legacyOivImageDir != null && !legacyOivImageDir.isEmpty()) {
+      try {
+        checkpointImage.saveLegacyOIVImage(namesystem, legacyOivImageDir,
+            new Canceler());
+      } catch (IOException e) {
+        LOG.warn("Failed to write legacy OIV image: ", e);
+      }
+    }
     return loadImage;
   }
-  
-  
+
   /**
    * @param opts The parameters passed to this program.
    * @exception Exception if the filesystem does not exist.

+ 35 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/AbstractNNFailoverProxyProvider.java

@@ -0,0 +1,35 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfs.server.namenode.ha;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.retry.FailoverProxyProvider;
+
+public abstract class AbstractNNFailoverProxyProvider<T> implements
+   FailoverProxyProvider <T> {
+
+  /**
+   * Inquire whether logical HA URI is used for the implementation. If it is
+   * used, special token handling may be needed to make sure a token acquired
+   * from a node in the HA pair can be used against the other node.
+   *
+   * @return true if a logical HA URI is used, false otherwise
+   */
+  public abstract boolean useLogicalURI(); 
+}

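useLogicalURI() gives proxy-creation code a single place to ask whether delegation tokens should be keyed by the logical nameservice URI or by a concrete resolved address. An illustrative caller-side decision, not the actual proxy-creation code; the helper name and the fallback formatting are assumptions:

    import java.net.InetSocketAddress;
    import java.net.URI;

    import org.apache.hadoop.hdfs.server.namenode.ha.AbstractNNFailoverProxyProvider;

    public class TokenServiceChoiceSketch {
      /** Picks the token service string for a client-side proxy. */
      public static String tokenService(URI nameNodeUri, InetSocketAddress resolvedAddr,
          AbstractNNFailoverProxyProvider<?> provider) {
        if (provider != null && provider.useLogicalURI()) {
          // Logical HA URI, e.g. "mycluster": the token must work against both NNs.
          return nameNodeUri.getHost();
        }
        // Physical address: the token is tied to the single resolved endpoint.
        return resolvedAddr.getHostName() + ":" + resolvedAddr.getPort();
      }
    }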
+ 11 - 3
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java

@@ -34,8 +34,8 @@ import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.HAUtil;
 import org.apache.hadoop.hdfs.NameNodeProxies;
+import org.apache.hadoop.hdfs.server.namenode.ha.AbstractNNFailoverProxyProvider;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
-import org.apache.hadoop.io.retry.FailoverProxyProvider;
 import org.apache.hadoop.ipc.RPC;
 import org.apache.hadoop.security.UserGroupInformation;
 
@@ -46,8 +46,8 @@ import com.google.common.base.Preconditions;
  * to connect to during fail-over. The first configured address is tried first,
  * and on a fail-over event the other address is tried.
  */
-public class ConfiguredFailoverProxyProvider<T> implements
-    FailoverProxyProvider<T> {
+public class ConfiguredFailoverProxyProvider<T> extends
+    AbstractNNFailoverProxyProvider<T> {
   
   private static final Log LOG =
       LogFactory.getLog(ConfiguredFailoverProxyProvider.class);
@@ -165,4 +165,12 @@ public class ConfiguredFailoverProxyProvider<T> implements
       }
     }
   }
+
+  /**
+   * Logical URI is required for this failover proxy provider.
+   */
+  @Override
+  public boolean useLogicalURI() {
+    return true;
+  }
 }

+ 133 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/IPFailoverProxyProvider.java

@@ -0,0 +1,133 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode.ha;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.net.URI;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.NameNodeProxies;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
+import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
+import org.apache.hadoop.io.retry.FailoverProxyProvider;
+import org.apache.hadoop.ipc.RPC;
+import org.apache.hadoop.security.UserGroupInformation;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * An NNFailoverProxyProvider implementation which works on an IP failover setup.
+ * Only one proxy is used to connect to both servers and switching between
+ * the servers is done by the environment/infrastructure, which guarantees
+ * clients can consistently reach only one node at a time.
+ *
+ * Clients with a live connection will likely get connection reset after an
+ * IP failover. This case will be handled by the 
+ * FailoverOnNetworkExceptionRetry retry policy. I.e. if the call is
+ * not idempotent, it won't get retried.
+ *
+ * A connection reset while setting up a connection (i.e. before sending a
+ * request) will be handled in ipc client.
+ *
+ * The namenode URI must contain a resolvable host name.
+ */
+public class IPFailoverProxyProvider<T> extends
+    AbstractNNFailoverProxyProvider<T> {
+  private final Configuration conf;
+  private final Class<T> xface;
+  private final URI nameNodeUri;
+  private ProxyInfo<T> nnProxyInfo = null;
+  
+  public IPFailoverProxyProvider(Configuration conf, URI uri,
+      Class<T> xface) {
+    Preconditions.checkArgument(
+        xface.isAssignableFrom(NamenodeProtocols.class),
+        "Interface class %s is not a valid NameNode protocol!");
+    this.xface = xface;
+    this.nameNodeUri = uri;
+
+    this.conf = new Configuration(conf);
+    int maxRetries = this.conf.getInt(
+        DFSConfigKeys.DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_KEY,
+        DFSConfigKeys.DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_DEFAULT);
+    this.conf.setInt(
+        CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY,
+        maxRetries);
+    
+    int maxRetriesOnSocketTimeouts = this.conf.getInt(
+        DFSConfigKeys.DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_ON_SOCKET_TIMEOUTS_KEY,
+        DFSConfigKeys.DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_ON_SOCKET_TIMEOUTS_DEFAULT);
+    this.conf.setInt(
+        CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY,
+        maxRetriesOnSocketTimeouts);
+  }
+    
+  @Override
+  public Class<T> getInterface() {
+    return xface;
+  }
+
+  @Override
+  public synchronized ProxyInfo<T> getProxy() {
+    // Create a non-ha proxy if not already created.
+    if (nnProxyInfo == null) {
+      try {
+        // Create a proxy that is not wrapped in RetryProxy
+        InetSocketAddress nnAddr = NameNode.getAddress(nameNodeUri);
+        nnProxyInfo = new ProxyInfo<T>(NameNodeProxies.createNonHAProxy(
+            conf, nnAddr, xface, UserGroupInformation.getCurrentUser(), 
+            false).getProxy(), nnAddr.toString());
+      } catch (IOException ioe) {
+        throw new RuntimeException(ioe);
+      }
+    }
+    return nnProxyInfo;
+  }
+
+  /** Nothing to do for IP failover */
+  @Override
+  public void performFailover(T currentProxy) {
+  }
+
+  /**
+   * Close the proxy.
+   */
+  @Override
+  public synchronized void close() throws IOException {
+    if (nnProxyInfo == null) {
+      return;
+    }
+    if (nnProxyInfo.proxy instanceof Closeable) {
+      ((Closeable)nnProxyInfo.proxy).close();
+    } else {
+      RPC.stopProxy(nnProxyInfo.proxy);
+    }
+  }
+
+  /**
+   * Logical URI is not used for IP failover.
+   */
+  @Override
+  public boolean useLogicalURI() {
+    return false;
+  }
+}

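Selecting IPFailoverProxyProvider is a client-side choice: the default filesystem points at the floating (virtual IP) host name, and the failover proxy provider key for that host names this class. A hedged configuration sketch; the host name is illustrative, and "dfs.client.failover.proxy.provider." is the existing per-nameservice key prefix:

    import org.apache.hadoop.conf.Configuration;

    public class IpFailoverClientConfSketch {
      public static Configuration build() {
        Configuration conf = new Configuration();
        // "nn-vip.example.com" stands in for the resolvable floating host name.
        conf.set("fs.defaultFS", "hdfs://nn-vip.example.com");
        conf.set("dfs.client.failover.proxy.provider.nn-vip.example.com",
            "org.apache.hadoop.hdfs.server.namenode.ha.IPFailoverProxyProvider");
        return conf;
      }
    }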
+ 6 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java

@@ -183,6 +183,12 @@ public class StandbyCheckpointer {
       txid = img.getStorage().getMostRecentCheckpointTxId();
       assert txid == thisCheckpointTxId : "expected to save checkpoint at txid=" +
         thisCheckpointTxId + " but instead saved at txid=" + txid;
+
+      // Save the legacy OIV image, if the output dir is defined.
+      String outputDir = checkpointConf.getLegacyOivImageDir();
+      if (outputDir != null && !outputDir.isEmpty()) {
+        img.saveLegacyOIVImage(namesystem, outputDir, canceler);
+      }
     } finally {
       namesystem.longReadUnlock();
     }

+ 80 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/WrappedFailoverProxyProvider.java

@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode.ha;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.net.URI;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
+import org.apache.hadoop.io.retry.FailoverProxyProvider;
+import org.apache.hadoop.ipc.RPC;
+import org.apache.hadoop.security.UserGroupInformation;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * An NNFailoverProxyProvider implementation which wraps old implementations
+ * directly implementing the {@link FailoverProxyProvider} interface.
+ *
+ * It is assumed that the old implementation is using a logical URI.
+ */
+public class WrappedFailoverProxyProvider<T> extends
+    AbstractNNFailoverProxyProvider<T> {
+  private final FailoverProxyProvider<T> proxyProvider;
+  
+  /**
+   * Wrap the given instance of an old FailoverProxyProvider.
+   */
+  public WrappedFailoverProxyProvider(FailoverProxyProvider<T> provider) {
+    proxyProvider = provider;
+  }
+    
+  @Override
+  public Class<T> getInterface() {
+    return proxyProvider.getInterface();
+  }
+
+  @Override
+  public synchronized ProxyInfo<T> getProxy() {
+    return proxyProvider.getProxy();
+  }
+
+  @Override
+  public void performFailover(T currentProxy) {
+    proxyProvider.performFailover(currentProxy);
+  }
+
+  /**
+   * Close the proxy.
+   */
+  @Override
+  public synchronized void close() throws IOException {
+    proxyProvider.close();
+  }
+
+  /**
+   * Assume logical URI is used for old proxy provider implementations.
+   */
+  @Override
+  public boolean useLogicalURI() {
+    return true;
+  }
+}

+ 13 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/AbstractINodeDiff.java

@@ -17,13 +17,17 @@
  */
 package org.apache.hadoop.hdfs.server.namenode.snapshot;
 
-import com.google.common.base.Preconditions;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.List;
+
 import org.apache.hadoop.hdfs.server.namenode.INode;
 import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
 import org.apache.hadoop.hdfs.server.namenode.INodeAttributes;
 import org.apache.hadoop.hdfs.server.namenode.Quota;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat.ReferenceMap;
 
-import java.util.List;
+import com.google.common.base.Preconditions;
 
 /**
  * The difference of an inode between in two snapshots.
@@ -128,4 +132,11 @@ abstract class AbstractINodeDiff<N extends INode,
     return getClass().getSimpleName() + ": " + this.getSnapshotId() + " (post="
         + (posteriorDiff == null? null: posteriorDiff.getSnapshotId()) + ")";
   }
+
+  void writeSnapshot(DataOutput out) throws IOException {
+    out.writeInt(snapshotId);
+  }
+
+  abstract void write(DataOutput out, ReferenceMap referenceMap
+      ) throws IOException;
 }

+ 52 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java

@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hdfs.server.namenode.snapshot;
 
+import java.io.DataOutput;
+import java.io.IOException;
 import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.Collections;
@@ -32,6 +34,7 @@ import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffReportEntry;
 import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffType;
 import org.apache.hadoop.hdfs.server.namenode.Content;
 import org.apache.hadoop.hdfs.server.namenode.ContentSummaryComputationContext;
+import org.apache.hadoop.hdfs.server.namenode.FSImageSerialization;
 import org.apache.hadoop.hdfs.server.namenode.INode;
 import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
 import org.apache.hadoop.hdfs.server.namenode.INodeDirectory;
@@ -39,6 +42,7 @@ import org.apache.hadoop.hdfs.server.namenode.INodeDirectoryAttributes;
 import org.apache.hadoop.hdfs.server.namenode.INodeFile;
 import org.apache.hadoop.hdfs.server.namenode.INodeReference;
 import org.apache.hadoop.hdfs.server.namenode.Quota;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat.ReferenceMap;
 import org.apache.hadoop.hdfs.util.Diff;
 import org.apache.hadoop.hdfs.util.Diff.Container;
 import org.apache.hadoop.hdfs.util.Diff.ListType;
@@ -120,6 +124,35 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
       return counts;
     }
 
+    /** Serialize {@link #created} */
+    private void writeCreated(DataOutput out) throws IOException {
+      final List<INode> created = getList(ListType.CREATED);
+      out.writeInt(created.size());
+      for (INode node : created) {
+        // For INode in created list, we only need to record its local name
+        byte[] name = node.getLocalNameBytes();
+        out.writeShort(name.length);
+        out.write(name);
+      }
+    }
+
+    /** Serialize {@link #deleted} */
+    private void writeDeleted(DataOutput out,
+        ReferenceMap referenceMap) throws IOException {
+      final List<INode> deleted = getList(ListType.DELETED);
+      out.writeInt(deleted.size());
+      for (INode node : deleted) {
+        FSImageSerialization.saveINode2Image(node, out, true, referenceMap);
+      }
+    }
+
+    /** Serialize to out */
+    private void write(DataOutput out, ReferenceMap referenceMap
+        ) throws IOException {
+      writeCreated(out);
+      writeDeleted(out, referenceMap);
+    }
+
     /** Get the list of INodeDirectory contained in the deleted list */
     private void getDirsInDeleted(List<INodeDirectory> dirList) {
       for (INode node : getList(ListType.DELETED)) {
@@ -314,6 +347,25 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
       return childrenSize;
     }
 
+    @Override
+    void write(DataOutput out, ReferenceMap referenceMap) throws IOException {
+      writeSnapshot(out);
+      out.writeInt(childrenSize);
+
+      // Write snapshotINode
+      out.writeBoolean(isSnapshotRoot);
+      if (!isSnapshotRoot) {
+        if (snapshotINode != null) {
+          out.writeBoolean(true);
+          FSImageSerialization.writeINodeDirectoryAttributes(snapshotINode, out);
+        } else {
+          out.writeBoolean(false);
+        }
+      }
+      // Write diff. No need to write posteriorDiff, since diffs is a list.
+      diff.write(out, referenceMap);
+    }
+
     @Override
     Quota.Counts destroyDiffAndCollectBlocks(INodeDirectory currentINode,
         BlocksMapUpdateInfo collectedBlocks, final List<INode> removedINodes) {

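The created-list encoding used by writeCreated() above (an int count, then a short length and the raw name bytes per entry) round-trips with plain java.io streams. A standalone sketch that mirrors only that framing; the deleted list goes through FSImageSerialization.saveINode2Image() and is not reproduced here:

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.*;

public class CreatedListSketch {
  static void writeCreated(DataOutput out, List<String> names) throws IOException {
    out.writeInt(names.size());                  // number of entries
    for (String name : names) {
      byte[] bytes = name.getBytes(StandardCharsets.UTF_8);
      out.writeShort(bytes.length);              // local name length
      out.write(bytes);                          // local name bytes
    }
  }

  static List<String> readCreated(DataInput in) throws IOException {
    int count = in.readInt();
    List<String> names = new ArrayList<String>(count);
    for (int i = 0; i < count; i++) {
      byte[] bytes = new byte[in.readUnsignedShort()];
      in.readFully(bytes);
      names.add(new String(bytes, StandardCharsets.UTF_8));
    }
    return names;
  }

  public static void main(String[] args) throws IOException {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    writeCreated(new DataOutputStream(bytes), Arrays.asList("a.txt", "subdir"));
    DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes.toByteArray()));
    System.out.println(readCreated(in)); // [a.txt, subdir]
  }
}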
+ 20 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileDiff.java

@@ -17,13 +17,17 @@
  */
 package org.apache.hadoop.hdfs.server.namenode.snapshot;
 
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.hdfs.server.namenode.FSImageSerialization;
 import org.apache.hadoop.hdfs.server.namenode.INode;
 import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
 import org.apache.hadoop.hdfs.server.namenode.INodeFile;
 import org.apache.hadoop.hdfs.server.namenode.INodeFileAttributes;
 import org.apache.hadoop.hdfs.server.namenode.Quota;
-
-import java.util.List;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat.ReferenceMap;
 
 /**
  * The difference of an {@link INodeFile} between two snapshots.
@@ -66,6 +70,20 @@ public class FileDiff extends
         + (snapshotINode == null? "?": snapshotINode.getFileReplication());
   }
 
+  @Override
+  void write(DataOutput out, ReferenceMap referenceMap) throws IOException {
+    writeSnapshot(out);
+    out.writeLong(fileSize);
+
+    // write snapshotINode
+    if (snapshotINode != null) {
+      out.writeBoolean(true);
+      FSImageSerialization.writeINodeFileAttributes(snapshotINode, out);
+    } else {
+      out.writeBoolean(false);
+    }
+  }
+
   @Override
   Quota.Counts destroyDiffAndCollectBlocks(INodeFile currentINode,
       BlocksMapUpdateInfo collectedBlocks, final List<INode> removedINodes) {

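FileDiff.write() above uses a presence flag: a boolean announces whether the optional snapshot copy of the inode follows. A standalone sketch of that encoding, with a simple short field standing in for the INodeFileAttributes record:

import java.io.*;

public class OptionalFieldSketch {
  static void writeOptionalReplication(DataOutput out, Short replication) throws IOException {
    if (replication != null) {
      out.writeBoolean(true);       // snapshot copy present
      out.writeShort(replication);  // ... followed by its fields
    } else {
      out.writeBoolean(false);      // nothing more for this record
    }
  }

  static Short readOptionalReplication(DataInput in) throws IOException {
    return in.readBoolean() ? Short.valueOf(in.readShort()) : null;
  }

  public static void main(String[] args) throws IOException {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(bytes);
    writeOptionalReplication(out, (short) 3);
    writeOptionalReplication(out, null);
    DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes.toByteArray()));
    System.out.println(readOptionalReplication(in)); // 3
    System.out.println(readOptionalReplication(in)); // null
  }
}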
+ 9 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/Snapshot.java

@@ -18,6 +18,7 @@
 package org.apache.hadoop.hdfs.server.namenode.snapshot;
 
 import java.io.DataInput;
+import java.io.DataOutput;
 import java.io.IOException;
 import java.text.SimpleDateFormat;
 import java.util.Arrays;
@@ -30,6 +31,7 @@ import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.server.namenode.AclFeature;
 import org.apache.hadoop.hdfs.server.namenode.FSImageFormat;
+import org.apache.hadoop.hdfs.server.namenode.FSImageSerialization;
 import org.apache.hadoop.hdfs.server.namenode.INode;
 import org.apache.hadoop.hdfs.server.namenode.INodeDirectory;
 import org.apache.hadoop.hdfs.server.namenode.XAttrFeature;
@@ -227,4 +229,11 @@ public class Snapshot implements Comparable<byte[]> {
   public String toString() {
     return getClass().getSimpleName() + "." + root.getLocalName() + "(id=" + id + ")";
   }
+
+  /** Serialize the fields to out */
+  void write(DataOutput out) throws IOException {
+    out.writeInt(id);
+    // write root
+    FSImageSerialization.writeINodeDirectory(root, out);
+  }
 }

+ 72 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotFSImageFormat.java

@@ -29,21 +29,75 @@ import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.server.namenode.FSImageFormat;
 import org.apache.hadoop.hdfs.server.namenode.FSImageSerialization;
 import org.apache.hadoop.hdfs.server.namenode.INode;
+import org.apache.hadoop.hdfs.server.namenode.INodeAttributes;
 import org.apache.hadoop.hdfs.server.namenode.INodeDirectory;
 import org.apache.hadoop.hdfs.server.namenode.INodeDirectoryAttributes;
+import org.apache.hadoop.hdfs.server.namenode.INodeFile;
 import org.apache.hadoop.hdfs.server.namenode.INodeFileAttributes;
 import org.apache.hadoop.hdfs.server.namenode.INodeReference;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature.DirectoryDiff;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature.DirectoryDiffList;
 import org.apache.hadoop.hdfs.tools.snapshot.SnapshotDiff;
 import org.apache.hadoop.hdfs.util.Diff.ListType;
-import org.apache.hadoop.hdfs.server.namenode.FSImageFormat.Loader;
+import org.apache.hadoop.hdfs.util.ReadOnlyList;
 
 /**
  * A helper class defining static methods for reading/writing snapshot related
  * information from/to FSImage.
  */
 public class SnapshotFSImageFormat {
+  /**
+   * Save snapshots and snapshot quota for a snapshottable directory.
+   * @param current The directory that the snapshots belong to.
+   * @param out The {@link DataOutput} to write.
+   * @throws IOException
+   */
+  public static void saveSnapshots(INodeDirectorySnapshottable current,
+      DataOutput out) throws IOException {
+    // list of snapshots in snapshotsByNames
+    ReadOnlyList<Snapshot> snapshots = current.getSnapshotsByNames();
+    out.writeInt(snapshots.size());
+    for (Snapshot s : snapshots) {
+      // write the snapshot id
+      out.writeInt(s.getId());
+    }
+    // snapshot quota
+    out.writeInt(current.getSnapshotQuota());
+  }
+
+  /**
+   * Save an INode diff list.
+   * @param diffs The diff list to save.
+   * @param out The {@link DataOutput} to write.
+   */
+  private static <N extends INode, A extends INodeAttributes, D extends AbstractINodeDiff<N, A, D>>
+      void saveINodeDiffs(final AbstractINodeDiffList<N, A, D> diffs,
+      final DataOutput out, ReferenceMap referenceMap) throws IOException {
+    // Record the diffs in reversed order, so that we can find the correct
+    // reference for INodes in the created list when loading the FSImage
+    if (diffs == null) {
+      out.writeInt(-1); // no diffs
+    } else {
+      final List<D> list = diffs.asList();
+      final int size = list.size();
+      out.writeInt(size);
+      for (int i = size - 1; i >= 0; i--) {
+        list.get(i).write(out, referenceMap);
+      }
+    }
+  }
+
+  public static void saveDirectoryDiffList(final INodeDirectory dir,
+      final DataOutput out, final ReferenceMap referenceMap
+      ) throws IOException {
+    saveINodeDiffs(dir.getDiffs(), out, referenceMap);
+  }
+
+  public static void saveFileDiffList(final INodeFile file,
+      final DataOutput out) throws IOException {
+    saveINodeDiffs(file.getDiffs(), out, null);
+  }
+
   public static FileDiffList loadFileDiffList(DataInput in,
       FSImageFormat.Loader loader) throws IOException {
     final int size = in.readInt();
@@ -264,6 +318,23 @@ public class SnapshotFSImageFormat {
      * Used to record whether the subtree of the reference node has been saved 
      */
     private final Map<Long, Long> dirMap = new HashMap<Long, Long>();
+
+    public void writeINodeReferenceWithCount(
+        INodeReference.WithCount withCount, DataOutput out,
+        boolean writeUnderConstruction) throws IOException {
+      final INode referred = withCount.getReferredINode();
+      final long id = withCount.getId();
+      final boolean firstReferred = !referenceMap.containsKey(id);
+      out.writeBoolean(firstReferred);
+
+      if (firstReferred) {
+        FSImageSerialization.saveINode2Image(referred, out,
+            writeUnderConstruction, this);
+        referenceMap.put(id, withCount);
+      } else {
+        out.writeLong(id);
+      }
+    }
     
     public boolean toProcessSubtree(long id) {
       if (dirMap.containsKey(id)) {

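writeINodeReferenceWithCount() above deduplicates shared subtrees: the first occurrence of an id is serialized in full and remembered, later occurrences are written as the bare id. A standalone sketch of that idea, with a String payload standing in for the referred INode:

import java.io.*;
import java.util.*;

public class ReferenceMapSketch {
  private final Map<Long, String> alreadyWritten = new HashMap<Long, String>();

  void writeReference(DataOutput out, long id, String payload) throws IOException {
    boolean firstReferred = !alreadyWritten.containsKey(id);
    out.writeBoolean(firstReferred);
    if (firstReferred) {
      out.writeUTF(payload);           // full record, like saveINode2Image(...)
      alreadyWritten.put(id, payload);
    } else {
      out.writeLong(id);               // back-reference only
    }
  }

  public static void main(String[] args) throws IOException {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(bytes);
    ReferenceMapSketch refs = new ReferenceMapSketch();
    refs.writeReference(out, 1001L, "/dir/renamed-subtree");
    refs.writeReference(out, 1001L, "/dir/renamed-subtree"); // becomes a back-reference
    System.out.println("bytes written: " + bytes.size());
  }
}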
+ 17 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java

@@ -18,6 +18,7 @@
 package org.apache.hadoop.hdfs.server.namenode.snapshot;
 
 import java.io.DataInput;
+import java.io.DataOutput;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
@@ -287,6 +288,22 @@ public class SnapshotManager implements SnapshotStatsMXBean {
     return snapshottables.values().toArray(
         new INodeDirectorySnapshottable[snapshottables.size()]);
   }
+
+  /**
+   * Write {@link #snapshotCounter}, {@link #numSnapshots},
+   * and all snapshots to the DataOutput.
+   */
+  public void write(DataOutput out) throws IOException {
+    out.writeInt(snapshotCounter);
+    out.writeInt(numSnapshots.get());
+
+    // write all snapshots.
+    for(INodeDirectorySnapshottable snapshottableDir : snapshottables.values()) {
+      for(Snapshot s : snapshottableDir.getSnapshotsByNames()) {
+        s.write(out);
+      }
+    }
+  }
   
   /**
    * Read values of {@link #snapshotCounter}, {@link #numSnapshots}, and

+ 4 - 3
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java

@@ -913,9 +913,10 @@ public class DFSAdmin extends FsShell {
     
     Configuration dfsConf = dfs.getConf();
     URI dfsUri = dfs.getUri();
-    boolean isHaEnabled = HAUtil.isLogicalUri(dfsConf, dfsUri);
-    if (isHaEnabled) {
-      // In the case of HA, run finalizeUpgrade for all NNs in this nameservice
+    boolean isHaAndLogicalUri = HAUtil.isLogicalUri(dfsConf, dfsUri);
+    if (isHaAndLogicalUri) {
+      // In the case of HA and logical URI, run finalizeUpgrade for all
+      // NNs in this nameservice.
       String nsId = dfsUri.getHost();
       List<ClientProtocol> namenodes =
           HAUtil.getProxiesForAllNameNodesInNameservice(dfsConf, nsId);

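The DFSAdmin hunk above fans finalizeUpgrade out to every NameNode behind a logical HA URI. A minimal sketch of that pattern, assuming an HA nameservice named "mycluster" exists in the loaded configuration; error handling and RPC cleanup are omitted:

import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.HAUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;

public class FinalizeAllNameNodesSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new HdfsConfiguration();
    List<ClientProtocol> namenodes =
        HAUtil.getProxiesForAllNameNodesInNameservice(conf, "mycluster");
    for (ClientProtocol nn : namenodes) {
      nn.finalizeUpgrade(); // the same call is issued to every NN in the nameservice
    }
  }
}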
+ 11 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java

@@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.tools;
 
 import java.io.PrintStream;
 import java.util.Arrays;
+import java.util.Collection;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -27,6 +28,7 @@ import org.apache.hadoop.fs.CommonConfigurationKeys;
 import org.apache.hadoop.ha.HAAdmin;
 import org.apache.hadoop.ha.HAServiceTarget;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.util.ToolRunner;
 
@@ -117,7 +119,15 @@ public class DFSHAAdmin extends HAAdmin {
     
     return super.runCmd(argv);
   }
-
+  
+  /**
+   * Returns the list of all NameNode ids for the given configuration.
+   */
+  @Override
+  protected Collection<String> getTargetIds(String namenodeToActivate) {
+    return DFSUtil.getNameNodeIds(getConf(), (nameserviceId != null)? nameserviceId : DFSUtil.getNamenodeNameServiceId(getConf()));
+  }
+  
   public static void main(String[] argv) throws Exception {
     int res = ToolRunner.run(new DFSHAAdmin(), argv);
     System.exit(res);

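getTargetIds() above resolves the NameNode ids of a nameservice from configuration via DFSUtil.getNameNodeIds(). A minimal sketch of the lookup, assuming the standard HA keys; the nameservice and NameNode ids are illustrative:

import java.util.Collection;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSUtil;

public class NameNodeIdsSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.set("dfs.nameservices", "mycluster");
    conf.set("dfs.ha.namenodes.mycluster", "nn1,nn2");
    Collection<String> ids = DFSUtil.getNameNodeIds(conf, "mycluster");
    System.out.println(ids); // expected: [nn1, nn2]
  }
}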
+ 172 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/DelimitedImageVisitor.java

@@ -0,0 +1,172 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.IOException;
+import java.util.AbstractMap;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedList;
+
+/**
+ * A DelimitedImageVisitor generates a text representation of the fsimage,
+ * with each element separated by a delimiter string.  All of the elements
+ * common to both inodes and inodes-under-construction are included. When 
+ * processing an fsimage with a layout version that did not include an 
+ * element, such as AccessTime, the output file will include a column
+ * for the value, but no value will be included.
+ * 
+ * Individual block information for each file is not currently included.
+ * 
+ * The default delimiter is tab, as this is an unlikely value to appear in
+ * an inode path or other text metadata.  The delimiter can be overridden
+ * via the constructor.
+ */
+class DelimitedImageVisitor extends TextWriterImageVisitor {
+  private static final String defaultDelimiter = "\t"; 
+  
+  final private LinkedList<ImageElement> elemQ = new LinkedList<ImageElement>();
+  private long fileSize = 0l;
+  // Elements of fsimage we're interested in tracking
+  private final Collection<ImageElement> elementsToTrack;
+  // Values for each of the elements in elementsToTrack
+  private final AbstractMap<ImageElement, String> elements = 
+                                            new HashMap<ImageElement, String>();
+  private final String delimiter;
+
+  {
+    elementsToTrack = new ArrayList<ImageElement>();
+    
+    // This collection determines what elements are tracked and the order
+    // in which they are output
+    Collections.addAll(elementsToTrack,  ImageElement.INODE_PATH,
+                                         ImageElement.REPLICATION,
+                                         ImageElement.MODIFICATION_TIME,
+                                         ImageElement.ACCESS_TIME,
+                                         ImageElement.BLOCK_SIZE,
+                                         ImageElement.NUM_BLOCKS,
+                                         ImageElement.NUM_BYTES,
+                                         ImageElement.NS_QUOTA,
+                                         ImageElement.DS_QUOTA,
+                                         ImageElement.PERMISSION_STRING,
+                                         ImageElement.USER_NAME,
+                                         ImageElement.GROUP_NAME);
+  }
+  
+  public DelimitedImageVisitor(String filename) throws IOException {
+    this(filename, false);
+  }
+
+  public DelimitedImageVisitor(String outputFile, boolean printToScreen) 
+                                                           throws IOException {
+    this(outputFile, printToScreen, defaultDelimiter);
+  }
+  
+  public DelimitedImageVisitor(String outputFile, boolean printToScreen, 
+                               String delimiter) throws IOException {
+    super(outputFile, printToScreen);
+    this.delimiter = delimiter;
+    reset();
+  }
+
+  /**
+   * Reset the values of the elements we're tracking in order to handle
+   * the next file
+   */
+  private void reset() {
+    elements.clear();
+    for(ImageElement e : elementsToTrack) 
+      elements.put(e, null);
+    
+    fileSize = 0l;
+  }
+  
+  @Override
+  void leaveEnclosingElement() throws IOException {
+    ImageElement elem = elemQ.pop();
+
+    // If we're done with an inode, write out our results and start over
+    if(elem == ImageElement.INODE || 
+       elem == ImageElement.INODE_UNDER_CONSTRUCTION) {
+      writeLine();
+      write("\n");
+      reset();
+    }
+  }
+
+  /**
+   * Iterate through all the elements we're tracking and, if a value was
+   * recorded for it, write it out.
+   */
+  private void writeLine() throws IOException {
+    Iterator<ImageElement> it = elementsToTrack.iterator();
+    
+    while(it.hasNext()) {
+      ImageElement e = it.next();
+      
+      String v = null;
+      if(e == ImageElement.NUM_BYTES)
+        v = String.valueOf(fileSize);
+      else
+        v = elements.get(e);
+      
+      if(v != null)
+        write(v);
+      
+      if(it.hasNext())
+        write(delimiter);
+    }
+  }
+
+  @Override
+  void visit(ImageElement element, String value) throws IOException {
+    // Explicitly label the root path
+    if(element == ImageElement.INODE_PATH && value.equals(""))
+      value = "/";
+    
+    // Special case of file size, which is sum of the num bytes in each block
+    if(element == ImageElement.NUM_BYTES)
+      fileSize += Long.valueOf(value);
+    
+    if(elements.containsKey(element) && element != ImageElement.NUM_BYTES)
+      elements.put(element, value);
+    
+  }
+
+  @Override
+  void visitEnclosingElement(ImageElement element) throws IOException {
+    elemQ.push(element);
+  }
+
+  @Override
+  void visitEnclosingElement(ImageElement element, ImageElement key,
+      String value) throws IOException {
+    // Special case as numBlocks is an attribute of the blocks element
+    if(key == ImageElement.NUM_BLOCKS 
+        && elements.containsKey(ImageElement.NUM_BLOCKS))
+      elements.put(key, value);
+    
+    elemQ.push(element);
+  }
+  
+  @Override
+  void start() throws IOException { /* Nothing to do */ }
+}

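A standalone sketch of how writeLine() above assembles one row: columns are emitted in a fixed order, a missing value yields an empty field, and the delimiter (tab by default) separates adjacent fields. The column names here are plain strings standing in for ImageElement constants:

import java.util.*;

public class DelimitedRowSketch {
  static String buildRow(List<String> columns, Map<String, String> values, String delimiter) {
    StringBuilder row = new StringBuilder();
    for (Iterator<String> it = columns.iterator(); it.hasNext(); ) {
      String value = values.get(it.next());
      if (value != null) {
        row.append(value);
      }
      if (it.hasNext()) {
        row.append(delimiter);
      }
    }
    return row.toString();
  }

  public static void main(String[] args) {
    List<String> columns = Arrays.asList("INODE_PATH", "REPLICATION", "ACCESS_TIME");
    Map<String, String> values = new HashMap<String, String>();
    values.put("INODE_PATH", "/user/data/file1");
    values.put("REPLICATION", "3");
    // ACCESS_TIME is deliberately absent: as the javadoc notes, older layout
    // versions leave the column empty rather than dropping it.
    System.out.println(buildRow(columns, values, "\t")); // /user/data/file1<TAB>3<TAB>
  }
}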
+ 36 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/DepthCounter.java

@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+/**
+ * Utility class for tracking descent into the structure of the
+ * Visitor class (ImageVisitor, EditsVisitor etc.)
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+public class DepthCounter {
+  private int depth = 0;
+
+  public void incLevel() { depth++; }
+  public void decLevel() { if(depth >= 1) depth--; }
+  public int  getLevel() { return depth; }
+}
+

+ 193 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionVisitor.java

@@ -0,0 +1,193 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.IOException;
+import java.util.LinkedList;
+
+/**
+ * File size distribution visitor.
+ * 
+ * <h3>Description.</h3>
+ * This is the tool for analyzing file sizes in the namespace image.
+ * In order to run the tool one should define a range of integers
+ * <tt>[0, maxSize]</tt> by specifying <tt>maxSize</tt> and a <tt>step</tt>.
+ * The range of integers is divided into segments of size <tt>step</tt>: 
+ * <tt>[0, s<sub>1</sub>, ..., s<sub>n-1</sub>, maxSize]</tt>,
+ * and the visitor calculates how many files in the system fall into 
+ * each segment <tt>[s<sub>i-1</sub>, s<sub>i</sub>)</tt>. 
+ * Note that files larger than <tt>maxSize</tt> always fall into 
+ * the very last segment.
+ * 
+ * <h3>Input.</h3>
+ * <ul>
+ * <li><tt>filename</tt> specifies the location of the image file;</li>
+ * <li><tt>maxSize</tt> determines the range <tt>[0, maxSize]</tt> of files
+ * sizes considered by the visitor;</li>
+ * <li><tt>step</tt> the range is divided into segments of size step.</li>
+ * </ul>
+ *
+ * <h3>Output.</h3>
+ * The output file is formatted as a tab-separated two-column table:
+ * Size and NumFiles, where Size represents the start of the segment,
+ * and NumFiles is the number of files from the image whose size falls in
+ * this segment.
+ */
+class FileDistributionVisitor extends TextWriterImageVisitor {
+  final private LinkedList<ImageElement> elemS = new LinkedList<ImageElement>();
+
+  private final static long MAX_SIZE_DEFAULT = 0x2000000000L;   // 1/8 TB = 2^37
+  private final static int INTERVAL_DEFAULT = 0x200000;         // 2 MB = 2^21
+
+  private int[] distribution;
+  private long maxSize;
+  private int step;
+
+  private int totalFiles;
+  private int totalDirectories;
+  private int totalBlocks;
+  private long totalSpace;
+  private long maxFileSize;
+
+  private FileContext current;
+
+  private boolean inInode = false;
+
+  /**
+   * File or directory information.
+   */
+  private static class FileContext {
+    String path;
+    long fileSize;
+    int numBlocks;
+    int replication;
+  }
+
+  public FileDistributionVisitor(String filename,
+                                 long maxSize,
+                                 int step) throws IOException {
+    super(filename, false);
+    this.maxSize = (maxSize == 0 ? MAX_SIZE_DEFAULT : maxSize);
+    this.step = (step == 0 ? INTERVAL_DEFAULT : step);
+    long numIntervals = this.maxSize / this.step;
+    if(numIntervals >= Integer.MAX_VALUE)
+      throw new IOException("Too many distribution intervals " + numIntervals);
+    this.distribution = new int[1 + (int)(numIntervals)];
+    this.totalFiles = 0;
+    this.totalDirectories = 0;
+    this.totalBlocks = 0;
+    this.totalSpace = 0;
+    this.maxFileSize = 0;
+  }
+
+  @Override
+  void start() throws IOException {}
+
+  @Override
+  void finish() throws IOException {
+    output();
+    super.finish();
+  }
+
+  @Override
+  void finishAbnormally() throws IOException {
+    System.out.println("*** Image processing finished abnormally.  Ending ***");
+    output();
+    super.finishAbnormally();
+  }
+
+  private void output() throws IOException {
+    // write the distribution into the output file
+    write("Size\tNumFiles\n");
+    for(int i = 0; i < distribution.length; i++)
+      write(((long)i * step) + "\t" + distribution[i] + "\n");
+    System.out.println("totalFiles = " + totalFiles);
+    System.out.println("totalDirectories = " + totalDirectories);
+    System.out.println("totalBlocks = " + totalBlocks);
+    System.out.println("totalSpace = " + totalSpace);
+    System.out.println("maxFileSize = " + maxFileSize);
+  }
+
+  @Override
+  void leaveEnclosingElement() throws IOException {
+    ImageElement elem = elemS.pop();
+
+    if(elem != ImageElement.INODE &&
+       elem != ImageElement.INODE_UNDER_CONSTRUCTION)
+      return;
+    inInode = false;
+    if(current.numBlocks < 0) {
+      totalDirectories ++;
+      return;
+    }
+    totalFiles++;
+    totalBlocks += current.numBlocks;
+    totalSpace += current.fileSize * current.replication;
+    if(maxFileSize < current.fileSize)
+      maxFileSize = current.fileSize;
+    int high;
+    if(current.fileSize > maxSize)
+      high = distribution.length-1;
+    else
+      high = (int)Math.ceil((double)current.fileSize / step);
+    distribution[high]++;
+    if(totalFiles % 1000000 == 1)
+      System.out.println("Files processed: " + totalFiles
+          + "  Current: " + current.path);
+  }
+
+  @Override
+  void visit(ImageElement element, String value) throws IOException {
+    if(inInode) {
+      switch(element) {
+      case INODE_PATH:
+        current.path = (value.equals("") ? "/" : value);
+        break;
+      case REPLICATION:
+        current.replication = Integer.valueOf(value);
+        break;
+      case NUM_BYTES:
+        current.fileSize += Long.valueOf(value);
+        break;
+      default:
+        break;
+      }
+    }
+  }
+
+  @Override
+  void visitEnclosingElement(ImageElement element) throws IOException {
+    elemS.push(element);
+    if(element == ImageElement.INODE ||
+       element == ImageElement.INODE_UNDER_CONSTRUCTION) {
+      current = new FileContext();
+      inInode = true;
+    }
+  }
+
+  @Override
+  void visitEnclosingElement(ImageElement element,
+      ImageElement key, String value) throws IOException {
+    elemS.push(element);
+    if(element == ImageElement.INODE ||
+       element == ImageElement.INODE_UNDER_CONSTRUCTION)
+      inInode = true;
+    else if(element == ImageElement.BLOCKS)
+      current.numBlocks = Integer.parseInt(value);
+  }
+}

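A standalone sketch of the bucketing rule in leaveEnclosingElement() above: a file of size s is counted in bucket ceil(s / step), and anything larger than maxSize goes into the last bucket. The maxSize, step and file sizes below are illustrative, not the class defaults:

public class FileSizeHistogramSketch {
  public static void main(String[] args) {
    long maxSize = 1000000L;
    int step = 100000;
    int[] distribution = new int[1 + (int) (maxSize / step)];

    long[] fileSizes = { 0L, 1L, 100000L, 250000L, 5000000L };
    for (long size : fileSizes) {
      int bucket = (size > maxSize)
          ? distribution.length - 1                // oversized files land in the last bucket
          : (int) Math.ceil((double) size / step);
      distribution[bucket]++;
    }

    // Same two-column "Size<TAB>NumFiles" shape as the visitor's output file.
    for (int i = 0; i < distribution.length; i++) {
      System.out.println((long) i * step + "\t" + distribution[i]);
    }
  }
}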
+ 83 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoader.java

@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/**
+ * An ImageLoader can accept a DataInputStream to a Hadoop FSImage file
+ * and walk over its structure using the supplied ImageVisitor.
+ *
+ * Each implementation of ImageLoader is designed to rapidly process an
+ * image file.  As long as minor changes are made from one layout version
+ * to another, it is acceptable to tweak one implementation to read the next.
+ * However, if the layout version changes enough that it would make a
+ * processor slow or difficult to read, another processor should be created.
+ * This allows each processor to quickly read an image without getting
+ * bogged down in dealing with significant differences between layout versions.
+ */
+interface ImageLoader {
+
+  /**
+   * @param in DataInputStream pointing to a Hadoop FSImage file
+   * @param v Visitor to apply to the FSImage file
+   * @param enumerateBlocks Should visitor visit each of the file blocks?
+   */
+  public void loadImage(DataInputStream in, ImageVisitor v,
+      boolean enumerateBlocks) throws IOException;
+
+  /**
+   * Can this processor handle the specified version of FSImage file?
+   *
+   * @param version FSImage version file
+   * @return True if this instance can process the file
+   */
+  public boolean canLoadVersion(int version);
+
+  /**
+   * Factory for obtaining version of image loader that can read
+   * a particular image format.
+   */
+  @InterfaceAudience.Private
+  public class LoaderFactory {
+    // Java doesn't support static methods on interfaces, which necessitates
+    // this factory class
+
+    /**
+     * Find an image loader capable of interpreting the specified
+     * layout version number.  If none, return null.
+     *
+     * @param version fsimage layout version number to be processed
+     * @return ImageLoader that can interpret specified version, or null
+     */
+    static public ImageLoader getLoader(int version) {
+      // Easy to add more image processors as they are written
+      ImageLoader[] loaders = { new ImageLoaderCurrent() };
+
+      for (ImageLoader l : loaders) {
+        if (l.canLoadVersion(version))
+          return l;
+      }
+
+      return null;
+    }
+  }
+}

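A minimal sketch of the LoaderFactory lookup described above. Because ImageLoader is package-private, the sketch must live in the same package; the "fsimage" path is illustrative, and a real caller would reopen the stream before handing it to loadImage() together with an ImageVisitor:

package org.apache.hadoop.hdfs.tools.offlineImageViewer;

import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.IOException;

public class LoaderLookupSketch {
  public static void main(String[] args) throws IOException {
    DataInputStream in = new DataInputStream(new FileInputStream("fsimage"));
    try {
      int layoutVersion = in.readInt(); // first int of the legacy image format
      ImageLoader loader = ImageLoader.LoaderFactory.getLoader(layoutVersion);
      if (loader == null) {
        System.err.println("No loader for layout version " + layoutVersion);
      }
      // A real caller would now reopen the stream and call
      // loader.loadImage(in, someVisitor, enumerateBlocks).
    } finally {
      in.close();
    }
  }
}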
+ 821 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java

@@ -0,0 +1,821 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hdfs.protocol.DatanodeInfo.AdminStates;
+import org.apache.hadoop.hdfs.protocol.LayoutFlags;
+import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;
+import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
+import org.apache.hadoop.hdfs.server.namenode.FSImageSerialization;
+import org.apache.hadoop.hdfs.server.namenode.INodeId;
+import org.apache.hadoop.hdfs.server.namenode.NameNodeLayoutVersion;
+import org.apache.hadoop.hdfs.tools.offlineImageViewer.ImageVisitor.ImageElement;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.WritableUtils;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressionCodecFactory;
+import org.apache.hadoop.security.token.delegation.DelegationKey;
+
+/**
+ * ImageLoaderCurrent processes Hadoop FSImage files and walks over
+ * them using a provided ImageVisitor, calling the visitor at each element
+ * enumerated below.
+ *
+ * The only difference between v18 and v19 was the utilization of the
+ * stickybit.  Therefore, the same viewer can read either format.
+ *
+ * Versions -19 fsimage layout (with changes from -16 up):
+ * Image version (int)
+ * Namespace ID (int)
+ * NumFiles (long)
+ * Generation stamp (long)
+ * INodes (count = NumFiles)
+ *  INode
+ *    Path (String)
+ *    Replication (short)
+ *    Modification Time (long as date)
+ *    Access Time (long) // added in -16
+ *    Block size (long)
+ *    Num blocks (int)
+ *    Blocks (count = Num blocks)
+ *      Block
+ *        Block ID (long)
+ *        Num bytes (long)
+ *        Generation stamp (long)
+ *    Namespace Quota (long)
+ *    Diskspace Quota (long) // added in -18
+ *    Permissions
+ *      Username (String)
+ *      Groupname (String)
+ *      OctalPerms (short -> String)  // Modified in -19
+ *    Symlink (String) // added in -23
+ * NumINodesUnderConstruction (int)
+ * INodesUnderConstruction (count = NumINodesUnderConstruction)
+ *  INodeUnderConstruction
+ *    Path (bytes as string)
+ *    Replication (short)
+ *    Modification time (long as date)
+ *    Preferred block size (long)
+ *    Num blocks (int)
+ *    Blocks
+ *      Block
+ *        Block ID (long)
+ *        Num bytes (long)
+ *        Generation stamp (long)
+ *    Permissions
+ *      Username (String)
+ *      Groupname (String)
+ *      OctalPerms (short -> String)
+ *    Client Name (String)
+ *    Client Machine (String)
+ *    NumLocations (int)
+ *    DatanodeDescriptors (count = numLocations) // not loaded into memory
+ *      short                                    // but still in file
+ *      long
+ *      string
+ *      long
+ *      int
+ *      string
+ *      string
+ *      enum
+ *    CurrentDelegationKeyId (int)
+ *    NumDelegationKeys (int)
+ *      DelegationKeys (count = NumDelegationKeys)
+ *        DelegationKeyLength (vint)
+ *        DelegationKey (bytes)
+ *    DelegationTokenSequenceNumber (int)
+ *    NumDelegationTokens (int)
+ *    DelegationTokens (count = NumDelegationTokens)
+ *      DelegationTokenIdentifier
+ *        owner (String)
+ *        renewer (String)
+ *        realUser (String)
+ *        issueDate (vlong)
+ *        maxDate (vlong)
+ *        sequenceNumber (vint)
+ *        masterKeyId (vint)
+ *      expiryTime (long)     
+ *
+ */
+class ImageLoaderCurrent implements ImageLoader {
+  protected final DateFormat dateFormat = 
+                                      new SimpleDateFormat("yyyy-MM-dd HH:mm");
+  private static int[] versions = { -16, -17, -18, -19, -20, -21, -22, -23,
+      -24, -25, -26, -27, -28, -30, -31, -32, -33, -34, -35, -36, -37, -38, -39,
+      -40, -41, -42, -43, -44, -45, -46, -47, -48, -49, -50, -51 };
+  private int imageVersion = 0;
+  
+  private final Map<Long, Boolean> subtreeMap = new HashMap<Long, Boolean>();
+  private final Map<Long, String> dirNodeMap = new HashMap<Long, String>();
+
+  /* (non-Javadoc)
+   * @see ImageLoader#canLoadVersion(int)
+   */
+  @Override
+  public boolean canLoadVersion(int version) {
+    for(int v : versions)
+      if(v == version) return true;
+
+    return false;
+  }
+
+  /* (non-Javadoc)
+   * @see ImageLoader#loadImage(java.io.DataInputStream, ImageVisitor, boolean)
+   */
+  @Override
+  public void loadImage(DataInputStream in, ImageVisitor v,
+      boolean skipBlocks) throws IOException {
+    boolean done = false;
+    try {
+      v.start();
+      v.visitEnclosingElement(ImageElement.FS_IMAGE);
+
+      imageVersion = in.readInt();
+      if( !canLoadVersion(imageVersion))
+        throw new IOException("Cannot process fslayout version " + imageVersion);
+      if (NameNodeLayoutVersion.supports(Feature.ADD_LAYOUT_FLAGS, imageVersion)) {
+        LayoutFlags.read(in);
+      }
+
+      v.visit(ImageElement.IMAGE_VERSION, imageVersion);
+      v.visit(ImageElement.NAMESPACE_ID, in.readInt());
+
+      long numInodes = in.readLong();
+
+      v.visit(ImageElement.GENERATION_STAMP, in.readLong());
+
+      if (NameNodeLayoutVersion.supports(Feature.SEQUENTIAL_BLOCK_ID, imageVersion)) {
+        v.visit(ImageElement.GENERATION_STAMP_V2, in.readLong());
+        v.visit(ImageElement.GENERATION_STAMP_V1_LIMIT, in.readLong());
+        v.visit(ImageElement.LAST_ALLOCATED_BLOCK_ID, in.readLong());
+      }
+
+      if (NameNodeLayoutVersion.supports(Feature.STORED_TXIDS, imageVersion)) {
+        v.visit(ImageElement.TRANSACTION_ID, in.readLong());
+      }
+      
+      if (NameNodeLayoutVersion.supports(Feature.ADD_INODE_ID, imageVersion)) {
+        v.visit(ImageElement.LAST_INODE_ID, in.readLong());
+      }
+      
+      boolean supportSnapshot = NameNodeLayoutVersion.supports(Feature.SNAPSHOT,
+          imageVersion);
+      if (supportSnapshot) {
+        v.visit(ImageElement.SNAPSHOT_COUNTER, in.readInt());
+        int numSnapshots = in.readInt();
+        v.visit(ImageElement.NUM_SNAPSHOTS_TOTAL, numSnapshots);
+        for (int i = 0; i < numSnapshots; i++) {
+          processSnapshot(in, v);
+        }
+      }
+      
+      if (NameNodeLayoutVersion.supports(Feature.FSIMAGE_COMPRESSION, imageVersion)) {
+        boolean isCompressed = in.readBoolean();
+        v.visit(ImageElement.IS_COMPRESSED, String.valueOf(isCompressed));
+        if (isCompressed) {
+          String codecClassName = Text.readString(in);
+          v.visit(ImageElement.COMPRESS_CODEC, codecClassName);
+          CompressionCodecFactory codecFac = new CompressionCodecFactory(
+              new Configuration());
+          CompressionCodec codec = codecFac.getCodecByClassName(codecClassName);
+          if (codec == null) {
+            throw new IOException("Image compression codec not supported: "
+                + codecClassName);
+          }
+          in = new DataInputStream(codec.createInputStream(in));
+        }
+      }
+      processINodes(in, v, numInodes, skipBlocks, supportSnapshot);
+      subtreeMap.clear();
+      dirNodeMap.clear();
+
+      processINodesUC(in, v, skipBlocks);
+
+      if (NameNodeLayoutVersion.supports(Feature.DELEGATION_TOKEN, imageVersion)) {
+        processDelegationTokens(in, v);
+      }
+      
+      if (NameNodeLayoutVersion.supports(Feature.CACHING, imageVersion)) {
+        processCacheManagerState(in, v);
+      }
+      v.leaveEnclosingElement(); // FSImage
+      done = true;
+    } finally {
+      if (done) {
+        v.finish();
+      } else {
+        v.finishAbnormally();
+      }
+    }
+  }
+
+  /**
+   * Process CacheManager state from the fsimage.
+   */
+  private void processCacheManagerState(DataInputStream in, ImageVisitor v)
+      throws IOException {
+    v.visit(ImageElement.CACHE_NEXT_ENTRY_ID, in.readLong());
+    final int numPools = in.readInt();
+    for (int i=0; i<numPools; i++) {
+      v.visit(ImageElement.CACHE_POOL_NAME, Text.readString(in));
+      processCachePoolPermission(in, v);
+      v.visit(ImageElement.CACHE_POOL_WEIGHT, in.readInt());
+    }
+    final int numEntries = in.readInt();
+    for (int i=0; i<numEntries; i++) {
+      v.visit(ImageElement.CACHE_ENTRY_PATH, Text.readString(in));
+      v.visit(ImageElement.CACHE_ENTRY_REPLICATION, in.readShort());
+      v.visit(ImageElement.CACHE_ENTRY_POOL_NAME, Text.readString(in));
+    }
+  }
+  /**
+   * Process the Delegation Token related section in fsimage.
+   * 
+   * @param in DataInputStream to process
+   * @param v Visitor to walk over records
+   */
+  private void processDelegationTokens(DataInputStream in, ImageVisitor v)
+      throws IOException {
+    v.visit(ImageElement.CURRENT_DELEGATION_KEY_ID, in.readInt());
+    int numDKeys = in.readInt();
+    v.visitEnclosingElement(ImageElement.DELEGATION_KEYS,
+        ImageElement.NUM_DELEGATION_KEYS, numDKeys);
+    for(int i =0; i < numDKeys; i++) {
+      DelegationKey key = new DelegationKey();
+      key.readFields(in);
+      v.visit(ImageElement.DELEGATION_KEY, key.toString());
+    }
+    v.leaveEnclosingElement();
+    v.visit(ImageElement.DELEGATION_TOKEN_SEQUENCE_NUMBER, in.readInt());
+    int numDTokens = in.readInt();
+    v.visitEnclosingElement(ImageElement.DELEGATION_TOKENS,
+        ImageElement.NUM_DELEGATION_TOKENS, numDTokens);
+    for(int i=0; i<numDTokens; i++){
+      DelegationTokenIdentifier id = new  DelegationTokenIdentifier();
+      id.readFields(in);
+      long expiryTime = in.readLong();
+      v.visitEnclosingElement(ImageElement.DELEGATION_TOKEN_IDENTIFIER);
+      v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_KIND,
+          id.getKind().toString());
+      v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_SEQNO,
+          id.getSequenceNumber());
+      v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_OWNER,
+          id.getOwner().toString());
+      v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_RENEWER,
+          id.getRenewer().toString());
+      v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_REALUSER,
+          id.getRealUser().toString());
+      v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_ISSUE_DATE,
+          id.getIssueDate());
+      v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_MAX_DATE,
+          id.getMaxDate());
+      v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_EXPIRY_TIME,
+          expiryTime);
+      v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_MASTER_KEY_ID,
+          id.getMasterKeyId());
+      v.leaveEnclosingElement(); // DELEGATION_TOKEN_IDENTIFIER
+    }
+    v.leaveEnclosingElement(); // DELEGATION_TOKENS
+  }
+
+  /**
+   * Process the INodes under construction section of the fsimage.
+   *
+   * @param in DataInputStream to process
+   * @param v Visitor to walk over inodes
+   * @param skipBlocks Walk over each block?
+   */
+  private void processINodesUC(DataInputStream in, ImageVisitor v,
+      boolean skipBlocks) throws IOException {
+    int numINUC = in.readInt();
+
+    v.visitEnclosingElement(ImageElement.INODES_UNDER_CONSTRUCTION,
+                           ImageElement.NUM_INODES_UNDER_CONSTRUCTION, numINUC);
+
+    for(int i = 0; i < numINUC; i++) {
+      v.visitEnclosingElement(ImageElement.INODE_UNDER_CONSTRUCTION);
+      byte [] name = FSImageSerialization.readBytes(in);
+      String n = new String(name, "UTF8");
+      v.visit(ImageElement.INODE_PATH, n);
+      
+      if (NameNodeLayoutVersion.supports(Feature.ADD_INODE_ID, imageVersion)) {
+        long inodeId = in.readLong();
+        v.visit(ImageElement.INODE_ID, inodeId);
+      }
+      
+      v.visit(ImageElement.REPLICATION, in.readShort());
+      v.visit(ImageElement.MODIFICATION_TIME, formatDate(in.readLong()));
+
+      v.visit(ImageElement.PREFERRED_BLOCK_SIZE, in.readLong());
+      int numBlocks = in.readInt();
+      processBlocks(in, v, numBlocks, skipBlocks);
+
+      processPermission(in, v);
+      v.visit(ImageElement.CLIENT_NAME, FSImageSerialization.readString(in));
+      v.visit(ImageElement.CLIENT_MACHINE, FSImageSerialization.readString(in));
+
+      // Skip over the datanode descriptors, which are still stored in the
+      // file but are not used by the datanode or loaded into memory
+      int numLocs = in.readInt();
+      for(int j = 0; j < numLocs; j++) {
+        in.readShort();
+        in.readLong();
+        in.readLong();
+        in.readLong();
+        in.readInt();
+        FSImageSerialization.readString(in);
+        FSImageSerialization.readString(in);
+        WritableUtils.readEnum(in, AdminStates.class);
+      }
+
+      v.leaveEnclosingElement(); // INodeUnderConstruction
+    }
+
+    v.leaveEnclosingElement(); // INodesUnderConstruction
+  }
+
+  /**
+   * Process the blocks section of the fsimage.
+   *
+   * @param in Datastream to process
+   * @param v Visitor to walk over inodes
+   * @param skipBlocks Walk over each block?
+   */
+  private void processBlocks(DataInputStream in, ImageVisitor v,
+      int numBlocks, boolean skipBlocks) throws IOException {
+    v.visitEnclosingElement(ImageElement.BLOCKS,
+                            ImageElement.NUM_BLOCKS, numBlocks);
+    
+    // directory or symlink or reference node, no blocks to process    
+    if(numBlocks < 0) { 
+      v.leaveEnclosingElement(); // Blocks
+      return;
+    }
+    
+    if(skipBlocks) {
+      int bytesToSkip = ((Long.SIZE * 3 /* fields */) / 8 /*bits*/) * numBlocks;
+      if(in.skipBytes(bytesToSkip) != bytesToSkip)
+        throw new IOException("Error skipping over blocks");
+      
+    } else {
+      for(int j = 0; j < numBlocks; j++) {
+        v.visitEnclosingElement(ImageElement.BLOCK);
+        v.visit(ImageElement.BLOCK_ID, in.readLong());
+        v.visit(ImageElement.NUM_BYTES, in.readLong());
+        v.visit(ImageElement.GENERATION_STAMP, in.readLong());
+        v.leaveEnclosingElement(); // Block
+      }
+    }
+    v.leaveEnclosingElement(); // Blocks
+  }
+
+  /**
+   * Extract the INode permissions stored in the fsimage file.
+   *
+   * @param in Datastream to process
+   * @param v Visitor to walk over inodes
+   */
+  private void processPermission(DataInputStream in, ImageVisitor v)
+      throws IOException {
+    v.visitEnclosingElement(ImageElement.PERMISSIONS);
+    v.visit(ImageElement.USER_NAME, Text.readString(in));
+    v.visit(ImageElement.GROUP_NAME, Text.readString(in));
+    FsPermission fsp = new FsPermission(in.readShort());
+    v.visit(ImageElement.PERMISSION_STRING, fsp.toString());
+    v.leaveEnclosingElement(); // Permissions
+  }
+
+  /**
+   * Extract CachePool permissions stored in the fsimage file.
+   *
+   * @param in Datastream to process
+   * @param v Visitor to walk over inodes
+   */
+  private void processCachePoolPermission(DataInputStream in, ImageVisitor v)
+      throws IOException {
+    v.visitEnclosingElement(ImageElement.PERMISSIONS);
+    v.visit(ImageElement.CACHE_POOL_OWNER_NAME, Text.readString(in));
+    v.visit(ImageElement.CACHE_POOL_GROUP_NAME, Text.readString(in));
+    FsPermission fsp = new FsPermission(in.readShort());
+    v.visit(ImageElement.CACHE_POOL_PERMISSION_STRING, fsp.toString());
+    v.leaveEnclosingElement(); // Permissions
+  }
+
+  /**
+   * Process the INode records stored in the fsimage.
+   *
+   * @param in Datastream to process
+   * @param v Visitor to walk over INodes
+   * @param numInodes Number of INodes stored in file
+   * @param skipBlocks Process all the blocks within the INode?
+   * @param supportSnapshot Whether or not the imageVersion supports snapshot
+   * @throws VisitException
+   * @throws IOException
+   */
+  private void processINodes(DataInputStream in, ImageVisitor v,
+      long numInodes, boolean skipBlocks, boolean supportSnapshot)
+      throws IOException {
+    v.visitEnclosingElement(ImageElement.INODES,
+        ImageElement.NUM_INODES, numInodes);
+    
+    if (NameNodeLayoutVersion.supports(Feature.FSIMAGE_NAME_OPTIMIZATION, imageVersion)) {
+      if (!supportSnapshot) {
+        processLocalNameINodes(in, v, numInodes, skipBlocks);
+      } else {
+        processLocalNameINodesWithSnapshot(in, v, skipBlocks);
+      }
+    } else { // full path name
+      processFullNameINodes(in, v, numInodes, skipBlocks);
+    }
+
+    
+    v.leaveEnclosingElement(); // INodes
+  }
+  
+  /**
+   * Process image with local path name
+   * 
+   * @param in image stream
+   * @param v visitor
+   * @param numInodes number of inodes to read
+   * @param skipBlocks skip blocks or not
+   * @throws IOException if any error occurs
+   */
+  private void processLocalNameINodes(DataInputStream in, ImageVisitor v,
+      long numInodes, boolean skipBlocks) throws IOException {
+    // process root
+    processINode(in, v, skipBlocks, "", false);
+    numInodes--;
+    while (numInodes > 0) {
+      numInodes -= processDirectory(in, v, skipBlocks);
+    }
+  }
+  
+  private int processDirectory(DataInputStream in, ImageVisitor v,
+     boolean skipBlocks) throws IOException {
+    String parentName = FSImageSerialization.readString(in);
+    return processChildren(in, v, skipBlocks, parentName);
+  }
+  
+  /**
+   * Process image with local path name and snapshot support
+   * 
+   * @param in image stream
+   * @param v visitor
+   * @param skipBlocks skip blocks or not
+   */
+  private void processLocalNameINodesWithSnapshot(DataInputStream in,
+      ImageVisitor v, boolean skipBlocks) throws IOException {
+    // process root
+    processINode(in, v, skipBlocks, "", false);
+    processDirectoryWithSnapshot(in, v, skipBlocks);
+  }
+  
+  /**
+   * Process directories when snapshot is supported.
+   */
+  private void processDirectoryWithSnapshot(DataInputStream in, ImageVisitor v,
+      boolean skipBlocks) throws IOException {
+    // 1. load dir node id
+    long inodeId = in.readLong();
+    
+    String dirName = dirNodeMap.remove(inodeId);
+    Boolean visitedRef = subtreeMap.get(inodeId);
+    if (visitedRef != null) {
+      if (visitedRef.booleanValue()) { // the subtree has been visited
+        return;
+      } else { // first time to visit
+        subtreeMap.put(inodeId, true);
+      }
+    } // else the dir is not linked by a RefNode, thus cannot be revisited
+    
+    // 2. load possible snapshots
+    processSnapshots(in, v, dirName);
+    // 3. load children nodes
+    processChildren(in, v, skipBlocks, dirName);
+    // 4. load possible directory diff list
+    processDirectoryDiffList(in, v, dirName);
+    // recursively process sub-directories
+    final int numSubTree = in.readInt();
+    for (int i = 0; i < numSubTree; i++) {
+      processDirectoryWithSnapshot(in, v, skipBlocks);
+    }
+  }
+  
+  /**
+   * Process snapshots of a snapshottable directory
+   */
+  private void processSnapshots(DataInputStream in, ImageVisitor v,
+      String rootName) throws IOException {
+    final int numSnapshots = in.readInt();
+    if (numSnapshots >= 0) {
+      v.visitEnclosingElement(ImageElement.SNAPSHOTS,
+          ImageElement.NUM_SNAPSHOTS, numSnapshots);
+      for (int i = 0; i < numSnapshots; i++) {
+        // process snapshot
+        v.visitEnclosingElement(ImageElement.SNAPSHOT);
+        v.visit(ImageElement.SNAPSHOT_ID, in.readInt());
+        v.leaveEnclosingElement();
+      }
+      v.visit(ImageElement.SNAPSHOT_QUOTA, in.readInt());
+      v.leaveEnclosingElement();
+    }
+  }
+  
+  private void processSnapshot(DataInputStream in, ImageVisitor v)
+      throws IOException {
+    v.visitEnclosingElement(ImageElement.SNAPSHOT);
+    v.visit(ImageElement.SNAPSHOT_ID, in.readInt());
+    // process root of snapshot
+    v.visitEnclosingElement(ImageElement.SNAPSHOT_ROOT);
+    processINode(in, v, true, "", false);
+    v.leaveEnclosingElement();
+    v.leaveEnclosingElement();
+  }
+  
+  private void processDirectoryDiffList(DataInputStream in, ImageVisitor v,
+      String currentINodeName) throws IOException {
+    final int numDirDiff = in.readInt();
+    if (numDirDiff >= 0) {
+      v.visitEnclosingElement(ImageElement.SNAPSHOT_DIR_DIFFS,
+          ImageElement.NUM_SNAPSHOT_DIR_DIFF, numDirDiff);
+      for (int i = 0; i < numDirDiff; i++) {
+        // process directory diffs in reverse chronological order
+        processDirectoryDiff(in, v, currentINodeName); 
+      }
+      v.leaveEnclosingElement();
+    }
+  }
+  
+  private void processDirectoryDiff(DataInputStream in, ImageVisitor v,
+      String currentINodeName) throws IOException {
+    v.visitEnclosingElement(ImageElement.SNAPSHOT_DIR_DIFF);
+    int snapshotId = in.readInt();
+    v.visit(ImageElement.SNAPSHOT_DIFF_SNAPSHOTID, snapshotId);
+    v.visit(ImageElement.SNAPSHOT_DIR_DIFF_CHILDREN_SIZE, in.readInt());
+    
+    // process snapshotINode
+    boolean useRoot = in.readBoolean();
+    if (!useRoot) {
+      if (in.readBoolean()) {
+        v.visitEnclosingElement(ImageElement.SNAPSHOT_INODE_DIRECTORY_ATTRIBUTES);
+        if (NameNodeLayoutVersion.supports(Feature.OPTIMIZE_SNAPSHOT_INODES, imageVersion)) {
+          processINodeDirectoryAttributes(in, v, currentINodeName);
+        } else {
+          processINode(in, v, true, currentINodeName, true);
+        }
+        v.leaveEnclosingElement();
+      }
+    }
+    
+    // process createdList
+    int createdSize = in.readInt();
+    v.visitEnclosingElement(ImageElement.SNAPSHOT_DIR_DIFF_CREATEDLIST,
+        ImageElement.SNAPSHOT_DIR_DIFF_CREATEDLIST_SIZE, createdSize);
+    for (int i = 0; i < createdSize; i++) {
+      String createdNode = FSImageSerialization.readString(in);
+      v.visit(ImageElement.SNAPSHOT_DIR_DIFF_CREATED_INODE, createdNode);
+    }
+    v.leaveEnclosingElement();
+    
+    // process deletedList
+    int deletedSize = in.readInt();
+    v.visitEnclosingElement(ImageElement.SNAPSHOT_DIR_DIFF_DELETEDLIST,
+        ImageElement.SNAPSHOT_DIR_DIFF_DELETEDLIST_SIZE, deletedSize);
+    for (int i = 0; i < deletedSize; i++) {
+      v.visitEnclosingElement(ImageElement.SNAPSHOT_DIR_DIFF_DELETED_INODE);
+      processINode(in, v, false, currentINodeName, true);
+      v.leaveEnclosingElement();
+    }
+    v.leaveEnclosingElement();
+    v.leaveEnclosingElement();
+  }
+
+  private void processINodeDirectoryAttributes(DataInputStream in, ImageVisitor v,
+      String parentName) throws IOException {
+    final String pathName = readINodePath(in, parentName);
+    v.visit(ImageElement.INODE_PATH, pathName);
+    processPermission(in, v);
+    v.visit(ImageElement.MODIFICATION_TIME, formatDate(in.readLong()));
+
+    v.visit(ImageElement.NS_QUOTA, in.readLong());
+    v.visit(ImageElement.DS_QUOTA, in.readLong());
+  }
+
+  /** Process children under a directory */
+  private int processChildren(DataInputStream in, ImageVisitor v,
+      boolean skipBlocks, String parentName) throws IOException {
+    int numChildren = in.readInt();
+    for (int i = 0; i < numChildren; i++) {
+      processINode(in, v, skipBlocks, parentName, false);
+    }
+    return numChildren;
+  }
+  
+  /**
+   * Process image with full path name
+   * 
+   * @param in image stream
+   * @param v visitor
+   * @param numInodes number of inodes to read
+   * @param skipBlocks skip blocks or not
+   * @throws IOException if any error occurs
+   */
+  private void processFullNameINodes(DataInputStream in, ImageVisitor v,
+      long numInodes, boolean skipBlocks) throws IOException {
+    for(long i = 0; i < numInodes; i++) {
+      processINode(in, v, skipBlocks, null, false);
+    }
+  }
+ 
+  private String readINodePath(DataInputStream in, String parentName)
+      throws IOException {
+    String pathName = FSImageSerialization.readString(in);
+    if (parentName != null) {  // local name
+      pathName = "/" + pathName;
+      if (!"/".equals(parentName)) { // children of non-root directory
+        pathName = parentName + pathName;
+      }
+    }
+    return pathName;
+  }
+
+  /**
+   * Process an INode
+   * 
+   * @param in image stream
+   * @param v visitor
+   * @param skipBlocks skip blocks or not
+   * @param parentName the name of its parent node
+   * @param isSnapshotCopy whether or not the inode is a snapshot copy
+   * @throws IOException
+   */
+  private void processINode(DataInputStream in, ImageVisitor v,
+      boolean skipBlocks, String parentName, boolean isSnapshotCopy)
+      throws IOException {
+    boolean supportSnapshot = 
+        NameNodeLayoutVersion.supports(Feature.SNAPSHOT, imageVersion);
+    boolean supportInodeId = 
+        NameNodeLayoutVersion.supports(Feature.ADD_INODE_ID, imageVersion);
+    
+    v.visitEnclosingElement(ImageElement.INODE);
+    final String pathName = readINodePath(in, parentName);
+    v.visit(ImageElement.INODE_PATH, pathName);
+
+    long inodeId = INodeId.GRANDFATHER_INODE_ID;
+    if (supportInodeId) {
+      inodeId = in.readLong();
+      v.visit(ImageElement.INODE_ID, inodeId);
+    }
+    v.visit(ImageElement.REPLICATION, in.readShort());
+    v.visit(ImageElement.MODIFICATION_TIME, formatDate(in.readLong()));
+    if(NameNodeLayoutVersion.supports(Feature.FILE_ACCESS_TIME, imageVersion))
+      v.visit(ImageElement.ACCESS_TIME, formatDate(in.readLong()));
+    v.visit(ImageElement.BLOCK_SIZE, in.readLong());
+    int numBlocks = in.readInt();
+
+    processBlocks(in, v, numBlocks, skipBlocks);
+    
+    if (numBlocks >= 0) { // File
+      if (supportSnapshot) {
+        // make sure subtreeMap only contains entries for directories
+        subtreeMap.remove(inodeId);
+        // process file diffs
+        processFileDiffList(in, v, parentName);
+        if (isSnapshotCopy) {
+          boolean underConstruction = in.readBoolean();
+          if (underConstruction) {
+            v.visit(ImageElement.CLIENT_NAME,
+                FSImageSerialization.readString(in));
+            v.visit(ImageElement.CLIENT_MACHINE,
+                FSImageSerialization.readString(in));
+          }
+        }
+      }
+      processPermission(in, v);
+    } else if (numBlocks == -1) { // Directory
+      if (supportSnapshot && supportInodeId) {
+        dirNodeMap.put(inodeId, pathName);
+      }
+      v.visit(ImageElement.NS_QUOTA, numBlocks == -1 ? in.readLong() : -1);
+      if (NameNodeLayoutVersion.supports(Feature.DISKSPACE_QUOTA, imageVersion))
+        v.visit(ImageElement.DS_QUOTA, numBlocks == -1 ? in.readLong() : -1);
+      if (supportSnapshot) {
+        boolean snapshottable = in.readBoolean();
+        if (!snapshottable) {
+          boolean withSnapshot = in.readBoolean();
+          v.visit(ImageElement.IS_WITHSNAPSHOT_DIR, Boolean.toString(withSnapshot));
+        } else {
+          v.visit(ImageElement.IS_SNAPSHOTTABLE_DIR, Boolean.toString(snapshottable));
+        }
+      }
+      processPermission(in, v);
+    } else if (numBlocks == -2) {
+      v.visit(ImageElement.SYMLINK, Text.readString(in));
+      processPermission(in, v);
+    } else if (numBlocks == -3) { // reference node
+      final boolean isWithName = in.readBoolean();
+      int snapshotId = in.readInt();
+      if (isWithName) {
+        v.visit(ImageElement.SNAPSHOT_LAST_SNAPSHOT_ID, snapshotId);
+      } else {
+        v.visit(ImageElement.SNAPSHOT_DST_SNAPSHOT_ID, snapshotId);
+      }
+      
+      final boolean firstReferred = in.readBoolean();
+      if (firstReferred) {
+        // if a subtree is linked by multiple "parents", the corresponding dir
+        // must be referred by a reference node. we put the reference node into
+        // the subtreeMap here and let its value be false. when we later visit
+        // the subtree for the first time, we change the value to true.
+        subtreeMap.put(inodeId, false);
+        v.visitEnclosingElement(ImageElement.SNAPSHOT_REF_INODE);
+        processINode(in, v, skipBlocks, parentName, isSnapshotCopy);
+        v.leaveEnclosingElement();  // referred inode    
+      } else {
+        v.visit(ImageElement.SNAPSHOT_REF_INODE_ID, in.readLong());
+      }
+    }
+
+    v.leaveEnclosingElement(); // INode
+  }
+
+  private void processINodeFileAttributes(DataInputStream in, ImageVisitor v,
+      String parentName) throws IOException {
+    final String pathName = readINodePath(in, parentName);
+    v.visit(ImageElement.INODE_PATH, pathName);
+    processPermission(in, v);
+    v.visit(ImageElement.MODIFICATION_TIME, formatDate(in.readLong()));
+    if(NameNodeLayoutVersion.supports(Feature.FILE_ACCESS_TIME, imageVersion)) {
+      v.visit(ImageElement.ACCESS_TIME, formatDate(in.readLong()));
+    }
+
+    v.visit(ImageElement.REPLICATION, in.readShort());
+    v.visit(ImageElement.BLOCK_SIZE, in.readLong());
+  }
+  
+  private void processFileDiffList(DataInputStream in, ImageVisitor v,
+      String currentINodeName) throws IOException {
+    final int size = in.readInt();
+    if (size >= 0) {
+      v.visitEnclosingElement(ImageElement.SNAPSHOT_FILE_DIFFS,
+          ImageElement.NUM_SNAPSHOT_FILE_DIFF, size);
+      for (int i = 0; i < size; i++) {
+        processFileDiff(in, v, currentINodeName);
+      }
+      v.leaveEnclosingElement();
+    }
+  }
+  
+  private void processFileDiff(DataInputStream in, ImageVisitor v,
+      String currentINodeName) throws IOException {
+    int snapshotId = in.readInt();
+    v.visitEnclosingElement(ImageElement.SNAPSHOT_FILE_DIFF,
+        ImageElement.SNAPSHOT_DIFF_SNAPSHOTID, snapshotId);
+    v.visit(ImageElement.SNAPSHOT_FILE_SIZE, in.readLong());
+    if (in.readBoolean()) {
+      v.visitEnclosingElement(ImageElement.SNAPSHOT_INODE_FILE_ATTRIBUTES);
+      if (NameNodeLayoutVersion.supports(Feature.OPTIMIZE_SNAPSHOT_INODES, imageVersion)) {
+        processINodeFileAttributes(in, v, currentINodeName);
+      } else {
+        processINode(in, v, true, currentINodeName, true);
+      }
+      v.leaveEnclosingElement();
+    }
+    v.leaveEnclosingElement();
+  }
+  
+  /**
+   * Helper method to format dates during processing.
+   * @param date Date as read from image file
+   * @return formatted String version of the date
+   */
+  private String formatDate(long date) {
+    return dateFormat.format(new Date(date));
+  }
+}
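
For orientation, the numBlocks value read in processINode doubles as a type tag: a non-negative value is a regular file's block count, while -1, -2 and -3 mark directories, symlinks and snapshot reference nodes. A minimal sketch of that dispatch, using a hypothetical helper name that is not part of the patch:

// Hypothetical helper mirroring the sentinel handling in processINode above.
static String inodeKind(int numBlocks) {
  if (numBlocks >= 0) {
    return "FILE";        // numBlocks is the actual block count
  } else if (numBlocks == -1) {
    return "DIRECTORY";   // quotas and snapshot flags follow in the stream
  } else if (numBlocks == -2) {
    return "SYMLINK";     // the link target string follows
  } else {                // numBlocks == -3 in the image format shown above
    return "REFERENCE";   // WithName/DstReference snapshot reference node
  }
}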

+ 212 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageVisitor.java

@@ -0,0 +1,212 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.IOException;
+
+/**
+ * An implementation of ImageVisitor can traverse the structure of a
+ * Hadoop fsimage and respond to each of the structures within the file.
+ */
+abstract class ImageVisitor {
+
+  /**
+   * Structural elements of an FSImage that may be encountered within the
+   * file. ImageVisitors are able to handle processing any of these elements.
+   */
+  public enum ImageElement {
+    FS_IMAGE,
+    IMAGE_VERSION,
+    NAMESPACE_ID,
+    IS_COMPRESSED,
+    COMPRESS_CODEC,
+    LAYOUT_VERSION,
+    NUM_INODES,
+    GENERATION_STAMP,
+    GENERATION_STAMP_V2,
+    GENERATION_STAMP_V1_LIMIT,
+    LAST_ALLOCATED_BLOCK_ID,
+    INODES,
+    INODE,
+    INODE_PATH,
+    REPLICATION,
+    MODIFICATION_TIME,
+    ACCESS_TIME,
+    BLOCK_SIZE,
+    NUM_BLOCKS,
+    BLOCKS,
+    BLOCK,
+    BLOCK_ID,
+    NUM_BYTES,
+    NS_QUOTA,
+    DS_QUOTA,
+    PERMISSIONS,
+    SYMLINK,
+    NUM_INODES_UNDER_CONSTRUCTION,
+    INODES_UNDER_CONSTRUCTION,
+    INODE_UNDER_CONSTRUCTION,
+    PREFERRED_BLOCK_SIZE,
+    CLIENT_NAME,
+    CLIENT_MACHINE,
+    USER_NAME,
+    GROUP_NAME,
+    PERMISSION_STRING,
+    CURRENT_DELEGATION_KEY_ID,
+    NUM_DELEGATION_KEYS,
+    DELEGATION_KEYS,
+    DELEGATION_KEY,
+    DELEGATION_TOKEN_SEQUENCE_NUMBER,
+    NUM_DELEGATION_TOKENS,
+    DELEGATION_TOKENS,
+    DELEGATION_TOKEN_IDENTIFIER,
+    DELEGATION_TOKEN_IDENTIFIER_KIND,
+    DELEGATION_TOKEN_IDENTIFIER_SEQNO,
+    DELEGATION_TOKEN_IDENTIFIER_OWNER,
+    DELEGATION_TOKEN_IDENTIFIER_RENEWER,
+    DELEGATION_TOKEN_IDENTIFIER_REALUSER,
+    DELEGATION_TOKEN_IDENTIFIER_ISSUE_DATE,
+    DELEGATION_TOKEN_IDENTIFIER_MAX_DATE,
+    DELEGATION_TOKEN_IDENTIFIER_EXPIRY_TIME,
+    DELEGATION_TOKEN_IDENTIFIER_MASTER_KEY_ID,
+    TRANSACTION_ID,
+    LAST_INODE_ID,
+    INODE_ID,
+
+    SNAPSHOT_COUNTER,
+    NUM_SNAPSHOTS_TOTAL,
+    NUM_SNAPSHOTS,
+    SNAPSHOTS,
+    SNAPSHOT,
+    SNAPSHOT_ID,
+    SNAPSHOT_ROOT,
+    SNAPSHOT_QUOTA,
+    NUM_SNAPSHOT_DIR_DIFF,
+    SNAPSHOT_DIR_DIFFS,
+    SNAPSHOT_DIR_DIFF,
+    SNAPSHOT_DIFF_SNAPSHOTID,
+    SNAPSHOT_DIR_DIFF_CHILDREN_SIZE,
+    SNAPSHOT_INODE_FILE_ATTRIBUTES,
+    SNAPSHOT_INODE_DIRECTORY_ATTRIBUTES,
+    SNAPSHOT_DIR_DIFF_CREATEDLIST,
+    SNAPSHOT_DIR_DIFF_CREATEDLIST_SIZE,
+    SNAPSHOT_DIR_DIFF_CREATED_INODE,
+    SNAPSHOT_DIR_DIFF_DELETEDLIST,
+    SNAPSHOT_DIR_DIFF_DELETEDLIST_SIZE,
+    SNAPSHOT_DIR_DIFF_DELETED_INODE,
+    IS_SNAPSHOTTABLE_DIR,
+    IS_WITHSNAPSHOT_DIR,
+    SNAPSHOT_FILE_DIFFS,
+    SNAPSHOT_FILE_DIFF,
+    NUM_SNAPSHOT_FILE_DIFF,
+    SNAPSHOT_FILE_SIZE,
+    SNAPSHOT_DST_SNAPSHOT_ID,
+    SNAPSHOT_LAST_SNAPSHOT_ID,
+    SNAPSHOT_REF_INODE_ID,
+    SNAPSHOT_REF_INODE,
+
+    CACHE_NEXT_ENTRY_ID,
+    CACHE_NUM_POOLS,
+    CACHE_POOL_NAME,
+    CACHE_POOL_OWNER_NAME,
+    CACHE_POOL_GROUP_NAME,
+    CACHE_POOL_PERMISSION_STRING,
+    CACHE_POOL_WEIGHT,
+    CACHE_NUM_ENTRIES,
+    CACHE_ENTRY_PATH,
+    CACHE_ENTRY_REPLICATION,
+    CACHE_ENTRY_POOL_NAME
+  }
+  
+  /**
+   * Begin visiting the fsimage structure.  Opportunity to perform
+   * any initialization necessary for the implementing visitor.
+   */
+  abstract void start() throws IOException;
+
+  /**
+   * Finish visiting the fsimage structure.  Opportunity to perform any
+   * clean up necessary for the implementing visitor.
+   */
+  abstract void finish() throws IOException;
+
+  /**
+   * Finish visiting the fsimage structure after an error has occurred
+   * during the processing.  Opportunity to perform any clean up necessary
+   * for the implementing visitor.
+   */
+  abstract void finishAbnormally() throws IOException;
+
+  /**
+   * Visit a non-enclosing element of the fsimage with the specified value.
+   *
+   * @param element FSImage element
+   * @param value Element's value
+   */
+  abstract void visit(ImageElement element, String value) throws IOException;
+
+  // Convenience methods to automatically convert numeric value types to strings
+  void visit(ImageElement element, int value) throws IOException {
+    visit(element, Integer.toString(value));
+  }
+
+  void visit(ImageElement element, long value) throws IOException {
+    visit(element, Long.toString(value));
+  }
+
+  /**
+   * Begin visiting an element that encloses another element, such as
+   * the beginning of the list of blocks that comprise a file.
+   *
+   * @param element Element being visited
+   */
+  abstract void visitEnclosingElement(ImageElement element)
+     throws IOException;
+
+  /**
+   * Begin visiting an element that encloses another element, such as
+   * the beginning of the list of blocks that comprise a file.
+   *
+   * Also provide an additional key and value for the element, such as the
+   * number of items within the element.
+   *
+   * @param element Element being visited
+   * @param key Key describing the element being visited
+   * @param value Value associated with element being visited
+   */
+  abstract void visitEnclosingElement(ImageElement element,
+      ImageElement key, String value) throws IOException;
+
+  // Convenience methods to automatically convert value types to strings
+  void visitEnclosingElement(ImageElement element,
+      ImageElement key, int value)
+     throws IOException {
+    visitEnclosingElement(element, key, Integer.toString(value));
+  }
+
+  void visitEnclosingElement(ImageElement element,
+      ImageElement key, long value)
+     throws IOException {
+    visitEnclosingElement(element, key, Long.toString(value));
+  }
+
+  /**
+   * Leave current enclosing element.  Called, for instance, at the end of
+   * processing the blocks that comprise a file.
+   */
+  abstract void leaveEnclosingElement() throws IOException;
+}
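
As a rough illustration of this contract, a trivial visitor that only counts inodes could look like the sketch below; the class name and behavior are illustrative only and not part of the patch.

package org.apache.hadoop.hdfs.tools.offlineImageViewer;

import java.io.IOException;

/** Illustrative visitor that counts INODE elements and ignores everything else. */
class InodeCountingVisitor extends ImageVisitor {
  private long inodes = 0;

  @Override
  void start() throws IOException {}

  @Override
  void finish() throws IOException {
    System.out.println("Total inodes: " + inodes);
  }

  @Override
  void finishAbnormally() throws IOException {
    finish();
  }

  @Override
  void visit(ImageElement element, String value) throws IOException {}

  @Override
  void visitEnclosingElement(ImageElement element) throws IOException {
    if (element == ImageElement.INODE) {
      inodes++;
    }
  }

  @Override
  void visitEnclosingElement(ImageElement element, ImageElement key,
      String value) throws IOException {
    visitEnclosingElement(element);
  }

  @Override
  void leaveEnclosingElement() throws IOException {}
}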

+ 111 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/IndentedImageVisitor.java

@@ -0,0 +1,111 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.IOException;
+import java.util.Date;
+
+/**
+ * IndentedImageVisitor walks over an FSImage and displays its structure 
+ * using indenting to organize sections within the image file.
+ */
+class IndentedImageVisitor extends TextWriterImageVisitor {
+  
+  public IndentedImageVisitor(String filename) throws IOException {
+    super(filename);
+  }
+
+  public IndentedImageVisitor(String filename, boolean printToScreen) throws IOException {
+    super(filename, printToScreen);
+  }
+
+  final private DepthCounter dc = new DepthCounter(); // to track leading spacing
+
+  @Override
+  void start() throws IOException {}
+
+  @Override
+  void finish() throws IOException { super.finish(); }
+
+  @Override
+  void finishAbnormally() throws IOException {
+    System.out.println("*** Image processing finished abnormally.  Ending ***");
+    super.finishAbnormally();
+  }
+
+  @Override
+  void leaveEnclosingElement() throws IOException {
+    dc.decLevel();
+  }
+
+  @Override
+  void visit(ImageElement element, String value) throws IOException {
+    printIndents();
+    write(element + " = " + value + "\n");
+  }
+
+  @Override
+  void visit(ImageElement element, long value) throws IOException {
+    if ((element == ImageElement.DELEGATION_TOKEN_IDENTIFIER_EXPIRY_TIME) || 
+        (element == ImageElement.DELEGATION_TOKEN_IDENTIFIER_ISSUE_DATE) || 
+        (element == ImageElement.DELEGATION_TOKEN_IDENTIFIER_MAX_DATE)) {
+      visit(element, new Date(value).toString());
+    } else {
+      visit(element, Long.toString(value));
+    }
+  }
+  
+  @Override
+  void visitEnclosingElement(ImageElement element) throws IOException {
+    printIndents();
+    write(element + "\n");
+    dc.incLevel();
+  }
+
+  // Print element, along with associated key/value pair, in brackets
+  @Override
+  void visitEnclosingElement(ImageElement element,
+      ImageElement key, String value)
+      throws IOException {
+    printIndents();
+    write(element + " [" + key + " = " + value + "]\n");
+    dc.incLevel();
+  }
+
+  /**
+  * Print an appropriate number of spaces for the current level.
+  * FsImages can potentially be millions of lines long, so caching can
+  * significantly speed up output.
+  */
+  final private static String [] indents = { "",
+                                             "  ",
+                                             "    ",
+                                             "      ",
+                                             "        ",
+                                             "          ",
+                                             "            "};
+  private void printIndents() throws IOException {
+    try {
+      write(indents[dc.getLevel()]);
+    } catch (IndexOutOfBoundsException e) {
+      // There's no reason an fsimage would need a deeper indent
+      for(int i = 0; i < dc.getLevel(); i++)
+        write(" ");
+    }
+   }
+}

+ 178 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/LsImageVisitor.java

@@ -0,0 +1,178 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.IOException;
+import java.util.Formatter;
+import java.util.LinkedList;
+
+/**
+ * LsImageVisitor displays the blocks of the namespace in a format very similar
+ * to the output of ls/lsr.  Entries are marked as directories or not,
+ * permissions listed, replication, username and groupname, along with size,
+ * modification date and full path.
+ *
+ * Note: A significant difference between the output of the lsr command
+ * and this image visitor is that this class cannot sort the file entries;
+ * they are listed in the order they are stored within the fsimage file. 
+ * Therefore, the output of this class cannot be directly compared to the
+ * output of the lsr command.
+ */
+class LsImageVisitor extends TextWriterImageVisitor {
+  final private LinkedList<ImageElement> elemQ = new LinkedList<ImageElement>();
+
+  private int numBlocks;
+  private String perms;
+  private int replication;
+  private String username;
+  private String group;
+  private long filesize;
+  private String modTime;
+  private String path;
+  private String linkTarget;
+
+  private boolean inInode = false;
+  final private StringBuilder sb = new StringBuilder();
+  final private Formatter formatter = new Formatter(sb);
+
+  public LsImageVisitor(String filename) throws IOException {
+    super(filename);
+  }
+
+  public LsImageVisitor(String filename, boolean printToScreen) throws IOException {
+    super(filename, printToScreen);
+  }
+
+  /**
+   * Start a new line of output, reset values.
+   */
+  private void newLine() {
+    numBlocks = 0;
+    perms = username = group = path = linkTarget = "";
+    filesize = 0l;
+    replication = 0;
+
+    inInode = true;
+  }
+
+  /**
+   * All the values have been gathered.  Print them to the console in an
+   * ls-style format.
+   */
+  private final static int widthRepl = 2;  
+  private final static int widthUser = 8; 
+  private final static int widthGroup = 10; 
+  private final static int widthSize = 10;
+  private final static int widthMod = 10;
+  private final static String lsStr = " %" + widthRepl + "s %" + widthUser + 
+                                       "s %" + widthGroup + "s %" + widthSize +
+                                       "d %" + widthMod + "s %s";
+  private void printLine() throws IOException {
+    sb.append(numBlocks < 0 ? "d" : "-");
+    sb.append(perms);
+
+    if (0 != linkTarget.length()) {
+      path = path + " -> " + linkTarget; 
+    }
+    formatter.format(lsStr, replication > 0 ? replication : "-",
+                           username, group, filesize, modTime, path);
+    sb.append("\n");
+
+    write(sb.toString());
+    sb.setLength(0); // clear string builder
+
+    inInode = false;
+  }
+
+  @Override
+  void start() throws IOException {}
+
+  @Override
+  void finish() throws IOException {
+    super.finish();
+  }
+
+  @Override
+  void finishAbnormally() throws IOException {
+    System.out.println("Input ended unexpectedly.");
+    super.finishAbnormally();
+  }
+
+  @Override
+  void leaveEnclosingElement() throws IOException {
+    ImageElement elem = elemQ.pop();
+
+    if(elem == ImageElement.INODE)
+      printLine();
+  }
+
+  // Maintain state of location within the image tree and record
+  // values needed to display the inode in ls-style format.
+  @Override
+  void visit(ImageElement element, String value) throws IOException {
+    if(inInode) {
+      switch(element) {
+      case INODE_PATH:
+        if(value.equals("")) path = "/";
+        else path = value;
+        break;
+      case PERMISSION_STRING:
+        perms = value;
+        break;
+      case REPLICATION:
+        replication = Integer.valueOf(value);
+        break;
+      case USER_NAME:
+        username = value;
+        break;
+      case GROUP_NAME:
+        group = value;
+        break;
+      case NUM_BYTES:
+        filesize += Long.valueOf(value);
+        break;
+      case MODIFICATION_TIME:
+        modTime = value;
+        break;
+      case SYMLINK:
+        linkTarget = value;
+        break;
+      default:
+        // This is OK.  We're not looking for all the values.
+        break;
+      }
+    }
+  }
+
+  @Override
+  void visitEnclosingElement(ImageElement element) throws IOException {
+    elemQ.push(element);
+    if(element == ImageElement.INODE)
+      newLine();
+  }
+
+  @Override
+  void visitEnclosingElement(ImageElement element,
+      ImageElement key, String value) throws IOException {
+    elemQ.push(element);
+    if(element == ImageElement.INODE)
+      newLine();
+    else if (element == ImageElement.BLOCKS)
+      numBlocks = Integer.valueOf(value);
+  }
+}
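
To make the column layout concrete, the following standalone sketch pushes one sample entry (all values made up) through the same format string that printLine uses:

import java.util.Formatter;

public class LsLineDemo {
  public static void main(String[] args) {
    StringBuilder sb = new StringBuilder();
    Formatter formatter = new Formatter(sb);
    // Same widths as LsImageVisitor: repl=2, user=8, group=10, size=10, mod=10.
    String lsStr = " %2s %8s %10s %10d %10s %s";
    sb.append("-");           // '-' for a file, 'd' for a directory
    sb.append("rw-r--r--");   // permission string
    formatter.format(lsStr, 3, "hdfs", "supergroup",
        134217728L, "2014-05-15 10:00", "/user/hdfs/data.bin");
    System.out.println(sb.toString());
    formatter.close();
  }
}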

+ 118 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/NameDistributionVisitor.java

@@ -0,0 +1,118 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map.Entry;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/**
+ * File name distribution visitor. 
+ * <p>
+ * It analyzes file names in fsimage and prints the following information: 
+ * <li>Number of unique file names</li> 
+ * <li>Number of file names and the corresponding range of how many files
+ * use these same names</li>
+ * <li>Heap saved if the file name objects are reused</li>
+ */
+@InterfaceAudience.Private
+public class NameDistributionVisitor extends TextWriterImageVisitor {
+  HashMap<String, Integer> counts = new HashMap<String, Integer>();
+
+  public NameDistributionVisitor(String filename, boolean printToScreen)
+      throws IOException {
+    super(filename, printToScreen);
+  }
+
+  @Override
+  void finish() throws IOException {
+    final int BYTEARRAY_OVERHEAD = 24;
+
+    write("Total unique file names " + counts.size());
+    // Columns: Frequency of file occurrence, savings in heap, total files using
+    // the name and number of file names
+    final long stats[][] = { { 100000, 0, 0, 0 },
+                             { 10000, 0, 0, 0 },
+                             { 1000, 0, 0, 0 },
+                             { 100, 0, 0, 0 },
+                             { 10, 0, 0, 0 },
+                             { 5, 0, 0, 0 },
+                             { 4, 0, 0, 0 },
+                             { 3, 0, 0, 0 },
+                             { 2, 0, 0, 0 }};
+
+    int highbound = Integer.MIN_VALUE;
+    for (Entry<String, Integer> entry : counts.entrySet()) {
+      highbound = Math.max(highbound, entry.getValue());
+      for (int i = 0; i < stats.length; i++) {
+        if (entry.getValue() >= stats[i][0]) {
+          stats[i][1] += (BYTEARRAY_OVERHEAD + entry.getKey().length())
+              * (entry.getValue() - 1);
+          stats[i][2] += entry.getValue();
+          stats[i][3]++;
+          break;
+        }
+      }
+    }
+
+    long lowbound = 0;
+    long totalsavings = 0;
+    for (long[] stat : stats) {
+      lowbound = stat[0];
+      totalsavings += stat[1];
+      String range = lowbound == highbound ? " " + lowbound :
+          " between " + lowbound + "-" + highbound;
+      write("\n" + stat[3] + " names are used by " + stat[2] + " files"
+          + range + " times. Heap savings ~" + stat[1] + " bytes.");
+      highbound = (int) stat[0] - 1;
+    }
+    write("\n\nTotal saved heap ~" + totalsavings + "bytes.\n");
+    super.finish();
+  }
+
+  @Override
+  void visit(ImageElement element, String value) throws IOException {
+    if (element == ImageElement.INODE_PATH) {
+      String filename = value.substring(value.lastIndexOf("/") + 1);
+      if (counts.containsKey(filename)) {
+        counts.put(filename, counts.get(filename) + 1);
+      } else {
+        counts.put(filename, 1);
+      }
+    }
+  }
+
+  @Override
+  void leaveEnclosingElement() throws IOException {
+  }
+
+  @Override
+  void start() throws IOException {
+  }
+
+  @Override
+  void visitEnclosingElement(ImageElement element) throws IOException {
+  }
+
+  @Override
+  void visitEnclosingElement(ImageElement element, ImageElement key,
+      String value) throws IOException {
+  }
+}
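
The heap-savings figure reported above is essentially (BYTEARRAY_OVERHEAD + name length) * (occurrences - 1), i.e. the bytes saved if every duplicate shared a single name object. A quick, hedged check with made-up numbers:

public class NameSavingsDemo {
  public static void main(String[] args) {
    final int BYTEARRAY_OVERHEAD = 24;   // same constant used by the visitor
    String name = "part-00000";          // hypothetical file name, 10 characters
    int occurrences = 1000;              // hypothetical reuse count
    long savings = (long) (BYTEARRAY_OVERHEAD + name.length()) * (occurrences - 1);
    System.out.println(savings + " bytes could be saved");   // prints 33966
  }
}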

+ 274 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewer.java

@@ -0,0 +1,274 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.BufferedInputStream;
+import java.io.DataInputStream;
+import java.io.EOFException;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.cli.PosixParser;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.hdfs.server.namenode.FSEditLogLoader.PositionTrackingInputStream;
+
+/**
+ * OfflineImageViewer to dump the contents of a Hadoop image file to XML
+ * or the console.  Main entry point into the utility, either via the
+ * command line or programmatically.
+ */
+@InterfaceAudience.Private
+public class OfflineImageViewer {
+  public static final Log LOG = LogFactory.getLog(OfflineImageViewer.class);
+  
+  private final static String usage = 
+    "Usage: bin/hdfs oiv [OPTIONS] -i INPUTFILE -o OUTPUTFILE\n" +
+    "Offline Image Viewer\n" + 
+    "View a Hadoop fsimage INPUTFILE using the specified PROCESSOR,\n" +
+    "saving the results in OUTPUTFILE.\n" +
+    "\n" +
+    "The oiv utility will attempt to parse correctly formed image files\n" +
+    "and will abort fail with mal-formed image files.\n" +
+    "\n" +
+    "The tool works offline and does not require a running cluster in\n" +
+    "order to process an image file.\n" +
+    "\n" +
+    "The following image processors are available:\n" +
+    "  * Ls: The default image processor generates an lsr-style listing\n" +
+    "    of the files in the namespace, with the same fields in the same\n" +
+    "    order.  Note that in order to correctly determine file sizes,\n" +
+    "    this formatter cannot skip blocks and will override the\n" +
+    "    -skipBlocks option.\n" +
+    "  * Indented: This processor enumerates over all of the elements in\n" +
+    "    the fsimage file, using levels of indentation to delineate\n" +
+    "    sections within the file.\n" +
+    "  * Delimited: Generate a text file with all of the elements common\n" +
+    "    to both inodes and inodes-under-construction, separated by a\n" +
+    "    delimiter. The default delimiter is \u0001, though this may be\n" +
+    "    changed via the -delimiter argument. This processor also overrides\n" +
+    "    the -skipBlocks option for the same reason as the Ls processor\n" +
+    "  * XML: This processor creates an XML document with all elements of\n" +
+    "    the fsimage enumerated, suitable for further analysis by XML\n" +
+    "    tools.\n" +
+    "  * FileDistribution: This processor analyzes the file size\n" +
+    "    distribution in the image.\n" +
+    "    -maxSize specifies the range [0, maxSize] of file sizes to be\n" +
+    "     analyzed (128GB by default).\n" +
+    "    -step defines the granularity of the distribution. (2MB by default)\n" +
+    "  * NameDistribution: This processor analyzes the file names\n" +
+    "    in the image and prints total number of file names and how frequently\n" +
+    "    file names are reused.\n" +
+    "\n" + 
+    "Required command line arguments:\n" +
+    "-i,--inputFile <arg>   FSImage file to process.\n" +
+    "-o,--outputFile <arg>  Name of output file. If the specified\n" +
+    "                       file exists, it will be overwritten.\n" +
+    "\n" + 
+    "Optional command line arguments:\n" +
+    "-p,--processor <arg>   Select which type of processor to apply\n" +
+    "                       against image file." +
+    " (Ls|XML|Delimited|Indented|FileDistribution).\n" +
+    "-h,--help              Display usage information and exit\n" +
+    "-printToScreen         For processors that write to a file, also\n" +
+    "                       output to screen. On large image files this\n" +
+    "                       will dramatically increase processing time.\n" +
+    "-skipBlocks            Skip inodes' blocks information. May\n" +
+    "                       significantly decrease output.\n" +
+    "                       (default = false).\n" +
+    "-delimiter <arg>       Delimiting string to use with Delimited processor\n";
+
+  private final boolean skipBlocks;
+  private final String inputFile;
+  private final ImageVisitor processor;
+  
+  public OfflineImageViewer(String inputFile, ImageVisitor processor, 
+             boolean skipBlocks) {
+    this.inputFile = inputFile;
+    this.processor = processor;
+    this.skipBlocks = skipBlocks;
+  }
+
+  /**
+   * Process image file.
+   */
+  public void go() throws IOException  {
+    DataInputStream in = null;
+    PositionTrackingInputStream tracker = null;
+    ImageLoader fsip = null;
+    boolean done = false;
+    try {
+      tracker = new PositionTrackingInputStream(new BufferedInputStream(
+               new FileInputStream(new File(inputFile))));
+      in = new DataInputStream(tracker);
+
+      int imageVersionFile = findImageVersion(in);
+
+      fsip = ImageLoader.LoaderFactory.getLoader(imageVersionFile);
+
+      if(fsip == null) 
+        throw new IOException("No image processor to read version " +
+            imageVersionFile + " is available.");
+      fsip.loadImage(in, processor, skipBlocks);
+      done = true;
+    } finally {
+      if (!done) {
+        LOG.error("image loading failed at offset " + tracker.getPos());
+      }
+      IOUtils.cleanup(LOG, in, tracker);
+    }
+  }
+
+  /**
+   * Check an fsimage datainputstream's version number.
+   *
+   * The DataInputStream is returned at the same position as it was passed in;
+   * this method has no effect on the stream's read pointer.
+   *
+   * @param in Datainputstream of fsimage
+   * @return Filesystem layout version of fsimage represented by stream
+   * @throws IOException If problem reading from in
+   */
+  private int findImageVersion(DataInputStream in) throws IOException {
+    in.mark(42); // arbitrary amount, resetting immediately
+
+    int version = in.readInt();
+    in.reset();
+
+    return version;
+  }
+  
+  /**
+   * Build command-line options and descriptions
+   */
+  public static Options buildOptions() {
+    Options options = new Options();
+
+    // Build in/output file arguments, which are required, but there is no 
+    // addOption method that can specify this
+    OptionBuilder.isRequired();
+    OptionBuilder.hasArgs();
+    OptionBuilder.withLongOpt("outputFile");
+    options.addOption(OptionBuilder.create("o"));
+    
+    OptionBuilder.isRequired();
+    OptionBuilder.hasArgs();
+    OptionBuilder.withLongOpt("inputFile");
+    options.addOption(OptionBuilder.create("i"));
+    
+    options.addOption("p", "processor", true, "");
+    options.addOption("h", "help", false, "");
+    options.addOption("skipBlocks", false, "");
+    options.addOption("printToScreen", false, "");
+    options.addOption("delimiter", true, "");
+
+    return options;
+  }
+  
+  /**
+   * Entry point to command-line-driven operation.  The user may specify
+   * options and start the fsimage viewer from the command line.  The program
+   * will process the image file and exit cleanly or, if an error is
+   * encountered, inform the user and exit.
+   *
+   * @param args Command line options
+   * @throws IOException 
+   */
+  public static void main(String[] args) throws IOException {
+    Options options = buildOptions();
+    if(args.length == 0) {
+      printUsage();
+      return;
+    }
+    
+    CommandLineParser parser = new PosixParser();
+    CommandLine cmd;
+
+    try {
+      cmd = parser.parse(options, args);
+    } catch (ParseException e) {
+      System.out.println("Error parsing command-line options: ");
+      printUsage();
+      return;
+    }
+
+    if(cmd.hasOption("h")) { // print help and exit
+      printUsage();
+      return;
+    }
+
+    boolean skipBlocks = cmd.hasOption("skipBlocks");
+    boolean printToScreen = cmd.hasOption("printToScreen");
+    String inputFile = cmd.getOptionValue("i");
+    String processor = cmd.getOptionValue("p", "Ls");
+    String outputFile = cmd.getOptionValue("o");
+    String delimiter = cmd.getOptionValue("delimiter");
+    
+    if( !(delimiter == null || processor.equals("Delimited")) ) {
+      System.out.println("Can only specify -delimiter with Delimited processor");
+      printUsage();
+      return;
+    }
+    
+    ImageVisitor v;
+    if(processor.equals("Indented")) {
+      v = new IndentedImageVisitor(outputFile, printToScreen);
+    } else if (processor.equals("XML")) {
+      v = new XmlImageVisitor(outputFile, printToScreen);
+    } else if (processor.equals("Delimited")) {
+      v = delimiter == null ?  
+                 new DelimitedImageVisitor(outputFile, printToScreen) :
+                 new DelimitedImageVisitor(outputFile, printToScreen, delimiter);
+      skipBlocks = false;
+    } else if (processor.equals("FileDistribution")) {
+      long maxSize = Long.parseLong(cmd.getOptionValue("maxSize", "0"));
+      int step = Integer.parseInt(cmd.getOptionValue("step", "0"));
+      v = new FileDistributionVisitor(outputFile, maxSize, step);
+    } else if (processor.equals("NameDistribution")) {
+      v = new NameDistributionVisitor(outputFile, printToScreen);
+    } else {
+      v = new LsImageVisitor(outputFile, printToScreen);
+      skipBlocks = false;
+    }
+    
+    try {
+      OfflineImageViewer d = new OfflineImageViewer(inputFile, v, skipBlocks);
+      d.go();
+    } catch (EOFException e) {
+      System.err.println("Input file ended unexpectedly.  Exiting");
+    } catch(IOException e) {
+      System.err.println("Encountered exception.  Exiting: " + e.getMessage());
+    }
+  }
+
+  /**
+   * Print application usage instructions.
+   */
+  private static void printUsage() {
+    System.out.println(usage);
+  }
+}
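
Besides the command line, the viewer can be driven programmatically, as the class javadoc notes. A minimal sketch, placed in the same package because ImageVisitor is package-private; the file paths are placeholders:

package org.apache.hadoop.hdfs.tools.offlineImageViewer;

import java.io.IOException;

/** Illustrative programmatic driver for the offline image viewer. */
public class OivExample {
  public static void main(String[] args) throws IOException {
    // Any readable fsimage file and writable output path will do.
    ImageVisitor v = new XmlImageVisitor("/tmp/fsimage.xml", false);
    OfflineImageViewer viewer = new OfflineImageViewer("/tmp/fsimage", v, true);
    viewer.go();   // parses the image and hands each element to the visitor
  }
}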

+ 109 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TextWriterImageVisitor.java

@@ -0,0 +1,109 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+
+import com.google.common.base.Charsets;
+
+/**
+ * TextWriterImageVisitor mixes in the ability for ImageVisitor
+ * implementations to easily write their output to a text file.
+ *
+ * Implementing classes should be sure to call the superclass constructor
+ * and the finish and finishAbnormally methods, so that the underlying
+ * file is opened and closed correctly.
+ *
+ * Note, this class does not add newlines to text written to file or (if
+ * enabled) screen.  This is the implementing class' responsibility.
+ */
+abstract class TextWriterImageVisitor extends ImageVisitor {
+  private boolean printToScreen = false;
+  private boolean okToWrite = false;
+  final private OutputStreamWriter fw;
+
+  /**
+   * Create a processor that writes to the file named.
+   *
+   * @param filename Name of file to write output to
+   */
+  public TextWriterImageVisitor(String filename) throws IOException {
+    this(filename, false);
+  }
+
+  /**
+   * Create a processor that writes to the file named and may or may not
+   * also output to the screen, as specified.
+   *
+   * @param filename Name of file to write output to
+   * @param printToScreen Mirror output to screen?
+   */
+  public TextWriterImageVisitor(String filename, boolean printToScreen)
+         throws IOException {
+    super();
+    this.printToScreen = printToScreen;
+    fw = new OutputStreamWriter(new FileOutputStream(filename), Charsets.UTF_8);
+    okToWrite = true;
+  }
+  
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.hdfs.tools.offlineImageViewer.ImageVisitor#finish()
+   */
+  @Override
+  void finish() throws IOException {
+    close();
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.hdfs.tools.offlineImageViewer.ImageVisitor#finishAbnormally()
+   */
+  @Override
+  void finishAbnormally() throws IOException {
+    close();
+  }
+
+  /**
+   * Close output stream and prevent further writing
+   */
+  private void close() throws IOException {
+    fw.close();
+    okToWrite = false;
+  }
+
+  /**
+   * Write parameter to output file (and possibly screen).
+   *
+   * @param toWrite Text to write to file
+   */
+  protected void write(String toWrite) throws IOException  {
+    if(!okToWrite)
+      throw new IOException("file not open for writing.");
+
+    if(printToScreen)
+      System.out.print(toWrite);
+
+    try {
+      fw.write(toWrite);
+    } catch (IOException e) {
+      okToWrite = false;
+      throw e;
+    }
+  }
+}
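
As the javadoc stresses, subclasses must chain to the superclass constructor and to finish()/finishAbnormally() so that the output file is opened and closed. A hedged skeleton of such a subclass (the class itself is illustrative, not part of the patch):

package org.apache.hadoop.hdfs.tools.offlineImageViewer;

import java.io.IOException;

/** Illustrative subclass showing the required calls into the superclass. */
class CsvImageVisitor extends TextWriterImageVisitor {
  CsvImageVisitor(String filename, boolean printToScreen) throws IOException {
    super(filename, printToScreen);   // opens the output file
  }

  @Override
  void start() throws IOException {}

  @Override
  void finish() throws IOException {
    write("\n");       // emit any trailing output, then...
    super.finish();    // ...close the underlying writer
  }

  @Override
  void finishAbnormally() throws IOException {
    super.finishAbnormally();         // still closes the file on errors
  }

  @Override
  void visit(ImageElement element, String value) throws IOException {
    write(element + "," + value + "\n");
  }

  @Override
  void visitEnclosingElement(ImageElement element) throws IOException {}

  @Override
  void visitEnclosingElement(ImageElement element, ImageElement key,
      String value) throws IOException {}

  @Override
  void leaveEnclosingElement() throws IOException {}
}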

+ 88 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/XmlImageVisitor.java

@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.IOException;
+import java.util.LinkedList;
+
+/**
+ * An XmlImageVisitor walks over an fsimage structure and writes out
+ * an equivalent XML document that contains the fsimage's components.
+ */
+public class XmlImageVisitor extends TextWriterImageVisitor {
+  final private LinkedList<ImageElement> tagQ =
+                                          new LinkedList<ImageElement>();
+
+  public XmlImageVisitor(String filename) throws IOException {
+    super(filename, false);
+  }
+
+  public XmlImageVisitor(String filename, boolean printToScreen)
+       throws IOException {
+    super(filename, printToScreen);
+  }
+
+  @Override
+  void finish() throws IOException {
+    super.finish();
+  }
+
+  @Override
+  void finishAbnormally() throws IOException {
+    write("\n<!-- Error processing image file.  Exiting -->\n");
+    super.finishAbnormally();
+  }
+
+  @Override
+  void leaveEnclosingElement() throws IOException {
+    if(tagQ.size() == 0)
+      throw new IOException("Tried to exit non-existent enclosing element " +
+                "in FSImage file");
+
+    ImageElement element = tagQ.pop();
+    write("</" + element.toString() + ">\n");
+  }
+
+  @Override
+  void start() throws IOException {
+    write("<?xml version=\"1.0\" ?>\n");
+  }
+
+  @Override
+  void visit(ImageElement element, String value) throws IOException {
+    writeTag(element.toString(), value);
+  }
+
+  @Override
+  void visitEnclosingElement(ImageElement element) throws IOException {
+    write("<" + element.toString() + ">\n");
+    tagQ.push(element);
+  }
+
+  @Override
+  void visitEnclosingElement(ImageElement element,
+      ImageElement key, String value)
+       throws IOException {
+    write("<" + element.toString() + " " + key + "=\"" + value +"\">\n");
+    tagQ.push(element);
+  }
+
+  private void writeTag(String tag, String value) throws IOException {
+    write("<" + tag + ">" + value + "</" + tag + ">\n");
+  }
+}

+ 10 - 4
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java

@@ -176,8 +176,9 @@ public class JsonUtil {
   }
 
   /** Convert a string to a FsPermission object. */
-  private static FsPermission toFsPermission(final String s) {
-    return new FsPermission(Short.parseShort(s, 8));
+  private static FsPermission toFsPermission(final String s, Boolean aclBit) {
+    FsPermission perm = new FsPermission(Short.parseShort(s, 8));
+    return (aclBit != null && aclBit) ? new FsAclPermission(perm) : perm;
   }
 
   static enum PathType {
@@ -204,7 +205,11 @@ public class JsonUtil {
     m.put("length", status.getLen());
     m.put("owner", status.getOwner());
     m.put("group", status.getGroup());
-    m.put("permission", toString(status.getPermission()));
+    FsPermission perm = status.getPermission();
+    m.put("permission", toString(perm));
+    if (perm.getAclBit()) {
+      m.put("aclBit", true);
+    }
     m.put("accessTime", status.getAccessTime());
     m.put("modificationTime", status.getModificationTime());
     m.put("blockSize", status.getBlockSize());
@@ -230,7 +235,8 @@ public class JsonUtil {
     final long len = (Long) m.get("length");
     final String owner = (String) m.get("owner");
     final String group = (String) m.get("group");
-    final FsPermission permission = toFsPermission((String) m.get("permission"));
+    final FsPermission permission = toFsPermission((String) m.get("permission"),
+      (Boolean)m.get("aclBit"));
     final long aTime = (Long) m.get("accessTime");
     final long mTime = (Long) m.get("modificationTime");
     final long blockSize = (Long) m.get("blockSize");
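
With the change above, a WebHDFS file status JSON object may carry an optional boolean "aclBit" next to the octal "permission" string. A standalone, hedged sketch of reading those two fields back from an already-decoded map (the map contents are made up; FsAclPermission itself is not used here):

import java.util.HashMap;
import java.util.Map;

public class AclBitDemo {
  public static void main(String[] args) {
    // Hypothetical JSON-decoded map for one file status.
    Map<String, Object> m = new HashMap<String, Object>();
    m.put("permission", "644");
    m.put("aclBit", Boolean.TRUE);   // present only when the inode has an ACL

    short octal = Short.parseShort((String) m.get("permission"), 8);
    Boolean aclBit = (Boolean) m.get("aclBit");
    boolean hasAcl = aclBit != null && aclBit;
    System.out.println(String.format("perm=%03o acl=%b", octal, hasAcl));
  }
}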

+ 290 - 227
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java

@@ -58,34 +58,8 @@ import org.apache.hadoop.hdfs.HAUtil;
 import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
 import org.apache.hadoop.hdfs.server.namenode.SafeModeException;
-import org.apache.hadoop.hdfs.web.resources.AccessTimeParam;
-import org.apache.hadoop.hdfs.web.resources.AclPermissionParam;
-import org.apache.hadoop.hdfs.web.resources.BlockSizeParam;
-import org.apache.hadoop.hdfs.web.resources.BufferSizeParam;
-import org.apache.hadoop.hdfs.web.resources.ConcatSourcesParam;
-import org.apache.hadoop.hdfs.web.resources.CreateParentParam;
-import org.apache.hadoop.hdfs.web.resources.DelegationParam;
-import org.apache.hadoop.hdfs.web.resources.DeleteOpParam;
-import org.apache.hadoop.hdfs.web.resources.DestinationParam;
-import org.apache.hadoop.hdfs.web.resources.DoAsParam;
-import org.apache.hadoop.hdfs.web.resources.GetOpParam;
-import org.apache.hadoop.hdfs.web.resources.GroupParam;
-import org.apache.hadoop.hdfs.web.resources.HttpOpParam;
-import org.apache.hadoop.hdfs.web.resources.LengthParam;
-import org.apache.hadoop.hdfs.web.resources.ModificationTimeParam;
-import org.apache.hadoop.hdfs.web.resources.OffsetParam;
-import org.apache.hadoop.hdfs.web.resources.OverwriteParam;
-import org.apache.hadoop.hdfs.web.resources.OwnerParam;
-import org.apache.hadoop.hdfs.web.resources.Param;
-import org.apache.hadoop.hdfs.web.resources.PermissionParam;
-import org.apache.hadoop.hdfs.web.resources.PostOpParam;
-import org.apache.hadoop.hdfs.web.resources.PutOpParam;
-import org.apache.hadoop.hdfs.web.resources.RecursiveParam;
-import org.apache.hadoop.hdfs.web.resources.RenameOptionSetParam;
-import org.apache.hadoop.hdfs.web.resources.RenewerParam;
-import org.apache.hadoop.hdfs.web.resources.ReplicationParam;
-import org.apache.hadoop.hdfs.web.resources.TokenArgumentParam;
-import org.apache.hadoop.hdfs.web.resources.UserParam;
+import org.apache.hadoop.hdfs.web.resources.*;
+import org.apache.hadoop.hdfs.web.resources.HttpOpParam.Op;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.retry.RetryPolicies;
 import org.apache.hadoop.io.retry.RetryPolicy;
@@ -176,10 +150,13 @@ public class WebHdfsFileSystem extends FileSystem
     this.uri = URI.create(uri.getScheme() + "://" + uri.getAuthority());
     this.nnAddrs = resolveNNAddr();
 
-    boolean isHA = HAUtil.isLogicalUri(conf, this.uri);
-    // In non-HA case, the code needs to call getCanonicalUri() in order to
-    // handle the case where no port is specified in the URI
-    this.tokenServiceName = isHA ? HAUtil.buildTokenServiceForLogicalUri(uri)
+    boolean isHA = HAUtil.isClientFailoverConfigured(conf, this.uri);
+    boolean isLogicalUri = isHA && HAUtil.isLogicalUri(conf, this.uri);
+    // In non-HA or non-logical URI case, the code needs to call
+    // getCanonicalUri() in order to handle the case where no port is
+    // specified in the URI
+    this.tokenServiceName = isLogicalUri ?
+        HAUtil.buildTokenServiceForLogicalUri(uri)
         : SecurityUtil.buildTokenService(getCanonicalUri());
     initializeTokenAspect();
 
@@ -422,41 +399,24 @@ public class WebHdfsFileSystem extends FileSystem
     return url;
   }
 
-  /**
-   * Run a http operation.
-   * Connect to the http server, validate response, and obtain the JSON output.
-   * 
-   * @param op http operation
-   * @param fspath file system path
-   * @param parameters parameters for the operation
-   * @return a JSON object, e.g. Object[], Map<?, ?>, etc.
-   * @throws IOException
-   */
-  private Map<?, ?> run(final HttpOpParam.Op op, final Path fspath,
-      final Param<?,?>... parameters) throws IOException {
-    return new FsPathRunner(op, fspath, parameters).run().json;
-  }
-
   /**
    * This class is for initialing a HTTP connection, connecting to server,
    * obtaining a response, and also handling retry on failures.
    */
-  abstract class AbstractRunner {
+  abstract class AbstractRunner<T> {
     abstract protected URL getUrl() throws IOException;
 
     protected final HttpOpParam.Op op;
     private final boolean redirected;
 
     private boolean checkRetry;
-    protected HttpURLConnection conn = null;
-    private Map<?, ?> json = null;
 
     protected AbstractRunner(final HttpOpParam.Op op, boolean redirected) {
       this.op = op;
       this.redirected = redirected;
     }
 
-    AbstractRunner run() throws IOException {
+    T run() throws IOException {
       UserGroupInformation connectUgi = ugi.getRealUser();
       if (connectUgi == null) {
         connectUgi = ugi;
@@ -468,9 +428,9 @@ public class WebHdfsFileSystem extends FileSystem
         // the entire lifecycle of the connection must be run inside the
         // doAs to ensure authentication is performed correctly
         return connectUgi.doAs(
-            new PrivilegedExceptionAction<AbstractRunner>() {
+            new PrivilegedExceptionAction<T>() {
               @Override
-              public AbstractRunner run() throws IOException {
+              public T run() throws IOException {
                 return runWithRetry();
               }
             });
@@ -478,18 +438,51 @@ public class WebHdfsFileSystem extends FileSystem
         throw new IOException(e);
       }
     }
-    
-    private void init() throws IOException {
-      checkRetry = !redirected;
-      URL url = getUrl();
-      conn = (HttpURLConnection) connectionFactory.openConnection(url);
-    }
-    
-    private void connect() throws IOException {
-      connect(op.getDoOutput());
+
+    /**
+     * Two-step requests redirected to a DN
+     * 
+     * Create/Append:
+     * Step 1) Submit a Http request with neither auto-redirect nor data. 
+     * Step 2) Submit another Http request with the URL from the Location header with data.
+     * 
+     * The reason for having two-step create/append is to prevent clients from
+     * sending out the data before the redirect. This issue is addressed by the
+     * "Expect: 100-continue" header in HTTP/1.1; see RFC 2616, Section 8.2.3.
+     * Unfortunately, there are software library bugs (e.g. Jetty 6 http server
+     * and Java 6 http client), which do not correctly implement "Expect:
+     * 100-continue". The two-step create/append is a temporary workaround for
+     * the software library bugs.
+     * 
+     * Open/Checksum
+     * Also implements two-step connects for other operations redirected to
+     * a DN such as open and checksum
+     */
+    private HttpURLConnection connect(URL url) throws IOException {
+      // resolve redirects for a DN operation unless already resolved
+      if (op.getRedirect() && !redirected) {
+        final HttpOpParam.Op redirectOp =
+            HttpOpParam.TemporaryRedirectOp.valueOf(op);
+        final HttpURLConnection conn = connect(redirectOp, url);
+        // application level proxy like httpfs might not issue a redirect
+        if (conn.getResponseCode() == op.getExpectedHttpResponseCode()) {
+          return conn;
+        }
+        try {
+          validateResponse(redirectOp, conn, false);
+          url = new URL(conn.getHeaderField("Location"));
+        } finally {
+          conn.disconnect();
+        }
+      }
+      return connect(op, url);
     }
 
-    private void connect(boolean doOutput) throws IOException {
+    private HttpURLConnection connect(final HttpOpParam.Op op, final URL url)
+        throws IOException {
+      final HttpURLConnection conn =
+          (HttpURLConnection)connectionFactory.openConnection(url);
+      final boolean doOutput = op.getDoOutput();
       conn.setRequestMethod(op.getType().toString());
       conn.setInstanceFollowRedirects(false);
       switch (op.getType()) {
@@ -502,6 +495,10 @@ public class WebHdfsFileSystem extends FileSystem
             // explicitly setting content-length to 0 won't do spnego!!
             // opening and closing the stream will send "Content-Length: 0"
             conn.getOutputStream().close();
+          } else {
+            conn.setRequestProperty("Content-Type",
+                MediaType.APPLICATION_OCTET_STREAM);
+            conn.setChunkedStreamingMode(32 << 10); //32kB-chunk
           }
           break;
         }
@@ -511,16 +508,10 @@ public class WebHdfsFileSystem extends FileSystem
         }
       }
       conn.connect();
+      return conn;
     }
 
-    private void disconnect() {
-      if (conn != null) {
-        conn.disconnect();
-        conn = null;
-      }
-    }
-
-    private AbstractRunner runWithRetry() throws IOException {
+    private T runWithRetry() throws IOException {
       /**
        * Do the real work.
        *
@@ -538,15 +529,16 @@ public class WebHdfsFileSystem extends FileSystem
        * examines the exception and swallows it if it decides to rerun the work.
        */
       for(int retry = 0; ; retry++) {
+        checkRetry = !redirected;
+        final URL url = getUrl();
         try {
-          init();
-          if (op.getDoOutput()) {
-            twoStepWrite();
-          } else {
-            getResponse(op != GetOpParam.Op.OPEN);
+          final HttpURLConnection conn = connect(url);
+          // output streams will validate on close
+          if (!op.getDoOutput()) {
+            validateResponse(op, conn, false);
           }
-          return this;
-        } catch(IOException ioe) {
+          return getResponse(conn);
+        } catch (IOException ioe) {
           Throwable cause = ioe.getCause();
           if (cause != null && cause instanceof AuthenticationException) {
             throw ioe; // no retries for auth failures
@@ -588,87 +580,129 @@ public class WebHdfsFileSystem extends FileSystem
       throw toIOException(ioe);
     }
 
-    /**
-     * Two-step Create/Append:
-     * Step 1) Submit a Http request with neither auto-redirect nor data. 
-     * Step 2) Submit another Http request with the URL from the Location header with data.
-     * 
-     * The reason of having two-step create/append is for preventing clients to
-     * send out the data before the redirect. This issue is addressed by the
-     * "Expect: 100-continue" header in HTTP/1.1; see RFC 2616, Section 8.2.3.
-     * Unfortunately, there are software library bugs (e.g. Jetty 6 http server
-     * and Java 6 http client), which do not correctly implement "Expect:
-     * 100-continue". The two-step create/append is a temporary workaround for
-     * the software library bugs.
-     */
-    HttpURLConnection twoStepWrite() throws IOException {
-      //Step 1) Submit a Http request with neither auto-redirect nor data. 
-      connect(false);
-      validateResponse(HttpOpParam.TemporaryRedirectOp.valueOf(op), conn, false);
-      final String redirect = conn.getHeaderField("Location");
-      disconnect();
-      checkRetry = false;
-      
-      //Step 2) Submit another Http request with the URL from the Location header with data.
-      conn = (HttpURLConnection) connectionFactory.openConnection(new URL(
-          redirect));
-      conn.setRequestProperty("Content-Type",
-          MediaType.APPLICATION_OCTET_STREAM);
-      conn.setChunkedStreamingMode(32 << 10); //32kB-chunk
-      connect();
-      return conn;
+    abstract T getResponse(HttpURLConnection conn) throws IOException;
+  }
+
+  /**
+   * Abstract base class to handle path-based operations with params
+   */
+  abstract class AbstractFsPathRunner<T> extends AbstractRunner<T> {
+    private final Path fspath;
+    private final Param<?,?>[] parameters;
+    
+    AbstractFsPathRunner(final HttpOpParam.Op op, final Path fspath,
+        Param<?,?>... parameters) {
+      super(op, false);
+      this.fspath = fspath;
+      this.parameters = parameters;
+    }
+    
+    @Override
+    protected URL getUrl() throws IOException {
+      return toUrl(op, fspath, parameters);
     }
+  }
 
-    FSDataOutputStream write(final int bufferSize) throws IOException {
-      return WebHdfsFileSystem.this.write(op, conn, bufferSize);
+  /**
+   * Default path-based implementation expects no json response
+   */
+  class FsPathRunner extends AbstractFsPathRunner<Void> {
+    FsPathRunner(Op op, Path fspath, Param<?,?>... parameters) {
+      super(op, fspath, parameters);
     }
+    
+    @Override
+    Void getResponse(HttpURLConnection conn) throws IOException {
+      return null;
+    }
+  }
 
-    void getResponse(boolean getJsonAndDisconnect) throws IOException {
+  /**
+   * Handle path-based operations with a json response
+   */
+  abstract class FsPathResponseRunner<T> extends AbstractFsPathRunner<T> {
+    FsPathResponseRunner(final HttpOpParam.Op op, final Path fspath,
+        Param<?,?>... parameters) {
+      super(op, fspath, parameters);
+    }
+    
+    @Override
+    final T getResponse(HttpURLConnection conn) throws IOException {
       try {
-        connect();
-        final int code = conn.getResponseCode();
-        if (!redirected && op.getRedirect()
-            && code != op.getExpectedHttpResponseCode()) {
-          final String redirect = conn.getHeaderField("Location");
-          json = validateResponse(HttpOpParam.TemporaryRedirectOp.valueOf(op),
-              conn, false);
-          disconnect();
-  
-          checkRetry = false;
-          conn = (HttpURLConnection) connectionFactory.openConnection(new URL(
-              redirect));
-          connect();
+        final Map<?,?> json = jsonParse(conn, false);
+        if (json == null) {
+          // match exception class thrown by parser
+          throw new IllegalStateException("Missing response");
         }
-
-        json = validateResponse(op, conn, false);
-        if (json == null && getJsonAndDisconnect) {
-          json = jsonParse(conn, false);
+        return decodeResponse(json);
+      } catch (IOException ioe) {
+        throw ioe;
+      } catch (Exception e) { // catch json parser errors
+        final IOException ioe =
+            new IOException("Response decoding failure: "+e.toString(), e);
+        if (LOG.isDebugEnabled()) {
+          LOG.debug(ioe);
         }
+        throw ioe;
       } finally {
-        if (getJsonAndDisconnect) {
-          disconnect();
-        }
+        conn.disconnect();
       }
     }
+    
+    abstract T decodeResponse(Map<?,?> json) throws IOException;
   }
 
-  final class FsPathRunner extends AbstractRunner {
-    private final Path fspath;
-    private final Param<?, ?>[] parameters;
-
-    FsPathRunner(final HttpOpParam.Op op, final Path fspath, final Param<?,?>... parameters) {
-      super(op, false);
-      this.fspath = fspath;
-      this.parameters = parameters;
+  /**
+   * Handle path-based operations with json boolean response
+   */
+  class FsPathBooleanRunner extends FsPathResponseRunner<Boolean> {
+    FsPathBooleanRunner(Op op, Path fspath, Param<?,?>... parameters) {
+      super(op, fspath, parameters);
     }
-
+    
     @Override
-    protected URL getUrl() throws IOException {
-      return toUrl(op, fspath, parameters);
+    Boolean decodeResponse(Map<?,?> json) throws IOException {
+      return (Boolean)json.get("boolean");
     }
   }
 
-  final class URLRunner extends AbstractRunner {
+  /**
+   * Handle create/append output streams
+   */
+  class FsPathOutputStreamRunner extends AbstractFsPathRunner<FSDataOutputStream> {
+    private final int bufferSize;
+    
+    FsPathOutputStreamRunner(Op op, Path fspath, int bufferSize,
+        Param<?,?>... parameters) {
+      super(op, fspath, parameters);
+      this.bufferSize = bufferSize;
+    }
+    
+    @Override
+    FSDataOutputStream getResponse(final HttpURLConnection conn)
+        throws IOException {
+      return new FSDataOutputStream(new BufferedOutputStream(
+          conn.getOutputStream(), bufferSize), statistics) {
+        @Override
+        public void close() throws IOException {
+          try {
+            super.close();
+          } finally {
+            try {
+              validateResponse(op, conn, true);
+            } finally {
+              conn.disconnect();
+            }
+          }
+        }
+      };
+    }
+  }
+  
+  /**
+   * Used by open() which tracks the resolved url itself
+   */
+  final class URLRunner extends AbstractRunner<HttpURLConnection> {
     private final URL url;
     @Override
     protected URL getUrl() {
@@ -679,6 +713,11 @@ public class WebHdfsFileSystem extends FileSystem
       super(op, redirected);
       this.url = url;
     }
+
+    @Override
+    HttpURLConnection getResponse(HttpURLConnection conn) throws IOException {
+      return conn;
+    }
   }
 
   private FsPermission applyUMask(FsPermission permission) {
@@ -690,8 +729,12 @@ public class WebHdfsFileSystem extends FileSystem
 
   private HdfsFileStatus getHdfsFileStatus(Path f) throws IOException {
     final HttpOpParam.Op op = GetOpParam.Op.GETFILESTATUS;
-    final Map<?, ?> json = run(op, f);
-    final HdfsFileStatus status = JsonUtil.toFileStatus(json, true);
+    HdfsFileStatus status = new FsPathResponseRunner<HdfsFileStatus>(op, f) {
+      @Override
+      HdfsFileStatus decodeResponse(Map<?,?> json) {
+        return JsonUtil.toFileStatus(json, true);
+      }
+    }.run();
     if (status == null) {
       throw new FileNotFoundException("File does not exist: " + f);
     }
@@ -715,8 +758,12 @@ public class WebHdfsFileSystem extends FileSystem
   @Override
   public AclStatus getAclStatus(Path f) throws IOException {
     final HttpOpParam.Op op = GetOpParam.Op.GETACLSTATUS;
-    final Map<?, ?> json = run(op, f);
-    AclStatus status = JsonUtil.toAclStatus(json);
+    AclStatus status = new FsPathResponseRunner<AclStatus>(op, f) {
+      @Override
+      AclStatus decodeResponse(Map<?,?> json) {
+        return JsonUtil.toAclStatus(json);
+      }
+    }.run();
     if (status == null) {
       throw new FileNotFoundException("File does not exist: " + f);
     }
@@ -727,9 +774,9 @@ public class WebHdfsFileSystem extends FileSystem
   public boolean mkdirs(Path f, FsPermission permission) throws IOException {
     statistics.incrementWriteOps(1);
     final HttpOpParam.Op op = PutOpParam.Op.MKDIRS;
-    final Map<?, ?> json = run(op, f,
-        new PermissionParam(applyUMask(permission)));
-    return (Boolean)json.get("boolean");
+    return new FsPathBooleanRunner(op, f,
+        new PermissionParam(applyUMask(permission))
+    ).run();
   }
 
   /**
@@ -740,17 +787,19 @@ public class WebHdfsFileSystem extends FileSystem
       ) throws IOException {
     statistics.incrementWriteOps(1);
     final HttpOpParam.Op op = PutOpParam.Op.CREATESYMLINK;
-    run(op, f, new DestinationParam(makeQualified(destination).toUri().getPath()),
-        new CreateParentParam(createParent));
+    new FsPathRunner(op, f,
+        new DestinationParam(makeQualified(destination).toUri().getPath()),
+        new CreateParentParam(createParent)
+    ).run();
   }
 
   @Override
   public boolean rename(final Path src, final Path dst) throws IOException {
     statistics.incrementWriteOps(1);
     final HttpOpParam.Op op = PutOpParam.Op.RENAME;
-    final Map<?, ?> json = run(op, src,
-        new DestinationParam(makeQualified(dst).toUri().getPath()));
-    return (Boolean)json.get("boolean");
+    return new FsPathBooleanRunner(op, src,
+        new DestinationParam(makeQualified(dst).toUri().getPath())
+    ).run();
   }
 
   @SuppressWarnings("deprecation")
@@ -759,8 +808,10 @@ public class WebHdfsFileSystem extends FileSystem
       final Options.Rename... options) throws IOException {
     statistics.incrementWriteOps(1);
     final HttpOpParam.Op op = PutOpParam.Op.RENAME;
-    run(op, src, new DestinationParam(makeQualified(dst).toUri().getPath()),
-        new RenameOptionSetParam(options));
+    new FsPathRunner(op, src,
+        new DestinationParam(makeQualified(dst).toUri().getPath()),
+        new RenameOptionSetParam(options)
+    ).run();
   }
 
   @Override
@@ -772,7 +823,9 @@ public class WebHdfsFileSystem extends FileSystem
 
     statistics.incrementWriteOps(1);
     final HttpOpParam.Op op = PutOpParam.Op.SETOWNER;
-    run(op, p, new OwnerParam(owner), new GroupParam(group));
+    new FsPathRunner(op, p,
+        new OwnerParam(owner), new GroupParam(group)
+    ).run();
   }
 
   @Override
@@ -780,7 +833,7 @@ public class WebHdfsFileSystem extends FileSystem
       ) throws IOException {
     statistics.incrementWriteOps(1);
     final HttpOpParam.Op op = PutOpParam.Op.SETPERMISSION;
-    run(op, p, new PermissionParam(permission));
+    new FsPathRunner(op, p, new PermissionParam(permission)).run();
   }
 
   @Override
@@ -788,7 +841,7 @@ public class WebHdfsFileSystem extends FileSystem
       throws IOException {
     statistics.incrementWriteOps(1);
     final HttpOpParam.Op op = PutOpParam.Op.MODIFYACLENTRIES;
-    run(op, path, new AclPermissionParam(aclSpec));
+    new FsPathRunner(op, path, new AclPermissionParam(aclSpec)).run();
   }
 
   @Override
@@ -796,21 +849,21 @@ public class WebHdfsFileSystem extends FileSystem
       throws IOException {
     statistics.incrementWriteOps(1);
     final HttpOpParam.Op op = PutOpParam.Op.REMOVEACLENTRIES;
-    run(op, path, new AclPermissionParam(aclSpec));
+    new FsPathRunner(op, path, new AclPermissionParam(aclSpec)).run();
   }
 
   @Override
   public void removeDefaultAcl(Path path) throws IOException {
     statistics.incrementWriteOps(1);
     final HttpOpParam.Op op = PutOpParam.Op.REMOVEDEFAULTACL;
-    run(op, path);
+    new FsPathRunner(op, path).run();
   }
 
   @Override
   public void removeAcl(Path path) throws IOException {
     statistics.incrementWriteOps(1);
     final HttpOpParam.Op op = PutOpParam.Op.REMOVEACL;
-    run(op, path);
+    new FsPathRunner(op, path).run();
   }
 
   @Override
@@ -818,7 +871,7 @@ public class WebHdfsFileSystem extends FileSystem
       throws IOException {
     statistics.incrementWriteOps(1);
     final HttpOpParam.Op op = PutOpParam.Op.SETACL;
-    run(op, p, new AclPermissionParam(aclSpec));
+    new FsPathRunner(op, p, new AclPermissionParam(aclSpec)).run();
   }
 
   @Override
@@ -826,8 +879,9 @@ public class WebHdfsFileSystem extends FileSystem
      ) throws IOException {
     statistics.incrementWriteOps(1);
     final HttpOpParam.Op op = PutOpParam.Op.SETREPLICATION;
-    final Map<?, ?> json = run(op, p, new ReplicationParam(replication));
-    return (Boolean)json.get("boolean");
+    return new FsPathBooleanRunner(op, p,
+        new ReplicationParam(replication)
+    ).run();
   }
 
   @Override
@@ -835,7 +889,10 @@ public class WebHdfsFileSystem extends FileSystem
       ) throws IOException {
     statistics.incrementWriteOps(1);
     final HttpOpParam.Op op = PutOpParam.Op.SETTIMES;
-    run(op, p, new ModificationTimeParam(mtime), new AccessTimeParam(atime));
+    new FsPathRunner(op, p,
+        new ModificationTimeParam(mtime),
+        new AccessTimeParam(atime)
+    ).run();
   }
 
   @Override
@@ -850,32 +907,11 @@ public class WebHdfsFileSystem extends FileSystem
         DFSConfigKeys.DFS_REPLICATION_DEFAULT);
   }
 
-  FSDataOutputStream write(final HttpOpParam.Op op,
-      final HttpURLConnection conn, final int bufferSize) throws IOException {
-    return new FSDataOutputStream(new BufferedOutputStream(
-        conn.getOutputStream(), bufferSize), statistics) {
-      @Override
-      public void close() throws IOException {
-        try {
-          super.close();
-        } finally {
-          try {
-            validateResponse(op, conn, true);
-          } finally {
-            conn.disconnect();
-          }
-        }
-      }
-    };
-  }
-
   @Override
   public void concat(final Path trg, final Path [] srcs) throws IOException {
     statistics.incrementWriteOps(1);
     final HttpOpParam.Op op = PostOpParam.Op.CONCAT;
-
-    ConcatSourcesParam param = new ConcatSourcesParam(srcs);
-    run(op, trg, param);
+    new FsPathRunner(op, trg, new ConcatSourcesParam(srcs)).run();
   }
 
   @Override
@@ -885,14 +921,13 @@ public class WebHdfsFileSystem extends FileSystem
     statistics.incrementWriteOps(1);
 
     final HttpOpParam.Op op = PutOpParam.Op.CREATE;
-    return new FsPathRunner(op, f,
+    return new FsPathOutputStreamRunner(op, f, bufferSize,
         new PermissionParam(applyUMask(permission)),
         new OverwriteParam(overwrite),
         new BufferSizeParam(bufferSize),
         new ReplicationParam(replication),
-        new BlockSizeParam(blockSize))
-      .run()
-      .write(bufferSize);
+        new BlockSizeParam(blockSize)
+    ).run();
   }
 
   @Override
@@ -901,16 +936,17 @@ public class WebHdfsFileSystem extends FileSystem
     statistics.incrementWriteOps(1);
 
     final HttpOpParam.Op op = PostOpParam.Op.APPEND;
-    return new FsPathRunner(op, f, new BufferSizeParam(bufferSize))
-      .run()
-      .write(bufferSize);
+    return new FsPathOutputStreamRunner(op, f, bufferSize,
+        new BufferSizeParam(bufferSize)
+    ).run();
   }
 
   @Override
   public boolean delete(Path f, boolean recursive) throws IOException {
     final HttpOpParam.Op op = DeleteOpParam.Op.DELETE;
-    final Map<?, ?> json = run(op, f, new RecursiveParam(recursive));
-    return (Boolean)json.get("boolean");
+    return new FsPathBooleanRunner(op, f,
+        new RecursiveParam(recursive)
+    ).run();
   }
 
   @Override
@@ -942,7 +978,7 @@ public class WebHdfsFileSystem extends FileSystem
         final boolean resolved) throws IOException {
       final URL offsetUrl = offset == 0L? url
           : new URL(url + "&" + new OffsetParam(offset));
-      return new URLRunner(GetOpParam.Op.OPEN, offsetUrl, resolved).run().conn;
+      return new URLRunner(GetOpParam.Op.OPEN, offsetUrl, resolved).run();
     }  
   }
 
@@ -998,25 +1034,36 @@ public class WebHdfsFileSystem extends FileSystem
     statistics.incrementReadOps(1);
 
     final HttpOpParam.Op op = GetOpParam.Op.LISTSTATUS;
-    final Map<?, ?> json  = run(op, f);
-    final Map<?, ?> rootmap = (Map<?, ?>)json.get(FileStatus.class.getSimpleName() + "es");
-    final Object[] array = (Object[])rootmap.get(FileStatus.class.getSimpleName());
-
-    //convert FileStatus
-    final FileStatus[] statuses = new FileStatus[array.length];
-    for(int i = 0; i < array.length; i++) {
-      final Map<?, ?> m = (Map<?, ?>)array[i];
-      statuses[i] = makeQualified(JsonUtil.toFileStatus(m, false), f);
-    }
-    return statuses;
+    return new FsPathResponseRunner<FileStatus[]>(op, f) {
+      @Override
+      FileStatus[] decodeResponse(Map<?,?> json) {
+        final Map<?, ?> rootmap = (Map<?, ?>)json.get(FileStatus.class.getSimpleName() + "es");
+        final Object[] array = (Object[])rootmap.get(FileStatus.class.getSimpleName());
+
+        //convert FileStatus
+        final FileStatus[] statuses = new FileStatus[array.length];
+        for (int i = 0; i < array.length; i++) {
+          final Map<?, ?> m = (Map<?, ?>)array[i];
+          statuses[i] = makeQualified(JsonUtil.toFileStatus(m, false), f);
+        }
+        return statuses;
+      }
+    }.run();
   }
 
   @Override
   public Token<DelegationTokenIdentifier> getDelegationToken(
       final String renewer) throws IOException {
     final HttpOpParam.Op op = GetOpParam.Op.GETDELEGATIONTOKEN;
-    final Map<?, ?> m = run(op, null, new RenewerParam(renewer));
-    final Token<DelegationTokenIdentifier> token = JsonUtil.toDelegationToken(m);
+    Token<DelegationTokenIdentifier> token =
+        new FsPathResponseRunner<Token<DelegationTokenIdentifier>>(
+            op, null, new RenewerParam(renewer)) {
+      @Override
+      Token<DelegationTokenIdentifier> decodeResponse(Map<?,?> json)
+          throws IOException {
+        return JsonUtil.toDelegationToken(json);
+      }
+    }.run();
     token.setService(tokenServiceName);
     return token;
   }
@@ -1038,19 +1085,22 @@ public class WebHdfsFileSystem extends FileSystem
   public synchronized long renewDelegationToken(final Token<?> token
       ) throws IOException {
     final HttpOpParam.Op op = PutOpParam.Op.RENEWDELEGATIONTOKEN;
-    TokenArgumentParam dtargParam = new TokenArgumentParam(
-        token.encodeToUrlString());
-    final Map<?, ?> m = run(op, null, dtargParam);
-    return (Long) m.get("long");
+    return new FsPathResponseRunner<Long>(op, null,
+        new TokenArgumentParam(token.encodeToUrlString())) {
+      @Override
+      Long decodeResponse(Map<?,?> json) throws IOException {
+        return (Long) json.get("long");
+      }
+    }.run();
   }
 
   @Override
   public synchronized void cancelDelegationToken(final Token<?> token
       ) throws IOException {
     final HttpOpParam.Op op = PutOpParam.Op.CANCELDELEGATIONTOKEN;
-    TokenArgumentParam dtargParam = new TokenArgumentParam(
-        token.encodeToUrlString());
-    run(op, null, dtargParam);
+    new FsPathRunner(op, null,
+        new TokenArgumentParam(token.encodeToUrlString())
+    ).run();
   }
   
   @Override
@@ -1068,9 +1118,14 @@ public class WebHdfsFileSystem extends FileSystem
     statistics.incrementReadOps(1);
 
     final HttpOpParam.Op op = GetOpParam.Op.GET_BLOCK_LOCATIONS;
-    final Map<?, ?> m = run(op, p, new OffsetParam(offset),
-        new LengthParam(length));
-    return DFSUtil.locatedBlocks2Locations(JsonUtil.toLocatedBlocks(m));
+    return new FsPathResponseRunner<BlockLocation[]>(op, p,
+        new OffsetParam(offset), new LengthParam(length)) {
+      @Override
+      BlockLocation[] decodeResponse(Map<?,?> json) throws IOException {
+        return DFSUtil.locatedBlocks2Locations(
+            JsonUtil.toLocatedBlocks(json));
+      }
+    }.run();
   }
 
   @Override
@@ -1078,8 +1133,12 @@ public class WebHdfsFileSystem extends FileSystem
     statistics.incrementReadOps(1);
 
     final HttpOpParam.Op op = GetOpParam.Op.GETCONTENTSUMMARY;
-    final Map<?, ?> m = run(op, p);
-    return JsonUtil.toContentSummary(m);
+    return new FsPathResponseRunner<ContentSummary>(op, p) {
+      @Override
+      ContentSummary decodeResponse(Map<?,?> json) {
+        return JsonUtil.toContentSummary(json);        
+      }
+    }.run();
   }
 
   @Override
@@ -1088,15 +1147,19 @@ public class WebHdfsFileSystem extends FileSystem
     statistics.incrementReadOps(1);
   
     final HttpOpParam.Op op = GetOpParam.Op.GETFILECHECKSUM;
-    final Map<?, ?> m = run(op, p);
-    return JsonUtil.toMD5MD5CRC32FileChecksum(m);
+    return new FsPathResponseRunner<MD5MD5CRC32FileChecksum>(op, p) {
+      @Override
+      MD5MD5CRC32FileChecksum decodeResponse(Map<?,?> json) throws IOException {
+        return JsonUtil.toMD5MD5CRC32FileChecksum(json);
+      }
+    }.run();
   }
 
   /**
   * Resolve an HDFS URL into a real InetSocketAddress. It works like a DNS
   * resolver when the URL points to a non-HA cluster. When the URL points to
-   * an HA cluster, the resolver further resolves the logical name (i.e., the
-   * authority in the URL) into real namenode addresses.
+   * an HA cluster with its logical name, the resolver further resolves the
+   * logical name (i.e., the authority in the URL) into real namenode addresses.
    */
   private InetSocketAddress[] resolveNNAddr() throws IOException {
     Configuration conf = getConf();
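
Illustrative sketch (not part of this change): the typed runners above make response decoders reusable. If more operations returned a {"long": ...} body, the anonymous decoder used by renewDelegationToken could be factored into a runner mirroring FsPathBooleanRunner; the class name FsPathLongRunner is hypothetical.

  /**
   * Hypothetical sketch: a reusable runner for {"long": ...} responses,
   * modeled on FsPathBooleanRunner.
   */
  class FsPathLongRunner extends FsPathResponseRunner<Long> {
    FsPathLongRunner(Op op, Path fspath, Param<?,?>... parameters) {
      super(op, fspath, parameters);
    }

    @Override
    Long decodeResponse(Map<?,?> json) {
      return (Long)json.get("long");
    }
  }

With such a runner, renewDelegationToken would reduce to
new FsPathLongRunner(op, null, new TokenArgumentParam(token.encodeToUrlString())).run().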

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/HttpOpParam.java

@@ -102,7 +102,7 @@ public abstract class HttpOpParam<E extends Enum<E> & HttpOpParam.Op>
 
     @Override
     public boolean getDoOutput() {
-      return op.getDoOutput();
+      return false;
     }
 
     @Override

+ 804 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/native/libhdfs/test/vecsum.c

@@ -0,0 +1,804 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <malloc.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "config.h"
+#include "hdfs.h"
+
+#define VECSUM_CHUNK_SIZE (8 * 1024 * 1024)
+#define ZCR_READ_CHUNK_SIZE (1024 * 1024 * 8)
+#define NORMAL_READ_CHUNK_SIZE (8 * 1024 * 1024)
+#define DOUBLES_PER_LOOP_ITER 16
+
+static double timespec_to_double(const struct timespec *ts)
+{
+    double sec = ts->tv_sec;
+    double nsec = ts->tv_nsec;
+    return sec + (nsec / 1000000000L);
+}
+
+struct stopwatch {
+    struct timespec start;
+    struct timespec stop;
+    struct rusage rusage;
+};
+
+static struct stopwatch *stopwatch_create(void)
+{
+    struct stopwatch *watch;
+
+    watch = calloc(1, sizeof(struct stopwatch));
+    if (!watch) {
+        fprintf(stderr, "failed to allocate memory for stopwatch\n");
+        goto error;
+    }
+    if (clock_gettime(CLOCK_MONOTONIC, &watch->start)) {
+        int err = errno;
+        fprintf(stderr, "clock_gettime(CLOCK_MONOTONIC) failed with "
+            "error %d (%s)\n", err, strerror(err));
+        goto error;
+    }
+    if (getrusage(RUSAGE_THREAD, &watch->rusage) < 0) {
+        int err = errno;
+        fprintf(stderr, "getrusage failed: error %d (%s)\n",
+            err, strerror(err));
+        goto error;
+    }
+    return watch;
+
+error:
+    free(watch);
+    return NULL;
+}
+
+static void stopwatch_stop(struct stopwatch *watch,
+        long long bytes_read)
+{
+    double elapsed, rate;
+
+    if (clock_gettime(CLOCK_MONOTONIC, &watch->stop)) {
+        int err = errno;
+        fprintf(stderr, "clock_gettime(CLOCK_MONOTONIC) failed with "
+            "error %d (%s)\n", err, strerror(err));
+        goto done;
+    }
+    elapsed = timespec_to_double(&watch->stop) -
+        timespec_to_double(&watch->start);
+    rate = (bytes_read / elapsed) / (1024 * 1024 * 1024);
+    printf("stopwatch: took %.5g seconds to read %lld bytes, "
+        "for %.5g GB/s\n", elapsed, bytes_read, rate);
+    printf("stopwatch:  %.5g seconds\n", elapsed);
+done:
+    free(watch);
+}
+
+enum vecsum_type {
+    VECSUM_LOCAL = 0,
+    VECSUM_LIBHDFS,
+    VECSUM_ZCR,
+};
+
+#define VECSUM_TYPE_VALID_VALUES "libhdfs, zcr, or local"
+
+int parse_vecsum_type(const char *str)
+{
+    if (strcasecmp(str, "local") == 0)
+        return VECSUM_LOCAL;
+    else if (strcasecmp(str, "libhdfs") == 0)
+        return VECSUM_LIBHDFS;
+    else if (strcasecmp(str, "zcr") == 0)
+        return VECSUM_ZCR;
+    else
+        return -1;
+}
+
+struct options {
+    // The path to read.
+    const char *path;
+
+    // Length of the file.
+    long long length;
+
+    // The number of times to read the path.
+    int passes;
+
+    // Type of vecsum to do
+    enum vecsum_type ty;
+
+    // RPC address to use for HDFS
+    const char *rpc_address;
+};
+
+static struct options *options_create(void)
+{
+    struct options *opts = NULL;
+    const char *pass_str;
+    const char *ty_str;
+    const char *length_str;
+    int ty;
+
+    opts = calloc(1, sizeof(struct options));
+    if (!opts) {
+        fprintf(stderr, "failed to calloc options\n");
+        goto error;
+    }
+    opts->path = getenv("VECSUM_PATH");
+    if (!opts->path) {
+        fprintf(stderr, "You must set the VECSUM_PATH environment "
+            "variable to the path of the file to read.\n");
+        goto error;
+    }
+    length_str = getenv("VECSUM_LENGTH");
+    if (!length_str) {
+        length_str = "2147483648";
+    }
+    opts->length = atoll(length_str);
+    if (!opts->length) {
+        fprintf(stderr, "Can't parse VECSUM_LENGTH of '%s'.\n",
+                length_str);
+        goto error;
+    }
+    if (opts->length % VECSUM_CHUNK_SIZE) {
+        fprintf(stderr, "VECSUM_LENGTH must be a multiple of '%lld'.  The "
+                "currently specified length of '%lld' is not.\n",
+                (long long)VECSUM_CHUNK_SIZE, (long long)opts->length);
+        goto error;
+    }
+    pass_str = getenv("VECSUM_PASSES");
+    if (!pass_str) {
+        fprintf(stderr, "You must set the VECSUM_PASSES environment "
+            "variable to the number of passes to make.\n");
+        goto error;
+    }
+    opts->passes = atoi(pass_str);
+    if (opts->passes <= 0) {
+        fprintf(stderr, "Invalid value for the VECSUM_PASSES "
+            "environment variable.  You must set this to a "
+            "number greater than 0.\n");
+        goto error;
+    }
+    ty_str = getenv("VECSUM_TYPE");
+    if (!ty_str) {
+        fprintf(stderr, "You must set the VECSUM_TYPE environment "
+            "variable to " VECSUM_TYPE_VALID_VALUES "\n");
+        goto error;
+    }
+    ty = parse_vecsum_type(ty_str);
+    if (ty < 0) {
+        fprintf(stderr, "Invalid VECSUM_TYPE environment variable.  "
+            "Valid values are " VECSUM_TYPE_VALID_VALUES "\n");
+        goto error;
+    }
+    opts->ty = ty;
+    opts->rpc_address = getenv("VECSUM_RPC_ADDRESS");
+    if (!opts->rpc_address) {
+        opts->rpc_address = "default";
+    }
+    return opts;
+error:
+    free(opts);
+    return NULL;
+}
+
+static int test_file_chunk_setup(double **chunk)
+{
+    int i;
+    double *c, val;
+
+    c = malloc(VECSUM_CHUNK_SIZE);
+    if (!c) {
+        fprintf(stderr, "test_file_create: failed to malloc "
+                "a buffer of size '%lld'\n",
+                (long long) VECSUM_CHUNK_SIZE);
+        return EIO;
+    }
+    val = 0.0;
+    for (i = 0; i < VECSUM_CHUNK_SIZE / sizeof(double); i++) {
+        c[i] = val;
+        val += 0.5;
+    }
+    *chunk = c;
+    return 0;
+}
+
+static void options_free(struct options *opts)
+{
+    free(opts);
+}
+
+struct local_data {
+    int fd;
+    double *mmap;
+    long long length;
+};
+
+static int local_data_create_file(struct local_data *cdata,
+                                  const struct options *opts)
+{
+    int ret = EIO;
+    int dup_fd = -1;
+    FILE *fp = NULL;
+    double *chunk = NULL;
+    long long offset = 0;
+
+    dup_fd = dup(cdata->fd);
+    if (dup_fd < 0) {
+        ret = errno;
+        fprintf(stderr, "local_data_create_file: dup failed: %s (%d)\n",
+                strerror(ret), ret);
+        goto done;
+    }
+    fp = fdopen(dup_fd, "w");
+    if (!fp) {
+        ret = errno;
+        fprintf(stderr, "local_data_create_file: fdopen failed: %s (%d)\n",
+                strerror(ret), ret);
+        goto done;
+    }
+    ret = test_file_chunk_setup(&chunk);
+    if (ret)
+        goto done;
+    while (offset < opts->length) {
+        if (fwrite(chunk, VECSUM_CHUNK_SIZE, 1, fp) != 1) {
+            fprintf(stderr, "local_data_create_file: failed to write to "
+                    "the local file '%s' at offset %lld\n",
+                    opts->path, offset);
+            ret = EIO;
+            goto done;
+        }
+        offset += VECSUM_CHUNK_SIZE;
+    }
+    fprintf(stderr, "local_data_create_file: successfully re-wrote %s as "
+            "a file of length %lld\n", opts->path, opts->length);
+    ret = 0;
+
+done:
+    if (dup_fd >= 0) {
+        close(dup_fd);
+    }
+    if (fp) {
+        fclose(fp);
+    }
+    free(chunk);
+    return ret;
+}
+
+static struct local_data *local_data_create(const struct options *opts)
+{
+    struct local_data *cdata = NULL;
+    struct stat st_buf;
+
+    cdata = malloc(sizeof(*cdata));
+    if (!cdata) {
+        fprintf(stderr, "Failed to allocate local test data.\n");
+        goto error;
+    }
+    cdata->fd = -1;
+    cdata->mmap = MAP_FAILED;
+    cdata->length = opts->length;
+
+    cdata->fd = open(opts->path, O_RDWR | O_CREAT, 0777);
+    if (cdata->fd < 0) {
+        int err = errno;
+        fprintf(stderr, "local_data_create: failed to open %s "
+            "for read/write: error %d (%s)\n", opts->path, err, strerror(err));
+        goto error;
+    }
+    if (fstat(cdata->fd, &st_buf)) {
+        int err = errno;
+        fprintf(stderr, "local_data_create: fstat(%s) failed: "
+            "error %d (%s)\n", opts->path, err, strerror(err));
+        goto error;
+    }
+    if (st_buf.st_size != opts->length) {
+        int err;
+        fprintf(stderr, "local_data_create: current size of %s is %lld, but "
+                "we want %lld.  Re-writing the file.\n",
+                opts->path, (long long)st_buf.st_size,
+                (long long)opts->length);
+        err = local_data_create_file(cdata, opts);
+        if (err)
+            goto error;
+    }
+    cdata->mmap = mmap(NULL, cdata->length, PROT_READ,
+                       MAP_PRIVATE, cdata->fd, 0);
+    if (cdata->mmap == MAP_FAILED) {
+        int err = errno;
+        fprintf(stderr, "local_data_create: mmap(%s) failed: "
+            "error %d (%s)\n", opts->path, err, strerror(err));
+        goto error;
+    }
+    return cdata;
+
+error:
+    if (cdata) {
+        if (cdata->fd >= 0) {
+            close(cdata->fd);
+        }
+        free(cdata);
+    }
+    return NULL;
+}
+
+static void local_data_free(struct local_data *cdata)
+{
+    close(cdata->fd);
+    munmap(cdata->mmap, cdata->length);
+    free(cdata);
+}
+
+struct libhdfs_data {
+    hdfsFS fs;
+    hdfsFile file;
+    long long length;
+    double *buf;
+};
+
+static void libhdfs_data_free(struct libhdfs_data *ldata)
+{
+    if (ldata->fs) {
+        free(ldata->buf);
+        if (ldata->file) {
+            hdfsCloseFile(ldata->fs, ldata->file);
+        }
+        hdfsDisconnect(ldata->fs);
+    }
+    free(ldata);
+}
+
+static int libhdfs_data_create_file(struct libhdfs_data *ldata,
+                                    const struct options *opts)
+{
+    int ret;
+    double *chunk = NULL;
+    long long offset = 0;
+
+    ldata->file = hdfsOpenFile(ldata->fs, opts->path, O_WRONLY, 0, 1, 0);
+    if (!ldata->file) {
+        ret = errno;
+        fprintf(stderr, "libhdfs_data_create_file: hdfsOpenFile(%s, "
+            "O_WRONLY) failed: error %d (%s)\n", opts->path, ret,
+            strerror(ret));
+        goto done;
+    }
+    ret = test_file_chunk_setup(&chunk);
+    if (ret)
+        goto done;
+    while (offset < opts->length) {
+        ret = hdfsWrite(ldata->fs, ldata->file, chunk, VECSUM_CHUNK_SIZE);
+        if (ret < 0) {
+            ret = errno;
+            fprintf(stderr, "libhdfs_data_create_file: got error %d (%s) at "
+                    "offset %lld of %s\n", ret, strerror(ret),
+                    offset, opts->path);
+            goto done;
+        } else if (ret < VECSUM_CHUNK_SIZE) {
+            fprintf(stderr, "libhdfs_data_create_file: got short write "
+                    "of %d at offset %lld of %s\n", ret, offset, opts->path);
+            goto done;
+        }
+        offset += VECSUM_CHUNK_SIZE;
+    }
+    ret = 0;
+done:
+    free(chunk);
+    if (ldata->file) {
+        if (hdfsCloseFile(ldata->fs, ldata->file)) {
+            fprintf(stderr, "libhdfs_data_create_file: hdfsCloseFile error.");
+            ret = EIO;
+        }
+        ldata->file = NULL;
+    }
+    return ret;
+}
+
+static struct libhdfs_data *libhdfs_data_create(const struct options *opts)
+{
+    struct libhdfs_data *ldata = NULL;
+    struct hdfsBuilder *builder = NULL;
+    hdfsFileInfo *pinfo = NULL;
+
+    ldata = calloc(1, sizeof(struct libhdfs_data));
+    if (!ldata) {
+        fprintf(stderr, "Failed to allocate libhdfs test data.\n");
+        goto error;
+    }
+    builder = hdfsNewBuilder();
+    if (!builder) {
+        fprintf(stderr, "Failed to create builder.\n");
+        goto error;
+    }
+    hdfsBuilderSetNameNode(builder, opts->rpc_address);
+    hdfsBuilderConfSetStr(builder,
+        "dfs.client.read.shortcircuit.skip.checksum", "true");
+    ldata->fs = hdfsBuilderConnect(builder);
+    if (!ldata->fs) {
+        fprintf(stderr, "Could not connect to default namenode!\n");
+        goto error;
+    }
+    pinfo = hdfsGetPathInfo(ldata->fs, opts->path);
+    if (!pinfo) {
+        int err = errno;
+        fprintf(stderr, "hdfsGetPathInfo(%s) failed: error %d (%s).  "
+                "Attempting to re-create file.\n",
+            opts->path, err, strerror(err));
+        if (libhdfs_data_create_file(ldata, opts))
+            goto error;
+    } else if (pinfo->mSize != opts->length) {
+        fprintf(stderr, "hdfsGetPathInfo(%s) failed: length was %lld, "
+                "but we want length %lld.  Attempting to re-create file.\n",
+                opts->path, (long long)pinfo->mSize, (long long)opts->length);
+        if (libhdfs_data_create_file(ldata, opts))
+            goto error;
+    }
+    ldata->file = hdfsOpenFile(ldata->fs, opts->path, O_RDONLY, 0, 0, 0);
+    if (!ldata->file) {
+        int err = errno;
+        fprintf(stderr, "hdfsOpenFile(%s) failed: error %d (%s)\n",
+            opts->path, err, strerror(err));
+        goto error;
+    }
+    ldata->length = opts->length;
+    return ldata;
+
+error:
+    if (pinfo)
+        hdfsFreeFileInfo(pinfo, 1);
+    if (ldata)
+        libhdfs_data_free(ldata);
+    return NULL;
+}
+
+static int check_byte_size(int byte_size, const char *const str)
+{
+    if (byte_size % sizeof(double)) {
+        fprintf(stderr, "%s is not a multiple "
+            "of sizeof(double)\n", str);
+        return EINVAL;
+    }
+    if ((byte_size / sizeof(double)) % DOUBLES_PER_LOOP_ITER) {
+        fprintf(stderr, "The number of doubles contained in "
+            "%s is not a multiple of DOUBLES_PER_LOOP_ITER\n",
+            str);
+        return EINVAL;
+    }
+    return 0;
+}
+
+#ifdef HAVE_INTEL_SSE_INTRINSICS
+
+#include <emmintrin.h>
+
+static double vecsum(const double *buf, int num_doubles)
+{
+    int i;
+    double hi, lo;
+    __m128d x0, x1, x2, x3, x4, x5, x6, x7;
+    __m128d sum0 = _mm_set_pd(0.0,0.0);
+    __m128d sum1 = _mm_set_pd(0.0,0.0);
+    __m128d sum2 = _mm_set_pd(0.0,0.0);
+    __m128d sum3 = _mm_set_pd(0.0,0.0);
+    __m128d sum4 = _mm_set_pd(0.0,0.0);
+    __m128d sum5 = _mm_set_pd(0.0,0.0);
+    __m128d sum6 = _mm_set_pd(0.0,0.0);
+    __m128d sum7 = _mm_set_pd(0.0,0.0);
+    for (i = 0; i < num_doubles; i+=DOUBLES_PER_LOOP_ITER) {
+        x0 = _mm_load_pd(buf + i + 0);
+        x1 = _mm_load_pd(buf + i + 2);
+        x2 = _mm_load_pd(buf + i + 4);
+        x3 = _mm_load_pd(buf + i + 6);
+        x4 = _mm_load_pd(buf + i + 8);
+        x5 = _mm_load_pd(buf + i + 10);
+        x6 = _mm_load_pd(buf + i + 12);
+        x7 = _mm_load_pd(buf + i + 14);
+        sum0 = _mm_add_pd(sum0, x0);
+        sum1 = _mm_add_pd(sum1, x1);
+        sum2 = _mm_add_pd(sum2, x2);
+        sum3 = _mm_add_pd(sum3, x3);
+        sum4 = _mm_add_pd(sum4, x4);
+        sum5 = _mm_add_pd(sum5, x5);
+        sum6 = _mm_add_pd(sum6, x6);
+        sum7 = _mm_add_pd(sum7, x7);
+    }
+    x0 = _mm_add_pd(sum0, sum1);
+    x1 = _mm_add_pd(sum2, sum3);
+    x2 = _mm_add_pd(sum4, sum5);
+    x3 = _mm_add_pd(sum6, sum7);
+    x4 = _mm_add_pd(x0, x1);
+    x5 = _mm_add_pd(x2, x3);
+    x6 = _mm_add_pd(x4, x5);
+    _mm_storeh_pd(&hi, x6);
+    _mm_storel_pd(&lo, x6);
+    return hi + lo;
+}
+
+#else
+
+static double vecsum(const double *buf, int num_doubles)
+{
+    int i;
+    double sum = 0.0;
+    for (i = 0; i < num_doubles; i++) {
+        sum += buf[i];
+    }
+    return sum;
+}
+
+#endif
+
+static int vecsum_zcr_loop(int pass, struct libhdfs_data *ldata,
+        struct hadoopRzOptions *zopts,
+        const struct options *opts)
+{
+    int32_t len;
+    double sum = 0.0;
+    const double *buf;
+    struct hadoopRzBuffer *rzbuf = NULL;
+    int ret;
+
+    while (1) {
+        rzbuf = hadoopReadZero(ldata->file, zopts, ZCR_READ_CHUNK_SIZE);
+        if (!rzbuf) {
+            ret = errno;
+            fprintf(stderr, "hadoopReadZero failed with error "
+                "code %d (%s)\n", ret, strerror(ret));
+            goto done;
+        }
+        buf = hadoopRzBufferGet(rzbuf);
+        if (!buf) break;
+        len = hadoopRzBufferLength(rzbuf);
+        if (len < ZCR_READ_CHUNK_SIZE) {
+            fprintf(stderr, "hadoopReadZero got a partial read "
+                "of length %d\n", len);
+            ret = EINVAL;
+            goto done;
+        }
+        sum += vecsum(buf,
+            ZCR_READ_CHUNK_SIZE / sizeof(double));
+        hadoopRzBufferFree(ldata->file, rzbuf);
+    }
+    printf("finished zcr pass %d.  sum = %g\n", pass, sum);
+    ret = 0;
+
+done:
+    if (rzbuf)
+        hadoopRzBufferFree(ldata->file, rzbuf);
+    return ret;
+}
+
+static int vecsum_zcr(struct libhdfs_data *ldata,
+        const struct options *opts)
+{
+    int ret, pass;
+    struct hadoopRzOptions *zopts = NULL;
+
+    zopts = hadoopRzOptionsAlloc();
+    if (!zopts) {
+        fprintf(stderr, "hadoopRzOptionsAlloc failed.\n");
+        ret = ENOMEM;
+        goto done;
+    }
+    if (hadoopRzOptionsSetSkipChecksum(zopts, 1)) {
+        ret = errno;
+        perror("hadoopRzOptionsSetSkipChecksum failed: ");
+        goto done;
+    }
+    if (hadoopRzOptionsSetByteBufferPool(zopts, NULL)) {
+        ret = errno;
+        perror("hadoopRzOptionsSetByteBufferPool failed: ");
+        goto done;
+    }
+    for (pass = 0; pass < opts->passes; ++pass) {
+        ret = vecsum_zcr_loop(pass, ldata, zopts, opts);
+        if (ret) {
+            fprintf(stderr, "vecsum_zcr_loop pass %d failed "
+                "with error %d\n", pass, ret);
+            goto done;
+        }
+        hdfsSeek(ldata->fs, ldata->file, 0);
+    }
+    ret = 0;
+done:
+    if (zopts)
+        hadoopRzOptionsFree(zopts);
+    return ret;
+}
+
+tSize hdfsReadFully(hdfsFS fs, hdfsFile f, void* buffer, tSize length)
+{
+    uint8_t *buf = buffer;
+    tSize ret, nread = 0;
+
+    while (length > 0) {
+        ret = hdfsRead(fs, f, buf, length);
+        if (ret < 0) {
+            if (errno != EINTR) {
+                return -1;
+            }
+            continue; // interrupted before any data was read; retry
+        }
+        if (ret == 0) {
+            break;
+        }
+        nread += ret;
+        length -= ret;
+        buf += ret;
+    }
+    return nread;
+}
+
+static int vecsum_normal_loop(int pass, const struct libhdfs_data *ldata,
+            const struct options *opts)
+{
+    double sum = 0.0;
+
+    while (1) {
+        int res = hdfsReadFully(ldata->fs, ldata->file, ldata->buf,
+                NORMAL_READ_CHUNK_SIZE);
+        if (res == 0) // EOF
+            break;
+        if (res < 0) {
+            int err = errno;
+            fprintf(stderr, "hdfsRead failed with error %d (%s)\n",
+                err, strerror(err));
+            return err;
+        }
+        if (res < NORMAL_READ_CHUNK_SIZE) {
+            fprintf(stderr, "hdfsRead got a partial read of "
+                "length %d\n", res);
+            return EINVAL;
+        }
+        sum += vecsum(ldata->buf,
+                  NORMAL_READ_CHUNK_SIZE / sizeof(double));
+    }
+    printf("finished normal pass %d.  sum = %g\n", pass, sum);
+    return 0;
+}
+
+static int vecsum_libhdfs(struct libhdfs_data *ldata,
+            const struct options *opts)
+{
+    int pass;
+
+    ldata->buf = malloc(NORMAL_READ_CHUNK_SIZE);
+    if (!ldata->buf) {
+        fprintf(stderr, "failed to malloc buffer of size %d\n",
+            NORMAL_READ_CHUNK_SIZE);
+        return ENOMEM;
+    }
+    for (pass = 0; pass < opts->passes; ++pass) {
+        int ret = vecsum_normal_loop(pass, ldata, opts);
+        if (ret) {
+            fprintf(stderr, "vecsum_normal_loop pass %d failed "
+                "with error %d\n", pass, ret);
+            return ret;
+        }
+        hdfsSeek(ldata->fs, ldata->file, 0);
+    }
+    return 0;
+}
+
+static void vecsum_local(struct local_data *cdata, const struct options *opts)
+{
+    int pass;
+
+    for (pass = 0; pass < opts->passes; pass++) {
+        double sum = vecsum(cdata->mmap, cdata->length / sizeof(double));
+        printf("finished vecsum_local pass %d.  sum = %g\n", pass, sum);
+    }
+}
+
+static long long vecsum_length(const struct options *opts,
+                const struct libhdfs_data *ldata)
+{
+    if (opts->ty == VECSUM_LOCAL) {
+        struct stat st_buf = { 0 };
+        if (stat(opts->path, &st_buf)) {
+            int err = errno;
+            fprintf(stderr, "vecsum_length: stat(%s) failed: "
+                "error %d (%s)\n", opts->path, err, strerror(err));
+            return -EIO;
+        }
+        return st_buf.st_size;
+    } else {
+        return ldata->length;
+    }
+}
+
+/*
+ * vecsum is a microbenchmark which measures the speed of various ways of
+ * reading from HDFS.  It creates a file containing floating-point 'doubles',
+ * and computes the sum of all the doubles several times.  For some CPUs,
+ * assembly optimizations are used for the summation (SSE, etc).
+ */
+int main(void)
+{
+    int ret = 1;
+    struct options *opts = NULL;
+    struct local_data *cdata = NULL;
+    struct libhdfs_data *ldata = NULL;
+    struct stopwatch *watch = NULL;
+
+    if (check_byte_size(VECSUM_CHUNK_SIZE, "VECSUM_CHUNK_SIZE") ||
+        check_byte_size(ZCR_READ_CHUNK_SIZE,
+                "ZCR_READ_CHUNK_SIZE") ||
+        check_byte_size(NORMAL_READ_CHUNK_SIZE,
+                "NORMAL_READ_CHUNK_SIZE")) {
+        goto done;
+    }
+    opts = options_create();
+    if (!opts)
+        goto done;
+    if (opts->ty == VECSUM_LOCAL) {
+        cdata = local_data_create(opts);
+        if (!cdata)
+            goto done;
+    } else {
+        ldata = libhdfs_data_create(opts);
+        if (!ldata)
+            goto done;
+    }
+    watch = stopwatch_create();
+    if (!watch)
+        goto done;
+    switch (opts->ty) {
+    case VECSUM_LOCAL:
+        vecsum_local(cdata, opts);
+        ret = 0;
+        break;
+    case VECSUM_LIBHDFS:
+        ret = vecsum_libhdfs(ldata, opts);
+        break;
+    case VECSUM_ZCR:
+        ret = vecsum_zcr(ldata, opts);
+        break;
+    }
+    if (ret) {
+        fprintf(stderr, "vecsum failed with error %d\n", ret);
+        goto done;
+    }
+    ret = 0;
+done:
+    fprintf(stderr, "cleaning up...\n");
+    if (watch && (ret == 0)) {
+        long long length = vecsum_length(opts, ldata);
+        if (length >= 0) {
+            stopwatch_stop(watch, length * opts->passes);
+        }
+    }
+    if (cdata)
+        local_data_free(cdata);
+    if (ldata)
+        libhdfs_data_free(ldata);
+    if (opts)
+        options_free(opts);
+    return ret;
+}
+
+// vim: ts=4:sw=4:tw=79:et
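
For comparison, a minimal Java-side sketch of the ZCR loop above: it uses FSDataInputStream's enhanced ByteBuffer read API (ReadOption.SKIP_CHECKSUMS mirrors hadoopRzOptionsSetSkipChecksum), which is roughly what hadoopReadZero drives through JNI. Class and constant names here are illustrative, and the bytes are summed in the buffer's default big-endian order.

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.EnumSet;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.ReadOption;
import org.apache.hadoop.io.ElasticByteBufferPool;

public class ZcrVecSum {
  // Mirrors ZCR_READ_CHUNK_SIZE in vecsum.c.
  private static final int CHUNK = 8 * 1024 * 1024;

  static double sum(FileSystem fs, Path path) throws IOException {
    double total = 0.0;
    final ElasticByteBufferPool pool = new ElasticByteBufferPool();
    final FSDataInputStream in = fs.open(path);
    try {
      while (true) {
        // Zero-copy read where possible; falls back to the pool otherwise.
        final ByteBuffer buf =
            in.read(pool, CHUNK, EnumSet.of(ReadOption.SKIP_CHECKSUMS));
        if (buf == null) {
          break; // EOF
        }
        try {
          while (buf.remaining() >= 8) {
            total += buf.getDouble();
          }
        } finally {
          in.releaseBuffer(buf);
        }
      }
    } finally {
      in.close();
    }
    return total;
  }

  public static void main(String[] args) throws IOException {
    final FileSystem fs = FileSystem.get(new Configuration());
    System.out.println(sum(fs, new Path(args[0])));
  }
}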

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/native/libhdfs/test_libhdfs_threaded.c

@@ -122,7 +122,7 @@ struct tlhPaths {
 
 static int setupPaths(const struct tlhThreadInfo *ti, struct tlhPaths *paths)
 {
-    memset(paths, sizeof(*paths), 0);
+    memset(paths, 0, sizeof(*paths));
     if (snprintf(paths->prefix, sizeof(paths->prefix), "/tlhData%04d",
                  ti->threadIdx) >= sizeof(paths->prefix)) {
         return ENAMETOOLONG;

+ 1 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/datanode/index.html

@@ -18,6 +18,7 @@
 -->
 <html xmlns="http://www.w3.org/1999/xhtml">
 <head>
+<meta http-equiv="X-UA-Compatible" content="IE=9" />
 <link rel="stylesheet" type="text/css" href="/static/bootstrap-3.0.2/css/bootstrap.min.css" />
 <link rel="stylesheet" type="text/css" href="/static/hadoop.css" />
 <title>DataNode Information</title>

+ 6 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.html

@@ -18,6 +18,7 @@
 -->
 <html xmlns="http://www.w3.org/1999/xhtml">
 <head>
+<meta http-equiv="X-UA-Compatible" content="IE=9" />
 <link rel="stylesheet" type="text/css" href="/static/bootstrap-3.0.2/css/bootstrap.min.css" />
 <link rel="stylesheet" type="text/css" href="/static/hadoop.css" />
 <title>Namenode information</title>
@@ -72,7 +73,7 @@
 <script type="text/x-dust-template" id="tmpl-dfshealth">
 
 {#nn}
-{@if cond="{DistinctVersionCount} > 1 || '{RollingUpgradeStatus}'.length"}
+{@if cond="{DistinctVersionCount} > 1 || '{RollingUpgradeStatus}'.length || !'{UpgradeFinalized}'"}
 <div class="alert alert-dismissable alert-info">
   <button type="button" class="close" data-dismiss="alert" aria-hidden="true">&times;</button>
 
@@ -92,6 +93,10 @@
     {key} ({value}) {@sep},{/sep}
     {/DistinctVersions}
   {/if}
+
+  {^UpgradeFinalized}
+     <p>Upgrade in progress. Not yet finalized.</p>
+  {/UpgradeFinalized}
 </div>
 {/if}
 

+ 2 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.html

@@ -18,6 +18,7 @@
   -->
 <html xmlns="http://www.w3.org/1999/xhtml">
   <head>
+    <meta http-equiv="X-UA-Compatible" content="IE=9" />
     <link rel="stylesheet" type="text/css" href="/static/bootstrap-3.0.2/css/bootstrap.min.css" />
     <link rel="stylesheet" type="text/css" href="/static/hadoop.css" />
     <title>Browsing HDFS</title>
@@ -90,7 +91,7 @@
         <tbody>
           {#FileStatus}
           <tr>
-            <td>{type|helper_to_directory}{permission|helper_to_permission}</td>
+            <td>{type|helper_to_directory}{permission|helper_to_permission}{aclBit|helper_to_acl_bit}</td>
             <td>{owner}</td>
             <td>{group}</td>
             <td>{length|fmt_bytes}</td>

+ 1 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/journal/index.html

@@ -18,6 +18,7 @@
 -->
 <html xmlns="http://www.w3.org/1999/xhtml">
 <head>
+<meta http-equiv="X-UA-Compatible" content="IE=9" />
 <link rel="stylesheet" type="text/css" href="/static/bootstrap-3.0.2/css/bootstrap.min.css" />
 <link rel="stylesheet" type="text/css" href="/static/hadoop.css" />
 <title>JournalNode Information</title>

+ 1 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/secondary/status.html

@@ -18,6 +18,7 @@
 -->
 <html xmlns="http://www.w3.org/1999/xhtml">
 <head>
+  <meta http-equiv="X-UA-Compatible" content="IE=9" />
   <link rel="stylesheet" type="text/css"
        href="/static/bootstrap-3.0.2/css/bootstrap.min.css" />
   <link rel="stylesheet" type="text/css" href="/static/hadoop.css" />

+ 7 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dfs-dust.js

@@ -63,7 +63,8 @@
 
     'helper_to_permission': function (v) {
       var symbols = [ '---', '--x', '-w-', '-wx', 'r--', 'r-x', 'rw-', 'rwx' ];
-      var sticky = v > 1000;
+      var vInt = parseInt(v, 8);
+      var sticky = (vInt & (1 << 9)) != 0;
 
       var res = "";
       for (var i = 0; i < 3; ++i) {
@@ -72,7 +73,7 @@
       }
 
       if (sticky) {
-        var otherExec = ((v % 10) & 1) == 1;
+        var otherExec = (vInt & 1) == 1;
         res = res.substr(0, res.length - 1) + (otherExec ? 't' : 'T');
       }
 
@@ -81,6 +82,10 @@
 
     'helper_to_directory' : function (v) {
       return v === 'DIRECTORY' ? 'd' : '-';
+    },
+
+    'helper_to_acl_bit': function (v) {
+      return v ? '+' : "";
     }
   };
   $.extend(dust.filters, filters);

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsNfsGateway.apt.vm

@@ -318,7 +318,7 @@ HDFS NFS Gateway
   The users can mount the HDFS namespace as shown below:
 
 -------------------------------------------------------------------  
-       mount -t nfs -o vers=3,proto=tcp,nolock $server:/  $mount_point
+       mount -t nfs -o vers=3,proto=tcp,nolock,noacl $server:/  $mount_point
 -------------------------------------------------------------------
 
   Then the users can access HDFS as part of the local file system except that, 

+ 79 - 5
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java

@@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs;
 
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
@@ -41,12 +42,17 @@ import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.HAUtil;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider;
+import org.apache.hadoop.hdfs.server.namenode.ha.IPFailoverProxyProvider;
 import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil;
 import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.retry.DefaultFailoverProxyProvider;
+import org.apache.hadoop.io.retry.FailoverProxyProvider;
 import org.apache.hadoop.net.ConnectTimeoutException;
 import org.apache.hadoop.net.StandardSocketFactory;
+import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.util.StringUtils;
 import org.hamcrest.BaseMatcher;
@@ -172,12 +178,12 @@ public class TestDFSClientFailover {
    */
   @Test
   public void testLogicalUriShouldNotHavePorts() {
-    Configuration conf = new HdfsConfiguration();
-    conf.set(DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + ".foo",
-        ConfiguredFailoverProxyProvider.class.getName());
-    Path p = new Path("hdfs://foo:12345/");
+    Configuration config = new HdfsConfiguration(conf);
+    String logicalName = HATestUtil.getLogicalHostname(cluster);
+    HATestUtil.setFailoverConfigurations(cluster, config, logicalName);
+    Path p = new Path("hdfs://" + logicalName + ":12345/");
     try {
-      p.getFileSystem(conf).exists(p);
+      p.getFileSystem(config).exists(p);
       fail("Did not fail with fake FS");
     } catch (IOException ioe) {
       GenericTestUtils.assertExceptionContains(
@@ -278,4 +284,72 @@ public class TestDFSClientFailover {
     // Ensure that the logical hostname was never resolved.
     Mockito.verify(spyNS, Mockito.never()).lookupAllHostAddr(Mockito.eq(logicalHost));
   }
+
+  /** Dummy implementation of plain FailoverProxyProvider */
+  public static class DummyLegacyFailoverProxyProvider<T>
+      implements FailoverProxyProvider<T> {
+    private Class<T> xface;
+    private T proxy;
+    public DummyLegacyFailoverProxyProvider(Configuration conf, URI uri,
+        Class<T> xface) {
+      try {
+        this.proxy = NameNodeProxies.createNonHAProxy(conf,
+            NameNode.getAddress(uri), xface,
+            UserGroupInformation.getCurrentUser(), false).getProxy();
+        this.xface = xface;
+      } catch (IOException ioe) {
+      }
+    }
+
+    @Override
+    public Class<T> getInterface() {
+      return xface;
+    }
+
+    @Override
+    public ProxyInfo<T> getProxy() {
+      return new ProxyInfo<T>(proxy, "dummy");
+    }
+
+    @Override
+    public void performFailover(T currentProxy) {
+    }
+
+    @Override
+    public void close() throws IOException {
+    }
+  }
+
+  /**
+   * Test to verify legacy proxy providers are correctly wrapped.
+   */
+  @Test
+  public void testWrappedFailoverProxyProvider() throws Exception {
+    // setup the config with the dummy provider class
+    Configuration config = new HdfsConfiguration(conf);
+    String logicalName = HATestUtil.getLogicalHostname(cluster);
+    HATestUtil.setFailoverConfigurations(cluster, config, logicalName);
+    config.set(DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "." + logicalName,
+        DummyLegacyFailoverProxyProvider.class.getName());
+    Path p = new Path("hdfs://" + logicalName + "/");
+
+    // Logical URI should be used.
+    assertTrue("Legacy proxy providers should use logical URI.",
+        HAUtil.useLogicalUri(config, p.toUri()));
+  }
+
+  /**
+   * Test to verify IPFailoverProxyProvider does not require a logical URI.
+   */
+  @Test
+  public void testIPFailoverProxyProviderLogicalUri() throws Exception {
+    // setup the config with the IP failover proxy provider class
+    Configuration config = new HdfsConfiguration(conf);
+    URI nnUri = cluster.getURI(0);
+    config.set(DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "." +
+        nnUri.getHost(),
+        IPFailoverProxyProvider.class.getName());
+
+    assertFalse("IPFailoverProxyProvider should not use logical URI.",
+        HAUtil.useLogicalUri(config, nnUri));
+  }
+
 }
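
As a usage sketch of what testIPFailoverProxyProviderLogicalUri exercises (host name and port below are illustrative): with IP failover, the client points the proxy-provider key at the physical or virtual NameNode host rather than a logical nameservice, and no logical URI is needed.

    Configuration clientConf = new HdfsConfiguration();
    // Illustrative host; in practice this is the address (e.g. a VIP) that
    // fails over at the network layer.
    clientConf.set(DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + ".nn.example.com",
        IPFailoverProxyProvider.class.getName());
    FileSystem fs = new Path("hdfs://nn.example.com:8020/").getFileSystem(clientConf);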

+ 27 - 7
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSPermission.java

@@ -429,6 +429,7 @@ public class TestDFSPermission {
       short[] ancestorPermission, short[] parentPermission,
       short[] filePermission, Path[] parentDirs, Path[] files, Path[] dirs)
       throws Exception {
+    boolean[] isDirEmpty = new boolean[NUM_TEST_PERMISSIONS];
     login(SUPERUSER);
     for (int i = 0; i < NUM_TEST_PERMISSIONS; i++) {
       create(OpType.CREATE, files[i]);
@@ -441,6 +442,8 @@ public class TestDFSPermission {
       FsPermission fsPermission = new FsPermission(filePermission[i]);
       fs.setPermission(files[i], fsPermission);
       fs.setPermission(dirs[i], fsPermission);
+
+      isDirEmpty[i] = (fs.listStatus(dirs[i]).length == 0);
     }
 
     login(ugi);
@@ -461,7 +464,7 @@ public class TestDFSPermission {
           parentPermission[i], ancestorPermission[next], parentPermission[next]);
       testDeleteFile(ugi, files[i], ancestorPermission[i], parentPermission[i]);
       testDeleteDir(ugi, dirs[i], ancestorPermission[i], parentPermission[i],
-          filePermission[i], null);
+          filePermission[i], null, isDirEmpty[i]);
     }
     
     // test non existent file
@@ -924,7 +927,8 @@ public class TestDFSPermission {
   }
 
   /* A class that verifies the permission checking is correct for
-   * directory deletion */
+   * directory deletion
+   */
   private class DeleteDirPermissionVerifier extends DeletePermissionVerifier {
     private short[] childPermissions;
 
@@ -958,6 +962,17 @@ public class TestDFSPermission {
     }
   }
 
+  /* A class that verifies the permission checking is correct for
+   * empty-directory deletion
+   */
+  private class DeleteEmptyDirPermissionVerifier extends DeleteDirPermissionVerifier {
+    @Override
+    void setOpPermission() {
+      this.opParentPermission = SEARCH_MASK | WRITE_MASK;
+      this.opPermission = NULL_MASK;
+    }
+  }
+
   final DeletePermissionVerifier fileDeletionVerifier =
     new DeletePermissionVerifier();
 
@@ -971,14 +986,19 @@ public class TestDFSPermission {
   final DeleteDirPermissionVerifier dirDeletionVerifier =
     new DeleteDirPermissionVerifier();
 
+  final DeleteEmptyDirPermissionVerifier emptyDirDeletionVerifier =
+      new DeleteEmptyDirPermissionVerifier();
+
   /* test if the permission checking of directory deletion is correct */
   private void testDeleteDir(UserGroupInformation ugi, Path path,
       short ancestorPermission, short parentPermission, short permission,
-      short[] childPermissions) throws Exception {
-    dirDeletionVerifier.set(path, ancestorPermission, parentPermission,
-        permission, childPermissions);
-    dirDeletionVerifier.verifyPermission(ugi);
-
+      short[] childPermissions,
+      final boolean isDirEmpty) throws Exception {
+    DeleteDirPermissionVerifier ddpv = isDirEmpty ?
+        emptyDirDeletionVerifier : dirDeletionVerifier;
+    ddpv.set(path, ancestorPermission, parentPermission, permission,
+        childPermissions);
+    ddpv.verifyPermission(ugi);
   }
 
   /* log into dfs as the given user */

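Note: the new DeleteEmptyDirPermissionVerifier encodes the rule that deleting an empty directory needs only write and search (execute) permission on the parent, while a non-empty directory additionally needs access to the directory itself so its children can be removed. A plain-Java sketch of that rule; the mask values are assumed to mirror the test's constants, and the non-empty case is simplified (the real verifier also checks the children's permissions):

public class DeleteMaskSketch {
  // Assumed octal mask values, mirroring the test's constants.
  static final short NULL_MASK = 0;
  static final short SEARCH_MASK = 01;
  static final short WRITE_MASK = 02;

  // Deleting any directory needs write + search permission on its parent.
  static short requiredParentPermission() {
    return (short) (SEARCH_MASK | WRITE_MASK);
  }

  // An empty directory needs nothing on itself; a non-empty one needs
  // write + search on itself so its children can be removed.
  static short requiredDirPermission(boolean isDirEmpty) {
    return isDirEmpty ? NULL_MASK : (short) (SEARCH_MASK | WRITE_MASK);
  }

  public static void main(String[] args) {
    System.out.printf("parent: %o, empty dir: %o, non-empty dir: %o%n",
        requiredParentPermission(), requiredDirPermission(true),
        requiredDirPermission(false));
  }
}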
+ 9 - 9
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeBlockScanner.java

@@ -87,15 +87,15 @@ public class TestDatanodeBlockScanner {
   throws IOException, TimeoutException {
     URL url = new URL("http://localhost:" + infoPort +
                       "/blockScannerReport?listblocks");
-    long lastWarnTime = Time.now();
+    long lastWarnTime = Time.monotonicNow();
     if (newTime <= 0) newTime = 1L;
     long verificationTime = 0;
     
     String block = DFSTestUtil.getFirstBlock(fs, file).getBlockName();
     long failtime = (timeout <= 0) ? Long.MAX_VALUE 
-        : Time.now() + timeout;
+        : Time.monotonicNow() + timeout;
     while (verificationTime < newTime) {
-      if (failtime < Time.now()) {
+      if (failtime < Time.monotonicNow()) {
         throw new TimeoutException("failed to achieve block verification after "
             + timeout + " msec.  Current verification timestamp = "
             + verificationTime + ", requested verification time > " 
@@ -118,7 +118,7 @@ public class TestDatanodeBlockScanner {
       }
       
       if (verificationTime < newTime) {
-        long now = Time.now();
+        long now = Time.monotonicNow();
         if ((now - lastWarnTime) >= 5*1000) {
           LOG.info("Waiting for verification of " + block);
           lastWarnTime = now; 
@@ -134,7 +134,7 @@ public class TestDatanodeBlockScanner {
 
   @Test
   public void testDatanodeBlockScanner() throws IOException, TimeoutException {
-    long startTime = Time.now();
+    long startTime = Time.monotonicNow();
     
     Configuration conf = new HdfsConfiguration();
     MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
@@ -344,7 +344,7 @@ public class TestDatanodeBlockScanner {
     conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 3L);
     conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REPLICATION_CONSIDERLOAD_KEY, false);
 
-    long startTime = Time.now();
+    long startTime = Time.monotonicNow();
     MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
                                                .numDataNodes(REPLICATION_FACTOR)
                                                .build();
@@ -428,10 +428,10 @@ public class TestDatanodeBlockScanner {
   private static void waitForBlockDeleted(ExtendedBlock blk, int dnIndex,
       long timeout) throws TimeoutException, InterruptedException {
     File blockFile = MiniDFSCluster.getBlockFile(dnIndex, blk);
-    long failtime = Time.now() 
+    long failtime = Time.monotonicNow()
                     + ((timeout > 0) ? timeout : Long.MAX_VALUE);
     while (blockFile != null && blockFile.exists()) {
-      if (failtime < Time.now()) {
+      if (failtime < Time.monotonicNow()) {
         throw new TimeoutException("waited too long for blocks to be deleted: "
             + blockFile.getPath() + (blockFile.exists() ? " still exists; " : " is absent; "));
       }
@@ -462,7 +462,7 @@ public class TestDatanodeBlockScanner {
 
   @Test
   public void testDuplicateScans() throws Exception {
-    long startTime = Time.now();
+    long startTime = Time.monotonicNow();
     MiniDFSCluster cluster = new MiniDFSCluster.Builder(new Configuration())
         .numDataNodes(1).build();
     FileSystem fs = null;

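Note: replacing Time.now() with Time.monotonicNow() keeps the timeout logic immune to wall-clock jumps such as NTP corrections. A JDK-only (Java 8) sketch of the same deadline pattern; System.nanoTime() plays the role that org.apache.hadoop.util.Time.monotonicNow() plays in the patched tests:

import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.function.BooleanSupplier;

public class MonotonicDeadlineSketch {
  // Waits for a condition with a deadline measured on the monotonic clock,
  // so adjusting the wall clock can neither fire nor postpone the timeout.
  static void waitUntil(BooleanSupplier done, long timeoutMs)
      throws TimeoutException, InterruptedException {
    final long deadline = System.nanoTime() + TimeUnit.MILLISECONDS.toNanos(timeoutMs);
    while (!done.getAsBoolean()) {
      if (System.nanoTime() - deadline >= 0) {   // overflow-safe comparison
        throw new TimeoutException("condition not met within " + timeoutMs + " ms");
      }
      Thread.sleep(100);
    }
  }

  public static void main(String[] args) throws Exception {
    final long start = System.nanoTime();
    // Trivially true after ~300 ms, just to exercise the helper.
    waitUntil(() -> System.nanoTime() - start > TimeUnit.MILLISECONDS.toNanos(300), 5000);
    System.out.println("condition met before the deadline");
  }
}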
+ 274 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFsShellPermission.java

@@ -0,0 +1,274 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfs;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.net.URI;
+import java.security.PrivilegedExceptionAction;
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileSystemTestHelper;
+import org.apache.hadoop.fs.FsShell;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.junit.Test;
+
+/**
+ * This test covers privilege-related aspects of FsShell.
+ *
+ */
+public class TestFsShellPermission {
+
+  static private final String TEST_ROOT = "/testroot";
+
+  static UserGroupInformation createUGI(String ownername, String groupName) {
+    return UserGroupInformation.createUserForTesting(ownername,
+        new String[]{groupName});
+  }
+
+  private class FileEntry {
+    private String path;
+    private boolean isDir;
+    private String owner;
+    private String group;
+    private String permission;
+    public FileEntry(String path, boolean isDir,
+        String owner, String group, String permission) {
+      this.path = path;
+      this.isDir = isDir;
+      this.owner = owner;
+      this.group = group;
+      this.permission = permission;
+    }
+    String getPath() { return path; }
+    boolean isDirectory() { return isDir; }
+    String getOwner() { return owner; }
+    String getGroup() { return group; }
+    String getPermission() { return permission; }
+  }
+
+  private void createFiles(FileSystem fs, String topdir,
+      FileEntry[] entries) throws IOException {
+    for (FileEntry entry : entries) {
+      String newPathStr = topdir + "/" + entry.getPath();
+      Path newPath = new Path(newPathStr);
+      if (entry.isDirectory()) {
+        fs.mkdirs(newPath);
+      } else {
+        FileSystemTestHelper.createFile(fs, newPath);
+      }
+      fs.setPermission(newPath, new FsPermission(entry.getPermission()));
+      fs.setOwner(newPath, entry.getOwner(), entry.getGroup());
+    }
+  }
+
+  /** delete directory and everything underneath it.*/
+  private static void deldir(FileSystem fs, String topdir) throws IOException {
+    fs.delete(new Path(topdir), true);
+  }
+
+  static String execCmd(FsShell shell, final String[] args) throws Exception {
+    ByteArrayOutputStream baout = new ByteArrayOutputStream();
+    PrintStream out = new PrintStream(baout, true);
+    PrintStream old = System.out;
+    System.setOut(out);
+    int ret = shell.run(args);
+    out.close();
+    System.setOut(old);
+    return String.valueOf(ret);
+  }
+
+  /*
+   * Each instance of TestDeleteHelper captures one testing scenario.
+   *
+   * It creates all files listed in fileEntries, then deletes deleteEntry as
+   * user doAsUser with the command and options specified in cmdAndOptions.
+   *
+   * When expectedToDelete is true, deleteEntry is expected to be deleted;
+   * otherwise it is not. At the end of the test, the existence of deleteEntry
+   * is checked against expectedToDelete to ensure the command finished with
+   * the expected result.
+   */
+  private class TestDeleteHelper {
+    private FileEntry[] fileEntries;
+    private FileEntry deleteEntry;
+    private String cmdAndOptions;
+    private boolean expectedToDelete;
+
+    final String doAsGroup;
+    final UserGroupInformation userUgi;
+
+    public TestDeleteHelper(
+        FileEntry[] fileEntries,
+        FileEntry deleteEntry,
+        String cmdAndOptions,
+        String doAsUser,
+        boolean expectedToDelete) {
+      this.fileEntries = fileEntries;
+      this.deleteEntry = deleteEntry;
+      this.cmdAndOptions = cmdAndOptions;
+      this.expectedToDelete = expectedToDelete;
+
+      doAsGroup = doAsUser.equals("hdfs") ? "supergroup" : "users";
+      userUgi = createUGI(doAsUser, doAsGroup);
+    }
+
+    public void execute(Configuration conf, FileSystem fs) throws Exception {
+      fs.mkdirs(new Path(TEST_ROOT));
+
+      createFiles(fs, TEST_ROOT, fileEntries);
+      final FsShell fsShell = new FsShell(conf);
+      final String deletePath = TEST_ROOT + "/" + deleteEntry.getPath();
+
+      String[] tmpCmdOpts = StringUtils.split(cmdAndOptions);
+      ArrayList<String> tmpArray = new ArrayList<String>(Arrays.asList(tmpCmdOpts));
+      tmpArray.add(deletePath);
+      final String[] cmdOpts = tmpArray.toArray(new String[tmpArray.size()]);
+      userUgi.doAs(new PrivilegedExceptionAction<String>() {
+        public String run() throws Exception {
+          return execCmd(fsShell, cmdOpts);
+        }
+      });
+
+      boolean deleted = !fs.exists(new Path(deletePath));
+      assertEquals(expectedToDelete, deleted);
+
+      deldir(fs, TEST_ROOT);
+    }
+  }
+
+  private TestDeleteHelper genDeleteEmptyDirHelper(final String cmdOpts,
+      final String targetPerm,
+      final String asUser,
+      boolean expectedToDelete) {
+    FileEntry[] files = {
+        new FileEntry("userA", true, "userA", "users", "755"),
+        new FileEntry("userA/userB", true, "userB", "users", targetPerm)
+    };
+    FileEntry deleteEntry = files[1];
+    return new TestDeleteHelper(files, deleteEntry, cmdOpts, asUser,
+        expectedToDelete);
+  }
+
+  // Expect target to be deleted
+  private TestDeleteHelper genRmrEmptyDirWithReadPerm() {
+    return genDeleteEmptyDirHelper("-rm -r", "744", "userA", true);
+  }
+
+  // Expect target to be deleted
+  private TestDeleteHelper genRmrEmptyDirWithNoPerm() {
+    return genDeleteEmptyDirHelper("-rm -r", "700", "userA", true);
+  }
+
+  // Expect target to be deleted
+  private TestDeleteHelper genRmrfEmptyDirWithNoPerm() {
+    return genDeleteEmptyDirHelper("-rm -r -f", "700", "userA", true);
+  }
+
+  private TestDeleteHelper genDeleteNonEmptyDirHelper(final String cmd,
+      final String targetPerm,
+      final String asUser,
+      boolean expectedToDelete) {
+    FileEntry[] files = {
+        new FileEntry("userA", true, "userA", "users", "755"),
+        new FileEntry("userA/userB", true, "userB", "users", targetPerm),
+        new FileEntry("userA/userB/xyzfile", false, "userB", "users",
+            targetPerm)
+    };
+    FileEntry deleteEntry = files[1];
+    return new TestDeleteHelper(files, deleteEntry, cmd, asUser,
+        expectedToDelete);
+  }
+
+  // Expect target not to be deleted
+  private TestDeleteHelper genRmrNonEmptyDirWithReadPerm() {
+    return genDeleteNonEmptyDirHelper("-rm -r", "744", "userA", false);
+  }
+
+  // Expect target not to be deleted
+  private TestDeleteHelper genRmrNonEmptyDirWithNoPerm() {
+    return genDeleteNonEmptyDirHelper("-rm -r", "700", "userA", false);
+  }
+
+  // Expect target to be deleted
+  private TestDeleteHelper genRmrNonEmptyDirWithAllPerm() {
+    return genDeleteNonEmptyDirHelper("-rm -r", "777", "userA", true);
+  }
+
+  // Expect target not to be deleted
+  private TestDeleteHelper genRmrfNonEmptyDirWithNoPerm() {
+    return genDeleteNonEmptyDirHelper("-rm -r -f", "700", "userA", false);
+  }
+
+  // Expect target to be deleted
+  public TestDeleteHelper genDeleteSingleFileNotAsOwner() throws Exception {
+    FileEntry[] files = {
+        new FileEntry("userA", true, "userA", "users", "755"),
+        new FileEntry("userA/userB", false, "userB", "users", "700")
+    };
+    FileEntry deleteEntry = files[1];
+    return new TestDeleteHelper(files, deleteEntry, "-rm -r", "userA", true);
+  }
+
+  @Test
+  public void testDelete() throws Exception {
+    Configuration conf = null;
+    MiniDFSCluster cluster = null;
+    try {
+      conf = new Configuration();
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
+
+      String nnUri = FileSystem.getDefaultUri(conf).toString();
+      FileSystem fs = FileSystem.get(URI.create(nnUri), conf);
+
+      ArrayList<TestDeleteHelper> ta = new ArrayList<TestDeleteHelper>();
+
+      // Add empty dir tests
+      ta.add(genRmrEmptyDirWithReadPerm());
+      ta.add(genRmrEmptyDirWithNoPerm());
+      ta.add(genRmrfEmptyDirWithNoPerm());
+
+      // Add non-empty dir tests
+      ta.add(genRmrNonEmptyDirWithReadPerm());
+      ta.add(genRmrNonEmptyDirWithNoPerm());
+      ta.add(genRmrNonEmptyDirWithAllPerm());
+      ta.add(genRmrfNonEmptyDirWithNoPerm());
+
+      // Add single file test
+      ta.add(genDeleteSingleFileNotAsOwner());
+
+      // Run all tests
+      for(TestDeleteHelper t : ta) {
+        t.execute(conf, fs);
+      }
+    } finally {
+      if (cluster != null) { cluster.shutdown(); }
+    }
+  }
+}

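Note: the helper above runs FsShell commands as a specific test user by wrapping the call in UserGroupInformation.doAs and capturing the exit code. A standalone sketch of that pattern, with placeholder user and path names:

import java.security.PrivilegedExceptionAction;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FsShell;
import org.apache.hadoop.security.UserGroupInformation;

public class RunShellAsUserSketch {
  public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();
    final FsShell shell = new FsShell(conf);
    UserGroupInformation ugi =
        UserGroupInformation.createUserForTesting("userA", new String[] {"users"});
    int exitCode = ugi.doAs(new PrivilegedExceptionAction<Integer>() {
      @Override
      public Integer run() throws Exception {
        // "-rm -r" against a hypothetical path; 0 means the delete succeeded.
        return shell.run(new String[] {"-rm", "-r", "/testroot/userA/userB"});
      }
    });
    System.out.println("exit code: " + exitCode);
  }
}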
+ 162 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingInvalidateBlock.java

@@ -0,0 +1,162 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.blockmanagement;
+
+import org.apache.commons.logging.impl.Log4JLogger;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeys;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
+import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
+import org.apache.log4j.Level;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mockito;
+import org.mockito.internal.util.reflection.Whitebox;
+
+/**
+ * Test if we can correctly delay the deletion of blocks.
+ */
+public class TestPendingInvalidateBlock {
+  {
+    ((Log4JLogger)BlockManager.LOG).getLogger().setLevel(Level.DEBUG);    
+  }
+
+  private static final int BLOCKSIZE = 1024;
+  private static final short REPLICATION = 2;
+
+  private Configuration conf;
+  private MiniDFSCluster cluster;
+  private DistributedFileSystem dfs;
+
+  @Before
+  public void setUp() throws Exception {
+    conf = new Configuration();
+    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCKSIZE);
+    // block deletion pending period
+    conf.setLong(DFSConfigKeys.DFS_NAMENODE_STARTUP_DELAY_BLOCK_DELETION_MS_KEY, 1000 * 5);
+    // set the block report interval to 2s
+    conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 2000);
+    conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
+    conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY, 1);
+    // disable the RPC timeout for debug
+    conf.setLong(CommonConfigurationKeys.IPC_PING_INTERVAL_KEY, 0);
+    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(REPLICATION)
+        .build();
+    cluster.waitActive();
+    dfs = cluster.getFileSystem();
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    if (cluster != null) {
+      cluster.shutdown();
+    }
+  }
+
+  @Test
+  public void testPendingDeletion() throws Exception {
+    final Path foo = new Path("/foo");
+    DFSTestUtil.createFile(dfs, foo, BLOCKSIZE, REPLICATION, 0);
+    // restart NN
+    cluster.restartNameNode(true);
+    dfs.delete(foo, true);
+    Assert.assertEquals(0, cluster.getNamesystem().getBlocksTotal());
+    Assert.assertEquals(REPLICATION, cluster.getNamesystem()
+        .getPendingDeletionBlocks());
+    Thread.sleep(6000);
+    Assert.assertEquals(0, cluster.getNamesystem().getBlocksTotal());
+    Assert.assertEquals(0, cluster.getNamesystem().getPendingDeletionBlocks());
+  }
+
+  /**
+   * Test whether we can delay the deletion of unknown blocks in DataNode's
+   * first several block reports.
+   */
+  @Test
+  public void testPendingDeleteUnknownBlocks() throws Exception {
+    final int fileNum = 5; // 5 files
+    final Path[] files = new Path[fileNum];
+    final DataNodeProperties[] dnprops = new DataNodeProperties[REPLICATION];
+    // create a group of files, each file contains 1 block
+    for (int i = 0; i < fileNum; i++) {
+      files[i] = new Path("/file" + i);
+      DFSTestUtil.createFile(dfs, files[i], BLOCKSIZE, REPLICATION, i);
+    }
+    // wait until all DataNodes have replicas
+    waitForReplication();
+    for (int i = REPLICATION - 1; i >= 0; i--) {
+      dnprops[i] = cluster.stopDataNode(i);
+    }
+    Thread.sleep(2000);
+    // delete 2 files, we still have 3 files remaining so that we can cover
+    // every DN storage
+    for (int i = 0; i < 2; i++) {
+      dfs.delete(files[i], true);
+    }
+
+    // restart NameNode
+    cluster.restartNameNode(false);
+    InvalidateBlocks invalidateBlocks = (InvalidateBlocks) Whitebox
+        .getInternalState(cluster.getNamesystem().getBlockManager(),
+            "invalidateBlocks");
+    InvalidateBlocks mockIb = Mockito.spy(invalidateBlocks);
+    Mockito.doReturn(1L).when(mockIb).getInvalidationDelay();
+    Whitebox.setInternalState(cluster.getNamesystem().getBlockManager(),
+        "invalidateBlocks", mockIb);
+
+    Assert.assertEquals(0L, cluster.getNamesystem().getPendingDeletionBlocks());
+    // restart DataNodes
+    for (int i = 0; i < REPLICATION; i++) {
+      cluster.restartDataNode(dnprops[i], true);
+    }
+    cluster.waitActive();
+
+    for (int i = 0; i < REPLICATION; i++) {
+      DataNodeTestUtils.triggerBlockReport(cluster.getDataNodes().get(i));
+    }
+    Thread.sleep(2000);
+    // make sure we have received block reports by checking the total block #
+    Assert.assertEquals(3, cluster.getNamesystem().getBlocksTotal());
+    Assert.assertEquals(4, cluster.getNamesystem().getPendingDeletionBlocks());
+
+    cluster.restartNameNode(true);
+    Thread.sleep(6000);
+    Assert.assertEquals(3, cluster.getNamesystem().getBlocksTotal());
+    Assert.assertEquals(0, cluster.getNamesystem().getPendingDeletionBlocks());
+  }
+
+  private long waitForReplication() throws Exception {
+    for (int i = 0; i < 10; i++) {
+      long ur = cluster.getNamesystem().getUnderReplicatedBlocks();
+      if (ur == 0) {
+        return 0;
+      } else {
+        Thread.sleep(1000);
+      }
+    }
+    return cluster.getNamesystem().getUnderReplicatedBlocks();
+  }
+
+}

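Note: testPendingDeleteUnknownBlocks shortens the invalidation delay at runtime by spying on the BlockManager's internal InvalidateBlocks instance and swapping the spy back in through Mockito's Whitebox helper. A generic sketch of that spy-and-inject step; only the field and method names shown in the test come from the patch, the rest is illustrative:

import org.mockito.Mockito;
import org.mockito.internal.util.reflection.Whitebox;

public final class SpyInjectSketch {
  // Wraps an internal collaborator in a Mockito spy and pushes it back into
  // the owning object, so a single method can be stubbed without new config.
  public static <T> T injectSpy(Object owner, String fieldName) {
    @SuppressWarnings("unchecked")
    T real = (T) Whitebox.getInternalState(owner, fieldName);
    T spy = Mockito.spy(real);
    Whitebox.setInternalState(owner, fieldName, spy);
    return spy;  // caller then stubs it, e.g. Mockito.doReturn(1L).when(spy)...
  }
}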
+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/common/TestJspHelper.java

@@ -30,6 +30,7 @@ import org.apache.hadoop.io.Text;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
 import org.apache.hadoop.security.authorize.AuthorizationException;
+import org.apache.hadoop.security.authorize.ProxyServers;
 import org.apache.hadoop.security.authorize.ProxyUsers;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.token.TokenIdentifier;
@@ -51,7 +52,6 @@ import static org.mockito.Mockito.when;
 public class TestJspHelper {
 
   private final Configuration conf = new HdfsConfiguration();
-  private String jspWriterOutput = "";
 
   // allow user with TGT to run tests
   @BeforeClass
@@ -450,7 +450,7 @@ public class TestJspHelper {
       when(req.getRemoteAddr()).thenReturn(proxyAddr);
       when(req.getHeader("X-Forwarded-For")).thenReturn(clientAddr);
       if (trusted) {
-        conf.set(ProxyUsers.CONF_HADOOP_PROXYSERVERS, proxyAddr);
+        conf.set(ProxyServers.CONF_HADOOP_PROXYSERVERS, proxyAddr);
       }
     }
     ProxyUsers.refreshSuperUserGroupsConfiguration(conf);

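Note: the trusted proxy-server address list moves from ProxyUsers to the new ProxyServers class. A minimal configuration sketch with a placeholder address, using only the calls visible in this patch:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.authorize.ProxyServers;
import org.apache.hadoop.security.authorize.ProxyUsers;

public class TrustedProxySketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Address of a trusted proxy (placeholder); requests forwarded from it
    // may carry the real client address in X-Forwarded-For.
    conf.set(ProxyServers.CONF_HADOOP_PROXYSERVERS, "10.0.0.1");
    ProxyUsers.refreshSuperUserGroupsConfiguration(conf);
  }
}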
+ 18 - 13
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDiskError.java

@@ -18,16 +18,13 @@
 package org.apache.hadoop.hdfs.server.datanode;
 
 import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
 import java.io.DataOutputStream;
 import java.io.File;
+import java.io.IOException;
 import java.net.InetSocketAddress;
 import java.net.Socket;
-import java.net.SocketException;
-import java.net.SocketTimeoutException;
-import java.nio.channels.ClosedChannelException;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -201,15 +198,23 @@ public class TestDiskError {
     }
   }
   
+  /**
+   * Checks whether {@link DataNode#checkDiskError()} is being called or not.
+   * Before the refactoring, the method above was not getting called.
+   * @throws IOException, InterruptedException
+   */
   @Test
-  public void testNetworkErrorsIgnored() {
-    DataNode dn = cluster.getDataNodes().iterator().next();
-    
-    assertTrue(dn.isNetworkRelatedException(new SocketException()));
-    assertTrue(dn.isNetworkRelatedException(new SocketTimeoutException()));
-    assertTrue(dn.isNetworkRelatedException(new ClosedChannelException()));
-    assertTrue(dn.isNetworkRelatedException(new Exception("Broken pipe foo bar")));
-    assertFalse(dn.isNetworkRelatedException(new Exception()));
-    assertFalse(dn.isNetworkRelatedException(new Exception("random problem")));
+  public void testcheckDiskError() throws IOException, InterruptedException {
+    if(cluster.getDataNodes().size() <= 0) {
+      cluster.startDataNodes(conf, 1, true, null, null);
+      cluster.waitActive();
+    }
+    DataNode dataNode = cluster.getDataNodes().get(0);
+    long slackTime = dataNode.checkDiskErrorInterval/2;
+    //checking for disk error
+    dataNode.checkDiskError();
+    Thread.sleep(dataNode.checkDiskErrorInterval);
+    long lastDiskErrorCheck = dataNode.getLastDiskErrorCheck();
+    assertTrue("Disk Error check is not performed within " + dataNode.checkDiskErrorInterval + " ms", ((System.currentTimeMillis() - lastDiskErrorCheck) < (dataNode.checkDiskErrorInterval + slackTime)));
   }
 }

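Note: the rewritten test asserts that a periodic action ran recently, allowing half an interval of slack for scheduling. A plain-Java sketch of that timing assertion with hypothetical interval values:

public class RecentRunAssertSketch {
  public static void main(String[] args) throws InterruptedException {
    long intervalMs = 1000;                      // hypothetical check interval
    long slackMs = intervalMs / 2;
    long lastRun = System.currentTimeMillis();   // stand-in for getLastDiskErrorCheck()
    Thread.sleep(intervalMs);                    // give the periodic task one full interval
    long sinceLastRun = System.currentTimeMillis() - lastRun;
    if (sinceLastRun >= intervalMs + slackMs) {
      throw new AssertionError("check not performed within " + (intervalMs + slackMs) + " ms");
    }
    System.out.println("last run was " + sinceLastRun + " ms ago, within tolerance");
  }
}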
+ 5 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/AclTestHelpers.java

@@ -27,6 +27,7 @@ import org.apache.hadoop.fs.permission.AclEntry;
 import org.apache.hadoop.fs.permission.AclEntryScope;
 import org.apache.hadoop.fs.permission.AclEntryType;
 import org.apache.hadoop.fs.permission.FsAction;
+import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.security.AccessControlException;
 import org.apache.hadoop.security.UserGroupInformation;
@@ -150,6 +151,9 @@ public final class AclTestHelpers {
    */
   public static void assertPermission(FileSystem fs, Path pathToCheck,
       short perm) throws IOException {
-    assertEquals(perm, fs.getFileStatus(pathToCheck).getPermission().toShort());
+    short filteredPerm = (short)(perm & 01777);
+    FsPermission fsPermission = fs.getFileStatus(pathToCheck).getPermission();
+    assertEquals(filteredPerm, fsPermission.toShort());
+    assertEquals(((perm & (1 << 12)) != 0), fsPermission.getAclBit());
   }
 }

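Note: assertPermission now compares only the low 12 permission bits and treats bit 12 of the expected value as the ACL flag, which is why the call sites below pass values such as 010750. A plain-Java sketch of how such a value decodes:

public class AclBitSketch {
  public static void main(String[] args) {
    short expected = (short) 010750;                 // value passed to assertPermission
    short mode = (short) (expected & 01777);         // classic rwx + sticky bits -> 0750
    boolean aclBit = (expected & (1 << 12)) != 0;    // true: an ACL is expected on the path
    System.out.printf("mode=%o aclBit=%b%n", mode, aclBit);
  }
}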
+ 52 - 34
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSAclBaseTest.java

@@ -38,6 +38,7 @@ import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.protocol.AclException;
+import org.apache.hadoop.hdfs.protocol.FsAclPermission;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.security.AccessControlException;
 import org.apache.hadoop.security.UserGroupInformation;
@@ -118,7 +119,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, READ_EXECUTE),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)0750);
+    assertPermission((short)010750);
     assertAclFeature(true);
   }
 
@@ -140,7 +141,7 @@ public abstract class FSAclBaseTest {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "foo", READ_EXECUTE),
       aclEntry(ACCESS, GROUP, READ_EXECUTE) }, returned);
-    assertPermission((short)0750);
+    assertPermission((short)010750);
     assertAclFeature(true);
   }
 
@@ -161,7 +162,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, READ_EXECUTE),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)0750);
+    assertPermission((short)010750);
     assertAclFeature(true);
   }
 
@@ -177,7 +178,7 @@ public abstract class FSAclBaseTest {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "foo", READ_WRITE),
       aclEntry(ACCESS, GROUP, READ) }, returned);
-    assertPermission((short)0660);
+    assertPermission((short)010660);
     assertAclFeature(true);
   }
 
@@ -195,7 +196,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, USER, ALL),
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)0750);
+    assertPermission((short)010750);
     assertAclFeature(true);
   }
 
@@ -212,7 +213,7 @@ public abstract class FSAclBaseTest {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "foo", ALL),
       aclEntry(ACCESS, GROUP, READ) }, returned);
-    assertPermission((short)0600);
+    assertPermission((short)010600);
     assertAclFeature(true);
   }
 
@@ -240,7 +241,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, READ_EXECUTE),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)01750);
+    assertPermission((short)011750);
     assertAclFeature(true);
   }
 
@@ -286,7 +287,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, READ_EXECUTE),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)0750);
+    assertPermission((short)010750);
     assertAclFeature(true);
   }
 
@@ -309,7 +310,7 @@ public abstract class FSAclBaseTest {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "bar", READ_WRITE),
       aclEntry(ACCESS, GROUP, READ_WRITE) }, returned);
-    assertPermission((short)0760);
+    assertPermission((short)010760);
     assertAclFeature(true);
   }
 
@@ -334,7 +335,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, READ_EXECUTE),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)0750);
+    assertPermission((short)010750);
     assertAclFeature(true);
   }
 
@@ -382,7 +383,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, USER, ALL),
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)0750);
+    assertPermission((short)010750);
     assertAclFeature(true);
   }
 
@@ -408,7 +409,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, READ_EXECUTE),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)01750);
+    assertPermission((short)011750);
     assertAclFeature(true);
   }
 
@@ -436,7 +437,7 @@ public abstract class FSAclBaseTest {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "foo", ALL),
       aclEntry(ACCESS, GROUP, READ_EXECUTE) }, returned);
-    assertPermission((short)0770);
+    assertPermission((short)010770);
     assertAclFeature(true);
   }
 
@@ -456,7 +457,7 @@ public abstract class FSAclBaseTest {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "foo", ALL),
       aclEntry(ACCESS, GROUP, READ_EXECUTE) }, returned);
-    assertPermission((short)0770);
+    assertPermission((short)010770);
     assertAclFeature(true);
   }
 
@@ -501,7 +502,7 @@ public abstract class FSAclBaseTest {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "foo", ALL),
       aclEntry(ACCESS, GROUP, READ_EXECUTE) }, returned);
-    assertPermission((short)01770);
+    assertPermission((short)011770);
     assertAclFeature(true);
   }
 
@@ -602,7 +603,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, ALL),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)0770);
+    assertPermission((short)010770);
     assertAclFeature(true);
   }
 
@@ -621,7 +622,7 @@ public abstract class FSAclBaseTest {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "foo", READ),
       aclEntry(ACCESS, GROUP, READ) }, returned);
-    assertPermission((short)0640);
+    assertPermission((short)010640);
     assertAclFeature(true);
   }
 
@@ -639,7 +640,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, ALL),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)0750);
+    assertPermission((short)010750);
     assertAclFeature(true);
   }
 
@@ -679,7 +680,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, USER, ALL),
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)0750);
+    assertPermission((short)010750);
     assertAclFeature(true);
   }
 
@@ -699,7 +700,7 @@ public abstract class FSAclBaseTest {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "foo", READ),
       aclEntry(ACCESS, GROUP, READ) }, returned);
-    assertPermission((short)0670);
+    assertPermission((short)010670);
     assertAclFeature(true);
   }
 
@@ -723,7 +724,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, ALL),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)01770);
+    assertPermission((short)011770);
     assertAclFeature(true);
   }
 
@@ -768,7 +769,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, ALL),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)0700);
+    assertPermission((short)010700);
     assertAclFeature(true);
   }
 
@@ -788,7 +789,7 @@ public abstract class FSAclBaseTest {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "foo", READ),
       aclEntry(ACCESS, GROUP, READ) }, returned);
-    assertPermission((short)0600);
+    assertPermission((short)010600);
     assertAclFeature(true);
   }
 
@@ -810,10 +811,27 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, ALL),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)0700);
+    assertPermission((short)010700);
     assertAclFeature(true);
   }
 
+  @Test
+  public void testSetPermissionCannotSetAclBit() throws IOException {
+    FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short)0750));
+    fs.setPermission(path, FsPermission.createImmutable((short)0700));
+    assertPermission((short)0700);
+    fs.setPermission(path,
+      new FsAclPermission(FsPermission.createImmutable((short)0755)));
+    INode inode = cluster.getNamesystem().getFSDirectory().getNode(
+      path.toUri().getPath(), false);
+    assertNotNull(inode);
+    FsPermission perm = inode.getFsPermission();
+    assertNotNull(perm);
+    assertEquals(0755, perm.toShort());
+    assertEquals(0755, perm.toExtendedShort());
+    assertAclFeature(false);
+  }
+
   @Test
   public void testDefaultAclNewFile() throws Exception {
     FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short)0750));
@@ -827,7 +845,7 @@ public abstract class FSAclBaseTest {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "foo", ALL),
       aclEntry(ACCESS, GROUP, READ_EXECUTE) }, returned);
-    assertPermission(filePath, (short)0640);
+    assertPermission(filePath, (short)010640);
     assertAclFeature(filePath, true);
   }
 
@@ -881,7 +899,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, ALL),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission(dirPath, (short)0750);
+    assertPermission(dirPath, (short)010750);
     assertAclFeature(dirPath, true);
   }
 
@@ -916,7 +934,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, USER, ALL),
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission(dirPath, (short)0750);
+    assertPermission(dirPath, (short)010750);
     assertAclFeature(dirPath, true);
   }
 
@@ -940,7 +958,7 @@ public abstract class FSAclBaseTest {
     AclStatus s = fs.getAclStatus(dirPath);
     AclEntry[] returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission(dirPath, (short)0750);
+    assertPermission(dirPath, (short)010750);
     assertAclFeature(dirPath, true);
     expected = new AclEntry[] {
       aclEntry(ACCESS, USER, "foo", ALL),
@@ -948,7 +966,7 @@ public abstract class FSAclBaseTest {
     s = fs.getAclStatus(filePath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission(filePath, (short)0640);
+    assertPermission(filePath, (short)010640);
     assertAclFeature(filePath, true);
   }
 
@@ -972,12 +990,12 @@ public abstract class FSAclBaseTest {
     AclStatus s = fs.getAclStatus(dirPath);
     AclEntry[] returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission(dirPath, (short)0750);
+    assertPermission(dirPath, (short)010750);
     assertAclFeature(dirPath, true);
     s = fs.getAclStatus(subdirPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission(subdirPath, (short)0750);
+    assertPermission(subdirPath, (short)010750);
     assertAclFeature(subdirPath, true);
   }
 
@@ -1004,7 +1022,7 @@ public abstract class FSAclBaseTest {
     AclStatus s = fs.getAclStatus(dirPath);
     AclEntry[] returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission(dirPath, (short)0750);
+    assertPermission(dirPath, (short)010750);
     assertAclFeature(dirPath, true);
     expected = new AclEntry[] { };
     s = fs.getAclStatus(linkPath);
@@ -1037,7 +1055,7 @@ public abstract class FSAclBaseTest {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "foo", ALL),
       aclEntry(ACCESS, GROUP, READ_EXECUTE) }, returned);
-    assertPermission(filePath, (short)0740);
+    assertPermission(filePath, (short)010740);
     assertAclFeature(filePath, true);
   }
 
@@ -1059,7 +1077,7 @@ public abstract class FSAclBaseTest {
       aclEntry(DEFAULT, GROUP, READ_EXECUTE),
       aclEntry(DEFAULT, MASK, ALL),
       aclEntry(DEFAULT, OTHER, READ_EXECUTE) }, returned);
-    assertPermission(dirPath, (short)0740);
+    assertPermission(dirPath, (short)010740);
     assertAclFeature(dirPath, true);
   }
 

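Note: the new testSetPermissionCannotSetAclBit verifies that a client cannot turn the ACL bit on through setPermission, even by passing an FsAclPermission wrapper. A small sketch of what the wrapper itself reports, assuming FsAclPermission simply marks a mode as ACL-backed on the wire:

import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.protocol.FsAclPermission;

public class FsAclPermissionSketch {
  public static void main(String[] args) {
    FsPermission plain = FsPermission.createImmutable((short) 0755);
    FsPermission wrapped = new FsAclPermission(plain);   // marks the mode as ACL-backed
    System.out.println(plain.getAclBit());    // false
    System.out.println(wrapped.getAclBit());  // expected true on the client side only;
                                              // setPermission does not persist the bit
  }
}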
+ 3 - 2
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogger.java

@@ -32,14 +32,15 @@ import java.net.URISyntaxException;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.web.resources.GetOpParam;
 import org.apache.hadoop.ipc.RemoteException;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.authorize.ProxyUsers;
+import org.apache.hadoop.security.authorize.ProxyServers;
 import org.junit.Before;
 import org.junit.Test;
 
@@ -120,7 +121,7 @@ public class TestAuditLogger {
       assertEquals("127.0.0.1", DummyAuditLogger.remoteAddr);
       
       // trusted proxied request
-      conf.set(ProxyUsers.CONF_HADOOP_PROXYSERVERS, "127.0.0.1");
+      conf.set(ProxyServers.CONF_HADOOP_PROXYSERVERS, "127.0.0.1");
       ProxyUsers.refreshSuperUserGroupsConfiguration(conf);
       conn = (HttpURLConnection) uri.toURL().openConnection();
       conn.setRequestMethod(op.getType().toString());

+ 43 - 2
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java

@@ -27,6 +27,7 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertSame;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
@@ -43,6 +44,7 @@ import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
 
+import com.google.common.io.Files;
 import org.apache.commons.cli.ParseException;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -87,7 +89,6 @@ import org.apache.hadoop.util.ExitUtil.ExitException;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.log4j.Level;
 import org.junit.After;
-import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
 import org.mockito.ArgumentMatcher;
@@ -1084,7 +1085,7 @@ public class TestCheckpoint {
       
       FSDirectory secondaryFsDir = secondary.getFSNamesystem().dir;
       INode rootInMap = secondaryFsDir.getInode(secondaryFsDir.rootDir.getId());
-      Assert.assertSame(rootInMap, secondaryFsDir.rootDir);
+      assertSame(rootInMap, secondaryFsDir.rootDir);
       
       fileSys.delete(tmpDir, true);
       fileSys.mkdirs(tmpDir);
@@ -2404,6 +2405,46 @@ public class TestCheckpoint {
     }
   }
 
+  @Test
+  public void testLegacyOivImage() throws Exception {
+    MiniDFSCluster cluster = null;
+    SecondaryNameNode secondary = null;
+    File tmpDir = Files.createTempDir();
+    Configuration conf = new HdfsConfiguration();
+    conf.set(DFSConfigKeys.DFS_NAMENODE_LEGACY_OIV_IMAGE_DIR_KEY,
+        tmpDir.getAbsolutePath());
+    conf.set(DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY,
+        "2");
+
+    try {
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
+              .format(true).build();
+
+      secondary = startSecondaryNameNode(conf);
+
+      // Checkpoint once
+      secondary.doCheckpoint();
+      String files1[] = tmpDir.list();
+      assertEquals("Only one file is expected", 1, files1.length);
+
+      // Perform more checkpointings and check whether retention management
+      // is working.
+      secondary.doCheckpoint();
+      secondary.doCheckpoint();
+      String files2[] = tmpDir.list();
+      assertEquals("Two files are expected", 2, files2.length);
+
+      // Verify that the first file is deleted.
+      for (String fName : files2) {
+        assertFalse(fName.equals(files1[0]));
+      }
+    } finally {
+      cleanup(secondary);
+      cleanup(cluster);
+      tmpDir.delete();
+    }
+  }
+
   private static void cleanup(SecondaryNameNode snn) {
     if (snn != null) {
       try {

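Note: testLegacyOivImage depends on two settings: the directory for legacy OIV (offline image viewer) images and the number of retained checkpoints that governs their cleanup. A configuration sketch with a placeholder path:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;

public class LegacyOivConfigSketch {
  public static Configuration build() {
    Configuration conf = new HdfsConfiguration();
    // Placeholder local directory where the checkpointer writes legacy OIV images.
    conf.set(DFSConfigKeys.DFS_NAMENODE_LEGACY_OIV_IMAGE_DIR_KEY, "/tmp/oiv-images");
    // Retention: keep the two most recent images, as in the test.
    conf.setInt(DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY, 2);
    return conf;
  }
}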
+ 6 - 6
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithAcl.java

@@ -142,7 +142,7 @@ public class TestFSImageWithAcl {
     AclEntry[] subdirReturned = fs.getAclStatus(subdirPath).getEntries()
       .toArray(new AclEntry[0]);
     Assert.assertArrayEquals(subdirExpected, subdirReturned);
-    assertPermission(fs, subdirPath, (short)0755);
+    assertPermission(fs, subdirPath, (short)010755);
 
     restart(fs, persistNamespace);
 
@@ -152,7 +152,7 @@ public class TestFSImageWithAcl {
     subdirReturned = fs.getAclStatus(subdirPath).getEntries()
       .toArray(new AclEntry[0]);
     Assert.assertArrayEquals(subdirExpected, subdirReturned);
-    assertPermission(fs, subdirPath, (short)0755);
+    assertPermission(fs, subdirPath, (short)010755);
 
     aclSpec = Lists.newArrayList(aclEntry(DEFAULT, USER, "foo", READ_WRITE));
     fs.modifyAclEntries(dirPath, aclSpec);
@@ -163,7 +163,7 @@ public class TestFSImageWithAcl {
     subdirReturned = fs.getAclStatus(subdirPath).getEntries()
       .toArray(new AclEntry[0]);
     Assert.assertArrayEquals(subdirExpected, subdirReturned);
-    assertPermission(fs, subdirPath, (short)0755);
+    assertPermission(fs, subdirPath, (short)010755);
 
     restart(fs, persistNamespace);
 
@@ -173,7 +173,7 @@ public class TestFSImageWithAcl {
     subdirReturned = fs.getAclStatus(subdirPath).getEntries()
       .toArray(new AclEntry[0]);
     Assert.assertArrayEquals(subdirExpected, subdirReturned);
-    assertPermission(fs, subdirPath, (short)0755);
+    assertPermission(fs, subdirPath, (short)010755);
 
     fs.removeAcl(dirPath);
 
@@ -183,7 +183,7 @@ public class TestFSImageWithAcl {
     subdirReturned = fs.getAclStatus(subdirPath).getEntries()
       .toArray(new AclEntry[0]);
     Assert.assertArrayEquals(subdirExpected, subdirReturned);
-    assertPermission(fs, subdirPath, (short)0755);
+    assertPermission(fs, subdirPath, (short)010755);
 
     restart(fs, persistNamespace);
 
@@ -193,7 +193,7 @@ public class TestFSImageWithAcl {
     subdirReturned = fs.getAclStatus(subdirPath).getEntries()
       .toArray(new AclEntry[0]);
     Assert.assertArrayEquals(subdirExpected, subdirReturned);
-    assertPermission(fs, subdirPath, (short)0755);
+    assertPermission(fs, subdirPath, (short)010755);
   }
 
   @Test

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSPermissionChecker.java

@@ -393,14 +393,14 @@ public class TestFSPermissionChecker {
   private void assertPermissionGranted(UserGroupInformation user, String path,
       FsAction access) throws IOException {
     new FSPermissionChecker(SUPERUSER, SUPERGROUP, user).checkPermission(path,
-      dir, false, null, null, access, null, true);
+      dir, false, null, null, access, null, false, true);
   }
 
   private void assertPermissionDenied(UserGroupInformation user, String path,
       FsAction access) throws IOException {
     try {
       new FSPermissionChecker(SUPERUSER, SUPERGROUP, user).checkPermission(path,
-        dir, false, null, null, access, null, true);
+        dir, false, null, null, access, null, false, true);
       fail("expected AccessControlException for user + " + user + ", path = " +
         path + ", access = " + access);
     } catch (AccessControlException e) {

+ 1 - 4
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java

@@ -191,12 +191,9 @@ public class TestRetryCacheWithHA {
   
   private DFSClient genClientWithDummyHandler() throws IOException {
     URI nnUri = dfs.getUri();
-    Class<FailoverProxyProvider<ClientProtocol>> failoverProxyProviderClass = 
-        NameNodeProxies.getFailoverProxyProviderClass(conf, nnUri, 
-            ClientProtocol.class);
     FailoverProxyProvider<ClientProtocol> failoverProxyProvider = 
         NameNodeProxies.createFailoverProxyProvider(conf, 
-            failoverProxyProviderClass, ClientProtocol.class, nnUri);
+            nnUri, ClientProtocol.class, true);
     InvocationHandler dummyHandler = new DummyRetryInvocationHandler(
         failoverProxyProvider, RetryPolicies
         .failoverOnNetworkException(RetryPolicies.TRY_ONCE_THEN_FAIL,

+ 9 - 2
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java

@@ -66,24 +66,28 @@ import com.google.common.base.Supplier;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Lists;
-
+import com.google.common.io.Files;
 
 public class TestStandbyCheckpoints {
   private static final int NUM_DIRS_IN_LOG = 200000;
   protected MiniDFSCluster cluster;
   protected NameNode nn0, nn1;
   protected FileSystem fs;
+  protected File tmpOivImgDir;
   
   private static final Log LOG = LogFactory.getLog(TestStandbyCheckpoints.class);
 
   @SuppressWarnings("rawtypes")
   @Before
   public void setupCluster() throws Exception {
+    tmpOivImgDir = Files.createTempDir();
     Configuration conf = new Configuration();
     conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_KEY, 1);
     conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 5);
     conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
-    
+    conf.set(DFSConfigKeys.DFS_NAMENODE_LEGACY_OIV_IMAGE_DIR_KEY,
+        tmpOivImgDir.getAbsolutePath());
+
     // Dial down the retention of extra edits and checkpoints. This is to
     // help catch regressions of HDFS-4238 (SBN should not purge shared edits)
     conf.setInt(DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY, 1);
@@ -129,6 +133,9 @@ public class TestStandbyCheckpoints {
     // Once the standby catches up, it should notice that it needs to
     // do a checkpoint and save one to its local directories.
     HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(12));
+
+    // It should have saved the oiv image too.
+    assertEquals("One file is expected", 1, tmpOivImgDir.list().length);
     
     // It should also upload it back to the active.
     HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(12));

+ 28 - 28
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestAclWithSnapshot.java

@@ -119,14 +119,14 @@ public class TestAclWithSnapshot {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "bruce", READ_EXECUTE),
       aclEntry(ACCESS, GROUP, NONE) }, returned);
-    assertPermission((short)0750, path);
+    assertPermission((short)010750, path);
 
     s = hdfs.getAclStatus(snapshotPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "bruce", READ_EXECUTE),
       aclEntry(ACCESS, GROUP, NONE) }, returned);
-    assertPermission((short)0750, snapshotPath);
+    assertPermission((short)010750, snapshotPath);
 
     assertDirPermissionGranted(fsAsBruce, BRUCE, snapshotPath);
     assertDirPermissionDenied(fsAsDiana, DIANA, snapshotPath);
@@ -153,14 +153,14 @@ public class TestAclWithSnapshot {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "diana", READ_EXECUTE),
       aclEntry(ACCESS, GROUP, NONE) }, returned);
-    assertPermission((short)0550, path);
+    assertPermission((short)010550, path);
 
     s = hdfs.getAclStatus(snapshotPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "bruce", READ_EXECUTE),
       aclEntry(ACCESS, GROUP, NONE) }, returned);
-    assertPermission((short)0750, snapshotPath);
+    assertPermission((short)010750, snapshotPath);
 
     assertDirPermissionDenied(fsAsBruce, BRUCE, path);
     assertDirPermissionGranted(fsAsDiana, DIANA, path);
@@ -202,24 +202,24 @@ public class TestAclWithSnapshot {
     AclStatus s = hdfs.getAclStatus(filePath);
     AclEntry[] returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission((short)0550, filePath);
+    assertPermission((short)010550, filePath);
 
     s = hdfs.getAclStatus(subdirPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission((short)0550, subdirPath);
+    assertPermission((short)010550, subdirPath);
 
     s = hdfs.getAclStatus(fileSnapshotPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission((short)0550, fileSnapshotPath);
+    assertPermission((short)010550, fileSnapshotPath);
     assertFilePermissionGranted(fsAsBruce, BRUCE, fileSnapshotPath);
     assertFilePermissionDenied(fsAsDiana, DIANA, fileSnapshotPath);
 
     s = hdfs.getAclStatus(subdirSnapshotPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission((short)0550, subdirSnapshotPath);
+    assertPermission((short)010550, subdirSnapshotPath);
     assertDirPermissionGranted(fsAsBruce, BRUCE, subdirSnapshotPath);
     assertDirPermissionDenied(fsAsDiana, DIANA, subdirSnapshotPath);
 
@@ -251,14 +251,14 @@ public class TestAclWithSnapshot {
     AclStatus s = hdfs.getAclStatus(filePath);
     AclEntry[] returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission((short)0570, filePath);
+    assertPermission((short)010570, filePath);
     assertFilePermissionDenied(fsAsBruce, BRUCE, filePath);
     assertFilePermissionGranted(fsAsDiana, DIANA, filePath);
 
     s = hdfs.getAclStatus(subdirPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission((short)0570, subdirPath);
+    assertPermission((short)010570, subdirPath);
     assertDirPermissionDenied(fsAsBruce, BRUCE, subdirPath);
     assertDirPermissionGranted(fsAsDiana, DIANA, subdirPath);
 
@@ -268,14 +268,14 @@ public class TestAclWithSnapshot {
     s = hdfs.getAclStatus(fileSnapshotPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission((short)0550, fileSnapshotPath);
+    assertPermission((short)010550, fileSnapshotPath);
     assertFilePermissionGranted(fsAsBruce, BRUCE, fileSnapshotPath);
     assertFilePermissionDenied(fsAsDiana, DIANA, fileSnapshotPath);
 
     s = hdfs.getAclStatus(subdirSnapshotPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission((short)0550, subdirSnapshotPath);
+    assertPermission((short)010550, subdirSnapshotPath);
     assertDirPermissionGranted(fsAsBruce, BRUCE, subdirSnapshotPath);
     assertDirPermissionDenied(fsAsDiana, DIANA, subdirSnapshotPath);
   }
@@ -302,14 +302,14 @@ public class TestAclWithSnapshot {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "bruce", READ_EXECUTE),
       aclEntry(ACCESS, GROUP, NONE) }, returned);
-    assertPermission((short)0750, path);
+    assertPermission((short)010750, path);
 
     s = hdfs.getAclStatus(snapshotPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "bruce", READ_EXECUTE),
       aclEntry(ACCESS, GROUP, NONE) }, returned);
-    assertPermission((short)0750, snapshotPath);
+    assertPermission((short)010750, snapshotPath);
 
     assertDirPermissionGranted(fsAsBruce, BRUCE, snapshotPath);
     assertDirPermissionDenied(fsAsDiana, DIANA, snapshotPath);
@@ -336,7 +336,7 @@ public class TestAclWithSnapshot {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "bruce", READ_EXECUTE),
       aclEntry(ACCESS, GROUP, NONE) }, returned);
-    assertPermission((short)0750, snapshotPath);
+    assertPermission((short)010750, snapshotPath);
 
     assertDirPermissionDenied(fsAsBruce, BRUCE, path);
     assertDirPermissionDenied(fsAsDiana, DIANA, path);
@@ -378,24 +378,24 @@ public class TestAclWithSnapshot {
     AclStatus s = hdfs.getAclStatus(filePath);
     AclEntry[] returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission((short)0550, filePath);
+    assertPermission((short)010550, filePath);
 
     s = hdfs.getAclStatus(subdirPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission((short)0550, subdirPath);
+    assertPermission((short)010550, subdirPath);
 
     s = hdfs.getAclStatus(fileSnapshotPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission((short)0550, fileSnapshotPath);
+    assertPermission((short)010550, fileSnapshotPath);
     assertFilePermissionGranted(fsAsBruce, BRUCE, fileSnapshotPath);
     assertFilePermissionDenied(fsAsDiana, DIANA, fileSnapshotPath);
 
     s = hdfs.getAclStatus(subdirSnapshotPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission((short)0550, subdirSnapshotPath);
+    assertPermission((short)010550, subdirSnapshotPath);
     assertDirPermissionGranted(fsAsBruce, BRUCE, subdirSnapshotPath);
     assertDirPermissionDenied(fsAsDiana, DIANA, subdirSnapshotPath);
 
@@ -437,14 +437,14 @@ public class TestAclWithSnapshot {
     s = hdfs.getAclStatus(fileSnapshotPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission((short)0550, fileSnapshotPath);
+    assertPermission((short)010550, fileSnapshotPath);
     assertFilePermissionGranted(fsAsBruce, BRUCE, fileSnapshotPath);
     assertFilePermissionDenied(fsAsDiana, DIANA, fileSnapshotPath);
 
     s = hdfs.getAclStatus(subdirSnapshotPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission((short)0550, subdirSnapshotPath);
+    assertPermission((short)010550, subdirSnapshotPath);
     assertDirPermissionGranted(fsAsBruce, BRUCE, subdirSnapshotPath);
     assertDirPermissionDenied(fsAsDiana, DIANA, subdirSnapshotPath);
   }
@@ -470,7 +470,7 @@ public class TestAclWithSnapshot {
     AclStatus s = hdfs.getAclStatus(path);
     AclEntry[] returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(expected, returned);
-    assertPermission((short)0770, path);
+    assertPermission((short)010770, path);
     assertDirPermissionGranted(fsAsBruce, BRUCE, path);
     assertDirPermissionGranted(fsAsDiana, DIANA, path);
   }
@@ -514,7 +514,7 @@ public class TestAclWithSnapshot {
       aclEntry(DEFAULT, GROUP, NONE),
       aclEntry(DEFAULT, MASK, READ_EXECUTE),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)0700, path);
+    assertPermission((short)010700, path);
 
     s = hdfs.getAclStatus(snapshotPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
@@ -524,7 +524,7 @@ public class TestAclWithSnapshot {
       aclEntry(DEFAULT, GROUP, NONE),
       aclEntry(DEFAULT, MASK, READ_EXECUTE),
       aclEntry(DEFAULT, OTHER, NONE) }, returned);
-    assertPermission((short)0700, snapshotPath);
+    assertPermission((short)010700, snapshotPath);
 
     assertDirPermissionDenied(fsAsBruce, BRUCE, snapshotPath);
   }
@@ -596,14 +596,14 @@ public class TestAclWithSnapshot {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "bruce", READ_WRITE),
       aclEntry(ACCESS, GROUP, NONE) }, returned);
-    assertPermission((short)0660, filePath);
+    assertPermission((short)010660, filePath);
 
     s = hdfs.getAclStatus(fileSnapshotPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "bruce", READ_WRITE),
       aclEntry(ACCESS, GROUP, NONE) }, returned);
-    assertPermission((short)0660, filePath);
+    assertPermission((short)010660, filePath);
 
     aclSpec = Lists.newArrayList(
       aclEntry(ACCESS, USER, "bruce", READ));
@@ -632,14 +632,14 @@ public class TestAclWithSnapshot {
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "bruce", READ_WRITE),
       aclEntry(ACCESS, GROUP, NONE) }, returned);
-    assertPermission((short)0660, filePath);
+    assertPermission((short)010660, filePath);
 
     s = hdfs.getAclStatus(fileSnapshotPath);
     returned = s.getEntries().toArray(new AclEntry[0]);
     assertArrayEquals(new AclEntry[] {
       aclEntry(ACCESS, USER, "bruce", READ_WRITE),
       aclEntry(ACCESS, GROUP, NONE) }, returned);
-    assertPermission((short)0660, filePath);
+    assertPermission((short)010660, filePath);
 
     aclSpec = Lists.newArrayList(
       aclEntry(ACCESS, USER, "bruce", READ));

Not all files can be shown because too many files were changed in this diff